/* eslint-disable no-use-before-define */
import * as interpreter from 'words-to-numbers';
import { ClientUtils } from '../../ClientUtils';
import { Doc, Opt } from '../../fields/Doc';
import { List } from '../../fields/List';
import { RichTextField } from '../../fields/RichTextField';
import { listSpec } from '../../fields/Schema';
import { Cast, CastCtor } from '../../fields/Types';
import { AudioField, ImageField } from '../../fields/URLField';
import { AudioAnnoState } from '../../server/SharedMediaTypes';
import { Networking } from '../Network';
import { DocumentType } from '../documents/DocumentTypes';
import { Docs } from '../documents/Documents';
import { DictationOverlay } from '../views/DictationOverlay';
import { DocumentView } from '../views/nodes/DocumentView';
import { OpenWhere } from '../views/nodes/OpenWhere';
import { UndoManager } from './UndoManager';

/**
 * This namespace provides a singleton instance of a manager that
 * handles the listening and text-conversion of user speech.
 *
 * The basic manager functionality is available through the DictationManager.Controls namespace, which provides
 * a simple recording operation that returns the interpreted text as a string.
 *
 * Additionally, the DictationManager exposes the ability to execute voice commands within Dash.
 * It stores a default library of registered commands that can be triggered by calling listen() for a phrase and then
 * passing the results into the execute() function.
 *
 * In addition to the compile-time default commands, you can invoke DictationManager.Commands.Register(Independent|Dependent)
 * to add new commands as classes or components are constructed.
 */
export namespace DictationManager {
    /**
     * Some type maneuvering to access Webkit's built-in
     * speech recognizer.
     */
    namespace CORE {
        export interface IWindow extends Window {
            webkitSpeechRecognition: { new (): SpeechRecognition };
        }
    }
    const { webkitSpeechRecognition }: CORE.IWindow = window as unknown as CORE.IWindow;

    export const placeholder = 'Listening...';

    export namespace Controls {
        export const Infringed = 'unable to process: dictation manager still involved in previous session';

        const browser = (() => {
            const identifier = navigator.userAgent.toLowerCase();
            if (identifier.indexOf('safari') >= 0) {
                return 'Safari';
            }
            if (identifier.indexOf('chrome') >= 0) {
                return 'Chrome';
            }
            if (identifier.indexOf('firefox') >= 0) {
                return 'Firefox';
            }
            return 'Unidentified Browser';
        })();
        const unsupported = `listening is not supported in ${browser}`;
        const intraSession = '. ';
        const interSession = ' ... ';

        let isListening = false;
        let isManuallyStopped = false;
        let current: string | undefined;
        let sessionResults: string[] = [];

        const recognizer: Opt<SpeechRecognition> = webkitSpeechRecognition ? new webkitSpeechRecognition() : undefined;

        export type InterimResultHandler = (results: string) => void;
        export type ContinuityArgs = { indefinite: boolean } | false;
        export type DelimiterArgs = { inter: string; intra: string };
        export type ListeningUIStatus = { interim: boolean } | false;

        export interface ListeningOptions {
            useOverlay: boolean;
            language: string;
            continuous: ContinuityArgs;
            delimiters: DelimiterArgs;
            interimHandler: InterimResultHandler;
            tryExecute: boolean;
            terminators: string[];
        }

        let pendingListen: Promise<string> | string | undefined;
        // Serializes listen() calls: if a session is already pending, queue this one behind it.
        export const listen = async (options?: Partial<ListeningOptions>) => {
            if (pendingListen instanceof Promise) return pendingListen.then(() => innerListen(options));
            return innerListen(options);
        };
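        /*
         * A minimal usage sketch (all option names come from ListeningOptions above;
         * omitted options fall back to their defaults):
         *
         *   const transcript = await DictationManager.Controls.listen({
         *       useOverlay: true,                  // show the dictation overlay while recording
         *       continuous: { indefinite: false }, // end the session at the first pause
         *       interimHandler: text => console.log('so far:', text),
         *       tryExecute: false,                 // just return the text; don't run it as a command
         *   });
         *   transcript && console.log('final transcript:', transcript);
         */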
        // Runs a single dictation session, optionally mirroring progress in the DictationOverlay.
        const innerListen = async (options?: Partial<ListeningOptions>) => {
            let results: string | undefined;
            const overlay = options?.useOverlay;
            if (overlay) {
                DictationOverlay.Instance.dictationOverlayVisible = true;
                DictationOverlay.Instance.isListening = { interim: false };
            }
            try {
                results = await (pendingListen = listenImpl(options));
                pendingListen = undefined;
                if (results) {
                    ClientUtils.CopyText(results);
                    if (overlay) {
                        DictationOverlay.Instance.isListening = false;
                        const execute = options?.tryExecute;
                        DictationOverlay.Instance.dictatedPhrase = execute ? results.toLowerCase() : results;
                        DictationOverlay.Instance.dictationSuccess = execute ? await DictationManager.Commands.execute(results) : true;
                    }
                    options?.tryExecute && (await DictationManager.Commands.execute(results));
                }
            } catch (e) {
                console.log(e);
                if (overlay) {
                    DictationOverlay.Instance.isListening = false;
                    DictationOverlay.Instance.dictatedPhrase = results = `dictation error: ${(e as { error: string }).error || 'unknown error'}`;
                    DictationOverlay.Instance.dictationSuccess = false;
                }
            } finally {
                overlay && DictationOverlay.Instance.initiateDictationFade();
            }
            return results;
        };

        const listenImpl = (options?: Partial<ListeningOptions>) => {
            if (!recognizer) {
                console.log('DictationManager: ' + unsupported);
                return unsupported;
            }
            if (isListening) {
                return Infringed;
            }
            isListening = true;

            const handler = options?.interimHandler;
            const continuous = options?.continuous;
            const indefinite = continuous && continuous.indefinite;
            const language = options?.language;
            const intra = options?.delimiters?.intra;
            const inter = options?.delimiters?.inter;

            recognizer.onstart = () => console.log('initiating speech recognition session...');
            recognizer.interimResults = handler !== undefined;
            recognizer.continuous = continuous === undefined ? false : continuous !== false;
            recognizer.lang = language === undefined ? 'en-US' : language;
            recognizer.start();

            return new Promise<string>(resolve => {
                recognizer.onerror = e => {
                    // e is a SpeechRecognitionErrorEvent, but where is that type defined?
                    if (!(indefinite && e.error === 'no-speech')) {
                        recognizer.stop();
                        resolve(e.message);
                    }
                };
                recognizer.onresult = (e: SpeechRecognitionEvent) => {
                    current = synthesize(e, intra);
                    const matchedTerminator = options?.terminators?.find(end => (current ? current.trim().toLowerCase().endsWith(end.toLowerCase()) : false));
                    if (options?.terminators && matchedTerminator) {
                        current = matchedTerminator;
                        recognizer.abort();
                        return complete();
                    }
                    !isManuallyStopped && handler?.(current);
                    // isManuallyStopped && complete()
                    return undefined;
                };
                recognizer.onend = () => {
                    if (!indefinite || isManuallyStopped) {
                        return complete();
                    }
                    // In indefinite mode, bank the latest transcript and restart the recognizer.
                    if (current) {
                        !isManuallyStopped && sessionResults.push(current);
                        current = undefined;
                    }
                    recognizer.start();
                    return undefined;
                };
                const complete = () => {
                    if (indefinite) {
                        current && sessionResults.push(current);
                        sessionResults.length && resolve(sessionResults.join(inter || interSession));
                    } else {
                        resolve(current || '');
                    }
                    current = undefined;
                    sessionResults = [];
                    isListening = false;
                    isManuallyStopped = false;
                    recognizer.onresult = null;
                    recognizer.onerror = null;
                    recognizer.onend = null;
                };
            });
        };

        export const stop = (/* salvageSession = true */) => {
            if (!isListening || !recognizer) {
                return;
            }
            isListening = false;
            isManuallyStopped = true;
            recognizer.stop();
            // salvageSession ? recognizer.stop() : recognizer.abort();
        };

        // Joins the best-guess transcript of every result in the event into a single string.
        const synthesize = (e: SpeechRecognitionEvent, delimiter?: string) => {
            const { results } = e;
            const transcripts: string[] = [];
            for (let i = 0; i < results.length; i++) {
                transcripts.push(results.item(i).item(0).transcript.trim());
            }
            return transcripts.join(delimiter || intraSession);
        };
    }

    export namespace Commands {
        export const dictationFadeDuration = 2000;

        export type IndependentAction = (target: DocumentView) => void | Promise<void>;
        export type IndependentEntry = { action: IndependentAction; restrictTo?: DocumentType[] };

        export type DependentAction = (target: DocumentView, matches: RegExpExecArray) => void | Promise<void>;
        export type DependentEntry = { expression: RegExp; action: DependentAction; restrictTo?: DocumentType[] };

        export const RegisterIndependent = (key: string, value: IndependentEntry) => Independent.set(key, value);
        export const RegisterDependent = (entry: DependentEntry) => {
            const { expression, action, restrictTo } = entry;
            return Dependent.push({ expression, action, restrictTo: restrictTo ?? [] });
        };
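        /*
         * A minimal registration sketch (the phrases and actions are illustrative, not built in).
         * Independent commands are looked up by exact phrase (lowercased by execute());
         * dependent commands are matched by regular expression, with capture groups passed along:
         *
         *   DictationManager.Commands.RegisterIndependent('clear selection', {
         *       action: (target: DocumentView) => { ... }, // runs on each selected, type-matching view
         *       restrictTo: [DocumentType.COL],            // optional here; omit to allow any document type
         *   });
         *
         *   DictationManager.Commands.RegisterDependent({
         *       expression: /rename to (\w+)/g,
         *       action: (target: DocumentView, matches: RegExpExecArray) => { ... },
         *       restrictTo: [DocumentType.RTF],
         *   });
         */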
        export const execute = async (phrase: string) =>
            UndoManager.RunInBatch(async () => {
                console.log('PHRASE: ' + phrase);
                const targets = DocumentView.Selected();
                if (!targets || !targets.length) {
                    return undefined;
                }

                // eslint-disable-next-line no-param-reassign
                phrase = phrase.toLowerCase();

                // Exact-phrase (independent) commands take precedence.
                const entry = Independent.get(phrase);
                if (entry) {
                    let success = false;
                    const { restrictTo } = entry;
                    for (const target of targets) {
                        if (!restrictTo || validate(target, restrictTo)) {
                            // eslint-disable-next-line no-await-in-loop
                            await entry.action(target);
                            success = true;
                        }
                    }
                    return success;
                }

                // Otherwise, fall back to the first regex (dependent) command that matches.
                for (const depEntry of Dependent) {
                    const regex = depEntry.expression;
                    const matches = regex.exec(phrase);
                    regex.lastIndex = 0;
                    if (matches !== null) {
                        let success = false;
                        const { restrictTo } = depEntry;
                        for (const target of targets) {
                            if (!restrictTo || validate(target, restrictTo)) {
                                // eslint-disable-next-line no-await-in-loop
                                await depEntry.action(target, matches);
                                success = true;
                            }
                        }
                        return success;
                    }
                }
                return false;
            }, 'Execute Command');
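        /*
         * The listen-then-execute flow described in the namespace comment, as a sketch:
         *
         *   const phrase = await DictationManager.Controls.listen({ useOverlay: false });
         *   if (phrase) {
         *       const ran = await DictationManager.Commands.execute(phrase);
         *       console.log(ran ? 'command executed' : 'no matching command');
         *   }
         *
         * Passing tryExecute: true to listen() performs the same execute() call internally.
         */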
        const ConstructorMap = new Map<DocumentType, CastCtor>([
            [DocumentType.COL, listSpec(Doc)],
            [DocumentType.AUDIO, AudioField],
            [DocumentType.IMG, ImageField],
            [DocumentType.RTF, 'string'],
        ]);

        const tryCast = (view: DocumentView, type: DocumentType) => {
            const ctor = ConstructorMap.get(type);
            if (!ctor) {
                return false;
            }
            return Cast(Doc.GetProto(view.Document).data, ctor) !== undefined;
        };

        // A target view satisfies the restriction if its data field casts to any of the listed types.
        const validate = (target: DocumentView, types: DocumentType[]) => {
            for (const type of types) {
                if (tryCast(target, type)) {
                    return true;
                }
            }
            return false;
        };

        // Parses a numeric string, falling back to fuzzy word-to-number conversion ("twenty one" => 21).
        const interpretNumber = (number: string) => {
            const initial = parseInt(number, 10);
            if (!isNaN(initial)) {
                return initial;
            }
            const converted = interpreter.wordsToNumbers(number, { fuzzy: true });
            if (converted === null) {
                return NaN;
            }
            return typeof converted === 'string' ? parseInt(converted, 10) : converted;
        };

        const Independent = new Map<string, IndependentEntry>([
            [
                'clear',
                {
                    action: (target: DocumentView) => {
                        Doc.GetProto(target.Document).data = new List<Doc>();
                    },
                    restrictTo: [DocumentType.COL],
                },
            ],
            [
                'new outline',
                {
                    action: (target: DocumentView) => {
                        const newBox = Docs.Create.TextDocument('', { _width: 400, _height: 200, title: 'My Outline', _layout_autoHeight: true });
                        const prompt = 'Press alt + r to start dictating here...';
                        const head = 3;
                        const anchor = head + prompt.length;
                        const proseMirrorState = `{"doc":{"type":"doc","content":[{"type":"ordered_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"type":"text","text":"${prompt}"}]}]}]}]},"selection":{"type":"text","anchor":${anchor},"head":${head}}}`;
                        newBox.$data = new RichTextField(proseMirrorState, prompt);
                        newBox.$backgroundColor = '#eeffff';
                        target.props.addDocTab(newBox, OpenWhere.addRight);
                    },
                },
            ],
        ]);

        const Dependent: DependentEntry[] = [
            {
                expression: /create (\w+) documents of type (image|nested collection)/g,
                action: (target: DocumentView, matches: RegExpExecArray) => {
                    const count = interpretNumber(matches[1]);
                    const what = matches[2];
                    if (!isNaN(count)) {
                        for (let i = 0; i < count; i++) {
                            const created = (() => {
                                switch (what) {
                                    case 'image':
                                        return Docs.Create.ImageDocument('https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Cat03.jpg/1200px-Cat03.jpg');
                                    case 'nested collection':
                                        return Docs.Create.FreeformDocument([], {});
                                    default:
                                        return undefined;
                                }
                            })();
                            created && Doc.AddDocToList(target.dataDoc, Doc.LayoutDataKey(target.Document), created);
                        }
                    }
                },
                restrictTo: [DocumentType.COL],
            },
            {
                expression: /view as (freeform|stacking|masonry|schema|tree)/g,
                action: (target: DocumentView, matches: RegExpExecArray) => {
                    const mode = matches[1];
                    mode && (target.Document._type_collection = mode);
                },
                restrictTo: [DocumentType.COL],
            },
        ];
    }
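    /**
     * Records an audio annotation onto the given Doc while dictating a live transcript of it.
     * The audio is captured with a MediaRecorder and uploaded to the server as an AudioField in
     * doc[$<fieldIn>_audioAnnotations], while the transcript is written incrementally into the
     * parallel doc[$<fieldIn>_audioAnnotations_text] list. If onRecording is provided it receives
     * a stop() callback; otherwise recording stops automatically after 5 seconds.
     */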
    export function recordAudioAnnotation(doc: Doc, fieldIn: string, onRecording?: (stop: () => void) => void, onEnd?: () => void) {
        const field = '$' + fieldIn + '_audioAnnotations';
        let gumStream: MediaStream | undefined;
        let recorder: MediaRecorder | undefined;
        navigator.mediaDevices.getUserMedia({ audio: true }).then(stream => {
            // Add a fresh slot for this recording's transcript.
            let audioTextAnnos = Cast(doc[field + '_text'], listSpec('string'), null);
            if (audioTextAnnos) audioTextAnnos.push('');
            else audioTextAnnos = doc[field + '_text'] = new List<string>(['']);
            doc._layout_showTags = true;

            DictationManager.Controls.listen({
                interimHandler: value => {
                    audioTextAnnos[audioTextAnnos.length - 1] = value;
                },
                continuous: { indefinite: false },
            }).then(results => {
                if (results && [DictationManager.Controls.Infringed].includes(results)) {
                    DictationManager.Controls.stop();
                }
                onEnd?.();
            });

            gumStream = stream;
            recorder = new MediaRecorder(stream);
            recorder.ondataavailable = async (e: BlobEvent) => {
                // Decorate the blob with the File-like fields the upload endpoint expects.
                const file: Blob & { name?: string; lastModified?: number; webkitRelativePath?: string } = e.data;
                file.name = '';
                file.lastModified = 0;
                file.webkitRelativePath = '';
                const [{ result }] = await Networking.UploadFilesToServer({ file: file as Blob & { name: string; lastModified: number; webkitRelativePath: string } });
                if (!(result instanceof Error)) {
                    const audioField = new AudioField(result.accessPaths.agnostic.client);
                    const audioAnnos = Cast(doc[field], listSpec(AudioField), null);
                    if (audioAnnos) audioAnnos.push(audioField);
                    else doc[field] = new List<AudioField>([audioField]);
                }
            };
            recorder.start();

            const stopFunc = () => {
                recorder?.stop();
                DictationManager.Controls.stop(/* false */);
                doc._audioAnnoState = AudioAnnoState.stopped;
                gumStream?.getAudioTracks()[0].stop();
            };
            if (onRecording) onRecording(stopFunc);
            else setTimeout(stopFunc, 5000);
        });
    }
}
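/*
 * A minimal usage sketch for recordAudioAnnotation (someDoc is a hypothetical Doc; with
 * 'data' as the field name, annotations land on its $data_audioAnnotations and
 * $data_audioAnnotations_text lists):
 *
 *   recordAudioAnnotation(
 *       someDoc,
 *       'data',
 *       stop => setTimeout(stop, 10000), // stop recording after ten seconds
 *       () => console.log('dictation session ended')
 *   );
 */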