import * as interpreter from 'words-to-numbers';
// @ts-ignore bcz: how are you supposed to include these definitions since dom-speech-recognition isn't a module?
import type {} from '@types/dom-speech-recognition';
import { Doc, Opt } from '../../fields/Doc';
import { List } from '../../fields/List';
import { RichTextField } from '../../fields/RichTextField';
import { listSpec } from '../../fields/Schema';
import { Cast, CastCtor, DocCast } from '../../fields/Types';
import { AudioField, ImageField } from '../../fields/URLField';
import { Utils } from '../../Utils';
import { Docs } from '../documents/Documents';
import { DocumentType } from '../documents/DocumentTypes';
import { DictationOverlay } from '../views/DictationOverlay';
import { DocumentView, OpenWhere } from '../views/nodes/DocumentView';
import { SelectionManager } from './SelectionManager';
import { UndoManager } from './UndoManager';

/**
 * This namespace provides a singleton instance of a manager that
 * handles the listening and text-conversion of user speech.
 *
 * The basic manager functionality can be accessed through the DictationManager.Controls namespace, which provides
 * a simple recording operation that returns the interpreted text as a string.
 *
 * Additionally, the DictationManager exposes the ability to execute voice commands within Dash.
 * It stores a default library of registered commands that can be triggered by listen()'ing for a phrase and then
 * passing the results into the execute() function.
 *
 * In addition to the compile-time default commands, you can invoke DictationManager.Commands.Register(Independent|Dependent)
 * to add new commands as classes or components are constructed.
 */
export namespace DictationManager {
    /**
     * Some type maneuvering to access Webkit's built-in
     * speech recognizer.
     */
    namespace CORE {
        export interface IWindow extends Window {
            webkitSpeechRecognition: any;
        }
    }
    const { webkitSpeechRecognition }: CORE.IWindow = window as any as CORE.IWindow;

    export const placeholder = 'Listening...';
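    // Usage sketch (illustrative only, not executed): the simplest interaction with the manager
    // is a one-shot recording via Controls.listen(), which resolves to the interpreted text.
    // The option values shown are assumptions chosen for the example.
    //
    //     const transcript = await DictationManager.Controls.listen({
    //         language: 'en-US',
    //         interimHandler: partial => console.log('heard so far:', partial),
    //     });
    //     transcript && console.log('final transcript:', transcript);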
    export namespace Controls {
        export const Infringed = 'unable to process: dictation manager still involved in previous session';

        const browser = (() => {
            const identifier = navigator.userAgent.toLowerCase();
            // Chrome's user agent string also contains 'safari', so check for Chrome first.
            if (identifier.indexOf('chrome') >= 0) {
                return 'Chrome';
            }
            if (identifier.indexOf('safari') >= 0) {
                return 'Safari';
            }
            if (identifier.indexOf('firefox') >= 0) {
                return 'Firefox';
            }
            return 'Unidentified Browser';
        })();
        const unsupported = `listening is not supported in ${browser}`;

        const intraSession = '. ';
        const interSession = ' ... ';

        export let isListening = false;
        let isManuallyStopped = false;

        let current: string | undefined = undefined;
        let sessionResults: string[] = [];

        const recognizer: Opt<SpeechRecognition> = webkitSpeechRecognition ? new webkitSpeechRecognition() : undefined;

        export type InterimResultHandler = (results: string) => any;
        export type ContinuityArgs = { indefinite: boolean } | false;
        export type DelimiterArgs = { inter: string; intra: string };
        export type ListeningUIStatus = { interim: boolean } | false;

        export interface ListeningOptions {
            useOverlay: boolean;
            language: string;
            continuous: ContinuityArgs;
            delimiters: DelimiterArgs;
            interimHandler: InterimResultHandler;
            tryExecute: boolean;
            terminators: string[];
        }

        let pendingListen: Promise<string> | string | undefined;

        export const listen = async (options?: Partial<ListeningOptions>) => {
            // if a previous session is still pending, queue this one behind it
            if (pendingListen instanceof Promise) return pendingListen.then(() => innerListen(options));
            return innerListen(options);
        };

        const innerListen = async (options?: Partial<ListeningOptions>) => {
            let results: string | undefined;
            const overlay = options?.useOverlay;
            if (overlay) {
                DictationOverlay.Instance.dictationOverlayVisible = true;
                DictationOverlay.Instance.isListening = { interim: false };
            }
            try {
                results = await (pendingListen = listenImpl(options));
                pendingListen = undefined;
                if (results) {
                    Utils.CopyText(results);
                    if (overlay) {
                        DictationOverlay.Instance.isListening = false;
                        const execute = options?.tryExecute;
                        DictationOverlay.Instance.dictatedPhrase = execute ? results.toLowerCase() : results;
                        DictationOverlay.Instance.dictationSuccess = execute ? await DictationManager.Commands.execute(results) : true;
                    }
                    // the overlay branch above already executes the command, so only run it here when no overlay is shown
                    !overlay && options?.tryExecute && (await DictationManager.Commands.execute(results));
                }
            } catch (e: any) {
                console.log(e);
                if (overlay) {
                    DictationOverlay.Instance.isListening = false;
                    DictationOverlay.Instance.dictatedPhrase = results = `dictation error: ${'error' in e ? e.error : 'unknown error'}`;
                    DictationOverlay.Instance.dictationSuccess = false;
                }
            } finally {
                overlay && DictationOverlay.Instance.initiateDictationFade();
            }
            return results;
        };
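        // Usage sketch (illustrative only, not executed): an indefinite session keeps restarting the
        // recognizer until a terminator phrase is heard or stop() is called, joining the per-session
        // transcripts with the `inter` delimiter. The phrases below are assumptions for the example.
        //
        //     const notes = await DictationManager.Controls.listen({
        //         continuous: { indefinite: true },
        //         delimiters: { inter: ' ... ', intra: '. ' },
        //         terminators: ['end dictation'],
        //     });
        //     // ...or end the session early from elsewhere in the UI:
        //     DictationManager.Controls.stop();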
        const listenImpl = (options?: Partial<ListeningOptions>) => {
            if (!recognizer) {
                console.log('DictationManager: ' + unsupported);
                return unsupported;
            }
            if (isListening) {
                return Infringed;
            }
            isListening = true;

            const handler = options?.interimHandler;
            const continuous = options?.continuous;
            const indefinite = continuous && continuous.indefinite;
            const language = options?.language;
            const intra = options?.delimiters?.intra;
            const inter = options?.delimiters?.inter;

            recognizer.onstart = () => console.log('initiating speech recognition session...');
            recognizer.interimResults = handler !== undefined;
            recognizer.continuous = continuous === undefined ? false : continuous !== false;
            recognizer.lang = language === undefined ? 'en-US' : language;
            recognizer.start();

            return new Promise<string>((resolve, reject) => {
                recognizer.onerror = (e: any) => {
                    // e is a SpeechRecognitionError, but where is that defined?
                    if (!(indefinite && e.error === 'no-speech')) {
                        recognizer.stop();
                        resolve(e); //reject(e);
                    }
                };

                recognizer.onresult = (e: SpeechRecognitionEvent) => {
                    current = synthesize(e, intra);
                    let matchedTerminator: string | undefined;
                    if (options?.terminators && (matchedTerminator = options.terminators.find(end => (current ? current.trim().toLowerCase().endsWith(end.toLowerCase()) : false)))) {
                        current = matchedTerminator;
                        recognizer.abort();
                        return complete();
                    }
                    !isManuallyStopped && handler?.(current);
                    //isManuallyStopped && complete();
                };

                recognizer.onend = (e: Event) => {
                    if (!indefinite || isManuallyStopped) {
                        return complete();
                    }
                    // in an indefinite session, bank the current transcript and restart the recognizer
                    if (current) {
                        !isManuallyStopped && sessionResults.push(current);
                        current = undefined;
                    }
                    recognizer.start();
                };

                const complete = () => {
                    if (indefinite) {
                        current && sessionResults.push(current);
                        sessionResults.length && resolve(sessionResults.join(inter || interSession));
                    } else {
                        resolve(current || '');
                    }
                    current = undefined;
                    sessionResults = [];
                    isListening = false;
                    isManuallyStopped = false;
                    recognizer.onresult = null;
                    recognizer.onerror = null;
                    recognizer.onend = null;
                };
            });
        };

        export const stop = (salvageSession = true) => {
            if (!isListening || !recognizer) {
                return;
            }
            isListening = false;
            isManuallyStopped = true;
            recognizer.stop(); // salvageSession ? recognizer.stop() : recognizer.abort();
        };

        const synthesize = (e: SpeechRecognitionEvent, delimiter?: string) => {
            const results = e.results;
            const transcripts: string[] = [];
            for (let i = 0; i < results.length; i++) {
                transcripts.push(results.item(i).item(0).transcript.trim());
            }
            return transcripts.join(delimiter || intraSession);
        };
    }

    export namespace Commands {
        export const dictationFadeDuration = 2000;

        export type IndependentAction = (target: DocumentView) => any | Promise<any>;
        export type IndependentEntry = { action: IndependentAction; restrictTo?: DocumentType[] };

        export type DependentAction = (target: DocumentView, matches: RegExpExecArray) => any | Promise<any>;
        export type DependentEntry = { expression: RegExp; action: DependentAction; restrictTo?: DocumentType[] };

        export const RegisterIndependent = (key: string, value: IndependentEntry) => Independent.set(key, value);
        export const RegisterDependent = (entry: DependentEntry) => Dependent.push(entry);

        export const execute = async (phrase: string) => {
            return UndoManager.RunInBatch(async () => {
                console.log('PHRASE: ' + phrase);
                const targets = SelectionManager.Views;
                if (!targets || !targets.length) {
                    return;
                }

                phrase = phrase.toLowerCase();
                const entry = Independent.get(phrase);

                if (entry) {
                    let success = false;
                    const restrictTo = entry.restrictTo;
                    for (const target of targets) {
                        if (!restrictTo || validate(target, restrictTo)) {
                            await entry.action(target);
                            success = true;
                        }
                    }
                    return success;
                }

                for (const entry of Dependent) {
                    const regex = entry.expression;
                    const matches = regex.exec(phrase);
                    regex.lastIndex = 0;
                    if (matches !== null) {
                        let success = false;
                        const restrictTo = entry.restrictTo;
                        for (const target of targets) {
                            if (!restrictTo || validate(target, restrictTo)) {
                                await entry.action(target, matches);
                                success = true;
                            }
                        }
                        return success;
                    }
                }

                return false;
            }, 'Execute Command');
        };

        const ConstructorMap = new Map<DocumentType, CastCtor>([
            [DocumentType.COL, listSpec(Doc)],
            [DocumentType.AUDIO, AudioField],
            [DocumentType.IMG, ImageField],
            [DocumentType.IMPORT, listSpec(Doc)],
            [DocumentType.RTF, 'string'],
        ]);

        const tryCast = (view: DocumentView, type: DocumentType) => {
            const ctor = ConstructorMap.get(type);
            if (!ctor) {
                return false;
            }
            return Cast(Doc.GetProto(view.props.Document).data, ctor) !== undefined;
        };

        const validate = (target: DocumentView, types: DocumentType[]) => {
            for (const type of types) {
                if (tryCast(target, type)) {
                    return true;
                }
            }
            return false;
        };

        const interpretNumber = (number: string) => {
            const initial = parseInt(number);
            if (!isNaN(initial)) {
                return initial;
            }
            const converted = interpreter.wordsToNumbers(number, { fuzzy: true });
            if (converted === null) {
                return NaN;
            }
            return typeof converted === 'string' ? parseInt(converted) : converted;
        };
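        // Behavior sketch (assuming words-to-numbers resolves the phrase as expected):
        //
        //     interpretNumber('7');          // => 7, via parseInt
        //     interpretNumber('forty two');  // => 42, via wordsToNumbers with fuzzy matching
        //     interpretNumber('gibberish');  // => NaN, when no number can be recovered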
        const Independent = new Map<string, IndependentEntry>([
            [
                'clear',
                {
                    action: (target: DocumentView) => (Doc.GetProto(target.props.Document).data = new List<Doc>()),
                    restrictTo: [DocumentType.COL],
                },
            ],
            [
                'new outline',
                {
                    action: (target: DocumentView) => {
                        const newBox = Docs.Create.TextDocument('', { _width: 400, _height: 200, title: 'My Outline', _layout_autoHeight: true });
                        const proto = DocCast(newBox.proto);
                        const prompt = 'Press alt + r to start dictating here...';
                        const head = 3;
                        const anchor = head + prompt.length;
                        const proseMirrorState = `{"doc":{"type":"doc","content":[{"type":"ordered_list","content":[{"type":"list_item","content":[{"type":"paragraph","content":[{"type":"text","text":"${prompt}"}]}]}]}]},"selection":{"type":"text","anchor":${anchor},"head":${head}}}`;
                        proto.data = new RichTextField(proseMirrorState);
                        proto.backgroundColor = '#eeffff';
                        target.props.addDocTab(newBox, OpenWhere.addRight);
                    },
                },
            ],
        ]);

        const Dependent = new Array<DependentEntry>(
            {
                expression: /create (\w+) documents of type (image|nested collection)/g,
                action: (target: DocumentView, matches: RegExpExecArray) => {
                    const count = interpretNumber(matches[1]);
                    const what = matches[2];
                    const dataDoc = Doc.GetProto(target.props.Document);
                    const fieldKey = 'data';
                    if (isNaN(count)) {
                        return;
                    }
                    for (let i = 0; i < count; i++) {
                        let created: Doc | undefined;
                        switch (what) {
                            case 'image':
                                created = Docs.Create.ImageDocument('https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Cat03.jpg/1200px-Cat03.jpg');
                                break;
                            case 'nested collection':
                                created = Docs.Create.FreeformDocument([], {});
                                break;
                        }
                        created && Doc.AddDocToList(dataDoc, fieldKey, created);
                    }
                },
                restrictTo: [DocumentType.COL],
            },
            {
                expression: /view as (freeform|stacking|masonry|schema|tree)/g,
                action: (target: DocumentView, matches: RegExpExecArray) => {
                    const mode = matches[1];
                    mode && (target.props.Document._type_collection = mode);
                },
                restrictTo: [DocumentType.COL],
            }
        );
    }
}
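// Registration sketch (illustrative only, not executed): per the namespace comment above, new voice
// commands can be added at runtime as classes or components are constructed. The phrase, regex, and
// action below are assumptions invented for this example.
//
//     DictationManager.Commands.RegisterDependent({
//         expression: /rename to (.+)/g,
//         action: (target, matches) => (target.props.Document.title = matches[1]),
//     });
//
//     // later, typically fed by Controls.listen({ tryExecute: true }):
//     const handled = await DictationManager.Commands.execute('rename to my notes');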