From 3152e69dfafe1c393bed38f3aad1e55881e62a33 Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Fri, 26 Jul 2019 02:39:43 -0400 Subject: initial commit --- src/client/cognitive_services/CognitiveServices.ts | 27 +++++++++- src/client/views/MainView.tsx | 3 ++ src/server/RouteStore.ts | 1 + src/server/index.ts | 59 +++++++++++++++++++++- 4 files changed, 88 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index d69378d0e..40bbe55a1 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -9,6 +9,10 @@ import { Utils } from "../../Utils"; import { CompileScript } from "../util/Scripting"; import { ComputedField } from "../../new_fields/ScriptField"; import { InkData } from "../../new_fields/InkField"; +import "microsoft-cognitiveservices-speech-sdk"; +import "fs"; +import { AudioInputStream } from "microsoft-cognitiveservices-speech-sdk"; +import { createReadStream, ReadStream } from "fs"; type APIManager = { converter: BodyConverter, requester: RequestExecutor, analyzer: AnalysisApplier }; type RequestExecutor = (apiKey: string, body: string, service: Service) => Promise; @@ -22,7 +26,8 @@ export type Rectangle = { top: number, left: number, width: number, height: numb export enum Service { ComputerVision = "vision", Face = "face", - Handwriting = "handwriting" + Handwriting = "handwriting", + Transcription = "transcription" } export enum Confidence { @@ -232,4 +237,24 @@ export namespace CognitiveServices { } + export namespace Transcription { + + export const Manager: APIManager = { + + converter: (data: string) => data, + + requester: async (apiKey: string, body: string, service: Service) => { + let analysis = await fetch(`${RouteStore.audioData}/${body}`).then(async response => JSON.parse(await response.json())); + console.log(analysis); + return ""; + }, + + analyzer: async (doc: Doc, keys: string[], filename: string) => { + let results = await executeQuery(Service.Transcription, Manager, filename); + } + + }; + + } + } \ No newline at end of file diff --git a/src/client/views/MainView.tsx b/src/client/views/MainView.tsx index 61a013963..ca75ab2c4 100644 --- a/src/client/views/MainView.tsx +++ b/src/client/views/MainView.tsx @@ -39,6 +39,7 @@ import { PreviewCursor } from './PreviewCursor'; import { FilterBox } from './search/FilterBox'; import { CollectionTreeView } from './collections/CollectionTreeView'; import { ClientUtils } from '../util/ClientUtils'; +import { CognitiveServices } from '../cognitive_services/CognitiveServices'; @observer export class MainView extends React.Component { @@ -67,6 +68,8 @@ export class MainView extends React.Component { componentWillMount() { var tag = document.createElement('script'); + CognitiveServices.Transcription.Manager.analyzer(new Doc, ["hello", "world"], "Sunflower.mp3"); + tag.src = "https://www.youtube.com/iframe_api"; var firstScriptTag = document.getElementsByTagName('script')[0]; firstScriptTag.parentNode!.insertBefore(tag, firstScriptTag); diff --git a/src/server/RouteStore.ts b/src/server/RouteStore.ts index e30015e39..53f176c81 100644 --- a/src/server/RouteStore.ts +++ b/src/server/RouteStore.ts @@ -13,6 +13,7 @@ export enum RouteStore { upload = "/upload", dataUriToImage = "/uploadURI", images = "/images", + audioData = "/audioData", // USER AND WORKSPACES getCurrUser = "/getCurrentUser", diff --git a/src/server/index.ts b/src/server/index.ts index 40c0e7981..0a02b667e 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -40,6 +40,8 @@ import { Search } from './Search'; import { debug } from 'util'; import _ = require('lodash'); import { Response } from 'express-serve-static-core'; +import { AudioInputStream, AudioConfig, SpeechConfig, SpeechRecognizer, SpeechRecognitionResult } from 'microsoft-cognitiveservices-speech-sdk'; +import { Opt } from '../new_fields/Doc'; const MongoStore = require('connect-mongo')(session); const mongoose = require('mongoose'); const probe = require("probe-image-size"); @@ -297,7 +299,8 @@ addSecureRoute( const ServicesApiKeyMap = new Map([ ["face", process.env.FACE], ["vision", process.env.VISION], - ["handwriting", process.env.HANDWRITING] + ["handwriting", process.env.HANDWRITING], + ["transcription", process.env.TRANSCRIPTION] ]); addSecureRoute(Method.GET, (user, res, req) => { @@ -305,6 +308,60 @@ addSecureRoute(Method.GET, (user, res, req) => { res.send(ServicesApiKeyMap.get(service)); }, undefined, `${RouteStore.cognitiveServices}/:requestedservice`); +addSecureRoute( + Method.GET, + (user, res, req) => { + let asset = req.params.asset; + let pushStream = AudioInputStream.createPushStream(); + let readStream = fs.createReadStream(path.join(__dirname, '../../deploy/assets/' + asset)); + + let apiKey = process.env.TRANSCRIPTION; + if (!apiKey) { + res.send(undefined); + return; + } + + console.log("API KEY FOUND: ", apiKey); + + readStream.on('data', arrayBuffer => { + pushStream.write(arrayBuffer.buffer); + console.log(arrayBuffer.buffer); + }); + readStream.on('end', () => pushStream.close()); + readStream.on('error', (error) => { + console.log("ERROR! ", error); + res.end(error); + }); + + let audioConfig = AudioConfig.fromStreamInput(pushStream); + let speechConfig = SpeechConfig.fromSubscription(apiKey, "eastus"); + + console.log("Here are the configs!"); + console.log(audioConfig); + console.log(speechConfig); + + speechConfig.speechRecognitionLanguage = "en-US"; + + let recognizer: Opt = new SpeechRecognizer(speechConfig, audioConfig); + recognizer.recognizeOnceAsync( + (result: SpeechRecognitionResult) => { + console.log("RESULT! ", result); + res.send(result); + recognizer && recognizer.close(); + recognizer = undefined; + }, + (error: string) => { + console.log("RESULT ERROR: ", error); + res.send(error); + recognizer && recognizer.close(); + recognizer = undefined; + }, + ); + }, + undefined, + `${RouteStore.audioData}/:asset` +); + class NodeCanvasFactory { create = (width: number, height: number) => { var canvas = createCanvas(width, height); -- cgit v1.2.3-70-g09d2 From e0851a9bcde60b71dbf64a256c611288a1da8025 Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Tue, 30 Jul 2019 11:00:57 -0400 Subject: clean up --- src/client/cognitive_services/CognitiveServices.ts | 8 +++----- src/client/views/MainView.tsx | 1 - src/server/index.ts | 4 +--- 3 files changed, 4 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index 720892b61..b9c718cfa 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -5,8 +5,6 @@ import { Docs } from "../documents/Documents"; import { RouteStore } from "../../server/RouteStore"; import { Utils } from "../../Utils"; import { InkData } from "../../new_fields/InkField"; -import "microsoft-cognitiveservices-speech-sdk"; -import "fs"; import { UndoManager } from "../util/UndoManager"; type APIManager = { converter: BodyConverter, requester: RequestExecutor, analyzer: AnalysisApplier }; @@ -27,7 +25,7 @@ export type Rectangle = { top: number, left: number, width: number, height: numb export enum Service { ComputerVision = "vision", Face = "face", - Handwriting = "handwriting", + Handwriting = "handwriting" } export enum Confidence { @@ -221,7 +219,7 @@ export namespace CognitiveServices { export namespace Transcription { - export const analyzer = (doc: Doc, keys: string[]) => { + export const analyzer = (target: Doc, keys: string[]) => { let { webkitSpeechRecognition }: CORE.IWindow = window as CORE.IWindow; let recognizer = new webkitSpeechRecognition(); recognizer.interimResults = true; @@ -229,7 +227,7 @@ export namespace CognitiveServices { recognizer.onresult = (e: any) => { let result = e.results[0][0]; - doc[keys[0]] = result.transcript; + target[keys[0]] = result.transcript; }; recognizer.start(); diff --git a/src/client/views/MainView.tsx b/src/client/views/MainView.tsx index ccf8f571e..91c8fe57c 100644 --- a/src/client/views/MainView.tsx +++ b/src/client/views/MainView.tsx @@ -38,7 +38,6 @@ import { PreviewCursor } from './PreviewCursor'; import { FilterBox } from './search/FilterBox'; import { CollectionTreeView } from './collections/CollectionTreeView'; import { ClientUtils } from '../util/ClientUtils'; -import { CognitiveServices } from '../cognitive_services/CognitiveServices'; import { SchemaHeaderField, RandomPastel } from '../../new_fields/SchemaHeaderField'; @observer diff --git a/src/server/index.ts b/src/server/index.ts index f4bbd4423..adf218be6 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -40,8 +40,6 @@ import { Search } from './Search'; import { debug } from 'util'; import _ = require('lodash'); import { Response } from 'express-serve-static-core'; -import { AudioInputStream, AudioConfig, SpeechConfig, SpeechRecognizer, SpeechRecognitionResult } from 'microsoft-cognitiveservices-speech-sdk'; -import { Opt } from '../new_fields/Doc'; const MongoStore = require('connect-mongo')(session); const mongoose = require('mongoose'); const probe = require("probe-image-size"); @@ -299,7 +297,7 @@ addSecureRoute( const ServicesApiKeyMap = new Map([ ["face", process.env.FACE], ["vision", process.env.VISION], - ["handwriting", process.env.HANDWRITING], + ["handwriting", process.env.HANDWRITING] ]); addSecureRoute(Method.GET, (user, res, req) => { -- cgit v1.2.3-70-g09d2 From 1aac1e8820c62a5f06d7e7630394e0bd58b19a94 Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Tue, 30 Jul 2019 12:56:25 -0400 Subject: refactored and implemented dictation manager and shift keyboard shortcut for voice commands --- package.json | 2 +- src/client/cognitive_services/CognitiveServices.ts | 24 ------------- src/client/util/DictationManager.ts | 39 ++++++++++++++++++++++ src/client/views/ContextMenu.tsx | 8 +++-- src/client/views/GlobalKeyHandler.ts | 29 ++++++++++++++-- src/client/views/nodes/DocumentView.tsx | 8 ++++- 6 files changed, 79 insertions(+), 31 deletions(-) create mode 100644 src/client/util/DictationManager.ts (limited to 'src') diff --git a/package.json b/package.json index 3f100a3ef..37052fde3 100644 --- a/package.json +++ b/package.json @@ -203,4 +203,4 @@ "xoauth2": "^1.2.0", "youtube": "^0.1.0" } -} \ No newline at end of file +} diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index b9c718cfa..c118d91d3 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -13,12 +13,6 @@ type AnalysisApplier = (target: Doc, relevantKeys: string[], ...args: any) => an type BodyConverter = (data: D) => string; type Converter = (results: any) => Field; -namespace CORE { - export interface IWindow extends Window { - webkitSpeechRecognition: any; - } -} - export type Tag = { name: string, confidence: number }; export type Rectangle = { top: number, left: number, width: number, height: number }; @@ -217,22 +211,4 @@ export namespace CognitiveServices { } - export namespace Transcription { - - export const analyzer = (target: Doc, keys: string[]) => { - let { webkitSpeechRecognition }: CORE.IWindow = window as CORE.IWindow; - let recognizer = new webkitSpeechRecognition(); - recognizer.interimResults = true; - recognizer.continuous = true; - - recognizer.onresult = (e: any) => { - let result = e.results[0][0]; - target[keys[0]] = result.transcript; - }; - - recognizer.start(); - }; - - } - } \ No newline at end of file diff --git a/src/client/util/DictationManager.ts b/src/client/util/DictationManager.ts new file mode 100644 index 000000000..b58bdb6c7 --- /dev/null +++ b/src/client/util/DictationManager.ts @@ -0,0 +1,39 @@ +namespace CORE { + export interface IWindow extends Window { + webkitSpeechRecognition: any; + } +} + +const { webkitSpeechRecognition }: CORE.IWindow = window as CORE.IWindow; + +export default class DictationManager { + public static Instance = new DictationManager(); + private isListening = false; + private recognizer: any; + + constructor() { + this.recognizer = new webkitSpeechRecognition(); + this.recognizer.interimResults = false; + this.recognizer.continuous = true; + } + + finish = (handler: any, data: any) => { + handler(data); + this.isListening = false; + this.recognizer.stop(); + } + + listen = () => { + if (this.isListening) { + return undefined; + } + this.isListening = true; + this.recognizer.start(); + return new Promise((resolve, reject) => { + this.recognizer.onresult = (e: any) => this.finish(resolve, e.results[0][0].transcript); + this.recognizer.onerror = (e: any) => this.finish(reject, e); + }); + + } + +} \ No newline at end of file diff --git a/src/client/views/ContextMenu.tsx b/src/client/views/ContextMenu.tsx index a608e448a..98025ac31 100644 --- a/src/client/views/ContextMenu.tsx +++ b/src/client/views/ContextMenu.tsx @@ -38,8 +38,12 @@ export class ContextMenu extends React.Component { this._items = []; } - findByDescription = (target: string) => { - return this._items.find(menuItem => menuItem.description === target); + findByDescription = (target: string, toLowerCase = false) => { + return this._items.find(menuItem => { + let reference = menuItem.description; + toLowerCase && (reference = reference.toLowerCase()); + reference === target; + }); } @action diff --git a/src/client/views/GlobalKeyHandler.ts b/src/client/views/GlobalKeyHandler.ts index e31b44514..373584b4e 100644 --- a/src/client/views/GlobalKeyHandler.ts +++ b/src/client/views/GlobalKeyHandler.ts @@ -5,9 +5,13 @@ import { MainView } from "./MainView"; import { DragManager } from "../util/DragManager"; import { action } from "mobx"; import { Doc } from "../../new_fields/Doc"; +import { CognitiveServices } from "../cognitive_services/CognitiveServices"; +import DictationManager from "../util/DictationManager"; +import { ContextMenu } from "./ContextMenu"; +import { ContextMenuProps } from "./ContextMenuItem"; const modifiers = ["control", "meta", "shift", "alt"]; -type KeyHandler = (keycode: string, e: KeyboardEvent) => KeyControlInfo; +type KeyHandler = (keycode: string, e: KeyboardEvent) => KeyControlInfo | Promise; type KeyControlInfo = { preventDefault: boolean, stopPropagation: boolean @@ -25,9 +29,10 @@ export default class KeyManager { this.router.set(isMac ? "0001" : "0100", this.ctrl); this.router.set(isMac ? "0100" : "0010", this.alt); this.router.set(isMac ? "1001" : "1100", this.ctrl_shift); + this.router.set("1000", this.shift); } - public handle = (e: KeyboardEvent) => { + public handle = async (e: KeyboardEvent) => { let keyname = e.key.toLowerCase(); this.handleGreedy(keyname); @@ -43,7 +48,7 @@ export default class KeyManager { return; } - let control = handleConstrained(keyname, e); + let control = await handleConstrained(keyname, e); control.stopPropagation && e.stopPropagation(); control.preventDefault && e.preventDefault(); @@ -95,6 +100,24 @@ export default class KeyManager { }; }); + private shift = async (keyname: string) => { + let stopPropagation = true; + let preventDefault = true; + + switch (keyname) { + case " ": + let transcript = await DictationManager.Instance.listen(); + console.log(`I heard${transcript ? `: ${transcript.toLowerCase()}` : " nothing: I thought I was still listening from an earlier session."}`); + let command: ContextMenuProps | undefined; + transcript && (command = ContextMenu.Instance.findByDescription(transcript, true)) && "event" in command && command.event(); + } + + return { + stopPropagation: stopPropagation, + preventDefault: preventDefault + }; + } + private alt = action((keyname: string) => { let stopPropagation = true; let preventDefault = true; diff --git a/src/client/views/nodes/DocumentView.tsx b/src/client/views/nodes/DocumentView.tsx index fd53cd451..dc56c1c8f 100644 --- a/src/client/views/nodes/DocumentView.tsx +++ b/src/client/views/nodes/DocumentView.tsx @@ -42,6 +42,7 @@ import { EditableView } from '../EditableView'; import { faHandPointer, faHandPointRight } from '@fortawesome/free-regular-svg-icons'; import { DocumentDecorations } from '../DocumentDecorations'; import { CognitiveServices } from '../../cognitive_services/CognitiveServices'; +import DictationManager from '../../util/DictationManager'; const JsxParser = require('react-jsx-parser').default; //TODO Why does this need to be imported like this? library.add(fa.faTrash); @@ -536,6 +537,11 @@ export class DocumentView extends DocComponent(Docu this.props.Document.lockedPosition = BoolCast(this.props.Document.lockedPosition) ? undefined : true; } + listen = async () => { + let transcript = await DictationManager.Instance.listen(); + transcript && (Doc.GetProto(this.props.Document).transcript = transcript); + } + @action onContextMenu = async (e: React.MouseEvent): Promise => { e.persist(); @@ -559,7 +565,7 @@ export class DocumentView extends DocComponent(Docu cm.addItem({ description: BoolCast(this.props.Document.ignoreAspect, false) || !this.props.Document.nativeWidth || !this.props.Document.nativeHeight ? "Freeze" : "Unfreeze", event: this.freezeNativeDimensions, icon: "snowflake" }); cm.addItem({ description: "Pin to Presentation", event: () => PresentationView.Instance.PinDoc(this.props.Document), icon: "map-pin" }); cm.addItem({ description: BoolCast(this.props.Document.lockedPosition) ? "Unlock Position" : "Lock Position", event: this.toggleLockPosition, icon: BoolCast(this.props.Document.lockedPosition) ? "unlock" : "lock" }); - cm.addItem({ description: "Transcribe Speech", event: () => CognitiveServices.Transcription.analyzer(Doc.GetProto(this.props.Document), ["transcript"]), icon: "microphone" }); + cm.addItem({ description: "Transcribe Speech", event: this.listen, icon: "microphone" }); let makes: ContextMenuProps[] = []; makes.push({ description: "Make Background", event: this.makeBackground, icon: BoolCast(this.props.Document.lockedPosition) ? "unlock" : "lock" }); makes.push({ description: this.props.Document.isButton ? "Remove Button" : "Make Button", event: this.makeBtnClicked, icon: "concierge-bell" }); -- cgit v1.2.3-70-g09d2