diff options
-rw-r--r-- | deploy/assets/Sunflower.mp3 | bin | 0 -> 7682122 bytes | |||
-rw-r--r-- | package.json | 1 | ||||
-rw-r--r-- | src/client/cognitive_services/CognitiveServices.ts | 27 | ||||
-rw-r--r-- | src/client/views/MainView.tsx | 3 | ||||
-rw-r--r-- | src/server/RouteStore.ts | 1 | ||||
-rw-r--r-- | src/server/index.ts | 59 |
6 files changed, 89 insertions, 2 deletions
diff --git a/deploy/assets/Sunflower.mp3 b/deploy/assets/Sunflower.mp3 Binary files differnew file mode 100644 index 000000000..ab04baac4 --- /dev/null +++ b/deploy/assets/Sunflower.mp3 diff --git a/package.json b/package.json index 4a15cbb2f..12f0cd302 100644 --- a/package.json +++ b/package.json @@ -139,6 +139,7 @@ "jsonwebtoken": "^8.5.0", "jsx-to-string": "^1.4.0", "lodash": "^4.17.11", + "microsoft-cognitiveservices-speech-sdk": "^1.6.0", "mobile-detect": "^1.4.3", "mobx": "^5.9.0", "mobx-react": "^5.3.5", diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts index d69378d0e..40bbe55a1 100644 --- a/src/client/cognitive_services/CognitiveServices.ts +++ b/src/client/cognitive_services/CognitiveServices.ts @@ -9,6 +9,10 @@ import { Utils } from "../../Utils"; import { CompileScript } from "../util/Scripting"; import { ComputedField } from "../../new_fields/ScriptField"; import { InkData } from "../../new_fields/InkField"; +import "microsoft-cognitiveservices-speech-sdk"; +import "fs"; +import { AudioInputStream } from "microsoft-cognitiveservices-speech-sdk"; +import { createReadStream, ReadStream } from "fs"; type APIManager<D> = { converter: BodyConverter<D>, requester: RequestExecutor, analyzer: AnalysisApplier }; type RequestExecutor = (apiKey: string, body: string, service: Service) => Promise<string>; @@ -22,7 +26,8 @@ export type Rectangle = { top: number, left: number, width: number, height: numb export enum Service { ComputerVision = "vision", Face = "face", - Handwriting = "handwriting" + Handwriting = "handwriting", + Transcription = "transcription" } export enum Confidence { @@ -232,4 +237,24 @@ export namespace CognitiveServices { } + export namespace Transcription { + + export const Manager: APIManager<string> = { + + converter: (data: string) => data, + + requester: async (apiKey: string, body: string, service: Service) => { + let analysis = await fetch(`${RouteStore.audioData}/${body}`).then(async response => JSON.parse(await response.json())); + console.log(analysis); + return ""; + }, + + analyzer: async (doc: Doc, keys: string[], filename: string) => { + let results = await executeQuery<string, any>(Service.Transcription, Manager, filename); + } + + }; + + } + }
\ No newline at end of file diff --git a/src/client/views/MainView.tsx b/src/client/views/MainView.tsx index 61a013963..ca75ab2c4 100644 --- a/src/client/views/MainView.tsx +++ b/src/client/views/MainView.tsx @@ -39,6 +39,7 @@ import { PreviewCursor } from './PreviewCursor'; import { FilterBox } from './search/FilterBox'; import { CollectionTreeView } from './collections/CollectionTreeView'; import { ClientUtils } from '../util/ClientUtils'; +import { CognitiveServices } from '../cognitive_services/CognitiveServices'; @observer export class MainView extends React.Component { @@ -67,6 +68,8 @@ export class MainView extends React.Component { componentWillMount() { var tag = document.createElement('script'); + CognitiveServices.Transcription.Manager.analyzer(new Doc, ["hello", "world"], "Sunflower.mp3"); + tag.src = "https://www.youtube.com/iframe_api"; var firstScriptTag = document.getElementsByTagName('script')[0]; firstScriptTag.parentNode!.insertBefore(tag, firstScriptTag); diff --git a/src/server/RouteStore.ts b/src/server/RouteStore.ts index e30015e39..53f176c81 100644 --- a/src/server/RouteStore.ts +++ b/src/server/RouteStore.ts @@ -13,6 +13,7 @@ export enum RouteStore { upload = "/upload", dataUriToImage = "/uploadURI", images = "/images", + audioData = "/audioData", // USER AND WORKSPACES getCurrUser = "/getCurrentUser", diff --git a/src/server/index.ts b/src/server/index.ts index 40c0e7981..0a02b667e 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -40,6 +40,8 @@ import { Search } from './Search'; import { debug } from 'util'; import _ = require('lodash'); import { Response } from 'express-serve-static-core'; +import { AudioInputStream, AudioConfig, SpeechConfig, SpeechRecognizer, SpeechRecognitionResult } from 'microsoft-cognitiveservices-speech-sdk'; +import { Opt } from '../new_fields/Doc'; const MongoStore = require('connect-mongo')(session); const mongoose = require('mongoose'); const probe = require("probe-image-size"); @@ -297,7 +299,8 @@ addSecureRoute( const ServicesApiKeyMap = new Map<string, string | undefined>([ ["face", process.env.FACE], ["vision", process.env.VISION], - ["handwriting", process.env.HANDWRITING] + ["handwriting", process.env.HANDWRITING], + ["transcription", process.env.TRANSCRIPTION] ]); addSecureRoute(Method.GET, (user, res, req) => { @@ -305,6 +308,60 @@ addSecureRoute(Method.GET, (user, res, req) => { res.send(ServicesApiKeyMap.get(service)); }, undefined, `${RouteStore.cognitiveServices}/:requestedservice`); +addSecureRoute( + Method.GET, + (user, res, req) => { + let asset = req.params.asset; + let pushStream = AudioInputStream.createPushStream(); + let readStream = fs.createReadStream(path.join(__dirname, '../../deploy/assets/' + asset)); + + let apiKey = process.env.TRANSCRIPTION; + if (!apiKey) { + res.send(undefined); + return; + } + + console.log("API KEY FOUND: ", apiKey); + + readStream.on('data', arrayBuffer => { + pushStream.write(arrayBuffer.buffer); + console.log(arrayBuffer.buffer); + }); + readStream.on('end', () => pushStream.close()); + readStream.on('error', (error) => { + console.log("ERROR! ", error); + res.end(error); + }); + + let audioConfig = AudioConfig.fromStreamInput(pushStream); + let speechConfig = SpeechConfig.fromSubscription(apiKey, "eastus"); + + console.log("Here are the configs!"); + console.log(audioConfig); + console.log(speechConfig); + + speechConfig.speechRecognitionLanguage = "en-US"; + + let recognizer: Opt<SpeechRecognizer> = new SpeechRecognizer(speechConfig, audioConfig); + recognizer.recognizeOnceAsync( + (result: SpeechRecognitionResult) => { + console.log("RESULT! ", result); + res.send(result); + recognizer && recognizer.close(); + recognizer = undefined; + }, + (error: string) => { + console.log("RESULT ERROR: ", error); + res.send(error); + recognizer && recognizer.close(); + recognizer = undefined; + }, + ); + }, + undefined, + `${RouteStore.audioData}/:asset` +); + class NodeCanvasFactory { create = (width: number, height: number) => { var canvas = createCanvas(width, height); |