about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- deploy/assets/Sunflower.mp3 | bin 0 -> 7682122 bytes
-rw-r--r-- package.json | 1
-rw-r--r-- src/client/cognitive_services/CognitiveServices.ts | 27
-rw-r--r-- src/client/views/MainView.tsx | 3
-rw-r--r-- src/server/RouteStore.ts | 1
-rw-r--r-- src/server/index.ts | 59
6 files changed, 89 insertions, 2 deletions
diff --git a/deploy/assets/Sunflower.mp3 b/deploy/assets/Sunflower.mp3
new file mode 100644
index 000000000..ab04baac4
--- /dev/null
+++ b/deploy/assets/Sunflower.mp3
Binary files differ
diff --git a/package.json b/package.json
index 4a15cbb2f..12f0cd302 100644
--- a/package.json
+++ b/package.json
@@ -139,6 +139,7 @@
"jsonwebtoken": "^8.5.0",
"jsx-to-string": "^1.4.0",
"lodash": "^4.17.11",
+ "microsoft-cognitiveservices-speech-sdk": "^1.6.0",
"mobile-detect": "^1.4.3",
"mobx": "^5.9.0",
"mobx-react": "^5.3.5",
diff --git a/src/client/cognitive_services/CognitiveServices.ts b/src/client/cognitive_services/CognitiveServices.ts
index d69378d0e..40bbe55a1 100644
--- a/src/client/cognitive_services/CognitiveServices.ts
+++ b/src/client/cognitive_services/CognitiveServices.ts
@@ -9,6 +9,10 @@ import { Utils } from "../../Utils";
import { CompileScript } from "../util/Scripting";
import { ComputedField } from "../../new_fields/ScriptField";
import { InkData } from "../../new_fields/InkField";
+import "microsoft-cognitiveservices-speech-sdk";
+import "fs";
+import { AudioInputStream } from "microsoft-cognitiveservices-speech-sdk";
+import { createReadStream, ReadStream } from "fs";
type APIManager<D> = { converter: BodyConverter<D>, requester: RequestExecutor, analyzer: AnalysisApplier };
type RequestExecutor = (apiKey: string, body: string, service: Service) => Promise<string>;
@@ -22,7 +26,8 @@ export type Rectangle = { top: number, left: number, width: number, height: numb
export enum Service {
ComputerVision = "vision",
Face = "face",
- Handwriting = "handwriting"
+ Handwriting = "handwriting",
+ Transcription = "transcription"
}
export enum Confidence {
@@ -232,4 +237,24 @@ export namespace CognitiveServices {
}
+    export namespace Transcription {
+        // Speech transcription manager: recognition itself runs on the server behind RouteStore.audioData.
+        export const Manager: APIManager<string> = {
+            converter: (data: string) => data,
+
+            // Requests the transcription of the asset named by body and resolves to the raw JSON text returned.
+            requester: async (apiKey: string, body: string, service: Service) => {
+                const response = await fetch(`${RouteStore.audioData}/${encodeURIComponent(body)}`);
+                if (!response.ok) { throw new Error(`Transcription request failed with status ${response.status}`); }
+                return response.text(); // left unparsed: RequestExecutor promises a string (parsing presumably happens in executeQuery - confirm)
+            },
+
+            // Runs the transcription query for filename; the result is not yet applied to doc or keys.
+            analyzer: async (doc: Doc, keys: string[], filename: string) => {
+                await executeQuery<string, any>(Service.Transcription, Manager, filename);
+            }
+        };
+
+    }
+
} \ No newline at end of file
diff --git a/src/client/views/MainView.tsx b/src/client/views/MainView.tsx
index 61a013963..ca75ab2c4 100644
--- a/src/client/views/MainView.tsx
+++ b/src/client/views/MainView.tsx
@@ -39,6 +39,7 @@ import { PreviewCursor } from './PreviewCursor';
import { FilterBox } from './search/FilterBox';
import { CollectionTreeView } from './collections/CollectionTreeView';
import { ClientUtils } from '../util/ClientUtils';
+import { CognitiveServices } from '../cognitive_services/CognitiveServices';
@observer
export class MainView extends React.Component {
@@ -67,6 +68,8 @@ export class MainView extends React.Component {
componentWillMount() {
var tag = document.createElement('script');
+ CognitiveServices.Transcription.Manager.analyzer(new Doc, ["hello", "world"], "Sunflower.mp3");
+
tag.src = "https://www.youtube.com/iframe_api";
var firstScriptTag = document.getElementsByTagName('script')[0];
firstScriptTag.parentNode!.insertBefore(tag, firstScriptTag);
diff --git a/src/server/RouteStore.ts b/src/server/RouteStore.ts
index e30015e39..53f176c81 100644
--- a/src/server/RouteStore.ts
+++ b/src/server/RouteStore.ts
@@ -13,6 +13,7 @@ export enum RouteStore {
upload = "/upload",
dataUriToImage = "/uploadURI",
images = "/images",
+ audioData = "/audioData",
// USER AND WORKSPACES
getCurrUser = "/getCurrentUser",
diff --git a/src/server/index.ts b/src/server/index.ts
index 40c0e7981..0a02b667e 100644
--- a/src/server/index.ts
+++ b/src/server/index.ts
@@ -40,6 +40,8 @@ import { Search } from './Search';
import { debug } from 'util';
import _ = require('lodash');
import { Response } from 'express-serve-static-core';
+import { AudioInputStream, AudioConfig, SpeechConfig, SpeechRecognizer, SpeechRecognitionResult } from 'microsoft-cognitiveservices-speech-sdk';
+import { Opt } from '../new_fields/Doc';
const MongoStore = require('connect-mongo')(session);
const mongoose = require('mongoose');
const probe = require("probe-image-size");
@@ -297,7 +299,8 @@ addSecureRoute(
const ServicesApiKeyMap = new Map<string, string | undefined>([
["face", process.env.FACE],
["vision", process.env.VISION],
- ["handwriting", process.env.HANDWRITING]
+ ["handwriting", process.env.HANDWRITING],
+ ["transcription", process.env.TRANSCRIPTION]
]);
addSecureRoute(Method.GET, (user, res, req) => {
@@ -305,6 +308,60 @@ addSecureRoute(Method.GET, (user, res, req) => {
res.send(ServicesApiKeyMap.get(service));
}, undefined, `${RouteStore.cognitiveServices}/:requestedservice`);
+addSecureRoute(
+    Method.GET,
+    // Transcribes one audio file from deploy/assets with the Azure Speech SDK and sends the recognition
+    // result back. Responds 400 for an unsafe asset name and 500 when reading or recognition fails.
+    (user, res, req) => {
+        // Check the key before opening any stream so a missing key leaves no open file handle behind.
+        const apiKey = process.env.TRANSCRIPTION;
+        if (!apiKey) {
+            res.send(undefined);
+            return;
+        }
+
+        // The asset name comes from the URL: accept a bare file name only so it cannot escape the assets folder.
+        const asset = String(req.params.asset);
+        if (asset !== path.basename(asset) || asset.startsWith(".")) {
+            res.status(400).send("Invalid asset name.");
+            return;
+        }
+
+        const pushStream = AudioInputStream.createPushStream();
+        const readStream = fs.createReadStream(path.join(__dirname, '../../deploy/assets/', asset));
+        const speechConfig = SpeechConfig.fromSubscription(apiKey, "eastus");
+        speechConfig.speechRecognitionLanguage = "en-US";
+        let recognizer: Opt<SpeechRecognizer> = new SpeechRecognizer(speechConfig, AudioConfig.fromStreamInput(pushStream));
+
+        // Answers at most once (a read error and a recognition callback can both fire), then releases resources.
+        const finish = (status: number, body: string | SpeechRecognitionResult) => {
+            !res.headersSent && res.status(status).send(body);
+            readStream.destroy();
+            recognizer && recognizer.close();
+            recognizer = undefined;
+        };
+
+        // A Buffer can be a view into a larger pooled ArrayBuffer, so copy out exactly this chunk's bytes.
+        readStream.on('data', chunk => pushStream.write(chunk.buffer.slice(chunk.byteOffset, chunk.byteOffset + chunk.byteLength)));
+        readStream.on('end', () => pushStream.close());
+        readStream.on('error', (error: Error) => {
+            console.log("ERROR! ", error);
+            pushStream.close();
+            finish(500, error.message);
+        });
+
+        recognizer.recognizeOnceAsync(
+            (result: SpeechRecognitionResult) => finish(200, result),
+            (error: string) => {
+                console.log("RESULT ERROR: ", error);
+                finish(500, error);
+            },
+        );
+    },
+    undefined,
+    `${RouteStore.audioData}/:asset`
+);
+
class NodeCanvasFactory {
create = (width: number, height: number) => {
var canvas = createCanvas(width, height);