From 2daa348eb760dfd322a271bf3f9f69ebb713a91c Mon Sep 17 00:00:00 2001 From: Sam Wilkins <35748010+samwilkins333@users.noreply.github.com> Date: Wed, 12 Feb 2020 04:36:31 -0500 Subject: switched importer to websocket for continual updates --- src/client/documents/Documents.ts | 52 +++++++++++++++-------------- src/scraping/buxton/final/BuxtonImporter.ts | 52 ++++++++++++++++++----------- src/server/ApiManagers/UtilManager.ts | 9 ----- src/server/Message.ts | 5 ++- src/server/Websocket/Websocket.ts | 7 ++++ 5 files changed, 70 insertions(+), 55 deletions(-) (limited to 'src') diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts index b00a1a91d..cc18dc0a6 100644 --- a/src/client/documents/Documents.ts +++ b/src/client/documents/Documents.ts @@ -55,6 +55,7 @@ import { InkingControl } from "../views/InkingControl"; import { RichTextField } from "../../new_fields/RichTextField"; import { Networking } from "../Network"; import { extname } from "path"; +import { MessageStore } from "../../server/Message"; const requestImageSize = require('../util/request-image-size'); const path = require('path'); @@ -346,6 +347,7 @@ export namespace Docs { export namespace Create { export function Buxton() { + let responded = false; const loading = new Doc; loading.title = "Please wait for the import script..."; const parent = TreeDocument([loading], { @@ -354,36 +356,36 @@ export namespace Docs { _height: 400, _LODdisable: true }); - Networking.FetchFromServer("/buxton").then(response => { - const devices = JSON.parse(response); - if (!Array.isArray(devices)) { - if ("error" in devices) { - loading.title = devices.error; - } else { - console.log(devices); - alert("The importer returned an unexpected import format. Check the console."); - } - return; + const parentProto = Doc.GetProto(parent); + const { _socket } = DocServer; + Utils.AddServerHandler(_socket, MessageStore.BuxtonDocumentResult, ({ device, errors }) => { + if (!responded) { + responded = true; + parentProto.data = new List(); } - const parentProto = Doc.GetProto(parent); - parentProto.data = new List(); - devices.forEach(device => { + if (device) { const { __images } = device; delete device.__images; const { ImageDocument, StackingDocument } = Docs.Create; - if (Array.isArray(__images)) { - const constructed = __images.map(relative => Utils.prepend(relative)); - const hero = constructed[0]; - constructed.splice(0, 1); - const deviceImages = constructed.map((url, i) => ImageDocument(url, { title: `image${i}.${extname(url)}` })); - const doc = StackingDocument(deviceImages, { title: device.title, _LODdisable: true }); - const deviceProto = Doc.GetProto(doc); - deviceProto.hero = new ImageField(hero); - Docs.Get.DocumentHierarchyFromJson(device, undefined, deviceProto); - Doc.AddDocToList(parentProto, "data", doc); - } - }); + const constructed = __images.map(relative => Utils.prepend(relative)); + const deviceImages = constructed.map((url, i) => ImageDocument(url, { title: `image${i}.${extname(url)}` })); + const doc = StackingDocument(deviceImages, { title: device.title, _LODdisable: true }); + const deviceProto = Doc.GetProto(doc); + deviceProto.hero = new ImageField(constructed[0]); + Docs.Get.DocumentHierarchyFromJson(device, undefined, deviceProto); + Doc.AddDocToList(parentProto, "data", doc); + } else if (errors) { + console.log(errors); + } else { + alert("A Buxton document import was completely empty (??)"); + } + }); + Utils.AddServerHandler(_socket, MessageStore.BuxtonImportComplete, ({ deviceCount, errorCount }) => { + _socket.off(MessageStore.BuxtonDocumentResult.Message); + _socket.off(MessageStore.BuxtonImportComplete.Message); + alert(`Successfully imported ${deviceCount} device${deviceCount === 1 ? "" : "s"}, with ${errorCount} error${errorCount === 1 ? "" : "s"}.`); }); + Utils.Emit(_socket, MessageStore.BeginBuxtonImport, ""); return parent; } diff --git a/src/scraping/buxton/final/BuxtonImporter.ts b/src/scraping/buxton/final/BuxtonImporter.ts index 3d7421e90..d9d48d68c 100644 --- a/src/scraping/buxton/final/BuxtonImporter.ts +++ b/src/scraping/buxton/final/BuxtonImporter.ts @@ -8,7 +8,6 @@ const StreamZip = require('node-stream-zip'); const createImageSizeStream = require("image-size-stream"); import { parseXml } from "libxmljs"; import { strictEqual } from "assert"; -import { BatchedArray, TimeUnit } from "array-batcher"; interface DocumentContents { body: string; @@ -24,21 +23,33 @@ export interface DeviceDocument { longDescription: string; company: string; year: number; - originalPrice: number; - degreesOfFreedom: number; + originalPrice?: number; + degreesOfFreedom?: number; dimensions?: string; primaryKey: string; secondaryKey: string; attribute: string; + __images: string[]; + hyperlinks: string[]; + captions: string[]; + embeddedFileNames: string[]; } -interface AnalysisResult { +export interface AnalysisResult { device?: DeviceDocument; - errors?: any; + errors?: { [key: string]: string }; } type Transformer = (raw: string) => { transformed?: T, error?: string }; +export interface ImportResults { + deviceCount: number, + errorCount: number +} + +type ResultCallback = (result: AnalysisResult) => void; +type TerminatorCallback = (result: ImportResults) => void; + interface Processor { exp: RegExp; matchIndex?: number; @@ -168,7 +179,7 @@ const successOut = "buxton.json"; const failOut = "incomplete.json"; const deviceKeys = Array.from(RegexMap.keys()); -export default async function executeImport() { +export default async function executeImport(emitter: ResultCallback, terminator: TerminatorCallback) { try { const contents = readdirSync(sourceDir); const wordDocuments = contents.filter(file => /.*\.docx?$/.test(file)).map(file => `${sourceDir}/${file}`); @@ -176,7 +187,7 @@ export default async function executeImport() { rimraf.sync(dir); mkdirSync(dir); }); - return parseFiles(wordDocuments); + return parseFiles(wordDocuments, emitter, terminator); } catch (e) { const message = [ "Unable to find a source directory.", @@ -188,23 +199,22 @@ export default async function executeImport() { } } -async function parseFiles(wordDocuments: string[]): Promise { - const imported = await BatchedArray.from(wordDocuments, { batchSize: 8 }).batchedMapPatientInterval<{ fileName: string, contents: DocumentContents }>({ magnitude: 10, unit: TimeUnit.Seconds }, async (batch, collector) => { - for (const filePath of batch) { - const fileName = path.basename(filePath).replace("Bill_Notes_", ""); - console.log(cyan(`\nExtracting contents from ${fileName}...`)); - collector.push({ fileName, contents: await extractFileContents(filePath) }); - } - }); - console.log(yellow("\nAnalyzing the extracted document text...\n")); - const results = imported.map(({ fileName, contents }) => analyze(fileName, contents)); +async function parseFiles(wordDocuments: string[], emitter: ResultCallback, terminator: TerminatorCallback): Promise { + const results: AnalysisResult[] = []; + for (const filePath of wordDocuments) { + const fileName = path.basename(filePath).replace("Bill_Notes_", ""); + console.log(cyan(`\nExtracting contents from ${fileName}...`)); + const result = analyze(fileName, await extractFileContents(filePath)); + emitter(result); + results.push(result); + } const masterDevices: DeviceDocument[] = []; - const masterErrors: any[] = []; + const masterErrors: { [key: string]: string }[] = []; results.forEach(({ device, errors }) => { if (device) { masterDevices.push(device); - } else { + } else if (errors) { masterErrors.push(errors); } }); @@ -219,6 +229,8 @@ async function parseFiles(wordDocuments: string[]): Promise { await writeOutputFile(failOut, masterErrors, total, false); console.log(); + terminator({ deviceCount: masterDevices.length, errorCount: masterErrors.length }); + return masterDevices; } @@ -311,7 +323,7 @@ function analyze(fileName: string, contents: DocumentContents): AnalysisResult { embeddedFileNames, __images: imageUrls }; - const errors: any = { fileName }; + const errors: { [key: string]: string } = { fileName }; for (const key of deviceKeys) { const { exp, transformer, matchIndex, required } = RegexMap.get(key)!; diff --git a/src/server/ApiManagers/UtilManager.ts b/src/server/ApiManagers/UtilManager.ts index 5aac8261e..8adc3da81 100644 --- a/src/server/ApiManagers/UtilManager.ts +++ b/src/server/ApiManagers/UtilManager.ts @@ -39,15 +39,6 @@ export default class UtilManager extends ApiManager { } }); - register({ - method: Method.GET, - subscription: "/buxton", - secureHandler: async ({ req, res }) => { - req.setTimeout(300000); - res.send(await executeImport()); - } - }); - register({ method: Method.GET, subscription: "/version", diff --git a/src/server/Message.ts b/src/server/Message.ts index 79b6fa1e0..2a03e2311 100644 --- a/src/server/Message.ts +++ b/src/server/Message.ts @@ -1,4 +1,5 @@ import { Utils } from "../Utils"; +import { AnalysisResult, ImportResults } from "../scraping/buxton/final/BuxtonImporter"; export class Message { private _name: string; @@ -56,6 +57,9 @@ export namespace MessageStore { export const GetDocument = new Message("Get Document"); export const DeleteAll = new Message("Delete All"); export const ConnectionTerminated = new Message("Connection Terminated"); + export const BeginBuxtonImport = new Message("Begin Buxton Import"); + export const BuxtonDocumentResult = new Message("Buxton Document Result"); + export const BuxtonImportComplete = new Message("Buxton Import Complete"); export const GetRefField = new Message("Get Ref Field"); export const GetRefFields = new Message("Get Ref Fields"); @@ -65,5 +69,4 @@ export namespace MessageStore { export const DeleteField = new Message("Delete field"); export const DeleteFields = new Message("Delete fields"); - } diff --git a/src/server/Websocket/Websocket.ts b/src/server/Websocket/Websocket.ts index ba7ca8f35..724221be1 100644 --- a/src/server/Websocket/Websocket.ts +++ b/src/server/Websocket/Websocket.ts @@ -12,6 +12,7 @@ import { timeMap } from "../ApiManagers/UserManager"; import { green } from "colors"; import { networkInterfaces, type } from "os"; import { object } from "serializr"; +import executeImport from "../../scraping/buxton/final/BuxtonImporter"; export namespace WebSocket { @@ -106,6 +107,12 @@ export namespace WebSocket { Utils.AddServerHandler(socket, MessageStore.DeleteFields, ids => DeleteFields(socket, ids)); Utils.AddServerHandlerCallback(socket, MessageStore.GetRefField, GetRefField); Utils.AddServerHandlerCallback(socket, MessageStore.GetRefFields, GetRefFields); + Utils.AddServerHandler(socket, MessageStore.BeginBuxtonImport, () => { + executeImport( + deviceOrError => Utils.Emit(socket, MessageStore.BuxtonDocumentResult, deviceOrError), + results => Utils.Emit(socket, MessageStore.BuxtonImportComplete, results) + ); + }); disconnect = () => { socket.broadcast.emit("connection_terminated", Date.now()); -- cgit v1.2.3-70-g09d2