Diffstat (limited to 'src')
-rw-r--r--   src/client/documents/Documents.ts           |   2 +-
-rw-r--r--   src/client/util/CurrentUserUtils.ts         |  56 +-
-rw-r--r--   src/scraping/buxton/final/BuxtonImporter.ts | 604 ---
-rw-r--r--   src/server/Message.ts                       |   4 -
-rw-r--r--   src/server/websocket.ts                     |   7 -
5 files changed, 2 insertions(+), 671 deletions(-)
diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts
index e2f82fc62..9bcd23aa0 100644
--- a/src/client/documents/Documents.ts
+++ b/src/client/documents/Documents.ts
@@ -179,7 +179,7 @@ export class DocumentOptions {
     layout?: string | Doc; // default layout string for a document
     contentPointerEvents?: string; // pointer events allowed for content of a document view. e.g., set to "none" in menuSidebar for sharedDocs so that you can select a document, but not interact with its contents
     childLimitHeight?: number; // whether to limit the height of collection children. 0 means height can be no bigger than width
-    childLayoutTemplate?: Doc; // template for collection to use to render its children (see PresBox or Buxton layout in tree view)
+    childLayoutTemplate?: Doc; // template for collection to use to render its children (see PresBox layout in tree view)
     childLayoutString?: string; // template string for collection to use to render its children
     childDontRegisterViews?: boolean;
     childHideLinkButton?: boolean; // hide link buttons on all children
diff --git a/src/client/util/CurrentUserUtils.ts b/src/client/util/CurrentUserUtils.ts
index c7f293f2c..f02c5a5a1 100644
--- a/src/client/util/CurrentUserUtils.ts
+++ b/src/client/util/CurrentUserUtils.ts
@@ -1,4 +1,5 @@
 import { computed, observable, reaction } from "mobx";
+import * as rp from 'request-promise';
 import { DataSym, Doc, DocListCast, DocListCastAsync } from "../../fields/Doc";
 import { Id } from "../../fields/FieldSymbols";
 import { InkTool } from "../../fields/InkField";
@@ -6,7 +7,6 @@ import { List } from "../../fields/List";
 import { PrefetchProxy } from "../../fields/Proxy";
 import { RichTextField } from "../../fields/RichTextField";
 import { listSpec } from "../../fields/Schema";
-import { SchemaHeaderField } from "../../fields/SchemaHeaderField";
 import { ComputedField, ScriptField } from "../../fields/ScriptField";
 import { BoolCast, Cast, DateCast, NumCast, PromiseValue, StrCast } from "../../fields/Types";
 import { nullAudio } from "../../fields/URLField";
@@ -36,7 +36,6 @@ import { ColorScheme } from "./SettingsManager";
 import { SharingManager } from "./SharingManager";
 import { SnappingManager } from "./SnappingManager";
 import { UndoManager } from "./UndoManager";
-import * as rp from 'request-promise';
 
 interface Button {
     title?: string;
@@ -191,59 +190,6 @@ export class CurrentUserUtils {
     //     });
     // }
 
-        if (doc["template-button-detail"] === undefined) {
-            const { TextDocument, MasonryDocument, CarouselDocument } = Docs.Create;
-
-            const openInTarget = ScriptField.MakeScript("openOnRight(self.doubleClickView)");
-            const carousel = CarouselDocument([], {
-                title: "data", _height: 350, _itemIndex: 0, "_carousel-caption-xMargin": 10, "_carousel-caption-yMargin": 10,
-                onChildDoubleClick: openInTarget, backgroundColor: "#9b9b9b3F", system: true
-            });
-
-            const details = TextDocument("", { title: "details", _height: 200, _autoHeight: true, system: true });
-            const short = TextDocument("", { title: "shortDescription", treeViewOpen: true, treeViewExpandedView: "layout", _height: 75, _autoHeight: true, system: true });
-            const long = TextDocument("", { title: "longDescription", treeViewOpen: false, treeViewExpandedView: "layout", _height: 150, _autoHeight: true, system: true });
-
-            const buxtonFieldKeys = ["year", "originalPrice", "degreesOfFreedom", "company", "attribute", "primaryKey", "secondaryKey", "dimensions"];
-            const detailedTemplate = {
-                doc: {
-                    type: "doc", content: buxtonFieldKeys.map(fieldKey => ({
-                        type: "paragraph",
-                        content: [{ type: "dashField", attrs: { fieldKey } }]
-                    }))
-                },
-                selection: { type: "text", anchor: 1, head: 1 },
-                storedMarks: []
-            };
-            details.text = new RichTextField(JSON.stringify(detailedTemplate), buxtonFieldKeys.join(" "));
-
-            const shared = { _autoHeight: true, _xMargin: 0 };
-            const detailViewOpts = { title: "detailView", _width: 300, _fontFamily: "Arial", _fontSize: "12px" };
-            const descriptionWrapperOpts = { title: "descriptions", _height: 300, _columnWidth: -1, treeViewHideTitle: true, _pivotField: "title", system: true };
-
-            const descriptionWrapper = MasonryDocument([details, short, long], { ...shared, ...descriptionWrapperOpts });
-            descriptionWrapper._columnHeaders = new List<SchemaHeaderField>([
-                new SchemaHeaderField("[A Short Description]", "dimgray", undefined, undefined, undefined, false),
-                new SchemaHeaderField("[Long Description]", "dimgray", undefined, undefined, undefined, true),
-                new SchemaHeaderField("[Details]", "dimgray", undefined, undefined, undefined, true),
-            ]);
-            const detailView = Docs.Create.StackingDocument([carousel, descriptionWrapper], { ...shared, ...detailViewOpts, _chromeHidden: true, system: true });
-            detailView.isTemplateDoc = makeTemplate(detailView);
-
-            details.title = "Details";
-            short.title = "A Short Description";
-            long.title = "Long Description";
-
-            doc["template-button-detail"] = CurrentUserUtils.createToolButton({
-                onDragStart: ScriptField.MakeFunction('copyDragFactory(this.dragFactory)'),
-                dragFactory: new PrefetchProxy(detailView) as any as Doc,
-                title: "detailView",
-                icon: "window-maximize",
-                system: true,
-                btnType: ButtonType.ToolButton,
-            });
-        }
-
         const requiredTypes = [
             doc["template-button-slides"] as Doc,
             doc["template-mobile-button"] as Doc,
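The removed template hinges on the detailedTemplate JSON above: a ProseMirror-style document where each paragraph wraps one "dashField" node bound to a metadata key. A minimal sketch of that shape, using only the "dashField" node type and RichTextField constructor visible in this hunk (the three keys are illustrative):

    // One paragraph per metadata field; the resulting JSON is what
    // RichTextField serializes in the deleted code above.
    const fieldKeys = ["year", "company", "attribute"];
    const templateJson = {
        doc: {
            type: "doc",
            content: fieldKeys.map(fieldKey => ({
                type: "paragraph",
                content: [{ type: "dashField", attrs: { fieldKey } }],
            })),
        },
        selection: { type: "text", anchor: 1, head: 1 },
        storedMarks: [],
    };
    // details.text = new RichTextField(JSON.stringify(templateJson), fieldKeys.join(" "));
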
diff --git a/src/scraping/buxton/final/BuxtonImporter.ts b/src/scraping/buxton/final/BuxtonImporter.ts
deleted file mode 100644
index ee8dd5b5b..000000000
--- a/src/scraping/buxton/final/BuxtonImporter.ts
+++ /dev/null
@@ -1,604 +0,0 @@
-import { readdirSync, writeFile, mkdirSync, createReadStream, createWriteStream, existsSync, statSync } from "fs";
-import * as path from "path";
-import { red, cyan, yellow } from "colors";
-import { Utils } from "../../../Utils";
-import rimraf = require("rimraf");
-import { DashUploadUtils } from "../../../server/DashUploadUtils";
-const StreamZip = require('node-stream-zip');
-const createImageSizeStream = require("image-size-stream");
-import { parseXml } from "libxmljs";
-import { strictEqual } from "assert";
-import { Readable, PassThrough } from "stream";
-import { Directory, serverPathToFile, pathToDirectory } from "../../../server/ApiManagers/UploadManager";
-
-/**
- * This is an arbitrary bundle of data that gets populated
- * in extractFileContents
- */
-interface DocumentContents {
-    body: string;
-    imageData: ImageData[];
-    hyperlinks: string[];
-    tableData: TableData[];
-    longDescription: string;
-}
-
-/**
- * A rough schema for everything that Bill has
- * included for each document
- */
-export interface DeviceDocument {
-    title: string;
-    shortDescription: string;
-    longDescription: string;
-    company: string;
-    year: number;
-    originalPrice?: number;
-    degreesOfFreedom?: number;
-    dimensions?: string;
-    primaryKey: string;
-    secondaryKey: string;
-    attribute: string;
-    __images: ImageData[];
-    additionalMedia: ({ [type: string]: string } | undefined)[];
-    hyperlinks: string[];
-    captions: string[]; // from the table column
-    embeddedFileNames: string[]; // from the table column
-}
-
-/**
- * A layer of abstraction around a single parsing
- * attempt. The error is not a TypeScript error, but
- * rather an invalidly formatted value for a given key.
- */
-export interface AnalysisResult {
-    device?: DeviceDocument;
-    invalid?: { [deviceProperty: string]: string };
-}
-
-/**
- * A mini API that takes in a string and returns
- * either the given T or an error indicating that the
- * transformation was rejected.
- */
-type Transformer<T> = (raw: string) => TransformResult<T>;
-interface TransformResult<T> {
-    transformed?: T;
-    error?: string;
-}
-
-/**
- * Simple bundle counting successful and failed imports
- */
-export interface ImportResults {
-    deviceCount: number;
-    errorCount: number;
-}
-
-/**
- * Definitions for callback functions. Such instances are
- * invoked when a single document has been parsed
- * or the entire import is over. As of this writing, these
- * callbacks are supplied by WebSocket.ts and used to inform
- * the client of these events.
- */
-type ResultCallback = (result: AnalysisResult) => void;
-type TerminatorCallback = (result: ImportResults) => void;
-
-/**
- * Defines everything needed to specify how a single key should be
- * formatted within the plain body text. The association between
- * keys and their format definitions is stored in FormatMap.
- */
-interface ValueFormatDefinition<T> {
-    exp: RegExp; // the expression that the key's value should match
-    matchIndex?: number; // defaults to 1, but can be overridden to account for grouping in @param exp
-    transformer?: Transformer<T>; // if desirable, how to transform the Regex match
-    required?: boolean; // defaults to true; confirms that, for a whole document to be counted successful,
-    // all of its required values should be present and properly formatted
-}
-
-/**
- * The basic data we extract from each image in the document
- */
-interface ImageData {
-    url: string;
-    nativeWidth: number;
-    nativeHeight: number;
-}
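The Transformer / TransformResult pair above is a small validation API: a transformer either yields a transformed value or explains why the raw string was rejected. A minimal conforming example, modeled on the originalPrice entry further down (the helper itself is hypothetical, not part of the deleted file):

    // Hypothetical transformer: parses "$1,234.56" into 1234.56, or reports why not.
    const priceTransformer: Transformer<number> = raw => {
        const value = Number(raw.replace(/[$,]/g, ""));
        return isNaN(value)
            ? { error: `${raw} cannot be parsed to a numeric value.` }
            : { transformed: value };
    };
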
-namespace Utilities {
-
-    /**
-     * Numeric 'try parse', fits with the Transformer API
-     * @param raw the serialized number
-     */
-    export function numberValue(raw: string): TransformResult<number> {
-        const transformed = Number(raw);
-        if (isNaN(transformed)) {
-            return { error: `${raw} cannot be parsed to a numeric value.` };
-        }
-        return { transformed };
-    }
-
-    /**
-     * A simple tokenizer that splits along 'and' and commas, and removes duplicates.
-     * Helpful mainly for attribute and primary key lists
-     * @param raw the string to tokenize
-     */
-    export function collectUniqueTokens(raw: string): TransformResult<string[]> {
-        const pieces = raw.replace(/,|\s+and\s+/g, " ").split(/\s+/).filter(piece => piece.length);
-        const unique = new Set(pieces.map(token => token.toLowerCase().trim()));
-        return { transformed: Array.from(unique).map(capitalize).sort() };
-    }
-
-    /**
-     * Tries to correct an XML text parsing artifact where some sentences lose their
-     * separating space, and others gain excess whitespace
-     * @param raw the text to correct
-     */
-    export function correctSentences(raw: string): TransformResult<string> {
-        raw = raw.replace(/\./g, ". ").replace(/\:/g, ": ").replace(/\,/g, ", ").replace(/\?/g, "? ").trimRight();
-        raw = raw.replace(/\s{2,}/g, " ");
-        return { transformed: raw };
-    }
-
-    /**
-     * Simple capitalization
-     * @param word the word to capitalize
-     */
-    export function capitalize(word: string): string {
-        const clean = word.trim();
-        if (!clean.length) {
-            return word;
-        }
-        return word.charAt(0).toUpperCase() + word.slice(1);
-    }
-
-    /**
-     * Streams the requested file at the relative path to the
-     * root of the zip, then parses it with a library
-     * @param zip the zip instance data source
-     * @param relativePath the path to a .xml file within the zip to parse
-     */
-    export async function readAndParseXml(zip: any, relativePath: string) {
-        console.log(`Text streaming ${relativePath}`);
-        const contents = await new Promise<string>((resolve, reject) => {
-            let body = "";
-            zip.stream(relativePath, (error: any, stream: any) => {
-                if (error) {
-                    reject(error);
-                }
-                stream.on('data', (chunk: any) => body += chunk.toString());
-                stream.on('end', () => resolve(body));
-            });
-        });
-        return parseXml(contents);
-    }
-}
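As a concrete check on the tokenizer above, with an illustrative input:

    // Separators normalized to spaces, case-insensitive dedupe, then capitalize and sort.
    Utilities.collectUniqueTokens("touch, pen and touch");
    // => { transformed: ["Pen", "Touch"] }
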
").trimRight(); - raw = raw.replace(/\s{2,}/g, " "); - return { transformed: raw }; - } - - /** - * Simple capitalization - * @param word to capitalize - */ - export function capitalize(word: string): string { - const clean = word.trim(); - if (!clean.length) { - return word; - } - return word.charAt(0).toUpperCase() + word.slice(1); - } - - /** - * Streams the requeted file at the relative path to the - * root of the zip, then parses it with a library - * @param zip the zip instance data source - * @param relativePath the path to a .xml file within the zip to parse - */ - export async function readAndParseXml(zip: any, relativePath: string) { - console.log(`Text streaming ${relativePath}`); - const contents = await new Promise<string>((resolve, reject) => { - let body = ""; - zip.stream(relativePath, (error: any, stream: any) => { - if (error) { - reject(error); - } - stream.on('data', (chunk: any) => body += chunk.toString()); - stream.on('end', () => resolve(body)); - }); - }); - return parseXml(contents); - } -} - -/** - * Defines how device values should be formatted. As you can see, the formatting is - * not super consistent and has changed over time as edge cases have been found, but this - * at least imposes some constraints, and will notify you if a document doesn't match the specifications - * in this map. - */ -const FormatMap = new Map<keyof DeviceDocument, ValueFormatDefinition<any>>([ - ["title", { - exp: /contact\s+(.*)Short Description:/ - }], - ["company", { - exp: /Company:\s+([^\|]*)\s+\|/, - transformer: (raw: string) => ({ transformed: raw.replace(/\./g, "") }) - }], - ["year", { - exp: /Year:\s+([^\|]*)\s+\|/, - transformer: (raw: string) => Utilities.numberValue(/[0-9]{4}/.exec(raw)![0]) - }], - ["primaryKey", { - exp: /Primary:\s+(.*)(Secondary|Additional):/, - transformer: raw => { - const { transformed, error } = Utilities.collectUniqueTokens(raw); - return transformed ? { transformed: transformed[0] } : { error }; - } - }], - ["secondaryKey", { - exp: /(Secondary|Additional):\s+(.*)Attributes?:/, - transformer: raw => { - const { transformed, error } = Utilities.collectUniqueTokens(raw); - return transformed ? { transformed: transformed[0] } : { error }; - }, - matchIndex: 2 - }], - ["attribute", { - exp: /Attributes?:\s+(.*)Links/, - transformer: raw => { - const { transformed, error } = Utilities.collectUniqueTokens(raw); - return transformed ? 
-const sourceDir = path.resolve(__dirname, "source"); // where the Word documents are assumed to be stored
-const assetDir = path.resolve(__dirname, "assets"); // where any additional media content like pdfs will be stored. Each subdirectory of this
-// must follow the enum Directory.<type> naming scheme
-const outDir = path.resolve(__dirname, "json"); // where the JSON output of these device documents will be written
-const imageDir = path.resolve(__dirname, "../../../server/public/files/images/buxton"); // where, in the server, these images will be written
-const successOut = "buxton.json"; // the JSON list representing properly formatted documents
-const failOut = "incomplete.json"; // the JSON list representing improperly formatted documents
-const deviceKeys = Array.from(FormatMap.keys()); // a way to iterate through all keys of the DeviceDocument interface
-
-/**
- * Starts by REMOVING ALL EXISTING BUXTON RESOURCES. This might need to be
- * changed going forward.
- * @param emitter the callback invoked when each document is completed
- * @param terminator the callback invoked when the entire import is completed
- */
-export default async function executeImport(emitter: ResultCallback, terminator: TerminatorCallback) {
-    try {
-        // get all Word documents in the source directory
-        const contents = readdirSync(sourceDir);
-        const wordDocuments = contents.filter(file => /.*\.docx?$/.test(file)).map(file => `${sourceDir}/${file}`);
-        // removal takes place here
-        [outDir, imageDir].forEach(dir => {
-            rimraf.sync(dir);
-            mkdirSync(dir);
-        });
-        await transferAssets();
-        return parseFiles(wordDocuments, emitter, terminator);
-    } catch (e: any) {
-        const message = [
-            "Unable to find a source directory.",
-            "Please ensure that the following directory exists:",
-            `${e.message}`
-        ].join('\n');
-        console.log(red(message));
-        return { error: message };
-    }
-}
-
-/**
- * Builds a mirrored directory structure of all media / asset files
- * within the server's public directory.
- */
-async function transferAssets() {
-    for (const assetType of readdirSync(assetDir)) {
-        const subroot = path.resolve(assetDir, assetType);
-        if (!statSync(subroot).isDirectory()) {
-            continue;
-        }
-        const outputSubroot = serverPathToFile(assetType as Directory, "buxton");
-        if (existsSync(outputSubroot)) {
-            continue;
-        } else {
-            mkdirSync(outputSubroot);
-        }
-        for (const fileName of readdirSync(subroot)) {
-            const readStream = createReadStream(path.resolve(subroot, fileName));
-            const writeStream = createWriteStream(path.resolve(outputSubroot, fileName));
-            await new Promise<void>(resolve => {
-                readStream.pipe(writeStream).on("close", resolve);
-            });
-        }
-    }
-}
-/**
- * Parse every Word document in the directory, notifying any callers as needed
- * at each iteration via the emitter.
- * @param wordDocuments the string list of Word document names to parse
- * @param emitter the callback invoked when each document is completed
- * @param terminator the callback invoked when the entire import is completed
- */
-async function parseFiles(wordDocuments: string[], emitter: ResultCallback, terminator: TerminatorCallback): Promise<DeviceDocument[]> {
-    // execute the parent-most parse function
-    const results: AnalysisResult[] = [];
-    for (const filePath of wordDocuments) {
-        const fileName = path.basename(filePath).replace("Bill_Notes_", ""); // not strictly needed, but cleaner
-        console.log(cyan(`\nExtracting contents from ${fileName}...`));
-        const result = analyze(fileName, await extractFileContents(filePath));
-        emitter(result);
-        results.push(result);
-    }
-
-    // collect information about errors and successes
-    const masterDevices: DeviceDocument[] = [];
-    const masterErrors: { [key: string]: string }[] = [];
-    results.forEach(({ device, invalid: errors }) => {
-        if (device) {
-            masterDevices.push(device);
-        } else if (errors) {
-            masterErrors.push(errors);
-        }
-    });
-
-    // something went wrong, since errors and successes should sum to total inputs
-    const total = wordDocuments.length;
-    if (masterDevices.length + masterErrors.length !== total) {
-        throw new Error(`Encountered a ${masterDevices.length} to ${masterErrors.length} mismatch in device / error split!`);
-    }
-
-    // write the external JSON representations of this import
-    console.log();
-    await writeOutputFile(successOut, masterDevices, total, true);
-    await writeOutputFile(failOut, masterErrors, total, false);
-    console.log();
-
-    // notify the caller that the import has finished
-    terminator({ deviceCount: masterDevices.length, errorCount: masterErrors.length });
-
-    return masterDevices;
-}
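Since executeImport is the module's default export, a caller only needs to supply the two callbacks. A minimal driver sketch, with console logging standing in for the WebSocket forwarding the server actually did (see websocket.ts below):

    // Log each parse result and the final tally.
    executeImport(
        result => console.log(result.device ? `parsed ${result.device.title}` : "rejected", result.invalid ?? ""),
        ({ deviceCount, errorCount }) => console.log(`${deviceCount} imported, ${errorCount} failed`)
    );
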
-/**
- * XPath definitions for desired XML targets in respective hierarchies.
- *
- * For table cells, can be read as: "find me anything that looks like <w:tc> in XML, whose
- * parent looks like <w:tr>, whose parent looks like <w:tbl>"
- *
- * <w:tbl>
- *     <w:tr>
- *         <w:tc>
- *
- * These are found by trial and error, and by using an online XML parser / prettifier
- * to inspect the structure, since the Node XML library does not expose the parsed
- * structure very well for searching, say, in the debug console.
- */
-const xPaths = {
-    paragraphs: '//*[name()="w:p"]',
-    tableCells: '//*[name()="w:tbl"]/*[name()="w:tr"]/*[name()="w:tc"]',
-    hyperlinks: '//*[name()="Relationship" and contains(@Type, "hyperlink")]'
-};
-
-interface TableData {
-    fileName: string;
-    caption: string;
-    additionalMedia?: { [type: string]: string };
-}
-
-const SuffixDirectoryMap = new Map<string, Directory>([
-    ["p", Directory.pdfs]
-]);
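These XPaths are evaluated with libxmljs's find(), as in extractFileContents below. A one-line sketch of pulling table-cell text out of a parsed document.xml, using only calls that appear in this file:

    // Each <w:tc> cell's text, flattened in document order.
    const cells = document.find(xPaths.tableCells).map(node => node.text().trim());
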
-/**
- * The meat of the script: images and text content are extracted here
- * @param pathToDocument the path to the document relative to the root of the zip
- */
-async function extractFileContents(pathToDocument: string): Promise<DocumentContents> {
-    console.log('Extracting text...');
-    const zip = new StreamZip({ file: pathToDocument, storeEntries: true });
-    await new Promise<void>(resolve => zip.on('ready', resolve));
-
-    // extract the body of the document and, specifically, its captions
-    const document = await Utilities.readAndParseXml(zip, "word/document.xml");
-    // get plain text
-    const body = document.root()?.text() ?? "No body found. Check the import script's XML parser.";
-    const captions: string[] = [];
-    const tableData: TableData[] = [];
-    // preserve paragraph formatting and line breaks that would otherwise get lost in the plain text parsing
-    // of the XML hierarchy
-    const paragraphs = document.find(xPaths.paragraphs).map(node => Utilities.correctSentences(node.text()).transformed!);
-    const start = paragraphs.indexOf(paragraphs.find(el => /Bill Buxton[’']s Notes/.test(el))!) + 1;
-    const end = paragraphs.indexOf("Device Details");
-    const longDescription = paragraphs.slice(start, end).filter(paragraph => paragraph.length).join("\n\n");
-
-    // extract captions from the table cells
-    const tableRowsFlattened = document.find(xPaths.tableCells).map(node => node.text().trim());
-    const { length } = tableRowsFlattened;
-    const numCols = 4;
-    strictEqual(length > numCols, true, "No captions written."); // the first row has the headers, not content
-    strictEqual(length % numCols === 0, true, "Improper caption formatting.");
-
-    // break the flat list of strings into groups of numCols. Thus, each group represents
-    // a row in the table, where the first cell has no text content since it's
-    // the image, the second has the file name and the third has the caption (maybe additional columns
-    // have been added or reordered since this was written, but they follow the same approach)
-    for (let i = numCols; i < tableRowsFlattened.length; i += numCols) {
-        const row = tableRowsFlattened.slice(i, i + numCols);
-        const entry: TableData = { fileName: row[1], caption: row[2] };
-        const key = SuffixDirectoryMap.get(row[3].toLowerCase());
-        if (key) {
-            const media: any = {};
-            media[key] = `${entry.fileName.split(".")[0]}.pdf`;
-            entry.additionalMedia = media;
-        }
-        tableData.push(entry);
-    }
-
-    // extract all hyperlinks embedded in the document
-    const rels = await Utilities.readAndParseXml(zip, "word/_rels/document.xml.rels");
-    const hyperlinks = rels.find(xPaths.hyperlinks).map(el => el.attrs()[2].value());
-    console.log("Text extracted.");
-
-    // write out the images for this document
-    console.log("Beginning image extraction...");
-    const imageData = await writeImages(zip);
-    console.log(`Extracted ${imageData.length} images.`);
-
-    // cleanup
-    zip.close();
-
-    return { body, longDescription, imageData, tableData, hyperlinks };
-}
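The indexed for-loop above is a "chunk a flat list into fixed-size rows" pass that skips the header row. The same idea as a standalone helper (hypothetical, not part of the original file):

    // Hypothetical helper: chunk([a,b,c,d,e,f], 3) => [[a,b,c],[d,e,f]]
    function chunk<T>(flat: T[], size: number): T[][] {
        const rows: T[][] = [];
        for (let i = 0; i < flat.length; i += size) {
            rows.push(flat.slice(i, i + size));
        }
        return rows;
    }
    // extractFileContents effectively iterates chunk(tableRowsFlattened, numCols).slice(1)
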
-// zip-relative path from root expression / filter used to isolate only media assets
-const imageEntry = /^word\/media\/\w+\.(jpeg|jpg|png|gif)/;
-
-/**
- * Image dimensions and file suffix
- */
-interface ImageAttrs {
-    width: number;
-    height: number;
-    type: string;
-}
-
-/**
- * For each image, stream the file, get its size, and check if it's an icon
- * (if it is, ignore it)
- * @param zip the zip instance data source
- */
-async function writeImages(zip: any): Promise<ImageData[]> {
-    const allEntries = Object.values<any>(zip.entries()).map(({ name }) => name);
-    const imageEntries = allEntries.filter(name => imageEntry.test(name));
-
-    const imageUrls: ImageData[] = [];
-    const valid: any[] = [];
-
-    const getImageStream = (mediaPath: string) => new Promise<Readable>((resolve, reject) => {
-        zip.stream(mediaPath, (error: any, stream: any) => error ? reject(error) : resolve(stream));
-    });
-
-    for (const mediaPath of imageEntries) {
-        const { width, height, type } = await new Promise<ImageAttrs>(async resolve => {
-            const sizeStream = (createImageSizeStream() as PassThrough).on('size', (dimensions: ImageAttrs) => {
-                readStream.destroy();
-                resolve(dimensions);
-            }).on("error", () => readStream.destroy());
-            const readStream = await getImageStream(mediaPath);
-            readStream.pipe(sizeStream);
-        });
-
-        // keep it only if it's not an icon, by this rough heuristic (i.e., it is not square)
-        const number = Number(/image(\d+)/.exec(mediaPath)![1]);
-        if (number > 5 || width - height > 10) {
-            valid.push({ width, height, type, mediaPath, number });
-        }
-    }
-
-    valid.sort((a, b) => a.number - b.number);
-
-    // if the two lead images share an aspect ratio, they're likely duplicates at
-    // different resolutions, so keep only the larger one
-    const [{ width: first_w, height: first_h }, { width: second_w, height: second_h }] = valid;
-    if (Math.abs(first_w / second_w - first_h / second_h) < 0.01) {
-        const first_size = first_w * first_h;
-        const second_size = second_w * second_h;
-        const target = first_size >= second_size ? 1 : 0;
-        valid.splice(target, 1);
-        console.log(`Heuristically removed image with size ${target ? second_size : first_size}`);
-    }
-
-    // for each valid image, output the _o, _l, _m, and _s files
-    // THIS IS WHERE THE SCRIPT SPENDS MOST OF ITS TIME
-    for (const { type, width, height, mediaPath } of valid) {
-        const generatedFileName = `upload_${Utils.GenerateGuid()}.${type.toLowerCase()}`;
-        await DashUploadUtils.outputResizedImages(() => getImageStream(mediaPath), generatedFileName, imageDir);
-        imageUrls.push({
-            url: `/files/images/buxton/${generatedFileName}`,
-            nativeWidth: width,
-            nativeHeight: height
-        });
-    }
-
-    return imageUrls;
-}
-/**
- * Takes the results of extractFileContents (which handles the external media and
- * preliminary text processing) and tests the document body against the value
- * definitions in FormatMap, to make sure the body contains all
- * required fields, properly formatted
- * @param fileName the file whose body to inspect
- * @param contents the data already computed / parsed by extractFileContents
- */
-function analyze(fileName: string, contents: DocumentContents): AnalysisResult {
-    const { body, imageData, hyperlinks, tableData, longDescription } = contents;
-    const device: any = {
-        hyperlinks,
-        captions: tableData.map(({ caption }) => caption),
-        embeddedFileNames: tableData.map(({ fileName }) => fileName),
-        additionalMedia: tableData.map(({ additionalMedia }) => additionalMedia),
-        longDescription,
-        __images: imageData
-    };
-    const errors: { [key: string]: string } = { fileName };
-
-    for (const key of deviceKeys) {
-        const { exp, transformer, matchIndex, required } = FormatMap.get(key)!;
-        const matches = exp.exec(body);
-
-        let captured: string;
-        // if we matched and we got the specific match we're after
-        if (matches && (captured = matches[matchIndex ?? 1])) { // matchIndex defaults to 1
-            captured = captured.replace(/\s{2,}/g, " "); // remove excess whitespace
-            // if supplied, apply the transformation (recall that this is specified in FormatMap)
-            if (transformer) {
-                const { error, transformed } = transformer(captured);
-                if (error) {
-                    // we hit a snag trying to transform the valid match;
-                    // this still counts as a fundamental error
-                    errors[key] = `__ERR__${key.toUpperCase()}__TRANSFORM__: ${error}`;
-                    continue;
-                }
-                captured = transformed;
-            }
-            device[key] = captured;
-        } else if (required ?? true) {
-            // the field was either implicitly or explicitly required, and failed to match the definition in
-            // FormatMap
-            errors[key] = `ERR__${key.toUpperCase()}__: outer match ${matches === null ? "wasn't" : "was"} captured.`;
-            continue;
-        }
-    }
-
-    // print errors - this can be removed
-    const errorKeys = Object.keys(errors);
-    if (errorKeys.length > 1) {
-        console.log(red(`@ ${cyan(fileName.toUpperCase())}...`));
-        errorKeys.forEach(key => key !== "fileName" && console.log(red(errors[key])));
-        return { invalid: errors };
-    }
-
-    return { device };
-}
-
-/**
- * A utility function that writes the JSON results of this import out to the desired path
- * @param relativePath where to write the JSON file
- * @param data valid device document objects, or errors
- * @param total used for more informative printing
- * @param success whether the caller is writing the successful parses or the failures
- */
-async function writeOutputFile(relativePath: string, data: any[], total: number, success: boolean) {
-    console.log(yellow(`Encountered ${data.length} ${success ? "valid" : "invalid"} documents out of ${total} candidates. Writing ${relativePath}...`));
-    return new Promise<void>((resolve, reject) => {
-        const destination = path.resolve(outDir, relativePath);
-        const contents = JSON.stringify(data, undefined, 4); // format the JSON
-        writeFile(destination, contents, err => err ? reject(err) : resolve());
-    });
-}
\ No newline at end of file
diff --git a/src/server/Message.ts b/src/server/Message.ts
index 59b24cd82..d87ae5027 100644
--- a/src/server/Message.ts
+++ b/src/server/Message.ts
@@ -1,5 +1,4 @@
 import { Point } from "../pen-gestures/ndollar";
-import { AnalysisResult, ImportResults } from "../scraping/buxton/final/BuxtonImporter";
 import { Utils } from "../Utils";
 
 export class Message<T> {
@@ -82,9 +81,6 @@ export namespace MessageStore {
     export const GetDocument = new Message<string>("Get Document");
     export const DeleteAll = new Message<any>("Delete All");
     export const ConnectionTerminated = new Message<string>("Connection Terminated");
-    export const BeginBuxtonImport = new Message<string>("Begin Buxton Import");
-    export const BuxtonDocumentResult = new Message<AnalysisResult>("Buxton Document Result");
-    export const BuxtonImportComplete = new Message<ImportResults>("Buxton Import Complete");
     export const GesturePoints = new Message<GestureContent>("Gesture Points");
     export const MobileInkOverlayTrigger = new Message<MobileInkOverlayContent>("Trigger Mobile Ink Overlay");
diff --git a/src/server/websocket.ts b/src/server/websocket.ts
index 0c79c1bbb..1b7f5919f 100644
--- a/src/server/websocket.ts
+++ b/src/server/websocket.ts
@@ -4,7 +4,6 @@ import { createServer, Server } from "https";
 import { networkInterfaces } from "os";
 import * as sio from 'socket.io';
 import { Socket } from "socket.io";
-import executeImport from "../scraping/buxton/final/BuxtonImporter";
 import { Utils } from "../Utils";
 import { logPort } from './ActionUtilities';
 import { timeMap } from "./ApiManagers/UserManager";
@@ -125,12 +124,6 @@ export namespace WebSocket {
      * as an emitter and a terminator the functions that simply broadcast a result
      * or indicate termination to the client via the web socket
      */
-    Utils.AddServerHandler(socket, MessageStore.BeginBuxtonImport, () => {
-        executeImport(
-            deviceOrError => Utils.Emit(socket, MessageStore.BuxtonDocumentResult, deviceOrError),
-            results => Utils.Emit(socket, MessageStore.BuxtonImportComplete, results)
-        );
-    });
 
     disconnect = () => {
         socket.broadcast.emit("connection_terminated", Date.now());
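For reference, the deleted handler above completed a round trip: a client sent BeginBuxtonImport, and the server streamed back one BuxtonDocumentResult per parsed document, then a BuxtonImportComplete tally. A hypothetical sketch of the client-side counterpart, assuming a raw socket.io client; the event names mirror the Message titles above, but the actual app routed these through its own Utils/DocServer plumbing rather than this direct API:

    // Hypothetical client: kick off the import and listen for its two result events.
    import { io } from "socket.io-client";

    const socket = io("https://localhost:4321"); // endpoint is illustrative
    socket.emit("Begin Buxton Import", "");
    socket.on("Buxton Document Result", result => console.log("parsed:", result));
    socket.on("Buxton Import Complete", ({ deviceCount, errorCount }) =>
        console.log(`${deviceCount} devices imported, ${errorCount} failures`));
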