diff options
author | anika-ahluwalia <anika.ahluwalia@gmail.com> | 2020-06-01 15:29:04 -0500 |
---|---|---|
committer | anika-ahluwalia <anika.ahluwalia@gmail.com> | 2020-06-01 15:29:04 -0500 |
commit | 3f333751cd96d12a9302b839067a6c8fcddfdae9 (patch) | |
tree | f1ec14b8692dcfadb6eb3467ca53fe60ee942e39 | |
parent | 823b8728b5506697b7d305a8c07979984b8351c8 (diff) | |
parent | 9b0ba3940ec9b718cbffb945298095bd2ddcedb9 (diff) |
Merge branch 'master' of https://github.com/browngraphicslab/Dash-Web into script_documents
-rw-r--r-- | package-lock.json | 88 | ||||
-rw-r--r-- | package.json | 3 | ||||
-rw-r--r-- | src/client/documents/Documents.ts | 25 | ||||
-rw-r--r-- | src/client/util/DocumentManager.ts | 14 | ||||
-rw-r--r-- | src/client/views/MainView.tsx | 10 | ||||
-rw-r--r-- | src/client/views/collections/CollectionTreeView.tsx | 7 | ||||
-rw-r--r-- | src/client/views/nodes/DocumentView.tsx | 8 | ||||
-rw-r--r-- | src/client/views/pdf/PDFViewer.tsx | 14 | ||||
-rw-r--r-- | src/scraping/buxton/final/BuxtonImporter.ts | 74 | ||||
-rw-r--r-- | src/scraping/buxton/final/assets/pdfs/3DCad_Brochure.pdf | bin | 0 -> 107790 bytes | |||
-rw-r--r-- | src/server/ApiManagers/PDFManager.ts | 43 |
11 files changed, 226 insertions, 60 deletions
diff --git a/package-lock.json b/package-lock.json index aa6caf9cf..af474ff88 100644 --- a/package-lock.json +++ b/package-lock.json @@ -412,6 +412,15 @@ "integrity": "sha512-t7uW6eFafjO+qJ3BIV2gGUyZs27egcNRkUdalkud+Qa3+kg//f129iuOFivHDXQ+vnU3fDXuwgv0cqMCbcE8sw==", "dev": true }, + "@types/chrome": { + "version": "0.0.114", + "resolved": "https://registry.npmjs.org/@types/chrome/-/chrome-0.0.114.tgz", + "integrity": "sha512-i7qRr74IrxHtbnrZSKUuP5Uvd5EOKwlwJq/yp7+yTPihOXnPhNQO4Z5bqb1XTnrjdbUKEJicaVVbhcgtRijmLA==", + "requires": { + "@types/filesystem": "*", + "@types/har-format": "*" + } + }, "@types/color": { "version": "3.0.1", "resolved": "https://registry.npmjs.org/@types/color/-/color-3.0.1.tgz", @@ -547,6 +556,19 @@ "express-validator": "*" } }, + "@types/filesystem": { + "version": "0.0.29", + "resolved": "https://registry.npmjs.org/@types/filesystem/-/filesystem-0.0.29.tgz", + "integrity": "sha512-85/1KfRedmfPGsbK8YzeaQUyV1FQAvMPMTuWFQ5EkLd2w7szhNO96bk3Rh/SKmOfd9co2rCLf0Voy4o7ECBOvw==", + "requires": { + "@types/filewriter": "*" + } + }, + "@types/filewriter": { + "version": "0.0.28", + "resolved": "https://registry.npmjs.org/@types/filewriter/-/filewriter-0.0.28.tgz", + "integrity": "sha1-wFTor02d11205jq8dviFFocU1LM=" + }, "@types/formidable": { "version": "1.0.31", "resolved": "https://registry.npmjs.org/@types/formidable/-/formidable-1.0.31.tgz", @@ -584,6 +606,11 @@ "integrity": "sha512-L8O9HAVFZj0TuiS8h5ORthiMsrrhjxTC8XUusp5k47oXCst4VTm+qWKvrAvmYMybZVokbp4Udco1mNwJrTNZPQ==", "dev": true }, + "@types/har-format": { + "version": "1.2.4", + "resolved": "https://registry.npmjs.org/@types/har-format/-/har-format-1.2.4.tgz", + "integrity": "sha512-iUxzm1meBm3stxUMzRqgOVHjj4Kgpgu5w9fm4X7kPRfSgVRzythsucEN7/jtOo8SQzm+HfcxWWzJS0mJDH/3DQ==" + }, "@types/jquery": { "version": "3.3.36", "resolved": "https://registry.npmjs.org/@types/jquery/-/jquery-3.3.36.tgz", @@ -3290,6 +3317,15 @@ "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==" }, + "chrome": { + "version": "0.1.0", + "resolved": "https://registry.npmjs.org/chrome/-/chrome-0.1.0.tgz", + "integrity": "sha1-9h2beS/v6MGUxwVt3BAscmqGQyk=", + "requires": { + "exeq": "^2.2.0", + "plist": "^1.1.0" + } + }, "chrome-trace-event": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/chrome-trace-event/-/chrome-trace-event-1.0.2.tgz", @@ -5332,6 +5368,15 @@ "resolved": "https://registry.npmjs.org/exenv/-/exenv-1.2.2.tgz", "integrity": "sha1-KueOhdmJQVhnCwPUe+wfA72Ru50=" }, + "exeq": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/exeq/-/exeq-2.4.0.tgz", + "integrity": "sha1-Td8qaEZIxCeteZNJzzO9dTWPiEo=", + "requires": { + "bluebird": "^3.0.3", + "native-or-bluebird": "^1.2.0" + } + }, "exif": { "version": "0.6.0", "resolved": "https://registry.npmjs.org/exif/-/exif-0.6.0.tgz", @@ -8737,6 +8782,11 @@ "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-1.0.2.tgz", "integrity": "sha512-ONmRUqK7zj7DWX0D9ADe03wbwOBZxNAfF20PlGfCWQcD3+/MakShIHrMqx9YwPTfxDdF1zLeL+RGZiR9kGMLdg==" }, + "native-or-bluebird": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/native-or-bluebird/-/native-or-bluebird-1.2.0.tgz", + "integrity": "sha1-OcR7/Xgl0fuf+tMiEK4l2q3xAck=" + }, "needle": { "version": "2.4.1", "resolved": "https://registry.npmjs.org/needle/-/needle-2.4.1.tgz", @@ -13302,6 +13352,24 @@ "semver-compare": "^1.0.0" } }, + "plist": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/plist/-/plist-1.2.0.tgz", + "integrity": "sha1-CEtQk93JJQbiWfh0uNmxr7jHlZM=", + "requires": { + "base64-js": "0.0.8", + "util-deprecate": "1.0.2", + "xmlbuilder": "4.0.0", + "xmldom": "0.1.x" + }, + "dependencies": { + "base64-js": { + "version": "0.0.8", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-0.0.8.tgz", + "integrity": "sha1-EQHpVE9KdrG8OybUUsqW16NeeXg=" + } + } + }, "pn": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/pn/-/pn-1.1.0.tgz", @@ -17863,12 +17931,32 @@ "integrity": "sha512-A5CUptxDsvxKJEU3yO6DuWBSJz/qizqzJKOMIfUJHETbBw/sFaDxgd6fxm1ewUaM0jZ444Fc5vC5ROYurg/4Pw==", "dev": true }, + "xmlbuilder": { + "version": "4.0.0", + "resolved": "https://registry.npmjs.org/xmlbuilder/-/xmlbuilder-4.0.0.tgz", + "integrity": "sha1-mLj2UcowqmJANvEn0RzGbce5B6M=", + "requires": { + "lodash": "^3.5.0" + }, + "dependencies": { + "lodash": { + "version": "3.10.1", + "resolved": "https://registry.npmjs.org/lodash/-/lodash-3.10.1.tgz", + "integrity": "sha1-W/Rejkm6QYnhfUgnid/RW9FAt7Y=" + } + } + }, "xmlchars": { "version": "2.2.0", "resolved": "https://registry.npmjs.org/xmlchars/-/xmlchars-2.2.0.tgz", "integrity": "sha512-JZnDKK8B0RCDw84FNdDAIpZK+JuJw+s7Lz8nksI7SIuU3UXJJslUthsi+uWBUYOwPFwW7W7PRLRfUKpxjtjFCw==", "dev": true }, + "xmldom": { + "version": "0.1.31", + "resolved": "https://registry.npmjs.org/xmldom/-/xmldom-0.1.31.tgz", + "integrity": "sha512-yS2uJflVQs6n+CyjHoaBmVSqIDevTAWrzMmjG1Gc7h1qQ7uVozNhEPJAwZXWyGQ/Gafo3fCwrcaokezLPupVyQ==" + }, "xmlhttprequest-ssl": { "version": "1.5.5", "resolved": "https://registry.npmjs.org/xmlhttprequest-ssl/-/xmlhttprequest-ssl-1.5.5.tgz", diff --git a/package.json b/package.json index ecda98596..ff13cf2d5 100644 --- a/package.json +++ b/package.json @@ -130,6 +130,7 @@ "bootstrap": "^4.4.1", "canvas": "^2.5.0", "child_process": "^1.0.2", + "chrome": "^0.1.0", "class-transformer": "^0.2.0", "color": "^3.1.2", "colors": "^1.4.0", @@ -230,4 +231,4 @@ "xoauth2": "^1.2.0", "xregexp": "^4.3.0" } -} +}
\ No newline at end of file diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts index 9b1912fb6..5b32bf76a 100644 --- a/src/client/documents/Documents.ts +++ b/src/client/documents/Documents.ts @@ -432,17 +432,28 @@ export namespace Docs { parentProto.data = new List<Doc>(); } if (device) { - const { __images } = device; + const { title, __images, additionalMedia } = device; delete device.__images; + delete device.additionalMedia; const { ImageDocument, StackingDocument } = Docs.Create; const constructed = __images.map(({ url, nativeWidth, nativeHeight }) => ({ url: Utils.prepend(url), nativeWidth, nativeHeight })); - const deviceImages = constructed.map(({ url, nativeWidth, nativeHeight }, i) => ImageDocument(url, { - title: `image${i}.${extname(url)}`, - _nativeWidth: nativeWidth, - _nativeHeight: nativeHeight - })); + const deviceImages = constructed.map(({ url, nativeWidth, nativeHeight }, i) => { + const imageDoc = ImageDocument(url, { + title: `image${i}.${extname(url)}`, + _nativeWidth: nativeWidth, + _nativeHeight: nativeHeight + }); + const media = additionalMedia[i]; + if (media) { + for (const key of Object.keys(media)) { + imageDoc[`additionalMedia_${key}`] = Utils.prepend(`/files/${key}/buxton/${media[key]}`); + } + } + return imageDoc; + }); // the main document we create - const doc = StackingDocument(deviceImages, { title: device.title, _LODdisable: true, hero: new ImageField(constructed[0].url) }); + const doc = StackingDocument(deviceImages, { title, _LODdisable: true, hero: new ImageField(constructed[0].url) }); + doc.nameAliases = new List<string>([title.toLowerCase()]); // add the parsed attributes to this main document Docs.Get.FromJson({ data: device, appendToExisting: { targetDoc: Doc.GetProto(doc) } }); Doc.AddDocToList(parentProto, "data", doc); diff --git a/src/client/util/DocumentManager.ts b/src/client/util/DocumentManager.ts index ab087335e..67f2f244c 100644 --- a/src/client/util/DocumentManager.ts +++ b/src/client/util/DocumentManager.ts @@ -126,13 +126,13 @@ export class DocumentManager { finished?.(); } public jumpToDocument = async ( - targetDoc: Doc, - willZoom: boolean, - createViewFunc = DocumentManager.addRightSplit, - docContext?: Doc, - linkId?: string, - closeContextIfNotFound: boolean = false, - originatingDoc: Opt<Doc> = undefined, + targetDoc: Doc, // document to display + willZoom: boolean, // whether to zoom doc to take up most of screen + createViewFunc = DocumentManager.addRightSplit, // how to create a view of the doc if it doesn't exist + docContext?: Doc, // context to load that should contain the target + linkId?: string, // link that's being followed + closeContextIfNotFound: boolean = false, // after opening a context where the document should be, this determines whether the context should be closed if the Doc isn't actually there + originatingDoc: Opt<Doc> = undefined, // doc that initiated the display of the target odoc finished?: () => void ): Promise<void> => { const getFirstDocView = DocumentManager.Instance.getFirstDocumentView; diff --git a/src/client/views/MainView.tsx b/src/client/views/MainView.tsx index a0a6adab7..a1d1b0ece 100644 --- a/src/client/views/MainView.tsx +++ b/src/client/views/MainView.tsx @@ -51,6 +51,8 @@ import { PreviewCursor } from './PreviewCursor'; import { ScriptField } from '../../fields/ScriptField'; import { TimelineMenu } from './animationtimeline/TimelineMenu'; import { SnappingManager } from '../util/SnappingManager'; +import { FormattedTextBox } from './nodes/formattedText/FormattedTextBox'; +import { DocumentManager } from '../util/DocumentManager'; @observer export class MainView extends React.Component { @@ -83,6 +85,14 @@ export class MainView extends React.Component { window.removeEventListener("keydown", KeyManager.Instance.handle); window.addEventListener("keydown", KeyManager.Instance.handle); window.addEventListener("paste", KeyManager.Instance.paste as any); + document.addEventListener("dash", (e: any) => { // event used by chrome plugin to tell Dash which document to focus on + const id = FormattedTextBox.GetDocFromUrl(e.detail); + DocServer.GetRefField(id).then(doc => { + if (doc instanceof Doc) { + DocumentManager.Instance.jumpToDocument(doc, false, undefined); + } + }); + }); } componentWillUnMount() { diff --git a/src/client/views/collections/CollectionTreeView.tsx b/src/client/views/collections/CollectionTreeView.tsx index 87fe79e19..b2e1c0f73 100644 --- a/src/client/views/collections/CollectionTreeView.tsx +++ b/src/client/views/collections/CollectionTreeView.tsx @@ -717,7 +717,7 @@ export class CollectionTreeView extends CollectionSubView<Document, Partial<coll } ContextMenu.Instance.addItem({ description: "Buxton Layout", icon: "eye", event: () => { - const { ImageDocument } = Docs.Create; + const { ImageDocument, PdfDocument } = Docs.Create; const { Document } = this.props; const fallbackImg = "http://www.cs.brown.edu/~bcz/face.gif"; const detailView = Cast(Cast(Doc.UserDoc()["template-button-detail"], Doc, null)?.dragFactory, Doc, null); @@ -726,13 +726,14 @@ export class CollectionTreeView extends CollectionSubView<Document, Partial<coll heroView._showTitle = "title"; heroView._showTitleHover = "titlehover"; - const doubleClickView = ImageDocument("http://cs.brown.edu/~bcz/face.gif", { _width: 400 }); // replace with desired double click target + const fallback = ImageDocument("http://cs.brown.edu/~bcz/face.gif", { _width: 400 }); // replace with desired double click target + let pdfContent: string; DocListCast(this.dataDoc[this.props.fieldKey]).map(d => { DocListCast(d.data).map((img, i) => { const caption = (d.captions as any)[i]; if (caption) { Doc.GetProto(img).caption = caption; - Doc.GetProto(img).doubleClickView = doubleClickView; + Doc.GetProto(img).doubleClickView = (pdfContent = StrCast(img.additionalMedia_pdfs)) ? PdfDocument(pdfContent, { title: pdfContent }) : fallback; } }); Doc.GetProto(d).type = "buxton"; diff --git a/src/client/views/nodes/DocumentView.tsx b/src/client/views/nodes/DocumentView.tsx index 7a4ecfa9c..e245e045c 100644 --- a/src/client/views/nodes/DocumentView.tsx +++ b/src/client/views/nodes/DocumentView.tsx @@ -742,16 +742,16 @@ export class DocumentView extends DocComponent<DocumentViewProps, Document>(Docu const more = cm.findByDescription("More..."); const moreItems: ContextMenuProps[] = more && "subitems" in more ? more.subitems : []; - moreItems.push({ description: "Make Add Only", event: () => this.layoutDoc.ACL = this.dataDoc.ACL = "addOnly", icon: "concierge-bell" }); - moreItems.push({ description: "Make Read Only", event: () => this.layoutDoc.ACL = this.dataDoc.ACL = "readOnly", icon: "concierge-bell" }); - moreItems.push({ description: "Make Private", event: () => this.layoutDoc[AclSym] = this.dataDoc[AclSym] = "noAccess", icon: "concierge-bell" }); + moreItems.push({ description: "Make Add Only", event: () => this.dataDoc.ACL = this.layoutDoc.ACL = "addOnly", icon: "concierge-bell" }); + moreItems.push({ description: "Make Read Only", event: () => this.dataDoc.ACL = this.layoutDoc.ACL = "readOnly", icon: "concierge-bell" }); + moreItems.push({ description: "Make Private", event: () => this.dataDoc.ACL = this.layoutDoc.ACL = "noAccess", icon: "concierge-bell" }); moreItems.push({ description: "Make View of Metadata Field", event: () => Doc.MakeMetadataFieldTemplate(this.props.Document, this.props.DataDoc), icon: "concierge-bell" }); moreItems.push({ description: `${this.Document._chromeStatus !== "disabled" ? "Hide" : "Show"} Chrome`, event: () => this.Document._chromeStatus = (this.Document._chromeStatus !== "disabled" ? "disabled" : "enabled"), icon: "project-diagram" }); moreItems.push({ description: this.Document.lockedPosition ? "Unlock Position" : "Lock Position", event: this.toggleLockPosition, icon: BoolCast(this.Document.lockedPosition) ? "unlock" : "lock" }); if (!ClientUtils.RELEASE) { // let copies: ContextMenuProps[] = []; - moreItems.push({ description: "Copy ID", event: () => Utils.CopyText(this.props.Document[Id]), icon: "fingerprint" }); + moreItems.push({ description: "Copy ID", event: () => Utils.CopyText(Utils.prepend("/doc/" + this.props.Document[Id])), icon: "fingerprint" }); // cm.addItem({ description: "Copy...", subitems: copies, icon: "copy" }); } if (Cast(Doc.GetProto(this.props.Document).data, listSpec(Doc))) { diff --git a/src/client/views/pdf/PDFViewer.tsx b/src/client/views/pdf/PDFViewer.tsx index 217032c1f..5bad248be 100644 --- a/src/client/views/pdf/PDFViewer.tsx +++ b/src/client/views/pdf/PDFViewer.tsx @@ -34,6 +34,7 @@ import React = require("react"); import { SnappingManager } from "../../util/SnappingManager"; const PDFJSViewer = require("pdfjs-dist/web/pdf_viewer"); const pdfjsLib = require("pdfjs-dist"); +import { Networking } from "../../Network"; export const pageSchema = createSchema({ curPage: "number", @@ -129,8 +130,17 @@ export class PDFViewer extends ViewBoxAnnotatableComponent<IViewerProps, PdfDocu // change the address to be the file address of the PNG version of each page // file address of the pdf const { url: { href } } = Cast(this.dataDoc[this.props.fieldKey], PdfField)!; - const addr = Utils.prepend(`/thumbnail${this.props.url.substring("files/pdfs/".length, this.props.url.length - ".pdf".length)}-${(this.Document.curPage || 1)}.png`); - this._coverPath = href.startsWith(window.location.origin) ? JSON.parse(await rp.get(addr)) : { width: 100, height: 100, path: "" }; + const { url: relative } = this.props; + const pathComponents = relative.split("/pdfs/")[1].split("/"); + const coreFilename = pathComponents.pop()!.split(".")[0]; + const params: any = { + coreFilename, + pageNum: this.Document.curPage || 1, + }; + if (pathComponents.length) { + params.subtree = `${pathComponents.join("/")}/`; + } + this._coverPath = href.startsWith(window.location.origin) ? await Networking.PostToServer("/thumbnail", params) : { width: 100, height: 100, path: "" }; runInAction(() => this._showWaiting = this._showCover = true); this.props.startupLive && this.setupPdfJsViewer(); this._searchReactionDisposer = reaction(() => this.Document.searchMatch, search => { diff --git a/src/scraping/buxton/final/BuxtonImporter.ts b/src/scraping/buxton/final/BuxtonImporter.ts index e55850b29..684c00c0d 100644 --- a/src/scraping/buxton/final/BuxtonImporter.ts +++ b/src/scraping/buxton/final/BuxtonImporter.ts @@ -1,4 +1,4 @@ -import { readdirSync, writeFile, mkdirSync } from "fs"; +import { readdirSync, writeFile, mkdirSync, createReadStream, createWriteStream, existsSync, statSync } from "fs"; import * as path from "path"; import { red, cyan, yellow } from "colors"; import { Utils } from "../../../Utils"; @@ -9,6 +9,7 @@ const createImageSizeStream = require("image-size-stream"); import { parseXml } from "libxmljs"; import { strictEqual } from "assert"; import { Readable, PassThrough } from "stream"; +import { Directory, serverPathToFile, pathToDirectory } from "../../../server/ApiManagers/UploadManager"; /** * This is an arbitrary bundle of data that gets populated @@ -18,8 +19,7 @@ interface DocumentContents { body: string; imageData: ImageData[]; hyperlinks: string[]; - captions: string[]; - embeddedFileNames: string[]; + tableData: TableData[]; longDescription: string; } @@ -40,6 +40,7 @@ export interface DeviceDocument { secondaryKey: string; attribute: string; __images: ImageData[]; + additionalMedia: ({ [type: string]: string } | undefined)[]; hyperlinks: string[]; captions: string[]; // from the table column embeddedFileNames: string[]; // from the table column @@ -255,6 +256,8 @@ const FormatMap = new Map<keyof DeviceDocument, ValueFormatDefinition<any>>([ ]); const sourceDir = path.resolve(__dirname, "source"); // where the Word documents are assumed to be stored +const assetDir = path.resolve(__dirname, "assets"); // where any additional media content like pdfs will be stored. Each subdirectory of this +// must follow the enum Directory.<type> naming scheme const outDir = path.resolve(__dirname, "json"); // where the JSON output of these device documents will be written const imageDir = path.resolve(__dirname, "../../../server/public/files/images/buxton"); // where, in the server, these images will be written const successOut = "buxton.json"; // the JSON list representing properly formatted documents @@ -277,12 +280,13 @@ export default async function executeImport(emitter: ResultCallback, terminator: rimraf.sync(dir); mkdirSync(dir); }); + await transferAssets(); return parseFiles(wordDocuments, emitter, terminator); } catch (e) { const message = [ "Unable to find a source directory.", - "Please ensure that the following directory exists and is populated with Word documents:", - `${sourceDir}` + "Please ensure that the following directory exists:", + `${e.message}` ].join('\n'); console.log(red(message)); return { error: message }; @@ -290,6 +294,32 @@ export default async function executeImport(emitter: ResultCallback, terminator: } /** + * Builds a mirrored directory structure of all media / asset files + * within the server's public directory. + */ +async function transferAssets() { + for (const assetType of readdirSync(assetDir)) { + const subroot = path.resolve(assetDir, assetType); + if (!statSync(subroot).isDirectory()) { + continue; + } + const outputSubroot = serverPathToFile(assetType as Directory, "buxton"); + if (existsSync(outputSubroot)) { + continue; + } else { + mkdirSync(outputSubroot); + } + for (const fileName of readdirSync(subroot)) { + const readStream = createReadStream(path.resolve(subroot, fileName)); + const writeStream = createWriteStream(path.resolve(outputSubroot, fileName)); + await new Promise<void>(resolve => { + readStream.pipe(writeStream).on("close", resolve); + }); + } + } +} + +/** * Parse every Word document in the directory, notifying any callers as needed * at each iteration via the emitter. * @param wordDocuments the string list of Word document names to parse @@ -356,6 +386,16 @@ const xPaths = { hyperlinks: '//*[name()="Relationship" and contains(@Type, "hyperlink")]' }; +interface TableData { + fileName: string; + caption: string; + additionalMedia?: { [type: string]: string }; +} + +const SuffixDirectoryMap = new Map<string, Directory>([ + ["p", Directory.pdfs] +]); + /** * The meat of the script, images and text content are extracted here * @param pathToDocument the path to the document relative to the root of the zip @@ -370,8 +410,7 @@ async function extractFileContents(pathToDocument: string): Promise<DocumentCont // get plain text const body = document.root()?.text() ?? "No body found. Check the import script's XML parser."; const captions: string[] = []; - const embeddedFileNames: string[] = []; - + const tableData: TableData[] = []; // preserve paragraph formatting and line breaks that would otherwise get lost in the plain text parsing // of the XML hierarchy const paragraphs = document.find(xPaths.paragraphs).map(node => Utilities.correctSentences(node.text()).transformed!); @@ -382,7 +421,7 @@ async function extractFileContents(pathToDocument: string): Promise<DocumentCont // extract captions from the table cells const tableRowsFlattened = document.find(xPaths.tableCells).map(node => node.text().trim()); const { length } = tableRowsFlattened; - const numCols = 3; + const numCols = 4; strictEqual(length > numCols, true, "No captions written."); // first row has the headers, not content strictEqual(length % numCols === 0, true, "Improper caption formatting."); @@ -392,8 +431,14 @@ async function extractFileContents(pathToDocument: string): Promise<DocumentCont // have been added or reordered since this was written, but follow the same appraoch) for (let i = numCols; i < tableRowsFlattened.length; i += numCols) { const row = tableRowsFlattened.slice(i, i + numCols); - embeddedFileNames.push(row[1]); - captions.push(row[2]); + const entry: TableData = { fileName: row[1], caption: row[2] }; + const key = SuffixDirectoryMap.get(row[3].toLowerCase()); + if (key) { + const media: any = {}; + media[key] = `${entry.fileName.split(".")[0]}.pdf`; + entry.additionalMedia = media; + } + tableData.push(entry); } // extract all hyperlinks embedded in the document @@ -409,7 +454,7 @@ async function extractFileContents(pathToDocument: string): Promise<DocumentCont // cleanup zip.close(); - return { body, longDescription, imageData, captions, embeddedFileNames, hyperlinks }; + return { body, longDescription, imageData, tableData, hyperlinks }; } // zip relative path from root expression / filter used to isolate only media assets @@ -492,11 +537,12 @@ async function writeImages(zip: any): Promise<ImageData[]> { * @param contents the data already computed / parsed by extractFileContents */ function analyze(fileName: string, contents: DocumentContents): AnalysisResult { - const { body, imageData, captions, hyperlinks, embeddedFileNames, longDescription } = contents; + const { body, imageData, hyperlinks, tableData, longDescription } = contents; const device: any = { hyperlinks, - captions, - embeddedFileNames, + captions: tableData.map(({ caption }) => caption), + embeddedFileNames: tableData.map(({ fileName }) => fileName), + additionalMedia: tableData.map(({ additionalMedia }) => additionalMedia), longDescription, __images: imageData }; diff --git a/src/scraping/buxton/final/assets/pdfs/3DCad_Brochure.pdf b/src/scraping/buxton/final/assets/pdfs/3DCad_Brochure.pdf Binary files differnew file mode 100644 index 000000000..4746d2f41 --- /dev/null +++ b/src/scraping/buxton/final/assets/pdfs/3DCad_Brochure.pdf diff --git a/src/server/ApiManagers/PDFManager.ts b/src/server/ApiManagers/PDFManager.ts index 0136b758e..d2a9e9cce 100644 --- a/src/server/ApiManagers/PDFManager.ts +++ b/src/server/ApiManagers/PDFManager.ts @@ -7,54 +7,54 @@ import { createCanvas } from "canvas"; const imageSize = require("probe-image-size"); import * as express from "express"; import * as path from "path"; -import { Directory, serverPathToFile, clientPathToFile } from "./UploadManager"; +import { Directory, serverPathToFile, clientPathToFile, pathToDirectory } from "./UploadManager"; import { red } from "colors"; +import { resolve } from "path"; export default class PDFManager extends ApiManager { protected initialize(register: Registration): void { register({ - method: Method.GET, - subscription: new RouteSubscriber("thumbnail").add("filename"), - secureHandler: ({ req, res }) => getOrCreateThumbnail(req.params.filename, res) + method: Method.POST, + subscription: new RouteSubscriber("thumbnail"), + secureHandler: async ({ req, res }) => { + const { coreFilename, pageNum, subtree } = req.body; + return getOrCreateThumbnail(coreFilename, pageNum, res, subtree); + } }); } } -async function getOrCreateThumbnail(thumbnailName: string, res: express.Response): Promise<void> { - const noExtension = thumbnailName.substring(0, thumbnailName.length - ".png".length); - const pageString = noExtension.split('-')[1]; - const pageNumber = parseInt(pageString); +async function getOrCreateThumbnail(coreFilename: string, pageNum: number, res: express.Response, subtree?: string): Promise<void> { + const resolved = `${coreFilename}-${pageNum}.png`; return new Promise<void>(async resolve => { - const path = serverPathToFile(Directory.pdf_thumbnails, thumbnailName); + const path = serverPathToFile(Directory.pdf_thumbnails, resolved); if (existsSync(path)) { const existingThumbnail = createReadStream(path); const { err, viewport } = await new Promise<any>(resolve => { imageSize(existingThumbnail, (err: any, viewport: any) => resolve({ err, viewport })); }); if (err) { - console.log(red(`In PDF thumbnail response, unable to determine dimensions of ${thumbnailName}:`)); + console.log(red(`In PDF thumbnail response, unable to determine dimensions of ${resolved}:`)); console.log(err); return; } - dispatchThumbnail(res, viewport, thumbnailName); + dispatchThumbnail(res, viewport, resolved); } else { - const offset = thumbnailName.length - pageString.length - 5; - const name = thumbnailName.substring(0, offset) + ".pdf"; - const path = serverPathToFile(Directory.pdfs, name); - await CreateThumbnail(path, pageNumber, res); + await CreateThumbnail(coreFilename, pageNum, res, subtree); } resolve(); }); } -async function CreateThumbnail(file: string, pageNumber: number, res: express.Response) { - const documentProxy = await Pdfjs.getDocument(file).promise; +async function CreateThumbnail(coreFilename: string, pageNum: number, res: express.Response, subtree?: string) { + const sourcePath = resolve(pathToDirectory(Directory.pdfs), `${subtree ?? ""}${coreFilename}.pdf`); + const documentProxy = await Pdfjs.getDocument(sourcePath).promise; const factory = new NodeCanvasFactory(); - const page = await documentProxy.getPage(pageNumber); + const page = await documentProxy.getPage(pageNum); const viewport = page.getViewport(1 as any); const { canvas, context } = factory.create(viewport.width, viewport.height); const renderContext = { @@ -64,14 +64,13 @@ async function CreateThumbnail(file: string, pageNumber: number, res: express.Re }; await page.render(renderContext).promise; const pngStream = canvas.createPNGStream(); - const filenames = path.basename(file).split("."); - const thumbnailName = `${filenames[0]}-${pageNumber}.png`; - const pngFile = serverPathToFile(Directory.pdf_thumbnails, thumbnailName); + const resolved = `${coreFilename}-${pageNum}.png`; + const pngFile = serverPathToFile(Directory.pdf_thumbnails, resolved); const out = createWriteStream(pngFile); pngStream.pipe(out); return new Promise<void>((resolve, reject) => { out.on("finish", () => { - dispatchThumbnail(res, viewport, thumbnailName); + dispatchThumbnail(res, viewport, resolved); resolve(); }); out.on("error", error => { |