From 1681c03a0509c2e8989fe268e7997dbe912770ec Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Sat, 30 May 2020 15:42:07 -0700 Subject: buxton pdf linking implementation --- src/client/documents/Documents.ts | 25 +++++-- .../views/collections/CollectionTreeView.tsx | 7 +- src/scraping/buxton/final/BuxtonImporter.ts | 74 +++++++++++++++++---- .../buxton/final/assets/pdfs/3DCad_Brochure.pdf | Bin 0 -> 107790 bytes 4 files changed, 82 insertions(+), 24 deletions(-) create mode 100644 src/scraping/buxton/final/assets/pdfs/3DCad_Brochure.pdf (limited to 'src') diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts index 7f5b62f22..f7e19eecd 100644 --- a/src/client/documents/Documents.ts +++ b/src/client/documents/Documents.ts @@ -432,17 +432,28 @@ export namespace Docs { parentProto.data = new List(); } if (device) { - const { __images } = device; + const { title, __images, additionalMedia } = device; delete device.__images; + delete device.additionalMedia; const { ImageDocument, StackingDocument } = Docs.Create; const constructed = __images.map(({ url, nativeWidth, nativeHeight }) => ({ url: Utils.prepend(url), nativeWidth, nativeHeight })); - const deviceImages = constructed.map(({ url, nativeWidth, nativeHeight }, i) => ImageDocument(url, { - title: `image${i}.${extname(url)}`, - _nativeWidth: nativeWidth, - _nativeHeight: nativeHeight - })); + const deviceImages = constructed.map(({ url, nativeWidth, nativeHeight }, i) => { + const imageDoc = ImageDocument(url, { + title: `image${i}.${extname(url)}`, + _nativeWidth: nativeWidth, + _nativeHeight: nativeHeight + }); + const media = additionalMedia[i]; + if (media) { + for (const key of Object.keys(media)) { + imageDoc[`additionalMedia_${key}`] = Utils.prepend(`/files/${key}/buxton/${media[key]}`); + } + } + return imageDoc; + }); // the main document we create - const doc = StackingDocument(deviceImages, { title: device.title, _LODdisable: true, hero: new ImageField(constructed[0].url) }); + const doc = StackingDocument(deviceImages, { title, _LODdisable: true, hero: new ImageField(constructed[0].url) }); + doc.nameAliases = new List([title.toLowerCase()]); // add the parsed attributes to this main document Docs.Get.FromJson({ data: device, appendToExisting: { targetDoc: Doc.GetProto(doc) } }); Doc.AddDocToList(parentProto, "data", doc); diff --git a/src/client/views/collections/CollectionTreeView.tsx b/src/client/views/collections/CollectionTreeView.tsx index 87fe79e19..4d67b1e2c 100644 --- a/src/client/views/collections/CollectionTreeView.tsx +++ b/src/client/views/collections/CollectionTreeView.tsx @@ -717,7 +717,7 @@ export class CollectionTreeView extends CollectionSubView { - const { ImageDocument } = Docs.Create; + const { ImageDocument, PdfDocument } = Docs.Create; const { Document } = this.props; const fallbackImg = "http://www.cs.brown.edu/~bcz/face.gif"; const detailView = Cast(Cast(Doc.UserDoc()["template-button-detail"], Doc, null)?.dragFactory, Doc, null); @@ -726,13 +726,14 @@ export class CollectionTreeView extends CollectionSubView { DocListCast(d.data).map((img, i) => { const caption = (d.captions as any)[i]; if (caption) { Doc.GetProto(img).caption = caption; - Doc.GetProto(img).doubleClickView = doubleClickView; + Doc.GetProto(img).doubleClickView = (pdfContent = StrCast(img.additionalMedia_pdfs)) ? PdfDocument(pdfContent) : fallback; } }); Doc.GetProto(d).type = "buxton"; diff --git a/src/scraping/buxton/final/BuxtonImporter.ts b/src/scraping/buxton/final/BuxtonImporter.ts index e55850b29..684c00c0d 100644 --- a/src/scraping/buxton/final/BuxtonImporter.ts +++ b/src/scraping/buxton/final/BuxtonImporter.ts @@ -1,4 +1,4 @@ -import { readdirSync, writeFile, mkdirSync } from "fs"; +import { readdirSync, writeFile, mkdirSync, createReadStream, createWriteStream, existsSync, statSync } from "fs"; import * as path from "path"; import { red, cyan, yellow } from "colors"; import { Utils } from "../../../Utils"; @@ -9,6 +9,7 @@ const createImageSizeStream = require("image-size-stream"); import { parseXml } from "libxmljs"; import { strictEqual } from "assert"; import { Readable, PassThrough } from "stream"; +import { Directory, serverPathToFile, pathToDirectory } from "../../../server/ApiManagers/UploadManager"; /** * This is an arbitrary bundle of data that gets populated @@ -18,8 +19,7 @@ interface DocumentContents { body: string; imageData: ImageData[]; hyperlinks: string[]; - captions: string[]; - embeddedFileNames: string[]; + tableData: TableData[]; longDescription: string; } @@ -40,6 +40,7 @@ export interface DeviceDocument { secondaryKey: string; attribute: string; __images: ImageData[]; + additionalMedia: ({ [type: string]: string } | undefined)[]; hyperlinks: string[]; captions: string[]; // from the table column embeddedFileNames: string[]; // from the table column @@ -255,6 +256,8 @@ const FormatMap = new Map>([ ]); const sourceDir = path.resolve(__dirname, "source"); // where the Word documents are assumed to be stored +const assetDir = path.resolve(__dirname, "assets"); // where any additional media content like pdfs will be stored. Each subdirectory of this +// must follow the enum Directory. naming scheme const outDir = path.resolve(__dirname, "json"); // where the JSON output of these device documents will be written const imageDir = path.resolve(__dirname, "../../../server/public/files/images/buxton"); // where, in the server, these images will be written const successOut = "buxton.json"; // the JSON list representing properly formatted documents @@ -277,18 +280,45 @@ export default async function executeImport(emitter: ResultCallback, terminator: rimraf.sync(dir); mkdirSync(dir); }); + await transferAssets(); return parseFiles(wordDocuments, emitter, terminator); } catch (e) { const message = [ "Unable to find a source directory.", - "Please ensure that the following directory exists and is populated with Word documents:", - `${sourceDir}` + "Please ensure that the following directory exists:", + `${e.message}` ].join('\n'); console.log(red(message)); return { error: message }; } } +/** + * Builds a mirrored directory structure of all media / asset files + * within the server's public directory. + */ +async function transferAssets() { + for (const assetType of readdirSync(assetDir)) { + const subroot = path.resolve(assetDir, assetType); + if (!statSync(subroot).isDirectory()) { + continue; + } + const outputSubroot = serverPathToFile(assetType as Directory, "buxton"); + if (existsSync(outputSubroot)) { + continue; + } else { + mkdirSync(outputSubroot); + } + for (const fileName of readdirSync(subroot)) { + const readStream = createReadStream(path.resolve(subroot, fileName)); + const writeStream = createWriteStream(path.resolve(outputSubroot, fileName)); + await new Promise(resolve => { + readStream.pipe(writeStream).on("close", resolve); + }); + } + } +} + /** * Parse every Word document in the directory, notifying any callers as needed * at each iteration via the emitter. @@ -356,6 +386,16 @@ const xPaths = { hyperlinks: '//*[name()="Relationship" and contains(@Type, "hyperlink")]' }; +interface TableData { + fileName: string; + caption: string; + additionalMedia?: { [type: string]: string }; +} + +const SuffixDirectoryMap = new Map([ + ["p", Directory.pdfs] +]); + /** * The meat of the script, images and text content are extracted here * @param pathToDocument the path to the document relative to the root of the zip @@ -370,8 +410,7 @@ async function extractFileContents(pathToDocument: string): Promise Utilities.correctSentences(node.text()).transformed!); @@ -382,7 +421,7 @@ async function extractFileContents(pathToDocument: string): Promise node.text().trim()); const { length } = tableRowsFlattened; - const numCols = 3; + const numCols = 4; strictEqual(length > numCols, true, "No captions written."); // first row has the headers, not content strictEqual(length % numCols === 0, true, "Improper caption formatting."); @@ -392,8 +431,14 @@ async function extractFileContents(pathToDocument: string): Promise { * @param contents the data already computed / parsed by extractFileContents */ function analyze(fileName: string, contents: DocumentContents): AnalysisResult { - const { body, imageData, captions, hyperlinks, embeddedFileNames, longDescription } = contents; + const { body, imageData, hyperlinks, tableData, longDescription } = contents; const device: any = { hyperlinks, - captions, - embeddedFileNames, + captions: tableData.map(({ caption }) => caption), + embeddedFileNames: tableData.map(({ fileName }) => fileName), + additionalMedia: tableData.map(({ additionalMedia }) => additionalMedia), longDescription, __images: imageData }; diff --git a/src/scraping/buxton/final/assets/pdfs/3DCad_Brochure.pdf b/src/scraping/buxton/final/assets/pdfs/3DCad_Brochure.pdf new file mode 100644 index 000000000..4746d2f41 Binary files /dev/null and b/src/scraping/buxton/final/assets/pdfs/3DCad_Brochure.pdf differ -- cgit v1.2.3-70-g09d2 From 85721c9ed95b4c026d0a1c7891e1fee311e9f50e Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Sat, 30 May 2020 17:11:23 -0700 Subject: buxton pdf fix --- .../views/collections/CollectionTreeView.tsx | 2 +- src/client/views/pdf/PDFViewer.tsx | 14 ++++++- src/server/ApiManagers/PDFManager.ts | 43 +++++++++++----------- 3 files changed, 34 insertions(+), 25 deletions(-) (limited to 'src') diff --git a/src/client/views/collections/CollectionTreeView.tsx b/src/client/views/collections/CollectionTreeView.tsx index 4d67b1e2c..b2e1c0f73 100644 --- a/src/client/views/collections/CollectionTreeView.tsx +++ b/src/client/views/collections/CollectionTreeView.tsx @@ -733,7 +733,7 @@ export class CollectionTreeView extends CollectionSubView this._showWaiting = this._showCover = true); this.props.startupLive && this.setupPdfJsViewer(); this._searchReactionDisposer = reaction(() => this.Document.searchMatch, search => { diff --git a/src/server/ApiManagers/PDFManager.ts b/src/server/ApiManagers/PDFManager.ts index 0136b758e..d2a9e9cce 100644 --- a/src/server/ApiManagers/PDFManager.ts +++ b/src/server/ApiManagers/PDFManager.ts @@ -7,54 +7,54 @@ import { createCanvas } from "canvas"; const imageSize = require("probe-image-size"); import * as express from "express"; import * as path from "path"; -import { Directory, serverPathToFile, clientPathToFile } from "./UploadManager"; +import { Directory, serverPathToFile, clientPathToFile, pathToDirectory } from "./UploadManager"; import { red } from "colors"; +import { resolve } from "path"; export default class PDFManager extends ApiManager { protected initialize(register: Registration): void { register({ - method: Method.GET, - subscription: new RouteSubscriber("thumbnail").add("filename"), - secureHandler: ({ req, res }) => getOrCreateThumbnail(req.params.filename, res) + method: Method.POST, + subscription: new RouteSubscriber("thumbnail"), + secureHandler: async ({ req, res }) => { + const { coreFilename, pageNum, subtree } = req.body; + return getOrCreateThumbnail(coreFilename, pageNum, res, subtree); + } }); } } -async function getOrCreateThumbnail(thumbnailName: string, res: express.Response): Promise { - const noExtension = thumbnailName.substring(0, thumbnailName.length - ".png".length); - const pageString = noExtension.split('-')[1]; - const pageNumber = parseInt(pageString); +async function getOrCreateThumbnail(coreFilename: string, pageNum: number, res: express.Response, subtree?: string): Promise { + const resolved = `${coreFilename}-${pageNum}.png`; return new Promise(async resolve => { - const path = serverPathToFile(Directory.pdf_thumbnails, thumbnailName); + const path = serverPathToFile(Directory.pdf_thumbnails, resolved); if (existsSync(path)) { const existingThumbnail = createReadStream(path); const { err, viewport } = await new Promise(resolve => { imageSize(existingThumbnail, (err: any, viewport: any) => resolve({ err, viewport })); }); if (err) { - console.log(red(`In PDF thumbnail response, unable to determine dimensions of ${thumbnailName}:`)); + console.log(red(`In PDF thumbnail response, unable to determine dimensions of ${resolved}:`)); console.log(err); return; } - dispatchThumbnail(res, viewport, thumbnailName); + dispatchThumbnail(res, viewport, resolved); } else { - const offset = thumbnailName.length - pageString.length - 5; - const name = thumbnailName.substring(0, offset) + ".pdf"; - const path = serverPathToFile(Directory.pdfs, name); - await CreateThumbnail(path, pageNumber, res); + await CreateThumbnail(coreFilename, pageNum, res, subtree); } resolve(); }); } -async function CreateThumbnail(file: string, pageNumber: number, res: express.Response) { - const documentProxy = await Pdfjs.getDocument(file).promise; +async function CreateThumbnail(coreFilename: string, pageNum: number, res: express.Response, subtree?: string) { + const sourcePath = resolve(pathToDirectory(Directory.pdfs), `${subtree ?? ""}${coreFilename}.pdf`); + const documentProxy = await Pdfjs.getDocument(sourcePath).promise; const factory = new NodeCanvasFactory(); - const page = await documentProxy.getPage(pageNumber); + const page = await documentProxy.getPage(pageNum); const viewport = page.getViewport(1 as any); const { canvas, context } = factory.create(viewport.width, viewport.height); const renderContext = { @@ -64,14 +64,13 @@ async function CreateThumbnail(file: string, pageNumber: number, res: express.Re }; await page.render(renderContext).promise; const pngStream = canvas.createPNGStream(); - const filenames = path.basename(file).split("."); - const thumbnailName = `${filenames[0]}-${pageNumber}.png`; - const pngFile = serverPathToFile(Directory.pdf_thumbnails, thumbnailName); + const resolved = `${coreFilename}-${pageNum}.png`; + const pngFile = serverPathToFile(Directory.pdf_thumbnails, resolved); const out = createWriteStream(pngFile); pngStream.pipe(out); return new Promise((resolve, reject) => { out.on("finish", () => { - dispatchThumbnail(res, viewport, thumbnailName); + dispatchThumbnail(res, viewport, resolved); resolve(); }); out.on("error", error => { -- cgit v1.2.3-70-g09d2