diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/client/documents/Documents.ts | 25 | ||||
-rw-r--r-- | src/client/views/collections/CollectionTreeView.tsx | 7 | ||||
-rw-r--r-- | src/scraping/buxton/final/BuxtonImporter.ts | 74 | ||||
-rw-r--r-- | src/scraping/buxton/final/assets/pdfs/3DCad_Brochure.pdf | bin | 0 -> 107790 bytes |
4 files changed, 82 insertions, 24 deletions
diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts index 7f5b62f22..f7e19eecd 100644 --- a/src/client/documents/Documents.ts +++ b/src/client/documents/Documents.ts @@ -432,17 +432,28 @@ export namespace Docs { parentProto.data = new List<Doc>(); } if (device) { - const { __images } = device; + const { title, __images, additionalMedia } = device; delete device.__images; + delete device.additionalMedia; const { ImageDocument, StackingDocument } = Docs.Create; const constructed = __images.map(({ url, nativeWidth, nativeHeight }) => ({ url: Utils.prepend(url), nativeWidth, nativeHeight })); - const deviceImages = constructed.map(({ url, nativeWidth, nativeHeight }, i) => ImageDocument(url, { - title: `image${i}.${extname(url)}`, - _nativeWidth: nativeWidth, - _nativeHeight: nativeHeight - })); + const deviceImages = constructed.map(({ url, nativeWidth, nativeHeight }, i) => { + const imageDoc = ImageDocument(url, { + title: `image${i}.${extname(url)}`, + _nativeWidth: nativeWidth, + _nativeHeight: nativeHeight + }); + const media = additionalMedia[i]; + if (media) { + for (const key of Object.keys(media)) { + imageDoc[`additionalMedia_${key}`] = Utils.prepend(`/files/${key}/buxton/${media[key]}`); + } + } + return imageDoc; + }); // the main document we create - const doc = StackingDocument(deviceImages, { title: device.title, _LODdisable: true, hero: new ImageField(constructed[0].url) }); + const doc = StackingDocument(deviceImages, { title, _LODdisable: true, hero: new ImageField(constructed[0].url) }); + doc.nameAliases = new List<string>([title.toLowerCase()]); // add the parsed attributes to this main document Docs.Get.FromJson({ data: device, appendToExisting: { targetDoc: Doc.GetProto(doc) } }); Doc.AddDocToList(parentProto, "data", doc); diff --git a/src/client/views/collections/CollectionTreeView.tsx b/src/client/views/collections/CollectionTreeView.tsx index 87fe79e19..4d67b1e2c 100644 --- a/src/client/views/collections/CollectionTreeView.tsx +++ b/src/client/views/collections/CollectionTreeView.tsx @@ -717,7 +717,7 @@ export class CollectionTreeView extends CollectionSubView<Document, Partial<coll } ContextMenu.Instance.addItem({ description: "Buxton Layout", icon: "eye", event: () => { - const { ImageDocument } = Docs.Create; + const { ImageDocument, PdfDocument } = Docs.Create; const { Document } = this.props; const fallbackImg = "http://www.cs.brown.edu/~bcz/face.gif"; const detailView = Cast(Cast(Doc.UserDoc()["template-button-detail"], Doc, null)?.dragFactory, Doc, null); @@ -726,13 +726,14 @@ export class CollectionTreeView extends CollectionSubView<Document, Partial<coll heroView._showTitle = "title"; heroView._showTitleHover = "titlehover"; - const doubleClickView = ImageDocument("http://cs.brown.edu/~bcz/face.gif", { _width: 400 }); // replace with desired double click target + const fallback = ImageDocument("http://cs.brown.edu/~bcz/face.gif", { _width: 400 }); // replace with desired double click target + let pdfContent: string; DocListCast(this.dataDoc[this.props.fieldKey]).map(d => { DocListCast(d.data).map((img, i) => { const caption = (d.captions as any)[i]; if (caption) { Doc.GetProto(img).caption = caption; - Doc.GetProto(img).doubleClickView = doubleClickView; + Doc.GetProto(img).doubleClickView = (pdfContent = StrCast(img.additionalMedia_pdfs)) ? PdfDocument(pdfContent) : fallback; } }); Doc.GetProto(d).type = "buxton"; diff --git a/src/scraping/buxton/final/BuxtonImporter.ts b/src/scraping/buxton/final/BuxtonImporter.ts index e55850b29..684c00c0d 100644 --- a/src/scraping/buxton/final/BuxtonImporter.ts +++ b/src/scraping/buxton/final/BuxtonImporter.ts @@ -1,4 +1,4 @@ -import { readdirSync, writeFile, mkdirSync } from "fs"; +import { readdirSync, writeFile, mkdirSync, createReadStream, createWriteStream, existsSync, statSync } from "fs"; import * as path from "path"; import { red, cyan, yellow } from "colors"; import { Utils } from "../../../Utils"; @@ -9,6 +9,7 @@ const createImageSizeStream = require("image-size-stream"); import { parseXml } from "libxmljs"; import { strictEqual } from "assert"; import { Readable, PassThrough } from "stream"; +import { Directory, serverPathToFile, pathToDirectory } from "../../../server/ApiManagers/UploadManager"; /** * This is an arbitrary bundle of data that gets populated @@ -18,8 +19,7 @@ interface DocumentContents { body: string; imageData: ImageData[]; hyperlinks: string[]; - captions: string[]; - embeddedFileNames: string[]; + tableData: TableData[]; longDescription: string; } @@ -40,6 +40,7 @@ export interface DeviceDocument { secondaryKey: string; attribute: string; __images: ImageData[]; + additionalMedia: ({ [type: string]: string } | undefined)[]; hyperlinks: string[]; captions: string[]; // from the table column embeddedFileNames: string[]; // from the table column @@ -255,6 +256,8 @@ const FormatMap = new Map<keyof DeviceDocument, ValueFormatDefinition<any>>([ ]); const sourceDir = path.resolve(__dirname, "source"); // where the Word documents are assumed to be stored +const assetDir = path.resolve(__dirname, "assets"); // where any additional media content like pdfs will be stored. Each subdirectory of this +// must follow the enum Directory.<type> naming scheme const outDir = path.resolve(__dirname, "json"); // where the JSON output of these device documents will be written const imageDir = path.resolve(__dirname, "../../../server/public/files/images/buxton"); // where, in the server, these images will be written const successOut = "buxton.json"; // the JSON list representing properly formatted documents @@ -277,12 +280,13 @@ export default async function executeImport(emitter: ResultCallback, terminator: rimraf.sync(dir); mkdirSync(dir); }); + await transferAssets(); return parseFiles(wordDocuments, emitter, terminator); } catch (e) { const message = [ "Unable to find a source directory.", - "Please ensure that the following directory exists and is populated with Word documents:", - `${sourceDir}` + "Please ensure that the following directory exists:", + `${e.message}` ].join('\n'); console.log(red(message)); return { error: message }; @@ -290,6 +294,32 @@ export default async function executeImport(emitter: ResultCallback, terminator: } /** + * Builds a mirrored directory structure of all media / asset files + * within the server's public directory. + */ +async function transferAssets() { + for (const assetType of readdirSync(assetDir)) { + const subroot = path.resolve(assetDir, assetType); + if (!statSync(subroot).isDirectory()) { + continue; + } + const outputSubroot = serverPathToFile(assetType as Directory, "buxton"); + if (existsSync(outputSubroot)) { + continue; + } else { + mkdirSync(outputSubroot); + } + for (const fileName of readdirSync(subroot)) { + const readStream = createReadStream(path.resolve(subroot, fileName)); + const writeStream = createWriteStream(path.resolve(outputSubroot, fileName)); + await new Promise<void>(resolve => { + readStream.pipe(writeStream).on("close", resolve); + }); + } + } +} + +/** * Parse every Word document in the directory, notifying any callers as needed * at each iteration via the emitter. * @param wordDocuments the string list of Word document names to parse @@ -356,6 +386,16 @@ const xPaths = { hyperlinks: '//*[name()="Relationship" and contains(@Type, "hyperlink")]' }; +interface TableData { + fileName: string; + caption: string; + additionalMedia?: { [type: string]: string }; +} + +const SuffixDirectoryMap = new Map<string, Directory>([ + ["p", Directory.pdfs] +]); + /** * The meat of the script, images and text content are extracted here * @param pathToDocument the path to the document relative to the root of the zip @@ -370,8 +410,7 @@ async function extractFileContents(pathToDocument: string): Promise<DocumentCont // get plain text const body = document.root()?.text() ?? "No body found. Check the import script's XML parser."; const captions: string[] = []; - const embeddedFileNames: string[] = []; - + const tableData: TableData[] = []; // preserve paragraph formatting and line breaks that would otherwise get lost in the plain text parsing // of the XML hierarchy const paragraphs = document.find(xPaths.paragraphs).map(node => Utilities.correctSentences(node.text()).transformed!); @@ -382,7 +421,7 @@ async function extractFileContents(pathToDocument: string): Promise<DocumentCont // extract captions from the table cells const tableRowsFlattened = document.find(xPaths.tableCells).map(node => node.text().trim()); const { length } = tableRowsFlattened; - const numCols = 3; + const numCols = 4; strictEqual(length > numCols, true, "No captions written."); // first row has the headers, not content strictEqual(length % numCols === 0, true, "Improper caption formatting."); @@ -392,8 +431,14 @@ async function extractFileContents(pathToDocument: string): Promise<DocumentCont // have been added or reordered since this was written, but follow the same appraoch) for (let i = numCols; i < tableRowsFlattened.length; i += numCols) { const row = tableRowsFlattened.slice(i, i + numCols); - embeddedFileNames.push(row[1]); - captions.push(row[2]); + const entry: TableData = { fileName: row[1], caption: row[2] }; + const key = SuffixDirectoryMap.get(row[3].toLowerCase()); + if (key) { + const media: any = {}; + media[key] = `${entry.fileName.split(".")[0]}.pdf`; + entry.additionalMedia = media; + } + tableData.push(entry); } // extract all hyperlinks embedded in the document @@ -409,7 +454,7 @@ async function extractFileContents(pathToDocument: string): Promise<DocumentCont // cleanup zip.close(); - return { body, longDescription, imageData, captions, embeddedFileNames, hyperlinks }; + return { body, longDescription, imageData, tableData, hyperlinks }; } // zip relative path from root expression / filter used to isolate only media assets @@ -492,11 +537,12 @@ async function writeImages(zip: any): Promise<ImageData[]> { * @param contents the data already computed / parsed by extractFileContents */ function analyze(fileName: string, contents: DocumentContents): AnalysisResult { - const { body, imageData, captions, hyperlinks, embeddedFileNames, longDescription } = contents; + const { body, imageData, hyperlinks, tableData, longDescription } = contents; const device: any = { hyperlinks, - captions, - embeddedFileNames, + captions: tableData.map(({ caption }) => caption), + embeddedFileNames: tableData.map(({ fileName }) => fileName), + additionalMedia: tableData.map(({ additionalMedia }) => additionalMedia), longDescription, __images: imageData }; diff --git a/src/scraping/buxton/final/assets/pdfs/3DCad_Brochure.pdf b/src/scraping/buxton/final/assets/pdfs/3DCad_Brochure.pdf Binary files differnew file mode 100644 index 000000000..4746d2f41 --- /dev/null +++ b/src/scraping/buxton/final/assets/pdfs/3DCad_Brochure.pdf |