Diffstat (limited to 'src')
-rw-r--r--  src/client/documents/Documents.ts                          25
-rw-r--r--  src/client/views/collections/CollectionTreeView.tsx         7
-rw-r--r--  src/scraping/buxton/final/BuxtonImporter.ts                 74
-rw-r--r--  src/scraping/buxton/final/assets/pdfs/3DCad_Brochure.pdf   bin 0 -> 107790 bytes
4 files changed, 82 insertions(+), 24 deletions(-)
diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts
index 7f5b62f22..f7e19eecd 100644
--- a/src/client/documents/Documents.ts
+++ b/src/client/documents/Documents.ts
@@ -432,17 +432,28 @@ export namespace Docs {
parentProto.data = new List<Doc>();
}
if (device) {
- const { __images } = device;
+ const { title, __images, additionalMedia } = device;
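+ // pull the media fields off the device up front so they can be handled specially below, rather than serialized with the rest of its data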
delete device.__images;
+ delete device.additionalMedia;
const { ImageDocument, StackingDocument } = Docs.Create;
const constructed = __images.map(({ url, nativeWidth, nativeHeight }) => ({ url: Utils.prepend(url), nativeWidth, nativeHeight }));
- const deviceImages = constructed.map(({ url, nativeWidth, nativeHeight }, i) => ImageDocument(url, {
- title: `image${i}.${extname(url)}`,
- _nativeWidth: nativeWidth,
- _nativeHeight: nativeHeight
- }));
+ const deviceImages = constructed.map(({ url, nativeWidth, nativeHeight }, i) => {
+ const imageDoc = ImageDocument(url, {
+ title: `image${i}.${extname(url)}`,
+ _nativeWidth: nativeWidth,
+ _nativeHeight: nativeHeight
+ });
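+ // attach any additional media recorded for this image (e.g. a pdf) as prefixed fields resolving to server file paths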
+ const media = additionalMedia[i];
+ if (media) {
+ for (const key of Object.keys(media)) {
+ imageDoc[`additionalMedia_${key}`] = Utils.prepend(`/files/${key}/buxton/${media[key]}`);
+ }
+ }
+ return imageDoc;
+ });
// the main document we create
- const doc = StackingDocument(deviceImages, { title: device.title, _LODdisable: true, hero: new ImageField(constructed[0].url) });
+ const doc = StackingDocument(deviceImages, { title, _LODdisable: true, hero: new ImageField(constructed[0].url) });
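+ // record a lowercase alias of the title, presumably for case-insensitive lookup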
+ doc.nameAliases = new List<string>([title.toLowerCase()]);
// add the parsed attributes to this main document
Docs.Get.FromJson({ data: device, appendToExisting: { targetDoc: Doc.GetProto(doc) } });
Doc.AddDocToList(parentProto, "data", doc);
diff --git a/src/client/views/collections/CollectionTreeView.tsx b/src/client/views/collections/CollectionTreeView.tsx
index 87fe79e19..4d67b1e2c 100644
--- a/src/client/views/collections/CollectionTreeView.tsx
+++ b/src/client/views/collections/CollectionTreeView.tsx
@@ -717,7 +717,7 @@ export class CollectionTreeView extends CollectionSubView<Document, Partial<coll
}
ContextMenu.Instance.addItem({
description: "Buxton Layout", icon: "eye", event: () => {
- const { ImageDocument } = Docs.Create;
+ const { ImageDocument, PdfDocument } = Docs.Create;
const { Document } = this.props;
const fallbackImg = "http://www.cs.brown.edu/~bcz/face.gif";
const detailView = Cast(Cast(Doc.UserDoc()["template-button-detail"], Doc, null)?.dragFactory, Doc, null);
@@ -726,13 +726,14 @@ export class CollectionTreeView extends CollectionSubView<Document, Partial<coll
heroView._showTitle = "title";
heroView._showTitleHover = "titlehover";
- const doubleClickView = ImageDocument("http://cs.brown.edu/~bcz/face.gif", { _width: 400 }); // replace with desired double click target
+ const fallback = ImageDocument("http://cs.brown.edu/~bcz/face.gif", { _width: 400 }); // replace with desired double click target
+ let pdfContent: string;
DocListCast(this.dataDoc[this.props.fieldKey]).map(d => {
DocListCast(d.data).map((img, i) => {
const caption = (d.captions as any)[i];
if (caption) {
Doc.GetProto(img).caption = caption;
- Doc.GetProto(img).doubleClickView = doubleClickView;
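+ // if the image has an associated pdf, double clicking opens it; otherwise fall back to the placeholder image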
+ Doc.GetProto(img).doubleClickView = (pdfContent = StrCast(img.additionalMedia_pdfs)) ? PdfDocument(pdfContent) : fallback;
}
});
Doc.GetProto(d).type = "buxton";
diff --git a/src/scraping/buxton/final/BuxtonImporter.ts b/src/scraping/buxton/final/BuxtonImporter.ts
index e55850b29..684c00c0d 100644
--- a/src/scraping/buxton/final/BuxtonImporter.ts
+++ b/src/scraping/buxton/final/BuxtonImporter.ts
@@ -1,4 +1,4 @@
-import { readdirSync, writeFile, mkdirSync } from "fs";
+import { readdirSync, writeFile, mkdirSync, createReadStream, createWriteStream, existsSync, statSync } from "fs";
import * as path from "path";
import { red, cyan, yellow } from "colors";
import { Utils } from "../../../Utils";
@@ -9,6 +9,7 @@ const createImageSizeStream = require("image-size-stream");
import { parseXml } from "libxmljs";
import { strictEqual } from "assert";
import { Readable, PassThrough } from "stream";
+import { Directory, serverPathToFile, pathToDirectory } from "../../../server/ApiManagers/UploadManager";
/**
* This is an arbitrary bundle of data that gets populated
@@ -18,8 +19,7 @@ interface DocumentContents {
body: string;
imageData: ImageData[];
hyperlinks: string[];
- captions: string[];
- embeddedFileNames: string[];
+ tableData: TableData[];
longDescription: string;
}
@@ -40,6 +40,7 @@ export interface DeviceDocument {
secondaryKey: string;
attribute: string;
__images: ImageData[];
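+ // per-image map from media directory (e.g. "pdfs") to file name; undefined when an image has no extra media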
+ additionalMedia: ({ [type: string]: string } | undefined)[];
hyperlinks: string[];
captions: string[]; // from the table column
embeddedFileNames: string[]; // from the table column
@@ -255,6 +256,8 @@ const FormatMap = new Map<keyof DeviceDocument, ValueFormatDefinition<any>>([
]);
const sourceDir = path.resolve(__dirname, "source"); // where the Word documents are assumed to be stored
+const assetDir = path.resolve(__dirname, "assets"); // where any additional media content like pdfs will be stored. Each subdirectory of this
+// must follow the enum Directory.<type> naming scheme
const outDir = path.resolve(__dirname, "json"); // where the JSON output of these device documents will be written
const imageDir = path.resolve(__dirname, "../../../server/public/files/images/buxton"); // where, in the server, these images will be written
const successOut = "buxton.json"; // the JSON list representing properly formatted documents
@@ -277,12 +280,13 @@ export default async function executeImport(emitter: ResultCallback, terminator:
rimraf.sync(dir);
mkdirSync(dir);
});
+ await transferAssets();
return parseFiles(wordDocuments, emitter, terminator);
} catch (e) {
const message = [
"Unable to find a source directory.",
- "Please ensure that the following directory exists and is populated with Word documents:",
- `${sourceDir}`
+ "Please ensure that the following directory exists:",
+ `${e.message}`
].join('\n');
console.log(red(message));
return { error: message };
@@ -290,6 +294,32 @@ export default async function executeImport(emitter: ResultCallback, terminator:
}
/**
+ * Builds a mirrored directory structure of all media / asset files
+ * within the server's public directory.
+ */
+async function transferAssets() {
+ for (const assetType of readdirSync(assetDir)) {
+ const subroot = path.resolve(assetDir, assetType);
+ if (!statSync(subroot).isDirectory()) {
+ continue;
+ }
+ const outputSubroot = serverPathToFile(assetType as Directory, "buxton");
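+ // if the mirrored subdirectory already exists, assume its assets were copied on a previous run and skip it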
+ if (existsSync(outputSubroot)) {
+ continue;
+ } else {
+ mkdirSync(outputSubroot);
+ }
+ for (const fileName of readdirSync(subroot)) {
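+ // stream-copy each asset into the mirrored server directory, waiting for each file to finish before starting the next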
+ const readStream = createReadStream(path.resolve(subroot, fileName));
+ const writeStream = createWriteStream(path.resolve(outputSubroot, fileName));
+ await new Promise<void>(resolve => {
+ readStream.pipe(writeStream).on("close", resolve);
+ });
+ }
+ }
+}
+
+/**
* Parse every Word document in the directory, notifying any callers as needed
* at each iteration via the emitter.
* @param wordDocuments the string list of Word document names to parse
@@ -356,6 +386,16 @@ const xPaths = {
hyperlinks: '//*[name()="Relationship" and contains(@Type, "hyperlink")]'
};
+interface TableData {
+ fileName: string;
+ caption: string;
+ additionalMedia?: { [type: string]: string };
+}
+
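+// maps a media type marker from the table's fourth column to the server directory that media lives in; currently only "p" (pdf) is recognized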
+const SuffixDirectoryMap = new Map<string, Directory>([
+ ["p", Directory.pdfs]
+]);
+
/**
* The meat of the script, images and text content are extracted here
* @param pathToDocument the path to the document relative to the root of the zip
@@ -370,8 +410,7 @@ async function extractFileContents(pathToDocument: string): Promise<DocumentCont
// get plain text
const body = document.root()?.text() ?? "No body found. Check the import script's XML parser.";
const captions: string[] = [];
- const embeddedFileNames: string[] = [];
-
+ const tableData: TableData[] = [];
// preserve paragraph formatting and line breaks that would otherwise get lost in the plain text parsing
// of the XML hierarchy
const paragraphs = document.find(xPaths.paragraphs).map(node => Utilities.correctSentences(node.text()).transformed!);
@@ -382,7 +421,7 @@ async function extractFileContents(pathToDocument: string): Promise<DocumentCont
// extract captions from the table cells
const tableRowsFlattened = document.find(xPaths.tableCells).map(node => node.text().trim());
const { length } = tableRowsFlattened;
- const numCols = 3;
+ const numCols = 4;
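+ // the table now carries a fourth column flagging any additional media associated with each row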
strictEqual(length > numCols, true, "No captions written."); // first row has the headers, not content
strictEqual(length % numCols === 0, true, "Improper caption formatting.");
@@ -392,8 +431,14 @@ async function extractFileContents(pathToDocument: string): Promise<DocumentCont
// have been added or reordered since this was written, but follow the same approach)
for (let i = numCols; i < tableRowsFlattened.length; i += numCols) {
const row = tableRowsFlattened.slice(i, i + numCols);
- embeddedFileNames.push(row[1]);
- captions.push(row[2]);
+ const entry: TableData = { fileName: row[1], caption: row[2] };
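+ // a marker in the fourth cell (e.g. "p") indicates additional media, assumed to share the embedded image's base file name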
+ const key = SuffixDirectoryMap.get(row[3].toLowerCase());
+ if (key) {
+ const media: any = {};
+ media[key] = `${entry.fileName.split(".")[0]}.pdf`;
+ entry.additionalMedia = media;
+ }
+ tableData.push(entry);
}
// extract all hyperlinks embedded in the document
@@ -409,7 +454,7 @@ async function extractFileContents(pathToDocument: string): Promise<DocumentCont
// cleanup
zip.close();
- return { body, longDescription, imageData, captions, embeddedFileNames, hyperlinks };
+ return { body, longDescription, imageData, tableData, hyperlinks };
}
// zip relative path from root expression / filter used to isolate only media assets
@@ -492,11 +537,12 @@ async function writeImages(zip: any): Promise<ImageData[]> {
* @param contents the data already computed / parsed by extractFileContents
*/
function analyze(fileName: string, contents: DocumentContents): AnalysisResult {
- const { body, imageData, captions, hyperlinks, embeddedFileNames, longDescription } = contents;
+ const { body, imageData, hyperlinks, tableData, longDescription } = contents;
const device: any = {
hyperlinks,
- captions,
- embeddedFileNames,
+ captions: tableData.map(({ caption }) => caption),
+ embeddedFileNames: tableData.map(({ fileName }) => fileName),
+ additionalMedia: tableData.map(({ additionalMedia }) => additionalMedia),
longDescription,
__images: imageData
};
diff --git a/src/scraping/buxton/final/assets/pdfs/3DCad_Brochure.pdf b/src/scraping/buxton/final/assets/pdfs/3DCad_Brochure.pdf
new file mode 100644
index 000000000..4746d2f41
--- /dev/null
+++ b/src/scraping/buxton/final/assets/pdfs/3DCad_Brochure.pdf
Binary files differ