diff options
author | Sam Wilkins <samwilkins333@gmail.com> | 2020-05-03 15:05:04 -0700 |
---|---|---|
committer | Sam Wilkins <samwilkins333@gmail.com> | 2020-05-03 15:05:04 -0700 |
commit | b8a62e6404a695e57ab1305fd13be23e8d935360 (patch) | |
tree | 34fe4ff172510237fae4591de0e08351880d7974 | |
parent | 952bc0d744833ab79f69f2f13abde1e4cee68408 (diff) |
cleanup
-rw-r--r-- | src/client/apis/google_docs/GooglePhotosClientUtils.ts | 28 | ||||
-rw-r--r-- | src/scraping/buxton/final/BuxtonImporter.ts | 28 | ||||
-rw-r--r-- | src/server/DashUploadUtils.ts | 7 |
3 files changed, 28 insertions, 35 deletions
diff --git a/src/client/apis/google_docs/GooglePhotosClientUtils.ts b/src/client/apis/google_docs/GooglePhotosClientUtils.ts index e3f801c46..ff471853a 100644 --- a/src/client/apis/google_docs/GooglePhotosClientUtils.ts +++ b/src/client/apis/google_docs/GooglePhotosClientUtils.ts @@ -76,7 +76,6 @@ export namespace GooglePhotos { } export const CollectionToAlbum = async (options: AlbumCreationOptions): Promise<Opt<AlbumCreationResult>> => { - await GoogleAuthenticationManager.Instance.fetchOrGenerateAccessToken(); const { collection, title, descriptionKey, tag } = options; const dataDocument = Doc.GetProto(collection); const images = ((await DocListCastAsync(dataDocument.data)) || []).filter(doc => Cast(doc.data, ImageField)); @@ -157,24 +156,20 @@ export namespace GooglePhotos { images && images.forEach(image => tagMapping.set(image[Id], ContentCategories.NONE)); const values = Object.values(ContentCategories); for (const value of values) { - if (value !== ContentCategories.NONE) { - const results = await ContentSearch({ included: [value] }); - if (results.mediaItems) { - const ids = results.mediaItems.map(item => item.id); - for (const id of ids) { - const image = await Cast(idMapping[id], Doc); - if (image) { - const key = image[Id]; - const tags = tagMapping.get(key)!; - if (!tags.includes(value)) { - tagMapping.set(key, tags + delimiter + value); - } - } - } + if (value === ContentCategories.NONE) { + continue; + } + for (const id of (await ContentSearch({ included: [value] }))?.mediaItems?.map(({ id }) => id)) { + const image = await Cast(idMapping[id], Doc); + if (!image) { + continue; } + const key = image[Id]; + const tags = tagMapping.get(key); + !tags?.includes(value) && tagMapping.set(key, tags + delimiter + value); } } - images && images.forEach(image => { + images?.forEach(image => { const concatenated = tagMapping.get(image[Id])!; const tags = concatenated.split(delimiter); if (tags.length > 1) { @@ -184,7 +179,6 @@ export namespace GooglePhotos { image.googlePhotosTags = ContentCategories.NONE; } }); - }; interface DateRange { diff --git a/src/scraping/buxton/final/BuxtonImporter.ts b/src/scraping/buxton/final/BuxtonImporter.ts index 21363f848..94302c7b3 100644 --- a/src/scraping/buxton/final/BuxtonImporter.ts +++ b/src/scraping/buxton/final/BuxtonImporter.ts @@ -350,8 +350,11 @@ async function parseFiles(wordDocuments: string[], emitter: ResultCallback, term * to inspect the structure, since the Node XML library does not expose the parsed * structure very well for searching, say in the debug console. */ -const tableCellXPath = '//*[name()="w:tbl"]/*[name()="w:tr"]/*[name()="w:tc"]'; -const hyperlinkXPath = '//*[name()="Relationship" and contains(@Type, "hyperlink")]'; +const xPaths = { + paragraphs: '//*[name()="w:p"]', + tableCells: '//*[name()="w:tbl"]/*[name()="w:tr"]/*[name()="w:tc"]', + hyperlinks: '//*[name()="Relationship" and contains(@Type, "hyperlink")]' +}; /** * The meat of the script, images and text content are extracted here @@ -371,30 +374,31 @@ async function extractFileContents(pathToDocument: string): Promise<DocumentCont // preserve paragraph formatting and line breaks that would otherwise get lost in the plain text parsing // of the XML hierarchy - const paragraphs = document.find('//*[name()="w:p"]').map(node => Utilities.correctSentences(node.text()).transformed!); + const paragraphs = document.find(xPaths.paragraphs).map(node => Utilities.correctSentences(node.text()).transformed!); const start = paragraphs.indexOf(paragraphs.find(el => /Bill Buxton[’']s Notes/.test(el))!) + 1; const end = paragraphs.indexOf("Device Details"); const longDescription = paragraphs.slice(start, end).filter(paragraph => paragraph.length).join("\n\n"); // extract captions from the table cells - const tableRowsFlattened = document.find(tableCellXPath).map(node => node.text().trim()); + const tableRowsFlattened = document.find(xPaths.tableCells).map(node => node.text().trim()); const { length } = tableRowsFlattened; - strictEqual(length > 3, true, "No captions written."); - strictEqual(length % 3 === 0, true, "Improper caption formatting."); + const numCols = 3; + strictEqual(length > numCols, true, "No captions written."); // first row has the headers, not content + strictEqual(length % numCols === 0, true, "Improper caption formatting."); - // break the flat list of strings into groups of three, since there - // currently are three columns in the table. Thus, each group represents + // break the flat list of strings into groups of numColumns. Thus, each group represents // a row in the table, where the first row has no text content since it's - // the image, the second has the file name and the third has the caption - for (let i = 3; i < tableRowsFlattened.length; i += 3) { - const row = tableRowsFlattened.slice(i, i + 3); + // the image, the second has the file name and the third has the caption (maybe additional columns + // have been added or reordered since this was written, but follow the same appraoch) + for (let i = numCols; i < tableRowsFlattened.length; i += numCols) { + const row = tableRowsFlattened.slice(i, i + numCols); embeddedFileNames.push(row[1]); captions.push(row[2]); } // extract all hyperlinks embedded in the document const rels = await Utilities.readAndParseXml(zip, "word/_rels/document.xml.rels"); - const hyperlinks = rels.find(hyperlinkXPath).map(el => el.attrs()[2].value()); + const hyperlinks = rels.find(xPaths.hyperlinks).map(el => el.attrs()[2].value()); console.log("Text extracted."); // write out the images for this document diff --git a/src/server/DashUploadUtils.ts b/src/server/DashUploadUtils.ts index 3f903a861..8567631cd 100644 --- a/src/server/DashUploadUtils.ts +++ b/src/server/DashUploadUtils.ts @@ -325,12 +325,7 @@ export namespace DashUploadUtils { const outputPath = path.resolve(outputDirectory, writtenFiles[suffix] = InjectSize(outputFileName, suffix)); await new Promise<void>(async (resolve, reject) => { const source = streamProvider(); - let readStream: Stream; - if (source instanceof Promise) { - readStream = await source; - } else { - readStream = source; - } + let readStream: Stream = source instanceof Promise ? await source : source; if (resizer) { readStream = readStream.pipe(resizer.withMetadata()); } |