From f15269bf5b1006bc8169892940dcf027ff6c023d Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Sat, 22 Feb 2020 12:10:32 -0500 Subject: importer finalize pre comment --- src/scraping/buxton/final/BuxtonImporter.ts | 33 ++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 10 deletions(-) (limited to 'src') diff --git a/src/scraping/buxton/final/BuxtonImporter.ts b/src/scraping/buxton/final/BuxtonImporter.ts index 098671942..319486593 100644 --- a/src/scraping/buxton/final/BuxtonImporter.ts +++ b/src/scraping/buxton/final/BuxtonImporter.ts @@ -40,11 +40,15 @@ export interface AnalysisResult { errors?: { [key: string]: string }; } -type Transformer = (raw: string) => { transformed?: T, error?: string }; +type Transformer = (raw: string) => TransformResult; +interface TransformResult { + transformed?: T; + error?: string; +} export interface ImportResults { - deviceCount: number, - errorCount: number + deviceCount: number; + errorCount: number; } type ResultCallback = (result: AnalysisResult) => void; @@ -65,7 +69,7 @@ interface ImageData { namespace Utilities { - export function numberValue(raw: string) { + export function numberValue(raw: string): TransformResult { const transformed = Number(raw); if (isNaN(transformed)) { return { error: `${raw} cannot be parsed to a numeric value.` }; @@ -73,13 +77,13 @@ namespace Utilities { return { transformed }; } - export function collectUniqueTokens(raw: string) { + export function collectUniqueTokens(raw: string): TransformResult { const pieces = raw.replace(/,|\s+and\s+/g, " ").split(/\s+/).filter(piece => piece.length); const unique = new Set(pieces.map(token => token.toLowerCase().trim())); return { transformed: Array.from(unique).map(capitalize).sort() }; } - export function correctSentences(raw: string) { + export function correctSentences(raw: string): TransformResult { raw = raw.replace(/\./g, ". ").replace(/\:/g, ": ").replace(/\,/g, ", ").replace(/\?/g, "? ").trimRight(); raw = raw.replace(/\s{2,}/g, " "); return { transformed: raw }; @@ -125,16 +129,25 @@ const RegexMap = new Map>([ }], ["primaryKey", { exp: /Primary:\s+(.*)(Secondary|Additional):/, - transformer: raw => ({ transformed: Utilities.collectUniqueTokens(raw).transformed[0] }) + transformer: raw => { + const { transformed, error } = Utilities.collectUniqueTokens(raw); + return transformed ? { transformed: transformed[0] } : { error }; + } }], ["secondaryKey", { exp: /(Secondary|Additional):\s+(.*)Attributes?:/, - transformer: raw => ({ transformed: Utilities.collectUniqueTokens(raw).transformed[0] }), + transformer: raw => { + const { transformed, error } = Utilities.collectUniqueTokens(raw); + return transformed ? { transformed: transformed[0] } : { error }; + }, matchIndex: 2 }], ["attribute", { exp: /Attributes?:\s+(.*)Links/, - transformer: raw => ({ transformed: Utilities.collectUniqueTokens(raw).transformed[0] }), + transformer: raw => { + const { transformed, error } = Utilities.collectUniqueTokens(raw); + return transformed ? { transformed: transformed[0] } : { error }; + }, }], ["originalPrice", { exp: /Original Price \(USD\)\:\s+(\$[0-9\,]+\.[0-9]+|NFS)/, @@ -300,7 +313,7 @@ async function writeImages(zip: any): Promise { const { width, height, type } = await new Promise(async resolve => { const sizeStream = createImageSizeStream().on('size', (dimensions: Dimensions) => { readStream.destroy(); - resolve(dimensions) + resolve(dimensions); }); const readStream = await streamImage(); readStream.pipe(sizeStream); -- cgit v1.2.3-70-g09d2