diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/buxton/scraper.py | 9 | ||||
-rw-r--r-- | src/buxton/source/Extra/Bill_Notes_Braun_T3.docx (renamed from src/buxton/source/Bill_Notes_Braun_T3.docx) | bin | 1671968 -> 1671968 bytes | |||
-rw-r--r-- | src/buxton/source/Extra/Bill_Notes_CasioC801.docx (renamed from src/buxton/source/Bill_Notes_CasioC801.docx) | bin | 574664 -> 574664 bytes | |||
-rw-r--r-- | src/buxton/source/Extra/Bill_Notes_Casio_Mini.docx (renamed from src/buxton/source/Bill_Notes_Casio_Mini.docx) | bin | 581069 -> 581069 bytes | |||
-rw-r--r-- | src/buxton/source/Extra/Bill_Notes_FingerWorks_Prototype.docx (renamed from src/buxton/source/Bill_Notes_FingerWorks_Prototype.docx) | bin | 585090 -> 585090 bytes | |||
-rw-r--r-- | src/buxton/source/Extra/Bill_Notes_Fingerworks_TouchStream.docx (renamed from src/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx) | bin | 1722555 -> 1722555 bytes | |||
-rw-r--r-- | src/buxton/source/Extra/Bill_Notes_FrogPad.docx (renamed from src/buxton/source/Bill_Notes_FrogPad.docx) | bin | 840173 -> 840173 bytes | |||
-rw-r--r-- | src/buxton/source/Extra/Bill_Notes_Gavilan_SC.docx (renamed from src/buxton/source/Bill_Notes_Gavilan_SC.docx) | bin | 1695290 -> 1695290 bytes | |||
-rw-r--r-- | src/buxton/source/Extra/Bill_Notes_Grandjean_Stenotype.docx (renamed from src/buxton/source/Bill_Notes_Grandjean_Stenotype.docx) | bin | 2094142 -> 2094142 bytes | |||
-rw-r--r-- | src/buxton/source/Extra/Bill_Notes_Matias.docx (renamed from src/buxton/source/Bill_Notes_Matias.docx) | bin | 590407 -> 590407 bytes | |||
-rw-r--r-- | src/buxton/source/Extra/Bill_Notes_MousePen.docx (renamed from src/buxton/source/Bill_Notes_MousePen.docx) | bin | 505322 -> 505322 bytes | |||
-rw-r--r-- | src/buxton/source/Extra/Bill_Notes_NewO.docx (renamed from src/buxton/source/Bill_Notes_NewO.docx) | bin | 2264571 -> 2264571 bytes | |||
-rw-r--r-- | src/buxton/source/Extra/Bill_Notes_OLPC.docx (renamed from src/buxton/source/Bill_Notes_OLPC.docx) | bin | 6883659 -> 6883659 bytes | |||
-rw-r--r-- | src/buxton/source/Extra/Bill_Notes_PARCkbd.docx (renamed from src/buxton/source/Bill_Notes_PARCkbd.docx) | bin | 631959 -> 631959 bytes | |||
-rw-r--r-- | src/buxton/source/Extra/Bill_Notes_Philco_Mystery_Control.docx (renamed from src/buxton/source/Bill_Notes_Philco_Mystery_Control.docx) | bin | 1994439 -> 1994439 bytes | |||
-rw-r--r-- | src/buxton/source/Extra/Bill_Notes_TASA_Kbd.docx (renamed from src/buxton/source/Bill_Notes_TASA_Kbd.docx) | bin | 461199 -> 461199 bytes | |||
-rw-r--r-- | src/buxton/source/Extra/Bill_Notes_The_Tap.docx (renamed from src/buxton/source/Bill_Notes_The_Tap.docx) | bin | 711321 -> 711321 bytes |
17 files changed, 6 insertions, 3 deletions
diff --git a/src/buxton/scraper.py b/src/buxton/scraper.py index a3bbc75ac..c7efd8f09 100644 --- a/src/buxton/scraper.py +++ b/src/buxton/scraper.py @@ -78,7 +78,7 @@ def write_image(folder, name): }, "data": { "url": path, - "type": "image" + "__type": "image" }, "title": name, "nativeWidth": native_width, @@ -105,9 +105,10 @@ def write_image(folder, name): def parse_document(file_name: str): print(f"Parsing {file_name}...") - result = {} pure_name = file_name.split(".")[0] + result = {} + dir_path = dist + "/" + pure_name mkdir_if_absent(dir_path) @@ -116,6 +117,8 @@ def parse_document(file_name: str): print("Extracting images...\n") for image in os.listdir(dir_path): view_doc_guids.append(write_image(pure_name, image)) + os.rename(dir_path + "/" + image, dir_path + + "/" + image.replace(".", "_m.", 1)) print() def sanitize(line): return re.sub("[\n\t]+", "", line).replace(u"\u00A0", " ").replace( @@ -234,7 +237,7 @@ for doc in mongofied: db.newDocuments.insert_one(doc) proxified = list( - map(lambda guid: {"fieldId": guid, "type": "proxy"}, view_doc_guids)) + map(lambda guid: {"fieldId": guid, "__type": "proxy"}, view_doc_guids)) db.newDocuments.update_one( {"fields.title": "WS collection 1"}, {"$push": {"fields.data.fields": {"$each": proxified}}} diff --git a/src/buxton/source/Bill_Notes_Braun_T3.docx b/src/buxton/source/Extra/Bill_Notes_Braun_T3.docx Binary files differindex 356697092..356697092 100644 --- a/src/buxton/source/Bill_Notes_Braun_T3.docx +++ b/src/buxton/source/Extra/Bill_Notes_Braun_T3.docx diff --git a/src/buxton/source/Bill_Notes_CasioC801.docx b/src/buxton/source/Extra/Bill_Notes_CasioC801.docx Binary files differindex cd89fb97b..cd89fb97b 100644 --- a/src/buxton/source/Bill_Notes_CasioC801.docx +++ b/src/buxton/source/Extra/Bill_Notes_CasioC801.docx diff --git a/src/buxton/source/Bill_Notes_Casio_Mini.docx b/src/buxton/source/Extra/Bill_Notes_Casio_Mini.docx Binary files differindex a503cddfc..a503cddfc 100644 --- a/src/buxton/source/Bill_Notes_Casio_Mini.docx +++ b/src/buxton/source/Extra/Bill_Notes_Casio_Mini.docx diff --git a/src/buxton/source/Bill_Notes_FingerWorks_Prototype.docx b/src/buxton/source/Extra/Bill_Notes_FingerWorks_Prototype.docx Binary files differindex 4d13a8cf5..4d13a8cf5 100644 --- a/src/buxton/source/Bill_Notes_FingerWorks_Prototype.docx +++ b/src/buxton/source/Extra/Bill_Notes_FingerWorks_Prototype.docx diff --git a/src/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx b/src/buxton/source/Extra/Bill_Notes_Fingerworks_TouchStream.docx Binary files differindex 578a1be08..578a1be08 100644 --- a/src/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx +++ b/src/buxton/source/Extra/Bill_Notes_Fingerworks_TouchStream.docx diff --git a/src/buxton/source/Bill_Notes_FrogPad.docx b/src/buxton/source/Extra/Bill_Notes_FrogPad.docx Binary files differindex d01e1bf5c..d01e1bf5c 100644 --- a/src/buxton/source/Bill_Notes_FrogPad.docx +++ b/src/buxton/source/Extra/Bill_Notes_FrogPad.docx diff --git a/src/buxton/source/Bill_Notes_Gavilan_SC.docx b/src/buxton/source/Extra/Bill_Notes_Gavilan_SC.docx Binary files differindex 7bd28b376..7bd28b376 100644 --- a/src/buxton/source/Bill_Notes_Gavilan_SC.docx +++ b/src/buxton/source/Extra/Bill_Notes_Gavilan_SC.docx diff --git a/src/buxton/source/Bill_Notes_Grandjean_Stenotype.docx b/src/buxton/source/Extra/Bill_Notes_Grandjean_Stenotype.docx Binary files differindex 0615c4953..0615c4953 100644 --- a/src/buxton/source/Bill_Notes_Grandjean_Stenotype.docx +++ b/src/buxton/source/Extra/Bill_Notes_Grandjean_Stenotype.docx diff --git a/src/buxton/source/Bill_Notes_Matias.docx b/src/buxton/source/Extra/Bill_Notes_Matias.docx Binary files differindex 547603256..547603256 100644 --- a/src/buxton/source/Bill_Notes_Matias.docx +++ b/src/buxton/source/Extra/Bill_Notes_Matias.docx diff --git a/src/buxton/source/Bill_Notes_MousePen.docx b/src/buxton/source/Extra/Bill_Notes_MousePen.docx Binary files differindex 4e1056636..4e1056636 100644 --- a/src/buxton/source/Bill_Notes_MousePen.docx +++ b/src/buxton/source/Extra/Bill_Notes_MousePen.docx diff --git a/src/buxton/source/Bill_Notes_NewO.docx b/src/buxton/source/Extra/Bill_Notes_NewO.docx Binary files differindex a514926d2..a514926d2 100644 --- a/src/buxton/source/Bill_Notes_NewO.docx +++ b/src/buxton/source/Extra/Bill_Notes_NewO.docx diff --git a/src/buxton/source/Bill_Notes_OLPC.docx b/src/buxton/source/Extra/Bill_Notes_OLPC.docx Binary files differindex bfca0a9bb..bfca0a9bb 100644 --- a/src/buxton/source/Bill_Notes_OLPC.docx +++ b/src/buxton/source/Extra/Bill_Notes_OLPC.docx diff --git a/src/buxton/source/Bill_Notes_PARCkbd.docx b/src/buxton/source/Extra/Bill_Notes_PARCkbd.docx Binary files differindex c0cf6ba9a..c0cf6ba9a 100644 --- a/src/buxton/source/Bill_Notes_PARCkbd.docx +++ b/src/buxton/source/Extra/Bill_Notes_PARCkbd.docx diff --git a/src/buxton/source/Bill_Notes_Philco_Mystery_Control.docx b/src/buxton/source/Extra/Bill_Notes_Philco_Mystery_Control.docx Binary files differindex ad06903f3..ad06903f3 100644 --- a/src/buxton/source/Bill_Notes_Philco_Mystery_Control.docx +++ b/src/buxton/source/Extra/Bill_Notes_Philco_Mystery_Control.docx diff --git a/src/buxton/source/Bill_Notes_TASA_Kbd.docx b/src/buxton/source/Extra/Bill_Notes_TASA_Kbd.docx Binary files differindex e4c659de9..e4c659de9 100644 --- a/src/buxton/source/Bill_Notes_TASA_Kbd.docx +++ b/src/buxton/source/Extra/Bill_Notes_TASA_Kbd.docx diff --git a/src/buxton/source/Bill_Notes_The_Tap.docx b/src/buxton/source/Extra/Bill_Notes_The_Tap.docx Binary files differindex 8ceebc71e..8ceebc71e 100644 --- a/src/buxton/source/Bill_Notes_The_Tap.docx +++ b/src/buxton/source/Extra/Bill_Notes_The_Tap.docx |