From cbb2f4191e31d72c8c727976b5616983af15af45 Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Wed, 19 Jun 2019 14:38:13 -0400 Subject: Image import working! --- src/buxton/scraper.py | 9 ++++++--- src/buxton/source/Bill_Notes_Braun_T3.docx | Bin 1671968 -> 0 bytes src/buxton/source/Bill_Notes_CasioC801.docx | Bin 574664 -> 0 bytes src/buxton/source/Bill_Notes_Casio_Mini.docx | Bin 581069 -> 0 bytes .../source/Bill_Notes_FingerWorks_Prototype.docx | Bin 585090 -> 0 bytes .../source/Bill_Notes_Fingerworks_TouchStream.docx | Bin 1722555 -> 0 bytes src/buxton/source/Bill_Notes_FrogPad.docx | Bin 840173 -> 0 bytes src/buxton/source/Bill_Notes_Gavilan_SC.docx | Bin 1695290 -> 0 bytes src/buxton/source/Bill_Notes_Grandjean_Stenotype.docx | Bin 2094142 -> 0 bytes src/buxton/source/Bill_Notes_Matias.docx | Bin 590407 -> 0 bytes src/buxton/source/Bill_Notes_MousePen.docx | Bin 505322 -> 0 bytes src/buxton/source/Bill_Notes_NewO.docx | Bin 2264571 -> 0 bytes src/buxton/source/Bill_Notes_OLPC.docx | Bin 6883659 -> 0 bytes src/buxton/source/Bill_Notes_PARCkbd.docx | Bin 631959 -> 0 bytes .../source/Bill_Notes_Philco_Mystery_Control.docx | Bin 1994439 -> 0 bytes src/buxton/source/Bill_Notes_TASA_Kbd.docx | Bin 461199 -> 0 bytes src/buxton/source/Bill_Notes_The_Tap.docx | Bin 711321 -> 0 bytes src/buxton/source/Extra/Bill_Notes_Braun_T3.docx | Bin 0 -> 1671968 bytes src/buxton/source/Extra/Bill_Notes_CasioC801.docx | Bin 0 -> 574664 bytes src/buxton/source/Extra/Bill_Notes_Casio_Mini.docx | Bin 0 -> 581069 bytes .../Extra/Bill_Notes_FingerWorks_Prototype.docx | Bin 0 -> 585090 bytes .../Extra/Bill_Notes_Fingerworks_TouchStream.docx | Bin 0 -> 1722555 bytes src/buxton/source/Extra/Bill_Notes_FrogPad.docx | Bin 0 -> 840173 bytes src/buxton/source/Extra/Bill_Notes_Gavilan_SC.docx | Bin 0 -> 1695290 bytes .../source/Extra/Bill_Notes_Grandjean_Stenotype.docx | Bin 0 -> 2094142 bytes src/buxton/source/Extra/Bill_Notes_Matias.docx | Bin 0 -> 590407 bytes src/buxton/source/Extra/Bill_Notes_MousePen.docx | Bin 0 -> 505322 bytes src/buxton/source/Extra/Bill_Notes_NewO.docx | Bin 0 -> 2264571 bytes src/buxton/source/Extra/Bill_Notes_OLPC.docx | Bin 0 -> 6883659 bytes src/buxton/source/Extra/Bill_Notes_PARCkbd.docx | Bin 0 -> 631959 bytes .../Extra/Bill_Notes_Philco_Mystery_Control.docx | Bin 0 -> 1994439 bytes src/buxton/source/Extra/Bill_Notes_TASA_Kbd.docx | Bin 0 -> 461199 bytes src/buxton/source/Extra/Bill_Notes_The_Tap.docx | Bin 0 -> 711321 bytes 33 files changed, 6 insertions(+), 3 deletions(-) delete mode 100644 src/buxton/source/Bill_Notes_Braun_T3.docx delete mode 100644 src/buxton/source/Bill_Notes_CasioC801.docx delete mode 100644 src/buxton/source/Bill_Notes_Casio_Mini.docx delete mode 100644 src/buxton/source/Bill_Notes_FingerWorks_Prototype.docx delete mode 100644 src/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx delete mode 100644 src/buxton/source/Bill_Notes_FrogPad.docx delete mode 100644 src/buxton/source/Bill_Notes_Gavilan_SC.docx delete mode 100644 src/buxton/source/Bill_Notes_Grandjean_Stenotype.docx delete mode 100644 src/buxton/source/Bill_Notes_Matias.docx delete mode 100644 src/buxton/source/Bill_Notes_MousePen.docx delete mode 100644 src/buxton/source/Bill_Notes_NewO.docx delete mode 100644 src/buxton/source/Bill_Notes_OLPC.docx delete mode 100644 src/buxton/source/Bill_Notes_PARCkbd.docx delete mode 100644 src/buxton/source/Bill_Notes_Philco_Mystery_Control.docx delete mode 100644 src/buxton/source/Bill_Notes_TASA_Kbd.docx delete mode 100644 src/buxton/source/Bill_Notes_The_Tap.docx create mode 100644 src/buxton/source/Extra/Bill_Notes_Braun_T3.docx create mode 100644 src/buxton/source/Extra/Bill_Notes_CasioC801.docx create mode 100644 src/buxton/source/Extra/Bill_Notes_Casio_Mini.docx create mode 100644 src/buxton/source/Extra/Bill_Notes_FingerWorks_Prototype.docx create mode 100644 src/buxton/source/Extra/Bill_Notes_Fingerworks_TouchStream.docx create mode 100644 src/buxton/source/Extra/Bill_Notes_FrogPad.docx create mode 100644 src/buxton/source/Extra/Bill_Notes_Gavilan_SC.docx create mode 100644 src/buxton/source/Extra/Bill_Notes_Grandjean_Stenotype.docx create mode 100644 src/buxton/source/Extra/Bill_Notes_Matias.docx create mode 100644 src/buxton/source/Extra/Bill_Notes_MousePen.docx create mode 100644 src/buxton/source/Extra/Bill_Notes_NewO.docx create mode 100644 src/buxton/source/Extra/Bill_Notes_OLPC.docx create mode 100644 src/buxton/source/Extra/Bill_Notes_PARCkbd.docx create mode 100644 src/buxton/source/Extra/Bill_Notes_Philco_Mystery_Control.docx create mode 100644 src/buxton/source/Extra/Bill_Notes_TASA_Kbd.docx create mode 100644 src/buxton/source/Extra/Bill_Notes_The_Tap.docx (limited to 'src') diff --git a/src/buxton/scraper.py b/src/buxton/scraper.py index a3bbc75ac..c7efd8f09 100644 --- a/src/buxton/scraper.py +++ b/src/buxton/scraper.py @@ -78,7 +78,7 @@ def write_image(folder, name): }, "data": { "url": path, - "type": "image" + "__type": "image" }, "title": name, "nativeWidth": native_width, @@ -105,9 +105,10 @@ def write_image(folder, name): def parse_document(file_name: str): print(f"Parsing {file_name}...") - result = {} pure_name = file_name.split(".")[0] + result = {} + dir_path = dist + "/" + pure_name mkdir_if_absent(dir_path) @@ -116,6 +117,8 @@ def parse_document(file_name: str): print("Extracting images...\n") for image in os.listdir(dir_path): view_doc_guids.append(write_image(pure_name, image)) + os.rename(dir_path + "/" + image, dir_path + + "/" + image.replace(".", "_m.", 1)) print() def sanitize(line): return re.sub("[\n\t]+", "", line).replace(u"\u00A0", " ").replace( @@ -234,7 +237,7 @@ for doc in mongofied: db.newDocuments.insert_one(doc) proxified = list( - map(lambda guid: {"fieldId": guid, "type": "proxy"}, view_doc_guids)) + map(lambda guid: {"fieldId": guid, "__type": "proxy"}, view_doc_guids)) db.newDocuments.update_one( {"fields.title": "WS collection 1"}, {"$push": {"fields.data.fields": {"$each": proxified}}} diff --git a/src/buxton/source/Bill_Notes_Braun_T3.docx b/src/buxton/source/Bill_Notes_Braun_T3.docx deleted file mode 100644 index 356697092..000000000 Binary files a/src/buxton/source/Bill_Notes_Braun_T3.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_CasioC801.docx b/src/buxton/source/Bill_Notes_CasioC801.docx deleted file mode 100644 index cd89fb97b..000000000 Binary files a/src/buxton/source/Bill_Notes_CasioC801.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_Casio_Mini.docx b/src/buxton/source/Bill_Notes_Casio_Mini.docx deleted file mode 100644 index a503cddfc..000000000 Binary files a/src/buxton/source/Bill_Notes_Casio_Mini.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_FingerWorks_Prototype.docx b/src/buxton/source/Bill_Notes_FingerWorks_Prototype.docx deleted file mode 100644 index 4d13a8cf5..000000000 Binary files a/src/buxton/source/Bill_Notes_FingerWorks_Prototype.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx b/src/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx deleted file mode 100644 index 578a1be08..000000000 Binary files a/src/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_FrogPad.docx b/src/buxton/source/Bill_Notes_FrogPad.docx deleted file mode 100644 index d01e1bf5c..000000000 Binary files a/src/buxton/source/Bill_Notes_FrogPad.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_Gavilan_SC.docx b/src/buxton/source/Bill_Notes_Gavilan_SC.docx deleted file mode 100644 index 7bd28b376..000000000 Binary files a/src/buxton/source/Bill_Notes_Gavilan_SC.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_Grandjean_Stenotype.docx b/src/buxton/source/Bill_Notes_Grandjean_Stenotype.docx deleted file mode 100644 index 0615c4953..000000000 Binary files a/src/buxton/source/Bill_Notes_Grandjean_Stenotype.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_Matias.docx b/src/buxton/source/Bill_Notes_Matias.docx deleted file mode 100644 index 547603256..000000000 Binary files a/src/buxton/source/Bill_Notes_Matias.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_MousePen.docx b/src/buxton/source/Bill_Notes_MousePen.docx deleted file mode 100644 index 4e1056636..000000000 Binary files a/src/buxton/source/Bill_Notes_MousePen.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_NewO.docx b/src/buxton/source/Bill_Notes_NewO.docx deleted file mode 100644 index a514926d2..000000000 Binary files a/src/buxton/source/Bill_Notes_NewO.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_OLPC.docx b/src/buxton/source/Bill_Notes_OLPC.docx deleted file mode 100644 index bfca0a9bb..000000000 Binary files a/src/buxton/source/Bill_Notes_OLPC.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_PARCkbd.docx b/src/buxton/source/Bill_Notes_PARCkbd.docx deleted file mode 100644 index c0cf6ba9a..000000000 Binary files a/src/buxton/source/Bill_Notes_PARCkbd.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_Philco_Mystery_Control.docx b/src/buxton/source/Bill_Notes_Philco_Mystery_Control.docx deleted file mode 100644 index ad06903f3..000000000 Binary files a/src/buxton/source/Bill_Notes_Philco_Mystery_Control.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_TASA_Kbd.docx b/src/buxton/source/Bill_Notes_TASA_Kbd.docx deleted file mode 100644 index e4c659de9..000000000 Binary files a/src/buxton/source/Bill_Notes_TASA_Kbd.docx and /dev/null differ diff --git a/src/buxton/source/Bill_Notes_The_Tap.docx b/src/buxton/source/Bill_Notes_The_Tap.docx deleted file mode 100644 index 8ceebc71e..000000000 Binary files a/src/buxton/source/Bill_Notes_The_Tap.docx and /dev/null differ diff --git a/src/buxton/source/Extra/Bill_Notes_Braun_T3.docx b/src/buxton/source/Extra/Bill_Notes_Braun_T3.docx new file mode 100644 index 000000000..356697092 Binary files /dev/null and b/src/buxton/source/Extra/Bill_Notes_Braun_T3.docx differ diff --git a/src/buxton/source/Extra/Bill_Notes_CasioC801.docx b/src/buxton/source/Extra/Bill_Notes_CasioC801.docx new file mode 100644 index 000000000..cd89fb97b Binary files /dev/null and b/src/buxton/source/Extra/Bill_Notes_CasioC801.docx differ diff --git a/src/buxton/source/Extra/Bill_Notes_Casio_Mini.docx b/src/buxton/source/Extra/Bill_Notes_Casio_Mini.docx new file mode 100644 index 000000000..a503cddfc Binary files /dev/null and b/src/buxton/source/Extra/Bill_Notes_Casio_Mini.docx differ diff --git a/src/buxton/source/Extra/Bill_Notes_FingerWorks_Prototype.docx b/src/buxton/source/Extra/Bill_Notes_FingerWorks_Prototype.docx new file mode 100644 index 000000000..4d13a8cf5 Binary files /dev/null and b/src/buxton/source/Extra/Bill_Notes_FingerWorks_Prototype.docx differ diff --git a/src/buxton/source/Extra/Bill_Notes_Fingerworks_TouchStream.docx b/src/buxton/source/Extra/Bill_Notes_Fingerworks_TouchStream.docx new file mode 100644 index 000000000..578a1be08 Binary files /dev/null and b/src/buxton/source/Extra/Bill_Notes_Fingerworks_TouchStream.docx differ diff --git a/src/buxton/source/Extra/Bill_Notes_FrogPad.docx b/src/buxton/source/Extra/Bill_Notes_FrogPad.docx new file mode 100644 index 000000000..d01e1bf5c Binary files /dev/null and b/src/buxton/source/Extra/Bill_Notes_FrogPad.docx differ diff --git a/src/buxton/source/Extra/Bill_Notes_Gavilan_SC.docx b/src/buxton/source/Extra/Bill_Notes_Gavilan_SC.docx new file mode 100644 index 000000000..7bd28b376 Binary files /dev/null and b/src/buxton/source/Extra/Bill_Notes_Gavilan_SC.docx differ diff --git a/src/buxton/source/Extra/Bill_Notes_Grandjean_Stenotype.docx b/src/buxton/source/Extra/Bill_Notes_Grandjean_Stenotype.docx new file mode 100644 index 000000000..0615c4953 Binary files /dev/null and b/src/buxton/source/Extra/Bill_Notes_Grandjean_Stenotype.docx differ diff --git a/src/buxton/source/Extra/Bill_Notes_Matias.docx b/src/buxton/source/Extra/Bill_Notes_Matias.docx new file mode 100644 index 000000000..547603256 Binary files /dev/null and b/src/buxton/source/Extra/Bill_Notes_Matias.docx differ diff --git a/src/buxton/source/Extra/Bill_Notes_MousePen.docx b/src/buxton/source/Extra/Bill_Notes_MousePen.docx new file mode 100644 index 000000000..4e1056636 Binary files /dev/null and b/src/buxton/source/Extra/Bill_Notes_MousePen.docx differ diff --git a/src/buxton/source/Extra/Bill_Notes_NewO.docx b/src/buxton/source/Extra/Bill_Notes_NewO.docx new file mode 100644 index 000000000..a514926d2 Binary files /dev/null and b/src/buxton/source/Extra/Bill_Notes_NewO.docx differ diff --git a/src/buxton/source/Extra/Bill_Notes_OLPC.docx b/src/buxton/source/Extra/Bill_Notes_OLPC.docx new file mode 100644 index 000000000..bfca0a9bb Binary files /dev/null and b/src/buxton/source/Extra/Bill_Notes_OLPC.docx differ diff --git a/src/buxton/source/Extra/Bill_Notes_PARCkbd.docx b/src/buxton/source/Extra/Bill_Notes_PARCkbd.docx new file mode 100644 index 000000000..c0cf6ba9a Binary files /dev/null and b/src/buxton/source/Extra/Bill_Notes_PARCkbd.docx differ diff --git a/src/buxton/source/Extra/Bill_Notes_Philco_Mystery_Control.docx b/src/buxton/source/Extra/Bill_Notes_Philco_Mystery_Control.docx new file mode 100644 index 000000000..ad06903f3 Binary files /dev/null and b/src/buxton/source/Extra/Bill_Notes_Philco_Mystery_Control.docx differ diff --git a/src/buxton/source/Extra/Bill_Notes_TASA_Kbd.docx b/src/buxton/source/Extra/Bill_Notes_TASA_Kbd.docx new file mode 100644 index 000000000..e4c659de9 Binary files /dev/null and b/src/buxton/source/Extra/Bill_Notes_TASA_Kbd.docx differ diff --git a/src/buxton/source/Extra/Bill_Notes_The_Tap.docx b/src/buxton/source/Extra/Bill_Notes_The_Tap.docx new file mode 100644 index 000000000..8ceebc71e Binary files /dev/null and b/src/buxton/source/Extra/Bill_Notes_The_Tap.docx differ -- cgit v1.2.3-70-g09d2