diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/buxton/scraper.py | 113 | ||||
-rw-r--r-- | src/buxton/source/Bill_Notes_Braun_T3.docx (renamed from src/buxton/source/Extra/Bill_Notes_Braun_T3.docx) | bin | 1671968 -> 1671968 bytes | |||
-rw-r--r-- | src/buxton/source/Bill_Notes_CasioC801.docx (renamed from src/buxton/source/Extra/Bill_Notes_CasioC801.docx) | bin | 574664 -> 574664 bytes | |||
-rw-r--r-- | src/buxton/source/Bill_Notes_Casio_Mini.docx (renamed from src/buxton/source/Extra/Bill_Notes_Casio_Mini.docx) | bin | 581069 -> 581069 bytes | |||
-rw-r--r-- | src/buxton/source/Bill_Notes_FingerWorks_Prototype.docx (renamed from src/buxton/source/Extra/Bill_Notes_FingerWorks_Prototype.docx) | bin | 585090 -> 585090 bytes | |||
-rw-r--r-- | src/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx (renamed from src/buxton/source/Extra/Bill_Notes_Fingerworks_TouchStream.docx) | bin | 1722555 -> 1722555 bytes | |||
-rw-r--r-- | src/buxton/source/Bill_Notes_FrogPad.docx (renamed from src/buxton/source/Extra/Bill_Notes_FrogPad.docx) | bin | 840173 -> 840173 bytes | |||
-rw-r--r-- | src/buxton/source/Bill_Notes_Gavilan_SC.docx (renamed from src/buxton/source/Extra/Bill_Notes_Gavilan_SC.docx) | bin | 1695290 -> 1695290 bytes | |||
-rw-r--r-- | src/buxton/source/Bill_Notes_Grandjean_Stenotype.docx (renamed from src/buxton/source/Extra/Bill_Notes_Grandjean_Stenotype.docx) | bin | 2094142 -> 2094142 bytes | |||
-rw-r--r-- | src/buxton/source/Bill_Notes_Matias.docx (renamed from src/buxton/source/Extra/Bill_Notes_Matias.docx) | bin | 590407 -> 590407 bytes | |||
-rw-r--r-- | src/buxton/source/Bill_Notes_MousePen.docx (renamed from src/buxton/source/Extra/Bill_Notes_MousePen.docx) | bin | 505322 -> 505322 bytes | |||
-rw-r--r-- | src/buxton/source/Bill_Notes_NewO.docx (renamed from src/buxton/source/Extra/Bill_Notes_NewO.docx) | bin | 2264571 -> 2264571 bytes | |||
-rw-r--r-- | src/buxton/source/Bill_Notes_OLPC.docx (renamed from src/buxton/source/Extra/Bill_Notes_OLPC.docx) | bin | 6883659 -> 6883659 bytes | |||
-rw-r--r-- | src/buxton/source/Bill_Notes_PARCkbd.docx (renamed from src/buxton/source/Extra/Bill_Notes_PARCkbd.docx) | bin | 631959 -> 631959 bytes | |||
-rw-r--r-- | src/buxton/source/Bill_Notes_Philco_Mystery_Control.docx (renamed from src/buxton/source/Extra/Bill_Notes_Philco_Mystery_Control.docx) | bin | 1994439 -> 1994439 bytes | |||
-rw-r--r-- | src/buxton/source/Bill_Notes_TASA_Kbd.docx (renamed from src/buxton/source/Extra/Bill_Notes_TASA_Kbd.docx) | bin | 461199 -> 461199 bytes | |||
-rw-r--r-- | src/buxton/source/Bill_Notes_The_Tap.docx (renamed from src/buxton/source/Extra/Bill_Notes_The_Tap.docx) | bin | 711321 -> 711321 bytes |
17 files changed, 80 insertions, 33 deletions
diff --git a/src/buxton/scraper.py b/src/buxton/scraper.py index c7efd8f09..c89961f54 100644 --- a/src/buxton/scraper.py +++ b/src/buxton/scraper.py @@ -13,7 +13,7 @@ source = "./source" dist = "../server/public/files" db = MongoClient("localhost", 27017)["Dash"] -view_doc_guids = [] +schema_guids = [] def extract_links(fileName): @@ -24,7 +24,7 @@ def extract_links(fileName): item = rels[rel] if item.reltype == RT.HYPERLINK and ".aspx" not in item._target: links.append(item._target) - return links + return listify(links) def extract_value(kv_string): @@ -44,6 +44,66 @@ def guid(): return str(uuid.uuid4()) +def listify(list): + return { + "fields": list, + "__type": "list" + } + + +def protofy(fieldId): + return { + "fieldId": fieldId, + "__type": "proxy" + } + + +def write_schema(parse_results): + view_guids = parse_results["view_guids"] + + data_doc = parse_results["schema"] + fields = data_doc["fields"] + + view_doc_guid = guid() + + view_doc = { + "_id": view_doc_guid, + "fields": { + "proto": protofy(data_doc["_id"]), + "x": 10, + "y": 10, + "width": 900, + "height": 600, + "panX": 0, + "panY": 0, + "zoomBasis": 0.5, + "zIndex": 2, + "libraryBrush": False, + "viewType": 2 + }, + "__type": "Doc" + } + + fields["proto"] = protofy("collectionProto") + fields["data"] = listify(proxify_guids(view_guids)) + fields["schemaColumns"] = listify(["title", "data"]) + fields["backgroundColor"] = "white" + fields["scale"] = 0.5 + fields["viewType"] = 2 + fields["author"] = "Bill Buxton" + fields["creationDate"] = { + "date": datetime.datetime.utcnow().microsecond, + "__type": "date" + } + fields["isPrototype"] = True + fields["page"] = -1 + + db.newDocuments.insert_one(data_doc) + db.newDocuments.insert_one(view_doc) + + return view_doc_guid + + def write_image(folder, name): path = f"http://localhost:1050/files/{folder}/{name}" @@ -53,10 +113,7 @@ def write_image(folder, name): view_doc = { "_id": view_doc_guid, "fields": { - "proto": { - "fieldId": data_doc_guid, - "__type": "proxy" - }, + "proto": protofy(data_doc_guid), "x": 10, "y": 10, "width": 300, @@ -72,10 +129,7 @@ def write_image(folder, name): data_doc = { "_id": data_doc_guid, "fields": { - "proto": { - "_id": "imageProto", - "__type": "proxy" - }, + "proto": protofy("imageProto"), "data": { "url": path, "__type": "image" @@ -115,8 +169,9 @@ def parse_document(file_name: str): raw = str(docx2txt.process(source + "/" + file_name, dir_path)) print("Extracting images...\n") + view_guids = [] for image in os.listdir(dir_path): - view_doc_guids.append(write_image(pure_name, image)) + view_guids.append(write_image(pure_name, image)) os.rename(dir_path + "/" + image, dir_path + "/" + image.replace(".", "_m.", 1)) print() @@ -178,7 +233,7 @@ def parse_document(file_name: str): while lines[cur] != "Image": link_descriptions.append(lines[cur].strip()) cur += 1 - result["link_descriptions"] = link_descriptions + result["link_descriptions"] = listify(link_descriptions) result["hyperlinks"] = extract_links(source + "/" + file_name) @@ -189,8 +244,8 @@ def parse_document(file_name: str): images.append(lines[cur]) captions.append(lines[cur + 1]) cur += 2 - result["images"] = images - result["captions"] = captions + result["images"] = listify(images) + result["captions"] = listify(captions) notes = [] if (cur < len(lines) and lines[cur] == "NOTES:"): @@ -199,24 +254,22 @@ def parse_document(file_name: str): notes.append(lines[cur]) cur += 1 if len(notes) > 0: - result["notes"] = notes + result["notes"] = listify(notes) print("...contents dictionary constructed.") - return result - - -def wrap(document): return { - "_id": guid(), - "fields": document, - "__type": "Doc" + "schema": { + "_id": guid(), + "fields": result, + "__type": "Doc" + }, + "view_guids": view_guids } -def upload(collection, mongofied): - for doc in mongofied: - collection.insert_one(doc) +def proxify_guids(guids): + return list(map(lambda guid: {"fieldId": guid, "__type": "proxy"}, guids)) if os.path.exists(dist): @@ -227,20 +280,14 @@ os.mkdir(dist) mkdir_if_absent(source) candidates = 0 -mongofied = [] for file_name in os.listdir(source): if file_name.endswith('.docx'): candidates += 1 - mongofied.append(wrap(parse_document(file_name))) - -for doc in mongofied: - db.newDocuments.insert_one(doc) + schema_guids.append(write_schema(parse_document(file_name))) -proxified = list( - map(lambda guid: {"fieldId": guid, "__type": "proxy"}, view_doc_guids)) db.newDocuments.update_one( {"fields.title": "WS collection 1"}, - {"$push": {"fields.data.fields": {"$each": proxified}}} + {"$push": {"fields.data.fields": {"$each": proxify_guids(schema_guids)}}} ) print("...dictionaries written to Dash Document.\n") diff --git a/src/buxton/source/Extra/Bill_Notes_Braun_T3.docx b/src/buxton/source/Bill_Notes_Braun_T3.docx Binary files differindex 356697092..356697092 100644 --- a/src/buxton/source/Extra/Bill_Notes_Braun_T3.docx +++ b/src/buxton/source/Bill_Notes_Braun_T3.docx diff --git a/src/buxton/source/Extra/Bill_Notes_CasioC801.docx b/src/buxton/source/Bill_Notes_CasioC801.docx Binary files differindex cd89fb97b..cd89fb97b 100644 --- a/src/buxton/source/Extra/Bill_Notes_CasioC801.docx +++ b/src/buxton/source/Bill_Notes_CasioC801.docx diff --git a/src/buxton/source/Extra/Bill_Notes_Casio_Mini.docx b/src/buxton/source/Bill_Notes_Casio_Mini.docx Binary files differindex a503cddfc..a503cddfc 100644 --- a/src/buxton/source/Extra/Bill_Notes_Casio_Mini.docx +++ b/src/buxton/source/Bill_Notes_Casio_Mini.docx diff --git a/src/buxton/source/Extra/Bill_Notes_FingerWorks_Prototype.docx b/src/buxton/source/Bill_Notes_FingerWorks_Prototype.docx Binary files differindex 4d13a8cf5..4d13a8cf5 100644 --- a/src/buxton/source/Extra/Bill_Notes_FingerWorks_Prototype.docx +++ b/src/buxton/source/Bill_Notes_FingerWorks_Prototype.docx diff --git a/src/buxton/source/Extra/Bill_Notes_Fingerworks_TouchStream.docx b/src/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx Binary files differindex 578a1be08..578a1be08 100644 --- a/src/buxton/source/Extra/Bill_Notes_Fingerworks_TouchStream.docx +++ b/src/buxton/source/Bill_Notes_Fingerworks_TouchStream.docx diff --git a/src/buxton/source/Extra/Bill_Notes_FrogPad.docx b/src/buxton/source/Bill_Notes_FrogPad.docx Binary files differindex d01e1bf5c..d01e1bf5c 100644 --- a/src/buxton/source/Extra/Bill_Notes_FrogPad.docx +++ b/src/buxton/source/Bill_Notes_FrogPad.docx diff --git a/src/buxton/source/Extra/Bill_Notes_Gavilan_SC.docx b/src/buxton/source/Bill_Notes_Gavilan_SC.docx Binary files differindex 7bd28b376..7bd28b376 100644 --- a/src/buxton/source/Extra/Bill_Notes_Gavilan_SC.docx +++ b/src/buxton/source/Bill_Notes_Gavilan_SC.docx diff --git a/src/buxton/source/Extra/Bill_Notes_Grandjean_Stenotype.docx b/src/buxton/source/Bill_Notes_Grandjean_Stenotype.docx Binary files differindex 0615c4953..0615c4953 100644 --- a/src/buxton/source/Extra/Bill_Notes_Grandjean_Stenotype.docx +++ b/src/buxton/source/Bill_Notes_Grandjean_Stenotype.docx diff --git a/src/buxton/source/Extra/Bill_Notes_Matias.docx b/src/buxton/source/Bill_Notes_Matias.docx Binary files differindex 547603256..547603256 100644 --- a/src/buxton/source/Extra/Bill_Notes_Matias.docx +++ b/src/buxton/source/Bill_Notes_Matias.docx diff --git a/src/buxton/source/Extra/Bill_Notes_MousePen.docx b/src/buxton/source/Bill_Notes_MousePen.docx Binary files differindex 4e1056636..4e1056636 100644 --- a/src/buxton/source/Extra/Bill_Notes_MousePen.docx +++ b/src/buxton/source/Bill_Notes_MousePen.docx diff --git a/src/buxton/source/Extra/Bill_Notes_NewO.docx b/src/buxton/source/Bill_Notes_NewO.docx Binary files differindex a514926d2..a514926d2 100644 --- a/src/buxton/source/Extra/Bill_Notes_NewO.docx +++ b/src/buxton/source/Bill_Notes_NewO.docx diff --git a/src/buxton/source/Extra/Bill_Notes_OLPC.docx b/src/buxton/source/Bill_Notes_OLPC.docx Binary files differindex bfca0a9bb..bfca0a9bb 100644 --- a/src/buxton/source/Extra/Bill_Notes_OLPC.docx +++ b/src/buxton/source/Bill_Notes_OLPC.docx diff --git a/src/buxton/source/Extra/Bill_Notes_PARCkbd.docx b/src/buxton/source/Bill_Notes_PARCkbd.docx Binary files differindex c0cf6ba9a..c0cf6ba9a 100644 --- a/src/buxton/source/Extra/Bill_Notes_PARCkbd.docx +++ b/src/buxton/source/Bill_Notes_PARCkbd.docx diff --git a/src/buxton/source/Extra/Bill_Notes_Philco_Mystery_Control.docx b/src/buxton/source/Bill_Notes_Philco_Mystery_Control.docx Binary files differindex ad06903f3..ad06903f3 100644 --- a/src/buxton/source/Extra/Bill_Notes_Philco_Mystery_Control.docx +++ b/src/buxton/source/Bill_Notes_Philco_Mystery_Control.docx diff --git a/src/buxton/source/Extra/Bill_Notes_TASA_Kbd.docx b/src/buxton/source/Bill_Notes_TASA_Kbd.docx Binary files differindex e4c659de9..e4c659de9 100644 --- a/src/buxton/source/Extra/Bill_Notes_TASA_Kbd.docx +++ b/src/buxton/source/Bill_Notes_TASA_Kbd.docx diff --git a/src/buxton/source/Extra/Bill_Notes_The_Tap.docx b/src/buxton/source/Bill_Notes_The_Tap.docx Binary files differindex 8ceebc71e..8ceebc71e 100644 --- a/src/buxton/source/Extra/Bill_Notes_The_Tap.docx +++ b/src/buxton/source/Bill_Notes_The_Tap.docx |