From 76ca4c61c5bf68649b889ed6ac8b5f6dc5b16e0b Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Sat, 18 Jan 2020 23:25:22 -0500 Subject: added initial DocumentFromField implementation --- src/scraping/buxton/scraper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/scraping/buxton/scraper.py') diff --git a/src/scraping/buxton/scraper.py b/src/scraping/buxton/scraper.py index a9256073b..4c79af437 100644 --- a/src/scraping/buxton/scraper.py +++ b/src/scraping/buxton/scraper.py @@ -17,7 +17,7 @@ dist = "../../server/public/files" db = MongoClient("localhost", 27017)["Dash"] target_collection = db.newDocuments -target_doc_title = "Workspace 1" +target_doc_title = "Collection 1" schema_guids = [] common_proto_id = "" -- cgit v1.2.3-70-g09d2 From fb25212fa60c55b39386771994b70884773ce412 Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Sun, 19 Jan 2020 13:55:52 -0500 Subject: fixed python scraper script --- src/scraping/buxton/scraper.py | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'src/scraping/buxton/scraper.py') diff --git a/src/scraping/buxton/scraper.py b/src/scraping/buxton/scraper.py index 4c79af437..90205c40b 100644 --- a/src/scraping/buxton/scraper.py +++ b/src/scraping/buxton/scraper.py @@ -13,7 +13,8 @@ import math import sys source = "./source" -dist = "../../server/public/files" +filesPath = "../../server/public/files" +image_dist = filesPath + "/images/buxton" db = MongoClient("localhost", 27017)["Dash"] target_collection = db.newDocuments @@ -167,12 +168,12 @@ def write_text_doc(content): def write_image(folder, name): - path = f"http://localhost:1050/files/{folder}/{name}" + path = f"http://localhost:1050/files/images/buxton/{folder}/{name}" data_doc_guid = guid() view_doc_guid = guid() - image = Image.open(f"{dist}/{folder}/{name}") + image = Image.open(f"{image_dist}/{folder}/{name}") native_width, native_height = image.size view_doc = { @@ -222,7 +223,8 @@ def parse_document(file_name: str): result = {} - dir_path = dist + "/" + pure_name + dir_path = image_dist + "/" + pure_name + print(dir_path) mkdir_if_absent(dir_path) raw = str(docx2txt.process(source + "/" + file_name, dir_path)) @@ -232,10 +234,11 @@ def parse_document(file_name: str): for image in os.listdir(dir_path): count += 1 view_guids.append(write_image(pure_name, image)) - copyfile(dir_path + "/" + image, dir_path + - "/" + image.replace(".", "_o.", 1)) - copyfile(dir_path + "/" + image, dir_path + - "/" + image.replace(".", "_m.", 1)) + resolved = dir_path + "/" + image + original = dir_path + "/" + image.replace(".", "_o.", 1) + medium = dir_path + "/" + image.replace(".", "_m.", 1) + copyfile(resolved, original) + copyfile(resolved, medium) print(f"extracted {count} images...") def sanitize(line): return re.sub("[\n\t]+", "", line).replace(u"\u00A0", " ").replace( @@ -366,11 +369,11 @@ def write_common_proto(): return id -if os.path.exists(dist): - shutil.rmtree(dist) -while os.path.exists(dist): +if os.path.exists(image_dist): + shutil.rmtree(image_dist) +while os.path.exists(image_dist): pass -os.mkdir(dist) +os.mkdir(image_dist) mkdir_if_absent(source) common_proto_id = write_common_proto() @@ -400,7 +403,7 @@ target_collection.update_one( print("rewriting .gitignore...\n") lines = ['*', '!.gitignore'] -with open(dist + "/.gitignore", 'w') as f: +with open(filesPath + "/.gitignore", 'w') as f: f.write('\n'.join(lines)) suffix = "" if candidates == 1 else "s" -- cgit v1.2.3-70-g09d2 From d281270053e2c0edb2cb761dd1cbce6306369863 Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Sun, 19 Jan 2020 23:07:08 -0500 Subject: further adjustments to import script --- src/client/views/nodes/FieldView.tsx | 2 +- src/scraping/buxton/scraper.py | 44 ++++++++++++++++++++++++------------ 2 files changed, 30 insertions(+), 16 deletions(-) (limited to 'src/scraping/buxton/scraper.py') diff --git a/src/client/views/nodes/FieldView.tsx b/src/client/views/nodes/FieldView.tsx index c56fde186..6e6ee1712 100644 --- a/src/client/views/nodes/FieldView.tsx +++ b/src/client/views/nodes/FieldView.tsx @@ -54,7 +54,7 @@ export interface FieldViewProps { @observer export class FieldView extends React.Component { public static LayoutString(fieldType: { name: string }, fieldStr: string) { - return `<${fieldType.name} {...props} fieldKey={'${fieldStr}'}/>`; //e.g., "" + return `<${fieldType.name} {...props} fieldKey={'${fieldStr}'}/>`; //e.g., "" } @computed diff --git a/src/scraping/buxton/scraper.py b/src/scraping/buxton/scraper.py index 90205c40b..15b5844f7 100644 --- a/src/scraping/buxton/scraper.py +++ b/src/scraping/buxton/scraper.py @@ -71,7 +71,7 @@ def text_doc_map(string_list): return listify(proxify_guids(list(map(guid_map, string_list)))) -def write_collection(parse_results, display_fields, storage_key, viewType=2): +def write_collection(parse_results, display_fields, storage_key, viewType): view_guids = parse_results["child_guids"] data_doc = parse_results["schema"] @@ -107,6 +107,11 @@ def write_collection(parse_results, display_fields, storage_key, viewType=2): "date": datetime.datetime.utcnow().microsecond, "__type": "date" } + if "image_urls" in parse_results: + fields["hero"] = { + "url": parse_results["image_urls"][0], + "__type": "image" + } fields["isPrototype"] = True fields["page"] = -1 @@ -176,6 +181,9 @@ def write_image(folder, name): image = Image.open(f"{image_dist}/{folder}/{name}") native_width, native_height = image.size + if abs(native_width - native_height) < 10: + return None + view_doc = { "_id": view_doc_guid, "fields": { @@ -214,7 +222,10 @@ def write_image(folder, name): target_collection.insert_one(view_doc) target_collection.insert_one(data_doc) - return view_doc_guid + return { + "layout_id": view_doc_guid, + "url": path + } def parse_document(file_name: str): @@ -229,16 +240,20 @@ def parse_document(file_name: str): raw = str(docx2txt.process(source + "/" + file_name, dir_path)) + urls = [] view_guids = [] count = 0 for image in os.listdir(dir_path): - count += 1 - view_guids.append(write_image(pure_name, image)) - resolved = dir_path + "/" + image - original = dir_path + "/" + image.replace(".", "_o.", 1) - medium = dir_path + "/" + image.replace(".", "_m.", 1) - copyfile(resolved, original) - copyfile(resolved, medium) + created = write_image(pure_name, image) + if created != None: + urls.append(created["url"]) + view_guids.append(created["layout_id"]) + count += 1 + resolved = dir_path + "/" + image + original = dir_path + "/" + image.replace(".", "_o.", 1) + medium = dir_path + "/" + image.replace(".", "_m.", 1) + copyfile(resolved, original) + copyfile(resolved, medium) print(f"extracted {count} images...") def sanitize(line): return re.sub("[\n\t]+", "", line).replace(u"\u00A0", " ").replace( @@ -345,7 +360,8 @@ def parse_document(file_name: str): "fields": result, "__type": "Doc" }, - "child_guids": view_guids + "child_guids": view_guids, + "image_urls": urls } @@ -359,13 +375,11 @@ def write_common_proto(): "_id": id, "fields": { "proto": protofy("collectionProto"), - "title": "Common Import Proto", + "title": "The Buxton Collection", }, "__type": "Doc" } - target_collection.insert_one(common_proto) - return id @@ -383,7 +397,7 @@ for file_name in os.listdir(source): if file_name.endswith('.docx'): candidates += 1 schema_guids.append(write_collection( - parse_document(file_name), ["title", "data"], "image_data")) + parse_document(file_name), ["title", "data"], "data", 5)) print("writing parent schema...") parent_guid = write_collection({ @@ -393,7 +407,7 @@ parent_guid = write_collection({ "__type": "Doc" }, "child_guids": schema_guids -}, ["title", "short_description", "original_price"], "data", 1) +}, ["title", "short_description", "original_price"], "data", 4) print("appending parent schema to main workspace...\n") target_collection.update_one( -- cgit v1.2.3-70-g09d2 From cdde5624bac14ba6e0520a2d30d8d3926f2cc27f Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Mon, 20 Jan 2020 12:19:40 -0500 Subject: buxton layout improvements --- src/client/util/DropConverter.ts | 4 +-- .../views/collections/CollectionTreeView.tsx | 30 +++++++++++++--------- src/scraping/buxton/scraper.py | 2 +- 3 files changed, 21 insertions(+), 15 deletions(-) (limited to 'src/scraping/buxton/scraper.py') diff --git a/src/client/util/DropConverter.ts b/src/client/util/DropConverter.ts index 3e2cc6a2e..ff0e19347 100644 --- a/src/client/util/DropConverter.ts +++ b/src/client/util/DropConverter.ts @@ -7,7 +7,7 @@ import { StrCast } from "../../new_fields/Types"; import { Docs } from "../documents/Documents"; import { ScriptField } from "../../new_fields/ScriptField"; -export function makeTemplate(doc: Doc): boolean { +export function makeTemplate(doc: Doc, suppressTitle = false): boolean { const layoutDoc = doc.layout instanceof Doc && doc.layout.isTemplateField ? doc.layout : doc; const layout = StrCast(layoutDoc.layout).match(/fieldKey={'[^']*'}/)![0]; const fieldKey = layout.replace("fieldKey={'", "").replace(/'}$/, ""); @@ -16,7 +16,7 @@ export function makeTemplate(doc: Doc): boolean { docs.forEach(d => { if (!StrCast(d.title).startsWith("-")) { any = true; - Doc.MakeMetadataFieldTemplate(d, Doc.GetProto(layoutDoc)); + Doc.MakeMetadataFieldTemplate(d, Doc.GetProto(layoutDoc), suppressTitle); } else if (d.type === DocumentType.COL) { any = makeTemplate(d) || any; } diff --git a/src/client/views/collections/CollectionTreeView.tsx b/src/client/views/collections/CollectionTreeView.tsx index 0f61756f4..a48208bd9 100644 --- a/src/client/views/collections/CollectionTreeView.tsx +++ b/src/client/views/collections/CollectionTreeView.tsx @@ -633,27 +633,33 @@ export class CollectionTreeView extends CollectionSubView(Document) { } ContextMenu.Instance.addItem({ description: "Buxton Layout", icon: "eye", event: () => { - // const [first, second, third] = new Array(3).map(() => Docs.Create.MulticolumnDocument([], {})); - const year = Docs.Create.TextDocument({ title: "year" }); - const wrapper = Docs.Create.StackingDocument([year], { autoHeight: true, chromeStatus: "disabled" }); + const { TextDocument, ImageDocument } = Docs.Create; + const wrapper = Docs.Create.StackingDocument([ + ImageDocument("http://www.cs.brown.edu/~bcz/face.gif", { title: "hero" }), + TextDocument({ title: "year" }), + TextDocument({ title: "degrees_of_freedom" }), + TextDocument({ title: "company" }), + TextDocument({ title: "short_description" }), + ], { autoHeight: true, chromeStatus: "disabled" }); wrapper.disableLOD = true; - makeTemplate(wrapper); - delete Doc.GetProto(year).showTitle; - delete year.showTitle; - + makeTemplate(wrapper, true); const detailedLayout = Doc.MakeAlias(wrapper); const cardLayout = ImageBox.LayoutString("hero"); this.childLayoutPairs.forEach(({ layout }) => { - Doc.GetProto(layout).layout = cardLayout; - Doc.GetProto(layout).layout_detailed = detailedLayout; - // Doc.ApplyTemplateTo(wrapper, layout, "layout_detailed"); + const proto = Doc.GetProto(layout); + proto.layout = cardLayout; + proto.layout_detailed = detailedLayout; + layout.showTitle = "title"; + layout.showTitleHover = "titlehover"; }); - CollectionDockingView.AddRightSplit(wrapper, undefined); } }); const existingOnClick = ContextMenu.Instance.findByDescription("OnClick..."); const onClicks: ContextMenuProps[] = existingOnClick && "subitems" in existingOnClick ? existingOnClick.subitems : []; - onClicks.push({ description: "Edit onChecked Script", icon: "edit", event: (obj: any) => ScriptBox.EditButtonScript("On Checked Changed ...", this.props.Document, "onCheckedClick", obj.x, obj.y, { heading: "boolean", checked: "boolean" }) }); + onClicks.push({ + description: "Edit onChecked Script", icon: "edit", event: (obj: any) => ScriptBox.EditButtonScript("On Checked Changed ...", this.props.Document, + "onCheckedClick", obj.x, obj.y, { heading: "boolean", checked: "boolean", context: Doc.name }) + }); !existingOnClick && ContextMenu.Instance.addItem({ description: "OnClick...", subitems: onClicks, icon: "hand-point-right" }); } outerXf = () => Utils.GetScreenTransform(this._mainEle!); diff --git a/src/scraping/buxton/scraper.py b/src/scraping/buxton/scraper.py index 15b5844f7..998c7617d 100644 --- a/src/scraping/buxton/scraper.py +++ b/src/scraping/buxton/scraper.py @@ -100,9 +100,9 @@ def write_collection(parse_results, display_fields, storage_key, viewType): fields[storage_key] = listify(proxify_guids(view_guids)) fields["schemaColumns"] = listify(display_fields) fields["backgroundColor"] = "white" - fields["scale"] = 0.5 fields["viewType"] = 2 fields["author"] = "Bill Buxton" + fielsd["disableLOD"] = True, fields["creationDate"] = { "date": datetime.datetime.utcnow().microsecond, "__type": "date" -- cgit v1.2.3-70-g09d2 From e8865c49295cc931b97d92db56dc9627159b3550 Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Mon, 20 Jan 2020 12:26:21 -0500 Subject: fixed typo --- src/scraping/buxton/scraper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/scraping/buxton/scraper.py') diff --git a/src/scraping/buxton/scraper.py b/src/scraping/buxton/scraper.py index 998c7617d..e077c89e0 100644 --- a/src/scraping/buxton/scraper.py +++ b/src/scraping/buxton/scraper.py @@ -102,7 +102,7 @@ def write_collection(parse_results, display_fields, storage_key, viewType): fields["backgroundColor"] = "white" fields["viewType"] = 2 fields["author"] = "Bill Buxton" - fielsd["disableLOD"] = True, + fields["disableLOD"] = True, fields["creationDate"] = { "date": datetime.datetime.utcnow().microsecond, "__type": "date" -- cgit v1.2.3-70-g09d2 From ba5d8f584c69c0cbbde20283d1a09e00faea8feb Mon Sep 17 00:00:00 2001 From: Sam Wilkins Date: Mon, 20 Jan 2020 12:39:53 -0500 Subject: scraper typo --- src/scraping/buxton/scraper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/scraping/buxton/scraper.py') diff --git a/src/scraping/buxton/scraper.py b/src/scraping/buxton/scraper.py index e077c89e0..2d1a5ca32 100644 --- a/src/scraping/buxton/scraper.py +++ b/src/scraping/buxton/scraper.py @@ -102,7 +102,7 @@ def write_collection(parse_results, display_fields, storage_key, viewType): fields["backgroundColor"] = "white" fields["viewType"] = 2 fields["author"] = "Bill Buxton" - fields["disableLOD"] = True, + fields["disableLOD"] = True fields["creationDate"] = { "date": datetime.datetime.utcnow().microsecond, "__type": "date" -- cgit v1.2.3-70-g09d2