diff options
| author | Sam Wilkins <samwilkins333@gmail.com> | 2019-08-06 22:03:59 -0400 |
|---|---|---|
| committer | Sam Wilkins <samwilkins333@gmail.com> | 2019-08-06 22:03:59 -0400 |
| commit | d8c065935a0cb5a89b6d76407b7729f8584424eb (patch) | |
| tree | 89a470fc58ce4ebb31f4c8e07db81559e97abdd7 /src/scraping | |
| parent | 572c4196e0f41ec6bae8cae403812f9b97d5a3c7 (diff) | |
pivot view
Diffstat (limited to 'src/scraping')
| -rw-r--r-- | src/scraping/buxton/scraper.py | 15 | ||||
| -rw-r--r-- | src/scraping/buxton/scripts/initialization.txt | 46 | ||||
| -rw-r--r-- | src/scraping/buxton/scripts/layout.txt | 1 |
3 files changed, 55 insertions, 7 deletions
diff --git a/src/scraping/buxton/scraper.py b/src/scraping/buxton/scraper.py index f0f45d8f9..1c19118fd 100644 --- a/src/scraping/buxton/scraper.py +++ b/src/scraping/buxton/scraper.py @@ -17,6 +17,7 @@ dist = "../../server/public/files" db = MongoClient("localhost", 27017)["Dash"] target_collection = db.newDocuments +target_doc_title = "Workspace 1" schema_guids = [] common_proto_id = "" @@ -69,7 +70,7 @@ def text_doc_map(string_list): return listify(proxify_guids(list(map(guid_map, string_list)))) -def write_schema(parse_results, display_fields, storage_key): +def write_collection(parse_results, display_fields, storage_key, viewType=2): view_guids = parse_results["child_guids"] data_doc = parse_results["schema"] @@ -90,7 +91,7 @@ def write_schema(parse_results, display_fields, storage_key): "zoomBasis": 1, "zIndex": 2, "libraryBrush": False, - "viewType": 2 + "viewType": viewType }, "__type": "Doc" } @@ -237,7 +238,7 @@ def parse_document(file_name: str): copyfile(dir_path + "/" + image, dir_path + "/" + image.replace(".", "_o.", 1)) copyfile(dir_path + "/" + image, dir_path + - "/" + image.replace(".", "_m.", 1)) + "/" + image.replace(".", "_m.", 1)) print(f"extracted {count} images...") def sanitize(line): return re.sub("[\n\t]+", "", line).replace(u"\u00A0", " ").replace( @@ -381,22 +382,22 @@ candidates = 0 for file_name in os.listdir(source): if file_name.endswith('.docx'): candidates += 1 - schema_guids.append(write_schema( + schema_guids.append(write_collection( parse_document(file_name), ["title", "data"], "image_data")) print("writing parent schema...") -parent_guid = write_schema({ +parent_guid = write_collection({ "schema": { "_id": guid(), "fields": {}, "__type": "Doc" }, "child_guids": schema_guids -}, ["title", "short_description", "original_price"], "data") +}, ["title", "short_description", "original_price"], "data", 1) print("appending parent schema to main workspace...\n") target_collection.update_one( - {"fields.title": "WS collection 1"}, + {"fields.title": target_doc_title}, {"$push": {"fields.data.fields": {"fieldId": parent_guid, "__type": "proxy"}}} ) diff --git a/src/scraping/buxton/scripts/initialization.txt b/src/scraping/buxton/scripts/initialization.txt new file mode 100644 index 000000000..53f3f0d53 --- /dev/null +++ b/src/scraping/buxton/scripts/initialization.txt @@ -0,0 +1,46 @@ +const field = collection.pivotField || "title"; +const width = collection.pivotWidth || 200; + +const groups = new Map; + +for (const doc of docs) { + const val = doc[field]; + if (val === undefined) continue; + + const l = groups.get(val); + if (l) { + l.push(doc); + } else { + groups.set(val, [doc]); + } + +} + +let minSize = Infinity; + +groups.forEach((val, key) => { + minSize = Math.min(minSize, val.length); +}); + +const numCols = collection.pivotNumColumns || Math.ceil(Math.sqrt(minSize)); + +const docMap = new Map; +const groupNames = []; + +let x = 0; +groups.forEach((val, key) => { + let y = 0; + let xCount = 0; + groupNames.push({type:"text", text:String(key), x, y:width + 50, width: width * 1.25 * numCols, height:100, fontSize:collection.pivotFontSize}); + for (const doc of val) { + docMap.set(doc, {x: x + xCount * width * 1.25, y:-y, width, height:width}); + xCount++; + if (xCount >= numCols) { + xCount = 0; + y += width * 1.25; + } + } + x += width * 1.25 * (numCols + 1); +}); + +return {state:{ map: docMap}, views:groupNames };
\ No newline at end of file diff --git a/src/scraping/buxton/scripts/layout.txt b/src/scraping/buxton/scripts/layout.txt new file mode 100644 index 000000000..46b6dbaac --- /dev/null +++ b/src/scraping/buxton/scripts/layout.txt @@ -0,0 +1 @@ +return state.map.get(doc)
\ No newline at end of file |
