1 files changed, 58 insertions, 3 deletions
diff --git a/src/scraping/buxton/scraper.py b/src/scraping/buxton/scraper.py
index 02c6d8b74..700269727 100644
--- a/src/scraping/buxton/scraper.py
+++ b/src/scraping/buxton/scraper.py
@@ -26,7 +26,7 @@ def extract_links(fileName):
         item = rels[rel]
         if item.reltype == RT.HYPERLINK and ".aspx" not in item._target:
             links.append(item._target)
-    return listify(links)
+    return text_doc_map(links)
 
 
 def extract_value(kv_string):
@@ -60,6 +60,12 @@ def protofy(fieldId):
     }
 
 
+def text_doc_map(string_list):
+    def guid_map(caption):
+        return write_text_doc(caption)
+    return listify(proxify_guids(list(map(guid_map, string_list))))
+
+
 def write_schema(parse_results, display_fields, storage_key):
     view_guids = parse_results["child_guids"]
 
@@ -110,6 +116,54 @@ def write_schema(parse_results, display_fields, storage_key):
     return view_doc_guid
 
 
+def write_text_doc(content):
+    data_doc_guid = guid()
+    view_doc_guid = guid()
+
+    view_doc = {
+        "_id": view_doc_guid,
+        "fields": {
+            "proto": protofy(data_doc_guid),
+            "x": 10,
+            "y": 10,
+            "width": 400,
+            "zIndex": 2,
+            "libraryBrush": False
+        },
+        "__type": "Doc"
+    }
+
+    data_doc = {
+        "_id": data_doc_guid,
+        "fields": {
+            "proto": protofy("textProto"),
+            "data": {
+                "Data": '{"doc":{"type":"doc","content":[{"type":"paragraph","content":[{"type":"text","text":"' + content + '"}]}]},"selection":{"type":"text","anchor":1,"head":1}' + '}',
+                "__type": "RichTextField"
+            },
+            "title": content,
+            "nativeWidth": 200,
+            "author": "Bill Buxton",
+            "creationDate": {
+                "date": datetime.datetime.utcnow().microsecond,
+                "__type": "date"
+            },
+            "isPrototype": True,
+            "autoHeight": True,
+            "page": -1,
+            "nativeHeight": 200,
+            "height": 200,
+            "data_text": content
+        },
+        "__type": "Doc"
+    }
+
+    db.newDocuments.insert_one(view_doc)
+    db.newDocuments.insert_one(data_doc)
+
+    return view_doc_guid
+
+
 def write_image(folder, name):
     path = f"http://localhost:1050/files/{folder}/{name}"
 
@@ -253,7 +307,7 @@ def parse_document(file_name: str):
     while lines[cur] != "Image":
         link_descriptions.append(lines[cur].strip())
         cur += 1
-    result["link_descriptions"] = listify(link_descriptions)
+    result["link_descriptions"] = text_doc_map(link_descriptions)
 
     result["hyperlinks"] = extract_links(source + "/" + file_name)
 
@@ -265,7 +319,8 @@ def parse_document(file_name: str):
         captions.append(lines[cur + 1])
         cur += 2
     result["images"] = listify(images)
-    result["captions"] = listify(captions)
+
+    result["captions"] = text_doc_map(captions)
 
     notes = []
     if (cur < len(lines) and lines[cur] == "NOTES:"):