4 files changed, 99 insertions, 4 deletions
diff --git a/src/scraping/acm/.gitignore b/src/scraping/acm/.gitignore
new file mode 100644
index 000000000..caca8b99c
--- /dev/null
+++ b/src/scraping/acm/.gitignore
@@ -0,0 +1,2 @@
+/citations.txt
+/results.txt
+\ No newline at end of file
diff --git a/src/scraping/acm/debug.log b/src/scraping/acm/debug.log
new file mode 100644
index 000000000..8c0a148f4
--- /dev/null
+++ b/src/scraping/acm/debug.log
@@ -0,0 +1,38 @@
+[0625/170004.768:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/170004.769:ERROR:exception_snapshot_win.cc(98)] thread ID 17604 not found in process
+[0625/171124.644:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/171124.645:ERROR:exception_snapshot_win.cc(98)] thread ID 14348 not found in process
+[0625/171853.989:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/171853.990:ERROR:exception_snapshot_win.cc(98)] thread ID 12080 not found in process
+[0625/171947.744:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/171947.745:ERROR:exception_snapshot_win.cc(98)] thread ID 16160 not found in process
+[0625/172007.424:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/172007.425:ERROR:exception_snapshot_win.cc(98)] thread ID 13472 not found in process
+[0625/172059.353:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/172059.354:ERROR:exception_snapshot_win.cc(98)] thread ID 6396 not found in process
+[0625/172402.795:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/172402.796:ERROR:exception_snapshot_win.cc(98)] thread ID 10720 not found in process
+[0625/172618.850:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/172618.850:ERROR:exception_snapshot_win.cc(98)] thread ID 21136 not found in process
+[0625/172819.875:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/172819.876:ERROR:exception_snapshot_win.cc(98)] thread ID 17624 not found in process
+[0625/172953.674:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/172953.675:ERROR:exception_snapshot_win.cc(98)] thread ID 15180 not found in process
+[0625/173412.182:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/173412.182:ERROR:exception_snapshot_win.cc(98)] thread ID 13952 not found in process
+[0625/173447.806:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/173447.807:ERROR:exception_snapshot_win.cc(98)] thread ID 1572 not found in process
+[0625/173516.188:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/173516.189:ERROR:exception_snapshot_win.cc(98)] thread ID 5472 not found in process
+[0625/173528.446:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/173528.447:ERROR:exception_snapshot_win.cc(98)] thread ID 20420 not found in process
+[0625/173539.436:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/173539.437:ERROR:exception_snapshot_win.cc(98)] thread ID 16192 not found in process
+[0625/173643.139:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/173643.140:ERROR:exception_snapshot_win.cc(98)] thread ID 15716 not found in process
+[0625/173659.376:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/173659.377:ERROR:exception_snapshot_win.cc(98)] thread ID 11828 not found in process
+[0625/201137.209:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/201137.210:ERROR:exception_snapshot_win.cc(98)] thread ID 7688 not found in process
+[0625/210240.476:ERROR:process_reader_win.cc(123)] NtOpenThread: {Access Denied} A process has requested access to an object, but has not been granted those access rights. (0xc0000022)
+[0625/210240.477:ERROR:exception_snapshot_win.cc(98)] thread ID 20828 not found in process
diff --git a/src/scraping/acm/index.js b/src/scraping/acm/index.js
index 51781dba8..b71d55226 100644
--- a/src/scraping/acm/index.js
+++ b/src/scraping/acm/index.js
@@ -276,4 +276,4 @@ log_read("target references");
 
 readFile(target_source, {
     encoding: "utf8"
-}, scrape_targets);
-\ No newline at end of file
+}, scrape_targets);
diff --git a/src/scraping/buxton/scraper.py b/src/scraping/buxton/scraper.py
index 02c6d8b74..700269727 100644
--- a/src/scraping/buxton/scraper.py
+++ b/src/scraping/buxton/scraper.py
@@ -26,7 +26,7 @@ def extract_links(fileName):
         item = rels[rel]
         if item.reltype == RT.HYPERLINK and ".aspx" not in item._target:
             links.append(item._target)
-    return listify(links)
+    return text_doc_map(links)
 
 
 def extract_value(kv_string):
@@ -60,6 +60,12 @@ def protofy(fieldId):
     }
 
 
+def text_doc_map(string_list):
+    """Write a text doc per string, then pass the returned view guids through proxify_guids and listify."""
+    view_guids = [write_text_doc(caption) for caption in string_list]
+    return listify(proxify_guids(view_guids))
+
+
 def write_schema(parse_results, display_fields, storage_key):
     view_guids = parse_results["child_guids"]
 
@@ -110,6 +116,54 @@ def write_schema(parse_results, display_fields, storage_key):
     return view_doc_guid
 
 
+def write_text_doc(content):
+    """Insert a view doc and a text data doc for `content` into db.newDocuments; return the view guid."""
+    import json  # local import: the module's import block is outside this hunk
+    data_doc_guid = guid()
+    view_doc_guid = guid()
+
+    view_doc = {
+        "_id": view_doc_guid,
+        "fields": {
+            "proto": protofy(data_doc_guid),
+            "x": 10, "y": 10,
+            "width": 400,
+            "zIndex": 2,
+            "libraryBrush": False
+        },
+        "__type": "Doc"
+    }
+
+    data_doc = {
+        "_id": data_doc_guid,
+        "fields": {
+            "proto": protofy("textProto"),
+            "data": {
+                # json.dumps escapes quotes, backslashes and control characters in content so Data stays valid JSON.
+                "Data": '{"doc":{"type":"doc","content":[{"type":"paragraph","content":[{"type":"text","text":' + json.dumps(content, ensure_ascii=False) + '}]}]},"selection":{"type":"text","anchor":1,"head":1}' + '}',
+                "__type": "RichTextField"
+            },
+            "title": content,
+            "nativeWidth": 200,
+            "author": "Bill Buxton",
+            # NOTE(review): .microsecond is only the 0-999999 sub-second component, not a timestamp — confirm intent.
+            "creationDate": {"date": datetime.datetime.utcnow().microsecond, "__type": "date"},
+            "isPrototype": True,
+            "autoHeight": True,
+            "page": -1,
+            "nativeHeight": 200,
+            "height": 200,
+            "data_text": content
+        },
+        "__type": "Doc"
+    }
+
+    db.newDocuments.insert_one(view_doc)
+    db.newDocuments.insert_one(data_doc)
+
+    return view_doc_guid
+
+
 def write_image(folder, name):
     path = f"http://localhost:1050/files/{folder}/{name}"
 
@@ -253,7 +307,7 @@ def parse_document(file_name: str):
     while lines[cur] != "Image":
         link_descriptions.append(lines[cur].strip())
         cur += 1
-    result["link_descriptions"] = listify(link_descriptions)
+    result["link_descriptions"] = text_doc_map(link_descriptions)
 
     result["hyperlinks"] = extract_links(source + "/" + file_name)
 
@@ -265,7 +319,8 @@ def parse_document(file_name: str):
         captions.append(lines[cur + 1])
         cur += 2
     result["images"] = listify(images)
-    result["captions"] = listify(captions)
+
+    result["captions"] = text_doc_map(captions)
 
     notes = []
     if (cur < len(lines) and lines[cur] == "NOTES:"):