aboutsummaryrefslogtreecommitdiff
path: root/src/scraping/buxton/narratives.py
diff options
context:
space:
mode:
authoryipstanley <stanley_yip@brown.edu>2020-02-29 14:18:43 -0500
committeryipstanley <stanley_yip@brown.edu>2020-02-29 14:18:43 -0500
commit2f6e27c67d1790d4350eede3003f0b614460f4d1 (patch)
treeef5e70925b8cdeb8229af849e33e6f3a4cceae7f /src/scraping/buxton/narratives.py
parentf1fcbeea5fb103b7623e795e72aacd4dfacc6c70 (diff)
parent640f14da28d97600fb32d09023fc932e3a4052c4 (diff)
Merge branch 'master' of https://github.com/browngraphicslab/Dash-Web into pen
Diffstat (limited to 'src/scraping/buxton/narratives.py')
-rw-r--r--src/scraping/buxton/narratives.py38
1 files changed, 38 insertions, 0 deletions
diff --git a/src/scraping/buxton/narratives.py b/src/scraping/buxton/narratives.py
new file mode 100644
index 000000000..947d60f91
--- /dev/null
+++ b/src/scraping/buxton/narratives.py
@@ -0,0 +1,38 @@
+from docx import Document
+import tempfile
+from zipfile import ZipFile
+import shutil
+from pathlib import Path
+from os import mkdir
+
+path = "./narratives/Theme - Chord Kbds.docx"
+doc = Document(path)
+
+# IMAGE_EXT = ('png', 'jpeg', 'jpg')
+#
+# with tempfile.TemporaryDirectory() as working_dir:
+# with ZipFile(path) as working_zip:
+# image_list = [name for name in working_zip.namelist() if any(name.endswith(ext) for ext in IMAGE_EXT)]
+# working_zip.extractall(working_dir, image_list)
+# mkdir("./test")
+# for image in image_list:
+# shutil.copy(Path(working_dir).resolve() / image, "./test")
+
+paragraphs = doc.paragraphs
+for i in range(len(paragraphs)):
+ print(f"{i}: {paragraphs[i].text}")
+
+# for section in doc.sections:
+# print(section.orientation)
+
+# for shape in doc.inline_shapes:
+# print(shape._inline)
+
+# images = doc.tables[0]
+# for row in images.rows:
+# contents = []
+# for cell in row.cells:
+# contents.append(cell.text)
+ # print(contents)
+
+