aboutsummaryrefslogtreecommitdiff
path: root/src/scraping/buxton/narratives.py
diff options
context:
space:
mode:
authorMohammad Amoush <47069173+mamoush34@users.noreply.github.com>2020-02-08 17:03:12 -0500
committerMohammad Amoush <47069173+mamoush34@users.noreply.github.com>2020-02-08 17:03:12 -0500
commitf9855e8d1ec83405ae3cc7d0113b46de63fc0848 (patch)
treebf4be61a021e59b771c1cd5958fd9fd43cac8693 /src/scraping/buxton/narratives.py
parent87f5f043388b591c52e96a795fa461a79770550d (diff)
parent1b046f76cf39f1f6cb1875aa84b45db74b6d994e (diff)
Merge branch 'master' of https://github.com/browngraphicslab/Dash-Web into webcam_mohammad
Diffstat (limited to 'src/scraping/buxton/narratives.py')
-rw-r--r--src/scraping/buxton/narratives.py38
1 files changed, 38 insertions, 0 deletions
diff --git a/src/scraping/buxton/narratives.py b/src/scraping/buxton/narratives.py
new file mode 100644
index 000000000..947d60f91
--- /dev/null
+++ b/src/scraping/buxton/narratives.py
@@ -0,0 +1,38 @@
+from docx import Document
+import tempfile
+from zipfile import ZipFile
+import shutil
+from pathlib import Path
+from os import mkdir
+
+path = "./narratives/Theme - Chord Kbds.docx"  # hard-coded input document, relative to the current working directory
+doc = Document(path)  # opened at module level, so running/importing this file reads the .docx immediately
+
+# IMAGE_EXT = ('png', 'jpeg', 'jpg')
+#
+# with tempfile.TemporaryDirectory() as working_dir:
+# with ZipFile(path) as working_zip:
+# image_list = [name for name in working_zip.namelist() if any(name.endswith(ext) for ext in IMAGE_EXT)]
+# working_zip.extractall(working_dir, image_list)
+# mkdir("./test")
+# for image in image_list:
+# shutil.copy(Path(working_dir).resolve() / image, "./test")
+
+paragraphs = doc.paragraphs  # debugging dump: list every paragraph of the document with its index
+for i, paragraph in enumerate(paragraphs):  # enumerate replaces range(len(...)) + repeated indexing
+ print(f"{i}: {paragraph.text}")
+
+# for section in doc.sections:
+# print(section.orientation)
+
+# for shape in doc.inline_shapes:
+# print(shape._inline)
+
+# images = doc.tables[0]
+# for row in images.rows:
+# contents = []
+# for cell in row.cells:
+# contents.append(cell.text)
+ # print(contents)
+
+