diff options
author | Mohammad Amoush <47069173+mamoush34@users.noreply.github.com> | 2020-02-08 17:03:12 -0500 |
---|---|---|
committer | Mohammad Amoush <47069173+mamoush34@users.noreply.github.com> | 2020-02-08 17:03:12 -0500 |
commit | f9855e8d1ec83405ae3cc7d0113b46de63fc0848 (patch) | |
tree | bf4be61a021e59b771c1cd5958fd9fd43cac8693 /src/scraping/buxton/narratives.py | |
parent | 87f5f043388b591c52e96a795fa461a79770550d (diff) | |
parent | 1b046f76cf39f1f6cb1875aa84b45db74b6d994e (diff) |
Merge branch 'master' of https://github.com/browngraphicslab/Dash-Web into webcam_mohammad
Diffstat (limited to 'src/scraping/buxton/narratives.py')
-rw-r--r-- | src/scraping/buxton/narratives.py | 38 |
1 files changed, 38 insertions, 0 deletions
"""Print each paragraph of a narrative Word document, prefixed by its index.

Exploratory script. The commented-out sections are earlier experiments
(extracting images from the .docx archive, inspecting sections, inline shapes
and the first table) that are kept for reference; several imports below are
only used by those experiments.
"""
from docx import Document
import tempfile
from zipfile import ZipFile
import shutil
from pathlib import Path
from os import mkdir

# Document to inspect, relative to the current working directory.
path = "./narratives/Theme - Chord Kbds.docx"
doc = Document(path)

# Experiment: copy every image stored inside the .docx archive into ./test.
# IMAGE_EXT = ('png', 'jpeg', 'jpg')
#
# with tempfile.TemporaryDirectory() as working_dir:
#     with ZipFile(path) as working_zip:
#         image_list = [name for name in working_zip.namelist() if any(name.endswith(ext) for ext in IMAGE_EXT)]
#         working_zip.extractall(working_dir, image_list)
#     mkdir("./test")
#     for image in image_list:
#         shutil.copy(Path(working_dir).resolve() / image, "./test")

# Dump every paragraph with its position so the document layout can be read
# off the console output.
paragraphs = doc.paragraphs
for i, paragraph in enumerate(paragraphs):
    print(f"{i}: {paragraph.text}")

# Experiment: print the orientation of each section.
# for section in doc.sections:
#     print(section.orientation)

# Experiment: print the underlying inline element of each inline shape.
# for shape in doc.inline_shapes:
#     print(shape._inline)

# Experiment: collect the cell text of each row of the first table.
# images = doc.tables[0]
# for row in images.rows:
#     contents = []
#     for cell in row.cells:
#         contents.append(cell.text)
#     print(contents)