diff options
author | yipstanley <stanley_yip@brown.edu> | 2020-02-29 14:18:43 -0500 |
---|---|---|
committer | yipstanley <stanley_yip@brown.edu> | 2020-02-29 14:18:43 -0500 |
commit | 2f6e27c67d1790d4350eede3003f0b614460f4d1 (patch) | |
tree | ef5e70925b8cdeb8229af849e33e6f3a4cceae7f /src/scraping/buxton/narratives.py | |
parent | f1fcbeea5fb103b7623e795e72aacd4dfacc6c70 (diff) | |
parent | 640f14da28d97600fb32d09023fc932e3a4052c4 (diff) |
Merge branch 'master' of https://github.com/browngraphicslab/Dash-Web into pen
Diffstat (limited to 'src/scraping/buxton/narratives.py')
-rw-r--r-- | src/scraping/buxton/narratives.py | 38 |
1 files changed, 38 insertions, 0 deletions
# Reconstructed from the diff that adds src/scraping/buxton/narratives.py
# (new file, mode 100644, blob 947d60f91, 38 lines).
"""Print every paragraph of a narrative .docx file together with its index.

The script opens the Word document at ``path`` with python-docx and writes
one ``"<index>: <text>"`` line per paragraph to stdout. The commented-out
sections below are disabled experiments kept for reference; their
indentation was reconstructed and should be checked before re-enabling.
"""
import shutil
import tempfile
from os import mkdir
from pathlib import Path
from zipfile import ZipFile

from docx import Document

path = "./narratives/Theme - Chord Kbds.docx"
doc = Document(path)

# Disabled: copy the image members of the .docx archive into ./test.
# IMAGE_EXT = ('png', 'jpeg', 'jpg')
#
# with tempfile.TemporaryDirectory() as working_dir:
#     with ZipFile(path) as working_zip:
#         image_list = [name for name in working_zip.namelist() if any(name.endswith(ext) for ext in IMAGE_EXT)]
#         working_zip.extractall(working_dir, image_list)
#     mkdir("./test")
#     for image in image_list:
#         shutil.copy(Path(working_dir).resolve() / image, "./test")

paragraphs = doc.paragraphs
# enumerate yields the index alongside each paragraph, avoiding the
# range(len(...)) pattern and the repeated list indexing.
for i, paragraph in enumerate(paragraphs):
    print(f"{i}: {paragraph.text}")

# Disabled: print the orientation of each section.
# for section in doc.sections:
#     print(section.orientation)

# Disabled: print the underlying inline element of each inline shape.
# for shape in doc.inline_shapes:
#     print(shape._inline)

# Disabled: collect the cell texts of each row of the first table.
# images = doc.tables[0]
# for row in images.rows:
#     contents = []
#     for cell in row.cells:
#         contents.append(cell.text)
#     print(contents)