diff options
author | Bob Zeleznik <zzzman@gmail.com> | 2020-02-08 13:48:11 -0500 |
---|---|---|
committer | Bob Zeleznik <zzzman@gmail.com> | 2020-02-08 13:48:11 -0500 |
commit | 90d7fb57a64011763ad1d608126eacb052061e43 (patch) | |
tree | fd96990ebd0ffe38f2285fbbceca942c1fb45587 /src/scraping/buxton/narratives.py | |
parent | e310c0fdcef6ac71ee492470d4ac689cbb094167 (diff) | |
parent | 1b046f76cf39f1f6cb1875aa84b45db74b6d994e (diff) |
Merge branch 'master' into monika_animation
Diffstat (limited to 'src/scraping/buxton/narratives.py')
-rw-r--r-- | src/scraping/buxton/narratives.py | 38 |
1 files changed, 38 insertions, 0 deletions
```diff
diff --git a/src/scraping/buxton/narratives.py b/src/scraping/buxton/narratives.py
new file mode 100644
index 000000000..947d60f91
--- /dev/null
+++ b/src/scraping/buxton/narratives.py
@@ -0,0 +1,38 @@
+from docx import Document
+import tempfile
+from zipfile import ZipFile
+import shutil
+from pathlib import Path
+from os import mkdir
+
+path = "./narratives/Theme - Chord Kbds.docx"
+doc = Document(path)
+
+# IMAGE_EXT = ('png', 'jpeg', 'jpg')
+#
+# with tempfile.TemporaryDirectory() as working_dir:
+#     with ZipFile(path) as working_zip:
+#         image_list = [name for name in working_zip.namelist() if any(name.endswith(ext) for ext in IMAGE_EXT)]
+#         working_zip.extractall(working_dir, image_list)
+#     mkdir("./test")
+#     for image in image_list:
+#         shutil.copy(Path(working_dir).resolve() / image, "./test")
+
+paragraphs = doc.paragraphs
+for i in range(len(paragraphs)):
+    print(f"{i}: {paragraphs[i].text}")
+
+# for section in doc.sections:
+#     print(section.orientation)
+
+# for shape in doc.inline_shapes:
+#     print(shape._inline)
+
+# images = doc.tables[0]
+# for row in images.rows:
+#     contents = []
+#     for cell in row.cells:
+#         contents.append(cell.text)
+    # print(contents)
+
+
```