aboutsummaryrefslogtreecommitdiff
path: root/src/scraping/buxton/scraper.py
diff options
context:
space:
mode:
author yipstanley <stanley_yip@brown.edu> 2019-07-27 20:12:10 -0400
committer yipstanley <stanley_yip@brown.edu> 2019-07-27 20:12:10 -0400
commit f62c275090a0b227a5f77ed14e0a6c7d19d20052 (patch)
tree 6e72aacb0ca2c2527c6dfc6e19900030eed57111 /src/scraping/buxton/scraper.py
parent d53ad748d90ca1c863a7ef52d0835573ce967a54 (diff)
parent 5cbbd71ceb98a554109ac2a4469a02b28a836e61 (diff)
Merge branch 'master' of https://github.com/browngraphicslab/Dash-Web into pdf_paste_backlink
Diffstat (limited to 'src/scraping/buxton/scraper.py')
-rw-r--r-- src/scraping/buxton/scraper.py 3
1 files changed, 3 insertions, 0 deletions
diff --git a/src/scraping/buxton/scraper.py b/src/scraping/buxton/scraper.py
index 48b8fe3fa..182b22a1a 100644
--- a/src/scraping/buxton/scraper.py
+++ b/src/scraping/buxton/scraper.py
@@ -1,4 +1,5 @@
import os
+from shutil import copyfile
import docx2txt
from docx import Document
from docx.opc.constants import RELATIONSHIP_TYPE as RT
@@ -233,6 +234,8 @@ def parse_document(file_name: str):
for image in os.listdir(dir_path):
count += 1
view_guids.append(write_image(pure_name, image))
+ copyfile(dir_path + "/" + image, dir_path +
+ "/" + image.replace(".", "_o.", 1))
os.rename(dir_path + "/" + image, dir_path +
"/" + image.replace(".", "_m.", 1))
print(f"extracted {count} images...")