From c1377587f27044d89ec84befa9953de627d49873 Mon Sep 17 00:00:00 2001
From: Tyler Schicke
Date: Sat, 27 Jul 2019 14:38:53 -0400
Subject: Fixed up buxton scraper to get fullscreen images and remove bad images

---
 src/scraping/buxton/scraper.py | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'src/scraping/buxton/scraper.py')

diff --git a/src/scraping/buxton/scraper.py b/src/scraping/buxton/scraper.py
index 48b8fe3fa..182b22a1a 100644
--- a/src/scraping/buxton/scraper.py
+++ b/src/scraping/buxton/scraper.py
@@ -1,4 +1,5 @@
 import os
+from shutil import copyfile
 import docx2txt
 from docx import Document
 from docx.opc.constants import RELATIONSHIP_TYPE as RT
@@ -233,6 +234,8 @@ def parse_document(file_name: str):
     for image in os.listdir(dir_path):
         count += 1
         view_guids.append(write_image(pure_name, image))
+        copyfile(dir_path + "/" + image, dir_path +
+                 "/" + image.replace(".", "_o.", 1))
         os.rename(dir_path + "/" + image, dir_path +
                   "/" + image.replace(".", "_m.", 1))
     print(f"extracted {count} images...")
-- 
cgit v1.2.3-70-g09d2