From 09d7d63d1f248a0bf1d36e4da804cbde5e12e209 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Mon, 4 Nov 2024 13:26:27 -0500 Subject: fixing chunking and doc names --- src/server/chunker/pdf_chunker.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/server/chunker/pdf_chunker.py b/src/server/chunker/pdf_chunker.py index 130987343..48b2dbf97 100644 --- a/src/server/chunker/pdf_chunker.py +++ b/src/server/chunker/pdf_chunker.py @@ -116,10 +116,11 @@ class ElementExtractor: # Save the full page with the red outline table_filename = f"table_page{page_num + 1}_{idx + 1}.png" - file_path_for_client = f"{self.doc_id}/{table_filename}" table_path = os.path.join(self.output_folder, table_filename) page_with_outline.save(table_path) + file_path_for_client = f"{self.doc_id}/{table_filename}" + tables.append({ 'metadata': { "type": "table", @@ -173,10 +174,11 @@ class ElementExtractor: # Save the full page with the red outline image_filename = f"image_page{page_num + 1}_{img_index + 1}.png" - file_path_for_client = f"{self.doc_id}/{image_filename}" image_path = os.path.join(self.output_folder, image_filename) page_with_outline.save(image_path) + file_path_for_client = f"{self.doc_id}/{image_filename}" + images.append({ 'metadata': { "type": "image", -- cgit v1.2.3-70-g09d2