diff options
author | A.J. Shulman <Shulman.aj@gmail.com> | 2024-11-04 13:26:27 -0500 |
---|---|---|
committer | A.J. Shulman <Shulman.aj@gmail.com> | 2024-11-04 13:26:27 -0500 |
commit | 09d7d63d1f248a0bf1d36e4da804cbde5e12e209 (patch) | |
tree | d3a8043fe313819b59801f561f23776ff18df7fc | |
parent | 07516f420ab38fbc63d54f3421bf33a493037ae8 (diff) |
fixing chunking and doc names
-rw-r--r-- | src/server/chunker/pdf_chunker.py | 6 |
1 files changed, 4 insertions, 2 deletions
```diff
diff --git a/src/server/chunker/pdf_chunker.py b/src/server/chunker/pdf_chunker.py
index 130987343..48b2dbf97 100644
--- a/src/server/chunker/pdf_chunker.py
+++ b/src/server/chunker/pdf_chunker.py
@@ -116,10 +116,11 @@ class ElementExtractor:
 # Save the full page with the red outline
 table_filename = f"table_page{page_num + 1}_{idx + 1}.png"
- file_path_for_client = f"{self.doc_id}/{table_filename}"
 table_path = os.path.join(self.output_folder, table_filename)
 page_with_outline.save(table_path)
+ file_path_for_client = f"{self.doc_id}/{table_filename}"
+
 tables.append({
 'metadata': {
 "type": "table",
@@ -173,10 +174,11 @@ class ElementExtractor:
 # Save the full page with the red outline
 image_filename = f"image_page{page_num + 1}_{img_index + 1}.png"
- file_path_for_client = f"{self.doc_id}/{image_filename}"
 image_path = os.path.join(self.output_folder, image_filename)
 page_with_outline.save(image_path)
+ file_path_for_client = f"{self.doc_id}/{image_filename}"
+
 images.append({
 'metadata': {
 "type": "image",
```