From 9092494778abd55b6aa299fe06b4f70e7c7a767f Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Mon, 7 Jul 2025 14:39:06 -0400 Subject: changes (seeing if they work) --- src/server/chunker/pdf_chunker.py | 2 +- src/server/chunker/requirements.txt | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/server') diff --git a/src/server/chunker/pdf_chunker.py b/src/server/chunker/pdf_chunker.py index 04d9f51a4..914594f1e 100644 --- a/src/server/chunker/pdf_chunker.py +++ b/src/server/chunker/pdf_chunker.py @@ -307,7 +307,7 @@ class PDFChunker: page_texts = await self.extract_text_from_masked_pages(pages, job_id) # Extract text from masked pages update_progress(job_id, "Processing text...", 0) - text_chunks = self.chunk_text_with_metadata(page_texts, max_words=1000, job_id=job_id) # Chunk text into smaller parts + text_chunks = self.chunk_text_with_metadata(page_texts, max_words=2000, job_id=job_id) # Chunk text into smaller parts # Combine text and visual elements into a unified structure (chunks) chunks = self.combine_chunks(text_chunks, [elem for page in pages for elem in page.elements], file_name, diff --git a/src/server/chunker/requirements.txt b/src/server/chunker/requirements.txt index 3df3cdd24..eceb56f97 100644 --- a/src/server/chunker/requirements.txt +++ b/src/server/chunker/requirements.txt @@ -7,7 +7,7 @@ # ─── LLM clients ───────────────────────────────────────────────────────────── openai==1.40.6 -httpx==0.27.2 # <0.28 → avoids "proxies=" crash +httpx==0.27.2 # <0.28 → avoids “proxies=” crash anthropic==0.34.0 cohere==5.8.0 @@ -33,4 +33,4 @@ scikit-learn==1.5.1 # ─── Utilities ────────────────────────────────────────────────────────────── tqdm==4.66.5 python-dotenv==1.0.1 -packaging==24.0 \ No newline at end of file +packaging==24.0 -- cgit v1.2.3-70-g09d2