aboutsummaryrefslogtreecommitdiff
path: root/src/server
diff options
context:
space:
mode:
author: A.J. Shulman <Shulman.aj@gmail.com> 2025-07-07 14:39:06 -0400
committer: A.J. Shulman <Shulman.aj@gmail.com> 2025-07-07 14:39:06 -0400
commit: 9092494778abd55b6aa299fe06b4f70e7c7a767f (patch)
tree: 28aedb8db51224374e1a31d9557ffd28e1c7e8f9 /src/server
parent: 86c666427ff8b9d516450a150af641570e00f2d2 (diff)
changes (seeing if they work)
Diffstat (limited to 'src/server')
-rw-r--r-- src/server/chunker/pdf_chunker.py 2
-rw-r--r-- src/server/chunker/requirements.txt 4
2 files changed, 3 insertions, 3 deletions
diff --git a/src/server/chunker/pdf_chunker.py b/src/server/chunker/pdf_chunker.py
index 04d9f51a4..914594f1e 100644
--- a/src/server/chunker/pdf_chunker.py
+++ b/src/server/chunker/pdf_chunker.py
@@ -307,7 +307,7 @@ class PDFChunker:
page_texts = await self.extract_text_from_masked_pages(pages, job_id) # Extract text from masked pages
update_progress(job_id, "Processing text...", 0)
- text_chunks = self.chunk_text_with_metadata(page_texts, max_words=1000, job_id=job_id) # Chunk text into smaller parts
+ text_chunks = self.chunk_text_with_metadata(page_texts, max_words=2000, job_id=job_id) # Chunk text into smaller parts
# Combine text and visual elements into a unified structure (chunks)
chunks = self.combine_chunks(text_chunks, [elem for page in pages for elem in page.elements], file_name,
diff --git a/src/server/chunker/requirements.txt b/src/server/chunker/requirements.txt
index 3df3cdd24..eceb56f97 100644
--- a/src/server/chunker/requirements.txt
+++ b/src/server/chunker/requirements.txt
@@ -7,7 +7,7 @@
# ─── LLM clients ─────────────────────────────────────────────────────────────
openai==1.40.6
-httpx==0.27.2 # <0.28 → avoids "proxies=" crash
+httpx==0.27.2 # <0.28 → avoids “proxies=” crash
anthropic==0.34.0
cohere==5.8.0
@@ -33,4 +33,4 @@ scikit-learn==1.5.1
# ─── Utilities ──────────────────────────────────────────────────────────────
tqdm==4.66.5
python-dotenv==1.0.1
-packaging==24.0
\ No newline at end of file
+packaging==24.0