about summary refs log tree commit diff
path: root/src/server
diff options
context:
space:
mode:
Diffstat (limited to 'src/server')
-rw-r--r-- src/server/ApiManagers/AssistantManager.ts 8
-rw-r--r-- src/server/chunker/pdf_chunker.py 13
2 files changed, 11 insertions, 10 deletions
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index 6d2779163..378f14094 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -559,7 +559,7 @@ export default class AssistantManager extends ApiManager {
method: Method.POST,
subscription: '/createDocument',
secureHandler: async ({ req, res }) => {
- const { file_path } = req.body;
+ const { file_path, doc_id } = req.body;
const public_path = path.join(publicDirectory, file_path); // Resolve the file path in the public directory
const file_name = path.basename(file_path); // Extract the file name from the path
@@ -572,7 +572,7 @@ export default class AssistantManager extends ApiManager {
// Spawn the Python process and track its progress/output
// eslint-disable-next-line no-use-before-define
- spawnPythonProcess(jobId, public_path);
+ spawnPythonProcess(jobId, public_path, doc_id);
// Send the job ID back to the client for tracking
res.send({ jobId });
@@ -850,7 +850,7 @@ export default class AssistantManager extends ApiManager {
* @param file_name The name of the file to process.
* @param file_path The filepath of the file to process.
*/
-function spawnPythonProcess(jobId: string, file_path: string) {
+function spawnPythonProcess(jobId: string, file_path: string, doc_id: string) {
const venvPath = path.join(__dirname, '../chunker/venv');
const requirementsPath = path.join(__dirname, '../chunker/requirements.txt');
const pythonScriptPath = path.join(__dirname, '../chunker/pdf_chunker.py');
@@ -860,7 +860,7 @@ function spawnPythonProcess(jobId: string, file_path: string) {
function runPythonScript() {
const pythonPath = process.platform === 'win32' ? path.join(venvPath, 'Scripts', 'python') : path.join(venvPath, 'bin', 'python3');
- const pythonProcess = spawn(pythonPath, [pythonScriptPath, jobId, file_path, outputDirectory]);
+ const pythonProcess = spawn(pythonPath, [pythonScriptPath, jobId, file_path, outputDirectory, doc_id]);
let pythonOutput = '';
let stderrOutput = '';
diff --git a/src/server/chunker/pdf_chunker.py b/src/server/chunker/pdf_chunker.py
index e9b9ef2b3..e34753176 100644
--- a/src/server/chunker/pdf_chunker.py
+++ b/src/server/chunker/pdf_chunker.py
@@ -622,7 +622,7 @@ class Document:
Represents a document being processed, such as a PDF, handling chunking, embedding, and summarization.
"""
- def __init__(self, file_path: str, file_name: str, job_id: str, output_folder: str):
+ def __init__(self, file_path: str, file_name: str, job_id: str, output_folder: str, doc_id: str):
"""
Initialize the Document with file data, file name, and job ID.
@@ -635,7 +635,7 @@ class Document:
self.file_path = file_path
self.job_id = job_id
self.type = self._get_document_type(file_name) # Determine the document type (PDF, CSV, etc.)
- self.doc_id = job_id # Use the job ID as the document ID
+ self.doc_id = doc_id # Use the caller-supplied document ID (no longer the job ID)
self.chunks = [] # List to hold text and visual chunks
self.num_pages = 0 # Number of pages in the document (if applicable)
self.summary = "" # The generated summary for the document
@@ -755,7 +755,7 @@ class Document:
"doc_id": self.doc_id
}, indent=2) # Convert the document's attributes to JSON format
-def process_document(file_path, job_id, output_folder):
+def process_document(file_path, job_id, output_folder, doc_id):
"""
Top-level function to process a document and return the JSON output.
@@ -763,26 +763,27 @@ def process_document(file_path, job_id, output_folder):
:param job_id: The job ID for this document processing task.
:return: The processed document's data in JSON format.
"""
- new_document = Document(file_path, file_path, job_id, output_folder)
+ new_document = Document(file_path, file_path, job_id, output_folder, doc_id)
return new_document.to_json()
def main():
"""
Main entry point for the script, called with arguments from Node.js.
"""
- if len(sys.argv) != 4:
+ if len(sys.argv) != 5:
print(json.dumps({"error": "Invalid arguments"}), file=sys.stderr)
return
job_id = sys.argv[1]
file_path = sys.argv[2]
output_folder = sys.argv[3] # Get the output folder from arguments
+ doc_id = sys.argv[4]
try:
os.makedirs(output_folder, exist_ok=True)
# Process the document
- document_result = process_document(file_path, job_id, output_folder) # Pass output_folder
+ document_result = process_document(file_path, job_id, output_folder,doc_id) # Pass output_folder and doc_id
# Output the final result as JSON to stdout
print(document_result)