2 files changed, 11 insertions, 10 deletions
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index 6d2779163..378f14094 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -559,7 +559,7 @@ export default class AssistantManager extends ApiManager {
             method: Method.POST,
             subscription: '/createDocument',
             secureHandler: async ({ req, res }) => {
-                const { file_path } = req.body;
+                const { file_path, doc_id } = req.body;
                 const public_path = path.join(publicDirectory, file_path); // Resolve the file path in the public directory
                 const file_name = path.basename(file_path); // Extract the file name from the path
 
@@ -572,7 +572,7 @@ export default class AssistantManager extends ApiManager {
 
                     // Spawn the Python process and track its progress/output
                     // eslint-disable-next-line no-use-before-define
-                    spawnPythonProcess(jobId, public_path);
+                    spawnPythonProcess(jobId, public_path, doc_id);
 
                     // Send the job ID back to the client for tracking
                     res.send({ jobId });
@@ -850,7 +850,7 @@ export default class AssistantManager extends ApiManager {
  * @param file_name The name of the file to process.
  * @param file_path The filepath of the file to process.
  */
-function spawnPythonProcess(jobId: string, file_path: string) {
+function spawnPythonProcess(jobId: string, file_path: string, doc_id: string) {
     const venvPath = path.join(__dirname, '../chunker/venv');
     const requirementsPath = path.join(__dirname, '../chunker/requirements.txt');
     const pythonScriptPath = path.join(__dirname, '../chunker/pdf_chunker.py');
@@ -860,7 +860,7 @@ function spawnPythonProcess(jobId: string, file_path: string) {
     function runPythonScript() {
         const pythonPath = process.platform === 'win32' ? path.join(venvPath, 'Scripts', 'python') : path.join(venvPath, 'bin', 'python3');
 
-        const pythonProcess = spawn(pythonPath, [pythonScriptPath, jobId, file_path, outputDirectory]);
+        const pythonProcess = spawn(pythonPath, [pythonScriptPath, jobId, file_path, outputDirectory, doc_id]);
 
         let pythonOutput = '';
         let stderrOutput = '';
diff --git a/src/server/chunker/pdf_chunker.py b/src/server/chunker/pdf_chunker.py
index e9b9ef2b3..e34753176 100644
--- a/src/server/chunker/pdf_chunker.py
+++ b/src/server/chunker/pdf_chunker.py
@@ -622,7 +622,7 @@ class Document:
     Represents a document being processed, such as a PDF, handling chunking, embedding, and summarization.
     """
 
-    def __init__(self, file_path: str, file_name: str, job_id: str, output_folder: str):
+    def __init__(self, file_path: str, file_name: str, job_id: str, output_folder: str, doc_id: str):
         """
         Initialize the Document with file data, file name, and job ID.
 
@@ -635,7 +635,7 @@ class Document:
         self.file_path = file_path
         self.job_id = job_id
         self.type = self._get_document_type(file_name)  # Determine the document type (PDF, CSV, etc.)
-        self.doc_id = job_id  # Use the job ID as the document ID
+        self.doc_id = doc_id  # Use the doc_id passed in by the caller as the document ID
         self.chunks = []  # List to hold text and visual chunks
         self.num_pages = 0  # Number of pages in the document (if applicable)
         self.summary = ""  # The generated summary for the document
@@ -755,7 +755,7 @@ class Document:
             "doc_id": self.doc_id
         }, indent=2)  # Convert the document's attributes to JSON format
 
-def process_document(file_path, job_id, output_folder):
+def process_document(file_path, job_id, output_folder, doc_id):
     """
     Top-level function to process a document and return the JSON output.
 
@@ -763,26 +763,27 @@ def process_document(file_path, job_id, output_folder):
     :param job_id: The job ID for this document processing task.
     :return: The processed document's data in JSON format.
     """
-    new_document = Document(file_path, file_path, job_id, output_folder)
+    new_document = Document(file_path, file_path, job_id, output_folder, doc_id)
     return new_document.to_json()
 
 def main():
     """
     Main entry point for the script, called with arguments from Node.js.
     """
-    if len(sys.argv) != 4:
+    if len(sys.argv) != 5:
         print(json.dumps({"error": "Invalid arguments"}), file=sys.stderr)
         return
 
     job_id = sys.argv[1]
     file_path = sys.argv[2]
     output_folder = sys.argv[3]  # Get the output folder from arguments
+    doc_id = sys.argv[4]
 
     try:
         os.makedirs(output_folder, exist_ok=True)
         
         # Process the document
-        document_result = process_document(file_path, job_id, output_folder)  # Pass output_folder
+        document_result = process_document(file_path, job_id, output_folder,doc_id)  # Pass output_folder and doc_id
 
         # Output the final result as JSON to stdout
         print(document_result)