From 0e98320d3b237f1927b9f1367494dccd7f66eda9 Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Wed, 21 May 2025 12:38:55 -0400
Subject: Added codebase search and retrieval to Vectorstore

Summary indexing: Added functionality to embed the file summaries from file_summaries.json and index them in Pinecone
Vector search: Implemented semantic search to find the top 5 most relevant files for a query
Content retrieval: Added method to fetch full file content from file_content.json
API endpoints:
/getFileSummaries - Retrieves all file summaries
/getFileContent - Fetches file content by path
/getRawFileContent - Returns content as plain text to avoid JSON parsing errors
Error handling: Added comprehensive error handling and debugging throughout
Initialization: Implemented proper async initialization sequence with verification
Performance: Added streaming for large files to improve memory efficiency
Testing: Added automated test queries to validate functionality
---
 .../nodes/chatbot/utils/AgentDocumentManager.ts      | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

(limited to 'src/client/views/nodes/chatbot/utils')
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index 33eec5972..3c8b49f33 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -153,9 +153,9 @@ export class AgentDocumentManager {
                 console.log(`Found ${linkedDocs.length} linked documents via LinkManager`);
 
                 // Process the linked documents
-                linkedDocs.forEach((doc: Doc | undefined) => {
+                linkedDocs.forEach(async (doc: Doc | undefined) => {
                     if (doc) {
-                        this.processDocument(doc);
+                        await this.processDocument(doc);
                         console.log('Processed linked document:', doc[Id], doc.title, doc.type);
                     }
                 });
@@ -170,7 +170,7 @@ export class AgentDocumentManager {
      * @param doc The document to process
      */
     @action
-    public processDocument(doc: Doc): string {
+    public async processDocument(doc: Doc): Promise<string> {
         // Ensure document has a persistent ID
         const docId = this.ensureDocumentId(doc);
         if (doc.chunk_simplified) {
@@ -900,7 +900,7 @@ export class AgentDocumentManager {
                     }
                 });
 
-                const id = this.processDocument(doc);
+                const id = await this.processDocument(doc);
                 return id;
             } else {
                 throw new Error(`Error creating document. Created document not found.`);
@@ -1081,6 +1081,18 @@ export class AgentDocumentManager {
         return { foundChunk: this.simplifiedChunks.get(chunkId), doc: this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || chunkId), dataDoc: this.getDataDocument(this.simplifiedChunks.get(chunkId)?.doc_id || chunkId) };
     }
 
+    /** Returns, for each document ID, the ID of the first simplified chunk whose doc_id matches it;
+     *  document IDs without a matching chunk are omitted, so the result may be shorter than docIds. */
+    public getChunkIdsFromDocIds(docIds: string[]): string[] {
+        const chunks = Array.from(this.simplifiedChunks.values()); // snapshot once instead of per doc ID
+        const chunkIds: string[] = [];
+        for (const docId of docIds) {
+            const chunkId = chunks.find(chunk => chunk.doc_id === docId)?.chunkId;
+            if (chunkId !== undefined) chunkIds.push(chunkId);
+        }
+        return chunkIds;
+    }
+
     /**
      * Gets the original segments from a media document
      * @param doc The document containing original media segments
-- 
cgit v1.2.3-70-g09d2