Added codebase search and retrieval to Vectorstore

Summary indexing: Added functionality to embed and index file summaries from file_summaries.json in Pinecone. Vector search: Implemented semantic search to find the top 5 most relevant files for a query. Content retrieval: Added method to fetch full file content from file_content.json. API endpoints: /getFileSummaries (retrieves all file summaries); /getFileContent (fetches file content by path); /getRawFileContent (returns content as plain text to avoid JSON parsing errors). Error handling: Added comprehensive error handling and debugging throughout. Initialization: Implemented proper async initialization sequence with verification. Performance: Added streaming for large files to improve memory efficiency. Testing: Added automated test queries to validate functionality.
author: A.J. Shulman <Shulman.aj@gmail.com> 2025-05-21 12:38:55 -0400
committer: A.J. Shulman <Shulman.aj@gmail.com> 2025-05-21 12:38:55 -0400
commit: 0e98320d3b237f1927b9f1367494dccd7f66eda9 (patch)
tree: 112fc95b0dfd2da8a93a37bbb2e1139067c993bd /src/server/ApiManagers/AssistantManager.ts
parent: 9437753fdebfc7c4b172eeda53610c08abe7287a (diff)
1 files changed, 180 insertions, 0 deletions
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index b7ce4f663..9d0427b52 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -39,6 +39,7 @@ export enum Directory {
     csv = 'csv',
     chunk_images = 'chunk_images',
     scrape_images = 'scrape_images',
+    vectorstore = 'vectorstore',
 }
 
 // In-memory job tracking
@@ -92,6 +93,119 @@ export default class AssistantManager extends ApiManager {
         const customsearch = google.customsearch('v1');
         const openai = new OpenAI({ apiKey: env.OPENAI_API_KEY });
 
+        // GET /getFileSummaries: responds with the text of
+        // <filesDirectory>/vectorstore/file_summaries.json, or 404 if that file is absent.
+        register({
+            subscription: '/getFileSummaries',
+            method: Method.GET,
+            secureHandler: async ({ res }) => {
+                try {
+                    const summariesPath = path.join(filesDirectory, Directory.vectorstore, 'file_summaries.json');
+
+                    if (fs.existsSync(summariesPath)) {
+                        // The file is forwarded as the string read from disk; it is not parsed here.
+                        const summariesText = fs.readFileSync(summariesPath, 'utf8');
+                        res.send(summariesText);
+                    } else {
+                        res.status(404).send({ error: 'File summaries not found' });
+                    }
+                } catch (err) {
+                    // Any exception raised above is logged and reported as a 500.
+                    console.error('Error retrieving file summaries:', err);
+                    const failure = { error: 'Failed to retrieve file summaries' };
+                    res.status(500).send(failure);
+                }
+            },
+        });
+
+        // POST /getFileContent: looks up `filepath` (from the request body) in
+        // <filesDirectory>/vectorstore/file_content.json and sends the stored value.
+        // Responds 400 for a missing/non-string filepath, 404 when the database file or
+        // the entry is absent, and 500 when the file cannot be read or parsed.
+        register({
+            method: Method.POST,
+            subscription: '/getFileContent',
+            secureHandler: async ({ req, res }) => {
+                const { filepath } = req.body;
+
+                // The value is used as an object key below, so insist on a non-empty string.
+                if (typeof filepath !== 'string' || !filepath) {
+                    res.status(400).send({ error: 'Filepath is required' });
+                    return;
+                }
+
+                try {
+                    const filePath = path.join(filesDirectory, Directory.vectorstore, 'file_content.json');
+
+                    if (!fs.existsSync(filePath)) {
+                        res.status(404).send({ error: 'File content database not found' });
+                        return;
+                    }
+
+                    console.log(`[DEBUG] Retrieving content for: ${filepath}`);
+
+                    // The stream delivers the file in chunks, but the whole document is still
+                    // accumulated in memory because JSON.parse needs the complete text.
+                    const readStream = fs.createReadStream(filePath, { encoding: 'utf8' });
+                    let jsonData = '';
+
+                    readStream.on('data', chunk => {
+                        jsonData += chunk;
+                    });
+
+                    readStream.on('end', () => {
+                        try {
+                            const contentMap = JSON.parse(jsonData);
+
+                            // Only own keys count: inherited names such as "constructor" or
+                            // "__proto__" must not match, and an empty-string entry is still a hit.
+                            const hasEntry = Object.prototype.hasOwnProperty.call(contentMap, filepath);
+                            const content = hasEntry ? contentMap[filepath] : undefined;
+                            if (content === undefined || content === null) {
+                                console.log(`[DEBUG] Content not found for: ${filepath}`);
+                                res.status(404).send({ error: `Content not found for filepath: ${filepath}` });
+                                return;
+                            }
+
+                            // Send the stored value itself rather than wrapping it in a JSON envelope.
+                            console.log(`[DEBUG] Found content for: ${filepath} (${content.length} chars)`);
+                            res.send(content);
+                        } catch (parseError) {
+                            console.error('Error parsing file_content.json:', parseError);
+                            res.status(500).send({ error: 'Failed to parse file content database' });
+                        }
+                    });
+
+                    readStream.on('error', streamError => {
+                        console.error('Error reading file_content.json:', streamError);
+                        res.status(500).send({ error: 'Failed to read file content database' });
+                    });
+                } catch (error) {
+                    console.error('Error retrieving file content:', error);
+                    res.status(500).send({ error: 'Failed to retrieve file content' });
+                }
+            },
+        });
+
+        // POST /searchFileSummaries: placeholder route. It only checks that the request
+        // body carries a `query`; no search is executed by this handler.
+        register({
+            subscription: '/searchFileSummaries',
+            method: Method.POST,
+            secureHandler: async ({ req, res }) => {
+                // `topK` is read from the body but not used anywhere in this handler.
+                const { query, topK } = req.body;
+
+                if (query) {
+                    // Original author's note: the actual search is implemented in the Vectorstore class.
+                    res.send({ message: 'This endpoint should be called through the Vectorstore class' });
+                    return;
+                }
+
+                res.status(400).send({ error: 'Search query is required' });
+            },
+        });
+
         // Register Wikipedia summary API route
         register({
             method: Method.POST,
@@ -848,6 +962,72 @@ export default class AssistantManager extends ApiManager {
                 }
             },
         });
+
+        // POST /getRawFileContent: looks up `filepath` (from the request body) in
+        // <filesDirectory>/vectorstore/file_content.json and sends the entry as text/plain.
+        register({
+            method: Method.POST,
+            subscription: '/getRawFileContent',
+            secureHandler: async ({ req, res }) => {
+                const { filepath } = req.body;
+
+                // Every body sent here is a bare string, and some echo `filepath` back, so
+                // declare text/plain before the first send to keep it from being served as HTML.
+                res.setHeader('Content-Type', 'text/plain');
+
+                if (typeof filepath !== 'string' || !filepath) {
+                    res.status(400).send('Filepath is required');
+                    return;
+                }
+
+                try {
+                    const filePath = path.join(filesDirectory, Directory.vectorstore, 'file_content.json');
+
+                    if (!fs.existsSync(filePath)) {
+                        res.status(404).send('File content database not found');
+                        return;
+                    }
+
+                    console.log(`[DEBUG] Retrieving raw content for: ${filepath}`);
+
+                    // The whole document is accumulated before parsing; JSON.parse needs all of it.
+                    const readStream = fs.createReadStream(filePath, { encoding: 'utf8' });
+                    let jsonData = '';
+
+                    readStream.on('data', chunk => {
+                        jsonData += chunk;
+                    });
+
+                    readStream.on('end', () => {
+                        try {
+                            const contentMap = JSON.parse(jsonData);
+                            // Only own keys count: inherited names such as "constructor" must not
+                            // match, and an entry holding an empty string is still a valid hit.
+                            const hasEntry = Object.prototype.hasOwnProperty.call(contentMap, filepath);
+                            const content = hasEntry ? contentMap[filepath] : undefined;
+                            if (content === undefined || content === null) {
+                                console.log(`[DEBUG] Content not found for: ${filepath}`);
+                                res.status(404).send(`Content not found for filepath: ${filepath}`);
+                                return;
+                            }
+                            console.log(`[DEBUG] Found content for: ${filepath} (${content.length} chars)`);
+                            res.send(content);
+                        } catch (parseError) {
+                            console.error('Error parsing file_content.json:', parseError);
+                            res.status(500).send('Failed to parse file content database');
+                        }
+                    });
+
+                    readStream.on('error', streamError => {
+                        console.error('Error reading file_content.json:', streamError);
+                        res.status(500).send('Failed to read file content database');
+                    });
+                } catch (error) {
+                    console.error('Error retrieving file content:', error);
+                    res.status(500).send('Failed to retrieve file content');
+                }
+            },
+        });
     }
 }
author	A.J. Shulman <Shulman.aj@gmail.com>	2025-05-21 12:38:55 -0400
committer	A.J. Shulman <Shulman.aj@gmail.com>	2025-05-21 12:38:55 -0400
commit	0e98320d3b237f1927b9f1367494dccd7f66eda9 (patch)
tree	112fc95b0dfd2da8a93a37bbb2e1139067c993bd /src/server/ApiManagers/AssistantManager.ts
parent	9437753fdebfc7c4b172eeda53610c08abe7287a (diff)