aboutsummaryrefslogtreecommitdiff
path: root/src/server/ApiManagers/AssistantManager.ts
diff options
context:
space:
mode:
authorA.J. Shulman <Shulman.aj@gmail.com>2025-05-21 12:38:55 -0400
committerA.J. Shulman <Shulman.aj@gmail.com>2025-05-21 12:38:55 -0400
commit0e98320d3b237f1927b9f1367494dccd7f66eda9 (patch)
tree112fc95b0dfd2da8a93a37bbb2e1139067c993bd /src/server/ApiManagers/AssistantManager.ts
parent9437753fdebfc7c4b172eeda53610c08abe7287a (diff)
Added codebase search and retrieval to Vectorstore
Summary indexing: Added functionality to embed and index file summaries from file_summaries.json in Pinecone. Vector search: Implemented semantic search to find the top 5 most relevant files for a query. Content retrieval: Added a method to fetch full file content from file_content.json. API endpoints: /getFileSummaries retrieves all file summaries; /getFileContent fetches file content by path; /getRawFileContent returns content as plain text to avoid JSON parsing errors. Error handling: Added comprehensive error handling and debugging throughout. Initialization: Implemented a proper async initialization sequence with verification. Performance: Added streaming for large files to improve memory efficiency. Testing: Added automated test queries to validate functionality.
Diffstat (limited to 'src/server/ApiManagers/AssistantManager.ts')
-rw-r--r--src/server/ApiManagers/AssistantManager.ts180
1 files changed, 180 insertions, 0 deletions
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index b7ce4f663..9d0427b52 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -39,6 +39,7 @@ export enum Directory {
csv = 'csv',
chunk_images = 'chunk_images',
scrape_images = 'scrape_images',
+ vectorstore = 'vectorstore',
}
// In-memory job tracking
@@ -92,6 +93,119 @@ export default class AssistantManager extends ApiManager {
const customsearch = google.customsearch('v1');
const openai = new OpenAI({ apiKey: env.OPENAI_API_KEY });
+ // Register an endpoint that returns the contents of file_summaries.json as JSON.
+ // Responds 404 when the summaries file does not exist and 500 on any other read failure.
+ register({
+     method: Method.GET,
+     subscription: '/getFileSummaries',
+     secureHandler: async ({ res }) => {
+         const summariesPath = path.join(filesDirectory, Directory.vectorstore, 'file_summaries.json');
+
+         try {
+             // Read asynchronously so a large summaries file does not block the event loop,
+             // and let ENOENT signal "missing" instead of a separate exists-then-read check.
+             const data = await fs.promises.readFile(summariesPath, 'utf8');
+
+             // The payload is already-serialized JSON; label it as such, because a bare
+             // string passed to res.send() would otherwise be served as text/html.
+             res.setHeader('Content-Type', 'application/json');
+             res.send(data);
+         } catch (error) {
+             if ((error as NodeJS.ErrnoException).code === 'ENOENT') {
+                 res.status(404).send({ error: 'File summaries not found' });
+                 return;
+             }
+
+             console.error('Error retrieving file summaries:', error);
+             res.status(500).send({
+                 error: 'Failed to retrieve file summaries',
+             });
+         }
+     },
+ });
+
+ // Register an endpoint that returns the stored content for one file, looked up by path
+ // in file_content.json (parsed as a JSON object keyed by filepath).
+ // Request body: { filepath }.
+ // Responses: 400 when filepath is missing or not a string, 404 when the database file or
+ // the requested entry does not exist, 500 when the database cannot be read or parsed.
+ register({
+     method: Method.POST,
+     subscription: '/getFileContent',
+     secureHandler: async ({ req, res }) => {
+         const { filepath } = req.body;
+
+         // filepath is used as an object key below, so only a non-empty string is meaningful.
+         if (typeof filepath !== 'string' || !filepath) {
+             res.status(400).send({ error: 'Filepath is required' });
+             return;
+         }
+
+         try {
+             const databasePath = path.join(filesDirectory, Directory.vectorstore, 'file_content.json');
+
+             console.log(`[DEBUG] Retrieving content for: ${filepath}`);
+
+             // Read the whole database; awaiting here keeps the handler's promise pending
+             // until a response has actually been produced.
+             let rawJson: string;
+             try {
+                 rawJson = await fs.promises.readFile(databasePath, 'utf8');
+             } catch (readError) {
+                 if ((readError as NodeJS.ErrnoException).code === 'ENOENT') {
+                     res.status(404).send({ error: 'File content database not found' });
+                     return;
+                 }
+                 console.error('Error reading file_content.json:', readError);
+                 res.status(500).send({
+                     error: 'Failed to read file content database',
+                 });
+                 return;
+             }
+
+             let contentMap: unknown;
+             try {
+                 contentMap = JSON.parse(rawJson);
+             } catch (parseError) {
+                 console.error('Error parsing file_content.json:', parseError);
+                 res.status(500).send({
+                     error: 'Failed to parse file content database',
+                 });
+                 return;
+             }
+
+             // Valid JSON that is not an object (e.g. null or a number) cannot be a lookup table.
+             if (typeof contentMap !== 'object' || contentMap === null) {
+                 console.error('Error parsing file_content.json: top-level value is not an object');
+                 res.status(500).send({
+                     error: 'Failed to parse file content database',
+                 });
+                 return;
+             }
+
+             // Own-property check: a client-supplied key such as "constructor" or "__proto__"
+             // must not resolve to something inherited from Object.prototype, and an entry
+             // whose content is the empty string still counts as found.
+             if (!Object.prototype.hasOwnProperty.call(contentMap, filepath)) {
+                 console.log(`[DEBUG] Content not found for: ${filepath}`);
+                 res.status(404).send({ error: `Content not found for filepath: ${filepath}` });
+                 return;
+             }
+
+             const content = (contentMap as Record<string, unknown>)[filepath];
+
+             if (typeof content === 'string') {
+                 // Return the file content as is, not as JSON
+                 console.log(`[DEBUG] Found content for: ${filepath} (${content.length} chars)`);
+                 res.send(content);
+             } else {
+                 // Non-string entries are serialized explicitly rather than handed to res.send().
+                 console.log(`[DEBUG] Found content for: ${filepath} (non-string value)`);
+                 res.json(content);
+             }
+         } catch (error) {
+             console.error('Error retrieving file content:', error);
+             // Only report the failure if no response has been started yet.
+             if (!res.headersSent) {
+                 res.status(500).send({
+                     error: 'Failed to retrieve file content',
+                 });
+             }
+         }
+     },
+ });
+
+ // Register a placeholder endpoint for searching file summaries.
+ // The handler only validates that a query was supplied and then replies with a fixed
+ // message; it performs no search itself.
+ register({
+     method: Method.POST,
+     subscription: '/searchFileSummaries',
+     secureHandler: async ({ req, res }) => {
+         // Only `query` is read; any other body fields (e.g. a result count) are ignored
+         // because nothing is searched here.
+         const { query } = req.body;
+
+         if (!query) {
+             res.status(400).send({ error: 'Search query is required' });
+             return;
+         }
+
+         // This endpoint will be called by the client-side Vectorstore to perform the search
+         // The actual search is implemented in the Vectorstore class
+
+         res.send({ message: 'This endpoint should be called through the Vectorstore class' });
+     },
+ });
+
// Register Wikipedia summary API route
register({
method: Method.POST,
@@ -848,6 +962,72 @@ export default class AssistantManager extends ApiManager {
}
},
});
+
+ // Register an endpoint that returns the stored content for one file as plain text,
+ // looked up by path in file_content.json (parsed as a JSON object keyed by filepath).
+ // Request body: { filepath }.
+ // Responses: 400 when filepath is missing or not a string, 404 when the database file or
+ // the requested entry does not exist, 500 when the database cannot be read or parsed.
+ // Every response, including errors, is sent with Content-Type text/plain.
+ register({
+     method: Method.POST,
+     subscription: '/getRawFileContent',
+     secureHandler: async ({ req, res }) => {
+         // Set the content type before anything is sent: the not-found message below echoes
+         // the client-supplied filepath, and a bare string would otherwise go out as text/html.
+         res.setHeader('Content-Type', 'text/plain');
+
+         const { filepath } = req.body;
+
+         // filepath is used as an object key below, so only a non-empty string is meaningful.
+         if (typeof filepath !== 'string' || !filepath) {
+             res.status(400).send('Filepath is required');
+             return;
+         }
+
+         try {
+             const databasePath = path.join(filesDirectory, Directory.vectorstore, 'file_content.json');
+
+             console.log(`[DEBUG] Retrieving raw content for: ${filepath}`);
+
+             // Read the whole database; awaiting here keeps the handler's promise pending
+             // until a response has actually been produced.
+             let rawJson: string;
+             try {
+                 rawJson = await fs.promises.readFile(databasePath, 'utf8');
+             } catch (readError) {
+                 if ((readError as NodeJS.ErrnoException).code === 'ENOENT') {
+                     res.status(404).send('File content database not found');
+                     return;
+                 }
+                 console.error('Error reading file_content.json:', readError);
+                 res.status(500).send('Failed to read file content database');
+                 return;
+             }
+
+             let contentMap: unknown;
+             try {
+                 contentMap = JSON.parse(rawJson);
+             } catch (parseError) {
+                 console.error('Error parsing file_content.json:', parseError);
+                 res.status(500).send('Failed to parse file content database');
+                 return;
+             }
+
+             // Valid JSON that is not an object (e.g. null or a number) cannot be a lookup table.
+             if (typeof contentMap !== 'object' || contentMap === null) {
+                 console.error('Error parsing file_content.json: top-level value is not an object');
+                 res.status(500).send('Failed to parse file content database');
+                 return;
+             }
+
+             // Own-property check: a client-supplied key such as "constructor" or "__proto__"
+             // must not resolve to something inherited from Object.prototype, and an entry
+             // whose content is the empty string still counts as found.
+             if (!Object.prototype.hasOwnProperty.call(contentMap, filepath)) {
+                 console.log(`[DEBUG] Content not found for: ${filepath}`);
+                 res.status(404).send(`Content not found for filepath: ${filepath}`);
+                 return;
+             }
+
+             const content = (contentMap as Record<string, unknown>)[filepath];
+
+             if (typeof content === 'string') {
+                 // Return the file content as plain text
+                 console.log(`[DEBUG] Found content for: ${filepath} (${content.length} chars)`);
+                 res.send(content);
+             } else {
+                 // Non-string entries are serialized explicitly; the text/plain header set
+                 // above is already present when res.json() runs.
+                 console.log(`[DEBUG] Found content for: ${filepath} (non-string value)`);
+                 res.json(content);
+             }
+         } catch (error) {
+             console.error('Error retrieving file content:', error);
+             // Only report the failure if no response has been started yet.
+             if (!res.headersSent) {
+                 res.status(500).send('Failed to retrieve file content');
+             }
+         }
+     },
+ });
}
}