From 0e98320d3b237f1927b9f1367494dccd7f66eda9 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Wed, 21 May 2025 12:38:55 -0400 Subject: Added codebase search and retrieval to Vectorstore Summary indexing: Added functionality to embed and index file summaries from file_summaries.json in Pinecone Vector search: Implemented semantic search to find the top 5 most relevant files for a query Content retrieval: Added method to fetch full file content from file_content.json API endpoints: /getFileSummaries - Retrieves all file summaries /getFileContent - Fetches file content by path /getRawFileContent - Returns content as plain text to avoid JSON parsing errors Error handling: Added comprehensive error handling and debugging throughout Initialization: Implemented proper async initialization sequence with verification Performance: Added streaming for large files to improve memory efficiency Testing: Added automated test queries to validate functionality --- src/client/views/nodes/chatbot/tools/RAGTool.ts | 8 +++++++- src/client/views/nodes/chatbot/tools/SearchTool.ts | 2 +- src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts | 6 ++++-- 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'src/client/views/nodes/chatbot/tools') diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts index 90b803d21..af44de520 100644 --- a/src/client/views/nodes/chatbot/tools/RAGTool.ts +++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts @@ -12,6 +12,12 @@ const ragToolParams = [ description: "A detailed prompt representing an ideal chunk to embed and compare against document vectors to retrieve the most relevant content for answering the user's query.", required: true, }, + { + name: 'doc_ids', + type: 'string[]', + description: 'An optional array of document IDs to retrieve chunks from. If you want to retrieve chunks from all documents, leave this as an empty array: [] (DO NOT LEAVE THIS EMPTY).', + required: false, + }, ] as const; type RAGToolParamsType = typeof ragToolParams; @@ -69,7 +75,7 @@ export class RAGTool extends BaseTool { } async execute(args: ParametersType): Promise { - const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk); + const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk, undefined, args.doc_ids ?? undefined); const formattedChunks = await this.getFormattedChunks(relevantChunks); return formattedChunks; } diff --git a/src/client/views/nodes/chatbot/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts index 43f14ea83..8e6edce8c 100644 --- a/src/client/views/nodes/chatbot/tools/SearchTool.ts +++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts @@ -22,7 +22,7 @@ type SearchToolParamsType = typeof searchToolParams; const searchToolInfo: ToolInfo = { name: 'searchTool', - citationRules: 'No citation needed. Cannot cite search results for a response. Use web scraping tools to cite specific information.', + citationRules: 'Always cite the search results for a response, if the search results are relevant to the response. Use the chunk_id to cite the search results. If the search results are not relevant to the response, do not cite them. ', parameterRules: searchToolParams, description: 'Search the web to find a wide range of websites related to a query or multiple queries. Returns a list of websites and their overviews based on the search queries.', }; diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts index 495a985cb..727d35e2c 100644 --- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts +++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts @@ -22,6 +22,7 @@ const websiteInfoScraperToolInfo: ToolInfo = { name: 'websiteInfoScraper', description: 'Scrape detailed information from specific websites relevant to the user query. Returns the text content of the webpages for further analysis and grounding.', citationRules: ` + !IMPORTANT! THESE CHUNKS REPLACE THE CHUNKS THAT ARE RETURNED FROM THE SEARCHTOOL. Your task is to provide a comprehensive response to the user's prompt using the content scraped from relevant websites. Ensure you follow these guidelines for structuring your response: 1. Grounded Text Tag Structure: @@ -88,6 +89,7 @@ export class WebsiteInfoScraperTool extends BaseTool\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n`, + text: `\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n`, } as Observation; } @@ -142,7 +144,7 @@ export class WebsiteInfoScraperTool extends BaseTool\n${website_plain_text}\n`, + text: `\n${website_plain_text}\n`, } as Observation; } catch (error) { lastError = error instanceof Error ? error.message : 'Unknown error'; -- cgit v1.2.3-70-g09d2