aboutsummaryrefslogtreecommitdiff
path: root/src/client/views/nodes/chatbot/tools
diff options
context:
space:
mode:
author A.J. Shulman <Shulman.aj@gmail.com> 2025-05-21 12:38:55 -0400
committer A.J. Shulman <Shulman.aj@gmail.com> 2025-05-21 12:38:55 -0400
commit 0e98320d3b237f1927b9f1367494dccd7f66eda9 (patch)
tree 112fc95b0dfd2da8a93a37bbb2e1139067c993bd /src/client/views/nodes/chatbot/tools
parent 9437753fdebfc7c4b172eeda53610c08abe7287a (diff)
Added codebase search and retrieval to Vectorstore
- Summary indexing: Added functionality to embed and index file summaries from file_summaries.json in Pinecone.
- Vector search: Implemented semantic search to find the top 5 most relevant files for a query.
- Content retrieval: Added method to fetch full file content from file_content.json.
- API endpoints:
  - /getFileSummaries - Retrieves all file summaries.
  - /getFileContent - Fetches file content by path.
  - /getRawFileContent - Returns content as plain text to avoid JSON parsing errors.
- Error handling: Added comprehensive error handling and debugging throughout.
- Initialization: Implemented proper async initialization sequence with verification.
- Performance: Added streaming for large files to improve memory efficiency.
- Testing: Added automated test queries to validate functionality.
Diffstat (limited to 'src/client/views/nodes/chatbot/tools')
-rw-r--r-- src/client/views/nodes/chatbot/tools/RAGTool.ts 8
-rw-r--r-- src/client/views/nodes/chatbot/tools/SearchTool.ts 2
-rw-r--r-- src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts 6
3 files changed, 12 insertions, 4 deletions
diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts
index 90b803d21..af44de520 100644
--- a/src/client/views/nodes/chatbot/tools/RAGTool.ts
+++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts
@@ -12,6 +12,12 @@ const ragToolParams = [
description: "A detailed prompt representing an ideal chunk to embed and compare against document vectors to retrieve the most relevant content for answering the user's query.",
required: true,
},
+ {
+ name: 'doc_ids',
+ type: 'string[]',
+ description: 'An optional array of document IDs to retrieve chunks from. If you want to retrieve chunks from all documents, leave this as an empty array: [] (DO NOT LEAVE THIS EMPTY).',
+ required: false,
+ },
] as const;
type RAGToolParamsType = typeof ragToolParams;
@@ -69,7 +75,7 @@ export class RAGTool extends BaseTool<RAGToolParamsType> {
}
async execute(args: ParametersType<RAGToolParamsType>): Promise<Observation[]> {
- const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk);
+ const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk, undefined, args.doc_ids ?? undefined);
const formattedChunks = await this.getFormattedChunks(relevantChunks);
return formattedChunks;
}
diff --git a/src/client/views/nodes/chatbot/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts
index 43f14ea83..8e6edce8c 100644
--- a/src/client/views/nodes/chatbot/tools/SearchTool.ts
+++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts
@@ -22,7 +22,7 @@ type SearchToolParamsType = typeof searchToolParams;
const searchToolInfo: ToolInfo<SearchToolParamsType> = {
name: 'searchTool',
- citationRules: 'No citation needed. Cannot cite search results for a response. Use web scraping tools to cite specific information.',
+ citationRules: 'Always cite the search results for a response, if the search results are relevant to the response. Use the chunk_id to cite the search results. If the search results are not relevant to the response, do not cite them. ',
parameterRules: searchToolParams,
description: 'Search the web to find a wide range of websites related to a query or multiple queries. Returns a list of websites and their overviews based on the search queries.',
};
diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
index 495a985cb..727d35e2c 100644
--- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
+++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
@@ -22,6 +22,7 @@ const websiteInfoScraperToolInfo: ToolInfo<WebsiteInfoScraperToolParamsType> = {
name: 'websiteInfoScraper',
description: 'Scrape detailed information from specific websites relevant to the user query. Returns the text content of the webpages for further analysis and grounding.',
citationRules: `
+ !IMPORTANT! THESE CHUNKS REPLACE THE CHUNKS THAT ARE RETURNED FROM THE SEARCHTOOL.
Your task is to provide a comprehensive response to the user's prompt using the content scraped from relevant websites. Ensure you follow these guidelines for structuring your response:
1. Grounded Text Tag Structure:
@@ -88,6 +89,7 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
console.log(url);
console.log(chunkDoc);
console.log(chunkDoc.data);
+ const id = chunkDoc.id;
// Validate URL format
try {
new URL(url); // This will throw if URL is invalid
@@ -130,7 +132,7 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
if (retryCount === maxRetries) {
return {
type: 'text',
- text: `<chunk chunk_id="${chunkDoc.id}" chunk_type="url">\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n</chunk>`,
+ text: `<chunk chunk_id="${id}" chunk_type="url">\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n</chunk>`,
} as Observation;
}
@@ -142,7 +144,7 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
// Process and return content if it looks good
return {
type: 'text',
- text: `<chunk chunk_id="${chunkDoc.id}" chunk_type="url">\n${website_plain_text}\n</chunk>`,
+ text: `<chunk chunk_id="${id}" chunk_type="url">\n${website_plain_text}\n</chunk>`,
} as Observation;
} catch (error) {
lastError = error instanceof Error ? error.message : 'Unknown error';