diff options
| author | A.J. Shulman <Shulman.aj@gmail.com> | 2025-05-21 12:38:55 -0400 |
|---|---|---|
| committer | A.J. Shulman <Shulman.aj@gmail.com> | 2025-05-21 12:38:55 -0400 |
| commit | 0e98320d3b237f1927b9f1367494dccd7f66eda9 (patch) | |
| tree | 112fc95b0dfd2da8a93a37bbb2e1139067c993bd /src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts | |
| parent | 9437753fdebfc7c4b172eeda53610c08abe7287a (diff) | |
Added codebase search and retrieval to Vectorstore
Summary indexing: Added functionality to embed file summaries from file_summaries.json and index them in Pinecone
Vector search: Implemented semantic search to find the top 5 most relevant files for a query
Content retrieval: Added method to fetch full file content from file_content.json
API endpoints:
/getFileSummaries - Retrieves all file summaries
/getFileContent - Fetches file content by path
/getRawFileContent - Returns content as plain text to avoid JSON parsing errors
Error handling: Added comprehensive error handling and debugging throughout
Initialization: Implemented proper async initialization sequence with verification
Performance: Added streaming for large files to improve memory efficiency
Testing: Added automated test queries to validate functionality
Diffstat (limited to 'src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts')
| -rw-r--r-- | src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts | 6 |
1 file changed, 4 insertions, 2 deletions
diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts index 495a985cb..727d35e2c 100644 --- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts +++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts @@ -22,6 +22,7 @@ const websiteInfoScraperToolInfo: ToolInfo<WebsiteInfoScraperToolParamsType> = { name: 'websiteInfoScraper', description: 'Scrape detailed information from specific websites relevant to the user query. Returns the text content of the webpages for further analysis and grounding.', citationRules: ` + !IMPORTANT! THESE CHUNKS REPLACE THE CHUNKS THAT ARE RETURNED FROM THE SEARCHTOOL. Your task is to provide a comprehensive response to the user's prompt using the content scraped from relevant websites. Ensure you follow these guidelines for structuring your response: 1. Grounded Text Tag Structure: @@ -88,6 +89,7 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam console.log(url); console.log(chunkDoc); console.log(chunkDoc.data); + const id = chunkDoc.id; // Validate URL format try { new URL(url); // This will throw if URL is invalid @@ -130,7 +132,7 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam if (retryCount === maxRetries) { return { type: 'text', - text: `<chunk chunk_id="${chunkDoc.id}" chunk_type="url">\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n</chunk>`, + text: `<chunk chunk_id="${id}" chunk_type="url">\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n</chunk>`, } as Observation; } @@ -142,7 +144,7 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam // Process and return content if it looks good return { type: 'text', - text: `<chunk chunk_id="${chunkDoc.id}" chunk_type="url">\n${website_plain_text}\n</chunk>`, + text: `<chunk chunk_id="${id}" 
chunk_type="url">\n${website_plain_text}\n</chunk>`, } as Observation; } catch (error) { lastError = error instanceof Error ? error.message : 'Unknown error'; |
