From e141307dbd9b951f76c908610e7b89e296ad92b8 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sun, 11 May 2025 17:18:18 -0400 Subject: changed everything to be more consistent - made both web related tools use doc manager and chunk Ids --- .../nodes/chatbot/tools/DocumentMetadataTool.ts | 2 +- src/client/views/nodes/chatbot/tools/SearchTool.ts | 26 ++++++++++--------- .../nodes/chatbot/tools/WebsiteInfoScraperTool.ts | 30 ++++++++++++---------- .../views/nodes/chatbot/tools/WikipediaTool.ts | 2 +- 4 files changed, 33 insertions(+), 27 deletions(-) (limited to 'src/client/views/nodes/chatbot/tools') diff --git a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts index 5297292bf..405949c1e 100644 --- a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts +++ b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts @@ -408,7 +408,7 @@ export class DocumentMetadataTool extends BaseTool { query, max_results: this._max_results, })) as { results: { url: string; snippet: string }[] }; - const data = results.map((result: { url: string; snippet: string }) => { - // Create a web document with the URL - const id = this._docManager.createDocInDash('web', result.url, { - title: `Search Result: ${result.url}`, - text_html: result.snippet, - data_useCors: true, - }); + const data = await Promise.all( + results.map(async (result: { url: string; snippet: string }) => { + // Create a web document with the URL + const id = await this._docManager.createDocInDash('web', result.url, { + title: `Search Result: ${result.url}`, + text_html: result.snippet, + data_useCors: true, + }); - return { - type: 'text' as const, - text: `${result.url}${result.snippet}`, - }; - }); + return { + type: 'text' as const, + text: `${result.url}${result.snippet}`, + }; + }) + ); return data; } catch (error) { console.log(error); diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts 
b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts index 3c7b4e3db..495a985cb 100644 --- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts +++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts @@ -3,12 +3,14 @@ import { Networking } from '../../../../Network'; import { BaseTool } from './BaseTool'; import { Observation } from '../types/types'; import { ParametersType, ToolInfo } from '../types/tool_types'; - +import { AgentDocumentManager } from '../utils/AgentDocumentManager'; +import { Doc } from '../../../../../fields/Doc'; +import { StrCast, WebCast } from '../../../../../fields/Types'; const websiteInfoScraperToolParams = [ { - name: 'urls', + name: 'chunk_ids', type: 'string[]', - description: 'The URLs of the websites to scrape', + description: 'The chunk_ids of the urls to scrape from the SearchTool.', required: true, max_inputs: 3, }, @@ -66,11 +68,11 @@ const websiteInfoScraperToolInfo: ToolInfo = { }; export class WebsiteInfoScraperTool extends BaseTool { - private _getLinkedUrlDocId: (url: string) => string[]; + private _docManager: AgentDocumentManager; - constructor(getLinkedUrlDocIds: (url: string) => string[]) { + constructor(docManager: AgentDocumentManager) { super(websiteInfoScraperToolInfo); - this._getLinkedUrlDocId = getLinkedUrlDocIds; + this._docManager = docManager; } /** @@ -79,10 +81,13 @@ export class WebsiteInfoScraperTool extends BaseTool { + private async scrapeWithRetry(chunkDoc: Doc, maxRetries = 2): Promise { let lastError = ''; let retryCount = 0; - + const url = WebCast(chunkDoc.data!)!.url.href; + console.log(url); + console.log(chunkDoc); + console.log(chunkDoc.data); // Validate URL format try { new URL(url); // This will throw if URL is invalid @@ -110,7 +115,6 @@ export class WebsiteInfoScraperTool extends BaseTool\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n`, + text: `\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n`, } 
as Observation; } @@ -138,7 +142,7 @@ export class WebsiteInfoScraperTool extends BaseTool\n${website_plain_text}\n`, + text: `\n${website_plain_text}\n`, } as Observation; } catch (error) { lastError = error instanceof Error ? error.message : 'Unknown error'; @@ -156,10 +160,10 @@ export class WebsiteInfoScraperTool extends BaseTool): Promise { - const urls = args.urls; + const chunk_ids = args.chunk_ids; // Create an array of promises, each one handling a website scrape for a URL - const scrapingPromises = urls.map(url => this.scrapeWithRetry(url)); + const scrapingPromises = chunk_ids.map(chunk_id => this.scrapeWithRetry(this._docManager.getDocument(chunk_id)!)); // Wait for all scraping promises to resolve const results = await Promise.all(scrapingPromises); diff --git a/src/client/views/nodes/chatbot/tools/WikipediaTool.ts b/src/client/views/nodes/chatbot/tools/WikipediaTool.ts index ee815532a..ec5d83e52 100644 --- a/src/client/views/nodes/chatbot/tools/WikipediaTool.ts +++ b/src/client/views/nodes/chatbot/tools/WikipediaTool.ts @@ -32,7 +32,7 @@ export class WikipediaTool extends BaseTool { async execute(args: ParametersType): Promise { try { - const { text } = await Networking.PostToServer('/getWikipediaSummary', { title: args.title }); + const { text } = (await Networking.PostToServer('/getWikipediaSummary', { title: args.title })) as { text: string }; const id = uuidv4(); const url = `https://en.wikipedia.org/wiki/${args.title.replace(/ /g, '_')}`; this._addLinkedUrlDoc(url, id); -- cgit v1.2.3-70-g09d2