From e141307dbd9b951f76c908610e7b89e296ad92b8 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sun, 11 May 2025 17:18:18 -0400 Subject: changed everything to be more consistent - made both web related tools use doc manager and chunk Ids --- .../nodes/chatbot/tools/DocumentMetadataTool.ts | 2 +- src/client/views/nodes/chatbot/tools/SearchTool.ts | 26 ++++++++++--------- .../nodes/chatbot/tools/WebsiteInfoScraperTool.ts | 30 ++++++++++++---------- .../views/nodes/chatbot/tools/WikipediaTool.ts | 2 +- 4 files changed, 33 insertions(+), 27 deletions(-) (limited to 'src/client/views/nodes/chatbot/tools') diff --git a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts index 5297292bf..405949c1e 100644 --- a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts +++ b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts @@ -408,7 +408,7 @@ export class DocumentMetadataTool extends BaseTool { query, max_results: this._max_results, })) as { results: { url: string; snippet: string }[] }; - const data = results.map((result: { url: string; snippet: string }) => { - // Create a web document with the URL - const id = this._docManager.createDocInDash('web', result.url, { - title: `Search Result: ${result.url}`, - text_html: result.snippet, - data_useCors: true, - }); + const data = await Promise.all( + results.map(async (result: { url: string; snippet: string }) => { + // Create a web document with the URL + const id = await this._docManager.createDocInDash('web', result.url, { + title: `Search Result: ${result.url}`, + text_html: result.snippet, + data_useCors: true, + }); - return { - type: 'text' as const, - text: `${result.url}${result.snippet}`, - }; - }); + return { + type: 'text' as const, + text: `${result.url}${result.snippet}`, + }; + }) + ); return data; } catch (error) { console.log(error); diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts 
b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts index 3c7b4e3db..495a985cb 100644 --- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts +++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts @@ -3,12 +3,14 @@ import { Networking } from '../../../../Network'; import { BaseTool } from './BaseTool'; import { Observation } from '../types/types'; import { ParametersType, ToolInfo } from '../types/tool_types'; - +import { AgentDocumentManager } from '../utils/AgentDocumentManager'; +import { Doc } from '../../../../../fields/Doc'; +import { StrCast, WebCast } from '../../../../../fields/Types'; const websiteInfoScraperToolParams = [ { - name: 'urls', + name: 'chunk_ids', type: 'string[]', - description: 'The URLs of the websites to scrape', + description: 'The chunk_ids of the urls to scrape from the SearchTool.', required: true, max_inputs: 3, }, @@ -66,11 +68,11 @@ const websiteInfoScraperToolInfo: ToolInfo = { }; export class WebsiteInfoScraperTool extends BaseTool { - private _getLinkedUrlDocId: (url: string) => string[]; + private _docManager: AgentDocumentManager; - constructor(getLinkedUrlDocIds: (url: string) => string[]) { + constructor(docManager: AgentDocumentManager) { super(websiteInfoScraperToolInfo); - this._getLinkedUrlDocId = getLinkedUrlDocIds; + this._docManager = docManager; } /** @@ -79,10 +81,13 @@ export class WebsiteInfoScraperTool extends BaseTool { + private async scrapeWithRetry(chunkDoc: Doc, maxRetries = 2): Promise { let lastError = ''; let retryCount = 0; - + const url = WebCast(chunkDoc.data!)!.url.href; + console.log(url); + console.log(chunkDoc); + console.log(chunkDoc.data); // Validate URL format try { new URL(url); // This will throw if URL is invalid @@ -110,7 +115,6 @@ export class WebsiteInfoScraperTool extends BaseTool\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n`, + text: `\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n`, } 
as Observation; } @@ -138,7 +142,7 @@ export class WebsiteInfoScraperTool extends BaseTool\n${website_plain_text}\n`, + text: `\n${website_plain_text}\n`, } as Observation; } catch (error) { lastError = error instanceof Error ? error.message : 'Unknown error'; @@ -156,10 +160,10 @@ export class WebsiteInfoScraperTool extends BaseTool): Promise { - const urls = args.urls; + const chunk_ids = args.chunk_ids; // Create an array of promises, each one handling a website scrape for a URL - const scrapingPromises = urls.map(url => this.scrapeWithRetry(url)); + const scrapingPromises = chunk_ids.map(chunk_id => this.scrapeWithRetry(this._docManager.getDocument(chunk_id)!)); // Wait for all scraping promises to resolve const results = await Promise.all(scrapingPromises); diff --git a/src/client/views/nodes/chatbot/tools/WikipediaTool.ts b/src/client/views/nodes/chatbot/tools/WikipediaTool.ts index ee815532a..ec5d83e52 100644 --- a/src/client/views/nodes/chatbot/tools/WikipediaTool.ts +++ b/src/client/views/nodes/chatbot/tools/WikipediaTool.ts @@ -32,7 +32,7 @@ export class WikipediaTool extends BaseTool { async execute(args: ParametersType): Promise { try { - const { text } = await Networking.PostToServer('/getWikipediaSummary', { title: args.title }); + const { text } = (await Networking.PostToServer('/getWikipediaSummary', { title: args.title })) as { text: string }; const id = uuidv4(); const url = `https://en.wikipedia.org/wiki/${args.title.replace(/ /g, '_')}`; this._addLinkedUrlDoc(url, id); -- cgit v1.2.3-70-g09d2