Merge branch 'master' into alyssa-starter

author: bobzel <zzzman@gmail.com> 2024-10-17 17:19:25 -0400
committer: bobzel <zzzman@gmail.com> 2024-10-17 17:19:25 -0400
commit: 8ac260db2fdffc37ff9b6e91971f287df6a70528 (patch)
tree: c4bad3d44cb4c374b84834a39f5fc664345784f7 /src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
parent: 3067940f28563d1217056f6eb428d377365077a8 (diff)
parent: dd93f5175064850c6c0e47f025cd7bbba1f23106 (diff)
1 file changed, 65 insertions, 49 deletions
diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
index 1efb389b8..f2e3863a6 100644
--- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
+++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
@@ -1,83 +1,99 @@
 import { v4 as uuidv4 } from 'uuid';
 import { Networking } from '../../../../Network';
 import { BaseTool } from './BaseTool';
+import { Observation } from '../types/types';
+import { ParametersType } from './ToolTypes';
 
-export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> {
+const websiteInfoScraperToolParams = [
+    {
+        name: 'urls',
+        type: 'string[]',
+        description: 'The URLs of the websites to scrape',
+        required: true,
+        max_inputs: 3,
+    },
+] as const;
+
+type WebsiteInfoScraperToolParamsType = typeof websiteInfoScraperToolParams;
+
+export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParamsType> {
     private _addLinkedUrlDoc: (url: string, id: string) => void;
 
     constructor(addLinkedUrlDoc: (url: string, id: string) => void) {
         super(
             'websiteInfoScraper',
             'Scrape detailed information from specific websites relevant to the user query',
-            {
-                url: {
-                    type: 'string',
-                    description: 'The URL(s) of the website(s) to scrape',
-                    required: true,
-                    max_inputs: 3,
-                },
-            },
+            websiteInfoScraperToolParams,
             `
-            Your task is to provide a comprehensive response to the user's prompt using the content scraped from relevant websites. Ensure you follow these guidelines for structuring your response:
+      Your task is to provide a comprehensive response to the user's prompt using the content scraped from relevant websites. Ensure you follow these guidelines for structuring your response:
 
-            1. Grounded Text Tag Structure:
-               - Wrap all text derived from the scraped website(s) in <grounded_text> tags.
-               - **Do not include non-sourced information** in <grounded_text> tags.
-               - Use a single <grounded_text> tag for content derived from a single website. If citing multiple websites, create new <grounded_text> tags for each.
-               - Ensure each <grounded_text> tag has a citation index corresponding to the scraped URL.
+      1. Grounded Text Tag Structure:
+         - Wrap all text derived from the scraped website(s) in <grounded_text> tags.
+         - **Do not include non-sourced information** in <grounded_text> tags.
+         - Use a single <grounded_text> tag for content derived from a single website. If citing multiple websites, create new <grounded_text> tags for each.
+         - Ensure each <grounded_text> tag has a citation index corresponding to the scraped URL.
 
-            2. Citation Tag Structure:
-               - Create a <citation> tag for each distinct piece of information used from the website(s).
-               - Each <citation> tag must reference a URL chunk using the chunk_id attribute.
-               - For URL-based citations, leave the citation content empty, but reference the chunk_id and type as 'url'.
+      2. Citation Tag Structure:
+         - Create a <citation> tag for each distinct piece of information used from the website(s).
+         - Each <citation> tag must reference a URL chunk using the chunk_id attribute.
+         - For URL-based citations, leave the citation content empty, but reference the chunk_id and type as 'url'.
 
-            3. Structural Integrity Checks:
-               - Ensure all opening and closing tags are matched properly.
-               - Verify that all citation_index attributes in <grounded_text> tags correspond to valid citations.
-               - Do not over-cite—cite only the most relevant parts of the websites.
+      3. Structural Integrity Checks:
+         - Ensure all opening and closing tags are matched properly.
+         - Verify that all citation_index attributes in <grounded_text> tags correspond to valid citations.
+         - Do not over-cite—cite only the most relevant parts of the websites.
 
-            Example Usage:
+      Example Usage:
 
-            <answer>
-                <grounded_text citation_index="1">
-                Based on data from the World Bank, economic growth has stabilized in recent years, following a surge in investments.
-                </grounded_text>
-                <grounded_text citation_index="2">
-                According to information retrieved from the International Monetary Fund, the inflation rate has been gradually decreasing since 2020.
-                </grounded_text>
+      <answer>
+          <grounded_text citation_index="1">
+          Based on data from the World Bank, economic growth has stabilized in recent years, following a surge in investments.
+          </grounded_text>
+          <grounded_text citation_index="2">
+          According to information retrieved from the International Monetary Fund, the inflation rate has been gradually decreasing since 2020.
+          </grounded_text>
 
-                <citations>
-                    <citation index="1" chunk_id="1234" type="url"></citation>
-                    <citation index="2" chunk_id="5678" type="url"></citation>
-                </citations>
+          <citations>
+              <citation index="1" chunk_id="1234" type="url"></citation>
+              <citation index="2" chunk_id="5678" type="url"></citation>
+          </citations>
 
-                <follow_up_questions>
-                    <question>What are the long-term economic impacts of increased investments on GDP?</question>
-                    <question>How might inflation trends affect future monetary policy?</question>
-                    <question>Are there additional factors that could influence economic growth beyond investments and inflation?</question>
-                </follow_up_questions>
-            </answer>
-            `,
+          <follow_up_questions>
+              <question>What are the long-term economic impacts of increased investments on GDP?</question>
+              <question>How might inflation trends affect future monetary policy?</question>
+              <question>Are there additional factors that could influence economic growth beyond investments and inflation?</question>
+          </follow_up_questions>
+      </answer>
+      `,
             'Returns the text content of the webpages for further analysis and grounding.'
         );
         this._addLinkedUrlDoc = addLinkedUrlDoc;
     }
 
-    async execute(args: { url: string | string[] }): Promise<unknown> {
-        const urls = Array.isArray(args.url) ? args.url : [args.url];
-        const results = [];
+    async execute(args: ParametersType<WebsiteInfoScraperToolParamsType>): Promise<Observation[]> {
+        const urls = args.urls;
 
-        for (const url of urls) {
+        // Create an array of promises, each one handling a website scrape for a URL
+        const scrapingPromises = urls.map(async url => {
             try {
                 const { website_plain_text } = await Networking.PostToServer('/scrapeWebsite', { url });
                 const id = uuidv4();
                 this._addLinkedUrlDoc(url, id);
-                results.push({ type: 'text', text: `<chunk chunk_id=${id} chunk_type=url>\n${website_plain_text}\n</chunk>\n` });
+                return {
+                    type: 'text',
+                    text: `<chunk chunk_id="${id}" chunk_type="url">\n${website_plain_text}\n</chunk>`,
+                } as Observation;
             } catch (error) {
                 console.log(error);
-                results.push({ type: 'text', text: `An error occurred while scraping the website: ${url}` });
+                return {
+                    type: 'text',
+                    text: `An error occurred while scraping the website: ${url}`,
+                } as Observation;
             }
-        }
+        });
+
+        // Wait for all scraping promises to resolve
+        const results = await Promise.all(scrapingPromises);
 
         return results;
     }
author	bobzel <zzzman@gmail.com>	2024-10-17 17:19:25 -0400
committer	bobzel <zzzman@gmail.com>	2024-10-17 17:19:25 -0400
commit	8ac260db2fdffc37ff9b6e91971f287df6a70528 (patch)
tree	c4bad3d44cb4c374b84834a39f5fc664345784f7 /src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
parent	3067940f28563d1217056f6eb428d377365077a8 (diff)
parent	dd93f5175064850c6c0e47f025cd7bbba1f23106 (diff)