From 4791cd23af08da70895204a3a7fbaf889d9af2d5 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sat, 7 Sep 2024 12:43:05 -0400 Subject: completely restructured, added comments, and significantly reduced the length of the prompt (~72% shorter and cheaper) --- .../nodes/chatbot/tools/WebsiteInfoScraperTool.ts | 83 ++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts (limited to 'src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts') diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts new file mode 100644 index 000000000..2118218f6 --- /dev/null +++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts @@ -0,0 +1,83 @@ +import { Networking } from '../../../../Network'; +import { BaseTool } from './BaseTool'; +import { v4 as uuidv4 } from 'uuid'; + +export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> { + private _addLinkedUrlDoc: (url: string, id: string) => void; + + constructor(addLinkedUrlDoc: (url: string, id: string) => void) { + super( + 'websiteInfoScraper', + 'Scrape detailed information from specific websites relevant to the user query', + { + url: { + type: 'string', + description: 'The URL(s) of the website(s) to scrape', + required: true, + max_inputs: 3, + }, + }, + ` + Your task is to provide a comprehensive response to the user's prompt using the content scraped from relevant websites. Ensure you follow these guidelines for structuring your response: + + 1. Grounded Text Tag Structure: + - Wrap all text derived from the scraped website(s) in tags. + - **Do not include non-sourced information** in tags. + - Use a single tag for content derived from a single website. If citing multiple websites, create new tags for each. + - Ensure each tag has a citation index corresponding to the scraped URL. + + 2. Citation Tag Structure: + - Create a tag for each distinct piece of information used from the website(s). + - Each tag must reference a URL chunk using the chunk_id attribute. + - For URL-based citations, leave the citation content empty, but reference the chunk_id and type as 'url'. + + 3. Structural Integrity Checks: + - Ensure all opening and closing tags are matched properly. + - Verify that all citation_index attributes in tags correspond to valid citations. + - Do not over-cite—cite only the most relevant parts of the websites. + + Example Usage: + + + + Based on data from the World Bank, economic growth has stabilized in recent years, following a surge in investments. + + + According to information retrieved from the International Monetary Fund, the inflation rate has been gradually decreasing since 2020. + + + + + + + + + What are the long-term economic impacts of increased investments on GDP? + How might inflation trends affect future monetary policy? + Are there additional factors that could influence economic growth beyond investments and inflation? + + + `, + 'Returns the text content of the webpages for further analysis and grounding.' + ); + this._addLinkedUrlDoc = addLinkedUrlDoc; + } + + async execute(args: { url: string | string[] }): Promise { + const urls = Array.isArray(args.url) ? args.url : [args.url]; + const results = []; + + for (const url of urls) { + try { + const { website_plain_text } = await Networking.PostToServer('/scrapeWebsite', { url }); + const id = uuidv4(); + this._addLinkedUrlDoc(url, id); + results.push({ type: 'text', text: `\n${website_plain_text}\n\n` }); + } catch (error) { + results.push({ type: 'text', text: `An error occurred while scraping the website: ${url}` }); + } + } + + return results; + } +} -- cgit v1.2.3-70-g09d2