diff options
| author | bobzel <zzzman@gmail.com> | 2024-10-17 17:19:25 -0400 |
|---|---|---|
| committer | bobzel <zzzman@gmail.com> | 2024-10-17 17:19:25 -0400 |
| commit | 8ac260db2fdffc37ff9b6e91971f287df6a70528 (patch) | |
| tree | c4bad3d44cb4c374b84834a39f5fc664345784f7 /src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts | |
| parent | 3067940f28563d1217056f6eb428d377365077a8 (diff) | |
| parent | dd93f5175064850c6c0e47f025cd7bbba1f23106 (diff) | |
Merge branch 'master' into alyssa-starter
Diffstat (limited to 'src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts')
| -rw-r--r-- | src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts | 114 |
1 file changed, 65 insertions, 49 deletions
diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts index 1efb389b8..f2e3863a6 100644 --- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts +++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts @@ -1,83 +1,99 @@ import { v4 as uuidv4 } from 'uuid'; import { Networking } from '../../../../Network'; import { BaseTool } from './BaseTool'; +import { Observation } from '../types/types'; +import { ParametersType } from './ToolTypes'; -export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> { +const websiteInfoScraperToolParams = [ + { + name: 'urls', + type: 'string[]', + description: 'The URLs of the websites to scrape', + required: true, + max_inputs: 3, + }, +] as const; + +type WebsiteInfoScraperToolParamsType = typeof websiteInfoScraperToolParams; + +export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParamsType> { private _addLinkedUrlDoc: (url: string, id: string) => void; constructor(addLinkedUrlDoc: (url: string, id: string) => void) { super( 'websiteInfoScraper', 'Scrape detailed information from specific websites relevant to the user query', - { - url: { - type: 'string', - description: 'The URL(s) of the website(s) to scrape', - required: true, - max_inputs: 3, - }, - }, + websiteInfoScraperToolParams, ` - Your task is to provide a comprehensive response to the user's prompt using the content scraped from relevant websites. Ensure you follow these guidelines for structuring your response: + Your task is to provide a comprehensive response to the user's prompt using the content scraped from relevant websites. Ensure you follow these guidelines for structuring your response: - 1. Grounded Text Tag Structure: - - Wrap all text derived from the scraped website(s) in <grounded_text> tags. - - **Do not include non-sourced information** in <grounded_text> tags. 
- - Use a single <grounded_text> tag for content derived from a single website. If citing multiple websites, create new <grounded_text> tags for each. - - Ensure each <grounded_text> tag has a citation index corresponding to the scraped URL. + 1. Grounded Text Tag Structure: + - Wrap all text derived from the scraped website(s) in <grounded_text> tags. + - **Do not include non-sourced information** in <grounded_text> tags. + - Use a single <grounded_text> tag for content derived from a single website. If citing multiple websites, create new <grounded_text> tags for each. + - Ensure each <grounded_text> tag has a citation index corresponding to the scraped URL. - 2. Citation Tag Structure: - - Create a <citation> tag for each distinct piece of information used from the website(s). - - Each <citation> tag must reference a URL chunk using the chunk_id attribute. - - For URL-based citations, leave the citation content empty, but reference the chunk_id and type as 'url'. + 2. Citation Tag Structure: + - Create a <citation> tag for each distinct piece of information used from the website(s). + - Each <citation> tag must reference a URL chunk using the chunk_id attribute. + - For URL-based citations, leave the citation content empty, but reference the chunk_id and type as 'url'. - 3. Structural Integrity Checks: - - Ensure all opening and closing tags are matched properly. - - Verify that all citation_index attributes in <grounded_text> tags correspond to valid citations. - - Do not over-cite—cite only the most relevant parts of the websites. + 3. Structural Integrity Checks: + - Ensure all opening and closing tags are matched properly. + - Verify that all citation_index attributes in <grounded_text> tags correspond to valid citations. + - Do not over-cite—cite only the most relevant parts of the websites. 
- Example Usage: + Example Usage: - <answer> - <grounded_text citation_index="1"> - Based on data from the World Bank, economic growth has stabilized in recent years, following a surge in investments. - </grounded_text> - <grounded_text citation_index="2"> - According to information retrieved from the International Monetary Fund, the inflation rate has been gradually decreasing since 2020. - </grounded_text> + <answer> + <grounded_text citation_index="1"> + Based on data from the World Bank, economic growth has stabilized in recent years, following a surge in investments. + </grounded_text> + <grounded_text citation_index="2"> + According to information retrieved from the International Monetary Fund, the inflation rate has been gradually decreasing since 2020. + </grounded_text> - <citations> - <citation index="1" chunk_id="1234" type="url"></citation> - <citation index="2" chunk_id="5678" type="url"></citation> - </citations> + <citations> + <citation index="1" chunk_id="1234" type="url"></citation> + <citation index="2" chunk_id="5678" type="url"></citation> + </citations> - <follow_up_questions> - <question>What are the long-term economic impacts of increased investments on GDP?</question> - <question>How might inflation trends affect future monetary policy?</question> - <question>Are there additional factors that could influence economic growth beyond investments and inflation?</question> - </follow_up_questions> - </answer> - `, + <follow_up_questions> + <question>What are the long-term economic impacts of increased investments on GDP?</question> + <question>How might inflation trends affect future monetary policy?</question> + <question>Are there additional factors that could influence economic growth beyond investments and inflation?</question> + </follow_up_questions> + </answer> + `, 'Returns the text content of the webpages for further analysis and grounding.' 
); this._addLinkedUrlDoc = addLinkedUrlDoc; } - async execute(args: { url: string | string[] }): Promise<unknown> { - const urls = Array.isArray(args.url) ? args.url : [args.url]; - const results = []; + async execute(args: ParametersType<WebsiteInfoScraperToolParamsType>): Promise<Observation[]> { + const urls = args.urls; - for (const url of urls) { + // Create an array of promises, each one handling a website scrape for a URL + const scrapingPromises = urls.map(async url => { try { const { website_plain_text } = await Networking.PostToServer('/scrapeWebsite', { url }); const id = uuidv4(); this._addLinkedUrlDoc(url, id); - results.push({ type: 'text', text: `<chunk chunk_id=${id} chunk_type=url>\n${website_plain_text}\n</chunk>\n` }); + return { + type: 'text', + text: `<chunk chunk_id="${id}" chunk_type="url">\n${website_plain_text}\n</chunk>`, + } as Observation; } catch (error) { console.log(error); - results.push({ type: 'text', text: `An error occurred while scraping the website: ${url}` }); + return { + type: 'text', + text: `An error occurred while scraping the website: ${url}`, + } as Observation; } - } + }); + + // Wait for all scraping promises to resolve + const results = await Promise.all(scrapingPromises); return results; } |
