diff options
Diffstat (limited to 'src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts')
| -rw-r--r-- | src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts | 52 |
1 file changed, 30 insertions, 22 deletions
diff --git a/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts index c59afefbd..739329bea 100644 --- a/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts +++ b/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts @@ -2,43 +2,51 @@ import { Networking } from '../../../../Network'; import { BaseTool } from './BaseTool'; import { v4 as uuidv4 } from 'uuid'; -export class WebsiteInfoScraperTool extends BaseTool<{ url: string }> { +export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> { private _addLinkedUrlDoc: (url: string, id: string) => void; constructor(addLinkedUrlDoc: (url: string, id: string) => void) { super( 'websiteInfoScraper', - 'Scrape detailed information from a specific website identified as the most relevant', + 'Scrape detailed information from specific websites identified as relevant to the user query', { url: { type: 'string', - description: 'The URL of the website to scrape', - required: true, + description: 'The URL(s) of the website(s) to scrape', + required: 'true', + max_inputs: '3', }, }, - 'Provide the URL of the website that you have identified as the most relevant from the previous search. This tool will scrape and process detailed information from that specific website. It will also create a document from the scraped content for future reference.', - 'Returns the full HTML content from the provided URL and creates a document from the content for further analysis.' + 'Provide up to 3 URLs of websites that you have identified as the most relevant from the previous search. This tool will provide screenshots of those specific websites. It will also create a document from the scraped content for future reference. When providing a response to the user, ideally reference as many of the websites as possible in order to provide a well grounded result.', + 'Returns the full content of the webpages as images for analysis.' 
); this._addLinkedUrlDoc = addLinkedUrlDoc; } - async execute(args: { url: string }): Promise<any> { - try { - const { website_image_base64 } = await Networking.PostToServer('/scrapeWebsite', { url: args.url }); - const id = uuidv4(); - this._addLinkedUrlDoc(args.url, id); - return [ - { type: 'text', text: `<chunk chunk_id=${id} chunk_type=url> ` }, - { - type: 'image_url', - image_url: { - url: `data:image/jpeg;base64,${website_image_base64}`, + async execute(args: { url: string | string[] }): Promise<any> { + const urls = Array.isArray(args.url) ? args.url : [args.url]; + const results = []; + + for (const url of urls) { + try { + const { website_image_base64 } = await Networking.PostToServer('/scrapeWebsite', { url }); + const id = uuidv4(); + this._addLinkedUrlDoc(url, id); + results.push( + { type: 'text', text: `<chunk chunk_id=${id} chunk_type=url> ` }, + { + type: 'image_url', + image_url: { + url: `data:image/jpeg;base64,${website_image_base64}`, + }, }, - }, - { type: 'text', text: `</chunk>\n` }, - ]; - } catch (error) { - return [{ type: 'text', text: 'An error occurred while scraping the website.' }]; + { type: 'text', text: `</chunk>\n` } + ); + } catch (error) { + results.push({ type: 'text', text: `An error occurred while scraping the website: ${url}` }); + } } + + return results; } } |
