about | summary | refs | log | tree | commit | diff
path: root/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
diff options
context:
space:
mode:
author: A.J. Shulman <Shulman.aj@gmail.com> 2024-10-17 10:41:49 -0400
committer: A.J. Shulman <Shulman.aj@gmail.com> 2024-10-17 10:41:49 -0400
commit: 80d86bd5ae3e1d3dc70e7636f72a872a5fb2f01d (patch)
tree: 0eaea49f596bd16720f05a6535958ab8270673c8 /src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
parent: 596502c232ea6b6b88c3c58486e139074ea056ff (diff)
Implemented strict typechecking for tools, specifically tool inputs
Diffstat (limited to 'src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts')
-rw-r--r-- src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts 104
1 files changed, 58 insertions, 46 deletions
diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
index 8a4181b43..e91ebdad1 100644
--- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
+++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
@@ -2,70 +2,76 @@ import { v4 as uuidv4 } from 'uuid';
import { Networking } from '../../../../Network';
import { BaseTool } from './BaseTool';
import { Observation } from '../types/types';
+import { ParametersType } from './ToolTypes';
-export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> {
+const websiteInfoScraperToolParams = [
+ {
+ name: 'urls',
+ type: 'string[]',
+ description: 'The URLs of the websites to scrape',
+ required: true,
+ max_inputs: 3,
+ },
+] as const;
+
+type WebsiteInfoScraperToolParamsType = typeof websiteInfoScraperToolParams;
+
+export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParamsType> {
private _addLinkedUrlDoc: (url: string, id: string) => void;
constructor(addLinkedUrlDoc: (url: string, id: string) => void) {
super(
'websiteInfoScraper',
'Scrape detailed information from specific websites relevant to the user query',
- {
- url: {
- type: 'string',
- description: 'The URL(s) of the website(s) to scrape',
- required: true,
- max_inputs: 3,
- },
- },
+ websiteInfoScraperToolParams,
`
- Your task is to provide a comprehensive response to the user's prompt using the content scraped from relevant websites. Ensure you follow these guidelines for structuring your response:
+ Your task is to provide a comprehensive response to the user's prompt using the content scraped from relevant websites. Ensure you follow these guidelines for structuring your response:
- 1. Grounded Text Tag Structure:
- - Wrap all text derived from the scraped website(s) in <grounded_text> tags.
- - **Do not include non-sourced information** in <grounded_text> tags.
- - Use a single <grounded_text> tag for content derived from a single website. If citing multiple websites, create new <grounded_text> tags for each.
- - Ensure each <grounded_text> tag has a citation index corresponding to the scraped URL.
+ 1. Grounded Text Tag Structure:
+ - Wrap all text derived from the scraped website(s) in <grounded_text> tags.
+ - **Do not include non-sourced information** in <grounded_text> tags.
+ - Use a single <grounded_text> tag for content derived from a single website. If citing multiple websites, create new <grounded_text> tags for each.
+ - Ensure each <grounded_text> tag has a citation index corresponding to the scraped URL.
- 2. Citation Tag Structure:
- - Create a <citation> tag for each distinct piece of information used from the website(s).
- - Each <citation> tag must reference a URL chunk using the chunk_id attribute.
- - For URL-based citations, leave the citation content empty, but reference the chunk_id and type as 'url'.
+ 2. Citation Tag Structure:
+ - Create a <citation> tag for each distinct piece of information used from the website(s).
+ - Each <citation> tag must reference a URL chunk using the chunk_id attribute.
+ - For URL-based citations, leave the citation content empty, but reference the chunk_id and type as 'url'.
- 3. Structural Integrity Checks:
- - Ensure all opening and closing tags are matched properly.
- - Verify that all citation_index attributes in <grounded_text> tags correspond to valid citations.
- - Do not over-cite—cite only the most relevant parts of the websites.
+ 3. Structural Integrity Checks:
+ - Ensure all opening and closing tags are matched properly.
+ - Verify that all citation_index attributes in <grounded_text> tags correspond to valid citations.
+ - Do not over-cite—cite only the most relevant parts of the websites.
- Example Usage:
+ Example Usage:
- <answer>
- <grounded_text citation_index="1">
- Based on data from the World Bank, economic growth has stabilized in recent years, following a surge in investments.
- </grounded_text>
- <grounded_text citation_index="2">
- According to information retrieved from the International Monetary Fund, the inflation rate has been gradually decreasing since 2020.
- </grounded_text>
+ <answer>
+ <grounded_text citation_index="1">
+ Based on data from the World Bank, economic growth has stabilized in recent years, following a surge in investments.
+ </grounded_text>
+ <grounded_text citation_index="2">
+ According to information retrieved from the International Monetary Fund, the inflation rate has been gradually decreasing since 2020.
+ </grounded_text>
- <citations>
- <citation index="1" chunk_id="1234" type="url"></citation>
- <citation index="2" chunk_id="5678" type="url"></citation>
- </citations>
+ <citations>
+ <citation index="1" chunk_id="1234" type="url"></citation>
+ <citation index="2" chunk_id="5678" type="url"></citation>
+ </citations>
- <follow_up_questions>
- <question>What are the long-term economic impacts of increased investments on GDP?</question>
- <question>How might inflation trends affect future monetary policy?</question>
- <question>Are there additional factors that could influence economic growth beyond investments and inflation?</question>
- </follow_up_questions>
- </answer>
- `,
+ <follow_up_questions>
+ <question>What are the long-term economic impacts of increased investments on GDP?</question>
+ <question>How might inflation trends affect future monetary policy?</question>
+ <question>Are there additional factors that could influence economic growth beyond investments and inflation?</question>
+ </follow_up_questions>
+ </answer>
+ `,
'Returns the text content of the webpages for further analysis and grounding.'
);
this._addLinkedUrlDoc = addLinkedUrlDoc;
}
- async execute(args: { url: string | string[] }): Promise<Observation[]> {
- const urls = Array.isArray(args.url) ? args.url : [args.url];
+ async execute(args: ParametersType<WebsiteInfoScraperToolParamsType>): Promise<Observation[]> {
+ const urls = args.urls;
const results: Observation[] = [];
for (const url of urls) {
@@ -73,10 +79,16 @@ export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }>
const { website_plain_text } = await Networking.PostToServer('/scrapeWebsite', { url });
const id = uuidv4();
this._addLinkedUrlDoc(url, id);
- results.push({ type: 'text', text: `<chunk chunk_id=${id} chunk_type=url>\n${website_plain_text}\n</chunk>` });
+ results.push({
+ type: 'text',
+ text: `<chunk chunk_id="${id}" chunk_type="url">\n${website_plain_text}\n</chunk>`,
+ });
} catch (error) {
console.log(error);
- results.push({ type: 'text', text: `An error occurred while scraping the website: ${url}` });
+ results.push({
+ type: 'text',
+ text: `An error occurred while scraping the website: ${url}`,
+ });
}
}