From e8b724c22bed4b6ed01e34ba661228c348f50378 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Tue, 22 Oct 2024 13:47:46 -0400 Subject: fixed websearch tool endpoint so it only returns displayable results in Dash; also fixed type checking but needs to be improved --- .../views/nodes/chatbot/agentsystem/Agent.ts | 87 +++++++++++----------- .../views/nodes/chatbot/agentsystem/prompts.ts | 40 ++++++++-- .../nodes/chatbot/chatboxcomponents/ChatBox.tsx | 25 +------ src/client/views/nodes/chatbot/tools/SearchTool.ts | 7 +- src/client/views/nodes/chatbot/types/tool_types.ts | 2 +- src/client/views/nodes/chatbot/types/types.ts | 1 - src/server/ApiManagers/AssistantManager.ts | 64 ++++++++++++++-- 7 files changed, 144 insertions(+), 82 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts index 9253175d5..870abbc47 100644 --- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts +++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts @@ -15,7 +15,7 @@ import { AgentMessage, AssistantMessage, Observation, PROCESSING_TYPE, Processin import { Vectorstore } from '../vectorstore/Vectorstore'; import { getReactPrompt } from './prompts'; import { BaseTool } from '../tools/BaseTool'; -import { Parameter, ParametersType } from '../types/tool_types'; +import { Parameter, ParametersType, TypeMap } from '../types/tool_types'; import { CreateTextDocTool } from '../tools/CreateTextDocumentTool'; import { DocumentOptions } from '../../../../documents/Documents'; @@ -267,12 +267,36 @@ export class Agent { return fullResponse; } + /** + * Helper function to check if a string can be parsed as an array of the expected type. + * @param input The input string to check. + * @param expectedType The expected type of the array elements ('string', 'number', or 'boolean'). + * @returns The parsed array if valid, otherwise throws an error. + */ + private parseArray(input: string, expectedType: 'string' | 'number' | 'boolean'): T[] { + try { + // Parse the input string into a JSON object + const parsed = JSON.parse(input); + + // Check if the parsed object is an array and if all elements are of the expected type + if (Array.isArray(parsed) && parsed.every(item => typeof item === expectedType)) { + return parsed; + } else { + throw new Error(`Invalid ${expectedType} array format.`); + } + } catch (error) { + throw new Error(`Failed to parse ${expectedType} array: ` + error); + } + } + /** * Processes a specific action by invoking the appropriate tool with the provided inputs. * This method ensures that the action exists and validates the types of `actionInput` * based on the tool's parameter rules. It throws errors for missing required parameters * or mismatched types before safely executing the tool with the validated input. * + * NOTE: In the future, it should typecheck for specific tool parameter types using the `TypeMap` or otherwise. + * * Type validation includes checks for: * - `string`, `number`, `boolean` * - `string[]`, `number[]` (arrays of strings or numbers) @@ -282,56 +306,35 @@ export class Agent { * @returns A promise that resolves to an array of `Observation` objects representing the result of the action. * @throws An error if the action is unknown, if required parameters are missing, or if input types don't match the expected parameter types. */ - private async processAction(action: string, actionInput: Record): Promise { + private async processAction(action: string, actionInput: ParametersType>): Promise { // Check if the action exists in the tools list if (!(action in this.tools)) { throw new Error(`Unknown action: ${action}`); } + console.log(actionInput); - const tool = this.tools[action]; - - // Validate actionInput based on tool's parameter rules - for (const paramRule of tool.parameterRules) { - const inputValue = actionInput[paramRule.name]; - - if (paramRule.required && inputValue === undefined) { - throw new Error(`Missing required parameter: ${paramRule.name}`); + for (const param of this.tools[action].parameterRules) { + // Check if the parameter is required and missing in the input + if (param.required && !(param.name in actionInput)) { + throw new Error(`Missing required parameter: ${param.name}`); } - // If the parameter is defined, check its type - if (inputValue !== undefined) { - switch (paramRule.type) { - case 'string': - if (typeof inputValue !== 'string') { - throw new Error(`Expected parameter '${paramRule.name}' to be a string.`); - } - break; - case 'number': - if (typeof inputValue !== 'number') { - throw new Error(`Expected parameter '${paramRule.name}' to be a number.`); - } - break; - case 'boolean': - if (typeof inputValue !== 'boolean') { - throw new Error(`Expected parameter '${paramRule.name}' to be a boolean.`); - } - break; - case 'string[]': - if (!Array.isArray(inputValue) || !inputValue.every(item => typeof item === 'string')) { - throw new Error(`Expected parameter '${paramRule.name}' to be an array of strings.`); - } - break; - case 'number[]': - if (!Array.isArray(inputValue) || !inputValue.every(item => typeof item === 'number')) { - throw new Error(`Expected parameter '${paramRule.name}' to be an array of numbers.`); - } - break; - default: - throw new Error(`Unsupported parameter type: ${paramRule.type}`); - } + // Check if the parameter type matches the expected type + const expectedType = param.type.replace('[]', '') as 'string' | 'number' | 'boolean'; + const isArray = param.type.endsWith('[]'); + const input = actionInput[param.name]; + + if (isArray) { + // Check if the input is a valid array of the expected type + const parsedArray = this.parseArray(input as string, expectedType); + actionInput[param.name] = parsedArray as TypeMap[typeof param.type]; + } else if (typeof input !== expectedType) { + throw new Error(`Invalid type for parameter ${param.name}: expected ${expectedType}`); } } - return await tool.execute(actionInput as ParametersType); + const tool = this.tools[action]; + + return await tool.execute(actionInput); } } diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts index f5aec3130..140587b2f 100644 --- a/src/client/views/nodes/chatbot/agentsystem/prompts.ts +++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts @@ -7,9 +7,10 @@ * and summarizing content from provided text chunks. */ -import { Tool } from '../types/types'; +import { BaseTool } from '../tools/BaseTool'; +import { Parameter } from '../types/tool_types'; -export function getReactPrompt(tools: Tool[], summaries: () => string, chatHistory: string): string { +export function getReactPrompt(tools: BaseTool>[], summaries: () => string, chatHistory: string): string { const toolDescriptions = tools .map( tool => ` @@ -143,9 +144,9 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto - With key moments from the World Cup retrieved, I will now use the website scraper tool to gather data on Qatar's tourism impact during the World Cup. + With key moments from the World Cup retrieved, I will now use the search tool to gather data on Qatar's tourism impact during the World Cup. - websiteInfoScraper + searchTool @@ -156,7 +157,7 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto Scraping websites for information about Qatar's tourism impact during the 2022 World Cup. - Tourism impact of the 2022 World Cup in Qatar + ["Tourism impact of the 2022 World Cup in Qatar"] @@ -167,10 +168,39 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto https://www.qatartourism.com/world-cup-impact During the 2022 World Cup, Qatar saw a 40% increase in tourism, with over 1.5 million visitors attending. + ***Additional URLs and overviews omitted*** + + After retrieving the urls of relevant sites, I will now use the website scraping tool to gather data on Qatar's tourism impact during the World Cup from these sites. + websiteInfoScraper + + + + ***Action rules omitted*** + + + + + Getting information from the relevant websites about Qatar's tourism impact during the World Cup. + + [***URLS to search elided, but they will be comma seperated double quoted strings"] + + + + + + + + ***Data from the websites scraped*** + + ***Additional scraped sites omitted*** + + + + Now that I have gathered both key moments from the World Cup and tourism impact data from Qatar, I will summarize the information in my final response. diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index 98f242ebf..fcbaf2e27 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -355,29 +355,11 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { const linkDoc = Docs.Create.LinkDocument(this.Document, doc); LinkManager.Instance.addLink(linkDoc); - let canDisplay; - - try { - // Fetch the URL content through the proxy - const { data } = await Networking.PostToServer('/proxyFetch', { url }); - - // Simulating header behavior since we can't fetch headers via proxy - const xFrameOptions = data.headers?.['x-frame-options']; - - if (xFrameOptions && xFrameOptions.toUpperCase() === 'SAMEORIGIN') { - canDisplay = false; - } else { - canDisplay = true; - } - } catch (error) { - console.error('Error fetching the URL from the server:', error); - } const chunkToAdd = { chunkId: id, chunkType: CHUNK_TYPE.URL, url: url, - canDisplay: canDisplay, }; doc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] }); @@ -487,11 +469,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }); break; case CHUNK_TYPE.URL: - if (!foundChunk.canDisplay) { - window.open(StrCast(doc.displayUrl), '_blank'); - } else if (foundChunk.canDisplay) { - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); - } + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); + break; case CHUNK_TYPE.CSV: DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); diff --git a/src/client/views/nodes/chatbot/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts index 03340aae5..d22f4c189 100644 --- a/src/client/views/nodes/chatbot/tools/SearchTool.ts +++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts @@ -6,7 +6,7 @@ import { ParametersType } from '../types/tool_types'; const searchToolParams = [ { - name: 'query', + name: 'queries', type: 'string[]', description: 'The search query or queries to use for finding websites', required: true, @@ -20,7 +20,7 @@ export class SearchTool extends BaseTool { private _addLinkedUrlDoc: (url: string, id: string) => void; private _max_results: number; - constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 5) { + constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 4) { super( 'searchTool', 'Search the web to find a wide range of websites related to a query or multiple queries', @@ -33,8 +33,9 @@ export class SearchTool extends BaseTool { } async execute(args: ParametersType): Promise { - const queries = args.query; + const queries = args.queries; + console.log(`Searching the web for queries: ${queries[0]}`); // Create an array of promises, each one handling a search for a query const searchPromises = queries.map(async query => { try { diff --git a/src/client/views/nodes/chatbot/types/tool_types.ts b/src/client/views/nodes/chatbot/types/tool_types.ts index c1150534d..b2e05efe4 100644 --- a/src/client/views/nodes/chatbot/types/tool_types.ts +++ b/src/client/views/nodes/chatbot/types/tool_types.ts @@ -19,7 +19,7 @@ export type Parameter = { * A utility type that maps string representations of types to actual TypeScript types. * This is used to convert the `type` field of a `Parameter` into a concrete TypeScript type. */ -type TypeMap = { +export type TypeMap = { string: string; number: number; boolean: boolean; diff --git a/src/client/views/nodes/chatbot/types/types.ts b/src/client/views/nodes/chatbot/types/types.ts index 7abad85f0..c65ac9820 100644 --- a/src/client/views/nodes/chatbot/types/types.ts +++ b/src/client/views/nodes/chatbot/types/types.ts @@ -102,7 +102,6 @@ export interface SimplifiedChunk { location?: string; chunkType: CHUNK_TYPE; url?: string; - canDisplay?: boolean; } export interface AI_Document { diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 8447a4934..d7b72bac7 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -9,7 +9,7 @@ */ import { Readability } from '@mozilla/readability'; -import axios from 'axios'; +import axios, { AxiosResponse } from 'axios'; import { spawn } from 'child_process'; import * as fs from 'fs'; import { writeFile } from 'fs'; @@ -115,29 +115,79 @@ export default class AssistantManager extends ApiManager { }, }); - // Register Google Web Search Results API route register({ method: Method.POST, subscription: '/getWebSearchResults', secureHandler: async ({ req, res }) => { const { query, max_results } = req.body; - try { - // Fetch search results using Google Custom Search API - const response = await customsearch.cse.list({ + const MIN_VALID_RESULTS_RATIO = 0.75; // 3/4 threshold + let startIndex = 1; // Start at the first result initially + let validResults: any[] = []; + + const fetchSearchResults = async (start: number) => { + return customsearch.cse.list({ q: query, cx: process.env._CLIENT_GOOGLE_SEARCH_ENGINE_ID, key: process.env._CLIENT_GOOGLE_API_KEY, safe: 'active', num: max_results, + start, // This controls which result index the search starts from }); + }; + + const filterResultsByXFrameOptions = async (results: any[]) => { + const filteredResults = await Promise.all( + results.map(async result => { + try { + const urlResponse: AxiosResponse = await axios.head(result.url, { timeout: 5000 }); + const xFrameOptions = urlResponse.headers['x-frame-options']; + if (xFrameOptions && xFrameOptions.toUpperCase() === 'SAMEORIGIN') { + return result; + } + } catch (error) { + console.error(`Error checking x-frame-options for URL: ${result.url}`, error); + } + return null; // Exclude the result if it doesn't match + }) + ); + return filteredResults.filter(result => result !== null); // Remove null results + }; - const results = + try { + // Fetch initial search results + let response = await fetchSearchResults(startIndex); + let initialResults = response.data.items?.map(item => ({ url: item.link, snippet: item.snippet, })) || []; - res.send({ results }); + // Filter the initial results + validResults = await filterResultsByXFrameOptions(initialResults); + + // If valid results are less than 3/4 of max_results, fetch more results + while (validResults.length < max_results * MIN_VALID_RESULTS_RATIO) { + // Increment the start index by the max_results to fetch the next set of results + startIndex += max_results; + response = await fetchSearchResults(startIndex); + + const additionalResults = + response.data.items?.map(item => ({ + url: item.link, + snippet: item.snippet, + })) || []; + + const additionalValidResults = await filterResultsByXFrameOptions(additionalResults); + validResults = [...validResults, ...additionalValidResults]; // Combine valid results + + // Break if no more results are available + if (additionalValidResults.length === 0 || response.data.items?.length === 0) { + break; + } + } + + // Return the filtered valid results + res.send({ results: validResults.slice(0, max_results) }); // Limit the results to max_results } catch (error) { console.error('Error performing web search:', error); res.status(500).send({ -- cgit v1.2.3-70-g09d2