import dotenv from 'dotenv';
import { XMLBuilder, XMLParser } from 'fast-xml-parser';
import { escape } from 'lodash'; // Imported escape from lodash
import OpenAI from 'openai';
import { DocumentOptions } from '../../../../documents/Documents';
import { AnswerParser } from '../response_parsers/AnswerParser';
import { StreamedAnswerParser } from '../response_parsers/StreamedAnswerParser';
import { BaseTool } from '../tools/BaseTool';
import { CalculateTool } from '../tools/CalculateTool';
//import { CreateAnyDocumentTool } from '../tools/CreateAnyDocTool';
import { CreateDocTool } from '../tools/CreateDocumentTool';
import { DataAnalysisTool } from '../tools/DataAnalysisTool';
import { ImageCreationTool } from '../tools/ImageCreationTool';
import { NoTool } from '../tools/NoTool';
import { SearchTool } from '../tools/SearchTool';
import { Parameter, ParametersType, TypeMap } from '../types/tool_types';
import { AgentMessage, ASSISTANT_ROLE, AssistantMessage, Observation, PROCESSING_TYPE, ProcessingInfo, TEXT_TYPE } from '../types/types';
import { Vectorstore } from '../vectorstore/Vectorstore';
import { getReactPrompt } from './prompts';
//import { DictionaryTool } from '../tools/DictionaryTool';
import { ChatCompletionMessageParam } from 'openai/resources';
import { Doc } from '../../../../../fields/Doc';
import { parsedDoc } from '../chatboxcomponents/ChatBox';
import { WebsiteInfoScraperTool } from '../tools/WebsiteInfoScraperTool';
import { Upload } from '../../../../../server/SharedMediaTypes';
import { RAGTool } from '../tools/RAGTool';
//import { CreateTextDocTool } from '../tools/CreateTextDocumentTool';

dotenv.config();

/**
 * The Agent class handles the interaction between the assistant and the tools available,
 * processes user queries, and manages the communication flow between the tools and OpenAI.
 *
 * The conversation is exchanged as XML `<stage number="…" role="…">` elements: the user side
 * sends a query or an observation, the assistant side answers with a thought, an action,
 * an action input, or a final answer (see `validateAssistantResponse`).
 */
export class Agent {
    /** Closing tag of a stage element; also used as the stop sequence for generation. */
    private static readonly STAGE_CLOSE_TAG = '</stage>';

    // Private properties
    private client: OpenAI;
    private messages: AgentMessage[] = [];
    private interMessages: AgentMessage[] = [];
    private vectorstore: Vectorstore;
    private _history: () => string;
    private _summaries: () => string;
    private _csvData: () => { filename: string; id: string; text: string }[];
    private actionNumber: number = 0;
    private thoughtNumber: number = 0;
    private processingNumber: number = 0;
    private processingInfo: ProcessingInfo[] = [];
    private streamedAnswerParser: StreamedAnswerParser = new StreamedAnswerParser();
    private tools: Record<string, BaseTool<ReadonlyArray<Parameter>>>;

    /**
     * The constructor initializes the agent with the vector store and toolset, and sets up the OpenAI client.
     * @param _vectorstore Vector store instance for document storage and retrieval.
     * @param summaries A function to retrieve document summaries.
     * @param history A function to retrieve chat history.
     * @param csvData A function to retrieve CSV data linked to the assistant.
     * @param addLinkedUrlDoc A function to add a linked document from a URL (handed to the scraper and search tools).
     * @param createImage Callback handed to the image creation tool.
     * @param addLinkedDoc Callback handed to the document creation tool.
     * @param createCSVInDash A function to create a CSV document in the dashboard (currently unused: the CSV tool is disabled).
     */
    constructor(
        _vectorstore: Vectorstore,
        summaries: () => string,
        history: () => string,
        csvData: () => { filename: string; id: string; text: string }[],
        addLinkedUrlDoc: (url: string, id: string) => void,
        createImage: (result: Upload.FileInformation & Upload.InspectionResults, options: DocumentOptions) => void,
        addLinkedDoc: (doc: parsedDoc) => Doc | undefined,
        // eslint-disable-next-line @typescript-eslint/no-unused-vars
        createCSVInDash: (url: string, title: string, id: string, data: string) => void
    ) {
        // Initialize OpenAI client with API key from environment.
        // NOTE(review): `dangerouslyAllowBrowser` suggests this runs client-side, which would ship the key to the browser — confirm this is intended.
        this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true });
        this.vectorstore = _vectorstore;
        this._history = history;
        this._summaries = summaries;
        this._csvData = csvData;

        // Define available tools for the assistant
        this.tools = {
            calculate: new CalculateTool(),
            rag: new RAGTool(this.vectorstore),
            dataAnalysis: new DataAnalysisTool(csvData),
            websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc),
            searchTool: new SearchTool(addLinkedUrlDoc),
            // createCSV: new CreateCSVTool(createCSVInDash),
            noTool: new NoTool(),
            imageCreationTool: new ImageCreationTool(createImage),
            // createTextDoc: new CreateTextDocTool(addLinkedDoc),
            createDoc: new CreateDocTool(addLinkedDoc),
            // createAnyDocument: new CreateAnyDocumentTool(addLinkedDoc),
            // dictionary: new DictionaryTool(),
        };
    }

    /**
     * This method handles the conversation flow with the assistant, processes user queries,
     * and manages the assistant's decision-making process, including tool actions.
     * @param question The user's question.
     * @param onProcessingUpdate Callback function for processing updates.
     * @param onAnswerUpdate Callback function for answer updates.
     * @param maxTurns The maximum number of turns to allow in the conversation.
     * @returns The final response from the assistant.
     * @throws An error if a response cannot be parsed/validated, if a tool action fails,
     *         or if no answer is produced before `maxTurns` is reached.
     */
    async askAgent(question: string, onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void, maxTurns: number = 30): Promise<AssistantMessage> {
        console.log(`Starting query: ${question}`);
        const MAX_QUERY_LENGTH = 1000; // adjust the limit as needed

        // Reject over-long questions with a canned assistant message instead of calling the model
        if (question.length > MAX_QUERY_LENGTH) {
            return {
                role: ASSISTANT_ROLE.ASSISTANT,
                content: [{ text: 'User query too long. Please shorten your question and try again.', index: 0, type: TEXT_TYPE.NORMAL, citation_ids: null }],
                processing_info: [],
            };
        }

        // Escape markup characters so the question cannot break the XML it is embedded in
        const sanitizedQuestion = escape(question);
        this.messages.push({ role: 'user', content: sanitizedQuestion });

        // Retrieve chat history and generate system prompt
        const chatHistory = this._history();
        const systemPrompt = getReactPrompt(Object.values(this.tools), this._summaries, chatHistory);

        // Start a fresh intermediate transcript: system prompt followed by the user's query as stage 1.
        // NOTE(review): the <query> wrapper mirrors what validateAssistantResponse expects of a user stage — confirm against getReactPrompt.
        this.interMessages = [
            { role: 'system', content: systemPrompt },
            { role: 'user', content: this.constructUserPrompt(1, 'user', `<query>${sanitizedQuestion}</query>`) },
        ];

        // Setup XML parser and builder
        const parser = new XMLParser({
            ignoreAttributes: false,
            attributeNamePrefix: '@_',
            textNodeName: '_text',
            isArray: name => ['query', 'url'].includes(name),
            processEntities: false, // Disable processing of entities
            stopNodes: ['*.entity'], // Do not process any entities
        });
        const builder = new XMLBuilder({ ignoreAttributes: false, attributeNamePrefix: '@_' });

        let currentAction: string | undefined;
        this.processingInfo = [];

        // `i` is the stage number of the assistant reply being requested; user stages fall on the numbers in between.
        let i = 2;
        while (i < maxTurns) {
            console.log(this.interMessages);
            console.log(`Turn ${i}/${maxTurns}`);

            // eslint-disable-next-line no-await-in-loop
            const streamed = await this.execute(onProcessingUpdate, onAnswerUpdate);
            // Generation stops at the closing stage tag (see execute), so put it back to keep the element well-formed.
            const result = streamed.trimEnd().endsWith(Agent.STAGE_CLOSE_TAG) ? streamed : streamed + Agent.STAGE_CLOSE_TAG;
            this.interMessages.push({ role: 'assistant', content: result });

            const nextUserStage = i + 1; // stage number of the user message that answers this assistant stage
            i += 2;

            let parsedResult;
            try {
                // Parse XML result from the assistant and validate its structure
                parsedResult = parser.parse(result);
                this.validateAssistantResponse(parsedResult);
            } catch (error) {
                throw new Error(`Error parsing or validating response: ${error}`);
            }

            // Extract the stage from the parsed result
            const stage = parsedResult.stage;
            if (!stage) {
                throw new Error(`Error: No stage found in response`);
            }

            // Handle different stage elements (thoughts, actions, inputs, answers) in document order
            for (const key of Object.keys(stage)) {
                if (key === 'thought') {
                    // Handle assistant's thoughts
                    console.log(`Thought: ${stage[key]}`);
                    this.processingNumber++;
                } else if (key === 'action') {
                    // Handle action stage
                    currentAction = stage[key] as string;
                    console.log(`Action: ${currentAction}`);
                    if (this.tools[currentAction]) {
                        // Reply with the rules for the chosen tool so the assistant can produce its inputs
                        const actionRules = builder.build({ action_rules: this.tools[currentAction].getActionRule() });
                        const nextPrompt = [{ type: 'text', text: this.constructUserPrompt(nextUserStage, 'user', actionRules) } as Observation];
                        this.interMessages.push({ role: 'user', content: nextPrompt });
                    } else {
                        // Defensive fallback: validateAssistantResponse already rejects unknown tools, so this is not expected to run
                        console.log('Error: No valid action');
                        this.interMessages.push({ role: 'user', content: `No valid action, try again.` });
                    }
                    break;
                } else if (key === 'action_input') {
                    // Handle action input stage
                    const actionInput = stage[key];
                    console.log(`Action input full:`, actionInput);
                    console.log(`Action input:`, actionInput.inputs);
                    if (!currentAction) {
                        throw new Error('Error: Action input without a valid action');
                    }
                    try {
                        // Run the tool and hand its observations back to the assistant
                        // eslint-disable-next-line no-await-in-loop
                        const observation = (await this.processAction(currentAction, actionInput.inputs)) as Observation[];
                        // NOTE(review): wrapper tags reconstructed from the stage/observation structure the validator describes — confirm against getReactPrompt.
                        const nextPrompt = [
                            { type: 'text', text: `<stage number="${nextUserStage}" role="user"> <observation>` },
                            ...observation,
                            { type: 'text', text: '</observation></stage>' },
                        ] as Observation[];
                        console.log(observation);
                        this.interMessages.push({ role: 'user', content: nextPrompt });
                        this.processingNumber++;
                        break;
                    } catch (error) {
                        throw new Error(`Error processing action: ${error}`);
                    }
                } else if (key === 'answer') {
                    // If an answer is found, end the query
                    console.log('Answer found. Ending query.');
                    this.streamedAnswerParser.reset();
                    return AnswerParser.parse(result, this.processingInfo);
                }
            }
        }

        throw new Error('Reached maximum turns. Ending query.');
    }

    /**
     * Wraps content in a stage element carrying the stage number and role.
     * @param stageNumber Position of the stage in the conversation.
     * @param role The role the stage belongs to (e.g. 'user').
     * @param content XML content placed inside the stage element.
     * @returns The serialized stage element.
     */
    private constructUserPrompt(stageNumber: number, role: string, content: string): string {
        return `<stage number="${stageNumber}" role="${role}">${content}</stage>`;
    }

    /**
     * Executes a step in the conversation, processing the assistant's response and parsing it in real-time.
     * @param onProcessingUpdate Callback for processing updates.
     * @param onAnswerUpdate Callback for answer updates.
     * @returns The full response from the assistant.
     */
    private async execute(onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void): Promise<string> {
        // Stream OpenAI response for real-time updates
        const stream = await this.client.chat.completions.create({
            model: 'gpt-4o',
            messages: this.interMessages as ChatCompletionMessageParam[],
            temperature: 0,
            stream: true,
            // NOTE(review): stop sequence reconstructed as the closing stage tag so each call yields one stage — confirm.
            stop: [Agent.STAGE_CLOSE_TAG],
        });

        let fullResponse = '';
        let currentTag = '';
        let currentContent = '';
        let isInsideTag = false;

        // Process each chunk of the streamed response
        for await (const chunk of stream) {
            const content = chunk.choices[0]?.delta?.content ?? '';
            fullResponse += content;

            // Minimal character-level tag tracker: only the name of the most recently opened tag is remembered
            for (const char of content) {
                if (currentTag === 'answer') {
                    // Once inside <answer>, every character (including nested markup) goes to the streamed answer parser
                    currentContent += char;
                    onAnswerUpdate(this.streamedAnswerParser.parse(char));
                    continue;
                }

                if (char === '<') {
                    // Start of a new tag
                    isInsideTag = true;
                    currentTag = '';
                    currentContent = '';
                } else if (char === '>') {
                    // End of the tag; a closing tag clears the current tag name
                    isInsideTag = false;
                    if (currentTag.startsWith('/')) {
                        currentTag = '';
                    }
                } else if (isInsideTag) {
                    // Append characters to the tag name
                    currentTag += char;
                } else if (currentTag === 'thought' || currentTag === 'action_input_description') {
                    // Mirror the text of thoughts / action descriptions into the processing info for the current step
                    currentContent += char;
                    const trimmed = currentContent.trim();
                    const existing = this.processingInfo.find(info => info.index === this.processingNumber);
                    if (existing) {
                        existing.content = trimmed;
                    } else {
                        this.processingInfo.push({
                            index: this.processingNumber,
                            type: currentTag === 'thought' ? PROCESSING_TYPE.THOUGHT : PROCESSING_TYPE.ACTION,
                            content: trimmed,
                        });
                    }
                    onProcessingUpdate(this.processingInfo);
                }
            }
        }

        return fullResponse;
    }

    /**
     * Validates the assistant's response to ensure it conforms to the expected XML structure.
     * @param response The parsed XML response from the assistant.
     * @throws An error if the response does not meet the expected structure.
     */
    private validateAssistantResponse(response: { stage: { [key: string]: object | string } }) {
        if (!response.stage) {
            throw new Error('Response does not contain a <stage> element');
        }

        // Validate that the stage has the required attributes
        const stage = response.stage;
        if (!stage['@_number'] || !stage['@_role']) {
            throw new Error('Stage element must have "number" and "role" attributes');
        }

        // The role of the stage determines which child elements are expected
        const role = stage['@_role'];

        if (role === 'assistant') {
            // Assistant's response should contain either 'thought', 'action', 'action_input', or 'answer'
            if (!('thought' in stage || 'action' in stage || 'action_input' in stage || 'answer' in stage)) {
                throw new Error('Assistant stage must contain a thought, action, action_input, or answer element');
            }

            if ('thought' in stage) {
                if (typeof stage.thought !== 'string' || stage.thought.trim() === '') {
                    throw new Error('Thought must be a non-empty string');
                }
            }

            if ('action' in stage) {
                if (typeof stage.action !== 'string' || stage.action.trim() === '') {
                    throw new Error('Action must be a non-empty string');
                }
                // The action must name one of the registered tools
                if (!Object.keys(this.tools).includes(stage.action)) {
                    throw new Error(`Action "${stage.action}" is not a valid tool`);
                }
            }

            if ('action_input' in stage) {
                const actionInput = stage.action_input;
                // Guard against a primitive here: the `in` operator throws a TypeError on non-objects
                if (typeof actionInput !== 'object' || actionInput === null || !('action_input_description' in actionInput) || typeof actionInput.action_input_description !== 'string') {
                    throw new Error('action_input must contain an action_input_description string');
                }
                if (!('inputs' in actionInput)) {
                    throw new Error('action_input must contain an inputs object');
                }
                // Further validation of inputs can be done here based on the expected parameters of the action
            }

            if ('answer' in stage) {
                const answer = stage.answer;
                // An answer that is plain text (not an object) cannot contain the required child elements
                if (typeof answer !== 'object' || answer === null || !('grounded_text' in answer || 'normal_text' in answer)) {
                    throw new Error('Answer must contain grounded_text or normal_text');
                }
                if (!('follow_up_questions' in answer)) {
                    throw new Error('Answer must contain follow_up_questions');
                }
                if (!('loop_summary' in answer)) {
                    throw new Error('Answer must contain a loop_summary');
                }
                // Additional validation for citations, grounded_text, etc., can be added here
            }
        } else if (role === 'user') {
            // User's stage should contain 'query' or 'observation'
            if (!('query' in stage || 'observation' in stage)) {
                throw new Error('User stage must contain a query or observation element');
            }
            if ('query' in stage && typeof stage.query !== 'string') {
                throw new Error('Query must be a string');
            }
            // 'observation' is accepted as-is; its structure is not validated yet
        } else {
            throw new Error(`Unknown role "${role}" in stage`);
        }

        // Add any additional validation rules as necessary
    }

    /**
     * Helper function to obtain an array of the expected element type from a tool input.
     * The input is first read as JSON text (e.g. `["a","b"]`); if that fails and the input
     * already is an array whose elements all have the expected type, it is returned unchanged.
     * @param input The input to convert, normally a JSON string.
     * @param expectedType The expected type of the array elements ('string', 'number', or 'boolean').
     * @returns The parsed array if valid, otherwise throws an error.
     * @throws An error if the input is neither valid JSON for such an array nor such an array itself.
     */
    private parseArray<T>(input: unknown, expectedType: 'string' | 'number' | 'boolean'): T[] {
        const isExpectedArray = (value: unknown): value is T[] => Array.isArray(value) && value.every(item => typeof item === expectedType);
        try {
            // String() makes explicit the coercion JSON.parse would apply to a non-string input anyway
            const parsed: unknown = JSON.parse(String(input));
            if (isExpectedArray(parsed)) {
                return parsed;
            }
            throw new Error(`Invalid ${expectedType} array format.`);
        } catch (error) {
            // Fall back to an input that is already a well-typed array
            if (isExpectedArray(input)) {
                return input;
            }
            throw new Error(`Failed to parse ${expectedType} array: ` + error);
        }
    }

    /**
     * Processes a specific action by invoking the appropriate tool with the provided inputs.
     * This method ensures that the action exists and validates the types of `actionInput`
     * based on the tool's parameter rules. It throws errors for missing required parameters
     * or mismatched types before executing the tool with the validated input.
     * Parameters that are absent (and not reported as missing) are skipped rather than type-checked.
     *
     * NOTE: In the future, it should typecheck for specific tool parameter types using the `TypeMap` or otherwise.
     *
     * Type validation includes checks for:
     * - `string`, `number`, `boolean`
     * - `string[]`, `number[]` (arrays of strings or numbers)
     *
     * @param action The action to perform. It corresponds to a registered tool.
     * @param actionInput The inputs for the action, passed as an object where each key is a parameter name.
     * @returns A promise that resolves to an array of `Observation` objects representing the result of the action.
     * @throws An error if the action is unknown, if required parameters are missing, or if input types don't match the expected parameter types.
     */
    private async processAction(action: string, actionInput: ParametersType<ReadonlyArray<Parameter>>): Promise<Observation[]> {
        // Own-property check so inherited names such as 'toString' are not mistaken for tools
        if (!Object.prototype.hasOwnProperty.call(this.tools, action)) {
            throw new Error(`Unknown action: ${action}`);
        }
        console.log(actionInput);

        const tool = this.tools[action];
        // The parsed inputs element may not be an object (presumably '' for an empty element); `in` would throw on it
        const inputs = (typeof actionInput === 'object' && actionInput !== null ? actionInput : {}) as ParametersType<ReadonlyArray<Parameter>>;

        for (const param of tool.parameterRules) {
            // A required parameter may be absent only if the tool's own validator accepts the input as a whole
            if (param.required && !(param.name in inputs) && !tool.inputValidator(inputs)) {
                throw new Error(`Missing required parameter: ${param.name}`);
            }

            const input = inputs[param.name];
            if (input === undefined) {
                continue; // nothing to type-check for an absent parameter
            }

            const expectedType = param.type.replace('[]', '') as 'string' | 'number' | 'boolean';
            if (param.type.endsWith('[]')) {
                // Replace the raw value with the validated array
                inputs[param.name] = this.parseArray(input, expectedType) as TypeMap[typeof param.type];
            } else if (typeof input !== expectedType) {
                throw new Error(`Invalid type for parameter ${param.name}: expected ${expectedType}`);
            }
        }

        return await tool.execute(inputs);
    }
}