about summary refs log tree commit diff
path: root/src/client/views/nodes/chatbot/agentsystem
diff options
context:
space:
mode:
author: A.J. Shulman <Shulman.aj@gmail.com> 2024-09-07 12:43:05 -0400
committer: A.J. Shulman <Shulman.aj@gmail.com> 2024-09-07 12:43:05 -0400
commit4791cd23af08da70895204a3a7fbaf889d9af2d5 (patch)
treec4c2534e64724d62bae9152763f1a74cd5a963e0 /src/client/views/nodes/chatbot/agentsystem
parent210f8f5f1cd19e9416a12524cce119b273334fd3 (diff)
completely restructured, added comments, and significantly reduced the length of the prompt (~72% shorter and cheaper)
Diffstat (limited to 'src/client/views/nodes/chatbot/agentsystem')
-rw-r--r--  src/client/views/nodes/chatbot/agentsystem/Agent.ts    | 278
-rw-r--r--  src/client/views/nodes/chatbot/agentsystem/prompts.ts  | 181
2 files changed, 459 insertions, 0 deletions
diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
new file mode 100644
index 000000000..180d05cf3
--- /dev/null
+++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
@@ -0,0 +1,278 @@
+import OpenAI from 'openai';
+import { Tool, AgentMessage, AssistantMessage, TEXT_TYPE, CHUNK_TYPE, ASSISTANT_ROLE, ProcessingInfo, PROCESSING_TYPE } from '../types/types';
+import { getReactPrompt } from './prompts';
+import { XMLParser, XMLBuilder } from 'fast-xml-parser';
+import { Vectorstore } from '../vectorstore/Vectorstore';
+import { ChatCompletionMessageParam } from 'openai/resources';
+import dotenv from 'dotenv';
+import { CalculateTool } from '../tools/CalculateTool';
+import { RAGTool } from '../tools/RAGTool';
+import { DataAnalysisTool } from '../tools/DataAnalysisTool';
+import { WebsiteInfoScraperTool } from '../tools/WebsiteInfoScraperTool';
+import { SearchTool } from '../tools/SearchTool';
+import { NoTool } from '../tools/NoTool';
+import { v4 as uuidv4 } from 'uuid';
+import { AnswerParser } from '../response_parsers/AnswerParser';
+import { StreamedAnswerParser } from '../response_parsers/StreamedAnswerParser';
+import { CreateCSVTool } from '../tools/CreateCSVTool';
+
+dotenv.config();
+
/**
 * The Agent class orchestrates a ReAct-style loop between the OpenAI chat API and a
 * fixed set of local tools (calculation, RAG, data analysis, web scraping, search,
 * CSV creation). It streams the model's XML-tagged responses, surfaces intermediate
 * "thought"/"action" updates to the UI via callbacks, executes the requested tool
 * actions, and returns the final parsed answer.
 */
export class Agent {
    // Private properties
    private client: OpenAI; // OpenAI SDK client (browser mode enabled in constructor)
    private tools: Record<string, Tool<any>>; // maps action name (as emitted by the model) -> tool implementation
    private messages: AgentMessage[] = []; // NOTE(review): appended to in askAgent but never read in this class — confirm external use or remove
    private interMessages: AgentMessage[] = []; // full intermediate transcript (system prompt + stages) resent to the model each turn
    private vectorstore: Vectorstore; // document store backing the RAG tool
    private _history: () => string; // supplier of serialized chat history for the system prompt
    private _summaries: () => string; // supplier of linked-document summaries for the system prompt
    private _csvData: () => { filename: string; id: string; text: string }[]; // supplier of CSV data linked to the assistant
    private actionNumber: number = 0; // NOTE(review): never used — candidate for removal
    private thoughtNumber: number = 0; // NOTE(review): never used — candidate for removal
    private processingNumber: number = 0; // index of the current thought/action entry within processingInfo
    private processingInfo: ProcessingInfo[] = []; // accumulated thought/action updates streamed to the UI
    private streamedAnswerParser: StreamedAnswerParser = new StreamedAnswerParser(); // incremental parser for <answer> content

    /**
     * The constructor initializes the agent with the vector store and toolset, and sets up the OpenAI client.
     * @param _vectorstore Vector store instance for document storage and retrieval.
     * @param summaries A function to retrieve document summaries.
     * @param history A function to retrieve chat history.
     * @param csvData A function to retrieve CSV data linked to the assistant.
     * @param addLinkedUrlDoc A function to add a linked document from a URL.
     * @param createCSVInDash A function to create a CSV document in the dashboard.
     */
    constructor(
        _vectorstore: Vectorstore,
        summaries: () => string,
        history: () => string,
        csvData: () => { filename: string; id: string; text: string }[],
        addLinkedUrlDoc: (url: string, id: string) => void,
        createCSVInDash: (url: string, title: string, id: string, data: string) => void
    ) {
        // Initialize OpenAI client with API key from environment.
        // NOTE(review): dangerouslyAllowBrowser exposes the API key to the client bundle — confirm this is intentional.
        this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true });
        this.vectorstore = _vectorstore;
        this._history = history;
        this._summaries = summaries;
        this._csvData = csvData;

        // Define available tools for the assistant. Keys must match the <action>
        // names the system prompt teaches the model to emit.
        this.tools = {
            calculate: new CalculateTool(),
            rag: new RAGTool(this.vectorstore),
            dataAnalysis: new DataAnalysisTool(csvData),
            websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc),
            searchTool: new SearchTool(addLinkedUrlDoc),
            createCSV: new CreateCSVTool(createCSVInDash),
            no_tool: new NoTool(),
        };
    }

    /**
     * Runs the full ReAct conversation loop for one user question: builds the system
     * prompt, repeatedly queries the model, dispatches tool actions it requests, and
     * stops when the model emits an <answer> stage (or the turn budget is exhausted).
     * @param question The user's question.
     * @param onProcessingUpdate Callback invoked with the running list of thought/action updates.
     * @param onAnswerUpdate Callback invoked with the incrementally streamed answer text.
     * @param maxTurns The maximum number of turns to allow in the conversation (default 30; the loop advances in steps of 2 because each iteration consumes an assistant stage plus a user/system stage).
     * @returns The final parsed assistant message.
     * @throws If the model's XML cannot be parsed, an action input arrives without an action, or maxTurns is reached without an answer.
     */
    async askAgent(question: string, onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void, maxTurns: number = 30): Promise<AssistantMessage> {
        console.log(`Starting query: ${question}`);

        // Push user's question to message history.
        this.messages.push({ role: 'user', content: question });

        // Retrieve chat history and generate the system prompt (includes tool list and summaries).
        const chatHistory = this._history();
        const systemPrompt = getReactPrompt(Object.values(this.tools), this._summaries, chatHistory);

        // Initialize intermediate messages: system prompt, then the query as stage 1.
        this.interMessages = [{ role: 'system', content: systemPrompt }];
        this.interMessages.push({ role: 'user', content: `<stage number="1" role="user"><query>${question}</query></stage>` });

        // Setup XML parser and builder. 'query' and 'url' elements are forced to
        // arrays so single and multiple occurrences parse to the same shape.
        const parser = new XMLParser({
            ignoreAttributes: false,
            attributeNamePrefix: '@_',
            textNodeName: '_text',
            isArray: (name, jpath, isLeafNode, isAttribute) => ['query', 'url'].indexOf(name) !== -1,
        });
        const builder = new XMLBuilder({ ignoreAttributes: false, attributeNamePrefix: '@_' });

        let currentAction: string | undefined;
        this.processingInfo = [];

        // Conversation loop (up to maxTurns). i tracks the assistant's stage number;
        // even stages are assistant responses, odd stages (i + 1) are our replies.
        for (let i = 2; i < maxTurns; i += 2) {
            console.log(this.interMessages);
            console.log(`Turn ${i}/${maxTurns}`);

            // Execute a step in the conversation and get the streamed full response.
            const result = await this.execute(onProcessingUpdate, onAnswerUpdate);
            this.interMessages.push({ role: 'assistant', content: result });

            let parsedResult;
            try {
                // Parse XML result from the assistant.
                parsedResult = parser.parse(result);
            } catch (error) {
                throw new Error(`Error parsing response: ${error}`);
            }

            // Extract the stage from the parsed result.
            const stage = parsedResult.stage;
            if (!stage) {
                throw new Error(`Error: No stage found in response`);
            }

            // Handle different stage elements (thoughts, actions, inputs, answers).
            // Key order follows document order, so 'thought' is seen before 'action'.
            for (const key in stage) {
                if (key === 'thought') {
                    // Thoughts were already streamed to the UI in execute(); just log and
                    // advance the processing index so the next update gets its own entry.
                    console.log(`Thought: ${stage[key]}`);
                    this.processingNumber++;
                } else if (key === 'action') {
                    // Handle action stage: the model has chosen a tool by name.
                    currentAction = stage[key] as string;
                    console.log(`Action: ${currentAction}`);

                    if (this.tools[currentAction]) {
                        // Reply with the chosen tool's action rules so the model can build its inputs.
                        const nextPrompt = [
                            {
                                type: 'text',
                                text: `<stage number="${i + 1}" role="user">` + builder.build({ action_rules: this.tools[currentAction].getActionRule() }) + `</stage>`,
                            },
                        ];
                        this.interMessages.push({ role: 'user', content: nextPrompt });
                        break;
                    } else {
                        // Invalid action name: report the error back to the model and let it retry.
                        console.log('Error: No valid action');
                        this.interMessages.push({ role: 'user', content: `<stage number="${i + 1}" role="system-error-reporter">No valid action, try again.</stage>` });
                        break;
                    }
                } else if (key === 'action_input') {
                    // Handle action input stage: run the previously selected tool.
                    const actionInput = stage[key];
                    console.log(`Action input:`, actionInput.inputs);

                    if (currentAction) {
                        try {
                            // Process the action and wrap its observation for the next stage.
                            const observation = await this.processAction(currentAction, actionInput.inputs);
                            const nextPrompt = [{ type: 'text', text: `<stage number="${i + 1}" role="user"> <observation>` }, ...observation, { type: 'text', text: '</observation></stage>' }];
                            console.log(observation);
                            this.interMessages.push({ role: 'user', content: nextPrompt });
                            this.processingNumber++;
                            break;
                        } catch (error) {
                            throw new Error(`Error processing action: ${error}`);
                        }
                    } else {
                        throw new Error('Error: Action input without a valid action');
                    }
                } else if (key === 'answer') {
                    // If an answer is found, end the query and return the parsed result.
                    console.log('Answer found. Ending query.');
                    this.streamedAnswerParser.reset();
                    const parsedAnswer = AnswerParser.parse(result, this.processingInfo);
                    return parsedAnswer;
                }
            }
        }

        throw new Error('Reached maximum turns. Ending query.');
    }

    /**
     * Executes a step in the conversation: streams one chat completion and scans the
     * stream character by character for <thought>, <action_input_description>, and
     * <answer> tags, forwarding their contents to the UI callbacks in real time.
     * @param onProcessingUpdate Callback for thought/action-description updates.
     * @param onAnswerUpdate Callback for incremental answer text.
     * @returns The full (unparsed) response text from the assistant.
     */
    private async execute(onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void): Promise<string> {
        // Stream OpenAI response for real-time updates.
        const stream = await this.client.chat.completions.create({
            model: 'gpt-4o',
            messages: this.interMessages as ChatCompletionMessageParam[],
            temperature: 0,
            stream: true,
        });

        let fullResponse: string = '';
        let currentTag: string = ''; // name of the most recently opened tag ('' when none)
        let currentContent: string = ''; // text accumulated inside the current tag
        let isInsideTag: boolean = false; // true while between '<' and '>' of a tag

        // Process each chunk of the streamed response.
        for await (const chunk of stream) {
            let content = chunk.choices[0]?.delta?.content || '';
            fullResponse += content;

            // Parse the streamed content character by character.
            for (const char of content) {
                if (currentTag === 'answer') {
                    // Inside <answer>: delegate every character to the streamed answer parser.
                    // NOTE(review): the closing </answer> characters also take this branch and
                    // currentTag is never reset here — presumably StreamedAnswerParser strips
                    // trailing tag text itself; confirm.
                    currentContent += char;
                    const streamedAnswer = this.streamedAnswerParser.parse(char);
                    onAnswerUpdate(streamedAnswer);
                    continue;
                } else if (char === '<') {
                    // Start of a new tag: reset tag-name and content accumulators.
                    isInsideTag = true;
                    currentTag = '';
                    currentContent = '';
                } else if (char === '>') {
                    // End of the tag; a closing tag (leading '/') clears the current tag.
                    isInsideTag = false;
                    if (currentTag.startsWith('/')) {
                        currentTag = '';
                    }
                } else if (isInsideTag) {
                    // Append characters to the tag name.
                    currentTag += char;
                } else if (currentTag === 'thought' || currentTag === 'action_input_description') {
                    // Accumulate thought / action-description text and push it to the UI,
                    // updating the entry at processingNumber in place if it already exists.
                    currentContent += char;
                    const current_info = this.processingInfo.find(info => info.index === this.processingNumber);
                    if (current_info) {
                        current_info.content = currentContent.trim();
                        onProcessingUpdate(this.processingInfo);
                    } else {
                        this.processingInfo.push({
                            index: this.processingNumber,
                            type: currentTag === 'thought' ? PROCESSING_TYPE.THOUGHT : PROCESSING_TYPE.ACTION,
                            content: currentContent.trim(),
                        });
                        onProcessingUpdate(this.processingInfo);
                    }
                }
            }
        }

        return fullResponse;
    }

    /**
     * Processes a specific action by invoking the appropriate tool with the provided inputs.
     * @param action The action (tool key) to perform.
     * @param actionInput The inputs for the action, as parsed from the model's XML.
     * @returns The result of the tool's execute() call.
     * @throws If the action name does not match any registered tool.
     */
    private async processAction(action: string, actionInput: any): Promise<any> {
        if (!(action in this.tools)) {
            throw new Error(`Unknown action: ${action}`);
        }

        const tool = this.tools[action];
        return await tool.execute(actionInput);
    }
}
diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
new file mode 100644
index 000000000..9daabc35f
--- /dev/null
+++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
@@ -0,0 +1,181 @@
+// prompts.ts
+
+import { Tool } from '../types/types';
+
/**
 * Builds the XML system prompt that drives the agent's ReAct loop.
 *
 * The prompt teaches the model the stage protocol (even-numbered assistant stages,
 * stop-and-wait after each stage), the answer structure (<grounded_text> with
 * citations, <normal_text>, three follow-up questions, loop summary), lists the
 * available tools, and embeds document summaries, chat history, and a full worked
 * example interaction.
 *
 * @param tools The tools available to the agent; each contributes a <tool> entry with its name and brief summary.
 * @param summaries Supplier of linked-document summaries, invoked once while building the prompt.
 * @param chatHistory Serialized prior chat history to embed in the prompt.
 * @returns The complete <system_message> prompt string.
 */
export function getReactPrompt(tools: Tool[], summaries: () => string, chatHistory: string): string {
    // Render each tool as an XML <tool> entry for the <tools> section below.
    const toolDescriptions = tools
        .map(
            tool => `
  <tool>
    <title>${tool.name}</title>
    <brief_summary>${tool.briefSummary}</brief_summary>
  </tool>`
        )
        .join('\n');

    return `<system_message>
  <task>
    You are an advanced AI assistant equipped with tools to answer user queries efficiently. You operate in a loop that is RIGIDLY structured and requires the use of specific tags and formats for your responses. Your goal is to provide accurate and well-structured answers to user queries. Below are the guidelines and information you can use to structure your approach to accomplishing this task.
  </task>

  <critical_points>
    <point>**STRUCTURE**: Always use the correct stage tags (e.g., <stage number="2" role="assistant">) for every response. Use only even-numbered stages for your responses.</point>
    <point>**STOP after every stage and wait for input. Do not combine multiple stages in one response.**</point>
    <point>If a tool is needed, select the most appropriate tool based on the query.</point>
    <point>**If one tool does not yield satisfactory results or fails twice, try another tool that might work better for the query.**</point>
    <point>Ensure that **ALL answers follow the answer structure**: grounded text wrapped in <grounded_text> tags with corresponding citations, normal text in <normal_text> tags, and three follow-up questions at the end.</point>
  </critical_points>

  <answer_structure>
    <answer>
      <grounded_text> - All information derived from tools or user documents must be wrapped in these tags with proper citation.</grounded_text>
      <normal_text> - Use this tag for text not derived from tools or user documents.</normal_text>
      <citations>
        <citation> - Provide proper citations for each <grounded_text>, referencing the tool or document chunk used.</citation>
      </citations>
      <follow_up_questions> - Provide exactly three user-perspective follow-up questions.</follow_up_questions>
      <loop_summary> - Summarize the actions and tools used in the conversation.</loop_summary>
    </answer>
  </answer_structure>

  <grounded_text_guidelines>
    <step>**Wrap ALL tool-based information** in <grounded_text> tags and provide citations.</step>
    <step>Use separate <grounded_text> tags for distinct information or when switching to a different tool or document.</step>
    <step>Ensure that **EVERY** <grounded_text> tag includes a citation index referencing the source of the information.</step>
    <step>Over-citing is discouraged—only cite the information that is directly relevant to the user's query.</step>
  </grounded_text_guidelines>

  <normal_text_guidelines>
    <step>Wrap general information or reasoning **not derived from tools or documents** in <normal_text> tags.</step>
    <step>Never put information derived from user documents or tools in <normal_text> tags—use <grounded_text> for those.</step>
  </normal_text_guidelines>

  <operational_process>
    <step>Carefully analyze the user query and determine if a tool is necessary to provide an accurate answer.</step>
    <step>If a tool is needed, choose the most appropriate one and **stop after the action** to wait for system input.</step>
    <step>If no tool is needed, use the 'no_tool' action but follow the structure.</step>
    <step>When all observations are complete, format the final answer using <grounded_text> and <normal_text> tags with appropriate citations.</step>
    <step>Include exactly three follow-up questions from the user's perspective.</step>
    <step>Provide a loop summary at the end of the conversation.</step>
  </operational_process>

  <tools>
    ${toolDescriptions}
    <note>If no external tool is required, use 'no_tool', but if there might be relevant external information, use the appropriate tool.</note>
  </tools>

  <summaries>
    ${summaries()}
  </summaries>

  <chat_history>
    ${chatHistory}
  </chat_history>

  <example_interaction>
    <interaction description="Correct use of RAG and website scraping tools">
      <stage number="1" role="user">
        <query>Can you provide key moments from the 2022 World Cup and its impact on tourism in Qatar?</query>
      </stage>

      <stage number="2" role="assistant">
        <thought>
          I will use the RAG tool to retrieve key moments from the user's World Cup documents. Afterward, I will use the website scraper tool to gather tourism impact data on Qatar.
        </thought>
        <action>rag</action>
      </stage>

      <stage number="3" role="user">
        <action_rules>***Action rules omitted***</action_rules>
      </stage>

      <stage number="4" role="assistant">
        <action_input>
          <action_input_description>Searching user documents for key moments from the 2022 World Cup.</action_input_description>
          <inputs>
            <hypothetical_document_chunk>Key moments from the 2022 World Cup. Goals, assists, big wins, big losses.</hypothetical_document_chunk>
          </inputs>
        </action_input>
      </stage>

      <stage number="5" role="user">
        <observation>
          <chunk chunk_id="1234" chunk_type="text">
            The 2022 FIFA World Cup saw Argentina win, with Lionel Messi's performance being a key highlight. It was widely celebrated as a historical moment in sports.
          </chunk>
        </observation>
      </stage>

      <stage number="6" role="assistant">
        <thought>
          With key moments from the World Cup retrieved, I will now use the website scraper tool to gather data on Qatar’s tourism impact during the World Cup.
        </thought>
        <action>websiteInfoScraper</action>
      </stage>

      <stage number="7" role="user">
        <action_rules>***Action rules omitted***</action_rules>
      </stage>

      <stage number="8" role="assistant">
        <action_input>
          <action_input_description>Scraping websites for information about Qatar's tourism impact during the 2022 World Cup.</action_input_description>
          <inputs>
            <query>Tourism impact of the 2022 World Cup in Qatar</query>
          </inputs>
        </action_input>
      </stage>

      <stage number="9" role="user">
        <observation>
          <chunk chunk_id="5678" chunk_type="url">
            <url>https://www.qatartourism.com/world-cup-impact</url>
            <overview>During the 2022 World Cup, Qatar saw a 40% increase in tourism, with over 1.5 million visitors attending.</overview>
          </chunk>
        </observation>
      </stage>

      <stage number="10" role="assistant">
        <thought>
          Now that I have gathered both key moments from the World Cup and tourism impact data from Qatar, I will summarize the information in my final response.
        </thought>
        <answer>
          <grounded_text citation_index="1">**The 2022 World Cup** saw Argentina crowned champions, with **Lionel Messi** leading his team to victory, marking a historic moment in sports.</grounded_text>
          <grounded_text citation_index="2">**Qatar** experienced a **40% increase in tourism** during the World Cup, welcoming over **1.5 million visitors**, significantly boosting its economy.</grounded_text>
          <normal_text>Moments like **Messi’s triumph** often become ingrained in the legacy of World Cups, immortalizing these tournaments in both sports and cultural memory. The **long-term implications** of the World Cup on Qatar's **economy, tourism**, and **global image** remain important areas of interest as the country continues to build on the momentum generated by hosting this prestigious event.</normal_text>
          <citations>
            <citation index="1" chunk_id="1234" type="text">Key moments from the 2022 World Cup.</citation>
            <citation index="2" chunk_id="5678" type="url"></citation>
          </citations>
          <follow_up_questions>
            <question>What long-term effects has the World Cup had on Qatar's economy and infrastructure?</question>
            <question>Can you compare Qatar's tourism numbers with previous World Cup hosts?</question>
            <question>How has Qatar’s image on the global stage evolved post-World Cup?</question>
          </follow_up_questions>
          <loop_summary>
            The assistant first used the RAG tool to extract key moments from the user documents about the 2022 World Cup. Then, the assistant utilized the website scraping tool to gather data on Qatar's tourism impact. Both tools provided valuable information, and no additional tools were needed.
          </loop_summary>
        </answer>
      </stage>
    </interaction>
  </example_interaction>

  <final_instruction>
    Process the user's query according to these rules. Ensure your final answer is comprehensive, well-structured, and includes citations where appropriate.
  </final_instruction>
</system_message>`;
}
+
+export function getSummarizedChunksPrompt(chunks: string): string {
+ return `Please provide a comprehensive summary of what you think the document from which these chunks originated.
+ Ensure the summary captures the main ideas and key points from all provided chunks. Be concise and brief and only provide the summary in paragraph form.
+
+ Text chunks:
+ \`\`\`
+ ${chunks}
+ \`\`\``;
+}
+
+export function getSummarizedSystemPrompt(): string {
+ return 'You are an AI assistant tasked with summarizing a document. You are provided with important chunks from the document and provide a summary, as best you can, of what the document will contain overall. Be concise and brief with your response.';
+}