path: root/src/client/views/nodes/chatbot/agentsystem
author    bobzel <zzzman@gmail.com>    2024-10-10 18:58:39 -0400
committer bobzel <zzzman@gmail.com>    2024-10-10 18:58:39 -0400
commit    5752dff8ff7b1b2858542feec0b1bb037461bf1a (patch)
tree      04080d4a596b0e5199b5ec95ab625fbb590f2a75 /src/client/views/nodes/chatbot/agentsystem
parent    36735ff00a55ae587af5f69eef495533a1f35393 (diff)
parent    d347fc59feefd91a796012892da57511787bb6d0 (diff)
Merge branch 'master' into nathan-starter
Diffstat (limited to 'src/client/views/nodes/chatbot/agentsystem')
-rw-r--r-- src/client/views/nodes/chatbot/agentsystem/Agent.ts | 277
-rw-r--r-- src/client/views/nodes/chatbot/agentsystem/prompts.ts | 216
2 files changed, 493 insertions, 0 deletions
diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
new file mode 100644
index 000000000..ccf9caf15
--- /dev/null
+++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
@@ -0,0 +1,277 @@
+import dotenv from 'dotenv';
+import { XMLBuilder, XMLParser } from 'fast-xml-parser';
+import OpenAI from 'openai';
+import { ChatCompletionMessageParam } from 'openai/resources';
+import { AnswerParser } from '../response_parsers/AnswerParser';
+import { StreamedAnswerParser } from '../response_parsers/StreamedAnswerParser';
+import { CalculateTool } from '../tools/CalculateTool';
+import { CreateCSVTool } from '../tools/CreateCSVTool';
+import { DataAnalysisTool } from '../tools/DataAnalysisTool';
+import { NoTool } from '../tools/NoTool';
+import { RAGTool } from '../tools/RAGTool';
+import { SearchTool } from '../tools/SearchTool';
+import { WebsiteInfoScraperTool } from '../tools/WebsiteInfoScraperTool';
+import { AgentMessage, AssistantMessage, PROCESSING_TYPE, ProcessingInfo, Tool } from '../types/types';
+import { Vectorstore } from '../vectorstore/Vectorstore';
+import { getReactPrompt } from './prompts';
+
+dotenv.config();
+
+/**
+ * The Agent class handles the interaction between the assistant and the tools available,
+ * processes user queries, and manages the communication flow between the tools and OpenAI.
+ */
+export class Agent {
+ // Private properties
+ private client: OpenAI;
+ private tools: Record<string, Tool<any>>; // bcz: need a real type here
+ private messages: AgentMessage[] = [];
+ private interMessages: AgentMessage[] = [];
+ private vectorstore: Vectorstore;
+ private _history: () => string;
+ private _summaries: () => string;
+ private _csvData: () => { filename: string; id: string; text: string }[];
+ private actionNumber: number = 0;
+ private thoughtNumber: number = 0;
+ private processingNumber: number = 0;
+ private processingInfo: ProcessingInfo[] = [];
+ private streamedAnswerParser: StreamedAnswerParser = new StreamedAnswerParser();
+
+ /**
+ * The constructor initializes the agent with the vector store and toolset, and sets up the OpenAI client.
+ * @param _vectorstore Vector store instance for document storage and retrieval.
+ * @param summaries A function to retrieve document summaries.
+ * @param history A function to retrieve chat history.
+ * @param csvData A function to retrieve CSV data linked to the assistant.
+ * @param addLinkedUrlDoc A function to add a linked document from a URL.
+ * @param createCSVInDash A function to create a CSV document in the dashboard.
+ */
+ constructor(
+ _vectorstore: Vectorstore,
+ summaries: () => string,
+ history: () => string,
+ csvData: () => { filename: string; id: string; text: string }[],
+ addLinkedUrlDoc: (url: string, id: string) => void,
+ createCSVInDash: (url: string, title: string, id: string, data: string) => void
+ ) {
+ // Initialize OpenAI client with API key from environment
+ this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true });
+ this.vectorstore = _vectorstore;
+ this._history = history;
+ this._summaries = summaries;
+ this._csvData = csvData;
+
+ // Define available tools for the assistant
+ this.tools = {
+ calculate: new CalculateTool(),
+ rag: new RAGTool(this.vectorstore),
+ dataAnalysis: new DataAnalysisTool(csvData),
+ websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc),
+ searchTool: new SearchTool(addLinkedUrlDoc),
+ createCSV: new CreateCSVTool(createCSVInDash),
+ no_tool: new NoTool(),
+ };
+ }
+
+ /**
+ * This method handles the conversation flow with the assistant, processes user queries,
+ * and manages the assistant's decision-making process, including tool actions.
+ * @param question The user's question.
+ * @param onProcessingUpdate Callback function for processing updates.
+ * @param onAnswerUpdate Callback function for answer updates.
+ * @param maxTurns The maximum number of turns to allow in the conversation.
+ * @returns The final response from the assistant.
+ */
+ async askAgent(question: string, onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void, maxTurns: number = 30): Promise<AssistantMessage> {
+ console.log(`Starting query: ${question}`);
+
+ // Push user's question to message history
+ this.messages.push({ role: 'user', content: question });
+
+ // Retrieve chat history and generate system prompt
+ const chatHistory = this._history();
+ const systemPrompt = getReactPrompt(Object.values(this.tools), this._summaries, chatHistory);
+
+ // Initialize intermediate messages
+ this.interMessages = [{ role: 'system', content: systemPrompt }];
+ this.interMessages.push({ role: 'user', content: `<stage number="1" role="user"><query>${question}</query></stage>` });
+
+ // Setup XML parser and builder
+ const parser = new XMLParser({
+ ignoreAttributes: false,
+ attributeNamePrefix: '@_',
+ textNodeName: '_text',
+ isArray: (name /* , jpath, isLeafNode, isAttribute */) => ['query', 'url'].indexOf(name) !== -1,
+ });
+ const builder = new XMLBuilder({ ignoreAttributes: false, attributeNamePrefix: '@_' });
+
+ let currentAction: string | undefined;
+ this.processingInfo = [];
+
+ // Conversation loop (up to maxTurns)
+ for (let i = 2; i < maxTurns; i += 2) {
+ console.log(this.interMessages);
+ console.log(`Turn ${i}/${maxTurns}`);
+
+ // Execute a step in the conversation and get the result
+ const result = await this.execute(onProcessingUpdate, onAnswerUpdate);
+ this.interMessages.push({ role: 'assistant', content: result });
+
+ let parsedResult;
+ try {
+ // Parse XML result from the assistant
+ parsedResult = parser.parse(result);
+ } catch (error) {
+ throw new Error(`Error parsing response: ${error}`);
+ }
+
+ // Extract the stage from the parsed result
+ const stage = parsedResult.stage;
+ if (!stage) {
+ throw new Error(`Error: No stage found in response`);
+ }
+
+ // Handle different stage elements (thoughts, actions, inputs, answers)
+ for (const key in stage) {
+ if (key === 'thought') {
+ // Handle assistant's thoughts
+ console.log(`Thought: ${stage[key]}`);
+ this.processingNumber++;
+ } else if (key === 'action') {
+ // Handle action stage
+ currentAction = stage[key] as string;
+ console.log(`Action: ${currentAction}`);
+
+ if (this.tools[currentAction]) {
+ // Prepare the next action based on the current tool
+ const nextPrompt = [
+ {
+ type: 'text',
+ text: `<stage number="${i + 1}" role="user">` + builder.build({ action_rules: this.tools[currentAction].getActionRule() }) + `</stage>`,
+ },
+ ];
+ this.interMessages.push({ role: 'user', content: nextPrompt });
+ break;
+ } else {
+ // Handle error in case of an invalid action
+ console.log('Error: No valid action');
+ this.interMessages.push({ role: 'user', content: `<stage number="${i + 1}" role="system-error-reporter">No valid action, try again.</stage>` });
+ break;
+ }
+ } else if (key === 'action_input') {
+ // Handle action input stage
+ const actionInput = stage[key];
+ console.log(`Action input:`, actionInput.inputs);
+
+ if (currentAction) {
+ try {
+ // Process the action with its input
+ const observation = (await this.processAction(currentAction, actionInput.inputs)) as any; // bcz: really need a type here
+ const nextPrompt = [{ type: 'text', text: `<stage number="${i + 1}" role="user"> <observation>` }, ...observation, { type: 'text', text: '</observation></stage>' }];
+ console.log(observation);
+ this.interMessages.push({ role: 'user', content: nextPrompt });
+ this.processingNumber++;
+ break;
+ } catch (error) {
+ throw new Error(`Error processing action: ${error}`);
+ }
+ } else {
+ throw new Error('Error: Action input without a valid action');
+ }
+ } else if (key === 'answer') {
+ // If an answer is found, end the query
+ console.log('Answer found. Ending query.');
+ this.streamedAnswerParser.reset();
+ const parsedAnswer = AnswerParser.parse(result, this.processingInfo);
+ return parsedAnswer;
+ }
+ }
+ }
+
+ throw new Error('Reached maximum turns. Ending query.');
+ }
+
+ /**
+ * Executes a step in the conversation, processing the assistant's response and parsing it in real-time.
+ * @param onProcessingUpdate Callback for processing updates.
+ * @param onAnswerUpdate Callback for answer updates.
+ * @returns The full response from the assistant.
+ */
+ private async execute(onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void): Promise<string> {
+ // Stream OpenAI response for real-time updates
+ const stream = await this.client.chat.completions.create({
+ model: 'gpt-4o',
+ messages: this.interMessages as ChatCompletionMessageParam[],
+ temperature: 0,
+ stream: true,
+ });
+
+ let fullResponse: string = '';
+ let currentTag: string = '';
+ let currentContent: string = '';
+ let isInsideTag: boolean = false;
+
+ // Process each chunk of the streamed response
+ for await (const chunk of stream) {
+ const content = chunk.choices[0]?.delta?.content || '';
+ fullResponse += content;
+
+ // Parse the streamed content character by character
+ for (const char of content) {
+ if (currentTag === 'answer') {
+ // Handle answer parsing for real-time updates
+ currentContent += char;
+ const streamedAnswer = this.streamedAnswerParser.parse(char);
+ onAnswerUpdate(streamedAnswer);
+ continue;
+ } else if (char === '<') {
+ // Start of a new tag
+ isInsideTag = true;
+ currentTag = '';
+ currentContent = '';
+ } else if (char === '>') {
+ // End of the tag
+ isInsideTag = false;
+ if (currentTag.startsWith('/')) {
+ currentTag = '';
+ }
+ } else if (isInsideTag) {
+ // Append characters to the tag name
+ currentTag += char;
+ } else if (currentTag === 'thought' || currentTag === 'action_input_description') {
+ // Handle processing information for thought or action input description
+ currentContent += char;
+ const current_info = this.processingInfo.find(info => info.index === this.processingNumber);
+ if (current_info) {
+ current_info.content = currentContent.trim();
+ onProcessingUpdate(this.processingInfo);
+ } else {
+ this.processingInfo.push({
+ index: this.processingNumber,
+ type: currentTag === 'thought' ? PROCESSING_TYPE.THOUGHT : PROCESSING_TYPE.ACTION,
+ content: currentContent.trim(),
+ });
+ onProcessingUpdate(this.processingInfo);
+ }
+ }
+ }
+ }
+
+ return fullResponse;
+ }
+
+ /**
+ * Processes a specific action by invoking the appropriate tool with the provided inputs.
+ * @param action The action to perform.
+ * @param actionInput The inputs for the action.
+ * @returns The result of the action.
+ */
+ private async processAction(action: string, actionInput: unknown): Promise<unknown> {
+ if (!(action in this.tools)) {
+ throw new Error(`Unknown action: ${action}`);
+ }
+
+ const tool = this.tools[action];
+ return await tool.execute(actionInput);
+ }
+}
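
Note: for reference, here is a minimal sketch of how the new Agent class might be driven by a caller. The surrounding chatbot view is not part of this diff, so the Vectorstore construction and every callback below are hypothetical placeholders, not code from this commit.

```typescript
import { Agent } from './Agent';
import { Vectorstore } from '../vectorstore/Vectorstore';
import { ProcessingInfo } from '../types/types';

// Hypothetical caller; the real wiring lives in the chatbot view, outside this diff.
async function runAgentExample() {
    // The Vectorstore constructor arguments are not shown in this diff.
    const vectorstore = new Vectorstore(/* ... */);

    const agent = new Agent(
        vectorstore,
        () => 'Summaries of the linked documents',                 // summaries
        () => 'Previous user/assistant turns, serialized as text', // history
        () => [],                                                  // csvData (none linked in this sketch)
        (url: string, id: string) => console.log('linked URL doc:', url, id),                          // addLinkedUrlDoc
        (url: string, title: string, id: string, data: string) => console.log('created CSV:', title)   // createCSVInDash
    );

    const answer = await agent.askAgent(
        'What are the key points of my linked documents?',
        (processing: ProcessingInfo[]) => console.log('processing update:', processing), // streamed thoughts/actions
        (partial: string) => console.log('partial answer:', partial)                      // streamed answer text
    );
    console.log('final answer:', answer);
}
```

The two callbacks mirror askAgent's onProcessingUpdate and onAnswerUpdate parameters: the first receives intermediate thought/action info as it streams, the second receives the incrementally parsed answer text.
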
diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
new file mode 100644
index 000000000..f5aec3130
--- /dev/null
+++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
@@ -0,0 +1,216 @@
+/**
+ * @file prompts.ts
+ * @description This file contains functions that generate prompts for various AI tasks, including
+ * generating system messages for structured AI assistant interactions and summarizing document chunks.
+ * It defines prompt structures to ensure the AI follows specific guidelines for response formatting,
+ * tool usage, and citation rules, with a rigid structure in mind for tasks such as answering user queries
+ * and summarizing content from provided text chunks.
+ */
+
+import { Tool } from '../types/types';
+
+export function getReactPrompt(tools: Tool[], summaries: () => string, chatHistory: string): string {
+ const toolDescriptions = tools
+ .map(
+ tool => `
+ <tool>
+ <title>${tool.name}</title>
+ <brief_summary>${tool.briefSummary}</brief_summary>
+ </tool>`
+ )
+ .join('\n');
+
+ return `<system_message>
+ <task>
+ You are an advanced AI assistant equipped with tools to answer user queries efficiently. You operate in a loop that is RIGIDLY structured and requires the use of specific tags and formats for your responses. Your goal is to provide accurate and well-structured answers to user queries. Below are the guidelines and information you can use to structure your approach to accomplishing this task.
+ </task>
+
+ <critical_points>
+ <point>**STRUCTURE**: Always use the correct stage tags (e.g., <stage number="2" role="assistant">) for every response. Use only even-numbered stages for your responses.</point>
+ <point>**STOP after every stage and wait for input. Do not combine multiple stages in one response.**</point>
+ <point>If a tool is needed, select the most appropriate tool based on the query.</point>
+ <point>**If one tool does not yield satisfactory results or fails twice, try another tool that might work better for the query.** This often happens with the rag tool, which may not yield great results. If this happens, try the search tool.</point>
+ <point>Ensure that **ALL answers follow the answer structure**: grounded text wrapped in <grounded_text> tags with corresponding citations, normal text in <normal_text> tags, and three follow-up questions at the end.</point>
+ <point>If you use a tool that will do something (e.g., creating a CSV) and also want to use a tool that will provide you with information (e.g., RAG), use the tool that provides information first, then proceed with the tool that performs the action.</point>
+ </critical_points>
+
+ <thought_structure>
+ <thought>
+ <description>
+ Always provide a thought before each action to explain why you are choosing the next step or tool. This helps clarify your reasoning for the action you will take.
+ </description>
+ </thought>
+ </thought_structure>
+
+ <action_input_structure>
+ <action_input>
+ <action_input_description>
+ Always describe what the action will do in the <action_input_description> tag. Be clear about how the tool will process the input and why it is appropriate for this stage.
+ </action_input_description>
+ <inputs>
+ <description>
+ Provide the actual inputs for the action in the <inputs> tag. Ensure that each input is specific to the tool being used. Inputs should match the expected parameters for the tool (e.g., a search term for the website scraper, document references for RAG).
+ </description>
+ </inputs>
+ </action_input>
+ </action_input_structure>
+
+ <answer_structure>
+ ALL answers must follow this structure and everything must be within the <answer> tag:
+ <answer>
+ <grounded_text> - All information derived from tools or user documents must be wrapped in these tags with proper citation. This should not be word for word, but paraphrased from the text.</grounded_text>
+ <normal_text> - Use this tag for text not derived from tools or user documents. It should only be for narrative-like text or extremely common knowledge information.</normal_text>
+ <citations>
+ <citation> - Provide proper citations for each <grounded_text>, referencing the tool or document chunk used. ENSURE THAT THERE IS A CITATION WHOSE INDEX MATCHES FOR EVERY GROUNDED TEXT CITATION INDEX. </citation>
+ </citations>
+ <follow_up_questions> - Provide exactly three user-perspective follow-up questions.</follow_up_questions>
+ <loop_summary> - Summarize the actions and tools used in the conversation.</loop_summary>
+ </answer>
+ </answer_structure>
+
+ <grounded_text_guidelines>
+ <step>**Wrap ALL tool-based information** in <grounded_text> tags and provide citations.</step>
+ <step>Use separate <grounded_text> tags for distinct information or when switching to a different tool or document.</step>
+ <step>Ensure that **EVERY** <grounded_text> tag includes a citation index aligned with a citation that you provide that references the source of the information.</step>
+ <step>There should be a one-to-one relationship between <grounded_text> tags and citations.</step>
+ <step>Over-citing is discouraged—only cite the information that is directly relevant to the user's query.</step>
+ <step>Paraphrase the information in the <grounded_text> tags, but ensure that the meaning is preserved.</step>
+ <step>Do not include the full text of the chunk in the citation—only the relevant excerpt.</step>
+ <step>For text chunks, the citation content must reflect the exact subset of the original chunk that is relevant to the grounded_text tag.</step>
+ <step>Do not use citations from previous interactions. Only use citations from the current action loop.</step>
+ </grounded_text_guidelines>
+
+ <normal_text_guidelines>
+ <step>Wrap general information or reasoning **not derived from tools or documents** in <normal_text> tags.</step>
+ <step>Never put information derived from user documents or tools in <normal_text> tags—use <grounded_text> for those.</step>
+ </normal_text_guidelines>
+
+ <operational_process>
+ <step>Carefully analyze the user query and determine if a tool is necessary to provide an accurate answer.</step>
+ <step>If a tool is needed, choose the most appropriate one and **stop after the action** to wait for system input.</step>
+ <step>If no tool is needed, use the 'no_tool' action but follow the structure.</step>
+ <step>When all observations are complete, format the final answer using <grounded_text> and <normal_text> tags with appropriate citations.</step>
+ <step>Include exactly three follow-up questions from the user's perspective.</step>
+ <step>Provide a loop summary at the end of the conversation.</step>
+ </operational_process>
+
+ <tools>
+ ${toolDescriptions}
+ <note>If no external tool is required, use 'no_tool', but if there might be relevant external information, use the appropriate tool.</note>
+ </tools>
+
+ <summaries>
+ ${summaries()}
+ </summaries>
+
+ <chat_history>
+ ${chatHistory}
+ </chat_history>
+
+ <example_interaction>
+ <interaction description="Correct use of RAG and website scraping tools">
+ <stage number="1" role="user">
+ <query>Can you provide key moments from the 2022 World Cup and its impact on tourism in Qatar?</query>
+ </stage>
+
+ <stage number="2" role="assistant">
+ <thought>
+ I will use the RAG tool to retrieve key moments from the user's World Cup documents. Afterward, I will use the website scraper tool to gather tourism impact data on Qatar.
+ </thought>
+ <action>rag</action>
+ </stage>
+
+ <stage number="3" role="user">
+ <action_rules>***Action rules omitted***</action_rules>
+ </stage>
+
+ <stage number="4" role="assistant">
+ <action_input>
+ <action_input_description>Searching user documents for key moments from the 2022 World Cup.</action_input_description>
+ <inputs>
+ <hypothetical_document_chunk>Key moments from the 2022 World Cup. Goals, assists, big wins, big losses.</hypothetical_document_chunk>
+ </inputs>
+ </action_input>
+ </stage>
+
+ <stage number="5" role="user">
+ <observation>
+ <chunk chunk_id="1234" chunk_type="text">
+ The 2022 FIFA World Cup saw Argentina win, with Lionel Messi's performance being a key highlight. It was widely celebrated as a historical moment in sports.
+ </chunk>
+ </observation>
+ </stage>
+
+ <stage number="6" role="assistant">
+ <thought>
+ With key moments from the World Cup retrieved, I will now use the website scraper tool to gather data on Qatar's tourism impact during the World Cup.
+ </thought>
+ <action>websiteInfoScraper</action>
+ </stage>
+
+ <stage number="7" role="user">
+ <action_rules>***Action rules omitted***</action_rules>
+ </stage>
+
+ <stage number="8" role="assistant">
+ <action_input>
+ <action_input_description>Scraping websites for information about Qatar's tourism impact during the 2022 World Cup.</action_input_description>
+ <inputs>
+ <query>Tourism impact of the 2022 World Cup in Qatar</query>
+ </inputs>
+ </action_input>
+ </stage>
+
+ <stage number="9" role="user">
+ <observation>
+ <chunk chunk_id="5678" chunk_type="url">
+ <url>https://www.qatartourism.com/world-cup-impact</url>
+ <overview>During the 2022 World Cup, Qatar saw a 40% increase in tourism, with over 1.5 million visitors attending.</overview>
+ </chunk>
+ </observation>
+ </stage>
+
+ <stage number="10" role="assistant">
+ <thought>
+ Now that I have gathered both key moments from the World Cup and tourism impact data from Qatar, I will summarize the information in my final response.
+ </thought>
+ <answer>
+ <grounded_text citation_index="1">**The 2022 World Cup** saw Argentina crowned champions, with **Lionel Messi** leading his team to victory, marking a historic moment in sports.</grounded_text>
+ <grounded_text citation_index="2">**Qatar** experienced a **40% increase in tourism** during the World Cup, welcoming over **1.5 million visitors**, significantly boosting its economy.</grounded_text>
+ <normal_text>Moments like **Messi’s triumph** often become ingrained in the legacy of World Cups, immortalizing these tournaments in both sports and cultural memory. The **long-term implications** of the World Cup on Qatar's **economy, tourism**, and **global image** remain important areas of interest as the country continues to build on the momentum generated by hosting this prestigious event.</normal_text>
+ <citations>
+ <citation index="1" chunk_id="1234" type="text">Key moments from the 2022 World Cup.</citation>
+ <citation index="2" chunk_id="5678" type="url"></citation>
+ </citations>
+ <follow_up_questions>
+ <question>What long-term effects has the World Cup had on Qatar's economy and infrastructure?</question>
+ <question>Can you compare Qatar's tourism numbers with previous World Cup hosts?</question>
+ <question>How has Qatar’s image on the global stage evolved post-World Cup?</question>
+ </follow_up_questions>
+ <loop_summary>
+ The assistant first used the RAG tool to extract key moments from the user documents about the 2022 World Cup. Then, the assistant utilized the website scraping tool to gather data on Qatar's tourism impact. Both tools provided valuable information, and no additional tools were needed.
+ </loop_summary>
+ </answer>
+ </stage>
+ </interaction>
+ </example_interaction>
+
+ <final_instruction>
+ Process the user's query according to these rules. Ensure your final answer is comprehensive, well-structured, and includes citations where appropriate.
+ </final_instruction>
+</system_message>`;
+}
+
+export function getSummarizedChunksPrompt(chunks: string): string {
+ return `Please provide a comprehensive summary of the document from which you think these chunks originated.
+ Ensure the summary captures the main ideas and key points from all provided chunks. Be concise and provide only the summary, in paragraph form.
+
+ Text chunks:
+ \`\`\`
+ ${chunks}
+ \`\`\``;
+}
+
+export function getSummarizedSystemPrompt(): string {
+ return 'You are an AI assistant tasked with summarizing a document. You are provided with important chunks from the document and provide a summary, as best you can, of what the document will contain overall. Be concise and brief with your response.';
+}