aboutsummaryrefslogtreecommitdiff
path: root/src/client/views/nodes/chatbot
diff options
context:
space:
mode:
Diffstat (limited to 'src/client/views/nodes/chatbot')
-rw-r--r--src/client/views/nodes/chatbot/agentsystem/Agent.ts278
-rw-r--r--src/client/views/nodes/chatbot/agentsystem/prompts.ts181
-rw-r--r--src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss279
-rw-r--r--src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx745
-rw-r--r--src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx146
-rw-r--r--src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss69
-rw-r--r--src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx22
-rw-r--r--src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts125
-rw-r--r--src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts73
-rw-r--r--src/client/views/nodes/chatbot/tools/BaseTool.ts24
-rw-r--r--src/client/views/nodes/chatbot/tools/CalculateTool.ts26
-rw-r--r--src/client/views/nodes/chatbot/tools/CreateCSVTool.ts51
-rw-r--r--src/client/views/nodes/chatbot/tools/CreateCollectionTool.ts36
-rw-r--r--src/client/views/nodes/chatbot/tools/DataAnalysisTool.ts59
-rw-r--r--src/client/views/nodes/chatbot/tools/GetDocsTool.ts29
-rw-r--r--src/client/views/nodes/chatbot/tools/NoTool.ts18
-rw-r--r--src/client/views/nodes/chatbot/tools/RAGTool.ts138
-rw-r--r--src/client/views/nodes/chatbot/tools/SearchTool.ts54
-rw-r--r--src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts83
-rw-r--r--src/client/views/nodes/chatbot/tools/WikipediaTool.ts37
-rw-r--r--src/client/views/nodes/chatbot/types/types.ts129
-rw-r--r--src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts258
22 files changed, 2860 insertions, 0 deletions
diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
new file mode 100644
index 000000000..180d05cf3
--- /dev/null
+++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
@@ -0,0 +1,278 @@
+import OpenAI from 'openai';
+import { Tool, AgentMessage, AssistantMessage, TEXT_TYPE, CHUNK_TYPE, ASSISTANT_ROLE, ProcessingInfo, PROCESSING_TYPE } from '../types/types';
+import { getReactPrompt } from './prompts';
+import { XMLParser, XMLBuilder } from 'fast-xml-parser';
+import { Vectorstore } from '../vectorstore/Vectorstore';
+import { ChatCompletionMessageParam } from 'openai/resources';
+import dotenv from 'dotenv';
+import { CalculateTool } from '../tools/CalculateTool';
+import { RAGTool } from '../tools/RAGTool';
+import { DataAnalysisTool } from '../tools/DataAnalysisTool';
+import { WebsiteInfoScraperTool } from '../tools/WebsiteInfoScraperTool';
+import { SearchTool } from '../tools/SearchTool';
+import { NoTool } from '../tools/NoTool';
+import { v4 as uuidv4 } from 'uuid';
+import { AnswerParser } from '../response_parsers/AnswerParser';
+import { StreamedAnswerParser } from '../response_parsers/StreamedAnswerParser';
+import { CreateCSVTool } from '../tools/CreateCSVTool';
+
+dotenv.config();
+
+/**
+ * The Agent class drives a staged, XML-tagged conversation with OpenAI: it sends the
+ * running message list to the chat-completions API, parses each `<stage>` response, runs
+ * the tool the assistant selected, feeds the observation back, and returns once the
+ * assistant emits an `<answer>`.
+ */
+export class Agent {
+    // Private properties
+    private client: OpenAI;
+    // Tools keyed by the action name the assistant must emit inside <action>.
+    private tools: Record<string, Tool<any>>;
+    // Raw user questions, appended on every askAgent call.
+    private messages: AgentMessage[] = [];
+    // Message list sent to OpenAI for the query in progress; rebuilt on every askAgent call.
+    private interMessages: AgentMessage[] = [];
+    private vectorstore: Vectorstore;
+    private _history: () => string;
+    private _summaries: () => string;
+    private _csvData: () => { filename: string; id: string; text: string }[];
+    private actionNumber: number = 0;
+    private thoughtNumber: number = 0;
+    // Index of the ProcessingInfo entry that streamed thought/description text is written to.
+    private processingNumber: number = 0;
+    private processingInfo: ProcessingInfo[] = [];
+    private streamedAnswerParser: StreamedAnswerParser = new StreamedAnswerParser();
+
+    /**
+     * Initializes the agent with the vector store and toolset, and sets up the OpenAI client.
+     * @param _vectorstore Vector store instance handed to the RAG tool.
+     * @param summaries A function to retrieve document summaries.
+     * @param history A function to retrieve chat history.
+     * @param csvData A function to retrieve CSV data linked to the assistant.
+     * @param addLinkedUrlDoc A function to add a linked document from a URL.
+     * @param createCSVInDash A function to create a CSV document in the dashboard.
+     */
+    constructor(
+        _vectorstore: Vectorstore,
+        summaries: () => string,
+        history: () => string,
+        csvData: () => { filename: string; id: string; text: string }[],
+        addLinkedUrlDoc: (url: string, id: string) => void,
+        createCSVInDash: (url: string, title: string, id: string, data: string) => void
+    ) {
+        // Initialize OpenAI client with API key from environment
+        this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true });
+        this.vectorstore = _vectorstore;
+        this._history = history;
+        this._summaries = summaries;
+        this._csvData = csvData;
+
+        // Define available tools for the assistant
+        this.tools = {
+            calculate: new CalculateTool(),
+            rag: new RAGTool(this.vectorstore),
+            dataAnalysis: new DataAnalysisTool(csvData),
+            websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc),
+            searchTool: new SearchTool(addLinkedUrlDoc),
+            createCSV: new CreateCSVTool(createCSVInDash),
+            no_tool: new NoTool(),
+        };
+    }
+
+    /**
+     * Runs one user query through the staged loop until the assistant produces an answer.
+     *
+     * Each iteration requests one assistant stage, parses it as XML, and reacts to the first
+     * actionable element: an `<action>` is answered with that tool's action rules, an
+     * `<action_input>` is executed and answered with the observation, and an `<answer>` ends
+     * the loop.
+     * @param question The user's question.
+     * @param onProcessingUpdate Callback function for processing updates.
+     * @param onAnswerUpdate Callback function for answer updates.
+     * @param maxTurns Exclusive upper bound on the even stage numbers the assistant may use
+     * (stages start at 2 and advance by 2).
+     * @returns The final response from the assistant.
+     * @throws Error if a response cannot be parsed, has no `<stage>`, supplies an action input
+     * without a preceding action, a tool fails, or the turn limit is reached.
+     */
+    async askAgent(question: string, onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void, maxTurns: number = 30): Promise<AssistantMessage> {
+        console.log(`Starting query: ${question}`);
+
+        // Push user's question to message history
+        this.messages.push({ role: 'user', content: question });
+
+        // Retrieve chat history and generate system prompt
+        const chatHistory = this._history();
+        const systemPrompt = getReactPrompt(Object.values(this.tools), this._summaries, chatHistory);
+
+        // Initialize intermediate messages
+        this.interMessages = [{ role: 'system', content: systemPrompt }];
+        this.interMessages.push({ role: 'user', content: `<stage number="1" role="user"><query>${question}</query></stage>` });
+
+        // Setup XML parser and builder
+        const parser = new XMLParser({
+            ignoreAttributes: false,
+            attributeNamePrefix: '@_',
+            textNodeName: '_text',
+            // Always parse <query> and <url> as arrays, even when only one is present.
+            isArray: name => ['query', 'url'].includes(name),
+        });
+        const builder = new XMLBuilder({ ignoreAttributes: false, attributeNamePrefix: '@_' });
+
+        let currentAction: string | undefined;
+        this.processingInfo = [];
+
+        // Conversation loop (up to maxTurns)
+        for (let i = 2; i < maxTurns; i += 2) {
+            console.log(this.interMessages);
+            console.log(`Turn ${i}/${maxTurns}`);
+
+            // Execute a step in the conversation and get the result
+            const result = await this.execute(onProcessingUpdate, onAnswerUpdate);
+            this.interMessages.push({ role: 'assistant', content: result });
+
+            let parsedResult;
+            try {
+                // Parse XML result from the assistant
+                parsedResult = parser.parse(result);
+            } catch (error) {
+                throw new Error(`Error parsing response: ${error}`);
+            }
+
+            // Extract the stage from the parsed result
+            const stage = parsedResult.stage;
+            if (!stage) {
+                throw new Error(`Error: No stage found in response`);
+            }
+
+            // Handle different stage elements (thoughts, actions, inputs, answers).
+            // Keys other than these four (e.g. the '@_'-prefixed attributes) are ignored.
+            for (const key in stage) {
+                if (key === 'thought') {
+                    // Handle assistant's thoughts
+                    console.log(`Thought: ${stage[key]}`);
+                    // Advance so the next streamed thought/description gets its own entry.
+                    this.processingNumber++;
+                } else if (key === 'action') {
+                    // Handle action stage
+                    currentAction = stage[key] as string;
+                    console.log(`Action: ${currentAction}`);
+
+                    // Own-property check: an inherited key such as 'constructor' must not
+                    // be mistaken for a registered tool.
+                    if (this.hasTool(currentAction)) {
+                        // Prepare the next action based on the current tool
+                        const nextPrompt = [
+                            {
+                                type: 'text',
+                                text: `<stage number="${i + 1}" role="user">` + builder.build({ action_rules: this.tools[currentAction].getActionRule() }) + `</stage>`,
+                            },
+                        ];
+                        this.interMessages.push({ role: 'user', content: nextPrompt });
+                        break;
+                    } else {
+                        // Handle error in case of an invalid action
+                        console.log('Error: No valid action');
+                        this.interMessages.push({ role: 'user', content: `<stage number="${i + 1}" role="system-error-reporter">No valid action, try again.</stage>` });
+                        break;
+                    }
+                } else if (key === 'action_input') {
+                    // Handle action input stage
+                    const actionInput = stage[key];
+                    console.log(`Action input:`, actionInput.inputs);
+
+                    if (currentAction) {
+                        try {
+                            // Process the action with its input
+                            const observation = await this.processAction(currentAction, actionInput.inputs);
+                            const nextPrompt = [{ type: 'text', text: `<stage number="${i + 1}" role="user"> <observation>` }, ...observation, { type: 'text', text: '</observation></stage>' }];
+                            console.log(observation);
+                            this.interMessages.push({ role: 'user', content: nextPrompt });
+                            this.processingNumber++;
+                            break;
+                        } catch (error) {
+                            throw new Error(`Error processing action: ${error}`);
+                        }
+                    } else {
+                        throw new Error('Error: Action input without a valid action');
+                    }
+                } else if (key === 'answer') {
+                    // If an answer is found, end the query
+                    console.log('Answer found. Ending query.');
+                    this.streamedAnswerParser.reset();
+                    const parsedAnswer = AnswerParser.parse(result, this.processingInfo);
+                    return parsedAnswer;
+                }
+            }
+        }
+
+        throw new Error('Reached maximum turns. Ending query.');
+    }
+
+    /**
+     * Requests one streamed completion for the current message list and scans it character
+     * by character so thoughts, action descriptions and the answer can be reported live.
+     * @param onProcessingUpdate Callback for processing updates.
+     * @param onAnswerUpdate Callback for answer updates.
+     * @returns The full response from the assistant.
+     */
+    private async execute(onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void): Promise<string> {
+        // Stream OpenAI response for real-time updates
+        const stream = await this.client.chat.completions.create({
+            model: 'gpt-4o',
+            messages: this.interMessages as ChatCompletionMessageParam[],
+            temperature: 0,
+            stream: true,
+        });
+
+        let fullResponse = '';
+        let currentTag = '';
+        let currentContent = '';
+        let isInsideTag = false;
+
+        // Process each chunk of the streamed response
+        for await (const chunk of stream) {
+            const content = chunk.choices[0]?.delta?.content ?? '';
+            fullResponse += content;
+
+            // Parse the streamed content character by character
+            for (const char of content) {
+                if (currentTag === 'answer') {
+                    // Once <answer> has opened, every remaining character (markup included)
+                    // is handed to the streamed answer parser; tag tracking stops here.
+                    currentContent += char;
+                    const streamedAnswer = this.streamedAnswerParser.parse(char);
+                    onAnswerUpdate(streamedAnswer);
+                    continue;
+                } else if (char === '<') {
+                    // Start of a new tag
+                    isInsideTag = true;
+                    currentTag = '';
+                    currentContent = '';
+                } else if (char === '>') {
+                    // End of the tag; a closing tag leaves no current tag
+                    isInsideTag = false;
+                    if (currentTag.startsWith('/')) {
+                        currentTag = '';
+                    }
+                } else if (isInsideTag) {
+                    // Append characters to the tag name
+                    currentTag += char;
+                } else if (currentTag === 'thought' || currentTag === 'action_input_description') {
+                    // Mirror the text streamed so far into the entry for the current index,
+                    // creating that entry on the first character.
+                    currentContent += char;
+                    const currentInfo = this.processingInfo.find(info => info.index === this.processingNumber);
+                    if (currentInfo) {
+                        currentInfo.content = currentContent.trim();
+                    } else {
+                        this.processingInfo.push({
+                            index: this.processingNumber,
+                            type: currentTag === 'thought' ? PROCESSING_TYPE.THOUGHT : PROCESSING_TYPE.ACTION,
+                            content: currentContent.trim(),
+                        });
+                    }
+                    onProcessingUpdate(this.processingInfo);
+                }
+            }
+        }
+
+        return fullResponse;
+    }
+
+    /**
+     * Processes a specific action by invoking the appropriate tool with the provided inputs.
+     * @param action The action to perform.
+     * @param actionInput The inputs for the action.
+     * @returns The result of the action.
+     * @throws Error if `action` is not one of the registered tool names.
+     */
+    private async processAction(action: string, actionInput: any): Promise<any> {
+        if (!this.hasTool(action)) {
+            throw new Error(`Unknown action: ${action}`);
+        }
+
+        const tool = this.tools[action];
+        return await tool.execute(actionInput);
+    }
+
+    /**
+     * Reports whether `action` names a registered tool.
+     * Checks own properties only, so names inherited from Object.prototype
+     * (e.g. 'constructor', 'toString') are rejected.
+     * @param action The action name emitted by the assistant.
+     * @returns True when a tool is registered under exactly that name.
+     */
+    private hasTool(action: string): boolean {
+        return Object.prototype.hasOwnProperty.call(this.tools, action);
+    }
+}
diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
new file mode 100644
index 000000000..9daabc35f
--- /dev/null
+++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
@@ -0,0 +1,181 @@
+// prompts.ts
+
+import { Tool } from '../types/types';
+
+/**
+ * Builds the system prompt for the staged assistant loop.
+ *
+ * The prompt embeds one <tool> entry (title and brief summary) per tool, the text returned
+ * by `summaries()`, the supplied chat history, and a fixed worked example of the
+ * stage/answer format.
+ * @param tools Tools to advertise to the assistant.
+ * @param summaries Function returning the summaries text; called once per prompt.
+ * @param chatHistory Chat history text placed inside <chat_history>.
+ * @returns The complete system message.
+ */
+export function getReactPrompt(tools: Tool[], summaries: () => string, chatHistory: string): string {
+    // Renders a single tool as a <tool> element.
+    const describeTool = (tool: Tool): string => `
+ <tool>
+ <title>${tool.name}</title>
+ <brief_summary>${tool.briefSummary}</brief_summary>
+ </tool>`;
+
+    const toolDescriptions = tools.map(describeTool).join('\n');
+    const documentSummaries = summaries();
+
+    return `<system_message>
+ <task>
+ You are an advanced AI assistant equipped with tools to answer user queries efficiently. You operate in a loop that is RIGIDLY structured and requires the use of specific tags and formats for your responses. Your goal is to provide accurate and well-structured answers to user queries. Below are the guidelines and information you can use to structure your approach to accomplishing this task.
+ </task>
+
+ <critical_points>
+ <point>**STRUCTURE**: Always use the correct stage tags (e.g., <stage number="2" role="assistant">) for every response. Use only even-numbered stages for your responses.</point>
+ <point>**STOP after every stage and wait for input. Do not combine multiple stages in one response.**</point>
+ <point>If a tool is needed, select the most appropriate tool based on the query.</point>
+ <point>**If one tool does not yield satisfactory results or fails twice, try another tool that might work better for the query.**</point>
+ <point>Ensure that **ALL answers follow the answer structure**: grounded text wrapped in <grounded_text> tags with corresponding citations, normal text in <normal_text> tags, and three follow-up questions at the end.</point>
+ </critical_points>
+
+ <answer_structure>
+ <answer>
+ <grounded_text> - All information derived from tools or user documents must be wrapped in these tags with proper citation.</grounded_text>
+ <normal_text> - Use this tag for text not derived from tools or user documents.</normal_text>
+ <citations>
+ <citation> - Provide proper citations for each <grounded_text>, referencing the tool or document chunk used.</citation>
+ </citations>
+ <follow_up_questions> - Provide exactly three user-perspective follow-up questions.</follow_up_questions>
+ <loop_summary> - Summarize the actions and tools used in the conversation.</loop_summary>
+ </answer>
+ </answer_structure>
+
+ <grounded_text_guidelines>
+ <step>**Wrap ALL tool-based information** in <grounded_text> tags and provide citations.</step>
+ <step>Use separate <grounded_text> tags for distinct information or when switching to a different tool or document.</step>
+ <step>Ensure that **EVERY** <grounded_text> tag includes a citation index referencing the source of the information.</step>
+ <step>Over-citing is discouraged—only cite the information that is directly relevant to the user's query.</step>
+ </grounded_text_guidelines>
+
+ <normal_text_guidelines>
+ <step>Wrap general information or reasoning **not derived from tools or documents** in <normal_text> tags.</step>
+ <step>Never put information derived from user documents or tools in <normal_text> tags—use <grounded_text> for those.</step>
+ </normal_text_guidelines>
+
+ <operational_process>
+ <step>Carefully analyze the user query and determine if a tool is necessary to provide an accurate answer.</step>
+ <step>If a tool is needed, choose the most appropriate one and **stop after the action** to wait for system input.</step>
+ <step>If no tool is needed, use the 'no_tool' action but follow the structure.</step>
+ <step>When all observations are complete, format the final answer using <grounded_text> and <normal_text> tags with appropriate citations.</step>
+ <step>Include exactly three follow-up questions from the user's perspective.</step>
+ <step>Provide a loop summary at the end of the conversation.</step>
+ </operational_process>
+
+ <tools>
+ ${toolDescriptions}
+ <note>If no external tool is required, use 'no_tool', but if there might be relevant external information, use the appropriate tool.</note>
+ </tools>
+
+ <summaries>
+ ${documentSummaries}
+ </summaries>
+
+ <chat_history>
+ ${chatHistory}
+ </chat_history>
+
+ <example_interaction>
+ <interaction description="Correct use of RAG and website scraping tools">
+ <stage number="1" role="user">
+ <query>Can you provide key moments from the 2022 World Cup and its impact on tourism in Qatar?</query>
+ </stage>
+
+ <stage number="2" role="assistant">
+ <thought>
+ I will use the RAG tool to retrieve key moments from the user's World Cup documents. Afterward, I will use the website scraper tool to gather tourism impact data on Qatar.
+ </thought>
+ <action>rag</action>
+ </stage>
+
+ <stage number="3" role="user">
+ <action_rules>***Action rules omitted***</action_rules>
+ </stage>
+
+ <stage number="4" role="assistant">
+ <action_input>
+ <action_input_description>Searching user documents for key moments from the 2022 World Cup.</action_input_description>
+ <inputs>
+ <hypothetical_document_chunk>Key moments from the 2022 World Cup. Goals, assists, big wins, big losses.</hypothetical_document_chunk>
+ </inputs>
+ </action_input>
+ </stage>
+
+ <stage number="5" role="user">
+ <observation>
+ <chunk chunk_id="1234" chunk_type="text">
+ The 2022 FIFA World Cup saw Argentina win, with Lionel Messi's performance being a key highlight. It was widely celebrated as a historical moment in sports.
+ </chunk>
+ </observation>
+ </stage>
+
+ <stage number="6" role="assistant">
+ <thought>
+ With key moments from the World Cup retrieved, I will now use the website scraper tool to gather data on Qatar’s tourism impact during the World Cup.
+ </thought>
+ <action>websiteInfoScraper</action>
+ </stage>
+
+ <stage number="7" role="user">
+ <action_rules>***Action rules omitted***</action_rules>
+ </stage>
+
+ <stage number="8" role="assistant">
+ <action_input>
+ <action_input_description>Scraping websites for information about Qatar's tourism impact during the 2022 World Cup.</action_input_description>
+ <inputs>
+ <query>Tourism impact of the 2022 World Cup in Qatar</query>
+ </inputs>
+ </action_input>
+ </stage>
+
+ <stage number="9" role="user">
+ <observation>
+ <chunk chunk_id="5678" chunk_type="url">
+ <url>https://www.qatartourism.com/world-cup-impact</url>
+ <overview>During the 2022 World Cup, Qatar saw a 40% increase in tourism, with over 1.5 million visitors attending.</overview>
+ </chunk>
+ </observation>
+ </stage>
+
+ <stage number="10" role="assistant">
+ <thought>
+ Now that I have gathered both key moments from the World Cup and tourism impact data from Qatar, I will summarize the information in my final response.
+ </thought>
+ <answer>
+ <grounded_text citation_index="1">**The 2022 World Cup** saw Argentina crowned champions, with **Lionel Messi** leading his team to victory, marking a historic moment in sports.</grounded_text>
+ <grounded_text citation_index="2">**Qatar** experienced a **40% increase in tourism** during the World Cup, welcoming over **1.5 million visitors**, significantly boosting its economy.</grounded_text>
+ <normal_text>Moments like **Messi’s triumph** often become ingrained in the legacy of World Cups, immortalizing these tournaments in both sports and cultural memory. The **long-term implications** of the World Cup on Qatar's **economy, tourism**, and **global image** remain important areas of interest as the country continues to build on the momentum generated by hosting this prestigious event.</normal_text>
+ <citations>
+ <citation index="1" chunk_id="1234" type="text">Key moments from the 2022 World Cup.</citation>
+ <citation index="2" chunk_id="5678" type="url"></citation>
+ </citations>
+ <follow_up_questions>
+ <question>What long-term effects has the World Cup had on Qatar's economy and infrastructure?</question>
+ <question>Can you compare Qatar's tourism numbers with previous World Cup hosts?</question>
+ <question>How has Qatar’s image on the global stage evolved post-World Cup?</question>
+ </follow_up_questions>
+ <loop_summary>
+ The assistant first used the RAG tool to extract key moments from the user documents about the 2022 World Cup. Then, the assistant utilized the website scraping tool to gather data on Qatar's tourism impact. Both tools provided valuable information, and no additional tools were needed.
+ </loop_summary>
+ </answer>
+ </stage>
+ </interaction>
+ </example_interaction>
+
+ <final_instruction>
+ Process the user's query according to these rules. Ensure your final answer is comprehensive, well-structured, and includes citations where appropriate.
+ </final_instruction>
+</system_message>`;
+}
+
+/**
+ * Builds the user prompt that asks for a summary of the document a set of chunks came from.
+ * @param chunks The chunk text, inserted verbatim between triple-backtick fences.
+ * @returns The prompt string.
+ */
+export function getSummarizedChunksPrompt(chunks: string): string {
+    const summaryRequest = `Please provide a comprehensive summary of what you think the document from which these chunks originated.
+ Ensure the summary captures the main ideas and key points from all provided chunks. Be concise and brief and only provide the summary in paragraph form.
+
+ Text chunks:
+ \`\`\`
+ ${chunks}
+ \`\`\``;
+    return summaryRequest;
+}
+
+/**
+ * Returns the fixed system prompt used when asking for a document summary.
+ * @returns The system prompt string.
+ */
+export function getSummarizedSystemPrompt(): string {
+    const systemPrompt =
+        'You are an AI assistant tasked with summarizing a document. You are provided with important chunks from the document and provide a summary, as best you can, of what the document will contain overall. Be concise and brief with your response.';
+    return systemPrompt;
+}
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss
new file mode 100644
index 000000000..42f6a0d61
--- /dev/null
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss
@@ -0,0 +1,279 @@
+@import url('https://fonts.googleapis.com/css2?family=Atkinson+Hyperlegible:ital,wght@0,400;0,700;1,400;1,700&display=swap');
+
+// Shared palette and timing values used by the rules in this stylesheet.
+// Accent colour: header and user-bubble backgrounds, buttons, focus ring.
+$primary-color: #4a90e2;
+// Background of chatbot message bubbles.
+$secondary-color: #f5f8fa;
+// Text colour for chatbot messages and citation buttons.
+$text-color: #333;
+// Background of the disabled submit button.
+$light-text-color: #777;
+// Borders and the scrollbar thumb.
+$border-color: #e1e8ed;
+$shadow-color: rgba(0, 0, 0, 0.1);
+// Transition shorthand applied to interactive elements.
+$transition: all 0.3s ease;
+// Root container of the chat UI: a full-height flex column holding the header,
+// the scrolling message list and the input row.
+.chat-box {
+ display: flex;
+ flex-direction: column;
+ height: 100%;
+ background-color: #fff;
+ font-family:
+ 'Atkinson Hyperlegible',
+ -apple-system,
+ BlinkMacSystemFont,
+ 'Segoe UI',
+ Roboto,
+ Helvetica,
+ Arial,
+ sans-serif;
+ border-radius: 12px;
+ overflow: hidden;
+ box-shadow: 0 4px 12px $shadow-color;
+ // Makes this box the positioning context for absolutely positioned descendants.
+ position: relative;
+
+ // Title bar at the top of the chat box.
+ .chat-header {
+ background-color: $primary-color;
+ color: white;
+ padding: 15px;
+ text-align: center;
+ box-shadow: 0 2px 4px $shadow-color;
+ height: fit-content;
+
+ h2 {
+ margin: 0;
+ font-size: 1.3em;
+ font-weight: 500;
+ }
+ }
+
+ // Message list: grows to fill the remaining height and scrolls vertically.
+ .chat-messages {
+ flex-grow: 1;
+ overflow-y: auto;
+ padding: 20px;
+ display: flex;
+ flex-direction: column;
+ gap: 10px; // Added to give space between elements
+
+ // Slim scrollbar; these pseudo-elements are WebKit-prefixed.
+ &::-webkit-scrollbar {
+ width: 6px;
+ }
+
+ &::-webkit-scrollbar-thumb {
+ background-color: $border-color;
+ border-radius: 3px;
+ }
+ }
+
+ // Input row: text field plus circular submit button.
+ .chat-input {
+ display: flex;
+ padding: 20px;
+ border-top: 1px solid $border-color;
+ background-color: #fff;
+
+ input {
+ flex-grow: 1;
+ padding: 12px 15px;
+ border: 1px solid $border-color;
+ border-radius: 24px;
+ font-size: 15px;
+ transition: $transition;
+
+ // Replace the default outline with a tinted border and a soft ring.
+ &:focus {
+ outline: none;
+ border-color: $primary-color;
+ box-shadow: 0 0 0 2px rgba($primary-color, 0.2);
+ }
+ }
+
+ .submit-button {
+ background-color: $primary-color;
+ color: white;
+ border: none;
+ border-radius: 50%;
+ width: 48px;
+ height: 48px;
+ margin-left: 10px;
+ cursor: pointer;
+ transition: $transition;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+
+ &:hover {
+ background-color: darken($primary-color, 10%);
+ }
+
+ &:disabled {
+ background-color: $light-text-color;
+ cursor: not-allowed;
+ }
+
+ // Spinner inside the submit button; rotates via the `spin` keyframes.
+ .spinner {
+ height: 24px;
+ width: 24px;
+ border: 3px solid rgba(255, 255, 255, 0.3);
+ border-top: 3px solid #fff;
+ border-radius: 50%;
+ animation: spin 1s linear infinite;
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ }
+ }
+ }
+ // Citation popup: fixed-position, horizontally centred near the bottom, fades in.
+ .citation-popup {
+ position: fixed;
+ bottom: 50px;
+ left: 50%;
+ transform: translateX(-50%);
+ background-color: rgba(0, 0, 0, 0.8);
+ color: white;
+ padding: 10px 20px;
+ border-radius: 10px;
+ box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);
+ z-index: 1000;
+ animation: fadeIn 0.3s ease-in-out;
+
+ p {
+ margin: 0;
+ font-size: 14px;
+ }
+
+ // Opacity fade used by the popup's `animation` above.
+ @keyframes fadeIn {
+ from {
+ opacity: 0;
+ }
+ to {
+ opacity: 1;
+ }
+ }
+ }
+}
+
+// A single chat bubble. The `.user` and `.chatbot` modifiers choose side and colours.
+.message {
+ max-width: 80%;
+ margin-bottom: 20px;
+ padding: 16px 20px;
+ border-radius: 18px;
+ font-size: 15px;
+ line-height: 1.5;
+ box-shadow: 0 2px 4px $shadow-color;
+ word-wrap: break-word; // To handle long words
+
+ // User bubble: aligned to the end of the cross axis, accent background.
+ // NOTE(review): align-self presumably relies on the flex column in .chat-messages; confirm the markup.
+ &.user {
+ align-self: flex-end;
+ background-color: $primary-color;
+ color: white;
+ border-bottom-right-radius: 4px;
+ }
+
+ // Chatbot bubble: aligned to the start of the cross axis, light background.
+ &.chatbot {
+ align-self: flex-start;
+ background-color: $secondary-color;
+ color: $text-color;
+ border-bottom-left-radius: 4px;
+ }
+
+ // Full-width outlined button shown inside a message.
+ .toggle-info {
+ background-color: transparent;
+ color: $primary-color;
+ border: 1px solid $primary-color;
+ width: 100%;
+ height: fit-content;
+ border-radius: 8px;
+ padding: 10px 16px;
+ font-size: 14px;
+ cursor: pointer;
+ transition: $transition;
+ margin-top: 10px;
+
+ &:hover {
+ background-color: rgba($primary-color, 0.1);
+ }
+ }
+}
+
+// Follow-up questions section: a heading followed by a vertical list of buttons.
+.follow-up-questions {
+ margin-top: 15px;
+
+ h4 {
+ font-size: 15px;
+ font-weight: 600;
+ margin-bottom: 10px;
+ }
+
+ // Stack the question buttons with a fixed gap.
+ .questions-list {
+ display: flex;
+ flex-direction: column;
+ gap: 10px;
+ }
+
+ // Outlined, left-aligned button; long question text is allowed to wrap.
+ .follow-up-button {
+ background-color: #fff;
+ color: $primary-color;
+ border: 1px solid $primary-color;
+ border-radius: 8px;
+ padding: 10px 16px;
+ font-size: 14px;
+ cursor: pointer;
+ transition: $transition;
+ text-align: left;
+ white-space: normal;
+ word-wrap: break-word;
+ width: 100%;
+ height: fit-content;
+
+ // Invert the colours on hover.
+ &:hover {
+ background-color: $primary-color;
+ color: #fff;
+ }
+ }
+}
+
+// Small circular inline button (20px square, fully rounded) with centred content.
+.citation-button {
+ display: inline-flex;
+ align-items: center;
+ justify-content: center;
+ width: 20px;
+ height: 20px;
+ border-radius: 50%;
+ background-color: rgba(0, 0, 0, 0.1);
+ color: $text-color;
+ font-size: 12px;
+ font-weight: bold;
+ margin-left: 5px;
+ cursor: pointer;
+ transition: $transition;
+ vertical-align: middle;
+
+ // Darken the translucent background on hover.
+ &:hover {
+ background-color: rgba(0, 0, 0, 0.2);
+ }
+}
+
+// Translucent white overlay that covers its positioned ancestor edge to edge
+// and centres its content.
+.uploading-overlay {
+ position: absolute;
+ top: 0;
+ left: 0;
+ right: 0;
+ bottom: 0;
+ background-color: rgba(255, 255, 255, 0.8);
+ display: flex;
+ justify-content: center;
+ align-items: center;
+ z-index: 1000;
+}
+
+// One full clockwise rotation; referenced by the submit button's .spinner.
+@keyframes spin {
+ 0% {
+ transform: rotate(0deg);
+ }
+ 100% {
+ transform: rotate(360deg);
+ }
+}
+
+// Viewports up to 768px wide: square off the chat box corners and let
+// message bubbles use more of the available width.
+@media (max-width: 768px) {
+ .chat-box {
+ border-radius: 0;
+ }
+
+ .message {
+ max-width: 90%;
+ }
+}
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
new file mode 100644
index 000000000..6dc691798
--- /dev/null
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -0,0 +1,745 @@
+import { action, computed, makeObservable, observable, observe, reaction, runInAction, ObservableSet } from 'mobx';
+import { observer } from 'mobx-react';
+import OpenAI, { ClientOptions } from 'openai';
+import * as React from 'react';
+import { Doc, DocListCast } from '../../../../../fields/Doc';
+import { CsvCast, DocCast, PDFCast, RTFCast, StrCast } from '../../../../../fields/Types';
+import { DocumentType } from '../../../../documents/DocumentTypes';
+import { Docs } from '../../../../documents/Documents';
+import { LinkManager } from '../../../../util/LinkManager';
+import { ViewBoxAnnotatableComponent } from '../../../DocComponent';
+import { FieldView, FieldViewProps } from '../../FieldView';
+import './ChatBox.scss';
+import MessageComponentBox from './MessageComponent';
+import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, SimplifiedChunk, ProcessingInfo } from '../types/types';
+import { Vectorstore } from '../vectorstore/Vectorstore';
+import { Agent } from '../agentsystem/Agent';
+import dotenv from 'dotenv';
+import { DocData, DocViews } from '../../../../../fields/DocSymbols';
+import { DocumentManager } from '../../../../util/DocumentManager';
+import { v4 as uuidv4 } from 'uuid';
+import { DocUtils } from '../../../../documents/DocUtils';
+import { ClientUtils } from '../../../../../ClientUtils';
+import { ProgressBar } from './ProgressBar';
+import { DocumentView } from '../../DocumentView';
+import { Networking } from '../../../../Network';
+
+dotenv.config();
+
+/**
+ * ChatBox is the main class responsible for managing the interaction between the user and the assistant,
+ * handling documents, and integrating with OpenAI for tasks such as document analysis, chat functionality,
+ * and vector store interactions.
+ */
+@observer
+export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
+ // MobX observable properties to track UI state and data
+ @observable history: AssistantMessage[] = []; // messages rendered in the chat, oldest first
+ @observable.deep current_message: AssistantMessage | undefined = undefined; // assistant reply currently being produced
+ @observable isLoading: boolean = false;
+ @observable uploadProgress: number = 0;
+ @observable currentStep: string = '';
+ @observable expandedScratchpadIndex: number | null = null;
+ @observable inputValue: string = '';
+ @observable private linked_docs_to_add: ObservableSet<Doc> = observable.set();
+ @observable private linked_csv_files: { filename: string; id: string; text: string }[] = [];
+ @observable private isUploadingDocs: boolean = false;
+ @observable private citationPopup: { text: string; visible: boolean } = { text: '', visible: false };
+
+ // Private properties for managing OpenAI API, vector store, agent, and UI elements
+ private openai: OpenAI;
+ private vectorstore_id: string;
+ private vectorstore: Vectorstore;
+ private agent: Agent;
+ private _oldWheel: HTMLDivElement | null = null;
+ // RefObject requires its element type; the constructor assigns React.createRef<HTMLDivElement>()
+ private messagesRef: React.RefObject<HTMLDivElement>;
+
+ /**
+  * Produces the layout string for this view by delegating to FieldView.
+  * @param fieldKey Key of the field the layout should be bound to.
+  * @returns Whatever FieldView.LayoutString yields for ChatBox and the given key.
+  */
+ public static LayoutString(fieldKey: string) {
+     const layout = FieldView.LayoutString(ChatBox, fieldKey);
+     return layout;
+ }
+
+ /**
+  * Builds the component: creates the OpenAI client, the vector store and the agent,
+  * and registers a reaction that mirrors the chat history into `dataDoc.data`.
+  * @param props The properties passed to the component.
+  */
+ constructor(props: FieldViewProps) {
+     super(props);
+     makeObservable(this); // activates the decorators declared on this class
+
+     this.openai = this.initializeOpenAI();
+
+     // Reuse the vectorstore id stored on the data doc, or mint one and persist it.
+     const storedVectorstoreId = StrCast(this.dataDoc.vectorstore_id);
+     if (storedVectorstoreId === '') {
+         this.vectorstore_id = uuidv4();
+         this.dataDoc.vectorstore_id = this.vectorstore_id;
+     } else {
+         this.vectorstore_id = storedVectorstoreId;
+     }
+
+     this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds);
+     this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.createCSVInDash);
+     this.messagesRef = React.createRef<HTMLDivElement>();
+
+     // Only the fields picked below are persisted.
+     const toSerializable = (msg: AssistantMessage) => ({
+         role: msg.role,
+         content: msg.content,
+         follow_up_questions: msg.follow_up_questions,
+         citations: msg.citations,
+     });
+
+     // Whenever the history changes, write its serializable form to dataDoc.data.
+     reaction(
+         () => this.history.map(toSerializable),
+         serializableHistory => {
+             this.dataDoc.data = JSON.stringify(serializableHistory);
+         }
+     );
+ }
+
+ /**
+  * Adds a document to the vectorstore, showing the upload overlay while the
+  * operation runs and resetting the progress state afterwards.
+  *
+  * Only the code before the first `await` runs inside the `@action`; state
+  * changes made after it are wrapped in `runInAction` so they are applied as actions too.
+  * @param newLinkedDoc The new document to add.
+  */
+ @action
+ addDocToVectorstore = async (newLinkedDoc: Doc) => {
+     this.uploadProgress = 0;
+     this.currentStep = 'Initializing...';
+     this.isUploadingDocs = true;
+
+     try {
+         // Add the document to the vectorstore; progress is reported through updateProgress
+         await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress);
+     } catch (error) {
+         console.error('Error uploading document:', error);
+         runInAction(() => {
+             this.currentStep = 'Error during upload';
+         });
+     } finally {
+         // Always hide the overlay and reset progress, whether or not the upload succeeded
+         runInAction(() => {
+             this.isUploadingDocs = false;
+             this.uploadProgress = 0;
+             this.currentStep = '';
+         });
+     }
+ };
+
+ /**
+  * Records upload progress for the UI.
+  * @param progress The progress value to store in `uploadProgress`.
+  * @param step Name of the step to store in `currentStep`.
+  */
+ @action
+ updateProgress = (progress: number, step: string) => {
+     this.currentStep = step;
+     this.uploadProgress = progress;
+ };
+
+ /**
+  * Registers a CSV document for analysis: asks OpenAI for a summary of its text,
+  * records the CSV in `linked_csv_files`, and stores the chunk descriptor and
+  * summary on the document. Documents that already have `chunk_simpl` are skipped.
+  * @param newLinkedDoc The linked document representing the CSV file.
+  * @param id Optional ID for the CSV chunk; a UUID is generated when omitted.
+  */
+ @action
+ addCSVForAnalysis = async (newLinkedDoc: Doc, id?: string) => {
+     // Already processed
+     if (newLinkedDoc.chunk_simpl) return;
+
+     // The CSV content is read from the document's text field
+     const csvData: string = StrCast(newLinkedDoc.text);
+
+     // Generate a summary using OpenAI API
+     const completion = await this.openai.chat.completions.create({
+         messages: [
+             {
+                 role: 'system',
+                 content:
+                     'You are an AI assistant tasked with summarizing the content of a CSV file. You will be provided with the data from the CSV file and your goal is to generate a concise summary that captures the main themes, trends, and key points represented in the data.',
+             },
+             {
+                 role: 'user',
+                 content: `Please provide a comprehensive summary of the CSV file based on the provided data. Ensure the summary highlights the most important information, patterns, and insights. Your response should be in paragraph form and be concise.
+ CSV Data:
+ ${csvData}
+ **********
+ Summary:`,
+             },
+         ],
+         model: 'gpt-3.5-turbo',
+     });
+
+     const csvId = id ?? uuidv4();
+     // The API may return no choice or null content; fall back to an empty summary instead of asserting
+     const summary = completion.choices[0]?.message.content ?? '';
+
+     // We are past the await, so state changes need their own action
+     runInAction(() => {
+         this.linked_csv_files.push({
+             filename: CsvCast(newLinkedDoc.data).url.pathname,
+             id: csvId,
+             text: csvData,
+         });
+
+         // Describe the CSV as a single chunk and attach the summary
+         const chunkToAdd = {
+             chunkId: csvId,
+             chunkType: CHUNK_TYPE.CSV,
+         };
+         newLinkedDoc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] });
+         newLinkedDoc.summary = summary;
+     });
+ };
+
+ /**
+  * Expands the scratchpad at `index`, or collapses it if it is the one already expanded.
+  * @param index Index of the tool log to toggle.
+  */
+ @action
+ toggleToolLogs = (index: number) => {
+     const alreadyExpanded = this.expandedScratchpadIndex === index;
+     this.expandedScratchpadIndex = alreadyExpanded ? null : index;
+ };
+
+ /**
+  * Creates the OpenAI client. The key is read from `process.env.OPENAI_KEY` and
+  * browser usage is explicitly allowed via `dangerouslyAllowBrowser`.
+  * @returns OpenAI client instance.
+  */
+ initializeOpenAI() {
+     const apiKey = process.env.OPENAI_KEY;
+     const options: ClientOptions = { apiKey, dangerouslyAllowBrowser: true };
+     return new OpenAI(options);
+ }
+
+ /**
+  * Attaches `onPassiveWheel` as a non-passive wheel listener on the messages container,
+  * if the container is currently mounted.
+  */
+ addScrollListener = () => {
+     const container = this.messagesRef.current;
+     if (!container) return;
+     container.addEventListener('wheel', this.onPassiveWheel, { passive: false });
+ };
+
+ /**
+  * Detaches the wheel listener from the messages container, if it is still mounted.
+  */
+ removeScrollListener = () => {
+     const container = this.messagesRef.current;
+     if (!container) return;
+     container.removeEventListener('wheel', this.onPassiveWheel);
+ };
+
+ /**
+  * Moves the messages container's scroll position to its full scroll height so the
+  * newest message is in view. Does nothing when the container is not mounted.
+  */
+ scrollToBottom = () => {
+     const container = this.messagesRef.current;
+     if (!container) return;
+     container.scrollTop = container.scrollHeight;
+ };
+
+ /**
+  * Wheel handler: while the content is active, stops the event from propagating further.
+  * @param e The wheel event.
+  */
+ onPassiveWheel = (e: WheelEvent) => {
+     const contentActive = this._props.isContentActive();
+     if (!contentActive) return;
+     e.stopPropagation();
+ };
+
+ /**
+  * Form submit handler: sends the user's input to the agent, streams intermediate
+  * updates into `current_message`, and appends the final answer (or an error
+  * message) to the chat history.
+  *
+  * Only the code before the first `await` runs inside the `@action`; every state
+  * change after it goes through `runInAction`.
+  * @param event The form submission event.
+  */
+ @action
+ askGPT = async (event: React.FormEvent<HTMLFormElement>): Promise<void> => {
+     event.preventDefault();
+     this.inputValue = '';
+
+     // Extract the user's message from the form's input element
+     const textInput = event.currentTarget.elements.namedItem('messageInput') as HTMLInputElement;
+     const trimmedText = textInput.value.trim();
+
+     if (trimmedText) {
+         try {
+             textInput.value = '';
+             // Add the user's message to the history
+             this.history.push({
+                 role: ASSISTANT_ROLE.USER,
+                 content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: trimmedText, citation_ids: null }],
+                 processing_info: [],
+             });
+             this.isLoading = true;
+             // Placeholder that is filled in while the agent works
+             this.current_message = {
+                 role: ASSISTANT_ROLE.ASSISTANT,
+                 content: [],
+                 citations: [],
+                 processing_info: [],
+             };
+
+             // Replace the in-progress message's processing info as the agent reports it
+             const onProcessingUpdate = (processingUpdate: ProcessingInfo[]) => {
+                 runInAction(() => {
+                     if (this.current_message) {
+                         this.current_message = {
+                             ...this.current_message,
+                             processing_info: processingUpdate,
+                         };
+                     }
+                 });
+                 this.scrollToBottom();
+             };
+
+             // Replace the in-progress message's text as the answer streams in
+             const onAnswerUpdate = (answerUpdate: string) => {
+                 runInAction(() => {
+                     if (this.current_message) {
+                         this.current_message = {
+                             ...this.current_message,
+                             content: [{ text: answerUpdate, type: TEXT_TYPE.NORMAL, index: 0, citation_ids: [] }],
+                         };
+                     }
+                 });
+             };
+
+             // Send the user's question to the assistant and get the final message
+             const finalMessage = await this.agent.askAgent(trimmedText, onProcessingUpdate, onAnswerUpdate);
+
+             // Update the history with the final assistant message
+             runInAction(() => {
+                 if (this.current_message) {
+                     this.history.push({ ...finalMessage });
+                     this.current_message = undefined;
+                     this.dataDoc.data = JSON.stringify(this.history);
+                 }
+             });
+         } catch (err) {
+             console.error('Error:', err);
+             // Show an error entry in place of the answer
+             runInAction(() => {
+                 this.history.push({
+                     role: ASSISTANT_ROLE.ASSISTANT,
+                     content: [{ index: 0, type: TEXT_TYPE.ERROR, text: 'Sorry, I encountered an error while processing your request.', citation_ids: null }],
+                     processing_info: [],
+                 });
+             });
+         } finally {
+             runInAction(() => {
+                 // Drop any partial in-progress message so it is not left on screen after a failure
+                 this.current_message = undefined;
+                 this.isLoading = false;
+             });
+             this.scrollToBottom();
+         }
+     }
+     this.scrollToBottom();
+ };
+
+ /**
+  * Replaces the citations of the history entry at `index`; does nothing if there is no such entry.
+  * @param index The index of the message in the history.
+  * @param citations The citations to store on the message.
+  */
+ @action
+ updateMessageCitations = (index: number, citations: Citation[]) => {
+     const target = this.history[index];
+     if (!target) return;
+     target.citations = citations;
+ };
+
+ /**
+  * Creates a web document for `url`, links it to this chat document, and stores a
+  * URL chunk descriptor on it. The descriptor records whether the page may be
+  * embedded, based on the `x-frame-options` value returned by the `/proxyFetch` endpoint.
+  * @param url The URL of the document to add.
+  * @param id The unique identifier used as the chunk id.
+  */
+ @action
+ addLinkedUrlDoc = async (url: string, id: string) => {
+     const doc = Docs.Create.WebDocument(url, { data_useCors: true });
+
+     const linkDoc = Docs.Create.LinkDocument(this.Document, doc);
+     LinkManager.Instance.addLink(linkDoc);
+
+     // Default to "cannot display" so a failed lookup falls back to opening the URL externally
+     let canDisplay = false;
+
+     try {
+         // Fetch the URL content through the proxy
+         const { data } = await Networking.PostToServer('/proxyFetch', { url });
+
+         // NOTE(review): assumes the proxy response exposes the target's headers under data.headers — confirm
+         const xFrameOptions = data.headers?.['x-frame-options'];
+         const policy = typeof xFrameOptions === 'string' ? xFrameOptions.trim().toUpperCase() : '';
+
+         // Both DENY and SAMEORIGIN forbid embedding the page from this origin
+         canDisplay = policy !== 'SAMEORIGIN' && policy !== 'DENY';
+     } catch (error) {
+         console.error('Error fetching the URL from the server:', error);
+     }
+
+     const chunkToAdd = {
+         chunkId: id,
+         chunkType: CHUNK_TYPE.URL,
+         url: url,
+         canDisplay: canDisplay,
+     };
+
+     doc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] });
+ };
+
+ /**
+  * Exposes `ClientUtils.CurrentUserEmail` as a computed property.
+  */
+ @computed
+ get userName() {
+     const currentUser = ClientUtils.CurrentUserEmail;
+     return currentUser;
+ }
+
+ /**
+  * Creates a CSV document, links it to this chat document, adds it to the
+  * containing view, brings it into view, and registers it for analysis.
+  * @param url The URL of the CSV.
+  * @param title The title of the CSV document.
+  * @param id The unique ID used for the CSV chunk.
+  * @param data The CSV data content.
+  */
+ @action
+ createCSVInDash = async (url: string, title: string, id: string, data: string) => {
+     const doc = DocCast(await DocUtils.DocumentFromType('csv', url, { title: title, text: RTFCast(data) }));
+     if (!doc) {
+         // Nothing to link or show if the document could not be created
+         console.error('Failed to create CSV document for:', url);
+         return;
+     }
+
+     const linkDoc = Docs.Create.LinkDocument(this.Document, doc);
+     LinkManager.Instance.addLink(linkDoc);
+
+     this._props.addDocument?.(doc);
+     await DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+
+     // Awaited so a failure surfaces to the caller instead of becoming an unhandled rejection
+     await this.addCSVForAnalysis(doc, id);
+ };
+
+ /**
+  * Handles a click on a citation: finds the linked document whose `chunk_simpl`
+  * contains the cited chunk and reveals it in a way that depends on the chunk type.
+  * @param citation The citation object clicked by the user.
+  */
+ @action
+ handleCitationClick = (citation: Citation) => {
+     const currentLinkedDocs: Doc[] = this.linkedDocs;
+     const chunkId = citation.chunk_id;
+
+     for (const doc of currentLinkedDocs) {
+         if (!doc.chunk_simpl) continue;
+
+         const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] };
+         const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId);
+         if (!foundChunk) continue;
+
+         switch (foundChunk.chunkType) {
+             case CHUNK_TYPE.IMAGE:
+             case CHUNK_TYPE.TABLE: {
+                 // location is a bracketed, comma-separated list of four numbers
+                 const values = foundChunk.location?.replace(/[\[\]]/g, '').split(',');
+
+                 if (values?.length !== 4) {
+                     console.error('Location string must contain exactly 4 numbers');
+                     return;
+                 }
+
+                 // Scale by the doc's native size; y is additionally offset by startPage page-heights
+                 const x1 = parseFloat(values[0]) * Doc.NativeWidth(doc);
+                 const y1 = parseFloat(values[1]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc);
+                 const x2 = parseFloat(values[2]) * Doc.NativeWidth(doc);
+                 const y2 = parseFloat(values[3]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc);
+
+                 const annotationKey = Doc.LayoutFieldKey(doc) + '_annotations';
+
+                 // Reuse the highlight created for this citation earlier, if there is one
+                 const existingDoc = DocListCast(doc[DocData][annotationKey]).find(d => d.citation_id === citation.citation_id);
+                 const highlightDoc = existingDoc ?? this.createImageCitationHighlight(x1, y1, x2, y2, citation, annotationKey, doc);
+
+                 DocumentManager.Instance.showDocument(highlightDoc, { willZoomCentered: true }, () => {});
+                 break;
+             }
+             case CHUNK_TYPE.TEXT: {
+                 this.citationPopup = { text: citation.direct_text ?? 'No text available', visible: true };
+                 // Hide after 3 seconds; the timer fires outside this action, so wrap the write
+                 setTimeout(() => runInAction(() => (this.citationPopup.visible = false)), 3000);
+
+                 DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
+                     // The doc may have no views; only search when one exists
+                     const firstView = Array.from(doc[DocViews])[0] as DocumentView | undefined;
+                     firstView?.ComponentView?.search?.(citation.direct_text ?? '');
+                 });
+                 break;
+             }
+             case CHUNK_TYPE.URL:
+                 if (foundChunk.canDisplay) {
+                     DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+                 } else {
+                     // Pages that cannot be embedded are opened in a new tab
+                     window.open(StrCast(doc.displayUrl), '_blank');
+                 }
+                 break;
+             case CHUNK_TYPE.CSV:
+                 DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+                 break;
+             default:
+                 console.error('Chunk type not recognized:', foundChunk.chunkType);
+                 break;
+         }
+     }
+ };
+
+ /**
+  * Builds a translucent yellow freeform document spanning the given rectangle, tags it
+  * with the citation id, and attaches it to `pdfDoc` as an annotation.
+  * @param x1 X-coordinate of the top-left corner of the highlight.
+  * @param y1 Y-coordinate of the top-left corner of the highlight.
+  * @param x2 X-coordinate of the bottom-right corner of the highlight.
+  * @param y2 Y-coordinate of the bottom-right corner of the highlight.
+  * @param citation The citation whose id is stored on the highlight.
+  * @param annotationKey The list key on the document's data under which the highlight is added.
+  * @param pdfDoc The document that receives the highlight.
+  * @returns The newly created highlight document.
+  */
+ createImageCitationHighlight = (x1: number, y1: number, x2: number, y2: number, citation: Citation, annotationKey: string, pdfDoc: Doc): Doc => {
+     const width = x2 - x1;
+     const height = y2 - y1;
+     const highlight = Docs.Create.FreeformDocument([], {
+         x: x1,
+         y: y1,
+         _width: width,
+         _height: height,
+         backgroundColor: 'rgba(255, 255, 0, 0.5)',
+     });
+
+     // Tag with the citation id so a later click can find this highlight again
+     highlight[DocData].citation_id = citation.citation_id;
+
+     Doc.AddDocToList(pdfDoc[DocData], annotationKey, highlight);
+     highlight.annotationOn = pdfDoc;
+     Doc.SetContainer(highlight, pdfDoc);
+
+     return highlight;
+ };
+
+ /**
+  * React lifecycle hook: after every update, keep the message list scrolled to the bottom.
+  */
+ componentDidUpdate(): void {
+     this.scrollToBottom();
+ }
+
+ /**
+  * Disposers for the MobX subscriptions created in componentDidMount,
+  * released in componentWillUnmount.
+  */
+ private _chatBoxDisposers: (() => void)[] = [];
+
+ /**
+  * Lifecycle method that triggers when the component mounts.
+  * Restores the chat history from dataDoc (or shows a welcome message), subscribes to
+  * changes in the linked documents, and attaches the wheel listener.
+  */
+ componentDidMount() {
+     this._props.setContentViewBox?.(this);
+     if (this.dataDoc.data) {
+         try {
+             const storedHistory = JSON.parse(StrCast(this.dataDoc.data));
+             runInAction(() => {
+                 this.history.push(
+                     ...storedHistory.map((msg: AssistantMessage) => ({
+                         role: msg.role,
+                         content: msg.content,
+                         follow_up_questions: msg.follow_up_questions,
+                         citations: msg.citations,
+                     }))
+                 );
+             });
+         } catch (e) {
+             console.error('Failed to parse history from dataDoc:', e);
+         }
+     } else {
+         // Default welcome message
+         runInAction(() => {
+             this.history.push({
+                 role: ASSISTANT_ROLE.ASSISTANT,
+                 content: [
+                     {
+                         index: 0,
+                         type: TEXT_TYPE.NORMAL,
+                         text: `Hey, ${this.userName()}! Welcome to Your Friendly Assistant. Link a document or ask questions to get started.`,
+                         citation_ids: null,
+                     },
+                 ],
+                 processing_info: [],
+             });
+         });
+     }
+
+     // When the set of linked documents changes, queue each one in linked_docs_to_add
+     this._chatBoxDisposers.push(
+         reaction(
+             () => this.linkedDocs,
+             linked => linked.forEach(doc => this.linked_docs_to_add.add(doc))
+         )
+     );
+
+     // Process each newly queued document according to the type of its data
+     this._chatBoxDisposers.push(
+         observe(this.linked_docs_to_add, change => {
+             if (change.type === 'add') {
+                 if (PDFCast(change.newValue.data)) {
+                     this.addDocToVectorstore(change.newValue);
+                 } else if (CsvCast(change.newValue.data)) {
+                     this.addCSVForAnalysis(change.newValue);
+                 }
+             } else if (change.type === 'delete') {
+                 // Handle document removal
+             }
+         })
+     );
+     this.addScrollListener();
+ }
+
+ /**
+  * Lifecycle method that triggers when the component unmounts.
+  * Removes the wheel listener and disposes the MobX subscriptions so they do not
+  * keep running against an unmounted component.
+  */
+ componentWillUnmount() {
+     this.removeScrollListener();
+     this._chatBoxDisposers.forEach(dispose => dispose());
+     this._chatBoxDisposers = [];
+ }
+
+ /**
+  * Documents linked to this chat document. For each related link the opposite
+  * anchor is taken, replaced by its `annotationOn` document when it has one,
+  * and falsy results are dropped.
+  */
+ @computed
+ get linkedDocs() {
+     const links = LinkManager.Instance.getAllRelatedLinks(this.Document);
+     const oppositeAnchors = links.map(link => DocCast(LinkManager.getOppositeAnchor(link, this.Document)));
+     const targets = oppositeAnchors.map(anchor => DocCast(anchor?.annotationOn, anchor));
+     return targets.filter(target => target);
+ }
+
+ /**
+  * The `ai_doc_id` values (as strings) of the linked documents that have one.
+  */
+ @computed
+ get docIds() {
+     const links = LinkManager.Instance.getAllRelatedLinks(this.Document);
+     const targets = links
+         .map(link => DocCast(LinkManager.getOppositeAnchor(link, this.Document)))
+         .map(anchor => DocCast(anchor?.annotationOn, anchor))
+         .filter(target => target);
+     const withAiId = targets.filter(target => target.ai_doc_id);
+     return withAiId.map(target => StrCast(target.ai_doc_id));
+ }
+
+ /**
+  * Summaries of all linked documents that have one, one per line, followed by a trailing newline.
+  * PDF and CSV documents are wrapped in a `<summary>` tag carrying the file path and the
+  * applicable tool; any other document is emitted as a numbered line.
+  */
+ @computed
+ get summaries(): string {
+     const summarized = LinkManager.Instance.getAllRelatedLinks(this.Document)
+         .map(link => DocCast(LinkManager.getOppositeAnchor(link, this.Document)))
+         .map(anchor => DocCast(anchor?.annotationOn, anchor))
+         .filter(target => target)
+         .filter(target => target.summary);
+
+     const lines = summarized.map((doc, index) => {
+         if (PDFCast(doc.data)) {
+             return `<summary file_name="${PDFCast(doc.data).url.pathname}" applicable_tools=["rag"]>${doc.summary}</summary>`;
+         }
+         if (CsvCast(doc.data)) {
+             return `<summary file_name="${CsvCast(doc.data).url.pathname}" applicable_tools=["dataAnalysis"]>${doc.summary}</summary>`;
+         }
+         return `${index + 1}) ${doc.summary}`;
+     });
+
+     return lines.join('\n') + '\n';
+ }
+
+ /**
+  * The CSV files registered so far via `linked_csv_files`.
+  */
+ @computed
+ get linkedCSVs(): { filename: string; id: string; text: string }[] {
+     const registered = this.linked_csv_files;
+     return registered;
+ }
+
+ /**
+  * The chat history serialized as a `<chat_history>` block: one element per message,
+  * named after its role, containing the space-joined text of its content items and,
+  * when present, a nested `<loop_summary>`.
+  */
+ @computed
+ get formattedHistory(): string {
+     const entries = this.history.map(message => {
+         const text = message.content.map(content => content.text).join(' ');
+         const loopSummary = message.loop_summary ? `<loop_summary>${message.loop_summary}</loop_summary>` : '';
+         return `<${message.role}>${text}${loopSummary}</${message.role}>\n`;
+     });
+     return '<chat_history>\n' + entries.join('') + '</chat_history>';
+ }
+
+ // Callbacks handed to the Vectorstore and Agent in the constructor; each reads the
+ // matching computed property at call time.
+
+ /** @returns The current value of `summaries`. */
+ retrieveSummaries = () => this.summaries;
+
+ /** @returns The current value of `linkedCSVs`. */
+ retrieveCSVData = () => this.linkedCSVs;
+
+ /** @returns The current value of `formattedHistory`. */
+ retrieveFormattedHistory = () => this.formattedHistory;
+
+ /** @returns The current value of `docIds`. */
+ retrieveDocIds = () => this.docIds;
+
+ /**
+  * Copies a clicked follow-up question into the input field.
+  * @param question The follow-up question clicked by the user.
+  */
+ @action
+ handleFollowUpClick = (question: string): void => {
+     this.inputValue = question;
+ };
+
+ /**
+  * Renders the chat interface: the upload overlay (while documents are uploading), the
+  * header, the message list including any in-progress message, the input form, and the
+  * citation popup.
+  */
+ render() {
+     return (
+         <div className="chat-box">
+             {this.isUploadingDocs && (
+                 <div className="uploading-overlay">
+                     <div className="progress-container">
+                         <ProgressBar />
+                         <div className="step-name">{this.currentStep}</div>
+                     </div>
+                 </div>
+             )}
+             <div className="chat-header">
+                 <h2>{this.userName()}'s AI Assistant</h2>
+             </div>
+             <div className="chat-messages" ref={this.messagesRef}>
+                 {this.history.map((message, index) => (
+                     <MessageComponentBox key={index} message={message} index={index} onFollowUpClick={this.handleFollowUpClick} onCitationClick={this.handleCitationClick} updateMessageCitations={this.updateMessageCitations} />
+                 ))}
+                 {this.current_message && (
+                     <MessageComponentBox
+                         key={this.history.length}
+                         message={this.current_message}
+                         index={this.history.length}
+                         onFollowUpClick={this.handleFollowUpClick}
+                         onCitationClick={this.handleCitationClick}
+                         updateMessageCitations={this.updateMessageCitations}
+                     />
+                 )}
+             </div>
+             <form onSubmit={this.askGPT} className="chat-input">
+                 {/* The change handler is wrapped in action() because it writes to an observable */}
+                 <input
+                     type="text"
+                     name="messageInput"
+                     autoComplete="off"
+                     placeholder="Type your message here..."
+                     value={this.inputValue}
+                     onChange={action((e: React.ChangeEvent<HTMLInputElement>) => {
+                         this.inputValue = e.target.value;
+                     })}
+                 />
+                 <button className="submit-button" type="submit" disabled={this.isLoading}>
+                     {this.isLoading ? (
+                         <div className="spinner"></div>
+                     ) : (
+                         <svg viewBox="0 0 24 24" width="24" height="24" stroke="currentColor" strokeWidth="2" fill="none" strokeLinecap="round" strokeLinejoin="round">
+                             <line x1="22" y1="2" x2="11" y2="13"></line>
+                             <polygon points="22 2 15 22 11 13 2 9 22 2"></polygon>
+                         </svg>
+                     )}
+                 </button>
+             </form>
+             {/* Popup for citation */}
+             {this.citationPopup.visible && (
+                 <div className="citation-popup">
+                     <p>
+                         <strong>Text from your document: </strong> {this.citationPopup.text}
+                     </p>
+                 </div>
+             )}
+         </div>
+     );
+ }
+}
+
+/**
+ * Registers the template for DocumentType.CHAT documents: ChatBox is the view and the
+ * layout is bound to the 'data' field. The `options` entry lists the option keys for
+ * the prototype, each given as an empty string.
+ */
+Docs.Prototypes.TemplateMap.set(DocumentType.CHAT, {
+ layout: { view: ChatBox, dataField: 'data' },
+ options: { acl: '', chat: '', chat_history: '', chat_thread_id: '', chat_assistant_id: '', chat_vector_store_id: '' },
+});
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx
new file mode 100644
index 000000000..801becb64
--- /dev/null
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx
@@ -0,0 +1,146 @@
+import React, { useState } from 'react';
+import { observer } from 'mobx-react';
+import { AssistantMessage, Citation, MessageContent, PROCESSING_TYPE, ProcessingInfo, TEXT_TYPE } from '../types/types';
+import ReactMarkdown from 'react-markdown';
+
+/**
+ * Props accepted by MessageComponentBox.
+ */
+interface MessageComponentProps {
+    /** The message data to display. */
+    message: AssistantMessage;
+    /** The index of the message. */
+    index: number;
+    /** Invoked with the question text when a follow-up question is clicked. */
+    onFollowUpClick: (question: string) => void;
+    /** Invoked with the citation when a citation button is clicked. */
+    onCitationClick: (citation: Citation) => void;
+    /** Callback for updating the citations of the message at the given index. */
+    updateMessageCitations: (index: number, citations: Citation[]) => void;
+}
+
+/**
+ * Renders one AssistantMessage: an optional collapsible list of the agent's
+ * thoughts/actions, the message content (with citation buttons on grounded text),
+ * and any follow-up questions.
+ * @param props The props for the component.
+ */
+const MessageComponentBox: React.FC<MessageComponentProps> = ({ message, index, onFollowUpClick, onCitationClick, updateMessageCitations }) => {
+    // Whether the thoughts/actions dropdown is expanded
+    const [dropdownOpen, setDropdownOpen] = useState(false);
+
+    /**
+     * Renders a single content item according to its type.
+     * @param item The content item to render.
+     * @returns The element for the item.
+     */
+    const renderContent = (item: MessageContent) => {
+        const i = item.index;
+
+        // Grounded text: markdown followed by one button per resolvable citation
+        if (item.type === TEXT_TYPE.GROUNDED) {
+            const citation_ids = item.citation_ids || [];
+            const citationButtons = citation_ids.map((id, idx) => {
+                const citation = message.citations?.find(c => c.citation_id === id);
+                if (!citation) return null;
+                return (
+                    <button key={i + idx} className="citation-button" onClick={() => onCitationClick(citation)}>
+                        {i + 1}
+                    </button>
+                );
+            });
+            return (
+                <span key={i} className="grounded-text">
+                    <ReactMarkdown>{item.text}</ReactMarkdown>
+                    {citationButtons}
+                </span>
+            );
+        }
+
+        // Normal text: markdown only
+        if (item.type === TEXT_TYPE.NORMAL) {
+            return (
+                <span key={i} className="normal-text">
+                    <ReactMarkdown>{item.text}</ReactMarkdown>
+                </span>
+            );
+        }
+
+        // Items carrying a `query` property: show the query as JSON
+        if ('query' in item) {
+            return (
+                <span key={i} className="query-text">
+                    <ReactMarkdown>{JSON.stringify(item.query)}</ReactMarkdown>
+                </span>
+            );
+        }
+
+        // Anything else: show the whole item as JSON
+        return (
+            <span key={i}>
+                <ReactMarkdown>{JSON.stringify(item)}</ReactMarkdown>
+            </span>
+        );
+    };
+
+    // Truthy only when the message carries at least one processing entry
+    const hasProcessingInfo = message.processing_info && message.processing_info.length > 0;
+
+    /**
+     * Renders one processing entry; only thoughts and actions are shown.
+     * @param info The processing information to render.
+     * @returns The element for the entry, or null for other types.
+     */
+    const renderProcessingInfo = (info: ProcessingInfo) => {
+        const isThought = info.type === PROCESSING_TYPE.THOUGHT;
+        const isAction = info.type === PROCESSING_TYPE.ACTION;
+        if (!isThought && !isAction) return null;
+        return (
+            <div key={info.index} className="dropdown-item">
+                <strong>{isThought ? 'Thought:' : 'Action:'}</strong> {info.content}
+            </div>
+        );
+    };
+
+    const toggleLabel = dropdownOpen ? 'Hide Agent Thoughts/Actions' : 'Show Agent Thoughts/Actions';
+    const followUps = message.follow_up_questions ?? [];
+
+    return (
+        <div className={`message ${message.role}`}>
+            {/* Processing Information Dropdown */}
+            {hasProcessingInfo && (
+                <div className="processing-info">
+                    <button className="toggle-info" onClick={() => setDropdownOpen(!dropdownOpen)}>
+                        {toggleLabel}
+                    </button>
+                    {dropdownOpen && <div className="info-content">{message.processing_info.map(renderProcessingInfo)}</div>}
+                    <br />
+                </div>
+            )}
+
+            {/* Message Content */}
+            <div className="message-content">{message.content?.map(messageFragment => <React.Fragment key={messageFragment.index}>{renderContent(messageFragment)}</React.Fragment>)}</div>
+
+            {/* Follow-up Questions Section */}
+            {followUps.length > 0 && (
+                <div className="follow-up-questions">
+                    <h4>Follow-up Questions:</h4>
+                    <div className="questions-list">
+                        {followUps.map((question, idx) => (
+                            <button key={idx} className="follow-up-button" onClick={() => onFollowUpClick(question)}>
+                                {question}
+                            </button>
+                        ))}
+                    </div>
+                </div>
+            )}
+        </div>
+    );
+};
+
+// Export the observer-wrapped component to allow MobX to react to state changes
+export default observer(MessageComponentBox);
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss
new file mode 100644
index 000000000..ff5be4a38
--- /dev/null
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss
@@ -0,0 +1,69 @@
+.spinner-container { // Centers the spinner both ways inside a full-height column
+ display: flex;
+ flex-direction: column;
+ justify-content: center;
+ align-items: center;
+ height: 100%;
+}
+
+.spinner { // 60x60 positioned box for the two bounce circles. NOTE(review): unscoped, so it also matches other elements with class "spinner" — confirm intended
+ width: 60px;
+ height: 60px;
+ position: relative;
+ margin-bottom: 20px; // Space between spinner and text
+}
+
+.double-bounce1,
+.double-bounce2 { // Two overlapping translucent circles filling .spinner, both running the bounce animation
+ width: 100%;
+ height: 100%;
+ border-radius: 50%;
+ background-color: #4a90e2;
+ opacity: 0.6;
+ position: absolute;
+ top: 0;
+ left: 0;
+ animation: bounce 2s infinite ease-in-out;
+}
+
+.double-bounce2 { // Starts half a cycle (1s of 2s) ahead so the circles alternate
+ animation-delay: -1s;
+}
+
+@keyframes bounce { // Scale from nothing to full size and back
+ 0%,
+ 100% {
+ transform: scale(0);
+ }
+ 50% {
+ transform: scale(1);
+ }
+}
+
+.uploading-overlay { // Translucent white layer stretched over its positioned ancestor; content is centered
+ position: absolute;
+ top: 0;
+ left: 0;
+ right: 0;
+ bottom: 0;
+ background-color: rgba(255, 255, 255, 0.8);
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ z-index: 1000;
+}
+
+.progress-container { // Stacks the spinner and the step label, centered
+ display: flex;
+ flex-direction: column;
+ align-items: center;
+ text-align: center;
+}
+
+.step-name { // Label for the current step
+ font-size: 18px;
+ color: #333;
+ text-align: center;
+ width: 100%;
+ margin-top: -10px; // Adjust to move the text closer to the spinner
+}
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx
new file mode 100644
index 000000000..b9fd08742
--- /dev/null
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx
@@ -0,0 +1,22 @@
+import React from 'react';
+import './ProgressBar.scss';
+
+/**
+ * ProgressBar is a functional React component that displays a loading spinner
+ * to indicate ongoing processing. Despite its name it takes no props and shows
+ * no measurable progress: it is an indeterminate, endlessly looping indicator.
+ *
+ * The animation consists of two divs (`double-bounce1` and `double-bounce2`)
+ * stacked inside `.spinner`. ProgressBar.scss scales each circle from nothing
+ * to full size and back, with the second circle half a cycle behind the first,
+ * which creates the pulsating effect.
+ */
+export const ProgressBar: React.FC = () => {
+ return (
+ <div className="spinner-container">
+ {/* Spinner div containing two bouncing elements */}
+ <div className="spinner">
+ <div className="double-bounce1"></div> {/* First bouncing element */}
+ <div className="double-bounce2"></div> {/* Second bouncing element */}
+ </div>
+ </div>
+ );
+};
diff --git a/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts b/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts
new file mode 100644
index 000000000..3b4fdb6f5
--- /dev/null
+++ b/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts
@@ -0,0 +1,125 @@
+import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType, ProcessingInfo } from '../types/types';
+import { v4 as uuid } from 'uuid';
+
+/**
+ * Converts the model's final XML-style answer into a structured AssistantMessage.
+ *
+ * All extraction is regex based, so the expected tag layout is strict: a <citation>
+ * must carry its attributes in the order index, chunk_id, type, separated by single spaces.
+ */
+export class AnswerParser {
+    /**
+     * Parses one complete answer.
+     *
+     * @param xml Raw model output containing an <answer> element.
+     * @param processingInfo Passed through unchanged as `processing_info` on the result.
+     * @returns The message with ordered normal/grounded text segments, the parsed citations,
+     * the follow-up questions and, when present, the loop summary.
+     * @throws Error when no <answer>...</answer> pair is found.
+     */
+    static parse(xml: string, processingInfo: ProcessingInfo[]): AssistantMessage {
+        const answerRegex = /<answer>([\s\S]*?)<\/answer>/;
+        const citationsRegex = /<citations>([\s\S]*?)<\/citations>/;
+        const citationRegex = /<citation index="([^"]+)" chunk_id="([^"]+)" type="([^"]+)">([\s\S]*?)<\/citation>/g;
+        const followUpQuestionsRegex = /<follow_up_questions>([\s\S]*?)<\/follow_up_questions>/;
+        // [\s\S] instead of '.' so that a question wrapped over several lines is not dropped
+        const questionRegex = /<question>([\s\S]*?)<\/question>/g;
+        const groundedTextRegex = /<grounded_text citation_index="([^"]+)">([\s\S]*?)<\/grounded_text>/g;
+        const normalTextRegex = /<normal_text>([\s\S]*?)<\/normal_text>/g;
+        const loopSummaryRegex = /<loop_summary>([\s\S]*?)<\/loop_summary>/;
+
+        const answerMatch = answerRegex.exec(xml);
+        const citationsMatch = citationsRegex.exec(xml);
+        const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml);
+        const loopSummaryMatch = loopSummaryRegex.exec(xml);
+
+        if (!answerMatch) {
+            throw new Error('Invalid XML: Missing <answer> tag.');
+        }
+
+        let rawTextContent = answerMatch[1].trim();
+        const content: AssistantMessage['content'] = [];
+        const citations: Citation[] = [];
+        let contentIndex = 0;
+
+        // Remove the citations, follow-up questions and loop summary sections so that
+        // only the displayable text remains in rawTextContent
+        if (citationsMatch) {
+            rawTextContent = rawTextContent.replace(citationsMatch[0], '').trim();
+        }
+        if (followUpQuestionsMatch) {
+            rawTextContent = rawTextContent.replace(followUpQuestionsMatch[0], '').trim();
+        }
+        if (loopSummaryMatch) {
+            rawTextContent = rawTextContent.replace(loopSummaryMatch[0], '').trim();
+        }
+
+        // Parse citations; citationMap translates the model's citation index into the
+        // freshly generated citation_id that the text segments refer to
+        const citationMap = new Map<string, string>();
+        if (citationsMatch) {
+            const citationsContent = citationsMatch[1];
+            let citationMatch: RegExpExecArray | null;
+            while ((citationMatch = citationRegex.exec(citationsContent)) !== null) {
+                const [, index, chunk_id, type, direct_text] = citationMatch;
+                const citation_id = uuid();
+                citationMap.set(index.trim(), citation_id);
+                citations.push({
+                    direct_text: direct_text.trim(),
+                    type: getChunkType(type),
+                    chunk_id,
+                    citation_id,
+                });
+            }
+        }
+
+        // Unwrap <normal_text> so that everything outside <grounded_text> is treated as normal text
+        rawTextContent = rawTextContent.replace(normalTextRegex, '$1');
+
+        // Parse text content (normal and grounded)
+        let lastIndex = 0;
+        let match: RegExpExecArray | null;
+
+        while ((match = groundedTextRegex.exec(rawTextContent)) !== null) {
+            const [fullMatch, citationIndex, groundedText] = match;
+
+            // Add normal text that is before the grounded text
+            if (match.index > lastIndex) {
+                const normalText = rawTextContent.slice(lastIndex, match.index).trim();
+                if (normalText) {
+                    content.push({
+                        index: contentIndex++,
+                        type: TEXT_TYPE.NORMAL,
+                        text: normalText,
+                        citation_ids: null,
+                    });
+                }
+            }
+
+            // Add grounded text. Indices are trimmed because the model may emit "1, 2" as
+            // well as "1,2"; an index with no matching citation still maps to ''.
+            const citation_ids = citationIndex.split(',').map(index => citationMap.get(index.trim()) || '');
+            content.push({
+                index: contentIndex++,
+                type: TEXT_TYPE.GROUNDED,
+                text: groundedText.trim(),
+                citation_ids,
+            });
+
+            lastIndex = match.index + fullMatch.length;
+        }
+
+        // Add any remaining normal text after the last grounded text
+        if (lastIndex < rawTextContent.length) {
+            const remainingText = rawTextContent.slice(lastIndex).trim();
+            if (remainingText) {
+                content.push({
+                    index: contentIndex++,
+                    type: TEXT_TYPE.NORMAL,
+                    text: remainingText,
+                    citation_ids: null,
+                });
+            }
+        }
+
+        const followUpQuestions: string[] = [];
+        if (followUpQuestionsMatch) {
+            const questionsText = followUpQuestionsMatch[1];
+            let questionMatch: RegExpExecArray | null;
+            while ((questionMatch = questionRegex.exec(questionsText)) !== null) {
+                followUpQuestions.push(questionMatch[1].trim());
+            }
+        }
+
+        const assistantResponse: AssistantMessage = {
+            role: ASSISTANT_ROLE.ASSISTANT,
+            content,
+            follow_up_questions: followUpQuestions,
+            citations,
+            processing_info: processingInfo,
+            loop_summary: loopSummaryMatch ? loopSummaryMatch[1].trim() : undefined,
+        };
+
+        return assistantResponse;
+    }
+}
diff --git a/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts b/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts
new file mode 100644
index 000000000..3585cab4a
--- /dev/null
+++ b/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts
@@ -0,0 +1,73 @@
+import { threadId } from 'worker_threads';
+
+/** Where the streaming parser currently is relative to the displayable text elements. */
+enum ParserState {
+    Outside,
+    InGroundedText,
+    InNormalText,
+}
+
+/**
+ * Incrementally extracts the displayable text of a streamed answer.
+ *
+ * Characters are fed one at a time. Only text inside <grounded_text> and <normal_text>
+ * elements is accumulated; every other part of the stream is ignored.
+ */
+export class StreamedAnswerParser {
+    private state: ParserState = ParserState.Outside;
+    // Holds a tag that is still being read; either empty or starting with '<' while inside text
+    private buffer: string = '';
+    private result: string = '';
+    private isStartOfLine: boolean = true;
+
+    /**
+     * Consumes the next character of the stream.
+     *
+     * @param char The next single character of the model output.
+     * @returns All displayable text collected so far, trimmed.
+     */
+    public parse(char: string): string {
+        switch (this.state) {
+            case ParserState.Outside:
+                if (char === '<') {
+                    this.buffer = '<';
+                } else if (char === '>') {
+                    if (this.buffer.startsWith('<grounded_text')) {
+                        this.state = ParserState.InGroundedText;
+                    } else if (this.buffer.startsWith('<normal_text')) {
+                        this.state = ParserState.InNormalText;
+                    }
+                    this.buffer = '';
+                } else {
+                    this.buffer += char;
+                }
+                break;
+
+            case ParserState.InGroundedText:
+            case ParserState.InNormalText:
+                if (char === '<') {
+                    // A pending '<...' that never closed was literal text (e.g. "a < b"), not a tag
+                    this.flushBufferAsText();
+                    this.buffer = '<';
+                } else if (!this.buffer.startsWith('<')) {
+                    this.processChar(char);
+                } else if (char !== '>') {
+                    this.buffer += char;
+                } else if (this.buffer.startsWith('</grounded_text') || this.buffer.startsWith('</normal_text')) {
+                    this.state = ParserState.Outside;
+                    this.buffer = '';
+                } else {
+                    // Not one of our closing tags: keep it as text instead of swallowing
+                    // everything that follows it up to the end of the element
+                    this.buffer += char;
+                    this.flushBufferAsText();
+                }
+                break;
+        }
+
+        return this.result.trim();
+    }
+
+    /** Emits the buffered characters as ordinary text and empties the buffer. */
+    private flushBufferAsText(): void {
+        for (const buffered of this.buffer) {
+            this.processChar(buffered);
+        }
+        this.buffer = '';
+    }
+
+    /** Appends one text character to the result, skipping spaces at the start of a line. */
+    private processChar(char: string): void {
+        if (this.isStartOfLine && char === ' ') {
+            // Skip leading spaces
+            return;
+        }
+        this.result += char;
+        this.isStartOfLine = char === '\n';
+    }
+
+    /** Discards all collected text and returns the parser to its initial state. */
+    public reset(): void {
+        this.state = ParserState.Outside;
+        this.buffer = '';
+        this.result = '';
+        this.isStartOfLine = true;
+    }
+}
diff --git a/src/client/views/nodes/chatbot/tools/BaseTool.ts b/src/client/views/nodes/chatbot/tools/BaseTool.ts
new file mode 100644
index 000000000..b57f1c8e4
--- /dev/null
+++ b/src/client/views/nodes/chatbot/tools/BaseTool.ts
@@ -0,0 +1,24 @@
+import { Tool } from '../types/types';
+
+/**
+ * Shared base for the chatbot's tools: stores the metadata a tool advertises and
+ * leaves the actual work to `execute`, which every concrete tool must implement.
+ */
+export abstract class BaseTool<T extends Record<string, any> = Record<string, any>> implements Tool<T> {
+    public name: string;
+    public description: string;
+    public parameters: Record<string, any>;
+    public citationRules: string;
+    public briefSummary: string;
+
+    /**
+     * @param name Identifier of the tool; also the key of the object returned by getActionRule().
+     * @param description What the tool does.
+     * @param parameters Description of the arguments the tool accepts.
+     * @param citationRules Instructions attached to the tool's action rule.
+     * @param briefSummary Short summary of the tool (not part of the action rule).
+     */
+    constructor(name: string, description: string, parameters: Record<string, any>, citationRules: string, briefSummary: string) {
+        this.name = name;
+        this.description = description;
+        this.parameters = parameters;
+        this.citationRules = citationRules;
+        this.briefSummary = briefSummary;
+    }
+
+    /** Runs the tool with the given arguments. */
+    abstract execute(args: T): Promise<any>;
+
+    /**
+     * @returns An object with a single entry, keyed by the tool name, holding the
+     * tool's name, citation rules, description and parameters.
+     */
+    getActionRule(): Record<string, any> {
+        const { name, citationRules, description, parameters } = this;
+        return { [name]: { name, citationRules, description, parameters } };
+    }
+}
diff --git a/src/client/views/nodes/chatbot/tools/CalculateTool.ts b/src/client/views/nodes/chatbot/tools/CalculateTool.ts
new file mode 100644
index 000000000..74b7ca27b
--- /dev/null
+++ b/src/client/views/nodes/chatbot/tools/CalculateTool.ts
@@ -0,0 +1,26 @@
+import { BaseTool } from './BaseTool';
+
+/**
+ * Tool that evaluates a mathematical expression and returns the result as text.
+ */
+export class CalculateTool extends BaseTool<{ expression: string }> {
+    constructor() {
+        super(
+            'calculate',
+            'Perform a calculation',
+            {
+                expression: {
+                    type: 'string',
+                    description: 'The mathematical expression to evaluate',
+                    required: 'true',
+                    max_inputs: '1',
+                },
+            },
+            'Provide a mathematical expression to calculate that would work with JavaScript eval().',
+            'Runs a calculation and returns the number - uses JavaScript so be sure to use floating point syntax if necessary'
+        );
+    }
+
+    /**
+     * Evaluates `args.expression` as JavaScript.
+     *
+     * @param args.expression The expression to evaluate.
+     * @returns A single text item holding the stringified result.
+     * @throws Whatever eval() throws for an invalid expression.
+     */
+    async execute(args: { expression: string }): Promise<any> {
+        // SECURITY: eval() runs arbitrary JavaScript with this module's privileges, and the
+        // expression is produced by the language model. Replace it with a dedicated expression
+        // parser before exposing this tool to untrusted input.
+        const result = eval(args.expression);
+        // String() rather than result.toString(): an expression that yields undefined or null
+        // (for example an empty string) must not fail with a TypeError
+        return [{ type: 'text', text: String(result) }];
+    }
+}
diff --git a/src/client/views/nodes/chatbot/tools/CreateCSVTool.ts b/src/client/views/nodes/chatbot/tools/CreateCSVTool.ts
new file mode 100644
index 000000000..55015846b
--- /dev/null
+++ b/src/client/views/nodes/chatbot/tools/CreateCSVTool.ts
@@ -0,0 +1,51 @@
+import { BaseTool } from './BaseTool';
+import { Networking } from '../../../../Network';
+
+/**
+ * Tool that sends raw CSV text to the server's /createCSV endpoint and reports
+ * the created file back through a callback supplied by the owner of the tool.
+ */
+export class CreateCSVTool extends BaseTool<{ csvData: string; filename: string }> {
+    private _handleCSVResult: (url: string, filename: string, id: string, data: string) => void;
+
+    /**
+     * @param handleCSVResult Invoked after a successful upload with the file URL, the
+     * requested filename, the id returned by the server and the CSV text.
+     */
+    constructor(handleCSVResult: (url: string, title: string, id: string, data: string) => void) {
+        // NOTE(review): this parameter description uses a JSON-schema shape (type/properties/required)
+        // while the sibling tools use a flat { paramName: {...} } map - confirm the consumer accepts both.
+        super(
+            'createCSV',
+            'Creates a CSV file from raw CSV data and saves it to the server',
+            {
+                type: 'object',
+                properties: {
+                    csvData: {
+                        type: 'string',
+                        description: 'A string of comma-separated values representing the CSV data.',
+                    },
+                    filename: {
+                        type: 'string',
+                        description: 'The base name of the CSV file to be created. Should end in ".csv".',
+                    },
+                },
+                required: ['csvData', 'filename'],
+            },
+            'Provide a CSV string and a filename to create a CSV file.',
+            'Creates a CSV file from the provided CSV string and saves it to the server with a unique identifier, returning the file URL and UUID.'
+        );
+        this._handleCSVResult = handleCSVResult;
+    }
+
+    /**
+     * Uploads the CSV, notifies the callback and returns a confirmation message.
+     *
+     * @throws Error with a generic message when the upload or the callback fails;
+     * the underlying error is only written to the console.
+     */
+    async execute(args: { csvData: string; filename: string }): Promise<any> {
+        try {
+            console.log('Creating CSV file:', args.filename, ' with data:', args.csvData);
+
+            // Hand the raw CSV text to the server, which answers with the stored file's URL and id
+            const payload = { filename: args.filename, data: args.csvData };
+            const response = await Networking.PostToServer('/createCSV', payload);
+            const { fileUrl, id } = response;
+
+            // Let the owner of the tool react to the new file
+            this._handleCSVResult(fileUrl, args.filename, id, args.csvData);
+
+            const confirmation = `File successfully created: ${fileUrl}. \nNow a CSV file with this data and the name ${args.filename} is available as a user doc.`;
+            return [{ type: 'text', text: confirmation }];
+        } catch (error) {
+            console.error('Error creating CSV file:', error);
+            throw new Error('Failed to create CSV file.');
+        }
+    }
+}
diff --git a/src/client/views/nodes/chatbot/tools/CreateCollectionTool.ts b/src/client/views/nodes/chatbot/tools/CreateCollectionTool.ts
new file mode 100644
index 000000000..573428179
--- /dev/null
+++ b/src/client/views/nodes/chatbot/tools/CreateCollectionTool.ts
@@ -0,0 +1,36 @@
+import { DocCast } from '../../../../../fields/Types';
+import { DocServer } from '../../../../DocServer';
+import { Docs } from '../../../../documents/Documents';
+import { DocumentView } from '../../DocumentView';
+import { OpenWhere } from '../../OpenWhere';
+import { BaseTool } from './BaseTool';
+
+/**
+ * Tool that groups existing Documents into a new freeform collection and opens
+ * that collection to the right of the chat box.
+ *
+ * NOTE(review): the exported class name is kept for compatibility even though the
+ * tool creates a collection rather than fetching document contents. The advertised
+ * tool name was changed from 'retrieveDocs' (which collided with GetDocsTool) to
+ * 'createCollection'; any registry that keys tools by name must use the new name.
+ */
+export class GetDocsContentTool extends BaseTool<{ title: string; document_ids: string[] }> {
+    private _docView: DocumentView;
+
+    /**
+     * @param docView View whose `addDocTab` prop is used to open the new collection.
+     */
+    constructor(docView: DocumentView) {
+        super(
+            'createCollection',
+            'Creates a new collection in Dash that contains the specified Documents',
+            {
+                title: {
+                    type: 'string',
+                    description: 'the title of the collection that you will be making',
+                    required: 'true',
+                    max_inputs: '1',
+                },
+                // NOTE(review): execute() has always read document_ids, but it was never advertised.
+                // No max_inputs is given because no limit is evident - confirm the agent's convention.
+                document_ids: {
+                    type: 'string',
+                    description: 'the ids of the Documents to place in the new collection',
+                    required: 'true',
+                },
+            },
+            'Provide the title of the new collection and the ids of the Documents that it should contain.',
+            'Creates a new freeform collection in Dash from the given Documents and opens it next to the chat'
+        );
+        this._docView = docView;
+    }
+
+    /**
+     * Looks up the cached Documents for the given ids, wraps them in a freeform
+     * collection titled `args.title` and opens it with OpenWhere.addRight.
+     *
+     * @returns A single text item confirming the creation.
+     */
+    async execute(args: { title: string; document_ids: string[] }): Promise<any> {
+        const docs = args.document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id)));
+        const collection = Docs.Create.FreeformDocument(docs, { title: args.title });
+        this._docView._props.addDocTab(collection, OpenWhere.addRight); //in future, create popup prompting user where to add
+        return [{ type: 'text', text: 'Collection created in Dash called ' + args.title }];
+    }
+}
+//export function create_collection(docView: DocumentView, document_ids: string[], title: string): string {}
diff --git a/src/client/views/nodes/chatbot/tools/DataAnalysisTool.ts b/src/client/views/nodes/chatbot/tools/DataAnalysisTool.ts
new file mode 100644
index 000000000..a12ee46e5
--- /dev/null
+++ b/src/client/views/nodes/chatbot/tools/DataAnalysisTool.ts
@@ -0,0 +1,59 @@
+import { BaseTool } from './BaseTool';
+
+/**
+ * Tool that hands the full text of one or more known CSV files to the model.
+ */
+export class DataAnalysisTool extends BaseTool<{ csv_file_name: string | string[] }> {
+    private csv_files_function: () => { filename: string; id: string; text: string }[];
+
+    /**
+     * @param csv_files Callback returning the CSV files that are currently available.
+     */
+    constructor(csv_files: () => { filename: string; id: string; text: string }[]) {
+        super(
+            'dataAnalysis',
+            'Analyzes, and provides insights, from one or more CSV files',
+            {
+                csv_file_name: {
+                    type: 'string',
+                    description: 'Name(s) of the CSV file(s) to analyze',
+                    required: 'true',
+                    max_inputs: '3',
+                },
+            },
+            'Provide the name(s) of up to 3 CSV files to analyze based on the user query and whichever available CSV files may be relevant.',
+            'Provides the full CSV file text for your analysis based on the user query and the available CSV file(s). '
+        );
+        this.csv_files_function = csv_files;
+    }
+
+    /** @returns The text of the file with the given name, or undefined if there is none. */
+    getFileContent(filename: string): string | undefined {
+        return this.csv_files_function().find(f => f.filename === filename)?.text;
+    }
+
+    /** @returns The id of the file with the given name, or undefined if there is none. */
+    getFileID(filename: string): string | undefined {
+        return this.csv_files_function().find(f => f.filename === filename)?.id;
+    }
+
+    /**
+     * Produces one text item per requested file, in request order: either the file
+     * wrapped in a <chunk> element or a "File not found" notice.
+     *
+     * @param args.csv_file_name A single filename or a list of filenames.
+     */
+    async execute(args: { csv_file_name: string | string[] }): Promise<any> {
+        const filenames = Array.isArray(args.csv_file_name) ? args.csv_file_name : [args.csv_file_name];
+        // Take one snapshot of the file list so that id and text always belong to the same entry
+        const files = this.csv_files_function();
+
+        return filenames.map(filename => {
+            // Decide on the presence of the entry, not on truthiness of its fields:
+            // an existing but empty CSV must not be reported as missing
+            const file = files.find(f => f.filename === filename);
+            return {
+                type: 'text',
+                text: file ? `<chunk chunk_id=${file.id} chunk_type=csv>${file.text}</chunk>` : `File not found: ${filename}`,
+            };
+        });
+    }
+}
diff --git a/src/client/views/nodes/chatbot/tools/GetDocsTool.ts b/src/client/views/nodes/chatbot/tools/GetDocsTool.ts
new file mode 100644
index 000000000..f970ca8ee
--- /dev/null
+++ b/src/client/views/nodes/chatbot/tools/GetDocsTool.ts
@@ -0,0 +1,29 @@
+import { DocCast } from '../../../../../fields/Types';
+import { DocServer } from '../../../../DocServer';
+import { Docs } from '../../../../documents/Documents';
+import { DocumentView } from '../../DocumentView';
+import { OpenWhere } from '../../OpenWhere';
+import { BaseTool } from './BaseTool';
+
+export class GetDocsTool extends BaseTool<{ title: string; document_ids: string[] }> { // Advertised as 'retrieveDocs'; see the NOTE(review) in execute()
+ private _docView: DocumentView;
+ constructor(docView: DocumentView) {
+ super(
+ 'retrieveDocs',
+ 'Retrieves the contents of all Documents that the user is interacting with in Dash',
+ {}, // No parameters are advertised, although execute() reads args.title and args.document_ids
+ 'No need to provide anything. Just run the tool and it will retrieve the contents of all Documents that the user is interacting with in Dash.',
+ 'Returns the the documents in Dash in JSON form. This will include the title of the document, the location in the FreeFormDocument, and the content of the document, any applicable data fields, the layout of the document, etc.'
+ );
+ this._docView = docView;
+ }
+
+ async execute(args: { title: string; document_ids: string[] }): Promise<any> {
+ // NOTE(review): this body builds a freeform collection and opens it to the right; it does not return document contents as the description above promises, and args.document_ids.map throws if the caller supplies only the advertised (empty) parameters. It looks copied from the collection-creation tool - confirm the intended behavior.
+ const docs = args.document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id)));
+ const collection = Docs.Create.FreeformDocument(docs, { title: args.title });
+ this._docView._props.addDocTab(collection, OpenWhere.addRight); //in future, create popup prompting user where to add
+ return [{ type: 'text', text: 'Collection created in Dash called ' + args.title }];
+ }
+}
+//export function create_collection(docView: DocumentView, document_ids: string[], title: string): string {}
diff --git a/src/client/views/nodes/chatbot/tools/NoTool.ts b/src/client/views/nodes/chatbot/tools/NoTool.ts
new file mode 100644
index 000000000..1f0830a77
--- /dev/null
+++ b/src/client/views/nodes/chatbot/tools/NoTool.ts
@@ -0,0 +1,18 @@
+// tools/NoTool.ts
+import { BaseTool } from './BaseTool';
+
+// Fixed observation handed back whenever the model decides that it needs no tool
+const NO_TOOL_OBSERVATION = 'No tool used. Proceed with answering the question.';
+
+/**
+ * Placeholder tool for turns in which no external tool or action is needed.
+ * It takes no input and always produces the same observation.
+ */
+export class NoTool extends BaseTool<{}> {
+    constructor() {
+        super(
+            'no_tool',
+            'Use this when no external tool or action is required to answer the question.',
+            {},
+            'When using the "no_tool" action, simply provide an empty <action_input> element. The observation will always be "No tool used. Proceed with answering the question."',
+            'Use when no external tool or action is required to answer the question.'
+        );
+    }
+
+    /**
+     * @param args Ignored.
+     * @returns A single text item carrying the fixed observation.
+     */
+    async execute(args: {}): Promise<any> {
+        const observation = { type: 'text', text: NO_TOOL_OBSERVATION };
+        return [observation];
+    }
+}
diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts
new file mode 100644
index 000000000..c24306dcd
--- /dev/null
+++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts
@@ -0,0 +1,138 @@
+import { BaseTool } from './BaseTool';
+import { Vectorstore } from '../vectorstore/Vectorstore';
+import { RAGChunk } from '../types/types';
+import * as fs from 'fs';
+import { Networking } from '../../../../Network';
+import { file } from 'jszip';
+import { ChatCompletion, ChatCompletionContentPart, ChatCompletionMessageParam } from 'openai/resources';
+
+/**
+ * Tool that retrieves the document chunks most relevant to a query from the
+ * vector store and has the server format them for the model.
+ *
+ * The template strings below are prompt text sent to the model; their content,
+ * including whitespace, is kept exactly as it was.
+ */
+export class RAGTool extends BaseTool {
+    /**
+     * @param vectorstore Store that is queried for relevant chunks.
+     */
+    constructor(private vectorstore: Vectorstore) {
+        super(
+            'rag',
+            'Perform a RAG search on user documents',
+            {
+                hypothetical_document_chunk: {
+                    type: 'string',
+                    description:
+                        "Detailed version of the prompt that is effectively a hypothetical document chunk that would be ideal to embed and compare to the vectors of real document chunks to fetch the most relevant document chunks to answer the user's query",
+                    required: 'true',
+                },
+            },
+ `
+ Your task is to provide a comprehensive response to the user's prompt based on the given chunks and chat history. Follow these structural guidelines meticulously:
+
+ 1. Overall Structure:
+ <answer>
+ [Main content with grounded_text tags interspersed with normal plain text (information that is not derived from chunks' information)]
+ <citations>
+ [Individual citation tags]
+ </citations>
+ <follow_up_questions>
+ [Three question tags]
+ </follow_up_questions>
+ </answer>
+
+ 2. Grounded Text Tag Structure:
+ - Basic format:
+ <grounded_text citation_index="[citation index number(s)]">
+ [Your generated text based on information from a subset of a chunk (a citation's direct text)]
+ </grounded_text>
+
+ 3. Citation Tag Structure:
+ <citation index="[unique number]" chunk_id="[UUID v4]" type="[text/image/table]">
+ [For text: relevant subset of original chunk]
+ [For image/table: leave empty]
+ </citation>
+
+ 4. Detailed Grounded Text Guidelines:
+ a. Wrap all information derived from chunks in grounded_text tags.
+ b. DO NOT PUT ANYTHING THAT IS NOT DIRECTLY DERIVED FROM INFORMATION FROM CHUNKS (EITHER IMAGE, TABLE, OR TEXT) IN GROUNDED_TEXT TAGS.
+ c. Use a single grounded_text tag for suquential and closely related information that references the same citation. If other citations' information are used sequentially, create new grounded_text tags.
+ d. Ensure every grounded_text tag has up to a few corresponding citations (should not be more than 3 and only 1 is fine). Multiple citation indices should be separated by commas.
+ e. Grounded text can be as short as a few words or as long as several sentences.
+ f. Avoid overlapping or nesting grounded_text tags; instead, use sequential tags.
+
+ 5. Detailed Citation Guidelines:
+ a. Create a unique citation for each distinct piece of information from the chunks that is used to support grounded_text.
+ b. ALL TEXT CITATIONS must have direct text in its element content (e.g. <citation ...>DIRECT TEXT HERE</citation>) that is a relevant SUBSET of the original text chunk that is being cited specifically.
+ c. DO NOT paraphrase or summarize the text; use the original text as much as possible.
+ d. DO NOT USE THE FULL TEXT CHUNK as the citation content; only use the relevant subset of the text that the grounded_text is base. AS SHORT AS POSSIBLE WHILE PROVIDING INFORMATION (ONE TO TWO SENTENCES USUALLY)!
+ e. Ensure each citation has a unique index number.
+ f. Specify the correct type: "text", "image", or "table".
+ g. For text chunks, the content of the citation should ALWAYS have the relevant subset of the original text that the grounded_text is based on.
+ h. For image/table chunks, leave the citation content empty.
+ i. One citation can be used for multiple grounded_text tags if they are based on the same chunk information.
+ j. !!!DO NOT OVERCITE - only include citations for information that is directly relevant to the grounded_text.
+
+ 6. Structural Integrity Checks:
+ a. Ensure all opening tags have corresponding closing tags.
+ b. Verify that all grounded_text tags have valid citation_index attributes (they should be equal to the associated citation(s) index field—not their chunk_id field).
+ c. Check that all cited indices in grounded_text tags have corresponding citations.
+
+ Example of grounded_text usage:
+
+ <answer>
+ <grounded_text citation_index="1,2">
+ Artificial Intelligence (AI) is revolutionizing various sectors, with healthcare experiencing significant transformations in areas such as diagnosis and treatment planning.
+ </grounded_text>
+ <grounded_text citation_index="2,3,4">
+ In the field of medical diagnosis, AI has shown remarkable capabilities, particularly in radiology. For instance, AI systems have drastically improved mammogram analysis, achieving 99% accuracy at a rate 30 times faster than human radiologists.
+ </grounded_text>
+ <grounded_text citation_index="4">
+ This advancement not only enhances the efficiency of healthcare systems but also significantly reduces the occurrence of false positives, leading to fewer unnecessary biopsies and reduced patient stress.
+ </grounded_text>
+
+ <grounded_text citation_index="5,6">
+ Beyond diagnosis, AI is playing a crucial role in drug discovery and development. By analyzing vast amounts of genetic and molecular data, AI algorithms can identify potential drug candidates much faster than traditional methods.
+ </grounded_text>
+ <grounded_text citation_index="6">
+ This could potentially reduce the time and cost of bringing new medications to market, especially for rare diseases that have historically received less attention due to limited market potential.
+ </grounded_text>
+
+ [... rest of the content ...]
+
+ <citations>
+ <citation index="1" chunk_id="123e4567-e89b-12d3-a456-426614174000" type="text">Artificial Intelligence is revolutionizing various industries, with healthcare being one of the most profoundly affected sectors.</citation>
+ <citation index="2" chunk_id="123e4567-e89b-12d3-a456-426614174001" type="text">AI has shown particular promise in the field of radiology, enhancing the accuracy and speed of image analysis.</citation>
+ <citation index="3" chunk_id="123e4567-e89b-12d3-a456-426614174002" type="text">According to recent studies, AI systems have achieved 99% accuracy in mammogram analysis, performing the task 30 times faster than human radiologists.</citation>
+ <citation index="4" chunk_id="123e4567-e89b-12d3-a456-426614174003" type="text">The improvement in mammogram accuracy has led to a significant reduction in false positives, decreasing the need for unnecessary biopsies and reducing patient anxiety.</citation>
+ <citation index="5" chunk_id="123e4567-e89b-12d3-a456-426614174004" type="text">AI is accelerating the drug discovery process by analyzing complex molecular and genetic data to identify potential drug candidates.</citation>
+ <citation index="6" chunk_id="123e4567-e89b-12d3-a456-426614174005" type="text">The use of AI in drug discovery could significantly reduce the time and cost associated with bringing new medications to market, particularly for rare diseases.</citation>
+ </citations>
+
+ <follow_up_questions>
+ <question>How might AI-driven personalized medicine impact the cost and accessibility of healthcare in the future?</question>
+ <question>What measures can be taken to ensure that AI systems in healthcare are free from biases and equally effective for diverse populations?</question>
+ <question>How could the role of healthcare professionals evolve as AI becomes more integrated into medical practices?</question>
+ </follow_up_questions>
+ </answer>
+ `,
+
+ `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a
+ set of document chunks (either images or text) that can be used to provide a grounded response based on
+ user documents`
+        );
+    }
+
+    /**
+     * Retrieves the chunks closest to the hypothetical chunk and returns them formatted.
+     *
+     * @param args.hypothetical_document_chunk Text used as the retrieval query.
+     * @returns The formatted chunks as produced by the server.
+     */
+    async execute(args: { hypothetical_document_chunk: string }): Promise<any> {
+        const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk);
+        const formatted_chunks = await this.getFormattedChunks(relevantChunks);
+        return formatted_chunks;
+    }
+
+    /**
+     * Sends the chunks to the server's /formatChunks endpoint.
+     *
+     * @param relevantChunks Chunks returned by the vector store.
+     * @returns The `formattedChunks` value of the server response.
+     * @throws Error when the response carries no formatted chunks; request errors are
+     * logged and rethrown unchanged.
+     */
+    async getFormattedChunks(relevantChunks: RAGChunk[]): Promise<any> {
+        try {
+            const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks });
+
+            if (!formattedChunks) {
+                throw new Error('Failed to format chunks');
+            }
+
+            return formattedChunks;
+        } catch (error) {
+            console.error('Error formatting chunks:', error);
+            throw error;
+        }
+    }
+}
diff --git a/src/client/views/nodes/chatbot/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts
new file mode 100644
index 000000000..b926cbadc
--- /dev/null
+++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts
@@ -0,0 +1,54 @@
+import { max } from 'lodash';
+import { Networking } from '../../../../Network';
+import { BaseTool } from './BaseTool';
+import { v4 as uuidv4 } from 'uuid';
+
+/**
+ * Tool that runs one or more web searches through the server and returns each
+ * hit as a <chunk> element holding the result's URL and overview snippet.
+ */
+export class SearchTool extends BaseTool<{ query: string | string[] }> {
+    private _addLinkedUrlDoc: (url: string, id: string) => void;
+    private _max_results: number;
+
+    /**
+     * @param addLinkedUrlDoc Callback stored on the tool (not invoked by this class).
+     * @param max_results Value sent to the server as `max_results` with every query.
+     */
+    constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 5) {
+        super(
+            'searchTool',
+            'Search the web to find a wide range of websites related to a query or multiple queries',
+            {
+                query: {
+                    type: 'string',
+                    description: 'The search query or queries to use for finding websites',
+                    required: 'true',
+                    max_inputs: '3',
+                },
+            },
+            'Provide up to 3 search queries to find a broad range of websites. This tool is intended to help you identify relevant websites, but not to be used for providing the final answer. Use this information to determine which specific website to investigate further.',
+            'Returns a list of websites and their overviews based on the search queries, helping to identify which websites might contain relevant information.'
+        );
+        this._addLinkedUrlDoc = addLinkedUrlDoc;
+        this._max_results = max_results;
+    }
+
+    /** Wraps a single search hit in a chunk element with a freshly generated id. */
+    private formatResult(result: { url: string; snippet: string }): { type: string; text: string } {
+        const id = uuidv4();
+        return {
+            type: 'text',
+ text: `<chunk chunk_id="${id}" chunk_type="text">
+ <url>${result.url}</url>
+ <overview>${result.snippet}</overview>
+ </chunk>`,
+        };
+    }
+
+    /**
+     * Runs the queries one after another. A failing query does not abort the others;
+     * it contributes a single error notice instead.
+     *
+     * @param args.query A single query or a list of queries.
+     * @returns The text items of all queries, in query order.
+     */
+    async execute(args: { query: string | string[] }): Promise<any> {
+        // concat() appends a lone string and flattens an array, covering both input shapes
+        const queries = ([] as string[]).concat(args.query);
+        const allResults = [];
+
+        for (const query of queries) {
+            try {
+                const request = { query, max_results: this._max_results };
+                const { results } = await Networking.PostToServer('/getWebSearchResults', request);
+                const data: { type: string; text: string }[] = results.map((result: { url: string; snippet: string }) => this.formatResult(result));
+                allResults.push(...data);
+            } catch (error) {
+                console.log(error);
+                allResults.push({ type: 'text', text: `An error occurred while performing the web search for query: ${query}` });
+            }
+        }
+
+        return allResults;
+    }
+}
diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
new file mode 100644
index 000000000..2118218f6
--- /dev/null
+++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
@@ -0,0 +1,83 @@
+import { Networking } from '../../../../Network';
+import { BaseTool } from './BaseTool';
+import { v4 as uuidv4 } from 'uuid';
+
+/**
+ * Tool that fetches the plain text of one or more web pages through the server
+ * and registers every scraped URL with the owner of the tool.
+ *
+ * The template string below is prompt text sent to the model; its content,
+ * including whitespace, is kept exactly as it was.
+ */
+export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> {
+    private _addLinkedUrlDoc: (url: string, id: string) => void;
+
+    /**
+     * @param addLinkedUrlDoc Called once per successfully scraped URL with the URL and
+     * the id used for its chunk.
+     */
+    constructor(addLinkedUrlDoc: (url: string, id: string) => void) {
+        super(
+            'websiteInfoScraper',
+            'Scrape detailed information from specific websites relevant to the user query',
+            {
+                url: {
+                    type: 'string',
+                    description: 'The URL(s) of the website(s) to scrape',
+                    required: true,
+                    max_inputs: 3,
+                },
+            },
+ `
+ Your task is to provide a comprehensive response to the user's prompt using the content scraped from relevant websites. Ensure you follow these guidelines for structuring your response:
+
+ 1. Grounded Text Tag Structure:
+ - Wrap all text derived from the scraped website(s) in <grounded_text> tags.
+ - **Do not include non-sourced information** in <grounded_text> tags.
+ - Use a single <grounded_text> tag for content derived from a single website. If citing multiple websites, create new <grounded_text> tags for each.
+ - Ensure each <grounded_text> tag has a citation index corresponding to the scraped URL.
+
+ 2. Citation Tag Structure:
+ - Create a <citation> tag for each distinct piece of information used from the website(s).
+ - Each <citation> tag must reference a URL chunk using the chunk_id attribute.
+ - For URL-based citations, leave the citation content empty, but reference the chunk_id and type as 'url'.
+
+ 3. Structural Integrity Checks:
+ - Ensure all opening and closing tags are matched properly.
+ - Verify that all citation_index attributes in <grounded_text> tags correspond to valid citations.
+ - Do not over-cite—cite only the most relevant parts of the websites.
+
+ Example Usage:
+
+ <answer>
+ <grounded_text citation_index="1">
+ Based on data from the World Bank, economic growth has stabilized in recent years, following a surge in investments.
+ </grounded_text>
+ <grounded_text citation_index="2">
+ According to information retrieved from the International Monetary Fund, the inflation rate has been gradually decreasing since 2020.
+ </grounded_text>
+
+ <citations>
+ <citation index="1" chunk_id="1234" type="url"></citation>
+ <citation index="2" chunk_id="5678" type="url"></citation>
+ </citations>
+
+ <follow_up_questions>
+ <question>What are the long-term economic impacts of increased investments on GDP?</question>
+ <question>How might inflation trends affect future monetary policy?</question>
+ <question>Are there additional factors that could influence economic growth beyond investments and inflation?</question>
+ </follow_up_questions>
+ </answer>
+ `,
+            'Returns the text content of the webpages for further analysis and grounding.'
+        );
+        this._addLinkedUrlDoc = addLinkedUrlDoc;
+    }
+
+    /**
+     * Scrapes the URLs one after another. A failing URL does not abort the others;
+     * it contributes a single error notice instead.
+     *
+     * @param args.url A single URL or a list of URLs.
+     * @returns One text item per URL, in request order.
+     */
+    async execute(args: { url: string | string[] }): Promise<any> {
+        const urls = Array.isArray(args.url) ? args.url : [args.url];
+        const results = [];
+
+        for (const url of urls) {
+            try {
+                const { website_plain_text } = await Networking.PostToServer('/scrapeWebsite', { url });
+                const id = uuidv4();
+                this._addLinkedUrlDoc(url, id);
+                results.push({ type: 'text', text: `<chunk chunk_id=${id} chunk_type=url>\n${website_plain_text}\n</chunk>\n` });
+            } catch (error) {
+                // Keep the cause visible to developers; the model only receives the generic notice
+                console.error(`Error scraping website ${url}:`, error);
+                results.push({ type: 'text', text: `An error occurred while scraping the website: ${url}` });
+            }
+        }
+
+        return results;
+    }
+}
diff --git a/src/client/views/nodes/chatbot/tools/WikipediaTool.ts b/src/client/views/nodes/chatbot/tools/WikipediaTool.ts
new file mode 100644
index 000000000..143d91d80
--- /dev/null
+++ b/src/client/views/nodes/chatbot/tools/WikipediaTool.ts
@@ -0,0 +1,37 @@
+import { title } from 'process';
+import { Networking } from '../../../../Network';
+import { BaseTool } from './BaseTool';
+import axios from 'axios';
+import { v4 as uuidv4 } from 'uuid';
+
+/**
+ * Agent tool that looks up a Wikipedia article by title and returns its summary as a chunk.
+ * The article URL is registered through the `addLinkedUrlDoc` callback supplied at construction.
+ */
+export class WikipediaTool extends BaseTool<{ title: string }> {
+    // Callback invoked with the article URL and the generated chunk id for each successful lookup.
+    private _addLinkedUrlDoc: (url: string, id: string) => void;
+
+    /**
+     * @param addLinkedUrlDoc Callback that receives the Wikipedia URL and the chunk id generated for it.
+     */
+    constructor(addLinkedUrlDoc: (url: string, id: string) => void) {
+        super(
+            'wikipedia',
+            'Search Wikipedia and return a summary',
+            {
+                title: {
+                    type: 'string',
+                    description: 'The title of the Wikipedia article to search',
+                    required: true,
+                },
+            },
+            'Provide simply the title you want to search on Wikipedia and nothing more. If re-using this tool, try a different title for different information.',
+            'Returns a summary from searching an article title on Wikipedia'
+        );
+        this._addLinkedUrlDoc = addLinkedUrlDoc;
+    }
+
+    /**
+     * Requests a summary for `args.title` from the server's `/getWikipediaSummary` endpoint.
+     *
+     * On success a fresh UUID is generated, the article URL (title with spaces replaced by
+     * underscores) is passed to `_addLinkedUrlDoc` together with that id, and the summary is
+     * returned wrapped in a `<chunk>` tag carrying the id.
+     * @param args Object holding the article title to look up.
+     * @returns A single-element array containing either the summary chunk or an error message.
+     */
+    async execute(args: { title: string }): Promise<any> {
+        try {
+            const { text } = await Networking.PostToServer('/getWikipediaSummary', { title: args.title });
+            const id = uuidv4();
+            const url = `https://en.wikipedia.org/wiki/${args.title.replace(/ /g, '_')}`;
+            this._addLinkedUrlDoc(url, id);
+            // The document is registered as a linked URL, so the chunk is tagged `url`
+            // (the previous tag was the malformed `chunk_type=csv}`).
+            return [{ type: 'text', text: `<chunk chunk_id=${id} chunk_type=url> ${text} </chunk>` }];
+        } catch (error) {
+            // Keep the cause visible for debugging; the agent only receives the generic message below.
+            console.error(`Error fetching Wikipedia article: ${error}`);
+            return [{ type: 'text', text: 'An error occurred while fetching the article.' }];
+        }
+    }
+}
diff --git a/src/client/views/nodes/chatbot/types/types.ts b/src/client/views/nodes/chatbot/types/types.ts
new file mode 100644
index 000000000..f5d14ad6a
--- /dev/null
+++ b/src/client/views/nodes/chatbot/types/types.ts
@@ -0,0 +1,129 @@
+/** Author roles a chat message can carry (see `AssistantMessage.role`). */
+export enum ASSISTANT_ROLE {
+ USER = 'user',
+ ASSISTANT = 'assistant',
+}
+
+/** Kinds of text segment that can appear in a message's content (see `MessageContent.type`). */
+export enum TEXT_TYPE {
+ NORMAL = 'normal',
+ GROUNDED = 'grounded',
+ ERROR = 'error',
+}
+
+/**
+ * Kinds of chunk referenced by citations and stored as chunk metadata.
+ * Note that the CSV member's value is upper-case ('CSV') while all other values are lower-case.
+ */
+export enum CHUNK_TYPE {
+ TEXT = 'text',
+ IMAGE = 'image',
+ TABLE = 'table',
+ URL = 'url',
+ CSV = 'CSV',
+}
+
+/** Kinds of processing step recorded in a `ProcessingInfo` entry. */
+export enum PROCESSING_TYPE {
+ THOUGHT = 'thought',
+ ACTION = 'action',
+ // TODO: eventually migrate error to here
+}
+
+/**
+ * Maps a chunk-type string to the matching `CHUNK_TYPE` member, ignoring letter case.
+ * @param type The chunk-type string to convert (e.g. 'text', 'URL', 'csv').
+ * @returns The matching `CHUNK_TYPE`, or `CHUNK_TYPE.TEXT` when the string is not recognised.
+ */
+export function getChunkType(type: string): CHUNK_TYPE {
+    // The input is lower-cased first, so every case label must be lower-case as well.
+    switch (type.toLowerCase()) {
+        case 'text':
+            return CHUNK_TYPE.TEXT;
+        case 'image':
+            return CHUNK_TYPE.IMAGE;
+        case 'table':
+            return CHUNK_TYPE.TABLE;
+        case 'csv':
+            return CHUNK_TYPE.CSV;
+        case 'url':
+            return CHUNK_TYPE.URL;
+        default:
+            return CHUNK_TYPE.TEXT;
+    }
+}
+
+/** One processing step attached to an assistant message: its position, kind and text content. */
+export interface ProcessingInfo {
+ index: number;
+ type: PROCESSING_TYPE;
+ content: string;
+}
+
+/**
+ * A chat message: the author role, its content segments, the processing steps recorded for it,
+ * and optionally follow-up questions, citations and a loop summary.
+ */
+export interface AssistantMessage {
+ role: ASSISTANT_ROLE;
+ content: MessageContent[];
+ follow_up_questions?: string[];
+ citations?: Citation[];
+ processing_info: ProcessingInfo[];
+ loop_summary?: string;
+}
+
+/**
+ * One segment of a message's content: its position, text kind and text.
+ * `citation_ids` lists the citation ids attached to the segment, or is null when there are none.
+ */
+export interface MessageContent {
+ index: number;
+ type: TEXT_TYPE;
+ text: string;
+ citation_ids: string[] | null;
+}
+
+/**
+ * A citation: it names a chunk by `chunk_id`, carries its own `citation_id` and the chunk's type,
+ * and may include directly quoted text and a URL.
+ */
+export interface Citation {
+ direct_text?: string;
+ type: CHUNK_TYPE;
+ chunk_id: string;
+ citation_id: string;
+ url?: string;
+}
+
+/**
+ * A document chunk as stored in and retrieved from the vector index: an id, a numeric vector
+ * (`values`) and metadata describing the chunk's text, type, source document and page range.
+ */
+export interface RAGChunk {
+ id: string;
+ values: number[];
+ metadata: {
+ text: string;
+ type: CHUNK_TYPE;
+ original_document: string;
+ file_path: string;
+ doc_id: string;
+ location: string;
+ start_page: number;
+ end_page: number;
+ // Optional fields; presumably only populated for some chunk types (e.g. images) -- TODO confirm.
+ base64_data?: string | undefined;
+ page_width?: number | undefined;
+ page_height?: number | undefined;
+ };
+}
+
+/**
+ * Reduced description of a chunk: its id, page range and chunk type, plus optional location,
+ * URL and a flag indicating whether it can be displayed.
+ */
+export interface SimplifiedChunk {
+ chunkId: string;
+ startPage: number;
+ endPage: number;
+ location?: string;
+ chunkType: CHUNK_TYPE;
+ url?: string;
+ canDisplay?: boolean;
+}
+
+/**
+ * A processed document: descriptive fields (purpose, file name, page count, summary, type)
+ * together with the chunks that make it up.
+ */
+export interface AI_Document {
+ purpose: string;
+ file_name: string;
+ num_pages: number;
+ summary: string;
+ chunks: RAGChunk[];
+ type: string;
+}
+
+/**
+ * Contract for a tool: descriptive metadata (name, description, parameters, citation rules,
+ * brief summary), an asynchronous `execute` taking arguments of type `T`, and `getActionRule`.
+ */
+export interface Tool<T extends Record<string, any> = Record<string, any>> {
+ name: string;
+ description: string;
+ parameters: Record<string, any>;
+ citationRules: string;
+ briefSummary: string;
+ execute: (args: T) => Promise<any>;
+ getActionRule: () => Record<string, any>;
+}
+
+/**
+ * A message with a role of 'system', 'user' or 'assistant'. Its content is either a plain string
+ * or a list of parts, each with a `type` and optionally `text` or an `image_url`.
+ */
+export interface AgentMessage {
+ role: 'system' | 'user' | 'assistant';
+ content: string | { type: string; text?: string; image_url?: { url: string } }[];
+}
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
new file mode 100644
index 000000000..07a2b73bc
--- /dev/null
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -0,0 +1,258 @@
+import { Pinecone, Index, IndexList, PineconeRecord, RecordMetadata, QueryResponse } from '@pinecone-database/pinecone';
+import { CohereClient } from 'cohere-ai';
+import { EmbedResponse } from 'cohere-ai/api';
+import dotenv from 'dotenv';
+import { RAGChunk, AI_Document, CHUNK_TYPE } from '../types/types';
+import { Doc } from '../../../../../fields/Doc';
+import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types';
+import { Networking } from '../../../../Network';
+
+// Load variables from a .env file into process.env; the Vectorstore constructor reads its API keys from there.
+dotenv.config();
+
+/**
+ * The Vectorstore class integrates with Pinecone for vector-based document indexing and retrieval,
+ * and Cohere for text embedding. It handles AI document management, uploads, and query-based retrieval.
+ */
+export class Vectorstore {
+    private pinecone: Pinecone; // Pinecone client for managing the vector index.
+    private index!: Index; // The specific Pinecone index used for document chunks; set by initializeIndex().
+    private cohere: CohereClient; // Cohere client for generating embeddings.
+    private indexName: string = 'pdf-chatbot'; // Default name for the index.
+    private _id: string; // Unique ID for the Vectorstore instance.
+    private _doc_ids: string[] = []; // List of document IDs handled by this instance.
+    private indexReady: Promise<void>; // Settles once initializeIndex() has finished; awaited before `index` is used.
+
+    documents: AI_Document[] = []; // Store the documents indexed in the vectorstore.
+
+    /**
+     * Constructor initializes the Pinecone and Cohere clients, sets up the document ID list,
+     * and starts the asynchronous initialization of the Pinecone index.
+     * @param id The unique identifier for the vectorstore instance.
+     * @param doc_ids A function that returns a list of document IDs.
+     * @throws Error if the PINECONE_API_KEY environment variable is not defined.
+     */
+    constructor(id: string, doc_ids: () => string[]) {
+        const pineconeApiKey = process.env.PINECONE_API_KEY;
+        if (!pineconeApiKey) {
+            throw new Error('PINECONE_API_KEY is not defined.');
+        }
+
+        // Initialize Pinecone and Cohere clients with API keys from the environment.
+        this.pinecone = new Pinecone({ apiKey: pineconeApiKey });
+        this.cohere = new CohereClient({ token: process.env.COHERE_API_KEY });
+        this._id = id;
+        this._doc_ids = doc_ids();
+
+        // A constructor cannot await, so keep the promise: methods that use `this.index` await it first.
+        this.indexReady = this.initializeIndex();
+        // Log a failed initialization here so it does not surface as an unhandled rejection;
+        // awaiting `indexReady` later still rejects for the caller.
+        this.indexReady.catch(error => console.error(`Error initializing Pinecone index: ${error}`));
+    }
+
+    /**
+     * Initializes the Pinecone index by checking if it exists, and creating it if not.
+     * The index is set to use the cosine metric for vector similarity.
+     */
+    private async initializeIndex(): Promise<void> {
+        const indexList: IndexList = await this.pinecone.listIndexes();
+
+        // Check if the index already exists, otherwise create it.
+        if (!indexList.indexes?.some(index => index.name === this.indexName)) {
+            await this.pinecone.createIndex({
+                name: this.indexName,
+                dimension: 1024,
+                metric: 'cosine',
+                spec: {
+                    serverless: {
+                        cloud: 'aws',
+                        region: 'us-east-1',
+                    },
+                },
+            });
+        }
+
+        // Set the index for future use.
+        this.index = this.pinecone.Index(this.indexName);
+    }
+
+    /**
+     * Adds an AI document to the vectorstore. This method handles document chunking, uploading to the
+     * vectorstore, and updating the progress for long-running tasks like file uploads.
+     *
+     * A document whose `ai_document_status` is already set is not processed again: if it is marked
+     * as in progress the call returns immediately, otherwise its `ai_doc_id` is registered with
+     * this instance (if not already present).
+     * @param doc The document to be added to the vectorstore.
+     * @param progressCallback Callback to update the progress of the upload.
+     */
+    async addAIDoc(doc: Doc, progressCallback: (progress: number, step: string) => void) {
+        console.log('Adding AI Document:', doc);
+        const ai_document_status: string = StrCast(doc.ai_document_status);
+
+        // Skip if the document is already in progress or completed.
+        if (ai_document_status !== undefined && ai_document_status.trim() !== '' && ai_document_status !== '{}') {
+            // 'PROGRESS' is the value this method writes while working; 'IN PROGRESS' is also
+            // accepted because the guard previously tested only for that spelling.
+            if (ai_document_status === 'IN PROGRESS' || ai_document_status === 'PROGRESS') {
+                console.log('Already in progress.');
+                return;
+            }
+            if (!this._doc_ids.includes(StrCast(doc.ai_doc_id))) {
+                this._doc_ids.push(StrCast(doc.ai_doc_id));
+            }
+            return;
+        }
+
+        // Start processing the document.
+        doc.ai_document_status = 'PROGRESS';
+        console.log(doc);
+
+        // Get the local file path (CSV or PDF).
+        const local_file_path: string | undefined = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname;
+        console.log('Local File Path:', local_file_path);
+
+        // NOTE(review): when no file path is found, or processing throws below, the status stays
+        // 'PROGRESS' and the document is skipped on later calls -- confirm whether it should be reset.
+        if (!local_file_path) {
+            return;
+        }
+
+        console.log('Creating AI Document...');
+        // Start the document creation process by sending the file to the server.
+        const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path });
+
+        // Poll the server until the job reports completion.
+        let result: any = null;
+        while (result === null) {
+            // Polling interval for status updates.
+            await new Promise(resolve => setTimeout(resolve, 2000));
+
+            // Check if the job is completed.
+            const resultResponse = await Networking.FetchFromServer(`/getResult/${jobId}`);
+            const resultResponseJson = JSON.parse(resultResponse);
+            if (resultResponseJson.status === 'completed') {
+                console.log('Result here:', resultResponseJson);
+                result = resultResponseJson;
+                break;
+            }
+
+            // Fetch progress information and update the progress callback.
+            const progressResponse = await Networking.FetchFromServer(`/getProgress/${jobId}`);
+            const progressResponseJson = JSON.parse(progressResponse);
+            if (progressResponseJson) {
+                progressCallback(progressResponseJson.progress, progressResponseJson.step);
+            }
+        }
+
+        // Once completed, process the document and add it to the vectorstore.
+        console.log('Document JSON:', result);
+        this.documents.push(result);
+        await this.indexDocument(result);
+        console.log(`Document added: ${result.file_name}`);
+
+        // Update document metadata such as summary, purpose, and vectorstore ID.
+        doc.summary = result.summary;
+        doc.ai_doc_id = result.doc_id;
+        if (!this._doc_ids.includes(result.doc_id)) {
+            this._doc_ids.push(result.doc_id);
+        }
+        doc.ai_purpose = result.purpose;
+
+        if (!doc.vectorstore_id) {
+            doc.vectorstore_id = JSON.stringify([this._id]);
+        } else {
+            doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this._id]));
+        }
+
+        // Append a simplified record of every chunk to the document's chunk_simpl field.
+        // The field is parsed and serialized once rather than once per chunk.
+        const chunk_simpl = doc.chunk_simpl ? JSON.parse(StrCast(doc.chunk_simpl)) : { chunks: [] };
+        const chunksToAdd = (result.chunks as RAGChunk[]).map(chunk => ({
+            chunkId: chunk.id,
+            startPage: chunk.metadata.start_page,
+            endPage: chunk.metadata.end_page,
+            location: chunk.metadata.location,
+            chunkType: chunk.metadata.type as CHUNK_TYPE,
+            text: chunk.metadata.text,
+        }));
+        chunk_simpl.chunks = chunk_simpl.chunks.concat(chunksToAdd);
+        doc.chunk_simpl = JSON.stringify(chunk_simpl);
+
+        // Mark the document status as completed.
+        doc.ai_document_status = 'COMPLETED';
+    }
+
+    /**
+     * Indexes the processed document by uploading the document's vector chunks to the Pinecone index.
+     * Waits for the index initialization started in the constructor before uploading.
+     * @param document The processed document containing its chunks and metadata.
+     */
+    private async indexDocument(document: any) {
+        console.log('Uploading vectors to content namespace...');
+
+        // Prepare Pinecone records for each chunk in the document.
+        const pineconeRecords: PineconeRecord[] = (document.chunks as RAGChunk[]).map(chunk => ({
+            id: chunk.id,
+            values: chunk.values,
+            metadata: { ...chunk.metadata } as RecordMetadata,
+        }));
+
+        // `this.index` is only assigned once initialization has completed.
+        await this.indexReady;
+
+        // Upload the records to Pinecone.
+        await this.index.upsert(pineconeRecords);
+    }
+
+    /**
+     * Retrieves the top K document chunks relevant to the user's query.
+     * This involves embedding the query using Cohere, then querying Pinecone for matching vectors,
+     * restricted to the document IDs handled by this instance.
+     * Any error (including a failed index initialization) is logged and results in an empty list.
+     * @param query The search query string.
+     * @param topK The number of top results to return (default is 10).
+     * @returns A list of document chunks that match the query.
+     */
+    async retrieve(query: string, topK: number = 10): Promise<RAGChunk[]> {
+        console.log(`Retrieving chunks for query: ${query}`);
+        try {
+            // Generate an embedding for the query using Cohere.
+            const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({
+                texts: [query],
+                model: 'embed-english-v3.0',
+                inputType: 'search_query',
+            });
+
+            let queryEmbedding: number[];
+
+            // Extract the embedding from the response.
+            if (Array.isArray(queryEmbeddingResponse.embeddings)) {
+                queryEmbedding = queryEmbeddingResponse.embeddings[0];
+            } else if (queryEmbeddingResponse.embeddings && 'embeddings' in queryEmbeddingResponse.embeddings) {
+                queryEmbedding = (queryEmbeddingResponse.embeddings as { embeddings: number[][] }).embeddings[0];
+            } else {
+                throw new Error('Invalid embedding response format');
+            }
+
+            if (!Array.isArray(queryEmbedding)) {
+                throw new Error('Query embedding is not an array');
+            }
+
+            // `this.index` is only assigned once initialization has completed.
+            await this.indexReady;
+
+            // Query the Pinecone index using the embedding and filter by document IDs.
+            const queryResponse: QueryResponse = await this.index.query({
+                vector: queryEmbedding,
+                filter: {
+                    doc_id: { $in: this._doc_ids },
+                },
+                topK,
+                includeValues: true,
+                includeMetadata: true,
+            });
+
+            // Map the results into RAGChunks and return them.
+            return queryResponse.matches.map(
+                match =>
+                    ({
+                        id: match.id,
+                        values: match.values as number[],
+                        metadata: match.metadata as {
+                            text: string;
+                            type: string;
+                            original_document: string;
+                            file_path: string;
+                            doc_id: string;
+                            location: string;
+                            start_page: number;
+                            end_page: number;
+                        },
+                    }) as RAGChunk
+            );
+        } catch (error) {
+            console.error(`Error retrieving chunks: ${error}`);
+            return [];
+        }
+    }
+}