From 7ff649c6582b7e00dcf2d7fe607699f850310ba0 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Thu, 30 May 2024 13:58:40 -0400 Subject: fixing chatbox features still messages appear weird when using code interpreter and now doesn't detect file links --- .../collections/CollectionNoteTakingViewColumn.tsx | 12 ++- src/client/views/nodes/ChatBox/ChatBox.tsx | 101 +++++++++++---------- .../views/nodes/ChatBox/MessageComponent.tsx | 37 +------- src/client/views/nodes/ChatBox/types.ts | 1 - src/server/ApiManagers/AssistantManager.ts | 7 +- 5 files changed, 70 insertions(+), 88 deletions(-) (limited to 'src') diff --git a/src/client/views/collections/CollectionNoteTakingViewColumn.tsx b/src/client/views/collections/CollectionNoteTakingViewColumn.tsx index 44ab1968d..2c6257cf2 100644 --- a/src/client/views/collections/CollectionNoteTakingViewColumn.tsx +++ b/src/client/views/collections/CollectionNoteTakingViewColumn.tsx @@ -1,6 +1,6 @@ /* eslint-disable jsx-a11y/control-has-associated-label */ import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'; -import { action, computed, makeObservable, observable } from 'mobx'; +import { action, computed, makeObservable, observable, runInAction } from 'mobx'; import { observer } from 'mobx-react'; import * as React from 'react'; import { lightOrDark, returnEmptyString } from '../../../ClientUtils'; @@ -88,12 +88,16 @@ export class CollectionNoteTakingViewColumn extends ObservableReactComponent { + this._ele && this.props.refList.push(this._ele); + }); } componentWillUnmount() { - this._ele && this.props.refList.splice(this._props.refList.indexOf(this._ele), 1); - this._ele = null; + runInAction(() => { + this._ele && this.props.refList.splice(this._props.refList.indexOf(this._ele), 1); + this._ele = null; + }); } @undoBatch diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 880c332ac..4c1550321 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ 
b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -34,7 +34,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @observable linked_docs_to_add: Doc[] = []; private openai: OpenAI; - private interim_history: string = ''; private assistantID: string = ''; private threadID: string = ''; private _oldWheel: any; @@ -74,13 +73,14 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }, }; reaction( - () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, text: msg.text, image: msg.image, tool_logs: msg.tool_logs, links: msg.links })), + () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, text: msg.text, image: msg.image, tool_logs: msg.tool_logs })), serializableHistory => { this.dataDoc.data = JSON.stringify(serializableHistory); } ); } + @action toggleToolLogs = (index: number) => { this.expandedLogIndex = this.expandedLogIndex === index ? null : index; }; @@ -101,6 +101,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } initializeOpenAI() { + //console.log(process.env._CLIENT_OPENAI_KEY); const configuration: ClientOptions = { apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true, @@ -114,11 +115,14 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } }; - createLink = (linkInfo: string, startIndex: number, endIndex: number, linkType: ANNOTATION_LINK_TYPE, annotationIndex: number = 0) => { - const text = this.interim_history; - const subString = this.current_message?.text.substring(startIndex, endIndex) ?? 
''; + createLink = (linkInfo: string, annotationText: string, linkType: ANNOTATION_LINK_TYPE) => { + console.log(this.current_message); + const text = this.current_message?.text; + console.log(text); + + const subString = annotationText; if (!text) return; - const textToDisplay = `${annotationIndex}`; + const textToDisplay = `DASHLINK`; let fileInfo = linkInfo; const fileName = subString.split('/')[subString.split('/').length - 1]; if (linkType === ANNOTATION_LINK_TYPE.DOWNLOAD_FILE) { @@ -127,17 +131,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { const formattedLink = `[${textToDisplay}](${fileInfo}~~~${linkType})`; console.log(formattedLink); - const newText = text.replace(subString, formattedLink); + const newText = text.split(subString).join(formattedLink); runInAction(() => { - this.interim_history = newText; - console.log(newText); - this.current_message?.links?.push({ - start: startIndex, - end: endIndex, - url: linkType === ANNOTATION_LINK_TYPE.DOWNLOAD_FILE ? fileName : linkInfo, - id: linkType === ANNOTATION_LINK_TYPE.DOWNLOAD_FILE ? 
linkInfo : undefined, - link_type: linkType, - }); + if (this.current_message) this.current_message.text = newText; }); }; @@ -192,8 +188,10 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { await this.createAssistant(); console.log('Assistant and thread created:', this.assistantID, this.threadID); } + let currentText: string = ''; let currentToolCallMessage: string = ''; + let current_message_id: string | null = null; // Send the user's input to the assistant await this.openai.beta.threads.messages.create(this.threadID, { @@ -207,14 +205,14 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { assistant_id: this.assistantID, }) .on('runStepCreated', (runStep: RunStep) => { - currentText = ''; runInAction(() => { - this.current_message = { role: ASSISTANT_ROLE.ASSISTANT, text: currentText, tool_logs: '', links: [] }; + this.isLoading = true; + //intentionally don't merge run steps' messages and keep them as seperate messages on the interface + this.current_message = { role: ASSISTANT_ROLE.ASSISTANT, text: '', tool_logs: '' }; }); - this.isLoading = true; }) .on('toolCallDelta', (toolCallDelta, snapshot) => { - this.isLoading = false; + runInAction(() => (this.isLoading = false)); if (toolCallDelta.type === 'code_interpreter') { if (toolCallDelta.code_interpreter?.input) { currentToolCallMessage += toolCallDelta.code_interpreter.input; @@ -243,31 +241,44 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { currentText += textDelta.value; runInAction(() => { if (this.current_message) { - // this.current_message = {...this.current_message, text: current_text}; this.current_message.text = currentText; } }); }) - .on('messageDone', async event => { - console.log(event); - const textItem = event.content.find(item => item.type === 'text'); + .on('messageDone', async message => { + console.log(this.current_message); + const textItem = message.content.find(item => item.type === 'text'); if (textItem && textItem.type === 'text') { const { 
text } = textItem; console.log(text.value); try { runInAction(() => { - this.interim_history = text.value; + if (this.current_message) { + this.current_message.text = text.value; + } }); } catch (e) { console.error('Error parsing JSON response:', e); } + console.log(this.current_message); const { annotations } = text; console.log('Annotations: ' + annotations); - let index = 0; - annotations.forEach(async annotation => { - console.log(' ' + annotation); - console.log(' ' + annotation.text); + const uniqueAnnotations = annotations.filter( + (annotation => { + const seenAnnotationTexts = new Set(); + return annotation => { + if (seenAnnotationTexts.has(annotation.text)) { + return false; + } else { + seenAnnotationTexts.add(annotation.text); + return true; + } + }; + })() + ); + + uniqueAnnotations.forEach(async annotation => { if (annotation.type === 'file_path') { const { file_path: filePath } = annotation; const fileToDownload = filePath.file_id; @@ -275,40 +286,38 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { if (filePath) { console.log(filePath); console.log(fileToDownload); - this.createLink(fileToDownload, annotation.start_index, annotation.end_index, ANNOTATION_LINK_TYPE.DOWNLOAD_FILE); + console.log(annotation.text); + this.createLink(fileToDownload, annotation.text, ANNOTATION_LINK_TYPE.DOWNLOAD_FILE); } } else { const { file_citation: fileCitation } = annotation; if (fileCitation) { const citedFile = await this.openai.files.retrieve(fileCitation.file_id); const citationUrl = citedFile.filename; - this.createLink(citationUrl, annotation.start_index, annotation.end_index, ANNOTATION_LINK_TYPE.DASH_DOC, index); - index++; + console.log(annotation.text); + console.log(this.current_message + 'fewjfjec'); + this.createLink(citationUrl, annotation.text, ANNOTATION_LINK_TYPE.DASH_DOC); } } }); - runInAction(() => { - if (this.current_message) { - console.log('current message: ' + this.current_message.text); - this.current_message.text = 
this.interim_history; - this.history.push({ ...this.current_message }); - this.current_message = undefined; - } - }); } + runInAction(() => { + if (this.current_message?.text) { + this.history.push({ ...this.current_message }); + //this.current_message = undefined; + } + }); }) - .on('toolCallDone', toolCall => { + .on('toolCallDone', async toolCall => { runInAction(() => { - if (this.current_message && currentToolCallMessage) { - this.current_message.tool_logs = currentToolCallMessage; + if (this.current_message?.tool_logs) { + this.history.push({ ...this.current_message }); + //this.current_message = undefined; } }); }) .on('imageFileDone', (content: ImageFile, snapshot: Message) => { console.log('Image file done:', content); - }) - .on('end', () => { - console.log('Streaming done'); }); }; diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index fced0b4d5..ef6ce83b5 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -3,8 +3,8 @@ import React from 'react'; import { observer } from 'mobx-react'; import { MathJax, MathJaxContext } from 'better-react-mathjax'; import ReactMarkdown from 'react-markdown'; -import { TbCircle0Filled, TbCircle1Filled, TbCircle2Filled, TbCircle3Filled, TbCircle4Filled, TbCircle5Filled, TbCircle6Filled, TbCircle7Filled, TbCircle8Filled, TbCircle9Filled } from 'react-icons/tb'; import { AssistantMessage } from './types'; +import { TbInfoCircleFilled } from 'react-icons/tb'; interface MessageComponentProps { message: AssistantMessage; @@ -29,40 +29,7 @@ const MessageComponent: React.FC = function ({ message, t const url = matches ? matches[1] : href; const linkType = matches ? 
matches[2] : null; if (linkType === 'citation') { - switch (children) { - case '0': - children = ; - break; - case '1': - children = ; - break; - case '2': - children = ; - break; - case '3': - children = ; - break; - case '4': - children = ; - break; - case '5': - children = ; - break; - case '6': - children = ; - break; - case '7': - children = ; - break; - case '8': - children = ; - break; - case '9': - children = ; - break; - default: - break; - } + children = ; } // console.log(linkType) const style = { diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index 8212a7050..cfda0d40e 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -19,5 +19,4 @@ export interface AssistantMessage { quote?: string; image?: string; tool_logs?: string; - links?: { start: number; end: number; url: string; id?: string; link_type: ANNOTATION_LINK_TYPE }[]; } diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 82e48167a..f2ea83310 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -4,7 +4,7 @@ import OpenAI from 'openai'; import * as path from 'path'; import { promisify } from 'util'; import * as uuid from 'uuid'; -import { filesDirectory, publicDirectory } from '..'; +import { filesDirectory, publicDirectory } from '../SocketData'; import { Method } from '../RouteManager'; import ApiManager, { Registration } from './ApiManager'; @@ -36,7 +36,10 @@ const readFileAsync = promisify(fs.readFile); export default class AssistantManager extends ApiManager { protected initialize(register: Registration): void { - const openai = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); + const openai = new OpenAI({ + apiKey: process.env._CLIENT_OPENAI_KEY, // Use client key so don't have to set key seperately for client and server. 
+ dangerouslyAllowBrowser: true, + }); register({ method: Method.POST, -- cgit v1.2.3-70-g09d2 From 0578f3c6a2159abed369ce03e600f8ff2668fcb0 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Thu, 30 May 2024 17:32:39 -0400 Subject: overhauled the entire link creation process and merged it also tweaked the runinaction vs action decorator to make it work better and more consistently --- src/client/views/nodes/ChatBox/ChatBox.tsx | 159 ++++++++++++++--------------- 1 file changed, 75 insertions(+), 84 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 4c1550321..3a2508c37 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -20,6 +20,7 @@ import { FieldView, FieldViewProps } from '../FieldView'; import './ChatBox.scss'; import MessageComponent from './MessageComponent'; import { ANNOTATION_LINK_TYPE, ASSISTANT_ROLE, AssistantMessage, DOWNLOAD_TYPE } from './types'; +import { Annotation } from 'mobx/dist/internal'; @observer export class ChatBox extends ViewBoxAnnotatableComponent() { @@ -115,28 +116,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } }; - createLink = (linkInfo: string, annotationText: string, linkType: ANNOTATION_LINK_TYPE) => { - console.log(this.current_message); - const text = this.current_message?.text; - console.log(text); - - const subString = annotationText; - if (!text) return; - const textToDisplay = `DASHLINK`; - let fileInfo = linkInfo; - const fileName = subString.split('/')[subString.split('/').length - 1]; - if (linkType === ANNOTATION_LINK_TYPE.DOWNLOAD_FILE) { - fileInfo = linkInfo + '!!!' 
+ fileName; - } - - const formattedLink = `[${textToDisplay}](${fileInfo}~~~${linkType})`; - console.log(formattedLink); - const newText = text.split(subString).join(formattedLink); - runInAction(() => { - if (this.current_message) this.current_message.text = newText; - }); - }; - @action createAssistant = async () => { this.isInitializing = true; @@ -181,7 +160,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } }; - @action runAssistant = async (inputText: string) => { // Ensure an assistant and thread are created if (!this.assistantID || !this.threadID || !this.vectorStoreID) { @@ -212,34 +190,32 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }); }) .on('toolCallDelta', (toolCallDelta, snapshot) => { - runInAction(() => (this.isLoading = false)); - if (toolCallDelta.type === 'code_interpreter') { - if (toolCallDelta.code_interpreter?.input) { - currentToolCallMessage += toolCallDelta.code_interpreter.input; - runInAction(() => { + runInAction(() => { + if (toolCallDelta.type === 'code_interpreter') { + if (toolCallDelta.code_interpreter?.input) { + currentToolCallMessage += toolCallDelta.code_interpreter.input; + if (this.current_message) { this.current_message.tool_logs = currentToolCallMessage; } - }); - } - if (toolCallDelta.code_interpreter?.outputs) { - currentToolCallMessage += '\n Code interpreter output:'; - toolCallDelta.code_interpreter.outputs.forEach(output => { - if (output.type === 'logs') { - runInAction(() => { + } + + if (toolCallDelta.code_interpreter?.outputs) { + currentToolCallMessage += '\n Code interpreter output:'; + toolCallDelta.code_interpreter.outputs.forEach(output => { + if (output.type === 'logs') { if (this.current_message) { this.current_message.tool_logs += '\n|' + output.logs; } - }); - } - }); + } + }); + } } - } + }); }) .on('textDelta', (textDelta, snapshot) => { - this.isLoading = false; - currentText += textDelta.value; runInAction(() => { + currentText += textDelta.value; if 
(this.current_message) { this.current_message.text = currentText; } @@ -252,11 +228,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { const { text } = textItem; console.log(text.value); try { - runInAction(() => { - if (this.current_message) { - this.current_message.text = text.value; - } - }); + if (this.current_message) { + this.current_message.text = text.value; + } } catch (e) { console.error('Error parsing JSON response:', e); } @@ -264,47 +238,28 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { const { annotations } = text; console.log('Annotations: ' + annotations); - const uniqueAnnotations = annotations.filter( - (annotation => { - const seenAnnotationTexts = new Set(); - return annotation => { - if (seenAnnotationTexts.has(annotation.text)) { - return false; - } else { - seenAnnotationTexts.add(annotation.text); - return true; - } - }; - })() - ); - uniqueAnnotations.forEach(async annotation => { - if (annotation.type === 'file_path') { - const { file_path: filePath } = annotation; - const fileToDownload = filePath.file_id; - console.log(fileToDownload); - if (filePath) { - console.log(filePath); - console.log(fileToDownload); - console.log(annotation.text); - this.createLink(fileToDownload, annotation.text, ANNOTATION_LINK_TYPE.DOWNLOAD_FILE); - } - } else { - const { file_citation: fileCitation } = annotation; - if (fileCitation) { - const citedFile = await this.openai.files.retrieve(fileCitation.file_id); - const citationUrl = citedFile.filename; - console.log(annotation.text); - console.log(this.current_message + 'fewjfjec'); - this.createLink(citationUrl, annotation.text, ANNOTATION_LINK_TYPE.DASH_DOC); - } - } - }); + await this.createLinks( + annotations.filter( + (annotation => { + const seenAnnotationTexts = new Set(); + return annotation => { + if (seenAnnotationTexts.has(annotation.text)) { + return false; + } else { + seenAnnotationTexts.add(annotation.text); + return true; + } + }; + })() + ) + ); } 
runInAction(() => { - if (this.current_message?.text) { + if (this.current_message) { + console.log(this.current_message); this.history.push({ ...this.current_message }); - //this.current_message = undefined; + this.current_message = undefined; } }); }) @@ -312,7 +267,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { runInAction(() => { if (this.current_message?.tool_logs) { this.history.push({ ...this.current_message }); - //this.current_message = undefined; + this.current_message = undefined; } }); }) @@ -321,6 +276,42 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }); }; + createLinks = async (annotations: OpenAI.Beta.Threads.Messages.Annotation[]) => { + console.log(this.current_message); + let text = this.current_message?.text; + console.log(text); + await Promise.all( + annotations.map(async annotation => { + const subString = annotation.text; + const textToDisplay = `DASHLINK`; + let fileInfo = ''; + let formattedLink = ''; + const fileName = subString.split('/')[subString.split('/').length - 1]; + + if (annotation.type === 'file_path') { + const { file_path: filePath } = annotation; + if (filePath) { + fileInfo = filePath.file_id + '!!!' 
+ fileName; + formattedLink = `[${textToDisplay}](${fileInfo}~~~${ANNOTATION_LINK_TYPE.DOWNLOAD_FILE})`; + } + } else { + const { file_citation: fileCitation } = annotation; + if (fileCitation) { + const citedFile = await this.openai.files.retrieve(fileCitation.file_id); + formattedLink = `[${textToDisplay}](${citedFile.filename}~~~${ANNOTATION_LINK_TYPE.DASH_DOC})`; + } + } + + console.log(formattedLink); + text = text?.split(subString).join(formattedLink); + console.log(text); + }) + ); + runInAction(() => { + if (this.current_message) this.current_message.text = text || ''; + }); + }; + @action goToLinkedDoc = async (link: string) => { const linkedDocs = LinkManager.Instance.getAllRelatedLinks(this.Document) -- cgit v1.2.3-70-g09d2 From 33621442bad6ffe78840dc95984199d3b339d832 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Thu, 6 Jun 2024 11:36:59 -0400 Subject: added intro message --- src/client/views/nodes/ChatBox/ChatBox.scss | 77 ++++++++++++++--------------- src/client/views/nodes/ChatBox/ChatBox.tsx | 53 ++++++++++---------- 2 files changed, 65 insertions(+), 65 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/ChatBox.scss b/src/client/views/nodes/ChatBox/ChatBox.scss index f1ad3d074..a08b98f50 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.scss +++ b/src/client/views/nodes/ChatBox/ChatBox.scss @@ -24,15 +24,14 @@ $border-radius: 8px; padding: 10px; display: flex; flex-direction: column-reverse; - + &::-webkit-scrollbar { - width: 8px; + width: 8px; } &::-webkit-scrollbar-thumb { - background-color: darken($background-color, 10%); - border-radius: $border-radius; + background-color: darken($background-color, 10%); + border-radius: $border-radius; } - .chat-content { display: flex; @@ -52,10 +51,10 @@ $border-radius: 8px; align-items: center; max-width: 70%; word-break: break-word; - .message-footer { // Assuming this is the container for the toggle button + .message-footer { + // Assuming this is the container for the 
toggle button //max-width: 70%; - - + .toggle-logs-button { margin-top: 10px; // Padding on sides to align with the text above width: 95%; @@ -88,35 +87,34 @@ $border-radius: 8px; max-height: 150px; // Ensuring it does not grow too large overflow-y: auto; } - - } - + } + .custom-link { color: lightblue; text-decoration: underline; cursor: pointer; - } + } &.user { - align-self: flex-end; - background-color: $button-color; - color: #fff; + align-self: flex-end; + background-color: $button-color; + color: #fff; } - + &.chatbot { - align-self: flex-start; - background-color: $input-background; - color: $text-color; + align-self: flex-start; + background-color: $input-background; + color: $text-color; } - + span { - flex-grow: 1; - padding-right: 10px; + flex-grow: 1; + padding-right: 10px; } - + img { - max-width: 50px; - max-height: 50px; - border-radius: 50%; + max-width: 50px; + max-height: 50px; + border-radius: 50%; } } } @@ -132,8 +130,8 @@ $border-radius: 8px; padding: 10px; background-color: $input-background; box-shadow: inset 0 -1px 2px $shadow-color; - - input[type="text"] { + + input[type='text'] { flex-grow: 1; border: 1px solid darken($input-background, 10%); border-radius: $border-radius; @@ -143,16 +141,16 @@ $border-radius: 8px; button { padding: 8px 16px; - background-color: $button-color; - color: #fff; - border: none; - border-radius: $border-radius; - cursor: pointer; - transition: background-color 0.3s; - - &:hover { - background-color: $button-hover-color; - } + background-color: $button-color; + color: #fff; + border: none; + border-radius: $border-radius; + cursor: pointer; + transition: background-color 0.3s; + + &:hover { + background-color: $button-hover-color; + } } margin-bottom: 0; } @@ -178,7 +176,6 @@ $border-radius: 8px; } } - .modal { position: fixed; top: 0; @@ -217,7 +214,7 @@ $border-radius: 8px; border: none; border-radius: $border-radius; cursor: pointer; - margin: 5px; + //margin: 5px; transition: background-color 0.3s; &:hover 
{ diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 3a2508c37..390f13ce7 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -49,7 +49,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { super(props); makeObservable(this); this.openai = this.initializeOpenAI(); - this.history = []; + this.history = [{ role: ASSISTANT_ROLE.ASSISTANT, text: 'Welcome to the Document Analyser Assistant! Link a document or ask questions to get started.' }]; this.threadID = StrCast(this.dataDoc.thread_id); this.assistantID = StrCast(this.dataDoc.assistant_id); this.vectorStoreID = StrCast(this.dataDoc.vector_store_id); @@ -160,6 +160,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } }; + @action runAssistant = async (inputText: string) => { // Ensure an assistant and thread are created if (!this.assistantID || !this.threadID || !this.vectorStoreID) { @@ -169,7 +170,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { let currentText: string = ''; let currentToolCallMessage: string = ''; - let current_message_id: string | null = null; // Send the user's input to the assistant await this.openai.beta.threads.messages.create(this.threadID, { @@ -190,32 +190,32 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }); }) .on('toolCallDelta', (toolCallDelta, snapshot) => { - runInAction(() => { - if (toolCallDelta.type === 'code_interpreter') { - if (toolCallDelta.code_interpreter?.input) { - currentToolCallMessage += toolCallDelta.code_interpreter.input; + if (toolCallDelta.type === 'code_interpreter') { + if (toolCallDelta.code_interpreter?.input) { + currentToolCallMessage += toolCallDelta.code_interpreter.input; - if (this.current_message) { - this.current_message.tool_logs = currentToolCallMessage; - } + if (this.current_message) { + this.current_message.tool_logs = currentToolCallMessage; } + } - if 
(toolCallDelta.code_interpreter?.outputs) { - currentToolCallMessage += '\n Code interpreter output:'; - toolCallDelta.code_interpreter.outputs.forEach(output => { - if (output.type === 'logs') { + if (toolCallDelta.code_interpreter?.outputs) { + currentToolCallMessage += '\n Code interpreter output:'; + toolCallDelta.code_interpreter.outputs.forEach(output => { + if (output.type === 'logs') { + runInAction(() => { if (this.current_message) { this.current_message.tool_logs += '\n|' + output.logs; } - } - }); - } + }); + } + }); } - }); + } }) .on('textDelta', (textDelta, snapshot) => { + currentText += textDelta.value; runInAction(() => { - currentText += textDelta.value; if (this.current_message) { this.current_message.text = currentText; } @@ -431,6 +431,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } if (doc) { doc && this._props.addDocument?.(doc); + //add to overlay await DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); } }; @@ -505,13 +506,15 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { try { const storedHistory = JSON.parse(StrCast(this.dataDoc.data)); runInAction(() => { - this.history = storedHistory.map((msg: AssistantMessage) => ({ - role: msg.role, - text: msg.text, - quote: msg.quote, - tool_logs: msg.tool_logs, - image: msg.image, - })); + this.history.push( + ...storedHistory.map((msg: AssistantMessage) => ({ + role: msg.role, + text: msg.text, + quote: msg.quote, + tool_logs: msg.tool_logs, + image: msg.image, + })) + ); }); } catch (e) { console.error('Failed to parse history from dataDoc:', e); -- cgit v1.2.3-70-g09d2 From e297c75cdcc8bb5b1b138d1272f1f6f27b222f4c Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Thu, 27 Jun 2024 16:36:40 -0400 Subject: fixing to work with python API also added follow up questions --- package-lock.json | 2 +- package.json | 2 +- src/client/views/nodes/ChatBox/ChatBox.scss | 77 +++-- src/client/views/nodes/ChatBox/ChatBox.tsx | 351 ++++++--------------- .../views/nodes/ChatBox/MessageComponent.tsx | 53 +++- src/server/ApiManagers/AssistantManager.ts | 33 ++ 6 files changed, 218 insertions(+), 300 deletions(-) (limited to 'src') diff --git a/package-lock.json b/package-lock.json index b143dc5c1..9d58ead46 100644 --- a/package-lock.json +++ b/package-lock.json @@ -56,7 +56,7 @@ "adm-zip": "^0.5.10", "archiver": "^7.0.1", "async": "^3.2.5", - "axios": "^1.6.2", + "axios": "^1.7.2", "babel": "^6.23.0", "babel-loader": "^9.1.3", "bcrypt-nodejs": "0.0.3", diff --git a/package.json b/package.json index 52f627b63..49c0e4d91 100644 --- a/package.json +++ b/package.json @@ -141,7 +141,7 @@ "adm-zip": "^0.5.10", "archiver": "^7.0.1", "async": "^3.2.5", - "axios": "^1.6.2", + "axios": "^1.7.2", "babel": "^6.23.0", "babel-loader": "^9.1.3", "bcrypt-nodejs": "0.0.3", diff --git a/src/client/views/nodes/ChatBox/ChatBox.scss b/src/client/views/nodes/ChatBox/ChatBox.scss index a08b98f50..f1e3d3d67 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.scss +++ b/src/client/views/nodes/ChatBox/ChatBox.scss @@ -9,12 +9,10 @@ $border-radius: 8px; .chatBox { display: flex; flex-direction: column; - width: 100%; /* Adjust the width as needed, could be in percentage */ - height: 100%; /* Adjust the height as needed, could be in percentage */ + width: 100%; + height: 100%; background-color: $background-color; font-family: 'Helvetica Neue', Arial, sans-serif; - //margin: 20px auto; - //overflow: hidden; .scroll-box { flex-grow: 1; @@ -24,6 +22,7 @@ $border-radius: 8px; padding: 10px; display: flex; flex-direction: column-reverse; + padding-bottom: 0; &::-webkit-scrollbar { width: 8px; @@ -47,28 +46,24 @@ $border-radius: 8px; border-radius: 
$border-radius; background-color: lighten($background-color, 5%); box-shadow: 0 2px 5px $shadow-color; - //display: flex; - align-items: center; - max-width: 70%; + align-items: flex-start; + max-width: 90%; + width: 100%; word-break: break-word; + .message-footer { - // Assuming this is the container for the toggle button - //max-width: 70%; + width: 100%; .toggle-logs-button { - margin-top: 10px; // Padding on sides to align with the text above + margin-top: 10px; width: 95%; - //display: block; // Ensures the button extends the full width of its container - text-align: center; // Centers the text inside the button - //padding: 8px 0; // Adequate padding for touch targets + text-align: center; background-color: $button-color; color: #fff; border: none; border-radius: $border-radius; cursor: pointer; - //transition: background-color 0.3s; - //margin-top: 10px; // Adds space above the button - box-shadow: 0 2px 4px $shadow-color; // Consistent shadow with other elements + box-shadow: 0 2px 4px $shadow-color; &:hover { background-color: $button-hover-color; } @@ -78,13 +73,9 @@ $border-radius: 8px; background-color: $input-background; color: $text-color; margin-top: 5px; - //padding: 10px; - //border-radius: $border-radius; - //box-shadow: inset 0 2px 4px $shadow-color; - //transition: opacity 1s ease-in-out; font-family: monospace; overflow-x: auto; - max-height: 150px; // Ensuring it does not grow too large + max-height: 150px; overflow-y: auto; } } @@ -118,18 +109,17 @@ $border-radius: 8px; } } } - padding-bottom: 0; } .chat-form { display: flex; - flex-grow: 1; - //height: 50px; + flex-grow: 0; bottom: 0; width: 100%; padding: 10px; background-color: $input-background; box-shadow: inset 0 -1px 2px $shadow-color; + margin-bottom: 0; input[type='text'] { flex-grow: 1; @@ -147,12 +137,12 @@ $border-radius: 8px; border-radius: $border-radius; cursor: pointer; transition: background-color 0.3s; + min-width: 80px; &:hover { background-color: $button-hover-color; } } - 
margin-bottom: 0; } } @@ -168,7 +158,7 @@ $border-radius: 8px; align-items: center; font-size: 1.5em; color: $text-color; - z-index: 10; // Ensure it's above all other content (may be better solution) + z-index: 10; &::before { content: 'Initializing...'; @@ -214,7 +204,6 @@ $border-radius: 8px; border: none; border-radius: $border-radius; cursor: pointer; - //margin: 5px; transition: background-color 0.3s; &:hover { @@ -223,3 +212,37 @@ $border-radius: 8px; } } } + +.follow-up-questions { + margin-top: 10px; + width: 100%; + + h4 { + margin-bottom: 5px; + font-size: 14px; + } + + .follow-up-button { + background-color: #f0f0f0; + border: 1px solid #ddd; + border-radius: 8px; + padding: 8px 10px; + margin: 4px 0; + cursor: pointer; + transition: background-color 0.3s; + display: block; + width: 100%; + text-align: left; + white-space: normal; + word-wrap: break-word; + font-size: 12px; + color: $text-color; + min-height: 40px; + height: auto; // Allow the button to expand as needed + line-height: 1.3; + + &:hover { + background-color: #e0e0e0; + } + } +} diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 390f13ce7..b986c7393 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -21,6 +21,7 @@ import './ChatBox.scss'; import MessageComponent from './MessageComponent'; import { ANNOTATION_LINK_TYPE, ASSISTANT_ROLE, AssistantMessage, DOWNLOAD_TYPE } from './types'; import { Annotation } from 'mobx/dist/internal'; +import { FormEvent } from 'react'; @observer export class ChatBox extends ViewBoxAnnotatableComponent() { @@ -33,7 +34,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @observable isInitializing: boolean = true; @observable expandedLogIndex: number | null = null; @observable linked_docs_to_add: Doc[] = []; - + @observable inputValue: string = ''; private openai: OpenAI; private assistantID: string = ''; private threadID: string = ''; 
@@ -160,240 +161,66 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } }; - @action - runAssistant = async (inputText: string) => { - // Ensure an assistant and thread are created - if (!this.assistantID || !this.threadID || !this.vectorStoreID) { - await this.createAssistant(); - console.log('Assistant and thread created:', this.assistantID, this.threadID); - } - - let currentText: string = ''; - let currentToolCallMessage: string = ''; - - // Send the user's input to the assistant - await this.openai.beta.threads.messages.create(this.threadID, { - role: 'user', - content: inputText, - }); - - // Listen to the streaming responses - const stream = this.openai.beta.threads.runs - .stream(this.threadID, { - assistant_id: this.assistantID, - }) - .on('runStepCreated', (runStep: RunStep) => { - runInAction(() => { - this.isLoading = true; - //intentionally don't merge run steps' messages and keep them as seperate messages on the interface - this.current_message = { role: ASSISTANT_ROLE.ASSISTANT, text: '', tool_logs: '' }; - }); - }) - .on('toolCallDelta', (toolCallDelta, snapshot) => { - if (toolCallDelta.type === 'code_interpreter') { - if (toolCallDelta.code_interpreter?.input) { - currentToolCallMessage += toolCallDelta.code_interpreter.input; - - if (this.current_message) { - this.current_message.tool_logs = currentToolCallMessage; - } - } - - if (toolCallDelta.code_interpreter?.outputs) { - currentToolCallMessage += '\n Code interpreter output:'; - toolCallDelta.code_interpreter.outputs.forEach(output => { - if (output.type === 'logs') { - runInAction(() => { - if (this.current_message) { - this.current_message.tool_logs += '\n|' + output.logs; - } - }); - } - }); - } - } - }) - .on('textDelta', (textDelta, snapshot) => { - currentText += textDelta.value; - runInAction(() => { - if (this.current_message) { - this.current_message.text = currentText; - } - }); - }) - .on('messageDone', async message => { - console.log(this.current_message); - const 
textItem = message.content.find(item => item.type === 'text'); - if (textItem && textItem.type === 'text') { - const { text } = textItem; - console.log(text.value); - try { - if (this.current_message) { - this.current_message.text = text.value; - } - } catch (e) { - console.error('Error parsing JSON response:', e); - } - console.log(this.current_message); - - const { annotations } = text; - console.log('Annotations: ' + annotations); - - await this.createLinks( - annotations.filter( - (annotation => { - const seenAnnotationTexts = new Set(); - return annotation => { - if (seenAnnotationTexts.has(annotation.text)) { - return false; - } else { - seenAnnotationTexts.add(annotation.text); - return true; - } - }; - })() - ) - ); - } - runInAction(() => { - if (this.current_message) { - console.log(this.current_message); - this.history.push({ ...this.current_message }); - this.current_message = undefined; - } - }); - }) - .on('toolCallDone', async toolCall => { - runInAction(() => { - if (this.current_message?.tool_logs) { - this.history.push({ ...this.current_message }); - this.current_message = undefined; - } - }); - }) - .on('imageFileDone', (content: ImageFile, snapshot: Message) => { - console.log('Image file done:', content); - }); - }; - - createLinks = async (annotations: OpenAI.Beta.Threads.Messages.Annotation[]) => { - console.log(this.current_message); - let text = this.current_message?.text; - console.log(text); - await Promise.all( - annotations.map(async annotation => { - const subString = annotation.text; - const textToDisplay = `DASHLINK`; - let fileInfo = ''; - let formattedLink = ''; - const fileName = subString.split('/')[subString.split('/').length - 1]; - - if (annotation.type === 'file_path') { - const { file_path: filePath } = annotation; - if (filePath) { - fileInfo = filePath.file_id + '!!!' 
+ fileName; - formattedLink = `[${textToDisplay}](${fileInfo}~~~${ANNOTATION_LINK_TYPE.DOWNLOAD_FILE})`; - } - } else { - const { file_citation: fileCitation } = annotation; - if (fileCitation) { - const citedFile = await this.openai.files.retrieve(fileCitation.file_id); - formattedLink = `[${textToDisplay}](${citedFile.filename}~~~${ANNOTATION_LINK_TYPE.DASH_DOC})`; - } - } - - console.log(formattedLink); - text = text?.split(subString).join(formattedLink); - console.log(text); - }) - ); - runInAction(() => { - if (this.current_message) this.current_message.text = text || ''; - }); - }; - - @action - goToLinkedDoc = async (link: string) => { - const linkedDocs = LinkManager.Instance.getAllRelatedLinks(this.Document) - .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d); - - const linkedDoc = linkedDocs.find(doc => { - const docUrl = CsvCast(doc.data, PDFCast(doc.data)).url.pathname.replace('/files/pdfs/', '').replace('/files/csvs/', ''); - console.log('URL: ' + docUrl + ' Citation URL: ' + link); - return link === docUrl; - }); - - if (linkedDoc) { - await DocumentManager.Instance.showDocument(DocCast(linkedDoc), { willZoomCentered: true }, () => {}); - } - }; - @action askGPT = async (event: React.FormEvent): Promise => { event.preventDefault(); + this.inputValue = ''; const textInput = event.currentTarget.elements.namedItem('messageInput') as HTMLInputElement; const trimmedText = textInput.value.trim(); - if (!this.assistantID || !this.threadID) { - try { - await this.createAssistant(); - } catch (err) { - console.error('Error:', err); - } - } - if (trimmedText) { try { textInput.value = ''; runInAction(() => { this.history.push({ role: ASSISTANT_ROLE.USER, text: trimmedText }); }); - await this.runAssistant(trimmedText); - this.dataDoc.data = this.history.toString(); + const { response } = await Networking.PostToServer('/askAgent', { input: trimmedText }); + runInAction(() => { + 
this.history.push({ role: ASSISTANT_ROLE.ASSISTANT, text: response }); + }); + this.dataDoc.data = JSON.stringify(this.history); } catch (err) { console.error('Error:', err); } } }; - @action - uploadLinks = async (linkedDocs: Doc[]) => { - if (this.isInitializing) { - console.log('Initialization in progress, upload aborted.'); - return; - } - const urls = linkedDocs.map(doc => CsvCast(doc.data, PDFCast(doc.data)).url.pathname); - const csvUrls = urls.filter(url => url.endsWith('.csv')); - console.log(this.assistantID, this.threadID, urls); - - const { openai_file_ids: openaiFileIds } = await Networking.PostToServer('/uploadPDFToVectorStore', { urls, threadID: this.threadID, assistantID: this.assistantID, vector_store_id: this.vectorStoreID }); - - linkedDocs.forEach((doc, i) => { - doc[this.Document[Id] + '_ai_field_id'] = openaiFileIds[i]; - console.log('AI Field ID: ' + openaiFileIds[i]); - }); - - if (csvUrls.length > 0) { - for (let i = 0; i < csvUrls.length; i++) { - this.linkedCsvIDs.push(openaiFileIds[urls.indexOf(csvUrls[i])]); - } - console.log('linked csvs:' + this.linkedCsvIDs); - await this.openai.beta.assistants.update(this.assistantID, { - tools: [{ type: 'file_search' }, { type: 'code_interpreter' }], - tool_resources: { - file_search: { - vector_store_ids: [this.vectorStoreID], - }, - code_interpreter: { - file_ids: this.linkedCsvIDs, - }, - }, - }); - } - }; + // @action + // uploadLinks = async (linkedDocs: Doc[]) => { + // if (this.isInitializing) { + // console.log('Initialization in progress, upload aborted.'); + // return; + // } + // const urls: string[] = linkedDocs.map(doc => CsvCast(doc.data, PDFCast(doc.data)).url.pathname); + // const csvUrls: string[] = urls.filter(url => url.endsWith('.csv')); + // console.log(this.assistantID, this.threadID, urls); + + // await Networking.PostToServer('/uploadPDFs', { file_path: urls[0] }); + + // // linkedDocs.forEach((doc, i) => { + // // doc[this.Document[Id] + '_ai_field_id'] = openaiFileIds[i]; 
+ // // console.log('AI Field ID: ' + openaiFileIds[i]); + // // }); + + // // if (csvUrls.length > 0) { + // // for (let i = 0; i < csvUrls.length; i++) { + // // this.linkedCsvIDs.push(openaiFileIds[urls.indexOf(csvUrls[i])]); + // // } + // // console.log('linked csvs:' + this.linkedCsvIDs); + // // await this.openai.beta.assistants.update(this.assistantID, { + // // tools: [{ type: 'file_search' }, { type: 'code_interpreter' }], + // // tool_resources: { + // // file_search: { + // // vector_store_ids: [this.vectorStoreID], + // // }, + // // code_interpreter: { + // // file_ids: this.linkedCsvIDs, + // // }, + // // }, + // // }); + // // } + // }; downloadToComputer = (url: string, fileName: string) => { fetch(url, { method: 'get', mode: 'no-cors', referrerPolicy: 'no-referrer' }) @@ -555,53 +382,59 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { ); } + @action + handleFollowUpClick = (question: string) => { + console.log('Follow-up question clicked:', question); + this.inputValue = question; + }; render() { return ( - -
- {this.isInitializing &&
Initializing...
} - {this.renderModal()} -
{ - this._oldWheel?.removeEventListener('wheel', this.onPassiveWheel); - this._oldWheel = r; - r?.addEventListener('wheel', this.onPassiveWheel, { passive: false }); - }}> -
- {this.history.map((message, index) => ( - - ))} - {!this.current_message ? null : ( - - )} -
+ /** **/ +
+ {this.isInitializing &&
Initializing...
} + {this.renderModal()} +
{ + this._oldWheel?.removeEventListener('wheel', this.onPassiveWheel); + this._oldWheel = r; + r?.addEventListener('wheel', this.onPassiveWheel, { passive: false }); + }}> +
+ {this.history.map((message, index) => ( + {}} + setCurrentFile={this.setCurrentFile} + onFollowUpClick={this.handleFollowUpClick} + /> + ))} + {!this.current_message ? null : ( + {}} + setCurrentFile={this.setCurrentFile} + onFollowUpClick={this.handleFollowUpClick} + /> + )}
-
- - -
- +
+ (this.inputValue = e.target.value)} /> + +
+
+ /**
**/ ); } } diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index ef6ce83b5..15c0811fb 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -14,24 +14,29 @@ interface MessageComponentProps { showModal: () => void; goToLinkedDoc: (url: string) => void; setCurrentFile: (file: { url: string }) => void; + onFollowUpClick: (question: string) => void; // New prop isCurrent?: boolean; } -const MessageComponent: React.FC = function ({ message, toggleToolLogs, expandedLogIndex, goToLinkedDoc, index, showModal, setCurrentFile, isCurrent = false }) { - // const messageClass = `${message.role} ${isCurrent ? 'current-message' : ''}`; - +const MessageComponent: React.FC = function ({ + message, + toggleToolLogs, + expandedLogIndex, + goToLinkedDoc, + index, + showModal, + setCurrentFile, + onFollowUpClick, // New prop + isCurrent = false, +}) { const LinkRenderer = ({ href, children }: { href: string; children: React.ReactNode }) => { - // console.log(href + " " + children) const regex = /([a-zA-Z0-9_.!-]+)~~~(citation|file_path)/; const matches = href.match(regex); - // console.log(href) - // console.log(matches) const url = matches ? matches[1] : href; const linkType = matches ? matches[2] : null; if (linkType === 'citation') { children = ; } - // console.log(linkType) const style = { color: 'lightblue', verticalAlign: linkType === 'citation' ? 'super' : 'baseline', @@ -56,14 +61,38 @@ const MessageComponent: React.FC = function ({ message, t ); }; + const parseMessage = (text: string) => { + const answerMatch = text.match(/([\s\S]*?)<\/answer>/); + const followUpMatch = text.match(/([\s\S]*?)<\/follow_up_question>/); + + const answer = answerMatch ? answerMatch[1] : text; + const followUpQuestions = followUpMatch + ? 
followUpMatch[1] + .split('\n') + .filter(q => q.trim()) + .map(q => q.replace(/^\d+\.\s*/, '').trim()) + : []; + + return { answer, followUpQuestions }; + }; + + const { answer, followUpQuestions } = parseMessage(message.text); + console.log('Parsed answer:', answer); + console.log('Parsed follow-up questions:', followUpQuestions); return (
- - - {message.text ? message.text : ''} - - + {answer} {message.image && } + {followUpQuestions.length > 0 && ( +
+

Follow-up Questions:

+ {followUpQuestions.map((question, idx) => ( + + ))} +
+ )}
{message.tool_logs && ( - - -
-
- ); - }; - @action - showModal = () => { - this.modalStatus = true; - }; - - @action - setCurrentFile = (file: { url: string }) => { - this.currentFile = file; - }; - componentDidMount() { this._props.setContentViewBox?.(this); if (this.dataDoc.data) { @@ -337,9 +179,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { ...storedHistory.map((msg: AssistantMessage) => ({ role: msg.role, text: msg.text, - quote: msg.quote, - tool_logs: msg.tool_logs, - image: msg.image, + follow_up_questions: msg.follow_up_questions, + citations: msg.citations, })) ); }); @@ -355,7 +196,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { .filter(d => d); return linkedDocs; }, - linked => this.linked_docs_to_add.push(...linked.filter(linkedDoc => !this.linked_docs_to_add.includes(linkedDoc))) ); @@ -370,7 +210,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { if ((change as any).addedCount > 0) { // maybe check here if its already in the urls datadoc array so doesn't add twice console.log((change as any).added as Doc[]); - this.uploadLinks((change as any).added as Doc[]); + ((change as any).added as Doc[]).forEach(doc => { + this.uploadNewDocument(doc); + }); } // (change as any).removed.forEach((link: any) => remLinkFromDoc(toRealField(link))); break; @@ -392,7 +234,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { /** **/
{this.isInitializing &&
Initializing...
} - {this.renderModal()}
{ @@ -401,32 +242,37 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { r?.addEventListener('wheel', this.onPassiveWheel, { passive: false }); }}>
- {this.history.map((message, index) => ( - {}} - setCurrentFile={this.setCurrentFile} - onFollowUpClick={this.handleFollowUpClick} - /> - ))} - {!this.current_message ? null : ( - {}} - setCurrentFile={this.setCurrentFile} - onFollowUpClick={this.handleFollowUpClick} - /> - )} + { + //this.history.map((message, index) => ( + // {}} + // setCurrentFile={this.setCurrentFile} + // onFollowUpClick={this.handleFollowUpClick} + // /> + //) + //) + } + { + //!this.current_message ? null : ( + // {}} + // setCurrentFile={this.setCurrentFile} + // onFollowUpClick={this.handleFollowUpClick} + // /> + //) + }
diff --git a/src/client/views/nodes/ChatBox/tools.ts b/src/client/views/nodes/ChatBox/tools.ts new file mode 100644 index 000000000..4035280a8 --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools.ts @@ -0,0 +1,26 @@ +import { DocCast } from '../../../../fields/Types'; +import { DocServer } from '../../../DocServer'; +import { Docs } from '../../../documents/Documents'; +import { DocUtils } from '../../../documents/DocUtils'; +import { TabDocView } from '../../collections/TabDocView'; +import { DocumentView } from '../DocumentView'; +import { OpenWhere } from '../OpenWhere'; + +export function retrieval(json: any): string { + return ''; +} + +export function create_collection(docView: DocumentView, document_ids: string[], title: string): string { + const docs = document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); + const collection = Docs.Create.FreeformDocument(docs, { title }); + docView._props.addDocTab(collection, OpenWhere.addRight); //in future, create popup prompting user where to add + return 'Collection created in Dash called ' + title; +} + +export function create_link(docView: DocumentView, document_ids: string[]): string { + //Make document_ids a size 2 array + const docs = document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); + const linkDoc = DocUtils.MakeLink(docs[0], docs[1], {})!; + DocumentView.linkCommonAncestor(linkDoc)?.ComponentView?.addDocument?.(linkDoc); + return 'Link created between ' + docs[0].title + ' and ' + docs[1].title; +} diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index cfda0d40e..7acb96c15 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -1,22 +1,78 @@ export enum ASSISTANT_ROLE { - USER = 'User', - ASSISTANT = 'Assistant', + USER = 'user', + ASSISTANT = 'assistant', } -export enum ANNOTATION_LINK_TYPE { - DASH_DOC = 'citation', - DOWNLOAD_FILE = 'file_path', -} - -export enum 
DOWNLOAD_TYPE { - DASH = 'dash', - DEVICE = 'device', +export enum CHUNK_TYPE { + TEXT = 'text', + IMAGE = 'image', } export interface AssistantMessage { role: ASSISTANT_ROLE; text: string; - quote?: string; - image?: string; - tool_logs?: string; + follow_up_questions?: string[]; + citations?: Citation[]; +} + +export interface Citation { + text: string; + type: CHUNK_TYPE; + span: [number, number]; + chunk_id: string; + direct_text?: string; +} + +export interface Chunk { + id: string; + values: number[]; + metadata: { + text: string; + type: CHUNK_TYPE; + original_document: string; + file_path: string; + location: string; + start_page: number; + end_page: number; + }; +} + +export interface AI_Document { + purpose: string; + file_name: string; + num_pages: number; + summary: string; + chunks: Chunk[]; + type: string; +} + +export function convertToAIDocument(json: any): AI_Document { + if (!json) { + throw new Error('Invalid JSON object'); + } + + const chunks: Chunk[] = json.chunks.map((chunk: any) => ({ + id: chunk.id, + values: chunk.values, + metadata: { + text: chunk.metadata.text, + type: chunk.metadata.type as CHUNK_TYPE, // Ensure type casting + original_document: chunk.metadata.original_document, + file_path: chunk.metadata.file_path, + location: chunk.metadata.location, + start_page: chunk.metadata.start_page, + end_page: chunk.metadata.end_page, + }, + })); + + const aiDocument: AI_Document = { + purpose: json.purpose, + file_name: json.file_name, + num_pages: json.num_pages, + summary: json.summary, + chunks: chunks, + type: json.type, + }; + + return aiDocument; } diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts new file mode 100644 index 000000000..d16e117b6 --- /dev/null +++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts @@ -0,0 +1,117 @@ +import { Pinecone, Index, IndexList, PineconeRecord, RecordMetadata, QueryResponse } from 
'@pinecone-database/pinecone'; +import { CohereClient } from 'cohere-ai'; +import { EmbedResponse } from 'cohere-ai/api'; +import dotenv from 'dotenv'; +import { Chunk, AI_Document } from '../types'; + +dotenv.config(); + +export class Vectorstore { + private pinecone: Pinecone; + private index!: Index; + private cohere: CohereClient; + private indexName: string = 'pdf-chatbot'; + private documents: AI_Document[] = []; + + constructor() { + const pineconeApiKey = process.env.PINECONE_API_KEY; + if (!pineconeApiKey) { + throw new Error('PINECONE_API_KEY is not defined.'); + } + + this.pinecone = new Pinecone({ + apiKey: pineconeApiKey, + }); + this.cohere = new CohereClient({ + token: process.env.COHERE_API_KEY, + }); + this.initializeIndex(); + } + + private async initializeIndex() { + const indexList: IndexList = await this.pinecone.listIndexes(); + + if (!indexList.indexes?.some(index => index.name === this.indexName)) { + await this.pinecone.createIndex({ + name: this.indexName, + dimension: 1024, + metric: 'cosine', + spec: { + serverless: { + cloud: 'aws', + region: 'us-east-1', + }, + }, + }); + } + + this.index = this.pinecone.Index(this.indexName); + } + + async addDocument(document: AI_Document) { + this.documents.push(document); + await this.indexDocument(document); + console.log(`Document added: ${document.file_name}`); + } + + private async indexDocument(document: AI_Document) { + console.log('Uploading vectors to content namespace...'); + const pineconeRecords: PineconeRecord[] = document.chunks.map( + chunk => + ({ + id: chunk.id, + values: chunk.values, + metadata: chunk.metadata as RecordMetadata, + }) as PineconeRecord + ); + await this.index.upsert(pineconeRecords); + } + + async retrieve(query: string, topK: number = 10): Promise { + console.log(`Retrieving chunks for query: ${query}`); + try { + const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({ + texts: [query], + model: 'embed-english-v3.0', + inputType: 'search_query', + 
}); + + let queryEmbedding: number[]; + + if (Array.isArray(queryEmbeddingResponse.embeddings)) { + queryEmbedding = queryEmbeddingResponse.embeddings[0]; + } else if (queryEmbeddingResponse.embeddings && 'embeddings' in queryEmbeddingResponse.embeddings) { + queryEmbedding = (queryEmbeddingResponse.embeddings as { embeddings: number[][] }).embeddings[0]; + } else { + throw new Error('Invalid embedding response format'); + } + + if (!Array.isArray(queryEmbedding)) { + throw new Error('Query embedding is not an array'); + } + + const queryResponse: QueryResponse = await this.index.query({ + vector: queryEmbedding, + topK, + includeValues: true, + includeMetadata: true, + }); + + return queryResponse.matches.map( + match => + ({ + id: match.id, + values: match.values as number[], + metadata: match.metadata as { text: string; type: string; original_document: string; file_path: string; location: string; start_page: number; end_page: number }, + }) as Chunk + ); + } catch (error) { + console.error(`Error retrieving chunks: ${error}`); + return []; + } + } + + getSummaries(): string { + return this.documents.map((doc, index) => `${index + 1}) ${doc.summary}`).join('\n') + '\n'; + } +} diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 77d8af724..a35708ccd 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -160,10 +160,18 @@ export default class AssistantManager extends ApiManager { // Read file data and convert to base64 const file_data = fs.readFileSync(public_path, { encoding: 'base64' }); - const response = await axios.post('http://localhost:8080/createDocument', { - file_data, - file_name, - }); + const response = await axios.post( + 'http://localhost:8080/createDocument', + { + file_data, + file_name, + }, + { + headers: { + 'Content-Type': 'application/json', + }, + } + ); res.send({ document: response.data }); } catch (error: any) { diff --git 
a/src/server/RouteManager.ts b/src/server/RouteManager.ts index d8e0455f6..22e608868 100644 --- a/src/server/RouteManager.ts +++ b/src/server/RouteManager.ts @@ -8,6 +8,7 @@ import { DashUserModel } from './authentication/DashUserModel'; export enum Method { GET, POST, + PUT, } export interface CoreArguments { @@ -208,6 +209,9 @@ export default class RouteManager { case Method.POST: this.server.post(route, supervised); break; + case Method.PUT: + this.server.put(route, supervised); + break; default: } } -- cgit v1.2.3-70-g09d2 From d0e09ff3526e4f6b9aad824fad1020d083a87631 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Wed, 10 Jul 2024 15:13:46 -0400 Subject: document upload works --- src/client/views/nodes/ChatBox/ChatBox.tsx | 11 +++++------ src/server/ApiManagers/AssistantManager.ts | 14 +++++++++++++- 2 files changed, 18 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 9f4e6f07e..2283aad56 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -53,7 +53,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.openai = this.initializeOpenAI(); this.history = [{ role: ASSISTANT_ROLE.ASSISTANT, text: 'Welcome to the Document Analyser Assistant! Link a document or ask questions to get started.' 
}]; this.openai = this.initializeOpenAI(); - this.getLinkedDocs(); + this.getOtherDocs(); this.vectorstore = new Vectorstore(); reaction( @@ -64,13 +64,12 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { ); } - getLinkedDocs = async () => { - const visual_docs = (CollectionFreeFormDocumentView.from(this._props.DocumentView?.())?._props.parent as CollectionFreeFormView)?.childDocs.filter(doc => doc != this.Document); - console.log('All Docs:', visual_docs); + getOtherDocs = async () => { + const visible_docs = (CollectionFreeFormDocumentView.from(this._props.DocumentView?.())?._props.parent as CollectionFreeFormView)?.childDocs.filter(doc => doc != this.Document); + console.log('All Docs:', visible_docs); - visual_docs?.forEach(async doc => { + visible_docs?.forEach(async doc => { const local_file_path: string = CsvCast(doc.data, PDFCast(doc.data)).url?.pathname; - if (local_file_path) { const { document_json } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); const ai_document: AI_Document = convertToAIDocument(document_json); diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index a35708ccd..8a5f12c2b 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -173,7 +173,19 @@ export default class AssistantManager extends ApiManager { } ); - res.send({ document: response.data }); + const jobId = response.data.job_id; + + // Poll for results + let result; + while (!result) { + await new Promise(resolve => setTimeout(resolve, 5000)); // Wait for 1 second + const resultResponse = await axios.get(`http://localhost:8080/getResult/${jobId}`); + if (resultResponse.status === 200) { + result = resultResponse.data; + } + } + + res.send({ document_json: result }); } catch (error: any) { console.error('Error communicating with chatbot:', error); res.status(500).send({ error: 'Failed to communicate with the chatbot', details: 
error.message }); -- cgit v1.2.3-70-g09d2 From cab0311e2fd9a6379628c000d11ddcd805e01f64 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Wed, 10 Jul 2024 16:16:26 -0400 Subject: first attempt at integrating everything --- src/client/views/nodes/ChatBox/Agent.ts | 123 +++++++++++++++++++++ src/client/views/nodes/ChatBox/ChatBot.ts | 14 +++ src/client/views/nodes/ChatBox/ChatBox.tsx | 96 ++++++++++------ src/client/views/nodes/ChatBox/prompts.ts | 99 +++++++++++++++++ src/client/views/nodes/ChatBox/tools/BaseTool.ts | 24 ++++ .../views/nodes/ChatBox/tools/CalculateTool.ts | 25 +++++ src/client/views/nodes/ChatBox/tools/RAGTool.ts | 81 ++++++++++++++ .../views/nodes/ChatBox/tools/WikipediaTool.ts | 33 ++++++ src/client/views/nodes/ChatBox/types.ts | 15 +++ 9 files changed, 475 insertions(+), 35 deletions(-) create mode 100644 src/client/views/nodes/ChatBox/Agent.ts create mode 100644 src/client/views/nodes/ChatBox/ChatBot.ts create mode 100644 src/client/views/nodes/ChatBox/prompts.ts create mode 100644 src/client/views/nodes/ChatBox/tools/BaseTool.ts create mode 100644 src/client/views/nodes/ChatBox/tools/CalculateTool.ts create mode 100644 src/client/views/nodes/ChatBox/tools/RAGTool.ts create mode 100644 src/client/views/nodes/ChatBox/tools/WikipediaTool.ts (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts new file mode 100644 index 000000000..f20a75a8d --- /dev/null +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -0,0 +1,123 @@ +import OpenAI from 'openai'; +import { Tool, AgentMessage } from './types'; +import { getReactPrompt } from './prompts'; +import { XMLParser, XMLBuilder } from 'fast-xml-parser'; + +export class Agent { + private client: OpenAI; + private tools: Record; + private messages: AgentMessage[] = []; + private interMessages: AgentMessage[] = []; + private summaries: string; + + constructor(private vectorstore: Vectorstore) { + this.client = new OpenAI({ apiKey: 
process.env.OPENAI_API_KEY }); + this.summaries = this.vectorstore ? this.vectorstore.getSummaries() : 'No documents available.'; + this.tools = { + wikipedia: new WikipediaTool(), + calculate: new CalculateTool(), + rag: new RAGTool(vectorstore, this.summaries), + }; + } + + private formatChatHistory(): string { + let history = '\n'; + for (const message of this.messages) { + if (message.role === 'user') { + history += `${message.content}\n`; + } else if (message.role === 'assistant') { + history += `${message.content}\n`; + } + } + history += ''; + return history; + } + + async askAgent(question: string, maxTurns: number = 5): Promise { + console.log(`Starting query: ${question}`); + this.messages.push({ role: 'user', content: question }); + const chatHistory = this.formatChatHistory(); + console.log(`Chat history: ${chatHistory}`); + const systemPrompt = getReactPrompt(Object.values(this.tools), chatHistory); + console.log(`System prompt: ${systemPrompt}`); + this.interMessages = [{ role: 'system', content: systemPrompt }]; + + this.interMessages.push({ role: 'assistant', content: `${question}` }); + + for (let i = 0; i < maxTurns; i++) { + console.log(`Turn ${i + 1}/${maxTurns}`); + + const result = await this.execute(); + console.log(`Bot response: ${result}`); + this.interMessages.push({ role: 'assistant', content: result }); + + try { + const parser = new XMLParser(); + const parsedResult = parser.parse(result); + const step = parsedResult[`step${i + 1}`]; + + if (step.thought) console.log(`Thought: ${step.thought}`); + if (step.action) { + console.log(`Action: ${step.action}`); + const action = step.action; + const actionRules = new XMLBuilder().build({ + action_rules: this.tools[action].getActionRule(), + }); + this.interMessages.push({ role: 'user', content: actionRules }); + } + if (step.action_input) { + const actionInput = new XMLBuilder().build({ action_input: step.action_input }); + console.log(`Action input: ${actionInput}`); + try { + const 
observation = await this.processAction(action, step.action_input); + const nextPrompt = [{ type: 'text', text: '' }, ...observation, { type: 'text', text: '' }]; + this.interMessages.push({ role: 'user', content: nextPrompt }); + } catch (e) { + console.error(`Error processing action: ${e}`); + return `${e}`; + } + } + if (step.answer) { + console.log('Answer found. Ending query.'); + const answerContent = new XMLBuilder().build({ answer: step.answer }); + this.messages.push({ role: 'assistant', content: answerContent }); + this.interMessages = []; + return answerContent; + } + } catch (e) { + console.error('Error: Invalid XML response from bot'); + return 'Invalid response format.'; + } + } + + console.log('Reached maximum turns. Ending query.'); + return 'Reached maximum turns without finding an answer'; + } + + private async execute(): Promise { + const completion = await this.client.chat.completions.create({ + model: 'gpt-4', + messages: this.interMessages, + temperature: 0, + }); + return completion.choices[0].message.content; + } + + private async processAction(action: string, actionInput: any): Promise { + if (!(action in this.tools)) { + throw new Error(`Unknown action: ${action}`); + } + + const tool = this.tools[action]; + const args: Record = {}; + for (const paramName in tool.parameters) { + if (actionInput[paramName] !== undefined) { + args[paramName] = actionInput[paramName]; + } else { + throw new Error(`Missing required parameter '${paramName}' for action '${action}'`); + } + } + + return await tool.execute(args); + } +} diff --git a/src/client/views/nodes/ChatBox/ChatBot.ts b/src/client/views/nodes/ChatBox/ChatBot.ts new file mode 100644 index 000000000..31b4ea9e3 --- /dev/null +++ b/src/client/views/nodes/ChatBox/ChatBot.ts @@ -0,0 +1,14 @@ +import { Agent } from './Agent'; +import { Vectorstore } from './vectorstore/VectorstoreUpload'; + +export class ChatBot { + private agent: Agent; + + constructor(vectorstore: Vectorstore) { + this.agent = new 
Agent(vectorstore); + } + + async ask(question: string): Promise { + return await this.agent.askAgent(question); + } +} diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 2283aad56..73f35f501 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -27,6 +27,7 @@ import { Vectorstore } from './vectorstore/VectorstoreUpload'; import { DocumentView } from '../DocumentView'; import { CollectionFreeFormDocumentView } from '../CollectionFreeFormDocumentView'; import { CollectionFreeFormView } from '../../collections/collectionFreeForm'; +import { ChatBot } from './ChatBot'; @observer export class ChatBox extends ViewBoxAnnotatableComponent() { @@ -42,6 +43,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { private documents: AI_Document[] = []; private _oldWheel: any; private vectorstore: Vectorstore; + private chatbot: ChatBot; // Add the ChatBot instance public static LayoutString(fieldKey: string) { return FieldView.LayoutString(ChatBox, fieldKey); @@ -55,6 +57,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.openai = this.initializeOpenAI(); this.getOtherDocs(); this.vectorstore = new Vectorstore(); + this.chatbot = new ChatBot(this.vectorstore); // Initialize the ChatBot reaction( () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, text: msg.text, follow_up_questions: msg.follow_up_questions, citations: msg.citations })), @@ -65,7 +68,11 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } getOtherDocs = async () => { - const visible_docs = (CollectionFreeFormDocumentView.from(this._props.DocumentView?.())?._props.parent as CollectionFreeFormView)?.childDocs.filter(doc => doc != this.Document); + const visible_docs = (CollectionFreeFormDocumentView.from(this._props.DocumentView?.())?._props.parent as CollectionFreeFormView)?.childDocs + .filter(doc => doc != this.Document) + .map(d => 
DocCast(d?.annotationOn, d)) + .filter(d => d); + console.log('All Docs:', visible_docs); visible_docs?.forEach(async doc => { @@ -121,17 +128,39 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { runInAction(() => { this.history.push({ role: ASSISTANT_ROLE.USER, text: trimmedText }); }); - const { response } = await Networking.PostToServer('/askAgent', { input: trimmedText }); + this.isLoading = true; + const response = await this.chatbot.ask(trimmedText); // Use the chatbot to get the response runInAction(() => { - this.history.push({ role: ASSISTANT_ROLE.ASSISTANT, text: response }); + this.history.push(this.parseAssistantResponse(response)); }); this.dataDoc.data = JSON.stringify(this.history); } catch (err) { console.error('Error:', err); + runInAction(() => { + this.history.push({ role: ASSISTANT_ROLE.ASSISTANT, text: 'Sorry, I encountered an error while processing your request.' }); + }); + } finally { + this.isLoading = false; } } }; + parseAssistantResponse(response: string): AssistantMessage { + const parser = new DOMParser(); + const xmlDoc = parser.parseFromString(response, 'text/xml'); + const answerElement = xmlDoc.querySelector('answer'); + const followUpQuestionsElement = xmlDoc.querySelector('follow_up_questions'); + + const text = answerElement ? answerElement.textContent || '' : ''; + const followUpQuestions = followUpQuestionsElement ? Array.from(followUpQuestionsElement.querySelectorAll('question')).map(q => q.textContent || '') : []; + + return { + role: ASSISTANT_ROLE.ASSISTANT, + text, + follow_up_questions: followUpQuestions, + }; + } + // @action // uploadLinks = async (linkedDocs: Doc[]) => { // if (this.isInitializing) { @@ -241,42 +270,39 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { r?.addEventListener('wheel', this.onPassiveWheel, { passive: false }); }}>
- { - //this.history.map((message, index) => ( - // {}} - // setCurrentFile={this.setCurrentFile} - // onFollowUpClick={this.handleFollowUpClick} - // /> - //) - //) - } - { - //!this.current_message ? null : ( - // {}} - // setCurrentFile={this.setCurrentFile} - // onFollowUpClick={this.handleFollowUpClick} - // /> - //) - } + {this.history.map((message, index) => ( + {}} // Implement this method if needed + goToLinkedDoc={() => {}} // Implement this method if needed + setCurrentFile={() => {}} // Implement this method if needed + onFollowUpClick={this.handleFollowUpClick} + /> + ))} + {this.current_message && ( + {}} // Implement this method if needed + goToLinkedDoc={() => {}} // Implement this method if needed + setCurrentFile={() => {}} // Implement this method if needed + onFollowUpClick={this.handleFollowUpClick} + /> + )}
(this.inputValue = e.target.value)} /> - +
/** **/ diff --git a/src/client/views/nodes/ChatBox/prompts.ts b/src/client/views/nodes/ChatBox/prompts.ts new file mode 100644 index 000000000..8835265e4 --- /dev/null +++ b/src/client/views/nodes/ChatBox/prompts.ts @@ -0,0 +1,99 @@ +// prompts.ts + +import { Tool } from './types'; + +export function getReactPrompt(tools: Tool[], chatHistory: string): string { + const toolDescriptions = tools.map(tool => `${tool.name}:\n${tool.briefSummary}`).join('\n*****\n'); + + return ` + You run in a loop of Thought, Action, PAUSE, Action Input, Pause, Observation. + (this Thought/Action/PAUSE/Action Input/PAUSE/Observation can repeat N times) + Contain each stage of the loop within an XML element that specifies the stage type (e.g. content of the thought). + At the end of the loop, you output an Answer with the answer content contained within an XML element with an tag. At the end of the answer should be an array of 3 potential follow-up questions for the user to ask you next, contained within a key. + Use to describe your thoughts about the question you have been asked. + Use to specify run one of the actions available to you - then return a element. + Then, you will be provided with action rules within an element that specifies how you should structure the input to the action and what the output of that action will look like - then return another element. + Then, provide within an element each parameter, with parameter names as element tags themselves with their values inside, following the structure defined in the action rules. + Observation, in an element will be the result of running those actions. + ********** + Your available actions are: + ***** + ${toolDescriptions} + ********** + Example: + You will be called with: + What is the capital of France? 
+ + You will then output: + + I should look up France on Wikipedia + wikipedia + + + + You will be called again with this: + + { + "wikipedia": { + "name": "wikipedia", + "description": "Search Wikipedia and return a summary", + "parameters": [ + { + "title": { + "type": "string", + "description": "The title of the Wikipedia article to search", + "required": "true" + } + } + ] + } + } + + + You will then output (back in valid XML with the parameters each being a tag): + + + France + + + + You will then be called again with this: + France is a country. The capital is Paris. + + You then output: + + + The capital of France is Paris + + Where in France is Paris located? + What are some major tourist attractions in Paris? + What are some other major cities in France? + + + + ********** + Here is the history of your conversation with the user (all loop steps are ommitted, so it is just the user query and final answer): + ${chatHistory} + Use context from the past conversation if necessary. + ********** + If the response is inadequate, repeat the loop, either trying a different tool or changing the parameters for the action input. + + !!!IMPORTANT When you have an Answer, Write your entire response inside an element (which itself should be inside the step element for the current step). After you finish the answer, provide an array of 3 follow-up questions inside a array. These should relate to the query and the response and should aim to help the user better understand whatever they are looking for. + ********** + !!!IMPORTANT Every response, provide in full parsable and valid XML with the root element being the step number (e.g. ), iterated every time you output something new. + `; +} + +export function getSummarizedChunksPrompt(chunks: string): string { + return `Please provide a comprehensive summary of what you think the document from which these chunks originated. + Ensure the summary captures the main ideas and key points from all provided chunks. 
Be concise and brief and only provide the summary in paragraph form. + + Text chunks: + \`\`\` + ${chunks} + \`\`\``; +} + +export function getSummarizedSystemPrompt(): string { + return 'You are an AI assistant tasked with summarizing a document. You are provided with important chunks from the document and provide a summary, as best you can, of what the document will contain overall. Be concise and brief with your response.'; +} diff --git a/src/client/views/nodes/ChatBox/tools/BaseTool.ts b/src/client/views/nodes/ChatBox/tools/BaseTool.ts new file mode 100644 index 000000000..3511d9528 --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/BaseTool.ts @@ -0,0 +1,24 @@ +import { Tool } from '../types'; + +export abstract class BaseTool implements Tool { + constructor( + public name: string, + public description: string, + public parameters: Record, + public useRules: string, + public briefSummary: string + ) {} + + abstract execute(args: Record): Promise; + + getActionRule(): Record { + return { + [this.name]: { + name: this.name, + useRules: this.useRules, + description: this.description, + parameters: this.parameters, + }, + }; + } +} diff --git a/src/client/views/nodes/ChatBox/tools/CalculateTool.ts b/src/client/views/nodes/ChatBox/tools/CalculateTool.ts new file mode 100644 index 000000000..b881d90fa --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/CalculateTool.ts @@ -0,0 +1,25 @@ +import { BaseTool } from './BaseTool'; + +export class CalculateTool extends BaseTool { + constructor() { + super( + 'calculate', + 'Perform a calculation', + { + expression: { + type: 'string', + description: 'The mathematical expression to evaluate', + required: 'true', + }, + }, + 'Provide a mathematical expression to calculate that would work with JavaScript eval().', + 'Runs a calculation and returns the number - uses JavaScript so be sure to use floating point syntax if necessary' + ); + } + + async execute(args: { expression: string }): Promise { + // Note: Using 
eval() can be dangerous. Consider using a safer alternative. + const result = eval(args.expression); + return [{ type: 'text', text: result.toString() }]; + } +} diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts new file mode 100644 index 000000000..84d5430e7 --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/RAGTool.ts @@ -0,0 +1,81 @@ +import { BaseTool } from './BaseTool'; +import { Vectorstore } from '../vectorstore/VectorstoreUpload'; +import { Chunk } from '../types'; + +export class RAGTool extends BaseTool { + constructor( + private vectorstore: Vectorstore, + summaries: string + ) { + super( + 'rag', + 'Perform a RAG search on user documents', + { + hypothetical_document_chunk: { + type: 'string', + description: + "Detailed version of the prompt that is effectively a hypothetical document chunk that would be ideal to embed and compare to the vectors of real document chunks to fetch the most relevant document chunks to answer the user's query", + required: 'true', + }, + }, + `Your task is to first provide a response to the user's prompt based on the information given in the chunks and considering the chat history. Follow these steps: + + 1. Carefully read and analyze the provided chunks, which may include text, images, or tables. Each chunk has an associated chunk_id. + + 2. Review the prompt and chat history to understand the context of the user's question or request. + + 3. Formulate a response that addresses the prompt using information from the relevant chunks. Your response should be informative and directly answer the user's question or request. + + 4. Use citations to support your response. Citations should contain direct textual references to the granular, specific part of the original chunk that applies to the situation—with no text ommitted. 
Citations should be in the following format: + - For text: relevant direct text from the chunk that the citation in referencing specifically + - For images or tables: + + Place citations after the sentences they apply to. You can use multiple citations in a row. + + 5. If there's insufficient information in the provided chunks to answer the prompt sufficiently, ALWAYS respond with RAG not applicable + + Write your entire response, including follow-up questions, inside tags. Remember to use the citation format for both text and image references, and maintain a conversational tone throughout your response. + + !!!IMPORTANT Before you close the tag with , within the answer tags provide a set of 3 follow-up questions inside a tag and individually within tags. These should relate to the document, the current query, and the chat_history and should aim to help the user better understand whatever they are looking for. + Also, ensure that the answer tags are wrapped with the correct step tags as well.`, + `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a + set of document chunks (either images or text) that can be used to provide a grounded response based on + user documents + + !!!IMPORTANT Use the RAG tool ANYTIME the question may potentially (even if you are not sure) relate to one of the user's documents. 
+ Here are the summaries of the user's documents: + ${summaries}` + ); + } + + async execute(args: { hypothetical_document_chunk: string }): Promise { + const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk); + return this.getFormattedChunks(relevantChunks); + } + + private getFormattedChunks(relevantChunks: Chunk[]): { type: string; text?: string; image_url?: { url: string } }[] { + const content: { type: string; text?: string; image_url?: { url: string } }[] = [{ type: 'text', text: '' }]; + + for (const chunk of relevantChunks) { + content.push({ + type: 'text', + text: ``, + }); + + if (chunk.metadata.type === 'image') { + // Implement image loading and base64 encoding here + // For now, we'll just add a placeholder + content.push({ + type: 'image_url', + image_url: { url: chunk.metadata.file_path }, + }); + } + + content.push({ type: 'text', text: `${chunk.metadata.text}\n\n` }); + } + + content.push({ type: 'text', text: '' }); + + return content; + } +} diff --git a/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts b/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts new file mode 100644 index 000000000..0aef58f61 --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts @@ -0,0 +1,33 @@ +import { BaseTool } from './BaseTool'; +import axios from 'axios'; + +export class WikipediaTool extends BaseTool { + constructor() { + super( + 'wikipedia', + 'Search Wikipedia and return a summary', + { + title: { + type: 'string', + description: 'The title of the Wikipedia article to search', + required: 'true', + }, + }, + 'Provide simply the title you want to search on Wikipedia and nothing more. 
If re-using this tool, try a different title for different information.', + 'Returns a summary from searching an article title on Wikipedia' + ); + } + + async execute(args: { title: string }): Promise { + const response = await axios.get('https://en.wikipedia.org/w/api.php', { + params: { + action: 'query', + list: 'search', + srsearch: args.title, + format: 'json', + }, + }); + const result = response.data.query.search[0].snippet; + return [{ type: 'text', text: result }]; + } +} diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index 7acb96c15..c60973be3 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -46,6 +46,21 @@ export interface AI_Document { type: string; } +export interface Tool { + name: string; + description: string; + parameters: Record; + useRules: string; + briefSummary: string; + execute: (args: Record) => Promise; + getActionRule: () => Record; +} + +export interface AgentMessage { + role: 'system' | 'user' | 'assistant'; + content: string | { type: string; text?: string; image_url?: { url: string } }[]; +} + export function convertToAIDocument(json: any): AI_Document { if (!json) { throw new Error('Invalid JSON object'); -- cgit v1.2.3-70-g09d2 From aa8b1248408846d6a158f8df1c76fa3015ce3aac Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Wed, 10 Jul 2024 16:35:11 -0400 Subject: Fixing bugs and attempting to get it to work --- src/client/views/nodes/ChatBox/Agent.ts | 16 ++- src/client/views/nodes/ChatBox/ChatBot.ts | 2 + src/client/views/nodes/ChatBox/ChatBox.tsx | 51 ++++----- .../views/nodes/ChatBox/MessageComponent.tsx | 127 ++++++++------------- src/client/views/nodes/ChatBox/tools/BaseTool.ts | 4 +- .../views/nodes/ChatBox/tools/CalculateTool.ts | 2 +- src/client/views/nodes/ChatBox/tools/RAGTool.ts | 29 +++-- .../views/nodes/ChatBox/tools/WikipediaTool.ts | 2 +- src/client/views/nodes/ChatBox/types.ts | 7 +- .../nodes/ChatBox/vectorstore/VectorstoreUpload.ts | 1 + 10 files changed, 107 insertions(+), 134 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index f20a75a8d..4c2838540 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -2,10 +2,17 @@ import OpenAI from 'openai'; import { Tool, AgentMessage } from './types'; import { getReactPrompt } from './prompts'; import { XMLParser, XMLBuilder } from 'fast-xml-parser'; +import { WikipediaTool } from './tools/WikipediaTool'; +import { CalculateTool } from './tools/CalculateTool'; +import { RAGTool } from './tools/RAGTool'; +import { Vectorstore } from './vectorstore/VectorstoreUpload'; +import { ChatCompletionAssistantMessageParam, ChatCompletionMessageParam } from 'openai/resources'; +import dotenv from 'dotenv'; +dotenv.config(); export class Agent { private client: OpenAI; - private tools: Record; + private tools: Record>; private messages: AgentMessage[] = []; private interMessages: AgentMessage[] = []; private summaries: string; @@ -69,7 +76,7 @@ export class Agent { const actionInput = new XMLBuilder().build({ action_input: step.action_input }); console.log(`Action input: ${actionInput}`); try { - const observation = await this.processAction(action, step.action_input); + const observation = await 
this.processAction(step.action, step.action_input); const nextPrompt = [{ type: 'text', text: '' }, ...observation, { type: 'text', text: '' }]; this.interMessages.push({ role: 'user', content: nextPrompt }); } catch (e) { @@ -97,10 +104,11 @@ export class Agent { private async execute(): Promise { const completion = await this.client.chat.completions.create({ model: 'gpt-4', - messages: this.interMessages, + messages: this.interMessages as ChatCompletionMessageParam[], temperature: 0, }); - return completion.choices[0].message.content; + if (completion.choices[0].message.content) return completion.choices[0].message.content; + else throw new Error('No completion content found'); } private async processAction(action: string, actionInput: any): Promise { diff --git a/src/client/views/nodes/ChatBox/ChatBot.ts b/src/client/views/nodes/ChatBox/ChatBot.ts index 31b4ea9e3..8b5e0982c 100644 --- a/src/client/views/nodes/ChatBox/ChatBot.ts +++ b/src/client/views/nodes/ChatBox/ChatBot.ts @@ -1,5 +1,7 @@ import { Agent } from './Agent'; import { Vectorstore } from './vectorstore/VectorstoreUpload'; +import dotenv from 'dotenv'; +dotenv.config(); export class ChatBot { private agent: Agent; diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 73f35f501..3ecb2d340 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -1,33 +1,24 @@ -import { MathJaxContext } from 'better-react-mathjax'; import { action, makeObservable, observable, observe, reaction, runInAction } from 'mobx'; import { observer } from 'mobx-react'; import OpenAI, { ClientOptions } from 'openai'; -import { ImageFile, Message } from 'openai/resources/beta/threads/messages'; -import { RunStep } from 'openai/resources/beta/threads/runs/steps'; import * as React from 'react'; import { Doc } from '../../../../fields/Doc'; -import { Id } from '../../../../fields/FieldSymbols'; import { CsvCast, DocCast, PDFCast, 
StrCast } from '../../../../fields/Types'; -import { CsvField } from '../../../../fields/URLField'; import { Networking } from '../../../Network'; -import { DocUtils } from '../../../documents/DocUtils'; import { DocumentType } from '../../../documents/DocumentTypes'; import { Docs } from '../../../documents/Documents'; -import { DocumentManager } from '../../../util/DocumentManager'; import { LinkManager } from '../../../util/LinkManager'; import { ViewBoxAnnotatableComponent } from '../../DocComponent'; import { FieldView, FieldViewProps } from '../FieldView'; import './ChatBox.scss'; import MessageComponent from './MessageComponent'; -import { ASSISTANT_ROLE, AssistantMessage, AI_Document, convertToAIDocument } from './types'; -import { Annotation } from 'mobx/dist/internal'; -import { FormEvent } from 'react'; -import { url } from 'inspector'; +import { ASSISTANT_ROLE, AssistantMessage, AI_Document, convertToAIDocument, Citation } from './types'; import { Vectorstore } from './vectorstore/VectorstoreUpload'; -import { DocumentView } from '../DocumentView'; import { CollectionFreeFormDocumentView } from '../CollectionFreeFormDocumentView'; import { CollectionFreeFormView } from '../../collections/collectionFreeForm'; import { ChatBot } from './ChatBot'; +import dotenv from 'dotenv'; +dotenv.config(); @observer export class ChatBox extends ViewBoxAnnotatableComponent() { @@ -151,7 +142,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { const answerElement = xmlDoc.querySelector('answer'); const followUpQuestionsElement = xmlDoc.querySelector('follow_up_questions'); - const text = answerElement ? answerElement.textContent || '' : ''; + const text = answerElement ? answerElement.innerHTML || '' : ''; // Use innerHTML to preserve citation tags const followUpQuestions = followUpQuestionsElement ? 
Array.from(followUpQuestionsElement.querySelectorAll('question')).map(q => q.textContent || '') : []; return { @@ -161,6 +152,19 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }; } + @action + updateMessageCitations = (index: number, citations: Citation[]) => { + if (this.history[index]) { + this.history[index].citations = citations; + } + }; + + @action + handleCitationClick = (citation: Citation) => { + console.log('Citation clicked:', citation); + // You can implement additional functionality here, such as showing a modal with the full citation content + }; + // @action // uploadLinks = async (linkedDocs: Doc[]) => { // if (this.isInitializing) { @@ -259,7 +263,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }; render() { return ( - /** **/
{this.isInitializing &&
Initializing...
}
() { }}>
{this.history.map((message, index) => ( - {}} // Implement this method if needed - goToLinkedDoc={() => {}} // Implement this method if needed - setCurrentFile={() => {}} // Implement this method if needed - onFollowUpClick={this.handleFollowUpClick} - /> + ))} {this.current_message && ( {}} // Implement this method if needed - goToLinkedDoc={() => {}} // Implement this method if needed - setCurrentFile={() => {}} // Implement this method if needed onFollowUpClick={this.handleFollowUpClick} + onCitationClick={this.handleCitationClick} + updateMessageCitations={this.updateMessageCitations} /> )}
@@ -305,7 +295,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() {
- /** **/ ); } } diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index 15c0811fb..1baf6d7d5 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -1,110 +1,73 @@ -/* eslint-disable react/require-default-props */ import React from 'react'; import { observer } from 'mobx-react'; -import { MathJax, MathJaxContext } from 'better-react-mathjax'; -import ReactMarkdown from 'react-markdown'; -import { AssistantMessage } from './types'; +import { AssistantMessage, CHUNK_TYPE, Citation } from './types'; import { TbInfoCircleFilled } from 'react-icons/tb'; interface MessageComponentProps { message: AssistantMessage; - toggleToolLogs: (index: number) => void; - expandedLogIndex: number | null; index: number; - showModal: () => void; - goToLinkedDoc: (url: string) => void; - setCurrentFile: (file: { url: string }) => void; - onFollowUpClick: (question: string) => void; // New prop - isCurrent?: boolean; + onFollowUpClick: (question: string) => void; + onCitationClick: (citation: Citation) => void; + updateMessageCitations: (index: number, citations: Citation[]) => void; } -const MessageComponent: React.FC = function ({ - message, - toggleToolLogs, - expandedLogIndex, - goToLinkedDoc, - index, - showModal, - setCurrentFile, - onFollowUpClick, // New prop - isCurrent = false, -}) { - const LinkRenderer = ({ href, children }: { href: string; children: React.ReactNode }) => { - const regex = /([a-zA-Z0-9_.!-]+)~~~(citation|file_path)/; - const matches = href.match(regex); - const url = matches ? matches[1] : href; - const linkType = matches ? matches[2] : null; - if (linkType === 'citation') { - children = ; - } - const style = { - color: 'lightblue', - verticalAlign: linkType === 'citation' ? 'super' : 'baseline', - fontSize: linkType === 'citation' ? 
'smaller' : 'inherit', - }; +const MessageComponent: React.FC = function ({ message, index, onFollowUpClick, onCitationClick, updateMessageCitations }) { + const LinkRenderer = ({ children }: { children: React.ReactNode }) => { + const text = children as string; + const citationRegex = /([^<]*)<\/citation>/g; + const parts = []; + let lastIndex = 0; + let match; + const citations: Citation[] = []; - return ( - { - e.preventDefault(); - if (linkType === 'citation') { - goToLinkedDoc(url); - } else if (linkType === 'file_path') { - showModal(); - setCurrentFile({ url }); - } - }} - style={style}> - {children} - - ); - }; + while ((match = citationRegex.exec(text)) !== null) { + const [fullMatch, chunkId, type, content] = match; + const citation: Citation = { chunk_id: chunkId, type: type as CHUNK_TYPE, text: content }; + citations.push(citation); + + parts.push(text.slice(lastIndex, match.index)); + parts.push( + { + e.preventDefault(); + onCitationClick(citation); + }} + style={{ + color: 'lightblue', + verticalAlign: 'super', + fontSize: 'smaller', + }}> + + + ); + lastIndex = match.index + fullMatch.length; + } - const parseMessage = (text: string) => { - const answerMatch = text.match(/([\s\S]*?)<\/answer>/); - const followUpMatch = text.match(/([\s\S]*?)<\/follow_up_question>/); + parts.push(text.slice(lastIndex)); - const answer = answerMatch ? answerMatch[1] : text; - const followUpQuestions = followUpMatch - ? followUpMatch[1] - .split('\n') - .filter(q => q.trim()) - .map(q => q.replace(/^\d+\.\s*/, '').trim()) - : []; + // Update the message's citations in the ChatBox's history + updateMessageCitations(index, citations); - return { answer, followUpQuestions }; + return <>{parts}; }; - const { answer, followUpQuestions } = parseMessage(message.text); - console.log('Parsed answer:', answer); - console.log('Parsed follow-up questions:', followUpQuestions); return (
- {answer} - {message.image && } - {followUpQuestions.length > 0 && ( +
+ {message.text} +
+ {message.follow_up_questions && message.follow_up_questions.length > 0 && (

Follow-up Questions:

- {followUpQuestions.map((question, idx) => ( + {message.follow_up_questions.map((question, idx) => ( ))}
)} -
- {message.tool_logs && ( - - )} - {expandedLogIndex === index && ( -
-
{message.tool_logs}
-
- )} -
); }; diff --git a/src/client/views/nodes/ChatBox/tools/BaseTool.ts b/src/client/views/nodes/ChatBox/tools/BaseTool.ts index 3511d9528..903161bd5 100644 --- a/src/client/views/nodes/ChatBox/tools/BaseTool.ts +++ b/src/client/views/nodes/ChatBox/tools/BaseTool.ts @@ -1,6 +1,6 @@ import { Tool } from '../types'; -export abstract class BaseTool implements Tool { +export abstract class BaseTool = Record> implements Tool { constructor( public name: string, public description: string, @@ -9,7 +9,7 @@ export abstract class BaseTool implements Tool { public briefSummary: string ) {} - abstract execute(args: Record): Promise; + abstract execute(args: T): Promise; getActionRule(): Record { return { diff --git a/src/client/views/nodes/ChatBox/tools/CalculateTool.ts b/src/client/views/nodes/ChatBox/tools/CalculateTool.ts index b881d90fa..818332c44 100644 --- a/src/client/views/nodes/ChatBox/tools/CalculateTool.ts +++ b/src/client/views/nodes/ChatBox/tools/CalculateTool.ts @@ -1,6 +1,6 @@ import { BaseTool } from './BaseTool'; -export class CalculateTool extends BaseTool { +export class CalculateTool extends BaseTool<{ expression: string }> { constructor() { super( 'calculate', diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts index 84d5430e7..185efa0ba 100644 --- a/src/client/views/nodes/ChatBox/tools/RAGTool.ts +++ b/src/client/views/nodes/ChatBox/tools/RAGTool.ts @@ -1,8 +1,9 @@ import { BaseTool } from './BaseTool'; import { Vectorstore } from '../vectorstore/VectorstoreUpload'; import { Chunk } from '../types'; +import * as fs from 'fs'; -export class RAGTool extends BaseTool { +export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { constructor( private vectorstore: Vectorstore, summaries: string @@ -59,16 +60,26 @@ export class RAGTool extends BaseTool { for (const chunk of relevantChunks) { content.push({ type: 'text', - text: ``, + text: ``, }); - if (chunk.metadata.type === 'image') 
{ - // Implement image loading and base64 encoding here - // For now, we'll just add a placeholder - content.push({ - type: 'image_url', - image_url: { url: chunk.metadata.file_path }, - }); + if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') { + try { + const imageBuffer = fs.readFileSync(chunk.metadata.file_path); + const base64Image = imageBuffer.toString('base64'); + if (base64Image) { + content.push({ + type: 'image_url', + image_url: { + url: `data:image/jpeg;base64,${base64Image}`, + }, + }); + } else { + console.log(`Failed to encode image for chunk ${chunk.id}`); + } + } catch (error) { + console.error(`Error reading image file for chunk ${chunk.id}:`, error); + } } content.push({ type: 'text', text: `${chunk.metadata.text}\n\n` }); diff --git a/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts b/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts index 0aef58f61..8ef2830d4 100644 --- a/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts +++ b/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts @@ -1,7 +1,7 @@ import { BaseTool } from './BaseTool'; import axios from 'axios'; -export class WikipediaTool extends BaseTool { +export class WikipediaTool extends BaseTool<{ title: string }> { constructor() { super( 'wikipedia', diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index c60973be3..0270b6256 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -6,6 +6,7 @@ export enum ASSISTANT_ROLE { export enum CHUNK_TYPE { TEXT = 'text', IMAGE = 'image', + TABLE = 'table', } export interface AssistantMessage { @@ -18,9 +19,7 @@ export interface AssistantMessage { export interface Citation { text: string; type: CHUNK_TYPE; - span: [number, number]; chunk_id: string; - direct_text?: string; } export interface Chunk { @@ -46,13 +45,13 @@ export interface AI_Document { type: string; } -export interface Tool { +export interface Tool = Record> { 
name: string; description: string; parameters: Record; useRules: string; briefSummary: string; - execute: (args: Record) => Promise; + execute: (args: T) => Promise; getActionRule: () => Record; } diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts index d16e117b6..1f483ad61 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts @@ -2,6 +2,7 @@ import { Pinecone, Index, IndexList, PineconeRecord, RecordMetadata, QueryRespon import { CohereClient } from 'cohere-ai'; import { EmbedResponse } from 'cohere-ai/api'; import dotenv from 'dotenv'; + import { Chunk, AI_Document } from '../types'; dotenv.config(); -- cgit v1.2.3-70-g09d2 From d2c968cb3705b314396c0503b089f8a233a26502 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Wed, 10 Jul 2024 17:54:59 -0400 Subject: Working now somewhat --- src/client/views/nodes/ChatBox/Agent.ts | 83 ++++++++++++++-------- src/client/views/nodes/ChatBox/ChatBot.ts | 16 ----- src/client/views/nodes/ChatBox/ChatBox.tsx | 27 +++++-- .../views/nodes/ChatBox/MessageComponent.tsx | 8 +-- src/client/views/nodes/ChatBox/prompts.ts | 5 +- 5 files changed, 77 insertions(+), 62 deletions(-) delete mode 100644 src/client/views/nodes/ChatBox/ChatBot.ts (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index 4c2838540..355acb19f 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -18,7 +18,7 @@ export class Agent { private summaries: string; constructor(private vectorstore: Vectorstore) { - this.client = new OpenAI({ apiKey: process.env.OPENAI_API_KEY }); + this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); this.summaries = this.vectorstore ? 
this.vectorstore.getSummaries() : 'No documents available.'; this.tools = { wikipedia: new WikipediaTool(), @@ -40,7 +40,7 @@ export class Agent { return history; } - async askAgent(question: string, maxTurns: number = 5): Promise { + async askAgent(question: string, maxTurns: number = 8): Promise { console.log(`Starting query: ${question}`); this.messages.push({ role: 'user', content: question }); const chatHistory = this.formatChatHistory(); @@ -51,6 +51,10 @@ export class Agent { this.interMessages.push({ role: 'assistant', content: `${question}` }); + const parser = new XMLParser(); + const builder = new XMLBuilder(); + let currentAction: string | undefined; + for (let i = 0; i < maxTurns; i++) { console.log(`Turn ${i + 1}/${maxTurns}`); @@ -58,42 +62,58 @@ export class Agent { console.log(`Bot response: ${result}`); this.interMessages.push({ role: 'assistant', content: result }); + let parsedResult; try { - const parser = new XMLParser(); - const parsedResult = parser.parse(result); - const step = parsedResult[`step${i + 1}`]; - - if (step.thought) console.log(`Thought: ${step.thought}`); - if (step.action) { - console.log(`Action: ${step.action}`); - const action = step.action; - const actionRules = new XMLBuilder().build({ - action_rules: this.tools[action].getActionRule(), - }); - this.interMessages.push({ role: 'user', content: actionRules }); - } - if (step.action_input) { - const actionInput = new XMLBuilder().build({ action_input: step.action_input }); + parsedResult = parser.parse(result); + } catch (error) { + console.log('Error: Invalid XML response from bot'); + return 'Invalid response format.'; + } + + const step = parsedResult[Object.keys(parsedResult)[0]]; + + for (const key in step) { + if (key === 'thought') { + console.log(`Thought: ${step[key]}`); + } else if (key === 'action') { + currentAction = step[key] as string; + console.log(`Action: ${currentAction}`); + if (this.tools[currentAction]) { + const nextPrompt = [ + { + type: 'text', + 
text: builder.build({ action_rules: this.tools[currentAction].getActionRule() }), + }, + ]; + this.interMessages.push({ role: 'assistant', content: nextPrompt }); + break; + } else { + console.log('Error: No valid action'); + } + } else if (key === 'action_input') { + const actionInput = builder.build({ action_input: step[key] }); console.log(`Action input: ${actionInput}`); - try { - const observation = await this.processAction(step.action, step.action_input); - const nextPrompt = [{ type: 'text', text: '' }, ...observation, { type: 'text', text: '' }]; - this.interMessages.push({ role: 'user', content: nextPrompt }); - } catch (e) { - console.error(`Error processing action: ${e}`); - return `${e}`; + if (currentAction) { + try { + const observation = await this.processAction(currentAction, step[key]); + const nextPrompt = [{ type: 'text', text: '' }, ...observation, { type: 'text', text: '' }]; + this.interMessages.push({ role: 'assistant', content: nextPrompt }); + break; + } catch (error) { + console.log(`Error processing action: ${error}`); + return `${error}`; + } + } else { + console.log('Error: Action input without a valid action'); + return 'Action input without a valid action'; } - } - if (step.answer) { + } else if (key === 'answer') { console.log('Answer found. 
Ending query.'); - const answerContent = new XMLBuilder().build({ answer: step.answer }); + const answerContent = builder.build({ answer: step[key] }); this.messages.push({ role: 'assistant', content: answerContent }); this.interMessages = []; return answerContent; } - } catch (e) { - console.error('Error: Invalid XML response from bot'); - return 'Invalid response format.'; } } @@ -102,8 +122,9 @@ export class Agent { } private async execute(): Promise { + console.log('Messages: ' + this.interMessages); const completion = await this.client.chat.completions.create({ - model: 'gpt-4', + model: 'gpt-4o', messages: this.interMessages as ChatCompletionMessageParam[], temperature: 0, }); diff --git a/src/client/views/nodes/ChatBox/ChatBot.ts b/src/client/views/nodes/ChatBox/ChatBot.ts deleted file mode 100644 index 8b5e0982c..000000000 --- a/src/client/views/nodes/ChatBox/ChatBot.ts +++ /dev/null @@ -1,16 +0,0 @@ -import { Agent } from './Agent'; -import { Vectorstore } from './vectorstore/VectorstoreUpload'; -import dotenv from 'dotenv'; -dotenv.config(); - -export class ChatBot { - private agent: Agent; - - constructor(vectorstore: Vectorstore) { - this.agent = new Agent(vectorstore); - } - - async ask(question: string): Promise { - return await this.agent.askAgent(question); - } -} diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 3ecb2d340..2ce1ebdd2 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -16,7 +16,7 @@ import { ASSISTANT_ROLE, AssistantMessage, AI_Document, convertToAIDocument, Cit import { Vectorstore } from './vectorstore/VectorstoreUpload'; import { CollectionFreeFormDocumentView } from '../CollectionFreeFormDocumentView'; import { CollectionFreeFormView } from '../../collections/collectionFreeForm'; -import { ChatBot } from './ChatBot'; +import { Agent } from './Agent'; import dotenv from 'dotenv'; dotenv.config(); @@ -34,7 +34,7 @@ 
export class ChatBox extends ViewBoxAnnotatableComponent() { private documents: AI_Document[] = []; private _oldWheel: any; private vectorstore: Vectorstore; - private chatbot: ChatBot; // Add the ChatBot instance + private agent: Agent; // Add the ChatBot instance public static LayoutString(fieldKey: string) { return FieldView.LayoutString(ChatBox, fieldKey); @@ -48,7 +48,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.openai = this.initializeOpenAI(); this.getOtherDocs(); this.vectorstore = new Vectorstore(); - this.chatbot = new ChatBot(this.vectorstore); // Initialize the ChatBot + this.agent = new Agent(this.vectorstore); // Initialize the Agent reaction( () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, text: msg.text, follow_up_questions: msg.follow_up_questions, citations: msg.citations })), @@ -58,6 +58,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { ); } + @action getOtherDocs = async () => { const visible_docs = (CollectionFreeFormDocumentView.from(this._props.DocumentView?.())?._props.parent as CollectionFreeFormView)?.childDocs .filter(doc => doc != this.Document) @@ -76,6 +77,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { doc['ai_document'] = document_json; } }); + this.isInitializing = false; }; @action @@ -120,7 +122,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.history.push({ role: ASSISTANT_ROLE.USER, text: trimmedText }); }); this.isLoading = true; - const response = await this.chatbot.ask(trimmedText); // Use the chatbot to get the response + const response = await this.agent.askAgent(trimmedText); // Use the chatbot to get the response runInAction(() => { this.history.push(this.parseAssistantResponse(response)); }); @@ -142,8 +144,21 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { const answerElement = xmlDoc.querySelector('answer'); const followUpQuestionsElement = xmlDoc.querySelector('follow_up_questions'); - const text = 
answerElement ? answerElement.innerHTML || '' : ''; // Use innerHTML to preserve citation tags - const followUpQuestions = followUpQuestionsElement ? Array.from(followUpQuestionsElement.querySelectorAll('question')).map(q => q.textContent || '') : []; + let text = ''; + let followUpQuestions: string[] = []; + + if (answerElement) { + // Remove the follow_up_questions element from the answer + const followUpElement = answerElement.querySelector('follow_up_questions'); + if (followUpElement) { + followUpElement.remove(); + } + text = answerElement.innerHTML.trim(); + } + + if (followUpQuestionsElement) { + followUpQuestions = Array.from(followUpQuestionsElement.querySelectorAll('question')).map(q => q.textContent || ''); + } return { role: ASSISTANT_ROLE.ASSISTANT, diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index 1baf6d7d5..91671a24a 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -12,8 +12,7 @@ interface MessageComponentProps { } const MessageComponent: React.FC = function ({ message, index, onFollowUpClick, onCitationClick, updateMessageCitations }) { - const LinkRenderer = ({ children }: { children: React.ReactNode }) => { - const text = children as string; + const renderContent = (text: string) => { const citationRegex = /([^<]*)<\/citation>/g; const parts = []; let lastIndex = 0; @@ -47,7 +46,6 @@ const MessageComponent: React.FC = function ({ message, i parts.push(text.slice(lastIndex)); - // Update the message's citations in the ChatBox's history updateMessageCitations(index, citations); return <>{parts}; @@ -55,9 +53,7 @@ const MessageComponent: React.FC = function ({ message, i return (
-
- {message.text} -
+
{renderContent(message.text)}
{message.follow_up_questions && message.follow_up_questions.length > 0 && (

Follow-up Questions:

diff --git a/src/client/views/nodes/ChatBox/prompts.ts b/src/client/views/nodes/ChatBox/prompts.ts index 8835265e4..ffea13788 100644 --- a/src/client/views/nodes/ChatBox/prompts.ts +++ b/src/client/views/nodes/ChatBox/prompts.ts @@ -6,12 +6,12 @@ export function getReactPrompt(tools: Tool[], chatHistory: string): string { const toolDescriptions = tools.map(tool => `${tool.name}:\n${tool.briefSummary}`).join('\n*****\n'); return ` - You run in a loop of Thought, Action, PAUSE, Action Input, Pause, Observation. + You run in a loop of Thought, Action, (PAUSE), Action Input, (PAUSE), Observation. (this Thought/Action/PAUSE/Action Input/PAUSE/Observation can repeat N times) Contain each stage of the loop within an XML element that specifies the stage type (e.g. content of the thought). At the end of the loop, you output an Answer with the answer content contained within an XML element with an tag. At the end of the answer should be an array of 3 potential follow-up questions for the user to ask you next, contained within a key. Use to describe your thoughts about the question you have been asked. - Use to specify run one of the actions available to you - then return a element. + Use to specify run one of the actions available to you. Then, you will be provided with action rules within an element that specifies how you should structure the input to the action and what the output of that action will look like - then return another element. Then, provide within an element each parameter, with parameter names as element tags themselves with their values inside, following the structure defined in the action rules. Observation, in an element will be the result of running those actions. @@ -28,7 +28,6 @@ export function getReactPrompt(tools: Tool[], chatHistory: string): string { I should look up France on Wikipedia wikipedia - You will be called again with this: -- cgit v1.2.3-70-g09d2 From ff5fa6654a74096bbd9f14c5d236b8d08861fe2f Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Wed, 10 Jul 2024 18:02:24 -0400 Subject: Works much better now. --- src/client/views/nodes/ChatBox/Agent.ts | 9 +++++---- src/client/views/nodes/ChatBox/prompts.ts | 4 ++++ 2 files changed, 9 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index 355acb19f..cec5e1770 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -49,7 +49,7 @@ export class Agent { console.log(`System prompt: ${systemPrompt}`); this.interMessages = [{ role: 'system', content: systemPrompt }]; - this.interMessages.push({ role: 'assistant', content: `${question}` }); + this.interMessages.push({ role: 'user', content: `${question}` }); const parser = new XMLParser(); const builder = new XMLBuilder(); @@ -85,7 +85,7 @@ export class Agent { text: builder.build({ action_rules: this.tools[currentAction].getActionRule() }), }, ]; - this.interMessages.push({ role: 'assistant', content: nextPrompt }); + this.interMessages.push({ role: 'user', content: nextPrompt }); break; } else { console.log('Error: No valid action'); @@ -97,7 +97,8 @@ export class Agent { try { const observation = await this.processAction(currentAction, step[key]); const nextPrompt = [{ type: 'text', text: '' }, ...observation, { type: 'text', text: '' }]; - this.interMessages.push({ role: 'assistant', content: nextPrompt }); + console.log('Action result: ' + observation); + this.interMessages.push({ role: 'user', content: nextPrompt }); break; } catch (error) { console.log(`Error processing action: ${error}`); @@ -122,7 +123,7 @@ export class Agent { } private async execute(): Promise { - console.log('Messages: ' + this.interMessages); + console.log(this.interMessages); const completion = await this.client.chat.completions.create({ model: 'gpt-4o', messages: this.interMessages as ChatCompletionMessageParam[], diff --git a/src/client/views/nodes/ChatBox/prompts.ts 
b/src/client/views/nodes/ChatBox/prompts.ts index ffea13788..c011d65b7 100644 --- a/src/client/views/nodes/ChatBox/prompts.ts +++ b/src/client/views/nodes/ChatBox/prompts.ts @@ -30,6 +30,8 @@ export function getReactPrompt(tools: Tool[], chatHistory: string): string { wikipedia + THEN PAUSE AND DO NOT OUTPUT ANYTHING. + You will be called again with this: { @@ -56,6 +58,8 @@ export function getReactPrompt(tools: Tool[], chatHistory: string): string { + THEN PAUSE AND DO NOT OUTPUT ANYTHING. + You will then be called again with this: France is a country. The capital is Paris. -- cgit v1.2.3-70-g09d2 From f82343158f423fdca136a639ec61c1e8d93b5467 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Wed, 10 Jul 2024 18:13:44 -0400 Subject: trying to update RAG when new docs are added --- src/client/views/nodes/ChatBox/Agent.ts | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index cec5e1770..63363ab0b 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -41,6 +41,7 @@ export class Agent { } async askAgent(question: string, maxTurns: number = 8): Promise { + this.tools.rag = new RAGTool(this.vectorstore, this.vectorstore ? this.vectorstore.getSummaries() : 'No documents available.'); console.log(`Starting query: ${question}`); this.messages.push({ role: 'user', content: question }); const chatHistory = this.formatChatHistory(); -- cgit v1.2.3-70-g09d2 From e0e4871224e626240dc899c653cd0eb9f54c3693 Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Thu, 11 Jul 2024 10:21:56 -0400 Subject: working with new documents --- src/client/views/nodes/ChatBox/Agent.ts | 12 +++- src/client/views/nodes/ChatBox/ChatBox.tsx | 68 ++++++++++++---------- .../views/nodes/ChatBox/tools/WikipediaTool.ts | 20 +++---- src/server/ApiManagers/AssistantManager.ts | 27 +++++++++ 4 files changed, 82 insertions(+), 45 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index 63363ab0b..6757b2ce8 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -27,6 +27,11 @@ export class Agent { }; } + private refreshSummaries(): void { + this.summaries = this.vectorstore ? this.vectorstore.getSummaries() : 'No documents available.'; + this.tools.rag = new RAGTool(this.vectorstore, this.summaries); + } + private formatChatHistory(): string { let history = '\n'; for (const message of this.messages) { @@ -41,7 +46,7 @@ export class Agent { } async askAgent(question: string, maxTurns: number = 8): Promise { - this.tools.rag = new RAGTool(this.vectorstore, this.vectorstore ? 
this.vectorstore.getSummaries() : 'No documents available.'); + this.refreshSummaries(); console.log(`Starting query: ${question}`); this.messages.push({ role: 'user', content: question }); const chatHistory = this.formatChatHistory(); @@ -98,7 +103,7 @@ export class Agent { try { const observation = await this.processAction(currentAction, step[key]); const nextPrompt = [{ type: 'text', text: '' }, ...observation, { type: 'text', text: '' }]; - console.log('Action result: ' + observation); + console.log(observation); this.interMessages.push({ role: 'user', content: nextPrompt }); break; } catch (error) { @@ -114,11 +119,12 @@ export class Agent { const answerContent = builder.build({ answer: step[key] }); this.messages.push({ role: 'assistant', content: answerContent }); this.interMessages = []; + console.log(this.messages); return answerContent; } } } - + console.log(this.messages); console.log('Reached maximum turns. Ending query.'); return 'Reached maximum turns without finding an answer'; } diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 2ce1ebdd2..64ab2888b 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -1,4 +1,4 @@ -import { action, makeObservable, observable, observe, reaction, runInAction } from 'mobx'; +import { action, computed, makeObservable, observable, observe, reaction, runInAction } from 'mobx'; import { observer } from 'mobx-react'; import OpenAI, { ClientOptions } from 'openai'; import * as React from 'react'; @@ -18,6 +18,7 @@ import { CollectionFreeFormDocumentView } from '../CollectionFreeFormDocumentVie import { CollectionFreeFormView } from '../../collections/collectionFreeForm'; import { Agent } from './Agent'; import dotenv from 'dotenv'; +import { DocData } from '../../../../fields/DocSymbols'; dotenv.config(); @observer @@ -30,7 +31,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @observable 
expandedScratchpadIndex: number | null = null; @observable linked_docs_to_add: Doc[] = []; @observable inputValue: string = ''; + @observable private _visibleDocs: Doc[] = []; private openai: OpenAI; + private vectorstore_id: string; private documents: AI_Document[] = []; private _oldWheel: any; private vectorstore: Vectorstore; @@ -46,7 +49,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.openai = this.initializeOpenAI(); this.history = [{ role: ASSISTANT_ROLE.ASSISTANT, text: 'Welcome to the Document Analyser Assistant! Link a document or ask questions to get started.' }]; this.openai = this.initializeOpenAI(); - this.getOtherDocs(); this.vectorstore = new Vectorstore(); this.agent = new Agent(this.vectorstore); // Initialize the Agent @@ -59,22 +61,20 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } @action - getOtherDocs = async () => { - const visible_docs = (CollectionFreeFormDocumentView.from(this._props.DocumentView?.())?._props.parent as CollectionFreeFormView)?.childDocs - .filter(doc => doc != this.Document) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d); - + addAIDocs = async (visible_docs: Doc[]) => { console.log('All Docs:', visible_docs); - visible_docs?.forEach(async doc => { - const local_file_path: string = CsvCast(doc.data, PDFCast(doc.data)).url?.pathname; - if (local_file_path) { - const { document_json } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); - const ai_document: AI_Document = convertToAIDocument(document_json); - this.documents.push(ai_document); - await this.vectorstore.addDocument(ai_document); - doc['ai_document'] = document_json; + if (doc[DocData].ai_document) { + this.documents.push(convertToAIDocument(JSON.parse(StrCast(doc[DocData].ai_document)))); + } else { + const local_file_path: string = CsvCast(doc.data, PDFCast(doc.data))?.url?.pathname; + if (local_file_path) { + const { document_json } = await 
Networking.PostToServer('/createDocument', { file_path: local_file_path }); + const ai_document: AI_Document = convertToAIDocument(document_json); + this.documents.push(ai_document); + await this.vectorstore.addDocument(ai_document); + doc[DocData].ai_document = JSON.stringify(document_json); + } } }); this.isInitializing = false; @@ -120,8 +120,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { textInput.value = ''; runInAction(() => { this.history.push({ role: ASSISTANT_ROLE.USER, text: trimmedText }); + this.isLoading = true; }); - this.isLoading = true; + const response = await this.agent.askAgent(trimmedText); // Use the chatbot to get the response runInAction(() => { this.history.push(this.parseAssistantResponse(response)); @@ -133,7 +134,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.history.push({ role: ASSISTANT_ROLE.ASSISTANT, text: 'Sorry, I encountered an error while processing your request.' }); }); } finally { - this.isLoading = false; + runInAction(() => { + this.isLoading = false; + }); } } }; @@ -236,20 +239,15 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } } reaction( - () => { - const linkedDocs = LinkManager.Instance.getAllRelatedLinks(this.Document) - .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d); - return linkedDocs; - }, - linked => this.linked_docs_to_add.push(...linked.filter(linkedDoc => !this.linked_docs_to_add.includes(linkedDoc))) + () => this.visibleDocs, + visibleDocs => { + this._visibleDocs = visibleDocs; + } ); - observe( // right now this skips during initialization which is necessary because it would be blank // However, it will upload the same link twice when it is - this.linked_docs_to_add, + this._visibleDocs, change => { // observe pushes/splices on a user link DB 'data' field (should only happen for local changes) switch (change.type as any) { @@ -257,9 +255,7 @@ export class ChatBox 
extends ViewBoxAnnotatableComponent() { if ((change as any).addedCount > 0) { // maybe check here if its already in the urls datadoc array so doesn't add twice console.log((change as any).added as Doc[]); - ((change as any).added as Doc[]).forEach(doc => { - this.uploadNewDocument(doc); - }); + this.addAIDocs((change as any).added as Doc[]); } // (change as any).removed.forEach((link: any) => remLinkFromDoc(toRealField(link))); break; @@ -269,6 +265,16 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }, true ); + runInAction(() => { + if (!this._visibleDocs.length) { + this.isInitializing = false; + } + }); + } + + @computed + get visibleDocs() { + return (CollectionFreeFormDocumentView.from(this._props.DocumentView?.())?._props.parent as CollectionFreeFormView)?.childDocs.filter(doc => doc != this.Document) ?? []; } @action diff --git a/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts b/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts index 8ef2830d4..e2c5009a1 100644 --- a/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts +++ b/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts @@ -1,3 +1,5 @@ +import { title } from 'process'; +import { Networking } from '../../../../Network'; import { BaseTool } from './BaseTool'; import axios from 'axios'; @@ -10,7 +12,7 @@ export class WikipediaTool extends BaseTool<{ title: string }> { title: { type: 'string', description: 'The title of the Wikipedia article to search', - required: 'true', + required: true, }, }, 'Provide simply the title you want to search on Wikipedia and nothing more. 
If re-using this tool, try a different title for different information.', @@ -19,15 +21,11 @@ export class WikipediaTool extends BaseTool<{ title: string }> { } async execute(args: { title: string }): Promise { - const response = await axios.get('https://en.wikipedia.org/w/api.php', { - params: { - action: 'query', - list: 'search', - srsearch: args.title, - format: 'json', - }, - }); - const result = response.data.query.search[0].snippet; - return [{ type: 'text', text: result }]; + try { + const { text } = await Networking.PostToServer('/getWikipediaSummary', { title: args.title }); + return [{ type: 'text', text: text }]; + } catch (error) { + return [{ type: 'text', text: 'An error occurred while fetching the article.' }]; + } } } diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 8a5f12c2b..d5a8ebeb3 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -148,6 +148,33 @@ export default class AssistantManager extends ApiManager { }, }); + register({ + method: Method.POST, + subscription: '/getWikipediaSummary', + secureHandler: async ({ req, res }) => { + const { title } = req.body; + try { + const response = await axios.get('https://en.wikipedia.org/w/api.php', { + params: { + action: 'query', + list: 'search', + srsearch: title, + format: 'json', + }, + }); + const summary = response.data.query.search[0].snippet; + if (!summary || summary.length === 0 || summary === '' || summary === ' ') { + res.send({ text: 'No article found with that title.' 
}); + } else { + res.send({ text: summary }); + } + } catch (error: any) { + console.error('Error retrieving article summary from Wikipedia:', error); + res.status(500).send({ error: 'Error retrieving article summary from Wikipedia.', details: error.message }); + } + }, + }); + register({ method: Method.POST, subscription: '/createDocument', -- cgit v1.2.3-70-g09d2 From f1cdfc1d02488c4a513fbf67f729f702526a345d Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Thu, 11 Jul 2024 12:06:04 -0400 Subject: not working well (files don't get filename immediately_ --- src/client/documents/DocumentTypes.ts | 1 + src/client/documents/Documents.ts | 5 +++ src/client/views/nodes/ChatBox/ChatBox.tsx | 36 +++++++---------- .../views/nodes/ChatBox/MessageComponent.tsx | 33 +++++++++++++++- .../views/nodes/ChatBox/tools/CollectionTool.ts | 0 src/client/views/nodes/ChatBox/tools/RAGTool.ts | 9 +++++ .../nodes/ChatBox/vectorstore/VectorstoreUpload.ts | 45 +++++++++++++++++----- 7 files changed, 95 insertions(+), 34 deletions(-) create mode 100644 src/client/views/nodes/ChatBox/tools/CollectionTool.ts (limited to 'src') diff --git a/src/client/documents/DocumentTypes.ts b/src/client/documents/DocumentTypes.ts index 8f95068db..cb1625381 100644 --- a/src/client/documents/DocumentTypes.ts +++ b/src/client/documents/DocumentTypes.ts @@ -28,6 +28,7 @@ export enum DocumentType { DATAVIZ = 'dataviz', LOADING = 'loading', SIMULATION = 'simulation', // physics simulation + MESSAGE = 'message', // chat message // special purpose wrappers that either take no data or are compositions of lower level types LINK = 'link', diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts index a67e6b4f6..ea5eca804 100644 --- a/src/client/documents/Documents.ts +++ b/src/client/documents/Documents.ts @@ -791,6 +791,11 @@ export namespace Docs { export function RTFDocument(field: RichTextField, options: DocumentOptions = {}, fieldKey: string = 'text') { return 
InstanceFromProto(Prototypes.get(DocumentType.RTF), field, options, undefined, fieldKey); } + + export function MessageDocument(field: string, options: DocumentOptions = {}, fieldKey: string = 'data') { + return InstanceFromProto(Prototypes.get(DocumentType.MESSAGE), field, options, undefined, fieldKey); + } + export function TextDocument(text: string, options: DocumentOptions = {}, fieldKey: string = 'text') { const rtf = { doc: { diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 64ab2888b..9b2a92564 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -11,7 +11,7 @@ import { LinkManager } from '../../../util/LinkManager'; import { ViewBoxAnnotatableComponent } from '../../DocComponent'; import { FieldView, FieldViewProps } from '../FieldView'; import './ChatBox.scss'; -import MessageComponent from './MessageComponent'; +import MessageComponentBox from './MessageComponent'; import { ASSISTANT_ROLE, AssistantMessage, AI_Document, convertToAIDocument, Citation } from './types'; import { Vectorstore } from './vectorstore/VectorstoreUpload'; import { CollectionFreeFormDocumentView } from '../CollectionFreeFormDocumentView'; @@ -19,6 +19,7 @@ import { CollectionFreeFormView } from '../../collections/collectionFreeForm'; import { Agent } from './Agent'; import dotenv from 'dotenv'; import { DocData } from '../../../../fields/DocSymbols'; +import { DocumentView } from '../DocumentView'; dotenv.config(); @observer @@ -33,7 +34,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @observable inputValue: string = ''; @observable private _visibleDocs: Doc[] = []; private openai: OpenAI; - private vectorstore_id: string; + // private vectorstore_id: string; private documents: AI_Document[] = []; private _oldWheel: any; private vectorstore: Vectorstore; @@ -61,22 +62,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } @action - addAIDocs 
= async (visible_docs: Doc[]) => { - console.log('All Docs:', visible_docs); - visible_docs?.forEach(async doc => { - if (doc[DocData].ai_document) { - this.documents.push(convertToAIDocument(JSON.parse(StrCast(doc[DocData].ai_document)))); - } else { - const local_file_path: string = CsvCast(doc.data, PDFCast(doc.data))?.url?.pathname; - if (local_file_path) { - const { document_json } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); - const ai_document: AI_Document = convertToAIDocument(document_json); - this.documents.push(ai_document); - await this.vectorstore.addDocument(ai_document); - doc[DocData].ai_document = JSON.stringify(document_json); - } - } - }); + addDocsToVectorstore = async (visible_docs: Doc[]) => { + await this.vectorstore.addAIDocs(visible_docs); this.isInitializing = false; }; @@ -107,6 +94,10 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } }; + // getAssistantResponse() { + // return Docs.Create.MessageDocument(text, {}); + // } + @action askGPT = async (event: React.FormEvent): Promise => { event.preventDefault(); @@ -241,7 +232,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { reaction( () => this.visibleDocs, visibleDocs => { - this._visibleDocs = visibleDocs; + this._visibleDocs.push(...visibleDocs.filter(visibleDoc => !this._visibleDocs.includes(visibleDoc))); } ); observe( @@ -255,7 +246,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { if ((change as any).addedCount > 0) { // maybe check here if its already in the urls datadoc array so doesn't add twice console.log((change as any).added as Doc[]); - this.addAIDocs((change as any).added as Doc[]); + this.addDocsToVectorstore((change as any).added as Doc[]); } // (change as any).removed.forEach((link: any) => remLinkFromDoc(toRealField(link))); break; @@ -295,10 +286,11 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }}>
{this.history.map((message, index) => ( - + // + ))} {this.current_message && ( - void; } -const MessageComponent: React.FC = function ({ message, index, onFollowUpClick, onCitationClick, updateMessageCitations }) { +const MessageComponentBox: React.FC = function ({ message, index, onFollowUpClick, onCitationClick, updateMessageCitations }) { + // public static LayoutString(fieldKey: string) { + // return FieldView.LayoutString(MessageComponentBox, fieldKey); + // } + + // the presentation view that renders this slide + + // @computed + // get chatBoxView() { + // return this.DocumentView?.().containerViewPath?.().lastElement()?.ComponentView as ChatBox; + // } + const renderContent = (text: string) => { const citationRegex = /([^<]*)<\/citation>/g; const parts = []; @@ -68,4 +81,20 @@ const MessageComponent: React.FC = function ({ message, i ); }; -export default observer(MessageComponent); +// Docs.Prototypes.TemplateMap.set(DocumentType.MESSAGE, { +// layout: { view: MessageComponentBox, dataField: 'data' }, +// options: { +// acl: '', +// _height: 35, +// _xMargin: 10, +// _yMargin: 10, +// _layout_nativeDimEditable: true, +// _layout_reflowVertical: true, +// _layout_reflowHorizontal: true, +// defaultDoubleClick: 'ignore', +// systemIcon: 'BsFileEarmarkTextFill', +// layout_borderRounding: '10px', +// }, +// }); + +export default observer(MessageComponentBox); diff --git a/src/client/views/nodes/ChatBox/tools/CollectionTool.ts b/src/client/views/nodes/ChatBox/tools/CollectionTool.ts new file mode 100644 index 000000000..e69de29bb diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts index 185efa0ba..36e4bc3ce 100644 --- a/src/client/views/nodes/ChatBox/tools/RAGTool.ts +++ b/src/client/views/nodes/ChatBox/tools/RAGTool.ts @@ -39,6 +39,7 @@ export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { !!!IMPORTANT Before you close the tag with , within the answer tags provide a set of 
3 follow-up questions inside a tag and individually within tags. These should relate to the document, the current query, and the chat_history and should aim to help the user better understand whatever they are looking for. Also, ensure that the answer tags are wrapped with the correct step tags as well.`, + `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a set of document chunks (either images or text) that can be used to provide a grounded response based on user documents @@ -49,6 +50,14 @@ export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { ); } + changeSummaries(summaries: string) { + this.briefSummary = `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a set of document chunks (either images or text) that can be used to provide a grounded response based on user documents. + +!!!IMPORTANT Use the RAG tool ANYTIME the question may potentially (even if you are not sure) relate to one of the user's documents. 
+Here are the summaries of the user's documents: +${summaries}`; + } + async execute(args: { hypothetical_document_chunk: string }): Promise { const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk); return this.getFormattedChunks(relevantChunks); diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts index 1f483ad61..5e8e6b23a 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts @@ -3,7 +3,11 @@ import { CohereClient } from 'cohere-ai'; import { EmbedResponse } from 'cohere-ai/api'; import dotenv from 'dotenv'; -import { Chunk, AI_Document } from '../types'; +import { Chunk, AI_Document, convertToAIDocument } from '../types'; +import { Doc } from '../../../../../fields/Doc'; +import { DocData } from '../../../../../fields/DocSymbols'; +import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types'; +import { Networking } from '../../../../Network'; dotenv.config(); @@ -12,7 +16,7 @@ export class Vectorstore { private index!: Index; private cohere: CohereClient; private indexName: string = 'pdf-chatbot'; - private documents: AI_Document[] = []; + documents: AI_Document[] = []; constructor() { const pineconeApiKey = process.env.PINECONE_API_KEY; @@ -49,10 +53,35 @@ export class Vectorstore { this.index = this.pinecone.Index(this.indexName); } - async addDocument(document: AI_Document) { - this.documents.push(document); - await this.indexDocument(document); - console.log(`Document added: ${document.file_name}`); + async addAIDocs(visible_docs: Doc[]) { + console.log('All Docs:', visible_docs); + visible_docs?.forEach(async doc => { + await this.addAIDoc(doc); + }); + } + + async addAIDoc(doc: Doc) { + if (doc[DocData]?.ai_document) { + this.documents.push(convertToAIDocument(JSON.parse(StrCast(doc[DocData].ai_document)))); + console.log(`Document 
already added: ${doc[DocData].file_name}`); + } else { + console.log(doc); + const local_file_path: string = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname; + console.log('Local File Path:', local_file_path); + if (local_file_path) { + const { document_json } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); + console.log('Document JSON:', document_json); + const ai_document: AI_Document = convertToAIDocument(document_json); + this.documents.push(ai_document); + await this.indexDocument(ai_document); + console.log(`Document added: ${ai_document.file_name}`); + doc[DocData].ai_document = JSON.stringify(document_json); + } + } + } + + getSummaries(): string { + return this.documents.map((doc, index) => `${index + 1}) ${doc.summary}`).join('\n') + '\n'; } private async indexDocument(document: AI_Document) { @@ -111,8 +140,4 @@ export class Vectorstore { return []; } } - - getSummaries(): string { - return this.documents.map((doc, index) => `${index + 1}) ${doc.summary}`).join('\n') + '\n'; - } } -- cgit v1.2.3-70-g09d2 From ef79b7d617035c52fea159225ba9a39b8222e8f4 Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Mon, 15 Jul 2024 12:31:35 -0400 Subject: citation parsing working much better --- .vscode/launch.json | 15 ++-- src/client/views/nodes/ChatBox/Agent.ts | 8 +- src/client/views/nodes/ChatBox/AnswerParser.ts | 60 ++++++++++++++ src/client/views/nodes/ChatBox/ChatBox.tsx | 52 ++++-------- .../views/nodes/ChatBox/MessageComponent.tsx | 95 ++++++++-------------- src/client/views/nodes/ChatBox/types.ts | 18 +++- .../nodes/ChatBox/vectorstore/VectorstoreUpload.ts | 5 +- 7 files changed, 138 insertions(+), 115 deletions(-) create mode 100644 src/client/views/nodes/ChatBox/AnswerParser.ts (limited to 'src') diff --git a/.vscode/launch.json b/.vscode/launch.json index e4c31361c..e9a07cf93 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -59,7 +59,7 @@ "webRoot": "${workspaceFolder}" }, { - "type": "node", + "type": "pwa-node", "request": "attach", "name": "Typescript Server", "protocol": "inspector", @@ -68,7 +68,7 @@ "remoteRoot": "${workspaceFolder}" }, { - "type": "node", + "type": "pwa-node", "request": "launch", "name": "Current TS File", "runtimeExecutable": "npx", @@ -76,7 +76,7 @@ "port": 9229 }, { - "type": "node", + "type": "pwa-node", "request": "launch", "name": "Mocha Tests", "program": "${workspaceFolder}/node_modules/mocha/bin/_mocha", @@ -86,7 +86,7 @@ "protocol": "inspector" }, { - "type": "node", + "type": "pwa-node", "request": "launch", "name": "Mocha Current File", "program": "${workspaceFolder}/node_modules/mocha/bin/_mocha", @@ -96,9 +96,6 @@ "protocol": "inspector" } ], - - "resolveSourceMapLocations": [ - "${workspaceFolder}/**", - "!**/node_modules/**" - ] + + "resolveSourceMapLocations": ["${workspaceFolder}/**", "!**/node_modules/**"] } diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index 6757b2ce8..fd3c6e5e8 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -95,6 +95,8 @@ export class Agent { break; } else { 
console.log('Error: No valid action'); + this.interMessages.push({ role: 'user', content: 'No valid action, try again.' }); + break; } } else if (key === 'action_input') { const actionInput = builder.build({ action_input: step[key] }); @@ -116,11 +118,7 @@ export class Agent { } } else if (key === 'answer') { console.log('Answer found. Ending query.'); - const answerContent = builder.build({ answer: step[key] }); - this.messages.push({ role: 'assistant', content: answerContent }); - this.interMessages = []; - console.log(this.messages); - return answerContent; + return result; } } } diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/AnswerParser.ts new file mode 100644 index 000000000..f77d2261d --- /dev/null +++ b/src/client/views/nodes/ChatBox/AnswerParser.ts @@ -0,0 +1,60 @@ +import { ASSISTANT_ROLE, AssistantMessage, Citation, getChunkType } from './types'; + +export class AnswerParser { + static parse(xml: string): AssistantMessage { + const answerRegex = /([\s\S]*?)<\/answer>/; + const citationRegex = /(.*?)<\/citation>/g; + const followUpQuestionsRegex = /([\s\S]*?)<\/follow_up_questions>/; + const questionRegex = /(.*?)<\/question>/g; + + const answerMatch = answerRegex.exec(xml); + const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml); + + if (!answerMatch) { + throw new Error('Invalid XML: Missing tag.'); + } + + const rawTextContent = answerMatch[1].trim(); + const textContentWithCitations = rawTextContent.replace(citationRegex, ''); + const textContent = textContentWithCitations.replace(followUpQuestionsRegex, '').trim(); + + let citations: Citation[] = []; + let match: RegExpExecArray | null; + + let plainTextOffset = 0; + let citationOffset = 0; + + while ((match = citationRegex.exec(rawTextContent)) !== null) { + const [fullMatch, chunk_id, type, direct_text] = match; + const citationStartIndex = match.index; + const citationPlainStart = citationStartIndex - citationOffset; + + citations.push({ + 
direct_text: direct_text.trim(), + type: getChunkType(type), + chunk_id: chunk_id, + location: citationPlainStart, + }); + + citationOffset += fullMatch.length; + } + + let followUpQuestions: string[] = []; + if (followUpQuestionsMatch) { + const questionsText = followUpQuestionsMatch[1]; + let questionMatch: RegExpExecArray | null; + + while ((questionMatch = questionRegex.exec(questionsText)) !== null) { + followUpQuestions.push(questionMatch[1].trim()); + } + } + const assistantResponse: AssistantMessage = { + role: ASSISTANT_ROLE.ASSISTANT, + text_content: textContent, + follow_up_questions: followUpQuestions, + citations: citations, + }; + + return assistantResponse; + } +} diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 9b2a92564..bae6bbaa6 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -12,7 +12,7 @@ import { ViewBoxAnnotatableComponent } from '../../DocComponent'; import { FieldView, FieldViewProps } from '../FieldView'; import './ChatBox.scss'; import MessageComponentBox from './MessageComponent'; -import { ASSISTANT_ROLE, AssistantMessage, AI_Document, convertToAIDocument, Citation } from './types'; +import { ASSISTANT_ROLE, AssistantMessage, AI_Document, convertToAIDocument, Citation, CHUNK_TYPE } from './types'; import { Vectorstore } from './vectorstore/VectorstoreUpload'; import { CollectionFreeFormDocumentView } from '../CollectionFreeFormDocumentView'; import { CollectionFreeFormView } from '../../collections/collectionFreeForm'; @@ -20,6 +20,7 @@ import { Agent } from './Agent'; import dotenv from 'dotenv'; import { DocData } from '../../../../fields/DocSymbols'; import { DocumentView } from '../DocumentView'; +import { AnswerParser } from './AnswerParser'; dotenv.config(); @observer @@ -47,14 +48,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { constructor(props: FieldViewProps) { super(props); 
makeObservable(this); - this.openai = this.initializeOpenAI(); - this.history = [{ role: ASSISTANT_ROLE.ASSISTANT, text: 'Welcome to the Document Analyser Assistant! Link a document or ask questions to get started.' }]; + this.history = [{ role: ASSISTANT_ROLE.ASSISTANT, text_content: 'Welcome to the Document Analyser Assistant! Link a document or ask questions to get started.' }]; this.openai = this.initializeOpenAI(); this.vectorstore = new Vectorstore(); this.agent = new Agent(this.vectorstore); // Initialize the Agent reaction( - () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, text: msg.text, follow_up_questions: msg.follow_up_questions, citations: msg.citations })), + () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, text_content: msg.text_content, follow_up_questions: msg.follow_up_questions, citations: msg.citations })), serializableHistory => { this.dataDoc.data = JSON.stringify(serializableHistory); } @@ -110,19 +110,19 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { try { textInput.value = ''; runInAction(() => { - this.history.push({ role: ASSISTANT_ROLE.USER, text: trimmedText }); + this.history.push({ role: ASSISTANT_ROLE.USER, text_content: trimmedText }); this.isLoading = true; }); const response = await this.agent.askAgent(trimmedText); // Use the chatbot to get the response runInAction(() => { - this.history.push(this.parseAssistantResponse(response)); + this.history.push(AnswerParser.parse(response)); }); this.dataDoc.data = JSON.stringify(this.history); } catch (err) { console.error('Error:', err); runInAction(() => { - this.history.push({ role: ASSISTANT_ROLE.ASSISTANT, text: 'Sorry, I encountered an error while processing your request.' }); + this.history.push({ role: ASSISTANT_ROLE.ASSISTANT, text_content: 'Sorry, I encountered an error while processing your request.' 
}); }); } finally { runInAction(() => { @@ -132,35 +132,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } }; - parseAssistantResponse(response: string): AssistantMessage { - const parser = new DOMParser(); - const xmlDoc = parser.parseFromString(response, 'text/xml'); - const answerElement = xmlDoc.querySelector('answer'); - const followUpQuestionsElement = xmlDoc.querySelector('follow_up_questions'); - - let text = ''; - let followUpQuestions: string[] = []; - - if (answerElement) { - // Remove the follow_up_questions element from the answer - const followUpElement = answerElement.querySelector('follow_up_questions'); - if (followUpElement) { - followUpElement.remove(); - } - text = answerElement.innerHTML.trim(); - } - - if (followUpQuestionsElement) { - followUpQuestions = Array.from(followUpQuestionsElement.querySelectorAll('question')).map(q => q.textContent || ''); - } - - return { - role: ASSISTANT_ROLE.ASSISTANT, - text, - follow_up_questions: followUpQuestions, - }; - } - @action updateMessageCitations = (index: number, citations: Citation[]) => { if (this.history[index]) { @@ -219,7 +190,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.history.push( ...storedHistory.map((msg: AssistantMessage) => ({ role: msg.role, - text: msg.text, + text_content: msg.text_content, follow_up_questions: msg.follow_up_questions, citations: msg.citations, })) @@ -246,6 +217,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { if ((change as any).addedCount > 0) { // maybe check here if its already in the urls datadoc array so doesn't add twice console.log((change as any).added as Doc[]); + console.log('here!'); this.addDocsToVectorstore((change as any).added as Doc[]); } // (change as any).removed.forEach((link: any) => remLinkFromDoc(toRealField(link))); @@ -265,7 +237,11 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @computed get visibleDocs() { - return 
(CollectionFreeFormDocumentView.from(this._props.DocumentView?.())?._props.parent as CollectionFreeFormView)?.childDocs.filter(doc => doc != this.Document) ?? []; + //return (CollectionFreeFormDocumentView.from(this._props.DocumentView?.())?._props.parent as CollectionFreeFormView)?.childDocs.filter(doc => doc != this.Document) ?? []; + return LinkManager.Instance.getAllRelatedLinks(this.Document) + .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) + .map(d => DocCast(d?.annotationOn, d)) + .filter(d => d); } @action diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index 38faf7e00..76faff10b 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -1,9 +1,6 @@ import React from 'react'; import { observer } from 'mobx-react'; -import { AssistantMessage, CHUNK_TYPE, Citation } from './types'; -import { TbInfoCircleFilled } from 'react-icons/tb'; -import { Docs } from '../../../documents/Documents'; -import { DocumentType } from '../../../documents/DocumentTypes'; +import { AssistantMessage, Citation } from './types'; interface MessageComponentProps { message: AssistantMessage; @@ -14,59 +11,53 @@ interface MessageComponentProps { } const MessageComponentBox: React.FC = function ({ message, index, onFollowUpClick, onCitationClick, updateMessageCitations }) { - // public static LayoutString(fieldKey: string) { - // return FieldView.LayoutString(MessageComponentBox, fieldKey); - // } - - // the presentation view that renders this slide - - // @computed - // get chatBoxView() { - // return this.DocumentView?.().containerViewPath?.().lastElement()?.ComponentView as ChatBox; - // } + const renderContent = (content: string) => { + if (!message.citations || message.citations.length === 0) { + return content; + } - const renderContent = (text: string) => { - const citationRegex = /([^<]*)<\/citation>/g; const parts = []; 
let lastIndex = 0; - let match; - const citations: Citation[] = []; - - while ((match = citationRegex.exec(text)) !== null) { - const [fullMatch, chunkId, type, content] = match; - const citation: Citation = { chunk_id: chunkId, type: type as CHUNK_TYPE, text: content }; - citations.push(citation); - parts.push(text.slice(lastIndex, match.index)); - parts.push( - { - e.preventDefault(); - onCitationClick(citation); - }} + message.citations.forEach((citation, idx) => { + const location = citation.location; + const textBefore = content.slice(lastIndex, location); + const citationButton = ( + ); - lastIndex = match.index + fullMatch.length; - } - - parts.push(text.slice(lastIndex)); + parts.push(textBefore, citationButton); + lastIndex = location; + }); - updateMessageCitations(index, citations); + parts.push(content.slice(lastIndex)); - return <>{parts}; + return parts; }; return (
-
{renderContent(message.text)}
+
{renderContent(message.text_content)}
{message.follow_up_questions && message.follow_up_questions.length > 0 && (

Follow-up Questions:

@@ -81,20 +72,4 @@ const MessageComponentBox: React.FC = function ({ message ); }; -// Docs.Prototypes.TemplateMap.set(DocumentType.MESSAGE, { -// layout: { view: MessageComponentBox, dataField: 'data' }, -// options: { -// acl: '', -// _height: 35, -// _xMargin: 10, -// _yMargin: 10, -// _layout_nativeDimEditable: true, -// _layout_reflowVertical: true, -// _layout_reflowHorizontal: true, -// defaultDoubleClick: 'ignore', -// systemIcon: 'BsFileEarmarkTextFill', -// layout_borderRounding: '10px', -// }, -// }); - export default observer(MessageComponentBox); diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index 0270b6256..a0d295e92 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -9,17 +9,31 @@ export enum CHUNK_TYPE { TABLE = 'table', } +export function getChunkType(type: string): CHUNK_TYPE { + switch (type.toLowerCase()) { + case 'text': + return CHUNK_TYPE.TEXT; + case 'image': + return CHUNK_TYPE.IMAGE; + case 'table': + return CHUNK_TYPE.TABLE; + default: + return CHUNK_TYPE.TEXT; + } +} + export interface AssistantMessage { role: ASSISTANT_ROLE; - text: string; + text_content: string; follow_up_questions?: string[]; citations?: Citation[]; } export interface Citation { - text: string; + direct_text?: string; type: CHUNK_TYPE; chunk_id: string; + location: number; } export interface Chunk { diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts index 5e8e6b23a..d3b1cb4e7 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts @@ -58,14 +58,17 @@ export class Vectorstore { visible_docs?.forEach(async doc => { await this.addAIDoc(doc); }); + return; } async addAIDoc(doc: Doc) { if (doc[DocData]?.ai_document) { 
this.documents.push(convertToAIDocument(JSON.parse(StrCast(doc[DocData].ai_document)))); - console.log(`Document already added: ${doc[DocData].file_name}`); + console.log(`Document already added: ${doc.file_name}`); } else { console.log(doc); + console.log(PDFCast(doc.data)?.url?.pathname); + console.log(CsvCast(doc.data)?.url?.pathname); const local_file_path: string = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname; console.log('Local File Path:', local_file_path); if (local_file_path) { -- cgit v1.2.3-70-g09d2 From 97fdb44133c6aed043f84fd345d5ac57125e5405 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Mon, 15 Jul 2024 13:47:39 -0400 Subject: attempt at adding links --- package-lock.json | 12 ++ package.json | 1 + src/client/views/nodes/ChatBox/ChatBox.scss | 128 ++++++++++----------- src/client/views/nodes/ChatBox/ChatBox.tsx | 80 ++++++++----- .../views/nodes/ChatBox/MessageComponent.tsx | 2 +- .../nodes/ChatBox/vectorstore/VectorstoreUpload.ts | 22 +++- 6 files changed, 143 insertions(+), 102 deletions(-) (limited to 'src') diff --git a/package-lock.json b/package-lock.json index 713174741..79aac1ece 100644 --- a/package-lock.json +++ b/package-lock.json @@ -141,6 +141,7 @@ "lodash": "^4.17.21", "mapbox-gl": "^3.0.1", "markdown-it": "^14.1.0", + "markdown-to-jsx": "^7.4.7", "mathquill": "^0.10.1-a", "md5-file": "^5.0.0", "memorystream": "^0.3.1", @@ -25639,6 +25640,17 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/markdown-to-jsx": { + "version": "7.4.7", + "resolved": "https://registry.npmjs.org/markdown-to-jsx/-/markdown-to-jsx-7.4.7.tgz", + "integrity": "sha512-0+ls1IQZdU6cwM1yu0ZjjiVWYtkbExSyUIFU2ZeDIFuZM1W42Mh4OlJ4nb4apX4H8smxDHRdFaoIVJGwfv5hkg==", + "engines": { + "node": ">= 10" + }, + "peerDependencies": { + "react": ">= 0.14.0" + } + }, "node_modules/material-colors": { "version": "1.2.6", "resolved": "https://registry.npmjs.org/material-colors/-/material-colors-1.2.6.tgz", diff --git a/package.json 
b/package.json index e1895ad81..bdc049694 100644 --- a/package.json +++ b/package.json @@ -226,6 +226,7 @@ "lodash": "^4.17.21", "mapbox-gl": "^3.0.1", "markdown-it": "^14.1.0", + "markdown-to-jsx": "^7.4.7", "mathquill": "^0.10.1-a", "md5-file": "^5.0.0", "memorystream": "^0.3.1", diff --git a/src/client/views/nodes/ChatBox/ChatBox.scss b/src/client/views/nodes/ChatBox/ChatBox.scss index f1e3d3d67..75171fe56 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.scss +++ b/src/client/views/nodes/ChatBox/ChatBox.scss @@ -5,6 +5,10 @@ $button-color: #007bff; $button-hover-color: darken($button-color, 10%); $shadow-color: rgba(0, 0, 0, 0.075); $border-radius: 8px; +$citation-color: #ff6347; +$citation-hover-color: darken($citation-color, 10%); +$follow-up-bg-color: #e9ecef; +$follow-up-hover-bg-color: #dee2e6; .chatBox { display: flex; @@ -40,51 +44,41 @@ $border-radius: 8px; .messages { display: flex; flex-direction: column; + .message { - padding: 10px; + padding: 10px 15px; margin-bottom: 10px; border-radius: $border-radius; background-color: lighten($background-color, 5%); box-shadow: 0 2px 5px $shadow-color; align-items: flex-start; max-width: 90%; - width: 100%; + width: fit-content; word-break: break-word; + position: relative; - .message-footer { - width: 100%; - - .toggle-logs-button { - margin-top: 10px; - width: 95%; - text-align: center; - background-color: $button-color; - color: #fff; - border: none; - border-radius: $border-radius; - cursor: pointer; - box-shadow: 0 2px 4px $shadow-color; - &:hover { - background-color: $button-hover-color; - } - } - .tool-logs { - width: 100%; - background-color: $input-background; - color: $text-color; - margin-top: 5px; - font-family: monospace; - overflow-x: auto; - max-height: 150px; - overflow-y: auto; + .citation-button { + background-color: $citation-color; + color: #fff; + border: none; + border-radius: 50%; + cursor: pointer; + width: 20px; + height: 20px; + display: inline-flex; + align-items: center; + 
justify-content: center; + font-size: 12px; + font-weight: bold; + margin: 0 2px; + padding: 0; + transition: background-color 0.3s; + + &:hover { + background-color: $citation-hover-color; } } - .custom-link { - color: lightblue; - text-decoration: underline; - cursor: pointer; - } &.user { align-self: flex-end; background-color: $button-color; @@ -108,6 +102,40 @@ $border-radius: 8px; border-radius: 50%; } } + + .follow-up-questions { + margin-top: 10px; + width: 100%; + + h4 { + margin-bottom: 5px; + font-size: 14px; + } + + .follow-up-button { + background-color: $follow-up-bg-color; + border: 1px solid #ddd; + border-radius: 8px; + padding: 8px 10px; + margin: 4px 0; + cursor: pointer; + transition: background-color 0.3s; + display: block; + width: 100%; + text-align: left; + white-space: normal; + word-wrap: break-word; + font-size: 12px; + color: $text-color; + min-height: 40px; + height: auto; + line-height: 1.3; + + &:hover { + background-color: $follow-up-hover-bg-color; + } + } + } } } @@ -212,37 +240,3 @@ $border-radius: 8px; } } } - -.follow-up-questions { - margin-top: 10px; - width: 100%; - - h4 { - margin-bottom: 5px; - font-size: 14px; - } - - .follow-up-button { - background-color: #f0f0f0; - border: 1px solid #ddd; - border-radius: 8px; - padding: 8px 10px; - margin: 4px 0; - cursor: pointer; - transition: background-color 0.3s; - display: block; - width: 100%; - text-align: left; - white-space: normal; - word-wrap: break-word; - font-size: 12px; - color: $text-color; - min-height: 40px; - height: auto; // Allow the button to expand as needed - line-height: 1.3; - - &:hover { - background-color: #e0e0e0; - } - } -} diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index bae6bbaa6..4d7381a57 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -12,15 +12,20 @@ import { ViewBoxAnnotatableComponent } from '../../DocComponent'; import { 
FieldView, FieldViewProps } from '../FieldView'; import './ChatBox.scss'; import MessageComponentBox from './MessageComponent'; -import { ASSISTANT_ROLE, AssistantMessage, AI_Document, convertToAIDocument, Citation, CHUNK_TYPE } from './types'; +import { ASSISTANT_ROLE, AssistantMessage, AI_Document, convertToAIDocument, Citation, CHUNK_TYPE, Chunk, getChunkType } from './types'; import { Vectorstore } from './vectorstore/VectorstoreUpload'; import { CollectionFreeFormDocumentView } from '../CollectionFreeFormDocumentView'; import { CollectionFreeFormView } from '../../collections/collectionFreeForm'; import { Agent } from './Agent'; import dotenv from 'dotenv'; -import { DocData } from '../../../../fields/DocSymbols'; +import { DocData, DocViews } from '../../../../fields/DocSymbols'; import { DocumentView } from '../DocumentView'; import { AnswerParser } from './AnswerParser'; +import { DocumentManager } from '../../../util/DocumentManager'; +import { UUID } from 'bson'; +import { v4 as uuidv4 } from 'uuid'; +import { aS } from '@fullcalendar/core/internal-common'; + dotenv.config(); @observer @@ -31,11 +36,10 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @observable isLoading: boolean = false; @observable isInitializing: boolean = true; @observable expandedScratchpadIndex: number | null = null; - @observable linked_docs_to_add: Doc[] = []; @observable inputValue: string = ''; - @observable private _visibleDocs: Doc[] = []; + @observable private currently_linked: Doc[] = []; private openai: OpenAI; - // private vectorstore_id: string; + private vectorstore_id: string; private documents: AI_Document[] = []; private _oldWheel: any; private vectorstore: Vectorstore; @@ -50,9 +54,14 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { makeObservable(this); this.history = [{ role: ASSISTANT_ROLE.ASSISTANT, text_content: 'Welcome to the Document Analyser Assistant! Link a document or ask questions to get started.' 
}]; this.openai = this.initializeOpenAI(); - this.vectorstore = new Vectorstore(); + if (StrCast(this.dataDoc.vectorstore_id) == '') { + this.vectorstore_id = uuidv4(); + this.dataDoc.vectorstore_id = this.vectorstore_id; + } else { + this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id); + } + this.vectorstore = new Vectorstore(this.vectorstore_id); this.agent = new Agent(this.vectorstore); // Initialize the Agent - reaction( () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, text_content: msg.text_content, follow_up_questions: msg.follow_up_questions, citations: msg.citations })), serializableHistory => { @@ -61,19 +70,17 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { ); } - @action - addDocsToVectorstore = async (visible_docs: Doc[]) => { - await this.vectorstore.addAIDocs(visible_docs); - this.isInitializing = false; + addDocsToVectorstore = async (linkedDocs: Doc[]) => { + await this.vectorstore.addAIDocs(linkedDocs); }; - @action - uploadNewDocument = async (newDoc: Doc) => { - const local_file_path: string = CsvCast(newDoc.data, PDFCast(newDoc.data)).url.pathname; - const { document_json } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); - this.documents.push(...document_json.map(convertToAIDocument)); - newDoc['ai_document'] = document_json; - }; + // @action + // uploadNewDocument = async (newDoc: Doc) => { + // const local_file_path: string = CsvCast(newDoc.data, PDFCast(newDoc.data)).url.pathname; + // const { document_json } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); + // this.documents.push(...document_json.map(convertToAIDocument)); + // //newDoc['ai_document'] = document_json; + // }; @action toggleToolLogs = (index: number) => { @@ -142,6 +149,24 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @action handleCitationClick = (citation: Citation) => { console.log('Citation clicked:', citation); + const currentLinkedDocs: 
Doc[] = this.linkedDocs; + const chunk_id = citation.chunk_id; + for (let doc of currentLinkedDocs) { + const doc_chunks: Chunk[] = JSON.parse(StrCast(doc.ai_document)).chunks; + const chunk_file_name = doc_chunks.find(chunk => chunk.id === chunk_id)?.metadata.file_path; + const doc_url = CsvCast(doc.data, PDFCast(doc.data)).url.pathname; + console.log('URL: ' + doc_url + ' Citation URL: ' + chunk_file_name); + //const ai_field_id = doc[this.Document[Id] + '_ai_field_id']; + if (chunk_file_name == doc_url) { + DocumentManager.Instance.showDocument(doc, {}, () => { + console.log(doc.data); + //look at context path for each docview and choose the doc view that has as + //its parent the same collection view the chatbox is in + const first_view = Array.from(doc[DocViews])[0]; + first_view.ComponentView?.search?.(citation.direct_text); + }); + } + } // You can implement additional functionality here, such as showing a modal with the full citation content }; @@ -183,6 +208,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { componentDidMount() { this._props.setContentViewBox?.(this); + this.currently_linked = this.linkedDocs; if (this.dataDoc.data) { try { const storedHistory = JSON.parse(StrCast(this.dataDoc.data)); @@ -201,15 +227,15 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } } reaction( - () => this.visibleDocs, - visibleDocs => { - this._visibleDocs.push(...visibleDocs.filter(visibleDoc => !this._visibleDocs.includes(visibleDoc))); + () => this.linkedDocs, + linkedDocs => { + this.currently_linked.push(...linkedDocs.filter(linkedDoc => !this.currently_linked.includes(linkedDoc))); } ); observe( // right now this skips during initialization which is necessary because it would be blank // However, it will upload the same link twice when it is - this._visibleDocs, + this.currently_linked, change => { // observe pushes/splices on a user link DB 'data' field (should only happen for local changes) switch (change.type as any) { @@ 
-228,15 +254,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }, true ); - runInAction(() => { - if (!this._visibleDocs.length) { - this.isInitializing = false; - } - }); + if (this.isInitializing) { + this.isInitializing = false; + } } @computed - get visibleDocs() { + get linkedDocs() { //return (CollectionFreeFormDocumentView.from(this._props.DocumentView?.())?._props.parent as CollectionFreeFormView)?.childDocs.filter(doc => doc != this.Document) ?? []; return LinkManager.Instance.getAllRelatedLinks(this.Document) .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index 76faff10b..e18224405 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -35,7 +35,7 @@ const MessageComponentBox: React.FC = function ({ message height: '20px', borderRadius: '50%', border: 'none', - background: '#007bff', + background: '#ff6347', color: 'white', fontSize: '12px', fontWeight: 'bold', diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts index d3b1cb4e7..3a889bff2 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts @@ -16,9 +16,10 @@ export class Vectorstore { private index!: Index; private cohere: CohereClient; private indexName: string = 'pdf-chatbot'; + private id: string; documents: AI_Document[] = []; - constructor() { + constructor(id: string) { const pineconeApiKey = process.env.PINECONE_API_KEY; if (!pineconeApiKey) { throw new Error('PINECONE_API_KEY is not defined.'); @@ -30,6 +31,7 @@ export class Vectorstore { this.cohere = new CohereClient({ token: process.env.COHERE_API_KEY, }); + this.id = id; this.initializeIndex(); } @@ -62,23 +64,28 @@ export class 
Vectorstore { } async addAIDoc(doc: Doc) { - if (doc[DocData]?.ai_document) { - this.documents.push(convertToAIDocument(JSON.parse(StrCast(doc[DocData].ai_document)))); + if (doc.ai_document) { + if (doc.ai_document === 'IN PROGRESS') { + console.log('Already in progress.'); + return; + } + this.documents.push(convertToAIDocument(JSON.parse(StrCast(doc.ai_document)))); console.log(`Document already added: ${doc.file_name}`); } else { + doc.ai_document = 'IN PROGRESS'; console.log(doc); console.log(PDFCast(doc.data)?.url?.pathname); console.log(CsvCast(doc.data)?.url?.pathname); const local_file_path: string = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname; console.log('Local File Path:', local_file_path); - if (local_file_path) { + if (local_file_path !== undefined || local_file_path !== null || local_file_path !== '') { const { document_json } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); console.log('Document JSON:', document_json); const ai_document: AI_Document = convertToAIDocument(document_json); this.documents.push(ai_document); await this.indexDocument(ai_document); console.log(`Document added: ${ai_document.file_name}`); - doc[DocData].ai_document = JSON.stringify(document_json); + doc.ai_document = JSON.stringify(document_json); } } } @@ -94,7 +101,7 @@ export class Vectorstore { ({ id: chunk.id, values: chunk.values, - metadata: chunk.metadata as RecordMetadata, + metadata: { ...chunk.metadata, vectorestore_id: this.id } as RecordMetadata, }) as PineconeRecord ); await this.index.upsert(pineconeRecords); @@ -125,6 +132,9 @@ export class Vectorstore { const queryResponse: QueryResponse = await this.index.query({ vector: queryEmbedding, + filter: { + vectorstore_id: this.id, + }, topK, includeValues: true, includeMetadata: true, -- cgit v1.2.3-70-g09d2 From 5a3d5b23c927c5fb05c7eeef1e3bb91479ef896a Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Mon, 15 Jul 2024 14:00:12 -0400 Subject: attempt #2 --- src/client/views/nodes/ChatBox/ChatBox.tsx | 25 +++++++++++----------- .../nodes/ChatBox/vectorstore/VectorstoreUpload.ts | 8 +++++-- 2 files changed, 19 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 4d7381a57..4d1cd38a0 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -37,7 +37,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @observable isInitializing: boolean = true; @observable expandedScratchpadIndex: number | null = null; @observable inputValue: string = ''; - @observable private currently_linked: Doc[] = []; + @observable private linked_docs_to_add: Doc[] = []; private openai: OpenAI; private vectorstore_id: string; private documents: AI_Document[] = []; @@ -158,7 +158,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { console.log('URL: ' + doc_url + ' Citation URL: ' + chunk_file_name); //const ai_field_id = doc[this.Document[Id] + '_ai_field_id']; if (chunk_file_name == doc_url) { - DocumentManager.Instance.showDocument(doc, {}, () => { + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { console.log(doc.data); //look at context path for each docview and choose the doc view that has as //its parent the same collection view the chatbox is in @@ -208,7 +208,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { componentDidMount() { this._props.setContentViewBox?.(this); - this.currently_linked = this.linkedDocs; if (this.dataDoc.data) { try { const storedHistory = JSON.parse(StrCast(this.dataDoc.data)); @@ -227,15 +226,21 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } } reaction( - () => this.linkedDocs, - linkedDocs => { - this.currently_linked.push(...linkedDocs.filter(linkedDoc => !this.currently_linked.includes(linkedDoc))); - } 
+ () => { + const linkedDocs = LinkManager.Instance.getAllRelatedLinks(this.Document) + .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) + .map(d => DocCast(d?.annotationOn, d)) + .filter(d => d); + return linkedDocs; + }, + + linked => this.linked_docs_to_add.push(...linked.filter(linkedDoc => !this.linked_docs_to_add.includes(linkedDoc))) ); + observe( // right now this skips during initialization which is necessary because it would be blank // However, it will upload the same link twice when it is - this.currently_linked, + this.linked_docs_to_add, change => { // observe pushes/splices on a user link DB 'data' field (should only happen for local changes) switch (change.type as any) { @@ -243,7 +248,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { if ((change as any).addedCount > 0) { // maybe check here if its already in the urls datadoc array so doesn't add twice console.log((change as any).added as Doc[]); - console.log('here!'); this.addDocsToVectorstore((change as any).added as Doc[]); } // (change as any).removed.forEach((link: any) => remLinkFromDoc(toRealField(link))); @@ -254,9 +258,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }, true ); - if (this.isInitializing) { - this.isInitializing = false; - } } @computed diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts index 3a889bff2..64b89225c 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts @@ -64,8 +64,12 @@ export class Vectorstore { } async addAIDoc(doc: Doc) { - if (doc.ai_document) { - if (doc.ai_document === 'IN PROGRESS') { + console.log('Adding AI Document:', doc); + console.log('AI Document1:', doc[DocData].ai_document); + console.log('AI Document2:', doc.ai_document); + const ai_document_string: string = StrCast(doc.ai_document); + if (ai_document_string 
!== undefined && ai_document_string !== null && ai_document_string !== '' && ai_document_string !== ' ' && ai_document_string !== '{}') { + if (ai_document_string === 'IN PROGRESS') { console.log('Already in progress.'); return; } -- cgit v1.2.3-70-g09d2 From 65179e8b0519aa4ccf28afc4c429262ecf7a62f3 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Tue, 16 Jul 2024 11:11:53 -0400 Subject: attempting annotations for image chunks --- src/client/views/nodes/ChatBox/Agent.ts | 32 ++---- src/client/views/nodes/ChatBox/ChatBox.tsx | 111 +++++++++++++-------- src/client/views/nodes/ChatBox/tools/RAGTool.ts | 12 +-- src/client/views/nodes/ChatBox/types.ts | 3 + .../nodes/ChatBox/vectorstore/VectorstoreUpload.ts | 41 ++++---- src/client/views/pdf/PDFViewer.tsx | 24 +++++ 6 files changed, 128 insertions(+), 95 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index fd3c6e5e8..210d3c804 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -8,6 +8,7 @@ import { RAGTool } from './tools/RAGTool'; import { Vectorstore } from './vectorstore/VectorstoreUpload'; import { ChatCompletionAssistantMessageParam, ChatCompletionMessageParam } from 'openai/resources'; import dotenv from 'dotenv'; +import { ChatBox } from './ChatBox'; dotenv.config(); export class Agent { @@ -15,41 +16,24 @@ export class Agent { private tools: Record>; private messages: AgentMessage[] = []; private interMessages: AgentMessage[] = []; - private summaries: string; + private vectorstore: Vectorstore; + private history: () => string; - constructor(private vectorstore: Vectorstore) { + constructor(_vectorstore: Vectorstore, summaries: () => string, _history: () => string) { this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); - this.summaries = this.vectorstore ? 
this.vectorstore.getSummaries() : 'No documents available.'; + this.vectorstore = _vectorstore; + this.history = _history; this.tools = { wikipedia: new WikipediaTool(), calculate: new CalculateTool(), - rag: new RAGTool(vectorstore, this.summaries), + rag: new RAGTool(this.vectorstore, summaries), }; } - private refreshSummaries(): void { - this.summaries = this.vectorstore ? this.vectorstore.getSummaries() : 'No documents available.'; - this.tools.rag = new RAGTool(this.vectorstore, this.summaries); - } - - private formatChatHistory(): string { - let history = '\n'; - for (const message of this.messages) { - if (message.role === 'user') { - history += `${message.content}\n`; - } else if (message.role === 'assistant') { - history += `${message.content}\n`; - } - } - history += ''; - return history; - } - async askAgent(question: string, maxTurns: number = 8): Promise { - this.refreshSummaries(); console.log(`Starting query: ${question}`); this.messages.push({ role: 'user', content: question }); - const chatHistory = this.formatChatHistory(); + const chatHistory = this.history(); console.log(`Chat history: ${chatHistory}`); const systemPrompt = getReactPrompt(Object.values(this.tools), chatHistory); console.log(`System prompt: ${systemPrompt}`); diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 4d1cd38a0..a47e9a95b 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -1,4 +1,4 @@ -import { action, computed, makeObservable, observable, observe, reaction, runInAction } from 'mobx'; +import { action, computed, makeObservable, observable, observe, reaction, runInAction, ObservableSet } from 'mobx'; import { observer } from 'mobx-react'; import OpenAI, { ClientOptions } from 'openai'; import * as React from 'react'; @@ -25,6 +25,7 @@ import { DocumentManager } from '../../../util/DocumentManager'; import { UUID } from 'bson'; import { v4 as uuidv4 } from 
'uuid'; import { aS } from '@fullcalendar/core/internal-common'; +import { computeRect } from '@fullcalendar/core/internal'; dotenv.config(); @@ -34,10 +35,10 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @observable.deep current_message: AssistantMessage | undefined = undefined; @observable isLoading: boolean = false; - @observable isInitializing: boolean = true; + @observable isInitializing: boolean = false; @observable expandedScratchpadIndex: number | null = null; @observable inputValue: string = ''; - @observable private linked_docs_to_add: Doc[] = []; + @observable private linked_docs_to_add: ObservableSet = observable.set(); private openai: OpenAI; private vectorstore_id: string; private documents: AI_Document[] = []; @@ -55,13 +56,19 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.history = [{ role: ASSISTANT_ROLE.ASSISTANT, text_content: 'Welcome to the Document Analyser Assistant! Link a document or ask questions to get started.' }]; this.openai = this.initializeOpenAI(); if (StrCast(this.dataDoc.vectorstore_id) == '') { + console.log('new_id'); this.vectorstore_id = uuidv4(); this.dataDoc.vectorstore_id = this.vectorstore_id; } else { this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id); } this.vectorstore = new Vectorstore(this.vectorstore_id); - this.agent = new Agent(this.vectorstore); // Initialize the Agent + this.agent = new Agent( + this.vectorstore, + () => this.summaries, + () => this.formattedHistory + ); + reaction( () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, text_content: msg.text_content, follow_up_questions: msg.follow_up_questions, citations: msg.citations })), serializableHistory => { @@ -70,8 +77,11 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { ); } - addDocsToVectorstore = async (linkedDocs: Doc[]) => { - await this.vectorstore.addAIDocs(linkedDocs); + @action + addDocToVectorstore = async (newLinkedDoc: Doc) => { + this.isInitializing = true; + 
await this.vectorstore.addAIDoc(newLinkedDoc); + this.isInitializing = false; }; // @action @@ -152,20 +162,18 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { const currentLinkedDocs: Doc[] = this.linkedDocs; const chunk_id = citation.chunk_id; for (let doc of currentLinkedDocs) { - const doc_chunks: Chunk[] = JSON.parse(StrCast(doc.ai_document)).chunks; - const chunk_file_name = doc_chunks.find(chunk => chunk.id === chunk_id)?.metadata.file_path; + const doc_chunk_ids: string[] = JSON.parse(StrCast(doc.chunk_ids)); + if (!doc_chunk_ids.includes(chunk_id)) continue; const doc_url = CsvCast(doc.data, PDFCast(doc.data)).url.pathname; - console.log('URL: ' + doc_url + ' Citation URL: ' + chunk_file_name); + console.log('URL: ' + doc_url); //const ai_field_id = doc[this.Document[Id] + '_ai_field_id']; - if (chunk_file_name == doc_url) { - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { - console.log(doc.data); - //look at context path for each docview and choose the doc view that has as - //its parent the same collection view the chatbox is in - const first_view = Array.from(doc[DocViews])[0]; - first_view.ComponentView?.search?.(citation.direct_text); - }); - } + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { + console.log(doc.data); + //look at context path for each docview and choose the doc view that has as + //its parent the same collection view the chatbox is in + const first_view = Array.from(doc[DocViews])[0]; + first_view.ComponentView?.search?.(citation.direct_text); + }); } // You can implement additional functionality here, such as showing a modal with the full citation content }; @@ -234,32 +242,29 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { return linkedDocs; }, - linked => this.linked_docs_to_add.push(...linked.filter(linkedDoc => !this.linked_docs_to_add.includes(linkedDoc))) + linked => linked.forEach(doc => this.linked_docs_to_add.add(doc)) ); - observe( 
- // right now this skips during initialization which is necessary because it would be blank - // However, it will upload the same link twice when it is - this.linked_docs_to_add, - change => { - // observe pushes/splices on a user link DB 'data' field (should only happen for local changes) - switch (change.type as any) { - case 'splice': - if ((change as any).addedCount > 0) { - // maybe check here if its already in the urls datadoc array so doesn't add twice - console.log((change as any).added as Doc[]); - this.addDocsToVectorstore((change as any).added as Doc[]); - } - // (change as any).removed.forEach((link: any) => remLinkFromDoc(toRealField(link))); - break; - case 'update': // let oldValue = change.oldValue; - default: - } - }, - true - ); + observe(this.linked_docs_to_add, change => { + if (change.type === 'add') { + this.addDocToVectorstore(change.newValue); + } else if (change.type === 'delete') { + console.log('Deleted docs: ', change.oldValue); + } + }); } + // case 'splice': + // if ((change as any).addedCount > 0) { + // // maybe check here if its already in the urls datadoc array so doesn't add twice + // console.log((change as any).added as Doc[]); + // this.addDocsToVectorstore((change as any).added as Doc[]); + // } + // // (change as any).removed.forEach((link: any) => remLinkFromDoc(toRealField(link))); + // break; + // case 'update': // let oldValue = change.oldValue; + // default: + @computed get linkedDocs() { //return (CollectionFreeFormDocumentView.from(this._props.DocumentView?.())?._props.parent as CollectionFreeFormView)?.childDocs.filter(doc => doc != this.Document) ?? 
[]; @@ -269,6 +274,32 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { .filter(d => d); } + @computed + get summaries(): string { + return ( + LinkManager.Instance.getAllRelatedLinks(this.Document) + .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) + .map(d => DocCast(d?.annotationOn, d)) + .filter(d => d) + .map((doc, index) => `${index + 1}) ${doc.summary}`) + .join('\n') + '\n' + ); + } + + @computed + get formattedHistory(): string { + let history = '\n'; + for (const message of this.history) { + history += `<${message.role}>${message.text_content}\n`; + } + history += ''; + return history; + } + + retrieveSummaries(): string { + return this.summaries; + } + @action handleFollowUpClick = (question: string) => { console.log('Follow-up question clicked:', question); diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts index 36e4bc3ce..90f7bebfe 100644 --- a/src/client/views/nodes/ChatBox/tools/RAGTool.ts +++ b/src/client/views/nodes/ChatBox/tools/RAGTool.ts @@ -6,7 +6,7 @@ import * as fs from 'fs'; export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { constructor( private vectorstore: Vectorstore, - summaries: string + summaries: () => string ) { super( 'rag', @@ -46,18 +46,10 @@ export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { !!!IMPORTANT Use the RAG tool ANYTIME the question may potentially (even if you are not sure) relate to one of the user's documents. Here are the summaries of the user's documents: - ${summaries}` + ${summaries()}` ); } - changeSummaries(summaries: string) { - this.briefSummary = `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a set of document chunks (either images or text) that can be used to provide a grounded response based on user documents. 
- -!!!IMPORTANT Use the RAG tool ANYTIME the question may potentially (even if you are not sure) relate to one of the user's documents. -Here are the summaries of the user's documents: -${summaries}`; - } - async execute(args: { hypothetical_document_chunk: string }): Promise { const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk); return this.getFormattedChunks(relevantChunks); diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index a0d295e92..e510837c8 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -1,3 +1,6 @@ +import { Doc } from '../../../../fields/Doc'; +import { StrCast } from '../../../../fields/Types'; + export enum ASSISTANT_ROLE { USER = 'user', ASSISTANT = 'assistant', diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts index 64b89225c..b47e276e7 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts @@ -55,28 +55,17 @@ export class Vectorstore { this.index = this.pinecone.Index(this.indexName); } - async addAIDocs(visible_docs: Doc[]) { - console.log('All Docs:', visible_docs); - visible_docs?.forEach(async doc => { - await this.addAIDoc(doc); - }); - return; - } - async addAIDoc(doc: Doc) { console.log('Adding AI Document:', doc); - console.log('AI Document1:', doc[DocData].ai_document); - console.log('AI Document2:', doc.ai_document); - const ai_document_string: string = StrCast(doc.ai_document); - if (ai_document_string !== undefined && ai_document_string !== null && ai_document_string !== '' && ai_document_string !== ' ' && ai_document_string !== '{}') { - if (ai_document_string === 'IN PROGRESS') { + const ai_document_status: string = StrCast(doc.ai_document_status); + if (ai_document_status !== undefined && ai_document_status !== null 
&& ai_document_status !== '' && ai_document_status !== ' ' && ai_document_status !== '{}') { + if (ai_document_status === 'IN PROGRESS') { console.log('Already in progress.'); return; } - this.documents.push(convertToAIDocument(JSON.parse(StrCast(doc.ai_document)))); console.log(`Document already added: ${doc.file_name}`); } else { - doc.ai_document = 'IN PROGRESS'; + doc.ai_document_status = 'PROGRESS'; console.log(doc); console.log(PDFCast(doc.data)?.url?.pathname); console.log(CsvCast(doc.data)?.url?.pathname); @@ -89,15 +78,25 @@ export class Vectorstore { this.documents.push(ai_document); await this.indexDocument(ai_document); console.log(`Document added: ${ai_document.file_name}`); - doc.ai_document = JSON.stringify(document_json); + doc.summary = ai_document.summary; + doc.ai_purpose = ai_document.purpose; + if (doc.vectorstore_id === undefined || doc.vectorstore_id === null || doc.vectorstore_id === '' || doc.vectorstore_id === '[]') { + doc.vectorstore_id = JSON.stringify([this.id]); + } else { + doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this.id])); + } + if (doc.chunk_ids === undefined || doc.chunk_ids === null || doc.chunk_ids === '' || doc.chunk_ids === '[]') { + doc.chunk_ids = JSON.stringify([]); + } + ai_document.chunks.forEach(chunk => { + console.log(doc.chunk_ids); + doc.chunk_ids = JSON.stringify(JSON.parse(StrCast(doc.chunk_ids)).concat([chunk.id])); + }); + doc.ai_document_status = 'COMPLETED'; } } } - getSummaries(): string { - return this.documents.map((doc, index) => `${index + 1}) ${doc.summary}`).join('\n') + '\n'; - } - private async indexDocument(document: AI_Document) { console.log('Uploading vectors to content namespace...'); const pineconeRecords: PineconeRecord[] = document.chunks.map( @@ -105,7 +104,7 @@ export class Vectorstore { ({ id: chunk.id, values: chunk.values, - metadata: { ...chunk.metadata, vectorestore_id: this.id } as RecordMetadata, + metadata: { ...chunk.metadata, 
vectorstore_id: this.id } as RecordMetadata, }) as PineconeRecord ); await this.index.upsert(pineconeRecords); diff --git a/src/client/views/pdf/PDFViewer.tsx b/src/client/views/pdf/PDFViewer.tsx index 6c1617c38..2d6ed57d5 100644 --- a/src/client/views/pdf/PDFViewer.tsx +++ b/src/client/views/pdf/PDFViewer.tsx @@ -406,6 +406,30 @@ export class PDFViewer extends ObservableReactComponent { } }; + // @action + // createMarquee = (coords: [x1: number, x2: number, y1: number, y2: number]): void => { + // // const hit = document.elementFromPoint(e.clientX, e.clientY); + // // bcz: Change. drag selecting requires that preventDefault is NOT called. This used to happen in DocumentView, + // // but that's changed, so this shouldn't be needed. + // // if (hit && hit.localName === "span" && this.annotationsActive(true)) { // drag selecting text stops propagation + // // e.button === 0 && e.stopPropagation(); + // // } + // // if alt+left click, drag and annotate + // this._downX = coords[0]; + // this._downY = coords[2]; + + // if ((this._props.Document._freeform_scale || 1) !== 1) return; + // this._props.select(false); + // MarqueeAnnotator.clearAnnotations(this._savedAnnotations); + // this.isAnnotating = true; + // this._textSelecting = false; + // // if textLayer is hit, then we select text instead of using a marquee so clear out the marquee. + // this._styleRule = addStyleSheetRule(PDFViewer._annotationStyle, 'htmlAnnotation', { 'pointer-events': 'none' }); + + // this._marqueeref.current?.onInitiateSelection([coords[0], coords[2]]); + // this._marqueeref.current?.onTerminateSelection(); + // }; + @action finishMarquee = (/* x?: number, y?: number */) => { this._getAnchor = AnchorMenu.Instance?.GetAnchor; -- cgit v1.2.3-70-g09d2 From 74666884d0680745146f4e4ca24573637ee0a391 Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Tue, 16 Jul 2024 15:50:39 -0400 Subject: working much better still working on adding images thouhg --- src/client/views/nodes/ChatBox/Agent.ts | 8 +- src/client/views/nodes/ChatBox/ChatBox.tsx | 86 +++++++--- src/client/views/nodes/ChatBox/tools/RAGTool.ts | 43 ++--- src/client/views/nodes/ChatBox/types.ts | 6 + .../nodes/ChatBox/vectorstore/VectorstoreUpload.ts | 47 ++++-- src/server/ApiManagers/AssistantManager.ts | 180 +++++++++------------ 6 files changed, 198 insertions(+), 172 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index 210d3c804..bada4b146 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -17,12 +17,12 @@ export class Agent { private messages: AgentMessage[] = []; private interMessages: AgentMessage[] = []; private vectorstore: Vectorstore; - private history: () => string; + private _history: () => string; - constructor(_vectorstore: Vectorstore, summaries: () => string, _history: () => string) { + constructor(_vectorstore: Vectorstore, summaries: () => string, history: () => string) { this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); this.vectorstore = _vectorstore; - this.history = _history; + this._history = history; this.tools = { wikipedia: new WikipediaTool(), calculate: new CalculateTool(), @@ -33,7 +33,7 @@ export class Agent { async askAgent(question: string, maxTurns: number = 8): Promise { console.log(`Starting query: ${question}`); this.messages.push({ role: 'user', content: question }); - const chatHistory = this.history(); + const chatHistory = this._history(); console.log(`Chat history: ${chatHistory}`); const systemPrompt = getReactPrompt(Object.values(this.tools), chatHistory); console.log(`System prompt: ${systemPrompt}`); diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 
a47e9a95b..5d0a16b4f 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -63,11 +63,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id); } this.vectorstore = new Vectorstore(this.vectorstore_id); - this.agent = new Agent( - this.vectorstore, - () => this.summaries, - () => this.formattedHistory - ); + this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory); reaction( () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, text_content: msg.text_content, follow_up_questions: msg.follow_up_questions, citations: msg.citations })), @@ -162,18 +158,66 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { const currentLinkedDocs: Doc[] = this.linkedDocs; const chunk_id = citation.chunk_id; for (let doc of currentLinkedDocs) { - const doc_chunk_ids: string[] = JSON.parse(StrCast(doc.chunk_ids)); - if (!doc_chunk_ids.includes(chunk_id)) continue; - const doc_url = CsvCast(doc.data, PDFCast(doc.data)).url.pathname; - console.log('URL: ' + doc_url); - //const ai_field_id = doc[this.Document[Id] + '_ai_field_id']; - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { - console.log(doc.data); - //look at context path for each docview and choose the doc view that has as - //its parent the same collection view the chatbox is in - const first_view = Array.from(doc[DocViews])[0]; - first_view.ComponentView?.search?.(citation.direct_text); - }); + console.log(JSON.parse(StrCast(doc.chunk_simpl))); + const doc_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); + const text_chunks = doc_chunk_simpl.text_chunks as [{ chunk_id: string; start_page: number; end_page: number }]; + const image_chunks = doc_chunk_simpl.image_chunks as [{ chunk_id: string; location: string; page: number; page_width: number; page_height: number }]; + + const found_text_chunk = 
text_chunks.find(chunk => chunk.chunk_id === chunk_id); + if (found_text_chunk) { + const doc_url = CsvCast(doc.data, PDFCast(doc.data)).url.pathname; + console.log('URL: ' + doc_url); + + //const ai_field_id = doc[this.Document[Id] + '_ai_field_id']; + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { + console.log(doc.data); + //look at context path for each docview and choose the doc view that has as + //its parent the same collection view the chatbox is in + const first_view = Array.from(doc[DocViews])[0]; + first_view.ComponentView?.search?.(citation.direct_text); + }); + } + + const found_image_chunk = image_chunks.find(chunk => chunk.chunk_id === chunk_id); + if (found_image_chunk) { + const location_string: string = found_image_chunk.location; + + // Extract variables from location_string + const values = location_string.replace(/[\[\]]/g, '').split(','); + + // Ensure we have exactly 4 values + if (values.length !== 4) { + console.error('Location string must contain exactly 4 numbers'); + return; // or handle this error as appropriate + } + + const x1 = parseInt(values[0]) * (parseInt(StrCast(doc.width)) / found_image_chunk.page_width); + const y1 = parseInt(values[1]) * (parseInt(StrCast(doc.height)) / found_image_chunk.page_height); + const x2 = parseInt(values[2]) * (parseInt(StrCast(doc.width)) / found_image_chunk.page_width); + const y2 = parseInt(values[3]) * (parseInt(StrCast(doc.height)) / found_image_chunk.page_height); + + // Parse values to numbers + // const [x1, y1, x2, y2] = values.map(Number); + + // Check if any parsing resulted in NaN + if ([x1, y1, x2, y2].some(isNaN)) { + console.error('All values in location string must be valid numbers'); + return; // or handle this error as appropriate + } + + const highlight_doc = Docs.Create.FreeformDocument([], { + x: x1, + y: y1, + _width: x2 - x1, + _height: y2 - y1, + backgroundColor: 'rgba(255, 255, 0, 0.5)', + }); + + Doc.AddDocToList(doc[DocData], 
Doc.LayoutFieldKey(doc) + '_annotations', highlight_doc); + highlight_doc.annotationOn = doc; + Doc.SetContainer(highlight_doc, doc); + DocumentManager.Instance.showDocument(highlight_doc, { willZoomCentered: true }, () => {}); + } } // You can implement additional functionality here, such as showing a modal with the full citation content }; @@ -296,9 +340,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { return history; } - retrieveSummaries(): string { + retrieveSummaries = () => { return this.summaries; - } + }; + + retrieveFormattedHistory = () => { + return this.formattedHistory; + }; @action handleFollowUpClick = (question: string) => { diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts index 90f7bebfe..0a4529974 100644 --- a/src/client/views/nodes/ChatBox/tools/RAGTool.ts +++ b/src/client/views/nodes/ChatBox/tools/RAGTool.ts @@ -2,6 +2,7 @@ import { BaseTool } from './BaseTool'; import { Vectorstore } from '../vectorstore/VectorstoreUpload'; import { Chunk } from '../types'; import * as fs from 'fs'; +import { Networking } from '../../../../Network'; export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { constructor( @@ -52,42 +53,22 @@ export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { async execute(args: { hypothetical_document_chunk: string }): Promise { const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk); - return this.getFormattedChunks(relevantChunks); + const formatted_chunks = await this.getFormattedChunks(relevantChunks); + return formatted_chunks; } - private getFormattedChunks(relevantChunks: Chunk[]): { type: string; text?: string; image_url?: { url: string } }[] { - const content: { type: string; text?: string; image_url?: { url: string } }[] = [{ type: 'text', text: '' }]; + async getFormattedChunks(relevantChunks: Chunk[]): Promise<{ type: string; text?: string; image_url?: { 
url: string } }[]> { + try { + const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks }); - for (const chunk of relevantChunks) { - content.push({ - type: 'text', - text: ``, - }); - - if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') { - try { - const imageBuffer = fs.readFileSync(chunk.metadata.file_path); - const base64Image = imageBuffer.toString('base64'); - if (base64Image) { - content.push({ - type: 'image_url', - image_url: { - url: `data:image/jpeg;base64,${base64Image}`, - }, - }); - } else { - console.log(`Failed to encode image for chunk ${chunk.id}`); - } - } catch (error) { - console.error(`Error reading image file for chunk ${chunk.id}:`, error); - } + if (!formattedChunks) { + throw new Error('Failed to format chunks'); } - content.push({ type: 'text', text: `${chunk.metadata.text}\n\n` }); + return formattedChunks; + } catch (error) { + console.error('Error formatting chunks:', error); + throw error; } - - content.push({ type: 'text', text: '' }); - - return content; } } diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index e510837c8..783610d6d 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -50,6 +50,9 @@ export interface Chunk { location: string; start_page: number; end_page: number; + base64_data?: string; + page_width: number; + page_height: number; }; } @@ -93,6 +96,9 @@ export function convertToAIDocument(json: any): AI_Document { location: chunk.metadata.location, start_page: chunk.metadata.start_page, end_page: chunk.metadata.end_page, + base64_data: chunk.metadata.base64_data ?? undefined, + width: chunk.metadata.width ?? undefined, + height: chunk.metadata.height ?? 
undefined, }, })); diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts index b47e276e7..b3e3f8679 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts @@ -3,7 +3,7 @@ import { CohereClient } from 'cohere-ai'; import { EmbedResponse } from 'cohere-ai/api'; import dotenv from 'dotenv'; -import { Chunk, AI_Document, convertToAIDocument } from '../types'; +import { Chunk, AI_Document, convertToAIDocument, CHUNK_TYPE } from '../types'; import { Doc } from '../../../../../fields/Doc'; import { DocData } from '../../../../../fields/DocSymbols'; import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types'; @@ -74,24 +74,47 @@ export class Vectorstore { if (local_file_path !== undefined || local_file_path !== null || local_file_path !== '') { const { document_json } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); console.log('Document JSON:', document_json); - const ai_document: AI_Document = convertToAIDocument(document_json); - this.documents.push(ai_document); - await this.indexDocument(ai_document); - console.log(`Document added: ${ai_document.file_name}`); - doc.summary = ai_document.summary; - doc.ai_purpose = ai_document.purpose; + //const ai_document: AI_Document = convertToAIDocument(document_json); + this.documents.push(document_json); + await this.indexDocument(convertToAIDocument(document_json)); + console.log(`Document added: ${document_json.file_name}`); + doc.summary = document_json.summary; + doc.ai_purpose = document_json.purpose; if (doc.vectorstore_id === undefined || doc.vectorstore_id === null || doc.vectorstore_id === '' || doc.vectorstore_id === '[]') { doc.vectorstore_id = JSON.stringify([this.id]); } else { doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this.id])); } - if 
(doc.chunk_ids === undefined || doc.chunk_ids === null || doc.chunk_ids === '' || doc.chunk_ids === '[]') { - doc.chunk_ids = JSON.stringify([]); + if (doc.chunk_simpl === undefined || doc.chunk_simpl === null || doc.chunk_simpl === '' || doc.chunk_simpl === '[]') { + doc.chunk_simpl = JSON.stringify({ text_chunks: [], image_chunks: [] }); } - ai_document.chunks.forEach(chunk => { - console.log(doc.chunk_ids); - doc.chunk_ids = JSON.stringify(JSON.parse(StrCast(doc.chunk_ids)).concat([chunk.id])); + let new_chunk_simpl: { text_chunks: { chunk_id: string; start_page: number; end_page: number }[]; image_chunks: { chunk_id: string; location: string; page: number; page_width: number; page_height: number }[] } = { + text_chunks: [], + image_chunks: [], + }; + + document_json.chunks.forEach((chunk: Chunk) => { + let chunk_to_add: { chunk_id: string; start_page: number; end_page: number }[] | { chunk_id: string; location: string; page: number; page_width: number; page_height: number }[]; + switch (chunk.metadata.type) { + case CHUNK_TYPE.TEXT: + chunk_to_add = [{ chunk_id: chunk.id, start_page: chunk.metadata.start_page, end_page: chunk.metadata.end_page }]; + new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); + new_chunk_simpl.text_chunks = new_chunk_simpl.text_chunks.concat(chunk_to_add); + doc.chunk_simpl = JSON.stringify(new_chunk_simpl); + break; + case CHUNK_TYPE.IMAGE: + case CHUNK_TYPE.TABLE: + console.log('Location:', chunk.metadata.location); + console.log('Height:', chunk.metadata.page_height); + console.log('Width:', chunk.metadata.page_width); + chunk_to_add = [{ chunk_id: chunk.id, location: chunk.metadata.location, page: chunk.metadata.start_page, page_width: chunk.metadata.page_width, page_height: chunk.metadata.page_height }]; + new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); + new_chunk_simpl.image_chunks = new_chunk_simpl.image_chunks.concat(chunk_to_add); + doc.chunk_simpl = JSON.stringify(new_chunk_simpl); + break; + } }); + 
doc.ai_document_status = 'COMPLETED'; } } diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index d5a8ebeb3..36468157a 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -8,6 +8,7 @@ import { filesDirectory, publicDirectory } from '../SocketData'; import { Method } from '../RouteManager'; import ApiManager, { Registration } from './ApiManager'; import axios from 'axios'; +import { Chunk } from '../../client/views/nodes/ChatBox/types'; export enum Directory { parsed_files = 'parsed_files', @@ -42,112 +43,6 @@ export default class AssistantManager extends ApiManager { dangerouslyAllowBrowser: true, }); - register({ - method: Method.POST, - subscription: '/uploadPDFToVectorStore', - secureHandler: async ({ req, res }) => { - const { urls, threadID, assistantID, vector_store_id } = req.body; - - const csvFilesIds: string[] = []; - const otherFileIds: string[] = []; - const allFileIds: string[] = []; - - const fileProcesses = urls.map(async (source: string) => { - const fullPath = path.join(publicDirectory, source); - const fileData = await openai.files.create({ file: createReadStream(fullPath), purpose: 'assistants' }); - allFileIds.push(fileData.id); - if (source.endsWith('.csv')) { - console.log(source); - csvFilesIds.push(fileData.id); - } else { - openai.beta.vectorStores.files.create(vector_store_id, { file_id: fileData.id }); - otherFileIds.push(fileData.id); - } - }); - try { - await Promise.all(fileProcesses).then(() => { - res.send({ vector_store_id: vector_store_id, openai_file_ids: allFileIds }); - }); - } catch (error) { - res.status(500).send({ error: 'Failed to process files' + error }); - } - }, - }); - - register({ - method: Method.POST, - subscription: '/downloadFileFromOpenAI', - secureHandler: async ({ req, res }) => { - const { file_id, file_name } = req.body; - //let files_directory: string; - let files_directory = '/files/openAIFiles/'; - 
switch (file_name.split('.').pop()) { - case 'pdf': - files_directory = '/files/pdfs/'; - break; - case 'csv': - files_directory = '/files/csv/'; - break; - case 'png': - case 'jpg': - case 'jpeg': - files_directory = '/files/images/'; - break; - default: - break; - } - - const directory = path.join(publicDirectory, files_directory); - - if (!fs.existsSync(directory)) { - fs.mkdirSync(directory); - } - const file = await openai.files.content(file_id); - const new_file_name = `${uuid.v4()}-${file_name}`; - const file_path = path.join(directory, new_file_name); - const file_array_buffer = await file.arrayBuffer(); - const bufferView = new Uint8Array(file_array_buffer); - try { - const written_file = await writeFileAsync(file_path, bufferView); - console.log(written_file); - console.log(file_path); - console.log(file_array_buffer); - console.log(bufferView); - const file_object = new File([bufferView], file_name); - //DashUploadUtils.upload(file_object, 'openAIFiles'); - res.send({ file_path: path.join(files_directory, new_file_name) }); - /* res.send( { - source: "file", - result: { - accessPaths: { - agnostic: {client: path.join('/files/openAIFiles/', `${uuid.v4()}-${file_name}`)} - }, - rawText: "", - duration: 0, - }, - } ); */ - } catch (error) { - res.status(500).send({ error: 'Failed to write file' + error }); - } - }, - }); - - register({ - method: Method.POST, - subscription: '/askAgent', - secureHandler: async ({ req, res }) => { - const { input } = req.body; - - try { - const response = await axios.post('http://localhost:8080/ask', { input }); - res.send({ response: response.data.response }); - } catch (error: any) { - console.error('Error communicating with chatbot:', error); - res.status(500).send({ error: 'Failed to communicate with the chatbot', details: error.message }); - } - }, - }); - register({ method: Method.POST, subscription: '/getWikipediaSummary', @@ -212,6 +107,37 @@ export default class AssistantManager extends ApiManager { } } + if 
(result.chunks && Array.isArray(result.chunks)) { + for (const chunk of result.chunks) { + if (chunk.metadata && (chunk.metadata.type === 'image' || chunk.metadata.type === 'table')) { + let files_directory = '/files/chunk_images/'; + const directory = path.join(publicDirectory, files_directory); + + if (!fs.existsSync(directory)) { + fs.mkdirSync(directory); + } + + const fileName = path.basename(chunk.metadata.file_path); + const filePath = path.join(directory, fileName); + + // Check if base64_data exists + if (chunk.metadata.base64_data) { + // Decode Base64 and save as file + const buffer = Buffer.from(chunk.metadata.base64_data, 'base64'); + await fs.promises.writeFile(filePath, buffer); + + // Update the file path in the chunk + chunk.metadata.file_path = path.join(files_directory, fileName); + chunk.metadata.base64_data = undefined; + } else { + console.warn(`No base64_data found for chunk: ${fileName}`); + } + } + } + } else { + console.warn("Result does not contain an iterable 'chunks' property"); + } + res.send({ document_json: result }); } catch (error: any) { console.error('Error communicating with chatbot:', error); @@ -219,5 +145,47 @@ export default class AssistantManager extends ApiManager { } }, }); + + register({ + method: Method.POST, + subscription: '/formatChunks', + secureHandler: async ({ req, res }) => { + const { relevantChunks } = req.body; + const content: { type: string; text?: string; image_url?: { url: string } }[] = [{ type: 'text', text: '' }]; + + for (const chunk of relevantChunks) { + content.push({ + type: 'text', + text: ``, + }); + + if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') { + try { + const filePath = serverPathToFile(Directory.parsed_files, chunk.metadata.file_path); + const imageBuffer = await readFileAsync(filePath); + const base64Image = imageBuffer.toString('base64'); + if (base64Image) { + content.push({ + type: 'image_url', + image_url: { + url: `data:image/jpeg;base64,${base64Image}`, + 
}, + }); + } else { + console.log(`Failed to encode image for chunk ${chunk.id}`); + } + } catch (error) { + console.error(`Error reading image file for chunk ${chunk.id}:`, error); + } + } + + content.push({ type: 'text', text: `${chunk.metadata.text}\n\n` }); + } + + content.push({ type: 'text', text: '' }); + + res.send({ formattedChunks: content }); + }, + }); } } -- cgit v1.2.3-70-g09d2 From 6e0dd5cf8b36e66edbced83cf5e6d4e2e272be3f Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Tue, 16 Jul 2024 16:11:19 -0400 Subject: not sure why JSON parsing isn't working --- src/client/views/nodes/ChatBox/ChatBox.tsx | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 5d0a16b4f..c7ae9a354 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -158,10 +158,12 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { const currentLinkedDocs: Doc[] = this.linkedDocs; const chunk_id = citation.chunk_id; for (let doc of currentLinkedDocs) { - console.log(JSON.parse(StrCast(doc.chunk_simpl))); - const doc_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); - const text_chunks = doc_chunk_simpl.text_chunks as [{ chunk_id: string; start_page: number; end_page: number }]; - const image_chunks = doc_chunk_simpl.image_chunks as [{ chunk_id: string; location: string; page: number; page_width: number; page_height: number }]; + //console.log(JSON.parse(StrCast(doc.chunk_simpl))); + console.log(JSON.stringify(StrCast(doc.chunk_simpl))); + const doc_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl) as string); + console.log(doc_chunk_simpl); + const text_chunks = doc_chunk_simpl.text_chunks as [{ chunk_id: string; start_page: number; end_page: number }] | []; + const image_chunks = doc_chunk_simpl.image_chunks as [{ chunk_id: string; location: string; page: number; page_width: number; 
page_height: number }] | []; const found_text_chunk = text_chunks.find(chunk => chunk.chunk_id === chunk_id); if (found_text_chunk) { -- cgit v1.2.3-70-g09d2 From 0340c24eccce3d90c03934dec14d574128fb32ef Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Wed, 17 Jul 2024 12:06:40 -0400 Subject: added image citation highlights --- src/client/views/nodes/ChatBox/AnswerParser.ts | 4 +- src/client/views/nodes/ChatBox/ChatBox.scss | 4 +- src/client/views/nodes/ChatBox/ChatBox.tsx | 128 +++++++++++---------- .../views/nodes/ChatBox/MessageComponent.tsx | 2 +- src/client/views/nodes/ChatBox/types.ts | 69 +++++------ .../nodes/ChatBox/vectorstore/VectorstoreUpload.ts | 16 ++- 6 files changed, 115 insertions(+), 108 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/AnswerParser.ts index f77d2261d..1162d46b0 100644 --- a/src/client/views/nodes/ChatBox/AnswerParser.ts +++ b/src/client/views/nodes/ChatBox/AnswerParser.ts @@ -1,4 +1,5 @@ import { ASSISTANT_ROLE, AssistantMessage, Citation, getChunkType } from './types'; +import { v4 as uuid } from 'uuid'; export class AnswerParser { static parse(xml: string): AssistantMessage { @@ -33,7 +34,8 @@ export class AnswerParser { direct_text: direct_text.trim(), type: getChunkType(type), chunk_id: chunk_id, - location: citationPlainStart, + text_location: citationPlainStart, + citation_id: uuid(), }); citationOffset += fullMatch.length; diff --git a/src/client/views/nodes/ChatBox/ChatBox.scss b/src/client/views/nodes/ChatBox/ChatBox.scss index 75171fe56..e39938c4f 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.scss +++ b/src/client/views/nodes/ChatBox/ChatBox.scss @@ -174,7 +174,7 @@ $follow-up-hover-bg-color: #dee2e6; } } -.initializing-overlay { +.uploading-overlay { position: absolute; top: 0; left: 0; @@ -189,7 +189,7 @@ $follow-up-hover-bg-color: #dee2e6; z-index: 10; &::before { - content: 'Initializing...'; + content: 'Uploading Docs...'; 
font-weight: bold; } } diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index c7ae9a354..8b4a7bd0a 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -2,7 +2,7 @@ import { action, computed, makeObservable, observable, observe, reaction, runInA import { observer } from 'mobx-react'; import OpenAI, { ClientOptions } from 'openai'; import * as React from 'react'; -import { Doc } from '../../../../fields/Doc'; +import { Doc, DocListCast } from '../../../../fields/Doc'; import { CsvCast, DocCast, PDFCast, StrCast } from '../../../../fields/Types'; import { Networking } from '../../../Network'; import { DocumentType } from '../../../documents/DocumentTypes'; @@ -12,7 +12,7 @@ import { ViewBoxAnnotatableComponent } from '../../DocComponent'; import { FieldView, FieldViewProps } from '../FieldView'; import './ChatBox.scss'; import MessageComponentBox from './MessageComponent'; -import { ASSISTANT_ROLE, AssistantMessage, AI_Document, convertToAIDocument, Citation, CHUNK_TYPE, Chunk, getChunkType } from './types'; +import { ASSISTANT_ROLE, AssistantMessage, AI_Document, Citation, CHUNK_TYPE, Chunk, getChunkType } from './types'; import { Vectorstore } from './vectorstore/VectorstoreUpload'; import { CollectionFreeFormDocumentView } from '../CollectionFreeFormDocumentView'; import { CollectionFreeFormView } from '../../collections/collectionFreeForm'; @@ -26,6 +26,7 @@ import { UUID } from 'bson'; import { v4 as uuidv4 } from 'uuid'; import { aS } from '@fullcalendar/core/internal-common'; import { computeRect } from '@fullcalendar/core/internal'; +import { DocUtils } from '../../../documents/DocUtils'; dotenv.config(); @@ -35,7 +36,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @observable.deep current_message: AssistantMessage | undefined = undefined; @observable isLoading: boolean = false; - @observable isInitializing: boolean = false; + 
@observable isUploadingDocs: boolean = false; @observable expandedScratchpadIndex: number | null = null; @observable inputValue: string = ''; @observable private linked_docs_to_add: ObservableSet = observable.set(); @@ -75,9 +76,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @action addDocToVectorstore = async (newLinkedDoc: Doc) => { - this.isInitializing = true; await this.vectorstore.addAIDoc(newLinkedDoc); - this.isInitializing = false; }; // @action @@ -158,75 +157,76 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { const currentLinkedDocs: Doc[] = this.linkedDocs; const chunk_id = citation.chunk_id; for (let doc of currentLinkedDocs) { - //console.log(JSON.parse(StrCast(doc.chunk_simpl))); - console.log(JSON.stringify(StrCast(doc.chunk_simpl))); - const doc_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl) as string); - console.log(doc_chunk_simpl); - const text_chunks = doc_chunk_simpl.text_chunks as [{ chunk_id: string; start_page: number; end_page: number }] | []; - const image_chunks = doc_chunk_simpl.image_chunks as [{ chunk_id: string; location: string; page: number; page_width: number; page_height: number }] | []; - - const found_text_chunk = text_chunks.find(chunk => chunk.chunk_id === chunk_id); - if (found_text_chunk) { - const doc_url = CsvCast(doc.data, PDFCast(doc.data)).url.pathname; - console.log('URL: ' + doc_url); - - //const ai_field_id = doc[this.Document[Id] + '_ai_field_id']; - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { - console.log(doc.data); - //look at context path for each docview and choose the doc view that has as - //its parent the same collection view the chatbox is in - const first_view = Array.from(doc[DocViews])[0]; - first_view.ComponentView?.search?.(citation.direct_text); - }); - } - - const found_image_chunk = image_chunks.find(chunk => chunk.chunk_id === chunk_id); - if (found_image_chunk) { - const location_string: string = found_image_chunk.location; 
+ if (doc.chunk_simpl) { + //console.log(JSON.parse(StrCast(doc.chunk_simpl))); + const doc_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); + console.log(doc_chunk_simpl); + const text_chunks = doc_chunk_simpl.text_chunks as [{ chunk_id: string; start_page: number; end_page: number }] | []; + const image_chunks = doc_chunk_simpl.image_chunks as [{ chunk_id: string; location: string; page: number }] | []; + + const found_text_chunk = text_chunks.find(chunk => chunk.chunk_id === chunk_id); + if (found_text_chunk) { + const doc_url = CsvCast(doc.data, PDFCast(doc.data)).url.pathname; + console.log('URL: ' + doc_url); + + //const ai_field_id = doc[this.Document[Id] + '_ai_field_id']; + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { + console.log(doc.data); + //look at context path for each docview and choose the doc view that has as + //its parent the same collection view the chatbox is in + const first_view = Array.from(doc[DocViews])[0]; + first_view.ComponentView?.search?.(citation.direct_text); + }); + } - // Extract variables from location_string - const values = location_string.replace(/[\[\]]/g, '').split(','); + const found_image_chunk = image_chunks.find(chunk => chunk.chunk_id === chunk_id); + if (found_image_chunk) { + const location_string: string = found_image_chunk.location; - // Ensure we have exactly 4 values - if (values.length !== 4) { - console.error('Location string must contain exactly 4 numbers'); - return; // or handle this error as appropriate - } + // Extract variables from location_string + const values = location_string.replace(/[\[\]]/g, '').split(','); - const x1 = parseInt(values[0]) * (parseInt(StrCast(doc.width)) / found_image_chunk.page_width); - const y1 = parseInt(values[1]) * (parseInt(StrCast(doc.height)) / found_image_chunk.page_height); - const x2 = parseInt(values[2]) * (parseInt(StrCast(doc.width)) / found_image_chunk.page_width); - const y2 = parseInt(values[3]) * 
(parseInt(StrCast(doc.height)) / found_image_chunk.page_height); + // Ensure we have exactly 4 values + if (values.length !== 4) { + console.error('Location string must contain exactly 4 numbers'); + return; // or handle this error as appropriate + } - // Parse values to numbers - // const [x1, y1, x2, y2] = values.map(Number); + const x1 = parseFloat(values[0]) * Doc.NativeWidth(doc); + const y1 = parseFloat(values[1]) * Doc.NativeHeight(doc); + const x2 = parseFloat(values[2]) * Doc.NativeWidth(doc); + const y2 = parseFloat(values[3]) * Doc.NativeHeight(doc); - // Check if any parsing resulted in NaN - if ([x1, y1, x2, y2].some(isNaN)) { - console.error('All values in location string must be valid numbers'); - return; // or handle this error as appropriate - } + const annotationKey = Doc.LayoutFieldKey(doc) + '_annotations'; - const highlight_doc = Docs.Create.FreeformDocument([], { - x: x1, - y: y1, - _width: x2 - x1, - _height: y2 - y1, - backgroundColor: 'rgba(255, 255, 0, 0.5)', - }); + const existingDoc = DocListCast(doc[DocData][annotationKey]).find(d => d.citation_id === citation.citation_id); + const highlight_doc = existingDoc ?? 
this.createImageCitationHighlight(x1, y1, x2, y2, citation, annotationKey, doc); - Doc.AddDocToList(doc[DocData], Doc.LayoutFieldKey(doc) + '_annotations', highlight_doc); - highlight_doc.annotationOn = doc; - Doc.SetContainer(highlight_doc, doc); - DocumentManager.Instance.showDocument(highlight_doc, { willZoomCentered: true }, () => {}); + DocumentManager.Instance.showDocument(highlight_doc, { willZoomCentered: true }, () => {}); + } } } // You can implement additional functionality here, such as showing a modal with the full citation content }; + createImageCitationHighlight = (x1: number, y1: number, x2: number, y2: number, citation: Citation, annotationKey: string, pdfDoc: Doc): Doc => { + const highlight_doc = Docs.Create.FreeformDocument([], { + x: x1, + y: y1, + _width: x2 - x1, + _height: y2 - y1, + backgroundColor: 'rgba(255, 255, 0, 0.5)', + }); + highlight_doc[DocData].citation_id = citation.citation_id; + Doc.AddDocToList(pdfDoc[DocData], annotationKey, highlight_doc); + highlight_doc.annotationOn = pdfDoc; + Doc.SetContainer(highlight_doc, pdfDoc); + return highlight_doc; + }; + // @action // uploadLinks = async (linkedDocs: Doc[]) => { - // if (this.isInitializing) { + // if (this.isUploadingDocs) { // console.log('Initialization in progress, upload aborted.'); // return; // } @@ -293,7 +293,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { observe(this.linked_docs_to_add, change => { if (change.type === 'add') { + runInAction(() => { + this.isUploadingDocs = true; + }); this.addDocToVectorstore(change.newValue); + runInAction(() => { + this.isUploadingDocs = false; + }); } else if (change.type === 'delete') { console.log('Deleted docs: ', change.oldValue); } @@ -358,7 +364,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { render() { return (
- {this.isInitializing &&
Initializing...
} + {this.isUploadingDocs &&
}
{ diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index e18224405..9f3dee990 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -20,7 +20,7 @@ const MessageComponentBox: React.FC = function ({ message let lastIndex = 0; message.citations.forEach((citation, idx) => { - const location = citation.location; + const location = citation.text_location; const textBefore = content.slice(lastIndex, location); const citationButton = ( ); - parts.push(textBefore, citationButton); + parts.push(citationButton); lastIndex = location; }); - parts.push(content.slice(lastIndex)); + parts.push({content.slice(lastIndex)}); return parts; }; diff --git a/src/client/views/nodes/ChatBox/prompts.ts b/src/client/views/nodes/ChatBox/prompts.ts index c011d65b7..d5eb99cb2 100644 --- a/src/client/views/nodes/ChatBox/prompts.ts +++ b/src/client/views/nodes/ChatBox/prompts.ts @@ -19,6 +19,8 @@ export function getReactPrompt(tools: Tool[], chatHistory: string): string { Your available actions are: ***** ${toolDescriptions} + ***** + no_tool: Use this when no external tool or action is required to answer the question. ********** Example: You will be called with: @@ -74,13 +76,59 @@ export function getReactPrompt(tools: Tool[], chatHistory: string): string { + ********** + Example: +You will be called with: +What is 2 + 2? + +You will then output: + + This is a simple arithmetic question that doesn't require any external tool. + no_tool + + +THEN PAUSE AND DO NOT OUTPUT ANYTHING. + +You will be called again with this: + + { + "no_tool": { + "name": "no_tool", + "description": "Use when no external tool or action is required", + "parameters": [] + } + } + + +You will then output: + + + + +THEN PAUSE AND DO NOT OUTPUT ANYTHING. + +You will then be called again with this: +No tool used. Proceed with answering the question. 
+ +You then output: + + + 2 + 2 equals 4. + + What is 3 + 3? + Can you explain the concept of addition? + What is 2 * 2? + + + ********** Here is the history of your conversation with the user (all loop steps are ommitted, so it is just the user query and final answer): ${chatHistory} Use context from the past conversation if necessary. ********** If the response is inadequate, repeat the loop, either trying a different tool or changing the parameters for the action input. - + ********** + !!!IMPORTANT Only use tools when they are absolutely necessary to answer the question. If you have enough information or knowledge to answer the question without using a tool, use the "no_tool" action instead. !!!IMPORTANT When you have an Answer, Write your entire response inside an element (which itself should be inside the step element for the current step). After you finish the answer, provide an array of 3 follow-up questions inside a array. These should relate to the query and the response and should aim to help the user better understand whatever they are looking for. ********** !!!IMPORTANT Every response, provide in full parsable and valid XML with the root element being the step number (e.g. ), iterated every time you output something new. 
diff --git a/src/client/views/nodes/ChatBox/tools/CollectionTool.ts b/src/client/views/nodes/ChatBox/tools/CollectionTool.ts deleted file mode 100644 index e69de29bb..000000000 diff --git a/src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts b/src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts new file mode 100644 index 000000000..26ac0d7cc --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts @@ -0,0 +1,35 @@ +import { DocCast } from '../../../../../fields/Types'; +import { DocServer } from '../../../../DocServer'; +import { Docs } from '../../../../documents/Documents'; +import { DocumentView } from '../../DocumentView'; +import { OpenWhere } from '../../OpenWhere'; +import { BaseTool } from './BaseTool'; + +export class GetDocsContentTool extends BaseTool<{ title: string; document_ids: string[] }> { + private _docView: DocumentView; + constructor(docView: DocumentView) { + super( + 'retrieveDocs', + 'Retrieves the contents of all Documents that the user is interacting with in Dash ', + { + title: { + type: 'string', + description: 'the title of the collection that you will be making', + required: 'true', + }, + }, + 'Provide a mathematical expression to calculate that would work with JavaScript eval().', + 'Runs a calculation and returns the number - uses JavaScript so be sure to use floating point syntax if necessary' + ); + this._docView = docView; + } + + async execute(args: { title: string; document_ids: string[] }): Promise { + // Note: Using eval() can be dangerous. Consider using a safer alternative. 
+ const docs = args.document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); + const collection = Docs.Create.FreeformDocument(docs, { title: args.title }); + this._docView._props.addDocTab(collection, OpenWhere.addRight); //in future, create popup prompting user where to add + return [{ type: 'text', text: 'Collection created in Dash called ' + args.title }]; + } +} +//export function create_collection(docView: DocumentView, document_ids: string[], title: string): string {} diff --git a/src/client/views/nodes/ChatBox/tools/GetDocsTool.ts b/src/client/views/nodes/ChatBox/tools/GetDocsTool.ts new file mode 100644 index 000000000..f970ca8ee --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/GetDocsTool.ts @@ -0,0 +1,29 @@ +import { DocCast } from '../../../../../fields/Types'; +import { DocServer } from '../../../../DocServer'; +import { Docs } from '../../../../documents/Documents'; +import { DocumentView } from '../../DocumentView'; +import { OpenWhere } from '../../OpenWhere'; +import { BaseTool } from './BaseTool'; + +export class GetDocsTool extends BaseTool<{ title: string; document_ids: string[] }> { + private _docView: DocumentView; + constructor(docView: DocumentView) { + super( + 'retrieveDocs', + 'Retrieves the contents of all Documents that the user is interacting with in Dash', + {}, + 'No need to provide anything. Just run the tool and it will retrieve the contents of all Documents that the user is interacting with in Dash.', + 'Returns the the documents in Dash in JSON form. This will include the title of the document, the location in the FreeFormDocument, and the content of the document, any applicable data fields, the layout of the document, etc.' + ); + this._docView = docView; + } + + async execute(args: { title: string; document_ids: string[] }): Promise { + // Note: Using eval() can be dangerous. Consider using a safer alternative. 
+ const docs = args.document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); + const collection = Docs.Create.FreeformDocument(docs, { title: args.title }); + this._docView._props.addDocTab(collection, OpenWhere.addRight); //in future, create popup prompting user where to add + return [{ type: 'text', text: 'Collection created in Dash called ' + args.title }]; + } +} +//export function create_collection(docView: DocumentView, document_ids: string[], title: string): string {} diff --git a/src/client/views/nodes/ChatBox/tools/NoTool.ts b/src/client/views/nodes/ChatBox/tools/NoTool.ts new file mode 100644 index 000000000..1f0830a77 --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/NoTool.ts @@ -0,0 +1,18 @@ +// tools/NoTool.ts +import { BaseTool } from './BaseTool'; + +export class NoTool extends BaseTool<{}> { + constructor() { + super( + 'no_tool', + 'Use this when no external tool or action is required to answer the question.', + {}, + 'When using the "no_tool" action, simply provide an empty element. The observation will always be "No tool used. Proceed with answering the question."', + 'Use when no external tool or action is required to answer the question.' + ); + } + + async execute(args: {}): Promise { + return [{ type: 'text', text: 'No tool used. Proceed with answering the question.' }]; + } +} -- cgit v1.2.3-70-g09d2 From ae4809cce727a056bdc648249c0f76174a496307 Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Wed, 17 Jul 2024 16:06:20 -0400 Subject: new prompt --- src/client/views/nodes/ChatBox/prompts.ts | 224 ++++++++++++++++-------------- 1 file changed, 117 insertions(+), 107 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/prompts.ts b/src/client/views/nodes/ChatBox/prompts.ts index d5eb99cb2..5c353337f 100644 --- a/src/client/views/nodes/ChatBox/prompts.ts +++ b/src/client/views/nodes/ChatBox/prompts.ts @@ -6,133 +6,143 @@ export function getReactPrompt(tools: Tool[], chatHistory: string): string { const toolDescriptions = tools.map(tool => `${tool.name}:\n${tool.briefSummary}`).join('\n*****\n'); return ` - You run in a loop of Thought, Action, (PAUSE), Action Input, (PAUSE), Observation. - (this Thought/Action/PAUSE/Action Input/PAUSE/Observation can repeat N times) - Contain each stage of the loop within an XML element that specifies the stage type (e.g. content of the thought). - At the end of the loop, you output an Answer with the answer content contained within an XML element with an tag. At the end of the answer should be an array of 3 potential follow-up questions for the user to ask you next, contained within a key. - Use to describe your thoughts about the question you have been asked. - Use to specify run one of the actions available to you. - Then, you will be provided with action rules within an element that specifies how you should structure the input to the action and what the output of that action will look like - then return another element. - Then, provide within an element each parameter, with parameter names as element tags themselves with their values inside, following the structure defined in the action rules. - Observation, in an element will be the result of running those actions. - ********** - Your available actions are: - ***** - ${toolDescriptions} - ***** - no_tool: Use this when no external tool or action is required to answer the question. 
- ********** - Example: - You will be called with: - What is the capital of France? - - You will then output: - - I should look up France on Wikipedia - wikipedia - - - THEN PAUSE AND DO NOT OUTPUT ANYTHING. - - You will be called again with this: - - { - "wikipedia": { - "name": "wikipedia", - "description": "Search Wikipedia and return a summary", - "parameters": [ - { - "title": { - "type": "string", - "description": "The title of the Wikipedia article to search", - "required": "true" - } - } - ] - } - } - - - You will then output (back in valid XML with the parameters each being a tag): - - - France - - - - THEN PAUSE AND DO NOT OUTPUT ANYTHING. - - You will then be called again with this: - France is a country. The capital is Paris. - - You then output: - - - The capital of France is Paris - - Where in France is Paris located? - What are some major tourist attractions in Paris? - What are some other major cities in France? - - - - ********** - Example: -You will be called with: -What is 2 + 2? - -You will then output: + +You are an advanced AI assistant with access to various tools. Your task is to answer user queries accurately and efficiently. Follow these instructions meticulously: + +1. Operation Loop: + You operate in a loop of Thought, Action, Observation, and Answer. Each iteration is numbered (step1, step2, etc.). + +2. Response Structure: + a. Enclose each step in numbered XML tags: , , etc. + b. Within each step, use the following tags as needed: + - Your reasoning process + - The tool you choose to use + - Parameters for the chosen tool + - Your final response (only in the last step) + +3. Detailed Process: + a. Analyze the user's query carefully. + b. Determine if a tool is necessary or if you can answer directly. + c. If a tool is needed: + - Select the most appropriate tool. + - Use to specify the tool. + - Wait for action rules to be provided. + - Provide tool parameters in . + - Wait for the observation from the tool. + d. 
If no tool is needed, use the 'no_tool' action. + e. Based on observations or your knowledge, formulate your answer. + f. Provide the final answer in the tag, including follow-up questions. + +4. Available Tools: +${toolDescriptions} + no_tool: Use when no external tool is required to answer the question. + +5. Critical Rules: + - Use tools ONLY when absolutely necessary for accurate answers. + - Ensure ALL XML is valid, properly nested, and complete. + - ALWAYS pause after and tags for system processing. + - If the initial answer is inadequate, iterate through additional steps to refine it. + - Utilize context from past conversations when relevant (provided in Chat History). + - ALWAYS include your final response within a single tag. + +6. Answer Format: + Your final tag must contain: + - The complete answer to the user's query. + - An array of EXACTLY 3 follow-up questions within tags. + +7. Example Interaction: +What is the population of Tokyo, and how does it compare to New York City? + - This is a simple arithmetic question that doesn't require any external tool. - no_tool + To answer this question accurately, I need to look up the current population figures for both Tokyo and New York City. I'll use the Wikipedia tool for this information. + wikipedia -THEN PAUSE AND DO NOT OUTPUT ANYTHING. +[SYSTEM PAUSE] -You will be called again with this: { - "no_tool": { - "name": "no_tool", - "description": "Use when no external tool or action is required", - "parameters": [] + "wikipedia": { + "name": "wikipedia", + "description": "Search Wikipedia and return a summary", + "parameters": [ + { + "title": { + "type": "string", + "description": "The title of the Wikipedia article to search", + "required": "true" + } + } + ] } } -You will then output: - + + Tokyo + -THEN PAUSE AND DO NOT OUTPUT ANYTHING. +[SYSTEM PAUSE] -You will then be called again with this: -No tool used. Proceed with answering the question. +Tokyo is the capital and most populous prefecture of Japan. 
The Tokyo metropolitan area, which includes Tokyo and several surrounding prefectures, is the world's most populous metropolitan area with an estimated 37.468 million residents as of 2018. -You then output: + Now that I have information about Tokyo, I need to get information about New York City for comparison. + wikipedia + + +[SYSTEM PAUSE] + + + { + "wikipedia": { + "name": "wikipedia", + "description": "Search Wikipedia and return a summary", + "parameters": [ + { + "title": { + "type": "string", + "description": "The title of the Wikipedia article to search", + "required": "true" + } + } + ] + } + } + + + + + New York City + + + +[SYSTEM PAUSE] + +New York City is the most populous city in the United States. With an estimated 2020 population of 8,804,190 distributed over 300.46 square miles (778.2 km2), New York City is also the most densely populated major city in the United States. + + + Now that I have the population information for both Tokyo and New York City, I can provide a comprehensive answer comparing the two. - 2 + 2 equals 4. + The population of Tokyo metropolitan area is approximately 37.468 million (as of 2018), while New York City has a population of about 8.8 million (as of 2020). Tokyo's population is significantly larger, more than four times that of New York City. It's important to note that the Tokyo figure refers to the broader metropolitan area, while the New York City figure is for the city proper. Even accounting for this difference, Tokyo remains substantially more populous than New York City. + - What is 3 + 3? - Can you explain the concept of addition? - What is 2 * 2? + What factors contribute to Tokyo's significantly larger population compared to New York City? + How do the population densities of Tokyo and New York City compare? + What challenges do these megacities face due to their large populations? 
- - ********** - Here is the history of your conversation with the user (all loop steps are ommitted, so it is just the user query and final answer): - ${chatHistory} - Use context from the past conversation if necessary. - ********** - If the response is inadequate, repeat the loop, either trying a different tool or changing the parameters for the action input. - ********** - !!!IMPORTANT Only use tools when they are absolutely necessary to answer the question. If you have enough information or knowledge to answer the question without using a tool, use the "no_tool" action instead. - !!!IMPORTANT When you have an Answer, Write your entire response inside an element (which itself should be inside the step element for the current step). After you finish the answer, provide an array of 3 follow-up questions inside a array. These should relate to the query and the response and should aim to help the user better understand whatever they are looking for. - ********** - !!!IMPORTANT Every response, provide in full parsable and valid XML with the root element being the step number (e.g. ), iterated every time you output something new. - `; + + +8. Chat History: +${chatHistory} + +Remember to use this history for context when appropriate. + +Now, process the user's query and provide your response following the format and rules outlined above. Ensure your final answer is comprehensive and entirely contained within a single tag. +`; } export function getSummarizedChunksPrompt(chunks: string): string { -- cgit v1.2.3-70-g09d2 From 834ca4e21fead079e681b963e2d533d93a53cb91 Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Mon, 22 Jul 2024 10:49:02 -0400 Subject: fixing summaries by having them part of the ReAct prompt --- src/client/views/nodes/ChatBox/Agent.ts | 8 +- src/client/views/nodes/ChatBox/ChatBox.tsx | 80 +----- .../views/nodes/ChatBox/MessageComponent.tsx | 11 +- src/client/views/nodes/ChatBox/prompts.ts | 295 +++++++++++---------- src/client/views/nodes/ChatBox/tools/RAGTool.ts | 11 +- 5 files changed, 172 insertions(+), 233 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index a3b1d083c..d494928f9 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -19,25 +19,27 @@ export class Agent { private interMessages: AgentMessage[] = []; private vectorstore: Vectorstore; private _history: () => string; + private _summaries: () => string; constructor(_vectorstore: Vectorstore, summaries: () => string, history: () => string) { this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); this.vectorstore = _vectorstore; this._history = history; + this._summaries = summaries; this.tools = { wikipedia: new WikipediaTool(), calculate: new CalculateTool(), - rag: new RAGTool(this.vectorstore, summaries), + rag: new RAGTool(this.vectorstore), no_tool: new NoTool(), }; } - async askAgent(question: string, maxTurns: number = 8): Promise { + async askAgent(question: string, maxTurns: number = 10): Promise { console.log(`Starting query: ${question}`); this.messages.push({ role: 'user', content: question }); const chatHistory = this._history(); console.log(`Chat history: ${chatHistory}`); - const systemPrompt = getReactPrompt(Object.values(this.tools), chatHistory); + const systemPrompt = getReactPrompt(Object.values(this.tools), this._summaries, chatHistory); console.log(`System prompt: ${systemPrompt}`); this.interMessages = [{ role: 'system', content: systemPrompt }]; diff --git 
a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index e3a164b3e..49c9b3292 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -4,7 +4,6 @@ import OpenAI, { ClientOptions } from 'openai'; import * as React from 'react'; import { Doc, DocListCast } from '../../../../fields/Doc'; import { CsvCast, DocCast, PDFCast, StrCast } from '../../../../fields/Types'; -import { Networking } from '../../../Network'; import { DocumentType } from '../../../documents/DocumentTypes'; import { Docs } from '../../../documents/Documents'; import { LinkManager } from '../../../util/LinkManager'; @@ -14,19 +13,12 @@ import './ChatBox.scss'; import MessageComponentBox from './MessageComponent'; import { ASSISTANT_ROLE, AssistantMessage, AI_Document, Citation, CHUNK_TYPE, Chunk, getChunkType } from './types'; import { Vectorstore } from './vectorstore/VectorstoreUpload'; -import { CollectionFreeFormDocumentView } from '../CollectionFreeFormDocumentView'; -import { CollectionFreeFormView } from '../../collections/collectionFreeForm'; import { Agent } from './Agent'; import dotenv from 'dotenv'; import { DocData, DocViews } from '../../../../fields/DocSymbols'; -import { DocumentView } from '../DocumentView'; import { AnswerParser } from './AnswerParser'; import { DocumentManager } from '../../../util/DocumentManager'; -import { UUID } from 'bson'; import { v4 as uuidv4 } from 'uuid'; -import { aS } from '@fullcalendar/core/internal-common'; -import { computeRect } from '@fullcalendar/core/internal'; -import { DocUtils } from '../../../documents/DocUtils'; dotenv.config(); @@ -78,14 +70,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { await this.vectorstore.addAIDoc(newLinkedDoc); }; - // @action - // uploadNewDocument = async (newDoc: Doc) => { - // const local_file_path: string = CsvCast(newDoc.data, PDFCast(newDoc.data)).url.pathname; - // const { document_json } = await 
Networking.PostToServer('/createDocument', { file_path: local_file_path }); - // this.documents.push(...document_json.map(convertToAIDocument)); - // //newDoc['ai_document'] = document_json; - // }; - @action toggleToolLogs = (index: number) => { this.expandedScratchpadIndex = this.expandedScratchpadIndex === index ? null : index; @@ -105,10 +89,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } }; - // getAssistantResponse() { - // return Docs.Create.MessageDocument(text, {}); - // } - @action askGPT = async (event: React.FormEvent): Promise => { event.preventDefault(); @@ -223,42 +203,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { return highlight_doc; }; - // @action - // uploadLinks = async (linkedDocs: Doc[]) => { - // if (this.isUploadingDocs) { - // console.log('Initialization in progress, upload aborted.'); - // return; - // } - // const urls: string[] = linkedDocs.map(doc => CsvCast(doc.data, PDFCast(doc.data)).url.pathname); - // const csvUrls: string[] = urls.filter(url => url.endsWith('.csv')); - // console.log(this.assistantID, this.threadID, urls); - - // await Networking.PostToServer('/uploadPDFs', { file_path: urls[0] }); - - // // linkedDocs.forEach((doc, i) => { - // // doc[this.Document[Id] + '_ai_field_id'] = openaiFileIds[i]; - // // console.log('AI Field ID: ' + openaiFileIds[i]); - // // }); - - // // if (csvUrls.length > 0) { - // // for (let i = 0; i < csvUrls.length; i++) { - // // this.linkedCsvIDs.push(openaiFileIds[urls.indexOf(csvUrls[i])]); - // // } - // // console.log('linked csvs:' + this.linkedCsvIDs); - // // await this.openai.beta.assistants.update(this.assistantID, { - // // tools: [{ type: 'file_search' }, { type: 'code_interpreter' }], - // // tool_resources: { - // // file_search: { - // // vector_store_ids: [this.vectorStoreID], - // // }, - // // code_interpreter: { - // // file_ids: this.linkedCsvIDs, - // // }, - // // }, - // // }); - // // } - // }; - componentDidMount() { 
this._props.setContentViewBox?.(this); if (this.dataDoc.data) { @@ -307,20 +251,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }); } - // case 'splice': - // if ((change as any).addedCount > 0) { - // // maybe check here if its already in the urls datadoc array so doesn't add twice - // console.log((change as any).added as Doc[]); - // this.addDocsToVectorstore((change as any).added as Doc[]); - // } - // // (change as any).removed.forEach((link: any) => remLinkFromDoc(toRealField(link))); - // break; - // case 'update': // let oldValue = change.oldValue; - // default: - @computed get linkedDocs() { - //return (CollectionFreeFormDocumentView.from(this._props.DocumentView?.())?._props.parent as CollectionFreeFormView)?.childDocs.filter(doc => doc != this.Document) ?? []; return LinkManager.Instance.getAllRelatedLinks(this.Document) .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) .map(d => DocCast(d?.annotationOn, d)) @@ -334,6 +266,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) .map(d => DocCast(d?.annotationOn, d)) .filter(d => d) + .filter(d => d.summary) .map((doc, index) => `${index + 1}) ${doc.summary}`) .join('\n') + '\n' ); @@ -375,19 +308,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }}>
{this.history.map((message, index) => ( - // ))} - {this.current_message && ( - - )}
diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index fb4a56bc3..56fde8bb2 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -1,7 +1,7 @@ import React from 'react'; import { observer } from 'mobx-react'; -import ReactMarkdown from 'react-markdown'; import { AssistantMessage, Citation } from './types'; +import Markdown from 'react-markdown'; interface MessageComponentProps { message: AssistantMessage; @@ -14,7 +14,7 @@ interface MessageComponentProps { const MessageComponentBox: React.FC = function ({ message, index, onFollowUpClick, onCitationClick, updateMessageCitations }) { const renderContent = (content: string) => { if (!message.citations || message.citations.length === 0) { - return {content}; + return content; } const parts = []; @@ -23,10 +23,9 @@ const MessageComponentBox: React.FC = function ({ message message.citations.forEach((citation, idx) => { const location = citation.text_location; const textBefore = content.slice(lastIndex, location); - parts.push({textBefore}); const citationButton = ( ); - parts.push(citationButton); + parts.push(textBefore, citationButton); lastIndex = location; }); - parts.push({content.slice(lastIndex)}); + parts.push(content.slice(lastIndex)); return parts; }; diff --git a/src/client/views/nodes/ChatBox/prompts.ts b/src/client/views/nodes/ChatBox/prompts.ts index 5c353337f..4109ffc17 100644 --- a/src/client/views/nodes/ChatBox/prompts.ts +++ b/src/client/views/nodes/ChatBox/prompts.ts @@ -2,147 +2,170 @@ import { Tool } from './types'; -export function getReactPrompt(tools: Tool[], chatHistory: string): string { +export function getReactPrompt(tools: Tool[], summaries: () => string, chatHistory: string): string { const toolDescriptions = tools.map(tool => `${tool.name}:\n${tool.briefSummary}`).join('\n*****\n'); return ` - -You are an advanced AI assistant with access to various 
tools. Your task is to answer user queries accurately and efficiently. Follow these instructions meticulously: - -1. Operation Loop: - You operate in a loop of Thought, Action, Observation, and Answer. Each iteration is numbered (step1, step2, etc.). - -2. Response Structure: - a. Enclose each step in numbered XML tags: , , etc. - b. Within each step, use the following tags as needed: - - Your reasoning process - - The tool you choose to use - - Parameters for the chosen tool - - Your final response (only in the last step) - -3. Detailed Process: - a. Analyze the user's query carefully. - b. Determine if a tool is necessary or if you can answer directly. - c. If a tool is needed: - - Select the most appropriate tool. - - Use to specify the tool. - - Wait for action rules to be provided. - - Provide tool parameters in . - - Wait for the observation from the tool. - d. If no tool is needed, use the 'no_tool' action. - e. Based on observations or your knowledge, formulate your answer. - f. Provide the final answer in the tag, including follow-up questions. - -4. Available Tools: -${toolDescriptions} - no_tool: Use when no external tool is required to answer the question. - -5. Critical Rules: - - Use tools ONLY when absolutely necessary for accurate answers. - - Ensure ALL XML is valid, properly nested, and complete. - - ALWAYS pause after and tags for system processing. - - If the initial answer is inadequate, iterate through additional steps to refine it. - - Utilize context from past conversations when relevant (provided in Chat History). - - ALWAYS include your final response within a single tag. - -6. Answer Format: - Your final tag must contain: - - The complete answer to the user's query. - - An array of EXACTLY 3 follow-up questions within tags. - -7. Example Interaction: -What is the population of Tokyo, and how does it compare to New York City? 
- - - To answer this question accurately, I need to look up the current population figures for both Tokyo and New York City. I'll use the Wikipedia tool for this information. - wikipedia - - -[SYSTEM PAUSE] - - - { - "wikipedia": { - "name": "wikipedia", - "description": "Search Wikipedia and return a summary", - "parameters": [ - { - "title": { - "type": "string", - "description": "The title of the Wikipedia article to search", - "required": "true" - } + + You are an advanced AI assistant with access to various tools. Your task is to answer user queries accurately and efficiently. Follow these instructions meticulously: + + 1. Operation Loop: + You operate in a loop of Thought, Action, (STOP), *Action Rules*, Action Input, (STOP), *Observation*, and Answer. Each iteration is numbered (step1, step2, etc.). + + 2. Response Structure: + a. Enclose each step in numbered XML tags: , , etc. + b. Within each step, use the following tags as needed: + - Your reasoning process + - The tool you choose to use + - Parameters for the chosen tool + - Your final response (only in the last step) + c. Stop after and tags for system processing. You will receive a user response after each time you stop in the form of either action rules or an observation. + + 3. Detailed Process: + a. Analyze the user's query carefully. + b. Determine if a tool is necessary or if you can answer directly. + c. If a tool is needed: + c.1) Select the most appropriate tool. + c.2) Use to specify the tool. + c.3) End your response in order for action rules to be provided. + c.4) Based on the action rules, provide tool parameters in . + c.5) End your response again and wait for the observation from the tool. + d. If no tool is needed, use the 'no_tool' action but still follow the same structure. + e. Use the retrieval (RAG) tool ANYTIME the question may potentially (even if you are not sure) relate to one of the user's documents. Here are the summaries of the user's documents: + ${summaries()} + f. 
Based on observations or your knowledge, formulate your answer. + g. Provide the final answer in the tag, including follow-up questions. + + 4. Available Tools: + ${toolDescriptions} + no_tool: Use when no external tool is required to answer the question. If a user document may relate to the query, do not use, and instead, use the RAG tool (even if unsure). + + 5. !!!Critical Rules!!!: + - Use tools ONLY when absolutely necessary for accurate answers (except when query may relate to user documents, then use RAG tool ALWAYS to start). + - Only provide one step at a time and only write assistant steps. Do not skip steps. Do not provide multiple steps at once. Decide the step that you will provide based on previous steps taken. + - Ensure ALL XML is valid, properly nested, and complete. + - ALWAYS stop after and tags. + - If the initial answer is inadequate, iterate through additional steps to refine it. + - Utilize context from past conversations when relevant (provided in Chat History). + - ALWAYS include your final response within a single tag. + + 6. Answer Format: + Your final tag must contain: + - The complete answer to the user's query. + - An array of EXACTLY 3 follow-up questions within tags. + + 7. Example Interaction (YOU ONLY OUTPUT THE ASSISTANT STEPS): + SYSTEM: + + ***SYSTEM MESSAGE ELIDED*** + + + USER: + + What is the population of Tokyo, and how does it compare to New York City? + + + ASSISTANT: + + To answer this question accurately, I need to look up the current population figures for both Tokyo and New York City. I'll use the Wikipedia tool for this information. 
+ wikipedia + + + USER: + + + { + "wikipedia": { + "name": "wikipedia", + "description": "Search Wikipedia and return a summary", + "parameters": [ + { + "title": { + "type": "string", + "description": "The title of the Wikipedia article to search", + "required": "true" + } + } + ] } - ] - } - } - - - - - Tokyo - - - -[SYSTEM PAUSE] - -Tokyo is the capital and most populous prefecture of Japan. The Tokyo metropolitan area, which includes Tokyo and several surrounding prefectures, is the world's most populous metropolitan area with an estimated 37.468 million residents as of 2018. - - - Now that I have information about Tokyo, I need to get information about New York City for comparison. - wikipedia - - -[SYSTEM PAUSE] - - - { - "wikipedia": { - "name": "wikipedia", - "description": "Search Wikipedia and return a summary", - "parameters": [ - { - "title": { - "type": "string", - "description": "The title of the Wikipedia article to search", - "required": "true" - } + } + + + + ASSISTANT: + + + Tokyo + + + + USER: + + Tokyo is the capital and most populous prefecture of Japan. The Tokyo metropolitan area, which includes Tokyo and several surrounding prefectures, is the world's most populous metropolitan area with an estimated 37.468 million residents as of 2018. + + + ASSISTANT: + + Now that I have information about Tokyo, I need to get information about New York City for comparison. + wikipedia + + + USER: + + + { + "wikipedia": { + "name": "wikipedia", + "description": "Search Wikipedia and return a summary", + "parameters": [ + { + "title": { + "type": "string", + "description": "The title of the Wikipedia article to search", + "required": "true" + } + } + ] } - ] - } - } - - - - - New York City - - - -[SYSTEM PAUSE] - -New York City is the most populous city in the United States. With an estimated 2020 population of 8,804,190 distributed over 300.46 square miles (778.2 km2), New York City is also the most densely populated major city in the United States. 
- - - Now that I have the population information for both Tokyo and New York City, I can provide a comprehensive answer comparing the two. - - The population of Tokyo metropolitan area is approximately 37.468 million (as of 2018), while New York City has a population of about 8.8 million (as of 2020). Tokyo's population is significantly larger, more than four times that of New York City. It's important to note that the Tokyo figure refers to the broader metropolitan area, while the New York City figure is for the city proper. Even accounting for this difference, Tokyo remains substantially more populous than New York City. - - - What factors contribute to Tokyo's significantly larger population compared to New York City? - How do the population densities of Tokyo and New York City compare? - What challenges do these megacities face due to their large populations? - - - - -8. Chat History: -${chatHistory} - -Remember to use this history for context when appropriate. - -Now, process the user's query and provide your response following the format and rules outlined above. Ensure your final answer is comprehensive and entirely contained within a single tag. -`; + } + + + + ASSISTANT: + + + New York City + + + + USER: + + New York City is the most populous city in the United States. With an estimated 2020 population of 8,804,190 distributed over 300.46 square miles (778.2 km2), New York City is also the most densely populated major city in the United States. + + + ASSISTANT: + + Now that I have the population information for both Tokyo and New York City, I can provide a comprehensive answer comparing the two. + + The population of Tokyo metropolitan area is approximately 37.468 million (as of 2018), while New York City has a population of about 8.8 million (as of 2020). Tokyo's population is significantly larger, more than four times that of New York City. 
It's important to note that the Tokyo figure refers to the broader metropolitan area, while the New York City figure is for the city proper. Even accounting for this difference, Tokyo remains substantially more populous than New York City. + + + What factors contribute to Tokyo's significantly larger population compared to New York City? + How do the population densities of Tokyo and New York City compare? + What challenges do these megacities face due to their large populations? + + + + + 8. Chat History: + ${chatHistory} + + Remember to use this history for context when appropriate. + + Now, process the user's query and provide your response following the format and rules outlined above. Ensure your final answer is comprehensive and entirely contained within a single tag. + + !!!IMPORTANT Very importantly, even if you use no tool or have an answer, follow the structure and output ONE step at a time. You will be provided with user steps and will output the appropriate single assistant step. FOLLOW THE STRUCTURE; Do not skip to the answer right away or skip steps. 
+`; } export function getSummarizedChunksPrompt(chunks: string): string { diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts index 0a4529974..5bc31dbab 100644 --- a/src/client/views/nodes/ChatBox/tools/RAGTool.ts +++ b/src/client/views/nodes/ChatBox/tools/RAGTool.ts @@ -5,10 +5,7 @@ import * as fs from 'fs'; import { Networking } from '../../../../Network'; export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { - constructor( - private vectorstore: Vectorstore, - summaries: () => string - ) { + constructor(private vectorstore: Vectorstore) { super( 'rag', 'Perform a RAG search on user documents', @@ -43,11 +40,7 @@ export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a set of document chunks (either images or text) that can be used to provide a grounded response based on - user documents - - !!!IMPORTANT Use the RAG tool ANYTIME the question may potentially (even if you are not sure) relate to one of the user's documents. - Here are the summaries of the user's documents: - ${summaries()}` + user documents` ); } -- cgit v1.2.3-70-g09d2 From e7b7f7000534200d75f3519ffb13d5b22dbc5481 Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Mon, 22 Jul 2024 12:19:27 -0400 Subject: attempting new RAG prompt and formatting of citations vs grounded text --- src/client/views/nodes/ChatBox/Agent.ts | 20 ++-- src/client/views/nodes/ChatBox/AnswerParser.ts | 84 ++++++++++++---- src/client/views/nodes/ChatBox/ChatBox.tsx | 19 ++-- .../views/nodes/ChatBox/MessageComponent.tsx | 84 ++++++++-------- src/client/views/nodes/ChatBox/tools/RAGTool.ts | 106 ++++++++++++++++++--- src/client/views/nodes/ChatBox/types.ts | 15 ++- 6 files changed, 237 insertions(+), 91 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index d494928f9..ca1b5c60c 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -43,14 +43,14 @@ export class Agent { console.log(`System prompt: ${systemPrompt}`); this.interMessages = [{ role: 'system', content: systemPrompt }]; - this.interMessages.push({ role: 'user', content: `${question}` }); + this.interMessages.push({ role: 'user', content: `${question}` }); const parser = new XMLParser(); const builder = new XMLBuilder(); let currentAction: string | undefined; - for (let i = 0; i < maxTurns; i++) { - console.log(`Turn ${i + 1}/${maxTurns}`); + for (let i = 1; i < maxTurns; i++) { + console.log(`Turn ${i}/${maxTurns}`); const result = await this.execute(); console.log(`Bot response: ${result}`); @@ -73,17 +73,20 @@ export class Agent { currentAction = step[key] as string; console.log(`Action: ${currentAction}`); if (this.tools[currentAction]) { + i++; const nextPrompt = [ { type: 'text', - text: builder.build({ action_rules: this.tools[currentAction].getActionRule() }), + text: `` + builder.build({ action_rules: this.tools[currentAction].getActionRule() }) + `<\step>`, }, ]; this.interMessages.push({ role: 'user', content: nextPrompt }); + break; } else { console.log('Error: No valid action'); - this.interMessages.push({ role: 'user', content: 'No valid action, 
try again.' }); + i++; + this.interMessages.push({ role: 'user', content: `No valid action, try again.` }); break; } } else if (key === 'action_input') { @@ -92,7 +95,12 @@ export class Agent { if (currentAction) { try { const observation = await this.processAction(currentAction, step[key]); - const nextPrompt = [{ type: 'text', text: '' }, ...observation, { type: 'text', text: '' }]; + // const stepElement = parsedResult.documentElement; + // const rootTagName = stepElement.tagName; + // const match = rootTagName.match(/step(\d+)/); + // const currentStep = match ? parseInt(match[1]) + 1 : 1; + i++; + const nextPrompt = [{ type: 'text', text: `` }, ...observation, { type: 'text', text: '' }]; console.log(observation); this.interMessages.push({ role: 'user', content: nextPrompt }); break; diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/AnswerParser.ts index 1162d46b0..4b6c817fd 100644 --- a/src/client/views/nodes/ChatBox/AnswerParser.ts +++ b/src/client/views/nodes/ChatBox/AnswerParser.ts @@ -1,12 +1,13 @@ -import { ASSISTANT_ROLE, AssistantMessage, Citation, getChunkType } from './types'; +import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType } from './types'; import { v4 as uuid } from 'uuid'; export class AnswerParser { static parse(xml: string): AssistantMessage { const answerRegex = /([\s\S]*?)<\/answer>/; - const citationRegex = /(.*?)<\/citation>/g; + const citationRegex = /([\s\S]*?)<\/citation>/g; const followUpQuestionsRegex = /([\s\S]*?)<\/follow_up_questions>/; const questionRegex = /(.*?)<\/question>/g; + const groundedTextRegex = /([\s\S]*?)<\/grounded_text>/g; const answerMatch = answerRegex.exec(xml); const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml); @@ -16,45 +17,86 @@ export class AnswerParser { } const rawTextContent = answerMatch[1].trim(); - const textContentWithCitations = rawTextContent.replace(citationRegex, ''); - const textContent = 
textContentWithCitations.replace(followUpQuestionsRegex, '').trim(); - + let textContent: AssistantMessage['content'] = []; let citations: Citation[] = []; - let match: RegExpExecArray | null; - - let plainTextOffset = 0; - let citationOffset = 0; - - while ((match = citationRegex.exec(rawTextContent)) !== null) { - const [fullMatch, chunk_id, type, direct_text] = match; - const citationStartIndex = match.index; - const citationPlainStart = citationStartIndex - citationOffset; + let contentIndex = 0; + // Parse citations + let citationMatch; + while ((citationMatch = citationRegex.exec(rawTextContent)) !== null) { + const [_, index, chunk_id, type, direct_text] = citationMatch; citations.push({ direct_text: direct_text.trim(), type: getChunkType(type), - chunk_id: chunk_id, - text_location: citationPlainStart, + chunk_id, citation_id: uuid(), }); + } - citationOffset += fullMatch.length; + // Parse text content (normal and grounded) + let lastIndex = 0; + let matches = []; + + // Find all grounded text matches + let groundedTextMatch; + while ((groundedTextMatch = groundedTextRegex.exec(rawTextContent)) !== null) { + matches.push({ + type: 'grounded', + index: groundedTextMatch.index, + length: groundedTextMatch[0].length, + citationIndexes: groundedTextMatch[1], + text: groundedTextMatch[2], + }); + } + + // Sort matches by their index in the original text + matches.sort((a, b) => a.index - b.index); + + // Process normal and grounded text in order + for (let i = 0; i <= matches.length; i++) { + const currentMatch = matches[i]; + const nextMatchIndex = currentMatch ? 
currentMatch.index : rawTextContent.length; + + // Add normal text before the current grounded text (or end of content) + if (nextMatchIndex > lastIndex) { + const normalText = rawTextContent.slice(lastIndex, nextMatchIndex).trim(); + if (normalText) { + textContent.push({ + index: contentIndex++, + type: TEXT_TYPE.NORMAL, + text: normalText, + citation_ids: null, + }); + } + } + + // Add grounded text if there's a match + if (currentMatch) { + const citationIds = currentMatch.citationIndexes.split(',').map(index => citations[parseInt(index) - 1].citation_id); + textContent.push({ + index: contentIndex++, + type: TEXT_TYPE.GROUNDED, + text: currentMatch.text.trim(), + citation_ids: citationIds, + }); + lastIndex = currentMatch.index + currentMatch.length; + } } let followUpQuestions: string[] = []; if (followUpQuestionsMatch) { const questionsText = followUpQuestionsMatch[1]; - let questionMatch: RegExpExecArray | null; - + let questionMatch; while ((questionMatch = questionRegex.exec(questionsText)) !== null) { followUpQuestions.push(questionMatch[1].trim()); } } + const assistantResponse: AssistantMessage = { role: ASSISTANT_ROLE.ASSISTANT, - text_content: textContent, + content: textContent, follow_up_questions: followUpQuestions, - citations: citations, + citations, }; return assistantResponse; diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 49c9b3292..9e604073d 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -11,7 +11,7 @@ import { ViewBoxAnnotatableComponent } from '../../DocComponent'; import { FieldView, FieldViewProps } from '../FieldView'; import './ChatBox.scss'; import MessageComponentBox from './MessageComponent'; -import { ASSISTANT_ROLE, AssistantMessage, AI_Document, Citation, CHUNK_TYPE, Chunk, getChunkType } from './types'; +import { ASSISTANT_ROLE, AssistantMessage, AI_Document, Citation, CHUNK_TYPE, Chunk, getChunkType, 
TEXT_TYPE } from './types'; import { Vectorstore } from './vectorstore/VectorstoreUpload'; import { Agent } from './Agent'; import dotenv from 'dotenv'; @@ -58,7 +58,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory); reaction( - () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, text_content: msg.text_content, follow_up_questions: msg.follow_up_questions, citations: msg.citations })), + () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, content: msg.content, follow_up_questions: msg.follow_up_questions, citations: msg.citations })), serializableHistory => { this.dataDoc.data = JSON.stringify(serializableHistory); } @@ -101,7 +101,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { try { textInput.value = ''; runInAction(() => { - this.history.push({ role: ASSISTANT_ROLE.USER, text_content: trimmedText }); + this.history.push({ role: ASSISTANT_ROLE.USER, content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: trimmedText, citation_ids: null }] }); this.isLoading = true; }); @@ -113,7 +113,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } catch (err) { console.error('Error:', err); runInAction(() => { - this.history.push({ role: ASSISTANT_ROLE.ASSISTANT, text_content: 'Sorry, I encountered an error while processing your request.' 
}); + this.history.push({ role: ASSISTANT_ROLE.USER, content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: 'Sorry, I encountered an error while processing your request.', citation_ids: null }] }); }); } finally { runInAction(() => { @@ -212,7 +212,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.history.push( ...storedHistory.map((msg: AssistantMessage) => ({ role: msg.role, - text_content: msg.text_content, + content: msg.content, follow_up_questions: msg.follow_up_questions, citations: msg.citations, })) @@ -222,7 +222,12 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { console.error('Failed to parse history from dataDoc:', e); } } else { - this.history = [{ role: ASSISTANT_ROLE.ASSISTANT, text_content: 'Welcome to the Document Analyser Assistant! Link a document or ask questions to get started.' }]; + runInAction(() => { + this.history.push({ + role: ASSISTANT_ROLE.USER, + content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: 'Welcome to the Document Analyser Assistant! 
Link a document or ask questions to get started.', citation_ids: null }], + }); + }); } reaction( () => { @@ -276,7 +281,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { get formattedHistory(): string { let history = '\n'; for (const message of this.history) { - history += `<${message.role}>${message.text_content}\n`; + history += `<${message.role}>${message.content.map(content => content.text).join(' ')}\n`; } history += ''; return history; diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index 56fde8bb2..fd7c445c5 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -1,6 +1,6 @@ import React from 'react'; import { observer } from 'mobx-react'; -import { AssistantMessage, Citation } from './types'; +import { AssistantMessage, Citation, MessageContent, TEXT_TYPE } from './types'; import Markdown from 'react-markdown'; interface MessageComponentProps { @@ -12,53 +12,51 @@ interface MessageComponentProps { } const MessageComponentBox: React.FC = function ({ message, index, onFollowUpClick, onCitationClick, updateMessageCitations }) { - const renderContent = (content: string) => { - if (!message.citations || message.citations.length === 0) { - return content; - } - - const parts = []; - let lastIndex = 0; - - message.citations.forEach((citation, idx) => { - const location = citation.text_location; - const textBefore = content.slice(lastIndex, location); - const citationButton = ( - + const renderContent = (item: MessageContent) => { + const i = item.index; + if (item.type === TEXT_TYPE.GROUNDED) { + const citation_ids = item.citation_ids || []; + return ( + + {item.text} + {citation_ids.map((id, idx) => { + const citation = message.citations?.find(c => c.citation_id === id); + if (!citation) return null; + return ( + + ); + })} + ); - parts.push(textBefore, citationButton); - lastIndex = location; - }); - - 
parts.push(content.slice(lastIndex)); - - return parts; + } else { + return {item.text}; + } }; return (
-
{renderContent(message.text_content)}
+
{message.content && message.content.map(messageFragment => {renderContent(messageFragment)})}
{message.follow_up_questions && message.follow_up_questions.length > 0 && (

Follow-up Questions:

diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts index 5bc31dbab..4b29d6bce 100644 --- a/src/client/views/nodes/ChatBox/tools/RAGTool.ts +++ b/src/client/views/nodes/ChatBox/tools/RAGTool.ts @@ -17,26 +17,108 @@ export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { required: 'true', }, }, - `Your task is to first provide a response to the user's prompt based on the information given in the chunks and considering the chat history. Follow these steps: + ` + Your task is to provide a comprehensive response to the user's prompt based on the given chunks and chat history. Follow these structural guidelines meticulously: - 1. Carefully read and analyze the provided chunks, which may include text, images, or tables. Each chunk has an associated chunk_id. + 1. Overall Structure: + + [Main content with nested grounded_text tags] + + [Individual citation tags] + + + [Three question tags] + + - 2. Review the prompt and chat history to understand the context of the user's question or request. + 2. Grounded Text Tag Structure: + - Basic format: + + [Your generated text based on chunk information] + - 3. Formulate a response that addresses the prompt using information from the relevant chunks. Your response should be informative and directly answer the user's question or request. + - Nested format: + + [General information] + + [More specific information] + + - 4. Use citations to support your response. Citations should contain direct textual references to the granular, specific part of the original chunk that applies to the situation—with no text ommitted. Citations should be in the following format: - - For text: relevant direct text from the chunk that the citation in referencing specifically - - For images or tables: + - Multiple citation indices: + + [Information synthesized from multiple chunks] + - Place citations after the sentences they apply to. 
You can use multiple citations in a row. + 3. Citation Tag Structure: + + [For text: relevant subset of original chunk] + [For image/table: leave empty] + - 5. If there's insufficient information in the provided chunks to answer the prompt sufficiently, ALWAYS respond with RAG not applicable + 4. Detailed Grounded Text Guidelines: + a. Wrap all information derived from chunks in grounded_text tags. + b. Nest grounded_text tags when presenting hierarchical or increasingly specific information or when a larger section of generated text is best grounded by one subset of a chunk and smaller sections of that generated text are best grounded by other subsets of either the same or different chunk(s). + c. Use a single grounded_text tag for closely related information that references the same citation (subset of text from a chunk). + d. Combine multiple citation indices for synthesized information from multiple citations. + e. Ensure every grounded_text tag has at least one corresponding citation. + f. Grounded text can be as short as a few words or as long as several sentences. + d. Avoid overlapping grounded_text tags; instead, use nesting or sequential tags. - Write your entire response, including follow-up questions, inside tags. Remember to use the citation format for both text and image references, and maintain a conversational tone throughout your response. + 5. Detailed Citation Guidelines: + a. Create a unique citation for each distinct piece of information from the chunks that is used to support grounded_text. + b. Ensure each citation has a unique index number. + c. Specify the correct type: "text", "image", or "table". + d. For text chunks, include only the relevant subset of the original text that the grounded_text is based on. + e. For image/table chunks, leave the citation content empty. + f. One citation can be used for multiple grounded_text tags if they are based on the same information. + g. 
One text chunk can have multiple citations if different parts of the text have different important information. + h. !!!DO NOT OVERCITE - only include citations for information that is directly relevant to the grounded_text. - !!!IMPORTANT Before you close the tag with , within the answer tags provide a set of 3 follow-up questions inside a tag and individually within tags. These should relate to the document, the current query, and the chat_history and should aim to help the user better understand whatever they are looking for. - Also, ensure that the answer tags are wrapped with the correct step tags as well.`, + 6. Structural Integrity Checks: + a. Ensure all opening tags have corresponding closing tags. + b. Verify that all grounded_text tags have valid citation_index attributes. + c. Check that all cited indices in grounded_text tags have corresponding citations. + d. Confirm proper nesting - tags opened last should be closed first. + + Example of grounded_text usage: + + + + Artificial Intelligence (AI) is revolutionizing various sectors, with healthcare experiencing significant transformations in areas such as diagnosis and treatment planning. + + In the field of medical diagnosis, AI has shown remarkable capabilities, particularly in radiology. For instance, AI systems have drastically improved mammogram analysis, achieving 99% accuracy at a rate 30 times faster than human radiologists. + + This advancement not only enhances the efficiency of healthcare systems but also significantly reduces the occurrence of false positives, leading to fewer unnecessary biopsies and reduced patient stress. + + + + + + Beyond diagnosis, AI is playing a crucial role in drug discovery and development. By analyzing vast amounts of genetic and molecular data, AI algorithms can identify potential drug candidates much faster than traditional methods. 
+ + This could potentially reduce the time and cost of bringing new medications to market, especially for rare diseases that have historically received less attention due to limited market potential. + + + + [... rest of the content ...] + + + Artificial Intelligence is revolutionizing various industries, with healthcare being one of the most profoundly affected sectors. + AI has shown particular promise in the field of radiology, enhancing the accuracy and speed of image analysis. + According to recent studies, AI systems have achieved 99% accuracy in mammogram analysis, performing the task 30 times faster than human radiologists. + The improvement in mammogram accuracy has led to a significant reduction in false positives, decreasing the need for unnecessary biopsies and reducing patient anxiety. + AI is accelerating the drug discovery process by analyzing complex molecular and genetic data to identify potential drug candidates. + The use of AI in drug discovery could significantly reduce the time and cost associated with bringing new medications to market, particularly for rare diseases. + + + + How might AI-driven personalized medicine impact the cost and accessibility of healthcare in the future? + What measures can be taken to ensure that AI systems in healthcare are free from biases and equally effective for diverse populations? + How could the role of healthcare professionals evolve as AI becomes more integrated into medical practices? 
+ + + `, `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a set of document chunks (either images or text) that can be used to provide a grounded response based on diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index d702d5c41..10c80c05a 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -6,6 +6,11 @@ export enum ASSISTANT_ROLE { ASSISTANT = 'assistant', } +export enum TEXT_TYPE { + NORMAL = 'normal', + GROUNDED = 'grounded', +} + export enum CHUNK_TYPE { TEXT = 'text', IMAGE = 'image', @@ -27,16 +32,22 @@ export function getChunkType(type: string): CHUNK_TYPE { export interface AssistantMessage { role: ASSISTANT_ROLE; - text_content: string; + content: MessageContent[]; follow_up_questions?: string[]; citations?: Citation[]; } +export interface MessageContent { + index: number; + type: TEXT_TYPE; + text: string; + citation_ids: string[] | null; +} + export interface Citation { direct_text?: string; type: CHUNK_TYPE; chunk_id: string; - text_location: number; citation_id: string; } -- cgit v1.2.3-70-g09d2 From 56f6637e9d9052c4bc7724d2713573ac67141631 Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Mon, 22 Jul 2024 13:20:36 -0400 Subject: works decently well --- src/client/views/nodes/ChatBox/Agent.ts | 4 +- src/client/views/nodes/ChatBox/AnswerParser.ts | 70 ++++++++-------------- .../views/nodes/ChatBox/MessageComponent.tsx | 2 +- src/client/views/nodes/ChatBox/prompts.ts | 4 +- src/client/views/nodes/ChatBox/tools/RAGTool.ts | 52 ++++++---------- 5 files changed, 47 insertions(+), 85 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index ca1b5c60c..b9d137270 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -22,7 +22,7 @@ export class Agent { private _summaries: () => string; constructor(_vectorstore: Vectorstore, summaries: () => string, history: () => string) { - this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); + this.client = new OpenAI({ apiKey: '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', dangerouslyAllowBrowser: true }); this.vectorstore = _vectorstore; this._history = history; this._summaries = summaries; @@ -34,7 +34,7 @@ export class Agent { }; } - async askAgent(question: string, maxTurns: number = 10): Promise { + async askAgent(question: string, maxTurns: number = 20): Promise { console.log(`Starting query: ${question}`); this.messages.push({ role: 'user', content: question }); const chatHistory = this._history(); diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/AnswerParser.ts index 4b6c817fd..dd7ec3499 100644 --- a/src/client/views/nodes/ChatBox/AnswerParser.ts +++ b/src/client/views/nodes/ChatBox/AnswerParser.ts @@ -17,71 +17,49 @@ export class AnswerParser { } const rawTextContent = answerMatch[1].trim(); - let textContent: AssistantMessage['content'] = []; + let content: AssistantMessage['content'] = []; let citations: Citation[] = []; let contentIndex = 0; // Parse citations let citationMatch; + 
const citationMap = new Map(); while ((citationMatch = citationRegex.exec(rawTextContent)) !== null) { const [_, index, chunk_id, type, direct_text] = citationMatch; + const citation_id = uuid(); + citationMap.set(index, citation_id); citations.push({ direct_text: direct_text.trim(), type: getChunkType(type), chunk_id, - citation_id: uuid(), + citation_id, }); } - // Parse text content (normal and grounded) - let lastIndex = 0; - let matches = []; + // Parse grounded text content + const parseGroundedText = (text: string): AssistantMessage['content'] => { + const result: AssistantMessage['content'] = []; + let lastIndex = 0; + let match; - // Find all grounded text matches - let groundedTextMatch; - while ((groundedTextMatch = groundedTextRegex.exec(rawTextContent)) !== null) { - matches.push({ - type: 'grounded', - index: groundedTextMatch.index, - length: groundedTextMatch[0].length, - citationIndexes: groundedTextMatch[1], - text: groundedTextMatch[2], - }); - } - - // Sort matches by their index in the original text - matches.sort((a, b) => a.index - b.index); - - // Process normal and grounded text in order - for (let i = 0; i <= matches.length; i++) { - const currentMatch = matches[i]; - const nextMatchIndex = currentMatch ? 
currentMatch.index : rawTextContent.length; - - // Add normal text before the current grounded text (or end of content) - if (nextMatchIndex > lastIndex) { - const normalText = rawTextContent.slice(lastIndex, nextMatchIndex).trim(); - if (normalText) { - textContent.push({ - index: contentIndex++, - type: TEXT_TYPE.NORMAL, - text: normalText, - citation_ids: null, - }); - } - } + while ((match = groundedTextRegex.exec(text)) !== null) { + const [fullMatch, citationIndex, groundedText] = match; + const citation_ids = citationIndex.split(',').map(index => citationMap.get(index) || ''); - // Add grounded text if there's a match - if (currentMatch) { - const citationIds = currentMatch.citationIndexes.split(',').map(index => citations[parseInt(index) - 1].citation_id); - textContent.push({ + result.push({ index: contentIndex++, type: TEXT_TYPE.GROUNDED, - text: currentMatch.text.trim(), - citation_ids: citationIds, + text: groundedText.trim(), + citation_ids, }); - lastIndex = currentMatch.index + currentMatch.length; + + lastIndex = match.index + fullMatch.length; } - } + + return result; + }; + + content = parseGroundedText(rawTextContent); let followUpQuestions: string[] = []; if (followUpQuestionsMatch) { @@ -94,7 +72,7 @@ export class AnswerParser { const assistantResponse: AssistantMessage = { role: ASSISTANT_ROLE.ASSISTANT, - content: textContent, + content, follow_up_questions: followUpQuestions, citations, }; diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index fd7c445c5..d24a55d23 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -24,7 +24,7 @@ const MessageComponentBox: React.FC = function ({ message if (!citation) return null; return (
diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index 07bfd4e3d..70b0527a2 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -62,6 +62,14 @@ const MessageComponentBox: React.FC = function ({ message return (
+
+ {message.thoughts && + message.thoughts.map((thought, index) => ( + + Thought: {thought} + + ))} +
{message.content && message.content.map(messageFragment => {renderContent(messageFragment)})}
{message.follow_up_questions && message.follow_up_questions.length > 0 && (
diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index 1c7aaa4b7..391f124e0 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -47,6 +47,7 @@ export interface AssistantMessage { role: ASSISTANT_ROLE; content: MessageContent[]; follow_up_questions?: string[]; + thoughts?: string[]; citations?: Citation[]; } -- cgit v1.2.3-70-g09d2 From 2c38022a7f21d4b498277b18ad31baf24ac3a143 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sun, 18 Aug 2024 10:12:35 -0400 Subject: Attempting streaming content --- src/client/views/nodes/ChatBox/Agent.ts | 198 +++++++++++++++++---- src/client/views/nodes/ChatBox/ChatBox.tsx | 43 ++--- .../views/nodes/ChatBox/MessageComponent.tsx | 32 +++- src/client/views/nodes/ChatBox/StreamParser.ts | 125 +++++++++++++ src/client/views/nodes/ChatBox/types.ts | 1 + 5 files changed, 326 insertions(+), 73 deletions(-) create mode 100644 src/client/views/nodes/ChatBox/StreamParser.ts (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index 8bad29d9a..2c7c40e0c 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -1,5 +1,5 @@ import OpenAI from 'openai'; -import { Tool, AgentMessage } from './types'; +import { Tool, AgentMessage, AssistantMessage, TEXT_TYPE, CHUNK_TYPE, ASSISTANT_ROLE } from './types'; import { getReactPrompt } from './prompts'; import { XMLParser, XMLBuilder } from 'fast-xml-parser'; import { Vectorstore } from './vectorstore/Vectorstore'; @@ -12,6 +12,9 @@ import { WebsiteInfoScraperTool } from './tools/WebsiteInfoScraperTool'; import { SearchTool } from './tools/SearchTool'; import { NoTool } from './tools/NoTool'; import { on } from 'events'; +import { StreamParser } from './StreamParser'; +import { v4 as uuidv4 } from 'uuid'; +import { AnswerParser } from './AnswerParser'; dotenv.config(); @@ -41,7 +44,7 @@ export class 
Agent { }; } - async askAgent(question: string, maxTurns: number = 20, onUpdate: (update: string) => void): Promise { + async askAgent(question: string, maxTurns: number = 20, onUpdate: (update: AssistantMessage) => void): Promise { console.log(`Starting query: ${question}`); this.messages.push({ role: 'user', content: question }); const chatHistory = this._history(); @@ -53,12 +56,18 @@ export class Agent { const builder = new XMLBuilder({ ignoreAttributes: false, attributeNamePrefix: '@_' }); let currentAction: string | undefined; - let thoughtNumber = 0; + let assistantMessage: AssistantMessage = { + role: ASSISTANT_ROLE.ASSISTANT, + content: [], + thoughts: [], + actions: [], + citations: [], + }; for (let i = 2; i < maxTurns; i += 2) { console.log(`Turn ${i}/${maxTurns}`); - const result = await this.execute(onUpdate, thoughtNumber); + const result = await this.execute(assistantMessage, onUpdate); this.interMessages.push({ role: 'assistant', content: result }); let parsedResult; @@ -66,23 +75,25 @@ export class Agent { parsedResult = parser.parse(result); } catch (error) { console.log('Error: Invalid XML response from bot'); - return 'Invalid response format.'; + return assistantMessage; } const stage = parsedResult.stage; if (!stage) { console.log('Error: No stage found in response'); - return 'Invalid response format: No stage found.'; + return assistantMessage; } for (const key in stage) { - if (key === 'thought') { - console.log(`Thought: ${stage[key]}`); - thoughtNumber++; - } else if (key === 'action') { + if (!assistantMessage.actions) { + assistantMessage.actions = []; + } + if (key === 'action') { currentAction = stage[key] as string; console.log(`Action: ${currentAction}`); + assistantMessage.actions.push({ index: assistantMessage.actions.length, action: currentAction, action_input: '' }); + onUpdate({ ...assistantMessage }); if (this.tools[currentAction]) { const nextPrompt = `` + builder.build({ action_rules: 
this.tools[currentAction].getActionRule() }) + ``; this.interMessages.push({ role: 'user', content: nextPrompt }); @@ -93,8 +104,12 @@ export class Agent { break; } } else if (key === 'action_input') { - const actionInput = builder.build({ action_input: stage[key] }); + const actionInput = stage[key]; console.log(`Action input: ${actionInput}`); + if (currentAction && assistantMessage.actions.length > 0) { + assistantMessage.actions[assistantMessage.actions.length - 1].action_input = actionInput; + onUpdate({ ...assistantMessage }); + } if (currentAction) { try { const observation = await this.processAction(currentAction, stage[key]); @@ -104,24 +119,28 @@ export class Agent { break; } catch (error) { console.log(`Error processing action: ${error}`); - return `${error}`; + return assistantMessage; } } else { console.log('Error: Action input without a valid action'); - return 'Action input without a valid action'; + return assistantMessage; } } else if (key === 'answer') { console.log('Answer found. Ending query.'); - onUpdate(`ANSWER:${stage[key]}`); - return result; + const parsedAnswer = AnswerParser.parse(`${stage[key]}`); + assistantMessage.content = parsedAnswer.content; + assistantMessage.follow_up_questions = parsedAnswer.follow_up_questions; + assistantMessage.citations = parsedAnswer.citations; + onUpdate({ ...assistantMessage }); + return assistantMessage; } } } console.log('Reached maximum turns. 
Ending query.'); - return 'Reached maximum turns without finding an answer'; + return assistantMessage; } - private async execute(onUpdate: (update: string) => void, thoughtNumber: number): Promise { + private async execute(assistantMessage: AssistantMessage, onUpdate: (update: AssistantMessage) => void): Promise { const stream = await this.client.chat.completions.create({ model: 'gpt-4o', messages: this.interMessages as ChatCompletionMessageParam[], @@ -130,32 +149,147 @@ export class Agent { }); let fullResponse = ''; + let currentTag = ''; let currentContent = ''; + let isInsideTag = false; + let isInsideActionInput = false; + let actionInputContent = ''; + + if (!assistantMessage.actions) { + assistantMessage.actions = []; + } for await (const chunk of stream) { const content = chunk.choices[0]?.delta?.content || ''; fullResponse += content; - currentContent += content; + for (const char of content) { + if (char === '<') { + isInsideTag = true; + if (currentTag && currentContent) { + if (currentTag === 'action_input') { + assistantMessage.actions[assistantMessage.actions.length - 1].action_input = actionInputContent; + actionInputContent = ''; + } else { + this.processStreamedContent(currentTag, currentContent, assistantMessage); + } + onUpdate({ ...assistantMessage }); + } + currentTag = ''; + currentContent = ''; + } else if (char === '>') { + isInsideTag = false; + if (currentTag === 'action_input') { + isInsideActionInput = true; + } else if (currentTag === '/action_input') { + isInsideActionInput = false; + assistantMessage.actions[assistantMessage.actions.length - 1].action_input = actionInputContent; + actionInputContent = ''; + onUpdate({ ...assistantMessage }); + } + if (currentTag.startsWith('/')) { + currentTag = ''; + } + } else if (isInsideTag) { + currentTag += char; + } else if (isInsideActionInput) { + actionInputContent += char; + } else { + currentContent += char; + if (currentTag === 'thought' || currentTag === 'action') { + 
this.processStreamedContent(currentTag, currentContent, assistantMessage); + onUpdate({ ...assistantMessage }); + } + } + } + } - console.log(currentContent); + return fullResponse; + } - if (currentContent.includes('')) { - onUpdate(`THOUGHT${thoughtNumber}:${currentContent}`); - } - if (currentContent.includes('')) { - currentContent = ''; - } - if (currentContent.includes('')) { - onUpdate(`ANSWER_START:${currentContent}`); - } - if (currentContent.includes('')) { - onUpdate(`ANSWER_END:${currentContent}`); - currentContent = ''; + private processStreamedContent(tag: string, content: string, assistantMessage: AssistantMessage) { + if (!assistantMessage.thoughts) { + assistantMessage.thoughts = []; + } + if (!assistantMessage.actions) { + assistantMessage.actions = []; + } + switch (tag) { + case 'thought': + if (assistantMessage.thoughts.length > 0) { + assistantMessage.thoughts[assistantMessage.thoughts.length - 1] = content; + } else { + assistantMessage.thoughts.push(content); + } + break; + case 'action': + if (assistantMessage.actions.length > 0) { + assistantMessage.actions[assistantMessage.actions.length - 1].action = content; + } else { + assistantMessage.actions.push({ index: assistantMessage.actions.length, action: content, action_input: '' }); + } + break; + case 'action_input': + if (assistantMessage.actions.length > 0) { + assistantMessage.actions[assistantMessage.actions.length - 1].action_input = content; + } + break; + } + } + + private processAnswer(content: string, assistantMessage: AssistantMessage) { + const groundedTextRegex = /([\s\S]*?)<\/grounded_text>/g; + let lastIndex = 0; + let match; + + while ((match = groundedTextRegex.exec(content)) !== null) { + const [fullMatch, citationIndex, groundedText] = match; + + // Add normal text before the grounded text + if (match.index > lastIndex) { + const normalText = content.slice(lastIndex, match.index).trim(); + if (normalText) { + assistantMessage.content.push({ + index: 
assistantMessage.content.length, + type: TEXT_TYPE.NORMAL, + text: normalText, + citation_ids: null, + }); + } } + + // Add grounded text + const citation_id = uuidv4(); + assistantMessage.content.push({ + index: assistantMessage.content.length, + type: TEXT_TYPE.GROUNDED, + text: groundedText.trim(), + citation_ids: [citation_id], + }); + + // Add citation + assistantMessage.citations?.push({ + citation_id, + chunk_id: '', + type: CHUNK_TYPE.TEXT, + direct_text: '', + }); + + lastIndex = match.index + fullMatch.length; } - return fullResponse; + // Add any remaining normal text after the last grounded text + if (lastIndex < content.length) { + const remainingText = content.slice(lastIndex).trim(); + if (remainingText) { + assistantMessage.content.push({ + index: assistantMessage.content.length, + type: TEXT_TYPE.NORMAL, + text: remainingText, + citation_ids: null, + }); + } + } } private async processAction(action: string, actionInput: any): Promise { diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index d38c71810..099c0298e 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -149,45 +149,24 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { if (trimmedText) { try { - //Make everything go through the answer parser - //Pass in all the updates to the AnswrParser and it will create the assistant messasge that will be the current message including adding in the thoughts and also waiting for the asnwer and also showing tool progress textInput.value = ''; this.history.push({ role: ASSISTANT_ROLE.USER, content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: trimmedText, citation_ids: null }] }); this.isLoading = true; - this.current_message = { role: ASSISTANT_ROLE.ASSISTANT, content: [], thoughts: [] }; - - let currentThought = ''; - - this.current_message?.thoughts?.push(currentThought); - - const onUpdate = (update: string) => { - const thoughtNumber = 
Number(update.match(/^THOUGHT(\d+):/)?.[1] ?? 0); - const regex = /\s*([\s\S]*?)(?:<\/thought>|$)/; - const match = update.match(regex); - const currentThought = match ? match[1].trim() : ''; - //const numericPrefix = Number(update.match(/^\d+/)?.[0]); - if (update.startsWith('THOUGHT')) { - console.log('Thought:', currentThought, thoughtNumber); - if (this.current_message?.thoughts) { - if (this.current_message.thoughts.length <= thoughtNumber) { - this.current_message.thoughts.push(currentThought); - } else { - this.current_message.thoughts[thoughtNumber] = currentThought; - } - } - console.log('Thoughts:', this.current_message?.thoughts); - } + this.current_message = { role: ASSISTANT_ROLE.ASSISTANT, content: [], thoughts: [], actions: [], citations: [] }; + + const onUpdate = (update: AssistantMessage) => { + runInAction(() => { + this.current_message = { ...update }; + }); }; - const response = await this.agent.askAgent(trimmedText, 20, onUpdate); - const parsedAnswer = AnswerParser.parse(response); - parsedAnswer.thoughts = this.current_message?.thoughts; + const finalMessage = await this.agent.askAgent(trimmedText, 20, onUpdate); - if (this.current_message) { - this.history.push(parsedAnswer); + runInAction(() => { + this.history.push({ ...finalMessage }); this.current_message = undefined; - } - this.dataDoc.data = JSON.stringify(this.history); + this.dataDoc.data = JSON.stringify(this.history); + }); } catch (err) { console.error('Error:', err); this.history.push({ role: ASSISTANT_ROLE.ASSISTANT, content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: 'Sorry, I encountered an error while processing your request.', citation_ids: null }] }); diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index 70b0527a2..e82dcd5f7 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -55,21 +55,35 @@ const MessageComponentBox: React.FC = 
function ({ message {item.text} ); + } else if ('query' in item) { + // Handle the case where the item has a query property + return ( + + {JSON.stringify(item.query)} + + ); } else { - return {item.text}; + // Fallback for any other unexpected cases + return {JSON.stringify(item)}; } }; return (
-
- {message.thoughts && - message.thoughts.map((thought, index) => ( - - Thought: {thought} - - ))} -
+ {message.thoughts && + message.thoughts.map((thought, idx) => ( +
+ Thought: {thought} +
+ ))} + {message.actions && + message.actions.map((action, idx) => ( +
+ Action: {action.action} +
+ Input: {action.action_input} +
+ ))}
{message.content && message.content.map(messageFragment => {renderContent(messageFragment)})}
{message.follow_up_questions && message.follow_up_questions.length > 0 && (
diff --git a/src/client/views/nodes/ChatBox/StreamParser.ts b/src/client/views/nodes/ChatBox/StreamParser.ts new file mode 100644 index 000000000..9b087663a --- /dev/null +++ b/src/client/views/nodes/ChatBox/StreamParser.ts @@ -0,0 +1,125 @@ +import { AssistantMessage, ASSISTANT_ROLE, TEXT_TYPE, Citation, CHUNK_TYPE } from './types'; +import { v4 as uuidv4 } from 'uuid'; + +export class StreamParser { + private currentMessage: AssistantMessage; + private currentTag: string | null = null; + private buffer: string = ''; + private citationIndex: number = 1; + + constructor() { + this.currentMessage = { + role: ASSISTANT_ROLE.ASSISTANT, + content: [], + thoughts: [], + actions: [], + citations: [], + }; + } + + parse(chunk: string): AssistantMessage { + this.buffer += chunk; + + while (this.buffer.length > 0) { + if (this.currentTag === null) { + const openTagMatch = this.buffer.match(/<(\w+)>/); + if (openTagMatch) { + this.currentTag = openTagMatch[1]; + this.buffer = this.buffer.slice(openTagMatch.index! 
+ openTagMatch[0].length); + } else { + break; + } + } else { + const closeTagIndex = this.buffer.indexOf(``); + if (closeTagIndex !== -1) { + const content = this.buffer.slice(0, closeTagIndex); + this.processTag(this.currentTag, content); + this.buffer = this.buffer.slice(closeTagIndex + this.currentTag.length + 3); + this.currentTag = null; + } else { + break; + } + } + } + + return this.currentMessage; + } + + private processTag(tag: string, content: string) { + switch (tag) { + case 'thought': + this.currentMessage.thoughts!.push(content); + break; + case 'action': + this.currentMessage.actions!.push({ index: this.currentMessage.actions!.length, action: content, action_input: '' }); + break; + case 'action_input': + if (this.currentMessage.actions!.length > 0) { + this.currentMessage.actions![this.currentMessage.actions!.length - 1].action_input = content; + } + break; + case 'answer': + this.processAnswer(content); + break; + } + } + + private processAnswer(content: string) { + const groundedTextRegex = /([\s\S]*?)<\/grounded_text>/g; + let lastIndex = 0; + let match; + + while ((match = groundedTextRegex.exec(content)) !== null) { + const [fullMatch, citationIndex, groundedText] = match; + + // Add normal text before the grounded text + if (match.index > lastIndex) { + const normalText = content.slice(lastIndex, match.index).trim(); + if (normalText) { + this.currentMessage.content.push({ + index: this.currentMessage.content.length, + type: TEXT_TYPE.NORMAL, + text: normalText, + citation_ids: null, + }); + } + } + + // Add grounded text + const citation_id = uuidv4(); + this.currentMessage.content.push({ + index: this.currentMessage.content.length, + type: TEXT_TYPE.GROUNDED, + text: groundedText.trim(), + citation_ids: [citation_id], + }); + + // Add citation + this.currentMessage.citations!.push({ + citation_id, + chunk_id: '', + type: CHUNK_TYPE.TEXT, + direct_text: '', + }); + + lastIndex = match.index + fullMatch.length; + } + + // Add any remaining 
normal text after the last grounded text + if (lastIndex < content.length) { + const remainingText = content.slice(lastIndex).trim(); + if (remainingText) { + this.currentMessage.content.push({ + index: this.currentMessage.content.length, + type: TEXT_TYPE.NORMAL, + text: remainingText, + citation_ids: null, + }); + } + } + } + + getResult(): AssistantMessage { + return this.currentMessage; + } +} diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index 391f124e0..efeec7b93 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -48,6 +48,7 @@ export interface AssistantMessage { content: MessageContent[]; follow_up_questions?: string[]; thoughts?: string[]; + actions?: { index: number; action: string; action_input: string }[]; citations?: Citation[]; } -- cgit v1.2.3-70-g09d2 From ff3c041af6738d025926732115a032d40cffb859 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Mon, 19 Aug 2024 10:55:57 -0400 Subject: working on making streaming work --- src/client/views/nodes/ChatBox/Agent.ts | 125 +++++++------------------ src/client/views/nodes/ChatBox/AnswerParser.ts | 14 +-- src/client/views/nodes/ChatBox/types.ts | 1 + 3 files changed, 40 insertions(+), 100 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index 2c7c40e0c..413ecbd41 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -27,6 +27,8 @@ export class Agent { private _history: () => string; private _summaries: () => string; private _csvData: () => { filename: string; id: string; text: string }[]; + private actionNumber: number = 0; + private thoughtNumber: number = 0; constructor(_vectorstore: Vectorstore, summaries: () => string, history: () => string, csvData: () => { filename: string; id: string; text: string }[], addLinkedUrlDoc: (url: string, id: string) => void) { this.client = new 
OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); @@ -44,14 +46,13 @@ export class Agent { }; } - async askAgent(question: string, maxTurns: number = 20, onUpdate: (update: AssistantMessage) => void): Promise { + async askAgent(question: string, maxTurns: number = 30, onUpdate: (update: AssistantMessage) => void): Promise { console.log(`Starting query: ${question}`); this.messages.push({ role: 'user', content: question }); const chatHistory = this._history(); const systemPrompt = getReactPrompt(Object.values(this.tools), this._summaries, chatHistory); this.interMessages = [{ role: 'system', content: systemPrompt }]; this.interMessages.push({ role: 'user', content: `${question}` }); - const parser = new XMLParser({ ignoreAttributes: false, attributeNamePrefix: '@_' }); const builder = new XMLBuilder({ ignoreAttributes: false, attributeNamePrefix: '@_' }); let currentAction: string | undefined; @@ -75,6 +76,7 @@ export class Agent { parsedResult = parser.parse(result); } catch (error) { console.log('Error: Invalid XML response from bot'); + assistantMessage.content.push({ index: assistantMessage.content.length, type: TEXT_TYPE.ERROR, text: 'Invalid response from bot', citation_ids: null }); return assistantMessage; } @@ -82,6 +84,7 @@ export class Agent { if (!stage) { console.log('Error: No stage found in response'); + assistantMessage.content.push({ index: assistantMessage.content.length, type: TEXT_TYPE.ERROR, text: 'Invalid response from bot', citation_ids: null }); return assistantMessage; } @@ -89,13 +92,20 @@ export class Agent { if (!assistantMessage.actions) { assistantMessage.actions = []; } - if (key === 'action') { + if (key === 'thought') { + console.log(`Thought: ${stage[key]}`); + this.thoughtNumber++; + } else if (key === 'action') { currentAction = stage[key] as string; console.log(`Action: ${currentAction}`); - assistantMessage.actions.push({ index: assistantMessage.actions.length, action: currentAction, action_input: '' }); 
onUpdate({ ...assistantMessage }); if (this.tools[currentAction]) { - const nextPrompt = `` + builder.build({ action_rules: this.tools[currentAction].getActionRule() }) + ``; + const nextPrompt = [ + { + type: 'text', + text: `` + builder.build({ action_rules: this.tools[currentAction].getActionRule() }) + ``, + }, + ]; this.interMessages.push({ role: 'user', content: nextPrompt }); break; } else { @@ -104,35 +114,31 @@ export class Agent { break; } } else if (key === 'action_input') { - const actionInput = stage[key]; + const actionInput = builder.build({ action_input: stage[key] }); console.log(`Action input: ${actionInput}`); - if (currentAction && assistantMessage.actions.length > 0) { - assistantMessage.actions[assistantMessage.actions.length - 1].action_input = actionInput; - onUpdate({ ...assistantMessage }); - } if (currentAction) { try { const observation = await this.processAction(currentAction, stage[key]); - const nextPrompt = `${observation}`; + const nextPrompt = [{ type: 'text', text: ` ` }, ...observation, { type: 'text', text: '' }]; console.log(observation); this.interMessages.push({ role: 'user', content: nextPrompt }); + this.actionNumber++; //might not work with no tool break; } catch (error) { console.log(`Error processing action: ${error}`); + assistantMessage.content.push({ index: assistantMessage.content.length, type: TEXT_TYPE.ERROR, text: 'Invalid response from bot', citation_ids: null }); return assistantMessage; } } else { console.log('Error: Action input without a valid action'); + assistantMessage.content.push({ index: assistantMessage.content.length, type: TEXT_TYPE.ERROR, text: 'Invalid response from bot', citation_ids: null }); return assistantMessage; } } else if (key === 'answer') { console.log('Answer found. 
Ending query.'); - const parsedAnswer = AnswerParser.parse(`${stage[key]}`); - assistantMessage.content = parsedAnswer.content; - assistantMessage.follow_up_questions = parsedAnswer.follow_up_questions; - assistantMessage.citations = parsedAnswer.citations; - onUpdate({ ...assistantMessage }); - return assistantMessage; + const parsedAnswer = AnswerParser.parse(result, assistantMessage); + onUpdate({ ...parsedAnswer }); + return parsedAnswer; } } } @@ -148,12 +154,12 @@ export class Agent { stream: true, }); - let fullResponse = ''; - let currentTag = ''; - let currentContent = ''; - let isInsideTag = false; - let isInsideActionInput = false; - let actionInputContent = ''; + let fullResponse: string = ''; + let currentTag: string = ''; + let currentContent: string = ''; + let isInsideTag: boolean = false; + let isInsideActionInput: boolean = false; + let actionInputContent: string = ''; if (!assistantMessage.actions) { assistantMessage.actions = []; @@ -183,6 +189,7 @@ export class Agent { isInsideActionInput = true; } else if (currentTag === '/action_input') { isInsideActionInput = false; + console.log('Action input:', actionInputContent); assistantMessage.actions[assistantMessage.actions.length - 1].action_input = actionInputContent; actionInputContent = ''; onUpdate({ ...assistantMessage }); @@ -216,82 +223,18 @@ export class Agent { } switch (tag) { case 'thought': - if (assistantMessage.thoughts.length > 0) { - assistantMessage.thoughts[assistantMessage.thoughts.length - 1] = content; - } else { - assistantMessage.thoughts.push(content); - } + assistantMessage.thoughts[this.thoughtNumber] = content; break; case 'action': - if (assistantMessage.actions.length > 0) { - assistantMessage.actions[assistantMessage.actions.length - 1].action = content; - } else { - assistantMessage.actions.push({ index: assistantMessage.actions.length, action: content, action_input: '' }); - } + assistantMessage.actions[this.actionNumber].action = content; + break; case 
'action_input': - if (assistantMessage.actions.length > 0) { - assistantMessage.actions[assistantMessage.actions.length - 1].action_input = content; - } + assistantMessage.actions[this.actionNumber].action_input = content; break; } } - private processAnswer(content: string, assistantMessage: AssistantMessage) { - const groundedTextRegex = /([\s\S]*?)<\/grounded_text>/g; - let lastIndex = 0; - let match; - - while ((match = groundedTextRegex.exec(content)) !== null) { - const [fullMatch, citationIndex, groundedText] = match; - - // Add normal text before the grounded text - if (match.index > lastIndex) { - const normalText = content.slice(lastIndex, match.index).trim(); - if (normalText) { - assistantMessage.content.push({ - index: assistantMessage.content.length, - type: TEXT_TYPE.NORMAL, - text: normalText, - citation_ids: null, - }); - } - } - - // Add grounded text - const citation_id = uuidv4(); - assistantMessage.content.push({ - index: assistantMessage.content.length, - type: TEXT_TYPE.GROUNDED, - text: groundedText.trim(), - citation_ids: [citation_id], - }); - - // Add citation - assistantMessage.citations?.push({ - citation_id, - chunk_id: '', - type: CHUNK_TYPE.TEXT, - direct_text: '', - }); - - lastIndex = match.index + fullMatch.length; - } - - // Add any remaining normal text after the last grounded text - if (lastIndex < content.length) { - const remainingText = content.slice(lastIndex).trim(); - if (remainingText) { - assistantMessage.content.push({ - index: assistantMessage.content.length, - type: TEXT_TYPE.NORMAL, - text: remainingText, - citation_ids: null, - }); - } - } - } - private async processAction(action: string, actionInput: any): Promise { if (!(action in this.tools)) { throw new Error(`Unknown action: ${action}`); diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/AnswerParser.ts index 9956792d8..68637b7c7 100644 --- a/src/client/views/nodes/ChatBox/AnswerParser.ts +++ 
b/src/client/views/nodes/ChatBox/AnswerParser.ts @@ -2,7 +2,7 @@ import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getC import { v4 as uuid } from 'uuid'; export class AnswerParser { - static parse(xml: string): AssistantMessage { + static parse(xml: string, currentMessage: AssistantMessage): AssistantMessage { const answerRegex = /([\s\S]*?)<\/answer>/; const citationsRegex = /([\s\S]*?)<\/citations>/; const citationRegex = /([\s\S]*?)<\/citation>/g; @@ -102,14 +102,10 @@ export class AnswerParser { followUpQuestions.push(questionMatch[1].trim()); } } + currentMessage.content = currentMessage.content.concat(content); + currentMessage.citations = citations; + currentMessage.follow_up_questions = followUpQuestions; - const assistantResponse: AssistantMessage = { - role: ASSISTANT_ROLE.ASSISTANT, - content, - follow_up_questions: followUpQuestions, - citations, - }; - - return assistantResponse; + return currentMessage; } } diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index efeec7b93..b4e66bdbe 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -10,6 +10,7 @@ export enum ASSISTANT_ROLE { export enum TEXT_TYPE { NORMAL = 'normal', GROUNDED = 'grounded', + ERROR = 'error', } export enum CHUNK_TYPE { -- cgit v1.2.3-70-g09d2 From 4b6ce2ffcb82c1a7467ef7ed8b67b97094a8f6b6 Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Mon, 19 Aug 2024 12:53:00 -0400 Subject: Streaming wiht thoughts and actions working much better but still get error for web search --- src/client/views/nodes/ChatBox/Agent.ts | 112 +++++++-------------- src/client/views/nodes/ChatBox/AnswerParser.ts | 17 ++-- src/client/views/nodes/ChatBox/ChatBox.tsx | 25 +++-- .../views/nodes/ChatBox/MessageComponent.tsx | 32 +++--- src/client/views/nodes/ChatBox/prompts.ts | 50 ++++++--- src/client/views/nodes/ChatBox/types.ts | 17 +++- 6 files changed, 126 insertions(+), 127 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index 413ecbd41..ae08271ee 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -1,5 +1,5 @@ import OpenAI from 'openai'; -import { Tool, AgentMessage, AssistantMessage, TEXT_TYPE, CHUNK_TYPE, ASSISTANT_ROLE } from './types'; +import { Tool, AgentMessage, AssistantMessage, TEXT_TYPE, CHUNK_TYPE, ASSISTANT_ROLE, ProcessingInfo, PROCESSING_TYPE } from './types'; import { getReactPrompt } from './prompts'; import { XMLParser, XMLBuilder } from 'fast-xml-parser'; import { Vectorstore } from './vectorstore/Vectorstore'; @@ -29,6 +29,8 @@ export class Agent { private _csvData: () => { filename: string; id: string; text: string }[]; private actionNumber: number = 0; private thoughtNumber: number = 0; + private processingNumber: number = 0; + private processingInfo: ProcessingInfo[] = []; constructor(_vectorstore: Vectorstore, summaries: () => string, history: () => string, csvData: () => { filename: string; id: string; text: string }[], addLinkedUrlDoc: (url: string, id: string) => void) { this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); @@ -46,7 +48,7 @@ export class Agent { }; } - async askAgent(question: string, maxTurns: number = 30, onUpdate: (update: AssistantMessage) => void): Promise { + async askAgent(question: string, 
onUpdate: (update: ProcessingInfo[]) => void, maxTurns: number = 30): Promise { console.log(`Starting query: ${question}`); this.messages.push({ role: 'user', content: question }); const chatHistory = this._history(); @@ -57,48 +59,34 @@ export class Agent { const builder = new XMLBuilder({ ignoreAttributes: false, attributeNamePrefix: '@_' }); let currentAction: string | undefined; - let assistantMessage: AssistantMessage = { - role: ASSISTANT_ROLE.ASSISTANT, - content: [], - thoughts: [], - actions: [], - citations: [], - }; + this.processingInfo = []; for (let i = 2; i < maxTurns; i += 2) { console.log(`Turn ${i}/${maxTurns}`); - const result = await this.execute(assistantMessage, onUpdate); + const result = await this.execute(onUpdate); this.interMessages.push({ role: 'assistant', content: result }); let parsedResult; try { parsedResult = parser.parse(result); } catch (error) { - console.log('Error: Invalid XML response from bot'); - assistantMessage.content.push({ index: assistantMessage.content.length, type: TEXT_TYPE.ERROR, text: 'Invalid response from bot', citation_ids: null }); - return assistantMessage; + throw new Error(`Error parsing response: ${error}`); } const stage = parsedResult.stage; if (!stage) { - console.log('Error: No stage found in response'); - assistantMessage.content.push({ index: assistantMessage.content.length, type: TEXT_TYPE.ERROR, text: 'Invalid response from bot', citation_ids: null }); - return assistantMessage; + throw new Error(`Error: No stage found in response`); } for (const key in stage) { - if (!assistantMessage.actions) { - assistantMessage.actions = []; - } if (key === 'thought') { console.log(`Thought: ${stage[key]}`); - this.thoughtNumber++; + this.processingNumber++; } else if (key === 'action') { currentAction = stage[key] as string; console.log(`Action: ${currentAction}`); - onUpdate({ ...assistantMessage }); if (this.tools[currentAction]) { const nextPrompt = [ { @@ -118,35 +106,29 @@ export class Agent { 
console.log(`Action input: ${actionInput}`); if (currentAction) { try { - const observation = await this.processAction(currentAction, stage[key]); + const observation = await this.processAction(currentAction, stage[key].inputs); const nextPrompt = [{ type: 'text', text: ` ` }, ...observation, { type: 'text', text: '' }]; console.log(observation); this.interMessages.push({ role: 'user', content: nextPrompt }); - this.actionNumber++; //might not work with no tool + this.processingNumber++; break; } catch (error) { - console.log(`Error processing action: ${error}`); - assistantMessage.content.push({ index: assistantMessage.content.length, type: TEXT_TYPE.ERROR, text: 'Invalid response from bot', citation_ids: null }); - return assistantMessage; + throw new Error(`Error processing action: ${error}`); } } else { - console.log('Error: Action input without a valid action'); - assistantMessage.content.push({ index: assistantMessage.content.length, type: TEXT_TYPE.ERROR, text: 'Invalid response from bot', citation_ids: null }); - return assistantMessage; + throw new Error('Error: Action input without a valid action'); } } else if (key === 'answer') { console.log('Answer found. Ending query.'); - const parsedAnswer = AnswerParser.parse(result, assistantMessage); - onUpdate({ ...parsedAnswer }); + const parsedAnswer = AnswerParser.parse(result, this.processingInfo); return parsedAnswer; } } } - console.log('Reached maximum turns. Ending query.'); - return assistantMessage; + throw new Error('Reached maximum turns. 
Ending query.'); } - private async execute(assistantMessage: AssistantMessage, onUpdate: (update: AssistantMessage) => void): Promise { + private async execute(onUpdate: (update: ProcessingInfo[]) => void): Promise { const stream = await this.client.chat.completions.create({ model: 'gpt-4o', messages: this.interMessages as ChatCompletionMessageParam[], @@ -158,12 +140,6 @@ export class Agent { let currentTag: string = ''; let currentContent: string = ''; let isInsideTag: boolean = false; - let isInsideActionInput: boolean = false; - let actionInputContent: string = ''; - - if (!assistantMessage.actions) { - assistantMessage.actions = []; - } for await (const chunk of stream) { const content = chunk.choices[0]?.delta?.content || ''; @@ -172,40 +148,20 @@ export class Agent { for (const char of content) { if (char === '<') { isInsideTag = true; - if (currentTag && currentContent) { - if (currentTag === 'action_input') { - assistantMessage.actions[assistantMessage.actions.length - 1].action_input = actionInputContent; - actionInputContent = ''; - } else { - this.processStreamedContent(currentTag, currentContent, assistantMessage); - } - onUpdate({ ...assistantMessage }); - } currentTag = ''; currentContent = ''; } else if (char === '>') { isInsideTag = false; - if (currentTag === 'action_input') { - isInsideActionInput = true; - } else if (currentTag === '/action_input') { - isInsideActionInput = false; - console.log('Action input:', actionInputContent); - assistantMessage.actions[assistantMessage.actions.length - 1].action_input = actionInputContent; - actionInputContent = ''; - onUpdate({ ...assistantMessage }); - } if (currentTag.startsWith('/')) { currentTag = ''; } } else if (isInsideTag) { currentTag += char; - } else if (isInsideActionInput) { - actionInputContent += char; } else { currentContent += char; - if (currentTag === 'thought' || currentTag === 'action') { - this.processStreamedContent(currentTag, currentContent, assistantMessage); - onUpdate({ 
...assistantMessage }); + if (currentTag === 'thought' || currentTag === 'action_input_description') { + this.processStreamedContent(currentTag, currentContent); + onUpdate(this.processingInfo); } } } @@ -214,24 +170,24 @@ export class Agent { return fullResponse; } - private processStreamedContent(tag: string, content: string, assistantMessage: AssistantMessage) { - if (!assistantMessage.thoughts) { - assistantMessage.thoughts = []; - } - if (!assistantMessage.actions) { - assistantMessage.actions = []; - } + private processStreamedContent(tag: string, streamed_content: string) { + const current_info = this.processingInfo.find(info => info.index === this.processingNumber); switch (tag) { case 'thought': - assistantMessage.thoughts[this.thoughtNumber] = content; - break; - case 'action': - assistantMessage.actions[this.actionNumber].action = content; - - break; - case 'action_input': - assistantMessage.actions[this.actionNumber].action_input = content; + if (current_info) { + current_info.content = streamed_content; + } else { + console.log(`Adding thought: ${streamed_content}`); + this.processingInfo.push({ index: this.processingNumber, type: PROCESSING_TYPE.THOUGHT, content: streamed_content.trim() }); + } break; + case 'action_input_description': + if (current_info) { + current_info.content = streamed_content; + } else { + console.log(`Adding thought: ${streamed_content}`); + this.processingInfo.push({ index: this.processingNumber, type: PROCESSING_TYPE.ACTION, content: streamed_content.trim() }); + } } } diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/AnswerParser.ts index 68637b7c7..1d46a366d 100644 --- a/src/client/views/nodes/ChatBox/AnswerParser.ts +++ b/src/client/views/nodes/ChatBox/AnswerParser.ts @@ -1,8 +1,8 @@ -import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType } from './types'; +import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType, 
ProcessingInfo } from './types'; import { v4 as uuid } from 'uuid'; export class AnswerParser { - static parse(xml: string, currentMessage: AssistantMessage): AssistantMessage { + static parse(xml: string, processingInfo: ProcessingInfo[]): AssistantMessage { const answerRegex = /([\s\S]*?)<\/answer>/; const citationsRegex = /([\s\S]*?)<\/citations>/; const citationRegex = /([\s\S]*?)<\/citation>/g; @@ -102,10 +102,15 @@ export class AnswerParser { followUpQuestions.push(questionMatch[1].trim()); } } - currentMessage.content = currentMessage.content.concat(content); - currentMessage.citations = citations; - currentMessage.follow_up_questions = followUpQuestions; - return currentMessage; + const assistantResponse: AssistantMessage = { + role: ASSISTANT_ROLE.ASSISTANT, + content, + follow_up_questions: followUpQuestions, + citations, + processing_info: processingInfo, + }; + + return assistantResponse; } } diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 099c0298e..36416a330 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -11,7 +11,7 @@ import { ViewBoxAnnotatableComponent } from '../../DocComponent'; import { FieldView, FieldViewProps } from '../FieldView'; import './ChatBox.scss'; import MessageComponentBox from './MessageComponent'; -import { ASSISTANT_ROLE, AssistantMessage, AI_Document, Citation, CHUNK_TYPE, RAGChunk, getChunkType, TEXT_TYPE, SimplifiedChunk } from './types'; +import { ASSISTANT_ROLE, AssistantMessage, AI_Document, Citation, CHUNK_TYPE, RAGChunk, getChunkType, TEXT_TYPE, SimplifiedChunk, ProcessingInfo } from './types'; import { Vectorstore } from './vectorstore/Vectorstore'; import { Agent } from './Agent'; import dotenv from 'dotenv'; @@ -150,26 +150,30 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { if (trimmedText) { try { textInput.value = ''; - this.history.push({ role: ASSISTANT_ROLE.USER, content: [{ 
index: 0, type: TEXT_TYPE.NORMAL, text: trimmedText, citation_ids: null }] }); + this.history.push({ role: ASSISTANT_ROLE.USER, content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: trimmedText, citation_ids: null }], processing_info: [] }); this.isLoading = true; - this.current_message = { role: ASSISTANT_ROLE.ASSISTANT, content: [], thoughts: [], actions: [], citations: [] }; + this.current_message = { role: ASSISTANT_ROLE.ASSISTANT, content: [], citations: [], processing_info: [] }; - const onUpdate = (update: AssistantMessage) => { + const onUpdate = (update: ProcessingInfo[]) => { runInAction(() => { - this.current_message = { ...update }; + if (this.current_message) { + this.current_message = { ...this.current_message, processing_info: update }; + } }); }; - const finalMessage = await this.agent.askAgent(trimmedText, 20, onUpdate); + const finalMessage = await this.agent.askAgent(trimmedText, onUpdate); runInAction(() => { - this.history.push({ ...finalMessage }); - this.current_message = undefined; - this.dataDoc.data = JSON.stringify(this.history); + if (this.current_message) { + this.history.push({ ...finalMessage }); + this.current_message = undefined; + this.dataDoc.data = JSON.stringify(this.history); + } }); } catch (err) { console.error('Error:', err); - this.history.push({ role: ASSISTANT_ROLE.ASSISTANT, content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: 'Sorry, I encountered an error while processing your request.', citation_ids: null }] }); + this.history.push({ role: ASSISTANT_ROLE.ASSISTANT, content: [{ index: 0, type: TEXT_TYPE.ERROR, text: 'Sorry, I encountered an error while processing your request.', citation_ids: null }], processing_info: [] }); } finally { this.isLoading = false; } @@ -295,6 +299,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.history.push({ role: ASSISTANT_ROLE.ASSISTANT, content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: 'Welcome to the Document Analyser Assistant! 
Link a document or ask questions to get started.', citation_ids: null }], + processing_info: [], }); }); } diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index e82dcd5f7..0b8fa6b96 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -1,6 +1,6 @@ import React from 'react'; import { observer } from 'mobx-react'; -import { AssistantMessage, Citation, MessageContent, TEXT_TYPE } from './types'; +import { AssistantMessage, Citation, MessageContent, PROCESSING_TYPE, ProcessingInfo, TEXT_TYPE } from './types'; import Markdown from 'react-markdown'; interface MessageComponentProps { @@ -68,22 +68,24 @@ const MessageComponentBox: React.FC = function ({ message } }; + console.log(message.processing_info); + return (
- {message.thoughts && - message.thoughts.map((thought, idx) => ( -
- Thought: {thought} -
- ))} - {message.actions && - message.actions.map((action, idx) => ( -
- Action: {action.action} -
- Input: {action.action_input} -
- ))} + {message.processing_info && + (message.processing_info as ProcessingInfo[]).map(item => + item.type === PROCESSING_TYPE.THOUGHT ? ( +
+ Thought: {item.content} +
+ ) : item.type === PROCESSING_TYPE.ACTION ? ( +
+ Action: {item.content} +
+ ) : ( +
+ ) + )}
{message.content && message.content.map(messageFragment => {renderContent(messageFragment)})}
{message.follow_up_questions && message.follow_up_questions.length > 0 && (
diff --git a/src/client/views/nodes/ChatBox/prompts.ts b/src/client/views/nodes/ChatBox/prompts.ts index 5f4f79a7f..4a67ac1d1 100644 --- a/src/client/views/nodes/ChatBox/prompts.ts +++ b/src/client/views/nodes/ChatBox/prompts.ts @@ -16,7 +16,7 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto return ` - You are an advanced AI assistant equipped with various tools to answer user queries accurately and efficiently. Your task is to provide a comprehensive response based on the user’s prompt using available tools, chat history, and provided information. Follow these guidelines meticulously to ensure the accuracy and structure of your response. + You are an advanced AI assistant equipped with various tools to answer user queries accurately and efficiently. Your task is to provide a comprehensive response based on the user's prompt using available tools, chat history, and provided information. Follow these guidelines meticulously to ensure the accuracy and structure of your response. @@ -42,7 +42,7 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto <citation> - Provide citations for each grounded text, referencing the tool or chunk used. - <question> - Include exactly three follow-up questions from the user’s perspective within these tags. + <question> - Include exactly three follow-up questions from the user's perspective within these tags. @@ -73,20 +73,20 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto Specify the correct type: "text", "image", "table", "csv", or "url". For text-based information, include only the relevant subset of the original information that the <grounded_text> is based on. For image, table, csv, or url citation types, leave the citation content empty. - ALL CITATIONS MUST use the chunk_id field to reference the source, whether it’s from RAG, search + website scraping, data analysis, or any other tool. 
+ ALL CITATIONS MUST use the chunk_id field to reference the source, whether it's from RAG, search + website scraping, data analysis, or any other tool. One citation can be used for multiple <grounded_text> tags if they are based on the same tool or chunk information. !!!DO NOT OVERCITE - only include citations for information that is directly relevant to the <grounded_text>. - Analyze the user’s query carefully. + Analyze the user's query carefully. Determine whether a tool is required to answer the query accurately. If a tool is necessary: Select the most appropriate tool. Use the <action> tag to specify the tool. End your response after the <action> tag and wait for action rules to be provided. - Based on the action rules, provide the necessary tool parameters within the <action_input> tag. + Based on the action rules, provide the necessary tool parameters within the <action_input> tag, including a brief description of what you're doing with the action. End your response again and wait for the observation from the tool. If no tool is needed, use the 'no_tool' action but still follow the same response structure. @@ -97,8 +97,8 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto Your final <answer> tag must contain: - The complete answer to the user’s query, with grounded information wrapped in <grounded_text> tags and general information wrapped in <normal_text> tags. - Exactly three follow-up questions written from the user’s perspective, enclosed within <follow_up_questions> tags. + The complete answer to the user's query, with grounded information wrapped in <grounded_text> tags and general information wrapped in <normal_text> tags. + Exactly three follow-up questions written from the user's perspective, enclosed within <follow_up_questions> tags. 
@@ -138,9 +138,12 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto - - The user is asking for key moments and statements from official sources regarding the 2010 Vancouver Winter Olympics. Search the provided documents for any press releases or official statements that highlight significant events, achievements, or noteworthy aspects of the games. - + Searching user documents for official statements and key moments of the 2010 Vancouver Winter Olympics. + + + The user is asking for key moments and statements from official sources regarding the 2010 Vancouver Winter Olympics. Search the provided documents for any press releases or official statements that highlight significant events, achievements, or noteworthy aspects of the games. + + @@ -177,7 +180,10 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto - 2010_Vancouver_Olympics_Medal_Count.csv + Analyzing the medal count data for the 2010 Vancouver Winter Olympics to compare country performances. + + 2010_Vancouver_Olympics_Medal_Count.csv + @@ -248,7 +254,10 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto - Best hiking trails Yosemite National Park + Searching the web for information about the best hiking trails in Yosemite National Park. + + Best hiking trails Yosemite National Park + @@ -283,7 +292,10 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto - https://www.nps.gov/yose/planyourvisit/hiking.htm + Scraping information about hiking trails from the official Yosemite National Park website. + + https://www.nps.gov/yose/planyourvisit/hiking.htm + @@ -309,7 +321,10 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto - https://www.alltrails.com/parks/us/california/yosemite-national-park + Scraping user reviews and ratings for Yosemite hiking trails from AllTrails. 
+ + https://www.alltrails.com/parks/us/california/yosemite-national-park + @@ -335,7 +350,10 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto - https://www.yosemitehikes.com/ + Gathering detailed trail descriptions and hiking information from YosemiteHikes.com. + + https://www.yosemitehikes.com/ + @@ -410,7 +428,7 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto - Now, process the user’s query and provide your response following the format and rules outlined above. Ensure your final answer is comprehensive, correctly cited, and entirely contained within the structured tags. Do not get stuck in infinite loops and keep responses concise, grounded, and most importantly, HELPFUL AND USEFUL! + Now, process the user's query and provide your response following the format and rules outlined above. Ensure your final answer is comprehensive, correctly cited, and entirely contained within the structured tags. Do not get stuck in infinite loops and keep responses concise, grounded, and most importantly, HELPFUL AND USEFUL! 
`; diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index b4e66bdbe..09f14f019 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -1,6 +1,8 @@ import { breadcrumbsClasses } from '@mui/material'; import { Doc } from '../../../../fields/Doc'; import { StrCast } from '../../../../fields/Types'; +import e from 'cors'; +import { index } from 'd3'; export enum ASSISTANT_ROLE { USER = 'user', @@ -21,6 +23,12 @@ export enum CHUNK_TYPE { CSV = 'CSV', } +export enum PROCESSING_TYPE { + THOUGHT = 'thought', + ACTION = 'action', + //eventually migrate error to here +} + export function getChunkType(type: string): CHUNK_TYPE { switch (type.toLowerCase()) { case 'text': @@ -44,13 +52,18 @@ export function getChunkType(type: string): CHUNK_TYPE { } } +export interface ProcessingInfo { + index: number; + type: PROCESSING_TYPE; + content: string; +} + export interface AssistantMessage { role: ASSISTANT_ROLE; content: MessageContent[]; follow_up_questions?: string[]; - thoughts?: string[]; - actions?: { index: number; action: string; action_input: string }[]; citations?: Citation[]; + processing_info: ProcessingInfo[]; } export interface MessageContent { -- cgit v1.2.3-70-g09d2 From 4c0c7794c85cfdbcd61a7ee5cb9a29494fd0444b Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Tue, 20 Aug 2024 15:17:25 -0400 Subject: better styling, now thoughts and actions are hidden, scroll works better MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit next steps: - [ ] Ensure it doesn’t create more web documents when one already exists - [ ] Citations should not be rendered on the next line but on the same line as the text - [ ] If invalid XML, run get 3.5 to verify and fix XML based one examples - [ ] Making sure if you ask for other information, it doesn’t go to the same website. 
Providing website history in use rules for the search tool and website scraper tool or in the prompt directly --- package-lock.json | 633 ++++++++++++++++++++- package.json | 1 + src/client/util/CurrentUserUtils.ts | 2 +- src/client/views/nodes/ChatBox/ChatBox.scss | 402 ++++++------- src/client/views/nodes/ChatBox/ChatBox.tsx | 101 +++- .../views/nodes/ChatBox/MessageComponent.scss | 10 - .../views/nodes/ChatBox/MessageComponent.tsx | 97 ++-- src/server/ApiManagers/AssistantManager.ts | 64 +-- 8 files changed, 978 insertions(+), 332 deletions(-) delete mode 100644 src/client/views/nodes/ChatBox/MessageComponent.scss (limited to 'src') diff --git a/package-lock.json b/package-lock.json index 601addf4e..68c9e9746 100644 --- a/package-lock.json +++ b/package-lock.json @@ -180,6 +180,7 @@ "prosemirror-transform": "^1.8.0", "prosemirror-view": "^1.32.5", "pug": "^3.0.2", + "puppeteer": "^23.1.0", "query-string": "^7.1.3", "querystring-es3": "^0.2.1", "raw-loader": "^4.0.2", @@ -5204,6 +5205,140 @@ "url": "https://opencollective.com/popperjs" } }, + "node_modules/@puppeteer/browsers": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.3.1.tgz", + "integrity": "sha512-uK7o3hHkK+naEobMSJ+2ySYyXtQkBxIH8Gn4MK9ciePjNV+Pf+PgY/W7iPzn2MTjl3stcYB5AlcTmPYw7AXDwA==", + "dependencies": { + "debug": "^4.3.6", + "extract-zip": "^2.0.1", + "progress": "^2.0.3", + "proxy-agent": "^6.4.0", + "semver": "^7.6.3", + "tar-fs": "^3.0.6", + "unbzip2-stream": "^1.4.3", + "yargs": "^17.7.2" + }, + "bin": { + "browsers": "lib/cjs/main-cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@puppeteer/browsers/node_modules/ansi-styles": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", + "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", + "dependencies": { + "color-convert": "^2.0.1" + }, + "engines": { + "node": ">=8" + 
}, + "funding": { + "url": "https://github.com/chalk/ansi-styles?sponsor=1" + } + }, + "node_modules/@puppeteer/browsers/node_modules/cliui": { + "version": "8.0.1", + "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", + "integrity": "sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==", + "dependencies": { + "string-width": "^4.2.0", + "strip-ansi": "^6.0.1", + "wrap-ansi": "^7.0.0" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@puppeteer/browsers/node_modules/color-convert": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", + "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", + "dependencies": { + "color-name": "~1.1.4" + }, + "engines": { + "node": ">=7.0.0" + } + }, + "node_modules/@puppeteer/browsers/node_modules/color-name": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", + "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==" + }, + "node_modules/@puppeteer/browsers/node_modules/emoji-regex": { + "version": "8.0.0", + "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", + "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==" + }, + "node_modules/@puppeteer/browsers/node_modules/semver": { + "version": "7.6.3", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.6.3.tgz", + "integrity": "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A==", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/@puppeteer/browsers/node_modules/string-width": { + "version": "4.2.3", + "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", + "integrity": 
"sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", + "dependencies": { + "emoji-regex": "^8.0.0", + "is-fullwidth-code-point": "^3.0.0", + "strip-ansi": "^6.0.1" + }, + "engines": { + "node": ">=8" + } + }, + "node_modules/@puppeteer/browsers/node_modules/wrap-ansi": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", + "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", + "dependencies": { + "ansi-styles": "^4.0.0", + "string-width": "^4.1.0", + "strip-ansi": "^6.0.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/chalk/wrap-ansi?sponsor=1" + } + }, + "node_modules/@puppeteer/browsers/node_modules/yargs": { + "version": "17.7.2", + "resolved": "https://registry.npmjs.org/yargs/-/yargs-17.7.2.tgz", + "integrity": "sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==", + "dependencies": { + "cliui": "^8.0.1", + "escalade": "^3.1.1", + "get-caller-file": "^2.0.5", + "require-directory": "^2.1.1", + "string-width": "^4.2.3", + "y18n": "^5.0.5", + "yargs-parser": "^21.1.1" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@puppeteer/browsers/node_modules/yargs-parser": { + "version": "21.1.1", + "resolved": "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz", + "integrity": "sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==", + "engines": { + "node": ">=12" + } + }, "node_modules/@react-aria/actiongroup": { "version": "3.7.5", "resolved": "https://registry.npmjs.org/@react-aria/actiongroup/-/actiongroup-3.7.5.tgz", @@ -8786,6 +8921,11 @@ "resolved": "https://registry.npmjs.org/@tokenizer/token/-/token-0.3.0.tgz", "integrity": "sha512-OvjF+z51L3ov0OyAU0duzsYuvO01PH7x4t6DJx+guahgTnBHkhJdG7soQeTSFLWN3efnHyibZ4Z8l2EuWwJN3A==" }, + 
"node_modules/@tootallnate/quickjs-emscripten": { + "version": "0.23.0", + "resolved": "https://registry.npmjs.org/@tootallnate/quickjs-emscripten/-/quickjs-emscripten-0.23.0.tgz", + "integrity": "sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==" + }, "node_modules/@tsconfig/node10": { "version": "1.0.11", "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.11.tgz", @@ -11530,6 +11670,15 @@ "resolved": "https://registry.npmjs.org/@types/yargs-parser/-/yargs-parser-21.0.3.tgz", "integrity": "sha512-I4q9QU9MQv4oEOz4tAHJtNz1cwuLxn2F3xcc2iV5WdqLPpUnj30aUuxt1mAxYTG+oe8CZMV/+6rU4S4gRDzqtQ==" }, + "node_modules/@types/yauzl": { + "version": "2.10.3", + "resolved": "https://registry.npmjs.org/@types/yauzl/-/yauzl-2.10.3.tgz", + "integrity": "sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==", + "optional": true, + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/youtube": { "version": "0.0.50", "resolved": "https://registry.npmjs.org/@types/youtube/-/youtube-0.0.50.tgz", @@ -12594,6 +12743,17 @@ "node": ">=0.10.0" } }, + "node_modules/ast-types": { + "version": "0.13.4", + "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.13.4.tgz", + "integrity": "sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==", + "dependencies": { + "tslib": "^2.0.1" + }, + "engines": { + "node": ">=4" + } + }, "node_modules/ast-types-flow": { "version": "0.0.8", "resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.8.tgz", @@ -12854,6 +13014,41 @@ "integrity": "sha512-qMKFd2qG/36aA4GwvKq8MxnPgCQAmBWmSyLWsJcbn8v03wvIPQ/hG1Ms8bPzndZxMDoHpxez5VOS+gC9Yi24/Q==", "optional": true }, + "node_modules/bare-fs": { + "version": "2.3.1", + "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-2.3.1.tgz", + "integrity": 
"sha512-W/Hfxc/6VehXlsgFtbB5B4xFcsCl+pAh30cYhoFyXErf6oGrwjh8SwiPAdHgpmWonKuYpZgGywN0SXt7dgsADA==", + "optional": true, + "dependencies": { + "bare-events": "^2.0.0", + "bare-path": "^2.0.0", + "bare-stream": "^2.0.0" + } + }, + "node_modules/bare-os": { + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-2.4.0.tgz", + "integrity": "sha512-v8DTT08AS/G0F9xrhyLtepoo9EJBJ85FRSMbu1pQUlAf6A8T0tEEQGMVObWeqpjhSPXsE0VGlluFBJu2fdoTNg==", + "optional": true + }, + "node_modules/bare-path": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/bare-path/-/bare-path-2.1.3.tgz", + "integrity": "sha512-lh/eITfU8hrj9Ru5quUp0Io1kJWIk1bTjzo7JH1P5dWmQ2EL4hFUlfI8FonAhSlgIfhn63p84CDY/x+PisgcXA==", + "optional": true, + "dependencies": { + "bare-os": "^2.1.0" + } + }, + "node_modules/bare-stream": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.1.3.tgz", + "integrity": "sha512-tiDAH9H/kP+tvNO5sczyn9ZAA7utrSMobyDchsnyyXBuUe2FSQWbxhtuHB8jwpHYYevVo2UJpcmvvjrbHboUUQ==", + "optional": true, + "dependencies": { + "streamx": "^2.18.0" + } + }, "node_modules/Base64": { "version": "0.2.1", "resolved": "https://registry.npmjs.org/Base64/-/Base64-0.2.1.tgz", @@ -12894,6 +13089,14 @@ "node": ">=6.0.0" } }, + "node_modules/basic-ftp": { + "version": "5.0.5", + "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.5.tgz", + "integrity": "sha512-4Bcg1P8xhUuqcii/S0Z9wiHIrQVPMermM1any+MX5GeGD7faD3/msQUDGLol9wOcz4/jbg/WJnGqoJF6LiBdtg==", + "engines": { + "node": ">=10.0.0" + } + }, "node_modules/batch": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/batch/-/batch-0.6.1.tgz", @@ -16765,6 +16968,19 @@ "node": ">=6.0" } }, + "node_modules/chromium-bidi": { + "version": "0.6.4", + "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.6.4.tgz", + "integrity": "sha512-8zoq6ogmhQQkAKZVKO2ObFTl4uOkqoX1PlKQX3hZQ5E9cbUotcAb7h4pTNVAGGv8Z36PF3CtdOriEp/Rz82JqQ==", + "dependencies": { + 
"mitt": "3.0.1", + "urlpattern-polyfill": "10.0.0", + "zod": "3.23.8" + }, + "peerDependencies": { + "devtools-protocol": "*" + } + }, "node_modules/ci-info": { "version": "3.9.0", "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.9.0.tgz", @@ -18345,9 +18561,9 @@ "integrity": "sha512-okzr3f11N6WuqYtZSvm+F776mB41wRZMhKP+hc34YdW+KmtYYK9iqvHSwo2k9FEH3fhGXvOPV6yz2IcSrfRUDg==" }, "node_modules/debug": { - "version": "4.3.5", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.5.tgz", - "integrity": "sha512-pt0bNEmneDIvdL1Xsd9oDQ/wrQRkXDT4AUWlNZNPKvW5x/jyO9VFXkJUP07vQ2upmw5PlaITaPKc31jK13V+jg==", + "version": "4.3.6", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.6.tgz", + "integrity": "sha512-O/09Bd4Z1fBrU4VzkhFqVgpPzaGbw6Sm9FEkBT1A/YBXQFGuuSxa1dN2nxgxS34JmKXqYx8CZAwEVoJFImUXIg==", "dependencies": { "ms": "2.1.2" }, @@ -18582,6 +18798,19 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/degenerator": { + "version": "5.0.1", + "resolved": "https://registry.npmjs.org/degenerator/-/degenerator-5.0.1.tgz", + "integrity": "sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==", + "dependencies": { + "ast-types": "^0.13.4", + "escodegen": "^2.1.0", + "esprima": "^4.0.1" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/delaunator": { "version": "5.0.1", "resolved": "https://registry.npmjs.org/delaunator/-/delaunator-5.0.1.tgz", @@ -18738,6 +18967,11 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/devtools-protocol": { + "version": "0.0.1312386", + "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1312386.tgz", + "integrity": "sha512-DPnhUXvmvKT2dFA/j7B+riVLUt9Q6RKJlcppojL5CoRywJJKLDYnRlw0gTFKfgDPHP5E04UoB71SxoJlVZy8FA==" + }, "node_modules/dezalgo": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/dezalgo/-/dezalgo-1.0.4.tgz", @@ -19011,6 +19245,14 @@ "node": ">=0.10.0" } }, + "node_modules/end-of-stream": { + 
"version": "1.4.4", + "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.4.tgz", + "integrity": "sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==", + "dependencies": { + "once": "^1.4.0" + } + }, "node_modules/engine.io": { "version": "6.5.5", "resolved": "https://registry.npmjs.org/engine.io/-/engine.io-6.5.5.tgz", @@ -19114,6 +19356,14 @@ "url": "https://github.com/fb55/entities?sponsor=1" } }, + "node_modules/env-paths": { + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/env-paths/-/env-paths-2.2.1.tgz", + "integrity": "sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==", + "engines": { + "node": ">=6" + } + }, "node_modules/envinfo": { "version": "7.13.0", "resolved": "https://registry.npmjs.org/envinfo/-/envinfo-7.13.0.tgz", @@ -19343,6 +19593,35 @@ "url": "https://github.com/sponsors/sindresorhus" } }, + "node_modules/escodegen": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/escodegen/-/escodegen-2.1.0.tgz", + "integrity": "sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==", + "dependencies": { + "esprima": "^4.0.1", + "estraverse": "^5.2.0", + "esutils": "^2.0.2" + }, + "bin": { + "escodegen": "bin/escodegen.js", + "esgenerate": "bin/esgenerate.js" + }, + "engines": { + "node": ">=6.0" + }, + "optionalDependencies": { + "source-map": "~0.6.1" + } + }, + "node_modules/escodegen/node_modules/source-map": { + "version": "0.6.1", + "resolved": "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz", + "integrity": "sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==", + "optional": true, + "engines": { + "node": ">=0.10.0" + } + }, "node_modules/eslint": { "version": "8.57.0", "resolved": "https://registry.npmjs.org/eslint/-/eslint-8.57.0.tgz", @@ -21286,6 +21565,39 @@ "resolved": 
"https://registry.npmjs.org/extract-colors/-/extract-colors-4.0.6.tgz", "integrity": "sha512-U+pYyQKXCSHOmtZPIEJBGLJjLDiqS+oOub2ILA3a7UGt9+IvZvwAN3hOPFjUgT+gX/apSBwP5vBgnKMlV0fy8Q==" }, + "node_modules/extract-zip": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/extract-zip/-/extract-zip-2.0.1.tgz", + "integrity": "sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==", + "dependencies": { + "debug": "^4.1.1", + "get-stream": "^5.1.0", + "yauzl": "^2.10.0" + }, + "bin": { + "extract-zip": "cli.js" + }, + "engines": { + "node": ">= 10.17.0" + }, + "optionalDependencies": { + "@types/yauzl": "^2.9.1" + } + }, + "node_modules/extract-zip/node_modules/get-stream": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/get-stream/-/get-stream-5.2.0.tgz", + "integrity": "sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==", + "dependencies": { + "pump": "^3.0.0" + }, + "engines": { + "node": ">=8" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/extsprintf": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz", @@ -21410,6 +21722,14 @@ "node": ">=0.8.0" } }, + "node_modules/fd-slicer": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/fd-slicer/-/fd-slicer-1.1.0.tgz", + "integrity": "sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==", + "dependencies": { + "pend": "~1.2.0" + } + }, "node_modules/fetch-blob": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/fetch-blob/-/fetch-blob-2.1.2.tgz", @@ -22480,6 +22800,41 @@ "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" } }, + "node_modules/get-uri": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.3.tgz", + "integrity": 
"sha512-BzUrJBS9EcUb4cFol8r4W3v1cPsSyajLSthNkz5BxbpDcHN5tIrM10E2eNvfnvBn3DaT3DUgx0OpsBKkaOpanw==", + "dependencies": { + "basic-ftp": "^5.0.2", + "data-uri-to-buffer": "^6.0.2", + "debug": "^4.3.4", + "fs-extra": "^11.2.0" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/get-uri/node_modules/data-uri-to-buffer": { + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz", + "integrity": "sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==", + "engines": { + "node": ">= 14" + } + }, + "node_modules/get-uri/node_modules/fs-extra": { + "version": "11.2.0", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.2.0.tgz", + "integrity": "sha512-PmDi3uwK5nFuXh7XDTlVnS17xJS7vW36is2+w3xcv8SVxiB4NyATf4ctkVY5bkSjX0Y4nbvZCq1/EjtEyr9ktw==", + "dependencies": { + "graceful-fs": "^4.2.0", + "jsonfile": "^6.0.1", + "universalify": "^2.0.0" + }, + "engines": { + "node": ">=14.14" + } + }, "node_modules/get-value": { "version": "2.0.6", "resolved": "https://registry.npmjs.org/get-value/-/get-value-2.0.6.tgz", @@ -24224,6 +24579,28 @@ "loose-envify": "^1.0.0" } }, + "node_modules/ip-address": { + "version": "9.0.5", + "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-9.0.5.tgz", + "integrity": "sha512-zHtQzGojZXTwZTHQqra+ETKd4Sn3vgi7uBmlPoXVWZqYvuKmtI0l/VZTjqGmJY9x88GGOaZ9+G9ES8hC4T4X8g==", + "dependencies": { + "jsbn": "1.1.0", + "sprintf-js": "^1.1.3" + }, + "engines": { + "node": ">= 12" + } + }, + "node_modules/ip-address/node_modules/jsbn": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-1.1.0.tgz", + "integrity": "sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==" + }, + "node_modules/ip-address/node_modules/sprintf-js": { + "version": "1.1.3", + "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz", + "integrity": 
"sha512-Oo+0REFV59/rz3gfJNKQiBlwfHaSESl1pcGyABQsnnIfWOFt6JNj5gCog2U6MLZ//IGYD+nA8nI+mTShREReaA==" + }, "node_modules/ipaddr.js": { "version": "1.9.1", "resolved": "https://registry.npmjs.org/ipaddr.js/-/ipaddr.js-1.9.1.tgz", @@ -32566,6 +32943,11 @@ "resolved": "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz", "integrity": "sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==" }, + "node_modules/mitt": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/mitt/-/mitt-3.0.1.tgz", + "integrity": "sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==" + }, "node_modules/mj-context-menu": { "version": "0.6.1", "resolved": "https://registry.npmjs.org/mj-context-menu/-/mj-context-menu-0.6.1.tgz", @@ -33083,6 +33465,14 @@ "resolved": "https://registry.npmjs.org/neo-async/-/neo-async-2.6.2.tgz", "integrity": "sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw==" }, + "node_modules/netmask": { + "version": "2.0.2", + "resolved": "https://registry.npmjs.org/netmask/-/netmask-2.0.2.tgz", + "integrity": "sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==", + "engines": { + "node": ">= 0.4.0" + } + }, "node_modules/nextafter": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/nextafter/-/nextafter-1.0.0.tgz", @@ -36182,6 +36572,36 @@ "node": ">=6" } }, + "node_modules/pac-proxy-agent": { + "version": "7.0.2", + "resolved": "https://registry.npmjs.org/pac-proxy-agent/-/pac-proxy-agent-7.0.2.tgz", + "integrity": "sha512-BFi3vZnO9X5Qt6NRz7ZOaPja3ic0PhlsmCRYLOpN11+mWBCR6XJDqW5RF3j8jm4WGGQZtBA+bTfxYzeKW73eHg==", + "dependencies": { + "@tootallnate/quickjs-emscripten": "^0.23.0", + "agent-base": "^7.0.2", + "debug": "^4.3.4", + "get-uri": "^6.0.1", + "http-proxy-agent": "^7.0.0", + "https-proxy-agent": "^7.0.5", + "pac-resolver": "^7.0.1", + "socks-proxy-agent": "^8.0.4" + }, + 
"engines": { + "node": ">= 14" + } + }, + "node_modules/pac-resolver": { + "version": "7.0.1", + "resolved": "https://registry.npmjs.org/pac-resolver/-/pac-resolver-7.0.1.tgz", + "integrity": "sha512-5NPgf87AT2STgwa2ntRMr45jTKrYBGkVU36yT0ig/n/GMAa3oPqhZfIQ2kMEimReg0+t9kZViDVZ83qfVUlckg==", + "dependencies": { + "degenerator": "^5.0.0", + "netmask": "^2.0.2" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/package-json-from-dist": { "version": "1.0.0", "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.0.tgz", @@ -36621,6 +37041,11 @@ "url": "https://github.com/sponsors/Borewit" } }, + "node_modules/pend": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/pend/-/pend-1.2.0.tgz", + "integrity": "sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==" + }, "node_modules/performance-now": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/performance-now/-/performance-now-2.1.0.tgz", @@ -37021,7 +37446,6 @@ "version": "2.0.3", "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", - "dev": true, "engines": { "node": ">=0.4.0" } @@ -37175,6 +37599,32 @@ "node": ">= 0.10" } }, + "node_modules/proxy-agent": { + "version": "6.4.0", + "resolved": "https://registry.npmjs.org/proxy-agent/-/proxy-agent-6.4.0.tgz", + "integrity": "sha512-u0piLU+nCOHMgGjRbimiXmA9kM/L9EHh3zL81xCdp7m+Y2pHIsnmbdDoEDoAz5geaonNR6q6+yOPQs6n4T6sBQ==", + "dependencies": { + "agent-base": "^7.0.2", + "debug": "^4.3.4", + "http-proxy-agent": "^7.0.1", + "https-proxy-agent": "^7.0.3", + "lru-cache": "^7.14.1", + "pac-proxy-agent": "^7.0.1", + "proxy-from-env": "^1.1.0", + "socks-proxy-agent": "^8.0.2" + }, + "engines": { + "node": ">= 14" + } + }, + "node_modules/proxy-agent/node_modules/lru-cache": { + "version": "7.18.3", + "resolved": 
"https://registry.npmjs.org/lru-cache/-/lru-cache-7.18.3.tgz", + "integrity": "sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==", + "engines": { + "node": ">=12" + } + }, "node_modules/proxy-from-env": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/proxy-from-env/-/proxy-from-env-1.1.0.tgz", @@ -37302,6 +37752,15 @@ "resolved": "https://registry.npmjs.org/pug-walk/-/pug-walk-2.0.0.tgz", "integrity": "sha512-yYELe9Q5q9IQhuvqsZNwA5hfPkMJ8u92bQLIMcsMxf/VADjNtEYptU+inlufAFYcWdHlwNfZOEnOOQrZrcyJCQ==" }, + "node_modules/pump": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.0.tgz", + "integrity": "sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==", + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, "node_modules/punycode": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", @@ -37318,6 +37777,83 @@ "node": ">=6" } }, + "node_modules/puppeteer": { + "version": "23.1.0", + "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-23.1.0.tgz", + "integrity": "sha512-m+CyicDlGN1AVUeOsCa6/+KQydJzxfsPowL7fQy+VGNeaWafB0m8G5aGfXdfZztKMxzCsdz7KNNzbJPeG9wwFw==", + "hasInstallScript": true, + "dependencies": { + "@puppeteer/browsers": "2.3.1", + "chromium-bidi": "0.6.4", + "cosmiconfig": "^9.0.0", + "devtools-protocol": "0.0.1312386", + "puppeteer-core": "23.1.0", + "typed-query-selector": "^2.12.0" + }, + "bin": { + "puppeteer": "lib/cjs/puppeteer/node/cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/puppeteer-core": { + "version": "23.1.0", + "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-23.1.0.tgz", + "integrity": "sha512-SvAsu+xnLN2FMXE/59bp3s3WXp8ewqUGzVV4AQtml/2xmsciZnU/bXcCW+eETHPWQ6Agg2vTI7QzWXPpEARK2g==", + "dependencies": { + "@puppeteer/browsers": "2.3.1", + "chromium-bidi": "0.6.4", + "debug": "^4.3.6", + 
"devtools-protocol": "0.0.1312386", + "typed-query-selector": "^2.12.0", + "ws": "^8.18.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/puppeteer/node_modules/argparse": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", + "integrity": "sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==" + }, + "node_modules/puppeteer/node_modules/cosmiconfig": { + "version": "9.0.0", + "resolved": "https://registry.npmjs.org/cosmiconfig/-/cosmiconfig-9.0.0.tgz", + "integrity": "sha512-itvL5h8RETACmOTFc4UfIyB2RfEHi71Ax6E/PivVxq9NseKbOWpeyHEOIbmAw1rs8Ak0VursQNww7lf7YtUwzg==", + "dependencies": { + "env-paths": "^2.2.1", + "import-fresh": "^3.3.0", + "js-yaml": "^4.1.0", + "parse-json": "^5.2.0" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/sponsors/d-fischer" + }, + "peerDependencies": { + "typescript": ">=4.9.5" + }, + "peerDependenciesMeta": { + "typescript": { + "optional": true + } + } + }, + "node_modules/puppeteer/node_modules/js-yaml": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-4.1.0.tgz", + "integrity": "sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==", + "dependencies": { + "argparse": "^2.0.1" + }, + "bin": { + "js-yaml": "bin/js-yaml.js" + } + }, "node_modules/q": { "version": "1.5.1", "resolved": "https://registry.npmjs.org/q/-/q-1.5.1.tgz", @@ -40274,6 +40810,15 @@ "node": ">=4" } }, + "node_modules/smart-buffer": { + "version": "4.2.0", + "resolved": "https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz", + "integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==", + "engines": { + "node": ">= 6.0.0", + "npm": ">= 3.0.0" + } + }, "node_modules/socket.io": { "version": "4.7.5", "resolved": "https://registry.npmjs.org/socket.io/-/socket.io-4.7.5.tgz", @@ -40366,6 +40911,32 
@@ "uuid": "dist/bin/uuid" } }, + "node_modules/socks": { + "version": "2.8.3", + "resolved": "https://registry.npmjs.org/socks/-/socks-2.8.3.tgz", + "integrity": "sha512-l5x7VUUWbjVFbafGLxPWkYsHIhEvmF85tbIeFZWc8ZPtoMyybuEhL7Jye/ooC4/d48FgOjSJXgsF/AJPYCW8Zw==", + "dependencies": { + "ip-address": "^9.0.5", + "smart-buffer": "^4.2.0" + }, + "engines": { + "node": ">= 10.0.0", + "npm": ">= 3.0.0" + } + }, + "node_modules/socks-proxy-agent": { + "version": "8.0.4", + "resolved": "https://registry.npmjs.org/socks-proxy-agent/-/socks-proxy-agent-8.0.4.tgz", + "integrity": "sha512-GNAq/eg8Udq2x0eNiFkr9gRg5bA7PXEWagQdeRX4cPSG+X/8V38v637gim9bjFptMk1QWsCTr0ttrJEiXbNnRw==", + "dependencies": { + "agent-base": "^7.1.1", + "debug": "^4.3.4", + "socks": "^2.8.3" + }, + "engines": { + "node": ">= 14" + } + }, "node_modules/sort-asc": { "version": "0.2.0", "resolved": "https://registry.npmjs.org/sort-asc/-/sort-asc-0.2.0.tgz", @@ -41241,6 +41812,19 @@ "node": ">=10" } }, + "node_modules/tar-fs": { + "version": "3.0.6", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-3.0.6.tgz", + "integrity": "sha512-iokBDQQkUyeXhgPYaZxmczGPhnhXZ0CmrqI+MOb/WFGS9DW5wnfrLgtjUJBvz50vQ3qfRwJ62QVoCFu8mPVu5w==", + "dependencies": { + "pump": "^3.0.0", + "tar-stream": "^3.1.5" + }, + "optionalDependencies": { + "bare-fs": "^2.1.1", + "bare-path": "^2.1.0" + } + }, "node_modules/tar-stream": { "version": "3.1.7", "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-3.1.7.tgz", @@ -41403,8 +41987,7 @@ "node_modules/through": { "version": "2.3.8", "resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz", - "integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==", - "dev": true + "integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==" }, "node_modules/thunky": { "version": "1.1.0", @@ -42350,6 +42933,11 @@ "url": "https://github.com/sponsors/ljharb" } }, + 
"node_modules/typed-query-selector": { + "version": "2.12.0", + "resolved": "https://registry.npmjs.org/typed-query-selector/-/typed-query-selector-2.12.0.tgz", + "integrity": "sha512-SbklCd1F0EiZOyPiW192rrHZzZ5sBijB6xM+cpmrwDqObvdtunOHHIk9fCGsoK5JVIYXoyEp4iEdE3upFH3PAg==" + }, "node_modules/typescript": { "version": "5.5.3", "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.5.3.tgz", @@ -42458,6 +43046,15 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/unbzip2-stream": { + "version": "1.4.3", + "resolved": "https://registry.npmjs.org/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz", + "integrity": "sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg==", + "dependencies": { + "buffer": "^5.2.1", + "through": "^2.3.8" + } + }, "node_modules/undefsafe": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/undefsafe/-/undefsafe-2.0.5.tgz", @@ -42860,6 +43457,11 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/urlpattern-polyfill": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz", + "integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg==" + }, "node_modules/use-composed-ref": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/use-composed-ref/-/use-composed-ref-1.3.0.tgz", @@ -43968,7 +44570,6 @@ "version": "8.18.0", "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.0.tgz", "integrity": "sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==", - "dev": true, "engines": { "node": ">=10.0.0" }, @@ -44161,6 +44762,23 @@ "node": ">=8" } }, + "node_modules/yauzl": { + "version": "2.10.0", + "resolved": "https://registry.npmjs.org/yauzl/-/yauzl-2.10.0.tgz", + "integrity": "sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==", + "dependencies": { + 
"buffer-crc32": "~0.2.3", + "fd-slicer": "~1.1.0" + } + }, + "node_modules/yauzl/node_modules/buffer-crc32": { + "version": "0.2.13", + "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", + "integrity": "sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==", + "engines": { + "node": "*" + } + }, "node_modules/yn": { "version": "3.1.1", "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", @@ -44198,7 +44816,6 @@ "version": "3.23.8", "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz", "integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==", - "peer": true, "funding": { "url": "https://github.com/sponsors/colinhacks" } diff --git a/package.json b/package.json index 544917e78..57a8ffc05 100644 --- a/package.json +++ b/package.json @@ -265,6 +265,7 @@ "prosemirror-transform": "^1.8.0", "prosemirror-view": "^1.32.5", "pug": "^3.0.2", + "puppeteer": "^23.1.0", "query-string": "^7.1.3", "querystring-es3": "^0.2.1", "raw-loader": "^4.0.2", diff --git a/src/client/util/CurrentUserUtils.ts b/src/client/util/CurrentUserUtils.ts index e095bc659..280830442 100644 --- a/src/client/util/CurrentUserUtils.ts +++ b/src/client/util/CurrentUserUtils.ts @@ -371,7 +371,7 @@ pie title Minerals in my tap water {key: "Button", creator: Docs.Create.ButtonDocument, opts: { _width: 150, _height: 50, _xPadding: 10, _yPadding: 10, title_custom: true, waitForDoubleClickToClick: 'never'}, scripts: {onClick: FollowLinkScript()?.script.originalScript ?? 
""}}, {key: "Script", creator: opts => Docs.Create.ScriptingDocument(null, opts), opts: { _width: 200, _height: 250, }}, {key: "DataViz", creator: opts => Docs.Create.DataVizDocument("/users/rz/Downloads/addresses.csv", opts), opts: { _width: 300, _height: 300 }}, - {key: "Chat", creator: Docs.Create.ChatDocument, opts: { _width: 300, _height: 300, }}, + {key: "Chat", creator: Docs.Create.ChatDocument, opts: { _width: 500, _height: 500, }}, {key: "Header", creator: headerTemplate, opts: { _width: 300, _height: 120, _header_pointerEvents: "all", _header_height: 50, _header_fontSize: 9,_layout_autoHeightMargins: 50, _layout_autoHeight: true, treeView_HideUnrendered: true}}, {key: "ViewSlide", creator: slideView, opts: { _width: 400, _height: 300, _xMargin: 3, _yMargin: 3,}}, {key: "Trail", creator: Docs.Create.PresDocument, opts: { _width: 400, _height: 30, _type_collection: CollectionViewType.Stacking, dropAction: dropActionType.embed, treeView_HideTitle: true, _layout_fitWidth:true, layout_boxShadow: "0 0" }}, diff --git a/src/client/views/nodes/ChatBox/ChatBox.scss b/src/client/views/nodes/ChatBox/ChatBox.scss index 91bb3aba7..76fa05ce8 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.scss +++ b/src/client/views/nodes/ChatBox/ChatBox.scss @@ -1,246 +1,246 @@ -$background-color: #f8f9fa; +@import url('https://fonts.googleapis.com/css2?family=Atkinson+Hyperlegible:ital,wght@0,400;0,700;1,400;1,700&display=swap'); + +$primary-color: #4a90e2; +$secondary-color: #f5f8fa; $text-color: #333; -$input-background: #fff; -$button-color: #007bff; -$button-hover-color: darken($button-color, 10%); -$shadow-color: rgba(0, 0, 0, 0.075); -$border-radius: 8px; -$citation-color: #ff6347; -$citation-hover-color: darken($citation-color, 10%); -$follow-up-bg-color: #e9ecef; -$follow-up-hover-bg-color: #dee2e6; - -.chatBox { +$light-text-color: #777; +$border-color: #e1e8ed; +$shadow-color: rgba(0, 0, 0, 0.1); +$transition: all 0.3s ease; + +.chat-box { display: flex; flex-direction: 
column; - width: 100%; height: 100%; - background-color: $background-color; - font-family: 'Helvetica Neue', Arial, sans-serif; + background-color: #fff; + font-family: + 'Atkinson Hyperlegible', + -apple-system, + BlinkMacSystemFont, + 'Segoe UI', + Roboto, + Helvetica, + Arial, + sans-serif; + border-radius: 12px; + overflow: hidden; + box-shadow: 0 4px 12px $shadow-color; + + .chat-header { + background-color: $primary-color; + color: white; + padding: 15px; + text-align: center; + box-shadow: 0 2px 4px $shadow-color; + height: fit-content; + h2 { + margin: 0; + font-size: 1.3em; + font-weight: 500; + } + } - .scroll-box { + .chat-messages { flex-grow: 1; - overflow-y: scroll; - overflow-x: hidden; - height: 100%; - padding: 10px; + overflow-y: auto; + padding: 20px; display: flex; - flex-direction: column-reverse; - padding-bottom: 0; + flex-direction: column; &::-webkit-scrollbar { - width: 8px; + width: 6px; } + &::-webkit-scrollbar-thumb { - background-color: darken($background-color, 10%); - border-radius: $border-radius; + background-color: $border-color; + border-radius: 3px; } + } - .chat-content { - display: flex; - flex-direction: column; + .chat-input { + display: flex; + padding: 20px; + border-top: 1px solid $border-color; + background-color: #fff; + + input { + flex-grow: 1; + padding: 12px 15px; + border: 1px solid $border-color; + border-radius: 24px; + font-size: 15px; + transition: $transition; + + &:focus { + outline: none; + border-color: $primary-color; + box-shadow: 0 0 0 2px rgba($primary-color, 0.2); + } } - .messages { + .submit-button { + background-color: $primary-color; + color: white; + border: none; + border-radius: 50%; + width: 48px; + height: 48px; + margin-left: 10px; + cursor: pointer; + transition: $transition; display: flex; - flex-direction: column; - - .message { - padding: 10px 15px; - margin-bottom: 10px; - border-radius: $border-radius; - background-color: lighten($background-color, 5%); - box-shadow: 0 2px 5px 
$shadow-color; - align-items: flex-start; - max-width: 90%; - width: fit-content; - word-break: break-word; - position: relative; - - .citation-button { - background-color: $citation-color; - color: #fff; - border: none; - border-radius: 50%; - cursor: pointer; - width: 20px; - height: 20px; - display: inline-flex; - align-items: center; - justify-content: center; - font-size: 12px; - font-weight: bold; - margin: 0 2px; - padding: 0; - transition: background-color 0.3s; - - &:hover { - background-color: $citation-hover-color; - } - } - - &.user { - align-self: flex-end; - background-color: $button-color; - color: #fff; - } - - &.chatbot { - align-self: flex-start; - background-color: $input-background; - color: $text-color; - } - - span { - flex-grow: 1; - padding-right: 10px; - } - - img { - max-width: 50px; - max-height: 50px; - border-radius: 50%; - } + align-items: center; + justify-content: center; + + &:hover { + background-color: darken($primary-color, 10%); + } + + &:disabled { + background-color: $light-text-color; + cursor: not-allowed; } - .follow-up-questions { - margin-top: 10px; - width: 100%; - - h4 { - margin-bottom: 5px; - font-size: 14px; - } - - .follow-up-button { - background-color: $follow-up-bg-color; - border: 1px solid #ddd; - border-radius: 8px; - padding: 8px 10px; - margin: 4px 0; - cursor: pointer; - transition: background-color 0.3s; - display: block; - width: 100%; - text-align: left; - white-space: normal; - word-wrap: break-word; - font-size: 12px; - color: $text-color; - min-height: 40px; - height: auto; - line-height: 1.3; - - &:hover { - background-color: $follow-up-hover-bg-color; - } - } + .spinner { + height: 24px; + width: 24px; + border: 3px solid rgba(255, 255, 255, 0.3); + border-top: 3px solid #fff; + border-radius: 50%; + animation: spin 1s linear infinite; } } } +} - .chat-form { - display: flex; - flex-grow: 0; - bottom: 0; - width: 100%; - padding: 10px; - background-color: $input-background; - box-shadow: inset 0 
-1px 2px $shadow-color; - margin-bottom: 0; +.message { + max-width: 80%; + margin-bottom: 20px; + padding: 16px 20px; + border-radius: 18px; + font-size: 15px; + line-height: 1.5; + box-shadow: 0 2px 4px $shadow-color; + + &.user { + align-self: flex-end; + background-color: $primary-color; + color: white; + border-bottom-right-radius: 4px; + } - input[type='text'] { - flex-grow: 1; - border: 1px solid darken($input-background, 10%); - border-radius: $border-radius; - padding: 8px 12px; - margin-right: 10px; + &.chatbot { + align-self: flex-start; + background-color: $secondary-color; + color: $text-color; + border-bottom-left-radius: 4px; + } + + .toggle-info { + background-color: transparent; + color: $primary-color; + border: 1px solid $primary-color; + width: 100%; + height: fit-content; + border-radius: 8px; + padding: 10px 16px; + font-size: 14px; + cursor: pointer; + transition: $transition; + margin-top: 10px; + + &:hover { + background-color: rgba($primary-color, 0.1); } + } +} - button { - padding: 8px 16px; - background-color: $button-color; - color: #fff; - border: none; - border-radius: $border-radius; - cursor: pointer; - transition: background-color 0.3s; - min-width: 80px; +.follow-up-questions { + margin-top: 15px; - &:hover { - background-color: $button-hover-color; - } + h4 { + font-size: 15px; + font-weight: 600; + margin-bottom: 10px; + } + + .questions-list { + display: flex; + flex-direction: column; + gap: 10px; + } + + .follow-up-button { + background-color: #fff; + color: $primary-color; + border: 1px solid $primary-color; + border-radius: 8px; + padding: 10px 16px; + font-size: 14px; + cursor: pointer; + transition: $transition; + text-align: left; + white-space: normal; + word-wrap: break-word; + width: 100%; + height: fit-content; + + &:hover { + background-color: $primary-color; + color: #fff; } } } -.uploading-overlay { - position: absolute; - top: 0; - left: 0; - width: 100%; - height: 100%; - background-color: 
rgba($background-color, 0.95); - display: flex; - justify-content: center; +.citation-button { + display: inline-flex; align-items: center; - font-size: 1.5em; + justify-content: center; + width: 20px; + height: 20px; + border-radius: 50%; + background-color: rgba(0, 0, 0, 0.1); color: $text-color; - z-index: 10; - - &::before { - content: 'Uploading Docs...'; - font-weight: bold; + font-size: 12px; + font-weight: bold; + margin-left: 5px; + cursor: pointer; + transition: $transition; + vertical-align: middle; + + &:hover { + background-color: rgba(0, 0, 0, 0.2); } } -.modal { - position: fixed; +.uploading-overlay { + position: absolute; top: 0; left: 0; - width: 100%; - height: 100%; + right: 0; + bottom: 0; + background-color: rgba(255, 255, 255, 0.8); display: flex; justify-content: center; align-items: center; - background-color: rgba(0, 0, 0, 0.4); - - .modal-content { - background-color: $input-background; - color: $text-color; - padding: 20px; - border-radius: $border-radius; - box-shadow: 0 2px 10px $shadow-color; - display: flex; - flex-direction: column; - align-items: center; - width: auto; - min-width: 300px; - - h4 { - margin-bottom: 15px; - } - - p { - margin-bottom: 20px; - } + z-index: 1000; +} - button { - padding: 10px 20px; - background-color: $button-color; - color: #fff; - border: none; - border-radius: $border-radius; - cursor: pointer; - transition: background-color 0.3s; +@keyframes spin { + 0% { + transform: rotate(0deg); + } + 100% { + transform: rotate(360deg); + } +} - &:hover { - background-color: $button-hover-color; - } - } +@media (max-width: 768px) { + .chat-box { + border-radius: 0; } - .thought-text { - color: #6c757d; - font-style: italic; + + .message { + max-width: 90%; } } diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 36416a330..4a98f8dc1 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -21,6 +21,8 @@ import 
{ DocumentManager } from '../../../util/DocumentManager'; import { v4 as uuidv4 } from 'uuid'; import { chunk } from 'lodash'; import { DocUtils } from '../../../documents/DocUtils'; +import { createRef } from 'react'; +import { ClientUtils } from '../../../../ClientUtils'; dotenv.config(); @@ -37,10 +39,10 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @observable private linked_csv_files: { filename: string; id: string; text: string }[] = []; private openai: OpenAI; private vectorstore_id: string; - private documents: AI_Document[] = []; - private _oldWheel: any; private vectorstore: Vectorstore; private agent: Agent; // Add the ChatBot instance + private _oldWheel: HTMLDivElement | null = null; + private messagesRef: React.RefObject; public static LayoutString(fieldKey: string) { return FieldView.LayoutString(ChatBox, fieldKey); @@ -59,6 +61,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds); this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc); + this.messagesRef = React.createRef(); reaction( () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, content: msg.content, follow_up_questions: msg.follow_up_questions, citations: msg.citations })), @@ -133,6 +136,23 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { return new OpenAI(configuration); } + addScrollListener = () => { + if (this.messagesRef.current) { + this.messagesRef.current.addEventListener('wheel', this.onPassiveWheel, { passive: false }); + } + }; + + removeScrollListener = () => { + if (this.messagesRef.current) { + this.messagesRef.current.removeEventListener('wheel', this.onPassiveWheel); + } + }; + + scrollToBottom = () => { + if (this.messagesRef.current) { + } + }; + onPassiveWheel = (e: WheelEvent) => { if (this._props.isContentActive()) { e.stopPropagation(); @@ -160,6 
+180,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.current_message = { ...this.current_message, processing_info: update }; } }); + this.scrollToBottom(); }; const finalMessage = await this.agent.askAgent(trimmedText, onUpdate); @@ -176,8 +197,10 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.history.push({ role: ASSISTANT_ROLE.ASSISTANT, content: [{ index: 0, type: TEXT_TYPE.ERROR, text: 'Sorry, I encountered an error while processing your request.', citation_ids: null }], processing_info: [] }); } finally { this.isLoading = false; + this.scrollToBottom(); } } + this.scrollToBottom(); }; @action @@ -202,6 +225,11 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { doc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] }); }; + @computed + get userName() { + return ClientUtils.CurrentUserEmail; + } + @action handleCitationClick = (citation: Citation) => { console.log('Citation clicked:', citation); @@ -276,6 +304,10 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { return highlight_doc; }; + componentDidUpdate() { + this.scrollToBottom(); + } + componentDidMount() { this._props.setContentViewBox?.(this); if (this.dataDoc.data) { @@ -332,6 +364,11 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { console.log('Deleted docs: ', change.oldValue); } }); + this.addScrollListener(); + } + + componentWillUnmount() { + this.removeScrollListener(); } @computed @@ -411,35 +448,41 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }; render() { return ( -
- {this.isUploadingDocs &&
} -
{ - this._oldWheel?.removeEventListener('wheel', this.onPassiveWheel); - this._oldWheel = r; - r?.addEventListener('wheel', this.onPassiveWheel, { passive: false }); - }}> -
- {this.history.map((message, index) => ( - - ))} - {!this.current_message ? null : ( - - )} +
+ {this.isUploadingDocs && ( +
+
+ )} +
+

{this.userName()}'s AI Assistant

- - (this.inputValue = e.target.value)} /> -
diff --git a/src/client/views/nodes/ChatBox/MessageComponent.scss b/src/client/views/nodes/ChatBox/MessageComponent.scss deleted file mode 100644 index 6fcc0e5e7..000000000 --- a/src/client/views/nodes/ChatBox/MessageComponent.scss +++ /dev/null @@ -1,10 +0,0 @@ -MessageComponent-citation { - color: lightblue; - vertical-align: super; - font-size: smaller; -} -MessageComponent-file_path { - color: lightblue; - vertical-align: baseline; - font-size: inherit; -} diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index 0b8fa6b96..00e9795e3 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -1,7 +1,7 @@ -import React from 'react'; +import React, { useState } from 'react'; import { observer } from 'mobx-react'; import { AssistantMessage, Citation, MessageContent, PROCESSING_TYPE, ProcessingInfo, TEXT_TYPE } from './types'; -import Markdown from 'react-markdown'; +import ReactMarkdown from 'react-markdown'; interface MessageComponentProps { message: AssistantMessage; @@ -12,37 +12,20 @@ interface MessageComponentProps { } const MessageComponentBox: React.FC = function ({ message, index, onFollowUpClick, onCitationClick, updateMessageCitations }) { + const [dropdownOpen, setDropdownOpen] = useState(false); + const renderContent = (item: MessageContent) => { const i = item.index; if (item.type === TEXT_TYPE.GROUNDED) { const citation_ids = item.citation_ids || []; return ( - {item.text} + {item.text} {citation_ids.map((id, idx) => { const citation = message.citations?.find(c => c.citation_id === id); if (!citation) return null; return ( - ); @@ -52,49 +35,65 @@ const MessageComponentBox: React.FC = function ({ message } else if (item.type === TEXT_TYPE.NORMAL) { return ( - {item.text} + {item.text} ); } else if ('query' in item) { - // Handle the case where the item has a query property return ( - {JSON.stringify(item.query)} + 
{JSON.stringify(item.query)} ); } else { - // Fallback for any other unexpected cases - return {JSON.stringify(item)}; + return ( + + {JSON.stringify(item)} + + ); } }; - console.log(message.processing_info); + const hasProcessingInfo = message.processing_info && message.processing_info.length > 0; + + const renderProcessingInfo = (info: ProcessingInfo) => { + if (info.type === PROCESSING_TYPE.THOUGHT) { + return ( +
+ Thought: {info.content} +
+ ); + } else if (info.type === PROCESSING_TYPE.ACTION) { + return ( +
+ Action: {info.content} +
+ ); + } else { + return null; + } + }; return (
- {message.processing_info && - (message.processing_info as ProcessingInfo[]).map(item => - item.type === PROCESSING_TYPE.THOUGHT ? ( -
- Thought: {item.content} -
- ) : item.type === PROCESSING_TYPE.ACTION ? ( -
- Action: {item.content} -
- ) : ( -
- ) - )} -
{message.content && message.content.map(messageFragment => {renderContent(messageFragment)})}
+
{message.content && message.content.map(messageFragment => {renderContent(messageFragment)})}
+ {hasProcessingInfo && ( +
+ + {dropdownOpen &&
{message.processing_info.map(renderProcessingInfo)}
} +
+ )} {message.follow_up_questions && message.follow_up_questions.length > 0 && (

Follow-up Questions:

- {message.follow_up_questions.map((question, idx) => ( - - ))} +
+ {message.follow_up_questions.map((question, idx) => ( + + ))} +
)}
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index cd26ca79b..9b85dbbe8 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -14,6 +14,8 @@ import { PartitionResponse } from 'unstructured-client/sdk/models/operations'; import { ChunkingStrategy, Strategy } from 'unstructured-client/sdk/models/shared'; import * as cheerio from 'cheerio'; import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk'; +import { google } from 'googleapis'; +import puppeteer from 'puppeteer'; export enum Directory { parsed_files = 'parsed_files', @@ -55,6 +57,7 @@ export default class AssistantManager extends ApiManager { }, }); const scrapflyClient = new ScrapflyClient({ key: process.env._CLIENT_SCRAPFLY_API_KEY! }); + const customsearch = google.customsearch('v1'); register({ method: Method.POST, @@ -89,20 +92,18 @@ export default class AssistantManager extends ApiManager { secureHandler: async ({ req, res }) => { const { query } = req.body; try { - const response = await axios.get('http://api.serpstack.com/search', { - params: { - access_key: process.env._CLIENT_SERPSTACK_API_KEY, - query: query, - }, + const response = await customsearch.cse.list({ + q: query, + cx: process.env._CLIENT_GOOGLE_SEARCH_ENGINE_ID, + key: process.env._CLIENT_GOOGLE_API_KEY, + safe: 'active', }); - console.log(response.data); - const results = response.data.organic_results.map((result: any) => ({ - url: result.url, - snippet: result.snippet, - })); - - console.log(results); + const results = + response.data.items?.map((item: any) => ({ + url: item.link, + snippet: item.snippet, + })) || []; res.send({ results }); } catch (error: any) { @@ -144,6 +145,7 @@ export default class AssistantManager extends ApiManager { const scrapedImagesDirectory = pathToDirectory(Directory.scrape_images); const filePath = serverPathToFile(Directory.scrape_images, url_filename); + // Check if the image already exists if 
(fs.existsSync(filePath)) { const imageBuffer = await readFileAsync(filePath); const base64Image = imageBuffer.toString('base64'); @@ -151,33 +153,27 @@ export default class AssistantManager extends ApiManager { return res.send({ website_image_base64: base64Image }); } + // Create the directory if it doesn't exist if (!fs.existsSync(scrapedImagesDirectory)) { fs.mkdirSync(scrapedImagesDirectory); } - const result = await scrapflyClient.scrape( - new ScrapeConfig({ - url: url, - render_js: true, - screenshots: { everything: 'fullpage' }, - }) - ); - - const screenshotPromises = Object.entries(result.result.screenshots).map(async ([name, screenshot]) => { - const response = await axios.get(screenshot.url, { - params: { - key: process.env._CLIENT_SCRAPFLY_API_KEY!, - options: 'print_media_format', - proxy_pool: 'public_residential_pool', - }, - responseType: 'arraybuffer', - }); - await fs.promises.writeFile(filePath, response.data); - return response.data.toString('base64'); + // Launch Puppeteer to take a screenshot of the webpage + const browser = await puppeteer.launch({ + args: ['--no-sandbox', '--disable-setuid-sandbox'], }); - - const base64Screenshots = await Promise.all(screenshotPromises); - res.send({ website_image_base64: base64Screenshots[0] }); + const page = await browser.newPage(); + await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'); + await page.goto(url, { waitUntil: 'networkidle2' }); + const screenshotBuffer = await page.screenshot({ fullPage: true }); + await browser.close(); + + // Save the screenshot to the file system + await writeFileAsync(filePath, screenshotBuffer); + + // Return the base64-encoded image + const base64Image = Buffer.from(screenshotBuffer).toString('base64'); + res.send({ website_image_base64: base64Image }); } catch (error: any) { console.error('Error scraping website:', error); res.status(500).send({ error: 'Failed to scrape 
website', details: error.message }); -- cgit v1.2.3-70-g09d2 From 9da46dd6b00bf4a9f733001fbe43edcf586fc634 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Tue, 20 Aug 2024 15:17:31 -0400 Subject: Update ChatBox.tsx --- src/client/views/nodes/ChatBox/ChatBox.tsx | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 4a98f8dc1..1366eb772 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -150,6 +150,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { scrollToBottom = () => { if (this.messagesRef.current) { + this.messagesRef.current.scrollTop = this.messagesRef.current.scrollHeight; } }; -- cgit v1.2.3-70-g09d2 From 57dcd9e29a9b622493f8a4246545675385223572 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Tue, 20 Aug 2024 16:55:07 -0400 Subject: fixed prompt to be more explicit with action input structure --- src/client/views/nodes/ChatBox/prompts.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/prompts.ts b/src/client/views/nodes/ChatBox/prompts.ts index 4a67ac1d1..200a0fa75 100644 --- a/src/client/views/nodes/ChatBox/prompts.ts +++ b/src/client/views/nodes/ChatBox/prompts.ts @@ -86,7 +86,7 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto Select the most appropriate tool. Use the <action> tag to specify the tool. End your response after the <action> tag and wait for action rules to be provided. - Based on the action rules, provide the necessary tool parameters within the <action_input> tag, including a brief description of what you're doing with the action. + Based on the action rules, provide the necessary tool parameters within the <inputs>. 
The <inputs> tag should be within the <action_input> tag, and should follow an <action_description> tag that contains a brief description of what you're doing with the action. End your response again and wait for the observation from the tool. If no tool is needed, use the 'no_tool' action but still follow the same response structure. -- cgit v1.2.3-70-g09d2 From 79e4c4a3fba42b90ffa656db3ca435505f978afe Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Tue, 20 Aug 2024 18:32:08 -0400 Subject: supports multiple inputs maybe also make it so web results cannot have overlap (no same url in websites returned by search) Also make sure it will cite multiple websites --- src/client/views/nodes/ChatBox/Agent.ts | 52 ++++- src/client/views/nodes/ChatBox/prompts.ts | 228 +++++++++++---------- .../views/nodes/ChatBox/tools/CalculateTool.ts | 1 + .../nodes/ChatBox/tools/CreateCollectionTool.ts | 1 + .../views/nodes/ChatBox/tools/DataAnalysisTool.ts | 36 +++- src/client/views/nodes/ChatBox/tools/SearchTool.ts | 63 +++--- .../nodes/ChatBox/tools/WebsiteInfoScraperTool.ts | 52 +++-- src/server/ApiManagers/AssistantManager.ts | 3 +- 8 files changed, 260 insertions(+), 176 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index ae08271ee..43138bf94 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -55,13 +55,23 @@ export class Agent { const systemPrompt = getReactPrompt(Object.values(this.tools), this._summaries, chatHistory); this.interMessages = [{ role: 'system', content: systemPrompt }]; this.interMessages.push({ role: 'user', content: `${question}` }); - const parser = new XMLParser({ ignoreAttributes: false, attributeNamePrefix: '@_' }); + const parser = new XMLParser({ + ignoreAttributes: false, + attributeNamePrefix: '@_', + textNodeName: '_text', + isArray: (name, jpath, isLeafNode, isAttribute) => { + // Convert tags with the same name to 
arrays + return ['query', 'url'].indexOf(name) !== -1; + }, + }); const builder = new XMLBuilder({ ignoreAttributes: false, attributeNamePrefix: '@_' }); + let currentAction: string | undefined; this.processingInfo = []; for (let i = 2; i < maxTurns; i += 2) { + console.log(this.interMessages); console.log(`Turn ${i}/${maxTurns}`); const result = await this.execute(onUpdate); @@ -102,11 +112,14 @@ export class Agent { break; } } else if (key === 'action_input') { - const actionInput = builder.build({ action_input: stage[key] }); - console.log(`Action input: ${actionInput}`); + const actionInput = stage[key]; + console.log(`Action input:`, actionInput); if (currentAction) { try { - const observation = await this.processAction(currentAction, stage[key].inputs); + // Parse the inputs + //const parsedInputs = this.parseActionInputs(actionInput.inputs); + //console.log(`Parsed inputs:`, parsedInputs); + const observation = await this.processAction(currentAction, actionInput.inputs); const nextPrompt = [{ type: 'text', text: ` ` }, ...observation, { type: 'text', text: '' }]; console.log(observation); this.interMessages.push({ role: 'user', content: nextPrompt }); @@ -198,9 +211,26 @@ export class Agent { const tool = this.tools[action]; const args: Record = {}; + for (const paramName in tool.parameters) { if (actionInput[paramName] !== undefined) { - args[paramName] = actionInput[paramName]; + if (Array.isArray(actionInput[paramName])) { + // If the input is already an array, use it as is + args[paramName] = actionInput[paramName]; + } else if (typeof actionInput[paramName] === 'object' && actionInput[paramName] !== null) { + // If the input is an object, check if it has multiple of the same tag + const values = Object.values(actionInput[paramName]); + if (values.length > 1) { + // If there are multiple values, convert to an array + args[paramName] = values; + } else { + // If there's only one value, use it directly + args[paramName] = values[0]; + } + } else { + // For 
single values, use them as is + args[paramName] = actionInput[paramName]; + } } else if (tool.parameters[paramName].required === 'true') { throw new Error(`Missing required parameter '${paramName}' for action '${action}'`); } @@ -208,4 +238,16 @@ export class Agent { return await tool.execute(args); } + + private parseActionInputs(inputs: any): Record { + const parsedInputs: Record = {}; + for (const key in inputs) { + if (Array.isArray(inputs[key])) { + parsedInputs[key] = inputs[key].map((item: any) => item._text); + } else { + parsedInputs[key] = inputs[key]._text; + } + } + return parsedInputs; + } } diff --git a/src/client/views/nodes/ChatBox/prompts.ts b/src/client/views/nodes/ChatBox/prompts.ts index 200a0fa75..845b4d255 100644 --- a/src/client/views/nodes/ChatBox/prompts.ts +++ b/src/client/views/nodes/ChatBox/prompts.ts @@ -56,15 +56,15 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto Grounded text can be as short as a few words or as long as several sentences. Avoid overlapping or nesting <grounded_text> tags; use sequential tags instead. Grounded text tags should always have a citation_index attribute that references a citation index number that the text is grounded in. - Should be in Markdown format. + Content within the <grounded_text> tags should be in Markdown format. - Wrap all information that is not derived from tools or chunks in <normal_text> tags. + Wrap all text that is not derived from tools or chunks in <normal_text> tags (any text outputted in the answer that is not in a <grounded_text> tag should be within a normal text tag). Ensure that these tags are used for your reasoning, background knowledge, or general information that does not require a citation. Do not use <normal_text> tags for information that needs grounding or citation. Anything that is in any user docs should be grounded text and cited, not normal text, even if it is background or general information. - Should be in Markdown format. 
+ Content within the <normal_text> tags should be in Markdown format. @@ -87,6 +87,7 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto Use the <action> tag to specify the tool. End your response after the <action> tag and wait for action rules to be provided. Based on the action rules, provide the necessary tool parameters within the <inputs>. The <inputs> tag should be within the <action_input> tag, and should follow an <action_description> tag that contains a brief description of what you're doing with the action. + For each input, you may provide as many different iterations of the same tag (i.e. to provide multiple inputs to the tool) as is specified in the input's max_input's field. End your response again and wait for the observation from the tool. If no tool is needed, use the 'no_tool' action but still follow the same response structure. @@ -169,7 +170,7 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto - With information on key moments from the press release, I will now analyze the medal count data to compare how different countries performed during the games. + With information on key moments from the press release, I will now analyze the medal count data and economic impact data to provide a comprehensive overview of the 2010 Vancouver Winter Olympics's impact. dataAnalysis @@ -180,9 +181,10 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto - Analyzing the medal count data for the 2010 Vancouver Winter Olympics to compare country performances. + Analyzing the medal count data and economic impact data for the 2010 Vancouver Winter Olympics to compare country performances and assess the games' overall impact. 
2010_Vancouver_Olympics_Medal_Count.csv + 2010_Vancouver_Olympics_Economic_Impact.csv @@ -194,234 +196,236 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto Germany,10,13,7,30 Canada,14,7,5,26 Norway,9,8,6,23 + Russia,3,5,7,15 + South Korea,6,6,2,14 + China,5,2,4,11 + Sweden,5,2,4,11 + France,2,3,6,11 + Austria,4,6,6,16 + + + Category,Value + Total Cost (CAD),7.7 billion + Revenue Generated (CAD),2.5 billion + Jobs Created,45000 + Tourism Increase (%),35 + Infrastructure Investment (CAD),1.9 billion + Post-Olympic Annual Tourism Revenue Increase (CAD),220 million + Venues Repurposed,8 + Carbon Footprint Offset (%),118 - Now that I have gathered comprehensive information from the official press release and the medal count data, I will provide a detailed answer to the user's query. + Now that I have gathered comprehensive information from the official press release, medal count data, and economic impact data, I will provide a detailed answer to the user's query, incorporating all aspects of the 2010 Vancouver Winter Olympics's impact. The 2010 Winter Olympics, officially known as the XXI Olympic Winter Games, took place in Vancouver, British Columbia, Canada, from February 12 to 28, 2010. It featured 86 events in 15 disciplines across 7 sports, with 2,566 athletes from 82 National Olympic Committees participating. + The Opening Ceremony was one of the most spectacular in Olympic history, celebrating Canadian culture and heritage. A significant highlight for the host nation was the victory of the Canadian men's and women's ice hockey teams, both securing gold medals, which was a source of immense national pride. + - In terms of medal performance, the United States led the overall tally with 37 medals, followed by Germany with 30 medals. Canada, while placing third in total medals, achieved a record number of 14 gold medals, the highest of any country in the 2010 games. 
+ In terms of medal performance, the United States led the overall tally with 37 medals (9 gold, 15 silver, 13 bronze), followed by Germany with 30 medals (10 gold, 13 silver, 7 bronze). Canada, while placing third in total medals with 26, achieved a record number of 14 gold medals, the highest of any country in the 2010 games. Other notable performances came from Norway (23 medals), Russia (15 medals), and South Korea (14 medals). + - However, the games were also marked by tragedy with the unfortunate death of Georgian luger Nodar Kumaritashvili during a training run, which deeply affected the Olympic community. + The economic impact of the 2010 Vancouver Winter Olympics was significant. The total cost of hosting the games was approximately 7.7 billion CAD, while generating revenue of 2.5 billion CAD. The games created about 45,000 jobs and led to a 35% increase in tourism during the event. The long-term benefits include an annual increase of 220 million CAD in tourism revenue and 1.9 billion CAD invested in infrastructure. - - - - Vancouver, BC - The 2010 Winter Olympics have been hailed as a monumental success, showcasing the best of Canadian culture and athletic prowess. The Opening Ceremony, held on February 12, 2010, was a spectacular celebration of the nation's history and diversity... - Key Moments: - - The Canadian men's and women's ice hockey teams both claimed gold medals, an achievement that brought immense pride to the host nation. - + + From an environmental perspective, the Vancouver Olympics made efforts to be sustainable, with 118% of the carbon footprint offset and 8 venues repurposed after the games, demonstrating a commitment to leaving a positive legacy. + + + + However, the games were also marked by tragedy with the unfortunate death of Georgian luger Nodar Kumaritashvili during a training run, which deeply affected the Olympic community and led to increased scrutiny of safety measures across all events. 
+ + + + Despite this tragedy, the 2010 Vancouver Winter Olympics were generally hailed as a monumental success, showcasing the best of Canadian culture and athletic prowess. The games concluded on February 28, 2010, with a memorable Closing Ceremony that highlighted the achievements of the athletes and the spirit of unity that defined these Olympics. + + + + + - - Tragedy struck early in the games with the death of Georgian luger Nodar Kumaritashvili during a training run. - + + + + - What were the economic impacts on Vancouver after hosting the 2010 Winter Olympics? - How did the tragic accident of Nodar Kumaritashvili influence safety protocols in luge and other winter sports? - Can you provide more information on other significant performances by athletes during the 2010 Winter Olympics? + How did the 2010 Vancouver Winter Olympics compare to previous Winter Olympics in terms of economic impact and sustainability efforts? + What specific infrastructure improvements were made in Vancouver as a result of hosting the 2010 Winter Olympics? + How did the performance of Canadian athletes in the 2010 Vancouver Winter Olympics compare to their performance in previous Winter Olympics? + + ***SYSTEM MESSAGE ELIDED*** + - What are the best hiking trails in Yosemite National Park? + What are the best hiking trails in Yosemite National Park? And which are the safest? - - To provide comprehensive information about the best hiking trails in Yosemite National Park, I'll use the search tool to find relevant websites with hiking recommendations and park information. + To provide comprehensive information about the best hiking trails in Yosemite National Park, I'll use the search tool to find relevant websites with hiking recommendations, safety information, and general park information. searchTool - ***Action rules elided*** - - Searching the web for information about the best hiking trails in Yosemite National Park. 
+ Searching the web for information about the best and safest hiking trails in Yosemite National Park. Best hiking trails Yosemite National Park + Safest hiking trails Yosemite National Park + Information about hiking in Yosemite National Park - - + https://www.nps.gov/yose/planyourvisit/hiking.htm Official National Park Service website with hiking information for Yosemite - + https://www.alltrails.com/parks/us/california/yosemite-national-park AllTrails page featuring user-rated hiking trails in Yosemite - + https://www.yosemitehikes.com/ Comprehensive guide to Yosemite hikes with detailed trail information - ***7 more search results elided*** - - I've found several relevant websites about hiking in Yosemite National Park. I'll start by scraping content from the official National Park Service website for the most reliable information. + I've found several relevant websites about hiking in Yosemite National Park. I'll start by scraping content from the official National Park Service website for the most reliable information, then move on to AllTrails for user ratings and reviews, and finally to YosemiteHikes.com for more detailed trail descriptions. websiteInfoScraper - ***Action rules elided*** - - Scraping information about hiking trails from the official Yosemite National Park website. + Scraping information from three websites about hiking trails in Yosemite National Park. https://www.nps.gov/yose/planyourvisit/hiking.htm + https://www.alltrails.com/parks/us/california/yosemite-national-park + https://www.yosemitehikes.com/ - - ***Website image elided*** - ***Scraped content from NPS website*** + ***NPS general planning website image elided*** + + + ***AllTrails website image elided*** + + + ***YosemiteHikes.com website image elided*** - - The information from the National Park Service website was helpful, but it lacks user reviews and ratings. I'll check AllTrails for this additional perspective, although it may not provide as much official information. 
+ The information gathered from the three websites provides a good overview of the best hiking trails and safety considerations in Yosemite National Park. However, I noticed that there's limited information about seasonal trail conditions and closures, which could be crucial for planning a safe hike. To address this, I'll scrape additional information from the Yosemite National Park's current conditions page. websiteInfoScraper - ***Action rules elided*** - - Scraping user reviews and ratings for Yosemite hiking trails from AllTrails. + Scraping information about current trail conditions and closures in Yosemite National Park. - https://www.alltrails.com/parks/us/california/yosemite-national-park + https://www.nps.gov/yose/planyourvisit/conditions.htm - - - ***Website image elided*** - ***Scraped content from AllTrails*** + + ***NPS conditions website image elided*** - - The AllTrails website didn't provide as much detailed information as I had hoped. To ensure a comprehensive answer, I'll scrape YosemiteHikes.com for additional details and trail descriptions. + Now that I have additional information about current trail conditions and seasonal closures, I can provide a more comprehensive and up-to-date answer to the user's query about the best and safest hiking trails in Yosemite National Park. - websiteInfoScraper - - - - ***Action rules elided*** - + + + Yosemite National Park offers a wide range of hiking trails suitable for various skill levels. Some of the best hiking trails in Yosemite, based on the information gathered, include: - - - Gathering detailed trail descriptions and hiking information from YosemiteHikes.com. - - https://www.yosemitehikes.com/ - - - + 1. Mist Trail to Vernal Fall: A moderate 3-mile round-trip hike in Yosemite Valley, offering close-up views of waterfalls... 
***REST ELIDED FOR BREVITY*** - - - - ***Website image elided*** - ***Scraped content from YosemiteHikes.com*** - - - - - - - I have successfully scraped information from all three websites. Now, I'll synthesize this information to provide a comprehensive answer about the best hiking trails in Yosemite National Park. - - - - Yosemite National Park is renowned for its breathtaking landscapes and diverse hiking opportunities. The park spans nearly 1,200 square miles of wilderness in California's Sierra Nevada mountains, offering trails suitable for all skill levels. - + + ***CONTENT ELIDED FOR BREVITY*** + + + ***CONTENT ELIDED FOR BREVITY*** + - Based on the information gathered from multiple sources, here are some of the best hiking trails in Yosemite: + Regarding safety, some of the safest trails for hikers of all skill levels include: + + 1. Lower Yosemite Fall Trail: An easy 1-mile loop in Yosemite Valley... ***REST ELIDED FOR BREVITY*** + - 1. Half Dome: This strenuous 14-16 mile round trip hike is one of Yosemite's most iconic and challenging trails, requiring a permit for the cable section. - - 2. Mist Trail to Vernal and Nevada Falls: A popular 6.4-mile round trip hike offering close-up views of two spectacular waterfalls. - - 3. Yosemite Falls Trail: A strenuous 7.2-mile round trip hike to the top of North America's tallest waterfall. - - 4. Bridalveil Fall Trail: An easy 0.5-mile round trip walk to the base of this iconic Yosemite waterfall. - - 5. Mirror Lake Trail: A moderate 2-5 mile round trip hike to a lake that reflects surrounding cliffs and Half Dome. - + + ***CONTENT ELIDED FOR BREVITY*** + - These trails offer a range of difficulties and showcase the park's diverse natural beauty, from towering waterfalls to panoramic valley views. 
+ To ensure a safe hiking experience in Yosemite, regardless of the trail you choose, follow these safety tips: - - - It's important to note that trail conditions can vary depending on the season, and some trails may require permits or have specific regulations. Always check the official Yosemite National Park website for the most up-to-date information before planning your hike. + + 1. Stay on marked trails at all times... + ***REST ELIDED FOR BREVITY*** - - Remember to practice Leave No Trace principles, stay on designated trails, and come prepared with appropriate gear, water, and snacks for your hike. + Remember that even the "safest" trails can pose risks if proper precautions are not taken. Always hike within your ability level and be prepared for the specific conditions of the trail you choose. It's essential to check for daily updates on trail conditions before starting your hike, as they can change rapidly, especially during spring and fall. - - - - - - - - + + https://www.nps.gov/yose/planyourvisit/hiking.htm + + + https://www.alltrails.com/parks/us/california/yosemite-national-park + + + https://www.yosemitehikes.com/ + + + https://www.nps.gov/yose/planyourvisit/conditions.htm + - What is the best time of year to hike in Yosemite National Park? - Are there any beginner-friendly trails in Yosemite that offer scenic views? - What safety precautions should hikers take when exploring Yosemite's trails? + Are there any special permits required for hiking certain trails in Yosemite? + What wildlife might I encounter while hiking in Yosemite, and how should I prepare for potential encounters? - ${chatHistory} Use this history for context when appropriate. 
diff --git a/src/client/views/nodes/ChatBox/tools/CalculateTool.ts b/src/client/views/nodes/ChatBox/tools/CalculateTool.ts index 818332c44..74b7ca27b 100644 --- a/src/client/views/nodes/ChatBox/tools/CalculateTool.ts +++ b/src/client/views/nodes/ChatBox/tools/CalculateTool.ts @@ -10,6 +10,7 @@ export class CalculateTool extends BaseTool<{ expression: string }> { type: 'string', description: 'The mathematical expression to evaluate', required: 'true', + max_inputs: '1', }, }, 'Provide a mathematical expression to calculate that would work with JavaScript eval().', diff --git a/src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts b/src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts index 26ac0d7cc..573428179 100644 --- a/src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts +++ b/src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts @@ -16,6 +16,7 @@ export class GetDocsContentTool extends BaseTool<{ title: string; document_ids: type: 'string', description: 'the title of the collection that you will be making', required: 'true', + max_inputs: '1', }, }, 'Provide a mathematical expression to calculate that would work with JavaScript eval().', diff --git a/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts b/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts index b45733639..a12ee46e5 100644 --- a/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts +++ b/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts @@ -1,21 +1,22 @@ import { BaseTool } from './BaseTool'; -export class DataAnalysisTool extends BaseTool<{ csv_file_name: string }> { +export class DataAnalysisTool extends BaseTool<{ csv_file_name: string | string[] }> { private csv_files_function: () => { filename: string; id: string; text: string }[]; constructor(csv_files: () => { filename: string; id: string; text: string }[]) { super( 'dataAnalysis', - 'Analyzes, and provides insights, from a CSV file', + 'Analyzes, and provides insights, from one or more CSV 
files', { csv_file_name: { type: 'string', - description: 'Name of the CSV file to analyze', + description: 'Name(s) of the CSV file(s) to analyze', required: 'true', + max_inputs: '3', }, }, - 'Provide the name of the CSV file to analyze based on the user query and whichever available CSV file may be relevant.', - 'Provides the full CSV file text for your analysis based on the user query and the available CSV file. ' + 'Provide the name(s) of up to 3 CSV files to analyze based on the user query and whichever available CSV files may be relevant.', + 'Provides the full CSV file text for your analysis based on the user query and the available CSV file(s). ' ); this.csv_files_function = csv_files; } @@ -32,8 +33,27 @@ export class DataAnalysisTool extends BaseTool<{ csv_file_name: string }> { return file?.id; } - async execute(args: { csv_file_name: string }): Promise { - console.log(this.csv_files_function()); - return [{ type: 'text', text: `` + this.getFileContent(args.csv_file_name) + '' }]; + async execute(args: { csv_file_name: string | string[] }): Promise { + const filenames = Array.isArray(args.csv_file_name) ? 
args.csv_file_name : [args.csv_file_name]; + const results = []; + + for (const filename of filenames) { + const fileContent = this.getFileContent(filename); + const fileID = this.getFileID(filename); + + if (fileContent && fileID) { + results.push({ + type: 'text', + text: `${fileContent}`, + }); + } else { + results.push({ + type: 'text', + text: `File not found: ${filename}`, + }); + } + } + + return results; } } diff --git a/src/client/views/nodes/ChatBox/tools/SearchTool.ts b/src/client/views/nodes/ChatBox/tools/SearchTool.ts index 6523fd11c..b926cbadc 100644 --- a/src/client/views/nodes/ChatBox/tools/SearchTool.ts +++ b/src/client/views/nodes/ChatBox/tools/SearchTool.ts @@ -1,47 +1,54 @@ +import { max } from 'lodash'; import { Networking } from '../../../../Network'; import { BaseTool } from './BaseTool'; import { v4 as uuidv4 } from 'uuid'; -export class SearchTool extends BaseTool<{ query: string }> { +export class SearchTool extends BaseTool<{ query: string | string[] }> { private _addLinkedUrlDoc: (url: string, id: string) => void; - - constructor(addLinkedUrlDoc: (url: string, id: string) => void) { + private _max_results: number; + constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 5) { super( 'searchTool', - 'Search the web to find a wide range of websites related to a query', + 'Search the web to find a wide range of websites related to a query or multiple queries', { query: { type: 'string', - description: 'The search query to use for finding websites', - required: true, + description: 'The search query or queries to use for finding websites', + required: 'true', + max_inputs: '3', }, }, - 'Provide a search query to find a broad range of websites. This tool is intended to help you identify relevant websites, but not to be used for providing the final answer. 
Use this information to determine which specific website to investigate further.', - 'Returns a list of websites and their overviews based on the search query, helping to identify which website might contain the most relevant information.' + 'Provide up to 3 search queries to find a broad range of websites. This tool is intended to help you identify relevant websites, but not to be used for providing the final answer. Use this information to determine which specific website to investigate further.', + 'Returns a list of websites and their overviews based on the search queries, helping to identify which websites might contain relevant information.' ); this._addLinkedUrlDoc = addLinkedUrlDoc; + this._max_results = max_results; } - async execute(args: { query: string }): Promise { - try { - const { results } = await Networking.PostToServer('/getWebSearchResults', { query: args.query }); - console.log(results); - const data: { type: string; text: string }[] = results.map((result: { url: string; snippet: string }) => { - console.log; - const id = uuidv4(); - //this._addLinkedUrlDoc(result.url, id); //not needed right now because it shouldn't use this information to ground responses and should scrape afterwards - return { - type: 'text', - text: ` - ${result.url} - ${result.snippet} - `, - }; - }); - return data; - } catch (error) { - console.log(error); - return [{ type: 'text', text: 'An error occurred while performing the web search.' }]; + async execute(args: { query: string | string[] }): Promise { + const queries = Array.isArray(args.query) ? 
args.query : [args.query]; + const allResults = []; + + for (const query of queries) { + try { + const { results } = await Networking.PostToServer('/getWebSearchResults', { query, max_results: this._max_results }); + const data: { type: string; text: string }[] = results.map((result: { url: string; snippet: string }) => { + const id = uuidv4(); + return { + type: 'text', + text: ` + ${result.url} + ${result.snippet} + `, + }; + }); + allResults.push(...data); + } catch (error) { + console.log(error); + allResults.push({ type: 'text', text: `An error occurred while performing the web search for query: ${query}` }); + } } + + return allResults; } } diff --git a/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts index c59afefbd..739329bea 100644 --- a/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts +++ b/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts @@ -2,43 +2,51 @@ import { Networking } from '../../../../Network'; import { BaseTool } from './BaseTool'; import { v4 as uuidv4 } from 'uuid'; -export class WebsiteInfoScraperTool extends BaseTool<{ url: string }> { +export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> { private _addLinkedUrlDoc: (url: string, id: string) => void; constructor(addLinkedUrlDoc: (url: string, id: string) => void) { super( 'websiteInfoScraper', - 'Scrape detailed information from a specific website identified as the most relevant', + 'Scrape detailed information from specific websites identified as relevant to the user query', { url: { type: 'string', - description: 'The URL of the website to scrape', - required: true, + description: 'The URL(s) of the website(s) to scrape', + required: 'true', + max_inputs: '3', }, }, - 'Provide the URL of the website that you have identified as the most relevant from the previous search. This tool will scrape and process detailed information from that specific website. 
It will also create a document from the scraped content for future reference.', - 'Returns the full HTML content from the provided URL and creates a document from the content for further analysis.' + 'Provide up to 3 URLs of websites that you have identified as the most relevant from the previous search. This tool will provide screenshots of those specific websites. It will also create a document from the scraped content for future reference. When providing a response to the user, ideally reference as many of the websites as possible in order to provide a well grounded result.', + 'Returns the full content of the webpages as images for analysis.' ); this._addLinkedUrlDoc = addLinkedUrlDoc; } - async execute(args: { url: string }): Promise { - try { - const { website_image_base64 } = await Networking.PostToServer('/scrapeWebsite', { url: args.url }); - const id = uuidv4(); - this._addLinkedUrlDoc(args.url, id); - return [ - { type: 'text', text: ` ` }, - { - type: 'image_url', - image_url: { - url: `data:image/jpeg;base64,${website_image_base64}`, + async execute(args: { url: string | string[] }): Promise { + const urls = Array.isArray(args.url) ? args.url : [args.url]; + const results = []; + + for (const url of urls) { + try { + const { website_image_base64 } = await Networking.PostToServer('/scrapeWebsite', { url }); + const id = uuidv4(); + this._addLinkedUrlDoc(url, id); + results.push( + { type: 'text', text: ` ` }, + { + type: 'image_url', + image_url: { + url: `data:image/jpeg;base64,${website_image_base64}`, + }, }, - }, - { type: 'text', text: `\n` }, - ]; - } catch (error) { - return [{ type: 'text', text: 'An error occurred while scraping the website.' 
}]; + { type: 'text', text: `\n` } + ); + } catch (error) { + results.push({ type: 'text', text: `An error occurred while scraping the website: ${url}` }); + } } + + return results; } } diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 9b85dbbe8..afaeaf961 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -90,13 +90,14 @@ export default class AssistantManager extends ApiManager { method: Method.POST, subscription: '/getWebSearchResults', secureHandler: async ({ req, res }) => { - const { query } = req.body; + const { query, max_results } = req.body; try { const response = await customsearch.cse.list({ q: query, cx: process.env._CLIENT_GOOGLE_SEARCH_ENGINE_ID, key: process.env._CLIENT_GOOGLE_API_KEY, safe: 'active', + num: max_results, }); const results = -- cgit v1.2.3-70-g09d2 From e5464e4c04ef6f8a2bbf868b43bbcdba54239406 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Wed, 21 Aug 2024 14:13:56 -0400 Subject: fixed citations so they show different numbers --- package-lock.json | 21 ++++++++++ package.json | 2 + .../views/nodes/ChatBox/MessageComponent.tsx | 3 +- .../nodes/ChatBox/tools/WebsiteInfoScraperTool.ts | 17 ++------ src/server/ApiManagers/AssistantManager.ts | 47 ++++++++++------------ 5 files changed, 51 insertions(+), 39 deletions(-) (limited to 'src') diff --git a/package-lock.json b/package-lock.json index 68c9e9746..a7e31663f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -26,6 +26,7 @@ "@fullcalendar/daygrid": "^6.1.10", "@fullcalendar/multimonth": "^6.1.10", "@internationalized/date": "^3.5.0", + "@mozilla/readability": "^0.5.0", "@mui/icons-material": "^5.14.19", "@mui/material": "^5.14.19", "@octokit/core": "^6.0.1", @@ -271,6 +272,7 @@ "@types/howler": "^2.2.11", "@types/html-to-text": "^9.0.4", "@types/jquery": "^3.5.29", + "@types/jsdom": "^21.1.7", "@types/libxmljs": "^0.18.12", "@types/lodash": "^4.14.202", 
"@types/mocha": "^10.0.6", @@ -4755,6 +4757,14 @@ "sparse-bitfield": "^3.0.3" } }, + "node_modules/@mozilla/readability": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/@mozilla/readability/-/readability-0.5.0.tgz", + "integrity": "sha512-Z+CZ3QaosfFaTqvhQsIktyGrjFjSC0Fa4EMph4mqKnWhmyoGICsV/8QK+8HpXut6zV7zwfWwqDmEjtk1Qf6EgQ==", + "engines": { + "node": ">=14.0.0" + } + }, "node_modules/@mui/base": { "version": "5.0.0-beta.40", "resolved": "https://registry.npmjs.org/@mui/base/-/base-5.0.0-beta.40.tgz", @@ -11150,6 +11160,17 @@ "@types/sizzle": "*" } }, + "node_modules/@types/jsdom": { + "version": "21.1.7", + "resolved": "https://registry.npmjs.org/@types/jsdom/-/jsdom-21.1.7.tgz", + "integrity": "sha512-yOriVnggzrnQ3a9OKOCxaVuSug3w3/SbOj5i7VwXWZEyUNl3bLF9V3MfxGbZKuwqJOQyRfqXyROBB1CoZLFWzA==", + "dev": true, + "dependencies": { + "@types/node": "*", + "@types/tough-cookie": "*", + "parse5": "^7.0.0" + } + }, "node_modules/@types/json-schema": { "version": "7.0.15", "resolved": "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.15.tgz", diff --git a/package.json b/package.json index 57a8ffc05..2b2a9ab89 100644 --- a/package.json +++ b/package.json @@ -43,6 +43,7 @@ "@types/howler": "^2.2.11", "@types/html-to-text": "^9.0.4", "@types/jquery": "^3.5.29", + "@types/jsdom": "^21.1.7", "@types/libxmljs": "^0.18.12", "@types/lodash": "^4.14.202", "@types/mocha": "^10.0.6", @@ -111,6 +112,7 @@ "@fullcalendar/daygrid": "^6.1.10", "@fullcalendar/multimonth": "^6.1.10", "@internationalized/date": "^3.5.0", + "@mozilla/readability": "^0.5.0", "@mui/icons-material": "^5.14.19", "@mui/material": "^5.14.19", "@octokit/core": "^6.0.1", diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index 00e9795e3..3edfb272c 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -16,6 +16,7 @@ const MessageComponentBox: 
React.FC = function ({ message const renderContent = (item: MessageContent) => { const i = item.index; + console.log('item', item, 'index', i); if (item.type === TEXT_TYPE.GROUNDED) { const citation_ids = item.citation_ids || []; return ( @@ -26,7 +27,7 @@ const MessageComponentBox: React.FC = function ({ message if (!citation) return null; return ( ); })} diff --git a/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts index 739329bea..4588b5aec 100644 --- a/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts +++ b/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts @@ -17,8 +17,8 @@ export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> max_inputs: '3', }, }, - 'Provide up to 3 URLs of websites that you have identified as the most relevant from the previous search. This tool will provide screenshots of those specific websites. It will also create a document from the scraped content for future reference. When providing a response to the user, ideally reference as many of the websites as possible in order to provide a well grounded result.', - 'Returns the full content of the webpages as images for analysis.' + 'Provide up to 3 URLs of websites that you have identified as the most relevant from the previous search. This tool will provide the text content of those specific websites. When providing a final response to the user based on information from these chunks, ideally cite as many of the url chunks as possible (ground your infromation from multiple sources, if possible) in order to provide a well grounded result.', + 'Returns the text content of the webpages for analysis.' 
); this._addLinkedUrlDoc = addLinkedUrlDoc; } @@ -29,19 +29,10 @@ export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> for (const url of urls) { try { - const { website_image_base64 } = await Networking.PostToServer('/scrapeWebsite', { url }); + const { website_plain_text } = await Networking.PostToServer('/scrapeWebsite', { url }); const id = uuidv4(); this._addLinkedUrlDoc(url, id); - results.push( - { type: 'text', text: ` ` }, - { - type: 'image_url', - image_url: { - url: `data:image/jpeg;base64,${website_image_base64}`, - }, - }, - { type: 'text', text: `\n` } - ); + results.push({ type: 'text', text: `\n${website_plain_text}\n\n` }); } catch (error) { results.push({ type: 'text', text: `An error occurred while scraping the website: ${url}` }); } diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index afaeaf961..a59a2d22d 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -15,7 +15,9 @@ import { ChunkingStrategy, Strategy } from 'unstructured-client/sdk/models/share import * as cheerio from 'cheerio'; import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk'; import { google } from 'googleapis'; -import puppeteer from 'puppeteer'; +import * as puppeteer from 'puppeteer'; +import { JSDOM } from 'jsdom'; +import { Readability } from '@mozilla/readability'; export enum Directory { parsed_files = 'parsed_files', @@ -142,39 +144,34 @@ export default class AssistantManager extends ApiManager { secureHandler: async ({ req, res }) => { const { url } = req.body; try { - const url_filename = url.replace(/\./g, '-').replace(/\//g, '_') + '.jpg'; - const scrapedImagesDirectory = pathToDirectory(Directory.scrape_images); - const filePath = serverPathToFile(Directory.scrape_images, url_filename); - - // Check if the image already exists - if (fs.existsSync(filePath)) { - const imageBuffer = await readFileAsync(filePath); - const 
base64Image = imageBuffer.toString('base64'); - console.log('Image already exists'); - return res.send({ website_image_base64: base64Image }); - } - - // Create the directory if it doesn't exist - if (!fs.existsSync(scrapedImagesDirectory)) { - fs.mkdirSync(scrapedImagesDirectory); - } - - // Launch Puppeteer to take a screenshot of the webpage + // Launch Puppeteer to navigate to the webpage const browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox'], }); const page = await browser.newPage(); await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'); await page.goto(url, { waitUntil: 'networkidle2' }); - const screenshotBuffer = await page.screenshot({ fullPage: true }); + + // Get the HTML content of the page + const htmlContent = await page.content(); await browser.close(); - // Save the screenshot to the file system - await writeFileAsync(filePath, screenshotBuffer); + // Use JSDOM to parse the HTML content + const dom = new JSDOM(htmlContent, { url }); - // Return the base64-encoded image - const base64Image = Buffer.from(screenshotBuffer).toString('base64'); - res.send({ website_image_base64: base64Image }); + // Use Readability to extract the readable content + const reader = new Readability(dom.window.document); + const article = reader.parse(); + + if (article) { + // Extract the plain text from the article content + const plainText = article.textContent; + + // Return the plain text content + res.send({ website_plain_text: plainText }); + } else { + res.status(500).send({ error: 'Failed to extract readable content' }); + } } catch (error: any) { console.error('Error scraping website:', error); res.status(500).send({ error: 'Failed to scrape website', details: error.message }); -- cgit v1.2.3-70-g09d2 From 484eb670b291afa07f2f7b976fafe02bdc9ac71d Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Wed, 21 Aug 2024 16:06:31 -0400 Subject: added answer streaming parsing so it provides realtime parsing and then follow-up questions and citations are added when its finished --- src/client/views/nodes/ChatBox/Agent.ts | 54 ++++----- src/client/views/nodes/ChatBox/AnswerParser.ts | 2 +- src/client/views/nodes/ChatBox/ChatBox.tsx | 16 ++- .../views/nodes/ChatBox/MessageComponent.tsx | 3 +- src/client/views/nodes/ChatBox/StreamParser.ts | 125 --------------------- .../views/nodes/ChatBox/StreamedAnswerParser.ts | 73 ++++++++++++ 6 files changed, 111 insertions(+), 162 deletions(-) delete mode 100644 src/client/views/nodes/ChatBox/StreamParser.ts create mode 100644 src/client/views/nodes/ChatBox/StreamedAnswerParser.ts (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index 43138bf94..4ccb179f0 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -12,9 +12,9 @@ import { WebsiteInfoScraperTool } from './tools/WebsiteInfoScraperTool'; import { SearchTool } from './tools/SearchTool'; import { NoTool } from './tools/NoTool'; import { on } from 'events'; -import { StreamParser } from './StreamParser'; import { v4 as uuidv4 } from 'uuid'; import { AnswerParser } from './AnswerParser'; +import { StreamedAnswerParser } from './StreamedAnswerParser'; dotenv.config(); @@ -31,6 +31,7 @@ export class Agent { private thoughtNumber: number = 0; private processingNumber: number = 0; private processingInfo: ProcessingInfo[] = []; + private streamedAnswerParser: StreamedAnswerParser = new StreamedAnswerParser(); constructor(_vectorstore: Vectorstore, summaries: () => string, history: () => string, csvData: () => { filename: string; id: string; text: string }[], addLinkedUrlDoc: (url: string, id: string) => void) { this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); @@ -48,7 +49,7 @@ export class Agent { }; } - async 
askAgent(question: string, onUpdate: (update: ProcessingInfo[]) => void, maxTurns: number = 30): Promise { + async askAgent(question: string, onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void, maxTurns: number = 30): Promise { console.log(`Starting query: ${question}`); this.messages.push({ role: 'user', content: question }); const chatHistory = this._history(); @@ -74,7 +75,7 @@ export class Agent { console.log(this.interMessages); console.log(`Turn ${i}/${maxTurns}`); - const result = await this.execute(onUpdate); + const result = await this.execute(onProcessingUpdate, onAnswerUpdate); this.interMessages.push({ role: 'assistant', content: result }); let parsedResult; @@ -133,6 +134,7 @@ export class Agent { } } else if (key === 'answer') { console.log('Answer found. Ending query.'); + this.streamedAnswerParser.reset(); const parsedAnswer = AnswerParser.parse(result, this.processingInfo); return parsedAnswer; } @@ -141,7 +143,7 @@ export class Agent { throw new Error('Reached maximum turns. 
Ending query.'); } - private async execute(onUpdate: (update: ProcessingInfo[]) => void): Promise { + private async execute(onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void): Promise { const stream = await this.client.chat.completions.create({ model: 'gpt-4o', messages: this.interMessages as ChatCompletionMessageParam[], @@ -155,11 +157,18 @@ export class Agent { let isInsideTag: boolean = false; for await (const chunk of stream) { - const content = chunk.choices[0]?.delta?.content || ''; + let content = chunk.choices[0]?.delta?.content || ''; fullResponse += content; for (const char of content) { - if (char === '<') { + if (currentTag === 'answer') { + currentContent += char; + console.log(char); + const streamedAnswer = this.streamedAnswerParser.parse(char); + console.log(streamedAnswer); + onAnswerUpdate(streamedAnswer); + continue; + } else if (char === '<') { isInsideTag = true; currentTag = ''; currentContent = ''; @@ -170,11 +179,15 @@ export class Agent { } } else if (isInsideTag) { currentTag += char; - } else { + } else if (currentTag === 'thought' || currentTag === 'action_input_description') { currentContent += char; - if (currentTag === 'thought' || currentTag === 'action_input_description') { - this.processStreamedContent(currentTag, currentContent); - onUpdate(this.processingInfo); + const current_info = this.processingInfo.find(info => info.index === this.processingNumber); + if (current_info) { + current_info.content = currentContent.trim(); + onProcessingUpdate(this.processingInfo); + } else { + this.processingInfo.push({ index: this.processingNumber, type: currentTag === 'thought' ? 
PROCESSING_TYPE.THOUGHT : PROCESSING_TYPE.ACTION, content: currentContent.trim() }); + onProcessingUpdate(this.processingInfo); } } } @@ -183,27 +196,6 @@ export class Agent { return fullResponse; } - private processStreamedContent(tag: string, streamed_content: string) { - const current_info = this.processingInfo.find(info => info.index === this.processingNumber); - switch (tag) { - case 'thought': - if (current_info) { - current_info.content = streamed_content; - } else { - console.log(`Adding thought: ${streamed_content}`); - this.processingInfo.push({ index: this.processingNumber, type: PROCESSING_TYPE.THOUGHT, content: streamed_content.trim() }); - } - break; - case 'action_input_description': - if (current_info) { - current_info.content = streamed_content; - } else { - console.log(`Adding thought: ${streamed_content}`); - this.processingInfo.push({ index: this.processingNumber, type: PROCESSING_TYPE.ACTION, content: streamed_content.trim() }); - } - } - } - private async processAction(action: string, actionInput: any): Promise { if (!(action in this.tools)) { throw new Error(`Unknown action: ${action}`); diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/AnswerParser.ts index 1d46a366d..b18083a27 100644 --- a/src/client/views/nodes/ChatBox/AnswerParser.ts +++ b/src/client/views/nodes/ChatBox/AnswerParser.ts @@ -56,7 +56,7 @@ export class AnswerParser { while ((match = groundedTextRegex.exec(rawTextContent)) !== null) { const [fullMatch, citationIndex, groundedText] = match; - // Add normal text before the grounded text + // Add normal text that is before the grounded text if (match.index > lastIndex) { const normalText = rawTextContent.slice(lastIndex, match.index).trim(); if (normalText) { diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 1366eb772..45f5c0a65 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ 
b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -11,7 +11,7 @@ import { ViewBoxAnnotatableComponent } from '../../DocComponent'; import { FieldView, FieldViewProps } from '../FieldView'; import './ChatBox.scss'; import MessageComponentBox from './MessageComponent'; -import { ASSISTANT_ROLE, AssistantMessage, AI_Document, Citation, CHUNK_TYPE, RAGChunk, getChunkType, TEXT_TYPE, SimplifiedChunk, ProcessingInfo } from './types'; +import { ASSISTANT_ROLE, AssistantMessage, AI_Document, Citation, CHUNK_TYPE, RAGChunk, getChunkType, TEXT_TYPE, SimplifiedChunk, ProcessingInfo, MessageContent } from './types'; import { Vectorstore } from './vectorstore/Vectorstore'; import { Agent } from './Agent'; import dotenv from 'dotenv'; @@ -175,16 +175,24 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.isLoading = true; this.current_message = { role: ASSISTANT_ROLE.ASSISTANT, content: [], citations: [], processing_info: [] }; - const onUpdate = (update: ProcessingInfo[]) => { + const onProcessingUpdate = (processingUpdate: ProcessingInfo[]) => { runInAction(() => { if (this.current_message) { - this.current_message = { ...this.current_message, processing_info: update }; + this.current_message = { ...this.current_message, processing_info: processingUpdate }; } }); this.scrollToBottom(); }; - const finalMessage = await this.agent.askAgent(trimmedText, onUpdate); + const onAnswerUpdate = (answerUpdate: string) => { + runInAction(() => { + if (this.current_message) { + this.current_message = { ...this.current_message, content: [{ text: answerUpdate, type: TEXT_TYPE.NORMAL, index: 0, citation_ids: [] }] }; + } + }); + }; + + const finalMessage = await this.agent.askAgent(trimmedText, onProcessingUpdate, onAnswerUpdate); runInAction(() => { if (this.current_message) { diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index 3edfb272c..d0e78c751 100644 --- 
a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -76,15 +76,16 @@ const MessageComponentBox: React.FC = function ({ message return (
-
{message.content && message.content.map(messageFragment => {renderContent(messageFragment)})}
{hasProcessingInfo && (
{dropdownOpen &&
{message.processing_info.map(renderProcessingInfo)}
} +
)} +
{message.content && message.content.map(messageFragment => {renderContent(messageFragment)})}
{message.follow_up_questions && message.follow_up_questions.length > 0 && (

Follow-up Questions:

diff --git a/src/client/views/nodes/ChatBox/StreamParser.ts b/src/client/views/nodes/ChatBox/StreamParser.ts deleted file mode 100644 index 9b087663a..000000000 --- a/src/client/views/nodes/ChatBox/StreamParser.ts +++ /dev/null @@ -1,125 +0,0 @@ -import { AssistantMessage, ASSISTANT_ROLE, TEXT_TYPE, Citation, CHUNK_TYPE } from './types'; -import { v4 as uuidv4 } from 'uuid'; - -export class StreamParser { - private currentMessage: AssistantMessage; - private currentTag: string | null = null; - private buffer: string = ''; - private citationIndex: number = 1; - - constructor() { - this.currentMessage = { - role: ASSISTANT_ROLE.ASSISTANT, - content: [], - thoughts: [], - actions: [], - citations: [], - }; - } - - parse(chunk: string): AssistantMessage { - this.buffer += chunk; - - while (this.buffer.length > 0) { - if (this.currentTag === null) { - const openTagMatch = this.buffer.match(/<(\w+)>/); - if (openTagMatch) { - this.currentTag = openTagMatch[1]; - this.buffer = this.buffer.slice(openTagMatch.index! 
+ openTagMatch[0].length); - } else { - break; - } - } else { - const closeTagIndex = this.buffer.indexOf(``); - if (closeTagIndex !== -1) { - const content = this.buffer.slice(0, closeTagIndex); - this.processTag(this.currentTag, content); - this.buffer = this.buffer.slice(closeTagIndex + this.currentTag.length + 3); - this.currentTag = null; - } else { - break; - } - } - } - - return this.currentMessage; - } - - private processTag(tag: string, content: string) { - switch (tag) { - case 'thought': - this.currentMessage.thoughts!.push(content); - break; - case 'action': - this.currentMessage.actions!.push({ index: this.currentMessage.actions!.length, action: content, action_input: '' }); - break; - case 'action_input': - if (this.currentMessage.actions!.length > 0) { - this.currentMessage.actions![this.currentMessage.actions!.length - 1].action_input = content; - } - break; - case 'answer': - this.processAnswer(content); - break; - } - } - - private processAnswer(content: string) { - const groundedTextRegex = /([\s\S]*?)<\/grounded_text>/g; - let lastIndex = 0; - let match; - - while ((match = groundedTextRegex.exec(content)) !== null) { - const [fullMatch, citationIndex, groundedText] = match; - - // Add normal text before the grounded text - if (match.index > lastIndex) { - const normalText = content.slice(lastIndex, match.index).trim(); - if (normalText) { - this.currentMessage.content.push({ - index: this.currentMessage.content.length, - type: TEXT_TYPE.NORMAL, - text: normalText, - citation_ids: null, - }); - } - } - - // Add grounded text - const citation_id = uuidv4(); - this.currentMessage.content.push({ - index: this.currentMessage.content.length, - type: TEXT_TYPE.GROUNDED, - text: groundedText.trim(), - citation_ids: [citation_id], - }); - - // Add citation - this.currentMessage.citations!.push({ - citation_id, - chunk_id: '', - type: CHUNK_TYPE.TEXT, - direct_text: '', - }); - - lastIndex = match.index + fullMatch.length; - } - - // Add any remaining 
normal text after the last grounded text - if (lastIndex < content.length) { - const remainingText = content.slice(lastIndex).trim(); - if (remainingText) { - this.currentMessage.content.push({ - index: this.currentMessage.content.length, - type: TEXT_TYPE.NORMAL, - text: remainingText, - citation_ids: null, - }); - } - } - } - - getResult(): AssistantMessage { - return this.currentMessage; - } -} diff --git a/src/client/views/nodes/ChatBox/StreamedAnswerParser.ts b/src/client/views/nodes/ChatBox/StreamedAnswerParser.ts new file mode 100644 index 000000000..3585cab4a --- /dev/null +++ b/src/client/views/nodes/ChatBox/StreamedAnswerParser.ts @@ -0,0 +1,73 @@ +import { threadId } from 'worker_threads'; + +enum ParserState { + Outside, + InGroundedText, + InNormalText, +} + +export class StreamedAnswerParser { + private state: ParserState = ParserState.Outside; + private buffer: string = ''; + private result: string = ''; + private isStartOfLine: boolean = true; + + public parse(char: string): string { + switch (this.state) { + case ParserState.Outside: + if (char === '<') { + this.buffer = '<'; + } else if (char === '>') { + if (this.buffer.startsWith('') { + this.state = ParserState.Outside; + this.buffer = ''; + } else if (this.buffer.startsWith('') { + this.state = ParserState.Outside; + this.buffer = ''; + } else if (this.buffer.startsWith('<')) { + this.buffer += char; + } else { + this.processChar(char); + } + break; + } + + return this.result.trim(); + } + + private processChar(char: string): void { + if (this.isStartOfLine && char === ' ') { + // Skip leading spaces + return; + } + if (char === '\n') { + this.result += char; + this.isStartOfLine = true; + } else { + this.result += char; + this.isStartOfLine = false; + } + } + + public reset(): void { + this.state = ParserState.Outside; + this.buffer = ''; + this.result = ''; + this.isStartOfLine = true; + } +} -- cgit v1.2.3-70-g09d2 From 74208b86f0b4d32bcf071fea23dd629d2cc7b46b Mon Sep 17 00:00:00 2001 From: 
"A.J. Shulman" Date: Wed, 21 Aug 2024 16:08:07 -0400 Subject: removed printlines --- src/client/views/nodes/ChatBox/Agent.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index 4ccb179f0..6f42c08b9 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -163,9 +163,9 @@ export class Agent { for (const char of content) { if (currentTag === 'answer') { currentContent += char; - console.log(char); + //console.log(char); const streamedAnswer = this.streamedAnswerParser.parse(char); - console.log(streamedAnswer); + //console.log(streamedAnswer); onAnswerUpdate(streamedAnswer); continue; } else if (char === '<') { -- cgit v1.2.3-70-g09d2 From 3d290c126623404b56958d16becc244d04548658 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Thu, 22 Aug 2024 11:17:28 -0400 Subject: fixed table citations and now formatted chunks show table info as well --- src/client/views/nodes/ChatBox/ChatBox.tsx | 13 ++++++++----- src/client/views/nodes/ChatBox/MessageComponent.tsx | 2 +- src/client/views/nodes/ChatBox/prompts.ts | 2 ++ src/client/views/nodes/ChatBox/tools/RAGTool.ts | 5 +++-- src/server/ApiManagers/AssistantManager.ts | 10 ++++------ 5 files changed, 18 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 45f5c0a65..d79233fae 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -251,9 +251,12 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] }; console.log(docChunkSimpl); const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId); + console.log(foundChunk); if (foundChunk) { - switch (getChunkType(foundChunk.chunkType)) { + 
console.log(getChunkType(foundChunk.chunkType)); + switch (foundChunk.chunkType) { case CHUNK_TYPE.IMAGE: + case CHUNK_TYPE.TABLE: const values = foundChunk.location?.replace(/[\[\]]/g, '').split(','); if (values?.length !== 4) { @@ -262,9 +265,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } const x1 = parseFloat(values[0]) * Doc.NativeWidth(doc); - const y1 = parseFloat(values[1]) * Doc.NativeHeight(doc); + const y1 = parseFloat(values[1]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc); const x2 = parseFloat(values[2]) * Doc.NativeWidth(doc); - const y2 = parseFloat(values[3]) * Doc.NativeHeight(doc); + const y2 = parseFloat(values[3]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc); const annotationKey = Doc.LayoutFieldKey(doc) + '_annotations'; @@ -290,7 +293,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }); break; default: - console.log('Chunk type not supported'); + console.log('Chunk type not supported', foundChunk.chunkType); break; } } @@ -339,7 +342,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { runInAction(() => { this.history.push({ role: ASSISTANT_ROLE.ASSISTANT, - content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: 'Welcome to the Document Analyser Assistant! Link a document or ask questions to get started.', citation_ids: null }], + content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: `Hey, ${this.userName()} Welcome to the Your Friendly Assistant! 
Link a document or ask questions about anything to get started.`, citation_ids: null }], processing_info: [], }); }); diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx index d0e78c751..812e52ee0 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/ChatBox/MessageComponent.tsx @@ -16,7 +16,7 @@ const MessageComponentBox: React.FC = function ({ message const renderContent = (item: MessageContent) => { const i = item.index; - console.log('item', item, 'index', i); + //console.log('item', item, 'index', i); if (item.type === TEXT_TYPE.GROUNDED) { const citation_ids = item.citation_ids || []; return ( diff --git a/src/client/views/nodes/ChatBox/prompts.ts b/src/client/views/nodes/ChatBox/prompts.ts index 845b4d255..76d958daf 100644 --- a/src/client/views/nodes/ChatBox/prompts.ts +++ b/src/client/views/nodes/ChatBox/prompts.ts @@ -27,6 +27,8 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto If a tool is needed, ALWAYS select the most appropriate tool based on the user's query. If the query could relate to user documents or require external information (e.g., RAG, search + website scraping, data analysis), USE the appropriate tool to gather that information. If there are no user docs or the user docs have not yielded helpful information, use the search tool to find websites followed by the website scraper tool to get useful infromation from one of those websites. You can use the website scraper (or even the search tool), multiple times to find information from multiple websites either from the same search or different searches. + Always follow the response structure provided in the instructions. 
+ If you are stuck in a loop because a tool isn't helping you even though you think it should, use the 'no_tool' action to proceed with the response and ask the user for more information or clarification or let them know you cannot answer their question and why. diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts index 26fa2adc5..fcd0ea43f 100644 --- a/src/client/views/nodes/ChatBox/tools/RAGTool.ts +++ b/src/client/views/nodes/ChatBox/tools/RAGTool.ts @@ -54,9 +54,10 @@ export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { 5. Detailed Citation Guidelines: a. Create a unique citation for each distinct piece of information from the chunks that is used to support grounded_text. + b. ALL TEXT CITATIONS must have direct text in its element content (e.g. DIRECT TEXT HERE) that is a relevant subset of the original text chunk that is being cited specifically. b. Ensure each citation has a unique index number. c. Specify the correct type: "text", "image", or "table". - d. For text chunks, include only the relevant subset of the original text that the grounded_text is based on. + d. For text chunks, the content of the citation should ALWAYS have the relevant subset of the original text that the grounded_text is based on. e. For image/table chunks, leave the citation content empty. f. One citation can be used for multiple grounded_text tags if they are based on the same chunk information. g. !!!DO NOT OVERCITE - only include citations for information that is directly relevant to the grounded_text. 
@@ -120,7 +121,7 @@ export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { async getFormattedChunks(relevantChunks: RAGChunk[]): Promise<{ type: string; text?: string; image_url?: { url: string } }[]> { try { const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks }); - + console.log('Formatted Chunks:', formattedChunks); if (!formattedChunks) { throw new Error('Failed to format chunks'); } diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index a59a2d22d..9bc5bf128 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -189,7 +189,7 @@ export default class AssistantManager extends ApiManager { try { // Read file data and convert to base64 - const file_data = fs.readFileSync(public_path, { encoding: 'base64' }); + const file_data: string = fs.readFileSync(public_path, { encoding: 'base64' }); const response = await axios.post( 'http://localhost:8080/createDocument', @@ -265,12 +265,12 @@ export default class AssistantManager extends ApiManager { for (const chunk of relevantChunks) { content.push({ type: 'text', - text: ``, + text: ``, }); if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') { try { - const filePath = serverPathToFile(Directory.parsed_files, chunk.metadata.file_path); + const filePath = serverPathToFile(Directory.chunk_images, chunk.metadata.file_path); const imageBuffer = await readFileAsync(filePath); const base64Image = imageBuffer.toString('base64'); if (base64Image) { @@ -286,10 +286,8 @@ export default class AssistantManager extends ApiManager { } catch (error) { console.error(`Error reading image file for chunk ${chunk.id}:`, error); } - content.push({ type: 'text', text: `\n\n` }); - } else { - content.push({ type: 'text', text: `${chunk.metadata.text}\n\n` }); } + content.push({ type: 'text', text: `${chunk.metadata.text}\n\n` }); } content.push({ type: 
'text', text: '' }); -- cgit v1.2.3-70-g09d2 From 9be434cddc30baada63aff0c5dae6dbf606f2590 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Thu, 22 Aug 2024 15:32:18 -0400 Subject: adding csv --- src/client/views/nodes/ChatBox/Agent.ts | 63 ++++++++++++---------- src/client/views/nodes/ChatBox/AnswerParser.ts | 3 ++ src/client/views/nodes/ChatBox/ChatBox.tsx | 22 ++++++-- src/client/views/nodes/ChatBox/prompts.ts | 3 +- .../views/nodes/ChatBox/tools/CreateCSVTool.ts | 51 ++++++++++++++++++ src/server/ApiManagers/AssistantManager.ts | 37 +++++++++++++ 6 files changed, 147 insertions(+), 32 deletions(-) create mode 100644 src/client/views/nodes/ChatBox/tools/CreateCSVTool.ts (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index 6f42c08b9..eaa17d283 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -15,6 +15,7 @@ import { on } from 'events'; import { v4 as uuidv4 } from 'uuid'; import { AnswerParser } from './AnswerParser'; import { StreamedAnswerParser } from './StreamedAnswerParser'; +import { CreateCSVTool } from './tools/CreateCSVTool'; dotenv.config(); @@ -33,7 +34,14 @@ export class Agent { private processingInfo: ProcessingInfo[] = []; private streamedAnswerParser: StreamedAnswerParser = new StreamedAnswerParser(); - constructor(_vectorstore: Vectorstore, summaries: () => string, history: () => string, csvData: () => { filename: string; id: string; text: string }[], addLinkedUrlDoc: (url: string, id: string) => void) { + constructor( + _vectorstore: Vectorstore, + summaries: () => string, + history: () => string, + csvData: () => { filename: string; id: string; text: string }[], + addLinkedUrlDoc: (url: string, id: string) => void, + createCSVInDash: (url: string, title: string, id: string, data: string) => void + ) { this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); this.vectorstore = _vectorstore; 
this._history = history; @@ -45,6 +53,7 @@ export class Agent { dataAnalysis: new DataAnalysisTool(csvData), websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc), searchTool: new SearchTool(addLinkedUrlDoc), + createCSV: new CreateCSVTool(createCSVInDash), no_tool: new NoTool(), }; } @@ -114,7 +123,7 @@ export class Agent { } } else if (key === 'action_input') { const actionInput = stage[key]; - console.log(`Action input:`, actionInput); + console.log(`Action input:`, actionInput.inputs); if (currentAction) { try { // Parse the inputs @@ -204,31 +213,31 @@ export class Agent { const tool = this.tools[action]; const args: Record = {}; - for (const paramName in tool.parameters) { - if (actionInput[paramName] !== undefined) { - if (Array.isArray(actionInput[paramName])) { - // If the input is already an array, use it as is - args[paramName] = actionInput[paramName]; - } else if (typeof actionInput[paramName] === 'object' && actionInput[paramName] !== null) { - // If the input is an object, check if it has multiple of the same tag - const values = Object.values(actionInput[paramName]); - if (values.length > 1) { - // If there are multiple values, convert to an array - args[paramName] = values; - } else { - // If there's only one value, use it directly - args[paramName] = values[0]; - } - } else { - // For single values, use them as is - args[paramName] = actionInput[paramName]; - } - } else if (tool.parameters[paramName].required === 'true') { - throw new Error(`Missing required parameter '${paramName}' for action '${action}'`); - } - } - - return await tool.execute(args); + // for (const paramName in tool.parameters) { + // if (actionInput[paramName] !== undefined) { + // if (Array.isArray(actionInput[paramName])) { + // // If the input is already an array, use it as is + // args[paramName] = actionInput[paramName]; + // } else if (typeof actionInput[paramName] === 'object' && actionInput[paramName] !== null) { + // // If the input is an object, check if it 
has multiple of the same tag + // const values = Object.values(actionInput[paramName]); + // if (values.length > 1) { + // // If there are multiple values, convert to an array + // args[paramName] = values; + // } else { + // // If there's only one value, use it directly + // args[paramName] = values[0]; + // } + // } else { + // // For single values, use them as is + // args[paramName] = actionInput[paramName]; + // } + // } else if (tool.parameters[paramName].required === 'true') { + // throw new Error(`Missing required parameter '${paramName}' for action '${action}'`); + // } + // } + + return await tool.execute(actionInput); } private parseActionInputs(inputs: any): Record { diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/AnswerParser.ts index b18083a27..05d26b8de 100644 --- a/src/client/views/nodes/ChatBox/AnswerParser.ts +++ b/src/client/views/nodes/ChatBox/AnswerParser.ts @@ -9,6 +9,7 @@ export class AnswerParser { const followUpQuestionsRegex = /([\s\S]*?)<\/follow_up_questions>/; const questionRegex = /(.*?)<\/question>/g; const groundedTextRegex = /([\s\S]*?)<\/grounded_text>/g; + const normalTextRegex = /([\s\S]*?)<\/normal_text>/g; const answerMatch = answerRegex.exec(xml); const citationsMatch = citationsRegex.exec(xml); @@ -49,6 +50,8 @@ export class AnswerParser { } } + rawTextContent = rawTextContent.replace(normalTextRegex, '$1'); + // Parse text content (normal and grounded) let lastIndex = 0; let match; diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index d79233fae..ffede6901 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -3,7 +3,7 @@ import { observer } from 'mobx-react'; import OpenAI, { ClientOptions } from 'openai'; import * as React from 'react'; import { Doc, DocListCast } from '../../../../fields/Doc'; -import { CsvCast, DocCast, PDFCast, StrCast } from '../../../../fields/Types'; 
+import { CsvCast, DocCast, PDFCast, RTFCast, StrCast } from '../../../../fields/Types'; import { DocumentType } from '../../../documents/DocumentTypes'; import { Docs } from '../../../documents/Documents'; import { LinkManager } from '../../../util/LinkManager'; @@ -60,7 +60,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id); } this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds); - this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc); + this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.createCSVInDash); this.messagesRef = React.createRef(); reaction( @@ -77,7 +77,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }; @action - addCSVForAnalysis = async (newLinkedDoc: Doc) => { + addCSVForAnalysis = async (newLinkedDoc: Doc, id?: string) => { console.log('adding csv file for analysis'); if (!newLinkedDoc.chunk_simpl) { const csvData: string = StrCast(newLinkedDoc.text); @@ -104,7 +104,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { model: 'gpt-3.5-turbo', }); console.log('CSV Data:', csvData); - const csvId = uuidv4(); + const csvId = id ?? 
uuidv4(); this.linked_csv_files.push({ filename: CsvCast(newLinkedDoc.data).url.pathname, @@ -239,6 +239,20 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { return ClientUtils.CurrentUserEmail; } + @action + createCSVInDash = async (url: string, title: string, id: string, data: string) => { + console.log('Creating CSV in Dash:', url, title); + const doc = DocCast(await DocUtils.DocumentFromType('csv', url, { title: title, text: RTFCast(data) })); + + const linkDoc = Docs.Create.LinkDocument(this.Document, doc); + LinkManager.Instance.addLink(linkDoc); + + doc && this._props.addDocument?.(doc); + await DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); + + this.addCSVForAnalysis(doc, id); + }; + @action handleCitationClick = (citation: Citation) => { console.log('Citation clicked:', citation); diff --git a/src/client/views/nodes/ChatBox/prompts.ts b/src/client/views/nodes/ChatBox/prompts.ts index 76d958daf..71a4f33b4 100644 --- a/src/client/views/nodes/ChatBox/prompts.ts +++ b/src/client/views/nodes/ChatBox/prompts.ts @@ -27,8 +27,9 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto If a tool is needed, ALWAYS select the most appropriate tool based on the user's query. If the query could relate to user documents or require external information (e.g., RAG, search + website scraping, data analysis), USE the appropriate tool to gather that information. If there are no user docs or the user docs have not yielded helpful information, use the search tool to find websites followed by the website scraper tool to get useful infromation from one of those websites. You can use the website scraper (or even the search tool), multiple times to find information from multiple websites either from the same search or different searches. + Ensure at the end of every final answer, you provide exactly three follow-up questions from the user's perspective—from the perspective that they are asking the question. 
Always follow the response structure provided in the instructions. - If you are stuck in a loop because a tool isn't helping you even though you think it should, use the 'no_tool' action to proceed with the response and ask the user for more information or clarification or let them know you cannot answer their question and why. + If a tool doesn't work—or yield helpful results—after two tries, EITHER use another tool or proceed with the response and ask the user for more information or clarification or let them know you cannot answer their question and why. DO NOT CONTINUE WITH THE SAME TOOL 3 TIMES. diff --git a/src/client/views/nodes/ChatBox/tools/CreateCSVTool.ts b/src/client/views/nodes/ChatBox/tools/CreateCSVTool.ts new file mode 100644 index 000000000..55015846b --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools/CreateCSVTool.ts @@ -0,0 +1,51 @@ +import { BaseTool } from './BaseTool'; +import { Networking } from '../../../../Network'; + +export class CreateCSVTool extends BaseTool<{ csvData: string; filename: string }> { + private _handleCSVResult: (url: string, filename: string, id: string, data: string) => void; + + constructor(handleCSVResult: (url: string, title: string, id: string, data: string) => void) { + super( + 'createCSV', + 'Creates a CSV file from raw CSV data and saves it to the server', + { + type: 'object', + properties: { + csvData: { + type: 'string', + description: 'A string of comma-separated values representing the CSV data.', + }, + filename: { + type: 'string', + description: 'The base name of the CSV file to be created. Should end in ".csv".', + }, + }, + required: ['csvData', 'filename'], + }, + 'Provide a CSV string and a filename to create a CSV file.', + 'Creates a CSV file from the provided CSV string and saves it to the server with a unique identifier, returning the file URL and UUID.' 
+ ); + this._handleCSVResult = handleCSVResult; + } + + async execute(args: { csvData: string; filename: string }): Promise { + try { + console.log('Creating CSV file:', args.filename, ' with data:', args.csvData); + // Post the raw CSV data to the createCSV endpoint on the server + const { fileUrl, id } = await Networking.PostToServer('/createCSV', { filename: args.filename, data: args.csvData }); + + // Handle the result by invoking the callback + this._handleCSVResult(fileUrl, args.filename, id, args.csvData); + + return [ + { + type: 'text', + text: `File successfully created: ${fileUrl}. \nNow a CSV file with this data and the name ${args.filename} is available as a user doc.`, + }, + ]; + } catch (error) { + console.error('Error creating CSV file:', error); + throw new Error('Failed to create CSV file.'); + } + } +} diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 9bc5bf128..0456c730b 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -296,6 +296,43 @@ export default class AssistantManager extends ApiManager { }, }); + register({ + method: Method.POST, + subscription: '/createCSV', + secureHandler: async ({ req, res }) => { + const { filename, data } = req.body; + + // Validate input + if (!filename || !data) { + res.status(400).send({ error: 'Filename and data fields are required.' 
}); + return; + } + + try { + // Generate a UUID for the file + const uuidv4 = uuid.v4(); + + // Construct the full filename with the UUID prefix + const fullFilename = `${uuidv4}-${filename}`; + + // Get the full server path where the file will be saved + const serverFilePath = serverPathToFile(Directory.csv, fullFilename); + + // Write the CSV data (which is a raw string) to the file + await writeFileAsync(serverFilePath, data, 'utf8'); + + // Construct the full client URL for accessing the file + const fileUrl = clientPathToFile(Directory.csv, fullFilename); + + // Return the file URL and UUID to the client + res.send({ fileUrl, id: uuidv4 }); + } catch (error: any) { + console.error('Error creating CSV file:', error); + res.status(500).send({ error: 'Failed to create CSV file.', details: error.message }); + } + }, + }); + register({ method: Method.POST, subscription: '/chunkDocument', -- cgit v1.2.3-70-g09d2 From 3a1d859359b462fc9a9f1c001d6681a8d886f2b6 Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Thu, 29 Aug 2024 21:01:29 -0400 Subject: added loop summary and updated type for RAG tool output --- src/client/views/nodes/ChatBox/AnswerParser.ts | 6 ++++++ src/client/views/nodes/ChatBox/ChatBox.tsx | 6 +++++- src/client/views/nodes/ChatBox/prompts.ts | 16 ++++++++++++---- src/client/views/nodes/ChatBox/tools/RAGTool.ts | 5 +++-- src/client/views/nodes/ChatBox/types.ts | 1 + 5 files changed, 27 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/AnswerParser.ts index 05d26b8de..885114195 100644 --- a/src/client/views/nodes/ChatBox/AnswerParser.ts +++ b/src/client/views/nodes/ChatBox/AnswerParser.ts @@ -10,10 +10,12 @@ export class AnswerParser { const questionRegex = /(.*?)<\/question>/g; const groundedTextRegex = /([\s\S]*?)<\/grounded_text>/g; const normalTextRegex = /([\s\S]*?)<\/normal_text>/g; + const loopSummaryRegex = /([\s\S]*?)<\/loop_summary>/; const answerMatch = answerRegex.exec(xml); const citationsMatch = citationsRegex.exec(xml); const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml); + const loopSummaryMatch = loopSummaryRegex.exec(xml); if (!answerMatch) { throw new Error('Invalid XML: Missing tag.'); @@ -31,6 +33,9 @@ export class AnswerParser { if (followUpQuestionsMatch) { rawTextContent = rawTextContent.replace(followUpQuestionsMatch[0], '').trim(); } + if (loopSummaryMatch) { + rawTextContent = rawTextContent.replace(loopSummaryMatch[0], '').trim(); + } // Parse citations let citationMatch; @@ -112,6 +117,7 @@ export class AnswerParser { follow_up_questions: followUpQuestions, citations, processing_info: processingInfo, + loop_summary: loopSummaryMatch ? 
loopSummaryMatch[1].trim() : undefined, }; return assistantResponse; diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index ffede6901..345bfd8d1 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -445,7 +445,11 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { get formattedHistory(): string { let history = '\n'; for (const message of this.history) { - history += `<${message.role}>${message.content.map(content => content.text).join(' ')}\n`; + history += `<${message.role}>${message.content.map(content => content.text).join(' ')}`; + if (message.loop_summary) { + history += `${message.loop_summary}`; + } + history += `\n`; } history += ''; return history; diff --git a/src/client/views/nodes/ChatBox/prompts.ts b/src/client/views/nodes/ChatBox/prompts.ts index 71a4f33b4..9ea86dbf4 100644 --- a/src/client/views/nodes/ChatBox/prompts.ts +++ b/src/client/views/nodes/ChatBox/prompts.ts @@ -37,16 +37,17 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto When providing your final response, use the following structure: -
- <grounded_text> - Wrap text that is derived from tool-based or chunk-based information within these tags, ensuring proper citation. - <normal_text> - Wrap text that is not derived from tool-based or chunk-based information within these tags. -
+ <grounded_text> - Wrap text that is derived from tool-based or chunk-based information within these tags, ensuring proper citation. + <normal_text> - Wrap text that is not derived from tool-based or chunk-based information within these tags. <citation> - Provide citations for each grounded text, referencing the tool or chunk used. <question> - Include exactly three follow-up questions from the user's perspective within these tags. + + <loop_summary> - Provide a summary of the actions and tools used by the assistant throughout the interaction within these tags. +
@@ -103,6 +104,7 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto The complete answer to the user's query, with grounded information wrapped in <grounded_text> tags and general information wrapped in <normal_text> tags. Exactly three follow-up questions written from the user's perspective, enclosed within <follow_up_questions> tags. + A concise <loop_summary> that describes the actions and tools used throughout the interaction. @@ -267,6 +269,9 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto What specific infrastructure improvements were made in Vancouver as a result of hosting the 2010 Winter Olympics? How did the performance of Canadian athletes in the 2010 Vancouver Winter Olympics compare to their performance in previous Winter Olympics?
+ + The assistant used the RAG tool to gather information from the user's documents, focusing on key moments and official statements. It then used the dataAnalysis tool to analyze the medal count and economic impact data, providing a comprehensive overview of the 2010 Vancouver Winter Olympics's impact. + @@ -424,6 +429,9 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto Are there any special permits required for hiking certain trails in Yosemite? What wildlife might I encounter while hiking in Yosemite, and how should I prepare for potential encounters? + + The assistant used the search tool to find relevant websites, then scraped information from those sites using the websiteInfoScraper tool. Additional information about current trail conditions was gathered from the NPS website to provide a comprehensive and safe hiking guide. + diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts index fcd0ea43f..fe8deae6b 100644 --- a/src/client/views/nodes/ChatBox/tools/RAGTool.ts +++ b/src/client/views/nodes/ChatBox/tools/RAGTool.ts @@ -4,6 +4,7 @@ import { RAGChunk } from '../types'; import * as fs from 'fs'; import { Networking } from '../../../../Network'; import { file } from 'jszip'; +import { ChatCompletion, ChatCompletionContentPart, ChatCompletionMessageParam } from 'openai/resources'; export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { constructor(private vectorstore: Vectorstore) { @@ -118,10 +119,10 @@ export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { return formatted_chunks; } - async getFormattedChunks(relevantChunks: RAGChunk[]): Promise<{ type: string; text?: string; image_url?: { url: string } }[]> { + async getFormattedChunks(relevantChunks: RAGChunk[]): Promise { try { const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks }); - console.log('Formatted Chunks:', 
formattedChunks); + if (!formattedChunks) { throw new Error('Failed to format chunks'); } diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index 09f14f019..a59ead067 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -64,6 +64,7 @@ export interface AssistantMessage { follow_up_questions?: string[]; citations?: Citation[]; processing_info: ProcessingInfo[]; + loop_summary?: string; } export interface MessageContent { -- cgit v1.2.3-70-g09d2 From 9594247dfd645516600d8fa5dfd875cbbd6aca13 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Fri, 30 Aug 2024 13:47:30 -0400 Subject: tried to add progress bar to chatbox but not showing up --- src/client/views/nodes/ChatBox/ChatBox.tsx | 29 ++++++++++- src/client/views/nodes/ChatBox/ProgressBar.scss | 51 ++++++++++++++++++ src/client/views/nodes/ChatBox/ProgressBar.tsx | 17 ++++++ .../views/nodes/ChatBox/vectorstore/Vectorstore.ts | 60 +++++++++++++++------- src/server/ApiManagers/AssistantManager.ts | 25 ++++++++- 5 files changed, 161 insertions(+), 21 deletions(-) create mode 100644 src/client/views/nodes/ChatBox/ProgressBar.scss create mode 100644 src/client/views/nodes/ChatBox/ProgressBar.tsx (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 345bfd8d1..7e238e28b 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -23,6 +23,7 @@ import { chunk } from 'lodash'; import { DocUtils } from '../../../documents/DocUtils'; import { createRef } from 'react'; import { ClientUtils } from '../../../../ClientUtils'; +import { ProgressBar } from './ProgressBar'; dotenv.config(); @@ -33,6 +34,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @observable isLoading: boolean = false; @observable isUploadingDocs: boolean = false; + @observable uploadProgress: number = 0; // Track progress percentage 
+ @observable currentStep: string = ''; // Track current step name @observable expandedScratchpadIndex: number | null = null; @observable inputValue: string = ''; @observable private linked_docs_to_add: ObservableSet = observable.set(); @@ -73,7 +76,23 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @action addDocToVectorstore = async (newLinkedDoc: Doc) => { - await this.vectorstore.addAIDoc(newLinkedDoc); + this.isUploadingDocs = true; + this.uploadProgress = 0; + this.currentStep = 'Initializing...'; + + await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress); + + runInAction(() => { + this.isUploadingDocs = false; + this.uploadProgress = 0; + this.currentStep = ''; + }); + }; + + @action + updateProgress = (progress: number, step: string) => { + this.uploadProgress = progress; + this.currentStep = step; }; @action @@ -479,6 +498,14 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { render() { return (
+ {this.isUploadingDocs && ( +
+
+ +
{this.currentStep}
+
+
+ )} {this.isUploadingDocs && (
diff --git a/src/client/views/nodes/ChatBox/ProgressBar.scss b/src/client/views/nodes/ChatBox/ProgressBar.scss new file mode 100644 index 000000000..dcde666de --- /dev/null +++ b/src/client/views/nodes/ChatBox/ProgressBar.scss @@ -0,0 +1,51 @@ +.progress-circle { + position: relative; + width: 120px; + height: 120px; +} + +.progress-ring { + transform: rotate(-90deg); + position: absolute; + top: 0; + left: 0; +} + +.progress-ring__circle { + transition: 0.35s stroke-dashoffset; + transform: rotate(-90deg); + transform-origin: 50% 50%; +} + +.progress-text { + position: absolute; + top: 50%; + left: 50%; + transform: translate(-50%, -50%); + font-size: 24px; + font-weight: bold; + color: #000; +} + +.uploading-overlay { + position: absolute; + top: 0; + left: 0; + right: 0; + bottom: 0; + background-color: rgba(255, 255, 255, 0.8); + display: flex; + align-items: center; + justify-content: center; + z-index: 1000; +} + +.progress-container { + text-align: center; +} + +.step-name { + margin-top: 10px; + font-size: 18px; + color: #333; +} diff --git a/src/client/views/nodes/ChatBox/ProgressBar.tsx b/src/client/views/nodes/ChatBox/ProgressBar.tsx new file mode 100644 index 000000000..765ddbef5 --- /dev/null +++ b/src/client/views/nodes/ChatBox/ProgressBar.tsx @@ -0,0 +1,17 @@ +import React from 'react'; +import './ProgressBar.scss'; // Create this CSS file for styling + +interface ProgressBarProps { + progress: number; +} + +export const ProgressBar: React.FC = ({ progress }) => { + return ( +
+ + + +
{Math.round(progress)}%
+
+ ); +}; diff --git a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts index 4383bb72d..388574bff 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts @@ -60,10 +60,11 @@ export class Vectorstore { this.index = this.pinecone.Index(this.indexName); } - async addAIDoc(doc: Doc) { + async addAIDoc(doc: Doc, progressCallback: (progress: number, step: string) => void) { console.log('Adding AI Document:', doc); const ai_document_status: string = StrCast(doc.ai_document_status); - if (ai_document_status !== undefined && ai_document_status !== null && ai_document_status !== '' && ai_document_status !== ' ' && ai_document_status !== '{}') { + + if (ai_document_status !== undefined && ai_document_status !== null && ai_document_status.trim() !== '' && ai_document_status !== '{}') { if (ai_document_status === 'IN PROGRESS') { console.log('Already in progress.'); return; @@ -72,31 +73,54 @@ export class Vectorstore { } else { doc.ai_document_status = 'PROGRESS'; console.log(doc); - console.log(PDFCast(doc.data)?.url?.pathname); - console.log(CsvCast(doc.data)?.url?.pathname); const local_file_path: string = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname; console.log('Local File Path:', local_file_path); - if (local_file_path !== undefined || local_file_path !== null || local_file_path !== '') { - const { document_json } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); - console.log('Document JSON:', document_json); - //const ai_document: AI_Document = convertToAIDocument(document_json); - this.documents.push(document_json); - await this.indexDocument(JSON.parse(JSON.stringify(document_json, (key, value) => (value === null || value === undefined ? 
undefined : value)))); - console.log(`Document added: ${document_json.file_name}`); - doc.summary = document_json.summary; - doc.ai_doc_id = document_json.doc_id; - this._doc_ids.push(document_json.doc_id); - doc.ai_purpose = document_json.purpose; - if (doc.vectorstore_id === undefined || doc.vectorstore_id === null || doc.vectorstore_id === '' || doc.vectorstore_id === '[]') { + + if (local_file_path) { + // Start the document creation process + const response = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); + const jobId = response.job_id; + + // Poll the server for progress updates + let result: any = null; + + while (!result) { + await new Promise(resolve => setTimeout(resolve, 5000)); // Polling interval + + const progressResponse = JSON.parse(await Networking.FetchFromServer(`/getProgress/${jobId}`)); + if (progressResponse) { + const progress = progressResponse.progress; + const step = progressResponse.step; + progressCallback(progress, step); + } + + const resultResponse = JSON.parse(await Networking.FetchFromServer(`/getResult/${jobId}`)); + if (resultResponse.status === 200) { + result = resultResponse.data; + } + } + + // Process the final document result + console.log('Document JSON:', result); + this.documents.push(result); + await this.indexDocument(JSON.parse(JSON.stringify(result, (key, value) => (value === null || value === undefined ? 
undefined : value)))); + console.log(`Document added: ${result.file_name}`); + doc.summary = result.summary; + doc.ai_doc_id = result.doc_id; + this._doc_ids.push(result.doc_id); + doc.ai_purpose = result.purpose; + + if (!doc.vectorstore_id) { doc.vectorstore_id = JSON.stringify([this._id]); } else { doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this._id])); } - if (doc.chunk_simpl === undefined || doc.chunk_simpl === null || doc.chunk_simpl === '' || doc.chunk_simpl === '[]') { + + if (!doc.chunk_simpl) { doc.chunk_simpl = JSON.stringify({ chunks: [] }); } - document_json.chunks.forEach((chunk: RAGChunk) => { + result.chunks.forEach((chunk: RAGChunk) => { const chunkToAdd = { chunkId: chunk.id, startPage: chunk.metadata.start_page, diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 0456c730b..54d78cd15 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -206,10 +206,16 @@ export default class AssistantManager extends ApiManager { const jobId = response.data.job_id; - // Poll for results + let progress; let result; while (!result) { - await new Promise(resolve => setTimeout(resolve, 5000)); // Wait for 1 second + await new Promise(resolve => setTimeout(resolve, 5000)); // Wait for 5 seconds + const progressResponse = await axios.get(`http://localhost:8080/getProgress/${jobId}`); + const progress = progressResponse.data; + + // Accessing the correct keys + console.log(`Current step: ${progress.step}, Progress within step: ${progress.progress}%`); + const resultResponse = await axios.get(`http://localhost:8080/getResult/${jobId}`); if (resultResponse.status === 200) { result = resultResponse.data; @@ -255,6 +261,21 @@ export default class AssistantManager extends ApiManager { }, }); + register({ + method: Method.GET, + subscription: '/getProgress/:jobId', + secureHandler: async ({ req, res }) => { + const { jobId } = 
req.params; + try { + const progressResponse = await axios.get(`http://localhost:8080/getProgress/${jobId}`); + res.json(progressResponse.data); + } catch (error) { + console.error('Error getting progress:', error); + res.status(500).send({ error: 'Failed to get progress', details: JSON.parse(error as string).message }); + } + }, + }); + register({ method: Method.POST, subscription: '/formatChunks', -- cgit v1.2.3-70-g09d2 From 9983e5602ce18d771180b5c28d0ef78c71ef89e3 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Mon, 2 Sep 2024 14:32:14 -0400 Subject: added loading animation --- src/client/views/nodes/ChatBox/ChatBox.tsx | 28 ++++----- .../views/nodes/ChatBox/vectorstore/Vectorstore.ts | 34 +++++++---- src/server/ApiManagers/AssistantManager.ts | 71 ++++++++++++---------- 3 files changed, 70 insertions(+), 63 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 7e238e28b..fdc0e3a17 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -33,13 +33,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @observable.deep current_message: AssistantMessage | undefined = undefined; @observable isLoading: boolean = false; - @observable isUploadingDocs: boolean = false; @observable uploadProgress: number = 0; // Track progress percentage @observable currentStep: string = ''; // Track current step name @observable expandedScratchpadIndex: number | null = null; @observable inputValue: string = ''; @observable private linked_docs_to_add: ObservableSet = observable.set(); @observable private linked_csv_files: { filename: string; id: string; text: string }[] = []; + @observable private isUploadingDocs: boolean = false; private openai: OpenAI; private vectorstore_id: string; private vectorstore: Vectorstore; @@ -76,21 +76,25 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @action addDocToVectorstore = async 
(newLinkedDoc: Doc) => { - this.isUploadingDocs = true; this.uploadProgress = 0; this.currentStep = 'Initializing...'; + this.isUploadingDocs = true; - await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress); - - runInAction(() => { + try { + await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress); + } catch (error) { + console.error('Error uploading document:', error); + this.currentStep = 'Error during upload'; + } finally { this.isUploadingDocs = false; this.uploadProgress = 0; this.currentStep = ''; - }); + } }; @action updateProgress = (progress: number, step: string) => { + console.log('Progress:', progress, step); this.uploadProgress = progress; this.currentStep = step; }; @@ -394,17 +398,11 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { observe(this.linked_docs_to_add, change => { if (change.type === 'add') { - runInAction(() => { - this.isUploadingDocs = true; - }); if (PDFCast(change.newValue.data)) { this.addDocToVectorstore(change.newValue); } else if (CsvCast(change.newValue.data)) { this.addCSVForAnalysis(change.newValue); } - runInAction(() => { - this.isUploadingDocs = false; - }); } else if (change.type === 'delete') { console.log('Deleted docs: ', change.oldValue); } @@ -495,6 +493,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { console.log('Follow-up question clicked:', question); this.inputValue = question; }; + render() { return (
@@ -506,11 +505,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() {
)} - {this.isUploadingDocs && ( -
-
-
- )}

{this.userName()}'s AI Assistant

diff --git a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts index 388574bff..408274703 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts @@ -77,27 +77,35 @@ export class Vectorstore { console.log('Local File Path:', local_file_path); if (local_file_path) { + console.log('Creating AI Document...'); // Start the document creation process - const response = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); - const jobId = response.job_id; + const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); // Poll the server for progress updates + let inProgress: boolean = true; let result: any = null; + while (inProgress) { + await new Promise(resolve => setTimeout(resolve, 2000)); // Polling interval + + const resultResponse = await Networking.FetchFromServer(`/getResult/${jobId}`); + const resultResponseJson = JSON.parse(resultResponse); + //console.log('Result Response:', resultResponseJson); + if (resultResponseJson.status === 'completed') { + console.log('Result here:', resultResponseJson); + result = resultResponseJson; + break; + } - while (!result) { - await new Promise(resolve => setTimeout(resolve, 5000)); // Polling interval + const progressResponse = await Networking.FetchFromServer(`/getProgress/${jobId}`); + const progressResponseJson = JSON.parse(progressResponse); + //console.log('Progress Response:', progressResponseJson); - const progressResponse = JSON.parse(await Networking.FetchFromServer(`/getProgress/${jobId}`)); - if (progressResponse) { - const progress = progressResponse.progress; - const step = progressResponse.step; + if (progressResponseJson) { + console.log('Progress:', progressResponseJson); + const progress = progressResponseJson.progress; + const step = progressResponseJson.step; progressCallback(progress, step); } - 
- const resultResponse = JSON.parse(await Networking.FetchFromServer(`/getResult/${jobId}`)); - if (resultResponse.status === 200) { - result = resultResponse.data; - } } // Process the final document result diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 54d78cd15..2ffc99e58 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -204,23 +204,42 @@ export default class AssistantManager extends ApiManager { } ); - const jobId = response.data.job_id; + const jobId = response.data['job_id']; + console.log('Job ID:', jobId); - let progress; - let result; - while (!result) { - await new Promise(resolve => setTimeout(resolve, 5000)); // Wait for 5 seconds - const progressResponse = await axios.get(`http://localhost:8080/getProgress/${jobId}`); - const progress = progressResponse.data; + res.send({ jobId }); + } catch (error: any) { + console.error('Error communicating with chatbot:', error); + res.status(500).send({ error: 'Failed to communicate with the chatbot', details: error.message }); + } + }, + }); - // Accessing the correct keys - console.log(`Current step: ${progress.step}, Progress within step: ${progress.progress}%`); + register({ + method: Method.GET, + subscription: '/getProgress/:jobId', + secureHandler: async ({ req, res }) => { + const { jobId } = req.params; + try { + const progressResponse = await axios.get(`http://localhost:8080/getProgress/${jobId}`); + console.log(`Current step: ${progressResponse.data.step}, Progress within step: ${progressResponse.data.progress}%`); + res.json(progressResponse.data); + } catch (error) { + console.error('Error getting progress:', error); + res.status(500).send({ error: 'Failed to get progress', details: JSON.parse(error as string).message }); + } + }, + }); - const resultResponse = await axios.get(`http://localhost:8080/getResult/${jobId}`); - if (resultResponse.status === 200) { - result = 
resultResponse.data; - } - } + register({ + method: Method.GET, + subscription: '/getResult/:jobId', + secureHandler: async ({ req, res }) => { + const { jobId } = req.params; + try { + const finalResponse = await axios.get(`http://localhost:8080/getResult/${jobId}`); + console.log('Result:', finalResponse.data); + const result = finalResponse.data; if (result.chunks && Array.isArray(result.chunks)) { for (const chunk of result.chunks) { @@ -249,29 +268,15 @@ export default class AssistantManager extends ApiManager { } } } + result['status'] = 'completed'; } else { - console.warn("Result does not contain an iterable 'chunks' property"); + console.warn('Not ready'); + result.status = 'pending'; } - - res.send({ document_json: result }); - } catch (error: any) { - console.error('Error communicating with chatbot:', error); - res.status(500).send({ error: 'Failed to communicate with the chatbot', details: error.message }); - } - }, - }); - - register({ - method: Method.GET, - subscription: '/getProgress/:jobId', - secureHandler: async ({ req, res }) => { - const { jobId } = req.params; - try { - const progressResponse = await axios.get(`http://localhost:8080/getProgress/${jobId}`); - res.json(progressResponse.data); + res.json(result); } catch (error) { console.error('Error getting progress:', error); - res.status(500).send({ error: 'Failed to get progress', details: JSON.parse(error as string).message }); + res.status(500).send({ error: 'Failed to get progress', details: error }); } }, }); -- cgit v1.2.3-70-g09d2 From 785e55141cab178a761080f5c99384bb19855969 Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Mon, 2 Sep 2024 14:44:29 -0400 Subject: made the progress cooler (double bounce) --- src/client/views/nodes/ChatBox/ChatBox.tsx | 2 +- src/client/views/nodes/ChatBox/ProgressBar.scss | 54 ++++++++++++++++--------- src/client/views/nodes/ChatBox/ProgressBar.tsx | 18 ++++----- 3 files changed, 44 insertions(+), 30 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index fdc0e3a17..383be0bb7 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -500,7 +500,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { {this.isUploadingDocs && (
- +
{this.currentStep}
diff --git a/src/client/views/nodes/ChatBox/ProgressBar.scss b/src/client/views/nodes/ChatBox/ProgressBar.scss index dcde666de..ff5be4a38 100644 --- a/src/client/views/nodes/ChatBox/ProgressBar.scss +++ b/src/client/views/nodes/ChatBox/ProgressBar.scss @@ -1,30 +1,43 @@ -.progress-circle { +.spinner-container { + display: flex; + flex-direction: column; + justify-content: center; + align-items: center; + height: 100%; +} + +.spinner { + width: 60px; + height: 60px; position: relative; - width: 120px; - height: 120px; + margin-bottom: 20px; // Space between spinner and text } -.progress-ring { - transform: rotate(-90deg); +.double-bounce1, +.double-bounce2 { + width: 100%; + height: 100%; + border-radius: 50%; + background-color: #4a90e2; + opacity: 0.6; position: absolute; top: 0; left: 0; + animation: bounce 2s infinite ease-in-out; } -.progress-ring__circle { - transition: 0.35s stroke-dashoffset; - transform: rotate(-90deg); - transform-origin: 50% 50%; +.double-bounce2 { + animation-delay: -1s; } -.progress-text { - position: absolute; - top: 50%; - left: 50%; - transform: translate(-50%, -50%); - font-size: 24px; - font-weight: bold; - color: #000; +@keyframes bounce { + 0%, + 100% { + transform: scale(0); + } + 50% { + transform: scale(1); + } } .uploading-overlay { @@ -41,11 +54,16 @@ } .progress-container { + display: flex; + flex-direction: column; + align-items: center; text-align: center; } .step-name { - margin-top: 10px; font-size: 18px; color: #333; + text-align: center; + width: 100%; + margin-top: -10px; // Adjust to move the text closer to the spinner } diff --git a/src/client/views/nodes/ChatBox/ProgressBar.tsx b/src/client/views/nodes/ChatBox/ProgressBar.tsx index 765ddbef5..0aa07213f 100644 --- a/src/client/views/nodes/ChatBox/ProgressBar.tsx +++ b/src/client/views/nodes/ChatBox/ProgressBar.tsx @@ -1,17 +1,13 @@ import React from 'react'; -import './ProgressBar.scss'; // Create this CSS file for styling +import './ProgressBar.scss'; -interface 
ProgressBarProps { - progress: number; -} - -export const ProgressBar: React.FC = ({ progress }) => { +export const ProgressBar: React.FC = () => { return ( -
- - - -
{Math.round(progress)}%
+
+
+
+
+
); }; -- cgit v1.2.3-70-g09d2 From 6c7e67810419ddc0e59f548509b4066fbd2ca90f Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sat, 7 Sep 2024 11:10:33 -0400 Subject: Fixed web document rendering by changing cors --- package-lock.json | 2020 ++++++++++++++++---- package.json | 3 + src/client/util/Scripting.ts | 8 +- src/client/views/nodes/ChatBox/ChatBox.scss | 8 +- src/client/views/nodes/ChatBox/ChatBox.tsx | 59 +- src/client/views/nodes/ChatBox/prompts.ts | 1 + src/client/views/nodes/ChatBox/tools/RAGTool.ts | 22 +- src/client/views/nodes/ChatBox/types.ts | 2 + .../views/nodes/ChatBox/vectorstore/Vectorstore.ts | 7 +- src/client/views/nodes/PDFBox.tsx | 16 +- src/client/views/pdf/PDFViewer.tsx | 20 +- src/server/ApiManagers/AssistantManager.ts | 21 + 12 files changed, 1760 insertions(+), 427 deletions(-) (limited to 'src') diff --git a/package-lock.json b/package-lock.json index ca1985a7e..3a938ebd0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -27,8 +27,9 @@ "@fullcalendar/multimonth": "^6.1.10", "@fullcalendar/timegrid": "^6.1.15", "@internationalized/date": "^3.5.0", - "@mui/icons-material": "^5.14.19", - "@mui/material": "^5.14.19", + "@mozilla/readability": "^0.5.0", + "@mui/icons-material": "^6.0.1", + "@mui/material": "^6.0.1", "@octokit/core": "^6.0.1", "@pinecone-database/pinecone": "^2.2.2", "@react-google-maps/api": "^2.19.2", @@ -120,6 +121,8 @@ "formidable": "3.5.1", "fullcalendar": "^6.1.15", "function-plot": "^1.23.3", + "fuse.js": "^7.0.0", + "fuzzy-search": "^3.2.1", "golden-layout": "^2.6.0", "google-auth-library": "^9.4.1", "googleapis": "^142.0.0", @@ -221,7 +224,8 @@ "reveal.js": "^5.0.2", "rimraf": "^6.0.0", "sass": "^1.69.5", - "sass-loader": "^14.2.0", + "sass-loader": "^16.0.1", + "scrapfly-sdk": "^0.6.4", "serializr": "^3.0.2", "shelljs": "^0.8.5", "socket.io": "^4.7.2", @@ -274,6 +278,7 @@ "@types/express": "^4.17.21", "@types/express-session": "^1.17.10", "@types/file-saver": "^2.0.7", + "@types/fuzzy-search": "^2.1.5", 
"@types/howler": "^2.2.11", "@types/html-to-text": "^9.0.4", "@types/jquery": "^3.5.29", @@ -309,7 +314,7 @@ "eslint": "^9.9.0", "eslint-plugin-react": "^7.34.1", "globals": "^15.1.0", - "jsdom": "^24.0.0", + "jsdom": "^25.0.0", "mocha": "^10.2.0", "prettier": "^3.1.0", "scss-loader": "0.0.1", @@ -425,6 +430,956 @@ "node": ">=6.0.0" } }, + "node_modules/@antfu/install-pkg": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/@antfu/install-pkg/-/install-pkg-0.4.1.tgz", + "integrity": "sha512-T7yB5QNG29afhWVkVq7XeIMBa5U/vs9mX69YqayXypPRmYzUmzwnYltplHmPtZ4HPCn+sQKeXW8I47wCbuBOjw==", + "dependencies": { + "package-manager-detector": "^0.2.0", + "tinyexec": "^0.3.0" + }, + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/@antfu/utils": { + "version": "0.7.10", + "resolved": "https://registry.npmjs.org/@antfu/utils/-/utils-0.7.10.tgz", + "integrity": "sha512-+562v9k4aI80m1+VuMHehNJWLOFjBnXn3tdOitzD0il5b7smkSBal4+a3oKiQTbrwMmN/TBUMDvbdoWDehgOww==", + "funding": { + "url": "https://github.com/sponsors/antfu" + } + }, + "node_modules/@aws-crypto/crc32": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/crc32/-/crc32-3.0.0.tgz", + "integrity": "sha512-IzSgsrxUcsrejQbPVilIKy16kAT52EwB6zSaI+M3xxIhKh5+aldEyvI+z6erM7TCLB2BJsFrtHjp6/4/sr+3dA==", + "dependencies": { + "@aws-crypto/util": "^3.0.0", + "@aws-sdk/types": "^3.222.0", + "tslib": "^1.11.1" + } + }, + "node_modules/@aws-crypto/crc32/node_modules/@aws-crypto/util": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/util/-/util-3.0.0.tgz", + "integrity": "sha512-2OJlpeJpCR48CC8r+uKVChzs9Iungj9wkZrl8Z041DWEWvyIHILYKCPNzJghKsivj+S3mLo6BVc7mBNzdxA46w==", + "dependencies": { + "@aws-sdk/types": "^3.222.0", + "@aws-sdk/util-utf8-browser": "^3.0.0", + "tslib": "^1.11.1" + } + }, + "node_modules/@aws-crypto/crc32/node_modules/tslib": { + "version": "1.14.1", + "resolved": "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz", + 
"integrity": "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg==" + }, + "node_modules/@aws-crypto/sha256-browser": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/sha256-browser/-/sha256-browser-5.2.0.tgz", + "integrity": "sha512-AXfN/lGotSQwu6HNcEsIASo7kWXZ5HYWvfOmSNKDsEqC4OashTp8alTmaz+F7TC2L083SFv5RdB+qU3Vs1kZqw==", + "dependencies": { + "@aws-crypto/sha256-js": "^5.2.0", + "@aws-crypto/supports-web-crypto": "^5.2.0", + "@aws-crypto/util": "^5.2.0", + "@aws-sdk/types": "^3.222.0", + "@aws-sdk/util-locate-window": "^3.0.0", + "@smithy/util-utf8": "^2.0.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@aws-crypto/sha256-browser/node_modules/@smithy/is-array-buffer": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/is-array-buffer/-/is-array-buffer-2.2.0.tgz", + "integrity": "sha512-GGP3O9QFD24uGeAXYUjwSTXARoqpZykHadOmA8G5vfJPK0/DC67qa//0qvqrJzL1xc8WQWX7/yc7fwudjPHPhA==", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/sha256-browser/node_modules/@smithy/util-buffer-from": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-buffer-from/-/util-buffer-from-2.2.0.tgz", + "integrity": "sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA==", + "dependencies": { + "@smithy/is-array-buffer": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/sha256-browser/node_modules/@smithy/util-utf8": { + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@smithy/util-utf8/-/util-utf8-2.3.0.tgz", + "integrity": "sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A==", + "dependencies": { + "@smithy/util-buffer-from": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/sha256-js": { + 
"version": "5.2.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/sha256-js/-/sha256-js-5.2.0.tgz", + "integrity": "sha512-FFQQyu7edu4ufvIZ+OadFpHHOt+eSTBaYaki44c+akjg7qZg9oOQeLlk77F6tSYqjDAFClrHJk9tMf0HdVyOvA==", + "dependencies": { + "@aws-crypto/util": "^5.2.0", + "@aws-sdk/types": "^3.222.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-crypto/supports-web-crypto": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/supports-web-crypto/-/supports-web-crypto-5.2.0.tgz", + "integrity": "sha512-iAvUotm021kM33eCdNfwIN//F77/IADDSs58i+MDaOqFrVjZo9bAal0NK7HurRuWLLpF1iLX7gbWrjHjeo+YFg==", + "dependencies": { + "tslib": "^2.6.2" + } + }, + "node_modules/@aws-crypto/util": { + "version": "5.2.0", + "resolved": "https://registry.npmjs.org/@aws-crypto/util/-/util-5.2.0.tgz", + "integrity": "sha512-4RkU9EsI6ZpBve5fseQlGNUWKMa1RLPQ1dnjnQoe07ldfIzcsGb5hC5W0Dm7u423KWzawlrpbjXBrXCEv9zazQ==", + "dependencies": { + "@aws-sdk/types": "^3.222.0", + "@smithy/util-utf8": "^2.0.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@aws-crypto/util/node_modules/@smithy/is-array-buffer": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/is-array-buffer/-/is-array-buffer-2.2.0.tgz", + "integrity": "sha512-GGP3O9QFD24uGeAXYUjwSTXARoqpZykHadOmA8G5vfJPK0/DC67qa//0qvqrJzL1xc8WQWX7/yc7fwudjPHPhA==", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/util/node_modules/@smithy/util-buffer-from": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/@smithy/util-buffer-from/-/util-buffer-from-2.2.0.tgz", + "integrity": "sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA==", + "dependencies": { + "@smithy/is-array-buffer": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-crypto/util/node_modules/@smithy/util-utf8": { + 
"version": "2.3.0", + "resolved": "https://registry.npmjs.org/@smithy/util-utf8/-/util-utf8-2.3.0.tgz", + "integrity": "sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A==", + "dependencies": { + "@smithy/util-buffer-from": "^2.2.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/client-cognito-identity": { + "version": "3.645.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-cognito-identity/-/client-cognito-identity-3.645.0.tgz", + "integrity": "sha512-nBfWDzWBQI1NCHYqBAmiifhdnLRxQYozaq6OjTuRcALjYJbOdFV7t0w9FWGISOq1OnM7r8UdCXlr2bzdyU0tJA==", + "dependencies": { + "@aws-crypto/sha256-browser": "5.2.0", + "@aws-crypto/sha256-js": "5.2.0", + "@aws-sdk/client-sso-oidc": "3.645.0", + "@aws-sdk/client-sts": "3.645.0", + "@aws-sdk/core": "3.635.0", + "@aws-sdk/credential-provider-node": "3.645.0", + "@aws-sdk/middleware-host-header": "3.620.0", + "@aws-sdk/middleware-logger": "3.609.0", + "@aws-sdk/middleware-recursion-detection": "3.620.0", + "@aws-sdk/middleware-user-agent": "3.645.0", + "@aws-sdk/region-config-resolver": "3.614.0", + "@aws-sdk/types": "3.609.0", + "@aws-sdk/util-endpoints": "3.645.0", + "@aws-sdk/util-user-agent-browser": "3.609.0", + "@aws-sdk/util-user-agent-node": "3.614.0", + "@smithy/config-resolver": "^3.0.5", + "@smithy/core": "^2.4.0", + "@smithy/fetch-http-handler": "^3.2.4", + "@smithy/hash-node": "^3.0.3", + "@smithy/invalid-dependency": "^3.0.3", + "@smithy/middleware-content-length": "^3.0.5", + "@smithy/middleware-endpoint": "^3.1.0", + "@smithy/middleware-retry": "^3.0.15", + "@smithy/middleware-serde": "^3.0.3", + "@smithy/middleware-stack": "^3.0.3", + "@smithy/node-config-provider": "^3.1.4", + "@smithy/node-http-handler": "^3.1.4", + "@smithy/protocol-http": "^4.1.0", + "@smithy/smithy-client": "^3.2.0", + "@smithy/types": "^3.3.0", + "@smithy/url-parser": "^3.0.3", + "@smithy/util-base64": "^3.0.0", + 
"@smithy/util-body-length-browser": "^3.0.0", + "@smithy/util-body-length-node": "^3.0.0", + "@smithy/util-defaults-mode-browser": "^3.0.15", + "@smithy/util-defaults-mode-node": "^3.0.15", + "@smithy/util-endpoints": "^2.0.5", + "@smithy/util-middleware": "^3.0.3", + "@smithy/util-retry": "^3.0.3", + "@smithy/util-utf8": "^3.0.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/client-sagemaker": { + "version": "3.645.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-sagemaker/-/client-sagemaker-3.645.0.tgz", + "integrity": "sha512-JgvTs6UDlUlAUFFfTHj75/78EG5XzyYDPM++yvmTG+kCrwU9N1YRBHEQ1yzC0SNXpEZnb7RQPZP0USdqU/nFnw==", + "dependencies": { + "@aws-crypto/sha256-browser": "5.2.0", + "@aws-crypto/sha256-js": "5.2.0", + "@aws-sdk/client-sso-oidc": "3.645.0", + "@aws-sdk/client-sts": "3.645.0", + "@aws-sdk/core": "3.635.0", + "@aws-sdk/credential-provider-node": "3.645.0", + "@aws-sdk/middleware-host-header": "3.620.0", + "@aws-sdk/middleware-logger": "3.609.0", + "@aws-sdk/middleware-recursion-detection": "3.620.0", + "@aws-sdk/middleware-user-agent": "3.645.0", + "@aws-sdk/region-config-resolver": "3.614.0", + "@aws-sdk/types": "3.609.0", + "@aws-sdk/util-endpoints": "3.645.0", + "@aws-sdk/util-user-agent-browser": "3.609.0", + "@aws-sdk/util-user-agent-node": "3.614.0", + "@smithy/config-resolver": "^3.0.5", + "@smithy/core": "^2.4.0", + "@smithy/fetch-http-handler": "^3.2.4", + "@smithy/hash-node": "^3.0.3", + "@smithy/invalid-dependency": "^3.0.3", + "@smithy/middleware-content-length": "^3.0.5", + "@smithy/middleware-endpoint": "^3.1.0", + "@smithy/middleware-retry": "^3.0.15", + "@smithy/middleware-serde": "^3.0.3", + "@smithy/middleware-stack": "^3.0.3", + "@smithy/node-config-provider": "^3.1.4", + "@smithy/node-http-handler": "^3.1.4", + "@smithy/protocol-http": "^4.1.0", + "@smithy/smithy-client": "^3.2.0", + "@smithy/types": "^3.3.0", + "@smithy/url-parser": "^3.0.3", + "@smithy/util-base64": 
"^3.0.0", + "@smithy/util-body-length-browser": "^3.0.0", + "@smithy/util-body-length-node": "^3.0.0", + "@smithy/util-defaults-mode-browser": "^3.0.15", + "@smithy/util-defaults-mode-node": "^3.0.15", + "@smithy/util-endpoints": "^2.0.5", + "@smithy/util-middleware": "^3.0.3", + "@smithy/util-retry": "^3.0.3", + "@smithy/util-utf8": "^3.0.0", + "@smithy/util-waiter": "^3.1.2", + "tslib": "^2.6.2", + "uuid": "^9.0.1" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/client-sagemaker/node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "bin": { + "uuid": "dist/bin/uuid" + } + }, + "node_modules/@aws-sdk/client-sso": { + "version": "3.645.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso/-/client-sso-3.645.0.tgz", + "integrity": "sha512-2rc8TjnsNddOeKQ/pfNN7deNvGLXAeKeYtHtGDAiM2qfTKxd2sNcAsZ+JCDLyshuD4xLM5fpUyR0X8As9EAouQ==", + "dependencies": { + "@aws-crypto/sha256-browser": "5.2.0", + "@aws-crypto/sha256-js": "5.2.0", + "@aws-sdk/core": "3.635.0", + "@aws-sdk/middleware-host-header": "3.620.0", + "@aws-sdk/middleware-logger": "3.609.0", + "@aws-sdk/middleware-recursion-detection": "3.620.0", + "@aws-sdk/middleware-user-agent": "3.645.0", + "@aws-sdk/region-config-resolver": "3.614.0", + "@aws-sdk/types": "3.609.0", + "@aws-sdk/util-endpoints": "3.645.0", + "@aws-sdk/util-user-agent-browser": "3.609.0", + "@aws-sdk/util-user-agent-node": "3.614.0", + "@smithy/config-resolver": "^3.0.5", + "@smithy/core": "^2.4.0", + "@smithy/fetch-http-handler": "^3.2.4", + "@smithy/hash-node": "^3.0.3", + "@smithy/invalid-dependency": "^3.0.3", + "@smithy/middleware-content-length": "^3.0.5", + "@smithy/middleware-endpoint": "^3.1.0", + "@smithy/middleware-retry": 
"^3.0.15", + "@smithy/middleware-serde": "^3.0.3", + "@smithy/middleware-stack": "^3.0.3", + "@smithy/node-config-provider": "^3.1.4", + "@smithy/node-http-handler": "^3.1.4", + "@smithy/protocol-http": "^4.1.0", + "@smithy/smithy-client": "^3.2.0", + "@smithy/types": "^3.3.0", + "@smithy/url-parser": "^3.0.3", + "@smithy/util-base64": "^3.0.0", + "@smithy/util-body-length-browser": "^3.0.0", + "@smithy/util-body-length-node": "^3.0.0", + "@smithy/util-defaults-mode-browser": "^3.0.15", + "@smithy/util-defaults-mode-node": "^3.0.15", + "@smithy/util-endpoints": "^2.0.5", + "@smithy/util-middleware": "^3.0.3", + "@smithy/util-retry": "^3.0.3", + "@smithy/util-utf8": "^3.0.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/client-sso-oidc": { + "version": "3.645.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-sso-oidc/-/client-sso-oidc-3.645.0.tgz", + "integrity": "sha512-X9ULtdk3cO+1ysurEkJ1MSnu6U00qodXx+IVual+1jXX4RYY1WmQmfo7uDKf6FFkz7wW1DAqU+GJIBNQr0YH8A==", + "dependencies": { + "@aws-crypto/sha256-browser": "5.2.0", + "@aws-crypto/sha256-js": "5.2.0", + "@aws-sdk/core": "3.635.0", + "@aws-sdk/credential-provider-node": "3.645.0", + "@aws-sdk/middleware-host-header": "3.620.0", + "@aws-sdk/middleware-logger": "3.609.0", + "@aws-sdk/middleware-recursion-detection": "3.620.0", + "@aws-sdk/middleware-user-agent": "3.645.0", + "@aws-sdk/region-config-resolver": "3.614.0", + "@aws-sdk/types": "3.609.0", + "@aws-sdk/util-endpoints": "3.645.0", + "@aws-sdk/util-user-agent-browser": "3.609.0", + "@aws-sdk/util-user-agent-node": "3.614.0", + "@smithy/config-resolver": "^3.0.5", + "@smithy/core": "^2.4.0", + "@smithy/fetch-http-handler": "^3.2.4", + "@smithy/hash-node": "^3.0.3", + "@smithy/invalid-dependency": "^3.0.3", + "@smithy/middleware-content-length": "^3.0.5", + "@smithy/middleware-endpoint": "^3.1.0", + "@smithy/middleware-retry": "^3.0.15", + "@smithy/middleware-serde": "^3.0.3", + 
"@smithy/middleware-stack": "^3.0.3", + "@smithy/node-config-provider": "^3.1.4", + "@smithy/node-http-handler": "^3.1.4", + "@smithy/protocol-http": "^4.1.0", + "@smithy/smithy-client": "^3.2.0", + "@smithy/types": "^3.3.0", + "@smithy/url-parser": "^3.0.3", + "@smithy/util-base64": "^3.0.0", + "@smithy/util-body-length-browser": "^3.0.0", + "@smithy/util-body-length-node": "^3.0.0", + "@smithy/util-defaults-mode-browser": "^3.0.15", + "@smithy/util-defaults-mode-node": "^3.0.15", + "@smithy/util-endpoints": "^2.0.5", + "@smithy/util-middleware": "^3.0.3", + "@smithy/util-retry": "^3.0.3", + "@smithy/util-utf8": "^3.0.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + }, + "peerDependencies": { + "@aws-sdk/client-sts": "^3.645.0" + } + }, + "node_modules/@aws-sdk/client-sts": { + "version": "3.645.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/client-sts/-/client-sts-3.645.0.tgz", + "integrity": "sha512-6azXYtvtnAsPf2ShN9vKynIYVcJOpo6IoVmoMAVgNaBJyllP+s/RORzranYZzckqfmrudSxtct4rVapjLWuAMg==", + "dependencies": { + "@aws-crypto/sha256-browser": "5.2.0", + "@aws-crypto/sha256-js": "5.2.0", + "@aws-sdk/client-sso-oidc": "3.645.0", + "@aws-sdk/core": "3.635.0", + "@aws-sdk/credential-provider-node": "3.645.0", + "@aws-sdk/middleware-host-header": "3.620.0", + "@aws-sdk/middleware-logger": "3.609.0", + "@aws-sdk/middleware-recursion-detection": "3.620.0", + "@aws-sdk/middleware-user-agent": "3.645.0", + "@aws-sdk/region-config-resolver": "3.614.0", + "@aws-sdk/types": "3.609.0", + "@aws-sdk/util-endpoints": "3.645.0", + "@aws-sdk/util-user-agent-browser": "3.609.0", + "@aws-sdk/util-user-agent-node": "3.614.0", + "@smithy/config-resolver": "^3.0.5", + "@smithy/core": "^2.4.0", + "@smithy/fetch-http-handler": "^3.2.4", + "@smithy/hash-node": "^3.0.3", + "@smithy/invalid-dependency": "^3.0.3", + "@smithy/middleware-content-length": "^3.0.5", + "@smithy/middleware-endpoint": "^3.1.0", + "@smithy/middleware-retry": "^3.0.15", + 
"@smithy/middleware-serde": "^3.0.3", + "@smithy/middleware-stack": "^3.0.3", + "@smithy/node-config-provider": "^3.1.4", + "@smithy/node-http-handler": "^3.1.4", + "@smithy/protocol-http": "^4.1.0", + "@smithy/smithy-client": "^3.2.0", + "@smithy/types": "^3.3.0", + "@smithy/url-parser": "^3.0.3", + "@smithy/util-base64": "^3.0.0", + "@smithy/util-body-length-browser": "^3.0.0", + "@smithy/util-body-length-node": "^3.0.0", + "@smithy/util-defaults-mode-browser": "^3.0.15", + "@smithy/util-defaults-mode-node": "^3.0.15", + "@smithy/util-endpoints": "^2.0.5", + "@smithy/util-middleware": "^3.0.3", + "@smithy/util-retry": "^3.0.3", + "@smithy/util-utf8": "^3.0.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/core": { + "version": "3.635.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/core/-/core-3.635.0.tgz", + "integrity": "sha512-i1x/E/sgA+liUE1XJ7rj1dhyXpAKO1UKFUcTTHXok2ARjWTvszHnSXMOsB77aPbmn0fUp1JTx2kHUAZ1LVt5Bg==", + "dependencies": { + "@smithy/core": "^2.4.0", + "@smithy/node-config-provider": "^3.1.4", + "@smithy/property-provider": "^3.1.3", + "@smithy/protocol-http": "^4.1.0", + "@smithy/signature-v4": "^4.1.0", + "@smithy/smithy-client": "^3.2.0", + "@smithy/types": "^3.3.0", + "@smithy/util-middleware": "^3.0.3", + "fast-xml-parser": "4.4.1", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/core/node_modules/fast-xml-parser": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/fast-xml-parser/-/fast-xml-parser-4.4.1.tgz", + "integrity": "sha512-xkjOecfnKGkSsOwtZ5Pz7Us/T6mrbPQrq0nh+aCO5V9nk5NLWmasAHumTKjiPJPWANe+kAZ84Jc8ooJkzZ88Sw==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/NaturalIntelligence" + }, + { + "type": "paypal", + "url": "https://paypal.me/naturalintelligence" + } + ], + "dependencies": { + "strnum": "^1.0.5" + }, + "bin": { + "fxparser": "src/cli/cli.js" + } + }, + 
"node_modules/@aws-sdk/credential-provider-cognito-identity": { + "version": "3.645.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-cognito-identity/-/credential-provider-cognito-identity-3.645.0.tgz", + "integrity": "sha512-Z4By/90TaYQZO1dPR1udYhegFiOlSWnZsJOYSAk4Gdny26Tqb78xVLw9R/33CzFblXC4WVSt4gizXTQ/sYyHNg==", + "dependencies": { + "@aws-sdk/client-cognito-identity": "3.645.0", + "@aws-sdk/types": "3.609.0", + "@smithy/property-provider": "^3.1.3", + "@smithy/types": "^3.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-env": { + "version": "3.620.1", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-env/-/credential-provider-env-3.620.1.tgz", + "integrity": "sha512-ExuILJ2qLW5ZO+rgkNRj0xiAipKT16Rk77buvPP8csR7kkCflT/gXTyzRe/uzIiETTxM7tr8xuO9MP/DQXqkfg==", + "dependencies": { + "@aws-sdk/types": "3.609.0", + "@smithy/property-provider": "^3.1.3", + "@smithy/types": "^3.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-http": { + "version": "3.635.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-http/-/credential-provider-http-3.635.0.tgz", + "integrity": "sha512-iJyRgEjOCQlBMXqtwPLIKYc7Bsc6nqjrZybdMDenPDa+kmLg7xh8LxHsu9088e+2/wtLicE34FsJJIfzu3L82g==", + "dependencies": { + "@aws-sdk/types": "3.609.0", + "@smithy/fetch-http-handler": "^3.2.4", + "@smithy/node-http-handler": "^3.1.4", + "@smithy/property-provider": "^3.1.3", + "@smithy/protocol-http": "^4.1.0", + "@smithy/smithy-client": "^3.2.0", + "@smithy/types": "^3.3.0", + "@smithy/util-stream": "^3.1.3", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-ini": { + "version": "3.645.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-ini/-/credential-provider-ini-3.645.0.tgz", + "integrity": 
"sha512-LlZW0qwUwNlTaAIDCNpLbPsyXvS42pRIwF92fgtCQedmdnpN3XRUC6hcwSYI7Xru3GGKp3RnceOvsdOaRJORsw==", + "dependencies": { + "@aws-sdk/credential-provider-env": "3.620.1", + "@aws-sdk/credential-provider-http": "3.635.0", + "@aws-sdk/credential-provider-process": "3.620.1", + "@aws-sdk/credential-provider-sso": "3.645.0", + "@aws-sdk/credential-provider-web-identity": "3.621.0", + "@aws-sdk/types": "3.609.0", + "@smithy/credential-provider-imds": "^3.2.0", + "@smithy/property-provider": "^3.1.3", + "@smithy/shared-ini-file-loader": "^3.1.4", + "@smithy/types": "^3.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + }, + "peerDependencies": { + "@aws-sdk/client-sts": "^3.645.0" + } + }, + "node_modules/@aws-sdk/credential-provider-node": { + "version": "3.645.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-node/-/credential-provider-node-3.645.0.tgz", + "integrity": "sha512-eGFFuNvLeXjCJf5OCIuSEflxUowmK+bCS+lK4M8ofsYOEGAivdx7C0UPxNjHpvM8wKd8vpMl5phTeS9BWX5jMQ==", + "dependencies": { + "@aws-sdk/credential-provider-env": "3.620.1", + "@aws-sdk/credential-provider-http": "3.635.0", + "@aws-sdk/credential-provider-ini": "3.645.0", + "@aws-sdk/credential-provider-process": "3.620.1", + "@aws-sdk/credential-provider-sso": "3.645.0", + "@aws-sdk/credential-provider-web-identity": "3.621.0", + "@aws-sdk/types": "3.609.0", + "@smithy/credential-provider-imds": "^3.2.0", + "@smithy/property-provider": "^3.1.3", + "@smithy/shared-ini-file-loader": "^3.1.4", + "@smithy/types": "^3.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-process": { + "version": "3.620.1", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-process/-/credential-provider-process-3.620.1.tgz", + "integrity": "sha512-hWqFMidqLAkaV9G460+1at6qa9vySbjQKKc04p59OT7lZ5cO5VH5S4aI05e+m4j364MBROjjk2ugNvfNf/8ILg==", + "dependencies": { + "@aws-sdk/types": "3.609.0", + 
"@smithy/property-provider": "^3.1.3", + "@smithy/shared-ini-file-loader": "^3.1.4", + "@smithy/types": "^3.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-sso": { + "version": "3.645.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-sso/-/credential-provider-sso-3.645.0.tgz", + "integrity": "sha512-d6XuChAl5NCsCrUexc6AFb4efPmb9+66iwPylKG+iMTMYgO1ackfy1Q2/f35jdn0jolkPkzKsVyfzsEVoID6ew==", + "dependencies": { + "@aws-sdk/client-sso": "3.645.0", + "@aws-sdk/token-providers": "3.614.0", + "@aws-sdk/types": "3.609.0", + "@smithy/property-provider": "^3.1.3", + "@smithy/shared-ini-file-loader": "^3.1.4", + "@smithy/types": "^3.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/credential-provider-web-identity": { + "version": "3.621.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-provider-web-identity/-/credential-provider-web-identity-3.621.0.tgz", + "integrity": "sha512-w7ASSyfNvcx7+bYGep3VBgC3K6vEdLmlpjT7nSIHxxQf+WSdvy+HynwJosrpZax0sK5q0D1Jpn/5q+r5lwwW6w==", + "dependencies": { + "@aws-sdk/types": "3.609.0", + "@smithy/property-provider": "^3.1.3", + "@smithy/types": "^3.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + }, + "peerDependencies": { + "@aws-sdk/client-sts": "^3.621.0" + } + }, + "node_modules/@aws-sdk/credential-providers": { + "version": "3.645.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/credential-providers/-/credential-providers-3.645.0.tgz", + "integrity": "sha512-6g9qMngrMCvHNsxmh/1urnWKrvaa2fv55b3bYwPxwJCYAvg/xc7bV8YHL7GS2rJpACG707k9G86DTW+Hab8bJA==", + "dependencies": { + "@aws-sdk/client-cognito-identity": "3.645.0", + "@aws-sdk/client-sso": "3.645.0", + "@aws-sdk/client-sts": "3.645.0", + "@aws-sdk/credential-provider-cognito-identity": "3.645.0", + "@aws-sdk/credential-provider-env": "3.620.1", + "@aws-sdk/credential-provider-http": "3.635.0", + 
"@aws-sdk/credential-provider-ini": "3.645.0", + "@aws-sdk/credential-provider-node": "3.645.0", + "@aws-sdk/credential-provider-process": "3.620.1", + "@aws-sdk/credential-provider-sso": "3.645.0", + "@aws-sdk/credential-provider-web-identity": "3.621.0", + "@aws-sdk/types": "3.609.0", + "@smithy/credential-provider-imds": "^3.2.0", + "@smithy/property-provider": "^3.1.3", + "@smithy/types": "^3.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/middleware-host-header": { + "version": "3.620.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-host-header/-/middleware-host-header-3.620.0.tgz", + "integrity": "sha512-VMtPEZwqYrII/oUkffYsNWY9PZ9xpNJpMgmyU0rlDQ25O1c0Hk3fJmZRe6pEkAJ0omD7kLrqGl1DUjQVxpd/Rg==", + "dependencies": { + "@aws-sdk/types": "3.609.0", + "@smithy/protocol-http": "^4.1.0", + "@smithy/types": "^3.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/middleware-logger": { + "version": "3.609.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-logger/-/middleware-logger-3.609.0.tgz", + "integrity": "sha512-S62U2dy4jMDhDFDK5gZ4VxFdWzCtLzwbYyFZx2uvPYTECkepLUfzLic2BHg2Qvtu4QjX+oGE3P/7fwaGIsGNuQ==", + "dependencies": { + "@aws-sdk/types": "3.609.0", + "@smithy/types": "^3.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/middleware-recursion-detection": { + "version": "3.620.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/middleware-recursion-detection/-/middleware-recursion-detection-3.620.0.tgz", + "integrity": "sha512-nh91S7aGK3e/o1ck64sA/CyoFw+gAYj2BDOnoNa6ouyCrVJED96ZXWbhye/fz9SgmNUZR2g7GdVpiLpMKZoI5w==", + "dependencies": { + "@aws-sdk/types": "3.609.0", + "@smithy/protocol-http": "^4.1.0", + "@smithy/types": "^3.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/middleware-user-agent": { + "version": "3.645.0", + 
"resolved": "https://registry.npmjs.org/@aws-sdk/middleware-user-agent/-/middleware-user-agent-3.645.0.tgz", + "integrity": "sha512-NpTAtqWK+49lRuxfz7st9for80r4NriCMK0RfdJSoPFVntjsSQiQ7+2nW2XL05uVY633e9DvCAw8YatX3zd1mw==", + "dependencies": { + "@aws-sdk/types": "3.609.0", + "@aws-sdk/util-endpoints": "3.645.0", + "@smithy/protocol-http": "^4.1.0", + "@smithy/types": "^3.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/protocol-http": { + "version": "3.374.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/protocol-http/-/protocol-http-3.374.0.tgz", + "integrity": "sha512-9WpRUbINdGroV3HiZZIBoJvL2ndoWk39OfwxWs2otxByppJZNN14bg/lvCx5e8ggHUti7IBk5rb0nqQZ4m05pg==", + "deprecated": "This package has moved to @smithy/protocol-http", + "dependencies": { + "@smithy/protocol-http": "^1.1.0", + "tslib": "^2.5.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/protocol-http/node_modules/@smithy/protocol-http": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@smithy/protocol-http/-/protocol-http-1.2.0.tgz", + "integrity": "sha512-GfGfruksi3nXdFok5RhgtOnWe5f6BndzYfmEXISD+5gAGdayFGpjWu5pIqIweTudMtse20bGbc+7MFZXT1Tb8Q==", + "dependencies": { + "@smithy/types": "^1.2.0", + "tslib": "^2.5.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/protocol-http/node_modules/@smithy/types": { + "version": "1.2.0", + "resolved": "https://registry.npmjs.org/@smithy/types/-/types-1.2.0.tgz", + "integrity": "sha512-z1r00TvBqF3dh4aHhya7nz1HhvCg4TRmw51fjMrh5do3h+ngSstt/yKlNbHeb9QxJmFbmN8KEVSWgb1bRvfEoA==", + "dependencies": { + "tslib": "^2.5.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/region-config-resolver": { + "version": "3.614.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/region-config-resolver/-/region-config-resolver-3.614.0.tgz", + "integrity": 
"sha512-vDCeMXvic/LU0KFIUjpC3RiSTIkkvESsEfbVHiHH0YINfl8HnEqR5rj+L8+phsCeVg2+LmYwYxd5NRz4PHxt5g==", + "dependencies": { + "@aws-sdk/types": "3.609.0", + "@smithy/node-config-provider": "^3.1.4", + "@smithy/types": "^3.3.0", + "@smithy/util-config-provider": "^3.0.0", + "@smithy/util-middleware": "^3.0.3", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/signature-v4": { + "version": "3.374.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/signature-v4/-/signature-v4-3.374.0.tgz", + "integrity": "sha512-2xLJvSdzcZZAg0lsDLUAuSQuihzK0dcxIK7WmfuJeF7DGKJFmp9czQmz5f3qiDz6IDQzvgK1M9vtJSVCslJbyQ==", + "deprecated": "This package has moved to @smithy/signature-v4", + "dependencies": { + "@smithy/signature-v4": "^1.0.1", + "tslib": "^2.5.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/signature-v4/node_modules/@smithy/is-array-buffer": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@smithy/is-array-buffer/-/is-array-buffer-1.1.0.tgz", + "integrity": "sha512-twpQ/n+3OWZJ7Z+xu43MJErmhB/WO/mMTnqR6PwWQShvSJ/emx5d1N59LQZk6ZpTAeuRWrc+eHhkzTp9NFjNRQ==", + "dependencies": { + "tslib": "^2.5.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/signature-v4/node_modules/@smithy/signature-v4": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@smithy/signature-v4/-/signature-v4-1.1.0.tgz", + "integrity": "sha512-fDo3m7YqXBs7neciOePPd/X9LPm5QLlDMdIC4m1H6dgNLnXfLMFNIxEfPyohGA8VW9Wn4X8lygnPSGxDZSmp0Q==", + "dependencies": { + "@smithy/eventstream-codec": "^1.1.0", + "@smithy/is-array-buffer": "^1.1.0", + "@smithy/types": "^1.2.0", + "@smithy/util-hex-encoding": "^1.1.0", + "@smithy/util-middleware": "^1.1.0", + "@smithy/util-uri-escape": "^1.1.0", + "@smithy/util-utf8": "^1.1.0", + "tslib": "^2.5.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/signature-v4/node_modules/@smithy/types": { + "version": "1.2.0", + "resolved": 
"https://registry.npmjs.org/@smithy/types/-/types-1.2.0.tgz", + "integrity": "sha512-z1r00TvBqF3dh4aHhya7nz1HhvCg4TRmw51fjMrh5do3h+ngSstt/yKlNbHeb9QxJmFbmN8KEVSWgb1bRvfEoA==", + "dependencies": { + "tslib": "^2.5.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/signature-v4/node_modules/@smithy/util-buffer-from": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@smithy/util-buffer-from/-/util-buffer-from-1.1.0.tgz", + "integrity": "sha512-9m6NXE0ww+ra5HKHCHig20T+FAwxBAm7DIdwc/767uGWbRcY720ybgPacQNB96JMOI7xVr/CDa3oMzKmW4a+kw==", + "dependencies": { + "@smithy/is-array-buffer": "^1.1.0", + "tslib": "^2.5.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/signature-v4/node_modules/@smithy/util-hex-encoding": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@smithy/util-hex-encoding/-/util-hex-encoding-1.1.0.tgz", + "integrity": "sha512-7UtIE9eH0u41zpB60Jzr0oNCQ3hMJUabMcKRUVjmyHTXiWDE4vjSqN6qlih7rCNeKGbioS7f/y2Jgym4QZcKFg==", + "dependencies": { + "tslib": "^2.5.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/signature-v4/node_modules/@smithy/util-middleware": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@smithy/util-middleware/-/util-middleware-1.1.0.tgz", + "integrity": "sha512-6hhckcBqVgjWAqLy2vqlPZ3rfxLDhFWEmM7oLh2POGvsi7j0tHkbN7w4DFhuBExVJAbJ/qqxqZdRY6Fu7/OezQ==", + "dependencies": { + "tslib": "^2.5.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/signature-v4/node_modules/@smithy/util-uri-escape": { + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/@smithy/util-uri-escape/-/util-uri-escape-1.1.0.tgz", + "integrity": "sha512-/jL/V1xdVRt5XppwiaEU8Etp5WHZj609n0xMTuehmCqdoOFbId1M+aEeDWZsQ+8JbEB/BJ6ynY2SlYmOaKtt8w==", + "dependencies": { + "tslib": "^2.5.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/signature-v4/node_modules/@smithy/util-utf8": { + 
"version": "1.1.0", + "resolved": "https://registry.npmjs.org/@smithy/util-utf8/-/util-utf8-1.1.0.tgz", + "integrity": "sha512-p/MYV+JmqmPyjdgyN2UxAeYDj9cBqCjp0C/NsTWnnjoZUVqoeZ6IrW915L9CAKWVECgv9lVQGc4u/yz26/bI1A==", + "dependencies": { + "@smithy/util-buffer-from": "^1.1.0", + "tslib": "^2.5.0" + }, + "engines": { + "node": ">=14.0.0" + } + }, + "node_modules/@aws-sdk/token-providers": { + "version": "3.614.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/token-providers/-/token-providers-3.614.0.tgz", + "integrity": "sha512-okItqyY6L9IHdxqs+Z116y5/nda7rHxLvROxtAJdLavWTYDydxrZstImNgGWTeVdmc0xX2gJCI77UYUTQWnhRw==", + "dependencies": { + "@aws-sdk/types": "3.609.0", + "@smithy/property-provider": "^3.1.3", + "@smithy/shared-ini-file-loader": "^3.1.4", + "@smithy/types": "^3.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + }, + "peerDependencies": { + "@aws-sdk/client-sso-oidc": "^3.614.0" + } + }, + "node_modules/@aws-sdk/types": { + "version": "3.609.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/types/-/types-3.609.0.tgz", + "integrity": "sha512-+Tqnh9w0h2LcrUsdXyT1F8mNhXz+tVYBtP19LpeEGntmvHwa2XzvLUCWpoIAIVsHp5+HdB2X9Sn0KAtmbFXc2Q==", + "dependencies": { + "@smithy/types": "^3.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/util-endpoints": { + "version": "3.645.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-endpoints/-/util-endpoints-3.645.0.tgz", + "integrity": "sha512-Oe+xaU4ic4PB1k3pb5VTC1/MWES13IlgpaQw01bVHGfwP6Yv6zZOxizRzca2Y3E+AyR+nKD7vXtHRY+w3bi4bg==", + "dependencies": { + "@aws-sdk/types": "3.609.0", + "@smithy/types": "^3.3.0", + "@smithy/util-endpoints": "^2.0.5", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/util-locate-window": { + "version": "3.568.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-locate-window/-/util-locate-window-3.568.0.tgz", + "integrity": 
"sha512-3nh4TINkXYr+H41QaPelCceEB2FXP3fxp93YZXB/kqJvX0U9j0N0Uk45gvsjmEPzG8XxkPEeLIfT2I1M7A6Lig==", + "dependencies": { + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + } + }, + "node_modules/@aws-sdk/util-user-agent-browser": { + "version": "3.609.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-user-agent-browser/-/util-user-agent-browser-3.609.0.tgz", + "integrity": "sha512-fojPU+mNahzQ0YHYBsx0ZIhmMA96H+ZIZ665ObU9tl+SGdbLneVZVikGve+NmHTQwHzwkFsZYYnVKAkreJLAtA==", + "dependencies": { + "@aws-sdk/types": "3.609.0", + "@smithy/types": "^3.3.0", + "bowser": "^2.11.0", + "tslib": "^2.6.2" + } + }, + "node_modules/@aws-sdk/util-user-agent-node": { + "version": "3.614.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-user-agent-node/-/util-user-agent-node-3.614.0.tgz", + "integrity": "sha512-15ElZT88peoHnq5TEoEtZwoXTXRxNrk60TZNdpl/TUBJ5oNJ9Dqb5Z4ryb8ofN6nm9aFf59GVAerFDz8iUoHBA==", + "dependencies": { + "@aws-sdk/types": "3.609.0", + "@smithy/node-config-provider": "^3.1.4", + "@smithy/types": "^3.3.0", + "tslib": "^2.6.2" + }, + "engines": { + "node": ">=16.0.0" + }, + "peerDependencies": { + "aws-crt": ">=1.0.0" + }, + "peerDependenciesMeta": { + "aws-crt": { + "optional": true + } + } + }, + "node_modules/@aws-sdk/util-utf8-browser": { + "version": "3.259.0", + "resolved": "https://registry.npmjs.org/@aws-sdk/util-utf8-browser/-/util-utf8-browser-3.259.0.tgz", + "integrity": "sha512-UvFa/vR+e19XookZF8RzFZBrw2EUkQWxiBW0yYQAhvk3C+QVGl0H3ouca8LDBlBfQKXwmW3huo/59H8rwb1wJw==", + "dependencies": { + "tslib": "^2.3.1" + } + }, "node_modules/@azure/abort-controller": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@azure/abort-controller/-/abort-controller-1.1.0.tgz", @@ -2674,6 +3629,21 @@ "url": "https://opencollective.com/eslint" } }, + "node_modules/@eslint/eslintrc/node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": 
"sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, "node_modules/@eslint/eslintrc/node_modules/argparse": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/argparse/-/argparse-2.0.1.tgz", @@ -2710,6 +3680,11 @@ "js-yaml": "bin/js-yaml.js" } }, + "node_modules/@eslint/eslintrc/node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" + }, "node_modules/@eslint/eslintrc/node_modules/minimatch": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", @@ -3127,6 +4102,11 @@ "node": "^14.15.0 || ^16.10.0 || >=18.0.0" } }, + "node_modules/@jest/schemas/node_modules/@sinclair/typebox": { + "version": "0.27.8", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", + "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==" + }, "node_modules/@jest/types": { "version": "29.6.3", "resolved": "https://registry.npmjs.org/@jest/types/-/types-29.6.3.tgz", @@ -3883,14 +4863,22 @@ "langium": "3.0.0" } }, - "node_modules/@mui/base": { - "version": "5.0.0-beta.40", - "resolved": "https://registry.npmjs.org/@mui/base/-/base-5.0.0-beta.40.tgz", - "integrity": "sha512-I/lGHztkCzvwlXpjD2+SNmvNQvB4227xBXhISPjEaJUXGImOQ9f3D2Yj/T3KasSI/h0MLWy74X0J6clhPmsRbQ==", + "node_modules/@mongodb-js/saslprep": { + "version": "1.1.9", + "resolved": "https://registry.npmjs.org/@mongodb-js/saslprep/-/saslprep-1.1.9.tgz", + "integrity": 
"sha512-tVkljjeEaAhCqTzajSdgbQ6gE6f3oneVwa3iXR6csiEwXXOFsiC6Uh9iAjAhXPtqa/XMDHWjjeNH/77m/Yq2dw==", "dependencies": { "sparse-bitfield": "^3.0.3" } }, + "node_modules/@mozilla/readability": { + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/@mozilla/readability/-/readability-0.5.0.tgz", + "integrity": "sha512-Z+CZ3QaosfFaTqvhQsIktyGrjFjSC0Fa4EMph4mqKnWhmyoGICsV/8QK+8HpXut6zV7zwfWwqDmEjtk1Qf6EgQ==", + "engines": { + "node": ">=14.0.0" + } + }, "node_modules/@mui/core-downloads-tracker": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/@mui/core-downloads-tracker/-/core-downloads-tracker-6.0.2.tgz", @@ -4261,31 +5249,6 @@ "node": ">=14.0.0" } }, - "node_modules/@pinecone-database/pinecone/node_modules/@sinclair/typebox": { - "version": "0.29.6", - "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.29.6.tgz", - "integrity": "sha512-aX5IFYWlMa7tQ8xZr3b2gtVReCvg7f3LEhjir/JAjX2bJCMVJA5tIPv30wTD4KDfcwMd7DDYY3hFDeGmOgtrZQ==" - }, - "node_modules/@pinecone-database/pinecone/node_modules/ajv": { - "version": "8.17.1", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", - "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", - "dependencies": { - "fast-deep-equal": "^3.1.3", - "fast-uri": "^3.0.1", - "json-schema-traverse": "^1.0.0", - "require-from-string": "^2.0.2" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/epoberezkin" - } - }, - "node_modules/@pinecone-database/pinecone/node_modules/json-schema-traverse": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", - "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==" - }, "node_modules/@pkgjs/parseargs": { "version": "0.11.0", "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", @@ -4305,9 +5268,9 @@ } }, 
"node_modules/@puppeteer/browsers": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.3.1.tgz", - "integrity": "sha512-uK7o3hHkK+naEobMSJ+2ySYyXtQkBxIH8Gn4MK9ciePjNV+Pf+PgY/W7iPzn2MTjl3stcYB5AlcTmPYw7AXDwA==", + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.4.0.tgz", + "integrity": "sha512-x8J1csfIygOwf6D6qUAZ0ASk3z63zPb7wkNeHRerCMh82qWKUrOgkuP005AJC8lDL6/evtXETGEJVcwykKT4/g==", "dependencies": { "debug": "^4.3.6", "extract-zip": "^2.0.1", @@ -7405,9 +8368,9 @@ } }, "node_modules/@sinclair/typebox": { - "version": "0.27.8", - "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.27.8.tgz", - "integrity": "sha512-+Fj43pSMwJs4KRrH/938Uf+uAELIgVBmQzg/q1YG10djyfA3TnrU8N8XzqCh/okZdszqBQTZf96idMfE5lnwTA==" + "version": "0.29.6", + "resolved": "https://registry.npmjs.org/@sinclair/typebox/-/typebox-0.29.6.tgz", + "integrity": "sha512-aX5IFYWlMa7tQ8xZr3b2gtVReCvg7f3LEhjir/JAjX2bJCMVJA5tIPv30wTD4KDfcwMd7DDYY3hFDeGmOgtrZQ==" }, "node_modules/@sindresorhus/is": { "version": "7.0.0", @@ -7448,17 +8411,19 @@ } }, "node_modules/@smithy/core": { - "version": "2.2.7", - "resolved": "https://registry.npmjs.org/@smithy/core/-/core-2.2.7.tgz", - "integrity": "sha512-Wwd9QWKaYdR+n/oIqJbuwSr9lHuv7sa1e3Zu4wIToZl0sS7xapTYYqQtXP1hKKtIWz0jl8AhvOfNwkfT5jjV0w==", + "version": "2.4.0", + "resolved": "https://registry.npmjs.org/@smithy/core/-/core-2.4.0.tgz", + "integrity": "sha512-cHXq+FneIF/KJbt4q4pjN186+Jf4ZB0ZOqEaZMBhT79srEyGDDBV31NqBRBjazz8ppQ1bJbDJMY9ba5wKFV36w==", "dependencies": { - "@smithy/middleware-endpoint": "^3.0.5", - "@smithy/middleware-retry": "^3.0.10", + "@smithy/middleware-endpoint": "^3.1.0", + "@smithy/middleware-retry": "^3.0.15", "@smithy/middleware-serde": "^3.0.3", - "@smithy/protocol-http": "^4.0.4", - "@smithy/smithy-client": "^3.1.8", + "@smithy/protocol-http": "^4.1.0", + "@smithy/smithy-client": "^3.2.0", "@smithy/types": "^3.3.0", + 
"@smithy/util-body-length-browser": "^3.0.0", "@smithy/util-middleware": "^3.0.3", + "@smithy/util-utf8": "^3.0.0", "tslib": "^2.6.2" }, "engines": { @@ -7466,9 +8431,9 @@ } }, "node_modules/@smithy/credential-provider-imds": { - "version": "3.1.4", - "resolved": "https://registry.npmjs.org/@smithy/credential-provider-imds/-/credential-provider-imds-3.1.4.tgz", - "integrity": "sha512-NKyH01m97Xa5xf3pB2QOF3lnuE8RIK0hTVNU5zvZAwZU8uspYO4DHQVlK+Y5gwSrujTfHvbfd1D9UFJAc0iYKQ==", + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/@smithy/credential-provider-imds/-/credential-provider-imds-3.2.0.tgz", + "integrity": "sha512-0SCIzgd8LYZ9EJxUjLXBmEKSZR/P/w6l7Rz/pab9culE/RWuqelAKGJvn5qUOl8BgX8Yj5HWM50A5hiB/RzsgA==", "dependencies": { "@smithy/node-config-provider": "^3.1.4", "@smithy/property-provider": "^3.1.3", @@ -7514,11 +8479,11 @@ } }, "node_modules/@smithy/fetch-http-handler": { - "version": "3.2.2", - "resolved": "https://registry.npmjs.org/@smithy/fetch-http-handler/-/fetch-http-handler-3.2.2.tgz", - "integrity": "sha512-3LaWlBZObyGrOOd7e5MlacnAKEwFBmAeiW/TOj2eR9475Vnq30uS2510+tnKbxrGjROfNdOhQqGo5j3sqLT6bA==", + "version": "3.2.4", + "resolved": "https://registry.npmjs.org/@smithy/fetch-http-handler/-/fetch-http-handler-3.2.4.tgz", + "integrity": "sha512-kBprh5Gs5h7ug4nBWZi1FZthdqSM+T7zMmsZxx0IBvWUn7dK3diz2SHn7Bs4dQGFDk8plDv375gzenDoNwrXjg==", "dependencies": { - "@smithy/protocol-http": "^4.0.4", + "@smithy/protocol-http": "^4.1.0", "@smithy/querystring-builder": "^3.0.3", "@smithy/types": "^3.3.0", "@smithy/util-base64": "^3.0.0", @@ -7560,11 +8525,11 @@ } }, "node_modules/@smithy/middleware-content-length": { - "version": "3.0.4", - "resolved": "https://registry.npmjs.org/@smithy/middleware-content-length/-/middleware-content-length-3.0.4.tgz", - "integrity": "sha512-wySGje/KfhsnF8YSh9hP16pZcl3C+X6zRsvSfItQGvCyte92LliilU3SD0nR7kTlxnAJwxY8vE/k4Eoezj847Q==", + "version": "3.0.5", + "resolved": 
"https://registry.npmjs.org/@smithy/middleware-content-length/-/middleware-content-length-3.0.5.tgz", + "integrity": "sha512-ILEzC2eyxx6ncej3zZSwMpB5RJ0zuqH7eMptxC4KN3f+v9bqT8ohssKbhNR78k/2tWW+KS5Spw+tbPF4Ejyqvw==", "dependencies": { - "@smithy/protocol-http": "^4.0.4", + "@smithy/protocol-http": "^4.1.0", "@smithy/types": "^3.3.0", "tslib": "^2.6.2" }, @@ -7573,9 +8538,9 @@ } }, "node_modules/@smithy/middleware-endpoint": { - "version": "3.0.5", - "resolved": "https://registry.npmjs.org/@smithy/middleware-endpoint/-/middleware-endpoint-3.0.5.tgz", - "integrity": "sha512-V4acqqrh5tDxUEGVTOgf2lYMZqPQsoGntCrjrJZEeBzEzDry2d2vcI1QCXhGltXPPY+BMc6eksZMguA9fIY8vA==", + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/@smithy/middleware-endpoint/-/middleware-endpoint-3.1.0.tgz", + "integrity": "sha512-5y5aiKCEwg9TDPB4yFE7H6tYvGFf1OJHNczeY10/EFF8Ir8jZbNntQJxMWNfeQjC1mxPsaQ6mR9cvQbf+0YeMw==", "dependencies": { "@smithy/middleware-serde": "^3.0.3", "@smithy/node-config-provider": "^3.1.4", @@ -7590,14 +8555,14 @@ } }, "node_modules/@smithy/middleware-retry": { - "version": "3.0.10", - "resolved": "https://registry.npmjs.org/@smithy/middleware-retry/-/middleware-retry-3.0.10.tgz", - "integrity": "sha512-+6ibpv6jpkTNJS6yErQSEjbxCWf1/jMeUSlpSlUiTYf73LGR9riSRlIrL1+JEW0eEpb6MelQ04BIc38aj8GtxQ==", + "version": "3.0.15", + "resolved": "https://registry.npmjs.org/@smithy/middleware-retry/-/middleware-retry-3.0.15.tgz", + "integrity": "sha512-iTMedvNt1ApdvkaoE8aSDuwaoc+BhvHqttbA/FO4Ty+y/S5hW6Ci/CTScG7vam4RYJWZxdTElc3MEfHRVH6cgQ==", "dependencies": { "@smithy/node-config-provider": "^3.1.4", - "@smithy/protocol-http": "^4.0.4", + "@smithy/protocol-http": "^4.1.0", "@smithy/service-error-classification": "^3.0.3", - "@smithy/smithy-client": "^3.1.8", + "@smithy/smithy-client": "^3.2.0", "@smithy/types": "^3.3.0", "@smithy/util-middleware": "^3.0.3", "@smithy/util-retry": "^3.0.3", @@ -7659,12 +8624,12 @@ } }, "node_modules/@smithy/node-http-handler": { - "version": "3.1.3", 
- "resolved": "https://registry.npmjs.org/@smithy/node-http-handler/-/node-http-handler-3.1.3.tgz", - "integrity": "sha512-UiKZm8KHb/JeOPzHZtRUfyaRDO1KPKPpsd7iplhiwVGOeVdkiVJ5bVe7+NhWREMOKomrDIDdSZyglvMothLg0Q==", + "version": "3.1.4", + "resolved": "https://registry.npmjs.org/@smithy/node-http-handler/-/node-http-handler-3.1.4.tgz", + "integrity": "sha512-+UmxgixgOr/yLsUxcEKGH0fMNVteJFGkmRltYFHnBMlogyFdpzn2CwqWmxOrfJELhV34v0WSlaqG1UtE1uXlJg==", "dependencies": { "@smithy/abort-controller": "^3.1.1", - "@smithy/protocol-http": "^4.0.4", + "@smithy/protocol-http": "^4.1.0", "@smithy/querystring-builder": "^3.0.3", "@smithy/types": "^3.3.0", "tslib": "^2.6.2" @@ -7686,9 +8651,9 @@ } }, "node_modules/@smithy/protocol-http": { - "version": "4.0.4", - "resolved": "https://registry.npmjs.org/@smithy/protocol-http/-/protocol-http-4.0.4.tgz", - "integrity": "sha512-fAA2O4EFyNRyYdFLVIv5xMMeRb+3fRKc/Rt2flh5k831vLvUmNFXcydeg7V3UeEhGURJI4c1asmGJBjvmF6j8Q==", + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/@smithy/protocol-http/-/protocol-http-4.1.0.tgz", + "integrity": "sha512-dPVoHYQ2wcHooGXg3LQisa1hH0e4y0pAddPMeeUPipI1tEOqL6A4N0/G7abeq+K8wrwSgjk4C0wnD1XZpJm5aA==", "dependencies": { "@smithy/types": "^3.3.0", "tslib": "^2.6.2" @@ -7746,11 +8711,12 @@ } }, "node_modules/@smithy/signature-v4": { - "version": "3.1.2", - "resolved": "https://registry.npmjs.org/@smithy/signature-v4/-/signature-v4-3.1.2.tgz", - "integrity": "sha512-3BcPylEsYtD0esM4Hoyml/+s7WP2LFhcM3J2AGdcL2vx9O60TtfpDOL72gjb4lU8NeRPeKAwR77YNyyGvMbuEA==", + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/@smithy/signature-v4/-/signature-v4-4.1.0.tgz", + "integrity": "sha512-aRryp2XNZeRcOtuJoxjydO6QTaVhxx/vjaR+gx7ZjaFgrgPRyZ3HCTbfwqYj6ZWEBHkCSUfcaymKPURaByukag==", "dependencies": { "@smithy/is-array-buffer": "^3.0.0", + "@smithy/protocol-http": "^4.1.0", "@smithy/types": "^3.3.0", "@smithy/util-hex-encoding": "^3.0.0", "@smithy/util-middleware": "^3.0.3", @@ -7763,15 +8729,15 @@ } }, 
"node_modules/@smithy/smithy-client": { - "version": "3.1.8", - "resolved": "https://registry.npmjs.org/@smithy/smithy-client/-/smithy-client-3.1.8.tgz", - "integrity": "sha512-nUNGCa0NgvtD0eM45732EBp1H9JQITChMBegGtPRhJD00v3hiFF6tibiOihcYwP5mbp9Kui+sOCl86rDT/Ew2w==", + "version": "3.2.0", + "resolved": "https://registry.npmjs.org/@smithy/smithy-client/-/smithy-client-3.2.0.tgz", + "integrity": "sha512-pDbtxs8WOhJLJSeaF/eAbPgXg4VVYFlRcL/zoNYA5WbG3wBL06CHtBSg53ppkttDpAJ/hdiede+xApip1CwSLw==", "dependencies": { - "@smithy/middleware-endpoint": "^3.0.5", + "@smithy/middleware-endpoint": "^3.1.0", "@smithy/middleware-stack": "^3.0.3", - "@smithy/protocol-http": "^4.0.4", + "@smithy/protocol-http": "^4.1.0", "@smithy/types": "^3.3.0", - "@smithy/util-stream": "^3.1.0", + "@smithy/util-stream": "^3.1.3", "tslib": "^2.6.2" }, "engines": { @@ -7855,12 +8821,12 @@ } }, "node_modules/@smithy/util-defaults-mode-browser": { - "version": "3.0.10", - "resolved": "https://registry.npmjs.org/@smithy/util-defaults-mode-browser/-/util-defaults-mode-browser-3.0.10.tgz", - "integrity": "sha512-WgaNxh33md2zvlD+1TSceVmM7DIy7qYMtuhOat+HYoTntsg0QTbNvoB/5DRxEwSpN84zKf9O34yqzRRtxJZgFg==", + "version": "3.0.15", + "resolved": "https://registry.npmjs.org/@smithy/util-defaults-mode-browser/-/util-defaults-mode-browser-3.0.15.tgz", + "integrity": "sha512-FZ4Psa3vjp8kOXcd3HJOiDPBCWtiilLl57r0cnNtq/Ga9RSDrM5ERL6xt+tO43+2af6Pn5Yp92x2n5vPuduNfg==", "dependencies": { "@smithy/property-provider": "^3.1.3", - "@smithy/smithy-client": "^3.1.8", + "@smithy/smithy-client": "^3.2.0", "@smithy/types": "^3.3.0", "bowser": "^2.11.0", "tslib": "^2.6.2" @@ -7870,15 +8836,15 @@ } }, "node_modules/@smithy/util-defaults-mode-node": { - "version": "3.0.10", - "resolved": "https://registry.npmjs.org/@smithy/util-defaults-mode-node/-/util-defaults-mode-node-3.0.10.tgz", - "integrity": "sha512-3x/pcNIFyaAEQqXc3qnQsCFLlTz/Mwsfl9ciEPU56/Dk/g1kTFjkzyLbUNJaeOo5HT01VrpJBKrBuN94qbPm9A==", + "version": "3.0.15", + "resolved": 
"https://registry.npmjs.org/@smithy/util-defaults-mode-node/-/util-defaults-mode-node-3.0.15.tgz", + "integrity": "sha512-KSyAAx2q6d0t6f/S4XB2+3+6aQacm3aLMhs9aLMqn18uYGUepbdssfogW5JQZpc6lXNBnp0tEnR5e9CEKmEd7A==", "dependencies": { "@smithy/config-resolver": "^3.0.5", - "@smithy/credential-provider-imds": "^3.1.4", + "@smithy/credential-provider-imds": "^3.2.0", "@smithy/node-config-provider": "^3.1.4", "@smithy/property-provider": "^3.1.3", - "@smithy/smithy-client": "^3.1.8", + "@smithy/smithy-client": "^3.2.0", "@smithy/types": "^3.3.0", "tslib": "^2.6.2" }, @@ -7936,12 +8902,12 @@ } }, "node_modules/@smithy/util-stream": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/@smithy/util-stream/-/util-stream-3.1.0.tgz", - "integrity": "sha512-QEMvyv58QIptWA8cpQPbHagJOAlrbCt3ueB9EShwdFfVMYAviXdVtksszQQq+o+dv5dalUMWUbUHUDSJgkF9xg==", + "version": "3.1.3", + "resolved": "https://registry.npmjs.org/@smithy/util-stream/-/util-stream-3.1.3.tgz", + "integrity": "sha512-FIv/bRhIlAxC0U7xM1BCnF2aDRPq0UaelqBHkM2lsCp26mcBbgI0tCVTv+jGdsQLUmAMybua/bjDsSu8RQHbmw==", "dependencies": { - "@smithy/fetch-http-handler": "^3.2.2", - "@smithy/node-http-handler": "^3.1.3", + "@smithy/fetch-http-handler": "^3.2.4", + "@smithy/node-http-handler": "^3.1.4", "@smithy/types": "^3.3.0", "@smithy/util-base64": "^3.0.0", "@smithy/util-buffer-from": "^3.0.0", @@ -10175,6 +11141,12 @@ "@types/node": "*" } }, + "node_modules/@types/fuzzy-search": { + "version": "2.1.5", + "resolved": "https://registry.npmjs.org/@types/fuzzy-search/-/fuzzy-search-2.1.5.tgz", + "integrity": "sha512-Yw8OsjhVKbKw83LMDOZ9RXc+N+um48DmZYMrz7QChpHkQuygsc5O40oCL7SfvWgpaaviCx2TbNXYUBwhMtBH5w==", + "dev": true + }, "node_modules/@types/geojson": { "version": "7946.0.14", "resolved": "https://registry.npmjs.org/@types/geojson/-/geojson-7946.0.14.tgz", @@ -11097,36 +12069,36 @@ "integrity": "sha512-zuVdFrMJiuCDQUMCzQaD6KL28MjnqqN8XnAqiEq9PNm/hCPTSGfrXCOfwj1ow4LFb/tNymJPwsNbVePc1xFqrQ==" }, 
"node_modules/@vue/compiler-core": { - "version": "3.5.1", - "resolved": "https://registry.npmjs.org/@vue/compiler-core/-/compiler-core-3.5.1.tgz", - "integrity": "sha512-WdjF+NSgFYdWttHevHw5uaJFtKPalhmxhlu2uREj8cLP0uyKKIR60/JvSZNTp0x+NSd63iTiORQTx3+tt55NWQ==", + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/@vue/compiler-core/-/compiler-core-3.5.2.tgz", + "integrity": "sha512-1aP7FL2GkqfcskHWGg3lfWQpJnrmewKc+rNJ/hq9WNaAw4BEyJ5QbNChnqmbw+tJ409zdy1XWmUeXXMrCKJcQQ==", "dependencies": { "@babel/parser": "^7.25.3", - "@vue/shared": "3.5.1", + "@vue/shared": "3.5.2", "entities": "^4.5.0", "estree-walker": "^2.0.2", "source-map-js": "^1.2.0" } }, "node_modules/@vue/compiler-dom": { - "version": "3.5.1", - "resolved": "https://registry.npmjs.org/@vue/compiler-dom/-/compiler-dom-3.5.1.tgz", - "integrity": "sha512-Ao23fB1lINo18HLCbJVApvzd9OQe8MgmQSgyY5+umbWj2w92w9KykVmJ4Iv2US5nak3ixc2B+7Km7JTNhQ8kSQ==", + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/@vue/compiler-dom/-/compiler-dom-3.5.2.tgz", + "integrity": "sha512-QY4DpT8ZIUyu/ZA5gErpSEDocGNEbHmpkZIC/d5jbp/rUF0iOJNigAy3HCCKc0PMMhDlrcysO3ufQ6Ab4MpEcQ==", "dependencies": { - "@vue/compiler-core": "3.5.1", - "@vue/shared": "3.5.1" + "@vue/compiler-core": "3.5.2", + "@vue/shared": "3.5.2" } }, "node_modules/@vue/compiler-sfc": { - "version": "3.5.1", - "resolved": "https://registry.npmjs.org/@vue/compiler-sfc/-/compiler-sfc-3.5.1.tgz", - "integrity": "sha512-DFizMNH8eDglLhlfwJ0+ciBsztaYe3fY/zcZjrqL1ljXvUw/UpC84M1d7HpBTCW68SNqZyIxrs1XWmf+73Y65w==", + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/@vue/compiler-sfc/-/compiler-sfc-3.5.2.tgz", + "integrity": "sha512-vErEtybSU290LbMW+ChYllI9tNJEdTW1oU+8cZWINZyjlWeTSa9YqDl4/pZJSnozOI+HmcaC1Vz2eFKmXNSXZA==", "dependencies": { "@babel/parser": "^7.25.3", - "@vue/compiler-core": "3.5.1", - "@vue/compiler-dom": "3.5.1", - "@vue/compiler-ssr": "3.5.1", - "@vue/shared": "3.5.1", + "@vue/compiler-core": "3.5.2", + "@vue/compiler-dom": 
"3.5.2", + "@vue/compiler-ssr": "3.5.2", + "@vue/shared": "3.5.2", "estree-walker": "^2.0.2", "magic-string": "^0.30.11", "postcss": "^8.4.44", @@ -11134,18 +12106,18 @@ } }, "node_modules/@vue/compiler-ssr": { - "version": "3.5.1", - "resolved": "https://registry.npmjs.org/@vue/compiler-ssr/-/compiler-ssr-3.5.1.tgz", - "integrity": "sha512-C1hpSHQgRM8bg+5XWWD7CkFaVpSn9wZHCLRd10AmxqrH17d4EMP6+XcZpwBOM7H1jeStU5naEapZZWX0kso1tQ==", + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/@vue/compiler-ssr/-/compiler-ssr-3.5.2.tgz", + "integrity": "sha512-vMtA4tQK/AM3UAYJsmouQzQpgG+h9TKiD5BV+Zt+ZyAMdicxzSEEFGWf/CykRnDpqj9fMfIHPhOezJVNxiXe2A==", "dependencies": { - "@vue/compiler-dom": "3.5.1", - "@vue/shared": "3.5.1" + "@vue/compiler-dom": "3.5.2", + "@vue/shared": "3.5.2" } }, "node_modules/@vue/shared": { - "version": "3.5.1", - "resolved": "https://registry.npmjs.org/@vue/shared/-/shared-3.5.1.tgz", - "integrity": "sha512-NdcTRoO4KuW2RSFgpE2c+E/R/ZHaRzWPxAGxhmxZaaqLh6nYCXx7lc9a88ioqOCxCaV2SFJmujkxbUScW7dNsQ==" + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/@vue/shared/-/shared-3.5.2.tgz", + "integrity": "sha512-Ce89WNFBzcDca/AgFTxgX4/K4iAyF7oFIp8Z5aBbFBNbtpwnQr+5pZOoHndxnjE2h+YFcipVMzs9UL11XB6dwA==" }, "node_modules/@webassemblyjs/ast": { "version": "1.12.1", @@ -11446,14 +12418,14 @@ } }, "node_modules/ajv": { - "version": "6.12.6", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", - "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", + "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", "dependencies": { - "fast-deep-equal": "^3.1.1", - "fast-json-stable-stringify": "^2.0.0", - "json-schema-traverse": "^0.4.1", - "uri-js": "^4.2.2" + "fast-deep-equal": "^3.1.3", + "fast-uri": "^3.0.1", + "json-schema-traverse": 
"^1.0.0", + "require-from-string": "^2.0.2" }, "funding": { "type": "github", @@ -11476,32 +12448,15 @@ } } }, - "node_modules/ajv-formats/node_modules/ajv": { - "version": "8.17.1", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", - "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", + "node_modules/ajv-keywords": { + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-5.1.0.tgz", + "integrity": "sha512-YCS/JNFAUyr5vAuhk1DWm1CBxRHW9LbJ2ozWeemrIqpbsqKjHVxYPyi5GC0rjZIT5JxJ3virVTS8wk4i/Z+krw==", "dependencies": { - "fast-deep-equal": "^3.1.3", - "fast-uri": "^3.0.1", - "json-schema-traverse": "^1.0.0", - "require-from-string": "^2.0.2" + "fast-deep-equal": "^3.1.3" }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/epoberezkin" - } - }, - "node_modules/ajv-formats/node_modules/json-schema-traverse": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", - "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==" - }, - "node_modules/ajv-keywords": { - "version": "3.5.2", - "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz", - "integrity": "sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==", "peerDependencies": { - "ajv": "^6.9.1" + "ajv": "^8.8.2" } }, "node_modules/ansi-colors": { @@ -11890,17 +12845,13 @@ "node": ">=0.10.0" } }, - "node_modules/ast-types-flow": { - "version": "0.0.8", - "resolved": "https://registry.npmjs.org/ast-types-flow/-/ast-types-flow-0.0.8.tgz", - "integrity": "sha512-OH/2E5Fg20h2aPrbe+QL8JZQFko0YZaF+j4mnQ7BGhfavO7OpSLa8a0y9sBwomHdSbkhTS8TQNayBfnW5DwbvQ==", - "dev": true - }, - "node_modules/astral-regex": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/astral-regex/-/astral-regex-1.0.0.tgz", - 
"integrity": "sha512-+Ryf6g3BKoRc7jfp7ad8tM4TtMiaWvbF/1/sQcZPkkS7ag3D5nMBCe2UfOTONtAkaG0tO0ij3C5Lwmf1EiyjHg==", - "dev": true, + "node_modules/ast-types": { + "version": "0.13.4", + "resolved": "https://registry.npmjs.org/ast-types/-/ast-types-0.13.4.tgz", + "integrity": "sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==", + "dependencies": { + "tslib": "^2.0.1" + }, "engines": { "node": ">=4" } @@ -12111,9 +13062,9 @@ "optional": true }, "node_modules/bare-fs": { - "version": "2.3.1", - "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-2.3.1.tgz", - "integrity": "sha512-W/Hfxc/6VehXlsgFtbB5B4xFcsCl+pAh30cYhoFyXErf6oGrwjh8SwiPAdHgpmWonKuYpZgGywN0SXt7dgsADA==", + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-2.3.3.tgz", + "integrity": "sha512-7RYKL+vZVCyAsMLi5SPu7QGauGGT8avnP/HO571ndEuV4MYdGXvLhtW67FuLPeEI8EiIY7zbbRR9x7x7HU0kgw==", "optional": true, "dependencies": { "bare-events": "^2.0.0", @@ -12122,9 +13073,9 @@ } }, "node_modules/bare-os": { - "version": "2.4.0", - "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-2.4.0.tgz", - "integrity": "sha512-v8DTT08AS/G0F9xrhyLtepoo9EJBJ85FRSMbu1pQUlAf6A8T0tEEQGMVObWeqpjhSPXsE0VGlluFBJu2fdoTNg==", + "version": "2.4.2", + "resolved": "https://registry.npmjs.org/bare-os/-/bare-os-2.4.2.tgz", + "integrity": "sha512-HZoJwzC+rZ9lqEemTMiO0luOePoGYNBgsLLgegKR/cljiJvcDNhDZQkzC+NC5Oh0aHbdBNSOHpghwMuB5tqhjg==", "optional": true }, "node_modules/bare-path": { @@ -12137,11 +13088,12 @@ } }, "node_modules/bare-stream": { - "version": "2.1.3", - "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.1.3.tgz", - "integrity": "sha512-tiDAH9H/kP+tvNO5sczyn9ZAA7utrSMobyDchsnyyXBuUe2FSQWbxhtuHB8jwpHYYevVo2UJpcmvvjrbHboUUQ==", + "version": "2.2.1", + "resolved": "https://registry.npmjs.org/bare-stream/-/bare-stream-2.2.1.tgz", + "integrity": 
"sha512-YTB47kHwBW9zSG8LD77MIBAAQXjU2WjAkMHeeb7hUplVs6+IoM5I7uEVQNPMB7lj9r8I76UMdoMkGnCodHOLqg==", "optional": true, "dependencies": { + "b4a": "^1.6.6", "streamx": "^2.18.0" } }, @@ -15921,9 +16873,9 @@ } }, "node_modules/caniuse-lite": { - "version": "1.0.30001655", - "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001655.tgz", - "integrity": "sha512-jRGVy3iSGO5Uutn2owlb5gR6qsGngTw9ZTb4ali9f3glshcNmJ2noam4Mo9zia5P9Dk3jNNydy7vQjuE5dQmfg==", + "version": "1.0.30001657", + "resolved": "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001657.tgz", + "integrity": "sha512-DPbJAlP8/BAXy3IgiWmZKItubb3TYGP0WscQQlVGIfT4s/YlFYVuJgyOsQNP7rJRChx/qdMeLJQJP0Sgg2yjNA==", "funding": [ { "type": "opencollective", @@ -16073,6 +17025,70 @@ "node": ">= 16" } }, + "node_modules/cheerio": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/cheerio/-/cheerio-1.0.0.tgz", + "integrity": "sha512-quS9HgjQpdaXOvsZz82Oz7uxtXiy6UIsIQcpBj7HRw2M63Skasm9qlDocAM7jNuaxdhpPU7c4kJN+gA5MCu4ww==", + "dependencies": { + "cheerio-select": "^2.1.0", + "dom-serializer": "^2.0.0", + "domhandler": "^5.0.3", + "domutils": "^3.1.0", + "encoding-sniffer": "^0.2.0", + "htmlparser2": "^9.1.0", + "parse5": "^7.1.2", + "parse5-htmlparser2-tree-adapter": "^7.0.0", + "parse5-parser-stream": "^7.1.2", + "undici": "^6.19.5", + "whatwg-mimetype": "^4.0.0" + }, + "engines": { + "node": ">=18.17" + }, + "funding": { + "url": "https://github.com/cheeriojs/cheerio?sponsor=1" + } + }, + "node_modules/cheerio-select": { + "version": "2.1.0", + "resolved": "https://registry.npmjs.org/cheerio-select/-/cheerio-select-2.1.0.tgz", + "integrity": "sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==", + "dependencies": { + "boolbase": "^1.0.0", + "css-select": "^5.1.0", + "css-what": "^6.1.0", + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } 
+ }, + "node_modules/chevrotain": { + "version": "11.0.3", + "resolved": "https://registry.npmjs.org/chevrotain/-/chevrotain-11.0.3.tgz", + "integrity": "sha512-ci2iJH6LeIkvP9eJW6gpueU8cnZhv85ELY8w8WiFtNjMHA5ad6pQLaJo9mEly/9qUyCpvqX8/POVUTf18/HFdw==", + "dependencies": { + "@chevrotain/cst-dts-gen": "11.0.3", + "@chevrotain/gast": "11.0.3", + "@chevrotain/regexp-to-ast": "11.0.3", + "@chevrotain/types": "11.0.3", + "@chevrotain/utils": "11.0.3", + "lodash-es": "4.17.21" + } + }, + "node_modules/chevrotain-allstar": { + "version": "0.3.1", + "resolved": "https://registry.npmjs.org/chevrotain-allstar/-/chevrotain-allstar-0.3.1.tgz", + "integrity": "sha512-b7g+y9A0v4mxCW1qUhf3BSVPg+/NvGErk/dOkrDaHA0nQIQGAtrOjlX//9OQtRlSCy+x9rfB5N8yC71lH1nvMw==", + "dependencies": { + "lodash-es": "^4.17.21" + }, + "peerDependencies": { + "chevrotain": "^11.0.0" + } + }, "node_modules/child_process": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/child_process/-/child_process-1.0.2.tgz", @@ -16129,9 +17145,9 @@ } }, "node_modules/chromium-bidi": { - "version": "0.6.4", - "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.6.4.tgz", - "integrity": "sha512-8zoq6ogmhQQkAKZVKO2ObFTl4uOkqoX1PlKQX3hZQ5E9cbUotcAb7h4pTNVAGGv8Z36PF3CtdOriEp/Rz82JqQ==", + "version": "0.6.5", + "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.6.5.tgz", + "integrity": "sha512-RuLrmzYrxSb0s9SgpB+QN5jJucPduZQ/9SIe76MDxYJuecPW5mxMdacJ1f4EtgiV+R0p3sCkznTMvH0MPGFqjA==", "dependencies": { "mitt": "3.0.1", "urlpattern-polyfill": "10.0.0", @@ -16349,15 +17365,15 @@ } }, "node_modules/cohere-ai": { - "version": "7.11.0", - "resolved": "https://registry.npmjs.org/cohere-ai/-/cohere-ai-7.11.0.tgz", - "integrity": "sha512-rtBx1J4ifcg+zbl8o4yN7pzJ0akczj68wmMue1beqelVUEjbcw3uwP3dQ3hR14y41J3CDnCx+eYAh0oizx04fQ==", + "version": "7.13.0", + "resolved": "https://registry.npmjs.org/cohere-ai/-/cohere-ai-7.13.0.tgz", + "integrity": 
"sha512-/VTqq2dW7YkQEfeBwEmckAHorQuw1exnfrO3orsixVXASr71oF3TL0w/xi9ZVN9xsoYpXZyVaiD8GBxLEiGJ7Q==", "dependencies": { "@aws-sdk/client-sagemaker": "^3.583.0", "@aws-sdk/credential-providers": "^3.583.0", "@aws-sdk/protocol-http": "^3.374.0", "@aws-sdk/signature-v4": "^3.374.0", - "form-data": "4.0.0", + "form-data": "^4.0.0", "form-data-encoder": "^4.0.2", "formdata-node": "^6.0.3", "js-base64": "3.7.2", @@ -16367,14 +17383,6 @@ "url-join": "4.0.1" } }, - "node_modules/cohere-ai/node_modules/formdata-node": { - "version": "6.0.3", - "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-6.0.3.tgz", - "integrity": "sha512-8e1++BCiTzUno9v5IZ2J6bv4RU+3UKDmqWUQD0MIMVCd9AdhWkO1gw57oo1mNEX1dMq2EGI+FbWz4B92pscSQg==", - "engines": { - "node": ">= 18" - } - }, "node_modules/cohere-ai/node_modules/qs": { "version": "6.11.2", "resolved": "https://registry.npmjs.org/qs/-/qs-6.11.2.tgz", @@ -16970,68 +17978,20 @@ } }, "node_modules/css-select": { - "version": "4.3.0", - "resolved": "https://registry.npmjs.org/css-select/-/css-select-4.3.0.tgz", - "integrity": "sha512-wPpOYtnsVontu2mODhA19JrqWxNsfdatRKd64kmpRbQgh1KtItko5sTnEpPdpSaJszTOhEMlF/RPz28qj4HqhQ==", + "version": "5.1.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-5.1.0.tgz", + "integrity": "sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==", "dependencies": { "boolbase": "^1.0.0", - "css-what": "^6.0.1", - "domhandler": "^4.3.1", - "domutils": "^2.8.0", + "css-what": "^6.1.0", + "domhandler": "^5.0.2", + "domutils": "^3.0.1", "nth-check": "^2.0.1" }, "funding": { "url": "https://github.com/sponsors/fb55" } }, - "node_modules/css-select/node_modules/dom-serializer": { - "version": "1.4.1", - "resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-1.4.1.tgz", - "integrity": "sha512-VHwB3KfrcOOkelEG2ZOfxqLZdfkil8PtJi4P8N2MMXucZq2yLp75ClViUlOVwyoHEDjYU433Aq+5zWP61+RGag==", - "dependencies": { - "domelementtype": "^2.0.1", - 
"domhandler": "^4.2.0", - "entities": "^2.0.0" - }, - "funding": { - "url": "https://github.com/cheeriojs/dom-serializer?sponsor=1" - } - }, - "node_modules/css-select/node_modules/domhandler": { - "version": "4.3.1", - "resolved": "https://registry.npmjs.org/domhandler/-/domhandler-4.3.1.tgz", - "integrity": "sha512-GrwoxYN+uWlzO8uhUXRl0P+kHE4GtVPfYzVLcUxPL7KNdHKj66vvlhiweIHqYYXWlw+T8iLMp42Lm67ghw4WMQ==", - "dependencies": { - "domelementtype": "^2.2.0" - }, - "engines": { - "node": ">= 4" - }, - "funding": { - "url": "https://github.com/fb55/domhandler?sponsor=1" - } - }, - "node_modules/css-select/node_modules/domutils": { - "version": "2.8.0", - "resolved": "https://registry.npmjs.org/domutils/-/domutils-2.8.0.tgz", - "integrity": "sha512-w96Cjofp72M5IIhpjgobBimYEfoPjx1Vx0BSX9P30WBdZW2WIKU0T1Bd0kz2eNZ9ikjKgHbEyKx8BB6H1L3h3A==", - "dependencies": { - "dom-serializer": "^1.0.1", - "domelementtype": "^2.2.0", - "domhandler": "^4.2.0" - }, - "funding": { - "url": "https://github.com/fb55/domutils?sponsor=1" - } - }, - "node_modules/css-select/node_modules/entities": { - "version": "2.2.0", - "resolved": "https://registry.npmjs.org/entities/-/entities-2.2.0.tgz", - "integrity": "sha512-p92if5Nz619I0w+akJrLZH0MX0Pb5DX39XOwQTtXSdQQOaYH03S1uIQp4mhOZtAXrxq4ViO67YTiLBo2638o9A==", - "funding": { - "url": "https://github.com/fb55/entities?sponsor=1" - } - }, "node_modules/css-to-react-native": { "version": "3.2.0", "resolved": "https://registry.npmjs.org/css-to-react-native/-/css-to-react-native-3.2.0.tgz", @@ -17636,11 +18596,11 @@ } }, "node_modules/data-uri-to-buffer": { - "version": "3.0.1", - "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-3.0.1.tgz", - "integrity": "sha512-WboRycPNsVw3B3TL559F7kuBUM4d8CgMEvk6xEJlOp7OBPjt6G7z8WMWlD2rOFZLk6OYfFIUGsCOWzcQH9K2og==", + "version": "6.0.2", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-6.0.2.tgz", + "integrity": 
"sha512-7hvf7/GW8e86rW0ptuwS3OcBGDjIi6SZva7hCyWC0yYry2cOPmLIjXAUHI6DK2HsnwJd9ifmt57i8eV2n4YNpw==", "engines": { - "node": ">= 6" + "node": ">= 14" } }, "node_modules/data-urls": { @@ -18090,9 +19050,9 @@ } }, "node_modules/devtools-protocol": { - "version": "0.0.1312386", - "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1312386.tgz", - "integrity": "sha512-DPnhUXvmvKT2dFA/j7B+riVLUt9Q6RKJlcppojL5CoRywJJKLDYnRlw0gTFKfgDPHP5E04UoB71SxoJlVZy8FA==" + "version": "0.0.1330662", + "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1330662.tgz", + "integrity": "sha512-pzh6YQ8zZfz3iKlCvgzVCu22NdpZ8hNmwU6WnQjNVquh0A9iVosPtNLWDwaWVGyrntQlltPFztTMK5Cg6lfCuw==" }, "node_modules/dezalgo": { "version": "1.0.4", @@ -18267,6 +19227,11 @@ "safer-buffer": "^2.1.0" } }, + "node_modules/ecc-jsbn/node_modules/jsbn": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", + "integrity": "sha512-UVU9dibq2JcFWxQPA6KCqj5O42VOmAY3zQUfEKxU0KpTGXwNoCjkX1e13eHNvw/xPynt6pU0rZ1htjWTNTSXsg==" + }, "node_modules/ecdsa-sig-formatter": { "version": "1.0.11", "resolved": "https://registry.npmjs.org/ecdsa-sig-formatter/-/ecdsa-sig-formatter-1.0.11.tgz", @@ -18281,9 +19246,9 @@ "integrity": "sha512-WMwm9LhRUo+WUaRN+vRuETqG89IgZphVSNkdFgeb6sS/E4OrDIN7t48CAewSHXc6C8lefD8KKfr5vY61brQlow==" }, "node_modules/electron-to-chromium": { - "version": "1.5.13", - "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.13.tgz", - "integrity": "sha512-lbBcvtIJ4J6sS4tb5TLp1b4LyfCdMkwStzXPyAgVgTRAsep4bvrAGaBOP7ZJtQMNJpSQ9SqG4brWOroNaQtm7Q==" + "version": "1.5.14", + "resolved": "https://registry.npmjs.org/electron-to-chromium/-/electron-to-chromium-1.5.14.tgz", + "integrity": "sha512-bEfPECb3fJ15eaDnu9LEJ2vPGD6W1vt7vZleSVyFhYuMIKm3vz/g9lt7IvEzgdwj58RjbPKUF2rXTCN/UW47tQ==" }, "node_modules/emoji-regex": { "version": "9.2.2", @@ -18883,6 +19848,21 @@ "webpack": "^5.0.0" } }, + 
"node_modules/eslint/node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, "node_modules/eslint/node_modules/ansi-styles": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", @@ -18945,6 +19925,11 @@ "node": ">=8" } }, + "node_modules/eslint/node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" + }, "node_modules/eslint/node_modules/minimatch": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", @@ -19507,6 +20492,34 @@ "webpack": "^4.0.0 || ^5.0.0" } }, + "node_modules/file-loader/node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/file-loader/node_modules/ajv-keywords": { + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz", + "integrity": "sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==", + "peerDependencies": { + "ajv": 
"^6.9.1" + } + }, + "node_modules/file-loader/node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" + }, "node_modules/file-loader/node_modules/schema-utils": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-3.3.0.tgz", @@ -19820,6 +20833,29 @@ "webpack": "^5.11.0" } }, + "node_modules/fork-ts-checker-webpack-plugin/node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/fork-ts-checker-webpack-plugin/node_modules/ajv-keywords": { + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz", + "integrity": "sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==", + "peerDependencies": { + "ajv": "^6.9.1" + } + }, "node_modules/fork-ts-checker-webpack-plugin/node_modules/ansi-styles": { "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", @@ -19923,6 +20959,11 @@ "js-yaml": "bin/js-yaml.js" } }, + "node_modules/fork-ts-checker-webpack-plugin/node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" + }, 
"node_modules/fork-ts-checker-webpack-plugin/node_modules/minimatch": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz", @@ -19995,23 +21036,11 @@ } }, "node_modules/formdata-node": { - "version": "4.4.1", - "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", - "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", - "dependencies": { - "node-domexception": "1.0.0", - "web-streams-polyfill": "4.0.0-beta.3" - }, - "engines": { - "node": ">= 12.20" - } - }, - "node_modules/formdata-node/node_modules/web-streams-polyfill": { - "version": "4.0.0-beta.3", - "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", - "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-6.0.3.tgz", + "integrity": "sha512-8e1++BCiTzUno9v5IZ2J6bv4RU+3UKDmqWUQD0MIMVCd9AdhWkO1gw57oo1mNEX1dMq2EGI+FbWz4B92pscSQg==", "engines": { - "node": ">= 14" + "node": ">= 18" } }, "node_modules/formidable": { @@ -20171,6 +21200,19 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/fuse.js": { + "version": "7.0.0", + "resolved": "https://registry.npmjs.org/fuse.js/-/fuse.js-7.0.0.tgz", + "integrity": "sha512-14F4hBIxqKvD4Zz/XjDc3y94mNZN6pRv3U13Udo0lNLCWRBUsrMv2xwcF/y/Z5sV6+FQW+/ow68cHpm4sunt8Q==", + "engines": { + "node": ">=10" + } + }, + "node_modules/fuzzy-search": { + "version": "3.2.1", + "resolved": "https://registry.npmjs.org/fuzzy-search/-/fuzzy-search-3.2.1.tgz", + "integrity": "sha512-vAcPiyomt1ioKAsAL2uxSABHJ4Ju/e4UeDM+g1OlR0vV4YhLGMNsdLNvZTpEDY4JCSt0E4hASCNM5t2ETtsbyg==" + }, "node_modules/gauge": { "version": "3.0.2", "resolved": "https://registry.npmjs.org/gauge/-/gauge-3.0.2.tgz", @@ -20449,16 +21491,31 @@ "url": "https://github.com/sponsors/ljharb" } 
}, - "node_modules/get-tsconfig": { - "version": "4.7.5", - "resolved": "https://registry.npmjs.org/get-tsconfig/-/get-tsconfig-4.7.5.tgz", - "integrity": "sha512-ZCuZCnlqNzjb4QprAzXKdpp/gh6KTxSJuw3IBsPnV/7fV4NxC9ckB+vPTt8w7fJA0TaSD7c55BR47JD6MEDyDw==", - "dev": true, + "node_modules/get-uri": { + "version": "6.0.3", + "resolved": "https://registry.npmjs.org/get-uri/-/get-uri-6.0.3.tgz", + "integrity": "sha512-BzUrJBS9EcUb4cFol8r4W3v1cPsSyajLSthNkz5BxbpDcHN5tIrM10E2eNvfnvBn3DaT3DUgx0OpsBKkaOpanw==", "dependencies": { - "resolve-pkg-maps": "^1.0.0" + "basic-ftp": "^5.0.2", + "data-uri-to-buffer": "^6.0.2", + "debug": "^4.3.4", + "fs-extra": "^11.2.0" }, - "funding": { - "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" + "engines": { + "node": ">= 14" + } + }, + "node_modules/get-uri/node_modules/fs-extra": { + "version": "11.2.0", + "resolved": "https://registry.npmjs.org/fs-extra/-/fs-extra-11.2.0.tgz", + "integrity": "sha512-PmDi3uwK5nFuXh7XDTlVnS17xJS7vW36is2+w3xcv8SVxiB4NyATf4ctkVY5bkSjX0Y4nbvZCq1/EjtEyr9ktw==", + "dependencies": { + "graceful-fs": "^4.2.0", + "jsonfile": "^6.0.1", + "universalify": "^2.0.0" + }, + "engines": { + "node": ">=14.14" } }, "node_modules/get-value": { @@ -20788,6 +21845,26 @@ "node": ">=6" } }, + "node_modules/har-validator/node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/har-validator/node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": 
"sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" + }, "node_modules/has-bigints": { "version": "1.0.2", "resolved": "https://registry.npmjs.org/has-bigints/-/has-bigints-1.0.2.tgz", @@ -21236,6 +22313,24 @@ "node": ">=14" } }, + "node_modules/html-to-text/node_modules/htmlparser2": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz", + "integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1", + "entities": "^4.4.0" + } + }, "node_modules/html-url-attributes": { "version": "3.0.0", "resolved": "https://registry.npmjs.org/html-url-attributes/-/html-url-attributes-3.0.0.tgz", @@ -21286,9 +22381,9 @@ } }, "node_modules/htmlparser2": { - "version": "8.0.2", - "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz", - "integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==", + "version": "9.1.0", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-9.1.0.tgz", + "integrity": "sha512-5zfg6mHUoaer/97TxnGpxmbR7zJtPwIYFMZ/H5ucTlPZhKvtum05yiPK3Mgai3a0DyVxv7qYqoweaEd2nrYQzQ==", "funding": [ "https://github.com/fb55/htmlparser2?sponsor=1", { @@ -21299,8 +22394,8 @@ "dependencies": { "domelementtype": "^2.3.0", "domhandler": "^5.0.3", - "domutils": "^3.0.1", - "entities": "^4.4.0" + "domutils": "^3.1.0", + "entities": "^4.5.0" } }, "node_modules/http-browserify": { @@ -22064,11 +23159,6 @@ "node": ">= 12" } }, - "node_modules/ip-address/node_modules/jsbn": { - "version": "1.1.0", - "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-1.1.0.tgz", - "integrity": 
"sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==" - }, "node_modules/ip-address/node_modules/sprintf-js": { "version": "1.1.3", "resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.1.3.tgz", @@ -22948,9 +24038,9 @@ "integrity": "sha512-NnRs6dsyqUXejqk/yv2aiXlAvOs56sLkX6nUdeaNezI5LFFLlsZjOThmwnrcwh5ZZRwZlCMnVAY3CvhIhoVEKQ==" }, "node_modules/js-datepicker": { - "version": "5.18.2", - "resolved": "https://registry.npmjs.org/js-datepicker/-/js-datepicker-5.18.2.tgz", - "integrity": "sha512-lBh5tIwb1ewDVlNHuwczoLiMhTFWnGYuHSA7ONAPKyMeofIFZDFGeOZ71UTY/Mk2evJJt+L66ec/RiXUy8XzEg==" + "version": "5.18.3", + "resolved": "https://registry.npmjs.org/js-datepicker/-/js-datepicker-5.18.3.tgz", + "integrity": "sha512-M1+UKeRkLT0p04JhnSpNugveFSfy6VfEeNTyAyFef6unKIqQx4oQqx66lsBi0FRKHb4HtQsmJ8V4L4SPj1lbRQ==" }, "node_modules/js-stringify": { "version": "1.0.2", @@ -22975,14 +24065,14 @@ } }, "node_modules/jsbn": { - "version": "0.1.1", - "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", - "integrity": "sha512-UVU9dibq2JcFWxQPA6KCqj5O42VOmAY3zQUfEKxU0KpTGXwNoCjkX1e13eHNvw/xPynt6pU0rZ1htjWTNTSXsg==" + "version": "1.1.0", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-1.1.0.tgz", + "integrity": "sha512-4bYVV3aAMtDTTu4+xsDYa6sy9GyJ69/amsu9sYF2zqjiEoZA5xJi3BrfX3uY+/IekIu7MwdObdbDWpoZdBv3/A==" }, "node_modules/jsdom": { - "version": "24.1.0", - "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-24.1.0.tgz", - "integrity": "sha512-6gpM7pRXCwIOKxX47cgOyvyQDN/Eh0f1MeKySBV2xGdKtqJBLj8P25eY3EVCWo2mglDDzozR2r2MW4T+JiNUZA==", + "version": "25.0.0", + "resolved": "https://registry.npmjs.org/jsdom/-/jsdom-25.0.0.tgz", + "integrity": "sha512-OhoFVT59T7aEq75TVw9xxEfkXgacpqAhQaYgP9y/fDqWQCMB/b1H66RfmPm/MaeaAIU9nDwMOVTlPN51+ao6CQ==", "dev": true, "dependencies": { "cssstyle": "^4.0.1", @@ -23054,9 +24144,9 @@ "integrity": 
"sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA==" }, "node_modules/json-schema-traverse": { - "version": "0.4.1", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", - "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", + "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==" }, "node_modules/json-stable-stringify-without-jsonify": { "version": "1.0.1", @@ -23330,6 +24420,14 @@ } } }, + "node_modules/ky-universal/node_modules/data-uri-to-buffer": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/data-uri-to-buffer/-/data-uri-to-buffer-3.0.1.tgz", + "integrity": "sha512-WboRycPNsVw3B3TL559F7kuBUM4d8CgMEvk6xEJlOp7OBPjt6G7z8WMWlD2rOFZLk6OYfFIUGsCOWzcQH9K2og==", + "engines": { + "node": ">= 6" + } + }, "node_modules/ky-universal/node_modules/node-fetch": { "version": "3.0.0-beta.9", "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-3.0.0-beta.9.tgz", @@ -23362,9 +24460,9 @@ } }, "node_modules/launch-editor": { - "version": "2.8.2", - "resolved": "https://registry.npmjs.org/launch-editor/-/launch-editor-2.8.2.tgz", - "integrity": "sha512-eF5slEUZXmi6WvFzI3dYcv+hA24/iKnROf24HztcURJpSz9RBmBgz5cNCVOeguouf1llrwy6Yctl4C4HM+xI8g==", + "version": "2.9.1", + "resolved": "https://registry.npmjs.org/launch-editor/-/launch-editor-2.9.1.tgz", + "integrity": "sha512-Gcnl4Bd+hRO9P9icCP/RVVT2o8SFlPXofuCxvA2SaZuH45whSvf5p8x5oih5ftLiVhEI4sp5xDY+R+b3zJBh5w==", "dev": true, "dependencies": { "picocolors": "^1.0.0", @@ -23870,6 +24968,28 @@ "url": "https://github.com/sponsors/wooorm" } }, + "node_modules/markdown-to-jsx": { + "version": "7.5.0", + "resolved": "https://registry.npmjs.org/markdown-to-jsx/-/markdown-to-jsx-7.5.0.tgz", + 
"integrity": "sha512-RrBNcMHiFPcz/iqIj0n3wclzHXjwS7mzjBNWecKKVhNTIxQepIix6Il/wZCn2Cg5Y1ow2Qi84+eJrryFRWBEWw==", + "engines": { + "node": ">= 10" + }, + "peerDependencies": { + "react": ">= 0.14.0" + } + }, + "node_modules/marked": { + "version": "13.0.3", + "resolved": "https://registry.npmjs.org/marked/-/marked-13.0.3.tgz", + "integrity": "sha512-rqRix3/TWzE9rIoFGIn8JmsVfhiuC8VIQ8IdX5TfzmeBucdY05/0UlzKaw0eVtpcN/OdVFpBk7CjKGo9iHJ/zA==", + "bin": { + "marked": "bin/marked.js" + }, + "engines": { + "node": ">= 18" + } + }, "node_modules/material-colors": { "version": "1.2.6", "resolved": "https://registry.npmjs.org/material-colors/-/material-colors-1.2.6.tgz", @@ -28324,11 +29444,31 @@ "resolved": "https://registry.npmjs.org/form-data-encoder/-/form-data-encoder-1.7.2.tgz", "integrity": "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A==" }, + "node_modules/openai/node_modules/formdata-node": { + "version": "4.4.1", + "resolved": "https://registry.npmjs.org/formdata-node/-/formdata-node-4.4.1.tgz", + "integrity": "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ==", + "dependencies": { + "node-domexception": "1.0.0", + "web-streams-polyfill": "4.0.0-beta.3" + }, + "engines": { + "node": ">= 12.20" + } + }, "node_modules/openai/node_modules/undici-types": { "version": "5.26.5", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-5.26.5.tgz", "integrity": "sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==" }, + "node_modules/openai/node_modules/web-streams-polyfill": { + "version": "4.0.0-beta.3", + "resolved": "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-4.0.0-beta.3.tgz", + "integrity": "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug==", + "engines": { + "node": ">= 14" + } + }, "node_modules/opentype.js": { "version": "1.3.4", "resolved": 
"https://registry.npmjs.org/opentype.js/-/opentype.js-1.3.4.tgz", @@ -29341,7 +30481,6 @@ "version": "2.0.3", "resolved": "https://registry.npmjs.org/progress/-/progress-2.0.3.tgz", "integrity": "sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==", - "dev": true, "engines": { "node": ">=0.4.0" } @@ -29674,16 +30813,16 @@ } }, "node_modules/puppeteer": { - "version": "23.1.0", - "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-23.1.0.tgz", - "integrity": "sha512-m+CyicDlGN1AVUeOsCa6/+KQydJzxfsPowL7fQy+VGNeaWafB0m8G5aGfXdfZztKMxzCsdz7KNNzbJPeG9wwFw==", + "version": "23.3.0", + "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-23.3.0.tgz", + "integrity": "sha512-e2jY8cdWSUGsrLxqGm3hIbJq/UIk1uOY8XY7SM51leXkH7shrIyE91lK90Q9byX6tte+cyL3HKqlWBEd6TjWTA==", "hasInstallScript": true, "dependencies": { - "@puppeteer/browsers": "2.3.1", - "chromium-bidi": "0.6.4", + "@puppeteer/browsers": "2.4.0", + "chromium-bidi": "0.6.5", "cosmiconfig": "^9.0.0", - "devtools-protocol": "0.0.1312386", - "puppeteer-core": "23.1.0", + "devtools-protocol": "0.0.1330662", + "puppeteer-core": "23.3.0", "typed-query-selector": "^2.12.0" }, "bin": { @@ -29694,14 +30833,14 @@ } }, "node_modules/puppeteer-core": { - "version": "23.1.0", - "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-23.1.0.tgz", - "integrity": "sha512-SvAsu+xnLN2FMXE/59bp3s3WXp8ewqUGzVV4AQtml/2xmsciZnU/bXcCW+eETHPWQ6Agg2vTI7QzWXPpEARK2g==", + "version": "23.3.0", + "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-23.3.0.tgz", + "integrity": "sha512-sB2SsVMFs4gKad5OCdv6w5vocvtEUrRl0zQqSyRPbo/cj1Ktbarmhxy02Zyb9R9HrssBcJDZbkrvBnbaesPyYg==", "dependencies": { - "@puppeteer/browsers": "2.3.1", - "chromium-bidi": "0.6.4", + "@puppeteer/browsers": "2.4.0", + "chromium-bidi": "0.6.5", "debug": "^4.3.6", - "devtools-protocol": "0.0.1312386", + "devtools-protocol": "0.0.1330662", "typed-query-selector": "^2.12.0", "ws": 
"^8.18.0" }, @@ -29909,6 +31048,34 @@ "webpack": "^4.0.0 || ^5.0.0" } }, + "node_modules/raw-loader/node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/raw-loader/node_modules/ajv-keywords": { + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz", + "integrity": "sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==", + "peerDependencies": { + "ajv": "^6.9.1" + } + }, + "node_modules/raw-loader/node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" + }, "node_modules/raw-loader/node_modules/schema-utils": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-3.3.0.tgz", @@ -30891,6 +32058,21 @@ "strip-ansi": "^6.0.1" } }, + "node_modules/renderkid/node_modules/css-select": { + "version": "4.3.0", + "resolved": "https://registry.npmjs.org/css-select/-/css-select-4.3.0.tgz", + "integrity": "sha512-wPpOYtnsVontu2mODhA19JrqWxNsfdatRKd64kmpRbQgh1KtItko5sTnEpPdpSaJszTOhEMlF/RPz28qj4HqhQ==", + "dependencies": { + "boolbase": "^1.0.0", + "css-what": "^6.0.1", + "domhandler": "^4.3.1", + "domutils": "^2.8.0", + "nth-check": "^2.0.1" + }, + "funding": { + "url": "https://github.com/sponsors/fb55" + } + }, "node_modules/renderkid/node_modules/dom-serializer": { "version": "1.4.1", "resolved": 
"https://registry.npmjs.org/dom-serializer/-/dom-serializer-1.4.1.tgz", @@ -31522,41 +32704,10 @@ "url": "https://opencollective.com/webpack" } }, - "node_modules/schema-utils/node_modules/ajv": { - "version": "8.17.1", - "resolved": "https://registry.npmjs.org/ajv/-/ajv-8.17.1.tgz", - "integrity": "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g==", - "dependencies": { - "fast-deep-equal": "^3.1.3", - "fast-uri": "^3.0.1", - "json-schema-traverse": "^1.0.0", - "require-from-string": "^2.0.2" - }, - "funding": { - "type": "github", - "url": "https://github.com/sponsors/epoberezkin" - } - }, - "node_modules/schema-utils/node_modules/ajv-keywords": { - "version": "5.1.0", - "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-5.1.0.tgz", - "integrity": "sha512-YCS/JNFAUyr5vAuhk1DWm1CBxRHW9LbJ2ozWeemrIqpbsqKjHVxYPyi5GC0rjZIT5JxJ3virVTS8wk4i/Z+krw==", - "dependencies": { - "fast-deep-equal": "^3.1.3" - }, - "peerDependencies": { - "ajv": "^8.8.2" - } - }, - "node_modules/schema-utils/node_modules/json-schema-traverse": { - "version": "1.0.0", - "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz", - "integrity": "sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug==" - }, "node_modules/scrapfly-sdk": { - "version": "0.6.4", - "resolved": "https://registry.npmjs.org/scrapfly-sdk/-/scrapfly-sdk-0.6.4.tgz", - "integrity": "sha512-hP7WK+jhcANKa2+fEpvLKee5wNRgQlw1xCPfKo/x8CTVccAOKaUYm2P6OqLHg5mINIBHDSHhOXjBRpuKo/Cd/w==", + "version": "0.6.5", + "resolved": "https://registry.npmjs.org/scrapfly-sdk/-/scrapfly-sdk-0.6.5.tgz", + "integrity": "sha512-JJNIxBbVlG0zcmb0dUknJGprFNHEj/8+e9XxlKG4OKKg2j5voKhCpxyhJ43oivzuiXIsWQnydEtYQoICtZC06g==", "dependencies": { "@deno/shim-deno": "~0.18.0", "cheerio": "1.0.0-rc.12" @@ -31582,6 +32733,24 @@ "url": "https://github.com/cheeriojs/cheerio?sponsor=1" } }, + 
"node_modules/scrapfly-sdk/node_modules/htmlparser2": { + "version": "8.0.2", + "resolved": "https://registry.npmjs.org/htmlparser2/-/htmlparser2-8.0.2.tgz", + "integrity": "sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==", + "funding": [ + "https://github.com/fb55/htmlparser2?sponsor=1", + { + "type": "github", + "url": "https://github.com/sponsors/fb55" + } + ], + "dependencies": { + "domelementtype": "^2.3.0", + "domhandler": "^5.0.3", + "domutils": "^3.0.1", + "entities": "^4.4.0" + } + }, "node_modules/scss-loader": { "version": "0.0.1", "resolved": "https://registry.npmjs.org/scss-loader/-/scss-loader-0.0.1.tgz", @@ -32102,35 +33271,13 @@ "resolved": "https://registry.npmjs.org/skmeans/-/skmeans-0.9.7.tgz", "integrity": "sha512-hNj1/oZ7ygsfmPZ7ZfN5MUBRoGg1gtpnImuJBgLO0ljQ67DtJuiQaiYdS4lUA6s0KCwnPhGivtC/WRwIZLkHyg==" }, - "node_modules/slash": { - "version": "3.0.0", - "resolved": "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz", - "integrity": "sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q==", - "engines": { - "node": ">=8" - } - }, - "node_modules/slice-ansi": { - "version": "2.1.0", - "resolved": "https://registry.npmjs.org/slice-ansi/-/slice-ansi-2.1.0.tgz", - "integrity": "sha512-Qu+VC3EwYLldKa1fCxuuvULvSJOKEgk9pi8dZeCVK7TqBfUNTH4sFkk4joj8afVSfAYgJoSOetjx9QWOJ5mYoQ==", - "dev": true, - "dependencies": { - "ansi-styles": "^3.2.0", - "astral-regex": "^1.0.0", - "is-fullwidth-code-point": "^2.0.0" - }, - "engines": { - "node": ">=6" - } - }, - "node_modules/slice-ansi/node_modules/is-fullwidth-code-point": { - "version": "2.0.0", - "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-2.0.0.tgz", - "integrity": "sha512-VHskAKYM8RfSFXwee5t5cbN5PZeq1Wrh6qd5bkyiXIf6UQcN6w/A0eXM9r6t8d+GYOh+o6ZhiEnb88LN/Y8m2w==", - "dev": true, + "node_modules/smart-buffer": { + "version": "4.2.0", + "resolved": 
"https://registry.npmjs.org/smart-buffer/-/smart-buffer-4.2.0.tgz", + "integrity": "sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==", "engines": { - "node": ">=4" + "node": ">= 6.0.0", + "npm": ">= 3.0.0" } }, "node_modules/socket.io": { @@ -32479,6 +33626,11 @@ "node": ">=0.10.0" } }, + "node_modules/sshpk/node_modules/jsbn": { + "version": "0.1.1", + "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", + "integrity": "sha512-UVU9dibq2JcFWxQPA6KCqj5O42VOmAY3zQUfEKxU0KpTGXwNoCjkX1e13eHNvw/xPynt6pU0rZ1htjWTNTSXsg==" + }, "node_modules/standard-error": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/standard-error/-/standard-error-1.1.0.tgz", @@ -33128,6 +34280,29 @@ } } }, + "node_modules/terser-webpack-plugin/node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/terser-webpack-plugin/node_modules/ajv-keywords": { + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz", + "integrity": "sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==", + "peerDependencies": { + "ajv": "^6.9.1" + } + }, "node_modules/terser-webpack-plugin/node_modules/has-flag": { "version": "4.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-4.0.0.tgz", @@ -33149,6 +34324,11 @@ "node": ">= 10.13.0" } }, + "node_modules/terser-webpack-plugin/node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": 
"https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" + }, "node_modules/terser-webpack-plugin/node_modules/schema-utils": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-3.3.0.tgz", @@ -33222,8 +34402,7 @@ "node_modules/through": { "version": "2.3.8", "resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz", - "integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==", - "dev": true + "integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==" }, "node_modules/thunky": { "version": "1.1.0", @@ -34241,15 +35420,38 @@ "through": "^2.3.8" } }, + "node_modules/unbzip2-stream/node_modules/buffer": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", + "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.1.13" + } + }, "node_modules/undefsafe": { "version": "2.0.5", "resolved": "https://registry.npmjs.org/undefsafe/-/undefsafe-2.0.5.tgz", "integrity": "sha512-WxONCrssBM8TSPRqN5EmsjVrsv4A8X12J4ArBiiayv3DyyG3ZlIg6yysuuSYdZsVz3TKcTg2fd//Ujd4CHV1iA==" }, "node_modules/undici": { - "version": "6.19.7", - "resolved": "https://registry.npmjs.org/undici/-/undici-6.19.7.tgz", - "integrity": "sha512-HR3W/bMGPSr90i8AAp2C4DM3wChFdJPLrWYpIS++LxS8K+W535qftjt+4MyjNYHeWabMj1nvtmLIi7l++iq91A==", + "version": "6.19.8", + "resolved": "https://registry.npmjs.org/undici/-/undici-6.19.8.tgz", + 
"integrity": "sha512-U8uCCl2x9TK3WANvmBavymRzxbfFYG+tAu+fgx3zxQy3qdagQqBLwJVrdyO1TBfUXvfKveMKJZhpvUYoOjM+4g==", "engines": { "node": ">=18.17" } @@ -34561,6 +35763,34 @@ } } }, + "node_modules/url-loader/node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" + } + }, + "node_modules/url-loader/node_modules/ajv-keywords": { + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz", + "integrity": "sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==", + "peerDependencies": { + "ajv": "^6.9.1" + } + }, + "node_modules/url-loader/node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" + }, "node_modules/url-loader/node_modules/schema-utils": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-3.3.0.tgz", @@ -35222,6 +36452,29 @@ "node": ">=10.13.0" } }, + "node_modules/webpack/node_modules/ajv": { + "version": "6.12.6", + "resolved": "https://registry.npmjs.org/ajv/-/ajv-6.12.6.tgz", + "integrity": "sha512-j3fVLgvTo527anyYyJOGTYJbG+vnnQYvE0m5mmkc1TK+nxAppkCLMIL0aZ4dblVCNoGShhm+kzE4ZUykBoMg4g==", + "dependencies": { + "fast-deep-equal": "^3.1.1", + "fast-json-stable-stringify": "^2.0.0", + "json-schema-traverse": "^0.4.1", + "uri-js": "^4.2.2" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/epoberezkin" 
+ } + }, + "node_modules/webpack/node_modules/ajv-keywords": { + "version": "3.5.2", + "resolved": "https://registry.npmjs.org/ajv-keywords/-/ajv-keywords-3.5.2.tgz", + "integrity": "sha512-5p6WTN0DdTGVQk6VjcEju19IgaHudalcfabD7yhDGeA6bcQnmL+CpveLJq/3hvfwd1aof6L386Ougkx6RfyMIQ==", + "peerDependencies": { + "ajv": "^6.9.1" + } + }, "node_modules/webpack/node_modules/eslint-scope": { "version": "5.1.1", "resolved": "https://registry.npmjs.org/eslint-scope/-/eslint-scope-5.1.1.tgz", @@ -35242,6 +36495,11 @@ "node": ">=4.0" } }, + "node_modules/webpack/node_modules/json-schema-traverse": { + "version": "0.4.1", + "resolved": "https://registry.npmjs.org/json-schema-traverse/-/json-schema-traverse-0.4.1.tgz", + "integrity": "sha512-xbbCH5dCYU5T8LcEhhuh7HJ88HXuW3qsI3Y0zOZFKfZEHcpWiHU/Jxzk629Brsab/mMiHQti9wMP+845RPe3Vg==" + }, "node_modules/webpack/node_modules/schema-utils": { "version": "3.3.0", "resolved": "https://registry.npmjs.org/schema-utils/-/schema-utils-3.3.0.tgz", diff --git a/package.json b/package.json index ead8cdf06..93204f962 100644 --- a/package.json +++ b/package.json @@ -41,6 +41,7 @@ "@types/express": "^4.17.21", "@types/express-session": "^1.17.10", "@types/file-saver": "^2.0.7", + "@types/fuzzy-search": "^2.1.5", "@types/howler": "^2.2.11", "@types/html-to-text": "^9.0.4", "@types/jquery": "^3.5.29", @@ -201,6 +202,8 @@ "formidable": "3.5.1", "fullcalendar": "^6.1.15", "function-plot": "^1.23.3", + "fuse.js": "^7.0.0", + "fuzzy-search": "^3.2.1", "golden-layout": "^2.6.0", "google-auth-library": "^9.4.1", "googleapis": "^142.0.0", diff --git a/src/client/util/Scripting.ts b/src/client/util/Scripting.ts index c63d3d7cb..3e7a2df02 100644 --- a/src/client/util/Scripting.ts +++ b/src/client/util/Scripting.ts @@ -1,7 +1,7 @@ // export const ts = (window as any).ts; // import * as typescriptlib from '!!raw-loader!../../../node_modules/typescript/lib/lib.d.ts' // import * as typescriptes5 from '!!raw-loader!../../../node_modules/typescript/lib/lib.es5.d.ts' 
-import typescriptlib from 'type_decls.d'; +// import typescriptlib from 'type_decls.d'; import * as ts from 'typescript'; import { Doc, FieldType } from '../../fields/Doc'; import { RefField } from '../../fields/RefField'; @@ -29,7 +29,7 @@ export interface CompiledScript { readonly compiled: true; readonly originalScript: string; // eslint-disable-next-line no-use-before-define - readonly options: Readonly; + readonly options: Readonly; run(args?: { [name: string]: unknown }, onError?: (res: string) => void, errorVal?: unknown): ScriptResult; } @@ -154,7 +154,7 @@ class ScriptingCompilerHost { export type Traverser = (node: ts.Node, indentation: string) => boolean | void; export type TraverserParam = Traverser | { onEnter: Traverser; onLeave: Traverser }; export type Transformer = { - transformer: ts.TransformerFactory; + transformer: ts.TransformerFactory; getVars?: () => { [name: string]: FieldType }; }; export interface ScriptOptions { @@ -248,7 +248,7 @@ export function CompileScript(script: string, options: ScriptOptions = {}): Comp const funcScript = `(function(${paramString})${reqTypes} { ${body} })`; host.writeFile('file.ts', funcScript); - if (typecheck) host.writeFile('node_modules/typescript/lib/lib.d.ts', typescriptlib); + // if (typecheck) host.writeFile('node_modules/typescript/lib/lib.d.ts', typescriptlib); const program = ts.createProgram(['file.ts'], {}, host); const testResult = program.emit(); const outputText = host.readFile('file.js'); diff --git a/src/client/views/nodes/ChatBox/ChatBox.scss b/src/client/views/nodes/ChatBox/ChatBox.scss index 76fa05ce8..adb0663c3 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.scss +++ b/src/client/views/nodes/ChatBox/ChatBox.scss @@ -7,7 +7,6 @@ $light-text-color: #777; $border-color: #e1e8ed; $shadow-color: rgba(0, 0, 0, 0.1); $transition: all 0.3s ease; - .chat-box { display: flex; flex-direction: column; @@ -25,6 +24,7 @@ $transition: all 0.3s ease; border-radius: 12px; overflow: hidden; box-shadow: 0 
4px 12px $shadow-color; + position: relative; .chat-header { background-color: $primary-color; @@ -33,6 +33,7 @@ $transition: all 0.3s ease; text-align: center; box-shadow: 0 2px 4px $shadow-color; height: fit-content; + h2 { margin: 0; font-size: 1.3em; @@ -46,6 +47,7 @@ $transition: all 0.3s ease; padding: 20px; display: flex; flex-direction: column; + gap: 10px; // Added to give space between elements &::-webkit-scrollbar { width: 6px; @@ -108,6 +110,9 @@ $transition: all 0.3s ease; border-top: 3px solid #fff; border-radius: 50%; animation: spin 1s linear infinite; + display: flex; + align-items: center; + justify-content: center; } } } @@ -121,6 +126,7 @@ $transition: all 0.3s ease; font-size: 15px; line-height: 1.5; box-shadow: 0 2px 4px $shadow-color; + word-wrap: break-word; // To handle long words &.user { align-self: flex-end; diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 383be0bb7..32ccbc35e 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -24,6 +24,8 @@ import { DocUtils } from '../../../documents/DocUtils'; import { createRef } from 'react'; import { ClientUtils } from '../../../../ClientUtils'; import { ProgressBar } from './ProgressBar'; +import { DocumentView } from '../DocumentView'; +import { Networking } from '../../../Network'; dotenv.config(); @@ -37,7 +39,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @observable currentStep: string = ''; // Track current step name @observable expandedScratchpadIndex: number | null = null; @observable inputValue: string = ''; - @observable private linked_docs_to_add: ObservableSet = observable.set(); + @observable private linked_docs_to_add: ObservableSet = observable.set(); @observable private linked_csv_files: { filename: string; id: string; text: string }[] = []; @observable private isUploadingDocs: boolean = false; private openai: OpenAI; @@ -45,7 +47,7 @@ export class 
ChatBox extends ViewBoxAnnotatableComponent() { private vectorstore: Vectorstore; private agent: Agent; // Add the ChatBot instance private _oldWheel: HTMLDivElement | null = null; - private messagesRef: React.RefObject; + private messagesRef: React.RefObject; public static LayoutString(fieldKey: string) { return FieldView.LayoutString(ChatBox, fieldKey); @@ -184,7 +186,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }; @action - askGPT = async (event: React.FormEvent): Promise => { + askGPT = async (event: React.FormEvent): Promise => { event.preventDefault(); this.inputValue = ''; @@ -244,14 +246,43 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @action addLinkedUrlDoc = async (url: string, id: string) => { - const doc = Docs.Create.WebDocument(url); + const doc = Docs.Create.WebDocument(url, { data_useCors: true }); + + // const scriptsKey = Doc.LayoutFieldKey(doc) + '_allowScripts'; + // doc[DocData][scriptsKey] = true; + + console.log('Adding URL:', url); + + //console.log('Layout Field Key:', doc[DocData][scriptsKey]); const linkDoc = Docs.Create.LinkDocument(this.Document, doc); LinkManager.Instance.addLink(linkDoc); + let canDisplay; + + try { + // Fetch the URL content through the proxy + const { data } = await Networking.PostToServer('/proxyFetch', { url }); + + // Simulating header behavior as you can't fetch headers via the proxy + const xFrameOptions = data.headers?.['x-frame-options']; + + if (xFrameOptions && xFrameOptions.toUpperCase() === 'SAMEORIGIN') { + console.log('URL cannot be displayed in an iframe:', url); + canDisplay = false; + } else { + console.log('URL can be displayed in an iframe:', url); + console.log(StrCast(linkDoc.canDisplay)); + canDisplay = true; + } + } catch (error) { + console.error('Error fetching the URL from the server:', error); + } const chunkToAdd = { chunkId: id, chunkType: CHUNK_TYPE.URL, + url: url, + canDisplay: canDisplay, }; doc.chunk_simpl = JSON.stringify({ chunks: 
[chunkToAdd] }); @@ -315,18 +346,26 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { break; case CHUNK_TYPE.TEXT: DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { - const firstView = Array.from(doc[DocViews])[0]; - firstView.ComponentView?.search?.(citation.direct_text); + const firstView = Array.from(doc[DocViews])[0] as DocumentView; + firstView.ComponentView?.search?.(citation.direct_text ?? ''); }); break; case CHUNK_TYPE.URL: - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { - const firstView = Array.from(doc[DocViews])[0]; - }); + console.log('Opening URL:', foundChunk.url); + console.log('Can display:', foundChunk.canDisplay); + if (!foundChunk.canDisplay) { + console.log('Opening URL in new tab:', doc.displayUrl); + window.open(StrCast(doc.displayUrl), '_blank'); + } else if (foundChunk.canDisplay) { + console.log('Opening URL in Dash:', doc.displayUrl); + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { + const firstView = Array.from(doc[DocViews])[0] as DocumentView; + }); + } break; case CHUNK_TYPE.CSV: DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { - const firstView = Array.from(doc[DocViews])[0]; + const firstView = Array.from(doc[DocViews])[0] as DocumentView; }); break; default: diff --git a/src/client/views/nodes/ChatBox/prompts.ts b/src/client/views/nodes/ChatBox/prompts.ts index 9ea86dbf4..0a356189b 100644 --- a/src/client/views/nodes/ChatBox/prompts.ts +++ b/src/client/views/nodes/ChatBox/prompts.ts @@ -30,6 +30,7 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto Ensure at the end of every final answer, you provide exactly three follow-up questions from the user's perspective—from the perspective that they are asking the question. Always follow the response structure provided in the instructions. 
If a tool doesn't work—or yield helpful results—after two tries, EITHER use another tool or proceed with the response and ask the user for more information or clarification or let them know you cannot answer their question and why. DO NOT CONTINUE WITH THE SAME TOOL 3 TIMES. + Use multiple tools in conjunction with each other to provide a comprehensive answer to the user's query, if needed (i.e. for the prompt "create a CSV showing historical bird migration trends", you could use the search tool and the webscraper tool to get the info, and then use the create CSV tool to create the CSV) diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts index fe8deae6b..544b9daba 100644 --- a/src/client/views/nodes/ChatBox/tools/RAGTool.ts +++ b/src/client/views/nodes/ChatBox/tools/RAGTool.ts @@ -6,7 +6,7 @@ import { Networking } from '../../../../Network'; import { file } from 'jszip'; import { ChatCompletion, ChatCompletionContentPart, ChatCompletionMessageParam } from 'openai/resources'; -export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { +export class RAGTool extends BaseTool { constructor(private vectorstore: Vectorstore) { super( 'rag', @@ -55,13 +55,15 @@ export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { 5. Detailed Citation Guidelines: a. Create a unique citation for each distinct piece of information from the chunks that is used to support grounded_text. - b. ALL TEXT CITATIONS must have direct text in its element content (e.g. DIRECT TEXT HERE) that is a relevant subset of the original text chunk that is being cited specifically. - b. Ensure each citation has a unique index number. - c. Specify the correct type: "text", "image", or "table". - d. For text chunks, the content of the citation should ALWAYS have the relevant subset of the original text that the grounded_text is based on. - e. For image/table chunks, leave the citation content empty. - f. 
One citation can be used for multiple grounded_text tags if they are based on the same chunk information. - g. !!!DO NOT OVERCITE - only include citations for information that is directly relevant to the grounded_text. + b. ALL TEXT CITATIONS must have direct text in its element content (e.g. DIRECT TEXT HERE) that is a relevant SUBSET of the original text chunk that is being cited specifically. + c. DO NOT paraphrase or summarize the text; use the original text as much as possible. + d. DO NOT USE THE FULL TEXT CHUNK as the citation content; only use the relevant subset of the text that the grounded_text is base. AS SHORT AS POSSIBLE WHILE PROVIDING INFORMATION (ONE TO TWO SENTENCES USUALLY)! + e. Ensure each citation has a unique index number. + f. Specify the correct type: "text", "image", or "table". + g. For text chunks, the content of the citation should ALWAYS have the relevant subset of the original text that the grounded_text is based on. + h. For image/table chunks, leave the citation content empty. + i. One citation can be used for multiple grounded_text tags if they are based on the same chunk information. + j. !!!DO NOT OVERCITE - only include citations for information that is directly relevant to the grounded_text. 6. Structural Integrity Checks: a. Ensure all opening tags have corresponding closing tags. 
@@ -113,13 +115,13 @@ export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { ); } - async execute(args: { hypothetical_document_chunk: string }): Promise { + async execute(args: { hypothetical_document_chunk: string }): Promise { const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk); const formatted_chunks = await this.getFormattedChunks(relevantChunks); return formatted_chunks; } - async getFormattedChunks(relevantChunks: RAGChunk[]): Promise { + async getFormattedChunks(relevantChunks: RAGChunk[]): Promise { try { const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks }); diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index a59ead067..a12c52592 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -106,6 +106,8 @@ export interface SimplifiedChunk { endPage: number; location?: string; chunkType: CHUNK_TYPE; + url?: string; + canDisplay?: boolean; } export interface AI_Document { diff --git a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts index 408274703..b5145c1f7 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts @@ -135,6 +135,7 @@ export class Vectorstore { endPage: chunk.metadata.end_page, location: chunk.metadata.location, chunkType: chunk.metadata.type as CHUNK_TYPE, + text: chunk.metadata.text, }; const new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); new_chunk_simpl.chunks = new_chunk_simpl.chunks.concat(chunkToAdd); @@ -148,7 +149,7 @@ export class Vectorstore { private async indexDocument(document: any) { console.log('Uploading vectors to content namespace...'); - const pineconeRecords: PineconeRecord[] = (document.chunks as RAGChunk[]).map( + const pineconeRecords: PineconeRecord[] = (document.chunks as 
RAGChunk[]).map( chunk => ({ id: chunk.id, @@ -159,7 +160,7 @@ export class Vectorstore { await this.index.upsert(pineconeRecords); } - async retrieve(query: string, topK: number = 10): Promise { + async retrieve(query: string, topK: number = 10): Promise { console.log(`Retrieving chunks for query: ${query}`); try { const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({ @@ -182,7 +183,7 @@ export class Vectorstore { throw new Error('Query embedding is not an array'); } - const queryResponse: QueryResponse = await this.index.query({ + const queryResponse: QueryResponse = await this.index.query({ vector: queryEmbedding, filter: { doc_id: { $in: this._doc_ids }, diff --git a/src/client/views/nodes/PDFBox.tsx b/src/client/views/nodes/PDFBox.tsx index cb0b0d71f..4616ec057 100644 --- a/src/client/views/nodes/PDFBox.tsx +++ b/src/client/views/nodes/PDFBox.tsx @@ -43,14 +43,14 @@ export class PDFBox extends ViewBoxAnnotatableComponent() { public static openSidebarWidth = 250; public static sidebarResizerWidth = 5; private _searchString: string = ''; - private _initialScrollTarget: Opt; + private _initialScrollTarget: Opt; private _pdfViewer: PDFViewer | undefined; private _searchRef = React.createRef(); private _disposers: { [name: string]: IReactionDisposer } = {}; private _sidebarRef = React.createRef(); @observable private _searching: boolean = false; - @observable private _pdf: Opt = undefined; + @observable private _pdf: Opt = undefined; @observable private _pageControls = false; @computed get pdfUrl() { @@ -242,13 +242,13 @@ export class PDFBox extends ViewBoxAnnotatableComponent() { options.didMove = true; this.toggleSidebar(false); } - return new Promise>(res => { + return new Promise(res => { DocumentView.addViewRenderedCb(doc, dv => res(dv)); }); }; getAnchor = (addAsAnnotation: boolean, pinProps?: PinProps) => { - let ele: Opt; + let ele: Opt; if (this._pdfViewer?.selectionContent()) { ele = document.createElement('div'); 
ele.append(this._pdfViewer.selectionContent()!); @@ -327,7 +327,7 @@ export class PDFBox extends ViewBoxAnnotatableComponent() { this._initialScrollTarget = undefined; } }; - searchStringChanged = (e: React.ChangeEvent) => { + searchStringChanged = (e: React.ChangeEvent) => { this._searchString = e.currentTarget.value; }; @@ -365,8 +365,8 @@ export class PDFBox extends ViewBoxAnnotatableComponent() { } ); }; - @observable _previewNativeWidth: Opt = undefined; - @observable _previewWidth: Opt = undefined; + @observable _previewNativeWidth: Opt = undefined; + @observable _previewWidth: Opt = undefined; toggleSidebar = action((preview: boolean = false) => { const nativeWidth = NumCast(this.layoutDoc[this.fieldKey + '_nativeWidth']); const sideratio = ((!this.layoutDoc.nativeWidth || this.layoutDoc.nativeWidth === nativeWidth ? PDFBox.openSidebarWidth : 0) + nativeWidth) / nativeWidth; @@ -637,7 +637,7 @@ export class PDFBox extends ViewBoxAnnotatableComponent() { } static pdfcache = new Map(); - static pdfpromise = new Map>(); + static pdfpromise = new Map(); render() { TraceMobx(); const pdfView = !this._pdf ? 
null : this.renderPdfView; diff --git a/src/client/views/pdf/PDFViewer.tsx b/src/client/views/pdf/PDFViewer.tsx index b9d5ce857..7543b3fb1 100644 --- a/src/client/views/pdf/PDFViewer.tsx +++ b/src/client/views/pdf/PDFViewer.tsx @@ -51,7 +51,7 @@ interface IViewerProps extends FieldViewProps { * Handles rendering and virtualization of the pdf */ @observer -export class PDFViewer extends ObservableReactComponent { +export class PDFViewer extends ObservableReactComponent { static _annotationStyle = addStyleSheet(); constructor(props: IViewerProps) { @@ -68,12 +68,12 @@ export class PDFViewer extends ObservableReactComponent { private _pdfViewer!: PDFJSViewer.PDFViewer; private _styleRule: number | undefined; // stylesheet rule for making hyperlinks clickable private _retries = 0; // number of times tried to create the PDF viewer - private _setPreviewCursor: undefined | ((x: number, y: number, drag: boolean, hide: boolean, doc: Opt) => void); + private _setPreviewCursor: undefined | ((x: number, y: number, drag: boolean, hide: boolean, doc: Opt) => void); private _marqueeref = React.createRef(); - private _annotationLayer: React.RefObject = React.createRef(); + private _annotationLayer: React.RefObject = React.createRef(); private _disposers: { [name: string]: IReactionDisposer } = {}; - private _viewer: React.RefObject = React.createRef(); - _mainCont: React.RefObject = React.createRef(); + private _viewer: React.RefObject = React.createRef(); + _mainCont: React.RefObject = React.createRef(); private _selectionText: string = ''; private _selectionContent: DocumentFragment | undefined; private _downX: number = 0; @@ -81,9 +81,9 @@ export class PDFViewer extends ObservableReactComponent { private _lastSearch = false; private _viewerIsSetup = false; private _ignoreScroll = false; - private _initialScroll: { loc: Opt; easeFunc: 'linear' | 'ease' | undefined } | undefined; + private _initialScroll: { loc: Opt; easeFunc: 'linear' | 'ease' | undefined } | undefined; private 
_forcedScroll = true; - _getAnchor: (savedAnnotations: Opt>, addAsAnnotation: boolean) => Opt = () => undefined; + _getAnchor: (savedAnnotations: Opt, addAsAnnotation: boolean) => Opt = () => undefined; selectionText = () => this._selectionText; selectionContent = () => this._selectionContent; @@ -179,7 +179,7 @@ export class PDFViewer extends ObservableReactComponent { // otherwise it will scroll smoothly. scrollFocus = (doc: Doc, scrollTop: number, options: FocusViewOptions) => { const mainCont = this._mainCont.current; - let focusSpeed: Opt; + let focusSpeed: Opt; if (doc !== this._props.Document && mainCont) { const windowHeight = this._props.PanelHeight() / (this._props.NativeDimScaling?.() || 1); const scrollTo = ClientUtils.scrollIntoView(scrollTop, doc[Height](), NumCast(this._props.layoutDoc._layout_scrollTop), windowHeight, windowHeight * 0.1, this._scrollHeight); @@ -495,7 +495,7 @@ export class PDFViewer extends ObservableReactComponent { // e.stopPropagation(); // bcz: not sure why this was here. We need to allow the DocumentView to get clicks to process doubleClicks }; - setPreviewCursor = (func?: (x: number, y: number, drag: boolean, hide: boolean, doc: Opt) => void) => { + setPreviewCursor = (func?: (x: number, y: number, drag: boolean, hide: boolean, doc: Opt) => void) => { this._setPreviewCursor = func; }; @@ -534,7 +534,7 @@ export class PDFViewer extends ObservableReactComponent { panelHeight = () => this._props.PanelHeight() / (this._props.NativeDimScaling?.() || 1); transparentFilter = () => [...this._props.childFilters(), ClientUtils.TransparentBackgroundFilter]; opaqueFilter = () => [...this._props.childFilters(), ClientUtils.noDragDocsFilter, ...(SnappingManager.CanEmbed && this._props.isContentActive() ? 
[] : [ClientUtils.OpaqueBackgroundFilter])]; - childStyleProvider = (doc: Doc | undefined, props: Opt, property: string) => { + childStyleProvider = (doc: Doc | undefined, props: Opt, property: string) => { if (doc instanceof Doc && property === StyleProp.PointerEvents) { if (this.inlineTextAnnotations.includes(doc) || this._props.isContentActive() === false) return 'none'; const isInk = doc.layout_isSvg && !props?.LayoutTemplateString; diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 2ffc99e58..58d35ce64 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -138,6 +138,27 @@ export default class AssistantManager extends ApiManager { } }; + register({ + method: Method.POST, + subscription: '/proxyFetch', + secureHandler: async ({ req, res }) => { + const { url } = req.body; + + if (!url) { + res.status(400).send({ error: 'No URL provided' }); + return; + } + + try { + const data = await fetchWithRetry(url); + res.send({ data }); + } catch (error: any) { + console.error('Error fetching the URL:', error); + res.status(500).send({ error: 'Failed to fetch the URL', details: error.message }); + } + }, + }); + register({ method: Method.POST, subscription: '/scrapeWebsite', -- cgit v1.2.3-70-g09d2 From 38c7c270fc5e2078798fc6b75fac90d54a7f658f Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Sat, 7 Sep 2024 11:27:23 -0400 Subject: added commenting and restructured --- src/client/views/nodes/ChatBox/ChatBox.tsx | 241 ++++++++++++++++++++++++----- 1 file changed, 201 insertions(+), 40 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 32ccbc35e..7c961d0c4 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -11,7 +11,7 @@ import { ViewBoxAnnotatableComponent } from '../../DocComponent'; import { FieldView, FieldViewProps } from '../FieldView'; import './ChatBox.scss'; import MessageComponentBox from './MessageComponent'; -import { ASSISTANT_ROLE, AssistantMessage, AI_Document, Citation, CHUNK_TYPE, RAGChunk, getChunkType, TEXT_TYPE, SimplifiedChunk, ProcessingInfo, MessageContent } from './types'; +import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, RAGChunk, getChunkType, TEXT_TYPE, SimplifiedChunk, ProcessingInfo, MessageContent } from './types'; import { Vectorstore } from './vectorstore/Vectorstore'; import { Agent } from './Agent'; import dotenv from 'dotenv'; @@ -29,33 +29,51 @@ import { Networking } from '../../../Network'; dotenv.config(); +/** + * ChatBox is the main class responsible for managing the interaction between the user and the assistant, + * handling documents, and integrating with OpenAI for tasks such as document analysis, chat functionality, + * and vector store interactions. 
+ */ @observer export class ChatBox extends ViewBoxAnnotatableComponent() { + // MobX observable properties to track UI state and data @observable history: AssistantMessage[] = []; @observable.deep current_message: AssistantMessage | undefined = undefined; - @observable isLoading: boolean = false; - @observable uploadProgress: number = 0; // Track progress percentage - @observable currentStep: string = ''; // Track current step name + @observable uploadProgress: number = 0; + @observable currentStep: string = ''; @observable expandedScratchpadIndex: number | null = null; @observable inputValue: string = ''; @observable private linked_docs_to_add: ObservableSet = observable.set(); @observable private linked_csv_files: { filename: string; id: string; text: string }[] = []; @observable private isUploadingDocs: boolean = false; + + // Private properties for managing OpenAI API, vector store, agent, and UI elements private openai: OpenAI; private vectorstore_id: string; private vectorstore: Vectorstore; - private agent: Agent; // Add the ChatBot instance + private agent: Agent; private _oldWheel: HTMLDivElement | null = null; private messagesRef: React.RefObject; + /** + * Static method that returns the layout string for the field. + * @param fieldKey Key to get the layout string. + */ public static LayoutString(fieldKey: string) { return FieldView.LayoutString(ChatBox, fieldKey); } + /** + * Constructor initializes the component, sets up OpenAI, vector store, and agent instances, + * and observes changes in the chat history to save the state in dataDoc. + * @param props The properties passed to the component. 
+ */ constructor(props: FieldViewProps) { super(props); - makeObservable(this); + makeObservable(this); // Enable MobX observables + + // Initialize OpenAI, vectorstore, and agent this.openai = this.initializeOpenAI(); if (StrCast(this.dataDoc.vectorstore_id) == '') { console.log('new_id'); @@ -68,14 +86,26 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.createCSVInDash); this.messagesRef = React.createRef(); + // Reaction to update dataDoc when chat history changes reaction( - () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, content: msg.content, follow_up_questions: msg.follow_up_questions, citations: msg.citations })), + () => + this.history.map((msg: AssistantMessage) => ({ + role: msg.role, + content: msg.content, + follow_up_questions: msg.follow_up_questions, + citations: msg.citations, + })), serializableHistory => { this.dataDoc.data = JSON.stringify(serializableHistory); } ); } + /** + * Adds a document to the vectorstore for AI-based analysis. + * Handles the upload progress and errors during the process. + * @param newLinkedDoc The new document to add. + */ @action addDocToVectorstore = async (newLinkedDoc: Doc) => { this.uploadProgress = 0; @@ -83,6 +113,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.isUploadingDocs = true; try { + // Add the document to the vectorstore await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress); } catch (error) { console.error('Error uploading document:', error); @@ -94,6 +125,11 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } }; + /** + * Updates the upload progress and the current step in the UI. + * @param progress The percentage of the progress. + * @param step The current step name. 
+ */ @action updateProgress = (progress: number, step: string) => { console.log('Progress:', progress, step); @@ -101,12 +137,20 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.currentStep = step; }; + /** + * Adds a CSV file for analysis by sending it to OpenAI and generating a summary. + * @param newLinkedDoc The linked document representing the CSV file. + * @param id Optional ID for the document. + */ @action addCSVForAnalysis = async (newLinkedDoc: Doc, id?: string) => { console.log('adding csv file for analysis'); if (!newLinkedDoc.chunk_simpl) { + // Convert document text to CSV data const csvData: string = StrCast(newLinkedDoc.text); console.log('CSV Data:', csvData); + + // Generate a summary using OpenAI API const completion = await this.openai.chat.completions.create({ messages: [ { @@ -117,27 +161,25 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { { role: 'user', content: `Please provide a comprehensive summary of the CSV file based on the provided data. Ensure the summary highlights the most important information, patterns, and insights. Your response should be in paragraph form and be concise. - CSV Data: - ${csvData} - ********** Summary:`, }, ], model: 'gpt-3.5-turbo', }); - console.log('CSV Data:', csvData); + const csvId = id ?? uuidv4(); + // Add CSV details to linked files this.linked_csv_files.push({ filename: CsvCast(newLinkedDoc.data).url.pathname, id: csvId, text: csvData, }); - console.log(this.linked_csv_files); + // Add a chunk for the CSV and assign the summary const chunkToAdd = { chunkId: csvId, chunkType: CHUNK_TYPE.CSV, @@ -147,11 +189,19 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } }; + /** + * Toggles the tool logs, expanding or collapsing the scratchpad at the given index. + * @param index Index of the tool log to toggle. + */ @action toggleToolLogs = (index: number) => { this.expandedScratchpadIndex = this.expandedScratchpadIndex === index ? 
null : index; }; + /** + * Initializes the OpenAI API client using the API key from environment variables. + * @returns OpenAI client instance. + */ initializeOpenAI() { console.log(process.env.OPENAI_KEY); const configuration: ClientOptions = { @@ -161,49 +211,81 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { return new OpenAI(configuration); } + /** + * Adds a scroll event listener to detect user scrolling and handle passive wheel events. + */ addScrollListener = () => { if (this.messagesRef.current) { this.messagesRef.current.addEventListener('wheel', this.onPassiveWheel, { passive: false }); } }; + /** + * Removes the scroll event listener from the chat messages container. + */ removeScrollListener = () => { if (this.messagesRef.current) { this.messagesRef.current.removeEventListener('wheel', this.onPassiveWheel); } }; + /** + * Scrolls the chat messages container to the bottom, ensuring the latest message is visible. + */ scrollToBottom = () => { if (this.messagesRef.current) { this.messagesRef.current.scrollTop = this.messagesRef.current.scrollHeight; } }; + /** + * Event handler for detecting wheel scrolling and stopping the event propagation. + * @param e The wheel event. + */ onPassiveWheel = (e: WheelEvent) => { if (this._props.isContentActive()) { e.stopPropagation(); } }; + /** + * Sends the user's input to OpenAI, displays the loading indicator, and updates the chat history. + * @param event The form submission event. 
+ */ @action askGPT = async (event: React.FormEvent): Promise => { event.preventDefault(); this.inputValue = ''; + // Extract the user's message const textInput = event.currentTarget.elements.namedItem('messageInput') as HTMLInputElement; const trimmedText = textInput.value.trim(); if (trimmedText) { try { textInput.value = ''; - this.history.push({ role: ASSISTANT_ROLE.USER, content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: trimmedText, citation_ids: null }], processing_info: [] }); + // Add the user's message to the history + this.history.push({ + role: ASSISTANT_ROLE.USER, + content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: trimmedText, citation_ids: null }], + processing_info: [], + }); this.isLoading = true; - this.current_message = { role: ASSISTANT_ROLE.ASSISTANT, content: [], citations: [], processing_info: [] }; + this.current_message = { + role: ASSISTANT_ROLE.ASSISTANT, + content: [], + citations: [], + processing_info: [], + }; + // Define callbacks for real-time processing updates const onProcessingUpdate = (processingUpdate: ProcessingInfo[]) => { runInAction(() => { if (this.current_message) { - this.current_message = { ...this.current_message, processing_info: processingUpdate }; + this.current_message = { + ...this.current_message, + processing_info: processingUpdate, + }; } }); this.scrollToBottom(); @@ -212,13 +294,18 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { const onAnswerUpdate = (answerUpdate: string) => { runInAction(() => { if (this.current_message) { - this.current_message = { ...this.current_message, content: [{ text: answerUpdate, type: TEXT_TYPE.NORMAL, index: 0, citation_ids: [] }] }; + this.current_message = { + ...this.current_message, + content: [{ text: answerUpdate, type: TEXT_TYPE.NORMAL, index: 0, citation_ids: [] }], + }; } }); }; + // Send the user's question to the assistant and get the final message const finalMessage = await this.agent.askAgent(trimmedText, onProcessingUpdate, onAnswerUpdate); + // 
Update the history with the final assistant message runInAction(() => { if (this.current_message) { this.history.push({ ...finalMessage }); @@ -228,7 +315,12 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }); } catch (err) { console.error('Error:', err); - this.history.push({ role: ASSISTANT_ROLE.ASSISTANT, content: [{ index: 0, type: TEXT_TYPE.ERROR, text: 'Sorry, I encountered an error while processing your request.', citation_ids: null }], processing_info: [] }); + // Handle error in processing + this.history.push({ + role: ASSISTANT_ROLE.ASSISTANT, + content: [{ index: 0, type: TEXT_TYPE.ERROR, text: 'Sorry, I encountered an error while processing your request.', citation_ids: null }], + processing_info: [], + }); } finally { this.isLoading = false; this.scrollToBottom(); @@ -237,6 +329,11 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.scrollToBottom(); }; + /** + * Updates the citations for a given message in the chat history. + * @param index The index of the message in the history. + * @param citations The list of citations to add to the message. + */ @action updateMessageCitations = (index: number, citations: Citation[]) => { if (this.history[index]) { @@ -244,17 +341,16 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } }; + /** + * Adds a linked document from a URL for future reference and analysis. + * @param url The URL of the document to add. + * @param id The unique identifier for the document. 
+ */ @action addLinkedUrlDoc = async (url: string, id: string) => { const doc = Docs.Create.WebDocument(url, { data_useCors: true }); - - // const scriptsKey = Doc.LayoutFieldKey(doc) + '_allowScripts'; - // doc[DocData][scriptsKey] = true; - console.log('Adding URL:', url); - //console.log('Layout Field Key:', doc[DocData][scriptsKey]); - const linkDoc = Docs.Create.LinkDocument(this.Document, doc); LinkManager.Instance.addLink(linkDoc); let canDisplay; @@ -263,7 +359,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { // Fetch the URL content through the proxy const { data } = await Networking.PostToServer('/proxyFetch', { url }); - // Simulating header behavior as you can't fetch headers via the proxy + // Simulating header behavior since we can't fetch headers via proxy const xFrameOptions = data.headers?.['x-frame-options']; if (xFrameOptions && xFrameOptions.toUpperCase() === 'SAMEORIGIN') { @@ -271,7 +367,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { canDisplay = false; } else { console.log('URL can be displayed in an iframe:', url); - console.log(StrCast(linkDoc.canDisplay)); canDisplay = true; } } catch (error) { @@ -288,11 +383,21 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { doc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] }); }; + /** + * Getter to retrieve the current user's name from the client utils. + */ @computed get userName() { return ClientUtils.CurrentUserEmail; } + /** + * Creates a CSV document in the dashboard and adds it for analysis. + * @param url The URL of the CSV. + * @param title The title of the CSV document. + * @param id The unique ID for the document. + * @param data The CSV data content. 
+ */ @action createCSVInDash = async (url: string, title: string, id: string, data: string) => { console.log('Creating CSV in Dash:', url, title); @@ -307,6 +412,10 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.addCSVForAnalysis(doc, id); }; + /** + * Event handler to manage citations click in the message components. + * @param citation The citation object clicked by the user. + */ @action handleCitationClick = (citation: Citation) => { console.log('Citation clicked:', citation); @@ -314,14 +423,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { const chunkId = citation.chunk_id; + // Loop through the linked documents to find the matching chunk and handle its display for (let doc of currentLinkedDocs) { if (doc.chunk_simpl) { const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] }; - console.log(docChunkSimpl); const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId); - console.log(foundChunk); if (foundChunk) { - console.log(getChunkType(foundChunk.chunkType)); + // Handle different types of chunks (image, text, table, etc.) 
switch (foundChunk.chunkType) { case CHUNK_TYPE.IMAGE: case CHUNK_TYPE.TABLE: @@ -351,22 +459,14 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }); break; case CHUNK_TYPE.URL: - console.log('Opening URL:', foundChunk.url); - console.log('Can display:', foundChunk.canDisplay); if (!foundChunk.canDisplay) { - console.log('Opening URL in new tab:', doc.displayUrl); window.open(StrCast(doc.displayUrl), '_blank'); } else if (foundChunk.canDisplay) { - console.log('Opening URL in Dash:', doc.displayUrl); - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { - const firstView = Array.from(doc[DocViews])[0] as DocumentView; - }); + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); } break; case CHUNK_TYPE.CSV: - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { - const firstView = Array.from(doc[DocViews])[0] as DocumentView; - }); + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); break; default: console.log('Chunk type not supported', foundChunk.chunkType); @@ -377,6 +477,17 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } }; + /** + * Creates an annotation highlight on a PDF document for image citations. + * @param x1 X-coordinate of the top-left corner of the highlight. + * @param y1 Y-coordinate of the top-left corner of the highlight. + * @param x2 X-coordinate of the bottom-right corner of the highlight. + * @param y2 Y-coordinate of the bottom-right corner of the highlight. + * @param citation The citation object to associate with the highlight. + * @param annotationKey The key used to store the annotation. + * @param pdfDoc The document where the highlight is created. + * @returns The highlighted document. 
+ */ createImageCitationHighlight = (x1: number, y1: number, x2: number, y2: number, citation: Citation, annotationKey: string, pdfDoc: Doc): Doc => { const highlight_doc = Docs.Create.FreeformDocument([], { x: x1, @@ -392,10 +503,18 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { return highlight_doc; }; + /** + * Lifecycle method that triggers when the component updates. + * Ensures the chat is scrolled to the bottom when new messages are added. + */ componentDidUpdate() { this.scrollToBottom(); } + /** + * Lifecycle method that triggers when the component mounts. + * Initializes scroll listeners, sets up document reactions, and loads chat history from dataDoc if available. + */ componentDidMount() { this._props.setContentViewBox?.(this); if (this.dataDoc.data) { @@ -415,14 +534,24 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { console.error('Failed to parse history from dataDoc:', e); } } else { + // Default welcome message runInAction(() => { this.history.push({ role: ASSISTANT_ROLE.ASSISTANT, - content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: `Hey, ${this.userName()} Welcome to the Your Friendly Assistant! Link a document or ask questions about anything to get started.`, citation_ids: null }], + content: [ + { + index: 0, + type: TEXT_TYPE.NORMAL, + text: `Hey, ${this.userName()}! Welcome to Your Friendly Assistant. 
Link a document or ask questions to get started.`, + citation_ids: null, + }, + ], processing_info: [], }); }); } + + // Set up reactions for linked documents reaction( () => { const linkedDocs = LinkManager.Instance.getAllRelatedLinks(this.Document) @@ -431,10 +560,10 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { .filter(d => d); return linkedDocs; }, - linked => linked.forEach(doc => this.linked_docs_to_add.add(doc)) ); + // Observe changes to linked documents and handle document addition observe(this.linked_docs_to_add, change => { if (change.type === 'add') { if (PDFCast(change.newValue.data)) { @@ -449,10 +578,17 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.addScrollListener(); } + /** + * Lifecycle method that triggers when the component unmounts. + * Removes scroll listeners to avoid memory leaks. + */ componentWillUnmount() { this.removeScrollListener(); } + /** + * Getter that retrieves all linked documents for the current document. + */ @computed get linkedDocs() { return LinkManager.Instance.getAllRelatedLinks(this.Document) @@ -461,6 +597,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { .filter(d => d); } + /** + * Getter that retrieves document IDs of linked documents that have AI-related content. + */ @computed get docIds() { return LinkManager.Instance.getAllRelatedLinks(this.Document) @@ -471,6 +610,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { .map(d => StrCast(d.ai_doc_id)); } + /** + * Getter that retrieves summaries of all linked documents. + */ @computed get summaries(): string { return ( @@ -492,11 +634,17 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { ); } + /** + * Getter that retrieves all linked CSV files for analysis. + */ @computed get linkedCSVs(): { filename: string; id: string; text: string }[] { return this.linked_csv_files; } + /** + * Getter that formats the entire chat history as a string for the agent's system message. 
+ */ @computed get formattedHistory(): string { let history = '\n'; @@ -511,6 +659,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { return history; } + // Other helper methods for retrieving document data and processing + retrieveSummaries = () => { return this.summaries; }; @@ -527,12 +677,20 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { return this.docIds; }; + /** + * Handles follow-up questions when the user clicks on them. + * Automatically sets the input value to the clicked follow-up question. + * @param question The follow-up question clicked by the user. + */ @action handleFollowUpClick = (question: string) => { console.log('Follow-up question clicked:', question); this.inputValue = question; }; + /** + * Renders the chat interface, including the message list, input field, and other UI elements. + */ render() { return (
@@ -580,6 +738,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } } +/** + * Register the ChatBox component as the template for CHAT document types. + */ Docs.Prototypes.TemplateMap.set(DocumentType.CHAT, { layout: { view: ChatBox, dataField: 'data' }, options: { acl: '', chat: '', chat_history: '', chat_thread_id: '', chat_assistant_id: '', chat_vector_store_id: '' }, -- cgit v1.2.3-70-g09d2 From 0b3c2ed595b85391e9833a3b7710d2169439a582 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sat, 7 Sep 2024 11:27:47 -0400 Subject: removed unnecessary imports --- src/client/views/nodes/ChatBox/ChatBox.tsx | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 7c961d0c4..ff699aab3 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -11,17 +11,14 @@ import { ViewBoxAnnotatableComponent } from '../../DocComponent'; import { FieldView, FieldViewProps } from '../FieldView'; import './ChatBox.scss'; import MessageComponentBox from './MessageComponent'; -import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, RAGChunk, getChunkType, TEXT_TYPE, SimplifiedChunk, ProcessingInfo, MessageContent } from './types'; +import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, SimplifiedChunk, ProcessingInfo } from './types'; import { Vectorstore } from './vectorstore/Vectorstore'; import { Agent } from './Agent'; import dotenv from 'dotenv'; import { DocData, DocViews } from '../../../../fields/DocSymbols'; -import { AnswerParser } from './AnswerParser'; import { DocumentManager } from '../../../util/DocumentManager'; import { v4 as uuidv4 } from 'uuid'; -import { chunk } from 'lodash'; import { DocUtils } from '../../../documents/DocUtils'; -import { createRef } from 'react'; import { ClientUtils } from '../../../../ClientUtils'; import { ProgressBar } from 
'./ProgressBar'; import { DocumentView } from '../DocumentView'; -- cgit v1.2.3-70-g09d2 From 210f8f5f1cd19e9416a12524cce119b273334fd3 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sat, 7 Sep 2024 11:48:36 -0400 Subject: reorganized parsers, added comments to vectorstore, and added citation popup for text citations --- src/client/views/nodes/ChatBox/Agent.ts | 4 +- src/client/views/nodes/ChatBox/AnswerParser.ts | 125 --------------------- src/client/views/nodes/ChatBox/ChatBox.scss | 27 +++++ src/client/views/nodes/ChatBox/ChatBox.tsx | 12 ++ src/client/views/nodes/ChatBox/ChunkManager.ts | 24 ---- .../views/nodes/ChatBox/StreamedAnswerParser.ts | 73 ------------ .../nodes/ChatBox/response_parsers/AnswerParser.ts | 125 +++++++++++++++++++++ .../response_parsers/StreamedAnswerParser.ts | 73 ++++++++++++ .../views/nodes/ChatBox/vectorstore/Vectorstore.ts | 125 ++++++++++++++------- 9 files changed, 326 insertions(+), 262 deletions(-) delete mode 100644 src/client/views/nodes/ChatBox/AnswerParser.ts delete mode 100644 src/client/views/nodes/ChatBox/ChunkManager.ts delete mode 100644 src/client/views/nodes/ChatBox/StreamedAnswerParser.ts create mode 100644 src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts create mode 100644 src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts (limited to 'src') diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index eaa17d283..9eb069c78 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -13,8 +13,8 @@ import { SearchTool } from './tools/SearchTool'; import { NoTool } from './tools/NoTool'; import { on } from 'events'; import { v4 as uuidv4 } from 'uuid'; -import { AnswerParser } from './AnswerParser'; -import { StreamedAnswerParser } from './StreamedAnswerParser'; +import { AnswerParser } from './response_parsers/AnswerParser'; +import { StreamedAnswerParser } from 
'./response_parsers/StreamedAnswerParser'; import { CreateCSVTool } from './tools/CreateCSVTool'; dotenv.config(); diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/AnswerParser.ts deleted file mode 100644 index 885114195..000000000 --- a/src/client/views/nodes/ChatBox/AnswerParser.ts +++ /dev/null @@ -1,125 +0,0 @@ -import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType, ProcessingInfo } from './types'; -import { v4 as uuid } from 'uuid'; - -export class AnswerParser { - static parse(xml: string, processingInfo: ProcessingInfo[]): AssistantMessage { - const answerRegex = /([\s\S]*?)<\/answer>/; - const citationsRegex = /([\s\S]*?)<\/citations>/; - const citationRegex = /([\s\S]*?)<\/citation>/g; - const followUpQuestionsRegex = /([\s\S]*?)<\/follow_up_questions>/; - const questionRegex = /(.*?)<\/question>/g; - const groundedTextRegex = /([\s\S]*?)<\/grounded_text>/g; - const normalTextRegex = /([\s\S]*?)<\/normal_text>/g; - const loopSummaryRegex = /([\s\S]*?)<\/loop_summary>/; - - const answerMatch = answerRegex.exec(xml); - const citationsMatch = citationsRegex.exec(xml); - const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml); - const loopSummaryMatch = loopSummaryRegex.exec(xml); - - if (!answerMatch) { - throw new Error('Invalid XML: Missing tag.'); - } - - let rawTextContent = answerMatch[1].trim(); - let content: AssistantMessage['content'] = []; - let citations: Citation[] = []; - let contentIndex = 0; - - // Remove citations and follow-up questions from rawTextContent - if (citationsMatch) { - rawTextContent = rawTextContent.replace(citationsMatch[0], '').trim(); - } - if (followUpQuestionsMatch) { - rawTextContent = rawTextContent.replace(followUpQuestionsMatch[0], '').trim(); - } - if (loopSummaryMatch) { - rawTextContent = rawTextContent.replace(loopSummaryMatch[0], '').trim(); - } - - // Parse citations - let citationMatch; - const citationMap = new Map(); - if 
(citationsMatch) { - const citationsContent = citationsMatch[1]; - while ((citationMatch = citationRegex.exec(citationsContent)) !== null) { - const [_, index, chunk_id, type, direct_text] = citationMatch; - const citation_id = uuid(); - citationMap.set(index, citation_id); - citations.push({ - direct_text: direct_text.trim(), - type: getChunkType(type), - chunk_id, - citation_id, - }); - } - } - - rawTextContent = rawTextContent.replace(normalTextRegex, '$1'); - - // Parse text content (normal and grounded) - let lastIndex = 0; - let match; - - while ((match = groundedTextRegex.exec(rawTextContent)) !== null) { - const [fullMatch, citationIndex, groundedText] = match; - - // Add normal text that is before the grounded text - if (match.index > lastIndex) { - const normalText = rawTextContent.slice(lastIndex, match.index).trim(); - if (normalText) { - content.push({ - index: contentIndex++, - type: TEXT_TYPE.NORMAL, - text: normalText, - citation_ids: null, - }); - } - } - - // Add grounded text - const citation_ids = citationIndex.split(',').map(index => citationMap.get(index) || ''); - content.push({ - index: contentIndex++, - type: TEXT_TYPE.GROUNDED, - text: groundedText.trim(), - citation_ids, - }); - - lastIndex = match.index + fullMatch.length; - } - - // Add any remaining normal text after the last grounded text - if (lastIndex < rawTextContent.length) { - const remainingText = rawTextContent.slice(lastIndex).trim(); - if (remainingText) { - content.push({ - index: contentIndex++, - type: TEXT_TYPE.NORMAL, - text: remainingText, - citation_ids: null, - }); - } - } - - let followUpQuestions: string[] = []; - if (followUpQuestionsMatch) { - const questionsText = followUpQuestionsMatch[1]; - let questionMatch; - while ((questionMatch = questionRegex.exec(questionsText)) !== null) { - followUpQuestions.push(questionMatch[1].trim()); - } - } - - const assistantResponse: AssistantMessage = { - role: ASSISTANT_ROLE.ASSISTANT, - content, - follow_up_questions: 
followUpQuestions, - citations, - processing_info: processingInfo, - loop_summary: loopSummaryMatch ? loopSummaryMatch[1].trim() : undefined, - }; - - return assistantResponse; - } -} diff --git a/src/client/views/nodes/ChatBox/ChatBox.scss b/src/client/views/nodes/ChatBox/ChatBox.scss index adb0663c3..42f6a0d61 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.scss +++ b/src/client/views/nodes/ChatBox/ChatBox.scss @@ -116,6 +116,33 @@ $transition: all 0.3s ease; } } } + .citation-popup { + position: fixed; + bottom: 50px; + left: 50%; + transform: translateX(-50%); + background-color: rgba(0, 0, 0, 0.8); + color: white; + padding: 10px 20px; + border-radius: 10px; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2); + z-index: 1000; + animation: fadeIn 0.3s ease-in-out; + + p { + margin: 0; + font-size: 14px; + } + + @keyframes fadeIn { + from { + opacity: 0; + } + to { + opacity: 1; + } + } + } } .message { diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index ff699aab3..98a2e6002 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -44,6 +44,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @observable private linked_docs_to_add: ObservableSet = observable.set(); @observable private linked_csv_files: { filename: string; id: string; text: string }[] = []; @observable private isUploadingDocs: boolean = false; + @observable private citationPopup: { text: string; visible: boolean } = { text: '', visible: false }; // Private properties for managing OpenAI API, vector store, agent, and UI elements private openai: OpenAI; @@ -450,6 +451,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { DocumentManager.Instance.showDocument(highlightDoc, { willZoomCentered: true }, () => {}); break; case CHUNK_TYPE.TEXT: + this.citationPopup = { text: citation.direct_text ?? 
'No text available', visible: true }; + setTimeout(() => (this.citationPopup.visible = false), 3000); // Hide after 3 seconds + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { const firstView = Array.from(doc[DocViews])[0] as DocumentView; firstView.ComponentView?.search?.(citation.direct_text ?? ''); @@ -730,6 +734,14 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { )} + {/* Popup for citation */} + {this.citationPopup.visible && ( +
+

+ Text from your document: {this.citationPopup.text} +

+
+ )}
); } diff --git a/src/client/views/nodes/ChatBox/ChunkManager.ts b/src/client/views/nodes/ChatBox/ChunkManager.ts deleted file mode 100644 index 64c073640..000000000 --- a/src/client/views/nodes/ChatBox/ChunkManager.ts +++ /dev/null @@ -1,24 +0,0 @@ -import { SimplifiedChunk } from './types'; - -class ChunkManager { - private chunks: SimplifiedChunk[]; - - constructor() { - this.chunks = []; - } - - addChunk(chunk: SimplifiedChunk) { - this.chunks.push(chunk); - } - - removeChunk(chunk: SimplifiedChunk) { - const index = this.chunks.indexOf(chunk); - if (index !== -1) { - this.chunks.splice(index, 1); - } - } - - getChunks() { - return this.chunks; - } -} diff --git a/src/client/views/nodes/ChatBox/StreamedAnswerParser.ts b/src/client/views/nodes/ChatBox/StreamedAnswerParser.ts deleted file mode 100644 index 3585cab4a..000000000 --- a/src/client/views/nodes/ChatBox/StreamedAnswerParser.ts +++ /dev/null @@ -1,73 +0,0 @@ -import { threadId } from 'worker_threads'; - -enum ParserState { - Outside, - InGroundedText, - InNormalText, -} - -export class StreamedAnswerParser { - private state: ParserState = ParserState.Outside; - private buffer: string = ''; - private result: string = ''; - private isStartOfLine: boolean = true; - - public parse(char: string): string { - switch (this.state) { - case ParserState.Outside: - if (char === '<') { - this.buffer = '<'; - } else if (char === '>') { - if (this.buffer.startsWith('') { - this.state = ParserState.Outside; - this.buffer = ''; - } else if (this.buffer.startsWith('') { - this.state = ParserState.Outside; - this.buffer = ''; - } else if (this.buffer.startsWith('<')) { - this.buffer += char; - } else { - this.processChar(char); - } - break; - } - - return this.result.trim(); - } - - private processChar(char: string): void { - if (this.isStartOfLine && char === ' ') { - // Skip leading spaces - return; - } - if (char === '\n') { - this.result += char; - this.isStartOfLine = true; - } else { - this.result += char; - 
this.isStartOfLine = false; - } - } - - public reset(): void { - this.state = ParserState.Outside; - this.buffer = ''; - this.result = ''; - this.isStartOfLine = true; - } -} diff --git a/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts b/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts new file mode 100644 index 000000000..79b53b0a3 --- /dev/null +++ b/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts @@ -0,0 +1,125 @@ +import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType, ProcessingInfo } from '../types'; +import { v4 as uuid } from 'uuid'; + +export class AnswerParser { + static parse(xml: string, processingInfo: ProcessingInfo[]): AssistantMessage { + const answerRegex = /([\s\S]*?)<\/answer>/; + const citationsRegex = /([\s\S]*?)<\/citations>/; + const citationRegex = /([\s\S]*?)<\/citation>/g; + const followUpQuestionsRegex = /([\s\S]*?)<\/follow_up_questions>/; + const questionRegex = /(.*?)<\/question>/g; + const groundedTextRegex = /([\s\S]*?)<\/grounded_text>/g; + const normalTextRegex = /([\s\S]*?)<\/normal_text>/g; + const loopSummaryRegex = /([\s\S]*?)<\/loop_summary>/; + + const answerMatch = answerRegex.exec(xml); + const citationsMatch = citationsRegex.exec(xml); + const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml); + const loopSummaryMatch = loopSummaryRegex.exec(xml); + + if (!answerMatch) { + throw new Error('Invalid XML: Missing tag.'); + } + + let rawTextContent = answerMatch[1].trim(); + let content: AssistantMessage['content'] = []; + let citations: Citation[] = []; + let contentIndex = 0; + + // Remove citations and follow-up questions from rawTextContent + if (citationsMatch) { + rawTextContent = rawTextContent.replace(citationsMatch[0], '').trim(); + } + if (followUpQuestionsMatch) { + rawTextContent = rawTextContent.replace(followUpQuestionsMatch[0], '').trim(); + } + if (loopSummaryMatch) { + rawTextContent = 
rawTextContent.replace(loopSummaryMatch[0], '').trim(); + } + + // Parse citations + let citationMatch; + const citationMap = new Map(); + if (citationsMatch) { + const citationsContent = citationsMatch[1]; + while ((citationMatch = citationRegex.exec(citationsContent)) !== null) { + const [_, index, chunk_id, type, direct_text] = citationMatch; + const citation_id = uuid(); + citationMap.set(index, citation_id); + citations.push({ + direct_text: direct_text.trim(), + type: getChunkType(type), + chunk_id, + citation_id, + }); + } + } + + rawTextContent = rawTextContent.replace(normalTextRegex, '$1'); + + // Parse text content (normal and grounded) + let lastIndex = 0; + let match; + + while ((match = groundedTextRegex.exec(rawTextContent)) !== null) { + const [fullMatch, citationIndex, groundedText] = match; + + // Add normal text that is before the grounded text + if (match.index > lastIndex) { + const normalText = rawTextContent.slice(lastIndex, match.index).trim(); + if (normalText) { + content.push({ + index: contentIndex++, + type: TEXT_TYPE.NORMAL, + text: normalText, + citation_ids: null, + }); + } + } + + // Add grounded text + const citation_ids = citationIndex.split(',').map(index => citationMap.get(index) || ''); + content.push({ + index: contentIndex++, + type: TEXT_TYPE.GROUNDED, + text: groundedText.trim(), + citation_ids, + }); + + lastIndex = match.index + fullMatch.length; + } + + // Add any remaining normal text after the last grounded text + if (lastIndex < rawTextContent.length) { + const remainingText = rawTextContent.slice(lastIndex).trim(); + if (remainingText) { + content.push({ + index: contentIndex++, + type: TEXT_TYPE.NORMAL, + text: remainingText, + citation_ids: null, + }); + } + } + + let followUpQuestions: string[] = []; + if (followUpQuestionsMatch) { + const questionsText = followUpQuestionsMatch[1]; + let questionMatch; + while ((questionMatch = questionRegex.exec(questionsText)) !== null) { + 
followUpQuestions.push(questionMatch[1].trim()); + } + } + + const assistantResponse: AssistantMessage = { + role: ASSISTANT_ROLE.ASSISTANT, + content, + follow_up_questions: followUpQuestions, + citations, + processing_info: processingInfo, + loop_summary: loopSummaryMatch ? loopSummaryMatch[1].trim() : undefined, + }; + + return assistantResponse; + } +} diff --git a/src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts b/src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts new file mode 100644 index 000000000..3585cab4a --- /dev/null +++ b/src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts @@ -0,0 +1,73 @@ +import { threadId } from 'worker_threads'; + +enum ParserState { + Outside, + InGroundedText, + InNormalText, +} + +export class StreamedAnswerParser { + private state: ParserState = ParserState.Outside; + private buffer: string = ''; + private result: string = ''; + private isStartOfLine: boolean = true; + + public parse(char: string): string { + switch (this.state) { + case ParserState.Outside: + if (char === '<') { + this.buffer = '<'; + } else if (char === '>') { + if (this.buffer.startsWith('') { + this.state = ParserState.Outside; + this.buffer = ''; + } else if (this.buffer.startsWith('') { + this.state = ParserState.Outside; + this.buffer = ''; + } else if (this.buffer.startsWith('<')) { + this.buffer += char; + } else { + this.processChar(char); + } + break; + } + + return this.result.trim(); + } + + private processChar(char: string): void { + if (this.isStartOfLine && char === ' ') { + // Skip leading spaces + return; + } + if (char === '\n') { + this.result += char; + this.isStartOfLine = true; + } else { + this.result += char; + this.isStartOfLine = false; + } + } + + public reset(): void { + this.state = ParserState.Outside; + this.buffer = ''; + this.result = ''; + this.isStartOfLine = true; + } +} diff --git a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts 
b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts index b5145c1f7..cc3b1ccd5 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts @@ -2,47 +2,55 @@ import { Pinecone, Index, IndexList, PineconeRecord, RecordMetadata, QueryRespon import { CohereClient } from 'cohere-ai'; import { EmbedResponse } from 'cohere-ai/api'; import dotenv from 'dotenv'; -import axios from 'axios'; -import { SimplifiedChunk } from '../types'; - import { RAGChunk, AI_Document, CHUNK_TYPE } from '../types'; import { Doc } from '../../../../../fields/Doc'; -import { DocData } from '../../../../../fields/DocSymbols'; import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types'; import { Networking } from '../../../../Network'; dotenv.config(); +/** + * The Vectorstore class integrates with Pinecone for vector-based document indexing and retrieval, + * and Cohere for text embedding. It handles AI document management, uploads, and query-based retrieval. + */ export class Vectorstore { - private pinecone: Pinecone; - private index!: Index; - private cohere: CohereClient; - private indexName: string = 'pdf-chatbot'; - private _id: string; - private _doc_ids: string[] = []; + private pinecone: Pinecone; // Pinecone client for managing the vector index. + private index!: Index; // The specific Pinecone index used for document chunks. + private cohere: CohereClient; // Cohere client for generating embeddings. + private indexName: string = 'pdf-chatbot'; // Default name for the index. + private _id: string; // Unique ID for the Vectorstore instance. + private _doc_ids: string[] = []; // List of document IDs handled by this instance. - documents: AI_Document[] = []; + documents: AI_Document[] = []; // Store the documents indexed in the vectorstore. + /** + * Constructor initializes the Pinecone and Cohere clients, sets up the document ID list, + * and initializes the Pinecone index. 
+ * @param id The unique identifier for the vectorstore instance. + * @param doc_ids A function that returns a list of document IDs. + */ constructor(id: string, doc_ids: () => string[]) { const pineconeApiKey = process.env.PINECONE_API_KEY; if (!pineconeApiKey) { throw new Error('PINECONE_API_KEY is not defined.'); } - this.pinecone = new Pinecone({ - apiKey: pineconeApiKey, - }); - this.cohere = new CohereClient({ - token: process.env.COHERE_API_KEY, - }); + // Initialize Pinecone and Cohere clients with API keys from the environment. + this.pinecone = new Pinecone({ apiKey: pineconeApiKey }); + this.cohere = new CohereClient({ token: process.env.COHERE_API_KEY }); this._id = id; this._doc_ids = doc_ids(); this.initializeIndex(); } + /** + * Initializes the Pinecone index by checking if it exists, and creating it if not. + * The index is set to use the cosine metric for vector similarity. + */ private async initializeIndex() { const indexList: IndexList = await this.pinecone.listIndexes(); + // Check if the index already exists, otherwise create it. if (!indexList.indexes?.some(index => index.name === this.indexName)) { await this.pinecone.createIndex({ name: this.indexName, @@ -57,62 +65,76 @@ export class Vectorstore { }); } + // Set the index for future use. this.index = this.pinecone.Index(this.indexName); } + /** + * Adds an AI document to the vectorstore. This method handles document chunking, uploading to the + * vectorstore, and updating the progress for long-running tasks like file uploads. + * @param doc The document to be added to the vectorstore. + * @param progressCallback Callback to update the progress of the upload. 
+ */ async addAIDoc(doc: Doc, progressCallback: (progress: number, step: string) => void) { console.log('Adding AI Document:', doc); const ai_document_status: string = StrCast(doc.ai_document_status); - if (ai_document_status !== undefined && ai_document_status !== null && ai_document_status.trim() !== '' && ai_document_status !== '{}') { + // Skip if the document is already in progress or completed. + if (ai_document_status !== undefined && ai_document_status.trim() !== '' && ai_document_status !== '{}') { if (ai_document_status === 'IN PROGRESS') { console.log('Already in progress.'); return; } - if (!this._doc_ids.includes(StrCast(doc.ai_doc_id))) this._doc_ids.push(StrCast(doc.ai_doc_id)); + if (!this._doc_ids.includes(StrCast(doc.ai_doc_id))) { + this._doc_ids.push(StrCast(doc.ai_doc_id)); + } } else { + // Start processing the document. doc.ai_document_status = 'PROGRESS'; console.log(doc); + + // Get the local file path (CSV or PDF). const local_file_path: string = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname; console.log('Local File Path:', local_file_path); if (local_file_path) { console.log('Creating AI Document...'); - // Start the document creation process + // Start the document creation process by sending the file to the server. const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); - // Poll the server for progress updates + // Poll the server for progress updates. let inProgress: boolean = true; let result: any = null; while (inProgress) { - await new Promise(resolve => setTimeout(resolve, 2000)); // Polling interval + // Polling interval for status updates. + await new Promise(resolve => setTimeout(resolve, 2000)); + // Check if the job is completed. 
const resultResponse = await Networking.FetchFromServer(`/getResult/${jobId}`); const resultResponseJson = JSON.parse(resultResponse); - //console.log('Result Response:', resultResponseJson); if (resultResponseJson.status === 'completed') { console.log('Result here:', resultResponseJson); result = resultResponseJson; break; } + // Fetch progress information and update the progress callback. const progressResponse = await Networking.FetchFromServer(`/getProgress/${jobId}`); const progressResponseJson = JSON.parse(progressResponse); - //console.log('Progress Response:', progressResponseJson); - if (progressResponseJson) { - console.log('Progress:', progressResponseJson); const progress = progressResponseJson.progress; const step = progressResponseJson.step; progressCallback(progress, step); } } - // Process the final document result + // Once completed, process the document and add it to the vectorstore. console.log('Document JSON:', result); this.documents.push(result); - await this.indexDocument(JSON.parse(JSON.stringify(result, (key, value) => (value === null || value === undefined ? undefined : value)))); + await this.indexDocument(result); console.log(`Document added: ${result.file_name}`); + + // Update document metadata such as summary, purpose, and vectorstore ID. doc.summary = result.summary; doc.ai_doc_id = result.doc_id; this._doc_ids.push(result.doc_id); @@ -128,6 +150,7 @@ export class Vectorstore { doc.chunk_simpl = JSON.stringify({ chunks: [] }); } + // Process each chunk of the document and update the document's chunk_simpl field. result.chunks.forEach((chunk: RAGChunk) => { const chunkToAdd = { chunkId: chunk.id, @@ -142,27 +165,41 @@ export class Vectorstore { doc.chunk_simpl = JSON.stringify(new_chunk_simpl); }); + // Mark the document status as completed. doc.ai_document_status = 'COMPLETED'; } } } + /** + * Indexes the processed document by uploading the document's vector chunks to the Pinecone index. 
+ * @param document The processed document containing its chunks and metadata. + */ private async indexDocument(document: any) { console.log('Uploading vectors to content namespace...'); - const pineconeRecords: PineconeRecord[] = (document.chunks as RAGChunk[]).map( - chunk => - ({ - id: chunk.id, - values: chunk.values, - metadata: { ...chunk.metadata } as RecordMetadata, - }) as PineconeRecord - ); + + // Prepare Pinecone records for each chunk in the document. + const pineconeRecords: PineconeRecord[] = (document.chunks as RAGChunk[]).map(chunk => ({ + id: chunk.id, + values: chunk.values, + metadata: { ...chunk.metadata } as RecordMetadata, + })); + + // Upload the records to Pinecone. await this.index.upsert(pineconeRecords); } - async retrieve(query: string, topK: number = 10): Promise { + /** + * Retrieves the top K document chunks relevant to the user's query. + * This involves embedding the query using Cohere, then querying Pinecone for matching vectors. + * @param query The search query string. + * @param topK The number of top results to return (default is 10). + * @returns A list of document chunks that match the query. + */ + async retrieve(query: string, topK: number = 10): Promise { console.log(`Retrieving chunks for query: ${query}`); try { + // Generate an embedding for the query using Cohere. const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({ texts: [query], model: 'embed-english-v3.0', @@ -171,6 +208,7 @@ export class Vectorstore { let queryEmbedding: number[]; + // Extract the embedding from the response. if (Array.isArray(queryEmbeddingResponse.embeddings)) { queryEmbedding = queryEmbeddingResponse.embeddings[0]; } else if (queryEmbeddingResponse.embeddings && 'embeddings' in queryEmbeddingResponse.embeddings) { @@ -183,6 +221,7 @@ export class Vectorstore { throw new Error('Query embedding is not an array'); } + // Query the Pinecone index using the embedding and filter by document IDs. 
const queryResponse: QueryResponse = await this.index.query({ vector: queryEmbedding, filter: { @@ -193,12 +232,22 @@ export class Vectorstore { includeMetadata: true, }); + // Map the results into RAGChunks and return them. return queryResponse.matches.map( match => ({ id: match.id, values: match.values as number[], - metadata: match.metadata as { text: string; type: string; original_document: string; file_path: string; doc_id: string; location: string; start_page: number; end_page: number }, + metadata: match.metadata as { + text: string; + type: string; + original_document: string; + file_path: string; + doc_id: string; + location: string; + start_page: number; + end_page: number; + }, }) as RAGChunk ); } catch (error) { -- cgit v1.2.3-70-g09d2 From 4791cd23af08da70895204a3a7fbaf889d9af2d5 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sat, 7 Sep 2024 12:43:05 -0400 Subject: completely restructured, added comments, and significantly reduced the length of the prompt (~72% shorter and cheaper) --- src/client/views/Main.tsx | 2 +- src/client/views/nodes/ChatBox/Agent.ts | 254 ------- src/client/views/nodes/ChatBox/ChatBox.scss | 279 -------- src/client/views/nodes/ChatBox/ChatBox.tsx | 756 --------------------- .../views/nodes/ChatBox/MessageComponent.tsx | 105 --- src/client/views/nodes/ChatBox/ProgressBar.scss | 69 -- src/client/views/nodes/ChatBox/ProgressBar.tsx | 13 - src/client/views/nodes/ChatBox/prompts.ts | 465 ------------- .../nodes/ChatBox/response_parsers/AnswerParser.ts | 125 ---- .../response_parsers/StreamedAnswerParser.ts | 73 -- src/client/views/nodes/ChatBox/tools.ts | 26 - src/client/views/nodes/ChatBox/tools/BaseTool.ts | 24 - .../views/nodes/ChatBox/tools/CalculateTool.ts | 26 - .../views/nodes/ChatBox/tools/CreateCSVTool.ts | 51 -- .../nodes/ChatBox/tools/CreateCollectionTool.ts | 36 - .../views/nodes/ChatBox/tools/DataAnalysisTool.ts | 59 -- .../views/nodes/ChatBox/tools/GetDocsTool.ts | 29 - 
src/client/views/nodes/ChatBox/tools/NoTool.ts | 18 - src/client/views/nodes/ChatBox/tools/RAGTool.ts | 138 ---- src/client/views/nodes/ChatBox/tools/SearchTool.ts | 54 -- .../nodes/ChatBox/tools/WebsiteInfoScraperTool.ts | 43 -- .../views/nodes/ChatBox/tools/WikipediaTool.ts | 37 - src/client/views/nodes/ChatBox/types.ts | 169 ----- .../views/nodes/ChatBox/vectorstore/Vectorstore.ts | 258 ------- .../views/nodes/chatbot/agentsystem/Agent.ts | 278 ++++++++ .../views/nodes/chatbot/agentsystem/prompts.ts | 181 +++++ .../nodes/chatbot/chatboxcomponents/ChatBox.scss | 279 ++++++++ .../nodes/chatbot/chatboxcomponents/ChatBox.tsx | 745 ++++++++++++++++++++ .../chatbot/chatboxcomponents/MessageComponent.tsx | 146 ++++ .../chatbot/chatboxcomponents/ProgressBar.scss | 69 ++ .../chatbot/chatboxcomponents/ProgressBar.tsx | 22 + .../nodes/chatbot/response_parsers/AnswerParser.ts | 125 ++++ .../response_parsers/StreamedAnswerParser.ts | 73 ++ src/client/views/nodes/chatbot/tools/BaseTool.ts | 24 + .../views/nodes/chatbot/tools/CalculateTool.ts | 26 + .../views/nodes/chatbot/tools/CreateCSVTool.ts | 51 ++ .../nodes/chatbot/tools/CreateCollectionTool.ts | 36 + .../views/nodes/chatbot/tools/DataAnalysisTool.ts | 59 ++ .../views/nodes/chatbot/tools/GetDocsTool.ts | 29 + src/client/views/nodes/chatbot/tools/NoTool.ts | 18 + src/client/views/nodes/chatbot/tools/RAGTool.ts | 138 ++++ src/client/views/nodes/chatbot/tools/SearchTool.ts | 54 ++ .../nodes/chatbot/tools/WebsiteInfoScraperTool.ts | 83 +++ .../views/nodes/chatbot/tools/WikipediaTool.ts | 37 + src/client/views/nodes/chatbot/types/types.ts | 129 ++++ .../views/nodes/chatbot/vectorstore/Vectorstore.ts | 258 +++++++ src/server/ApiManagers/AssistantManager.ts | 2 +- 47 files changed, 2862 insertions(+), 3109 deletions(-) delete mode 100644 src/client/views/nodes/ChatBox/Agent.ts delete mode 100644 src/client/views/nodes/ChatBox/ChatBox.scss delete mode 100644 src/client/views/nodes/ChatBox/ChatBox.tsx delete mode 100644 
src/client/views/nodes/ChatBox/MessageComponent.tsx delete mode 100644 src/client/views/nodes/ChatBox/ProgressBar.scss delete mode 100644 src/client/views/nodes/ChatBox/ProgressBar.tsx delete mode 100644 src/client/views/nodes/ChatBox/prompts.ts delete mode 100644 src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts delete mode 100644 src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts delete mode 100644 src/client/views/nodes/ChatBox/tools.ts delete mode 100644 src/client/views/nodes/ChatBox/tools/BaseTool.ts delete mode 100644 src/client/views/nodes/ChatBox/tools/CalculateTool.ts delete mode 100644 src/client/views/nodes/ChatBox/tools/CreateCSVTool.ts delete mode 100644 src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts delete mode 100644 src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts delete mode 100644 src/client/views/nodes/ChatBox/tools/GetDocsTool.ts delete mode 100644 src/client/views/nodes/ChatBox/tools/NoTool.ts delete mode 100644 src/client/views/nodes/ChatBox/tools/RAGTool.ts delete mode 100644 src/client/views/nodes/ChatBox/tools/SearchTool.ts delete mode 100644 src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts delete mode 100644 src/client/views/nodes/ChatBox/tools/WikipediaTool.ts delete mode 100644 src/client/views/nodes/ChatBox/types.ts delete mode 100644 src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts create mode 100644 src/client/views/nodes/chatbot/agentsystem/Agent.ts create mode 100644 src/client/views/nodes/chatbot/agentsystem/prompts.ts create mode 100644 src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss create mode 100644 src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx create mode 100644 src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx create mode 100644 src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss create mode 100644 src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx create mode 100644 
src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts create mode 100644 src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts create mode 100644 src/client/views/nodes/chatbot/tools/BaseTool.ts create mode 100644 src/client/views/nodes/chatbot/tools/CalculateTool.ts create mode 100644 src/client/views/nodes/chatbot/tools/CreateCSVTool.ts create mode 100644 src/client/views/nodes/chatbot/tools/CreateCollectionTool.ts create mode 100644 src/client/views/nodes/chatbot/tools/DataAnalysisTool.ts create mode 100644 src/client/views/nodes/chatbot/tools/GetDocsTool.ts create mode 100644 src/client/views/nodes/chatbot/tools/NoTool.ts create mode 100644 src/client/views/nodes/chatbot/tools/RAGTool.ts create mode 100644 src/client/views/nodes/chatbot/tools/SearchTool.ts create mode 100644 src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts create mode 100644 src/client/views/nodes/chatbot/tools/WikipediaTool.ts create mode 100644 src/client/views/nodes/chatbot/types/types.ts create mode 100644 src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts (limited to 'src') diff --git a/src/client/views/Main.tsx b/src/client/views/Main.tsx index f7cd0e925..e1f7dd233 100644 --- a/src/client/views/Main.tsx +++ b/src/client/views/Main.tsx @@ -27,7 +27,7 @@ import { CollectionSchemaView } from './collections/collectionSchema/CollectionS import { SchemaRowBox } from './collections/collectionSchema/SchemaRowBox'; import './global/globalScripts'; import { AudioBox } from './nodes/AudioBox'; -import { ChatBox } from './nodes/ChatBox/ChatBox'; +import { ChatBox } from './nodes/chatbot/chatboxcomponents/ChatBox'; import { ComparisonBox } from './nodes/ComparisonBox'; import { DataVizBox } from './nodes/DataVizBox/DataVizBox'; import { DiagramBox } from './nodes/DiagramBox'; diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts deleted file mode 100644 index 9eb069c78..000000000 --- 
a/src/client/views/nodes/ChatBox/Agent.ts +++ /dev/null @@ -1,254 +0,0 @@ -import OpenAI from 'openai'; -import { Tool, AgentMessage, AssistantMessage, TEXT_TYPE, CHUNK_TYPE, ASSISTANT_ROLE, ProcessingInfo, PROCESSING_TYPE } from './types'; -import { getReactPrompt } from './prompts'; -import { XMLParser, XMLBuilder } from 'fast-xml-parser'; -import { Vectorstore } from './vectorstore/Vectorstore'; -import { ChatCompletionMessageParam } from 'openai/resources'; -import dotenv from 'dotenv'; -import { CalculateTool } from './tools/CalculateTool'; -import { RAGTool } from './tools/RAGTool'; -import { DataAnalysisTool } from './tools/DataAnalysisTool'; -import { WebsiteInfoScraperTool } from './tools/WebsiteInfoScraperTool'; -import { SearchTool } from './tools/SearchTool'; -import { NoTool } from './tools/NoTool'; -import { on } from 'events'; -import { v4 as uuidv4 } from 'uuid'; -import { AnswerParser } from './response_parsers/AnswerParser'; -import { StreamedAnswerParser } from './response_parsers/StreamedAnswerParser'; -import { CreateCSVTool } from './tools/CreateCSVTool'; - -dotenv.config(); - -export class Agent { - private client: OpenAI; - private tools: Record>; - private messages: AgentMessage[] = []; - private interMessages: AgentMessage[] = []; - private vectorstore: Vectorstore; - private _history: () => string; - private _summaries: () => string; - private _csvData: () => { filename: string; id: string; text: string }[]; - private actionNumber: number = 0; - private thoughtNumber: number = 0; - private processingNumber: number = 0; - private processingInfo: ProcessingInfo[] = []; - private streamedAnswerParser: StreamedAnswerParser = new StreamedAnswerParser(); - - constructor( - _vectorstore: Vectorstore, - summaries: () => string, - history: () => string, - csvData: () => { filename: string; id: string; text: string }[], - addLinkedUrlDoc: (url: string, id: string) => void, - createCSVInDash: (url: string, title: string, id: string, data: string) => 
void - ) { - this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); - this.vectorstore = _vectorstore; - this._history = history; - this._summaries = summaries; - this._csvData = csvData; - this.tools = { - calculate: new CalculateTool(), - rag: new RAGTool(this.vectorstore), - dataAnalysis: new DataAnalysisTool(csvData), - websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc), - searchTool: new SearchTool(addLinkedUrlDoc), - createCSV: new CreateCSVTool(createCSVInDash), - no_tool: new NoTool(), - }; - } - - async askAgent(question: string, onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void, maxTurns: number = 30): Promise { - console.log(`Starting query: ${question}`); - this.messages.push({ role: 'user', content: question }); - const chatHistory = this._history(); - const systemPrompt = getReactPrompt(Object.values(this.tools), this._summaries, chatHistory); - this.interMessages = [{ role: 'system', content: systemPrompt }]; - this.interMessages.push({ role: 'user', content: `${question}` }); - const parser = new XMLParser({ - ignoreAttributes: false, - attributeNamePrefix: '@_', - textNodeName: '_text', - isArray: (name, jpath, isLeafNode, isAttribute) => { - // Convert tags with the same name to arrays - return ['query', 'url'].indexOf(name) !== -1; - }, - }); - const builder = new XMLBuilder({ ignoreAttributes: false, attributeNamePrefix: '@_' }); - - let currentAction: string | undefined; - - this.processingInfo = []; - - for (let i = 2; i < maxTurns; i += 2) { - console.log(this.interMessages); - console.log(`Turn ${i}/${maxTurns}`); - - const result = await this.execute(onProcessingUpdate, onAnswerUpdate); - this.interMessages.push({ role: 'assistant', content: result }); - - let parsedResult; - try { - parsedResult = parser.parse(result); - } catch (error) { - throw new Error(`Error parsing response: ${error}`); - } - - const stage = 
parsedResult.stage; - - if (!stage) { - throw new Error(`Error: No stage found in response`); - } - - for (const key in stage) { - if (key === 'thought') { - console.log(`Thought: ${stage[key]}`); - this.processingNumber++; - } else if (key === 'action') { - currentAction = stage[key] as string; - console.log(`Action: ${currentAction}`); - if (this.tools[currentAction]) { - const nextPrompt = [ - { - type: 'text', - text: `` + builder.build({ action_rules: this.tools[currentAction].getActionRule() }) + ``, - }, - ]; - this.interMessages.push({ role: 'user', content: nextPrompt }); - break; - } else { - console.log('Error: No valid action'); - this.interMessages.push({ role: 'user', content: `No valid action, try again.` }); - break; - } - } else if (key === 'action_input') { - const actionInput = stage[key]; - console.log(`Action input:`, actionInput.inputs); - if (currentAction) { - try { - // Parse the inputs - //const parsedInputs = this.parseActionInputs(actionInput.inputs); - //console.log(`Parsed inputs:`, parsedInputs); - const observation = await this.processAction(currentAction, actionInput.inputs); - const nextPrompt = [{ type: 'text', text: ` ` }, ...observation, { type: 'text', text: '' }]; - console.log(observation); - this.interMessages.push({ role: 'user', content: nextPrompt }); - this.processingNumber++; - break; - } catch (error) { - throw new Error(`Error processing action: ${error}`); - } - } else { - throw new Error('Error: Action input without a valid action'); - } - } else if (key === 'answer') { - console.log('Answer found. Ending query.'); - this.streamedAnswerParser.reset(); - const parsedAnswer = AnswerParser.parse(result, this.processingInfo); - return parsedAnswer; - } - } - } - throw new Error('Reached maximum turns. 
Ending query.'); - } - - private async execute(onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void): Promise { - const stream = await this.client.chat.completions.create({ - model: 'gpt-4o', - messages: this.interMessages as ChatCompletionMessageParam[], - temperature: 0, - stream: true, - }); - - let fullResponse: string = ''; - let currentTag: string = ''; - let currentContent: string = ''; - let isInsideTag: boolean = false; - - for await (const chunk of stream) { - let content = chunk.choices[0]?.delta?.content || ''; - fullResponse += content; - - for (const char of content) { - if (currentTag === 'answer') { - currentContent += char; - //console.log(char); - const streamedAnswer = this.streamedAnswerParser.parse(char); - //console.log(streamedAnswer); - onAnswerUpdate(streamedAnswer); - continue; - } else if (char === '<') { - isInsideTag = true; - currentTag = ''; - currentContent = ''; - } else if (char === '>') { - isInsideTag = false; - if (currentTag.startsWith('/')) { - currentTag = ''; - } - } else if (isInsideTag) { - currentTag += char; - } else if (currentTag === 'thought' || currentTag === 'action_input_description') { - currentContent += char; - const current_info = this.processingInfo.find(info => info.index === this.processingNumber); - if (current_info) { - current_info.content = currentContent.trim(); - onProcessingUpdate(this.processingInfo); - } else { - this.processingInfo.push({ index: this.processingNumber, type: currentTag === 'thought' ? 
PROCESSING_TYPE.THOUGHT : PROCESSING_TYPE.ACTION, content: currentContent.trim() }); - onProcessingUpdate(this.processingInfo); - } - } - } - } - - return fullResponse; - } - - private async processAction(action: string, actionInput: any): Promise { - if (!(action in this.tools)) { - throw new Error(`Unknown action: ${action}`); - } - - const tool = this.tools[action]; - const args: Record = {}; - - // for (const paramName in tool.parameters) { - // if (actionInput[paramName] !== undefined) { - // if (Array.isArray(actionInput[paramName])) { - // // If the input is already an array, use it as is - // args[paramName] = actionInput[paramName]; - // } else if (typeof actionInput[paramName] === 'object' && actionInput[paramName] !== null) { - // // If the input is an object, check if it has multiple of the same tag - // const values = Object.values(actionInput[paramName]); - // if (values.length > 1) { - // // If there are multiple values, convert to an array - // args[paramName] = values; - // } else { - // // If there's only one value, use it directly - // args[paramName] = values[0]; - // } - // } else { - // // For single values, use them as is - // args[paramName] = actionInput[paramName]; - // } - // } else if (tool.parameters[paramName].required === 'true') { - // throw new Error(`Missing required parameter '${paramName}' for action '${action}'`); - // } - // } - - return await tool.execute(actionInput); - } - - private parseActionInputs(inputs: any): Record { - const parsedInputs: Record = {}; - for (const key in inputs) { - if (Array.isArray(inputs[key])) { - parsedInputs[key] = inputs[key].map((item: any) => item._text); - } else { - parsedInputs[key] = inputs[key]._text; - } - } - return parsedInputs; - } -} diff --git a/src/client/views/nodes/ChatBox/ChatBox.scss b/src/client/views/nodes/ChatBox/ChatBox.scss deleted file mode 100644 index 42f6a0d61..000000000 --- a/src/client/views/nodes/ChatBox/ChatBox.scss +++ /dev/null @@ -1,279 +0,0 @@ -@import 
url('https://fonts.googleapis.com/css2?family=Atkinson+Hyperlegible:ital,wght@0,400;0,700;1,400;1,700&display=swap'); - -$primary-color: #4a90e2; -$secondary-color: #f5f8fa; -$text-color: #333; -$light-text-color: #777; -$border-color: #e1e8ed; -$shadow-color: rgba(0, 0, 0, 0.1); -$transition: all 0.3s ease; -.chat-box { - display: flex; - flex-direction: column; - height: 100%; - background-color: #fff; - font-family: - 'Atkinson Hyperlegible', - -apple-system, - BlinkMacSystemFont, - 'Segoe UI', - Roboto, - Helvetica, - Arial, - sans-serif; - border-radius: 12px; - overflow: hidden; - box-shadow: 0 4px 12px $shadow-color; - position: relative; - - .chat-header { - background-color: $primary-color; - color: white; - padding: 15px; - text-align: center; - box-shadow: 0 2px 4px $shadow-color; - height: fit-content; - - h2 { - margin: 0; - font-size: 1.3em; - font-weight: 500; - } - } - - .chat-messages { - flex-grow: 1; - overflow-y: auto; - padding: 20px; - display: flex; - flex-direction: column; - gap: 10px; // Added to give space between elements - - &::-webkit-scrollbar { - width: 6px; - } - - &::-webkit-scrollbar-thumb { - background-color: $border-color; - border-radius: 3px; - } - } - - .chat-input { - display: flex; - padding: 20px; - border-top: 1px solid $border-color; - background-color: #fff; - - input { - flex-grow: 1; - padding: 12px 15px; - border: 1px solid $border-color; - border-radius: 24px; - font-size: 15px; - transition: $transition; - - &:focus { - outline: none; - border-color: $primary-color; - box-shadow: 0 0 0 2px rgba($primary-color, 0.2); - } - } - - .submit-button { - background-color: $primary-color; - color: white; - border: none; - border-radius: 50%; - width: 48px; - height: 48px; - margin-left: 10px; - cursor: pointer; - transition: $transition; - display: flex; - align-items: center; - justify-content: center; - - &:hover { - background-color: darken($primary-color, 10%); - } - - &:disabled { - background-color: 
$light-text-color; - cursor: not-allowed; - } - - .spinner { - height: 24px; - width: 24px; - border: 3px solid rgba(255, 255, 255, 0.3); - border-top: 3px solid #fff; - border-radius: 50%; - animation: spin 1s linear infinite; - display: flex; - align-items: center; - justify-content: center; - } - } - } - .citation-popup { - position: fixed; - bottom: 50px; - left: 50%; - transform: translateX(-50%); - background-color: rgba(0, 0, 0, 0.8); - color: white; - padding: 10px 20px; - border-radius: 10px; - box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2); - z-index: 1000; - animation: fadeIn 0.3s ease-in-out; - - p { - margin: 0; - font-size: 14px; - } - - @keyframes fadeIn { - from { - opacity: 0; - } - to { - opacity: 1; - } - } - } -} - -.message { - max-width: 80%; - margin-bottom: 20px; - padding: 16px 20px; - border-radius: 18px; - font-size: 15px; - line-height: 1.5; - box-shadow: 0 2px 4px $shadow-color; - word-wrap: break-word; // To handle long words - - &.user { - align-self: flex-end; - background-color: $primary-color; - color: white; - border-bottom-right-radius: 4px; - } - - &.chatbot { - align-self: flex-start; - background-color: $secondary-color; - color: $text-color; - border-bottom-left-radius: 4px; - } - - .toggle-info { - background-color: transparent; - color: $primary-color; - border: 1px solid $primary-color; - width: 100%; - height: fit-content; - border-radius: 8px; - padding: 10px 16px; - font-size: 14px; - cursor: pointer; - transition: $transition; - margin-top: 10px; - - &:hover { - background-color: rgba($primary-color, 0.1); - } - } -} - -.follow-up-questions { - margin-top: 15px; - - h4 { - font-size: 15px; - font-weight: 600; - margin-bottom: 10px; - } - - .questions-list { - display: flex; - flex-direction: column; - gap: 10px; - } - - .follow-up-button { - background-color: #fff; - color: $primary-color; - border: 1px solid $primary-color; - border-radius: 8px; - padding: 10px 16px; - font-size: 14px; - cursor: pointer; - transition: 
$transition; - text-align: left; - white-space: normal; - word-wrap: break-word; - width: 100%; - height: fit-content; - - &:hover { - background-color: $primary-color; - color: #fff; - } - } -} - -.citation-button { - display: inline-flex; - align-items: center; - justify-content: center; - width: 20px; - height: 20px; - border-radius: 50%; - background-color: rgba(0, 0, 0, 0.1); - color: $text-color; - font-size: 12px; - font-weight: bold; - margin-left: 5px; - cursor: pointer; - transition: $transition; - vertical-align: middle; - - &:hover { - background-color: rgba(0, 0, 0, 0.2); - } -} - -.uploading-overlay { - position: absolute; - top: 0; - left: 0; - right: 0; - bottom: 0; - background-color: rgba(255, 255, 255, 0.8); - display: flex; - justify-content: center; - align-items: center; - z-index: 1000; -} - -@keyframes spin { - 0% { - transform: rotate(0deg); - } - 100% { - transform: rotate(360deg); - } -} - -@media (max-width: 768px) { - .chat-box { - border-radius: 0; - } - - .message { - max-width: 90%; - } -} diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx deleted file mode 100644 index 98a2e6002..000000000 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ /dev/null @@ -1,756 +0,0 @@ -import { action, computed, makeObservable, observable, observe, reaction, runInAction, ObservableSet } from 'mobx'; -import { observer } from 'mobx-react'; -import OpenAI, { ClientOptions } from 'openai'; -import * as React from 'react'; -import { Doc, DocListCast } from '../../../../fields/Doc'; -import { CsvCast, DocCast, PDFCast, RTFCast, StrCast } from '../../../../fields/Types'; -import { DocumentType } from '../../../documents/DocumentTypes'; -import { Docs } from '../../../documents/Documents'; -import { LinkManager } from '../../../util/LinkManager'; -import { ViewBoxAnnotatableComponent } from '../../DocComponent'; -import { FieldView, FieldViewProps } from '../FieldView'; -import './ChatBox.scss'; -import 
MessageComponentBox from './MessageComponent'; -import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, SimplifiedChunk, ProcessingInfo } from './types'; -import { Vectorstore } from './vectorstore/Vectorstore'; -import { Agent } from './Agent'; -import dotenv from 'dotenv'; -import { DocData, DocViews } from '../../../../fields/DocSymbols'; -import { DocumentManager } from '../../../util/DocumentManager'; -import { v4 as uuidv4 } from 'uuid'; -import { DocUtils } from '../../../documents/DocUtils'; -import { ClientUtils } from '../../../../ClientUtils'; -import { ProgressBar } from './ProgressBar'; -import { DocumentView } from '../DocumentView'; -import { Networking } from '../../../Network'; - -dotenv.config(); - -/** - * ChatBox is the main class responsible for managing the interaction between the user and the assistant, - * handling documents, and integrating with OpenAI for tasks such as document analysis, chat functionality, - * and vector store interactions. 
- */ -@observer -export class ChatBox extends ViewBoxAnnotatableComponent() { - // MobX observable properties to track UI state and data - @observable history: AssistantMessage[] = []; - @observable.deep current_message: AssistantMessage | undefined = undefined; - @observable isLoading: boolean = false; - @observable uploadProgress: number = 0; - @observable currentStep: string = ''; - @observable expandedScratchpadIndex: number | null = null; - @observable inputValue: string = ''; - @observable private linked_docs_to_add: ObservableSet = observable.set(); - @observable private linked_csv_files: { filename: string; id: string; text: string }[] = []; - @observable private isUploadingDocs: boolean = false; - @observable private citationPopup: { text: string; visible: boolean } = { text: '', visible: false }; - - // Private properties for managing OpenAI API, vector store, agent, and UI elements - private openai: OpenAI; - private vectorstore_id: string; - private vectorstore: Vectorstore; - private agent: Agent; - private _oldWheel: HTMLDivElement | null = null; - private messagesRef: React.RefObject; - - /** - * Static method that returns the layout string for the field. - * @param fieldKey Key to get the layout string. - */ - public static LayoutString(fieldKey: string) { - return FieldView.LayoutString(ChatBox, fieldKey); - } - - /** - * Constructor initializes the component, sets up OpenAI, vector store, and agent instances, - * and observes changes in the chat history to save the state in dataDoc. - * @param props The properties passed to the component. 
- */ - constructor(props: FieldViewProps) { - super(props); - makeObservable(this); // Enable MobX observables - - // Initialize OpenAI, vectorstore, and agent - this.openai = this.initializeOpenAI(); - if (StrCast(this.dataDoc.vectorstore_id) == '') { - console.log('new_id'); - this.vectorstore_id = uuidv4(); - this.dataDoc.vectorstore_id = this.vectorstore_id; - } else { - this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id); - } - this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds); - this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.createCSVInDash); - this.messagesRef = React.createRef(); - - // Reaction to update dataDoc when chat history changes - reaction( - () => - this.history.map((msg: AssistantMessage) => ({ - role: msg.role, - content: msg.content, - follow_up_questions: msg.follow_up_questions, - citations: msg.citations, - })), - serializableHistory => { - this.dataDoc.data = JSON.stringify(serializableHistory); - } - ); - } - - /** - * Adds a document to the vectorstore for AI-based analysis. - * Handles the upload progress and errors during the process. - * @param newLinkedDoc The new document to add. - */ - @action - addDocToVectorstore = async (newLinkedDoc: Doc) => { - this.uploadProgress = 0; - this.currentStep = 'Initializing...'; - this.isUploadingDocs = true; - - try { - // Add the document to the vectorstore - await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress); - } catch (error) { - console.error('Error uploading document:', error); - this.currentStep = 'Error during upload'; - } finally { - this.isUploadingDocs = false; - this.uploadProgress = 0; - this.currentStep = ''; - } - }; - - /** - * Updates the upload progress and the current step in the UI. - * @param progress The percentage of the progress. - * @param step The current step name. 
- */ - @action - updateProgress = (progress: number, step: string) => { - console.log('Progress:', progress, step); - this.uploadProgress = progress; - this.currentStep = step; - }; - - /** - * Adds a CSV file for analysis by sending it to OpenAI and generating a summary. - * @param newLinkedDoc The linked document representing the CSV file. - * @param id Optional ID for the document. - */ - @action - addCSVForAnalysis = async (newLinkedDoc: Doc, id?: string) => { - console.log('adding csv file for analysis'); - if (!newLinkedDoc.chunk_simpl) { - // Convert document text to CSV data - const csvData: string = StrCast(newLinkedDoc.text); - console.log('CSV Data:', csvData); - - // Generate a summary using OpenAI API - const completion = await this.openai.chat.completions.create({ - messages: [ - { - role: 'system', - content: - 'You are an AI assistant tasked with summarizing the content of a CSV file. You will be provided with the data from the CSV file and your goal is to generate a concise summary that captures the main themes, trends, and key points represented in the data.', - }, - { - role: 'user', - content: `Please provide a comprehensive summary of the CSV file based on the provided data. Ensure the summary highlights the most important information, patterns, and insights. Your response should be in paragraph form and be concise. - CSV Data: - ${csvData} - ********** - Summary:`, - }, - ], - model: 'gpt-3.5-turbo', - }); - - const csvId = id ?? 
uuidv4(); - - // Add CSV details to linked files - this.linked_csv_files.push({ - filename: CsvCast(newLinkedDoc.data).url.pathname, - id: csvId, - text: csvData, - }); - - // Add a chunk for the CSV and assign the summary - const chunkToAdd = { - chunkId: csvId, - chunkType: CHUNK_TYPE.CSV, - }; - newLinkedDoc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] }); - newLinkedDoc.summary = completion.choices[0].message.content!; - } - }; - - /** - * Toggles the tool logs, expanding or collapsing the scratchpad at the given index. - * @param index Index of the tool log to toggle. - */ - @action - toggleToolLogs = (index: number) => { - this.expandedScratchpadIndex = this.expandedScratchpadIndex === index ? null : index; - }; - - /** - * Initializes the OpenAI API client using the API key from environment variables. - * @returns OpenAI client instance. - */ - initializeOpenAI() { - console.log(process.env.OPENAI_KEY); - const configuration: ClientOptions = { - apiKey: process.env.OPENAI_KEY, - dangerouslyAllowBrowser: true, - }; - return new OpenAI(configuration); - } - - /** - * Adds a scroll event listener to detect user scrolling and handle passive wheel events. - */ - addScrollListener = () => { - if (this.messagesRef.current) { - this.messagesRef.current.addEventListener('wheel', this.onPassiveWheel, { passive: false }); - } - }; - - /** - * Removes the scroll event listener from the chat messages container. - */ - removeScrollListener = () => { - if (this.messagesRef.current) { - this.messagesRef.current.removeEventListener('wheel', this.onPassiveWheel); - } - }; - - /** - * Scrolls the chat messages container to the bottom, ensuring the latest message is visible. - */ - scrollToBottom = () => { - if (this.messagesRef.current) { - this.messagesRef.current.scrollTop = this.messagesRef.current.scrollHeight; - } - }; - - /** - * Event handler for detecting wheel scrolling and stopping the event propagation. - * @param e The wheel event. 
- */ - onPassiveWheel = (e: WheelEvent) => { - if (this._props.isContentActive()) { - e.stopPropagation(); - } - }; - - /** - * Sends the user's input to OpenAI, displays the loading indicator, and updates the chat history. - * @param event The form submission event. - */ - @action - askGPT = async (event: React.FormEvent): Promise => { - event.preventDefault(); - this.inputValue = ''; - - // Extract the user's message - const textInput = event.currentTarget.elements.namedItem('messageInput') as HTMLInputElement; - const trimmedText = textInput.value.trim(); - - if (trimmedText) { - try { - textInput.value = ''; - // Add the user's message to the history - this.history.push({ - role: ASSISTANT_ROLE.USER, - content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: trimmedText, citation_ids: null }], - processing_info: [], - }); - this.isLoading = true; - this.current_message = { - role: ASSISTANT_ROLE.ASSISTANT, - content: [], - citations: [], - processing_info: [], - }; - - // Define callbacks for real-time processing updates - const onProcessingUpdate = (processingUpdate: ProcessingInfo[]) => { - runInAction(() => { - if (this.current_message) { - this.current_message = { - ...this.current_message, - processing_info: processingUpdate, - }; - } - }); - this.scrollToBottom(); - }; - - const onAnswerUpdate = (answerUpdate: string) => { - runInAction(() => { - if (this.current_message) { - this.current_message = { - ...this.current_message, - content: [{ text: answerUpdate, type: TEXT_TYPE.NORMAL, index: 0, citation_ids: [] }], - }; - } - }); - }; - - // Send the user's question to the assistant and get the final message - const finalMessage = await this.agent.askAgent(trimmedText, onProcessingUpdate, onAnswerUpdate); - - // Update the history with the final assistant message - runInAction(() => { - if (this.current_message) { - this.history.push({ ...finalMessage }); - this.current_message = undefined; - this.dataDoc.data = JSON.stringify(this.history); - } - }); - } catch 
(err) { - console.error('Error:', err); - // Handle error in processing - this.history.push({ - role: ASSISTANT_ROLE.ASSISTANT, - content: [{ index: 0, type: TEXT_TYPE.ERROR, text: 'Sorry, I encountered an error while processing your request.', citation_ids: null }], - processing_info: [], - }); - } finally { - this.isLoading = false; - this.scrollToBottom(); - } - } - this.scrollToBottom(); - }; - - /** - * Updates the citations for a given message in the chat history. - * @param index The index of the message in the history. - * @param citations The list of citations to add to the message. - */ - @action - updateMessageCitations = (index: number, citations: Citation[]) => { - if (this.history[index]) { - this.history[index].citations = citations; - } - }; - - /** - * Adds a linked document from a URL for future reference and analysis. - * @param url The URL of the document to add. - * @param id The unique identifier for the document. - */ - @action - addLinkedUrlDoc = async (url: string, id: string) => { - const doc = Docs.Create.WebDocument(url, { data_useCors: true }); - console.log('Adding URL:', url); - - const linkDoc = Docs.Create.LinkDocument(this.Document, doc); - LinkManager.Instance.addLink(linkDoc); - let canDisplay; - - try { - // Fetch the URL content through the proxy - const { data } = await Networking.PostToServer('/proxyFetch', { url }); - - // Simulating header behavior since we can't fetch headers via proxy - const xFrameOptions = data.headers?.['x-frame-options']; - - if (xFrameOptions && xFrameOptions.toUpperCase() === 'SAMEORIGIN') { - console.log('URL cannot be displayed in an iframe:', url); - canDisplay = false; - } else { - console.log('URL can be displayed in an iframe:', url); - canDisplay = true; - } - } catch (error) { - console.error('Error fetching the URL from the server:', error); - } - - const chunkToAdd = { - chunkId: id, - chunkType: CHUNK_TYPE.URL, - url: url, - canDisplay: canDisplay, - }; - - doc.chunk_simpl = 
JSON.stringify({ chunks: [chunkToAdd] }); - }; - - /** - * Getter to retrieve the current user's name from the client utils. - */ - @computed - get userName() { - return ClientUtils.CurrentUserEmail; - } - - /** - * Creates a CSV document in the dashboard and adds it for analysis. - * @param url The URL of the CSV. - * @param title The title of the CSV document. - * @param id The unique ID for the document. - * @param data The CSV data content. - */ - @action - createCSVInDash = async (url: string, title: string, id: string, data: string) => { - console.log('Creating CSV in Dash:', url, title); - const doc = DocCast(await DocUtils.DocumentFromType('csv', url, { title: title, text: RTFCast(data) })); - - const linkDoc = Docs.Create.LinkDocument(this.Document, doc); - LinkManager.Instance.addLink(linkDoc); - - doc && this._props.addDocument?.(doc); - await DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); - - this.addCSVForAnalysis(doc, id); - }; - - /** - * Event handler to manage citations click in the message components. - * @param citation The citation object clicked by the user. - */ - @action - handleCitationClick = (citation: Citation) => { - console.log('Citation clicked:', citation); - const currentLinkedDocs: Doc[] = this.linkedDocs; - - const chunkId = citation.chunk_id; - - // Loop through the linked documents to find the matching chunk and handle its display - for (let doc of currentLinkedDocs) { - if (doc.chunk_simpl) { - const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] }; - const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId); - if (foundChunk) { - // Handle different types of chunks (image, text, table, etc.) 
- switch (foundChunk.chunkType) { - case CHUNK_TYPE.IMAGE: - case CHUNK_TYPE.TABLE: - const values = foundChunk.location?.replace(/[\[\]]/g, '').split(','); - - if (values?.length !== 4) { - console.error('Location string must contain exactly 4 numbers'); - return; - } - - const x1 = parseFloat(values[0]) * Doc.NativeWidth(doc); - const y1 = parseFloat(values[1]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc); - const x2 = parseFloat(values[2]) * Doc.NativeWidth(doc); - const y2 = parseFloat(values[3]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc); - - const annotationKey = Doc.LayoutFieldKey(doc) + '_annotations'; - - const existingDoc = DocListCast(doc[DocData][annotationKey]).find(d => d.citation_id === citation.citation_id); - const highlightDoc = existingDoc ?? this.createImageCitationHighlight(x1, y1, x2, y2, citation, annotationKey, doc); - - DocumentManager.Instance.showDocument(highlightDoc, { willZoomCentered: true }, () => {}); - break; - case CHUNK_TYPE.TEXT: - this.citationPopup = { text: citation.direct_text ?? 'No text available', visible: true }; - setTimeout(() => (this.citationPopup.visible = false), 3000); // Hide after 3 seconds - - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { - const firstView = Array.from(doc[DocViews])[0] as DocumentView; - firstView.ComponentView?.search?.(citation.direct_text ?? ''); - }); - break; - case CHUNK_TYPE.URL: - if (!foundChunk.canDisplay) { - window.open(StrCast(doc.displayUrl), '_blank'); - } else if (foundChunk.canDisplay) { - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); - } - break; - case CHUNK_TYPE.CSV: - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); - break; - default: - console.log('Chunk type not supported', foundChunk.chunkType); - break; - } - } - } - } - }; - - /** - * Creates an annotation highlight on a PDF document for image citations. 
- * @param x1 X-coordinate of the top-left corner of the highlight. - * @param y1 Y-coordinate of the top-left corner of the highlight. - * @param x2 X-coordinate of the bottom-right corner of the highlight. - * @param y2 Y-coordinate of the bottom-right corner of the highlight. - * @param citation The citation object to associate with the highlight. - * @param annotationKey The key used to store the annotation. - * @param pdfDoc The document where the highlight is created. - * @returns The highlighted document. - */ - createImageCitationHighlight = (x1: number, y1: number, x2: number, y2: number, citation: Citation, annotationKey: string, pdfDoc: Doc): Doc => { - const highlight_doc = Docs.Create.FreeformDocument([], { - x: x1, - y: y1, - _width: x2 - x1, - _height: y2 - y1, - backgroundColor: 'rgba(255, 255, 0, 0.5)', - }); - highlight_doc[DocData].citation_id = citation.citation_id; - Doc.AddDocToList(pdfDoc[DocData], annotationKey, highlight_doc); - highlight_doc.annotationOn = pdfDoc; - Doc.SetContainer(highlight_doc, pdfDoc); - return highlight_doc; - }; - - /** - * Lifecycle method that triggers when the component updates. - * Ensures the chat is scrolled to the bottom when new messages are added. - */ - componentDidUpdate() { - this.scrollToBottom(); - } - - /** - * Lifecycle method that triggers when the component mounts. - * Initializes scroll listeners, sets up document reactions, and loads chat history from dataDoc if available. 
- */ - componentDidMount() { - this._props.setContentViewBox?.(this); - if (this.dataDoc.data) { - try { - const storedHistory = JSON.parse(StrCast(this.dataDoc.data)); - runInAction(() => { - this.history.push( - ...storedHistory.map((msg: AssistantMessage) => ({ - role: msg.role, - content: msg.content, - follow_up_questions: msg.follow_up_questions, - citations: msg.citations, - })) - ); - }); - } catch (e) { - console.error('Failed to parse history from dataDoc:', e); - } - } else { - // Default welcome message - runInAction(() => { - this.history.push({ - role: ASSISTANT_ROLE.ASSISTANT, - content: [ - { - index: 0, - type: TEXT_TYPE.NORMAL, - text: `Hey, ${this.userName()}! Welcome to Your Friendly Assistant. Link a document or ask questions to get started.`, - citation_ids: null, - }, - ], - processing_info: [], - }); - }); - } - - // Set up reactions for linked documents - reaction( - () => { - const linkedDocs = LinkManager.Instance.getAllRelatedLinks(this.Document) - .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d); - return linkedDocs; - }, - linked => linked.forEach(doc => this.linked_docs_to_add.add(doc)) - ); - - // Observe changes to linked documents and handle document addition - observe(this.linked_docs_to_add, change => { - if (change.type === 'add') { - if (PDFCast(change.newValue.data)) { - this.addDocToVectorstore(change.newValue); - } else if (CsvCast(change.newValue.data)) { - this.addCSVForAnalysis(change.newValue); - } - } else if (change.type === 'delete') { - console.log('Deleted docs: ', change.oldValue); - } - }); - this.addScrollListener(); - } - - /** - * Lifecycle method that triggers when the component unmounts. - * Removes scroll listeners to avoid memory leaks. - */ - componentWillUnmount() { - this.removeScrollListener(); - } - - /** - * Getter that retrieves all linked documents for the current document. 
- */ - @computed - get linkedDocs() { - return LinkManager.Instance.getAllRelatedLinks(this.Document) - .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d); - } - - /** - * Getter that retrieves document IDs of linked documents that have AI-related content. - */ - @computed - get docIds() { - return LinkManager.Instance.getAllRelatedLinks(this.Document) - .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d) - .filter(d => d.ai_doc_id) - .map(d => StrCast(d.ai_doc_id)); - } - - /** - * Getter that retrieves summaries of all linked documents. - */ - @computed - get summaries(): string { - return ( - LinkManager.Instance.getAllRelatedLinks(this.Document) - .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d) - .filter(d => d.summary) - .map((doc, index) => { - if (PDFCast(doc.data)) { - return `${doc.summary}`; - } else if (CsvCast(doc.data)) { - return `${doc.summary}`; - } else { - return `${index + 1}) ${doc.summary}`; - } - }) - .join('\n') + '\n' - ); - } - - /** - * Getter that retrieves all linked CSV files for analysis. - */ - @computed - get linkedCSVs(): { filename: string; id: string; text: string }[] { - return this.linked_csv_files; - } - - /** - * Getter that formats the entire chat history as a string for the agent's system message. 
- */ - @computed - get formattedHistory(): string { - let history = '\n'; - for (const message of this.history) { - history += `<${message.role}>${message.content.map(content => content.text).join(' ')}`; - if (message.loop_summary) { - history += `${message.loop_summary}`; - } - history += `\n`; - } - history += ''; - return history; - } - - // Other helper methods for retrieving document data and processing - - retrieveSummaries = () => { - return this.summaries; - }; - - retrieveCSVData = () => { - return this.linkedCSVs; - }; - - retrieveFormattedHistory = () => { - return this.formattedHistory; - }; - - retrieveDocIds = () => { - return this.docIds; - }; - - /** - * Handles follow-up questions when the user clicks on them. - * Automatically sets the input value to the clicked follow-up question. - * @param question The follow-up question clicked by the user. - */ - @action - handleFollowUpClick = (question: string) => { - console.log('Follow-up question clicked:', question); - this.inputValue = question; - }; - - /** - * Renders the chat interface, including the message list, input field, and other UI elements. - */ - render() { - return ( -
- {this.isUploadingDocs && ( -
-
- -
{this.currentStep}
-
-
- )} -
-

{this.userName()}'s AI Assistant

-
-
- {this.history.map((message, index) => ( - - ))} - {this.current_message && ( - - )} -
-
- (this.inputValue = e.target.value)} /> - -
- {/* Popup for citation */} - {this.citationPopup.visible && ( -
-

- Text from your document: {this.citationPopup.text} -

-
- )} -
- ); - } -} - -/** - * Register the ChatBox component as the template for CHAT document types. - */ -Docs.Prototypes.TemplateMap.set(DocumentType.CHAT, { - layout: { view: ChatBox, dataField: 'data' }, - options: { acl: '', chat: '', chat_history: '', chat_thread_id: '', chat_assistant_id: '', chat_vector_store_id: '' }, -}); diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/ChatBox/MessageComponent.tsx deleted file mode 100644 index 812e52ee0..000000000 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ /dev/null @@ -1,105 +0,0 @@ -import React, { useState } from 'react'; -import { observer } from 'mobx-react'; -import { AssistantMessage, Citation, MessageContent, PROCESSING_TYPE, ProcessingInfo, TEXT_TYPE } from './types'; -import ReactMarkdown from 'react-markdown'; - -interface MessageComponentProps { - message: AssistantMessage; - index: number; - onFollowUpClick: (question: string) => void; - onCitationClick: (citation: Citation) => void; - updateMessageCitations: (index: number, citations: Citation[]) => void; -} - -const MessageComponentBox: React.FC = function ({ message, index, onFollowUpClick, onCitationClick, updateMessageCitations }) { - const [dropdownOpen, setDropdownOpen] = useState(false); - - const renderContent = (item: MessageContent) => { - const i = item.index; - //console.log('item', item, 'index', i); - if (item.type === TEXT_TYPE.GROUNDED) { - const citation_ids = item.citation_ids || []; - return ( - - {item.text} - {citation_ids.map((id, idx) => { - const citation = message.citations?.find(c => c.citation_id === id); - if (!citation) return null; - return ( - - ); - })} - - ); - } else if (item.type === TEXT_TYPE.NORMAL) { - return ( - - {item.text} - - ); - } else if ('query' in item) { - return ( - - {JSON.stringify(item.query)} - - ); - } else { - return ( - - {JSON.stringify(item)} - - ); - } - }; - - const hasProcessingInfo = message.processing_info && message.processing_info.length 
> 0; - - const renderProcessingInfo = (info: ProcessingInfo) => { - if (info.type === PROCESSING_TYPE.THOUGHT) { - return ( -
- Thought: {info.content} -
- ); - } else if (info.type === PROCESSING_TYPE.ACTION) { - return ( -
- Action: {info.content} -
- ); - } else { - return null; - } - }; - - return ( -
- {hasProcessingInfo && ( -
- - {dropdownOpen &&
{message.processing_info.map(renderProcessingInfo)}
} -
-
- )} -
{message.content && message.content.map(messageFragment => {renderContent(messageFragment)})}
- {message.follow_up_questions && message.follow_up_questions.length > 0 && ( -
-

Follow-up Questions:

-
- {message.follow_up_questions.map((question, idx) => ( - - ))} -
-
- )} -
- ); -}; - -export default observer(MessageComponentBox); diff --git a/src/client/views/nodes/ChatBox/ProgressBar.scss b/src/client/views/nodes/ChatBox/ProgressBar.scss deleted file mode 100644 index ff5be4a38..000000000 --- a/src/client/views/nodes/ChatBox/ProgressBar.scss +++ /dev/null @@ -1,69 +0,0 @@ -.spinner-container { - display: flex; - flex-direction: column; - justify-content: center; - align-items: center; - height: 100%; -} - -.spinner { - width: 60px; - height: 60px; - position: relative; - margin-bottom: 20px; // Space between spinner and text -} - -.double-bounce1, -.double-bounce2 { - width: 100%; - height: 100%; - border-radius: 50%; - background-color: #4a90e2; - opacity: 0.6; - position: absolute; - top: 0; - left: 0; - animation: bounce 2s infinite ease-in-out; -} - -.double-bounce2 { - animation-delay: -1s; -} - -@keyframes bounce { - 0%, - 100% { - transform: scale(0); - } - 50% { - transform: scale(1); - } -} - -.uploading-overlay { - position: absolute; - top: 0; - left: 0; - right: 0; - bottom: 0; - background-color: rgba(255, 255, 255, 0.8); - display: flex; - align-items: center; - justify-content: center; - z-index: 1000; -} - -.progress-container { - display: flex; - flex-direction: column; - align-items: center; - text-align: center; -} - -.step-name { - font-size: 18px; - color: #333; - text-align: center; - width: 100%; - margin-top: -10px; // Adjust to move the text closer to the spinner -} diff --git a/src/client/views/nodes/ChatBox/ProgressBar.tsx b/src/client/views/nodes/ChatBox/ProgressBar.tsx deleted file mode 100644 index 0aa07213f..000000000 --- a/src/client/views/nodes/ChatBox/ProgressBar.tsx +++ /dev/null @@ -1,13 +0,0 @@ -import React from 'react'; -import './ProgressBar.scss'; - -export const ProgressBar: React.FC = () => { - return ( -
-
-
-
-
-
- ); -}; diff --git a/src/client/views/nodes/ChatBox/prompts.ts b/src/client/views/nodes/ChatBox/prompts.ts deleted file mode 100644 index 0a356189b..000000000 --- a/src/client/views/nodes/ChatBox/prompts.ts +++ /dev/null @@ -1,465 +0,0 @@ -// prompts.ts - -import { Tool } from './types'; - -export function getReactPrompt(tools: Tool[], summaries: () => string, chatHistory: string): string { - const toolDescriptions: string = tools - .map( - tool => ` - - ${tool.name} - ${tool.briefSummary} - - ` - ) - .join('\n'); - - return ` - - You are an advanced AI assistant equipped with various tools to answer user queries accurately and efficiently. Your task is to provide a comprehensive response based on the user's prompt using available tools, chat history, and provided information. Follow these guidelines meticulously to ensure the accuracy and structure of your response. - - - - **MOST IMPORTANT**: Always output responses within stage number tags, using the stage number and the system role as the root tag (e.g., , , etc.). This is crucial and should never be overlooked. - **STOP after every stage and wait for the system to provide the next input (e.g., action rules or observations). - Only output **ONE stage at a time** in your responses. Do not skip stages or provide multiple stages at once. Thus, you should only output even stage number root tags. - Always structure your responses using valid, well-formed XML with properly nested tags. - If a tool is needed, ALWAYS select the most appropriate tool based on the user's query. - If the query could relate to user documents or require external information (e.g., RAG, search + website scraping, data analysis), USE the appropriate tool to gather that information. - If there are no user docs or the user docs have not yielded helpful information, use the search tool to find websites followed by the website scraper tool to get useful infromation from one of those websites. 
You can use the website scraper (or even the search tool), multiple times to find information from multiple websites either from the same search or different searches. - Ensure at the end of every final answer, you provide exactly three follow-up questions from the user's perspective—from the perspective that they are asking the question. - Always follow the response structure provided in the instructions. - If a tool doesn't work—or yield helpful results—after two tries, EITHER use another tool or proceed with the response and ask the user for more information or clarification or let them know you cannot answer their question and why. DO NOT CONTINUE WITH THE SAME TOOL 3 TIMES. - Use multiple tools in conjunction with each other to provide a comprehensive answer to the user's query, if needed (i.e. for the prompt "create a CSV showing historical bird migration trends", you could use the search tool and the webscraper tool to get the info, and then use the create CSV tool to create the CSV) - - - - - When providing your final response, use the following structure: - - - <grounded_text> - Wrap text that is derived from tool-based or chunk-based information within these tags, ensuring proper citation. - <normal_text> - Wrap text that is not derived from tool-based or chunk-based information within these tags. - - <citation> - Provide citations for each grounded text, referencing the tool or chunk used. - - - <question> - Include exactly three follow-up questions from the user's perspective within these tags. - - - <loop_summary> - Provide a summary of the actions and tools used by the assistant throughout the interaction within these tags. - - - - - - Wrap all information derived from tools (e.g., RAG, search + website scraping, data analysis)—which will be provided in chunks—in <grounded_text> tags. - DO NOT PUT ANYTHING THAT IS NOT DIRECTLY DERIVED FROM TOOLS OR CHUNKS IN <grounded_text> TAGS. 
- Use a single <grounded_text> tag for sequential and closely related information that references the same citation. - If other citations are used sequentially, create new <grounded_text> tags. - Ensure each <grounded_text> tag has corresponding citations (up to three, and one is fine). Separate multiple citation indices with commas. - Grounded text can be as short as a few words or as long as several sentences. - Avoid overlapping or nesting <grounded_text> tags; use sequential tags instead. - Grounded text tags should always have a citation_index attribute that references a citation index number that the text is grounded in. - Content within the <grounded_text> tags should be in Markdown format. - - - - Wrap all text that is not derived from tools or chunks in <normal_text> tags (any text outputted in the answer that is not in a <grounded_text> tag should be within a normal text tag). - Ensure that these tags are used for your reasoning, background knowledge, or general information that does not require a citation. - Do not use <normal_text> tags for information that needs grounding or citation. - Anything that is in any user docs should be grounded text and cited, not normal text, even if it is background or general information. - Content within the <normal_text> tags should be in Markdown format. - - - - Create a unique citation for each distinct piece of information from tools or chunks that is used to support <grounded_text>. - Ensure each citation has a unique index number. - Specify the correct type: "text", "image", "table", "csv", or "url". - For text-based information, include only the relevant subset of the original information that the <grounded_text> is based on. - For image, table, csv, or url citation types, leave the citation content empty. - ALL CITATIONS MUST use the chunk_id field to reference the source, whether it's from RAG, search + website scraping, data analysis, or any other tool. 
- One citation can be used for multiple <grounded_text> tags if they are based on the same tool or chunk information. - !!!DO NOT OVERCITE - only include citations for information that is directly relevant to the <grounded_text>. - - - - Analyze the user's query carefully. - Determine whether a tool is required to answer the query accurately. - If a tool is necessary: - - Select the most appropriate tool. - Use the <action> tag to specify the tool. - End your response after the <action> tag and wait for action rules to be provided. - Based on the action rules, provide the necessary tool parameters within the <inputs>. The <inputs> tag should be within the <action_input> tag, and should follow an <action_description> tag that contains a brief description of what you're doing with the action. - For each input, you may provide as many different iterations of the same tag (i.e. to provide multiple inputs to the tool) as is specified in the input's max_input's field. - End your response again and wait for the observation from the tool. - - If no tool is needed, use the 'no_tool' action but still follow the same response structure. - If the query might relate to user documents or requires external information, **ALWAYS** use the appropriate tool to retrieve the information (either rag or dataAnalysis). - Once all observations are collected, or if no tool was needed, provide your comprehensive answer within the <answer> tag, using the <grounded_text> and <normal_text> tags as required. - - - - Your final <answer> tag must contain: - - The complete answer to the user's query, with grounded information wrapped in <grounded_text> tags and general information wrapped in <normal_text> tags. - Exactly three follow-up questions written from the user's perspective, enclosed within <follow_up_questions> tags. - A concise <loop_summary> that describes the actions and tools used throughout the interaction. 
- - - - - ${toolDescriptions} - If no external tool is required to answer the question, use the 'no_tool' action. However, if the query might relate to user documents or require external information, do not use 'no_tool'—instead, use the appropriate tool (RAG, search + website scraping, data analysis), even if unsure. - - - - ENSURE THAT YOU ONLY USE TOOLS THAT ANALYZE OR OTHERWISE USE USER DOCS IF THE QUERY APPLIES TO ONE OF THESE USER SUMMARIES (AT LEAST SOMEWHAT). IF THERE ARE NO SUMMARIES, THERE ARE NO USER DOCUMENTS. - - ${summaries()} - - - - - These examples are not, by any means, exhaustive in terms of how tools can be used in conjunction with one another. They simply are to provide you with examples of how to structure your outputs and use some of the tools in some contexts. - YOU ONLY OUTPUT THE ASSISTANT STAGES: - - - ***SYSTEM MESSAGE ELIDED*** - - - Could you provide a detailed overview of the 2010 Vancouver Winter Olympics's impact, including the overall summary of the games, key moments from official statements, and how the medal count compared across countries? - - - - - Since the user has Olympics related docs, I will use the RAG tool to find relevant information from the user's documents, specifically focusing on key moments and statements from an official press release. - - rag - - - - ***Action rules elided*** - - - - - Searching user documents for official statements and key moments of the 2010 Vancouver Winter Olympics. - - - The user is asking for key moments and statements from official sources regarding the 2010 Vancouver Winter Olympics. Search the provided documents for any press releases or official statements that highlight significant events, achievements, or noteworthy aspects of the games. - - - - - - - - PRESS RELEASE: 2010 VANCOUVER WINTER OLYMPICS - - Vancouver, BC - - - The 2010 Winter Olympics, officially known as the XXI Olympic Winter Games, took place in Vancouver, British Columbia, Canada, from February 12 to 28, 2010. 
It featured 86 events in 15 disciplines across 7 sports, with 2,566 athletes from 82 National Olympic Committees participating. This edition of the Winter Olympics was notable for being the first hosted by Canada since the 1988 Winter Olympics in Calgary. - - Overall, the 2010 Winter Olympics have been hailed as a monumental success, showcasing the best of Canadian culture and athletic prowess. The Opening Ceremony, held on February 12, 2010, was a spectacular celebration of the nation's history and diversity... - - Key Moments: - - The Canadian men's and women's ice hockey teams both claimed gold medals, an achievement that brought immense pride to the host nation. - - Tragedy struck early in the games with the death of Georgian luger Nodar Kumaritashvili during a training run. This incident led to increased scrutiny of safety measures across all events... - - ***More chunk content elided for brevity*** - - The games concluded on February 28, 2010, with a memorable Closing Ceremony that highlighted the achievements of the athletes and the spirit of unity that defined the 2010 Vancouver Winter Olympics. - - - - - - With information on key moments from the press release, I will now analyze the medal count data and economic impact data to provide a comprehensive overview of the 2010 Vancouver Winter Olympics's impact. - - dataAnalysis - - - - ***Action rules elided*** - - - - - Analyzing the medal count data and economic impact data for the 2010 Vancouver Winter Olympics to compare country performances and assess the games' overall impact. 
- - 2010_Vancouver_Olympics_Medal_Count.csv - 2010_Vancouver_Olympics_Economic_Impact.csv - - - - - - - Country,Gold,Silver,Bronze,Total - USA,9,15,13,37 - Germany,10,13,7,30 - Canada,14,7,5,26 - Norway,9,8,6,23 - Russia,3,5,7,15 - South Korea,6,6,2,14 - China,5,2,4,11 - Sweden,5,2,4,11 - France,2,3,6,11 - Austria,4,6,6,16 - - - Category,Value - Total Cost (CAD),7.7 billion - Revenue Generated (CAD),2.5 billion - Jobs Created,45000 - Tourism Increase (%),35 - Infrastructure Investment (CAD),1.9 billion - Post-Olympic Annual Tourism Revenue Increase (CAD),220 million - Venues Repurposed,8 - Carbon Footprint Offset (%),118 - - - - - - Now that I have gathered comprehensive information from the official press release, medal count data, and economic impact data, I will provide a detailed answer to the user's query, incorporating all aspects of the 2010 Vancouver Winter Olympics's impact. - - - - The 2010 Winter Olympics, officially known as the XXI Olympic Winter Games, took place in Vancouver, British Columbia, Canada, from February 12 to 28, 2010. It featured 86 events in 15 disciplines across 7 sports, with 2,566 athletes from 82 National Olympic Committees participating. - - - - The Opening Ceremony was one of the most spectacular in Olympic history, celebrating Canadian culture and heritage. A significant highlight for the host nation was the victory of the Canadian men's and women's ice hockey teams, both securing gold medals, which was a source of immense national pride. - - - - In terms of medal performance, the United States led the overall tally with 37 medals (9 gold, 15 silver, 13 bronze), followed by Germany with 30 medals (10 gold, 13 silver, 7 bronze). Canada, while placing third in total medals with 26, achieved a record number of 14 gold medals, the highest of any country in the 2010 games. Other notable performances came from Norway (23 medals), Russia (15 medals), and South Korea (14 medals). 
- - - - The economic impact of the 2010 Vancouver Winter Olympics was significant. The total cost of hosting the games was approximately 7.7 billion CAD, while generating revenue of 2.5 billion CAD. The games created about 45,000 jobs and led to a 35% increase in tourism during the event. The long-term benefits include an annual increase of 220 million CAD in tourism revenue and 1.9 billion CAD invested in infrastructure. - - - - From an environmental perspective, the Vancouver Olympics made efforts to be sustainable, with 118% of the carbon footprint offset and 8 venues repurposed after the games, demonstrating a commitment to leaving a positive legacy. - - - - However, the games were also marked by tragedy with the unfortunate death of Georgian luger Nodar Kumaritashvili during a training run, which deeply affected the Olympic community and led to increased scrutiny of safety measures across all events. - - - - Despite this tragedy, the 2010 Vancouver Winter Olympics were generally hailed as a monumental success, showcasing the best of Canadian culture and athletic prowess. The games concluded on February 28, 2010, with a memorable Closing Ceremony that highlighted the achievements of the athletes and the spirit of unity that defined these Olympics. - - - - - - - - - - - - - - How did the 2010 Vancouver Winter Olympics compare to previous Winter Olympics in terms of economic impact and sustainability efforts? - What specific infrastructure improvements were made in Vancouver as a result of hosting the 2010 Winter Olympics? - How did the performance of Canadian athletes in the 2010 Vancouver Winter Olympics compare to their performance in previous Winter Olympics? - - - The assistant used the RAG tool to gather information from the user's documents, focusing on key moments and official statements. 
It then used the dataAnalysis tool to analyze the medal count and economic impact data, providing a comprehensive overview of the 2010 Vancouver Winter Olympics's impact. - - - - - - - ***SYSTEM MESSAGE ELIDED*** - - - What are the best hiking trails in Yosemite National Park? And which are the safest? - - - - To provide comprehensive information about the best hiking trails in Yosemite National Park, I'll use the search tool to find relevant websites with hiking recommendations, safety information, and general park information. - - searchTool - - - ***Action rules elided*** - - - - Searching the web for information about the best and safest hiking trails in Yosemite National Park. - - Best hiking trails Yosemite National Park - Safest hiking trails Yosemite National Park - Information about hiking in Yosemite National Park - - - - - - - https://www.nps.gov/yose/planyourvisit/hiking.htm - Official National Park Service website with hiking information for Yosemite - - - https://www.alltrails.com/parks/us/california/yosemite-national-park - AllTrails page featuring user-rated hiking trails in Yosemite - - - https://www.yosemitehikes.com/ - Comprehensive guide to Yosemite hikes with detailed trail information - - - - - - I've found several relevant websites about hiking in Yosemite National Park. I'll start by scraping content from the official National Park Service website for the most reliable information, then move on to AllTrails for user ratings and reviews, and finally to YosemiteHikes.com for more detailed trail descriptions. - - websiteInfoScraper - - - ***Action rules elided*** - - - - Scraping information from three websites about hiking trails in Yosemite National Park. 
- - https://www.nps.gov/yose/planyourvisit/hiking.htm - https://www.alltrails.com/parks/us/california/yosemite-national-park - https://www.yosemitehikes.com/ - - - - - - - ***NPS general planning website image elided*** - - - ***AllTrails website image elided*** - - - ***YosemiteHikes.com website image elided*** - - - - - - The information gathered from the three websites provides a good overview of the best hiking trails and safety considerations in Yosemite National Park. However, I noticed that there's limited information about seasonal trail conditions and closures, which could be crucial for planning a safe hike. To address this, I'll scrape additional information from the Yosemite National Park's current conditions page. - - websiteInfoScraper - - - ***Action rules elided*** - - - - Scraping information about current trail conditions and closures in Yosemite National Park. - - https://www.nps.gov/yose/planyourvisit/conditions.htm - - - - - - - ***NPS conditions website image elided*** - - - - - - Now that I have additional information about current trail conditions and seasonal closures, I can provide a more comprehensive and up-to-date answer to the user's query about the best and safest hiking trails in Yosemite National Park. - - - - Yosemite National Park offers a wide range of hiking trails suitable for various skill levels. Some of the best hiking trails in Yosemite, based on the information gathered, include: - - 1. Mist Trail to Vernal Fall: A moderate 3-mile round-trip hike in Yosemite Valley, offering close-up views of waterfalls... ***REST ELIDED FOR BREVITY*** - - - ***CONTENT ELIDED FOR BREVITY*** - - - - ***CONTENT ELIDED FOR BREVITY*** - - - Regarding safety, some of the safest trails for hikers of all skill levels include: - - - 1. Lower Yosemite Fall Trail: An easy 1-mile loop in Yosemite Valley... 
***REST ELIDED FOR BREVITY*** - - - - ***CONTENT ELIDED FOR BREVITY*** - - - To ensure a safe hiking experience in Yosemite, regardless of the trail you choose, follow these safety tips: - - - 1. Stay on marked trails at all times... - ***REST ELIDED FOR BREVITY*** - - - Remember that even the "safest" trails can pose risks if proper precautions are not taken. Always hike within your ability level and be prepared for the specific conditions of the trail you choose. It's essential to check for daily updates on trail conditions before starting your hike, as they can change rapidly, especially during spring and fall. - - - - https://www.nps.gov/yose/planyourvisit/hiking.htm - - - https://www.alltrails.com/parks/us/california/yosemite-national-park - - - https://www.yosemitehikes.com/ - - - https://www.nps.gov/yose/planyourvisit/conditions.htm - - - - What is the best time of year to hike in Yosemite National Park? - Are there any special permits required for hiking certain trails in Yosemite? - What wildlife might I encounter while hiking in Yosemite, and how should I prepare for potential encounters? - - - The assistant used the search tool to find relevant websites, then scraped information from those sites using the websiteInfoScraper tool. Additional information about current trail conditions was gathered from the NPS website to provide a comprehensive and safe hiking guide. - - - - - - - - ${chatHistory} - Use this history for context when appropriate. - - - - Now, process the user's query and provide your response following the format and rules outlined above. Ensure your final answer is comprehensive, correctly cited, and entirely contained within the structured tags. Do not get stuck in infinite loops and keep responses concise, grounded, and most importantly, HELPFUL AND USEFUL! 
- - -`; -} - -export function getSummarizedChunksPrompt(chunks: string): string { - return `Please provide a comprehensive summary of what you think the document from which these chunks originated. - Ensure the summary captures the main ideas and key points from all provided chunks. Be concise and brief and only provide the summary in paragraph form. - - Text chunks: - \`\`\` - ${chunks} - \`\`\``; -} - -export function getSummarizedSystemPrompt(): string { - return 'You are an AI assistant tasked with summarizing a document. You are provided with important chunks from the document and provide a summary, as best you can, of what the document will contain overall. Be concise and brief with your response.'; -} diff --git a/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts b/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts deleted file mode 100644 index 79b53b0a3..000000000 --- a/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts +++ /dev/null @@ -1,125 +0,0 @@ -import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType, ProcessingInfo } from '../types'; -import { v4 as uuid } from 'uuid'; - -export class AnswerParser { - static parse(xml: string, processingInfo: ProcessingInfo[]): AssistantMessage { - const answerRegex = /([\s\S]*?)<\/answer>/; - const citationsRegex = /([\s\S]*?)<\/citations>/; - const citationRegex = /([\s\S]*?)<\/citation>/g; - const followUpQuestionsRegex = /([\s\S]*?)<\/follow_up_questions>/; - const questionRegex = /(.*?)<\/question>/g; - const groundedTextRegex = /([\s\S]*?)<\/grounded_text>/g; - const normalTextRegex = /([\s\S]*?)<\/normal_text>/g; - const loopSummaryRegex = /([\s\S]*?)<\/loop_summary>/; - - const answerMatch = answerRegex.exec(xml); - const citationsMatch = citationsRegex.exec(xml); - const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml); - const loopSummaryMatch = loopSummaryRegex.exec(xml); - - if (!answerMatch) { - throw new Error('Invalid 
XML: Missing tag.'); - } - - let rawTextContent = answerMatch[1].trim(); - let content: AssistantMessage['content'] = []; - let citations: Citation[] = []; - let contentIndex = 0; - - // Remove citations and follow-up questions from rawTextContent - if (citationsMatch) { - rawTextContent = rawTextContent.replace(citationsMatch[0], '').trim(); - } - if (followUpQuestionsMatch) { - rawTextContent = rawTextContent.replace(followUpQuestionsMatch[0], '').trim(); - } - if (loopSummaryMatch) { - rawTextContent = rawTextContent.replace(loopSummaryMatch[0], '').trim(); - } - - // Parse citations - let citationMatch; - const citationMap = new Map(); - if (citationsMatch) { - const citationsContent = citationsMatch[1]; - while ((citationMatch = citationRegex.exec(citationsContent)) !== null) { - const [_, index, chunk_id, type, direct_text] = citationMatch; - const citation_id = uuid(); - citationMap.set(index, citation_id); - citations.push({ - direct_text: direct_text.trim(), - type: getChunkType(type), - chunk_id, - citation_id, - }); - } - } - - rawTextContent = rawTextContent.replace(normalTextRegex, '$1'); - - // Parse text content (normal and grounded) - let lastIndex = 0; - let match; - - while ((match = groundedTextRegex.exec(rawTextContent)) !== null) { - const [fullMatch, citationIndex, groundedText] = match; - - // Add normal text that is before the grounded text - if (match.index > lastIndex) { - const normalText = rawTextContent.slice(lastIndex, match.index).trim(); - if (normalText) { - content.push({ - index: contentIndex++, - type: TEXT_TYPE.NORMAL, - text: normalText, - citation_ids: null, - }); - } - } - - // Add grounded text - const citation_ids = citationIndex.split(',').map(index => citationMap.get(index) || ''); - content.push({ - index: contentIndex++, - type: TEXT_TYPE.GROUNDED, - text: groundedText.trim(), - citation_ids, - }); - - lastIndex = match.index + fullMatch.length; - } - - // Add any remaining normal text after the last grounded text - if 
(lastIndex < rawTextContent.length) { - const remainingText = rawTextContent.slice(lastIndex).trim(); - if (remainingText) { - content.push({ - index: contentIndex++, - type: TEXT_TYPE.NORMAL, - text: remainingText, - citation_ids: null, - }); - } - } - - let followUpQuestions: string[] = []; - if (followUpQuestionsMatch) { - const questionsText = followUpQuestionsMatch[1]; - let questionMatch; - while ((questionMatch = questionRegex.exec(questionsText)) !== null) { - followUpQuestions.push(questionMatch[1].trim()); - } - } - - const assistantResponse: AssistantMessage = { - role: ASSISTANT_ROLE.ASSISTANT, - content, - follow_up_questions: followUpQuestions, - citations, - processing_info: processingInfo, - loop_summary: loopSummaryMatch ? loopSummaryMatch[1].trim() : undefined, - }; - - return assistantResponse; - } -} diff --git a/src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts b/src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts deleted file mode 100644 index 3585cab4a..000000000 --- a/src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts +++ /dev/null @@ -1,73 +0,0 @@ -import { threadId } from 'worker_threads'; - -enum ParserState { - Outside, - InGroundedText, - InNormalText, -} - -export class StreamedAnswerParser { - private state: ParserState = ParserState.Outside; - private buffer: string = ''; - private result: string = ''; - private isStartOfLine: boolean = true; - - public parse(char: string): string { - switch (this.state) { - case ParserState.Outside: - if (char === '<') { - this.buffer = '<'; - } else if (char === '>') { - if (this.buffer.startsWith('') { - this.state = ParserState.Outside; - this.buffer = ''; - } else if (this.buffer.startsWith('') { - this.state = ParserState.Outside; - this.buffer = ''; - } else if (this.buffer.startsWith('<')) { - this.buffer += char; - } else { - this.processChar(char); - } - break; - } - - return this.result.trim(); - } - - private processChar(char: 
string): void { - if (this.isStartOfLine && char === ' ') { - // Skip leading spaces - return; - } - if (char === '\n') { - this.result += char; - this.isStartOfLine = true; - } else { - this.result += char; - this.isStartOfLine = false; - } - } - - public reset(): void { - this.state = ParserState.Outside; - this.buffer = ''; - this.result = ''; - this.isStartOfLine = true; - } -} diff --git a/src/client/views/nodes/ChatBox/tools.ts b/src/client/views/nodes/ChatBox/tools.ts deleted file mode 100644 index 4035280a8..000000000 --- a/src/client/views/nodes/ChatBox/tools.ts +++ /dev/null @@ -1,26 +0,0 @@ -import { DocCast } from '../../../../fields/Types'; -import { DocServer } from '../../../DocServer'; -import { Docs } from '../../../documents/Documents'; -import { DocUtils } from '../../../documents/DocUtils'; -import { TabDocView } from '../../collections/TabDocView'; -import { DocumentView } from '../DocumentView'; -import { OpenWhere } from '../OpenWhere'; - -export function retrieval(json: any): string { - return ''; -} - -export function create_collection(docView: DocumentView, document_ids: string[], title: string): string { - const docs = document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); - const collection = Docs.Create.FreeformDocument(docs, { title }); - docView._props.addDocTab(collection, OpenWhere.addRight); //in future, create popup prompting user where to add - return 'Collection created in Dash called ' + title; -} - -export function create_link(docView: DocumentView, document_ids: string[]): string { - //Make document_ids a size 2 array - const docs = document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); - const linkDoc = DocUtils.MakeLink(docs[0], docs[1], {})!; - DocumentView.linkCommonAncestor(linkDoc)?.ComponentView?.addDocument?.(linkDoc); - return 'Link created between ' + docs[0].title + ' and ' + docs[1].title; -} diff --git a/src/client/views/nodes/ChatBox/tools/BaseTool.ts 
b/src/client/views/nodes/ChatBox/tools/BaseTool.ts deleted file mode 100644 index 2e2267653..000000000 --- a/src/client/views/nodes/ChatBox/tools/BaseTool.ts +++ /dev/null @@ -1,24 +0,0 @@ -import { Tool } from '../types'; - -export abstract class BaseTool = Record> implements Tool { - constructor( - public name: string, - public description: string, - public parameters: Record, - public citationRules: string, - public briefSummary: string - ) {} - - abstract execute(args: T): Promise; - - getActionRule(): Record { - return { - [this.name]: { - name: this.name, - citationRules: this.citationRules, - description: this.description, - parameters: this.parameters, - }, - }; - } -} diff --git a/src/client/views/nodes/ChatBox/tools/CalculateTool.ts b/src/client/views/nodes/ChatBox/tools/CalculateTool.ts deleted file mode 100644 index 74b7ca27b..000000000 --- a/src/client/views/nodes/ChatBox/tools/CalculateTool.ts +++ /dev/null @@ -1,26 +0,0 @@ -import { BaseTool } from './BaseTool'; - -export class CalculateTool extends BaseTool<{ expression: string }> { - constructor() { - super( - 'calculate', - 'Perform a calculation', - { - expression: { - type: 'string', - description: 'The mathematical expression to evaluate', - required: 'true', - max_inputs: '1', - }, - }, - 'Provide a mathematical expression to calculate that would work with JavaScript eval().', - 'Runs a calculation and returns the number - uses JavaScript so be sure to use floating point syntax if necessary' - ); - } - - async execute(args: { expression: string }): Promise { - // Note: Using eval() can be dangerous. Consider using a safer alternative. 
- const result = eval(args.expression); - return [{ type: 'text', text: result.toString() }]; - } -} diff --git a/src/client/views/nodes/ChatBox/tools/CreateCSVTool.ts b/src/client/views/nodes/ChatBox/tools/CreateCSVTool.ts deleted file mode 100644 index 55015846b..000000000 --- a/src/client/views/nodes/ChatBox/tools/CreateCSVTool.ts +++ /dev/null @@ -1,51 +0,0 @@ -import { BaseTool } from './BaseTool'; -import { Networking } from '../../../../Network'; - -export class CreateCSVTool extends BaseTool<{ csvData: string; filename: string }> { - private _handleCSVResult: (url: string, filename: string, id: string, data: string) => void; - - constructor(handleCSVResult: (url: string, title: string, id: string, data: string) => void) { - super( - 'createCSV', - 'Creates a CSV file from raw CSV data and saves it to the server', - { - type: 'object', - properties: { - csvData: { - type: 'string', - description: 'A string of comma-separated values representing the CSV data.', - }, - filename: { - type: 'string', - description: 'The base name of the CSV file to be created. Should end in ".csv".', - }, - }, - required: ['csvData', 'filename'], - }, - 'Provide a CSV string and a filename to create a CSV file.', - 'Creates a CSV file from the provided CSV string and saves it to the server with a unique identifier, returning the file URL and UUID.' - ); - this._handleCSVResult = handleCSVResult; - } - - async execute(args: { csvData: string; filename: string }): Promise { - try { - console.log('Creating CSV file:', args.filename, ' with data:', args.csvData); - // Post the raw CSV data to the createCSV endpoint on the server - const { fileUrl, id } = await Networking.PostToServer('/createCSV', { filename: args.filename, data: args.csvData }); - - // Handle the result by invoking the callback - this._handleCSVResult(fileUrl, args.filename, id, args.csvData); - - return [ - { - type: 'text', - text: `File successfully created: ${fileUrl}. 
\nNow a CSV file with this data and the name ${args.filename} is available as a user doc.`, - }, - ]; - } catch (error) { - console.error('Error creating CSV file:', error); - throw new Error('Failed to create CSV file.'); - } - } -} diff --git a/src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts b/src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts deleted file mode 100644 index 573428179..000000000 --- a/src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts +++ /dev/null @@ -1,36 +0,0 @@ -import { DocCast } from '../../../../../fields/Types'; -import { DocServer } from '../../../../DocServer'; -import { Docs } from '../../../../documents/Documents'; -import { DocumentView } from '../../DocumentView'; -import { OpenWhere } from '../../OpenWhere'; -import { BaseTool } from './BaseTool'; - -export class GetDocsContentTool extends BaseTool<{ title: string; document_ids: string[] }> { - private _docView: DocumentView; - constructor(docView: DocumentView) { - super( - 'retrieveDocs', - 'Retrieves the contents of all Documents that the user is interacting with in Dash ', - { - title: { - type: 'string', - description: 'the title of the collection that you will be making', - required: 'true', - max_inputs: '1', - }, - }, - 'Provide a mathematical expression to calculate that would work with JavaScript eval().', - 'Runs a calculation and returns the number - uses JavaScript so be sure to use floating point syntax if necessary' - ); - this._docView = docView; - } - - async execute(args: { title: string; document_ids: string[] }): Promise { - // Note: Using eval() can be dangerous. Consider using a safer alternative. 
- const docs = args.document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); - const collection = Docs.Create.FreeformDocument(docs, { title: args.title }); - this._docView._props.addDocTab(collection, OpenWhere.addRight); //in future, create popup prompting user where to add - return [{ type: 'text', text: 'Collection created in Dash called ' + args.title }]; - } -} -//export function create_collection(docView: DocumentView, document_ids: string[], title: string): string {} diff --git a/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts b/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts deleted file mode 100644 index a12ee46e5..000000000 --- a/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts +++ /dev/null @@ -1,59 +0,0 @@ -import { BaseTool } from './BaseTool'; - -export class DataAnalysisTool extends BaseTool<{ csv_file_name: string | string[] }> { - private csv_files_function: () => { filename: string; id: string; text: string }[]; - - constructor(csv_files: () => { filename: string; id: string; text: string }[]) { - super( - 'dataAnalysis', - 'Analyzes, and provides insights, from one or more CSV files', - { - csv_file_name: { - type: 'string', - description: 'Name(s) of the CSV file(s) to analyze', - required: 'true', - max_inputs: '3', - }, - }, - 'Provide the name(s) of up to 3 CSV files to analyze based on the user query and whichever available CSV files may be relevant.', - 'Provides the full CSV file text for your analysis based on the user query and the available CSV file(s). 
' - ); - this.csv_files_function = csv_files; - } - - getFileContent(filename: string): string | undefined { - const files = this.csv_files_function(); - const file = files.find(f => f.filename === filename); - return file?.text; - } - - getFileID(filename: string): string | undefined { - const files = this.csv_files_function(); - const file = files.find(f => f.filename === filename); - return file?.id; - } - - async execute(args: { csv_file_name: string | string[] }): Promise { - const filenames = Array.isArray(args.csv_file_name) ? args.csv_file_name : [args.csv_file_name]; - const results = []; - - for (const filename of filenames) { - const fileContent = this.getFileContent(filename); - const fileID = this.getFileID(filename); - - if (fileContent && fileID) { - results.push({ - type: 'text', - text: `${fileContent}`, - }); - } else { - results.push({ - type: 'text', - text: `File not found: ${filename}`, - }); - } - } - - return results; - } -} diff --git a/src/client/views/nodes/ChatBox/tools/GetDocsTool.ts b/src/client/views/nodes/ChatBox/tools/GetDocsTool.ts deleted file mode 100644 index f970ca8ee..000000000 --- a/src/client/views/nodes/ChatBox/tools/GetDocsTool.ts +++ /dev/null @@ -1,29 +0,0 @@ -import { DocCast } from '../../../../../fields/Types'; -import { DocServer } from '../../../../DocServer'; -import { Docs } from '../../../../documents/Documents'; -import { DocumentView } from '../../DocumentView'; -import { OpenWhere } from '../../OpenWhere'; -import { BaseTool } from './BaseTool'; - -export class GetDocsTool extends BaseTool<{ title: string; document_ids: string[] }> { - private _docView: DocumentView; - constructor(docView: DocumentView) { - super( - 'retrieveDocs', - 'Retrieves the contents of all Documents that the user is interacting with in Dash', - {}, - 'No need to provide anything. 
Just run the tool and it will retrieve the contents of all Documents that the user is interacting with in Dash.', - 'Returns the the documents in Dash in JSON form. This will include the title of the document, the location in the FreeFormDocument, and the content of the document, any applicable data fields, the layout of the document, etc.' - ); - this._docView = docView; - } - - async execute(args: { title: string; document_ids: string[] }): Promise { - // Note: Using eval() can be dangerous. Consider using a safer alternative. - const docs = args.document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); - const collection = Docs.Create.FreeformDocument(docs, { title: args.title }); - this._docView._props.addDocTab(collection, OpenWhere.addRight); //in future, create popup prompting user where to add - return [{ type: 'text', text: 'Collection created in Dash called ' + args.title }]; - } -} -//export function create_collection(docView: DocumentView, document_ids: string[], title: string): string {} diff --git a/src/client/views/nodes/ChatBox/tools/NoTool.ts b/src/client/views/nodes/ChatBox/tools/NoTool.ts deleted file mode 100644 index 1f0830a77..000000000 --- a/src/client/views/nodes/ChatBox/tools/NoTool.ts +++ /dev/null @@ -1,18 +0,0 @@ -// tools/NoTool.ts -import { BaseTool } from './BaseTool'; - -export class NoTool extends BaseTool<{}> { - constructor() { - super( - 'no_tool', - 'Use this when no external tool or action is required to answer the question.', - {}, - 'When using the "no_tool" action, simply provide an empty element. The observation will always be "No tool used. Proceed with answering the question."', - 'Use when no external tool or action is required to answer the question.' - ); - } - - async execute(args: {}): Promise { - return [{ type: 'text', text: 'No tool used. Proceed with answering the question.' 
}]; - } -} diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts deleted file mode 100644 index 544b9daba..000000000 --- a/src/client/views/nodes/ChatBox/tools/RAGTool.ts +++ /dev/null @@ -1,138 +0,0 @@ -import { BaseTool } from './BaseTool'; -import { Vectorstore } from '../vectorstore/Vectorstore'; -import { RAGChunk } from '../types'; -import * as fs from 'fs'; -import { Networking } from '../../../../Network'; -import { file } from 'jszip'; -import { ChatCompletion, ChatCompletionContentPart, ChatCompletionMessageParam } from 'openai/resources'; - -export class RAGTool extends BaseTool { - constructor(private vectorstore: Vectorstore) { - super( - 'rag', - 'Perform a RAG search on user documents', - { - hypothetical_document_chunk: { - type: 'string', - description: - "Detailed version of the prompt that is effectively a hypothetical document chunk that would be ideal to embed and compare to the vectors of real document chunks to fetch the most relevant document chunks to answer the user's query", - required: 'true', - }, - }, - ` - Your task is to provide a comprehensive response to the user's prompt based on the given chunks and chat history. Follow these structural guidelines meticulously: - - 1. Overall Structure: - - [Main content with grounded_text tags interspersed with normal plain text (information that is not derived from chunks' information)] - - [Individual citation tags] - - - [Three question tags] - - - - 2. Grounded Text Tag Structure: - - Basic format: - - [Your generated text based on information from a subset of a chunk (a citation's direct text)] - - - 3. Citation Tag Structure: - - [For text: relevant subset of original chunk] - [For image/table: leave empty] - - - 4. Detailed Grounded Text Guidelines: - a. Wrap all information derived from chunks in grounded_text tags. - b. 
DO NOT PUT ANYTHING THAT IS NOT DIRECTLY DERIVED FROM INFORMATION FROM CHUNKS (EITHER IMAGE, TABLE, OR TEXT) IN GROUNDED_TEXT TAGS. - c. Use a single grounded_text tag for suquential and closely related information that references the same citation. If other citations' information are used sequentially, create new grounded_text tags. - d. Ensure every grounded_text tag has up to a few corresponding citations (should not be more than 3 and only 1 is fine). Multiple citation indices should be separated by commas. - e. Grounded text can be as short as a few words or as long as several sentences. - f. Avoid overlapping or nesting grounded_text tags; instead, use sequential tags. - - 5. Detailed Citation Guidelines: - a. Create a unique citation for each distinct piece of information from the chunks that is used to support grounded_text. - b. ALL TEXT CITATIONS must have direct text in its element content (e.g. DIRECT TEXT HERE) that is a relevant SUBSET of the original text chunk that is being cited specifically. - c. DO NOT paraphrase or summarize the text; use the original text as much as possible. - d. DO NOT USE THE FULL TEXT CHUNK as the citation content; only use the relevant subset of the text that the grounded_text is base. AS SHORT AS POSSIBLE WHILE PROVIDING INFORMATION (ONE TO TWO SENTENCES USUALLY)! - e. Ensure each citation has a unique index number. - f. Specify the correct type: "text", "image", or "table". - g. For text chunks, the content of the citation should ALWAYS have the relevant subset of the original text that the grounded_text is based on. - h. For image/table chunks, leave the citation content empty. - i. One citation can be used for multiple grounded_text tags if they are based on the same chunk information. - j. !!!DO NOT OVERCITE - only include citations for information that is directly relevant to the grounded_text. - - 6. Structural Integrity Checks: - a. Ensure all opening tags have corresponding closing tags. - b. 
Verify that all grounded_text tags have valid citation_index attributes (they should be equal to the associated citation(s) index field—not their chunk_id field). - c. Check that all cited indices in grounded_text tags have corresponding citations. - - Example of grounded_text usage: - - - - Artificial Intelligence (AI) is revolutionizing various sectors, with healthcare experiencing significant transformations in areas such as diagnosis and treatment planning. - - - In the field of medical diagnosis, AI has shown remarkable capabilities, particularly in radiology. For instance, AI systems have drastically improved mammogram analysis, achieving 99% accuracy at a rate 30 times faster than human radiologists. - - - This advancement not only enhances the efficiency of healthcare systems but also significantly reduces the occurrence of false positives, leading to fewer unnecessary biopsies and reduced patient stress. - - - - Beyond diagnosis, AI is playing a crucial role in drug discovery and development. By analyzing vast amounts of genetic and molecular data, AI algorithms can identify potential drug candidates much faster than traditional methods. - - - This could potentially reduce the time and cost of bringing new medications to market, especially for rare diseases that have historically received less attention due to limited market potential. - - - [... rest of the content ...] - - - Artificial Intelligence is revolutionizing various industries, with healthcare being one of the most profoundly affected sectors. - AI has shown particular promise in the field of radiology, enhancing the accuracy and speed of image analysis. - According to recent studies, AI systems have achieved 99% accuracy in mammogram analysis, performing the task 30 times faster than human radiologists. - The improvement in mammogram accuracy has led to a significant reduction in false positives, decreasing the need for unnecessary biopsies and reducing patient anxiety. 
- AI is accelerating the drug discovery process by analyzing complex molecular and genetic data to identify potential drug candidates. - The use of AI in drug discovery could significantly reduce the time and cost associated with bringing new medications to market, particularly for rare diseases. - - - - How might AI-driven personalized medicine impact the cost and accessibility of healthcare in the future? - What measures can be taken to ensure that AI systems in healthcare are free from biases and equally effective for diverse populations? - How could the role of healthcare professionals evolve as AI becomes more integrated into medical practices? - - - `, - - `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a - set of document chunks (either images or text) that can be used to provide a grounded response based on - user documents` - ); - } - - async execute(args: { hypothetical_document_chunk: string }): Promise { - const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk); - const formatted_chunks = await this.getFormattedChunks(relevantChunks); - return formatted_chunks; - } - - async getFormattedChunks(relevantChunks: RAGChunk[]): Promise { - try { - const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks }); - - if (!formattedChunks) { - throw new Error('Failed to format chunks'); - } - - return formattedChunks; - } catch (error) { - console.error('Error formatting chunks:', error); - throw error; - } - } -} diff --git a/src/client/views/nodes/ChatBox/tools/SearchTool.ts b/src/client/views/nodes/ChatBox/tools/SearchTool.ts deleted file mode 100644 index b926cbadc..000000000 --- a/src/client/views/nodes/ChatBox/tools/SearchTool.ts +++ /dev/null @@ -1,54 +0,0 @@ -import { max } from 'lodash'; -import { Networking } from '../../../../Network'; -import { BaseTool } from './BaseTool'; -import { v4 as uuidv4 } from 'uuid'; - -export class SearchTool extends 
BaseTool<{ query: string | string[] }> { - private _addLinkedUrlDoc: (url: string, id: string) => void; - private _max_results: number; - constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 5) { - super( - 'searchTool', - 'Search the web to find a wide range of websites related to a query or multiple queries', - { - query: { - type: 'string', - description: 'The search query or queries to use for finding websites', - required: 'true', - max_inputs: '3', - }, - }, - 'Provide up to 3 search queries to find a broad range of websites. This tool is intended to help you identify relevant websites, but not to be used for providing the final answer. Use this information to determine which specific website to investigate further.', - 'Returns a list of websites and their overviews based on the search queries, helping to identify which websites might contain relevant information.' - ); - this._addLinkedUrlDoc = addLinkedUrlDoc; - this._max_results = max_results; - } - - async execute(args: { query: string | string[] }): Promise { - const queries = Array.isArray(args.query) ? 
args.query : [args.query]; - const allResults = []; - - for (const query of queries) { - try { - const { results } = await Networking.PostToServer('/getWebSearchResults', { query, max_results: this._max_results }); - const data: { type: string; text: string }[] = results.map((result: { url: string; snippet: string }) => { - const id = uuidv4(); - return { - type: 'text', - text: ` - ${result.url} - ${result.snippet} - `, - }; - }); - allResults.push(...data); - } catch (error) { - console.log(error); - allResults.push({ type: 'text', text: `An error occurred while performing the web search for query: ${query}` }); - } - } - - return allResults; - } -} diff --git a/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts deleted file mode 100644 index 4588b5aec..000000000 --- a/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts +++ /dev/null @@ -1,43 +0,0 @@ -import { Networking } from '../../../../Network'; -import { BaseTool } from './BaseTool'; -import { v4 as uuidv4 } from 'uuid'; - -export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> { - private _addLinkedUrlDoc: (url: string, id: string) => void; - - constructor(addLinkedUrlDoc: (url: string, id: string) => void) { - super( - 'websiteInfoScraper', - 'Scrape detailed information from specific websites identified as relevant to the user query', - { - url: { - type: 'string', - description: 'The URL(s) of the website(s) to scrape', - required: 'true', - max_inputs: '3', - }, - }, - 'Provide up to 3 URLs of websites that you have identified as the most relevant from the previous search. This tool will provide the text content of those specific websites. 
When providing a final response to the user based on information from these chunks, ideally cite as many of the url chunks as possible (ground your infromation from multiple sources, if possible) in order to provide a well grounded result.', - 'Returns the text content of the webpages for analysis.' - ); - this._addLinkedUrlDoc = addLinkedUrlDoc; - } - - async execute(args: { url: string | string[] }): Promise { - const urls = Array.isArray(args.url) ? args.url : [args.url]; - const results = []; - - for (const url of urls) { - try { - const { website_plain_text } = await Networking.PostToServer('/scrapeWebsite', { url }); - const id = uuidv4(); - this._addLinkedUrlDoc(url, id); - results.push({ type: 'text', text: `\n${website_plain_text}\n\n` }); - } catch (error) { - results.push({ type: 'text', text: `An error occurred while scraping the website: ${url}` }); - } - } - - return results; - } -} diff --git a/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts b/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts deleted file mode 100644 index 143d91d80..000000000 --- a/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts +++ /dev/null @@ -1,37 +0,0 @@ -import { title } from 'process'; -import { Networking } from '../../../../Network'; -import { BaseTool } from './BaseTool'; -import axios from 'axios'; -import { v4 as uuidv4 } from 'uuid'; - -export class WikipediaTool extends BaseTool<{ title: string }> { - private _addLinkedUrlDoc: (url: string, id: string) => void; - constructor(addLinkedUrlDoc: (url: string, id: string) => void) { - super( - 'wikipedia', - 'Search Wikipedia and return a summary', - { - title: { - type: 'string', - description: 'The title of the Wikipedia article to search', - required: true, - }, - }, - 'Provide simply the title you want to search on Wikipedia and nothing more. 
If re-using this tool, try a different title for different information.', - 'Returns a summary from searching an article title on Wikipedia' - ); - this._addLinkedUrlDoc = addLinkedUrlDoc; - } - - async execute(args: { title: string }): Promise { - try { - const { text } = await Networking.PostToServer('/getWikipediaSummary', { title: args.title }); - const id = uuidv4(); - const url = `https://en.wikipedia.org/wiki/${args.title.replace(/ /g, '_')}`; - this._addLinkedUrlDoc(url, id); - return [{ type: 'text', text: ` ${text} ` }]; - } catch (error) { - return [{ type: 'text', text: 'An error occurred while fetching the article.' }]; - } - } -} diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts deleted file mode 100644 index a12c52592..000000000 --- a/src/client/views/nodes/ChatBox/types.ts +++ /dev/null @@ -1,169 +0,0 @@ -import { breadcrumbsClasses } from '@mui/material'; -import { Doc } from '../../../../fields/Doc'; -import { StrCast } from '../../../../fields/Types'; -import e from 'cors'; -import { index } from 'd3'; - -export enum ASSISTANT_ROLE { - USER = 'user', - ASSISTANT = 'assistant', -} - -export enum TEXT_TYPE { - NORMAL = 'normal', - GROUNDED = 'grounded', - ERROR = 'error', -} - -export enum CHUNK_TYPE { - TEXT = 'text', - IMAGE = 'image', - TABLE = 'table', - URL = 'url', - CSV = 'CSV', -} - -export enum PROCESSING_TYPE { - THOUGHT = 'thought', - ACTION = 'action', - //eventually migrate error to here -} - -export function getChunkType(type: string): CHUNK_TYPE { - switch (type.toLowerCase()) { - case 'text': - return CHUNK_TYPE.TEXT; - break; - case 'image': - return CHUNK_TYPE.IMAGE; - break; - case 'table': - return CHUNK_TYPE.TABLE; - break; - case 'CSV': - return CHUNK_TYPE.CSV; - break; - case 'url': - return CHUNK_TYPE.URL; - break; - default: - return CHUNK_TYPE.TEXT; - break; - } -} - -export interface ProcessingInfo { - index: number; - type: PROCESSING_TYPE; - content: string; -} - -export 
interface AssistantMessage { - role: ASSISTANT_ROLE; - content: MessageContent[]; - follow_up_questions?: string[]; - citations?: Citation[]; - processing_info: ProcessingInfo[]; - loop_summary?: string; -} - -export interface MessageContent { - index: number; - type: TEXT_TYPE; - text: string; - citation_ids: string[] | null; -} - -export interface Citation { - direct_text?: string; - type: CHUNK_TYPE; - chunk_id: string; - citation_id: string; - url?: string; -} - -export interface RAGChunk { - id: string; - values: number[]; - metadata: { - text: string; - type: CHUNK_TYPE; - original_document: string; - file_path: string; - doc_id: string; - location: string; - start_page: number; - end_page: number; - base64_data?: string | undefined; - page_width?: number | undefined; - page_height?: number | undefined; - }; -} - -export interface SimplifiedChunk { - chunkId: string; - startPage: number; - endPage: number; - location?: string; - chunkType: CHUNK_TYPE; - url?: string; - canDisplay?: boolean; -} - -export interface AI_Document { - purpose: string; - file_name: string; - num_pages: number; - summary: string; - chunks: RAGChunk[]; - type: string; -} - -export interface Tool = Record> { - name: string; - description: string; - parameters: Record; - citationRules: string; - briefSummary: string; - execute: (args: T) => Promise; - getActionRule: () => Record; -} - -export interface AgentMessage { - role: 'system' | 'user' | 'assistant'; - content: string | { type: string; text?: string; image_url?: { url: string } }[]; -} - -// export function convertToAIDocument(json: any): AI_Document { -// if (!json) { -// throw new Error('Invalid JSON object'); -// } - -// const chunks: Chunk[] = json.chunks.map((chunk: any) => ({ -// id: chunk.id, -// values: chunk.values, -// metadata: { -// text: chunk.metadata.text, -// type: chunk.metadata.type as CHUNK_TYPE, // Ensure type casting -// original_document: chunk.metadata.original_document, -// file_path: 
chunk.metadata.file_path, -// location: chunk.metadata.location, -// start_page: chunk.metadata.start_page, -// end_page: chunk.metadata.end_page, -// base64_data: chunk.metadata.base64_data, -// width: chunk.metadata.width, -// height: chunk.metadata.height, -// }, -// })); - -// const aiDocument: AI_Document = { -// purpose: json.purpose, -// file_name: json.file_name, -// num_pages: json.num_pages, -// summary: json.summary, -// chunks: chunks, -// type: json.type, -// }; - -// return aiDocument; -// } diff --git a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts deleted file mode 100644 index cc3b1ccd5..000000000 --- a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts +++ /dev/null @@ -1,258 +0,0 @@ -import { Pinecone, Index, IndexList, PineconeRecord, RecordMetadata, QueryResponse } from '@pinecone-database/pinecone'; -import { CohereClient } from 'cohere-ai'; -import { EmbedResponse } from 'cohere-ai/api'; -import dotenv from 'dotenv'; -import { RAGChunk, AI_Document, CHUNK_TYPE } from '../types'; -import { Doc } from '../../../../../fields/Doc'; -import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types'; -import { Networking } from '../../../../Network'; - -dotenv.config(); - -/** - * The Vectorstore class integrates with Pinecone for vector-based document indexing and retrieval, - * and Cohere for text embedding. It handles AI document management, uploads, and query-based retrieval. - */ -export class Vectorstore { - private pinecone: Pinecone; // Pinecone client for managing the vector index. - private index!: Index; // The specific Pinecone index used for document chunks. - private cohere: CohereClient; // Cohere client for generating embeddings. - private indexName: string = 'pdf-chatbot'; // Default name for the index. - private _id: string; // Unique ID for the Vectorstore instance. 
- private _doc_ids: string[] = []; // List of document IDs handled by this instance. - - documents: AI_Document[] = []; // Store the documents indexed in the vectorstore. - - /** - * Constructor initializes the Pinecone and Cohere clients, sets up the document ID list, - * and initializes the Pinecone index. - * @param id The unique identifier for the vectorstore instance. - * @param doc_ids A function that returns a list of document IDs. - */ - constructor(id: string, doc_ids: () => string[]) { - const pineconeApiKey = process.env.PINECONE_API_KEY; - if (!pineconeApiKey) { - throw new Error('PINECONE_API_KEY is not defined.'); - } - - // Initialize Pinecone and Cohere clients with API keys from the environment. - this.pinecone = new Pinecone({ apiKey: pineconeApiKey }); - this.cohere = new CohereClient({ token: process.env.COHERE_API_KEY }); - this._id = id; - this._doc_ids = doc_ids(); - this.initializeIndex(); - } - - /** - * Initializes the Pinecone index by checking if it exists, and creating it if not. - * The index is set to use the cosine metric for vector similarity. - */ - private async initializeIndex() { - const indexList: IndexList = await this.pinecone.listIndexes(); - - // Check if the index already exists, otherwise create it. - if (!indexList.indexes?.some(index => index.name === this.indexName)) { - await this.pinecone.createIndex({ - name: this.indexName, - dimension: 1024, - metric: 'cosine', - spec: { - serverless: { - cloud: 'aws', - region: 'us-east-1', - }, - }, - }); - } - - // Set the index for future use. - this.index = this.pinecone.Index(this.indexName); - } - - /** - * Adds an AI document to the vectorstore. This method handles document chunking, uploading to the - * vectorstore, and updating the progress for long-running tasks like file uploads. - * @param doc The document to be added to the vectorstore. - * @param progressCallback Callback to update the progress of the upload. 
- */ - async addAIDoc(doc: Doc, progressCallback: (progress: number, step: string) => void) { - console.log('Adding AI Document:', doc); - const ai_document_status: string = StrCast(doc.ai_document_status); - - // Skip if the document is already in progress or completed. - if (ai_document_status !== undefined && ai_document_status.trim() !== '' && ai_document_status !== '{}') { - if (ai_document_status === 'IN PROGRESS') { - console.log('Already in progress.'); - return; - } - if (!this._doc_ids.includes(StrCast(doc.ai_doc_id))) { - this._doc_ids.push(StrCast(doc.ai_doc_id)); - } - } else { - // Start processing the document. - doc.ai_document_status = 'PROGRESS'; - console.log(doc); - - // Get the local file path (CSV or PDF). - const local_file_path: string = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname; - console.log('Local File Path:', local_file_path); - - if (local_file_path) { - console.log('Creating AI Document...'); - // Start the document creation process by sending the file to the server. - const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); - - // Poll the server for progress updates. - let inProgress: boolean = true; - let result: any = null; - while (inProgress) { - // Polling interval for status updates. - await new Promise(resolve => setTimeout(resolve, 2000)); - - // Check if the job is completed. - const resultResponse = await Networking.FetchFromServer(`/getResult/${jobId}`); - const resultResponseJson = JSON.parse(resultResponse); - if (resultResponseJson.status === 'completed') { - console.log('Result here:', resultResponseJson); - result = resultResponseJson; - break; - } - - // Fetch progress information and update the progress callback. 
- const progressResponse = await Networking.FetchFromServer(`/getProgress/${jobId}`); - const progressResponseJson = JSON.parse(progressResponse); - if (progressResponseJson) { - const progress = progressResponseJson.progress; - const step = progressResponseJson.step; - progressCallback(progress, step); - } - } - - // Once completed, process the document and add it to the vectorstore. - console.log('Document JSON:', result); - this.documents.push(result); - await this.indexDocument(result); - console.log(`Document added: ${result.file_name}`); - - // Update document metadata such as summary, purpose, and vectorstore ID. - doc.summary = result.summary; - doc.ai_doc_id = result.doc_id; - this._doc_ids.push(result.doc_id); - doc.ai_purpose = result.purpose; - - if (!doc.vectorstore_id) { - doc.vectorstore_id = JSON.stringify([this._id]); - } else { - doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this._id])); - } - - if (!doc.chunk_simpl) { - doc.chunk_simpl = JSON.stringify({ chunks: [] }); - } - - // Process each chunk of the document and update the document's chunk_simpl field. - result.chunks.forEach((chunk: RAGChunk) => { - const chunkToAdd = { - chunkId: chunk.id, - startPage: chunk.metadata.start_page, - endPage: chunk.metadata.end_page, - location: chunk.metadata.location, - chunkType: chunk.metadata.type as CHUNK_TYPE, - text: chunk.metadata.text, - }; - const new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); - new_chunk_simpl.chunks = new_chunk_simpl.chunks.concat(chunkToAdd); - doc.chunk_simpl = JSON.stringify(new_chunk_simpl); - }); - - // Mark the document status as completed. - doc.ai_document_status = 'COMPLETED'; - } - } - } - - /** - * Indexes the processed document by uploading the document's vector chunks to the Pinecone index. - * @param document The processed document containing its chunks and metadata. 
- */ - private async indexDocument(document: any) { - console.log('Uploading vectors to content namespace...'); - - // Prepare Pinecone records for each chunk in the document. - const pineconeRecords: PineconeRecord[] = (document.chunks as RAGChunk[]).map(chunk => ({ - id: chunk.id, - values: chunk.values, - metadata: { ...chunk.metadata } as RecordMetadata, - })); - - // Upload the records to Pinecone. - await this.index.upsert(pineconeRecords); - } - - /** - * Retrieves the top K document chunks relevant to the user's query. - * This involves embedding the query using Cohere, then querying Pinecone for matching vectors. - * @param query The search query string. - * @param topK The number of top results to return (default is 10). - * @returns A list of document chunks that match the query. - */ - async retrieve(query: string, topK: number = 10): Promise { - console.log(`Retrieving chunks for query: ${query}`); - try { - // Generate an embedding for the query using Cohere. - const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({ - texts: [query], - model: 'embed-english-v3.0', - inputType: 'search_query', - }); - - let queryEmbedding: number[]; - - // Extract the embedding from the response. - if (Array.isArray(queryEmbeddingResponse.embeddings)) { - queryEmbedding = queryEmbeddingResponse.embeddings[0]; - } else if (queryEmbeddingResponse.embeddings && 'embeddings' in queryEmbeddingResponse.embeddings) { - queryEmbedding = (queryEmbeddingResponse.embeddings as { embeddings: number[][] }).embeddings[0]; - } else { - throw new Error('Invalid embedding response format'); - } - - if (!Array.isArray(queryEmbedding)) { - throw new Error('Query embedding is not an array'); - } - - // Query the Pinecone index using the embedding and filter by document IDs. 
- const queryResponse: QueryResponse = await this.index.query({ - vector: queryEmbedding, - filter: { - doc_id: { $in: this._doc_ids }, - }, - topK, - includeValues: true, - includeMetadata: true, - }); - - // Map the results into RAGChunks and return them. - return queryResponse.matches.map( - match => - ({ - id: match.id, - values: match.values as number[], - metadata: match.metadata as { - text: string; - type: string; - original_document: string; - file_path: string; - doc_id: string; - location: string; - start_page: number; - end_page: number; - }, - }) as RAGChunk - ); - } catch (error) { - console.error(`Error retrieving chunks: ${error}`); - return []; - } - } -} diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts new file mode 100644 index 000000000..180d05cf3 --- /dev/null +++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts @@ -0,0 +1,278 @@ +import OpenAI from 'openai'; +import { Tool, AgentMessage, AssistantMessage, TEXT_TYPE, CHUNK_TYPE, ASSISTANT_ROLE, ProcessingInfo, PROCESSING_TYPE } from '../types/types'; +import { getReactPrompt } from './prompts'; +import { XMLParser, XMLBuilder } from 'fast-xml-parser'; +import { Vectorstore } from '../vectorstore/Vectorstore'; +import { ChatCompletionMessageParam } from 'openai/resources'; +import dotenv from 'dotenv'; +import { CalculateTool } from '../tools/CalculateTool'; +import { RAGTool } from '../tools/RAGTool'; +import { DataAnalysisTool } from '../tools/DataAnalysisTool'; +import { WebsiteInfoScraperTool } from '../tools/WebsiteInfoScraperTool'; +import { SearchTool } from '../tools/SearchTool'; +import { NoTool } from '../tools/NoTool'; +import { v4 as uuidv4 } from 'uuid'; +import { AnswerParser } from '../response_parsers/AnswerParser'; +import { StreamedAnswerParser } from '../response_parsers/StreamedAnswerParser'; +import { CreateCSVTool } from '../tools/CreateCSVTool'; + +dotenv.config(); + +/** + * The Agent class handles 
the interaction between the assistant and the tools available, + * processes user queries, and manages the communication flow between the tools and OpenAI. + */ +export class Agent { + // Private properties + private client: OpenAI; + private tools: Record>; + private messages: AgentMessage[] = []; + private interMessages: AgentMessage[] = []; + private vectorstore: Vectorstore; + private _history: () => string; + private _summaries: () => string; + private _csvData: () => { filename: string; id: string; text: string }[]; + private actionNumber: number = 0; + private thoughtNumber: number = 0; + private processingNumber: number = 0; + private processingInfo: ProcessingInfo[] = []; + private streamedAnswerParser: StreamedAnswerParser = new StreamedAnswerParser(); + + /** + * The constructor initializes the agent with the vector store and toolset, and sets up the OpenAI client. + * @param _vectorstore Vector store instance for document storage and retrieval. + * @param summaries A function to retrieve document summaries. + * @param history A function to retrieve chat history. + * @param csvData A function to retrieve CSV data linked to the assistant. + * @param addLinkedUrlDoc A function to add a linked document from a URL. + * @param createCSVInDash A function to create a CSV document in the dashboard. 
+ */ + constructor( + _vectorstore: Vectorstore, + summaries: () => string, + history: () => string, + csvData: () => { filename: string; id: string; text: string }[], + addLinkedUrlDoc: (url: string, id: string) => void, + createCSVInDash: (url: string, title: string, id: string, data: string) => void + ) { + // Initialize OpenAI client with API key from environment + this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); + this.vectorstore = _vectorstore; + this._history = history; + this._summaries = summaries; + this._csvData = csvData; + + // Define available tools for the assistant + this.tools = { + calculate: new CalculateTool(), + rag: new RAGTool(this.vectorstore), + dataAnalysis: new DataAnalysisTool(csvData), + websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc), + searchTool: new SearchTool(addLinkedUrlDoc), + createCSV: new CreateCSVTool(createCSVInDash), + no_tool: new NoTool(), + }; + } + + /** + * This method handles the conversation flow with the assistant, processes user queries, + * and manages the assistant's decision-making process, including tool actions. + * @param question The user's question. + * @param onProcessingUpdate Callback function for processing updates. + * @param onAnswerUpdate Callback function for answer updates. + * @param maxTurns The maximum number of turns to allow in the conversation. + * @returns The final response from the assistant. 
+ */ + async askAgent(question: string, onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void, maxTurns: number = 30): Promise { + console.log(`Starting query: ${question}`); + + // Push user's question to message history + this.messages.push({ role: 'user', content: question }); + + // Retrieve chat history and generate system prompt + const chatHistory = this._history(); + const systemPrompt = getReactPrompt(Object.values(this.tools), this._summaries, chatHistory); + + // Initialize intermediate messages + this.interMessages = [{ role: 'system', content: systemPrompt }]; + this.interMessages.push({ role: 'user', content: `${question}` }); + + // Setup XML parser and builder + const parser = new XMLParser({ + ignoreAttributes: false, + attributeNamePrefix: '@_', + textNodeName: '_text', + isArray: (name, jpath, isLeafNode, isAttribute) => ['query', 'url'].indexOf(name) !== -1, + }); + const builder = new XMLBuilder({ ignoreAttributes: false, attributeNamePrefix: '@_' }); + + let currentAction: string | undefined; + this.processingInfo = []; + + // Conversation loop (up to maxTurns) + for (let i = 2; i < maxTurns; i += 2) { + console.log(this.interMessages); + console.log(`Turn ${i}/${maxTurns}`); + + // Execute a step in the conversation and get the result + const result = await this.execute(onProcessingUpdate, onAnswerUpdate); + this.interMessages.push({ role: 'assistant', content: result }); + + let parsedResult; + try { + // Parse XML result from the assistant + parsedResult = parser.parse(result); + } catch (error) { + throw new Error(`Error parsing response: ${error}`); + } + + // Extract the stage from the parsed result + const stage = parsedResult.stage; + if (!stage) { + throw new Error(`Error: No stage found in response`); + } + + // Handle different stage elements (thoughts, actions, inputs, answers) + for (const key in stage) { + if (key === 'thought') { + // Handle assistant's thoughts + 
console.log(`Thought: ${stage[key]}`); + this.processingNumber++; + } else if (key === 'action') { + // Handle action stage + currentAction = stage[key] as string; + console.log(`Action: ${currentAction}`); + + if (this.tools[currentAction]) { + // Prepare the next action based on the current tool + const nextPrompt = [ + { + type: 'text', + text: `` + builder.build({ action_rules: this.tools[currentAction].getActionRule() }) + ``, + }, + ]; + this.interMessages.push({ role: 'user', content: nextPrompt }); + break; + } else { + // Handle error in case of an invalid action + console.log('Error: No valid action'); + this.interMessages.push({ role: 'user', content: `No valid action, try again.` }); + break; + } + } else if (key === 'action_input') { + // Handle action input stage + const actionInput = stage[key]; + console.log(`Action input:`, actionInput.inputs); + + if (currentAction) { + try { + // Process the action with its input + const observation = await this.processAction(currentAction, actionInput.inputs); + const nextPrompt = [{ type: 'text', text: ` ` }, ...observation, { type: 'text', text: '' }]; + console.log(observation); + this.interMessages.push({ role: 'user', content: nextPrompt }); + this.processingNumber++; + break; + } catch (error) { + throw new Error(`Error processing action: ${error}`); + } + } else { + throw new Error('Error: Action input without a valid action'); + } + } else if (key === 'answer') { + // If an answer is found, end the query + console.log('Answer found. Ending query.'); + this.streamedAnswerParser.reset(); + const parsedAnswer = AnswerParser.parse(result, this.processingInfo); + return parsedAnswer; + } + } + } + + throw new Error('Reached maximum turns. Ending query.'); + } + + /** + * Executes a step in the conversation, processing the assistant's response and parsing it in real-time. + * @param onProcessingUpdate Callback for processing updates. + * @param onAnswerUpdate Callback for answer updates. 
+ * @returns The full response from the assistant. + */ + private async execute(onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void): Promise { + // Stream OpenAI response for real-time updates + const stream = await this.client.chat.completions.create({ + model: 'gpt-4o', + messages: this.interMessages as ChatCompletionMessageParam[], + temperature: 0, + stream: true, + }); + + let fullResponse: string = ''; + let currentTag: string = ''; + let currentContent: string = ''; + let isInsideTag: boolean = false; + + // Process each chunk of the streamed response + for await (const chunk of stream) { + let content = chunk.choices[0]?.delta?.content || ''; + fullResponse += content; + + // Parse the streamed content character by character + for (const char of content) { + if (currentTag === 'answer') { + // Handle answer parsing for real-time updates + currentContent += char; + const streamedAnswer = this.streamedAnswerParser.parse(char); + onAnswerUpdate(streamedAnswer); + continue; + } else if (char === '<') { + // Start of a new tag + isInsideTag = true; + currentTag = ''; + currentContent = ''; + } else if (char === '>') { + // End of the tag + isInsideTag = false; + if (currentTag.startsWith('/')) { + currentTag = ''; + } + } else if (isInsideTag) { + // Append characters to the tag name + currentTag += char; + } else if (currentTag === 'thought' || currentTag === 'action_input_description') { + // Handle processing information for thought or action input description + currentContent += char; + const current_info = this.processingInfo.find(info => info.index === this.processingNumber); + if (current_info) { + current_info.content = currentContent.trim(); + onProcessingUpdate(this.processingInfo); + } else { + this.processingInfo.push({ + index: this.processingNumber, + type: currentTag === 'thought' ? 
PROCESSING_TYPE.THOUGHT : PROCESSING_TYPE.ACTION, + content: currentContent.trim(), + }); + onProcessingUpdate(this.processingInfo); + } + } + } + } + + return fullResponse; + } + + /** + * Processes a specific action by invoking the appropriate tool with the provided inputs. + * @param action The action to perform. + * @param actionInput The inputs for the action. + * @returns The result of the action. + */ + private async processAction(action: string, actionInput: any): Promise { + if (!(action in this.tools)) { + throw new Error(`Unknown action: ${action}`); + } + + const tool = this.tools[action]; + return await tool.execute(actionInput); + } +} diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts new file mode 100644 index 000000000..9daabc35f --- /dev/null +++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts @@ -0,0 +1,181 @@ +// prompts.ts + +import { Tool } from '../types/types'; + +export function getReactPrompt(tools: Tool[], summaries: () => string, chatHistory: string): string { + const toolDescriptions = tools + .map( + tool => ` + + ${tool.name} + ${tool.briefSummary} + ` + ) + .join('\n'); + + return ` + + You are an advanced AI assistant equipped with tools to answer user queries efficiently. You operate in a loop that is RIGIDLY structured and requires the use of specific tags and formats for your responses. Your goal is to provide accurate and well-structured answers to user queries. Below are the guidelines and information you can use to structure your approach to accomplishing this task. + + + + **STRUCTURE**: Always use the correct stage tags (e.g., ) for every response. Use only even-numbered stages for your responses. + **STOP after every stage and wait for input. Do not combine multiple stages in one response.** + If a tool is needed, select the most appropriate tool based on the query. 
+ **If one tool does not yield satisfactory results or fails twice, try another tool that might work better for the query.** + Ensure that **ALL answers follow the answer structure**: grounded text wrapped in tags with corresponding citations, normal text in tags, and three follow-up questions at the end. + + + + + - All information derived from tools or user documents must be wrapped in these tags with proper citation. + - Use this tag for text not derived from tools or user documents. + + - Provide proper citations for each , referencing the tool or document chunk used. + + - Provide exactly three user-perspective follow-up questions. + - Summarize the actions and tools used in the conversation. + + + + + **Wrap ALL tool-based information** in tags and provide citations. + Use separate tags for distinct information or when switching to a different tool or document. + Ensure that **EVERY** tag includes a citation index referencing the source of the information. + Over-citing is discouraged—only cite the information that is directly relevant to the user's query. + + + + Wrap general information or reasoning **not derived from tools or documents** in tags. + Never put information derived from user documents or tools in tags—use for those. + + + + Carefully analyze the user query and determine if a tool is necessary to provide an accurate answer. + If a tool is needed, choose the most appropriate one and **stop after the action** to wait for system input. + If no tool is needed, use the 'no_tool' action but follow the structure. + When all observations are complete, format the final answer using and tags with appropriate citations. + Include exactly three follow-up questions from the user's perspective. + Provide a loop summary at the end of the conversation. + + + + ${toolDescriptions} + If no external tool is required, use 'no_tool', but if there might be relevant external information, use the appropriate tool. 
+ + + + ${summaries()} + + + + ${chatHistory} + + + + + + Can you provide key moments from the 2022 World Cup and its impact on tourism in Qatar? + + + + + I will use the RAG tool to retrieve key moments from the user's World Cup documents. Afterward, I will use the website scraper tool to gather tourism impact data on Qatar. + + rag + + + + ***Action rules omitted*** + + + + + Searching user documents for key moments from the 2022 World Cup. + + Key moments from the 2022 World Cup. Goals, assists, big wins, big losses. + + + + + + + + The 2022 FIFA World Cup saw Argentina win, with Lionel Messi's performance being a key highlight. It was widely celebrated as a historical moment in sports. + + + + + + + With key moments from the World Cup retrieved, I will now use the website scraper tool to gather data on Qatar’s tourism impact during the World Cup. + + websiteInfoScraper + + + + ***Action rules omitted*** + + + + + Scraping websites for information about Qatar's tourism impact during the 2022 World Cup. + + Tourism impact of the 2022 World Cup in Qatar + + + + + + + + https://www.qatartourism.com/world-cup-impact + During the 2022 World Cup, Qatar saw a 40% increase in tourism, with over 1.5 million visitors attending. + + + + + + + Now that I have gathered both key moments from the World Cup and tourism impact data from Qatar, I will summarize the information in my final response. + + + **The 2022 World Cup** saw Argentina crowned champions, with **Lionel Messi** leading his team to victory, marking a historic moment in sports. + **Qatar** experienced a **40% increase in tourism** during the World Cup, welcoming over **1.5 million visitors**, significantly boosting its economy. + Moments like **Messi’s triumph** often become ingrained in the legacy of World Cups, immortalizing these tournaments in both sports and cultural memory. 
The **long-term implications** of the World Cup on Qatar's **economy, tourism**, and **global image** remain important areas of interest as the country continues to build on the momentum generated by hosting this prestigious event. + + Key moments from the 2022 World Cup. + + + + What long-term effects has the World Cup had on Qatar's economy and infrastructure? + Can you compare Qatar's tourism numbers with previous World Cup hosts? + How has Qatar’s image on the global stage evolved post-World Cup? + + + The assistant first used the RAG tool to extract key moments from the user documents about the 2022 World Cup. Then, the assistant utilized the website scraping tool to gather data on Qatar's tourism impact. Both tools provided valuable information, and no additional tools were needed. + + + + + + + + Process the user's query according to these rules. Ensure your final answer is comprehensive, well-structured, and includes citations where appropriate. + +`; +} + +export function getSummarizedChunksPrompt(chunks: string): string { + return `Please provide a comprehensive summary of what you think the document from which these chunks originated. + Ensure the summary captures the main ideas and key points from all provided chunks. Be concise and brief and only provide the summary in paragraph form. + + Text chunks: + \`\`\` + ${chunks} + \`\`\``; +} + +export function getSummarizedSystemPrompt(): string { + return 'You are an AI assistant tasked with summarizing a document. You are provided with important chunks from the document and provide a summary, as best you can, of what the document will contain overall. 
Be concise and brief with your response.'; +} diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss new file mode 100644 index 000000000..42f6a0d61 --- /dev/null +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss @@ -0,0 +1,279 @@ +@import url('https://fonts.googleapis.com/css2?family=Atkinson+Hyperlegible:ital,wght@0,400;0,700;1,400;1,700&display=swap'); + +$primary-color: #4a90e2; +$secondary-color: #f5f8fa; +$text-color: #333; +$light-text-color: #777; +$border-color: #e1e8ed; +$shadow-color: rgba(0, 0, 0, 0.1); +$transition: all 0.3s ease; +.chat-box { + display: flex; + flex-direction: column; + height: 100%; + background-color: #fff; + font-family: + 'Atkinson Hyperlegible', + -apple-system, + BlinkMacSystemFont, + 'Segoe UI', + Roboto, + Helvetica, + Arial, + sans-serif; + border-radius: 12px; + overflow: hidden; + box-shadow: 0 4px 12px $shadow-color; + position: relative; + + .chat-header { + background-color: $primary-color; + color: white; + padding: 15px; + text-align: center; + box-shadow: 0 2px 4px $shadow-color; + height: fit-content; + + h2 { + margin: 0; + font-size: 1.3em; + font-weight: 500; + } + } + + .chat-messages { + flex-grow: 1; + overflow-y: auto; + padding: 20px; + display: flex; + flex-direction: column; + gap: 10px; // Added to give space between elements + + &::-webkit-scrollbar { + width: 6px; + } + + &::-webkit-scrollbar-thumb { + background-color: $border-color; + border-radius: 3px; + } + } + + .chat-input { + display: flex; + padding: 20px; + border-top: 1px solid $border-color; + background-color: #fff; + + input { + flex-grow: 1; + padding: 12px 15px; + border: 1px solid $border-color; + border-radius: 24px; + font-size: 15px; + transition: $transition; + + &:focus { + outline: none; + border-color: $primary-color; + box-shadow: 0 0 0 2px rgba($primary-color, 0.2); + } + } + + .submit-button { + background-color: 
$primary-color; + color: white; + border: none; + border-radius: 50%; + width: 48px; + height: 48px; + margin-left: 10px; + cursor: pointer; + transition: $transition; + display: flex; + align-items: center; + justify-content: center; + + &:hover { + background-color: darken($primary-color, 10%); + } + + &:disabled { + background-color: $light-text-color; + cursor: not-allowed; + } + + .spinner { + height: 24px; + width: 24px; + border: 3px solid rgba(255, 255, 255, 0.3); + border-top: 3px solid #fff; + border-radius: 50%; + animation: spin 1s linear infinite; + display: flex; + align-items: center; + justify-content: center; + } + } + } + .citation-popup { + position: fixed; + bottom: 50px; + left: 50%; + transform: translateX(-50%); + background-color: rgba(0, 0, 0, 0.8); + color: white; + padding: 10px 20px; + border-radius: 10px; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2); + z-index: 1000; + animation: fadeIn 0.3s ease-in-out; + + p { + margin: 0; + font-size: 14px; + } + + @keyframes fadeIn { + from { + opacity: 0; + } + to { + opacity: 1; + } + } + } +} + +.message { + max-width: 80%; + margin-bottom: 20px; + padding: 16px 20px; + border-radius: 18px; + font-size: 15px; + line-height: 1.5; + box-shadow: 0 2px 4px $shadow-color; + word-wrap: break-word; // To handle long words + + &.user { + align-self: flex-end; + background-color: $primary-color; + color: white; + border-bottom-right-radius: 4px; + } + + &.chatbot { + align-self: flex-start; + background-color: $secondary-color; + color: $text-color; + border-bottom-left-radius: 4px; + } + + .toggle-info { + background-color: transparent; + color: $primary-color; + border: 1px solid $primary-color; + width: 100%; + height: fit-content; + border-radius: 8px; + padding: 10px 16px; + font-size: 14px; + cursor: pointer; + transition: $transition; + margin-top: 10px; + + &:hover { + background-color: rgba($primary-color, 0.1); + } + } +} + +.follow-up-questions { + margin-top: 15px; + + h4 { + font-size: 15px; + 
font-weight: 600; + margin-bottom: 10px; + } + + .questions-list { + display: flex; + flex-direction: column; + gap: 10px; + } + + .follow-up-button { + background-color: #fff; + color: $primary-color; + border: 1px solid $primary-color; + border-radius: 8px; + padding: 10px 16px; + font-size: 14px; + cursor: pointer; + transition: $transition; + text-align: left; + white-space: normal; + word-wrap: break-word; + width: 100%; + height: fit-content; + + &:hover { + background-color: $primary-color; + color: #fff; + } + } +} + +.citation-button { + display: inline-flex; + align-items: center; + justify-content: center; + width: 20px; + height: 20px; + border-radius: 50%; + background-color: rgba(0, 0, 0, 0.1); + color: $text-color; + font-size: 12px; + font-weight: bold; + margin-left: 5px; + cursor: pointer; + transition: $transition; + vertical-align: middle; + + &:hover { + background-color: rgba(0, 0, 0, 0.2); + } +} + +.uploading-overlay { + position: absolute; + top: 0; + left: 0; + right: 0; + bottom: 0; + background-color: rgba(255, 255, 255, 0.8); + display: flex; + justify-content: center; + align-items: center; + z-index: 1000; +} + +@keyframes spin { + 0% { + transform: rotate(0deg); + } + 100% { + transform: rotate(360deg); + } +} + +@media (max-width: 768px) { + .chat-box { + border-radius: 0; + } + + .message { + max-width: 90%; + } +} diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx new file mode 100644 index 000000000..6dc691798 --- /dev/null +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -0,0 +1,745 @@ +import { action, computed, makeObservable, observable, observe, reaction, runInAction, ObservableSet } from 'mobx'; +import { observer } from 'mobx-react'; +import OpenAI, { ClientOptions } from 'openai'; +import * as React from 'react'; +import { Doc, DocListCast } from '../../../../../fields/Doc'; +import { CsvCast, DocCast, PDFCast, 
RTFCast, StrCast } from '../../../../../fields/Types'; +import { DocumentType } from '../../../../documents/DocumentTypes'; +import { Docs } from '../../../../documents/Documents'; +import { LinkManager } from '../../../../util/LinkManager'; +import { ViewBoxAnnotatableComponent } from '../../../DocComponent'; +import { FieldView, FieldViewProps } from '../../FieldView'; +import './ChatBox.scss'; +import MessageComponentBox from './MessageComponent'; +import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, SimplifiedChunk, ProcessingInfo } from '../types/types'; +import { Vectorstore } from '../vectorstore/Vectorstore'; +import { Agent } from '../agentsystem/Agent'; +import dotenv from 'dotenv'; +import { DocData, DocViews } from '../../../../../fields/DocSymbols'; +import { DocumentManager } from '../../../../util/DocumentManager'; +import { v4 as uuidv4 } from 'uuid'; +import { DocUtils } from '../../../../documents/DocUtils'; +import { ClientUtils } from '../../../../../ClientUtils'; +import { ProgressBar } from './ProgressBar'; +import { DocumentView } from '../../DocumentView'; +import { Networking } from '../../../../Network'; + +dotenv.config(); + +/** + * ChatBox is the main class responsible for managing the interaction between the user and the assistant, + * handling documents, and integrating with OpenAI for tasks such as document analysis, chat functionality, + * and vector store interactions. 
+ */
+@observer
+export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
+    // MobX observable properties to track UI state and data
+    /** Finished messages, oldest first. The constructor's reaction serializes them into `dataDoc.data`. */
+    @observable history: AssistantMessage[] = [];
+    /** Assistant reply that is still being produced; undefined when no request is in flight. */
+    @observable.deep current_message: AssistantMessage | undefined = undefined;
+    @observable isLoading: boolean = false;
+    /** Progress value and step label reported by the vectorstore while a document is uploaded. */
+    @observable uploadProgress: number = 0;
+    @observable currentStep: string = '';
+    /** Index of the message whose tool log is expanded, or null when none is. */
+    @observable expandedScratchpadIndex: number | null = null;
+    @observable inputValue: string = '';
+    /** Linked documents seen so far; additions are observed in componentDidMount and read as `Doc`s there. */
+    @observable private linked_docs_to_add: ObservableSet<Doc> = observable.set<Doc>();
+    @observable private linked_csv_files: { filename: string; id: string; text: string }[] = [];
+    @observable private isUploadingDocs: boolean = false;
+    @observable private citationPopup: { text: string; visible: boolean } = { text: '', visible: false };
+
+    // Private properties for managing OpenAI API, vector store, agent, and UI elements
+    private openai: OpenAI;
+    private vectorstore_id: string;
+    private vectorstore: Vectorstore;
+    private agent: Agent;
+    // NOTE(review): not referenced by any method visible in this class - confirm before removing.
+    private _oldWheel: HTMLDivElement | null = null;
+    // Ref to the scrollable messages container (scrollTop/scrollHeight and wheel listeners are used on it).
+    // NOTE(review): element type assumed to be a div - confirm against the JSX in render().
+    private messagesRef: React.RefObject<HTMLDivElement>;
+
+    /**
+     * Static method that returns the layout string for the field.
+     * @param fieldKey Key to get the layout string.
+     */
+    public static LayoutString(fieldKey: string) {
+        return FieldView.LayoutString(ChatBox, fieldKey);
+    }
+
+    /**
+     * Constructor initializes the component, sets up OpenAI, vector store, and agent instances,
+     * and observes changes in the chat history to save the state in dataDoc.
+     * @param props The properties passed to the component.
+     */
+    constructor(props: FieldViewProps) {
+        super(props);
+        makeObservable(this); // Enable MobX observables
+
+        // Initialize OpenAI, vectorstore, and agent
+        this.openai = this.initializeOpenAI();
+
+        // Reuse the vectorstore id persisted on the data doc; mint and persist a new one on first use.
+        const storedVectorstoreId = StrCast(this.dataDoc.vectorstore_id);
+        if (storedVectorstoreId === '') {
+            this.vectorstore_id = uuidv4();
+            this.dataDoc.vectorstore_id = this.vectorstore_id;
+        } else {
+            this.vectorstore_id = storedVectorstoreId;
+        }
+        this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds);
+        this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.createCSVInDash);
+        this.messagesRef = React.createRef();
+
+        // Reaction to update dataDoc when chat history changes.
+        // Only role, content, follow-up questions and citations are persisted.
+        // NOTE(review): the disposer returned by reaction() is discarded, so this reaction is never torn down.
+        reaction(
+            () =>
+                this.history.map((msg: AssistantMessage) => ({
+                    role: msg.role,
+                    content: msg.content,
+                    follow_up_questions: msg.follow_up_questions,
+                    citations: msg.citations,
+                })),
+            serializableHistory => {
+                this.dataDoc.data = JSON.stringify(serializableHistory);
+            }
+        );
+    }
+
+    /**
+     * Adds a document to the vectorstore for AI-based analysis.
+     * Shows the upload overlay while the upload runs and always hides it afterwards.
+     * Upload errors are logged, not rethrown.
+     * @param newLinkedDoc The new document to add.
+     */
+    @action
+    addDocToVectorstore = async (newLinkedDoc: Doc) => {
+        this.uploadProgress = 0;
+        this.currentStep = 'Initializing...';
+        this.isUploadingDocs = true;
+
+        try {
+            // Add the document to the vectorstore
+            await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress);
+        } catch (error) {
+            console.error('Error uploading document:', error);
+            // Code after the `await` no longer runs inside the @action wrapper, so wrap observable writes.
+            runInAction(() => {
+                this.currentStep = 'Error during upload';
+            });
+        } finally {
+            // Reset the overlay state in one transaction; this also clears the error step set above.
+            runInAction(() => {
+                this.isUploadingDocs = false;
+                this.uploadProgress = 0;
+                this.currentStep = '';
+            });
+        }
+    };
+
+    /**
+     * Updates the upload progress and the current step in the UI.
+     * @param progress The percentage of the progress.
+     * @param step The current step name.
+     */
+    @action
+    updateProgress = (progress: number, step: string) => {
+        this.uploadProgress = progress;
+        this.currentStep = step;
+    };
+
+    /**
+     * Adds a CSV file for analysis by sending it to OpenAI and generating a summary.
+     * Does nothing when the document already carries a `chunk_simpl` field.
+     * Errors from the OpenAI request propagate to the caller as a rejected promise.
+     * @param newLinkedDoc The linked document representing the CSV file.
+     * @param id Optional ID for the document; a fresh UUID is generated when omitted.
+     */
+    @action
+    addCSVForAnalysis = async (newLinkedDoc: Doc, id?: string) => {
+        // A document that already has chunk metadata has been processed before.
+        if (newLinkedDoc.chunk_simpl) return;
+
+        // Convert document text to CSV data
+        const csvData: string = StrCast(newLinkedDoc.text);
+
+        // Generate a summary using OpenAI API
+        const completion = await this.openai.chat.completions.create({
+            messages: [
+                {
+                    role: 'system',
+                    content:
+                        'You are an AI assistant tasked with summarizing the content of a CSV file. You will be provided with the data from the CSV file and your goal is to generate a concise summary that captures the main themes, trends, and key points represented in the data.',
+                },
+                {
+                    role: 'user',
+                    content: `Please provide a comprehensive summary of the CSV file based on the provided data. Ensure the summary highlights the most important information, patterns, and insights. Your response should be in paragraph form and be concise.
+              CSV Data:
+              ${csvData}
+              **********
+              Summary:`,
+                },
+            ],
+            model: 'gpt-3.5-turbo',
+        });
+
+        const csvId = id ?? uuidv4();
+        // The API may return no choice or a null content; store an empty summary instead of asserting non-null.
+        const summary = completion.choices[0]?.message.content ?? '';
+
+        // Everything below runs after the `await`, i.e. outside the @action wrapper, so batch it explicitly.
+        runInAction(() => {
+            // Add CSV details to linked files
+            this.linked_csv_files.push({
+                filename: CsvCast(newLinkedDoc.data).url.pathname,
+                id: csvId,
+                text: csvData,
+            });
+
+            // Add a chunk for the CSV and assign the summary
+            const chunkToAdd = {
+                chunkId: csvId,
+                chunkType: CHUNK_TYPE.CSV,
+            };
+            newLinkedDoc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] });
+            newLinkedDoc.summary = summary;
+        });
+    };
+
+    /**
+     * Toggles the tool logs, expanding or collapsing the scratchpad at the given index.
+     * @param index Index of the tool log to toggle.
+     */
+    @action
+    toggleToolLogs = (index: number) => {
+        // Clicking the already-expanded entry collapses it; any other entry becomes the expanded one.
+        if (this.expandedScratchpadIndex === index) {
+            this.expandedScratchpadIndex = null;
+        } else {
+            this.expandedScratchpadIndex = index;
+        }
+    };
+
+    /**
+     * Builds the OpenAI API client from the `OPENAI_KEY` environment variable.
+     * The client is configured to be usable from browser code.
+     * @returns OpenAI client instance.
+     */
+    initializeOpenAI() {
+        const clientOptions: ClientOptions = { apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true };
+        return new OpenAI(clientOptions);
+    }
+
+    /**
+     * Registers the wheel handler on the messages container, if it is mounted.
+     * The listener is registered as non-passive.
+     */
+    addScrollListener = () => {
+        const container = this.messagesRef.current;
+        if (!container) return;
+        container.addEventListener('wheel', this.onPassiveWheel, { passive: false });
+    };
+
+    /**
+     * Unregisters the wheel handler from the messages container, if it is mounted.
+     */
+    removeScrollListener = () => {
+        const container = this.messagesRef.current;
+        if (!container) return;
+        container.removeEventListener('wheel', this.onPassiveWheel);
+    };
+
+    /**
+     * Scrolls the messages container all the way down so the newest message is visible.
+     */
+    scrollToBottom = () => {
+        const container = this.messagesRef.current;
+        if (!container) return;
+        container.scrollTop = container.scrollHeight;
+    };
+
+    /**
+     * Wheel handler: while this box's content is active, keeps the event from reaching ancestors.
+     * @param e The wheel event.
+     */
+    onPassiveWheel = (e: WheelEvent) => {
+        if (!this._props.isContentActive()) return;
+        e.stopPropagation();
+    };
+
+    /**
+     * Sends the user's input to OpenAI, displays the loading indicator, and updates the chat history.
+     * @param event The form submission event.
+     */
+    @action
+    askGPT = async (event: React.FormEvent<HTMLFormElement>): Promise<void> => {
+        event.preventDefault();
+        this.inputValue = '';
+
+        // Extract the user's message
+        const textInput = event.currentTarget.elements.namedItem('messageInput') as HTMLInputElement;
+        const trimmedText = textInput.value.trim();
+
+        if (trimmedText) {
+            try {
+                textInput.value = '';
+                // Add the user's message to the history
+                this.history.push({
+                    role: ASSISTANT_ROLE.USER,
+                    content: [{ index: 0, type: TEXT_TYPE.NORMAL, text: trimmedText, citation_ids: null }],
+                    processing_info: [],
+                });
+                this.isLoading = true;
+                // Placeholder that the streaming callbacks below fill in while the agent works.
+                this.current_message = {
+                    role: ASSISTANT_ROLE.ASSISTANT,
+                    content: [],
+                    citations: [],
+                    processing_info: [],
+                };
+
+                // Define callbacks for real-time processing updates
+                const onProcessingUpdate = (processingUpdate: ProcessingInfo[]) => {
+                    runInAction(() => {
+                        if (this.current_message) {
+                            this.current_message = {
+                                ...this.current_message,
+                                processing_info: processingUpdate,
+                            };
+                        }
+                    });
+                    this.scrollToBottom();
+                };
+
+                const onAnswerUpdate = (answerUpdate: string) => {
+                    runInAction(() => {
+                        if (this.current_message) {
+                            this.current_message = {
+                                ...this.current_message,
+                                content: [{ text: answerUpdate, type: TEXT_TYPE.NORMAL, index: 0, citation_ids: [] }],
+                            };
+                        }
+                    });
+                };
+
+                // Send the user's question to the assistant and get the final message
+                const finalMessage = await this.agent.askAgent(trimmedText, onProcessingUpdate, onAnswerUpdate);
+
+                // Update the history with the final assistant message
+                runInAction(() => {
+                    if (this.current_message) {
+                        this.history.push({ ...finalMessage });
+                        this.current_message = undefined;
+                        this.dataDoc.data = JSON.stringify(this.history);
+                    }
+                });
+            } catch (err) {
+                console.error('Error:', err);
+                // Code after the `await` runs outside the @action wrapper, so observable writes need runInAction.
+                runInAction(() => {
+                    // Drop the half-built streaming message; otherwise render() keeps showing it next to the error.
+                    this.current_message = undefined;
+                    this.history.push({
+                        role: ASSISTANT_ROLE.ASSISTANT,
+                        content: [{ index: 0, type: TEXT_TYPE.ERROR, text: 'Sorry, I encountered an error while processing your request.', citation_ids: null }],
+                        processing_info: [],
+                    });
+                });
+            } finally {
+                runInAction(() => {
+                    this.isLoading = false;
+                });
+                this.scrollToBottom();
+            }
+        }
+        this.scrollToBottom();
+    };
+
+    /**
+     * Updates the citations for a given message in the chat history.
+     * Does nothing when there is no message at that index.
+     * @param index The index of the message in the history.
+     * @param citations The list of citations to add to the message.
+     */
+    @action
+    updateMessageCitations = (index: number, citations: Citation[]) => {
+        const message = this.history[index];
+        if (message) {
+            message.citations = citations;
+        }
+    };
+
+    /**
+     * Adds a linked document from a URL for future reference and analysis.
+     * Creates a web document, links it to this chat document, and records on it a URL chunk
+     * whose `canDisplay` flag says whether the page may be shown embedded.
+     * @param url The URL of the document to add.
+     * @param id The unique identifier for the document.
+     */
+    @action
+    addLinkedUrlDoc = async (url: string, id: string) => {
+        const doc = Docs.Create.WebDocument(url, { data_useCors: true });
+
+        const linkDoc = Docs.Create.LinkDocument(this.Document, doc);
+        LinkManager.Instance.addLink(linkDoc);
+
+        // Default to "not embeddable": if the probe below fails we know nothing about the page,
+        // and handleCitationClick opens such chunks in a new window.
+        let canDisplay = false;
+
+        try {
+            // Fetch the URL content through the proxy
+            const { data } = await Networking.PostToServer('/proxyFetch', { url });
+
+            // Simulating header behavior since we can't fetch headers via proxy
+            const xFrameOptions: unknown = data.headers?.['x-frame-options'];
+            const policy = typeof xFrameOptions === 'string' ? xFrameOptions.trim().toUpperCase() : '';
+
+            // Both DENY and SAMEORIGIN forbid framing from this (different) origin.
+            canDisplay = policy !== 'DENY' && policy !== 'SAMEORIGIN';
+        } catch (error) {
+            console.error('Error fetching the URL from the server:', error);
+        }
+
+        const chunkToAdd = {
+            chunkId: id,
+            chunkType: CHUNK_TYPE.URL,
+            url: url,
+            canDisplay: canDisplay,
+        };
+
+        doc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] });
+    };
+
+    /**
+     * Getter that exposes `ClientUtils.CurrentUserEmail` as the user name.
+     * NOTE(review): callers in this class invoke the result (`this.userName()`), so
+     * `CurrentUserEmail` is presumably a function - confirm against ClientUtils.
+     */
+    @computed
+    get userName() {
+        return ClientUtils.CurrentUserEmail;
+    }
+
+    /**
+     * Creates a CSV document in the dashboard and adds it for analysis.
+     * @param url The URL of the CSV.
+     * @param title The title of the CSV document.
+     * @param id The unique ID for the document.
+     * @param data The CSV data content.
+     */
+    @action
+    createCSVInDash = async (url: string, title: string, id: string, data: string) => {
+        const doc = DocCast(await DocUtils.DocumentFromType('csv', url, { title: title, text: RTFCast(data) }));
+        // Without a document there is nothing to link, show or analyze.
+        if (!doc) {
+            console.error('Failed to create CSV document for:', url);
+            return;
+        }
+
+        const linkDoc = Docs.Create.LinkDocument(this.Document, doc);
+        LinkManager.Instance.addLink(linkDoc);
+
+        this._props.addDocument?.(doc);
+        await DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+
+        // Analysis is deliberately not awaited, but a failure must not become an unhandled rejection.
+        void this.addCSVForAnalysis(doc, id).catch(error => console.error('Error analyzing CSV document:', error));
+    };
+
+    /**
+     * Event handler to manage citations click in the message components.
+     * Looks through the linked documents for the chunk the citation refers to and reveals it:
+     * image/table chunks get a highlight annotation, text chunks show a popup and trigger a search,
+     * URL chunks open embedded or in a new window, CSV chunks are simply shown.
+     * @param citation The citation object clicked by the user.
+     */
+    @action
+    handleCitationClick = (citation: Citation) => {
+        const currentLinkedDocs: Doc[] = this.linkedDocs;
+
+        const chunkId = citation.chunk_id;
+
+        // Loop through the linked documents to find the matching chunk and handle its display
+        for (const doc of currentLinkedDocs) {
+            if (!doc.chunk_simpl) continue;
+
+            const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] };
+            const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId);
+            if (!foundChunk) continue;
+
+            // Handle different types of chunks (image, text, table, etc.)
+            switch (foundChunk.chunkType) {
+                case CHUNK_TYPE.IMAGE:
+                case CHUNK_TYPE.TABLE: {
+                    // `location` is a bracketed, comma-separated list of four fractions of the page size.
+                    const coords = foundChunk.location
+                        ?.replace(/[\[\]]/g, '')
+                        .split(',')
+                        .map(value => parseFloat(value));
+
+                    if (coords?.length !== 4 || coords.some(value => Number.isNaN(value))) {
+                        console.error('Location string must contain exactly 4 numbers');
+                        return;
+                    }
+
+                    // Pages are stacked vertically, so y is offset by the height of the preceding pages.
+                    const pageOffset = foundChunk.startPage * Doc.NativeHeight(doc);
+                    const x1 = coords[0] * Doc.NativeWidth(doc);
+                    const y1 = coords[1] * Doc.NativeHeight(doc) + pageOffset;
+                    const x2 = coords[2] * Doc.NativeWidth(doc);
+                    const y2 = coords[3] * Doc.NativeHeight(doc) + pageOffset;
+
+                    const annotationKey = Doc.LayoutFieldKey(doc) + '_annotations';
+
+                    // Reuse the highlight created by an earlier click on the same citation.
+                    const existingDoc = DocListCast(doc[DocData][annotationKey]).find(d => d.citation_id === citation.citation_id);
+                    const highlightDoc = existingDoc ?? this.createImageCitationHighlight(x1, y1, x2, y2, citation, annotationKey, doc);
+
+                    DocumentManager.Instance.showDocument(highlightDoc, { willZoomCentered: true }, () => {});
+                    break;
+                }
+                case CHUNK_TYPE.TEXT:
+                    this.citationPopup = { text: citation.direct_text ?? 'No text available', visible: true };
+                    // Hide after 3 seconds; the timer fires outside this action, hence runInAction.
+                    setTimeout(
+                        () =>
+                            runInAction(() => {
+                                this.citationPopup.visible = false;
+                            }),
+                        3000
+                    );
+
+                    DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
+                        // The document may have no view yet, so guard before searching in it.
+                        const firstView = Array.from(doc[DocViews])[0] as DocumentView | undefined;
+                        firstView?.ComponentView?.search?.(citation.direct_text ?? '');
+                    });
+                    break;
+                case CHUNK_TYPE.URL:
+                    if (!foundChunk.canDisplay) {
+                        window.open(StrCast(doc.displayUrl), '_blank');
+                    } else {
+                        DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+                    }
+                    break;
+                case CHUNK_TYPE.CSV:
+                    DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+                    break;
+                default:
+                    console.error('Chunk type not recognized:', foundChunk.chunkType);
+                    break;
+            }
+        }
+    };
+
+    /**
+     * Creates an annotation highlight on a PDF document for image citations.
+     * @param x1 X-coordinate of the top-left corner of the highlight.
+     * @param y1 Y-coordinate of the top-left corner of the highlight.
+     * @param x2 X-coordinate of the bottom-right corner of the highlight.
+     * @param y2 Y-coordinate of the bottom-right corner of the highlight.
+     * @param citation The citation object to associate with the highlight.
+     * @param annotationKey The key used to store the annotation.
+     * @param pdfDoc The document where the highlight is created.
+     * @returns The highlighted document.
+     */
+    createImageCitationHighlight = (x1: number, y1: number, x2: number, y2: number, citation: Citation, annotationKey: string, pdfDoc: Doc): Doc => {
+        // Semi-transparent yellow rectangle spanning the two corners.
+        const width = x2 - x1;
+        const height = y2 - y1;
+        const highlight = Docs.Create.FreeformDocument([], { x: x1, y: y1, _width: width, _height: height, backgroundColor: 'rgba(255, 255, 0, 0.5)' });
+
+        // Tag the highlight with the citation id so a later click can find and reuse it.
+        highlight[DocData].citation_id = citation.citation_id;
+
+        // Register the highlight in the target's annotation list, then mark the target as its container.
+        Doc.AddDocToList(pdfDoc[DocData], annotationKey, highlight);
+        highlight.annotationOn = pdfDoc;
+        Doc.SetContainer(highlight, pdfDoc);
+
+        return highlight;
+    };
+
+    /**
+     * React lifecycle hook that runs after every update of the component.
+     * Keeps the message list scrolled to its end.
+     */
+    componentDidUpdate() {
+        this.scrollToBottom();
+    }
+
+    /**
+     * Lifecycle method that triggers when the component mounts.
+     * Initializes scroll listeners, sets up document reactions, and loads chat history from dataDoc if available.
+     */
+    componentDidMount() {
+        this._props.setContentViewBox?.(this);
+        if (this.dataDoc.data) {
+            try {
+                // Restore only the fields that are persisted: role, content, follow-ups, citations.
+                const storedHistory = JSON.parse(StrCast(this.dataDoc.data));
+                runInAction(() => {
+                    this.history.push(
+                        ...storedHistory.map((msg: AssistantMessage) => ({
+                            role: msg.role,
+                            content: msg.content,
+                            follow_up_questions: msg.follow_up_questions,
+                            citations: msg.citations,
+                        }))
+                    );
+                });
+            } catch (e) {
+                console.error('Failed to parse history from dataDoc:', e);
+            }
+        } else {
+            // Default welcome message
+            runInAction(() => {
+                this.history.push({
+                    role: ASSISTANT_ROLE.ASSISTANT,
+                    content: [
+                        {
+                            index: 0,
+                            type: TEXT_TYPE.NORMAL,
+                            text: `Hey, ${this.userName()}! Welcome to Your Friendly Assistant. Link a document or ask questions to get started.`,
+                            citation_ids: null,
+                        },
+                    ],
+                    processing_info: [],
+                });
+            });
+        }
+
+        // Set up reactions for linked documents. The disposers are kept so that
+        // componentWillUnmount can stop them; otherwise they outlive the component.
+        this._mountDisposers.push(
+            // Whenever the set of linked documents changes, remember every linked document.
+            reaction(
+                () => this.linkedDocs,
+                linked => linked.forEach(doc => this.linked_docs_to_add.add(doc))
+            ),
+            // Observe changes to linked documents and handle document addition
+            observe(this.linked_docs_to_add, change => {
+                if (change.type === 'add') {
+                    if (PDFCast(change.newValue.data)) {
+                        // addDocToVectorstore logs its own upload errors.
+                        void this.addDocToVectorstore(change.newValue);
+                    } else if (CsvCast(change.newValue.data)) {
+                        // addCSVForAnalysis can reject (OpenAI request); report instead of leaving it unhandled.
+                        void this.addCSVForAnalysis(change.newValue).catch(error => console.error('Error analyzing CSV document:', error));
+                    }
+                } else if (change.type === 'delete') {
+                    // Handle document removal
+                }
+            })
+        );
+        this.addScrollListener();
+    }
+
+    // Disposers for the reaction/observer registered in componentDidMount.
+    private _mountDisposers: (() => void)[] = [];
+
+    /**
+     * Lifecycle method that triggers when the component unmounts.
+     * Removes scroll listeners and stops the linked-document reaction and observer to avoid memory leaks.
+     */
+    componentWillUnmount() {
+        this.removeScrollListener();
+        this._mountDisposers.forEach(dispose => dispose());
+        this._mountDisposers = [];
+    }
+
+    /**
+     * Getter that retrieves all linked documents for the current document.
+     */
+    @computed
+    get linkedDocs() {
+        const links = LinkManager.Instance.getAllRelatedLinks(this.Document);
+        // For every link take the anchor on the far side from this chat document ...
+        const oppositeAnchors = links.map(link => DocCast(LinkManager.getOppositeAnchor(link, this.Document)));
+        // ... and, when that anchor is an annotation, the document it annotates.
+        const targets = oppositeAnchors.map(anchor => DocCast(anchor?.annotationOn, anchor));
+        return targets.filter(target => target);
+    }
+
+    /**
+     * Getter that collects the `ai_doc_id` of every linked document that has one.
+     */
+    @computed
+    get docIds() {
+        const links = LinkManager.Instance.getAllRelatedLinks(this.Document);
+        const targets = links
+            .map(link => DocCast(LinkManager.getOppositeAnchor(link, this.Document)))
+            .map(anchor => DocCast(anchor?.annotationOn, anchor))
+            .filter(target => target);
+        const withAiId = targets.filter(target => target.ai_doc_id);
+        return withAiId.map(target => StrCast(target.ai_doc_id));
+    }
+
+    /**
+     * Getter that joins the summaries of all linked documents that have one, one per line,
+     * followed by a trailing newline.
+     */
+    @computed
+    get summaries(): string {
+        const links = LinkManager.Instance.getAllRelatedLinks(this.Document);
+        const summarized = links
+            .map(link => DocCast(LinkManager.getOppositeAnchor(link, this.Document)))
+            .map(anchor => DocCast(anchor?.annotationOn, anchor))
+            .filter(target => target)
+            .filter(target => target.summary);
+
+        // NOTE(review): the PDF and CSV branches currently yield the same text - confirm whether
+        // distinct wrappers were intended for the two document kinds.
+        const lines = summarized.map((doc, index) => {
+            if (PDFCast(doc.data)) return `${doc.summary}`;
+            if (CsvCast(doc.data)) return `${doc.summary}`;
+            return `${index + 1}) ${doc.summary}`;
+        });
+
+        return lines.join('\n') + '\n';
+    }
+
+    /**
+     * Getter that exposes the CSV files registered so far for analysis.
+     */
+    @computed
+    get linkedCSVs(): { filename: string; id: string; text: string }[] {
+        return this.linked_csv_files;
+    }
+
+    /**
+     * Getter that formats the entire chat history as a string for the agent's system message.
+     */
+    @computed
+    get formattedHistory(): string {
+        // One entry per message: the role in angle brackets, then the message's text fragments joined by spaces.
+        // NOTE(review): the opening/closing literals here are '\n' and '', and loop_summary is appended bare -
+        // this looks like wrapper tags may have been lost; confirm against the agent's expected format.
+        let history = '\n';
+        for (const message of this.history) {
+            history += `<${message.role}>${message.content.map(content => content.text).join(' ')}`;
+            if (message.loop_summary) {
+                history += `${message.loop_summary}`;
+            }
+            history += `\n`;
+        }
+        history += '';
+        return history;
+    }
+
+    // Other helper methods for retrieving document data and processing
+    // These arrow functions are handed to the Vectorstore and Agent constructors as callbacks;
+    // each one reads the matching computed getter at call time.
+
+    retrieveSummaries = () => {
+        return this.summaries;
+    };
+
+    retrieveCSVData = () => {
+        return this.linkedCSVs;
+    };
+
+    retrieveFormattedHistory = () => {
+        return this.formattedHistory;
+    };
+
+    retrieveDocIds = () => {
+        return this.docIds;
+    };
+
+    /**
+     * Handles follow-up questions when the user clicks on them.
+     * Automatically sets the input value to the clicked follow-up question.
+     * @param question The follow-up question clicked by the user.
+     */
+    @action
+    handleFollowUpClick = (question: string) => {
+        this.inputValue = question;
+    };
+
+    /**
+     * Renders the chat interface, including the message list, input field, and other UI elements.
+     */
+    render() {
+        return (
+
+ {this.isUploadingDocs && ( +
+
+ +
{this.currentStep}
+
+
+ )} +
+

{this.userName()}'s AI Assistant

+
+
+ {this.history.map((message, index) => ( + + ))} + {this.current_message && ( + + )} +
+
+ (this.inputValue = e.target.value)} /> + +
+ {/* Popup for citation */} + {this.citationPopup.visible && ( +
+

+ Text from your document: {this.citationPopup.text} +

+
+ )} +
+ ); + } +} + +/** + * Register the ChatBox component as the template for CHAT document types. + */ +Docs.Prototypes.TemplateMap.set(DocumentType.CHAT, { + layout: { view: ChatBox, dataField: 'data' }, + options: { acl: '', chat: '', chat_history: '', chat_thread_id: '', chat_assistant_id: '', chat_vector_store_id: '' }, +}); diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx new file mode 100644 index 000000000..801becb64 --- /dev/null +++ b/src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx @@ -0,0 +1,146 @@ +import React, { useState } from 'react'; +import { observer } from 'mobx-react'; +import { AssistantMessage, Citation, MessageContent, PROCESSING_TYPE, ProcessingInfo, TEXT_TYPE } from '../types/types'; +import ReactMarkdown from 'react-markdown'; + +/** + * Props for the MessageComponentBox. + * @interface MessageComponentProps + * @property {AssistantMessage} message - The message data to display. + * @property {number} index - The index of the message. + * @property {Function} onFollowUpClick - Callback to handle follow-up question clicks. + * @property {Function} onCitationClick - Callback to handle citation clicks. + * @property {Function} updateMessageCitations - Function to update message citations. + */ +interface MessageComponentProps { + message: AssistantMessage; + index: number; + onFollowUpClick: (question: string) => void; + onCitationClick: (citation: Citation) => void; + updateMessageCitations: (index: number, citations: Citation[]) => void; +} + +/** + * MessageComponentBox displays the content of an AssistantMessage including text, citations, + * processing information, and follow-up questions. + * @param {MessageComponentProps} props - The props for the component. 
+ */ +const MessageComponentBox: React.FC = ({ message, index, onFollowUpClick, onCitationClick, updateMessageCitations }) => { + // State for managing whether the dropdown is open or closed for processing info + const [dropdownOpen, setDropdownOpen] = useState(false); + + /** + * Renders the content of the message based on the type (e.g., grounded text, normal text). + * @param {MessageContent} item - The content item to render. + * @returns {JSX.Element} JSX element rendering the content. + */ + const renderContent = (item: MessageContent) => { + const i = item.index; + + // Handle grounded text with citations + if (item.type === TEXT_TYPE.GROUNDED) { + const citation_ids = item.citation_ids || []; + return ( + + {item.text} + {citation_ids.map((id, idx) => { + const citation = message.citations?.find(c => c.citation_id === id); + if (!citation) return null; + return ( + + ); + })} + + ); + } + + // Handle normal text + else if (item.type === TEXT_TYPE.NORMAL) { + return ( + + {item.text} + + ); + } + + // Handle query type content + else if ('query' in item) { + return ( + + {JSON.stringify(item.query)} + + ); + } + + // Fallback for any other content type + else { + return ( + + {JSON.stringify(item)} + + ); + } + }; + + // Check if the message contains processing information (thoughts/actions) + const hasProcessingInfo = message.processing_info && message.processing_info.length > 0; + + /** + * Renders processing information such as thoughts or actions during message handling. + * @param {ProcessingInfo} info - The processing information to render. + * @returns {JSX.Element | null} JSX element rendering the processing info or null. + */ + const renderProcessingInfo = (info: ProcessingInfo) => { + if (info.type === PROCESSING_TYPE.THOUGHT) { + return ( +
+ Thought: {info.content} +
+ ); + } else if (info.type === PROCESSING_TYPE.ACTION) { + return ( +
+ Action: {info.content} +
+ ); + } + return null; + }; + + return ( +
+ {/* Processing Information Dropdown */} + {hasProcessingInfo && ( +
+ + {dropdownOpen &&
{message.processing_info.map(renderProcessingInfo)}
} +
+
+ )} + + {/* Message Content */} +
{message.content && message.content.map(messageFragment => {renderContent(messageFragment)})}
+ + {/* Follow-up Questions Section */} + {message.follow_up_questions && message.follow_up_questions.length > 0 && ( +
+

Follow-up Questions:

+
+ {message.follow_up_questions.map((question, idx) => ( + + ))} +
+
+ )} +
+ ); +}; + +// Export the observer-wrapped component to allow MobX to react to state changes +export default observer(MessageComponentBox); diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss new file mode 100644 index 000000000..ff5be4a38 --- /dev/null +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss @@ -0,0 +1,69 @@ +.spinner-container { + display: flex; + flex-direction: column; + justify-content: center; + align-items: center; + height: 100%; +} + +.spinner { + width: 60px; + height: 60px; + position: relative; + margin-bottom: 20px; // Space between spinner and text +} + +.double-bounce1, +.double-bounce2 { + width: 100%; + height: 100%; + border-radius: 50%; + background-color: #4a90e2; + opacity: 0.6; + position: absolute; + top: 0; + left: 0; + animation: bounce 2s infinite ease-in-out; +} + +.double-bounce2 { + animation-delay: -1s; +} + +@keyframes bounce { + 0%, + 100% { + transform: scale(0); + } + 50% { + transform: scale(1); + } +} + +.uploading-overlay { + position: absolute; + top: 0; + left: 0; + right: 0; + bottom: 0; + background-color: rgba(255, 255, 255, 0.8); + display: flex; + align-items: center; + justify-content: center; + z-index: 1000; +} + +.progress-container { + display: flex; + flex-direction: column; + align-items: center; + text-align: center; +} + +.step-name { + font-size: 18px; + color: #333; + text-align: center; + width: 100%; + margin-top: -10px; // Adjust to move the text closer to the spinner +} diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx new file mode 100644 index 000000000..b9fd08742 --- /dev/null +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx @@ -0,0 +1,22 @@ +import React from 'react'; +import './ProgressBar.scss'; + +/** + * ProgressBar is a functional React component that 
displays a loading spinner + * to indicate progress or ongoing processing. It uses two bouncing elements + * to create a smooth animation that represents an active state. + * + * The animation consists of two divs (`double-bounce1` and `double-bounce2`), + * each of which will bounce in and out of view, creating a pulsating effect. + */ +export const ProgressBar: React.FC = () => { + return ( +
+ {/* Spinner div containing two bouncing elements */} +
+
{/* First bouncing element */} +
{/* Second bouncing element */} +
+
+ ); +}; diff --git a/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts b/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts new file mode 100644 index 000000000..3b4fdb6f5 --- /dev/null +++ b/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts @@ -0,0 +1,125 @@ +import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType, ProcessingInfo } from '../types/types'; +import { v4 as uuid } from 'uuid'; + +export class AnswerParser { + static parse(xml: string, processingInfo: ProcessingInfo[]): AssistantMessage { + const answerRegex = /([\s\S]*?)<\/answer>/; + const citationsRegex = /([\s\S]*?)<\/citations>/; + const citationRegex = /([\s\S]*?)<\/citation>/g; + const followUpQuestionsRegex = /([\s\S]*?)<\/follow_up_questions>/; + const questionRegex = /(.*?)<\/question>/g; + const groundedTextRegex = /([\s\S]*?)<\/grounded_text>/g; + const normalTextRegex = /([\s\S]*?)<\/normal_text>/g; + const loopSummaryRegex = /([\s\S]*?)<\/loop_summary>/; + + const answerMatch = answerRegex.exec(xml); + const citationsMatch = citationsRegex.exec(xml); + const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml); + const loopSummaryMatch = loopSummaryRegex.exec(xml); + + if (!answerMatch) { + throw new Error('Invalid XML: Missing tag.'); + } + + let rawTextContent = answerMatch[1].trim(); + let content: AssistantMessage['content'] = []; + let citations: Citation[] = []; + let contentIndex = 0; + + // Remove citations and follow-up questions from rawTextContent + if (citationsMatch) { + rawTextContent = rawTextContent.replace(citationsMatch[0], '').trim(); + } + if (followUpQuestionsMatch) { + rawTextContent = rawTextContent.replace(followUpQuestionsMatch[0], '').trim(); + } + if (loopSummaryMatch) { + rawTextContent = rawTextContent.replace(loopSummaryMatch[0], '').trim(); + } + + // Parse citations + let citationMatch; + const citationMap = new Map(); + if (citationsMatch) { + const citationsContent = 
citationsMatch[1]; + while ((citationMatch = citationRegex.exec(citationsContent)) !== null) { + const [_, index, chunk_id, type, direct_text] = citationMatch; + const citation_id = uuid(); + citationMap.set(index, citation_id); + citations.push({ + direct_text: direct_text.trim(), + type: getChunkType(type), + chunk_id, + citation_id, + }); + } + } + + rawTextContent = rawTextContent.replace(normalTextRegex, '$1'); + + // Parse text content (normal and grounded) + let lastIndex = 0; + let match; + + while ((match = groundedTextRegex.exec(rawTextContent)) !== null) { + const [fullMatch, citationIndex, groundedText] = match; + + // Add normal text that is before the grounded text + if (match.index > lastIndex) { + const normalText = rawTextContent.slice(lastIndex, match.index).trim(); + if (normalText) { + content.push({ + index: contentIndex++, + type: TEXT_TYPE.NORMAL, + text: normalText, + citation_ids: null, + }); + } + } + + // Add grounded text + const citation_ids = citationIndex.split(',').map(index => citationMap.get(index) || ''); + content.push({ + index: contentIndex++, + type: TEXT_TYPE.GROUNDED, + text: groundedText.trim(), + citation_ids, + }); + + lastIndex = match.index + fullMatch.length; + } + + // Add any remaining normal text after the last grounded text + if (lastIndex < rawTextContent.length) { + const remainingText = rawTextContent.slice(lastIndex).trim(); + if (remainingText) { + content.push({ + index: contentIndex++, + type: TEXT_TYPE.NORMAL, + text: remainingText, + citation_ids: null, + }); + } + } + + let followUpQuestions: string[] = []; + if (followUpQuestionsMatch) { + const questionsText = followUpQuestionsMatch[1]; + let questionMatch; + while ((questionMatch = questionRegex.exec(questionsText)) !== null) { + followUpQuestions.push(questionMatch[1].trim()); + } + } + + const assistantResponse: AssistantMessage = { + role: ASSISTANT_ROLE.ASSISTANT, + content, + follow_up_questions: followUpQuestions, + citations, + processing_info: 
processingInfo, + loop_summary: loopSummaryMatch ? loopSummaryMatch[1].trim() : undefined, + }; + + return assistantResponse; + } +} diff --git a/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts b/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts new file mode 100644 index 000000000..3585cab4a --- /dev/null +++ b/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts @@ -0,0 +1,73 @@ +import { threadId } from 'worker_threads'; + +enum ParserState { + Outside, + InGroundedText, + InNormalText, +} + +export class StreamedAnswerParser { + private state: ParserState = ParserState.Outside; + private buffer: string = ''; + private result: string = ''; + private isStartOfLine: boolean = true; + + public parse(char: string): string { + switch (this.state) { + case ParserState.Outside: + if (char === '<') { + this.buffer = '<'; + } else if (char === '>') { + if (this.buffer.startsWith('') { + this.state = ParserState.Outside; + this.buffer = ''; + } else if (this.buffer.startsWith('') { + this.state = ParserState.Outside; + this.buffer = ''; + } else if (this.buffer.startsWith('<')) { + this.buffer += char; + } else { + this.processChar(char); + } + break; + } + + return this.result.trim(); + } + + private processChar(char: string): void { + if (this.isStartOfLine && char === ' ') { + // Skip leading spaces + return; + } + if (char === '\n') { + this.result += char; + this.isStartOfLine = true; + } else { + this.result += char; + this.isStartOfLine = false; + } + } + + public reset(): void { + this.state = ParserState.Outside; + this.buffer = ''; + this.result = ''; + this.isStartOfLine = true; + } +} diff --git a/src/client/views/nodes/chatbot/tools/BaseTool.ts b/src/client/views/nodes/chatbot/tools/BaseTool.ts new file mode 100644 index 000000000..b57f1c8e4 --- /dev/null +++ b/src/client/views/nodes/chatbot/tools/BaseTool.ts @@ -0,0 +1,24 @@ +import { Tool } from '../types/types'; + +export abstract class 
BaseTool = Record> implements Tool { + constructor( + public name: string, + public description: string, + public parameters: Record, + public citationRules: string, + public briefSummary: string + ) {} + + abstract execute(args: T): Promise; + + getActionRule(): Record { + return { + [this.name]: { + name: this.name, + citationRules: this.citationRules, + description: this.description, + parameters: this.parameters, + }, + }; + } +} diff --git a/src/client/views/nodes/chatbot/tools/CalculateTool.ts b/src/client/views/nodes/chatbot/tools/CalculateTool.ts new file mode 100644 index 000000000..74b7ca27b --- /dev/null +++ b/src/client/views/nodes/chatbot/tools/CalculateTool.ts @@ -0,0 +1,26 @@ +import { BaseTool } from './BaseTool'; + +export class CalculateTool extends BaseTool<{ expression: string }> { + constructor() { + super( + 'calculate', + 'Perform a calculation', + { + expression: { + type: 'string', + description: 'The mathematical expression to evaluate', + required: 'true', + max_inputs: '1', + }, + }, + 'Provide a mathematical expression to calculate that would work with JavaScript eval().', + 'Runs a calculation and returns the number - uses JavaScript so be sure to use floating point syntax if necessary' + ); + } + + async execute(args: { expression: string }): Promise { + // Note: Using eval() can be dangerous. Consider using a safer alternative. 
+ const result = eval(args.expression); + return [{ type: 'text', text: result.toString() }]; + } +} diff --git a/src/client/views/nodes/chatbot/tools/CreateCSVTool.ts b/src/client/views/nodes/chatbot/tools/CreateCSVTool.ts new file mode 100644 index 000000000..55015846b --- /dev/null +++ b/src/client/views/nodes/chatbot/tools/CreateCSVTool.ts @@ -0,0 +1,51 @@ +import { BaseTool } from './BaseTool'; +import { Networking } from '../../../../Network'; + +export class CreateCSVTool extends BaseTool<{ csvData: string; filename: string }> { + private _handleCSVResult: (url: string, filename: string, id: string, data: string) => void; + + constructor(handleCSVResult: (url: string, title: string, id: string, data: string) => void) { + super( + 'createCSV', + 'Creates a CSV file from raw CSV data and saves it to the server', + { + type: 'object', + properties: { + csvData: { + type: 'string', + description: 'A string of comma-separated values representing the CSV data.', + }, + filename: { + type: 'string', + description: 'The base name of the CSV file to be created. Should end in ".csv".', + }, + }, + required: ['csvData', 'filename'], + }, + 'Provide a CSV string and a filename to create a CSV file.', + 'Creates a CSV file from the provided CSV string and saves it to the server with a unique identifier, returning the file URL and UUID.' + ); + this._handleCSVResult = handleCSVResult; + } + + async execute(args: { csvData: string; filename: string }): Promise { + try { + console.log('Creating CSV file:', args.filename, ' with data:', args.csvData); + // Post the raw CSV data to the createCSV endpoint on the server + const { fileUrl, id } = await Networking.PostToServer('/createCSV', { filename: args.filename, data: args.csvData }); + + // Handle the result by invoking the callback + this._handleCSVResult(fileUrl, args.filename, id, args.csvData); + + return [ + { + type: 'text', + text: `File successfully created: ${fileUrl}. 
\nNow a CSV file with this data and the name ${args.filename} is available as a user doc.`, + }, + ]; + } catch (error) { + console.error('Error creating CSV file:', error); + throw new Error('Failed to create CSV file.'); + } + } +} diff --git a/src/client/views/nodes/chatbot/tools/CreateCollectionTool.ts b/src/client/views/nodes/chatbot/tools/CreateCollectionTool.ts new file mode 100644 index 000000000..573428179 --- /dev/null +++ b/src/client/views/nodes/chatbot/tools/CreateCollectionTool.ts @@ -0,0 +1,36 @@ +import { DocCast } from '../../../../../fields/Types'; +import { DocServer } from '../../../../DocServer'; +import { Docs } from '../../../../documents/Documents'; +import { DocumentView } from '../../DocumentView'; +import { OpenWhere } from '../../OpenWhere'; +import { BaseTool } from './BaseTool'; + +export class GetDocsContentTool extends BaseTool<{ title: string; document_ids: string[] }> { + private _docView: DocumentView; + constructor(docView: DocumentView) { + super( + 'retrieveDocs', + 'Retrieves the contents of all Documents that the user is interacting with in Dash ', + { + title: { + type: 'string', + description: 'the title of the collection that you will be making', + required: 'true', + max_inputs: '1', + }, + }, + 'Provide a mathematical expression to calculate that would work with JavaScript eval().', + 'Runs a calculation and returns the number - uses JavaScript so be sure to use floating point syntax if necessary' + ); + this._docView = docView; + } + + async execute(args: { title: string; document_ids: string[] }): Promise { + // Note: Using eval() can be dangerous. Consider using a safer alternative. 
+ const docs = args.document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); + const collection = Docs.Create.FreeformDocument(docs, { title: args.title }); + this._docView._props.addDocTab(collection, OpenWhere.addRight); //in future, create popup prompting user where to add + return [{ type: 'text', text: 'Collection created in Dash called ' + args.title }]; + } +} +//export function create_collection(docView: DocumentView, document_ids: string[], title: string): string {} diff --git a/src/client/views/nodes/chatbot/tools/DataAnalysisTool.ts b/src/client/views/nodes/chatbot/tools/DataAnalysisTool.ts new file mode 100644 index 000000000..a12ee46e5 --- /dev/null +++ b/src/client/views/nodes/chatbot/tools/DataAnalysisTool.ts @@ -0,0 +1,59 @@ +import { BaseTool } from './BaseTool'; + +export class DataAnalysisTool extends BaseTool<{ csv_file_name: string | string[] }> { + private csv_files_function: () => { filename: string; id: string; text: string }[]; + + constructor(csv_files: () => { filename: string; id: string; text: string }[]) { + super( + 'dataAnalysis', + 'Analyzes, and provides insights, from one or more CSV files', + { + csv_file_name: { + type: 'string', + description: 'Name(s) of the CSV file(s) to analyze', + required: 'true', + max_inputs: '3', + }, + }, + 'Provide the name(s) of up to 3 CSV files to analyze based on the user query and whichever available CSV files may be relevant.', + 'Provides the full CSV file text for your analysis based on the user query and the available CSV file(s). 
' + ); + this.csv_files_function = csv_files; + } + + getFileContent(filename: string): string | undefined { + const files = this.csv_files_function(); + const file = files.find(f => f.filename === filename); + return file?.text; + } + + getFileID(filename: string): string | undefined { + const files = this.csv_files_function(); + const file = files.find(f => f.filename === filename); + return file?.id; + } + + async execute(args: { csv_file_name: string | string[] }): Promise { + const filenames = Array.isArray(args.csv_file_name) ? args.csv_file_name : [args.csv_file_name]; + const results = []; + + for (const filename of filenames) { + const fileContent = this.getFileContent(filename); + const fileID = this.getFileID(filename); + + if (fileContent && fileID) { + results.push({ + type: 'text', + text: `${fileContent}`, + }); + } else { + results.push({ + type: 'text', + text: `File not found: ${filename}`, + }); + } + } + + return results; + } +} diff --git a/src/client/views/nodes/chatbot/tools/GetDocsTool.ts b/src/client/views/nodes/chatbot/tools/GetDocsTool.ts new file mode 100644 index 000000000..f970ca8ee --- /dev/null +++ b/src/client/views/nodes/chatbot/tools/GetDocsTool.ts @@ -0,0 +1,29 @@ +import { DocCast } from '../../../../../fields/Types'; +import { DocServer } from '../../../../DocServer'; +import { Docs } from '../../../../documents/Documents'; +import { DocumentView } from '../../DocumentView'; +import { OpenWhere } from '../../OpenWhere'; +import { BaseTool } from './BaseTool'; + +export class GetDocsTool extends BaseTool<{ title: string; document_ids: string[] }> { + private _docView: DocumentView; + constructor(docView: DocumentView) { + super( + 'retrieveDocs', + 'Retrieves the contents of all Documents that the user is interacting with in Dash', + {}, + 'No need to provide anything. 
Just run the tool and it will retrieve the contents of all Documents that the user is interacting with in Dash.', + 'Returns the the documents in Dash in JSON form. This will include the title of the document, the location in the FreeFormDocument, and the content of the document, any applicable data fields, the layout of the document, etc.' + ); + this._docView = docView; + } + + async execute(args: { title: string; document_ids: string[] }): Promise { + // Note: Using eval() can be dangerous. Consider using a safer alternative. + const docs = args.document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); + const collection = Docs.Create.FreeformDocument(docs, { title: args.title }); + this._docView._props.addDocTab(collection, OpenWhere.addRight); //in future, create popup prompting user where to add + return [{ type: 'text', text: 'Collection created in Dash called ' + args.title }]; + } +} +//export function create_collection(docView: DocumentView, document_ids: string[], title: string): string {} diff --git a/src/client/views/nodes/chatbot/tools/NoTool.ts b/src/client/views/nodes/chatbot/tools/NoTool.ts new file mode 100644 index 000000000..1f0830a77 --- /dev/null +++ b/src/client/views/nodes/chatbot/tools/NoTool.ts @@ -0,0 +1,18 @@ +// tools/NoTool.ts +import { BaseTool } from './BaseTool'; + +export class NoTool extends BaseTool<{}> { + constructor() { + super( + 'no_tool', + 'Use this when no external tool or action is required to answer the question.', + {}, + 'When using the "no_tool" action, simply provide an empty element. The observation will always be "No tool used. Proceed with answering the question."', + 'Use when no external tool or action is required to answer the question.' + ); + } + + async execute(args: {}): Promise { + return [{ type: 'text', text: 'No tool used. Proceed with answering the question.' 
}]; + } +} diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts new file mode 100644 index 000000000..c24306dcd --- /dev/null +++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts @@ -0,0 +1,138 @@ +import { BaseTool } from './BaseTool'; +import { Vectorstore } from '../vectorstore/Vectorstore'; +import { RAGChunk } from '../types/types'; +import * as fs from 'fs'; +import { Networking } from '../../../../Network'; +import { file } from 'jszip'; +import { ChatCompletion, ChatCompletionContentPart, ChatCompletionMessageParam } from 'openai/resources'; + +export class RAGTool extends BaseTool { + constructor(private vectorstore: Vectorstore) { + super( + 'rag', + 'Perform a RAG search on user documents', + { + hypothetical_document_chunk: { + type: 'string', + description: + "Detailed version of the prompt that is effectively a hypothetical document chunk that would be ideal to embed and compare to the vectors of real document chunks to fetch the most relevant document chunks to answer the user's query", + required: 'true', + }, + }, + ` + Your task is to provide a comprehensive response to the user's prompt based on the given chunks and chat history. Follow these structural guidelines meticulously: + + 1. Overall Structure: + + [Main content with grounded_text tags interspersed with normal plain text (information that is not derived from chunks' information)] + + [Individual citation tags] + + + [Three question tags] + + + + 2. Grounded Text Tag Structure: + - Basic format: + + [Your generated text based on information from a subset of a chunk (a citation's direct text)] + + + 3. Citation Tag Structure: + + [For text: relevant subset of original chunk] + [For image/table: leave empty] + + + 4. Detailed Grounded Text Guidelines: + a. Wrap all information derived from chunks in grounded_text tags. + b. 
DO NOT PUT ANYTHING THAT IS NOT DIRECTLY DERIVED FROM INFORMATION FROM CHUNKS (EITHER IMAGE, TABLE, OR TEXT) IN GROUNDED_TEXT TAGS. + c. Use a single grounded_text tag for suquential and closely related information that references the same citation. If other citations' information are used sequentially, create new grounded_text tags. + d. Ensure every grounded_text tag has up to a few corresponding citations (should not be more than 3 and only 1 is fine). Multiple citation indices should be separated by commas. + e. Grounded text can be as short as a few words or as long as several sentences. + f. Avoid overlapping or nesting grounded_text tags; instead, use sequential tags. + + 5. Detailed Citation Guidelines: + a. Create a unique citation for each distinct piece of information from the chunks that is used to support grounded_text. + b. ALL TEXT CITATIONS must have direct text in its element content (e.g. DIRECT TEXT HERE) that is a relevant SUBSET of the original text chunk that is being cited specifically. + c. DO NOT paraphrase or summarize the text; use the original text as much as possible. + d. DO NOT USE THE FULL TEXT CHUNK as the citation content; only use the relevant subset of the text that the grounded_text is base. AS SHORT AS POSSIBLE WHILE PROVIDING INFORMATION (ONE TO TWO SENTENCES USUALLY)! + e. Ensure each citation has a unique index number. + f. Specify the correct type: "text", "image", or "table". + g. For text chunks, the content of the citation should ALWAYS have the relevant subset of the original text that the grounded_text is based on. + h. For image/table chunks, leave the citation content empty. + i. One citation can be used for multiple grounded_text tags if they are based on the same chunk information. + j. !!!DO NOT OVERCITE - only include citations for information that is directly relevant to the grounded_text. + + 6. Structural Integrity Checks: + a. Ensure all opening tags have corresponding closing tags. + b. 
Verify that all grounded_text tags have valid citation_index attributes (they should be equal to the associated citation(s) index field—not their chunk_id field). + c. Check that all cited indices in grounded_text tags have corresponding citations. + + Example of grounded_text usage: + + + + Artificial Intelligence (AI) is revolutionizing various sectors, with healthcare experiencing significant transformations in areas such as diagnosis and treatment planning. + + + In the field of medical diagnosis, AI has shown remarkable capabilities, particularly in radiology. For instance, AI systems have drastically improved mammogram analysis, achieving 99% accuracy at a rate 30 times faster than human radiologists. + + + This advancement not only enhances the efficiency of healthcare systems but also significantly reduces the occurrence of false positives, leading to fewer unnecessary biopsies and reduced patient stress. + + + + Beyond diagnosis, AI is playing a crucial role in drug discovery and development. By analyzing vast amounts of genetic and molecular data, AI algorithms can identify potential drug candidates much faster than traditional methods. + + + This could potentially reduce the time and cost of bringing new medications to market, especially for rare diseases that have historically received less attention due to limited market potential. + + + [... rest of the content ...] + + + Artificial Intelligence is revolutionizing various industries, with healthcare being one of the most profoundly affected sectors. + AI has shown particular promise in the field of radiology, enhancing the accuracy and speed of image analysis. + According to recent studies, AI systems have achieved 99% accuracy in mammogram analysis, performing the task 30 times faster than human radiologists. + The improvement in mammogram accuracy has led to a significant reduction in false positives, decreasing the need for unnecessary biopsies and reducing patient anxiety. 
+ AI is accelerating the drug discovery process by analyzing complex molecular and genetic data to identify potential drug candidates. + The use of AI in drug discovery could significantly reduce the time and cost associated with bringing new medications to market, particularly for rare diseases. + + + + How might AI-driven personalized medicine impact the cost and accessibility of healthcare in the future? + What measures can be taken to ensure that AI systems in healthcare are free from biases and equally effective for diverse populations? + How could the role of healthcare professionals evolve as AI becomes more integrated into medical practices? + + + `, + + `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a + set of document chunks (either images or text) that can be used to provide a grounded response based on + user documents` + ); + } + + async execute(args: { hypothetical_document_chunk: string }): Promise { + const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk); + const formatted_chunks = await this.getFormattedChunks(relevantChunks); + return formatted_chunks; + } + + async getFormattedChunks(relevantChunks: RAGChunk[]): Promise { + try { + const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks }); + + if (!formattedChunks) { + throw new Error('Failed to format chunks'); + } + + return formattedChunks; + } catch (error) { + console.error('Error formatting chunks:', error); + throw error; + } + } +} diff --git a/src/client/views/nodes/chatbot/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts new file mode 100644 index 000000000..b926cbadc --- /dev/null +++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts @@ -0,0 +1,54 @@ +import { max } from 'lodash'; +import { Networking } from '../../../../Network'; +import { BaseTool } from './BaseTool'; +import { v4 as uuidv4 } from 'uuid'; + +export class SearchTool extends 
BaseTool<{ query: string | string[] }> { + private _addLinkedUrlDoc: (url: string, id: string) => void; + private _max_results: number; + constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 5) { + super( + 'searchTool', + 'Search the web to find a wide range of websites related to a query or multiple queries', + { + query: { + type: 'string', + description: 'The search query or queries to use for finding websites', + required: 'true', + max_inputs: '3', + }, + }, + 'Provide up to 3 search queries to find a broad range of websites. This tool is intended to help you identify relevant websites, but not to be used for providing the final answer. Use this information to determine which specific website to investigate further.', + 'Returns a list of websites and their overviews based on the search queries, helping to identify which websites might contain relevant information.' + ); + this._addLinkedUrlDoc = addLinkedUrlDoc; + this._max_results = max_results; + } + + async execute(args: { query: string | string[] }): Promise { + const queries = Array.isArray(args.query) ? 
args.query : [args.query]; + const allResults = []; + + for (const query of queries) { + try { + const { results } = await Networking.PostToServer('/getWebSearchResults', { query, max_results: this._max_results }); + const data: { type: string; text: string }[] = results.map((result: { url: string; snippet: string }) => { + const id = uuidv4(); + return { + type: 'text', + text: ` + ${result.url} + ${result.snippet} + `, + }; + }); + allResults.push(...data); + } catch (error) { + console.log(error); + allResults.push({ type: 'text', text: `An error occurred while performing the web search for query: ${query}` }); + } + } + + return allResults; + } +} diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts new file mode 100644 index 000000000..2118218f6 --- /dev/null +++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts @@ -0,0 +1,83 @@ +import { Networking } from '../../../../Network'; +import { BaseTool } from './BaseTool'; +import { v4 as uuidv4 } from 'uuid'; + +export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> { + private _addLinkedUrlDoc: (url: string, id: string) => void; + + constructor(addLinkedUrlDoc: (url: string, id: string) => void) { + super( + 'websiteInfoScraper', + 'Scrape detailed information from specific websites relevant to the user query', + { + url: { + type: 'string', + description: 'The URL(s) of the website(s) to scrape', + required: true, + max_inputs: 3, + }, + }, + ` + Your task is to provide a comprehensive response to the user's prompt using the content scraped from relevant websites. Ensure you follow these guidelines for structuring your response: + + 1. Grounded Text Tag Structure: + - Wrap all text derived from the scraped website(s) in tags. + - **Do not include non-sourced information** in tags. + - Use a single tag for content derived from a single website. 
If citing multiple websites, create new tags for each. + - Ensure each tag has a citation index corresponding to the scraped URL. + + 2. Citation Tag Structure: + - Create a tag for each distinct piece of information used from the website(s). + - Each tag must reference a URL chunk using the chunk_id attribute. + - For URL-based citations, leave the citation content empty, but reference the chunk_id and type as 'url'. + + 3. Structural Integrity Checks: + - Ensure all opening and closing tags are matched properly. + - Verify that all citation_index attributes in tags correspond to valid citations. + - Do not over-cite—cite only the most relevant parts of the websites. + + Example Usage: + + + + Based on data from the World Bank, economic growth has stabilized in recent years, following a surge in investments. + + + According to information retrieved from the International Monetary Fund, the inflation rate has been gradually decreasing since 2020. + + + + + + + + + What are the long-term economic impacts of increased investments on GDP? + How might inflation trends affect future monetary policy? + Are there additional factors that could influence economic growth beyond investments and inflation? + + + `, + 'Returns the text content of the webpages for further analysis and grounding.' + ); + this._addLinkedUrlDoc = addLinkedUrlDoc; + } + + async execute(args: { url: string | string[] }): Promise { + const urls = Array.isArray(args.url) ? 
args.url : [args.url]; + const results = []; + + for (const url of urls) { + try { + const { website_plain_text } = await Networking.PostToServer('/scrapeWebsite', { url }); + const id = uuidv4(); + this._addLinkedUrlDoc(url, id); + results.push({ type: 'text', text: `\n${website_plain_text}\n\n` }); + } catch (error) { + results.push({ type: 'text', text: `An error occurred while scraping the website: ${url}` }); + } + } + + return results; + } +} diff --git a/src/client/views/nodes/chatbot/tools/WikipediaTool.ts b/src/client/views/nodes/chatbot/tools/WikipediaTool.ts new file mode 100644 index 000000000..143d91d80 --- /dev/null +++ b/src/client/views/nodes/chatbot/tools/WikipediaTool.ts @@ -0,0 +1,37 @@ +import { title } from 'process'; +import { Networking } from '../../../../Network'; +import { BaseTool } from './BaseTool'; +import axios from 'axios'; +import { v4 as uuidv4 } from 'uuid'; + +export class WikipediaTool extends BaseTool<{ title: string }> { + private _addLinkedUrlDoc: (url: string, id: string) => void; + constructor(addLinkedUrlDoc: (url: string, id: string) => void) { + super( + 'wikipedia', + 'Search Wikipedia and return a summary', + { + title: { + type: 'string', + description: 'The title of the Wikipedia article to search', + required: true, + }, + }, + 'Provide simply the title you want to search on Wikipedia and nothing more. 
If re-using this tool, try a different title for different information.', + 'Returns a summary from searching an article title on Wikipedia' + ); + this._addLinkedUrlDoc = addLinkedUrlDoc; + } + + async execute(args: { title: string }): Promise { + try { + const { text } = await Networking.PostToServer('/getWikipediaSummary', { title: args.title }); + const id = uuidv4(); + const url = `https://en.wikipedia.org/wiki/${args.title.replace(/ /g, '_')}`; + this._addLinkedUrlDoc(url, id); + return [{ type: 'text', text: ` ${text} ` }]; + } catch (error) { + return [{ type: 'text', text: 'An error occurred while fetching the article.' }]; + } + } +} diff --git a/src/client/views/nodes/chatbot/types/types.ts b/src/client/views/nodes/chatbot/types/types.ts new file mode 100644 index 000000000..f5d14ad6a --- /dev/null +++ b/src/client/views/nodes/chatbot/types/types.ts @@ -0,0 +1,129 @@ +export enum ASSISTANT_ROLE { + USER = 'user', + ASSISTANT = 'assistant', +} + +export enum TEXT_TYPE { + NORMAL = 'normal', + GROUNDED = 'grounded', + ERROR = 'error', +} + +export enum CHUNK_TYPE { + TEXT = 'text', + IMAGE = 'image', + TABLE = 'table', + URL = 'url', + CSV = 'CSV', +} + +export enum PROCESSING_TYPE { + THOUGHT = 'thought', + ACTION = 'action', + //eventually migrate error to here +} + +export function getChunkType(type: string): CHUNK_TYPE { + switch (type.toLowerCase()) { + case 'text': + return CHUNK_TYPE.TEXT; + break; + case 'image': + return CHUNK_TYPE.IMAGE; + break; + case 'table': + return CHUNK_TYPE.TABLE; + break; + case 'CSV': + return CHUNK_TYPE.CSV; + break; + case 'url': + return CHUNK_TYPE.URL; + break; + default: + return CHUNK_TYPE.TEXT; + break; + } +} + +export interface ProcessingInfo { + index: number; + type: PROCESSING_TYPE; + content: string; +} + +export interface AssistantMessage { + role: ASSISTANT_ROLE; + content: MessageContent[]; + follow_up_questions?: string[]; + citations?: Citation[]; + processing_info: ProcessingInfo[]; + loop_summary?: 
string; +} + +export interface MessageContent { + index: number; + type: TEXT_TYPE; + text: string; + citation_ids: string[] | null; +} + +export interface Citation { + direct_text?: string; + type: CHUNK_TYPE; + chunk_id: string; + citation_id: string; + url?: string; +} + +export interface RAGChunk { + id: string; + values: number[]; + metadata: { + text: string; + type: CHUNK_TYPE; + original_document: string; + file_path: string; + doc_id: string; + location: string; + start_page: number; + end_page: number; + base64_data?: string | undefined; + page_width?: number | undefined; + page_height?: number | undefined; + }; +} + +export interface SimplifiedChunk { + chunkId: string; + startPage: number; + endPage: number; + location?: string; + chunkType: CHUNK_TYPE; + url?: string; + canDisplay?: boolean; +} + +export interface AI_Document { + purpose: string; + file_name: string; + num_pages: number; + summary: string; + chunks: RAGChunk[]; + type: string; +} + +export interface Tool = Record> { + name: string; + description: string; + parameters: Record; + citationRules: string; + briefSummary: string; + execute: (args: T) => Promise; + getActionRule: () => Record; +} + +export interface AgentMessage { + role: 'system' | 'user' | 'assistant'; + content: string | { type: string; text?: string; image_url?: { url: string } }[]; +} diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts new file mode 100644 index 000000000..07a2b73bc --- /dev/null +++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts @@ -0,0 +1,258 @@ +import { Pinecone, Index, IndexList, PineconeRecord, RecordMetadata, QueryResponse } from '@pinecone-database/pinecone'; +import { CohereClient } from 'cohere-ai'; +import { EmbedResponse } from 'cohere-ai/api'; +import dotenv from 'dotenv'; +import { RAGChunk, AI_Document, CHUNK_TYPE } from '../types/types'; +import { Doc } from '../../../../../fields/Doc'; +import { 
CsvCast, PDFCast, StrCast } from '../../../../../fields/Types'; +import { Networking } from '../../../../Network'; + +dotenv.config(); + +/** + * The Vectorstore class integrates with Pinecone for vector-based document indexing and retrieval, + * and Cohere for text embedding. It handles AI document management, uploads, and query-based retrieval. + */ +export class Vectorstore { + private pinecone: Pinecone; // Pinecone client for managing the vector index. + private index!: Index; // The specific Pinecone index used for document chunks. + private cohere: CohereClient; // Cohere client for generating embeddings. + private indexName: string = 'pdf-chatbot'; // Default name for the index. + private _id: string; // Unique ID for the Vectorstore instance. + private _doc_ids: string[] = []; // List of document IDs handled by this instance. + + documents: AI_Document[] = []; // Store the documents indexed in the vectorstore. + + /** + * Constructor initializes the Pinecone and Cohere clients, sets up the document ID list, + * and initializes the Pinecone index. + * @param id The unique identifier for the vectorstore instance. + * @param doc_ids A function that returns a list of document IDs. + */ + constructor(id: string, doc_ids: () => string[]) { + const pineconeApiKey = process.env.PINECONE_API_KEY; + if (!pineconeApiKey) { + throw new Error('PINECONE_API_KEY is not defined.'); + } + + // Initialize Pinecone and Cohere clients with API keys from the environment. + this.pinecone = new Pinecone({ apiKey: pineconeApiKey }); + this.cohere = new CohereClient({ token: process.env.COHERE_API_KEY }); + this._id = id; + this._doc_ids = doc_ids(); + this.initializeIndex(); + } + + /** + * Initializes the Pinecone index by checking if it exists, and creating it if not. + * The index is set to use the cosine metric for vector similarity. 
+ */ + private async initializeIndex() { + const indexList: IndexList = await this.pinecone.listIndexes(); + + // Check if the index already exists, otherwise create it. + if (!indexList.indexes?.some(index => index.name === this.indexName)) { + await this.pinecone.createIndex({ + name: this.indexName, + dimension: 1024, + metric: 'cosine', + spec: { + serverless: { + cloud: 'aws', + region: 'us-east-1', + }, + }, + }); + } + + // Set the index for future use. + this.index = this.pinecone.Index(this.indexName); + } + + /** + * Adds an AI document to the vectorstore. This method handles document chunking, uploading to the + * vectorstore, and updating the progress for long-running tasks like file uploads. + * @param doc The document to be added to the vectorstore. + * @param progressCallback Callback to update the progress of the upload. + */ + async addAIDoc(doc: Doc, progressCallback: (progress: number, step: string) => void) { + console.log('Adding AI Document:', doc); + const ai_document_status: string = StrCast(doc.ai_document_status); + + // Skip if the document is already in progress or completed. + if (ai_document_status !== undefined && ai_document_status.trim() !== '' && ai_document_status !== '{}') { + if (ai_document_status === 'IN PROGRESS') { + console.log('Already in progress.'); + return; + } + if (!this._doc_ids.includes(StrCast(doc.ai_doc_id))) { + this._doc_ids.push(StrCast(doc.ai_doc_id)); + } + } else { + // Start processing the document. + doc.ai_document_status = 'PROGRESS'; + console.log(doc); + + // Get the local file path (CSV or PDF). + const local_file_path: string = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname; + console.log('Local File Path:', local_file_path); + + if (local_file_path) { + console.log('Creating AI Document...'); + // Start the document creation process by sending the file to the server. 
+ const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); + + // Poll the server for progress updates. + let inProgress: boolean = true; + let result: any = null; + while (inProgress) { + // Polling interval for status updates. + await new Promise(resolve => setTimeout(resolve, 2000)); + + // Check if the job is completed. + const resultResponse = await Networking.FetchFromServer(`/getResult/${jobId}`); + const resultResponseJson = JSON.parse(resultResponse); + if (resultResponseJson.status === 'completed') { + console.log('Result here:', resultResponseJson); + result = resultResponseJson; + break; + } + + // Fetch progress information and update the progress callback. + const progressResponse = await Networking.FetchFromServer(`/getProgress/${jobId}`); + const progressResponseJson = JSON.parse(progressResponse); + if (progressResponseJson) { + const progress = progressResponseJson.progress; + const step = progressResponseJson.step; + progressCallback(progress, step); + } + } + + // Once completed, process the document and add it to the vectorstore. + console.log('Document JSON:', result); + this.documents.push(result); + await this.indexDocument(result); + console.log(`Document added: ${result.file_name}`); + + // Update document metadata such as summary, purpose, and vectorstore ID. + doc.summary = result.summary; + doc.ai_doc_id = result.doc_id; + this._doc_ids.push(result.doc_id); + doc.ai_purpose = result.purpose; + + if (!doc.vectorstore_id) { + doc.vectorstore_id = JSON.stringify([this._id]); + } else { + doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this._id])); + } + + if (!doc.chunk_simpl) { + doc.chunk_simpl = JSON.stringify({ chunks: [] }); + } + + // Process each chunk of the document and update the document's chunk_simpl field. 
+ result.chunks.forEach((chunk: RAGChunk) => { + const chunkToAdd = { + chunkId: chunk.id, + startPage: chunk.metadata.start_page, + endPage: chunk.metadata.end_page, + location: chunk.metadata.location, + chunkType: chunk.metadata.type as CHUNK_TYPE, + text: chunk.metadata.text, + }; + const new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); + new_chunk_simpl.chunks = new_chunk_simpl.chunks.concat(chunkToAdd); + doc.chunk_simpl = JSON.stringify(new_chunk_simpl); + }); + + // Mark the document status as completed. + doc.ai_document_status = 'COMPLETED'; + } + } + } + + /** + * Indexes the processed document by uploading the document's vector chunks to the Pinecone index. + * @param document The processed document containing its chunks and metadata. + */ + private async indexDocument(document: any) { + console.log('Uploading vectors to content namespace...'); + + // Prepare Pinecone records for each chunk in the document. + const pineconeRecords: PineconeRecord[] = (document.chunks as RAGChunk[]).map(chunk => ({ + id: chunk.id, + values: chunk.values, + metadata: { ...chunk.metadata } as RecordMetadata, + })); + + // Upload the records to Pinecone. + await this.index.upsert(pineconeRecords); + } + + /** + * Retrieves the top K document chunks relevant to the user's query. + * This involves embedding the query using Cohere, then querying Pinecone for matching vectors. + * @param query The search query string. + * @param topK The number of top results to return (default is 10). + * @returns A list of document chunks that match the query. + */ + async retrieve(query: string, topK: number = 10): Promise { + console.log(`Retrieving chunks for query: ${query}`); + try { + // Generate an embedding for the query using Cohere. + const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({ + texts: [query], + model: 'embed-english-v3.0', + inputType: 'search_query', + }); + + let queryEmbedding: number[]; + + // Extract the embedding from the response. 
+ if (Array.isArray(queryEmbeddingResponse.embeddings)) { + queryEmbedding = queryEmbeddingResponse.embeddings[0]; + } else if (queryEmbeddingResponse.embeddings && 'embeddings' in queryEmbeddingResponse.embeddings) { + queryEmbedding = (queryEmbeddingResponse.embeddings as { embeddings: number[][] }).embeddings[0]; + } else { + throw new Error('Invalid embedding response format'); + } + + if (!Array.isArray(queryEmbedding)) { + throw new Error('Query embedding is not an array'); + } + + // Query the Pinecone index using the embedding and filter by document IDs. + const queryResponse: QueryResponse = await this.index.query({ + vector: queryEmbedding, + filter: { + doc_id: { $in: this._doc_ids }, + }, + topK, + includeValues: true, + includeMetadata: true, + }); + + // Map the results into RAGChunks and return them. + return queryResponse.matches.map( + match => + ({ + id: match.id, + values: match.values as number[], + metadata: match.metadata as { + text: string; + type: string; + original_document: string; + file_path: string; + doc_id: string; + location: string; + start_page: number; + end_page: number; + }, + }) as RAGChunk + ); + } catch (error) { + console.error(`Error retrieving chunks: ${error}`); + return []; + } + } +} diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 58d35ce64..88a81c5de 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -8,7 +8,7 @@ import { filesDirectory, publicDirectory } from '../SocketData'; import { Method } from '../RouteManager'; import ApiManager, { Registration } from './ApiManager'; import axios from 'axios'; -import { RAGChunk } from '../../client/views/nodes/ChatBox/types'; +import { RAGChunk } from '../../client/views/nodes/chatbot/types/types'; import { UnstructuredClient } from 'unstructured-client'; import { PartitionResponse } from 'unstructured-client/sdk/models/operations'; import { ChunkingStrategy, 
Strategy } from 'unstructured-client/sdk/models/shared'; -- cgit v1.2.3-70-g09d2 From ba0520baaa1f84d9fb08d3b2880c68302d28350a Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sat, 7 Sep 2024 13:07:33 -0400 Subject: added clarifying structural info to ReAct prompt (still ~69% shorter than previous prompt) and shortened the RAG prompt --- .../views/nodes/chatbot/agentsystem/prompts.ts | 28 +++++- .../nodes/chatbot/chatboxcomponents/ChatBox.tsx | 7 +- src/client/views/nodes/chatbot/tools/RAGTool.ts | 104 +++++---------------- 3 files changed, 52 insertions(+), 87 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts index 9daabc35f..7000d8634 100644 --- a/src/client/views/nodes/chatbot/agentsystem/prompts.ts +++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts @@ -26,12 +26,33 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto Ensure that **ALL answers follow the answer structure**: grounded text wrapped in tags with corresponding citations, normal text in tags, and three follow-up questions at the end. + + + + Always provide a thought before each action to explain why you are choosing the next step or tool. This helps clarify your reasoning for the action you will take. + + + + + + + + Always describe what the action will do in the tag. Be clear about how the tool will process the input and why it is appropriate for this stage. + + + + Provide the actual inputs for the action in the tag. Ensure that each input is specific to the tool being used. Inputs should match the expected parameters for the tool (e.g., a search term for the website scraper, document references for RAG). + + + + + - All information derived from tools or user documents must be wrapped in these tags with proper citation. - - Use this tag for text not derived from tools or user documents. + - Use this tag for text not derived from tools or user documents. 
It should only be for narrative-like text or extremely common knowledge information. - - Provide proper citations for each , referencing the tool or document chunk used. + - Provide proper citations for each , referencing the tool or document chunk used. ENSURE THAT THERE IS A CITATION WHOSE INDEX MATCHES FOR EVERY GROUNDED TEXT CITATION INDEX. - Provide exactly three user-perspective follow-up questions. - Summarize the actions and tools used in the conversation. @@ -41,7 +62,8 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto **Wrap ALL tool-based information** in tags and provide citations. Use separate tags for distinct information or when switching to a different tool or document. - Ensure that **EVERY** tag includes a citation index referencing the source of the information. + Ensure that **EVERY** tag includes a citation index aligned with a citation that you provide that references the source of the information. + There should be a one-to-one relationship between tags and citations. Over-citing is discouraged—only cite the information that is directly relevant to the user's query. diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index 6dc691798..28bfbeae3 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -51,8 +51,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { private vectorstore_id: string; private vectorstore: Vectorstore; private agent: Agent; - private _oldWheel: HTMLDivElement | null = null; - private messagesRef: React.RefObject; + private messagesRef: React.RefObject; /** * Static method that returns the layout string for the field. @@ -246,12 +245,12 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { * @param event The form submission event. 
*/ @action - askGPT = async (event: React.FormEvent): Promise => { + askGPT = async (event: React.FormEvent): Promise => { event.preventDefault(); this.inputValue = ''; // Extract the user's message - const textInput = event.currentTarget.elements.namedItem('messageInput') as HTMLInputElement; + const textInput = (event.currentTarget as HTMLFormElement).elements.namedItem('messageInput') as HTMLInputElement; const trimmedText = textInput.value.trim(); if (trimmedText) { diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts index c24306dcd..f4b7b42ea 100644 --- a/src/client/views/nodes/chatbot/tools/RAGTool.ts +++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts @@ -14,114 +14,58 @@ export class RAGTool extends BaseTool { { hypothetical_document_chunk: { type: 'string', - description: - "Detailed version of the prompt that is effectively a hypothetical document chunk that would be ideal to embed and compare to the vectors of real document chunks to fetch the most relevant document chunks to answer the user's query", + description: "A detailed prompt representing an ideal chunk to embed and compare against document vectors to retrieve the most relevant content for answering the user's query.", required: 'true', }, }, ` - Your task is to provide a comprehensive response to the user's prompt based on the given chunks and chat history. Follow these structural guidelines meticulously: + When using the RAG tool, the structure must adhere to the format described in the ReAct prompt. Below are additional guidelines specifically for RAG-based responses: - 1. Overall Structure: - - [Main content with grounded_text tags interspersed with normal plain text (information that is not derived from chunks' information)] - - [Individual citation tags] - - - [Three question tags] - - - - 2. 
Grounded Text Tag Structure: - - Basic format: - - [Your generated text based on information from a subset of a chunk (a citation's direct text)] - + 1. **Grounded Text Guidelines**: + - Each tag must correspond to exactly one citation, ensuring a one-to-one relationship. + - Always cite a **subset** of the chunk, never the full text. The citation should be as short as possible while providing the relevant information (typically one to two sentences). + - Do not paraphrase the chunk text in the citation; use the original subset directly from the chunk. + - If multiple citations are needed for different sections of the response, create new tags for each. - 3. Citation Tag Structure: - - [For text: relevant subset of original chunk] - [For image/table: leave empty] - + 2. **Citation Guidelines**: + - The citation must include only the relevant excerpt from the chunk being referenced. + - Use unique citation indices and reference the chunk_id for the source of the information. + - For text chunks, the citation content must reflect the **exact subset** of the original chunk that is relevant to the grounded_text tag. - 4. Detailed Grounded Text Guidelines: - a. Wrap all information derived from chunks in grounded_text tags. - b. DO NOT PUT ANYTHING THAT IS NOT DIRECTLY DERIVED FROM INFORMATION FROM CHUNKS (EITHER IMAGE, TABLE, OR TEXT) IN GROUNDED_TEXT TAGS. - c. Use a single grounded_text tag for suquential and closely related information that references the same citation. If other citations' information are used sequentially, create new grounded_text tags. - d. Ensure every grounded_text tag has up to a few corresponding citations (should not be more than 3 and only 1 is fine). Multiple citation indices should be separated by commas. - e. Grounded text can be as short as a few words or as long as several sentences. - f. Avoid overlapping or nesting grounded_text tags; instead, use sequential tags. - - 5. Detailed Citation Guidelines: - a. 
Create a unique citation for each distinct piece of information from the chunks that is used to support grounded_text. - b. ALL TEXT CITATIONS must have direct text in its element content (e.g. DIRECT TEXT HERE) that is a relevant SUBSET of the original text chunk that is being cited specifically. - c. DO NOT paraphrase or summarize the text; use the original text as much as possible. - d. DO NOT USE THE FULL TEXT CHUNK as the citation content; only use the relevant subset of the text that the grounded_text is base. AS SHORT AS POSSIBLE WHILE PROVIDING INFORMATION (ONE TO TWO SENTENCES USUALLY)! - e. Ensure each citation has a unique index number. - f. Specify the correct type: "text", "image", or "table". - g. For text chunks, the content of the citation should ALWAYS have the relevant subset of the original text that the grounded_text is based on. - h. For image/table chunks, leave the citation content empty. - i. One citation can be used for multiple grounded_text tags if they are based on the same chunk information. - j. !!!DO NOT OVERCITE - only include citations for information that is directly relevant to the grounded_text. - - 6. Structural Integrity Checks: - a. Ensure all opening tags have corresponding closing tags. - b. Verify that all grounded_text tags have valid citation_index attributes (they should be equal to the associated citation(s) index field—not their chunk_id field). - c. Check that all cited indices in grounded_text tags have corresponding citations. - - Example of grounded_text usage: + **Example**: - - Artificial Intelligence (AI) is revolutionizing various sectors, with healthcare experiencing significant transformations in areas such as diagnosis and treatment planning. - - - In the field of medical diagnosis, AI has shown remarkable capabilities, particularly in radiology. For instance, AI systems have drastically improved mammogram analysis, achieving 99% accuracy at a rate 30 times faster than human radiologists. 
+ + Artificial Intelligence is revolutionizing various sectors, with healthcare seeing transformations in diagnosis and treatment planning. - - This advancement not only enhances the efficiency of healthcare systems but also significantly reduces the occurrence of false positives, leading to fewer unnecessary biopsies and reduced patient stress. + + Based on recent data, AI has drastically improved mammogram analysis, achieving 99% accuracy at a rate 30 times faster than human radiologists. - - - Beyond diagnosis, AI is playing a crucial role in drug discovery and development. By analyzing vast amounts of genetic and molecular data, AI algorithms can identify potential drug candidates much faster than traditional methods. - - - This could potentially reduce the time and cost of bringing new medications to market, especially for rare diseases that have historically received less attention due to limited market potential. - - - [... rest of the content ...] - Artificial Intelligence is revolutionizing various industries, with healthcare being one of the most profoundly affected sectors. - AI has shown particular promise in the field of radiology, enhancing the accuracy and speed of image analysis. - According to recent studies, AI systems have achieved 99% accuracy in mammogram analysis, performing the task 30 times faster than human radiologists. - The improvement in mammogram accuracy has led to a significant reduction in false positives, decreasing the need for unnecessary biopsies and reducing patient anxiety. - AI is accelerating the drug discovery process by analyzing complex molecular and genetic data to identify potential drug candidates. - The use of AI in drug discovery could significantly reduce the time and cost associated with bringing new medications to market, particularly for rare diseases. + Artificial Intelligence is revolutionizing various industries, especially in healthcare. 
+ - How might AI-driven personalized medicine impact the cost and accessibility of healthcare in the future? - What measures can be taken to ensure that AI systems in healthcare are free from biases and equally effective for diverse populations? - How could the role of healthcare professionals evolve as AI becomes more integrated into medical practices? + How can AI enhance patient outcomes in fields outside radiology? + What are the challenges in implementing AI systems across different hospitals? + How might AI-driven advancements impact healthcare costs? `, - `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a - set of document chunks (either images or text) that can be used to provide a grounded response based on - user documents` + `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a set of document chunks (text or images) to provide a grounded response based on user documents.` ); } - async execute(args: { hypothetical_document_chunk: string }): Promise { + async execute(args: { hypothetical_document_chunk: string }): Promise { const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk); const formatted_chunks = await this.getFormattedChunks(relevantChunks); return formatted_chunks; } - async getFormattedChunks(relevantChunks: RAGChunk[]): Promise { + async getFormattedChunks(relevantChunks: RAGChunk[]): Promise { try { const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks }); -- cgit v1.2.3-70-g09d2 From 00b12111c4007ef6ecff645327007a67f6655d8b Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Thu, 19 Sep 2024 09:53:19 -0400 Subject: added comments and fixed some styling and error handling --- .../views/nodes/chatbot/agentsystem/prompts.ts | 12 +- .../nodes/chatbot/chatboxcomponents/ChatBox.scss | 6 +- .../nodes/chatbot/chatboxcomponents/ChatBox.tsx | 11 +- src/server/ApiManagers/AssistantManager.ts | 224 +++++++++++++++------ 4 files changed, 179 insertions(+), 74 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts index 7000d8634..01c30d444 100644 --- a/src/client/views/nodes/chatbot/agentsystem/prompts.ts +++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts @@ -22,8 +22,9 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto **STRUCTURE**: Always use the correct stage tags (e.g., ) for every response. Use only even-numbered stages for your responses. **STOP after every stage and wait for input. Do not combine multiple stages in one response.** If a tool is needed, select the most appropriate tool based on the query. - **If one tool does not yield satisfactory results or fails twice, try another tool that might work better for the query.** + **If one tool does not yield satisfactory results or fails twice, try another tool that might work better for the query.** This often happens with the rag tool, which may not yield great results. If this happens, try the search tool. Ensure that **ALL answers follow the answer structure**: grounded text wrapped in tags with corresponding citations, normal text in tags, and three follow-up questions at the end. + If you use a tool that will do something (i.e. creating a CSV), and want to also use a tool that will provide you with information (i.e. RAG), use the tool that will provide you with information first. Then proceed with the tool that will do something.
@@ -48,8 +49,9 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto + ALL answers must follow this structure and everything must be witin the tag: - - All information derived from tools or user documents must be wrapped in these tags with proper citation. + - All information derived from tools or user documents must be wrapped in these tags with proper citation. This should not be word for word, but paraphrased from the text. - Use this tag for text not derived from tools or user documents. It should only be for narrative-like text or extremely common knowledge information. - Provide proper citations for each , referencing the tool or document chunk used. ENSURE THAT THERE IS A CITATION WHOSE INDEX MATCHES FOR EVERY GROUNDED TEXT CITATION INDEX. @@ -65,6 +67,10 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto Ensure that **EVERY** tag includes a citation index aligned with a citation that you provide that references the source of the information. There should be a one-to-one relationship between tags and citations. Over-citing is discouraged—only cite the information that is directly relevant to the user's query. + Paraphrase the information in the tags, but ensure that the meaning is preserved. + Do not include the full text of the chunk in the citation—only the relevant excerpt. + For text chunks, the citation content must reflect the exact subset of the original chunk that is relevant to the grounded_text tag. + Do not use citations from previous interactions. Only use citations from the current action loop. @@ -130,7 +136,7 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto - With key moments from the World Cup retrieved, I will now use the website scraper tool to gather data on Qatar’s tourism impact during the World Cup. 
+ With key moments from the World Cup retrieved, I will now use the website scraper tool to gather data on Qatar's tourism impact during the World Cup. websiteInfoScraper diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss index 42f6a0d61..50111f678 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss @@ -93,6 +93,7 @@ $transition: all 0.3s ease; display: flex; align-items: center; justify-content: center; + position: relative; &:hover { background-color: darken($primary-color, 10%); @@ -109,10 +110,7 @@ $transition: all 0.3s ease; border: 3px solid rgba(255, 255, 255, 0.3); border-top: 3px solid #fff; border-radius: 50%; - animation: spin 1s linear infinite; - display: flex; - align-items: center; - justify-content: center; + animation: spin 2s linear infinite; } } } diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index 28bfbeae3..d4deff78b 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -23,6 +23,8 @@ import { ClientUtils } from '../../../../../ClientUtils'; import { ProgressBar } from './ProgressBar'; import { DocumentView } from '../../DocumentView'; import { Networking } from '../../../../Network'; +import { PDFViewer } from '../../../pdf/PDFViewer'; +import { PDFBox } from '../../PDFBox'; dotenv.config(); @@ -225,9 +227,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { * Scrolls the chat messages container to the bottom, ensuring the latest message is visible. 
*/ scrollToBottom = () => { - if (this.messagesRef.current) { - this.messagesRef.current.scrollTop = this.messagesRef.current.scrollHeight; - } + // if (this.messagesRef.current) { + // this.messagesRef.current.scrollTop = this.messagesRef.current.scrollHeight; + // } }; /** @@ -445,7 +447,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { const firstView = Array.from(doc[DocViews])[0] as DocumentView; - firstView.ComponentView?.search?.(citation.direct_text ?? ''); + (firstView.ComponentView as PDFBox)?.gotoPage?.(foundChunk.startPage); + (firstView.ComponentView as PDFBox)?.search?.(citation.direct_text ?? ''); }); break; case CHUNK_TYPE.URL: diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 88a81c5de..063ba193a 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -19,6 +19,7 @@ import * as puppeteer from 'puppeteer'; import { JSDOM } from 'jsdom'; import { Readability } from '@mozilla/readability'; +// Enumeration of directories where different file types are stored export enum Directory { parsed_files = 'parsed_files', images = 'images', @@ -32,41 +33,76 @@ export enum Directory { scrape_images = 'scrape_images', } +/** + * Constructs a normalized path to a file in the server's file system. + * @param directory The directory where the file is stored. + * @param filename The name of the file. + * @returns The full normalized path to the file. + */ export function serverPathToFile(directory: Directory, filename: string) { return path.normalize(`${filesDirectory}/${directory}/${filename}`); } +/** + * Constructs a normalized path to a directory in the server's file system. + * @param directory The directory to access. + * @returns The full normalized path to the directory. 
+ */ export function pathToDirectory(directory: Directory) { return path.normalize(`${filesDirectory}/${directory}`); } +/** + * Constructs the client-accessible URL for a file. + * @param directory The directory where the file is stored. + * @param filename The name of the file. + * @returns The URL path to the file. + */ export function clientPathToFile(directory: Directory, filename: string) { return `/files/${directory}/${filename}`; } +// Promisified versions of filesystem functions const writeFileAsync = promisify(writeFile); const readFileAsync = promisify(fs.readFile); +/** + * Class responsible for handling various API routes related to the Assistant functionality. + * This class extends `ApiManager` and handles registration of routes and secure request handlers. + */ export default class AssistantManager extends ApiManager { + /** + * Registers all API routes and initializes necessary services like OpenAI, Scrapfly, and UnstructuredClient. + * @param register The registration method to register routes and handlers. + */ protected initialize(register: Registration): void { + // Initialize OpenAI API with client key const openai = new OpenAI({ - apiKey: process.env._CLIENT_OPENAI_KEY, // Use client key so don't have to set key seperately for client and server. + apiKey: process.env._CLIENT_OPENAI_KEY, dangerouslyAllowBrowser: true, }); + + // Initialize UnstructuredClient for document processing const unstructuredClient = new UnstructuredClient({ security: { apiKeyAuth: process.env._CLIENT_UNSTRUCTURED_API_KEY!, }, }); + + // Initialize ScrapflyClient for scraping purposes const scrapflyClient = new ScrapflyClient({ key: process.env._CLIENT_SCRAPFLY_API_KEY! 
}); + + // Initialize Google Custom Search API const customsearch = google.customsearch('v1'); + // Register Wikipedia summary API route register({ method: Method.POST, subscription: '/getWikipediaSummary', secureHandler: async ({ req, res }) => { const { title } = req.body; try { + // Fetch summary from Wikipedia using axios const response = await axios.get('https://en.wikipedia.org/w/api.php', { params: { action: 'query', @@ -75,25 +111,26 @@ export default class AssistantManager extends ApiManager { format: 'json', }, }); - const summary = response.data.query.search[0].snippet; - if (!summary || summary.length === 0 || summary === '' || summary === ' ') { - res.send({ text: 'No article found with that title.' }); - } else { - res.send({ text: summary }); - } + const summary = response.data.query.search[0]?.snippet || 'No article found with that title.'; + res.send({ text: summary }); } catch (error: any) { - console.error('Error retrieving article summary from Wikipedia:', error); - res.status(500).send({ error: 'Error retrieving article summary from Wikipedia.', details: error.message }); + console.error('Error retrieving Wikipedia summary:', error); + res.status(500).send({ + error: 'Error retrieving article summary from Wikipedia.', + details: error.message, + }); } }, }); + // Register Google Web Search Results API route register({ method: Method.POST, subscription: '/getWebSearchResults', secureHandler: async ({ req, res }) => { const { query, max_results } = req.body; try { + // Fetch search results using Google Custom Search API const response = await customsearch.cse.list({ q: query, cx: process.env._CLIENT_GOOGLE_SEARCH_ENGINE_ID, @@ -111,25 +148,40 @@ export default class AssistantManager extends ApiManager { res.send({ results }); } catch (error: any) { console.error('Error performing web search:', error); - res.status(500).send({ error: 'Failed to perform web search', details: error.message }); + res.status(500).send({ + error: 'Failed to perform web 
search', + details: error.message, + }); } }, }); + // Axios instance with custom headers for scraping const axiosInstance = axios.create({ headers: { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36', }, }); + /** + * Utility function to introduce delay (used for retries). + * @param ms Delay in milliseconds. + */ const delay = (ms: number) => new Promise(resolve => setTimeout(resolve, ms)); + /** + * Function to fetch a URL with retry logic, handling rate limits. + * Retries a request if it fails due to rate limits (HTTP status 429). + * @param url The URL to fetch. + * @param retries The number of retry attempts. + * @param backoff Initial backoff time in milliseconds. + */ const fetchWithRetry = async (url: string, retries = 3, backoff = 300) => { try { const response = await axiosInstance.get(url); return response.data; } catch (error: any) { - if (retries > 0 && error.response && error.response.status === 429) { + if (retries > 0 && error.response?.status === 429) { console.log(`Rate limited. 
Retrying in ${backoff}ms...`); await delay(backoff); return fetchWithRetry(url, retries - 1, backoff * 2); @@ -138,6 +190,7 @@ export default class AssistantManager extends ApiManager { } }; + // Register a proxy fetch API route register({ method: Method.POST, subscription: '/proxyFetch', @@ -154,18 +207,22 @@ export default class AssistantManager extends ApiManager { res.send({ data }); } catch (error: any) { console.error('Error fetching the URL:', error); - res.status(500).send({ error: 'Failed to fetch the URL', details: error.message }); + res.status(500).send({ + error: 'Failed to fetch the URL', + details: error.message, + }); } }, }); + // Register an API route to scrape website content using Puppeteer and JSDOM register({ method: Method.POST, subscription: '/scrapeWebsite', secureHandler: async ({ req, res }) => { const { url } = req.body; try { - // Launch Puppeteer to navigate to the webpage + // Launch Puppeteer browser to navigate to the webpage const browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox'], }); @@ -173,45 +230,47 @@ export default class AssistantManager extends ApiManager { await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'); await page.goto(url, { waitUntil: 'networkidle2' }); - // Get the HTML content of the page + // Extract HTML content const htmlContent = await page.content(); await browser.close(); - // Use JSDOM to parse the HTML content + // Parse HTML content using JSDOM const dom = new JSDOM(htmlContent, { url }); - // Use Readability to extract the readable content + // Extract readable content using Mozilla's Readability API const reader = new Readability(dom.window.document); const article = reader.parse(); if (article) { - // Extract the plain text from the article content const plainText = article.textContent; - - // Return the plain text content res.send({ website_plain_text: plainText }); } else { 
res.status(500).send({ error: 'Failed to extract readable content' }); } } catch (error: any) { console.error('Error scraping website:', error); - res.status(500).send({ error: 'Failed to scrape website', details: error.message }); + res.status(500).send({ + error: 'Failed to scrape website', + details: error.message, + }); } }, }); + // Register an API route to create documents by sending files to a chatbot register({ method: Method.POST, subscription: '/createDocument', secureHandler: async ({ req, res }) => { const { file_path } = req.body; - const public_path = path.join(publicDirectory, file_path); - const file_name = path.basename(file_path); + const public_path = path.join(publicDirectory, file_path); // Resolve the file path in the public directory + const file_name = path.basename(file_path); // Extract the file name from the path try { - // Read file data and convert to base64 + // Read the file data and encode it as base64 const file_data: string = fs.readFileSync(public_path, { encoding: 'base64' }); + // Send the file data to a local chatbot API for document creation const response = await axios.post( 'http://localhost:8080/createDocument', { @@ -225,65 +284,79 @@ export default class AssistantManager extends ApiManager { } ); + // Retrieve the job ID from the response const jobId = response.data['job_id']; console.log('Job ID:', jobId); + // Send the job ID back to the client res.send({ jobId }); } catch (error: any) { console.error('Error communicating with chatbot:', error); - res.status(500).send({ error: 'Failed to communicate with the chatbot', details: error.message }); + res.status(500).send({ + error: 'Failed to communicate with the chatbot', + details: error.message, + }); } }, }); + // Register an API route to check the progress of a document creation job register({ method: Method.GET, subscription: '/getProgress/:jobId', secureHandler: async ({ req, res }) => { - const { jobId } = req.params; + const { jobId } = req.params; // Get the job 
ID from the URL parameters try { + // Query the local API to get the progress of the job const progressResponse = await axios.get(`http://localhost:8080/getProgress/${jobId}`); console.log(`Current step: ${progressResponse.data.step}, Progress within step: ${progressResponse.data.progress}%`); - res.json(progressResponse.data); + res.json(progressResponse.data); // Send the progress data back to the client } catch (error) { console.error('Error getting progress:', error); - res.status(500).send({ error: 'Failed to get progress', details: JSON.parse(error as string).message }); + res.status(500).send({ + error: 'Failed to get progress', + details: error, + }); } }, }); + // Register an API route to get the final result of a document creation job register({ method: Method.GET, subscription: '/getResult/:jobId', secureHandler: async ({ req, res }) => { - const { jobId } = req.params; + const { jobId } = req.params; // Get the job ID from the URL parameters try { + // Query the local API to get the final result of the job const finalResponse = await axios.get(`http://localhost:8080/getResult/${jobId}`); console.log('Result:', finalResponse.data); const result = finalResponse.data; + // If the result contains image or table chunks, save the base64 data as image files if (result.chunks && Array.isArray(result.chunks)) { for (const chunk of result.chunks) { if (chunk.metadata && (chunk.metadata.type === 'image' || chunk.metadata.type === 'table')) { let files_directory = '/files/chunk_images/'; const directory = path.join(publicDirectory, files_directory); + // Ensure the directory exists or create it if (!fs.existsSync(directory)) { fs.mkdirSync(directory); } - const fileName = path.basename(chunk.metadata.file_path); - const filePath = path.join(directory, fileName); + const fileName = path.basename(chunk.metadata.file_path); // Get the file name from the path + const filePath = path.join(directory, fileName); // Create the full file path - // Check if base64_data 
exists + // Check if the chunk contains base64 encoded data if (chunk.metadata.base64_data) { - // Decode Base64 and save as file + // Decode the base64 data and write it to a file const buffer = Buffer.from(chunk.metadata.base64_data, 'base64'); await fs.promises.writeFile(filePath, buffer); - // Update the file path in the chunk + // Update the file path in the chunk's metadata chunk.metadata.file_path = path.join(files_directory, fileName); - chunk.metadata.base64_data = undefined; + chunk.metadata.base64_data = undefined; // Remove the base64 data from the metadata } else { console.warn(`No base64_data found for chunk: ${fileName}`); } @@ -294,32 +367,42 @@ export default class AssistantManager extends ApiManager { console.warn('Not ready'); result.status = 'pending'; } - res.json(result); + res.json(result); // Send the result back to the client } catch (error) { - console.error('Error getting progress:', error); - res.status(500).send({ error: 'Failed to get progress', details: error }); + console.error('Error getting result:', error); + res.status(500).send({ + error: 'Failed to get result', + details: error, + }); } }, }); + // Register an API route to format chunks (e.g., text or image chunks) for display register({ method: Method.POST, subscription: '/formatChunks', secureHandler: async ({ req, res }) => { - const { relevantChunks } = req.body; + const { relevantChunks } = req.body; // Get the relevant chunks from the request body + + // Initialize an array to hold the formatted content const content: { type: string; text?: string; image_url?: { url: string } }[] = [{ type: 'text', text: '' }]; for (const chunk of relevantChunks) { + // Format each chunk by adding its metadata and content content.push({ type: 'text', - text: ``, + text: ``, }); + // If the chunk is an image or table, read the corresponding file and encode it as base64 if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') { try { - const filePath = 
serverPathToFile(Directory.chunk_images, chunk.metadata.file_path); - const imageBuffer = await readFileAsync(filePath); - const base64Image = imageBuffer.toString('base64'); + const filePath = serverPathToFile(Directory.chunk_images, chunk.metadata.file_path); // Get the file path + const imageBuffer = await readFileAsync(filePath); // Read the image file + const base64Image = imageBuffer.toString('base64'); // Convert the image to base64 + + // Add the base64-encoded image to the content array if (base64Image) { content.push({ type: 'image_url', @@ -334,33 +417,35 @@ export default class AssistantManager extends ApiManager { console.error(`Error reading image file for chunk ${chunk.id}:`, error); } } + + // Add the chunk's text content to the formatted content content.push({ type: 'text', text: `${chunk.metadata.text}\n\n` }); } content.push({ type: 'text', text: '' }); + // Send the formatted content back to the client res.send({ formattedChunks: content }); }, }); + // Register an API route to create and save a CSV file on the server register({ method: Method.POST, subscription: '/createCSV', secureHandler: async ({ req, res }) => { const { filename, data } = req.body; - // Validate input + // Validate that both the filename and data are provided if (!filename || !data) { res.status(400).send({ error: 'Filename and data fields are required.' 
}); return; } try { - // Generate a UUID for the file + // Generate a UUID for the file to ensure unique naming const uuidv4 = uuid.v4(); - - // Construct the full filename with the UUID prefix - const fullFilename = `${uuidv4}-${filename}`; + const fullFilename = `${uuidv4}-${filename}`; // Prefix the file name with the UUID // Get the full server path where the file will be saved const serverFilePath = serverPathToFile(Directory.csv, fullFilename); @@ -368,60 +453,73 @@ export default class AssistantManager extends ApiManager { // Write the CSV data (which is a raw string) to the file await writeFileAsync(serverFilePath, data, 'utf8'); - // Construct the full client URL for accessing the file + // Construct the client-accessible URL for the file const fileUrl = clientPathToFile(Directory.csv, fullFilename); - // Return the file URL and UUID to the client + // Send the file URL and UUID back to the client res.send({ fileUrl, id: uuidv4 }); } catch (error: any) { console.error('Error creating CSV file:', error); - res.status(500).send({ error: 'Failed to create CSV file.', details: error.message }); + res.status(500).send({ + error: 'Failed to create CSV file.', + details: error.message, + }); } }, }); + // Register an API route to chunk a document using the UnstructuredClient register({ method: Method.POST, subscription: '/chunkDocument', secureHandler: async ({ req, res }) => { - const { file_path } = req.body; - const public_path = path.join(publicDirectory, file_path); - const file_name = path.basename(file_path); + const { file_path } = req.body; // Get the file path from the request body + const public_path = path.join(publicDirectory, file_path); // Resolve the full path in the public directory + const file_name = path.basename(file_path); // Extract the file name from the path try { - // Read file data and convert to base64 + // Read the file content as a Buffer const file_data = await fs.promises.readFile(public_path); try { + // Use UnstructuredClient to 
partition the document into chunks const result = await unstructuredClient.general.partition({ partitionParameters: { files: { content: file_data, fileName: file_name, }, - strategy: Strategy.Auto, - chunkingStrategy: ChunkingStrategy.ByTitle, - extractImageBlockTypes: ['Image', 'Table'], + strategy: Strategy.Auto, // Automatically determine the chunking strategy + chunkingStrategy: ChunkingStrategy.ByTitle, // Chunk by title + extractImageBlockTypes: ['Image', 'Table'], // Extract images and tables }, }); if (result.statusCode === 200) { console.log(result.elements); const jsonElements = JSON.stringify(result.elements, null, 2); - // Print the processed data. - console.log(jsonElements); - res.send({ document_json: jsonElements }); + console.log(jsonElements); // Log the JSON result of the partitioned elements + res.send({ document_json: jsonElements }); // Send the partitioned data as a JSON response } else { console.error(`Unexpected status code: ${result.statusCode}`); - res.status(result.statusCode).send({ error: 'Failed to process the document', details: result }); + res.status(result.statusCode).send({ + error: 'Failed to process the document', + details: result, + }); } } catch (e: any) { console.error('Error during partitioning:', e); - res.status(500).send({ error: 'Failed to partition the document', details: e.message }); + res.status(500).send({ + error: 'Failed to partition the document', + details: e.message, + }); } } catch (error: any) { console.error('Error reading file:', error); - res.status(500).send({ error: 'Failed to read the file', details: error.message }); + res.status(500).send({ + error: 'Failed to read the file', + details: error.message, + }); } }, }); -- cgit v1.2.3-70-g09d2 From 5441df3cb1ff5986f4e2e871dd3d2c7a312a2d21 Mon Sep 17 00:00:00 2001 From: "A.J. 
Shulman" Date: Thu, 19 Sep 2024 09:55:36 -0400 Subject: got rid of put (unused) --- src/server/RouteManager.ts | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src') diff --git a/src/server/RouteManager.ts b/src/server/RouteManager.ts index 22e608868..d8e0455f6 100644 --- a/src/server/RouteManager.ts +++ b/src/server/RouteManager.ts @@ -8,7 +8,6 @@ import { DashUserModel } from './authentication/DashUserModel'; export enum Method { GET, POST, - PUT, } export interface CoreArguments { @@ -209,9 +208,6 @@ export default class RouteManager { case Method.POST: this.server.post(route, supervised); break; - case Method.PUT: - this.server.put(route, supervised); - break; default: } } -- cgit v1.2.3-70-g09d2 From 2e67d85a1e7921eeb6487d01f7a36f69d725ab84 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Thu, 19 Sep 2024 09:58:26 -0400 Subject: got rid of unnecessary endpoint on server (chunkDocument) --- src/server/ApiManagers/AssistantManager.ts | 69 +----------------------------- 1 file changed, 1 insertion(+), 68 deletions(-) (limited to 'src') diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 063ba193a..d98d32d30 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -13,7 +13,6 @@ import { UnstructuredClient } from 'unstructured-client'; import { PartitionResponse } from 'unstructured-client/sdk/models/operations'; import { ChunkingStrategy, Strategy } from 'unstructured-client/sdk/models/shared'; import * as cheerio from 'cheerio'; -import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk'; import { google } from 'googleapis'; import * as puppeteer from 'puppeteer'; import { JSDOM } from 'jsdom'; @@ -72,7 +71,7 @@ const readFileAsync = promisify(fs.readFile); */ export default class AssistantManager extends ApiManager { /** - * Registers all API routes and initializes necessary services like OpenAI, Scrapfly, and UnstructuredClient. 
+ * Registers all API routes and initializes necessary services like OpenAI and Google Custom Search. * @param register The registration method to register routes and handlers. */ protected initialize(register: Registration): void { @@ -82,16 +81,6 @@ export default class AssistantManager extends ApiManager { dangerouslyAllowBrowser: true, }); - // Initialize UnstructuredClient for document processing - const unstructuredClient = new UnstructuredClient({ - security: { - apiKeyAuth: process.env._CLIENT_UNSTRUCTURED_API_KEY!, - }, - }); - - // Initialize ScrapflyClient for scraping purposes - const scrapflyClient = new ScrapflyClient({ key: process.env._CLIENT_SCRAPFLY_API_KEY! }); - // Initialize Google Custom Search API const customsearch = google.customsearch('v1'); @@ -467,61 +456,5 @@ export default class AssistantManager extends ApiManager { } }, }); - - // Register an API route to chunk a document using the UnstructuredClient - register({ - method: Method.POST, - subscription: '/chunkDocument', - secureHandler: async ({ req, res }) => { - const { file_path } = req.body; // Get the file path from the request body - const public_path = path.join(publicDirectory, file_path); // Resolve the full path in the public directory - const file_name = path.basename(file_path); // Extract the file name from the path - - try { - // Read the file content as a Buffer - const file_data = await fs.promises.readFile(public_path); - - try { - // Use UnstructuredClient to partition the document into chunks - const result = await unstructuredClient.general.partition({ - partitionParameters: { - files: { - content: file_data, - fileName: file_name, - }, - strategy: Strategy.Auto, // Automatically determine the chunking strategy - chunkingStrategy: ChunkingStrategy.ByTitle, // Chunk by title - extractImageBlockTypes: ['Image', 'Table'], // Extract images and tables - }, - }); - - if (result.statusCode === 200) { - console.log(result.elements); - const jsonElements = 
JSON.stringify(result.elements, null, 2); - console.log(jsonElements); // Log the JSON result of the partitioned elements - res.send({ document_json: jsonElements }); // Send the partitioned data as a JSON response - } else { - console.error(`Unexpected status code: ${result.statusCode}`); - res.status(result.statusCode).send({ - error: 'Failed to process the document', - details: result, - }); - } - } catch (e: any) { - console.error('Error during partitioning:', e); - res.status(500).send({ - error: 'Failed to partition the document', - details: e.message, - }); - } - } catch (error: any) { - console.error('Error reading file:', error); - res.status(500).send({ - error: 'Failed to read the file', - details: error.message, - }); - } - }, - }); } } -- cgit v1.2.3-70-g09d2 From badc8362c80ca33d2b3d93dda6a73b3bfb35a214 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Thu, 19 Sep 2024 12:19:04 -0400 Subject: added python files to server --- .gitignore | 1 + src/server/ApiManagers/AssistantManager.ts | 121 +++-- src/server/chunker/pdf_chunker.py | 744 +++++++++++++++++++++++++++++ src/server/chunker/requirements.txt | 15 + startup.sh | 30 ++ 5 files changed, 868 insertions(+), 43 deletions(-) create mode 100644 src/server/chunker/pdf_chunker.py create mode 100644 src/server/chunker/requirements.txt (limited to 'src') diff --git a/.gitignore b/.gitignore index 6a1963b4e..db43e512e 100644 --- a/.gitignore +++ b/.gitignore @@ -17,3 +17,4 @@ debug.log .vscodeignore Dockerfile .vscode/launch.json +src/server/chunker/venv diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index d98d32d30..dfe5d747b 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -17,6 +17,7 @@ import { google } from 'googleapis'; import * as puppeteer from 'puppeteer'; import { JSDOM } from 'jsdom'; import { Readability } from '@mozilla/readability'; +import { spawn } from 'child_process'; // 
Enumeration of directories where different file types are stored export enum Directory { @@ -32,6 +33,10 @@ export enum Directory { scrape_images = 'scrape_images', } +// In-memory job tracking +let jobResults: { [key: string]: any } = {}; +let jobProgress: { [key: string]: any } = {}; + /** * Constructs a normalized path to a file in the server's file system. * @param directory The directory where the file is stored. @@ -246,7 +251,6 @@ export default class AssistantManager extends ApiManager { }, }); - // Register an API route to create documents by sending files to a chatbot register({ method: Method.POST, subscription: '/createDocument', @@ -259,30 +263,18 @@ export default class AssistantManager extends ApiManager { // Read the file data and encode it as base64 const file_data: string = fs.readFileSync(public_path, { encoding: 'base64' }); - // Send the file data to a local chatbot API for document creation - const response = await axios.post( - 'http://localhost:8080/createDocument', - { - file_data, - file_name, - }, - { - headers: { - 'Content-Type': 'application/json', - }, - } - ); + // Generate a unique job ID for tracking + const jobId = uuid.v4(); - // Retrieve the job ID from the response - const jobId = response.data['job_id']; - console.log('Job ID:', jobId); + // Spawn the Python process and track its progress/output + spawnPythonProcess(jobId, file_name, file_data); - // Send the job ID back to the client + // Send the job ID back to the client for tracking res.send({ jobId }); } catch (error: any) { - console.error('Error communicating with chatbot:', error); + console.error('Error initiating document creation:', error); res.status(500).send({ - error: 'Failed to communicate with the chatbot', + error: 'Failed to initiate document creation', details: error.message, }); } @@ -295,17 +287,11 @@ export default class AssistantManager extends ApiManager { subscription: '/getProgress/:jobId', secureHandler: async ({ req, res }) => { const { jobId } = 
req.params; // Get the job ID from the URL parameters - try { - // Query the local API to get the progress of the job - const progressResponse = await axios.get(`http://localhost:8080/getProgress/${jobId}`); - console.log(`Current step: ${progressResponse.data.step}, Progress within step: ${progressResponse.data.progress}%`); - res.json(progressResponse.data); // Send the progress data back to the client - } catch (error) { - console.error('Error getting progress:', error); - res.status(500).send({ - error: 'Failed to get progress', - details: error, - }); + // Check if the job progress is available + if (jobProgress[jobId]) { + res.json(jobProgress[jobId]); + } else { + res.status(404).send({ error: 'Job not found' }); } }, }); @@ -316,11 +302,9 @@ export default class AssistantManager extends ApiManager { subscription: '/getResult/:jobId', secureHandler: async ({ req, res }) => { const { jobId } = req.params; // Get the job ID from the URL parameters - try { - // Query the local API to get the final result of the job - const finalResponse = await axios.get(`http://localhost:8080/getResult/${jobId}`); - console.log('Result:', finalResponse.data); - const result = finalResponse.data; + // Check if the job result is available + if (jobResults[jobId]) { + const result = jobResults[jobId]; // If the result contains image or table chunks, save the base64 data as image files if (result.chunks && Array.isArray(result.chunks)) { @@ -353,16 +337,11 @@ export default class AssistantManager extends ApiManager { } result['status'] = 'completed'; } else { - console.warn('Not ready'); result.status = 'pending'; } res.json(result); // Send the result back to the client - } catch (error) { - console.error('Error getting result:', error); - res.status(500).send({ - error: 'Failed to get result', - details: error, - }); + } else { + res.status(202).send({ status: 'pending' }); } }, }); @@ -458,3 +437,59 @@ export default class AssistantManager extends ApiManager { }); } } + 
+function spawnPythonProcess(jobId: string, file_name: string, file_data: string) { + const pythonPath = + process.platform === 'win32' + ? path.join(__dirname, '../chunker/venv/Scripts/python') // Windows path + : path.join(__dirname, '../chunker/venv/bin/python3'); // Linux/Mac path + + const pythonProcess = spawn(pythonPath, [ + path.join(__dirname, '../chunker/pdf_chunker.py'), // Correct path to Python script + jobId, + file_name, + file_data, + ]); + + let pythonOutput = ''; // Accumulate stdout data + + // Handle stdout data (progress and final results) + pythonProcess.stdout.on('data', data => { + pythonOutput += data.toString(); // Accumulate data + + const lines = pythonOutput.split('\n'); // Handle multi-line JSON + lines.forEach(line => { + if (line.trim()) { + try { + const parsedOutput = JSON.parse(line); // Parse each line of JSON + + if (parsedOutput.job_id && parsedOutput.progress !== undefined) { + jobProgress[parsedOutput.job_id] = { + step: parsedOutput.step, + progress: parsedOutput.progress, + }; + } else if (parsedOutput.chunks) { + jobResults[parsedOutput.job_id] = parsedOutput; + jobProgress[parsedOutput.job_id] = { step: 'Complete', progress: 100 }; + } + } catch (err) { + console.error('Error parsing Python output:', err); + } + } + }); + }); + + // Handle stderr (error logging) + pythonProcess.stderr.on('data', data => { + console.error(`Python script error: ${data}`); + }); + + // Handle process exit + pythonProcess.on('close', code => { + if (code !== 0) { + console.error(`Python process exited with code ${code}`); + console.error(`Command: python3 ${path.join(__dirname, '../chunker/pdf_chunker.py')} ${jobId} ${file_name}`); + jobResults[jobId] = { error: 'Python process failed' }; + } + }); +} diff --git a/src/server/chunker/pdf_chunker.py b/src/server/chunker/pdf_chunker.py new file mode 100644 index 000000000..c9f6737e7 --- /dev/null +++ b/src/server/chunker/pdf_chunker.py @@ -0,0 +1,744 @@ +import asyncio +import concurrent +import 
sys + +from tqdm.asyncio import tqdm_asyncio # Progress bar for async tasks +import PIL +from anthropic import Anthropic # For language model API +from packaging.version import parse # Version checking +import pytesseract # OCR library for text extraction from images +import re +import dotenv # For environment variable loading +from lxml import etree # XML parsing +from tqdm import tqdm # Progress bar for non-async tasks +import fitz # PyMuPDF, PDF processing library +from PIL import Image, ImageDraw # Image processing +from typing import List, Dict, Any, TypedDict # Typing for function annotations +from ultralyticsplus import YOLO # Object detection model (YOLO) +import base64 +import io +import json +import os +import uuid # For generating unique IDs +from enum import Enum # Enums for types like document type and purpose +import cohere # Embedding client +import numpy as np +from PyPDF2 import PdfReader # PDF text extraction +from openai import OpenAI # OpenAI client for text completion +from sklearn.cluster import KMeans # Clustering for summarization + +dotenv.load_dotenv() # Load environment variables + +# Fix for newer versions of PIL +if parse(PIL.__version__) >= parse('10.0.0'): + Image.LINEAR = Image.BILINEAR + +# Global dictionary to track progress of document processing jobs +current_progress = {} + + +def update_progress(job_id, step, progress_value): + """ + Output the progress in JSON format to stdout for the Node.js process to capture. + """ + progress_data = { + "job_id": job_id, + "step": step, + "progress": progress_value + } + print(json.dumps(progress_data)) # Output progress to stdout + sys.stdout.flush() # Ensure it's sent immediately + + +def get_current_progress(): + """ + Return the current progress of all jobs. 
+ """ + return current_progress + + +class ElementExtractor: + def __init__(self, output_folder: str): + self.output_folder = output_folder + self.model = YOLO('keremberke/yolov8m-table-extraction') + self.model.overrides['conf'] = 0.25 + self.model.overrides['iou'] = 0.45 + self.padding = 5 + + async def extract_elements(self, page, padding: int = 20) -> List[Dict[str, Any]]: + tasks = [ + asyncio.create_task(self.extract_tables(page.image, page.page_num)), + asyncio.create_task(self.extract_images(page.page, page.image, page.page_num)) + ] + results = await asyncio.gather(*tasks) + return [item for sublist in results for item in sublist] + + async def extract_tables(self, img: Image.Image, page_num: int) -> List[Dict[str, Any]]: + results = self.model.predict(img, verbose=False) + tables = [] + + for idx, box in enumerate(results[0].boxes): + x1, y1, x2, y2 = map(int, box.xyxy[0]) + + # Draw a red rectangle on the full page image around the table + page_with_outline = img.copy() + draw = ImageDraw.Draw(page_with_outline) + draw.rectangle( + [max(0, x1 + self.padding), max(0, y1 + self.padding), min(page_with_outline.width, x2 + self.padding), + min(page_with_outline.height, y2 + self.padding)], outline="red", width=2) # Draw red outline + + # Save the full page with the red outline + table_filename = f"table_page{page_num + 1}_{idx + 1}.png" + table_path = os.path.join(self.output_folder, table_filename) + page_with_outline.save(table_path) + + # Convert the full-page image with red outline to base64 + base64_data = self.image_to_base64(page_with_outline) + + tables.append({ + 'metadata': { + "type": "table", + "location": [x1 / img.width, y1 / img.height, x2 / img.width, y2 / img.height], + "file_path": table_path, + "start_page": page_num, + "end_page": page_num, + "base64_data": base64_data, + } + }) + + return tables + + async def extract_images(self, page: fitz.Page, img: Image.Image, page_num: int) -> List[Dict[str, Any]]: + images = [] + image_list = 
page.get_images(full=True) + + if not image_list: + return images + + for img_index, img_info in enumerate(image_list): + xref = img_info[0] + #try: + base_image = page.parent.extract_image(xref) + image_bytes = base_image["image"] + image = Image.open(io.BytesIO(image_bytes)) + width_ratio = img.width / page.rect.width + height_ratio = img.height / page.rect.height + + # Get image coordinates or default to page rectangle + rect_list = page.get_image_rects(xref) + if rect_list: + rect = rect_list[0] + x1, y1, x2, y2 = rect + else: + rect = page.rect + x1, y1, x2, y2 = rect + + # Draw a red rectangle on the full page image around the embedded image + page_with_outline = img.copy() + draw = ImageDraw.Draw(page_with_outline) + draw.rectangle([x1 * width_ratio, y1 * height_ratio, x2 * width_ratio, y2 * height_ratio], + outline="red", width=2) # Draw red outline + + # Save the full page with the red outline + image_filename = f"image_page{page_num + 1}_{img_index + 1}.png" + image_path = os.path.join(self.output_folder, image_filename) + page_with_outline.save(image_path) + + # Convert the full-page image with red outline to base64 + base64_data = self.image_to_base64(page_with_outline) + + images.append({ + 'metadata': { + "type": "image", + "location": [x1 / page.rect.width, y1 / page.rect.height, x2 / page.rect.width, + y2 / page.rect.height], + "file_path": image_path, + "start_page": page_num, + "end_page": page_num, + "base64_data": base64_data, + } + }) + + #except Exception as e: + # print(f"Error processing image on page {page_num + 1}, image {img_index + 1}: {str(e)}") + return images + + @staticmethod + def image_to_base64(image: Image.Image) -> str: + buffered = io.BytesIO() + image.save(buffered, format="PNG") + return base64.b64encode(buffered.getvalue()).decode('utf-8') + + +class ChunkMetaData(TypedDict): + """ + A TypedDict that defines the metadata structure for chunks of text and visual elements. 
+ """ + text: str + type: str + original_document: str + file_path: str + doc_id: str + location: str + start_page: int + end_page: int + base64_data: str + + +class Chunk(TypedDict): + """ + A TypedDict that defines the structure for a document chunk, including metadata and embeddings. + """ + id: str + values: List[float] + metadata: ChunkMetaData + + +class Page: + """ + A class that represents a single PDF page, handling its image representation and element masking. + """ + + def __init__(self, page: fitz.Page, page_num: int): + self.page = page + self.page_num = page_num + # Get high-resolution image of the page (for table/image extraction) + self.pix = page.get_pixmap(matrix=fitz.Matrix(2, 2)) + self.image = Image.frombytes("RGB", [self.pix.width, self.pix.height], self.pix.samples) + self.masked_image = self.image.copy() # Image with masked elements (tables/images) + self.draw = ImageDraw.Draw(self.masked_image) + self.elements = [] # List to store extracted elements + + def add_element(self, element): + """ + Adds a detected element (table/image) to the page and masks its location on the page image. + """ + self.elements.append(element) + # Mask the element on the page image by drawing a white rectangle over its location + x1, y1, x2, y2 = [coord * self.image.width if i % 2 == 0 else coord * self.image.height + for i, coord in enumerate(element['metadata']['location'])] + self.draw.rectangle([x1, y1, x2, y2], fill="white") + + +class PDFChunker: + """ + The main class responsible for chunking PDF files into text and visual elements (tables/images). 
+ """ + + def __init__(self, output_folder: str = "output", image_batch_size: int = 5) -> None: + self.client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) # Initialize the Anthropic API client + self.output_folder = output_folder + self.image_batch_size = image_batch_size # Batch size for image processing + self.element_extractor = ElementExtractor(output_folder) # Initialize the element extractor + + async def chunk_pdf(self, file_data: bytes, file_name: str, doc_id: str, job_id: str) -> List[Dict[str, Any]]: + """ + Processes a PDF file, extracting text and visual elements, and returning structured chunks. + """ + with fitz.open(stream=file_data, filetype="pdf") as pdf_document: + num_pages = len(pdf_document) # Get the total number of pages in the PDF + pages = [Page(pdf_document[i], i) for i in tqdm(range(num_pages), desc="Initializing Pages")] + + update_progress(job_id, "Extracting tables and images...", 0) + await self.extract_and_mask_elements(pages, job_id) + + update_progress(job_id, "Processing tables and images...", 0) + await self.process_visual_elements(pages, self.image_batch_size, job_id) + + update_progress(job_id, "Extracting text...", 0) + page_texts = await self.extract_text_from_masked_pages(pages, job_id) + + update_progress(job_id, "Processing text...", 0) + text_chunks = self.chunk_text_with_metadata(page_texts, max_words=1000, job_id=job_id) + + # Combine text and visual elements into a unified structure (chunks) + chunks = self.combine_chunks(text_chunks, [elem for page in pages for elem in page.elements], file_name, + doc_id) + + return chunks + + async def extract_and_mask_elements(self, pages: List[Page], job_id: str): + """ + Extract visual elements (tables and images) from each page and mask them on the page. 
+ """ + total_pages = len(pages) + tasks = [] + + for i, page in enumerate(pages): + tasks.append(asyncio.create_task(self.element_extractor.extract_elements(page))) + progress = ((i + 1) / total_pages) * 100 + update_progress(job_id, "Extracting tables and images...", progress) + + # Gather all extraction results + results = await asyncio.gather(*tasks) + + # Mask the detected elements on the page images + for page, elements in zip(pages, results): + for element in elements: + page.add_element(element) + + async def process_visual_elements(self, pages: List[Page], image_batch_size: int, job_id: str) -> List[ + Dict[str, Any]]: + """ + Process extracted visual elements in batches, generating summaries or descriptions. + """ + pre_elements = [element for page in pages for element in page.elements] # Flatten list of elements + processed_elements = [] + total_batches = (len(pre_elements) // image_batch_size) + 1 + + loop = asyncio.get_event_loop() + with concurrent.futures.ThreadPoolExecutor() as executor: + # Process elements in batches + for i in tqdm(range(0, len(pre_elements), image_batch_size), desc="Processing Visual Elements"): + batch = pre_elements[i:i + image_batch_size] + # Run image summarization in a separate thread + summaries = await loop.run_in_executor( + executor, self.batch_summarize_images, + {j + 1: element.get('metadata').get('base64_data') for j, element in enumerate(batch)} + ) + + # Append generated summaries to the elements + for j, elem in enumerate(batch, start=1): + if j in summaries: + elem['metadata']['text'] = re.sub(r'^(Image|Table):\s*', '', summaries[j]) + processed_elements.append(elem) + + progress = ((i // image_batch_size) + 1) / total_batches * 100 + update_progress(job_id, "Processing tables and images...", progress) + + return processed_elements + + async def extract_text_from_masked_pages(self, pages: List[Page], job_id: str) -> Dict[int, str]: + """ + Extract text from masked page images (where tables and images have been 
masked out). + """ + total_pages = len(pages) + tasks = [] + + for i, page in enumerate(pages): + tasks.append(asyncio.create_task(self.extract_text(page.masked_image, page.page_num))) + progress = ((i + 1) / total_pages) * 100 + update_progress(job_id, "Extracting text...", progress) + + # Return extracted text from each page + return dict(await asyncio.gather(*tasks)) + + @staticmethod + async def extract_text(image: Image.Image, page_num: int) -> (int, str): + """ + Perform OCR on the provided image to extract text. + """ + result = pytesseract.image_to_string(image) + return page_num + 1, result.strip() # Return the page number and extracted text + + def chunk_text_with_metadata(self, page_texts: Dict[int, str], max_words: int, job_id: str) -> List[Dict[str, Any]]: + """ + Break the extracted text into smaller chunks with metadata (e.g., page numbers). + """ + chunks = [] + current_chunk = "" + current_start_page = 0 + total_words = 0 + + def add_chunk(chunk_text, start_page, end_page): + # Add a chunk of text with metadata + chunks.append({ + "text": chunk_text.strip(), + "start_page": start_page, + "end_page": end_page + }) + + total_pages = len(page_texts) + for i, (page_num, text) in enumerate(tqdm(page_texts.items(), desc="Chunking Text")): + sentences = self.split_into_sentences(text) + for sentence in sentences: + word_count = len(sentence.split()) + # If adding this sentence exceeds max_words, create a new chunk + if total_words + word_count > max_words: + add_chunk(current_chunk, current_start_page, page_num) + current_chunk = sentence + " " + current_start_page = page_num + total_words = word_count + else: + current_chunk += sentence + " " + total_words += word_count + current_chunk += "\n\n" + + progress = ((i + 1) / total_pages) * 100 + update_progress(job_id, "Processing text...", progress) + + # Add the last chunk if there is leftover text + if current_chunk.strip(): + add_chunk(current_chunk, current_start_page, page_num) + + return chunks + + 
@staticmethod + def split_into_sentences(text): + """ + Split the text into sentences using regular expressions. + """ + return re.split(r'(?<=[.!?])\s+', text) + + @staticmethod + def combine_chunks(text_chunks: List[Dict[str, Any]], visual_elements: List[Dict[str, Any]], pdf_path: str, + doc_id: str) -> List[Chunk]: + """ + Combine text and visual chunks into a unified list. + """ + combined_chunks = [] + # Add text chunks + for text_chunk in text_chunks: + chunk_metadata: ChunkMetaData = { + "text": text_chunk["text"], + "type": "text", + "original_document": pdf_path, + "file_path": "", + "location": "", + "start_page": text_chunk["start_page"], + "end_page": text_chunk["end_page"], + "base64_data": "", + "doc_id": doc_id, + } + chunk_dict: Chunk = { + "id": str(uuid.uuid4()), + "values": [], + "metadata": chunk_metadata, + } + combined_chunks.append(chunk_dict) + + # Add visual chunks (tables/images) + for elem in visual_elements: + visual_chunk_metadata: ChunkMetaData = { + "type": elem['metadata']['type'], + "file_path": elem['metadata']['file_path'], + "text": elem['metadata'].get('text', ''), + "start_page": elem['metadata']['start_page'], + "end_page": elem['metadata']['end_page'], + "location": str(elem['metadata']['location']), + "base64_data": elem['metadata']['base64_data'], + "doc_id": doc_id, + "original_document": pdf_path, + } + visual_chunk_dict: Chunk = { + "id": str(uuid.uuid4()), + "values": [], + "metadata": visual_chunk_metadata, + } + combined_chunks.append(visual_chunk_dict) + + return combined_chunks + + def batch_summarize_images(self, images: Dict[int, str]) -> Dict[int, str]: + """ + Summarize images or tables by generating descriptive text. + """ + # Prompt for the AI model to summarize images and tables + prompt = f""" + + You are tasked with summarizing a series of {len(images)} images and tables for use in a RAG (Retrieval-Augmented Generation) system. 
+ Your goal is to create concise, informative summaries that capture the essential content of each image or table. + These summaries will be used for embedding, so they should be descriptive and relevant. The image or table will be outlined in red on an image of the full page that it is on. Where necessary, use the context of the full page to heklp with the summary but don't summarize other content on the page. + + + + Identify whether it's an image or a table. + Examine its content carefully. + + Write a detailed summary that captures the main points or visual elements: +
+ After summarizing what the table is about, include the column headers, a detailed summary of the data, and any notable data trends.
+ Describe the main subjects, actions, or notable features. +
+
+ Focus on writing summaries that would make it easy to retrieve the content if compared to a user query using vector similarity search. + Keep summaries concise and include important words that may help with retrieval (but do not include numbers and numerical data). +
+ + + Avoid using special characters like &, <, >, ", ', $, %, etc. Instead, use their word equivalents: + Use "and" instead of &. + Use "dollars" instead of $. + Use "percent" instead of %. + Refrain from using quotation marks " or apostrophes ' unless absolutely necessary. + Ensure your output is in valid XML format. + + + + Enclose all summaries within a root element called <summaries>. + Use <summary> tags to enclose each individual summary. + Include an attribute 'number' in each <summary> tag to indicate the sequence, matching the provided image numbers. + Start each summary by indicating whether it's an image or a table (e.g., "This image shows..." or "The table presents..."). + If an image is completely blank, leave the summary blank (e.g., <summary number="3"></summary>). + + + + Do not replicate the example below—stay grounded to the content of the table or image and describe it completely and accurately. + + <summaries> + <summary number="1"> + The image shows two men shaking hands on stage at a formal event. The man on the left, in a dark suit and glasses, has a professional appearance, possibly an academic or business figure. The man on the right, Tim Cook, CEO of Apple, is recognizable by his silver hair and dark blue blazer. Cook holds a document titled "Tsinghua SEM EMBA," suggesting a link to Tsinghua University’s Executive MBA program. The backdrop displays English and Chinese text about business management and education, with the event dated October 23, 2014. + </summary> + <summary number="2"> + The table compares the company's assets between December 30, 2023, and September 30, 2023. Key changes include an increase in cash and cash equivalents, while marketable securities had a slight rise. Accounts receivable and vendor non-trade receivables decreased. Inventories and other current assets saw minor fluctuations. Non-current assets like marketable securities slightly declined, while property, plant, and equipment remained stable. 
Total assets showed minimal change, holding steady at around three hundred fifty-three billion dollars. + </summary> + <summary number="3"> + The table outlines the company's shareholders' equity as of December 30, 2023, versus September 30, 2023. Common stock and additional paid-in capital increased, and retained earnings shifted from a deficit to a positive figure. Accumulated other comprehensive loss decreased. Overall, total shareholders' equity rose significantly, while total liabilities and equity remained nearly unchanged at about three hundred fifty-three billion dollars. + </summary> + <summary number="4"> + The table details the company's liabilities as of December 30, 2023, compared to September 30, 2023. Current liabilities decreased due to lower accounts payable and other current liabilities, while deferred revenue slightly increased. Commercial paper significantly decreased, and term debt rose modestly. Non-current liabilities were stable, with minimal changes in term debt and other non-current liabilities. Total liabilities dropped from two hundred ninety billion dollars to two hundred seventy-nine billion dollars. + </summary> + <summary number="5"> + </summary> + </summaries> + + + + + Process each image or table in the order provided. + Maintain consistent formatting throughout your response. + Ensure the output is in full, valid XML format with the root <summaries> element and each summary being within a <summary> element with the summary number specified as well. + +
+ """ + content = [] + for number, img in images.items(): + content.append({"type": "text", "text": f"\nImage {number}:\n"}) + content.append({"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": img}}) + + messages = [ + {"role": "user", "content": content} + ] + + try: + response = self.client.messages.create( + model='claude-3-5-sonnet-20240620', + system=prompt, + max_tokens=400 * len(images), # Increased token limit for more detailed summaries + messages=messages, + temperature=0, + extra_headers={"anthropic-beta": "max-tokens-3-5-sonnet-2024-07-15"} + ) + + # Parse the response + text = response.content[0].text + #print(text) + # Attempt to parse and fix the XML if necessary + parser = etree.XMLParser(recover=True) + root = etree.fromstring(text, parser=parser) + # Check if there were errors corrected + # if parser.error_log: + # #print("XML Parsing Errors:") + # for error in parser.error_log: + # #print(error) + # Extract summaries + summaries = {} + for summary in root.findall('summary'): + number = int(summary.get('number')) + content = summary.text.strip() if summary.text else "" + if content: # Only include non-empty summaries + summaries[number] = content + + return summaries + + except Exception: + #print(f"Error in batch_summarize_images: {str(e)}") + #print("Returning placeholder summaries") + return {number: "Error: No summary available" for number in images} + + +class DocumentType(Enum): + PDF = "pdf" + CSV = "csv" + TXT = "txt" + HTML = "html" + + +class FileTypeNotSupportedException(Exception): + """ + Exception raised for unsupported file types. + """ + + def __init__(self, file_extension: str): + self.file_extension = file_extension + self.message = f"File type '{file_extension}' is not supported." + super().__init__(self.message) + + +class Document: + """ + Represents a document being processed, such as a PDF, handling chunking and embedding. 
+ """ + + def __init__(self, file_data: bytes, file_name: str, job_id: str): + self.file_data = file_data + self.file_name = file_name + self.job_id = job_id + self.type = self._get_document_type(file_name) + self.doc_id = job_id # Use job_id as document ID + self.chunks = [] + self.num_pages = 0 + self.summary = "" + + self._process() # Start processing the document + + def _process(self): + """ + Process the document: chunk it, embed chunks, and generate a summary. + """ + pdf_chunker = PDFChunker(output_folder="output") + self.chunks = asyncio.run(pdf_chunker.chunk_pdf(self.file_data, self.file_name, self.doc_id, self.job_id)) + + self.num_pages = self._get_pdf_pages() # Get the number of pages + self._embed_chunks() # Embed the text chunks + self.summary = self._generate_summary() # Generate a summary + + def _get_document_type(self, file_name: str) -> DocumentType: + """ + Determine the document type based on its file extension. + """ + _, extension = os.path.splitext(file_name) + extension = extension.lower().lstrip('.') + try: + return DocumentType(extension) + except ValueError: + raise FileTypeNotSupportedException(extension) + + def _get_pdf_pages(self) -> int: + """ + Get the total number of pages in the PDF. + """ + pdf_file = io.BytesIO(self.file_data) + pdf_reader = PdfReader(pdf_file) + return len(pdf_reader.pages) + + def _embed_chunks(self) -> None: + """ + Embed the text chunks using the Cohere API. 
+ """ + co = cohere.Client(os.getenv("COHERE_API_KEY")) + batch_size = 90 + chunks_len = len(self.chunks) + for i in tqdm(range(0, chunks_len, batch_size), desc="Embedding Chunks"): + batch = self.chunks[i: min(i + batch_size, chunks_len)] + texts = [chunk['metadata']['text'] for chunk in batch] + #try: + chunk_embs_batch = co.embed( + texts=texts, + model="embed-english-v3.0", + input_type="search_document" + ) + for j, emb in enumerate(chunk_embs_batch.embeddings): + self.chunks[i + j]['values'] = emb + #except Exception as e: + #print(f"Error embedding batch for {self.file_name}: {str(e)}") + + def _generate_summary(self) -> str: + """ + Generate a summary of the document using KMeans clustering and a language model. + """ + num_clusters = min(10, len(self.chunks)) + kmeans = KMeans(n_clusters=num_clusters, random_state=42) + doc_chunks = [chunk['values'] for chunk in self.chunks if 'values' in chunk] + cluster_labels = kmeans.fit_predict(doc_chunks) + + # Select representative chunks from each cluster + selected_chunks = [] + for i in range(num_clusters): + cluster_chunks = [chunk for chunk, label in zip(self.chunks, cluster_labels) if label == i] + cluster_embs = [emb for emb, label in zip(doc_chunks, cluster_labels) if label == i] + centroid = kmeans.cluster_centers_[i] + distances = [np.linalg.norm(np.array(emb) - centroid) for emb in cluster_embs] + closest_chunk = cluster_chunks[np.argmin(distances)] + selected_chunks.append(closest_chunk) + + # Combine selected chunks into a summary + combined_text = "\n\n".join([chunk['metadata']['text'] for chunk in selected_chunks]) + + client = OpenAI() # Call OpenAI API for text generation (summarization) + completion = client.chat.completions.create( + model="gpt-3.5-turbo", + messages=[ + {"role": "system", + "content": "You are an AI assistant tasked with summarizing a document. 
You are provided with important chunks from the document and provide a summary, as best you can, of what the document will contain overall. Be concise and brief with your response."}, + {"role": "user", "content": f"""Please provide a comprehensive summary of what you think the document from which these chunks were sampled would be. + Ensure the summary captures the main ideas and key points from all provided chunks. Be concise and brief and only provide the summary in paragraph form. + + Sample text chunks: + ``` + {combined_text} + ``` + ********** + Summary: + """} + ], + max_tokens=300 + ) + return completion.choices[0].message.content.strip() + + def to_json(self) -> str: + """ + Return the document's data in JSON format. + """ + return json.dumps({ + "file_name": self.file_name, + "num_pages": self.num_pages, + "summary": self.summary, + "chunks": self.chunks, + "type": self.type.value, + "doc_id": self.doc_id + }, indent=2) + + +def process_document(file_data, file_name, job_id): + """ + Top-level function to process a document and return the JSON output. + """ + new_document = Document(file_data, file_name, job_id) + return new_document.to_json() + + +def print_progress(job_id, step, progress_value): + """ + Output the progress in JSON format to stdout for the Node.js process to capture. + """ + progress_data = { + "job_id": job_id, + "step": step, + "progress": progress_value + } + print(json.dumps(progress_data)) # Output progress to stdout + sys.stdout.flush() # Ensure it's sent immediately + + +def main(): + """ + Main entry point for the script, called with arguments from Node.js. 
+ """ + if len(sys.argv) != 4: + print(json.dumps({"error": "Invalid arguments"})) + return + + job_id = sys.argv[1] + file_name = sys.argv[2] + file_data = sys.argv[3] + + try: + # Decode the base64 file data + file_bytes = base64.b64decode(file_data) + + # Process the document + document_result = process_document(file_bytes, file_name, job_id) + + # Output the final result as JSON + print(document_result) + sys.stdout.flush() + + except Exception as e: + # If any error occurs, print the error to stdout for Node.js to capture + print(json.dumps({"error": str(e)})) + sys.stdout.flush() + + +if __name__ == "__main__": + main() diff --git a/src/server/chunker/requirements.txt b/src/server/chunker/requirements.txt new file mode 100644 index 000000000..20bd486e5 --- /dev/null +++ b/src/server/chunker/requirements.txt @@ -0,0 +1,15 @@ +anthropic==0.34.0 +cohere==5.8.0 +python-dotenv==1.0.1 +pymupdf==1.22.2 +lxml==5.3.0 +layoutparser==0.3.4 +numpy==1.26.4 +openai==1.40.6 +Pillow==10.4.0 +pytesseract==0.3.10 +PyPDF2==3.0.1 +scikit-learn==1.5.1 +tqdm==4.66.5 +ultralyticsplus==0.0.28 +easyocr==1.7.0 \ No newline at end of file diff --git a/startup.sh b/startup.sh index d741658ad..c0ee53a4f 100644 --- a/startup.sh +++ b/startup.sh @@ -1,4 +1,34 @@ #!/bin/bash +cd /c/Users/dash/Documents/GitHub/Dash-Web/src/server # cd /c/Users/dash/Documents/Dash-Web instead for dash-release +echo "Navigating to the Python chunker directory..." +cd chunker/ + +# Step 3: Set up Python virtual environment (if it doesn't exist, create it) +if [ ! -d "venv" ]; then + echo "Creating Python virtual environment..." + python3 -m venv venv +fi + +# Step 4: Activate the Python virtual environment +echo "Activating Python virtual environment..." +source venv/bin/activate + +# Step 5: Install Python dependencies +echo "Installing Python dependencies..." +pip install -r requirements.txt + +# Step 6: Go back to the main directory where the Node.js app will run +cd .. 
+ +# Step 7: Load environment variables from .env file (if it exists) +if [ -f .env ]; then + echo "Loading environment variables from .env file..." + export $(grep -v '^#' .env | xargs) +else + echo ".env file not found, skipping environment variable loading." +fi + + cd /c/Users/dash/Documents/GitHub/Dash-Web # cd /c/Users/dash/Documents/Dash-Web instead for dash-release npm run start-release # works for browndash \ No newline at end of file -- cgit v1.2.3-70-g09d2 From 2d61b3b0d00c239f05615c691ffbf4b98f3054e9 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Thu, 19 Sep 2024 12:36:18 -0400 Subject: Working now with Python script --- src/server/ApiManagers/AssistantManager.ts | 47 ++++++++++++++++++++---------- src/server/chunker/pdf_chunker.py | 40 +++++++++---------------- 2 files changed, 44 insertions(+), 43 deletions(-) (limited to 'src') diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index dfe5d747b..224d47d3b 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -291,7 +291,10 @@ export default class AssistantManager extends ApiManager { if (jobProgress[jobId]) { res.json(jobProgress[jobId]); } else { - res.status(404).send({ error: 'Job not found' }); + res.json({ + step: 'Processing Document...', + progress: '0', + }); } }, }); @@ -452,43 +455,55 @@ function spawnPythonProcess(jobId: string, file_name: string, file_data: string) ]); let pythonOutput = ''; // Accumulate stdout data + let stderrOutput = ''; // For stderr logs and progress - // Handle stdout data (progress and final results) + // Handle stdout data (final result in JSON format) pythonProcess.stdout.on('data', data => { - pythonOutput += data.toString(); // Accumulate data + pythonOutput += data.toString(); // Accumulate data from stdout + }); - const lines = pythonOutput.split('\n'); // Handle multi-line JSON + // Handle stderr (progress logs or errors) + 
pythonProcess.stderr.on('data', data => { + stderrOutput += data.toString(); + const lines = stderrOutput.split('\n'); lines.forEach(line => { if (line.trim()) { try { - const parsedOutput = JSON.parse(line); // Parse each line of JSON + // Progress and warnings are printed as JSON to stderr + const parsedOutput = JSON.parse(line); + // Handle progress updates if (parsedOutput.job_id && parsedOutput.progress !== undefined) { jobProgress[parsedOutput.job_id] = { step: parsedOutput.step, progress: parsedOutput.progress, }; - } else if (parsedOutput.chunks) { - jobResults[parsedOutput.job_id] = parsedOutput; - jobProgress[parsedOutput.job_id] = { step: 'Complete', progress: 100 }; + } else if (parsedOutput.progress !== undefined) { + jobProgress[jobId] = { + step: parsedOutput.step, + progress: parsedOutput.progress, + }; } } catch (err) { - console.error('Error parsing Python output:', err); + console.error('Progress log from Python:', line); } } }); }); - // Handle stderr (error logging) - pythonProcess.stderr.on('data', data => { - console.error(`Python script error: ${data}`); - }); - // Handle process exit pythonProcess.on('close', code => { - if (code !== 0) { + if (code === 0) { + // Parse final JSON output (stdout) + try { + const finalResult = JSON.parse(pythonOutput); // Parse JSON from stdout + jobResults[jobId] = finalResult; + jobProgress[jobId] = { step: 'Complete', progress: 100 }; + } catch (err) { + console.error('Error parsing final JSON result:', err); + } + } else { console.error(`Python process exited with code ${code}`); - console.error(`Command: python3 ${path.join(__dirname, '../chunker/pdf_chunker.py')} ${jobId} ${file_name}`); jobResults[jobId] = { error: 'Python process failed' }; } }); diff --git a/src/server/chunker/pdf_chunker.py b/src/server/chunker/pdf_chunker.py index c9f6737e7..12e71c29d 100644 --- a/src/server/chunker/pdf_chunker.py +++ b/src/server/chunker/pdf_chunker.py @@ -26,6 +26,12 @@ import numpy as np from PyPDF2 import 
PdfReader # PDF text extraction from openai import OpenAI # OpenAI client for text completion from sklearn.cluster import KMeans # Clustering for summarization +import warnings + +# Silence specific warnings +warnings.filterwarnings('ignore', message="Valid config keys have changed") +warnings.filterwarnings('ignore', message="torch.load") + dotenv.load_dotenv() # Load environment variables @@ -36,7 +42,6 @@ if parse(PIL.__version__) >= parse('10.0.0'): # Global dictionary to track progress of document processing jobs current_progress = {} - def update_progress(job_id, step, progress_value): """ Output the progress in JSON format to stdout for the Node.js process to capture. @@ -46,15 +51,8 @@ def update_progress(job_id, step, progress_value): "step": step, "progress": progress_value } - print(json.dumps(progress_data)) # Output progress to stdout - sys.stdout.flush() # Ensure it's sent immediately - - -def get_current_progress(): - """ - Return the current progress of all jobs. - """ - return current_progress + print(json.dumps(progress_data), file=sys.stderr) # Use stderr for progress logs + sys.stderr.flush() # Ensure it's sent immediately class ElementExtractor: @@ -698,25 +696,13 @@ def process_document(file_data, file_name, job_id): return new_document.to_json() -def print_progress(job_id, step, progress_value): - """ - Output the progress in JSON format to stdout for the Node.js process to capture. - """ - progress_data = { - "job_id": job_id, - "step": step, - "progress": progress_value - } - print(json.dumps(progress_data)) # Output progress to stdout - sys.stdout.flush() # Ensure it's sent immediately - def main(): """ Main entry point for the script, called with arguments from Node.js. 
""" if len(sys.argv) != 4: - print(json.dumps({"error": "Invalid arguments"})) + print(json.dumps({"error": "Invalid arguments"}), file=sys.stderr) return job_id = sys.argv[1] @@ -730,14 +716,14 @@ def main(): # Process the document document_result = process_document(file_bytes, file_name, job_id) - # Output the final result as JSON + # Output the final result as JSON to stdout print(document_result) sys.stdout.flush() except Exception as e: - # If any error occurs, print the error to stdout for Node.js to capture - print(json.dumps({"error": str(e)})) - sys.stdout.flush() + # Print errors to stderr so they don't interfere with JSON output + print(json.dumps({"error": str(e)}), file=sys.stderr) + sys.stderr.flush() if __name__ == "__main__": -- cgit v1.2.3-70-g09d2 From 34a828b2820439b6f7a17a55bca80a8212ba3dca Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Fri, 20 Sep 2024 11:55:24 -0400 Subject: added creating of the venv --- src/server/ApiManagers/AssistantManager.ts | 147 +++++++++++++++++------------ startup.sh | 30 ------ 2 files changed, 87 insertions(+), 90 deletions(-) (limited to 'src') diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 224d47d3b..42e544e1d 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -442,69 +442,96 @@ export default class AssistantManager extends ApiManager { } function spawnPythonProcess(jobId: string, file_name: string, file_data: string) { - const pythonPath = - process.platform === 'win32' - ? 
path.join(__dirname, '../chunker/venv/Scripts/python') // Windows path - : path.join(__dirname, '../chunker/venv/bin/python3'); // Linux/Mac path - - const pythonProcess = spawn(pythonPath, [ - path.join(__dirname, '../chunker/pdf_chunker.py'), // Correct path to Python script - jobId, - file_name, - file_data, - ]); - - let pythonOutput = ''; // Accumulate stdout data - let stderrOutput = ''; // For stderr logs and progress - - // Handle stdout data (final result in JSON format) - pythonProcess.stdout.on('data', data => { - pythonOutput += data.toString(); // Accumulate data from stdout - }); - - // Handle stderr (progress logs or errors) - pythonProcess.stderr.on('data', data => { - stderrOutput += data.toString(); - const lines = stderrOutput.split('\n'); - lines.forEach(line => { - if (line.trim()) { - try { - // Progress and warnings are printed as JSON to stderr - const parsedOutput = JSON.parse(line); - - // Handle progress updates - if (parsedOutput.job_id && parsedOutput.progress !== undefined) { - jobProgress[parsedOutput.job_id] = { - step: parsedOutput.step, - progress: parsedOutput.progress, - }; - } else if (parsedOutput.progress !== undefined) { - jobProgress[jobId] = { - step: parsedOutput.step, - progress: parsedOutput.progress, - }; + const venvPath = path.join(__dirname, '../chunker/venv'); + const requirementsPath = path.join(__dirname, '../chunker/requirements.txt'); + const pythonScriptPath = path.join(__dirname, '../chunker/pdf_chunker.py'); + + // Check if venv exists + if (!fs.existsSync(venvPath)) { + console.log('Virtual environment not found. Creating and setting up...'); + + // Create venv + const createVenvProcess = spawn('python', ['-m', 'venv', venvPath]); + + createVenvProcess.on('close', code => { + if (code !== 0) { + console.error(`Failed to create virtual environment. Exit code: ${code}`); + return; + } + + console.log('Virtual environment created. 
Installing requirements...'); + + // Determine the pip path based on the OS + const pipPath = process.platform === 'win32' ? path.join(venvPath, 'Scripts', 'pip') : path.join(venvPath, 'bin', 'pip'); + + // Install requirements + const installRequirementsProcess = spawn(pipPath, ['install', '-r', requirementsPath]); + + installRequirementsProcess.on('close', code => { + if (code !== 0) { + console.error(`Failed to install requirements. Exit code: ${code}`); + return; + } + + console.log('Requirements installed. Running Python script...'); + runPythonScript(); + }); + }); + } else { + console.log('Virtual environment found. Running Python script...'); + runPythonScript(); + } + + function runPythonScript() { + const pythonPath = process.platform === 'win32' ? path.join(venvPath, 'Scripts', 'python') : path.join(venvPath, 'bin', 'python3'); + + const pythonProcess = spawn(pythonPath, [pythonScriptPath, jobId, file_name, file_data]); + + let pythonOutput = ''; + let stderrOutput = ''; + + pythonProcess.stdout.on('data', data => { + pythonOutput += data.toString(); + }); + + pythonProcess.stderr.on('data', data => { + stderrOutput += data.toString(); + const lines = stderrOutput.split('\n'); + lines.forEach(line => { + if (line.trim()) { + try { + const parsedOutput = JSON.parse(line); + if (parsedOutput.job_id && parsedOutput.progress !== undefined) { + jobProgress[parsedOutput.job_id] = { + step: parsedOutput.step, + progress: parsedOutput.progress, + }; + } else if (parsedOutput.progress !== undefined) { + jobProgress[jobId] = { + step: parsedOutput.step, + progress: parsedOutput.progress, + }; + } + } catch (err) { + console.error('Progress log from Python:', line); } - } catch (err) { - console.error('Progress log from Python:', line); } - } + }); }); - }); - // Handle process exit - pythonProcess.on('close', code => { - if (code === 0) { - // Parse final JSON output (stdout) - try { - const finalResult = JSON.parse(pythonOutput); // Parse JSON from stdout - 
jobResults[jobId] = finalResult; - jobProgress[jobId] = { step: 'Complete', progress: 100 }; - } catch (err) { - console.error('Error parsing final JSON result:', err); + pythonProcess.on('close', code => { + if (code === 0) { + try { + const finalResult = JSON.parse(pythonOutput); + jobResults[jobId] = finalResult; + jobProgress[jobId] = { step: 'Complete', progress: 100 }; + } catch (err) { + console.error('Error parsing final JSON result:', err); + } + } else { + console.error(`Python process exited with code ${code}`); + jobResults[jobId] = { error: 'Python process failed' }; } - } else { - console.error(`Python process exited with code ${code}`); - jobResults[jobId] = { error: 'Python process failed' }; - } - }); + }); + } } diff --git a/startup.sh b/startup.sh index c0ee53a4f..d741658ad 100644 --- a/startup.sh +++ b/startup.sh @@ -1,34 +1,4 @@ #!/bin/bash -cd /c/Users/dash/Documents/GitHub/Dash-Web/src/server # cd /c/Users/dash/Documents/Dash-Web instead for dash-release -echo "Navigating to the Python chunker directory..." -cd chunker/ - -# Step 3: Set up Python virtual environment (if it doesn't exist, create it) -if [ ! -d "venv" ]; then - echo "Creating Python virtual environment..." - python3 -m venv venv -fi - -# Step 4: Activate the Python virtual environment -echo "Activating Python virtual environment..." -source venv/bin/activate - -# Step 5: Install Python dependencies -echo "Installing Python dependencies..." -pip install -r requirements.txt - -# Step 6: Go back to the main directory where the Node.js app will run -cd .. - -# Step 7: Load environment variables from .env file (if it exists) -if [ -f .env ]; then - echo "Loading environment variables from .env file..." - export $(grep -v '^#' .env | xargs) -else - echo ".env file not found, skipping environment variable loading." 
-fi - - cd /c/Users/dash/Documents/GitHub/Dash-Web # cd /c/Users/dash/Documents/Dash-Web instead for dash-release npm run start-release # works for browndash \ No newline at end of file -- cgit v1.2.3-70-g09d2 From a59236e903f984772ae7da017832a4e96620c25a Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sun, 22 Sep 2024 15:07:37 -0400 Subject: creates and installs virtual env --- src/server/ApiManagers/AssistantManager.ts | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 42e544e1d..e940fb121 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -462,11 +462,28 @@ function spawnPythonProcess(jobId: string, file_name: string, file_data: string) console.log('Virtual environment created. Installing requirements...'); // Determine the pip path based on the OS - const pipPath = process.platform === 'win32' ? path.join(venvPath, 'Scripts', 'pip') : path.join(venvPath, 'bin', 'pip'); + const pipPath = process.platform === 'win32' ? path.join(venvPath, 'Scripts', 'pip.exe') : path.join(venvPath, 'bin', 'pip3'); // Try 'pip3' for Unix-like systems + + if (!fs.existsSync(pipPath)) { + console.error(`pip executable not found at ${pipPath}`); + return; + } // Install requirements const installRequirementsProcess = spawn(pipPath, ['install', '-r', requirementsPath]); + installRequirementsProcess.stdout.on('data', data => { + console.log(`pip stdout: ${data}`); + }); + + installRequirementsProcess.stderr.on('data', data => { + console.error(`pip stderr: ${data}`); + }); + + installRequirementsProcess.on('error', error => { + console.error(`Error starting pip process: ${error}`); + }); + installRequirementsProcess.on('close', code => { if (code !== 0) { console.error(`Failed to install requirements. 
Exit code: ${code}`); -- cgit v1.2.3-70-g09d2 From b08befda6d7ec07a0e6653ccf5040474886dcd44 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Mon, 23 Sep 2024 08:55:37 -0400 Subject: added comments to pdf chunker --- src/server/chunker/pdf_chunker.py | 317 ++++++++++++++++++++++++++------------ 1 file changed, 215 insertions(+), 102 deletions(-) (limited to 'src') diff --git a/src/server/chunker/pdf_chunker.py b/src/server/chunker/pdf_chunker.py index 12e71c29d..4fe3b9dbf 100644 --- a/src/server/chunker/pdf_chunker.py +++ b/src/server/chunker/pdf_chunker.py @@ -32,7 +32,6 @@ import warnings warnings.filterwarnings('ignore', message="Valid config keys have changed") warnings.filterwarnings('ignore', message="torch.load") - dotenv.load_dotenv() # Load environment variables # Fix for newer versions of PIL @@ -45,6 +44,10 @@ current_progress = {} def update_progress(job_id, step, progress_value): """ Output the progress in JSON format to stdout for the Node.js process to capture. + + :param job_id: The unique identifier for the processing job. + :param step: The current step of the job. + :param progress_value: The percentage of completion for the current step. """ progress_data = { "job_id": job_id, @@ -56,27 +59,50 @@ def update_progress(job_id, step, progress_value): class ElementExtractor: + """ + A class that uses a YOLO model to extract tables and images from a PDF page. + """ + def __init__(self, output_folder: str): + """ + Initializes the ElementExtractor with the output folder for saving images and the YOLO model. + + :param output_folder: Path to the folder where extracted elements will be saved. 
+ """ self.output_folder = output_folder - self.model = YOLO('keremberke/yolov8m-table-extraction') - self.model.overrides['conf'] = 0.25 - self.model.overrides['iou'] = 0.45 - self.padding = 5 + self.model = YOLO('keremberke/yolov8m-table-extraction') # Load YOLO model for table extraction + self.model.overrides['conf'] = 0.25 # Set confidence threshold for detection + self.model.overrides['iou'] = 0.45 # Set Intersection over Union (IoU) threshold + self.padding = 5 # Padding around detected elements async def extract_elements(self, page, padding: int = 20) -> List[Dict[str, Any]]: + """ + Asynchronously extract tables and images from a PDF page. + + :param page: A Page object representing a PDF page. + :param padding: Padding around the extracted elements. + :return: A list of dictionaries containing the extracted elements. + """ tasks = [ - asyncio.create_task(self.extract_tables(page.image, page.page_num)), - asyncio.create_task(self.extract_images(page.page, page.image, page.page_num)) + asyncio.create_task(self.extract_tables(page.image, page.page_num)), # Extract tables from the page + asyncio.create_task(self.extract_images(page.page, page.image, page.page_num)) # Extract images from the page ] - results = await asyncio.gather(*tasks) - return [item for sublist in results for item in sublist] + results = await asyncio.gather(*tasks) # Wait for both tasks to complete + return [item for sublist in results for item in sublist] # Flatten and return results async def extract_tables(self, img: Image.Image, page_num: int) -> List[Dict[str, Any]]: - results = self.model.predict(img, verbose=False) + """ + Asynchronously extract tables from a given page image using the YOLO model. + + :param img: The image of the PDF page. + :param page_num: The current page number. + :return: A list of dictionaries with metadata about the detected tables. 
+ """ + results = self.model.predict(img, verbose=False) # Predict table locations using YOLO tables = [] for idx, box in enumerate(results[0].boxes): - x1, y1, x2, y2 = map(int, box.xyxy[0]) + x1, y1, x2, y2 = map(int, box.xyxy[0]) # Extract bounding box coordinates # Draw a red rectangle on the full page image around the table page_with_outline = img.copy() @@ -107,20 +133,27 @@ class ElementExtractor: return tables async def extract_images(self, page: fitz.Page, img: Image.Image, page_num: int) -> List[Dict[str, Any]]: + """ + Asynchronously extract embedded images from a PDF page. + + :param page: A fitz.Page object representing the PDF page. + :param img: The image of the PDF page. + :param page_num: The current page number. + :return: A list of dictionaries with metadata about the detected images. + """ images = [] - image_list = page.get_images(full=True) + image_list = page.get_images(full=True) # Get a list of images on the page if not image_list: return images for img_index, img_info in enumerate(image_list): - xref = img_info[0] - #try: - base_image = page.parent.extract_image(xref) + xref = img_info[0] # XREF of the image in the PDF + base_image = page.parent.extract_image(xref) # Extract the image by its XREF image_bytes = base_image["image"] - image = Image.open(io.BytesIO(image_bytes)) - width_ratio = img.width / page.rect.width - height_ratio = img.height / page.rect.height + image = Image.open(io.BytesIO(image_bytes)) # Convert bytes to PIL image + width_ratio = img.width / page.rect.width # Scale factor for width + height_ratio = img.height / page.rect.height # Scale factor for height # Get image coordinates or default to page rectangle rect_list = page.get_image_rects(xref) @@ -157,15 +190,19 @@ class ElementExtractor: } }) - #except Exception as e: - # print(f"Error processing image on page {page_num + 1}, image {img_index + 1}: {str(e)}") return images @staticmethod def image_to_base64(image: Image.Image) -> str: + """ + Convert a PIL image to 
a base64-encoded string. + + :param image: The PIL image to be converted. + :return: The base64-encoded string of the image. + """ buffered = io.BytesIO() - image.save(buffered, format="PNG") - return base64.b64encode(buffered.getvalue()).decode('utf-8') + image.save(buffered, format="PNG") # Save image as PNG to an in-memory buffer + return base64.b64encode(buffered.getvalue()).decode('utf-8') # Convert to base64 and return class ChunkMetaData(TypedDict): @@ -198,6 +235,12 @@ class Page: """ def __init__(self, page: fitz.Page, page_num: int): + """ + Initializes the Page with its page number and the image representation of the page. + + :param page: A fitz.Page object representing the PDF page. + :param page_num: The number of the page in the PDF. + """ self.page = page self.page_num = page_num # Get high-resolution image of the page (for table/image extraction) @@ -210,12 +253,14 @@ class Page: def add_element(self, element): """ Adds a detected element (table/image) to the page and masks its location on the page image. + + :param element: A dictionary containing metadata about the detected element. """ self.elements.append(element) # Mask the element on the page image by drawing a white rectangle over its location x1, y1, x2, y2 = [coord * self.image.width if i % 2 == 0 else coord * self.image.height for i, coord in enumerate(element['metadata']['location'])] - self.draw.rectangle([x1, y1, x2, y2], fill="white") + self.draw.rectangle([x1, y1, x2, y2], fill="white") # Draw a white rectangle to mask the element class PDFChunker: @@ -224,6 +269,12 @@ class PDFChunker: """ def __init__(self, output_folder: str = "output", image_batch_size: int = 5) -> None: + """ + Initializes the PDFChunker with an output folder and an element extractor for visual elements. + + :param output_folder: Folder to store the output files (extracted tables/images). + :param image_batch_size: The batch size for processing visual elements. 
+ """ self.client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) # Initialize the Anthropic API client self.output_folder = output_folder self.image_batch_size = image_batch_size # Batch size for image processing @@ -232,22 +283,28 @@ class PDFChunker: async def chunk_pdf(self, file_data: bytes, file_name: str, doc_id: str, job_id: str) -> List[Dict[str, Any]]: """ Processes a PDF file, extracting text and visual elements, and returning structured chunks. + + :param file_data: The binary data of the PDF file. + :param file_name: The name of the PDF file. + :param doc_id: The unique document ID for this job. + :param job_id: The unique job ID for the processing task. + :return: A list of structured chunks containing text and visual elements. """ with fitz.open(stream=file_data, filetype="pdf") as pdf_document: num_pages = len(pdf_document) # Get the total number of pages in the PDF - pages = [Page(pdf_document[i], i) for i in tqdm(range(num_pages), desc="Initializing Pages")] + pages = [Page(pdf_document[i], i) for i in tqdm(range(num_pages), desc="Initializing Pages")] # Initialize each page update_progress(job_id, "Extracting tables and images...", 0) - await self.extract_and_mask_elements(pages, job_id) + await self.extract_and_mask_elements(pages, job_id) # Extract and mask elements (tables/images) update_progress(job_id, "Processing tables and images...", 0) - await self.process_visual_elements(pages, self.image_batch_size, job_id) + await self.process_visual_elements(pages, self.image_batch_size, job_id) # Process visual elements update_progress(job_id, "Extracting text...", 0) - page_texts = await self.extract_text_from_masked_pages(pages, job_id) + page_texts = await self.extract_text_from_masked_pages(pages, job_id) # Extract text from masked pages update_progress(job_id, "Processing text...", 0) - text_chunks = self.chunk_text_with_metadata(page_texts, max_words=1000, job_id=job_id) + text_chunks = self.chunk_text_with_metadata(page_texts, 
max_words=1000, job_id=job_id) # Chunk text into smaller parts # Combine text and visual elements into a unified structure (chunks) chunks = self.combine_chunks(text_chunks, [elem for page in pages for elem in page.elements], file_name, @@ -258,13 +315,16 @@ class PDFChunker: async def extract_and_mask_elements(self, pages: List[Page], job_id: str): """ Extract visual elements (tables and images) from each page and mask them on the page. + + :param pages: A list of Page objects representing the PDF pages. + :param job_id: The unique job ID for the processing task. """ total_pages = len(pages) tasks = [] for i, page in enumerate(pages): - tasks.append(asyncio.create_task(self.element_extractor.extract_elements(page))) - progress = ((i + 1) / total_pages) * 100 + tasks.append(asyncio.create_task(self.element_extractor.extract_elements(page))) # Extract elements asynchronously + progress = ((i + 1) / total_pages) * 100 # Calculate progress update_progress(job_id, "Extracting tables and images...", progress) # Gather all extraction results @@ -273,16 +333,20 @@ class PDFChunker: # Mask the detected elements on the page images for page, elements in zip(pages, results): for element in elements: - page.add_element(element) + page.add_element(element) # Mask each extracted element on the page - async def process_visual_elements(self, pages: List[Page], image_batch_size: int, job_id: str) -> List[ - Dict[str, Any]]: + async def process_visual_elements(self, pages: List[Page], image_batch_size: int, job_id: str) -> List[Dict[str, Any]]: """ Process extracted visual elements in batches, generating summaries or descriptions. + + :param pages: A list of Page objects representing the PDF pages. + :param image_batch_size: The batch size for processing visual elements. + :param job_id: The unique job ID for the processing task. + :return: A list of processed elements with metadata and generated summaries. 
""" pre_elements = [element for page in pages for element in page.elements] # Flatten list of elements processed_elements = [] - total_batches = (len(pre_elements) // image_batch_size) + 1 + total_batches = (len(pre_elements) // image_batch_size) + 1 # Calculate total number of batches loop = asyncio.get_event_loop() with concurrent.futures.ThreadPoolExecutor() as executor: @@ -301,7 +365,7 @@ class PDFChunker: elem['metadata']['text'] = re.sub(r'^(Image|Table):\s*', '', summaries[j]) processed_elements.append(elem) - progress = ((i // image_batch_size) + 1) / total_batches * 100 + progress = ((i // image_batch_size) + 1) / total_batches * 100 # Calculate progress update_progress(job_id, "Processing tables and images...", progress) return processed_elements @@ -309,13 +373,17 @@ class PDFChunker: async def extract_text_from_masked_pages(self, pages: List[Page], job_id: str) -> Dict[int, str]: """ Extract text from masked page images (where tables and images have been masked out). + + :param pages: A list of Page objects representing the PDF pages. + :param job_id: The unique job ID for the processing task. + :return: A dictionary mapping page numbers to extracted text. """ total_pages = len(pages) tasks = [] for i, page in enumerate(pages): - tasks.append(asyncio.create_task(self.extract_text(page.masked_image, page.page_num))) - progress = ((i + 1) / total_pages) * 100 + tasks.append(asyncio.create_task(self.extract_text(page.masked_image, page.page_num))) # Perform OCR on each page + progress = ((i + 1) / total_pages) * 100 # Calculate progress update_progress(job_id, "Extracting text...", progress) # Return extracted text from each page @@ -325,13 +393,22 @@ class PDFChunker: async def extract_text(image: Image.Image, page_num: int) -> (int, str): """ Perform OCR on the provided image to extract text. + + :param image: The PIL image of the page. + :param page_num: The current page number. + :return: A tuple containing the page number and the extracted text. 
""" - result = pytesseract.image_to_string(image) + result = pytesseract.image_to_string(image) # Extract text using Tesseract OCR return page_num + 1, result.strip() # Return the page number and extracted text def chunk_text_with_metadata(self, page_texts: Dict[int, str], max_words: int, job_id: str) -> List[Dict[str, Any]]: """ Break the extracted text into smaller chunks with metadata (e.g., page numbers). + + :param page_texts: A dictionary mapping page numbers to extracted text. + :param max_words: The maximum number of words allowed in a chunk. + :param job_id: The unique job ID for the processing task. + :return: A list of dictionaries containing text chunks with metadata. """ chunks = [] current_chunk = "" @@ -362,7 +439,7 @@ class PDFChunker: total_words += word_count current_chunk += "\n\n" - progress = ((i + 1) / total_pages) * 100 + progress = ((i + 1) / total_pages) * 100 # Calculate progress update_progress(job_id, "Processing text...", progress) # Add the last chunk if there is leftover text @@ -375,6 +452,9 @@ class PDFChunker: def split_into_sentences(text): """ Split the text into sentences using regular expressions. + + :param text: The raw text to be split into sentences. + :return: A list of sentences. """ return re.split(r'(?<=[.!?])\s+', text) @@ -383,6 +463,12 @@ class PDFChunker: doc_id: str) -> List[Chunk]: """ Combine text and visual chunks into a unified list. + + :param text_chunks: A list of dictionaries containing text chunks with metadata. + :param visual_elements: A list of dictionaries containing visual elements (tables/images) with metadata. + :param pdf_path: The path to the original PDF file. + :param doc_id: The unique document ID for this job. + :return: A list of Chunk objects representing the combined data. 
""" combined_chunks = [] # Add text chunks @@ -399,7 +485,7 @@ class PDFChunker: "doc_id": doc_id, } chunk_dict: Chunk = { - "id": str(uuid.uuid4()), + "id": str(uuid.uuid4()), # Generate a unique ID for the chunk "values": [], "metadata": chunk_metadata, } @@ -419,7 +505,7 @@ class PDFChunker: "original_document": pdf_path, } visual_chunk_dict: Chunk = { - "id": str(uuid.uuid4()), + "id": str(uuid.uuid4()), # Generate a unique ID for the visual chunk "values": [], "metadata": visual_chunk_metadata, } @@ -430,6 +516,9 @@ class PDFChunker: def batch_summarize_images(self, images: Dict[int, str]) -> Dict[int, str]: """ Summarize images or tables by generating descriptive text. + + :param images: A dictionary mapping image numbers to base64-encoded image data. + :return: A dictionary mapping image numbers to their generated summaries. """ # Prompt for the AI model to summarize images and tables prompt = f""" @@ -544,118 +633,136 @@ class PDFChunker: #print("Returning placeholder summaries") return {number: "Error: No summary available" for number in images} - class DocumentType(Enum): - PDF = "pdf" - CSV = "csv" - TXT = "txt" - HTML = "html" + """ + Enum representing different types of documents that can be processed. + """ + PDF = "pdf" # PDF file type + CSV = "csv" # CSV file type + TXT = "txt" # Plain text file type + HTML = "html" # HTML file type class FileTypeNotSupportedException(Exception): """ - Exception raised for unsupported file types. + Exception raised when a file type is unsupported during document processing. """ def __init__(self, file_extension: str): + """ + Initialize the exception with the unsupported file extension. + + :param file_extension: The file extension that triggered the exception. + """ self.file_extension = file_extension self.message = f"File type '{file_extension}' is not supported." 
- super().__init__(self.message) + super().__init__(self.message) # Call the parent class constructor with the message class Document: """ - Represents a document being processed, such as a PDF, handling chunking and embedding. + Represents a document being processed, such as a PDF, handling chunking, embedding, and summarization. """ def __init__(self, file_data: bytes, file_name: str, job_id: str): + """ + Initialize the Document with file data, file name, and job ID. + + :param file_data: The binary data of the file being processed. + :param file_name: The name of the file being processed. + :param job_id: The job ID associated with this document processing task. + """ self.file_data = file_data self.file_name = file_name self.job_id = job_id - self.type = self._get_document_type(file_name) - self.doc_id = job_id # Use job_id as document ID - self.chunks = [] - self.num_pages = 0 - self.summary = "" + self.type = self._get_document_type(file_name) # Determine the document type (PDF, CSV, etc.) + self.doc_id = job_id # Use the job ID as the document ID + self.chunks = [] # List to hold text and visual chunks + self.num_pages = 0 # Number of pages in the document (if applicable) + self.summary = "" # The generated summary for the document self._process() # Start processing the document def _process(self): """ - Process the document: chunk it, embed chunks, and generate a summary. + Process the document: extract chunks, embed them, and generate a summary. 
""" - pdf_chunker = PDFChunker(output_folder="output") - self.chunks = asyncio.run(pdf_chunker.chunk_pdf(self.file_data, self.file_name, self.doc_id, self.job_id)) + pdf_chunker = PDFChunker(output_folder="output") # Initialize the PDF chunker + self.chunks = asyncio.run(pdf_chunker.chunk_pdf(self.file_data, self.file_name, self.doc_id, self.job_id)) # Extract chunks - self.num_pages = self._get_pdf_pages() # Get the number of pages - self._embed_chunks() # Embed the text chunks - self.summary = self._generate_summary() # Generate a summary + self.num_pages = self._get_pdf_pages() # Get the number of pages in the document + self._embed_chunks() # Embed the text chunks into embeddings + self.summary = self._generate_summary() # Generate a summary for the document def _get_document_type(self, file_name: str) -> DocumentType: """ Determine the document type based on its file extension. + + :param file_name: The name of the file being processed. + :return: The DocumentType enum value corresponding to the file extension. """ - _, extension = os.path.splitext(file_name) - extension = extension.lower().lstrip('.') + _, extension = os.path.splitext(file_name) # Split the file name to get the extension + extension = extension.lower().lstrip('.') # Convert to lowercase and remove leading period try: - return DocumentType(extension) + return DocumentType(extension) # Try to match the extension to a DocumentType except ValueError: - raise FileTypeNotSupportedException(extension) + raise FileTypeNotSupportedException(extension) # Raise exception if file type is unsupported def _get_pdf_pages(self) -> int: """ - Get the total number of pages in the PDF. + Get the total number of pages in the PDF document. + + :return: The number of pages in the PDF. 
""" - pdf_file = io.BytesIO(self.file_data) - pdf_reader = PdfReader(pdf_file) - return len(pdf_reader.pages) + pdf_file = io.BytesIO(self.file_data) # Convert the file data to an in-memory binary stream + pdf_reader = PdfReader(pdf_file) # Initialize PDF reader + return len(pdf_reader.pages) # Return the number of pages in the PDF def _embed_chunks(self) -> None: """ Embed the text chunks using the Cohere API. """ - co = cohere.Client(os.getenv("COHERE_API_KEY")) - batch_size = 90 - chunks_len = len(self.chunks) + co = cohere.Client(os.getenv("COHERE_API_KEY")) # Initialize Cohere client with API key + batch_size = 90 # Batch size for embedding + chunks_len = len(self.chunks) # Total number of chunks to embed for i in tqdm(range(0, chunks_len, batch_size), desc="Embedding Chunks"): - batch = self.chunks[i: min(i + batch_size, chunks_len)] - texts = [chunk['metadata']['text'] for chunk in batch] - #try: + batch = self.chunks[i: min(i + batch_size, chunks_len)] # Get batch of chunks + texts = [chunk['metadata']['text'] for chunk in batch] # Extract text from each chunk chunk_embs_batch = co.embed( texts=texts, - model="embed-english-v3.0", - input_type="search_document" + model="embed-english-v3.0", # Use Cohere's embedding model + input_type="search_document" # Specify input type ) for j, emb in enumerate(chunk_embs_batch.embeddings): - self.chunks[i + j]['values'] = emb - #except Exception as e: - #print(f"Error embedding batch for {self.file_name}: {str(e)}") + self.chunks[i + j]['values'] = emb # Store the embeddings in the corresponding chunks def _generate_summary(self) -> str: """ Generate a summary of the document using KMeans clustering and a language model. + + :return: The generated summary of the document. 
""" - num_clusters = min(10, len(self.chunks)) - kmeans = KMeans(n_clusters=num_clusters, random_state=42) - doc_chunks = [chunk['values'] for chunk in self.chunks if 'values' in chunk] - cluster_labels = kmeans.fit_predict(doc_chunks) + num_clusters = min(10, len(self.chunks)) # Set number of clusters for KMeans, capped at 10 + kmeans = KMeans(n_clusters=num_clusters, random_state=42) # Initialize KMeans with 10 clusters + doc_chunks = [chunk['values'] for chunk in self.chunks if 'values' in chunk] # Extract embeddings + cluster_labels = kmeans.fit_predict(doc_chunks) # Assign each chunk to a cluster # Select representative chunks from each cluster selected_chunks = [] for i in range(num_clusters): - cluster_chunks = [chunk for chunk, label in zip(self.chunks, cluster_labels) if label == i] - cluster_embs = [emb for emb, label in zip(doc_chunks, cluster_labels) if label == i] - centroid = kmeans.cluster_centers_[i] - distances = [np.linalg.norm(np.array(emb) - centroid) for emb in cluster_embs] - closest_chunk = cluster_chunks[np.argmin(distances)] + cluster_chunks = [chunk for chunk, label in zip(self.chunks, cluster_labels) if label == i] # Get all chunks in this cluster + cluster_embs = [emb for emb, label in zip(doc_chunks, cluster_labels) if label == i] # Get embeddings for this cluster + centroid = kmeans.cluster_centers_[i] # Get the centroid of the cluster + distances = [np.linalg.norm(np.array(emb) - centroid) for emb in cluster_embs] # Compute distance to centroid + closest_chunk = cluster_chunks[np.argmin(distances)] # Select chunk closest to the centroid selected_chunks.append(closest_chunk) # Combine selected chunks into a summary - combined_text = "\n\n".join([chunk['metadata']['text'] for chunk in selected_chunks]) + combined_text = "\n\n".join([chunk['metadata']['text'] for chunk in selected_chunks]) # Concatenate chunk texts - client = OpenAI() # Call OpenAI API for text generation (summarization) + client = OpenAI() # Initialize OpenAI client for 
text generation completion = client.chat.completions.create( - model="gpt-3.5-turbo", + model="gpt-3.5-turbo", # Specify the language model messages=[ {"role": "system", "content": "You are an AI assistant tasked with summarizing a document. You are provided with important chunks from the document and provide a summary, as best you can, of what the document will contain overall. Be concise and brief with your response."}, @@ -670,13 +777,15 @@ class Document: Summary: """} ], - max_tokens=300 + max_tokens=300 # Set max tokens for the summary ) - return completion.choices[0].message.content.strip() + return completion.choices[0].message.content.strip() # Return the generated summary def to_json(self) -> str: """ Return the document's data in JSON format. + + :return: JSON string representing the document's metadata, chunks, and summary. """ return json.dumps({ "file_name": self.file_name, @@ -685,16 +794,20 @@ class Document: "chunks": self.chunks, "type": self.type.value, "doc_id": self.doc_id - }, indent=2) + }, indent=2) # Convert the document's attributes to JSON format def process_document(file_data, file_name, job_id): """ Top-level function to process a document and return the JSON output. - """ - new_document = Document(file_data, file_name, job_id) - return new_document.to_json() + :param file_data: The binary data of the file being processed. + :param file_name: The name of the file being processed. + :param job_id: The job ID for this document processing task. + :return: The processed document's data in JSON format. + """ + new_document = Document(file_data, file_name, job_id) # Create a new Document object + return new_document.to_json() # Return the document's JSON data def main(): @@ -702,12 +815,12 @@ def main(): Main entry point for the script, called with arguments from Node.js. 
""" if len(sys.argv) != 4: - print(json.dumps({"error": "Invalid arguments"}), file=sys.stderr) + print(json.dumps({"error": "Invalid arguments"}), file=sys.stderr) # Print error if incorrect number of arguments return - job_id = sys.argv[1] - file_name = sys.argv[2] - file_data = sys.argv[3] + job_id = sys.argv[1] # Get the job ID from command-line arguments + file_name = sys.argv[2] # Get the file name from command-line arguments + file_data = sys.argv[3] # Get the base64-encoded file data from command-line arguments try: # Decode the base64 file data @@ -727,4 +840,4 @@ def main(): if __name__ == "__main__": - main() + main() # Execute the main function when the script is run -- cgit v1.2.3-70-g09d2 From 139a3cb0b3b081c270187e9b4ca281d04ca923bf Mon Sep 17 00:00:00 2001 From: bobzel Date: Mon, 30 Sep 2024 12:19:22 -0400 Subject: upate AJ assistant from master and fix some lint errors --- eslint.config.mjs | 2 +- src/client/documents/Documents.ts | 9 -- src/client/util/CurrentUserUtils.ts | 8 +- src/client/util/Scripting.ts | 13 +- src/client/views/MainView.tsx | 7 +- .../collections/CollectionNoteTakingViewColumn.tsx | 5 +- src/client/views/nodes/PDFBox.tsx | 21 ++- .../views/nodes/RecordingBox/ProgressBar.tsx | 2 - .../views/nodes/chatbot/agentsystem/Agent.ts | 33 ++-- .../nodes/chatbot/chatboxcomponents/ChatBox.tsx | 61 +++---- .../nodes/chatbot/response_parsers/AnswerParser.ts | 9 +- .../response_parsers/StreamedAnswerParser.ts | 2 - src/client/views/nodes/chatbot/tools/BaseTool.ts | 8 +- .../views/nodes/chatbot/tools/CalculateTool.ts | 2 +- .../views/nodes/chatbot/tools/CreateCSVTool.ts | 2 +- .../nodes/chatbot/tools/CreateCollectionTool.ts | 2 +- .../views/nodes/chatbot/tools/DataAnalysisTool.ts | 2 +- .../views/nodes/chatbot/tools/GetDocsTool.ts | 2 +- src/client/views/nodes/chatbot/tools/NoTool.ts | 5 +- src/client/views/nodes/chatbot/tools/RAGTool.ts | 13 +- src/client/views/nodes/chatbot/tools/SearchTool.ts | 5 +- 
.../nodes/chatbot/tools/WebsiteInfoScraperTool.ts | 5 +- .../views/nodes/chatbot/tools/WikipediaTool.ts | 7 +- src/client/views/nodes/chatbot/types/types.ts | 25 ++- .../views/nodes/chatbot/vectorstore/Vectorstore.ts | 13 +- src/client/views/pdf/PDFViewer.tsx | 46 ++---- src/server/ApiManagers/AssistantManager.ts | 178 ++++++++++----------- 27 files changed, 215 insertions(+), 272 deletions(-) (limited to 'src') diff --git a/eslint.config.mjs b/eslint.config.mjs index aebdc20d0..f7063caa5 100644 --- a/eslint.config.mjs +++ b/eslint.config.mjs @@ -48,7 +48,7 @@ export default [ 'no-return-assign': 'error', 'no-await-in-loop': 'error', 'no-loop-func': 'error', - '@typescript-eslint/no-cond-assign': 'error', + 'no-cond-assign': 'error', 'no-use-before-define': 'error', '@typescript-eslint/no-explicit-any': 'error', 'no-restricted-globals': ['error', 'event'], diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts index e0a9918f4..d77f76b81 100644 --- a/src/client/documents/Documents.ts +++ b/src/client/documents/Documents.ts @@ -1,5 +1,3 @@ -/* eslint-disable prefer-destructuring */ -/* eslint-disable default-param-last */ /* eslint-disable no-use-before-define */ import { reaction } from 'mobx'; import { basename } from 'path'; @@ -671,7 +669,6 @@ export namespace Docs { * only when creating a DockDocument from the current user's already existing * main document. 
*/ - // eslint-disable-next-line default-param-last function InstanceFromProto(proto: Doc, data: FieldType | undefined, options: DocumentOptions, delegId?: string, fieldKey: string = 'data', protoId?: string, placeholderDocIn?: Doc, noView?: boolean) { const placeholderDoc = placeholderDocIn; const viewKeys = ['x', 'y', 'isSystem']; // keys that should be addded to the view document even though they don't begin with an "_" @@ -732,7 +729,6 @@ export namespace Docs { return dataDoc; } - // eslint-disable-next-line default-param-last export function ImageDocument(url: string | ImageField, options: DocumentOptions = {}, overwriteDoc?: Doc) { const imgField = url instanceof ImageField ? url : url ? new ImageField(url) : undefined; return InstanceFromProto(Prototypes.get(DocumentType.IMG), imgField, { title: basename(imgField?.url.href ?? '-no image-'), ...options }, undefined, undefined, undefined, overwriteDoc); @@ -751,7 +747,6 @@ export namespace Docs { * @param fieldKey the field that the compiled script is written into. * @returns the Scripting Doc */ - // eslint-disable-next-line default-param-last export function ScriptingDocument(script: Opt | null, options: DocumentOptions = {}, fieldKey?: string) { return InstanceFromProto(Prototypes.get(DocumentType.SCRIPTING), script || undefined, { ...options, layout: fieldKey ? 
`` /* ScriptingBox.LayoutString(fieldKey) */ : undefined }); } @@ -759,7 +754,6 @@ export namespace Docs { export function ChatDocument(options?: DocumentOptions) { return InstanceFromProto(Prototypes.get(DocumentType.CHAT), undefined, { ...(options || {}) }); } - // eslint-disable-next-line default-param-last export function VideoDocument(url: string, options: DocumentOptions = {}, overwriteDoc?: Doc) { return InstanceFromProto(Prototypes.get(DocumentType.VID), new VideoField(url), options, undefined, undefined, undefined, overwriteDoc); } @@ -779,7 +773,6 @@ export namespace Docs { return InstanceFromProto(Prototypes.get(DocumentType.DIAGRAM), undefined, options); } - // eslint-disable-next-line default-param-last export function AudioDocument(url: string, options: DocumentOptions = {}, overwriteDoc?: Doc) { return InstanceFromProto(Prototypes.get(DocumentType.AUDIO), new AudioField(url), options, undefined, undefined, undefined, overwriteDoc); } @@ -839,7 +832,6 @@ export namespace Docs { return InstanceFromProto(Prototypes.get(DocumentType.RTF), field, options, undefined, fieldKey); } - // eslint-disable-next-line default-param-last export function LinkDocument(source: Doc, target: Doc, options: DocumentOptions = {}, id?: string) { const linkDoc = InstanceFromProto( Prototypes.get(DocumentType.LINK), @@ -883,7 +875,6 @@ export namespace Docs { return ink; } - // eslint-disable-next-line default-param-last export function PdfDocument(url: string, options: DocumentOptions = {}, overwriteDoc?: Doc) { const width = options._width || undefined; const height = options._height || undefined; diff --git a/src/client/util/CurrentUserUtils.ts b/src/client/util/CurrentUserUtils.ts index 96b30c429..09adf70f5 100644 --- a/src/client/util/CurrentUserUtils.ts +++ b/src/client/util/CurrentUserUtils.ts @@ -2,7 +2,7 @@ import { reaction, runInAction } from "mobx"; import * as rp from 'request-promise'; import { ClientUtils, OmitKeys } from "../../ClientUtils"; -import { Doc, 
DocListCast, DocListCastAsync, FieldType, Opt, StrListCast } from "../../fields/Doc"; +import { Doc, DocListCast, DocListCastAsync, FieldType, Opt } from "../../fields/Doc"; import { DocData } from "../../fields/DocSymbols"; import { InkTool } from "../../fields/InkField"; import { List } from "../../fields/List"; @@ -366,11 +366,11 @@ pie title Minerals in my tap water {key: "Button", creator: Docs.Create.ButtonDocument, opts: { _width: 150, _height: 50, _xPadding: 10, _yPadding: 10, title_custom: true, waitForDoubleClickToClick: 'never'}, scripts: {onClick: FollowLinkScript()?.script.originalScript ?? ""}}, {key: "Script", creator: opts => Docs.Create.ScriptingDocument(null, opts), opts: { _width: 200, _height: 250, }}, {key: "DataViz", creator: opts => Docs.Create.DataVizDocument("/users/rz/Downloads/addresses.csv", opts), opts: { _width: 300, _height: 300 }}, - {key: "Chat", creator: Docs.Create.ChatDocument, opts: { _width: 500, _height: 500, }}, + {key: "Chat", creator: Docs.Create.ChatDocument, opts: { _width: 500, _height: 500, }}, {key: "Header", creator: headerTemplate, opts: { _width: 300, _height: 120, _header_pointerEvents: "all", _header_height: 50, _header_fontSize: 9,_layout_autoHeightMargins: 50, _layout_autoHeight: true, treeView_HideUnrendered: true}}, {key: "ViewSlide", creator: slideView, opts: { _width: 400, _height: 300, _xMargin: 3, _yMargin: 3,}}, {key: "Trail", creator: Docs.Create.PresDocument, opts: { _width: 400, _height: 30, _type_collection: CollectionViewType.Stacking, _layout_dontCenter:'xy', dropAction: dropActionType.embed, treeView_HideTitle: true, _layout_fitWidth:true, layout_boxShadow: "0 0" }}, - {key: "Tab", creator: opts => Docs.Create.FreeformDocument([], opts), opts: { _width: 500, _height: 800, _layout_fitWidth: true, _freeform_backgroundGrid: true, }}, + {key: "Tab", creator: opts => Docs.Create.FreeformDocument([], opts), opts: { _width: 500, _height: 800, _layout_fitWidth: true, _freeform_backgroundGrid: true, }}, 
{key: "Slide", creator: opts => Docs.Create.TreeDocument([], opts), opts: { _width: 300, _height: 200, _type_collection: CollectionViewType.Tree, treeView_HasOverlay: true, _text_fontSize: "20px", _layout_autoHeight: true, dropAction:dropActionType.move, treeView_Type: TreeViewType.outline, @@ -801,7 +801,7 @@ pie title Minerals in my tap water { title: "Num", icon:"", toolTip: "Frame # (click to toggle edit mode)",btnType: ButtonType.TextButton, expertMode: true, toolType:CollectionViewType.Freeform, funcs: {hidden: '!SelectedDocType(this.toolType, this.expertMode)', buttonText: 'selectedDocs()?.lastElement()?.currentFrame?.toString()'}, width: 20, scripts: { onClick: '{ return curKeyFrame(_readOnly_);}'}}, { title: "Fwd", icon: "chevron-right", toolTip: "Next Animation Frame", btnType: ButtonType.ClickButton, expertMode: true, toolType:CollectionViewType.Freeform, funcs: {hidden: '!SelectedDocType(this.toolType, this.expertMode)'}, width: 30, scripts: { onClick: 'nextKeyFrame(_readOnly_)'}}, - { title: "Filter", icon: "=", toolTip: "Filter cards by tags", subMenu: CurrentUserUtils.tagGroupTools(),ignoreClick:true, toolType:DocumentType.COL, funcs: {hidden: '!SelectedDocType(this.toolType, this.expertMode)'}, btnType: ButtonType.MultiToggleButton, width: 30, backgroundColor: doc.userVariantColor}, + { title: "Filter", icon: "=", toolTip: "Filter cards by tags", subMenu: CurrentUserUtils.tagGroupTools(),ignoreClick:true, toolType:DocumentType.COL, funcs: {hidden: '!SelectedDocType(this.toolType, this.expertMode)'}, btnType: ButtonType.MultiToggleButton, width: 30, backgroundColor: doc.userVariantColor as string}, { title: "Text", icon: "Text", toolTip: "Text functions", subMenu: CurrentUserUtils.textTools(), expertMode: false, toolType:DocumentType.RTF, funcs: { linearView_IsOpen: `SelectedDocType(this.toolType, this.expertMode)`} }, // Always available { title: "Ink", icon: "Ink", toolTip: "Ink functions", subMenu: CurrentUserUtils.inkTools(), expertMode: false, 
toolType:DocumentType.INK, funcs: {hidden: `IsExploreMode()`, linearView_IsOpen: `SelectedDocType(this.toolType, this.expertMode)`}, scripts: { onClick: 'setInkToolDefaults()'} }, // Always available { title: "Doc", icon: "Doc", toolTip: "Freeform Doc tools", subMenu: CurrentUserUtils.freeTools(), expertMode: false, toolType:CollectionViewType.Freeform, funcs: {hidden: `!SelectedDocType(this.toolType, this.expertMode, true)`, linearView_IsOpen: `SelectedDocType(this.toolType, this.expertMode)`} }, // Always available diff --git a/src/client/util/Scripting.ts b/src/client/util/Scripting.ts index 3e7a2df02..c7b86815a 100644 --- a/src/client/util/Scripting.ts +++ b/src/client/util/Scripting.ts @@ -1,7 +1,7 @@ // export const ts = (window as any).ts; // import * as typescriptlib from '!!raw-loader!../../../node_modules/typescript/lib/lib.d.ts' // import * as typescriptes5 from '!!raw-loader!../../../node_modules/typescript/lib/lib.es5.d.ts' -// import typescriptlib from 'type_decls.d'; +import typescriptlib from 'type_decls.d'; import * as ts from 'typescript'; import { Doc, FieldType } from '../../fields/Doc'; import { RefField } from '../../fields/RefField'; @@ -29,7 +29,7 @@ export interface CompiledScript { readonly compiled: true; readonly originalScript: string; // eslint-disable-next-line no-use-before-define - readonly options: Readonly; + readonly options: Readonly; run(args?: { [name: string]: unknown }, onError?: (res: string) => void, errorVal?: unknown): ScriptResult; } @@ -60,7 +60,6 @@ function Run(script: string | undefined, customParams: string[], diagnostics: ts // let params: any[] = [Docs, ...fieldTypes]; const compiledFunction = (() => { try { - // eslint-disable-next-line no-new-func return new Function(...paramNames, `return ${script}`); } catch (e) { console.log(e); @@ -69,10 +68,8 @@ function Run(script: string | undefined, customParams: string[], diagnostics: ts })(); if (!compiledFunction) return { compiled: false, errors }; const { 
capturedVariables = {} } = options; - // eslint-disable-next-line default-param-last const run = (args: { [name: string]: unknown } = {}, onError?: (e: string) => void, errorVal?: ts.Diagnostic): ScriptResult => { const argsArray: unknown[] = []; - // eslint-disable-next-line no-restricted-syntax for (const name of customParams) { if (name !== 'this') { argsArray.push(name in args ? args[name] : capturedVariables[name]); @@ -154,7 +151,7 @@ class ScriptingCompilerHost { export type Traverser = (node: ts.Node, indentation: string) => boolean | void; export type TraverserParam = Traverser | { onEnter: Traverser; onLeave: Traverser }; export type Transformer = { - transformer: ts.TransformerFactory; + transformer: ts.TransformerFactory; getVars?: () => { [name: string]: FieldType }; }; export interface ScriptOptions { @@ -224,7 +221,6 @@ export function CompileScript(script: string, options: ScriptOptions = {}): Comp if ('this' in params || 'this' in capturedVariables) { paramNames.push('this'); } - // eslint-disable-next-line no-restricted-syntax for (const key in params) { if (key !== 'this') { paramNames.push(key); @@ -234,7 +230,6 @@ export function CompileScript(script: string, options: ScriptOptions = {}): Comp const val = params[key]; return `${key}: ${val}`; }); - // eslint-disable-next-line no-restricted-syntax for (const key in capturedVariables) { if (key !== 'this') { const val = capturedVariables[key]; @@ -248,7 +243,7 @@ export function CompileScript(script: string, options: ScriptOptions = {}): Comp const funcScript = `(function(${paramString})${reqTypes} { ${body} })`; host.writeFile('file.ts', funcScript); - // if (typecheck) host.writeFile('node_modules/typescript/lib/lib.d.ts', typescriptlib); + if (typecheck) host.writeFile('node_modules/typescript/lib/lib.d.ts', typescriptlib); const program = ts.createProgram(['file.ts'], {}, host); const testResult = program.emit(); const outputText = host.readFile('file.js'); diff --git 
a/src/client/views/MainView.tsx b/src/client/views/MainView.tsx index 8b8f85dfb..abe154de4 100644 --- a/src/client/views/MainView.tsx +++ b/src/client/views/MainView.tsx @@ -75,7 +75,7 @@ import { AnchorMenu } from './pdf/AnchorMenu'; import { GPTPopup } from './pdf/GPTPopup/GPTPopup'; import { TopBar } from './topbar/TopBar'; -// eslint-disable-next-line @typescript-eslint/no-var-requires, @typescript-eslint/no-require-imports +// eslint-disable-next-line @typescript-eslint/no-require-imports const { LEFT_MENU_WIDTH, TOPBAR_HEIGHT } = require('./global/globalCssVariables.module.scss'); // prettier-ignore @observer @@ -1059,10 +1059,7 @@ export class MainView extends ObservableReactComponent { docView={DocButtonState.Instance.LinkEditorDocView} /> ) : null} - {LinkInfo.Instance?.LinkInfo ? ( - // eslint-disable-next-line react/jsx-props-no-spreading - - ) : null} + {LinkInfo.Instance?.LinkInfo ? : null} {((page: string) => { // prettier-ignore switch (page) { diff --git a/src/client/views/collections/CollectionNoteTakingViewColumn.tsx b/src/client/views/collections/CollectionNoteTakingViewColumn.tsx index fc5f5cb71..226d06f37 100644 --- a/src/client/views/collections/CollectionNoteTakingViewColumn.tsx +++ b/src/client/views/collections/CollectionNoteTakingViewColumn.tsx @@ -252,10 +252,7 @@ export class CollectionNoteTakingViewColumn extends ObservableReactComponent
- { - // eslint-disable-next-line react/jsx-props-no-spreading - - } +
) : null} diff --git a/src/client/views/nodes/PDFBox.tsx b/src/client/views/nodes/PDFBox.tsx index 4616ec057..596975062 100644 --- a/src/client/views/nodes/PDFBox.tsx +++ b/src/client/views/nodes/PDFBox.tsx @@ -10,7 +10,7 @@ import { DocData } from '../../../fields/DocSymbols'; import { Id } from '../../../fields/FieldSymbols'; import { InkTool } from '../../../fields/InkField'; import { ComputedField } from '../../../fields/ScriptField'; -import { Cast, FieldValue, ImageCast, NumCast, StrCast, toList } from '../../../fields/Types'; +import { Cast, FieldValue, NumCast, StrCast, toList } from '../../../fields/Types'; import { ImageField, PdfField } from '../../../fields/URLField'; import { TraceMobx } from '../../../fields/util'; import { emptyFunction } from '../../../Utils'; @@ -43,14 +43,14 @@ export class PDFBox extends ViewBoxAnnotatableComponent() { public static openSidebarWidth = 250; public static sidebarResizerWidth = 5; private _searchString: string = ''; - private _initialScrollTarget: Opt; + private _initialScrollTarget: Opt; private _pdfViewer: PDFViewer | undefined; private _searchRef = React.createRef(); private _disposers: { [name: string]: IReactionDisposer } = {}; private _sidebarRef = React.createRef(); @observable private _searching: boolean = false; - @observable private _pdf: Opt = undefined; + @observable private _pdf: Opt = undefined; @observable private _pageControls = false; @computed get pdfUrl() { @@ -242,13 +242,13 @@ export class PDFBox extends ViewBoxAnnotatableComponent() { options.didMove = true; this.toggleSidebar(false); } - return new Promise(res => { + return new Promise>(res => { DocumentView.addViewRenderedCb(doc, dv => res(dv)); }); }; getAnchor = (addAsAnnotation: boolean, pinProps?: PinProps) => { - let ele: Opt; + let ele: Opt; if (this._pdfViewer?.selectionContent()) { ele = document.createElement('div'); ele.append(this._pdfViewer.selectionContent()!); @@ -327,7 +327,7 @@ export class PDFBox extends 
ViewBoxAnnotatableComponent() { this._initialScrollTarget = undefined; } }; - searchStringChanged = (e: React.ChangeEvent) => { + searchStringChanged = (e: React.ChangeEvent) => { this._searchString = e.currentTarget.value; }; @@ -365,8 +365,8 @@ export class PDFBox extends ViewBoxAnnotatableComponent() { } ); }; - @observable _previewNativeWidth: Opt = undefined; - @observable _previewWidth: Opt = undefined; + @observable _previewNativeWidth: Opt = undefined; + @observable _previewWidth: Opt = undefined; toggleSidebar = action((preview: boolean = false) => { const nativeWidth = NumCast(this.layoutDoc[this.fieldKey + '_nativeWidth']); const sideratio = ((!this.layoutDoc.nativeWidth || this.layoutDoc.nativeWidth === nativeWidth ? PDFBox.openSidebarWidth : 0) + nativeWidth) / nativeWidth; @@ -540,7 +540,6 @@ export class PDFBox extends ViewBoxAnnotatableComponent() { return ComponentTag === CollectionStackingView ? ( () { ) : (
setupMoveUpEvents(this, e, returnFalse, emptyFunction, () => this._props.select(false), true)}> () { top: 0, }}> () { } static pdfcache = new Map(); - static pdfpromise = new Map(); + static pdfpromise = new Map>(); render() { TraceMobx(); const pdfView = !this._pdf ? null : this.renderPdfView; diff --git a/src/client/views/nodes/RecordingBox/ProgressBar.tsx b/src/client/views/nodes/RecordingBox/ProgressBar.tsx index 62798bc2f..7e91df7ab 100644 --- a/src/client/views/nodes/RecordingBox/ProgressBar.tsx +++ b/src/client/views/nodes/RecordingBox/ProgressBar.tsx @@ -1,5 +1,3 @@ -/* eslint-disable react/no-array-index-key */ -/* eslint-disable react/require-default-props */ import * as React from 'react'; import { useEffect, useState, useRef } from 'react'; import './ProgressBar.scss'; diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts index 180d05cf3..ccf9caf15 100644 --- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts +++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts @@ -1,20 +1,19 @@ +import dotenv from 'dotenv'; +import { XMLBuilder, XMLParser } from 'fast-xml-parser'; import OpenAI from 'openai'; -import { Tool, AgentMessage, AssistantMessage, TEXT_TYPE, CHUNK_TYPE, ASSISTANT_ROLE, ProcessingInfo, PROCESSING_TYPE } from '../types/types'; -import { getReactPrompt } from './prompts'; -import { XMLParser, XMLBuilder } from 'fast-xml-parser'; -import { Vectorstore } from '../vectorstore/Vectorstore'; import { ChatCompletionMessageParam } from 'openai/resources'; -import dotenv from 'dotenv'; -import { CalculateTool } from '../tools/CalculateTool'; -import { RAGTool } from '../tools/RAGTool'; -import { DataAnalysisTool } from '../tools/DataAnalysisTool'; -import { WebsiteInfoScraperTool } from '../tools/WebsiteInfoScraperTool'; -import { SearchTool } from '../tools/SearchTool'; -import { NoTool } from '../tools/NoTool'; -import { v4 as uuidv4 } from 'uuid'; import { AnswerParser } from 
'../response_parsers/AnswerParser'; import { StreamedAnswerParser } from '../response_parsers/StreamedAnswerParser'; +import { CalculateTool } from '../tools/CalculateTool'; import { CreateCSVTool } from '../tools/CreateCSVTool'; +import { DataAnalysisTool } from '../tools/DataAnalysisTool'; +import { NoTool } from '../tools/NoTool'; +import { RAGTool } from '../tools/RAGTool'; +import { SearchTool } from '../tools/SearchTool'; +import { WebsiteInfoScraperTool } from '../tools/WebsiteInfoScraperTool'; +import { AgentMessage, AssistantMessage, PROCESSING_TYPE, ProcessingInfo, Tool } from '../types/types'; +import { Vectorstore } from '../vectorstore/Vectorstore'; +import { getReactPrompt } from './prompts'; dotenv.config(); @@ -25,7 +24,7 @@ dotenv.config(); export class Agent { // Private properties private client: OpenAI; - private tools: Record>; + private tools: Record>; // bcz: need a real type here private messages: AgentMessage[] = []; private interMessages: AgentMessage[] = []; private vectorstore: Vectorstore; @@ -102,7 +101,7 @@ export class Agent { ignoreAttributes: false, attributeNamePrefix: '@_', textNodeName: '_text', - isArray: (name, jpath, isLeafNode, isAttribute) => ['query', 'url'].indexOf(name) !== -1, + isArray: (name /* , jpath, isLeafNode, isAttribute */) => ['query', 'url'].indexOf(name) !== -1, }); const builder = new XMLBuilder({ ignoreAttributes: false, attributeNamePrefix: '@_' }); @@ -167,7 +166,7 @@ export class Agent { if (currentAction) { try { // Process the action with its input - const observation = await this.processAction(currentAction, actionInput.inputs); + const observation = (await this.processAction(currentAction, actionInput.inputs)) as any; // bcz: really need a type here const nextPrompt = [{ type: 'text', text: ` ` }, ...observation, { type: 'text', text: '' }]; console.log(observation); this.interMessages.push({ role: 'user', content: nextPrompt }); @@ -214,7 +213,7 @@ export class Agent { // Process each chunk of the 
streamed response for await (const chunk of stream) { - let content = chunk.choices[0]?.delta?.content || ''; + const content = chunk.choices[0]?.delta?.content || ''; fullResponse += content; // Parse the streamed content character by character @@ -267,7 +266,7 @@ export class Agent { * @param actionInput The inputs for the action. * @returns The result of the action. */ - private async processAction(action: string, actionInput: any): Promise { + private async processAction(action: string, actionInput: unknown): Promise { if (!(action in this.tools)) { throw new Error(`Unknown action: ${action}`); } diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index d4deff78b..613cb7078 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -1,30 +1,29 @@ -import { action, computed, makeObservable, observable, observe, reaction, runInAction, ObservableSet } from 'mobx'; +import dotenv from 'dotenv'; +import { ObservableSet, action, computed, makeObservable, observable, observe, reaction, runInAction } from 'mobx'; import { observer } from 'mobx-react'; import OpenAI, { ClientOptions } from 'openai'; import * as React from 'react'; +import { v4 as uuidv4 } from 'uuid'; +import { ClientUtils } from '../../../../../ClientUtils'; import { Doc, DocListCast } from '../../../../../fields/Doc'; +import { DocData, DocViews } from '../../../../../fields/DocSymbols'; import { CsvCast, DocCast, PDFCast, RTFCast, StrCast } from '../../../../../fields/Types'; +import { Networking } from '../../../../Network'; +import { DocUtils } from '../../../../documents/DocUtils'; import { DocumentType } from '../../../../documents/DocumentTypes'; import { Docs } from '../../../../documents/Documents'; +import { DocumentManager } from '../../../../util/DocumentManager'; import { LinkManager } from '../../../../util/LinkManager'; 
import { ViewBoxAnnotatableComponent } from '../../../DocComponent'; +import { DocumentView } from '../../DocumentView'; import { FieldView, FieldViewProps } from '../../FieldView'; +import { PDFBox } from '../../PDFBox'; +import { Agent } from '../agentsystem/Agent'; +import { ASSISTANT_ROLE, AssistantMessage, CHUNK_TYPE, Citation, ProcessingInfo, SimplifiedChunk, TEXT_TYPE } from '../types/types'; +import { Vectorstore } from '../vectorstore/Vectorstore'; import './ChatBox.scss'; import MessageComponentBox from './MessageComponent'; -import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, SimplifiedChunk, ProcessingInfo } from '../types/types'; -import { Vectorstore } from '../vectorstore/Vectorstore'; -import { Agent } from '../agentsystem/Agent'; -import dotenv from 'dotenv'; -import { DocData, DocViews } from '../../../../../fields/DocSymbols'; -import { DocumentManager } from '../../../../util/DocumentManager'; -import { v4 as uuidv4 } from 'uuid'; -import { DocUtils } from '../../../../documents/DocUtils'; -import { ClientUtils } from '../../../../../ClientUtils'; import { ProgressBar } from './ProgressBar'; -import { DocumentView } from '../../DocumentView'; -import { Networking } from '../../../../Network'; -import { PDFViewer } from '../../../pdf/PDFViewer'; -import { PDFBox } from '../../PDFBox'; dotenv.config(); @@ -247,7 +246,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { * @param event The form submission event. 
*/ @action - askGPT = async (event: React.FormEvent): Promise => { + askGPT = async (event: React.FormEvent): Promise => { event.preventDefault(); this.inputValue = ''; @@ -413,7 +412,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { const chunkId = citation.chunk_id; // Loop through the linked documents to find the matching chunk and handle its display - for (let doc of currentLinkedDocs) { + for (const doc of currentLinkedDocs) { if (doc.chunk_simpl) { const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] }; const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId); @@ -422,24 +421,26 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { switch (foundChunk.chunkType) { case CHUNK_TYPE.IMAGE: case CHUNK_TYPE.TABLE: - const values = foundChunk.location?.replace(/[\[\]]/g, '').split(','); + { + const values = foundChunk.location?.replace(/[[\]]/g, '').split(','); - if (values?.length !== 4) { - console.error('Location string must contain exactly 4 numbers'); - return; - } + if (values?.length !== 4) { + console.error('Location string must contain exactly 4 numbers'); + return; + } - const x1 = parseFloat(values[0]) * Doc.NativeWidth(doc); - const y1 = parseFloat(values[1]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc); - const x2 = parseFloat(values[2]) * Doc.NativeWidth(doc); - const y2 = parseFloat(values[3]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc); + const x1 = parseFloat(values[0]) * Doc.NativeWidth(doc); + const y1 = parseFloat(values[1]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc); + const x2 = parseFloat(values[2]) * Doc.NativeWidth(doc); + const y2 = parseFloat(values[3]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc); - const annotationKey = Doc.LayoutFieldKey(doc) + '_annotations'; + const annotationKey = Doc.LayoutFieldKey(doc) + '_annotations'; - const existingDoc = 
DocListCast(doc[DocData][annotationKey]).find(d => d.citation_id === citation.citation_id); - const highlightDoc = existingDoc ?? this.createImageCitationHighlight(x1, y1, x2, y2, citation, annotationKey, doc); + const existingDoc = DocListCast(doc[DocData][annotationKey]).find(d => d.citation_id === citation.citation_id); + const highlightDoc = existingDoc ?? this.createImageCitationHighlight(x1, y1, x2, y2, citation, annotationKey, doc); - DocumentManager.Instance.showDocument(highlightDoc, { willZoomCentered: true }, () => {}); + DocumentManager.Instance.showDocument(highlightDoc, { willZoomCentered: true }, () => {}); + } break; case CHUNK_TYPE.TEXT: this.citationPopup = { text: citation.direct_text ?? 'No text available', visible: true }; @@ -695,7 +696,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() {
)}
-

{this.userName()}'s AI Assistant

+

{this.userName()}'s AI Assistant

{this.history.map((message, index) => ( diff --git a/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts b/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts index 3b4fdb6f5..1ac753790 100644 --- a/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts +++ b/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts @@ -1,5 +1,5 @@ -import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType, ProcessingInfo } from '../types/types'; import { v4 as uuid } from 'uuid'; +import { ASSISTANT_ROLE, AssistantMessage, Citation, ProcessingInfo, TEXT_TYPE, getChunkType } from '../types/types'; export class AnswerParser { static parse(xml: string, processingInfo: ProcessingInfo[]): AssistantMessage { @@ -22,8 +22,8 @@ export class AnswerParser { } let rawTextContent = answerMatch[1].trim(); - let content: AssistantMessage['content'] = []; - let citations: Citation[] = []; + const content: AssistantMessage['content'] = []; + const citations: Citation[] = []; let contentIndex = 0; // Remove citations and follow-up questions from rawTextContent @@ -43,6 +43,7 @@ export class AnswerParser { if (citationsMatch) { const citationsContent = citationsMatch[1]; while ((citationMatch = citationRegex.exec(citationsContent)) !== null) { + // eslint-disable-next-line @typescript-eslint/no-unused-vars const [_, index, chunk_id, type, direct_text] = citationMatch; const citation_id = uuid(); citationMap.set(index, citation_id); @@ -102,7 +103,7 @@ export class AnswerParser { } } - let followUpQuestions: string[] = []; + const followUpQuestions: string[] = []; if (followUpQuestionsMatch) { const questionsText = followUpQuestionsMatch[1]; let questionMatch; diff --git a/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts b/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts index 3585cab4a..4149f3da9 100644 --- a/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts +++ 
b/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts @@ -1,5 +1,3 @@ -import { threadId } from 'worker_threads'; - enum ParserState { Outside, InGroundedText, diff --git a/src/client/views/nodes/chatbot/tools/BaseTool.ts b/src/client/views/nodes/chatbot/tools/BaseTool.ts index b57f1c8e4..10780617b 100644 --- a/src/client/views/nodes/chatbot/tools/BaseTool.ts +++ b/src/client/views/nodes/chatbot/tools/BaseTool.ts @@ -1,17 +1,17 @@ import { Tool } from '../types/types'; -export abstract class BaseTool = Record> implements Tool { +export abstract class BaseTool = Record> implements Tool { constructor( public name: string, public description: string, - public parameters: Record, + public parameters: Record, public citationRules: string, public briefSummary: string ) {} - abstract execute(args: T): Promise; + abstract execute(args: T): Promise; - getActionRule(): Record { + getActionRule(): Record { return { [this.name]: { name: this.name, diff --git a/src/client/views/nodes/chatbot/tools/CalculateTool.ts b/src/client/views/nodes/chatbot/tools/CalculateTool.ts index 74b7ca27b..77ab1b39b 100644 --- a/src/client/views/nodes/chatbot/tools/CalculateTool.ts +++ b/src/client/views/nodes/chatbot/tools/CalculateTool.ts @@ -18,7 +18,7 @@ export class CalculateTool extends BaseTool<{ expression: string }> { ); } - async execute(args: { expression: string }): Promise { + async execute(args: { expression: string }): Promise { // Note: Using eval() can be dangerous. Consider using a safer alternative. 
const result = eval(args.expression); return [{ type: 'text', text: result.toString() }]; diff --git a/src/client/views/nodes/chatbot/tools/CreateCSVTool.ts b/src/client/views/nodes/chatbot/tools/CreateCSVTool.ts index 55015846b..d3ded0de0 100644 --- a/src/client/views/nodes/chatbot/tools/CreateCSVTool.ts +++ b/src/client/views/nodes/chatbot/tools/CreateCSVTool.ts @@ -28,7 +28,7 @@ export class CreateCSVTool extends BaseTool<{ csvData: string; filename: string this._handleCSVResult = handleCSVResult; } - async execute(args: { csvData: string; filename: string }): Promise { + async execute(args: { csvData: string; filename: string }): Promise { try { console.log('Creating CSV file:', args.filename, ' with data:', args.csvData); // Post the raw CSV data to the createCSV endpoint on the server diff --git a/src/client/views/nodes/chatbot/tools/CreateCollectionTool.ts b/src/client/views/nodes/chatbot/tools/CreateCollectionTool.ts index 573428179..1e479a62c 100644 --- a/src/client/views/nodes/chatbot/tools/CreateCollectionTool.ts +++ b/src/client/views/nodes/chatbot/tools/CreateCollectionTool.ts @@ -25,7 +25,7 @@ export class GetDocsContentTool extends BaseTool<{ title: string; document_ids: this._docView = docView; } - async execute(args: { title: string; document_ids: string[] }): Promise { + async execute(args: { title: string; document_ids: string[] }): Promise { // Note: Using eval() can be dangerous. Consider using a safer alternative. 
const docs = args.document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); const collection = Docs.Create.FreeformDocument(docs, { title: args.title }); diff --git a/src/client/views/nodes/chatbot/tools/DataAnalysisTool.ts b/src/client/views/nodes/chatbot/tools/DataAnalysisTool.ts index a12ee46e5..2e663fed1 100644 --- a/src/client/views/nodes/chatbot/tools/DataAnalysisTool.ts +++ b/src/client/views/nodes/chatbot/tools/DataAnalysisTool.ts @@ -33,7 +33,7 @@ export class DataAnalysisTool extends BaseTool<{ csv_file_name: string | string[ return file?.id; } - async execute(args: { csv_file_name: string | string[] }): Promise { + async execute(args: { csv_file_name: string | string[] }): Promise { const filenames = Array.isArray(args.csv_file_name) ? args.csv_file_name : [args.csv_file_name]; const results = []; diff --git a/src/client/views/nodes/chatbot/tools/GetDocsTool.ts b/src/client/views/nodes/chatbot/tools/GetDocsTool.ts index f970ca8ee..903f3f69c 100644 --- a/src/client/views/nodes/chatbot/tools/GetDocsTool.ts +++ b/src/client/views/nodes/chatbot/tools/GetDocsTool.ts @@ -18,7 +18,7 @@ export class GetDocsTool extends BaseTool<{ title: string; document_ids: string[ this._docView = docView; } - async execute(args: { title: string; document_ids: string[] }): Promise { + async execute(args: { title: string; document_ids: string[] }): Promise { // Note: Using eval() can be dangerous. Consider using a safer alternative. 
const docs = args.document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); const collection = Docs.Create.FreeformDocument(docs, { title: args.title }); diff --git a/src/client/views/nodes/chatbot/tools/NoTool.ts b/src/client/views/nodes/chatbot/tools/NoTool.ts index 1f0830a77..edd3160ec 100644 --- a/src/client/views/nodes/chatbot/tools/NoTool.ts +++ b/src/client/views/nodes/chatbot/tools/NoTool.ts @@ -1,7 +1,7 @@ // tools/NoTool.ts import { BaseTool } from './BaseTool'; -export class NoTool extends BaseTool<{}> { +export class NoTool extends BaseTool> { constructor() { super( 'no_tool', @@ -12,7 +12,8 @@ export class NoTool extends BaseTool<{}> { ); } - async execute(args: {}): Promise { + // eslint-disable-next-line @typescript-eslint/no-unused-vars + async execute(args: object): Promise { return [{ type: 'text', text: 'No tool used. Proceed with answering the question.' }]; } } diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts index f4b7b42ea..4cc2f26ff 100644 --- a/src/client/views/nodes/chatbot/tools/RAGTool.ts +++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts @@ -1,10 +1,7 @@ -import { BaseTool } from './BaseTool'; -import { Vectorstore } from '../vectorstore/Vectorstore'; -import { RAGChunk } from '../types/types'; -import * as fs from 'fs'; import { Networking } from '../../../../Network'; -import { file } from 'jszip'; -import { ChatCompletion, ChatCompletionContentPart, ChatCompletionMessageParam } from 'openai/resources'; +import { RAGChunk } from '../types/types'; +import { Vectorstore } from '../vectorstore/Vectorstore'; +import { BaseTool } from './BaseTool'; export class RAGTool extends BaseTool { constructor(private vectorstore: Vectorstore) { @@ -59,13 +56,13 @@ export class RAGTool extends BaseTool { ); } - async execute(args: { hypothetical_document_chunk: string }): Promise { + async execute(args: { hypothetical_document_chunk: string }): Promise { const 
relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk); const formatted_chunks = await this.getFormattedChunks(relevantChunks); return formatted_chunks; } - async getFormattedChunks(relevantChunks: RAGChunk[]): Promise { + async getFormattedChunks(relevantChunks: RAGChunk[]): Promise { try { const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks }); diff --git a/src/client/views/nodes/chatbot/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts index b926cbadc..3a4668422 100644 --- a/src/client/views/nodes/chatbot/tools/SearchTool.ts +++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts @@ -1,7 +1,6 @@ -import { max } from 'lodash'; +import { v4 as uuidv4 } from 'uuid'; import { Networking } from '../../../../Network'; import { BaseTool } from './BaseTool'; -import { v4 as uuidv4 } from 'uuid'; export class SearchTool extends BaseTool<{ query: string | string[] }> { private _addLinkedUrlDoc: (url: string, id: string) => void; @@ -25,7 +24,7 @@ export class SearchTool extends BaseTool<{ query: string | string[] }> { this._max_results = max_results; } - async execute(args: { query: string | string[] }): Promise { + async execute(args: { query: string | string[] }): Promise { const queries = Array.isArray(args.query) ? 
args.query : [args.query]; const allResults = []; diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts index 2118218f6..1efb389b8 100644 --- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts +++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts @@ -1,6 +1,6 @@ +import { v4 as uuidv4 } from 'uuid'; import { Networking } from '../../../../Network'; import { BaseTool } from './BaseTool'; -import { v4 as uuidv4 } from 'uuid'; export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> { private _addLinkedUrlDoc: (url: string, id: string) => void; @@ -63,7 +63,7 @@ export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> this._addLinkedUrlDoc = addLinkedUrlDoc; } - async execute(args: { url: string | string[] }): Promise { + async execute(args: { url: string | string[] }): Promise { const urls = Array.isArray(args.url) ? args.url : [args.url]; const results = []; @@ -74,6 +74,7 @@ export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> this._addLinkedUrlDoc(url, id); results.push({ type: 'text', text: `\n${website_plain_text}\n\n` }); } catch (error) { + console.log(error); results.push({ type: 'text', text: `An error occurred while scraping the website: ${url}` }); } } diff --git a/src/client/views/nodes/chatbot/tools/WikipediaTool.ts b/src/client/views/nodes/chatbot/tools/WikipediaTool.ts index 143d91d80..692dff749 100644 --- a/src/client/views/nodes/chatbot/tools/WikipediaTool.ts +++ b/src/client/views/nodes/chatbot/tools/WikipediaTool.ts @@ -1,8 +1,6 @@ -import { title } from 'process'; +import { v4 as uuidv4 } from 'uuid'; import { Networking } from '../../../../Network'; import { BaseTool } from './BaseTool'; -import axios from 'axios'; -import { v4 as uuidv4 } from 'uuid'; export class WikipediaTool extends BaseTool<{ title: string }> { private _addLinkedUrlDoc: (url: string, 
id: string) => void; @@ -23,7 +21,7 @@ export class WikipediaTool extends BaseTool<{ title: string }> { this._addLinkedUrlDoc = addLinkedUrlDoc; } - async execute(args: { title: string }): Promise { + async execute(args: { title: string }): Promise { try { const { text } = await Networking.PostToServer('/getWikipediaSummary', { title: args.title }); const id = uuidv4(); @@ -31,6 +29,7 @@ export class WikipediaTool extends BaseTool<{ title: string }> { this._addLinkedUrlDoc(url, id); return [{ type: 'text', text: ` ${text} ` }]; } catch (error) { + console.log(error); return [{ type: 'text', text: 'An error occurred while fetching the article.' }]; } } diff --git a/src/client/views/nodes/chatbot/types/types.ts b/src/client/views/nodes/chatbot/types/types.ts index f5d14ad6a..2bc7f4952 100644 --- a/src/client/views/nodes/chatbot/types/types.ts +++ b/src/client/views/nodes/chatbot/types/types.ts @@ -52,15 +52,6 @@ export interface ProcessingInfo { content: string; } -export interface AssistantMessage { - role: ASSISTANT_ROLE; - content: MessageContent[]; - follow_up_questions?: string[]; - citations?: Citation[]; - processing_info: ProcessingInfo[]; - loop_summary?: string; -} - export interface MessageContent { index: number; type: TEXT_TYPE; @@ -75,6 +66,14 @@ export interface Citation { citation_id: string; url?: string; } +export interface AssistantMessage { + role: ASSISTANT_ROLE; + content: MessageContent[]; + follow_up_questions?: string[]; + citations?: Citation[]; + processing_info: ProcessingInfo[]; + loop_summary?: string; +} export interface RAGChunk { id: string; @@ -113,14 +112,14 @@ export interface AI_Document { type: string; } -export interface Tool = Record> { +export interface Tool = Record> { name: string; description: string; - parameters: Record; + parameters: Record; citationRules: string; briefSummary: string; - execute: (args: T) => Promise; - getActionRule: () => Record; + execute: (args: T) => Promise; + getActionRule: () => Record; } export 
interface AgentMessage { diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts index 07a2b73bc..9575277f7 100644 --- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts +++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts @@ -1,11 +1,11 @@ -import { Pinecone, Index, IndexList, PineconeRecord, RecordMetadata, QueryResponse } from '@pinecone-database/pinecone'; +import { Index, IndexList, Pinecone, PineconeRecord, QueryResponse, RecordMetadata } from '@pinecone-database/pinecone'; import { CohereClient } from 'cohere-ai'; import { EmbedResponse } from 'cohere-ai/api'; import dotenv from 'dotenv'; -import { RAGChunk, AI_Document, CHUNK_TYPE } from '../types/types'; import { Doc } from '../../../../../fields/Doc'; import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types'; import { Networking } from '../../../../Network'; +import { AI_Document, CHUNK_TYPE, RAGChunk } from '../types/types'; dotenv.config(); @@ -103,8 +103,8 @@ export class Vectorstore { const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); // Poll the server for progress updates. - let inProgress: boolean = true; - let result: any = null; + const inProgress = true; + let result: (AI_Document & { doc_id: string }) | null = null; // bcz: is this the correct type?? while (inProgress) { // Polling interval for status updates. await new Promise(resolve => setTimeout(resolve, 2000)); @@ -127,6 +127,9 @@ export class Vectorstore { progressCallback(progress, step); } } + if (!result) { + throw new Error('no result received...'); // bcz: is this an Error? + } // Once completed, process the document and add it to the vectorstore. console.log('Document JSON:', result); @@ -175,7 +178,7 @@ export class Vectorstore { * Indexes the processed document by uploading the document's vector chunks to the Pinecone index. 
* @param document The processed document containing its chunks and metadata. */ - private async indexDocument(document: any) { + private async indexDocument(document: AI_Document) { console.log('Uploading vectors to content namespace...'); // Prepare Pinecone records for each chunk in the document. diff --git a/src/client/views/pdf/PDFViewer.tsx b/src/client/views/pdf/PDFViewer.tsx index 7543b3fb1..b5c69bff0 100644 --- a/src/client/views/pdf/PDFViewer.tsx +++ b/src/client/views/pdf/PDFViewer.tsx @@ -51,7 +51,7 @@ interface IViewerProps extends FieldViewProps { * Handles rendering and virtualization of the pdf */ @observer -export class PDFViewer extends ObservableReactComponent { +export class PDFViewer extends ObservableReactComponent { static _annotationStyle = addStyleSheet(); constructor(props: IViewerProps) { @@ -68,12 +68,12 @@ export class PDFViewer extends ObservableReactComponent { private _pdfViewer!: PDFJSViewer.PDFViewer; private _styleRule: number | undefined; // stylesheet rule for making hyperlinks clickable private _retries = 0; // number of times tried to create the PDF viewer - private _setPreviewCursor: undefined | ((x: number, y: number, drag: boolean, hide: boolean, doc: Opt) => void); + private _setPreviewCursor: undefined | ((x: number, y: number, drag: boolean, hide: boolean, doc: Opt) => void); private _marqueeref = React.createRef(); - private _annotationLayer: React.RefObject = React.createRef(); + private _annotationLayer: React.RefObject = React.createRef(); private _disposers: { [name: string]: IReactionDisposer } = {}; - private _viewer: React.RefObject = React.createRef(); - _mainCont: React.RefObject = React.createRef(); + private _viewer: React.RefObject = React.createRef(); + _mainCont: React.RefObject = React.createRef(); private _selectionText: string = ''; private _selectionContent: DocumentFragment | undefined; private _downX: number = 0; @@ -81,9 +81,9 @@ export class PDFViewer extends ObservableReactComponent { private 
_lastSearch = false; private _viewerIsSetup = false; private _ignoreScroll = false; - private _initialScroll: { loc: Opt; easeFunc: 'linear' | 'ease' | undefined } | undefined; + private _initialScroll: { loc: Opt; easeFunc: 'linear' | 'ease' | undefined } | undefined; private _forcedScroll = true; - _getAnchor: (savedAnnotations: Opt, addAsAnnotation: boolean) => Opt = () => undefined; + _getAnchor: (savedAnnotations: Opt>, addAsAnnotation: boolean) => Opt = () => undefined; selectionText = () => this._selectionText; selectionContent = () => this._selectionContent; @@ -179,7 +179,7 @@ export class PDFViewer extends ObservableReactComponent { // otherwise it will scroll smoothly. scrollFocus = (doc: Doc, scrollTop: number, options: FocusViewOptions) => { const mainCont = this._mainCont.current; - let focusSpeed: Opt; + let focusSpeed: Opt; if (doc !== this._props.Document && mainCont) { const windowHeight = this._props.PanelHeight() / (this._props.NativeDimScaling?.() || 1); const scrollTo = ClientUtils.scrollIntoView(scrollTop, doc[Height](), NumCast(this._props.layoutDoc._layout_scrollTop), windowHeight, windowHeight * 0.1, this._scrollHeight); @@ -394,30 +394,6 @@ export class PDFViewer extends ObservableReactComponent { } }; - // @action - // createMarquee = (coords: [x1: number, x2: number, y1: number, y2: number]): void => { - // // const hit = document.elementFromPoint(e.clientX, e.clientY); - // // bcz: Change. drag selecting requires that preventDefault is NOT called. This used to happen in DocumentView, - // // but that's changed, so this shouldn't be needed. 
- // // if (hit && hit.localName === "span" && this.annotationsActive(true)) { // drag selecting text stops propagation - // // e.button === 0 && e.stopPropagation(); - // // } - // // if alt+left click, drag and annotate - // this._downX = coords[0]; - // this._downY = coords[2]; - - // if ((this._props.Document._freeform_scale || 1) !== 1) return; - // this._props.select(false); - // MarqueeAnnotator.clearAnnotations(this._savedAnnotations); - // this.isAnnotating = true; - // this._textSelecting = false; - // // if textLayer is hit, then we select text instead of using a marquee so clear out the marquee. - // this._styleRule = addStyleSheetRule(PDFViewer._annotationStyle, 'htmlAnnotation', { 'pointer-events': 'none' }); - - // this._marqueeref.current?.onInitiateSelection([coords[0], coords[2]]); - // this._marqueeref.current?.onTerminateSelection(); - // }; - @action finishMarquee = (/* x?: number, y?: number */) => { this._getAnchor = AnchorMenu.Instance?.GetAnchor; @@ -495,7 +471,7 @@ export class PDFViewer extends ObservableReactComponent { // e.stopPropagation(); // bcz: not sure why this was here. We need to allow the DocumentView to get clicks to process doubleClicks }; - setPreviewCursor = (func?: (x: number, y: number, drag: boolean, hide: boolean, doc: Opt) => void) => { + setPreviewCursor = (func?: (x: number, y: number, drag: boolean, hide: boolean, doc: Opt) => void) => { this._setPreviewCursor = func; }; @@ -520,7 +496,6 @@ export class PDFViewer extends ObservableReactComponent { return (
{inlineAnnos.map(anno => ( - // eslint-disable-next-line react/jsx-props-no-spreading ))}
@@ -534,7 +509,7 @@ export class PDFViewer extends ObservableReactComponent { panelHeight = () => this._props.PanelHeight() / (this._props.NativeDimScaling?.() || 1); transparentFilter = () => [...this._props.childFilters(), ClientUtils.TransparentBackgroundFilter]; opaqueFilter = () => [...this._props.childFilters(), ClientUtils.noDragDocsFilter, ...(SnappingManager.CanEmbed && this._props.isContentActive() ? [] : [ClientUtils.OpaqueBackgroundFilter])]; - childStyleProvider = (doc: Doc | undefined, props: Opt, property: string) => { + childStyleProvider = (doc: Doc | undefined, props: Opt, property: string) => { if (doc instanceof Doc && property === StyleProp.PointerEvents) { if (this.inlineTextAnnotations.includes(doc) || this._props.isContentActive() === false) return 'none'; const isInk = doc.layout_isSvg && !props?.LayoutTemplateString; @@ -554,7 +529,6 @@ export class PDFViewer extends ObservableReactComponent { pointerEvents: Doc.ActiveTool !== InkTool.None ? 'all' : undefined, }}> ({ + response.data.items?.map(item => ({ url: item.link, snippet: item.snippet, })) || []; res.send({ results }); - } catch (error: any) { + } catch (error) { console.error('Error performing web search:', error); res.status(500).send({ error: 'Failed to perform web search', - details: error.message, + details: (error as { message: string }).message ?? error, // bcz: don't know wha tthe error type contains... }); } }, @@ -170,16 +166,16 @@ export default class AssistantManager extends ApiManager { * @param retries The number of retry attempts. * @param backoff Initial backoff time in milliseconds. 
*/ - const fetchWithRetry = async (url: string, retries = 3, backoff = 300) => { + const fetchWithRetry = async (url: string, retries = 3, backoff = 300): Promise => { try { const response = await axiosInstance.get(url); return response.data; - } catch (error: any) { - if (retries > 0 && error.response?.status === 429) { + } catch (error) { + if (retries > 0 && (error as { response: { status: number } }).response?.status === 429) { // bcz: don't know the error type console.log(`Rate limited. Retrying in ${backoff}ms...`); await delay(backoff); return fetchWithRetry(url, retries - 1, backoff * 2); - } + } // prettier-ignore throw error; } }; @@ -199,11 +195,11 @@ export default class AssistantManager extends ApiManager { try { const data = await fetchWithRetry(url); res.send({ data }); - } catch (error: any) { + } catch (error) { console.error('Error fetching the URL:', error); res.status(500).send({ error: 'Failed to fetch the URL', - details: error.message, + details: (error as { message: string }).message ?? error, // bcz: don't know wha tthe error type contains... }); } }, @@ -241,11 +237,11 @@ export default class AssistantManager extends ApiManager { } else { res.status(500).send({ error: 'Failed to extract readable content' }); } - } catch (error: any) { + } catch (error) { console.error('Error scraping website:', error); res.status(500).send({ error: 'Failed to scrape website', - details: error.message, + details: (error as { message: string }).message ?? error, // bcz: don't know wha tthe error type contains... 
}); } }, @@ -267,15 +263,16 @@ export default class AssistantManager extends ApiManager { const jobId = uuid.v4(); // Spawn the Python process and track its progress/output + // eslint-disable-next-line no-use-before-define spawnPythonProcess(jobId, file_name, file_data); // Send the job ID back to the client for tracking res.send({ jobId }); - } catch (error: any) { + } catch (error) { console.error('Error initiating document creation:', error); res.status(500).send({ error: 'Failed to initiate document creation', - details: error.message, + details: (error as { message: string }).message ?? error, // bcz: don't know wha tthe error type contains... }); } }, @@ -307,13 +304,13 @@ export default class AssistantManager extends ApiManager { const { jobId } = req.params; // Get the job ID from the URL parameters // Check if the job result is available if (jobResults[jobId]) { - const result = jobResults[jobId]; + const result = jobResults[jobId] as AI_Document & { status: string }; // If the result contains image or table chunks, save the base64 data as image files if (result.chunks && Array.isArray(result.chunks)) { for (const chunk of result.chunks) { if (chunk.metadata && (chunk.metadata.type === 'image' || chunk.metadata.type === 'table')) { - let files_directory = '/files/chunk_images/'; + const files_directory = '/files/chunk_images/'; const directory = path.join(publicDirectory, files_directory); // Ensure the directory exists or create it @@ -338,7 +335,7 @@ export default class AssistantManager extends ApiManager { } } } - result['status'] = 'completed'; + result.status = 'completed'; } else { result.status = 'pending'; } @@ -429,11 +426,11 @@ export default class AssistantManager extends ApiManager { // Send the file URL and UUID back to the client res.send({ fileUrl, id: uuidv4 }); - } catch (error: any) { + } catch (error) { console.error('Error creating CSV file:', error); res.status(500).send({ error: 'Failed to create CSV file.', - details: 
error.message, + details: (error as { message: string }).message ?? error, // bcz: don't know what the error type contains... }); } }, @@ -446,59 +443,6 @@ function spawnPythonProcess(jobId: string, file_name: string, file_data: string) const requirementsPath = path.join(__dirname, '../chunker/requirements.txt'); const pythonScriptPath = path.join(__dirname, '../chunker/pdf_chunker.py'); - // Check if venv exists - if (!fs.existsSync(venvPath)) { - console.log('Virtual environment not found. Creating and setting up...'); - - // Create venv - const createVenvProcess = spawn('python', ['-m', 'venv', venvPath]); - - createVenvProcess.on('close', code => { - if (code !== 0) { - console.error(`Failed to create virtual environment. Exit code: ${code}`); - return; - } - - console.log('Virtual environment created. Installing requirements...'); - - // Determine the pip path based on the OS - const pipPath = process.platform === 'win32' ? path.join(venvPath, 'Scripts', 'pip.exe') : path.join(venvPath, 'bin', 'pip3'); // Try 'pip3' for Unix-like systems - - if (!fs.existsSync(pipPath)) { - console.error(`pip executable not found at ${pipPath}`); - return; - } - - // Install requirements - const installRequirementsProcess = spawn(pipPath, ['install', '-r', requirementsPath]); - - installRequirementsProcess.stdout.on('data', data => { - console.log(`pip stdout: ${data}`); - }); - - installRequirementsProcess.stderr.on('data', data => { - console.error(`pip stderr: ${data}`); - }); - - installRequirementsProcess.on('error', error => { - console.error(`Error starting pip process: ${error}`); - }); - - installRequirementsProcess.on('close', code => { - if (code !== 0) { - console.error(`Failed to install requirements. Exit code: ${code}`); - return; - } - - console.log('Requirements installed. Running Python script...'); - runPythonScript(); - }); - }); - } else { - console.log('Virtual environment found. 
Running Python script...'); - runPythonScript(); - } - function runPythonScript() { const pythonPath = process.platform === 'win32' ? path.join(venvPath, 'Scripts', 'python') : path.join(venvPath, 'bin', 'python3'); @@ -530,7 +474,7 @@ function spawnPythonProcess(jobId: string, file_name: string, file_data: string) }; } } catch (err) { - console.error('Progress log from Python:', line); + console.error('Progress log from Python:', line, err); } } }); @@ -551,4 +495,56 @@ function spawnPythonProcess(jobId: string, file_name: string, file_data: string) } }); } + // Check if venv exists + if (!fs.existsSync(venvPath)) { + console.log('Virtual environment not found. Creating and setting up...'); + + // Create venv + const createVenvProcess = spawn('python', ['-m', 'venv', venvPath]); + + createVenvProcess.on('close', code => { + if (code !== 0) { + console.error(`Failed to create virtual environment. Exit code: ${code}`); + return; + } + + console.log('Virtual environment created. Installing requirements...'); + + // Determine the pip path based on the OS + const pipPath = process.platform === 'win32' ? path.join(venvPath, 'Scripts', 'pip.exe') : path.join(venvPath, 'bin', 'pip3'); // Try 'pip3' for Unix-like systems + + if (!fs.existsSync(pipPath)) { + console.error(`pip executable not found at ${pipPath}`); + return; + } + + // Install requirements + const installRequirementsProcess = spawn(pipPath, ['install', '-r', requirementsPath]); + + installRequirementsProcess.stdout.on('data', data => { + console.log(`pip stdout: ${data}`); + }); + + installRequirementsProcess.stderr.on('data', data => { + console.error(`pip stderr: ${data}`); + }); + + installRequirementsProcess.on('error', error => { + console.error(`Error starting pip process: ${error}`); + }); + + installRequirementsProcess.on('close', closecode => { + if (closecode !== 0) { + console.error(`Failed to install requirements. Exit code: ${closecode}`); + return; + } + + console.log('Requirements installed. 
Running Python script...'); + runPythonScript(); + }); + }); + } else { + console.log('Virtual environment found. Running Python script...'); + runPythonScript(); + } } -- cgit v1.2.3-70-g09d2 From cd7e213e7e0b9b6606ae068c29412bb64cdf6f2d Mon Sep 17 00:00:00 2001 From: bobzel Date: Mon, 7 Oct 2024 10:43:48 -0400 Subject: comment cleanup --- .../views/collections/CollectionCardDeckView.tsx | 24 ++++++++++------------ 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'src') diff --git a/src/client/views/collections/CollectionCardDeckView.tsx b/src/client/views/collections/CollectionCardDeckView.tsx index a9ab9de26..0c2bcc580 100644 --- a/src/client/views/collections/CollectionCardDeckView.tsx +++ b/src/client/views/collections/CollectionCardDeckView.tsx @@ -411,15 +411,14 @@ export class CollectionCardView extends CollectionSubView() { translateOverflowX = (realIndex: number, calcRowCards: number) => (realIndex < this._maxRowCount ? 0 : (this._maxRowCount - calcRowCards) * (this.childPanelWidth() / 2)); /** - * Determines how far to translate a card in the y direction depending on its index, whether or not its being hovered, or if it's selected - * @param isHovered - * @param isActive - * @param realIndex - * @param amCards - * @param calcRowIndex - * @returns + * Determines how far to translate a card in the y direction depending on its index and if it's selected + * @param isActive whether the card is focused for interaction + * @param realIndex index of card from start of deck + * @param amCards ?? 
+ * @param calcRowIndex index of card from start of row + * @returns Y translation of card */ - calculateTranslateY = (isHovered: boolean, isActive: boolean, realIndex: number, amCards: number, calcRowIndex: number) => { + calculateTranslateY = (isActive: boolean, realIndex: number, amCards: number, calcRowIndex: number) => { const rowHeight = (this._props.PanelHeight() * this.fitContentScale) / this.numRows; const rowIndex = Math.trunc(realIndex / this._maxRowCount); const rowToCenterShift = this.numRows / 2 - rowIndex; @@ -592,9 +591,8 @@ export class CollectionCardView extends CollectionSubView() { // Map sorted documents to their rendered components return this.sortedDocs.map((doc, index) => { - const realIndex = this.sortedDocs.indexOf(doc); - const calcRowIndex = this.overflowIndexCalc(realIndex); - const amCards = this.overflowAmCardsCalc(realIndex); + const calcRowIndex = this.overflowIndexCalc(index); + const amCards = this.overflowAmCardsCalc(index); const view = DocumentView.getDocumentView(doc, this.DocumentView?.()); const childScreenToLocal = this.childScreenToLocal(doc, index, calcRowIndex, !!view?.IsContentActive, amCards); @@ -617,8 +615,8 @@ export class CollectionCardView extends CollectionSubView() { style={{ width: this.childPanelWidth(), height: 'max-content', - transform: `translateY(${this.calculateTranslateY(this._hoveredNodeIndex === index, !!view?.IsContentActive, realIndex, amCards, calcRowIndex)}px) - translateX(calc(${view?.IsContentActive ? translateIfSelected() : 0}% + ${this.translateOverflowX(realIndex, amCards)}px)) + transform: `translateY(${this.calculateTranslateY(!!view?.IsContentActive, index, amCards, calcRowIndex)}px) + translateX(calc(${view?.IsContentActive ? translateIfSelected() : 0}% + ${this.translateOverflowX(index, amCards)}px)) rotate(${!view?.IsContentActive ? this.rotate(amCards, calcRowIndex) : 0}deg) scale(${view?.IsContentActive ? `${Math.min(hscale, vscale) * 100}%` : this._hoveredNodeIndex === index ? 
1.1 : 1})`, }} // prettier-ignore -- cgit v1.2.3-70-g09d2 From 34fdaf6f1405c59dbf18d03cc489a50715a7e4e9 Mon Sep 17 00:00:00 2001 From: bobzel Date: Mon, 7 Oct 2024 13:02:15 -0400 Subject: lint fixes --- src/client/util/UndoManager.ts | 5 ++- src/client/views/GestureOverlay.tsx | 3 +- src/client/views/ScriptingRepl.tsx | 3 +- .../views/collections/CollectionDockingView.tsx | 8 ++--- .../CollectionFreeFormInfoState.tsx | 1 - src/client/views/nodes/DataVizBox/DataVizBox.tsx | 1 - src/fields/Doc.ts | 5 ++- src/fields/RichTextUtils.ts | 6 ---- src/server/websocket.ts | 42 +++++++++++----------- 9 files changed, 31 insertions(+), 43 deletions(-) (limited to 'src') diff --git a/src/client/util/UndoManager.ts b/src/client/util/UndoManager.ts index ce0e7768b..07d3bb708 100644 --- a/src/client/util/UndoManager.ts +++ b/src/client/util/UndoManager.ts @@ -43,7 +43,6 @@ export function undoable(fn: (...args: any[]) => T, batchName: string): (...a return function (...fargs) { const batch = UndoManager.StartBatch(batchName); try { - // eslint-disable-next-line prefer-rest-params return fn.apply(undefined, fargs); } finally { batch.end(); @@ -51,9 +50,9 @@ export function undoable(fn: (...args: any[]) => T, batchName: string): (...a }; } -// eslint-disable-next-line no-redeclare, @typescript-eslint/no-explicit-any +// eslint-disable-next-line @typescript-eslint/no-explicit-any export function undoBatch(target: any, key: string | symbol, descriptor?: TypedPropertyDescriptor): any; -// eslint-disable-next-line no-redeclare, @typescript-eslint/no-explicit-any +// eslint-disable-next-line @typescript-eslint/no-explicit-any export function undoBatch(target: any, key?: string | symbol, descriptor?: TypedPropertyDescriptor<(...args: any[]) => unknown>): any { if (!key) { return function (...fargs: unknown[]) { diff --git a/src/client/views/GestureOverlay.tsx b/src/client/views/GestureOverlay.tsx index 5fddaec9a..afeecaa63 100644 --- a/src/client/views/GestureOverlay.tsx +++ 
b/src/client/views/GestureOverlay.tsx @@ -70,7 +70,6 @@ export class GestureOverlay extends ObservableReactComponent ({ X: pt.x, Y: pt.y }))); if (intersectRect(scribbleBounds, strokeBounds)) { diff --git a/src/client/views/ScriptingRepl.tsx b/src/client/views/ScriptingRepl.tsx index 2de867746..8ab91a6b5 100644 --- a/src/client/views/ScriptingRepl.tsx +++ b/src/client/views/ScriptingRepl.tsx @@ -1,4 +1,3 @@ -/* eslint-disable react/no-array-index-key */ import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'; import { action, makeObservable, observable } from 'mobx'; import { observer } from 'mobx-react'; @@ -182,7 +181,7 @@ export class ScriptingRepl extends ObservableReactComponent { this.maybeScrollToBottom(); return; } - const result = undoable(() => script.run({}, e => this.commands.push({ command: this.commandString, result: e as string })), 'run:' + this.commandString)(); + const result = undoable(() => script.run({}, err => this.commands.push({ command: this.commandString, result: err as string })), 'run:' + this.commandString)(); if (result.success) { this.commands.push({ command: this.commandString, result: result.result }); this.commandsHistory.push(this.commandString); diff --git a/src/client/views/collections/CollectionDockingView.tsx b/src/client/views/collections/CollectionDockingView.tsx index d1304b8f4..e1786d2c9 100644 --- a/src/client/views/collections/CollectionDockingView.tsx +++ b/src/client/views/collections/CollectionDockingView.tsx @@ -31,17 +31,17 @@ import { ScriptingRepl } from '../ScriptingRepl'; import { UndoStack } from '../UndoStack'; import './CollectionDockingView.scss'; import { CollectionSubView, SubCollectionViewProps } from './CollectionSubView'; -import { TabHTMLElement } from './TabDocView'; +import { TabDocView, TabHTMLElement } from './TabDocView'; @observer export class CollectionDockingView extends CollectionSubView() { - static tabClass: unknown = null; + static tabClass?: typeof TabDocView; /** * Initialize by 
assigning the add split method to DocumentView and by * configuring golden layout to render its documents using the specified React component * @param ele - typically would be set to TabDocView */ - public static Init(ele: unknown) { + public static Init(ele: typeof TabDocView) { this.tabClass = ele; DocumentView.addSplit = CollectionDockingView.AddSplit; } @@ -544,7 +544,7 @@ export class CollectionDockingView extends CollectionSubView() { tabCreated = (tab: { contentItem: { element: HTMLElement[] } }) => { this.tabMap.add(tab); // InitTab is added to the tab's HTMLElement in TabDocView - const tabdocviewContent = tab.contentItem.element[0]?.firstChild?.firstChild as unknown as TabHTMLElement; + const tabdocviewContent = tab.contentItem.element[0]?.firstChild?.firstChild as TabHTMLElement; tabdocviewContent?.InitTab?.(tab); // have to explicitly initialize tabs that reuse contents from previous tabs (ie, when dragging a tab around a new tab is created for the old content) }; diff --git a/src/client/views/collections/collectionFreeForm/CollectionFreeFormInfoState.tsx b/src/client/views/collections/collectionFreeForm/CollectionFreeFormInfoState.tsx index c17371151..51add85a8 100644 --- a/src/client/views/collections/collectionFreeForm/CollectionFreeFormInfoState.tsx +++ b/src/client/views/collections/collectionFreeForm/CollectionFreeFormInfoState.tsx @@ -46,7 +46,6 @@ export function InfoState( gif?: string, entryFunc?: () => unknown ) { - // eslint-disable-next-line new-cap return new infoState(msg, arcs, gif, entryFunc); } diff --git a/src/client/views/nodes/DataVizBox/DataVizBox.tsx b/src/client/views/nodes/DataVizBox/DataVizBox.tsx index df6e74d85..3dd568fda 100644 --- a/src/client/views/nodes/DataVizBox/DataVizBox.tsx +++ b/src/client/views/nodes/DataVizBox/DataVizBox.tsx @@ -1,4 +1,3 @@ -/* eslint-disable react/jsx-props-no-spreading */ import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'; import { Checkbox } from '@mui/material'; import { Colors, 
Toggle, ToggleType, Type } from 'browndash-components'; diff --git a/src/fields/Doc.ts b/src/fields/Doc.ts index b3a17de8f..60cf8b321 100644 --- a/src/fields/Doc.ts +++ b/src/fields/Doc.ts @@ -97,9 +97,8 @@ export namespace Field { }); return script; } - export function toString(fieldIn: unknown) { - const field = fieldIn as FieldType; - if (typeof field === 'string' || typeof field === 'number' || typeof field === 'boolean') return String(field); + export function toString(field: FieldResult | FieldType | undefined) { + if (field instanceof Promise || typeof field === 'string' || typeof field === 'number' || typeof field === 'boolean') return String(field); return field?.[ToString]?.() || ''; } export function IsField(field: unknown): field is FieldType; diff --git a/src/fields/RichTextUtils.ts b/src/fields/RichTextUtils.ts index b3534dde7..8c073c87b 100644 --- a/src/fields/RichTextUtils.ts +++ b/src/fields/RichTextUtils.ts @@ -1,4 +1,3 @@ -/* eslint-disable @typescript-eslint/no-namespace */ /* eslint-disable no-await-in-loop */ /* eslint-disable no-use-before-define */ import { AssertionError } from 'assert'; @@ -175,7 +174,6 @@ export namespace RichTextUtils { const indentMap = new Map(); let globalOffset = 0; const nodes: Node[] = []; - // eslint-disable-next-line no-restricted-syntax for (const element of structured) { if (Array.isArray(element)) { lists.push(element); @@ -374,11 +372,9 @@ export namespace RichTextUtils { const marksToStyle = async (nodes: (Node | null)[]): Promise => { const requests: docsV1.Schema$Request[] = []; let position = 1; - // eslint-disable-next-line no-restricted-syntax for (const node of nodes) { if (node === null) { position += 2; - // eslint-disable-next-line no-continue continue; } const { marks, attrs, nodeSize } = node; @@ -390,9 +386,7 @@ export namespace RichTextUtils { }; let mark: Mark; const markMap = BuildMarkMap(marks); - // eslint-disable-next-line no-restricted-syntax for (const markName of 
Object.keys(schema.marks)) { - // eslint-disable-next-line no-cond-assign if (ignored.includes(markName) || !(mark = markMap[markName])) { continue; } diff --git a/src/server/websocket.ts b/src/server/websocket.ts index 1e25a8a27..effe94219 100644 --- a/src/server/websocket.ts +++ b/src/server/websocket.ts @@ -61,27 +61,6 @@ export namespace WebSocket { Database.Instance.getDocuments(ids, callback); } - const pendingOps = new Map(); - - function dispatchNextOp(id: string): unknown { - const next = pendingOps.get(id)?.shift(); - // eslint-disable-next-line @typescript-eslint/no-unused-vars - const nextOp = (res: boolean) => dispatchNextOp(id); - if (next) { - const { diff, socket } = next; - // ideally, we'd call the Database update method for all actions, but for now we handle list insertion/removal on our own - switch (diff.diff.$addToSet ? 'add' : diff.diff.$remFromSet ? 'rem' : 'set') { - case 'add': return GetRefFieldLocal(id, (result) => addToListField(socket, diff, result, nextOp)); // prettier-ignore - case 'rem': return GetRefFieldLocal(id, (result) => remFromListField(socket, diff, result, nextOp)); // prettier-ignore - default: return Database.Instance.update(id, diff.diff, - () => nextOp(socket.broadcast.emit(MessageStore.UpdateField.Message, diff)), - false - ); // prettier-ignore - } - } - return !pendingOps.get(id)?.length && pendingOps.delete(id); - } - function addToListField(socket: Socket, diff: Diff, listDoc: serializedDoctype | undefined, cb: (res: boolean) => void): void { const $addToSet = diff.diff.$addToSet as serializedFieldsType; const updatefield = Array.from(Object.keys($addToSet ?? 
{}))[0]; @@ -181,6 +160,27 @@ export namespace WebSocket { } else cb(false); } + const pendingOps = new Map(); + + function dispatchNextOp(id: string): unknown { + const next = pendingOps.get(id)?.shift(); + // eslint-disable-next-line @typescript-eslint/no-unused-vars + const nextOp = (res: boolean) => dispatchNextOp(id); + if (next) { + const { diff, socket } = next; + // ideally, we'd call the Database update method for all actions, but for now we handle list insertion/removal on our own + switch (diff.diff.$addToSet ? 'add' : diff.diff.$remFromSet ? 'rem' : 'set') { + case 'add': return GetRefFieldLocal(id, (result) => addToListField(socket, diff, result, nextOp)); // prettier-ignore + case 'rem': return GetRefFieldLocal(id, (result) => remFromListField(socket, diff, result, nextOp)); // prettier-ignore + default: return Database.Instance.update(id, diff.diff, + () => nextOp(socket.broadcast.emit(MessageStore.UpdateField.Message, diff)), + false + ); // prettier-ignore + } + } + return !pendingOps.get(id)?.length && pendingOps.delete(id); + } + function UpdateField(socket: Socket, diff: Diff) { const curUser = socketMap.get(socket); if (curUser) { -- cgit v1.2.3-70-g09d2 From 3137f422a004adef82124989280b40852a033afe Mon Sep 17 00:00:00 2001 From: bobzel Date: Mon, 7 Oct 2024 13:38:54 -0400 Subject: fixed card 'pile' function. lint fixes. 
--- src/client/views/global/globalScripts.ts | 27 +++++----------------- .../views/nodes/RecordingBox/ProgressBar.tsx | 2 -- src/fields/Doc.ts | 7 +----- 3 files changed, 7 insertions(+), 29 deletions(-) (limited to 'src') diff --git a/src/client/views/global/globalScripts.ts b/src/client/views/global/globalScripts.ts index 423a2d6ef..2b8908899 100644 --- a/src/client/views/global/globalScripts.ts +++ b/src/client/views/global/globalScripts.ts @@ -4,6 +4,7 @@ import { runInAction } from 'mobx'; import { Doc, DocListCast, Opt, StrListCast } from '../../../fields/Doc'; import { DocData } from '../../../fields/DocSymbols'; import { InkTool } from '../../../fields/InkField'; +import { List } from '../../../fields/List'; import { BoolCast, Cast, NumCast, StrCast } from '../../../fields/Types'; import { WebField } from '../../../fields/URLField'; import { Gestures } from '../../../pen-gestures/GestureTypes'; @@ -16,7 +17,6 @@ import { UndoManager, undoable } from '../../util/UndoManager'; import { GestureOverlay } from '../GestureOverlay'; import { InkTranscription } from '../InkTranscription'; import { InkingStroke } from '../InkingStroke'; -import { MainView } from '../MainView'; import { PropertiesView } from '../PropertiesView'; import { CollectionFreeFormView } from '../collections/collectionFreeForm'; import { CollectionFreeFormDocumentView } from '../nodes/CollectionFreeFormDocumentView'; @@ -40,6 +40,7 @@ import { VideoBox } from '../nodes/VideoBox'; import { WebBox } from '../nodes/WebBox'; import { RichTextMenu } from '../nodes/formattedText/RichTextMenu'; import { GPTPopup, GPTPopupMode } from '../pdf/GPTPopup/GPTPopup'; +import { OpenWhere } from '../nodes/OpenWhere'; // eslint-disable-next-line prefer-arrow-callback ScriptingGlobals.add(function IsNoneSelected() { @@ -240,31 +241,15 @@ ScriptingGlobals.add(function showFreeform( ['pile', { checkResult: (doc: Doc) => doc._type_collection == CollectionViewType.Freeform, setDoc: (doc: Doc, dv: DocumentView) => 
{ - doc._type_collection = CollectionViewType.Freeform; const newCol = Docs.Create.CarouselDocument(DocListCast(doc[Doc.LayoutFieldKey(doc)]), { + title: doc.title + "_carousel", _width: 250, _height: 200, _layout_fitWidth: false, _layout_autoHeight: true, + childFilters: new List(StrListCast(doc.childFilters)) }); - - - const iconMap: { [key: number]: string } = { - 0: 'star', - 1: 'heart', - 2: 'cloud', - 3: 'bolt' - }; - - for (let i=0; i<4; i++){ - if (isAttrFiltered(iconMap[i])){ - newCol[iconMap[i]] = true - } - } - - newCol && dv.ComponentView?.addDocument?.(newCol); - DocumentView.showDocument(newCol, { willZoomCentered: true }) - + dv._props.addDocTab?.(newCol, OpenWhere.addRight); }, }], ]); @@ -300,7 +285,7 @@ ScriptingGlobals.add(function setTagFilter(tag: string, added: boolean, checkRes added ? Doc.setDocFilter(selected, 'tags', tag, 'check') : Doc.setDocFilter(selected, 'tags', tag, 'remove'); } else { SnappingManager.PropertiesWidth < 5 && SnappingManager.SetPropertiesWidth(0); - SnappingManager.SetPropertiesWidth(MainView.Instance.propertiesWidth() < 15 ? 250 : 0); + SnappingManager.SetPropertiesWidth(SnappingManager.PropertiesWidth < 15 ? 
250 : 0); PropertiesView.Instance?.CloseAll(); runInAction(() => (PropertiesView.Instance.openFilters = SnappingManager.PropertiesWidth > 5)); } diff --git a/src/client/views/nodes/RecordingBox/ProgressBar.tsx b/src/client/views/nodes/RecordingBox/ProgressBar.tsx index 62798bc2f..7e91df7ab 100644 --- a/src/client/views/nodes/RecordingBox/ProgressBar.tsx +++ b/src/client/views/nodes/RecordingBox/ProgressBar.tsx @@ -1,5 +1,3 @@ -/* eslint-disable react/no-array-index-key */ -/* eslint-disable react/require-default-props */ import * as React from 'react'; import { useEffect, useState, useRef } from 'react'; import './ProgressBar.scss'; diff --git a/src/fields/Doc.ts b/src/fields/Doc.ts index 60cf8b321..4d256e8f2 100644 --- a/src/fields/Doc.ts +++ b/src/fields/Doc.ts @@ -110,7 +110,7 @@ export namespace Field { export function Copy(field: unknown) { return field instanceof ObjectField ? ObjectField.MakeCopy(field) : (field as FieldType); } - UndoManager.SetFieldPrinter(toString); + UndoManager.SetFieldPrinter((val: unknown) => (IsField(val) ? toString(val) : '')); } export type FieldType = number | string | boolean | ObjectField | RefField; export type Opt = T | undefined; @@ -336,7 +336,6 @@ export class Doc extends RefField { if (!id || forceSave) { DocServer.CreateDocField(docProxy); } - // eslint-disable-next-line no-constructor-return return docProxy; // need to return the proxy from the constructor so that all our added fields will get called } @@ -463,8 +462,6 @@ export class Doc extends RefField { }); } } - -// eslint-disable-next-line no-redeclare export namespace Doc { export let SelectOnLoad: Doc | undefined; export function SetSelectOnLoad(doc: Doc | undefined) { @@ -660,7 +657,6 @@ export namespace Doc { if (reversed) list.splice(0, 0, doc); else list.push(doc); } else { - // eslint-disable-next-line no-lonely-if if (reversed) list.splice(before ? list.length - ind + 1 : list.length - ind, 0, doc); else list.splice(before ? 
ind : ind + 1, 0, doc); } @@ -1192,7 +1188,6 @@ export namespace Doc { return Cast(Doc.UserDoc().myLinkDatabase, Doc, null); } export function SetUserDoc(doc: Doc) { - // eslint-disable-next-line no-return-assign return (manager._user_doc = doc); } -- cgit v1.2.3-70-g09d2 From 827dd6aba9eefcd3beddbc246134cd6d6492fcf8 Mon Sep 17 00:00:00 2001 From: bobzel Date: Mon, 7 Oct 2024 13:49:08 -0400 Subject: fixed import cycle by making addHotKey a prop --- src/client/views/FilterPanel.tsx | 36 ++++-------------------------------- src/client/views/MainView.tsx | 35 ++++++++++++++++++++++++++++++++--- src/client/views/PropertiesView.tsx | 3 ++- 3 files changed, 38 insertions(+), 36 deletions(-) (limited to 'src') diff --git a/src/client/views/FilterPanel.tsx b/src/client/views/FilterPanel.tsx index e34b66963..11425e477 100644 --- a/src/client/views/FilterPanel.tsx +++ b/src/client/views/FilterPanel.tsx @@ -1,3 +1,4 @@ +import { IconProp } from '@fortawesome/fontawesome-svg-core'; import { FontAwesomeIcon } from '@fortawesome/react-fontawesome'; import { Tooltip } from '@mui/material'; import { action, computed, makeObservable, observable, ObservableMap } from 'mobx'; @@ -12,18 +13,15 @@ import { DocData } from '../../fields/DocSymbols'; import { Id } from '../../fields/FieldSymbols'; import { List } from '../../fields/List'; import { RichTextField } from '../../fields/RichTextField'; -import { DocCast, StrCast } from '../../fields/Types'; -import { Button, CurrentUserUtils } from '../util/CurrentUserUtils'; +import { StrCast } from '../../fields/Types'; import { SearchUtil } from '../util/SearchUtil'; import { SnappingManager } from '../util/SnappingManager'; import { undoable } from '../util/UndoManager'; import { FieldsDropdown } from './FieldsDropdown'; import './FilterPanel.scss'; import { DocumentView } from './nodes/DocumentView'; -import { ButtonType } from './nodes/FontIconBox/FontIconBox'; import { Handle, Tick, TooltipRail, Track } from 
'./nodes/SliderBox-components'; import { ObservableReactComponent } from './ObservableReactComponent'; -import { IconProp } from '@fortawesome/fontawesome-svg-core'; interface HotKeyButtonProps { hotKey: Doc; @@ -159,6 +157,7 @@ const HotKeyIconButton: React.FC = observer(({ hotKey /*, sel interface filterProps { Document: Doc; + addHotKey: (hotKey: string) => void; } @observer @@ -356,33 +355,6 @@ export class FilterPanel extends ObservableReactComponent { return nonNumbers / facetValues.length > 0.1 ? facetValues.sort() : facetValues.sort((n1: string, n2: string) => Number(n1) - Number(n2)); }; - /** - * Allows users to add a filter hotkey to the properties panel. Will also update the multitoggle at the top menu and the - * icontags tht are displayed on the documents themselves - * @param hotKey tite of the new hotkey - */ - addHotkey = (hotKey: string) => { - const buttons = DocCast(Doc.UserDoc().myContextMenuBtns); - const filter = DocCast(buttons.Filter); - const title = hotKey.startsWith('#') ? hotKey.substring(1) : hotKey; - - const newKey: Button = { - title, - icon: 'question', - toolTip: `Click to toggle the ${title}'s group's visibility`, - btnType: ButtonType.ToggleButton, - expertMode: false, - toolType: '#' + title, - funcs: {}, - scripts: { onClick: '{ return handleTags(this.toolType, _readOnly_);}' }, - }; - - const newBtn = CurrentUserUtils.setupContextMenuBtn(newKey, filter); - newBtn.isSystem = newBtn[DocData].isSystem = undefined; - - Doc.AddToFilterHotKeys(newBtn); - }; - /** * Renders the newly formed hotkey icon buttons * @returns the buttons to be rendered @@ -472,7 +444,7 @@ export class FilterPanel extends ObservableReactComponent {
- +
diff --git a/src/client/views/MainView.tsx b/src/client/views/MainView.tsx index 31d7e82a6..c61cdea54 100644 --- a/src/client/views/MainView.tsx +++ b/src/client/views/MainView.tsx @@ -20,6 +20,7 @@ import { CollectionViewType, DocumentType } from '../documents/DocumentTypes'; import { Docs } from '../documents/Documents'; import { CalendarManager } from '../util/CalendarManager'; import { CaptureManager } from '../util/CaptureManager'; +import { Button, CurrentUserUtils } from '../util/CurrentUserUtils'; import { DocumentManager } from '../util/DocumentManager'; import { DragManager } from '../util/DragManager'; import { dropActionType } from '../util/DropActionTypes'; @@ -41,6 +42,7 @@ import { DashboardView } from './DashboardView'; import { DictationOverlay } from './DictationOverlay'; import { DocumentDecorations } from './DocumentDecorations'; import { GestureOverlay } from './GestureOverlay'; +import { InkTranscription } from './InkTranscription'; import { LightboxView } from './LightboxView'; import './MainView.scss'; import { ObservableReactComponent } from './ObservableReactComponent'; @@ -60,6 +62,7 @@ import { LinkMenu } from './linking/LinkMenu'; import { SchemaCSVPopUp } from './nodes/DataVizBox/SchemaCSVPopUp'; import { DocButtonState } from './nodes/DocumentLinksButton'; import { DocumentView, DocumentViewInternal } from './nodes/DocumentView'; +import { ButtonType } from './nodes/FontIconBox/FontIconBox'; import { ImageEditorData as ImageEditor } from './nodes/ImageBox'; import { LinkDescriptionPopup } from './nodes/LinkDescriptionPopup'; import { LinkDocPreview, LinkInfo } from './nodes/LinkDocPreview'; @@ -73,9 +76,8 @@ import GenerativeFill from './nodes/generativeFill/GenerativeFill'; import { PresBox } from './nodes/trails'; import { AnchorMenu } from './pdf/AnchorMenu'; import { GPTPopup } from './pdf/GPTPopup/GPTPopup'; -import { TopBar } from './topbar/TopBar'; import { SmartDrawHandler } from './smartdraw/SmartDrawHandler'; -import { 
InkTranscription } from './InkTranscription'; +import { TopBar } from './topbar/TopBar'; // eslint-disable-next-line @typescript-eslint/no-require-imports const { LEFT_MENU_WIDTH, TOPBAR_HEIGHT } = require('./global/globalCssVariables.module.scss'); // prettier-ignore @@ -850,6 +852,33 @@ export class MainView extends ObservableReactComponent { return true; }; + /** + * Allows users to add a filter hotkey to the properties panel. Will also update the multitoggle at the top menu and the + * icontags tht are displayed on the documents themselves + * @param hotKey tite of the new hotkey + */ + addHotKey = (hotKey: string) => { + const buttons = DocCast(Doc.UserDoc().myContextMenuBtns); + const filter = DocCast(buttons.Filter); + const title = hotKey.startsWith('#') ? hotKey.substring(1) : hotKey; + + const newKey: Button = { + title, + icon: 'question', + toolTip: `Click to toggle the ${title}'s group's visibility`, + btnType: ButtonType.ToggleButton, + expertMode: false, + toolType: '#' + title, + funcs: {}, + scripts: { onClick: '{ return handleTags(this.toolType, _readOnly_);}' }, + }; + + const newBtn = CurrentUserUtils.setupContextMenuBtn(newKey, filter); + newBtn.isSystem = newBtn[DocData].isSystem = undefined; + + Doc.AddToFilterHotKeys(newBtn); + }; + @computed get mainInnerContent() { const leftMenuFlyoutWidth = this._leftMenuFlyoutWidth + this.leftMenuWidth(); const width = this.propertiesWidth() + leftMenuFlyoutWidth; @@ -878,7 +907,7 @@ export class MainView extends ObservableReactComponent { )}
- +
diff --git a/src/client/views/PropertiesView.tsx b/src/client/views/PropertiesView.tsx index d0c47875f..715f079d8 100644 --- a/src/client/views/PropertiesView.tsx +++ b/src/client/views/PropertiesView.tsx @@ -50,6 +50,7 @@ interface PropertiesViewProps { height: number; styleProvider?: StyleProviderFuncType; addDocTab: (doc: Doc, where: OpenWhere) => boolean; + addHotKey: (hotKey: string) => void; } @observer @@ -1277,7 +1278,7 @@ export class PropertiesView extends ObservableReactComponent { this.openFilters = bool; })} onDoubleClick={this.CloseAll}>
- +
); // prettier-ignore -- cgit v1.2.3-70-g09d2 From 09b722789afa5d7c5edfc701978f17e4745113bc Mon Sep 17 00:00:00 2001 From: bobzel Date: Mon, 7 Oct 2024 14:25:35 -0400 Subject: more lint --- src/client/documents/DocUtils.ts | 2 -- src/client/views/TagsView.tsx | 4 +--- 2 files changed, 1 insertion(+), 5 deletions(-) (limited to 'src') diff --git a/src/client/documents/DocUtils.ts b/src/client/documents/DocUtils.ts index 0699ea09f..1130a9ae8 100644 --- a/src/client/documents/DocUtils.ts +++ b/src/client/documents/DocUtils.ts @@ -1,5 +1,3 @@ -/* eslint-disable prefer-destructuring */ -/* eslint-disable default-param-last */ /* eslint-disable no-use-before-define */ import { IconProp } from '@fortawesome/fontawesome-svg-core'; import { saveAs } from 'file-saver'; diff --git a/src/client/views/TagsView.tsx b/src/client/views/TagsView.tsx index 9858e7b61..072cae3af 100644 --- a/src/client/views/TagsView.tsx +++ b/src/client/views/TagsView.tsx @@ -84,9 +84,7 @@ export class TagItem extends ObservableReactComponent { */ public static allDocsWithTag = (tag: string) => DocListCast(TagItem.findTagCollectionDoc(tag)?.[DocData].docs); - public static docHasTag = (doc: Doc, tag: string) => { - return StrListCast(doc?.tags).includes(tag); - }; + public static docHasTag = (doc: Doc, tag: string) => StrListCast(doc?.tags).includes(tag); /** * Adds a tag to the metadata of this document and adds the Doc to the corresponding tag collection Doc (or creates it) * @param tag tag string -- cgit v1.2.3-70-g09d2 From b9fda86731a01ebfc3f21ebdd4eaf43a1c9eccc6 Mon Sep 17 00:00:00 2001 From: bobzel Date: Tue, 8 Oct 2024 23:07:52 -0400 Subject: yet another fix to cardView to make animations smooth - need to specifically target hidingDecorations without going using IsResizing as a hack --- src/client/util/SnappingManager.ts | 3 +++ src/client/views/DocumentDecorations.tsx | 9 +++------ src/client/views/collections/CollectionCardDeckView.tsx | 5 +++-- 
.../collections/collectionFreeForm/CollectionFreeFormView.tsx | 2 +- 4 files changed, 10 insertions(+), 9 deletions(-) (limited to 'src') diff --git a/src/client/util/SnappingManager.ts b/src/client/util/SnappingManager.ts index 95ccc7735..5f6c7d9ac 100644 --- a/src/client/util/SnappingManager.ts +++ b/src/client/util/SnappingManager.ts @@ -28,6 +28,7 @@ export class SnappingManager { @observable _lastBtnId: string = ''; @observable _propertyWid: number = 0; @observable _printToConsole: boolean = false; + @observable _hideDecorations: boolean = false; private constructor() { SnappingManager._manager = this; @@ -59,6 +60,7 @@ export class SnappingManager { public static get LastPressedBtn() { return this.Instance._lastBtnId; } // prettier-ignore public static get PropertiesWidth(){ return this.Instance._propertyWid; } // prettier-ignore public static get PrintToConsole() { return this.Instance._printToConsole; } // prettier-ignore + public static get HideDecorations(){ return this.Instance._hideDecorations; } // prettier-ignore public static SetLongPress = (press: boolean) => runInAction(() => {this.Instance._longPress = press}); // prettier-ignore public static SetShiftKey = (down: boolean) => runInAction(() => {this.Instance._shiftKey = down}); // prettier-ignore @@ -75,6 +77,7 @@ export class SnappingManager { public static SetLastPressedBtn = (id:string) =>runInAction(() => {this.Instance._lastBtnId = id}); // prettier-ignore public static SetPropertiesWidth= (wid:number) =>runInAction(() => {this.Instance._propertyWid = wid}); // prettier-ignore public static SetPrintToConsole = (state:boolean) =>runInAction(() => {this.Instance._printToConsole = state}); // prettier-ignore + public static SetHideDecorations= (state:boolean) =>runInAction(() => {this.Instance._hideDecorations = state}); // prettier-ignore public static userColor: string | undefined; public static userVariantColor: string | undefined; diff --git a/src/client/views/DocumentDecorations.tsx 
b/src/client/views/DocumentDecorations.tsx index 1c0d51e17..62f2de776 100644 --- a/src/client/views/DocumentDecorations.tsx +++ b/src/client/views/DocumentDecorations.tsx @@ -63,7 +63,6 @@ export class DocumentDecorations extends ObservableReactComponent dv.ContentDiv!), dragData, e.x, e.y, { - dragComplete: action(() => { - this._hidden = false; - }), + dragComplete: () => SnappingManager.SetHideDecorations(false), hideSource: true, } ); @@ -653,7 +650,7 @@ export class DocumentDecorations extends ObservableReactComponent { this._editingTitle = false; diff --git a/src/client/views/collections/CollectionCardDeckView.tsx b/src/client/views/collections/CollectionCardDeckView.tsx index 0c2bcc580..92c69c3cf 100644 --- a/src/client/views/collections/CollectionCardDeckView.tsx +++ b/src/client/views/collections/CollectionCardDeckView.tsx @@ -22,6 +22,7 @@ import { GPTPopup, GPTPopupMode } from '../pdf/GPTPopup/GPTPopup'; import './CollectionCardDeckView.scss'; import { CollectionSubView, SubCollectionViewProps } from './CollectionSubView'; import { computedFn } from 'mobx-utils'; +import { DocumentDecorations } from '../DocumentDecorations'; enum cardSortings { Time = 'time', @@ -566,10 +567,10 @@ export class CollectionCardView extends CollectionSubView() { } else { // otherwise, turn off documentDecorations becase we're in a selection transition and want to avoid artifacts. 
// Turn them back on when the animation has completed and the render and backend structures are in synch - SnappingManager.SetIsResizing(doc[Id]); + SnappingManager.SetHideDecorations(true); setTimeout( action(() => { - SnappingManager.SetIsResizing(undefined); + SnappingManager.SetHideDecorations(false); this._forceChildXf++; }), 1000 diff --git a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx index 0cc63d632..d8678eebc 100644 --- a/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx +++ b/src/client/views/collections/collectionFreeForm/CollectionFreeFormView.tsx @@ -138,7 +138,7 @@ export class CollectionFreeFormView extends CollectionSubView Date: Wed, 9 Oct 2024 21:56:45 -0400 Subject: lint fixes --- src/client/apis/gpt/GPT.ts | 2 +- src/client/views/nodes/ComparisonBox.tsx | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'src') diff --git a/src/client/apis/gpt/GPT.ts b/src/client/apis/gpt/GPT.ts index 209e8e87e..858957ac2 100644 --- a/src/client/apis/gpt/GPT.ts +++ b/src/client/apis/gpt/GPT.ts @@ -110,7 +110,7 @@ let lastResp = ''; * @param inputText Text to process * @returns AI Output */ -const gptAPICall = async (inputTextIn: string, callType: GPTCallType, prompt?: any, dontCache?: boolean) => { +const gptAPICall = async (inputTextIn: string, callType: GPTCallType, prompt?: string, dontCache?: boolean) => { const inputText = [GPTCallType.SUMMARY, GPTCallType.FLASHCARD, GPTCallType.QUIZ].includes(callType) ? inputTextIn + '.' 
: inputTextIn; const opts: GPTCallOpts = callTypeMap[callType]; if (lastCall === inputText && dontCache !== true) return lastResp; diff --git a/src/client/views/nodes/ComparisonBox.tsx b/src/client/views/nodes/ComparisonBox.tsx index 39a2e3a31..c1446a77a 100644 --- a/src/client/views/nodes/ComparisonBox.tsx +++ b/src/client/views/nodes/ComparisonBox.tsx @@ -274,7 +274,7 @@ export class ComparisonBox extends ViewBoxAnnotatableComponent() return; } this._outputValue = res; - } catch (err) { + } catch { console.error('GPT call failed'); } }; @@ -301,7 +301,6 @@ export class ComparisonBox extends ViewBoxAnnotatableComponent() return targetDoc || layoutString ? ( <> () const dataSplit = StrCast(this.dataDoc.data).split('Answer'); const newDoc = Docs.Create.TextDocument(dataSplit[1]); // if there is text from the pdf ai cards, put the question on the front side. - // eslint-disable-next-line prefer-destructuring newDoc[DocData].text = dataSplit[1]; this.addDoc(newDoc, this.fieldKey + '_0'); } @@ -350,7 +348,6 @@ export class ComparisonBox extends ViewBoxAnnotatableComponent() const dataSplit = StrCast(this.dataDoc.data).split('Answer'); const newDoc = Docs.Create.TextDocument(dataSplit[0]); // if there is text from the pdf ai cards, put the answer on the alternate side. 
- // eslint-disable-next-line prefer-destructuring newDoc[DocData].text = dataSplit[0]; this.addDoc(newDoc, this.fieldKey + '_1'); } -- cgit v1.2.3-70-g09d2 From 75b98f184260567c0dabb54dd8ef22a8e2510512 Mon Sep 17 00:00:00 2001 From: bobzel Date: Wed, 9 Oct 2024 22:11:32 -0400 Subject: from last --- src/client/util/CurrentUserUtils.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/client/util/CurrentUserUtils.ts b/src/client/util/CurrentUserUtils.ts index 29f756f6a..7670827f8 100644 --- a/src/client/util/CurrentUserUtils.ts +++ b/src/client/util/CurrentUserUtils.ts @@ -390,7 +390,7 @@ pie title Minerals in my tap water {key: "Header", creator: headerTemplate, opts: { _width: 300, _height: 120, _header_pointerEvents: "all", _header_height: 50, _header_fontSize: 9,_layout_autoHeightMargins: 50, _layout_autoHeight: true, treeView_HideUnrendered: true}}, {key: "ViewSlide", creator: slideView, opts: { _width: 400, _height: 300, _xMargin: 3, _yMargin: 3,}}, {key: "Trail", creator: Docs.Create.PresDocument, opts: { _width: 400, _height: 30, _type_collection: CollectionViewType.Stacking, _layout_dontCenter:'xy', dropAction: dropActionType.embed, treeView_HideTitle: true, _layout_fitWidth:true, layout_boxShadow: "0 0" }}, - {key: "Tab", creator: opts => Docs.Create.FreeformDocument([], opts), opts: { _width: 500, _height: 800, _layout_fitWidth: true, _freeform_backgroundGrid: true, }}, + {key: "Tab", creator: opts => Docs.Create.FreeformDocument([], opts), opts: { _width: 500, _height: 800, _layout_fitWidth: true, _freeform_backgroundGrid: true, }}, {key: "Slide", creator: opts => Docs.Create.TreeDocument([], opts), opts: { _width: 300, _height: 200, _type_collection: CollectionViewType.Tree, treeView_HasOverlay: true, _text_fontSize: "20px", _layout_autoHeight: true, dropAction:dropActionType.move, treeView_Type: TreeViewType.outline, -- cgit v1.2.3-70-g09d2 From d347fc59feefd91a796012892da57511787bb6d0 Mon Sep 17 00:00:00 2001 
From: "A.J. Shulman" Date: Thu, 10 Oct 2024 11:39:19 -0400 Subject: added new file header comments and fixed some error handling --- .../views/nodes/chatbot/agentsystem/prompts.ts | 9 ++++++++- .../nodes/chatbot/chatboxcomponents/ChatBox.tsx | 9 +++++++++ .../chatbot/chatboxcomponents/MessageComponent.tsx | 9 +++++++++ .../chatbot/chatboxcomponents/ProgressBar.tsx | 8 ++++++++ .../nodes/chatbot/response_parsers/AnswerParser.ts | 8 ++++++++ .../response_parsers/StreamedAnswerParser.ts | 8 ++++++++ src/client/views/nodes/chatbot/tools/BaseTool.ts | 8 ++++++++ .../views/nodes/chatbot/vectorstore/Vectorstore.ts | 10 +++++++++- src/server/ApiManagers/AssistantManager.ts | 22 ++++++++++------------ 9 files changed, 77 insertions(+), 14 deletions(-) (limited to 'src') diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts index 01c30d444..f5aec3130 100644 --- a/src/client/views/nodes/chatbot/agentsystem/prompts.ts +++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts @@ -1,4 +1,11 @@ -// prompts.ts +/** + * @file prompts.ts + * @description This file contains functions that generate prompts for various AI tasks, including + * generating system messages for structured AI assistant interactions and summarizing document chunks. + * It defines prompt structures to ensure the AI follows specific guidelines for response formatting, + * tool usage, and citation rules, with a rigid structure in mind for tasks such as answering user queries + * and summarizing content from provided text chunks. 
+ */ import { Tool } from '../types/types'; diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index 613cb7078..44c231c87 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -1,3 +1,12 @@ +/** + * @file ChatBox.tsx + * @description This file defines the ChatBox component, which manages user interactions with + * an AI assistant. It handles document uploads, chat history, message input, and integration + * with the OpenAI API. The ChatBox is MobX-observable and tracks the progress of tasks such as + * document analysis and AI-driven summaries. It also maintains real-time chat functionality + * with support for follow-up questions and citation management. + */ + import dotenv from 'dotenv'; import { ObservableSet, action, computed, makeObservable, observable, observe, reaction, runInAction } from 'mobx'; import { observer } from 'mobx-react'; diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx index 801becb64..d48f46963 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx @@ -1,3 +1,12 @@ +/** + * @file MessageComponentBox.tsx + * @description This file defines the MessageComponentBox component, which renders the content + * of an AssistantMessage. It supports rendering various message types such as grounded text, + * normal text, and follow-up questions. The component uses React and MobX for state management + * and includes functionality for handling citation and follow-up actions, as well as displaying + * agent processing information. 
+ */ + import React, { useState } from 'react'; import { observer } from 'mobx-react'; import { AssistantMessage, Citation, MessageContent, PROCESSING_TYPE, ProcessingInfo, TEXT_TYPE } from '../types/types'; diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx index b9fd08742..240862f8b 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx @@ -1,3 +1,11 @@ +/** + * @file ProgressBar.tsx + * @description This file defines the ProgressBar component, which displays a loading spinner + * to indicate progress during ongoing tasks or processing. The animation consists of two + * bouncing elements that create a pulsating effect, providing a visual cue for active progress. + * The component is styled using the accompanying `ProgressBar.scss` for smooth animation. + */ + import React from 'react'; import './ProgressBar.scss'; diff --git a/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts b/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts index 1ac753790..ed78cc7cb 100644 --- a/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts +++ b/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts @@ -1,3 +1,11 @@ +/** + * @file AnswerParser.ts + * @description This file defines the AnswerParser class, which processes structured XML-like responses + * from the AI system, parsing grounded text, normal text, citations, follow-up questions, and loop summaries. + * The parser converts the XML response into an AssistantMessage format, extracting key information like + * citations and processing steps for further use in the assistant's workflow. 
+ */ + import { v4 as uuid } from 'uuid'; import { ASSISTANT_ROLE, AssistantMessage, Citation, ProcessingInfo, TEXT_TYPE, getChunkType } from '../types/types'; diff --git a/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts b/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts index 4149f3da9..dbd568faa 100644 --- a/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts +++ b/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts @@ -1,3 +1,11 @@ +/** + * @file StreamedAnswerParser.ts + * @description This file defines the StreamedAnswerParser class, which parses incoming character streams + * to extract grounded or normal text based on the tags found in the input stream. It maintains state + * between grounded text and normal text sections, handling buffered input and ensuring proper text formatting + * for AI assistant responses. + */ + enum ParserState { Outside, InGroundedText, diff --git a/src/client/views/nodes/chatbot/tools/BaseTool.ts b/src/client/views/nodes/chatbot/tools/BaseTool.ts index 10780617b..a77f567a5 100644 --- a/src/client/views/nodes/chatbot/tools/BaseTool.ts +++ b/src/client/views/nodes/chatbot/tools/BaseTool.ts @@ -1,3 +1,11 @@ +/** + * @file BaseTool.ts + * @description This file defines the abstract BaseTool class, which serves as a blueprint + * for tool implementations in the AI assistant system. Each tool has a name, description, + * parameters, and citation rules. The BaseTool class provides a structure for executing actions + * and retrieving action rules for use within the assistant's workflow. 
+ */ + import { Tool } from '../types/types'; export abstract class BaseTool = Record> implements Tool { diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts index 9575277f7..f96f55997 100644 --- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts +++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts @@ -1,3 +1,10 @@ +/** + * @file Vectorstore.ts + * @description This file defines the Vectorstore class, which integrates with Pinecone for vector-based document indexing and Cohere for text embeddings. + * It handles tasks such as AI document management, document chunking, and retrieval of relevant document sections based on user queries. + * The class supports adding documents to the vectorstore, managing document status, and querying Pinecone for document chunks matching a query. + */ + import { Index, IndexList, Pinecone, PineconeRecord, QueryResponse, RecordMetadata } from '@pinecone-database/pinecone'; import { CohereClient } from 'cohere-ai'; import { EmbedResponse } from 'cohere-ai/api'; @@ -128,7 +135,8 @@ export class Vectorstore { } } if (!result) { - throw new Error('no result received...'); // bcz: is this an Error? + console.error('Error processing document.'); + return; } // Once completed, process the document and add it to the vectorstore. diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index b4ebb1eae..b7d4191ca 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -1,3 +1,13 @@ +/** + * @file AssistantManager.ts + * @description This file defines the AssistantManager class, responsible for managing various + * API routes related to the Assistant functionality. It provides features such as file handling, + * web scraping, and integration with third-party APIs like OpenAI and Google Custom Search. 
+ * It also handles job tracking and progress reporting for tasks like document creation and web scraping. + * Utility functions for path manipulation and file operations are included, along with + * a mechanism for handling retry logic during API calls. + */ + import { Readability } from '@mozilla/readability'; import axios from 'axios'; import { spawn } from 'child_process'; @@ -76,12 +86,6 @@ export default class AssistantManager extends ApiManager { * @param register The registration method to register routes and handlers. */ protected initialize(register: Registration): void { - // Initialize OpenAI API with client key - const openai = new OpenAI({ // bcz: is this needed? variable is never used... - apiKey: process.env._CLIENT_OPENAI_KEY, - dangerouslyAllowBrowser: true, - }); // prettier-ignore - // Initialize Google Custom Search API const customsearch = google.customsearch('v1'); @@ -107,7 +111,6 @@ export default class AssistantManager extends ApiManager { console.error('Error retrieving Wikipedia summary:', error); res.status(500).send({ error: 'Error retrieving article summary from Wikipedia.', - details: (error as { message: string }).message ?? error, // bcz: don't know what the error type contains... }); } }, @@ -140,7 +143,6 @@ export default class AssistantManager extends ApiManager { console.error('Error performing web search:', error); res.status(500).send({ error: 'Failed to perform web search', - details: (error as { message: string }).message ?? error, // bcz: don't know wha tthe error type contains... }); } }, @@ -199,7 +201,6 @@ export default class AssistantManager extends ApiManager { console.error('Error fetching the URL:', error); res.status(500).send({ error: 'Failed to fetch the URL', - details: (error as { message: string }).message ?? error, // bcz: don't know wha tthe error type contains... 
}); } }, @@ -241,7 +242,6 @@ export default class AssistantManager extends ApiManager { console.error('Error scraping website:', error); res.status(500).send({ error: 'Failed to scrape website', - details: (error as { message: string }).message ?? error, // bcz: don't know wha tthe error type contains... }); } }, @@ -272,7 +272,6 @@ export default class AssistantManager extends ApiManager { console.error('Error initiating document creation:', error); res.status(500).send({ error: 'Failed to initiate document creation', - details: (error as { message: string }).message ?? error, // bcz: don't know wha tthe error type contains... }); } }, @@ -430,7 +429,6 @@ export default class AssistantManager extends ApiManager { console.error('Error creating CSV file:', error); res.status(500).send({ error: 'Failed to create CSV file.', - details: (error as { message: string }).message ?? error, // bcz: don't know what the error type contains... }); } }, -- cgit v1.2.3-70-g09d2