diff options
-rw-r--r-- | src/client/views/nodes/ChatBox/Agent.ts | 32 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/ChatBox.tsx | 111 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/tools/RAGTool.ts | 12 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/types.ts | 3 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts | 41 | ||||
-rw-r--r-- | src/client/views/pdf/PDFViewer.tsx | 24 |
6 files changed, 128 insertions, 95 deletions
diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index fd3c6e5e8..210d3c804 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -8,6 +8,7 @@ import { RAGTool } from './tools/RAGTool'; import { Vectorstore } from './vectorstore/VectorstoreUpload'; import { ChatCompletionAssistantMessageParam, ChatCompletionMessageParam } from 'openai/resources'; import dotenv from 'dotenv'; +import { ChatBox } from './ChatBox'; dotenv.config(); export class Agent { @@ -15,41 +16,24 @@ export class Agent { private tools: Record<string, Tool<any>>; private messages: AgentMessage[] = []; private interMessages: AgentMessage[] = []; - private summaries: string; + private vectorstore: Vectorstore; + private history: () => string; - constructor(private vectorstore: Vectorstore) { + constructor(_vectorstore: Vectorstore, summaries: () => string, _history: () => string) { this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); - this.summaries = this.vectorstore ? this.vectorstore.getSummaries() : 'No documents available.'; + this.vectorstore = _vectorstore; + this.history = _history; this.tools = { wikipedia: new WikipediaTool(), calculate: new CalculateTool(), - rag: new RAGTool(vectorstore, this.summaries), + rag: new RAGTool(this.vectorstore, summaries), }; } - private refreshSummaries(): void { - this.summaries = this.vectorstore ? this.vectorstore.getSummaries() : 'No documents available.'; - this.tools.rag = new RAGTool(this.vectorstore, this.summaries); - } - - private formatChatHistory(): string { - let history = '<chat_history>\n'; - for (const message of this.messages) { - if (message.role === 'user') { - history += `<user>${message.content}</user>\n`; - } else if (message.role === 'assistant') { - history += `<assistant>${message.content}</assistant>\n`; - } - } - history += '</chat_history>'; - return history; - } - async askAgent(question: string, maxTurns: number = 8): Promise<string> { - this.refreshSummaries(); console.log(`Starting query: ${question}`); this.messages.push({ role: 'user', content: question }); - const chatHistory = this.formatChatHistory(); + const chatHistory = this.history(); console.log(`Chat history: ${chatHistory}`); const systemPrompt = getReactPrompt(Object.values(this.tools), chatHistory); console.log(`System prompt: ${systemPrompt}`); diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index 4d1cd38a0..a47e9a95b 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -1,4 +1,4 @@ -import { action, computed, makeObservable, observable, observe, reaction, runInAction } from 'mobx'; +import { action, computed, makeObservable, observable, observe, reaction, runInAction, ObservableSet } from 'mobx'; import { observer } from 'mobx-react'; import OpenAI, { ClientOptions } from 'openai'; import * as React from 'react'; @@ -25,6 +25,7 @@ import { DocumentManager } from '../../../util/DocumentManager'; import { UUID } from 'bson'; import { v4 as uuidv4 } from 'uuid'; import { aS } from '@fullcalendar/core/internal-common'; +import { computeRect } from '@fullcalendar/core/internal'; dotenv.config(); @@ -34,10 +35,10 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { @observable.deep current_message: AssistantMessage | undefined = undefined; @observable isLoading: boolean = false; - @observable isInitializing: boolean = true; + @observable isInitializing: boolean = false; @observable expandedScratchpadIndex: number | null = null; @observable inputValue: string = ''; - @observable private linked_docs_to_add: Doc[] = []; + @observable private linked_docs_to_add: ObservableSet<Doc> = observable.set(); private openai: OpenAI; private vectorstore_id: string; private documents: AI_Document[] = []; @@ -55,13 +56,19 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { this.history = [{ role: ASSISTANT_ROLE.ASSISTANT, text_content: 'Welcome to the Document Analyser Assistant! Link a document or ask questions to get started.' }]; this.openai = this.initializeOpenAI(); if (StrCast(this.dataDoc.vectorstore_id) == '') { + console.log('new_id'); this.vectorstore_id = uuidv4(); this.dataDoc.vectorstore_id = this.vectorstore_id; } else { this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id); } this.vectorstore = new Vectorstore(this.vectorstore_id); - this.agent = new Agent(this.vectorstore); // Initialize the Agent + this.agent = new Agent( + this.vectorstore, + () => this.summaries, + () => this.formattedHistory + ); + reaction( () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, text_content: msg.text_content, follow_up_questions: msg.follow_up_questions, citations: msg.citations })), serializableHistory => { @@ -70,8 +77,11 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { ); } - addDocsToVectorstore = async (linkedDocs: Doc[]) => { - await this.vectorstore.addAIDocs(linkedDocs); + @action + addDocToVectorstore = async (newLinkedDoc: Doc) => { + this.isInitializing = true; + await this.vectorstore.addAIDoc(newLinkedDoc); + this.isInitializing = false; }; // @action @@ -152,20 +162,18 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { const currentLinkedDocs: Doc[] = this.linkedDocs; const chunk_id = citation.chunk_id; for (let doc of currentLinkedDocs) { - const doc_chunks: Chunk[] = JSON.parse(StrCast(doc.ai_document)).chunks; - const chunk_file_name = doc_chunks.find(chunk => chunk.id === chunk_id)?.metadata.file_path; + const doc_chunk_ids: string[] = JSON.parse(StrCast(doc.chunk_ids)); + if (!doc_chunk_ids.includes(chunk_id)) continue; const doc_url = CsvCast(doc.data, PDFCast(doc.data)).url.pathname; - console.log('URL: ' + doc_url + ' Citation URL: ' + chunk_file_name); + console.log('URL: ' + doc_url); //const ai_field_id = doc[this.Document[Id] + '_ai_field_id']; - if (chunk_file_name == doc_url) { - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { - console.log(doc.data); - //look at context path for each docview and choose the doc view that has as - //its parent the same collection view the chatbox is in - const first_view = Array.from(doc[DocViews])[0]; - first_view.ComponentView?.search?.(citation.direct_text); - }); - } + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { + console.log(doc.data); + //look at context path for each docview and choose the doc view that has as + //its parent the same collection view the chatbox is in + const first_view = Array.from(doc[DocViews])[0]; + first_view.ComponentView?.search?.(citation.direct_text); + }); } // You can implement additional functionality here, such as showing a modal with the full citation content }; @@ -234,32 +242,29 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { return linkedDocs; }, - linked => this.linked_docs_to_add.push(...linked.filter(linkedDoc => !this.linked_docs_to_add.includes(linkedDoc))) + linked => linked.forEach(doc => this.linked_docs_to_add.add(doc)) ); - observe( - // right now this skips during initialization which is necessary because it would be blank - // However, it will upload the same link twice when it is - this.linked_docs_to_add, - change => { - // observe pushes/splices on a user link DB 'data' field (should only happen for local changes) - switch (change.type as any) { - case 'splice': - if ((change as any).addedCount > 0) { - // maybe check here if its already in the urls datadoc array so doesn't add twice - console.log((change as any).added as Doc[]); - this.addDocsToVectorstore((change as any).added as Doc[]); - } - // (change as any).removed.forEach((link: any) => remLinkFromDoc(toRealField(link))); - break; - case 'update': // let oldValue = change.oldValue; - default: - } - }, - true - ); + observe(this.linked_docs_to_add, change => { + if (change.type === 'add') { + this.addDocToVectorstore(change.newValue); + } else if (change.type === 'delete') { + console.log('Deleted docs: ', change.oldValue); + } + }); } + // case 'splice': + // if ((change as any).addedCount > 0) { + // // maybe check here if its already in the urls datadoc array so doesn't add twice + // console.log((change as any).added as Doc[]); + // this.addDocsToVectorstore((change as any).added as Doc[]); + // } + // // (change as any).removed.forEach((link: any) => remLinkFromDoc(toRealField(link))); + // break; + // case 'update': // let oldValue = change.oldValue; + // default: + @computed get linkedDocs() { //return (CollectionFreeFormDocumentView.from(this._props.DocumentView?.())?._props.parent as CollectionFreeFormView)?.childDocs.filter(doc => doc != this.Document) ?? []; @@ -269,6 +274,32 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { .filter(d => d); } + @computed + get summaries(): string { + return ( + LinkManager.Instance.getAllRelatedLinks(this.Document) + .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) + .map(d => DocCast(d?.annotationOn, d)) + .filter(d => d) + .map((doc, index) => `${index + 1}) ${doc.summary}`) + .join('\n') + '\n' + ); + } + + @computed + get formattedHistory(): string { + let history = '<chat_history>\n'; + for (const message of this.history) { + history += `<${message.role}>${message.text_content}</${message.role}>\n`; + } + history += '</chat_history>'; + return history; + } + + retrieveSummaries(): string { + return this.summaries; + } + @action handleFollowUpClick = (question: string) => { console.log('Follow-up question clicked:', question); diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/ChatBox/tools/RAGTool.ts index 36e4bc3ce..90f7bebfe 100644 --- a/src/client/views/nodes/ChatBox/tools/RAGTool.ts +++ b/src/client/views/nodes/ChatBox/tools/RAGTool.ts @@ -6,7 +6,7 @@ import * as fs from 'fs'; export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { constructor( private vectorstore: Vectorstore, - summaries: string + summaries: () => string ) { super( 'rag', @@ -46,18 +46,10 @@ export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> { !!!IMPORTANT Use the RAG tool ANYTIME the question may potentially (even if you are not sure) relate to one of the user's documents. Here are the summaries of the user's documents: - ${summaries}` + ${summaries()}` ); } - changeSummaries(summaries: string) { - this.briefSummary = `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a set of document chunks (either images or text) that can be used to provide a grounded response based on user documents. - -!!!IMPORTANT Use the RAG tool ANYTIME the question may potentially (even if you are not sure) relate to one of the user's documents. -Here are the summaries of the user's documents: -${summaries}`; - } - async execute(args: { hypothetical_document_chunk: string }): Promise<any> { const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk); return this.getFormattedChunks(relevantChunks); diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index a0d295e92..e510837c8 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -1,3 +1,6 @@ +import { Doc } from '../../../../fields/Doc'; +import { StrCast } from '../../../../fields/Types'; + export enum ASSISTANT_ROLE { USER = 'user', ASSISTANT = 'assistant', diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts index 64b89225c..b47e276e7 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts @@ -55,28 +55,17 @@ export class Vectorstore { this.index = this.pinecone.Index(this.indexName); } - async addAIDocs(visible_docs: Doc[]) { - console.log('All Docs:', visible_docs); - visible_docs?.forEach(async doc => { - await this.addAIDoc(doc); - }); - return; - } - async addAIDoc(doc: Doc) { console.log('Adding AI Document:', doc); - console.log('AI Document1:', doc[DocData].ai_document); - console.log('AI Document2:', doc.ai_document); - const ai_document_string: string = StrCast(doc.ai_document); - if (ai_document_string !== undefined && ai_document_string !== null && ai_document_string !== '' && ai_document_string !== ' ' && ai_document_string !== '{}') { - if (ai_document_string === 'IN PROGRESS') { + const ai_document_status: string = StrCast(doc.ai_document_status); + if (ai_document_status !== undefined && ai_document_status !== null && ai_document_status !== '' && ai_document_status !== ' ' && ai_document_status !== '{}') { + if (ai_document_status === 'IN PROGRESS') { console.log('Already in progress.'); return; } - this.documents.push(convertToAIDocument(JSON.parse(StrCast(doc.ai_document)))); console.log(`Document already added: ${doc.file_name}`); } else { - doc.ai_document = 'IN PROGRESS'; + doc.ai_document_status = 'PROGRESS'; console.log(doc); console.log(PDFCast(doc.data)?.url?.pathname); console.log(CsvCast(doc.data)?.url?.pathname); @@ -89,15 +78,25 @@ export class Vectorstore { this.documents.push(ai_document); await this.indexDocument(ai_document); console.log(`Document added: ${ai_document.file_name}`); - doc.ai_document = JSON.stringify(document_json); + doc.summary = ai_document.summary; + doc.ai_purpose = ai_document.purpose; + if (doc.vectorstore_id === undefined || doc.vectorstore_id === null || doc.vectorstore_id === '' || doc.vectorstore_id === '[]') { + doc.vectorstore_id = JSON.stringify([this.id]); + } else { + doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this.id])); + } + if (doc.chunk_ids === undefined || doc.chunk_ids === null || doc.chunk_ids === '' || doc.chunk_ids === '[]') { + doc.chunk_ids = JSON.stringify([]); + } + ai_document.chunks.forEach(chunk => { + console.log(doc.chunk_ids); + doc.chunk_ids = JSON.stringify(JSON.parse(StrCast(doc.chunk_ids)).concat([chunk.id])); + }); + doc.ai_document_status = 'COMPLETED'; } } } - getSummaries(): string { - return this.documents.map((doc, index) => `${index + 1}) ${doc.summary}`).join('\n') + '\n'; - } - private async indexDocument(document: AI_Document) { console.log('Uploading vectors to content namespace...'); const pineconeRecords: PineconeRecord<RecordMetadata>[] = document.chunks.map( @@ -105,7 +104,7 @@ export class Vectorstore { ({ id: chunk.id, values: chunk.values, - metadata: { ...chunk.metadata, vectorestore_id: this.id } as RecordMetadata, + metadata: { ...chunk.metadata, vectorstore_id: this.id } as RecordMetadata, }) as PineconeRecord ); await this.index.upsert(pineconeRecords); diff --git a/src/client/views/pdf/PDFViewer.tsx b/src/client/views/pdf/PDFViewer.tsx index 6c1617c38..2d6ed57d5 100644 --- a/src/client/views/pdf/PDFViewer.tsx +++ b/src/client/views/pdf/PDFViewer.tsx @@ -406,6 +406,30 @@ export class PDFViewer extends ObservableReactComponent<IViewerProps> { } }; + // @action + // createMarquee = (coords: [x1: number, x2: number, y1: number, y2: number]): void => { + // // const hit = document.elementFromPoint(e.clientX, e.clientY); + // // bcz: Change. drag selecting requires that preventDefault is NOT called. This used to happen in DocumentView, + // // but that's changed, so this shouldn't be needed. + // // if (hit && hit.localName === "span" && this.annotationsActive(true)) { // drag selecting text stops propagation + // // e.button === 0 && e.stopPropagation(); + // // } + // // if alt+left click, drag and annotate + // this._downX = coords[0]; + // this._downY = coords[2]; + + // if ((this._props.Document._freeform_scale || 1) !== 1) return; + // this._props.select(false); + // MarqueeAnnotator.clearAnnotations(this._savedAnnotations); + // this.isAnnotating = true; + // this._textSelecting = false; + // // if textLayer is hit, then we select text instead of using a marquee so clear out the marquee. + // this._styleRule = addStyleSheetRule(PDFViewer._annotationStyle, 'htmlAnnotation', { 'pointer-events': 'none' }); + + // this._marqueeref.current?.onInitiateSelection([coords[0], coords[2]]); + // this._marqueeref.current?.onTerminateSelection(); + // }; + @action finishMarquee = (/* x?: number, y?: number */) => { this._getAnchor = AnchorMenu.Instance?.GetAnchor; |