diff options
author | A.J. Shulman <Shulman.aj@gmail.com> | 2024-07-09 13:55:03 -0400 |
---|---|---|
committer | A.J. Shulman <Shulman.aj@gmail.com> | 2024-07-09 13:55:03 -0400 |
commit | c789d3d41a68c89e75fdfc12b1b05377ceef32d1 (patch) | |
tree | 1079016e962a4f0ece1dd02aff92e07c6c2826ab /src | |
parent | a578f43335b0009927df4c341be3aee4f74be6d9 (diff) |
starting to improve vectorstore
Diffstat (limited to 'src')
-rw-r--r-- | src/client/views/nodes/ChatBox/ChatBox.tsx | 304 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/tools.ts | 26 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/types.ts | 82 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts (renamed from src/client/apis/vectorstore/VectorstoreUpload.ts) | 83 | ||||
-rw-r--r-- | src/server/ApiManagers/AssistantManager.ts | 16 | ||||
-rw-r--r-- | src/server/RouteManager.ts | 4 |
6 files changed, 221 insertions, 294 deletions
diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index b986c7393..9f4e6f07e 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -19,91 +19,82 @@ import { ViewBoxAnnotatableComponent } from '../../DocComponent'; import { FieldView, FieldViewProps } from '../FieldView'; import './ChatBox.scss'; import MessageComponent from './MessageComponent'; -import { ANNOTATION_LINK_TYPE, ASSISTANT_ROLE, AssistantMessage, DOWNLOAD_TYPE } from './types'; +import { ASSISTANT_ROLE, AssistantMessage, AI_Document, convertToAIDocument } from './types'; import { Annotation } from 'mobx/dist/internal'; import { FormEvent } from 'react'; +import { url } from 'inspector'; +import { Vectorstore } from './vectorstore/VectorstoreUpload'; +import { DocumentView } from '../DocumentView'; +import { CollectionFreeFormDocumentView } from '../CollectionFreeFormDocumentView'; +import { CollectionFreeFormView } from '../../collections/collectionFreeForm'; @observer export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { - @observable modalStatus = false; - @observable currentFile = { url: '' }; @observable history: AssistantMessage[] = []; @observable.deep current_message: AssistantMessage | undefined = undefined; @observable isLoading: boolean = false; @observable isInitializing: boolean = true; - @observable expandedLogIndex: number | null = null; + @observable expandedScratchpadIndex: number | null = null; @observable linked_docs_to_add: Doc[] = []; @observable inputValue: string = ''; private openai: OpenAI; - private assistantID: string = ''; - private threadID: string = ''; + private documents: AI_Document[] = []; private _oldWheel: any; - private vectorStoreID: string = ''; - private mathJaxConfig: any; - private linkedCsvIDs: string[] = []; + private vectorstore: Vectorstore; public static LayoutString(fieldKey: string) { return FieldView.LayoutString(ChatBox, fieldKey); } + constructor(props: FieldViewProps) { super(props); makeObservable(this); this.openai = this.initializeOpenAI(); this.history = [{ role: ASSISTANT_ROLE.ASSISTANT, text: 'Welcome to the Document Analyser Assistant! Link a document or ask questions to get started.' }]; - this.threadID = StrCast(this.dataDoc.thread_id); - this.assistantID = StrCast(this.dataDoc.assistant_id); - this.vectorStoreID = StrCast(this.dataDoc.vector_store_id); this.openai = this.initializeOpenAI(); - if (this.assistantID === '' || this.threadID === '' || this.vectorStoreID === '') { - this.createAssistant(); - } else { - this.retrieveCsvUrls(); - this.isInitializing = false; - } - this.mathJaxConfig = { - loader: { load: ['input/asciimath'] }, - tex: { - inlineMath: [ - ['$', '$'], - ['\\(', '\\)'], - ], - displayMath: [ - ['$$', '$$'], - ['[', ']'], - ], - }, - }; + this.getLinkedDocs(); + this.vectorstore = new Vectorstore(); + reaction( - () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, text: msg.text, image: msg.image, tool_logs: msg.tool_logs })), + () => this.history.map((msg: AssistantMessage) => ({ role: msg.role, text: msg.text, follow_up_questions: msg.follow_up_questions, citations: msg.citations })), serializableHistory => { this.dataDoc.data = JSON.stringify(serializableHistory); } ); } - @action - toggleToolLogs = (index: number) => { - this.expandedLogIndex = this.expandedLogIndex === index ? null : index; - }; + getLinkedDocs = async () => { + const visual_docs = (CollectionFreeFormDocumentView.from(this._props.DocumentView?.())?._props.parent as CollectionFreeFormView)?.childDocs.filter(doc => doc != this.Document); + console.log('All Docs:', visual_docs); - retrieveCsvUrls() { - const linkedDocs = LinkManager.Instance.getAllRelatedLinks(this.Document) - .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d); + visual_docs?.forEach(async doc => { + const local_file_path: string = CsvCast(doc.data, PDFCast(doc.data)).url?.pathname; - linkedDocs.forEach(doc => { - const aiFieldId = StrCast(doc[this.Document[Id] + '_ai_field_id']); - if (CsvCast(doc.data)) { - this.linkedCsvIDs.push(StrCast(aiFieldId)); - console.log(this.linkedCsvIDs); + if (local_file_path) { + const { document_json } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); + const ai_document: AI_Document = convertToAIDocument(document_json); + this.documents.push(ai_document); + await this.vectorstore.addDocument(ai_document); + doc['ai_document'] = document_json; } }); - } + }; + + @action + uploadNewDocument = async (newDoc: Doc) => { + const local_file_path: string = CsvCast(newDoc.data, PDFCast(newDoc.data)).url.pathname; + const { document_json } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); + this.documents.push(...document_json.map(convertToAIDocument)); + newDoc['ai_document'] = document_json; + }; + + @action + toggleToolLogs = (index: number) => { + this.expandedScratchpadIndex = this.expandedScratchpadIndex === index ? null : index; + }; initializeOpenAI() { - //console.log(process.env._CLIENT_OPENAI_KEY); const configuration: ClientOptions = { apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true, @@ -118,50 +109,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { }; @action - createAssistant = async () => { - this.isInitializing = true; - try { - const vectorStore = await this.openai.beta.vectorStores.create({ - name: 'Vector Store for Assistant', - }); - const assistant = await this.openai.beta.assistants.create({ - name: 'Document Analyser Assistant', - instructions: ` - You will analyse documents with which you are provided. You will answer questions and provide insights based on the information in the documents. - For writing math formulas: - You have a MathJax render environment. - - Write all in-line equations within a single dollar sign, $, to render them as TeX (this means any time you want to use a dollar sign to represent a dollar sign itself, you must escape it with a backslash: "$"); - - Use a double dollar sign, $$, to render equations on a new line; - Example: $$x^2 + 3x$$ is output for "x² + 3x" to appear as TeX.`, - model: 'gpt-4-turbo', - tools: [{ type: 'file_search' }, { type: 'code_interpreter' }], - tool_resources: { - file_search: { - vector_store_ids: [vectorStore.id], - }, - code_interpreter: { - file_ids: this.linkedCsvIDs, - }, - }, - }); - const thread = await this.openai.beta.threads.create(); - - runInAction(() => { - this.dataDoc.assistant_id = assistant.id; - this.dataDoc.thread_id = thread.id; - this.dataDoc.vector_store_id = vectorStore.id; - this.assistantID = assistant.id; - this.threadID = thread.id; - this.vectorStoreID = vectorStore.id; - this.isInitializing = false; - }); - } catch (error) { - console.error('Initialization failed:', error); - this.isInitializing = false; - } - }; - - @action askGPT = async (event: React.FormEvent<HTMLFormElement>): Promise<void> => { event.preventDefault(); this.inputValue = ''; @@ -222,111 +169,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { // // } // }; - downloadToComputer = (url: string, fileName: string) => { - fetch(url, { method: 'get', mode: 'no-cors', referrerPolicy: 'no-referrer' }) - .then(res => res.blob()) - .then(res => { - const aElement = document.createElement('a'); - aElement.setAttribute('download', fileName); - const href = URL.createObjectURL(res); - aElement.href = href; - aElement.setAttribute('target', '_blank'); - aElement.click(); - URL.revokeObjectURL(href); - }); - }; - - createDocumentInDash = async (url: string) => { - const fileSuffix = url.substring(url.lastIndexOf('.') + 1); - console.log(fileSuffix); - let doc: Doc | null = null; - switch (fileSuffix) { - case 'pdf': - doc = DocCast(await DocUtils.DocumentFromType('pdf', url, {})); - break; - case 'csv': - doc = DocCast(await DocUtils.DocumentFromType('csv', url, {})); - break; - case 'png': - case 'jpg': - case 'jpeg': - doc = DocCast(await DocUtils.DocumentFromType('image', url, {})); - break; - default: - console.error('Unsupported file type:', fileSuffix); - break; - } - if (doc) { - doc && this._props.addDocument?.(doc); - //add to overlay - await DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); - } - }; - - downloadFile = async (fileInfo: string, downloadType: DOWNLOAD_TYPE) => { - try { - console.log(fileInfo); - const [fileId, fileName] = fileInfo.split(/!!!/); - const { file_path: filePath } = await Networking.PostToServer('/downloadFileFromOpenAI', { file_id: fileId, file_name: fileName }); - const fileLink = CsvCast(new CsvField(filePath)).url.href; - if (downloadType === DOWNLOAD_TYPE.DASH) { - this.createDocumentInDash(fileLink); - } else { - this.downloadToComputer(fileLink, fileName); - } - } catch (error) { - console.error('Error downloading file:', error); - } - }; - - handleDownloadToDevice = () => { - this.downloadFile(this.currentFile.url, DOWNLOAD_TYPE.DEVICE); - this.modalStatus = false; // Close the modal after the action - this.currentFile = { url: '' }; // Reset the current file - }; - - handleAddToDash = () => { - // Assuming `downloadFile` is a method that handles adding to Dash - this.downloadFile(this.currentFile.url, DOWNLOAD_TYPE.DASH); - this.modalStatus = false; // Close the modal after the action - this.currentFile = { url: '' }; // Reset the current file - }; - - renderModal = () => { - if (!this.modalStatus) return null; - - return ( - <div className="modal"> - <div className="modal-content"> - <h4>File Actions</h4> - <p>Choose an action for the file:</p> - <button type="button" onClick={this.handleDownloadToDevice}> - Download to Device - </button> - <button type="button" onClick={this.handleAddToDash}> - Add to Dash - </button> - <button - type="button" - onClick={() => { - this.modalStatus = false; - }}> - Cancel - </button> - </div> - </div> - ); - }; - @action - showModal = () => { - this.modalStatus = true; - }; - - @action - setCurrentFile = (file: { url: string }) => { - this.currentFile = file; - }; - componentDidMount() { this._props.setContentViewBox?.(this); if (this.dataDoc.data) { @@ -337,9 +179,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { ...storedHistory.map((msg: AssistantMessage) => ({ role: msg.role, text: msg.text, - quote: msg.quote, - tool_logs: msg.tool_logs, - image: msg.image, + follow_up_questions: msg.follow_up_questions, + citations: msg.citations, })) ); }); @@ -355,7 +196,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { .filter(d => d); return linkedDocs; }, - linked => this.linked_docs_to_add.push(...linked.filter(linkedDoc => !this.linked_docs_to_add.includes(linkedDoc))) ); @@ -370,7 +210,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { if ((change as any).addedCount > 0) { // maybe check here if its already in the urls datadoc array so doesn't add twice console.log((change as any).added as Doc[]); - this.uploadLinks((change as any).added as Doc[]); + ((change as any).added as Doc[]).forEach(doc => { + this.uploadNewDocument(doc); + }); } // (change as any).removed.forEach((link: any) => remLinkFromDoc(toRealField(link))); break; @@ -392,7 +234,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { /** <MathJaxContext config={this.mathJaxConfig}> **/ <div className="chatBox"> {this.isInitializing && <div className="initializing-overlay">Initializing...</div>} - {this.renderModal()} <div className="scroll-box chat-content" ref={r => { @@ -401,32 +242,37 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { r?.addEventListener('wheel', this.onPassiveWheel, { passive: false }); }}> <div className="messages"> - {this.history.map((message, index) => ( - <MessageComponent - key={index} - message={message} - toggleToolLogs={this.toggleToolLogs} - expandedLogIndex={this.expandedLogIndex} - index={index} - showModal={this.showModal} - goToLinkedDoc={() => {}} - setCurrentFile={this.setCurrentFile} - onFollowUpClick={this.handleFollowUpClick} - /> - ))} - {!this.current_message ? null : ( - <MessageComponent - key={this.history.length} - message={this.current_message} - toggleToolLogs={this.toggleToolLogs} - expandedLogIndex={this.expandedLogIndex} - index={this.history.length} - showModal={this.showModal} - goToLinkedDoc={() => {}} - setCurrentFile={this.setCurrentFile} - onFollowUpClick={this.handleFollowUpClick} - /> - )} + { + //this.history.map((message, index) => ( + // <MessageComponent + // key={index} + // message={message} + // toggleToolLogs={this.toggleToolLogs} + // expandedLogIndex={this.expandedLogIndex} + // index={index} + // showModal={this.showModal} + // goToLinkedDoc={() => {}} + // setCurrentFile={this.setCurrentFile} + // onFollowUpClick={this.handleFollowUpClick} + // /> + //) + //) + } + { + //!this.current_message ? null : ( + // <MessageComponent + // key={this.history.length} + // message={this.current_message} + // toggleToolLogs={this.toggleToolLogs} + // expandedLogIndex={this.expandedLogIndex} + // index={this.history.length} + // showModal={this.showModal} + // goToLinkedDoc={() => {}} + // setCurrentFile={this.setCurrentFile} + // onFollowUpClick={this.handleFollowUpClick} + // /> + //) + } </div> </div> <form onSubmit={this.askGPT} className="chat-form"> diff --git a/src/client/views/nodes/ChatBox/tools.ts b/src/client/views/nodes/ChatBox/tools.ts new file mode 100644 index 000000000..4035280a8 --- /dev/null +++ b/src/client/views/nodes/ChatBox/tools.ts @@ -0,0 +1,26 @@ +import { DocCast } from '../../../../fields/Types'; +import { DocServer } from '../../../DocServer'; +import { Docs } from '../../../documents/Documents'; +import { DocUtils } from '../../../documents/DocUtils'; +import { TabDocView } from '../../collections/TabDocView'; +import { DocumentView } from '../DocumentView'; +import { OpenWhere } from '../OpenWhere'; + +export function retrieval(json: any): string { + return ''; +} + +export function create_collection(docView: DocumentView, document_ids: string[], title: string): string { + const docs = document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); + const collection = Docs.Create.FreeformDocument(docs, { title }); + docView._props.addDocTab(collection, OpenWhere.addRight); //in future, create popup prompting user where to add + return 'Collection created in Dash called ' + title; +} + +export function create_link(docView: DocumentView, document_ids: string[]): string { + //Make document_ids a size 2 array + const docs = document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); + const linkDoc = DocUtils.MakeLink(docs[0], docs[1], {})!; + DocumentView.linkCommonAncestor(linkDoc)?.ComponentView?.addDocument?.(linkDoc); + return 'Link created between ' + docs[0].title + ' and ' + docs[1].title; +} diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/ChatBox/types.ts index cfda0d40e..7acb96c15 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/ChatBox/types.ts @@ -1,22 +1,78 @@ export enum ASSISTANT_ROLE { - USER = 'User', - ASSISTANT = 'Assistant', + USER = 'user', + ASSISTANT = 'assistant', } -export enum ANNOTATION_LINK_TYPE { - DASH_DOC = 'citation', - DOWNLOAD_FILE = 'file_path', -} - -export enum DOWNLOAD_TYPE { - DASH = 'dash', - DEVICE = 'device', +export enum CHUNK_TYPE { + TEXT = 'text', + IMAGE = 'image', } export interface AssistantMessage { role: ASSISTANT_ROLE; text: string; - quote?: string; - image?: string; - tool_logs?: string; + follow_up_questions?: string[]; + citations?: Citation[]; +} + +export interface Citation { + text: string; + type: CHUNK_TYPE; + span: [number, number]; + chunk_id: string; + direct_text?: string; +} + +export interface Chunk { + id: string; + values: number[]; + metadata: { + text: string; + type: CHUNK_TYPE; + original_document: string; + file_path: string; + location: string; + start_page: number; + end_page: number; + }; +} + +export interface AI_Document { + purpose: string; + file_name: string; + num_pages: number; + summary: string; + chunks: Chunk[]; + type: string; +} + +export function convertToAIDocument(json: any): AI_Document { + if (!json) { + throw new Error('Invalid JSON object'); + } + + const chunks: Chunk[] = json.chunks.map((chunk: any) => ({ + id: chunk.id, + values: chunk.values, + metadata: { + text: chunk.metadata.text, + type: chunk.metadata.type as CHUNK_TYPE, // Ensure type casting + original_document: chunk.metadata.original_document, + file_path: chunk.metadata.file_path, + location: chunk.metadata.location, + start_page: chunk.metadata.start_page, + end_page: chunk.metadata.end_page, + }, + })); + + const aiDocument: AI_Document = { + purpose: json.purpose, + file_name: json.file_name, + num_pages: json.num_pages, + summary: json.summary, + chunks: chunks, + type: json.type, + }; + + return aiDocument; } diff --git a/src/client/apis/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts index 6c60ad0c8..d16e117b6 100644 --- a/src/client/apis/vectorstore/VectorstoreUpload.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts @@ -1,54 +1,34 @@ -import { Pinecone, Index, IndexList, PineconeRecord } from '@pinecone-database/pinecone'; +import { Pinecone, Index, IndexList, PineconeRecord, RecordMetadata, QueryResponse } from '@pinecone-database/pinecone'; import { CohereClient } from 'cohere-ai'; import { EmbedResponse } from 'cohere-ai/api'; import dotenv from 'dotenv'; +import { Chunk, AI_Document } from '../types'; dotenv.config(); -interface ChunkMetadata { - text: string; - type: string; - original_document: string; - file_path: string; - location: string; - start_page: number; - end_page: number; - [key: string]: string | number; // Add this line -} - -interface Chunk { - id: string; - values: number[]; - metadata: ChunkMetadata; -} - -interface Document { - purpose: string; - file_name: string; - num_pages: number; - summary: string; - chunks: Chunk[]; - type: string; -} - -class Vectorstore { +export class Vectorstore { private pinecone: Pinecone; - private index: Index; + private index!: Index; private cohere: CohereClient; private indexName: string = 'pdf-chatbot'; - private documents: Document[] = []; + private documents: AI_Document[] = []; constructor() { + const pineconeApiKey = process.env.PINECONE_API_KEY; + if (!pineconeApiKey) { + throw new Error('PINECONE_API_KEY is not defined.'); + } + this.pinecone = new Pinecone({ - apiKey: process.env.PINECONE_API_KEY!, + apiKey: pineconeApiKey, }); this.cohere = new CohereClient({ - token: process.env.COHERE_API_KEY!, + token: process.env.COHERE_API_KEY, }); - this.createIndex(); + this.initializeIndex(); } - private async createIndex() { + private async initializeIndex() { const indexList: IndexList = await this.pinecone.listIndexes(); if (!indexList.indexes?.some(index => index.name === this.indexName)) { @@ -68,18 +48,22 @@ class Vectorstore { this.index = this.pinecone.Index(this.indexName); } - async addDocument(document: Document) { + async addDocument(document: AI_Document) { this.documents.push(document); await this.indexDocument(document); + console.log(`Document added: ${document.file_name}`); } - private async indexDocument(document: Document) { + private async indexDocument(document: AI_Document) { console.log('Uploading vectors to content namespace...'); - const pineconeRecords: PineconeRecord[] = document.chunks.map(chunk => ({ - id: chunk.id, - values: chunk.values, - metadata: chunk.metadata, - })); + const pineconeRecords: PineconeRecord<RecordMetadata>[] = document.chunks.map( + chunk => + ({ + id: chunk.id, + values: chunk.values, + metadata: chunk.metadata as RecordMetadata, + }) as PineconeRecord + ); await this.index.upsert(pineconeRecords); } @@ -106,18 +90,21 @@ class Vectorstore { throw new Error('Query embedding is not an array'); } - const queryResponse = await this.index.query({ + const queryResponse: QueryResponse<RecordMetadata> = await this.index.query({ vector: queryEmbedding, topK, includeValues: true, includeMetadata: true, }); - return queryResponse.matches.map(match => ({ - id: match.id, - values: match.values as number[], - metadata: match.metadata as ChunkMetadata, - })); + return queryResponse.matches.map( + match => + ({ + id: match.id, + values: match.values as number[], + metadata: match.metadata as { text: string; type: string; original_document: string; file_path: string; location: string; start_page: number; end_page: number }, + }) as Chunk + ); } catch (error) { console.error(`Error retrieving chunks: ${error}`); return []; @@ -125,6 +112,6 @@ class Vectorstore { } getSummaries(): string { - return this.documents.map((doc, index) => `${index + 1}. ${doc.summary}`).join('\n') + '\n'; + return this.documents.map((doc, index) => `${index + 1}) ${doc.summary}`).join('\n') + '\n'; } } diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 77d8af724..a35708ccd 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -160,10 +160,18 @@ export default class AssistantManager extends ApiManager { // Read file data and convert to base64 const file_data = fs.readFileSync(public_path, { encoding: 'base64' }); - const response = await axios.post('http://localhost:8080/createDocument', { - file_data, - file_name, - }); + const response = await axios.post( + 'http://localhost:8080/createDocument', + { + file_data, + file_name, + }, + { + headers: { + 'Content-Type': 'application/json', + }, + } + ); res.send({ document: response.data }); } catch (error: any) { diff --git a/src/server/RouteManager.ts b/src/server/RouteManager.ts index d8e0455f6..22e608868 100644 --- a/src/server/RouteManager.ts +++ b/src/server/RouteManager.ts @@ -8,6 +8,7 @@ import { DashUserModel } from './authentication/DashUserModel'; export enum Method { GET, POST, + PUT, } export interface CoreArguments { @@ -208,6 +209,9 @@ export default class RouteManager { case Method.POST: this.server.post(route, supervised); break; + case Method.PUT: + this.server.put(route, supervised); + break; default: } } |