diff options
-rw-r--r-- | src/client/views/nodes/ChatBox/Agent.ts | 4 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/ChatBox.scss | 27 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/ChatBox.tsx | 12 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/ChunkManager.ts | 24 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts (renamed from src/client/views/nodes/ChatBox/AnswerParser.ts) | 2 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts (renamed from src/client/views/nodes/ChatBox/StreamedAnswerParser.ts) | 0 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts | 125 |
7 files changed, 129 insertions, 65 deletions
diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts index eaa17d283..9eb069c78 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/ChatBox/Agent.ts @@ -13,8 +13,8 @@ import { SearchTool } from './tools/SearchTool'; import { NoTool } from './tools/NoTool'; import { on } from 'events'; import { v4 as uuidv4 } from 'uuid'; -import { AnswerParser } from './AnswerParser'; -import { StreamedAnswerParser } from './StreamedAnswerParser'; +import { AnswerParser } from './response_parsers/AnswerParser'; +import { StreamedAnswerParser } from './response_parsers/StreamedAnswerParser'; import { CreateCSVTool } from './tools/CreateCSVTool'; dotenv.config(); diff --git a/src/client/views/nodes/ChatBox/ChatBox.scss b/src/client/views/nodes/ChatBox/ChatBox.scss index adb0663c3..42f6a0d61 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.scss +++ b/src/client/views/nodes/ChatBox/ChatBox.scss @@ -116,6 +116,33 @@ $transition: all 0.3s ease; } } } + .citation-popup { + position: fixed; + bottom: 50px; + left: 50%; + transform: translateX(-50%); + background-color: rgba(0, 0, 0, 0.8); + color: white; + padding: 10px 20px; + border-radius: 10px; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2); + z-index: 1000; + animation: fadeIn 0.3s ease-in-out; + + p { + margin: 0; + font-size: 14px; + } + + @keyframes fadeIn { + from { + opacity: 0; + } + to { + opacity: 1; + } + } + } } .message { diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx index ff699aab3..98a2e6002 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/ChatBox/ChatBox.tsx @@ -44,6 +44,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { @observable private linked_docs_to_add: ObservableSet = observable.set(); @observable private linked_csv_files: { filename: string; id: string; text: string }[] = []; @observable private isUploadingDocs: boolean = false; + @observable private citationPopup: { text: string; visible: boolean } = { text: '', visible: false }; // Private properties for managing OpenAI API, vector store, agent, and UI elements private openai: OpenAI; @@ -450,6 +451,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { DocumentManager.Instance.showDocument(highlightDoc, { willZoomCentered: true }, () => {}); break; case CHUNK_TYPE.TEXT: + this.citationPopup = { text: citation.direct_text ?? 'No text available', visible: true }; + setTimeout(() => (this.citationPopup.visible = false), 3000); // Hide after 3 seconds + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { const firstView = Array.from(doc[DocViews])[0] as DocumentView; firstView.ComponentView?.search?.(citation.direct_text ?? ''); @@ -730,6 +734,14 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { )} </button> </form> + {/* Popup for citation */} + {this.citationPopup.visible && ( + <div className="citation-popup"> + <p> + <strong>Text from your document: </strong> {this.citationPopup.text} + </p> + </div> + )} </div> ); } diff --git a/src/client/views/nodes/ChatBox/ChunkManager.ts b/src/client/views/nodes/ChatBox/ChunkManager.ts deleted file mode 100644 index 64c073640..000000000 --- a/src/client/views/nodes/ChatBox/ChunkManager.ts +++ /dev/null @@ -1,24 +0,0 @@ -import { SimplifiedChunk } from './types'; - -class ChunkManager { - private chunks: SimplifiedChunk[]; - - constructor() { - this.chunks = []; - } - - addChunk(chunk: SimplifiedChunk) { - this.chunks.push(chunk); - } - - removeChunk(chunk: SimplifiedChunk) { - const index = this.chunks.indexOf(chunk); - if (index !== -1) { - this.chunks.splice(index, 1); - } - } - - getChunks() { - return this.chunks; - } -} diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts index 885114195..79b53b0a3 100644 --- a/src/client/views/nodes/ChatBox/AnswerParser.ts +++ b/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts @@ -1,4 +1,4 @@ -import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType, ProcessingInfo } from './types'; +import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType, ProcessingInfo } from '../types'; import { v4 as uuid } from 'uuid'; export class AnswerParser { diff --git a/src/client/views/nodes/ChatBox/StreamedAnswerParser.ts b/src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts index 3585cab4a..3585cab4a 100644 --- a/src/client/views/nodes/ChatBox/StreamedAnswerParser.ts +++ b/src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts diff --git a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts index b5145c1f7..cc3b1ccd5 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts @@ -2,47 +2,55 @@ import { Pinecone, Index, IndexList, PineconeRecord, RecordMetadata, QueryRespon import { CohereClient } from 'cohere-ai'; import { EmbedResponse } from 'cohere-ai/api'; import dotenv from 'dotenv'; -import axios from 'axios'; -import { SimplifiedChunk } from '../types'; - import { RAGChunk, AI_Document, CHUNK_TYPE } from '../types'; import { Doc } from '../../../../../fields/Doc'; -import { DocData } from '../../../../../fields/DocSymbols'; import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types'; import { Networking } from '../../../../Network'; dotenv.config(); +/** + * The Vectorstore class integrates with Pinecone for vector-based document indexing and retrieval, + * and Cohere for text embedding. It handles AI document management, uploads, and query-based retrieval. + */ export class Vectorstore { - private pinecone: Pinecone; - private index!: Index; - private cohere: CohereClient; - private indexName: string = 'pdf-chatbot'; - private _id: string; - private _doc_ids: string[] = []; + private pinecone: Pinecone; // Pinecone client for managing the vector index. + private index!: Index; // The specific Pinecone index used for document chunks. + private cohere: CohereClient; // Cohere client for generating embeddings. + private indexName: string = 'pdf-chatbot'; // Default name for the index. + private _id: string; // Unique ID for the Vectorstore instance. + private _doc_ids: string[] = []; // List of document IDs handled by this instance. - documents: AI_Document[] = []; + documents: AI_Document[] = []; // Store the documents indexed in the vectorstore. + /** + * Constructor initializes the Pinecone and Cohere clients, sets up the document ID list, + * and initializes the Pinecone index. + * @param id The unique identifier for the vectorstore instance. + * @param doc_ids A function that returns a list of document IDs. + */ constructor(id: string, doc_ids: () => string[]) { const pineconeApiKey = process.env.PINECONE_API_KEY; if (!pineconeApiKey) { throw new Error('PINECONE_API_KEY is not defined.'); } - this.pinecone = new Pinecone({ - apiKey: pineconeApiKey, - }); - this.cohere = new CohereClient({ - token: process.env.COHERE_API_KEY, - }); + // Initialize Pinecone and Cohere clients with API keys from the environment. + this.pinecone = new Pinecone({ apiKey: pineconeApiKey }); + this.cohere = new CohereClient({ token: process.env.COHERE_API_KEY }); this._id = id; this._doc_ids = doc_ids(); this.initializeIndex(); } + /** + * Initializes the Pinecone index by checking if it exists, and creating it if not. + * The index is set to use the cosine metric for vector similarity. + */ private async initializeIndex() { const indexList: IndexList = await this.pinecone.listIndexes(); + // Check if the index already exists, otherwise create it. if (!indexList.indexes?.some(index => index.name === this.indexName)) { await this.pinecone.createIndex({ name: this.indexName, @@ -57,62 +65,76 @@ export class Vectorstore { }); } + // Set the index for future use. this.index = this.pinecone.Index(this.indexName); } + /** + * Adds an AI document to the vectorstore. This method handles document chunking, uploading to the + * vectorstore, and updating the progress for long-running tasks like file uploads. + * @param doc The document to be added to the vectorstore. + * @param progressCallback Callback to update the progress of the upload. + */ async addAIDoc(doc: Doc, progressCallback: (progress: number, step: string) => void) { console.log('Adding AI Document:', doc); const ai_document_status: string = StrCast(doc.ai_document_status); - if (ai_document_status !== undefined && ai_document_status !== null && ai_document_status.trim() !== '' && ai_document_status !== '{}') { + // Skip if the document is already in progress or completed. + if (ai_document_status !== undefined && ai_document_status.trim() !== '' && ai_document_status !== '{}') { if (ai_document_status === 'IN PROGRESS') { console.log('Already in progress.'); return; } - if (!this._doc_ids.includes(StrCast(doc.ai_doc_id))) this._doc_ids.push(StrCast(doc.ai_doc_id)); + if (!this._doc_ids.includes(StrCast(doc.ai_doc_id))) { + this._doc_ids.push(StrCast(doc.ai_doc_id)); + } } else { + // Start processing the document. doc.ai_document_status = 'PROGRESS'; console.log(doc); + + // Get the local file path (CSV or PDF). const local_file_path: string = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname; console.log('Local File Path:', local_file_path); if (local_file_path) { console.log('Creating AI Document...'); - // Start the document creation process + // Start the document creation process by sending the file to the server. const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); - // Poll the server for progress updates + // Poll the server for progress updates. let inProgress: boolean = true; let result: any = null; while (inProgress) { - await new Promise(resolve => setTimeout(resolve, 2000)); // Polling interval + // Polling interval for status updates. + await new Promise(resolve => setTimeout(resolve, 2000)); + // Check if the job is completed. const resultResponse = await Networking.FetchFromServer(`/getResult/${jobId}`); const resultResponseJson = JSON.parse(resultResponse); - //console.log('Result Response:', resultResponseJson); if (resultResponseJson.status === 'completed') { console.log('Result here:', resultResponseJson); result = resultResponseJson; break; } + // Fetch progress information and update the progress callback. const progressResponse = await Networking.FetchFromServer(`/getProgress/${jobId}`); const progressResponseJson = JSON.parse(progressResponse); - //console.log('Progress Response:', progressResponseJson); - if (progressResponseJson) { - console.log('Progress:', progressResponseJson); const progress = progressResponseJson.progress; const step = progressResponseJson.step; progressCallback(progress, step); } } - // Process the final document result + // Once completed, process the document and add it to the vectorstore. console.log('Document JSON:', result); this.documents.push(result); - await this.indexDocument(JSON.parse(JSON.stringify(result, (key, value) => (value === null || value === undefined ? undefined : value)))); + await this.indexDocument(result); console.log(`Document added: ${result.file_name}`); + + // Update document metadata such as summary, purpose, and vectorstore ID. doc.summary = result.summary; doc.ai_doc_id = result.doc_id; this._doc_ids.push(result.doc_id); @@ -128,6 +150,7 @@ export class Vectorstore { doc.chunk_simpl = JSON.stringify({ chunks: [] }); } + // Process each chunk of the document and update the document's chunk_simpl field. result.chunks.forEach((chunk: RAGChunk) => { const chunkToAdd = { chunkId: chunk.id, @@ -142,27 +165,41 @@ export class Vectorstore { doc.chunk_simpl = JSON.stringify(new_chunk_simpl); }); + // Mark the document status as completed. doc.ai_document_status = 'COMPLETED'; } } } + /** + * Indexes the processed document by uploading the document's vector chunks to the Pinecone index. + * @param document The processed document containing its chunks and metadata. + */ private async indexDocument(document: any) { console.log('Uploading vectors to content namespace...'); - const pineconeRecords: PineconeRecord[] = (document.chunks as RAGChunk[]).map( - chunk => - ({ - id: chunk.id, - values: chunk.values, - metadata: { ...chunk.metadata } as RecordMetadata, - }) as PineconeRecord - ); + + // Prepare Pinecone records for each chunk in the document. + const pineconeRecords: PineconeRecord[] = (document.chunks as RAGChunk[]).map(chunk => ({ + id: chunk.id, + values: chunk.values, + metadata: { ...chunk.metadata } as RecordMetadata, + })); + + // Upload the records to Pinecone. await this.index.upsert(pineconeRecords); } - async retrieve(query: string, topK: number = 10): Promise { + /** + * Retrieves the top K document chunks relevant to the user's query. + * This involves embedding the query using Cohere, then querying Pinecone for matching vectors. + * @param query The search query string. + * @param topK The number of top results to return (default is 10). + * @returns A list of document chunks that match the query. + */ + async retrieve(query: string, topK: number = 10): Promise<RAGChunk[]> { console.log(`Retrieving chunks for query: ${query}`); try { + // Generate an embedding for the query using Cohere. const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({ texts: [query], model: 'embed-english-v3.0', @@ -171,6 +208,7 @@ export class Vectorstore { let queryEmbedding: number[]; + // Extract the embedding from the response. if (Array.isArray(queryEmbeddingResponse.embeddings)) { queryEmbedding = queryEmbeddingResponse.embeddings[0]; } else if (queryEmbeddingResponse.embeddings && 'embeddings' in queryEmbeddingResponse.embeddings) { @@ -183,6 +221,7 @@ export class Vectorstore { throw new Error('Query embedding is not an array'); } + // Query the Pinecone index using the embedding and filter by document IDs. const queryResponse: QueryResponse = await this.index.query({ vector: queryEmbedding, filter: { @@ -193,12 +232,22 @@ export class Vectorstore { includeMetadata: true, }); + // Map the results into RAGChunks and return them. return queryResponse.matches.map( match => ({ id: match.id, values: match.values as number[], - metadata: match.metadata as { text: string; type: string; original_document: string; file_path: string; doc_id: string; location: string; start_page: number; end_page: number }, + metadata: match.metadata as { + text: string; + type: string; + original_document: string; + file_path: string; + doc_id: string; + location: string; + start_page: number; + end_page: number; + }, }) as RAGChunk ); } catch (error) { |