From a4107cdf6d53654275a678a79eff9962bcd02beb Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Mon, 22 Jul 2024 14:21:07 -0400 Subject: works better now --- .../views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src/client/views/nodes/ChatBox/vectorstore') diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts index ab0b6e617..0737e2392 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts @@ -17,9 +17,10 @@ export class Vectorstore { private cohere: CohereClient; private indexName: string = 'pdf-chatbot'; private id: string; + private file_ids: string[] = []; documents: AI_Document[] = []; - constructor(id: string) { + constructor(id: string, doc_ids: () => string[]) { const pineconeApiKey = process.env.PINECONE_API_KEY; if (!pineconeApiKey) { throw new Error('PINECONE_API_KEY is not defined.'); @@ -32,6 +33,7 @@ export class Vectorstore { token: process.env.COHERE_API_KEY, }); this.id = id; + this.file_ids = doc_ids(); this.initializeIndex(); } @@ -63,7 +65,7 @@ export class Vectorstore { console.log('Already in progress.'); return; } - console.log(`Document already added: ${doc.file_name}`); + if (!this.file_ids.includes(StrCast(doc.ai_doc_id))) this.file_ids.push(StrCast(doc.ai_doc_id)); } else { doc.ai_document_status = 'PROGRESS'; console.log(doc); @@ -79,6 +81,8 @@ export class Vectorstore { await this.indexDocument(JSON.parse(JSON.stringify(document_json, (key, value) => (value === null || value === undefined ? undefined : value)))); console.log(`Document added: ${document_json.file_name}`); doc.summary = document_json.summary; + doc.ai_doc_id = document_json.doc_id; + this.file_ids.push(document_json.doc_id); doc.ai_purpose = document_json.purpose; if (doc.vectorstore_id === undefined || doc.vectorstore_id === null || doc.vectorstore_id === '' || doc.vectorstore_id === '[]') { doc.vectorstore_id = JSON.stringify([this.id]); @@ -125,7 +129,7 @@ export class Vectorstore { ({ id: chunk.id, values: chunk.values, - metadata: { ...chunk.metadata, vectorstore_id: this.id } as RecordMetadata, + metadata: { ...chunk.metadata } as RecordMetadata, }) as PineconeRecord ); await this.index.upsert(pineconeRecords); @@ -157,7 +161,7 @@ export class Vectorstore { const queryResponse: QueryResponse = await this.index.query({ vector: queryEmbedding, filter: { - vectorstore_id: this.id, + doc_id: { $in: this.file_ids }, }, topK, includeValues: true, @@ -169,7 +173,7 @@ export class Vectorstore { ({ id: match.id, values: match.values as number[], - metadata: match.metadata as { text: string; type: string; original_document: string; file_path: string; location: string; start_page: number; end_page: number }, + metadata: match.metadata as { text: string; type: string; original_document: string; file_path: string; doc_id: string; location: string; start_page: number; end_page: number }, }) as Chunk ); } catch (error) { -- cgit v1.2.3-70-g09d2