diff options
Diffstat (limited to 'src/client/views/nodes/ChatBox/vectorstore')
| -rw-r--r-- | src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts | 117 |
1 files changed, 117 insertions, 0 deletions
// src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts
import { Pinecone, Index, IndexList, PineconeRecord, RecordMetadata, QueryResponse } from '@pinecone-database/pinecone';
import { CohereClient } from 'cohere-ai';
import { EmbedResponse } from 'cohere-ai/api';
import dotenv from 'dotenv';
import { Chunk, AI_Document } from '../types';

dotenv.config();

/**
 * Wraps a Pinecone index and a Cohere embedding client so that document chunks
 * can be uploaded and later retrieved by similarity to a text query.
 *
 * Index setup is asynchronous and is started by the constructor. Every method
 * that touches the index waits for that setup to finish first, so an instance
 * can be used immediately after `new Vectorstore()`.
 */
export class Vectorstore {
    private pinecone: Pinecone;
    private index!: Index;
    private cohere: CohereClient;
    private indexName: string = 'pdf-chatbot';
    private documents: AI_Document[] = [];
    /** Settles once `initializeIndex` has assigned `this.index` (or has failed). */
    private indexReady: Promise<void>;

    /**
     * Creates the Pinecone and Cohere clients from environment variables and
     * starts index initialization in the background.
     *
     * @throws Error if `PINECONE_API_KEY` is not set.
     */
    constructor() {
        const pineconeApiKey = process.env.PINECONE_API_KEY;
        if (!pineconeApiKey) {
            throw new Error('PINECONE_API_KEY is not defined.');
        }

        this.pinecone = new Pinecone({
            apiKey: pineconeApiKey,
        });
        this.cohere = new CohereClient({
            token: process.env.COHERE_API_KEY,
        });

        // A constructor cannot await, so keep the promise and let the methods
        // that need `this.index` await it instead of racing against it.
        this.indexReady = this.initializeIndex();
        // Attach a handler so a failed initialization is logged rather than
        // surfacing as an unhandled rejection; later awaits of `indexReady`
        // still observe the same rejection.
        this.indexReady.catch(error => {
            console.error(`Error initializing index: ${error}`);
        });
    }

    /**
     * Ensures the index named `indexName` exists (creating it when it is not in
     * the account's index list) and stores a handle to it in `this.index`.
     */
    private async initializeIndex(): Promise<void> {
        const indexList: IndexList = await this.pinecone.listIndexes();
        const alreadyExists = indexList.indexes?.some(index => index.name === this.indexName) ?? false;

        if (!alreadyExists) {
            await this.pinecone.createIndex({
                name: this.indexName,
                // Must match the length of the vectors stored and queried.
                dimension: 1024,
                metric: 'cosine',
                spec: {
                    serverless: {
                        cloud: 'aws',
                        region: 'us-east-1',
                    },
                },
                // Resolve only once the new index can accept requests; otherwise
                // the first upsert/query may hit an index that is still starting.
                waitUntilReady: true,
            });
        }

        this.index = this.pinecone.Index(this.indexName);
    }

    /**
     * Records a document locally (for `getSummaries`) and uploads its chunks.
     *
     * @param document Document whose `chunks` are uploaded to the index.
     * @throws Propagates any index-initialization or upsert failure.
     */
    async addDocument(document: AI_Document) {
        this.documents.push(document);
        await this.indexDocument(document);
        console.log(`Document added: ${document.file_name}`);
    }

    /**
     * Converts the document's chunks to Pinecone records and upserts them.
     *
     * @param document Document whose chunks supply `id`, `values` and `metadata`.
     */
    private async indexDocument(document: AI_Document) {
        await this.indexReady;

        console.log('Uploading vectors to content namespace...');
        const pineconeRecords: PineconeRecord<RecordMetadata>[] = document.chunks.map(
            chunk =>
                ({
                    id: chunk.id,
                    values: chunk.values,
                    metadata: chunk.metadata as RecordMetadata,
                }) as PineconeRecord
        );

        // Nothing to upload; an upsert request with no records is rejected by
        // the Pinecone client, so skip the call entirely.
        if (pineconeRecords.length === 0) {
            return;
        }

        await this.index.upsert(pineconeRecords);
    }

    /**
     * Embeds `query` with Cohere and returns the closest stored chunks.
     *
     * @param query Text to search for.
     * @param topK Maximum number of matches requested from the index.
     * @returns The matching chunks, or an empty array if any step fails
     *          (the failure is logged, not thrown).
     */
    async retrieve(query: string, topK: number = 10): Promise<Chunk[]> {
        console.log(`Retrieving chunks for query: ${query}`);
        try {
            // Inside the try block so an initialization failure follows the same
            // log-and-return-empty path as every other retrieval error.
            await this.indexReady;

            const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({
                texts: [query],
                model: 'embed-english-v3.0',
                inputType: 'search_query',
            });

            let queryEmbedding: number[];

            // The response carries embeddings either as a plain array of vectors
            // or as an object with a nested `embeddings` array; accept both.
            if (Array.isArray(queryEmbeddingResponse.embeddings)) {
                queryEmbedding = queryEmbeddingResponse.embeddings[0];
            } else if (queryEmbeddingResponse.embeddings && 'embeddings' in queryEmbeddingResponse.embeddings) {
                queryEmbedding = (queryEmbeddingResponse.embeddings as { embeddings: number[][] }).embeddings[0];
            } else {
                throw new Error('Invalid embedding response format');
            }

            // Also catches an empty embeddings list, where element 0 is undefined.
            if (!Array.isArray(queryEmbedding)) {
                throw new Error('Query embedding is not an array');
            }

            const queryResponse: QueryResponse<RecordMetadata> = await this.index.query({
                vector: queryEmbedding,
                topK,
                includeValues: true,
                includeMetadata: true,
            });

            return (queryResponse.matches ?? []).map(
                match =>
                    ({
                        id: match.id,
                        values: match.values as number[],
                        metadata: match.metadata as { text: string; type: string; original_document: string; file_path: string; location: string; start_page: number; end_page: number },
                    }) as Chunk
            );
        } catch (error) {
            console.error(`Error retrieving chunks: ${error}`);
            return [];
        }
    }

    /**
     * Builds a numbered list of the summaries of all documents added so far.
     *
     * @returns One line per document in the form `N) summary`, followed by a
     *          trailing newline.
     */
    getSummaries(): string {
        return this.documents.map((doc, index) => `${index + 1}) ${doc.summary}`).join('\n') + '\n';
    }
}
