aboutsummaryrefslogtreecommitdiff
path: root/src/client/views/nodes/ChatBox/vectorstore
diff options
context:
space:
mode:
Diffstat (limited to 'src/client/views/nodes/ChatBox/vectorstore')
-rw-r--r-- src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts 117
1 files changed, 117 insertions, 0 deletions
diff --git a/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts
new file mode 100644
index 000000000..d16e117b6
--- /dev/null
+++ b/src/client/views/nodes/ChatBox/vectorstore/VectorstoreUpload.ts
@@ -0,0 +1,117 @@
+import { Pinecone, Index, IndexList, PineconeRecord, RecordMetadata, QueryResponse } from '@pinecone-database/pinecone';
+import { CohereClient } from 'cohere-ai';
+import { EmbedResponse } from 'cohere-ai/api';
+import dotenv from 'dotenv';
+import { Chunk, AI_Document } from '../types';
+
+dotenv.config(); // Load variables from a local .env file into process.env before Vectorstore reads its API keys from it.
+
+/**
+ * Stores document chunks in the Pinecone index `pdf-chatbot` and retrieves the
+ * chunks closest to a text query, using Cohere to embed the query.
+ *
+ * Index setup is asynchronous and is started by the constructor; every method
+ * that touches the index waits for that setup to finish before using it.
+ */
+export class Vectorstore {
+    /** Maximum number of records sent to Pinecone in a single upsert request. */
+    private static readonly UPSERT_BATCH_SIZE = 100;
+
+    private pinecone: Pinecone;
+    private index!: Index;
+    private cohere: CohereClient;
+    private indexName: string = 'pdf-chatbot';
+    private documents: AI_Document[] = [];
+    /** Resolves once `index` has been assigned; rejects if index setup failed. */
+    private indexReady: Promise<void>;
+
+    /**
+     * Creates the Pinecone and Cohere clients and starts index setup.
+     *
+     * @throws Error if the `PINECONE_API_KEY` environment variable is not set.
+     */
+    constructor() {
+        const pineconeApiKey = process.env.PINECONE_API_KEY;
+        if (!pineconeApiKey) {
+            throw new Error('PINECONE_API_KEY is not defined.');
+        }
+
+        this.pinecone = new Pinecone({
+            apiKey: pineconeApiKey,
+        });
+        this.cohere = new CohereClient({
+            token: process.env.COHERE_API_KEY,
+        });
+
+        // A constructor cannot await, so keep the setup promise and let every
+        // index user await it instead of racing against an unassigned `index`.
+        this.indexReady = this.initializeIndex();
+        // Log setup failures here so they never surface as an unhandled
+        // rejection; awaiting `indexReady` later still rethrows the error.
+        this.indexReady.catch(error => console.error(`Error initializing index: ${error}`));
+    }
+
+    /**
+     * Ensures the index named `indexName` exists (creating it as an AWS
+     * `us-east-1` serverless index when it does not) and binds `index` to it.
+     */
+    private async initializeIndex(): Promise<void> {
+        const indexList: IndexList = await this.pinecone.listIndexes();
+        const indexExists = indexList.indexes?.some(index => index.name === this.indexName) ?? false;
+
+        if (!indexExists) {
+            await this.pinecone.createIndex({
+                name: this.indexName,
+                // Must equal the length of the vectors that are upserted and queried.
+                dimension: 1024,
+                metric: 'cosine',
+                spec: {
+                    serverless: {
+                        cloud: 'aws',
+                        region: 'us-east-1',
+                    },
+                },
+                // Do not resolve until the new index can actually serve requests.
+                waitUntilReady: true,
+                // Another instance may create the index between the list and create calls.
+                suppressConflicts: true,
+            });
+        }
+
+        this.index = this.pinecone.Index(this.indexName);
+    }
+
+    /**
+     * Uploads all chunks of `document` to the index and, once that succeeded,
+     * remembers the document so that it is included in {@link getSummaries}.
+     *
+     * @param document Document whose chunks already carry their embedding values.
+     * @throws Whatever index setup or the upsert request rejects with; in that
+     *         case the document is not recorded.
+     */
+    async addDocument(document: AI_Document): Promise<void> {
+        await this.indexDocument(document);
+        // Record only after a successful upload so the summaries never list a
+        // document that cannot be retrieved from the index.
+        this.documents.push(document);
+        console.log(`Document added: ${document.file_name}`);
+    }
+
+    /**
+     * Converts the chunks of `document` into Pinecone records and upserts them
+     * in batches of at most `UPSERT_BATCH_SIZE` records.
+     *
+     * @param document Document whose chunks are uploaded.
+     */
+    private async indexDocument(document: AI_Document): Promise<void> {
+        await this.indexReady;
+        console.log('Uploading vectors to content namespace...');
+        const pineconeRecords: PineconeRecord<RecordMetadata>[] = document.chunks.map(
+            chunk =>
+                ({
+                    id: chunk.id,
+                    values: chunk.values,
+                    metadata: chunk.metadata as RecordMetadata,
+                }) as PineconeRecord
+        );
+
+        // Batching keeps each request small; a document without chunks sends
+        // no request at all instead of an empty upsert.
+        for (let start = 0; start < pineconeRecords.length; start += Vectorstore.UPSERT_BATCH_SIZE) {
+            await this.index.upsert(pineconeRecords.slice(start, start + Vectorstore.UPSERT_BATCH_SIZE));
+        }
+    }
+
+    /**
+     * Embeds `query` with Cohere and returns the closest chunks from the index.
+     *
+     * @param query Natural-language query text.
+     * @param topK Maximum number of chunks to return (default 10).
+     * @returns The matching chunks, or an empty array if anything fails; the
+     *          failure is logged rather than thrown.
+     */
+    async retrieve(query: string, topK: number = 10): Promise<Chunk[]> {
+        console.log(`Retrieving chunks for query: ${query}`);
+        try {
+            // Inside the try block so that a failed index setup degrades to an
+            // empty result like every other retrieval error.
+            await this.indexReady;
+
+            const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({
+                texts: [query],
+                model: 'embed-english-v3.0',
+                inputType: 'search_query',
+            });
+
+            let queryEmbedding: number[];
+
+            // The response carries the vectors either directly as an array or
+            // wrapped in an object under an `embeddings` key; accept both.
+            if (Array.isArray(queryEmbeddingResponse.embeddings)) {
+                queryEmbedding = queryEmbeddingResponse.embeddings[0];
+            } else if (queryEmbeddingResponse.embeddings && 'embeddings' in queryEmbeddingResponse.embeddings) {
+                queryEmbedding = (queryEmbeddingResponse.embeddings as { embeddings: number[][] }).embeddings[0];
+            } else {
+                throw new Error('Invalid embedding response format');
+            }
+
+            // Also catches an empty embeddings list, where element 0 is undefined.
+            if (!Array.isArray(queryEmbedding)) {
+                throw new Error('Query embedding is not an array');
+            }
+
+            const queryResponse: QueryResponse<RecordMetadata> = await this.index.query({
+                vector: queryEmbedding,
+                topK,
+                includeValues: true,
+                includeMetadata: true,
+            });
+
+            return queryResponse.matches.map(
+                match =>
+                    ({
+                        id: match.id,
+                        values: match.values as number[],
+                        metadata: match.metadata as { text: string; type: string; original_document: string; file_path: string; location: string; start_page: number; end_page: number },
+                    }) as Chunk
+            );
+        } catch (error) {
+            console.error(`Error retrieving chunks: ${error}`);
+            return [];
+        }
+    }
+
+    /**
+     * Builds a numbered list of the summaries of all recorded documents.
+     *
+     * @returns One line per document in the form `N) summary`, numbered from 1
+     *          in the order the documents were recorded, followed by a newline.
+     */
+    getSummaries(): string {
+        return this.documents.map((doc, index) => `${index + 1}) ${doc.summary}`).join('\n') + '\n';
+    }
+}