import { Pinecone, Index, IndexList, PineconeRecord } from '@pinecone-database/pinecone';
import { CohereClient } from 'cohere-ai';
import { EmbedResponse } from 'cohere-ai/api';
import dotenv from 'dotenv';

// Load environment variables (PINECONE_API_KEY, COHERE_API_KEY) from a .env file.
dotenv.config();

/** Metadata stored next to each chunk vector in the index. */
interface ChunkMetadata {
  text: string;
  type: string;
  original_document: string;
  file_path: string;
  location: string;
  start_page: number;
  end_page: number;
  // Index signature so this object can be used where a generic string-keyed
  // metadata record is expected (it is passed as Pinecone record metadata below).
  [key: string]: string | number;
}

/** A single embedded chunk: identifier, embedding vector and its metadata. */
interface Chunk {
  id: string;
  values: number[];
  metadata: ChunkMetadata;
}

/** A processed document together with its pre-embedded chunks. */
interface Document {
  purpose: string;
  file_name: string;
  num_pages: number;
  summary: string;
  chunks: Chunk[];
  type: string;
}

/**
 * Thin wrapper around a Pinecone index and a Cohere embedding client.
 *
 * Documents are stored in memory (for summaries) and their chunks are upserted
 * into the Pinecone index named `pdf-chatbot`; queries are embedded with Cohere
 * and matched against that index.
 *
 * Index initialisation is asynchronous and starts in the constructor. Every
 * method that touches the index awaits that initialisation first, so callers
 * may use the instance immediately after construction.
 */
class Vectorstore {
  /** Number of records sent per upsert request, to stay within request limits. */
  private static readonly UPSERT_BATCH_SIZE = 100;

  private readonly pinecone: Pinecone;
  private readonly cohere: CohereClient;
  private readonly indexName: string = 'pdf-chatbot';
  private readonly documents: Document[] = [];
  /** Resolves to the index handle once the index exists; rejects if setup failed. */
  private readonly indexReady: Promise<Index>;

  constructor() {
    this.pinecone = new Pinecone({
      apiKey: process.env.PINECONE_API_KEY!,
    });
    this.cohere = new CohereClient({
      token: process.env.COHERE_API_KEY!,
    });

    // Keep the initialisation promise instead of firing and forgetting it, so
    // later calls can wait for the index rather than racing against it.
    this.indexReady = this.createIndex();
    // Observe a failure here so it is logged once and never surfaces as an
    // unhandled rejection; methods awaiting `indexReady` still see the error.
    this.indexReady.catch((error: unknown) => {
      console.error(`Error initialising index: ${error}`);
    });
  }

  /**
   * Creates the index when it is not listed yet and returns a handle to it.
   *
   * @returns A handle to the index named `indexName`.
   */
  private async createIndex(): Promise<Index> {
    const indexList: IndexList = await this.pinecone.listIndexes();
    const exists = indexList.indexes?.some(index => index.name === this.indexName) ?? false;

    if (!exists) {
      await this.pinecone.createIndex({
        name: this.indexName,
        dimension: 1024,
        metric: 'cosine',
        spec: {
          serverless: {
            cloud: 'aws',
            region: 'us-east-1',
          },
        },
        // Block until the new index can accept requests; otherwise the first
        // upsert/query may hit an index that is still being provisioned.
        waitUntilReady: true,
      });
    }

    return this.pinecone.Index(this.indexName);
  }

  /**
   * Remembers the document locally and uploads its chunks to the index.
   *
   * The document is recorded before the upload, so it appears in
   * `getSummaries()` even if the upload subsequently fails.
   *
   * @param document - Document whose chunks already carry embedding vectors.
   * @throws Whatever index initialisation or the upsert request throws.
   */
  async addDocument(document: Document): Promise<void> {
    this.documents.push(document);
    await this.indexDocument(document);
  }

  /**
   * Upserts all chunks of a document, `UPSERT_BATCH_SIZE` records per request.
   * A document without chunks results in no upsert request at all.
   */
  private async indexDocument(document: Document): Promise<void> {
    console.log('Uploading vectors to content namespace...');
    const index = await this.indexReady;

    const pineconeRecords: PineconeRecord[] = document.chunks.map(chunk => ({
      id: chunk.id,
      values: chunk.values,
      metadata: chunk.metadata,
    }));

    // Batches are sent one after another on purpose: this keeps each request
    // small and avoids flooding the service with parallel writes.
    for (let start = 0; start < pineconeRecords.length; start += Vectorstore.UPSERT_BATCH_SIZE) {
      await index.upsert(pineconeRecords.slice(start, start + Vectorstore.UPSERT_BATCH_SIZE));
    }
  }

  /**
   * Embeds the query and returns the closest chunks from the index.
   *
   * This is best-effort: any failure (index setup, embedding, query) is logged
   * and an empty array is returned instead of throwing.
   *
   * @param query - Natural-language query text.
   * @param topK - Maximum number of matches to return.
   * @returns The matching chunks, or `[]` when retrieval failed.
   */
  async retrieve(query: string, topK: number = 10): Promise<Chunk[]> {
    console.log(`Retrieving chunks for query: ${query}`);
    try {
      const index = await this.indexReady;

      const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({
        texts: [query],
        model: 'embed-english-v3.0',
        inputType: 'search_query',
      });
      const queryEmbedding = Vectorstore.extractFirstEmbedding(queryEmbeddingResponse.embeddings);

      const queryResponse = await index.query({
        vector: queryEmbedding,
        topK,
        includeValues: true,
        includeMetadata: true,
      });

      return (queryResponse.matches ?? []).map(match => ({
        id: match.id,
        values: match.values as number[],
        metadata: match.metadata as ChunkMetadata,
      }));
    } catch (error) {
      console.error(`Error retrieving chunks: ${error}`);
      return [];
    }
  }

  /**
   * Pulls the first embedding vector out of an embed response payload.
   *
   * Accepts a bare list of vectors, an object holding the list under
   * `embeddings`, or an object holding float vectors under `float`.
   *
   * @throws Error when no list of vectors is found or the first entry is not an array.
   */
  private static extractFirstEmbedding(embeddings: unknown): number[] {
    let vectors: unknown;
    if (Array.isArray(embeddings)) {
      vectors = embeddings;
    } else if (typeof embeddings === 'object' && embeddings !== null) {
      const byKey = embeddings as Record<string, unknown>;
      vectors = byKey['embeddings'] ?? byKey['float'];
    }

    if (!Array.isArray(vectors)) {
      throw new Error('Invalid embedding response format');
    }

    const first: unknown = vectors[0];
    if (!Array.isArray(first)) {
      throw new Error('Query embedding is not an array');
    }
    return first as number[];
  }

  /**
   * Returns a numbered list (1-based, one per line) of the summaries of all
   * documents added so far, followed by a trailing newline.
   */
  getSummaries(): string {
    return this.documents.map((doc, index) => `${index + 1}. ${doc.summary}`).join('\n') + '\n';
  }
}