aboutsummaryrefslogtreecommitdiff
path: root/src/client/apis/vectorstore
diff options
context:
space:
mode:
authorA.J. Shulman <Shulman.aj@gmail.com>2024-07-09 13:55:03 -0400
committerA.J. Shulman <Shulman.aj@gmail.com>2024-07-09 13:55:03 -0400
commitc789d3d41a68c89e75fdfc12b1b05377ceef32d1 (patch)
tree1079016e962a4f0ece1dd02aff92e07c6c2826ab /src/client/apis/vectorstore
parenta578f43335b0009927df4c341be3aee4f74be6d9 (diff)
starting to improve vectorstore
Diffstat (limited to 'src/client/apis/vectorstore')
-rw-r--r--src/client/apis/vectorstore/VectorstoreUpload.ts130
1 files changed, 0 insertions, 130 deletions
diff --git a/src/client/apis/vectorstore/VectorstoreUpload.ts b/src/client/apis/vectorstore/VectorstoreUpload.ts
deleted file mode 100644
index 6c60ad0c8..000000000
--- a/src/client/apis/vectorstore/VectorstoreUpload.ts
+++ /dev/null
@@ -1,130 +0,0 @@
-import { Pinecone, Index, IndexList, PineconeRecord } from '@pinecone-database/pinecone';
-import { CohereClient } from 'cohere-ai';
-import { EmbedResponse } from 'cohere-ai/api';
-import dotenv from 'dotenv';
-
-dotenv.config();
-
-interface ChunkMetadata {
- text: string;
- type: string;
- original_document: string;
- file_path: string;
- location: string;
- start_page: number;
- end_page: number;
- [key: string]: string | number; // Add this line
-}
-
-interface Chunk {
- id: string;
- values: number[];
- metadata: ChunkMetadata;
-}
-
-interface Document {
- purpose: string;
- file_name: string;
- num_pages: number;
- summary: string;
- chunks: Chunk[];
- type: string;
-}
-
-class Vectorstore {
- private pinecone: Pinecone;
- private index: Index;
- private cohere: CohereClient;
- private indexName: string = 'pdf-chatbot';
- private documents: Document[] = [];
-
- constructor() {
- this.pinecone = new Pinecone({
- apiKey: process.env.PINECONE_API_KEY!,
- });
- this.cohere = new CohereClient({
- token: process.env.COHERE_API_KEY!,
- });
- this.createIndex();
- }
-
- private async createIndex() {
- const indexList: IndexList = await this.pinecone.listIndexes();
-
- if (!indexList.indexes?.some(index => index.name === this.indexName)) {
- await this.pinecone.createIndex({
- name: this.indexName,
- dimension: 1024,
- metric: 'cosine',
- spec: {
- serverless: {
- cloud: 'aws',
- region: 'us-east-1',
- },
- },
- });
- }
-
- this.index = this.pinecone.Index(this.indexName);
- }
-
- async addDocument(document: Document) {
- this.documents.push(document);
- await this.indexDocument(document);
- }
-
- private async indexDocument(document: Document) {
- console.log('Uploading vectors to content namespace...');
- const pineconeRecords: PineconeRecord[] = document.chunks.map(chunk => ({
- id: chunk.id,
- values: chunk.values,
- metadata: chunk.metadata,
- }));
- await this.index.upsert(pineconeRecords);
- }
-
- async retrieve(query: string, topK: number = 10): Promise<Chunk[]> {
- console.log(`Retrieving chunks for query: ${query}`);
- try {
- const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({
- texts: [query],
- model: 'embed-english-v3.0',
- inputType: 'search_query',
- });
-
- let queryEmbedding: number[];
-
- if (Array.isArray(queryEmbeddingResponse.embeddings)) {
- queryEmbedding = queryEmbeddingResponse.embeddings[0];
- } else if (queryEmbeddingResponse.embeddings && 'embeddings' in queryEmbeddingResponse.embeddings) {
- queryEmbedding = (queryEmbeddingResponse.embeddings as { embeddings: number[][] }).embeddings[0];
- } else {
- throw new Error('Invalid embedding response format');
- }
-
- if (!Array.isArray(queryEmbedding)) {
- throw new Error('Query embedding is not an array');
- }
-
- const queryResponse = await this.index.query({
- vector: queryEmbedding,
- topK,
- includeValues: true,
- includeMetadata: true,
- });
-
- return queryResponse.matches.map(match => ({
- id: match.id,
- values: match.values as number[],
- metadata: match.metadata as ChunkMetadata,
- }));
- } catch (error) {
- console.error(`Error retrieving chunks: ${error}`);
- return [];
- }
- }
-
- getSummaries(): string {
- return this.documents.map((doc, index) => `${index + 1}. ${doc.summary}`).join('\n') + '\n';
- }
-}