diff options
| author | A.J. Shulman <Shulman.aj@gmail.com> | 2024-08-15 08:47:46 -0400 |
|---|---|---|
| committer | A.J. Shulman <Shulman.aj@gmail.com> | 2024-08-15 08:47:46 -0400 |
| commit | 0c8001c61a55540cdeeb6ae249fdd2835580121c (patch) | |
| tree | 924aae0ad45fd85c98986f5161ce6ccf2482847d /src/client/views/nodes/ChatBox/vectorstore | |
| parent | cd4b13bacd6639d2a731a05dfca700b201b2073c (diff) | |
currently works
Diffstat (limited to 'src/client/views/nodes/ChatBox/vectorstore')
| -rw-r--r-- | src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts | 47 |
1 file changed, 23 insertions, 24 deletions
diff --git a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts index 25aec751f..8e7be6eec 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts +++ b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts @@ -4,7 +4,7 @@ import { EmbedResponse } from 'cohere-ai/api'; import dotenv from 'dotenv'; import axios from 'axios'; -import { Chunk, AI_Document, CHUNK_TYPE } from '../types'; +import { RAGChunk, AI_Document, CHUNK_TYPE } from '../types'; import { Doc } from '../../../../../fields/Doc'; import { DocData } from '../../../../../fields/DocSymbols'; import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types'; @@ -44,7 +44,7 @@ export class Vectorstore { if (!indexList.indexes?.some(index => index.name === this.indexName)) { await this.pinecone.createIndex({ name: this.indexName, - dimension: 768, + dimension: 1024, metric: 'cosine', spec: { serverless: { @@ -98,7 +98,7 @@ export class Vectorstore { image_chunks: [], }; - document_json.chunks.forEach((chunk: Chunk) => { + document_json.chunks.forEach((chunk: RAGChunk) => { let chunk_to_add: { chunk_id: string; start_page: number; end_page: number }[] | { chunk_id: string; location: string; page: number }[]; switch (chunk.metadata.type) { case CHUNK_TYPE.TEXT: @@ -125,7 +125,7 @@ export class Vectorstore { private async indexDocument(document: any) { console.log('Uploading vectors to content namespace...'); - const pineconeRecords: PineconeRecord<RecordMetadata>[] = (document.chunks as Chunk[]).map( + const pineconeRecords: PineconeRecord<RecordMetadata>[] = (document.chunks as RAGChunk[]).map( chunk => ({ id: chunk.id, @@ -136,29 +136,28 @@ export class Vectorstore { await this.index.upsert(pineconeRecords); } - async retrieve(query: string, topK: number = 10): Promise<Chunk[]> { + async retrieve(query: string, topK: number = 10): Promise<RAGChunk[]> { console.log(`Retrieving chunks for query: ${query}`); try { - 
const url = 'https://api.jina.ai/v1/embeddings'; - const headers = { - 'Content-Type': 'application/json', - Authorization: `Bearer ${process.env.JINA_API_KEY}`, - }; - const data = { - model: 'jina-clip-v1', - normalized: true, - embedding_type: 'float', - input: [{ text: query }], - }; - - const response = await axios.post(url, data, { headers }); - const embeddings = response.data?.data?.[0]?.embedding; - - if (!embeddings || !Array.isArray(embeddings)) { - throw new Error('Invalid embedding response format from Jina API'); + const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({ + texts: [query], + model: 'embed-english-v3.0', + inputType: 'search_query', + }); + + let queryEmbedding: number[]; + + if (Array.isArray(queryEmbeddingResponse.embeddings)) { + queryEmbedding = queryEmbeddingResponse.embeddings[0]; + } else if (queryEmbeddingResponse.embeddings && 'embeddings' in queryEmbeddingResponse.embeddings) { + queryEmbedding = (queryEmbeddingResponse.embeddings as { embeddings: number[][] }).embeddings[0]; + } else { + throw new Error('Invalid embedding response format'); } - const queryEmbedding = embeddings; + if (!Array.isArray(queryEmbedding)) { + throw new Error('Query embedding is not an array'); + } const queryResponse: QueryResponse<RecordMetadata> = await this.index.query({ vector: queryEmbedding, @@ -176,7 +175,7 @@ export class Vectorstore { id: match.id, values: match.values as number[], metadata: match.metadata as { text: string; type: string; original_document: string; file_path: string; doc_id: string; location: string; start_page: number; end_page: number }, - }) as Chunk + }) as RAGChunk ); } catch (error) { console.error(`Error retrieving chunks: ${error}`); |
