about | summary | refs | log | tree | commit | diff
path: root/src/client/views/nodes/ChatBox/vectorstore
diff options
context:
space:
mode:
Diffstat (limited to 'src/client/views/nodes/ChatBox/vectorstore')
-rw-r--r-- src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts 47
1 files changed, 23 insertions, 24 deletions
diff --git a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts
index 25aec751f..8e7be6eec 100644
--- a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts
@@ -4,7 +4,7 @@ import { EmbedResponse } from 'cohere-ai/api';
import dotenv from 'dotenv';
import axios from 'axios';
-import { Chunk, AI_Document, CHUNK_TYPE } from '../types';
+import { RAGChunk, AI_Document, CHUNK_TYPE } from '../types';
import { Doc } from '../../../../../fields/Doc';
import { DocData } from '../../../../../fields/DocSymbols';
import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types';
@@ -44,7 +44,7 @@ export class Vectorstore {
if (!indexList.indexes?.some(index => index.name === this.indexName)) {
await this.pinecone.createIndex({
name: this.indexName,
- dimension: 768,
+ dimension: 1024,
metric: 'cosine',
spec: {
serverless: {
@@ -98,7 +98,7 @@ export class Vectorstore {
image_chunks: [],
};
- document_json.chunks.forEach((chunk: Chunk) => {
+ document_json.chunks.forEach((chunk: RAGChunk) => {
let chunk_to_add: { chunk_id: string; start_page: number; end_page: number }[] | { chunk_id: string; location: string; page: number }[];
switch (chunk.metadata.type) {
case CHUNK_TYPE.TEXT:
@@ -125,7 +125,7 @@ export class Vectorstore {
private async indexDocument(document: any) {
console.log('Uploading vectors to content namespace...');
- const pineconeRecords: PineconeRecord<RecordMetadata>[] = (document.chunks as Chunk[]).map(
+ const pineconeRecords: PineconeRecord<RecordMetadata>[] = (document.chunks as RAGChunk[]).map(
chunk =>
({
id: chunk.id,
@@ -136,29 +136,28 @@ export class Vectorstore {
await this.index.upsert(pineconeRecords);
}
- async retrieve(query: string, topK: number = 10): Promise<Chunk[]> {
+ async retrieve(query: string, topK: number = 10): Promise<RAGChunk[]> {
console.log(`Retrieving chunks for query: ${query}`);
try {
- const url = 'https://api.jina.ai/v1/embeddings';
- const headers = {
- 'Content-Type': 'application/json',
- Authorization: `Bearer ${process.env.JINA_API_KEY}`,
- };
- const data = {
- model: 'jina-clip-v1',
- normalized: true,
- embedding_type: 'float',
- input: [{ text: query }],
- };
-
- const response = await axios.post(url, data, { headers });
- const embeddings = response.data?.data?.[0]?.embedding;
-
- if (!embeddings || !Array.isArray(embeddings)) {
- throw new Error('Invalid embedding response format from Jina API');
+ const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({
+ texts: [query],
+ model: 'embed-english-v3.0',
+ inputType: 'search_query',
+ });
+
+ let queryEmbedding: number[];
+
+ if (Array.isArray(queryEmbeddingResponse.embeddings)) {
+ queryEmbedding = queryEmbeddingResponse.embeddings[0];
+ } else if (queryEmbeddingResponse.embeddings && 'embeddings' in queryEmbeddingResponse.embeddings) {
+ queryEmbedding = (queryEmbeddingResponse.embeddings as { embeddings: number[][] }).embeddings[0];
+ } else {
+ throw new Error('Invalid embedding response format');
}
- const queryEmbedding = embeddings;
+ if (!Array.isArray(queryEmbedding)) {
+ throw new Error('Query embedding is not an array');
+ }
const queryResponse: QueryResponse<RecordMetadata> = await this.index.query({
vector: queryEmbedding,
@@ -176,7 +175,7 @@ export class Vectorstore {
id: match.id,
values: match.values as number[],
metadata: match.metadata as { text: string; type: string; original_document: string; file_path: string; doc_id: string; location: string; start_page: number; end_page: number },
- }) as Chunk
+ }) as RAGChunk
);
} catch (error) {
console.error(`Error retrieving chunks: ${error}`);