Diffstat (limited to 'src/client')
 src/client/views/nodes/chatbot/agentsystem/Agent.ts        |  3
 src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts  | 66
2 files changed, 25 insertions, 44 deletions
diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
index b2b0c9aea..19fd6ae36 100644
--- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
@@ -22,6 +22,7 @@ import { ChatCompletionMessageParam } from 'openai/resources';
 import { Doc } from '../../../../../fields/Doc';
 import { parsedDoc } from '../chatboxcomponents/ChatBox';
 import { WebsiteInfoScraperTool } from '../tools/WebsiteInfoScraperTool';
+import { RAGTool } from '../tools/RAGTool';
 //import { CreateTextDocTool } from '../tools/CreateTextDocumentTool';
 
 dotenv.config();
@@ -76,7 +77,7 @@ export class Agent {
         // Define available tools for the assistant
         this.tools = {
             calculate: new CalculateTool(),
-            // rag: new RAGTool(this.vectorstore),
+            rag: new RAGTool(this.vectorstore),
             dataAnalysis: new DataAnalysisTool(csvData),
             websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc),
             searchTool: new SearchTool(addLinkedUrlDoc),
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index ef24e59bc..afd34f28d 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -1,13 +1,11 @@
 /**
  * @file Vectorstore.ts
- * @description This file defines the Vectorstore class, which integrates with Pinecone for vector-based document indexing and Cohere for text embeddings.
+ * @description This file defines the Vectorstore class, which integrates with Pinecone for vector-based document indexing and OpenAI text-embedding-3-large for text embeddings.
 * It manages AI document handling, including adding documents, processing media files, combining document chunks, indexing documents,
 * and retrieving relevant sections based on user queries.
 */
 
 import { Index, IndexList, Pinecone, PineconeRecord, QueryResponse, RecordMetadata } from '@pinecone-database/pinecone';
-import { CohereClient } from 'cohere-ai';
-import { EmbedResponse } from 'cohere-ai/api';
 import dotenv from 'dotenv';
 import path from 'path';
 import { v4 as uuidv4 } from 'uuid';
@@ -15,17 +13,20 @@ import { Doc } from '../../../../../fields/Doc';
 import { AudioCast, CsvCast, PDFCast, StrCast, VideoCast } from '../../../../../fields/Types';
 import { Networking } from '../../../../Network';
 import { AI_Document, CHUNK_TYPE, RAGChunk } from '../types/types';
+import OpenAI from 'openai';
+import { Embedding } from 'openai/resources';
+import { PineconeEnvironmentVarsNotSupportedError } from '@pinecone-database/pinecone/dist/errors';
 
 dotenv.config();
 
 /**
 * The Vectorstore class integrates with Pinecone for vector-based document indexing and retrieval,
- * and Cohere for text embedding. It handles AI document management, uploads, and query-based retrieval.
+ * and OpenAI text-embedding-3-large for text embedding. It handles AI document management, uploads, and query-based retrieval.
 */
 export class Vectorstore {
     private pinecone: Pinecone; // Pinecone client for managing the vector index.
     private index!: Index; // The specific Pinecone index used for document chunks.
-    private cohere: CohereClient; // Cohere client for generating embeddings.
+    private openai: OpenAI; // OpenAI client for generating embeddings.
     private indexName: string = 'pdf-chatbot'; // Default name for the index.
     private _id: string; // Unique ID for the Vectorstore instance.
     private _doc_ids: () => string[]; // List of document IDs handled by this instance.
@@ -33,20 +34,20 @@ export class Vectorstore {
     documents: AI_Document[] = []; // Store the documents indexed in the vectorstore.
 
     /**
-     * Initializes the Pinecone and Cohere clients, sets up the document ID list,
+     * Initializes the Pinecone and OpenAI clients, sets up the document ID list,
     * and initializes the Pinecone index.
     * @param id The unique identifier for the vectorstore instance.
     * @param doc_ids A function that returns a list of document IDs.
     */
    constructor(id: string, doc_ids: () => string[]) {
-        const pineconeApiKey = '51738e9a-bea2-4c11-b6bf-48a825e774dc';
+        const pineconeApiKey = process.env.PINECONE_API_KEY;
         if (!pineconeApiKey) {
             throw new Error('PINECONE_API_KEY is not defined.');
         }
 
-        // Initialize Pinecone and Cohere clients with API keys from the environment.
+        // Initialize Pinecone and OpenAI clients with API keys from the environment.
         this.pinecone = new Pinecone({ apiKey: pineconeApiKey });
-        // this.cohere = new CohereClient({ token: process.env.COHERE_API_KEY });
+        this.openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, dangerouslyAllowBrowser: true });
         this._id = id;
         this._doc_ids = doc_ids;
         this.initializeIndex();
@@ -63,7 +64,7 @@ export class Vectorstore {
         if (!indexList.indexes?.some(index => index.name === this.indexName)) {
             await this.pinecone.createIndex({
                 name: this.indexName,
-                dimension: 1024,
+                dimension: 3072,
                 metric: 'cosine',
                 spec: {
                     serverless: {
@@ -119,23 +120,12 @@ export class Vectorstore {
         const texts = segmentedTranscript.map((chunk: any) => chunk.text);
 
         try {
-            const embeddingsResponse = await this.cohere.v2.embed({
-                model: 'embed-english-v3.0',
-                inputType: 'classification',
-                embeddingTypes: ['float'], // Specify that embeddings should be floats
-                texts, // Pass the array of chunk texts
+            const embeddingsResponse = await this.openai.embeddings.create({
+                model: 'text-embedding-3-large',
+                input: texts,
+                encoding_format: 'float',
             });
 
-            if (!embeddingsResponse.embeddings.float || embeddingsResponse.embeddings.float.length !== texts.length) {
-                throw new Error('Mismatch between embeddings and the number of chunks');
-            }
-
-            // Assign embeddings to each chunk
-            segmentedTranscript.forEach((chunk: any, index: number) => {
-                if (!embeddingsResponse.embeddings || !embeddingsResponse.embeddings.float) {
-                    throw new Error('Invalid embeddings response');
-                }
-            });
             doc.original_segments = JSON.stringify(response.full);
             doc.ai_type = local_file_path.endsWith('.mp3') ? 'audio' : 'video';
             const doc_id = uuidv4();
@@ -149,7 +139,7 @@ export class Vectorstore {
                 summary: '',
                 chunks: segmentedTranscript.map((chunk: any, index: number) => ({
                     id: uuidv4(),
-                    values: (embeddingsResponse.embeddings.float as number[][])[index], // Assign embedding
+                    values: (embeddingsResponse.data as Embedding[])[index].embedding, // Assign embedding
                     metadata: {
                         indexes: chunk.indexes,
                         original_document: local_file_path,
@@ -291,7 +281,7 @@ export class Vectorstore {
 
     /**
     * Retrieves the most relevant document chunks for a given query.
-     * Uses Cohere for embedding the query and Pinecone for vector similarity matching.
+     * Uses OpenAI for embedding the query and Pinecone for vector similarity matching.
     * @param query The search query string.
     * @param topK The number of top results to return (default is 10).
     * @returns A list of document chunks that match the query.
@@ -299,27 +289,17 @@ export class Vectorstore {
     async retrieve(query: string, topK: number = 10): Promise<RAGChunk[]> {
         console.log(`Retrieving chunks for query: ${query}`);
         try {
-            // Generate an embedding for the query using Cohere.
-            const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({
-                texts: [query],
-                model: 'embed-english-v3.0',
-                inputType: 'search_query',
+            // Generate an embedding for the query using OpenAI.
+            const queryEmbeddingResponse = await this.openai.embeddings.create({
+                model: 'text-embedding-3-large',
+                input: query,
+                encoding_format: 'float',
             });
 
-            let queryEmbedding: number[];
+            let queryEmbedding = queryEmbeddingResponse.data[0].embedding;
 
             // Extract the embedding from the response.
-            if (Array.isArray(queryEmbeddingResponse.embeddings)) {
-                queryEmbedding = queryEmbeddingResponse.embeddings[0];
-            } else if (queryEmbeddingResponse.embeddings && 'embeddings' in queryEmbeddingResponse.embeddings) {
-                queryEmbedding = (queryEmbeddingResponse.embeddings as { embeddings: number[][] }).embeddings[0];
-            } else {
-                throw new Error('Invalid embedding response format');
-            }
-            if (!Array.isArray(queryEmbedding)) {
-                throw new Error('Query embedding is not an array');
-            }
             console.log(this._doc_ids());
 
             // Query the Pinecone index using the embedding and filter by document IDs.
             const queryResponse: QueryResponse = await this.index.query({
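
For reference, the retrieval path after this change looks roughly like the sketch below. This is a minimal illustration, not the committed code: it assumes OPENAI_API_KEY and PINECONE_API_KEY are set in the environment, that the 'pdf-chatbot' index already exists with dimension 3072, and it uses a hypothetical doc_id metadata field for the document-ID filter, since the actual filter shape falls outside the truncated hunk above.

import OpenAI from 'openai';
import { Pinecone } from '@pinecone-database/pinecone';

// Clients mirror the commit: OpenAI for embeddings, Pinecone for similarity search.
// dangerouslyAllowBrowser matches the client-side usage shown in the diff.
const openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, dangerouslyAllowBrowser: true });
const pinecone = new Pinecone({ apiKey: process.env.PINECONE_API_KEY! });
const index = pinecone.index('pdf-chatbot');

async function retrieveChunks(query: string, docIds: string[], topK = 10) {
    // text-embedding-3-large returns 3072-dimensional vectors, which is why
    // the index is now created with dimension: 3072 instead of 1024.
    const embeddingResponse = await openai.embeddings.create({
        model: 'text-embedding-3-large',
        input: query,
        encoding_format: 'float',
    });
    const queryEmbedding = embeddingResponse.data[0].embedding;

    // Query Pinecone with the embedding; doc_id is an illustrative metadata
    // field standing in for however the commit filters by document IDs.
    const queryResponse = await index.query({
        vector: queryEmbedding,
        topK,
        filter: { doc_id: { $in: docIds } },
        includeMetadata: true,
    });
    return queryResponse.matches;
}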
