aboutsummaryrefslogtreecommitdiff
path: root/src/client/views/nodes/chatbot
diff options
context:
space:
mode:
authorbobzel <zzzman@gmail.com>2025-03-06 16:17:47 -0500
committerbobzel <zzzman@gmail.com>2025-03-06 16:17:47 -0500
commit5ad858090f3006631062877d90120e3cc505fada (patch)
tree9f87a8e1e7098a1025f6f4aac332dbc854db5be3 /src/client/views/nodes/chatbot
parent9c2a7c14fd9d0e44609aab30c6323583162009db (diff)
parentadaa107aac8558fa6f46e6ba1263c650c212d506 (diff)
Merge branch 'master' into aarav_edit
Diffstat (limited to 'src/client/views/nodes/chatbot')
-rw-r--r--src/client/views/nodes/chatbot/agentsystem/Agent.ts6
-rw-r--r--src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss11
-rw-r--r--src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx3
-rw-r--r--src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts30
-rw-r--r--src/client/views/nodes/chatbot/tools/CreateCSVTool.ts4
-rw-r--r--src/client/views/nodes/chatbot/tools/CreateDocumentTool.ts38
-rw-r--r--src/client/views/nodes/chatbot/tools/ImageCreationTool.ts43
-rw-r--r--src/client/views/nodes/chatbot/tools/RAGTool.ts2
-rw-r--r--src/client/views/nodes/chatbot/tools/SearchTool.ts8
-rw-r--r--src/client/views/nodes/chatbot/types/types.ts6
-rw-r--r--src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts66
11 files changed, 97 insertions, 120 deletions
diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
index b2b0c9aea..e93fb87db 100644
--- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
@@ -22,6 +22,8 @@ import { ChatCompletionMessageParam } from 'openai/resources';
import { Doc } from '../../../../../fields/Doc';
import { parsedDoc } from '../chatboxcomponents/ChatBox';
import { WebsiteInfoScraperTool } from '../tools/WebsiteInfoScraperTool';
+import { Upload } from '../../../../../server/SharedMediaTypes';
+import { RAGTool } from '../tools/RAGTool';
//import { CreateTextDocTool } from '../tools/CreateTextDocumentTool';
dotenv.config();
@@ -61,7 +63,7 @@ export class Agent {
history: () => string,
csvData: () => { filename: string; id: string; text: string }[],
addLinkedUrlDoc: (url: string, id: string) => void,
- createImage: (result: any, options: DocumentOptions) => void,
+ createImage: (result: Upload.FileInformation & Upload.InspectionResults, options: DocumentOptions) => void,
addLinkedDoc: (doc: parsedDoc) => Doc | undefined,
// eslint-disable-next-line @typescript-eslint/no-unused-vars
createCSVInDash: (url: string, title: string, id: string, data: string) => void
@@ -76,7 +78,7 @@ export class Agent {
// Define available tools for the assistant
this.tools = {
calculate: new CalculateTool(),
- // rag: new RAGTool(this.vectorstore),
+ rag: new RAGTool(this.vectorstore),
dataAnalysis: new DataAnalysisTool(csvData),
websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc),
searchTool: new SearchTool(addLinkedUrlDoc),
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss
index 9cf760a12..3d27fa887 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss
@@ -1,3 +1,4 @@
+@use 'sass:color';
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&display=swap');
$primary-color: #3f51b5;
@@ -68,7 +69,7 @@ $transition: all 0.2s ease-in-out;
&:focus {
outline: none;
border-color: $primary-color;
- box-shadow: 0 0 0 2px rgba($primary-color, 0.2);
+ box-shadow: 0 0 0 2px color.adjust($primary-color, $alpha: -0.8);
}
&:disabled {
@@ -92,11 +93,11 @@ $transition: all 0.2s ease-in-out;
transition: $transition;
&:hover {
- background-color: darken($primary-color, 10%);
+ background-color: color.adjust($primary-color, $lightness: -10%);
}
&:disabled {
- background-color: lighten($primary-color, 20%);
+ background-color: color.adjust($primary-color, $lightness: 20%);
cursor: not-allowed;
}
@@ -178,7 +179,7 @@ $transition: all 0.2s ease-in-out;
margin-bottom: 16px;
&:hover {
- background-color: rgba($primary-color, 0.1);
+ background-color: color.adjust($primary-color, $alpha: -0.9);
}
}
@@ -220,7 +221,7 @@ $transition: all 0.2s ease-in-out;
transition: $transition;
&:hover {
- background-color: rgba($primary-color, 0.2);
+ background-color: color.adjust($primary-color, $alpha: -0.8);
color: #fff;
}
}
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 16da360fc..6e9307d37 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -42,6 +42,7 @@ import './ChatBox.scss';
import MessageComponentBox from './MessageComponent';
import { ProgressBar } from './ProgressBar';
import { OpenWhere } from '../../OpenWhere';
+import { Upload } from '../../../../../server/SharedMediaTypes';
dotenv.config();
@@ -412,7 +413,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
});
@action
- createImageInDash = async (result: any, options: DocumentOptions) => {
+ createImageInDash = async (result: Upload.FileInformation & Upload.InspectionResults, options: DocumentOptions) => {
const newImgSrc =
result.accessPaths.agnostic.client.indexOf('dashblobstore') === -1 //
? ClientUtils.prepend(result.accessPaths.agnostic.client)
diff --git a/src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts b/src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts
index ef4bbbc47..754d230c8 100644
--- a/src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts
+++ b/src/client/views/nodes/chatbot/tools/CreateAnyDocTool.ts
@@ -13,63 +13,63 @@ const standardOptions = ['title', 'backgroundColor'];
* Description of document options and data field for each type.
*/
const documentTypesInfo: { [key in supportedDocTypes]: { options: string[]; dataDescription: string } } = {
- [supportedDocumentTypes.flashcard]: {
+ [supportedDocTypes.flashcard]: {
options: [...standardOptions, 'fontColor', 'text_align'],
dataDescription: 'an array of two strings. the first string contains a question, and the second string contains an answer',
},
- [supportedDocumentTypes.text]: {
+ [supportedDocTypes.text]: {
options: [...standardOptions, 'fontColor', 'text_align'],
dataDescription: 'The text content of the document.',
},
- [supportedDocumentTypes.html]: {
+ [supportedDocTypes.html]: {
options: [],
dataDescription: 'The HTML-formatted text content of the document.',
},
- [supportedDocumentTypes.equation]: {
+ [supportedDocTypes.equation]: {
options: [...standardOptions, 'fontColor'],
dataDescription: 'The equation content as a string.',
},
- [supportedDocumentTypes.functionplot]: {
+ [supportedDocTypes.functionplot]: {
options: [...standardOptions, 'function_definition'],
dataDescription: 'The function definition(s) for plotting. Provide as a string or array of function definitions.',
},
- [supportedDocumentTypes.dataviz]: {
+ [supportedDocTypes.dataviz]: {
options: [...standardOptions, 'chartType'],
dataDescription: 'A string of comma-separated values representing the CSV data.',
},
- [supportedDocumentTypes.notetaking]: {
+ [supportedDocTypes.notetaking]: {
options: standardOptions,
dataDescription: 'The initial content or structure for note-taking.',
},
- [supportedDocumentTypes.rtf]: {
+ [supportedDocTypes.rtf]: {
options: standardOptions,
dataDescription: 'The rich text content in RTF format.',
},
- [supportedDocumentTypes.image]: {
+ [supportedDocTypes.image]: {
options: standardOptions,
dataDescription: 'The image content as an image file URL.',
},
- [supportedDocumentTypes.pdf]: {
+ [supportedDocTypes.pdf]: {
options: standardOptions,
dataDescription: 'the pdf content as a PDF file url.',
},
- [supportedDocumentTypes.audio]: {
+ [supportedDocTypes.audio]: {
options: standardOptions,
dataDescription: 'The audio content as a file url.',
},
- [supportedDocumentTypes.video]: {
+ [supportedDocTypes.video]: {
options: standardOptions,
dataDescription: 'The video content as a file url.',
},
- [supportedDocumentTypes.message]: {
+ [supportedDocTypes.message]: {
options: standardOptions,
dataDescription: 'The message content of the document.',
},
- [supportedDocumentTypes.diagram]: {
+ [supportedDocTypes.diagram]: {
options: ['title', 'backgroundColor'],
dataDescription: 'diagram content as a text string in Mermaid format.',
},
- [supportedDocumentTypes.script]: {
+ [supportedDocTypes.script]: {
options: ['title', 'backgroundColor'],
dataDescription: 'The compilable JavaScript code. Use this for creating scripts.',
},
diff --git a/src/client/views/nodes/chatbot/tools/CreateCSVTool.ts b/src/client/views/nodes/chatbot/tools/CreateCSVTool.ts
index e8ef3fbfe..290c48d6c 100644
--- a/src/client/views/nodes/chatbot/tools/CreateCSVTool.ts
+++ b/src/client/views/nodes/chatbot/tools/CreateCSVTool.ts
@@ -38,10 +38,10 @@ export class CreateCSVTool extends BaseTool<CreateCSVToolParamsType> {
async execute(args: ParametersType<CreateCSVToolParamsType>): Promise<Observation[]> {
try {
console.log('Creating CSV file:', args.filename, ' with data:', args.csvData);
- const { fileUrl, id } = await Networking.PostToServer('/createCSV', {
+ const { fileUrl, id } = (await Networking.PostToServer('/createCSV', {
filename: args.filename,
data: args.csvData,
- });
+ })) as { fileUrl: string; id: string };
this._handleCSVResult(fileUrl, args.filename, id, args.csvData);
diff --git a/src/client/views/nodes/chatbot/tools/CreateDocumentTool.ts b/src/client/views/nodes/chatbot/tools/CreateDocumentTool.ts
index 6dc36b0d1..284879a4a 100644
--- a/src/client/views/nodes/chatbot/tools/CreateDocumentTool.ts
+++ b/src/client/views/nodes/chatbot/tools/CreateDocumentTool.ts
@@ -263,79 +263,79 @@ const standardOptions = ['title', 'backgroundColor'];
* Description of document options and data field for each type.
*/
const documentTypesInfo: { [key in supportedDocTypes]: { options: string[]; dataDescription: string } } = {
- [supportedDocTypes.comparison]: {
+ comparison: {
options: [...standardOptions, 'fontColor', 'text_align'],
dataDescription: 'an array of two documents of any kind that can be compared.',
},
- [supportedDocTypes.deck]: {
+ deck: {
options: [...standardOptions, 'fontColor', 'text_align'],
dataDescription: 'an array of flashcard docs',
},
- [supportedDocTypes.flashcard]: {
+ flashcard: {
options: [...standardOptions, 'fontColor', 'text_align'],
dataDescription: 'an array of two strings. the first string contains a question, and the second string contains an answer',
},
- [supportedDocTypes.text]: {
+ text: {
options: [...standardOptions, 'fontColor', 'text_align'],
dataDescription: 'The text content of the document.',
},
- [supportedDocTypes.web]: {
+ web: {
options: [],
dataDescription: 'A URL to a webpage. Example: https://en.wikipedia.org/wiki/Brown_University',
},
- [supportedDocTypes.html]: {
+ html: {
options: [],
dataDescription: 'The HTML-formatted text content of the document.',
},
- [supportedDocTypes.equation]: {
+ equation: {
options: [...standardOptions, 'fontColor'],
dataDescription: 'The equation content represented as a MathML string.',
},
- [supportedDocTypes.functionplot]: {
+ functionplot: {
options: [...standardOptions, 'function_definition'],
dataDescription: 'The function definition(s) for plotting. Provide as a string or array of function definitions.',
},
- [supportedDocTypes.dataviz]: {
+ dataviz: {
options: [...standardOptions, 'chartType'],
dataDescription: 'A string of comma-separated values representing the CSV data.',
},
- [supportedDocTypes.notetaking]: {
+ notetaking: {
options: standardOptions,
dataDescription: 'An array of related text documents with small amounts of text.',
},
- [supportedDocTypes.rtf]: {
+ rtf: {
options: standardOptions,
dataDescription: 'The rich text content in RTF format.',
},
- [supportedDocTypes.image]: {
+ image: {
options: standardOptions,
dataDescription: `A url string that must end with '.png', '.jpeg', '.gif', or '.jpg'`,
},
- [supportedDocTypes.pdf]: {
+ pdf: {
options: standardOptions,
dataDescription: 'the pdf content as a PDF file url.',
},
- [supportedDocTypes.audio]: {
+ audio: {
options: standardOptions,
dataDescription: 'The audio content as a file url.',
},
- [supportedDocTypes.video]: {
+ video: {
options: standardOptions,
dataDescription: 'The video content as a file url.',
},
- [supportedDocTypes.message]: {
+ message: {
options: standardOptions,
dataDescription: 'The message content of the document.',
},
- [supportedDocTypes.diagram]: {
+ diagram: {
options: standardOptions,
dataDescription: 'diagram content as a text string in Mermaid format.',
},
- [supportedDocTypes.script]: {
+ script: {
options: standardOptions,
dataDescription: 'The compilable JavaScript code. Use this for creating scripts.',
},
- [supportedDocTypes.collection]: {
+ collection: {
options: [...standardOptions, 'type_collection'],
dataDescription: 'A collection of Docs represented as an array.',
},
diff --git a/src/client/views/nodes/chatbot/tools/ImageCreationTool.ts b/src/client/views/nodes/chatbot/tools/ImageCreationTool.ts
index 177552c5c..37907fd4f 100644
--- a/src/client/views/nodes/chatbot/tools/ImageCreationTool.ts
+++ b/src/client/views/nodes/chatbot/tools/ImageCreationTool.ts
@@ -1,10 +1,11 @@
-import { v4 as uuidv4 } from 'uuid';
import { RTFCast } from '../../../../../fields/Types';
import { DocumentOptions } from '../../../../documents/Documents';
import { Networking } from '../../../../Network';
import { ParametersType, ToolInfo } from '../types/tool_types';
import { Observation } from '../types/types';
import { BaseTool } from './BaseTool';
+import { Upload } from '../../../../../server/SharedMediaTypes';
+import { List } from '../../../../../fields/List';
const imageCreationToolParams = [
{
@@ -25,8 +26,8 @@ const imageCreationToolInfo: ToolInfo<ImageCreationToolParamsType> = {
};
export class ImageCreationTool extends BaseTool<ImageCreationToolParamsType> {
- private _createImage: (result: any, options: DocumentOptions) => void;
- constructor(createImage: (result: any, options: DocumentOptions) => void) {
+ private _createImage: (result: Upload.FileInformation & Upload.InspectionResults, options: DocumentOptions) => void;
+ constructor(createImage: (result: Upload.FileInformation & Upload.InspectionResults, options: DocumentOptions) => void) {
super(imageCreationToolInfo);
this._createImage = createImage;
}
@@ -37,28 +38,24 @@ export class ImageCreationTool extends BaseTool<ImageCreationToolParamsType> {
console.log(`Generating image for prompt: ${image_prompt}`);
// Create an array of promises, each one handling a search for a query
try {
- const { result, url } = await Networking.PostToServer('/generateImage', {
+ const { result, url } = (await Networking.PostToServer('/generateImage', {
image_prompt,
- });
+ })) as { result: Upload.FileInformation & Upload.InspectionResults; url: string };
console.log('Image generation result:', result);
- this._createImage(result, { text: RTFCast(image_prompt) });
- if (url) {
- const id = uuidv4();
-
- return [
- {
- type: 'image_url',
- image_url: { url },
- },
- ];
- } else {
- return [
- {
- type: 'text',
- text: `An error occurred while generating image.`,
- },
- ];
- }
+ this._createImage(result, { text: RTFCast(image_prompt), ai: 'dall-e-3', tags: new List<string>(['@ai']) });
+ return url
+ ? [
+ {
+ type: 'image_url',
+ image_url: { url },
+ },
+ ]
+ : [
+ {
+ type: 'text',
+ text: `An error occurred while generating image.`,
+ },
+ ];
} catch (error) {
console.log(error);
return [
diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts
index 2db61c768..ef374ed22 100644
--- a/src/client/views/nodes/chatbot/tools/RAGTool.ts
+++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts
@@ -75,7 +75,7 @@ export class RAGTool extends BaseTool<RAGToolParamsType> {
async getFormattedChunks(relevantChunks: RAGChunk[]): Promise<Observation[]> {
try {
- const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks });
+ const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks }) as { formattedChunks: Observation[]}
if (!formattedChunks) {
throw new Error('Failed to format chunks');
diff --git a/src/client/views/nodes/chatbot/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts
index 5fc6ab768..6a11407a5 100644
--- a/src/client/views/nodes/chatbot/tools/SearchTool.ts
+++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts
@@ -41,15 +41,15 @@ export class SearchTool extends BaseTool<SearchToolParamsType> {
// Create an array of promises, each one handling a search for a query
const searchPromises = queries.map(async query => {
try {
- const { results } = await Networking.PostToServer('/getWebSearchResults', {
+ const { results } = (await Networking.PostToServer('/getWebSearchResults', {
query,
max_results: this._max_results,
- });
+ })) as { results: { url: string; snippet: string }[] };
const data = results.map((result: { url: string; snippet: string }) => {
const id = uuidv4();
this._addLinkedUrlDoc(result.url, id);
return {
- type: 'text',
+ type: 'text' as const,
text: `<chunk chunk_id="${id}" chunk_type="url"><url>${result.url}</url><overview>${result.snippet}</overview></chunk>`,
};
});
@@ -58,7 +58,7 @@ export class SearchTool extends BaseTool<SearchToolParamsType> {
console.log(error);
return [
{
- type: 'text',
+ type: 'text' as const,
text: `An error occurred while performing the web search for query: ${query}`,
},
];
diff --git a/src/client/views/nodes/chatbot/types/types.ts b/src/client/views/nodes/chatbot/types/types.ts
index 995ac531d..882e74ebb 100644
--- a/src/client/views/nodes/chatbot/types/types.ts
+++ b/src/client/views/nodes/chatbot/types/types.ts
@@ -1,6 +1,3 @@
-import { indexes } from 'd3';
-import { AnyLayer } from 'react-map-gl';
-
export enum ASSISTANT_ROLE {
USER = 'user',
ASSISTANT = 'assistant',
@@ -122,9 +119,8 @@ export interface AI_Document {
type: string;
}
+export type Observation = { type: 'text'; text: string } | { type: 'image_url'; image_url: { url: string } };
export interface AgentMessage {
role: 'system' | 'user' | 'assistant';
content: string | Observation[];
}
-
-export type Observation = { type: 'text'; text: string } | { type: 'image_url'; image_url: { url: string } };
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index ef24e59bc..afd34f28d 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -1,13 +1,11 @@
/**
* @file Vectorstore.ts
- * @description This file defines the Vectorstore class, which integrates with Pinecone for vector-based document indexing and Cohere for text embeddings.
+ * @description This file defines the Vectorstore class, which integrates with Pinecone for vector-based document indexing and OpenAI text-embedding-3-large for text embeddings.
* It manages AI document handling, including adding documents, processing media files, combining document chunks, indexing documents,
* and retrieving relevant sections based on user queries.
*/
import { Index, IndexList, Pinecone, PineconeRecord, QueryResponse, RecordMetadata } from '@pinecone-database/pinecone';
-import { CohereClient } from 'cohere-ai';
-import { EmbedResponse } from 'cohere-ai/api';
import dotenv from 'dotenv';
import path from 'path';
import { v4 as uuidv4 } from 'uuid';
@@ -15,17 +13,20 @@ import { Doc } from '../../../../../fields/Doc';
import { AudioCast, CsvCast, PDFCast, StrCast, VideoCast } from '../../../../../fields/Types';
import { Networking } from '../../../../Network';
import { AI_Document, CHUNK_TYPE, RAGChunk } from '../types/types';
+import OpenAI from 'openai';
+import { Embedding } from 'openai/resources';
+import { PineconeEnvironmentVarsNotSupportedError } from '@pinecone-database/pinecone/dist/errors';
dotenv.config();
/**
* The Vectorstore class integrates with Pinecone for vector-based document indexing and retrieval,
- * and Cohere for text embedding. It handles AI document management, uploads, and query-based retrieval.
+ * and OpenAI text-embedding-3-large for text embedding. It handles AI document management, uploads, and query-based retrieval.
*/
export class Vectorstore {
private pinecone: Pinecone; // Pinecone client for managing the vector index.
private index!: Index; // The specific Pinecone index used for document chunks.
- private cohere: CohereClient; // Cohere client for generating embeddings.
+ private openai: OpenAI; // OpenAI client for generating embeddings.
private indexName: string = 'pdf-chatbot'; // Default name for the index.
private _id: string; // Unique ID for the Vectorstore instance.
private _doc_ids: () => string[]; // List of document IDs handled by this instance.
@@ -33,20 +34,20 @@ export class Vectorstore {
documents: AI_Document[] = []; // Store the documents indexed in the vectorstore.
/**
- * Initializes the Pinecone and Cohere clients, sets up the document ID list,
+ * Initializes the Pinecone and OpenAI clients, sets up the document ID list,
* and initializes the Pinecone index.
* @param id The unique identifier for the vectorstore instance.
* @param doc_ids A function that returns a list of document IDs.
*/
constructor(id: string, doc_ids: () => string[]) {
- const pineconeApiKey = '51738e9a-bea2-4c11-b6bf-48a825e774dc';
+ const pineconeApiKey = process.env.PINECONE_API_KEY;
if (!pineconeApiKey) {
throw new Error('PINECONE_API_KEY is not defined.');
}
- // Initialize Pinecone and Cohere clients with API keys from the environment.
+ // Initialize Pinecone and OpenAI clients with API keys from the environment.
this.pinecone = new Pinecone({ apiKey: pineconeApiKey });
- // this.cohere = new CohereClient({ token: process.env.COHERE_API_KEY });
+ this.openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, dangerouslyAllowBrowser: true });
this._id = id;
this._doc_ids = doc_ids;
this.initializeIndex();
@@ -63,7 +64,7 @@ export class Vectorstore {
if (!indexList.indexes?.some(index => index.name === this.indexName)) {
await this.pinecone.createIndex({
name: this.indexName,
- dimension: 1024,
+ dimension: 3072,
metric: 'cosine',
spec: {
serverless: {
@@ -119,23 +120,12 @@ export class Vectorstore {
const texts = segmentedTranscript.map((chunk: any) => chunk.text);
try {
- const embeddingsResponse = await this.cohere.v2.embed({
- model: 'embed-english-v3.0',
- inputType: 'classification',
- embeddingTypes: ['float'], // Specify that embeddings should be floats
- texts, // Pass the array of chunk texts
+ const embeddingsResponse = await this.openai.embeddings.create({
+ model: 'text-embedding-3-large',
+ input: texts,
+ encoding_format: 'float',
});
- if (!embeddingsResponse.embeddings.float || embeddingsResponse.embeddings.float.length !== texts.length) {
- throw new Error('Mismatch between embeddings and the number of chunks');
- }
-
- // Assign embeddings to each chunk
- segmentedTranscript.forEach((chunk: any, index: number) => {
- if (!embeddingsResponse.embeddings || !embeddingsResponse.embeddings.float) {
- throw new Error('Invalid embeddings response');
- }
- });
doc.original_segments = JSON.stringify(response.full);
doc.ai_type = local_file_path.endsWith('.mp3') ? 'audio' : 'video';
const doc_id = uuidv4();
@@ -149,7 +139,7 @@ export class Vectorstore {
summary: '',
chunks: segmentedTranscript.map((chunk: any, index: number) => ({
id: uuidv4(),
- values: (embeddingsResponse.embeddings.float as number[][])[index], // Assign embedding
+ values: (embeddingsResponse.data as Embedding[])[index].embedding, // Assign embedding
metadata: {
indexes: chunk.indexes,
original_document: local_file_path,
@@ -291,7 +281,7 @@ export class Vectorstore {
/**
* Retrieves the most relevant document chunks for a given query.
- * Uses Cohere for embedding the query and Pinecone for vector similarity matching.
+ * Uses OpenAI for embedding the query and Pinecone for vector similarity matching.
* @param query The search query string.
* @param topK The number of top results to return (default is 10).
* @returns A list of document chunks that match the query.
@@ -299,27 +289,17 @@ export class Vectorstore {
async retrieve(query: string, topK: number = 10): Promise<RAGChunk[]> {
console.log(`Retrieving chunks for query: ${query}`);
try {
- // Generate an embedding for the query using Cohere.
- const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({
- texts: [query],
- model: 'embed-english-v3.0',
- inputType: 'search_query',
+ // Generate an embedding for the query using OpenAI.
+ const queryEmbeddingResponse = await this.openai.embeddings.create({
+ model: 'text-embedding-3-large',
+ input: query,
+ encoding_format: 'float',
});
- let queryEmbedding: number[];
+ let queryEmbedding = queryEmbeddingResponse.data[0].embedding;
// Extract the embedding from the response.
- if (Array.isArray(queryEmbeddingResponse.embeddings)) {
- queryEmbedding = queryEmbeddingResponse.embeddings[0];
- } else if (queryEmbeddingResponse.embeddings && 'embeddings' in queryEmbeddingResponse.embeddings) {
- queryEmbedding = (queryEmbeddingResponse.embeddings as { embeddings: number[][] }).embeddings[0];
- } else {
- throw new Error('Invalid embedding response format');
- }
- if (!Array.isArray(queryEmbedding)) {
- throw new Error('Query embedding is not an array');
- }
console.log(this._doc_ids());
// Query the Pinecone index using the embedding and filter by document IDs.
const queryResponse: QueryResponse = await this.index.query({