about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/client/views/nodes/chatbot/tools/DictionaryTool.ts | 3
-rw-r--r-- src/client/views/nodes/chatbot/tools/ReplicateUserTaskTool.ts | 0
-rw-r--r-- src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts | 30
-rw-r--r-- src/server/ApiManagers/AssistantManager.ts | 33
4 files changed, 49 insertions, 17 deletions
diff --git a/src/client/views/nodes/chatbot/tools/DictionaryTool.ts b/src/client/views/nodes/chatbot/tools/DictionaryTool.ts
index 377101641..3493f38d7 100644
--- a/src/client/views/nodes/chatbot/tools/DictionaryTool.ts
+++ b/src/client/views/nodes/chatbot/tools/DictionaryTool.ts
@@ -20,6 +20,9 @@ const dictionaryToolInfo: ToolInfo<DictionaryToolParamsType> = {
description: 'Fetches the definition of a given word using an open dictionary API.',
};
+/**
+ * Tool that fetches the definition of a given word from an open dictionary API.
+ */
export class DictionaryTool extends BaseTool<DictionaryToolParamsType> {
constructor() {
super(dictionaryToolInfo);
diff --git a/src/client/views/nodes/chatbot/tools/ReplicateUserTaskTool.ts b/src/client/views/nodes/chatbot/tools/ReplicateUserTaskTool.ts
deleted file mode 100644
index e69de29bb..000000000
--- a/src/client/views/nodes/chatbot/tools/ReplicateUserTaskTool.ts
+++ /dev/null
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index d962b887f..5334f7df0 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -1,8 +1,8 @@
/**
* @file Vectorstore.ts
* @description This file defines the Vectorstore class, which integrates with Pinecone for vector-based document indexing and Cohere for text embeddings.
- * It handles tasks such as AI document management, document chunking, and retrieval of relevant document sections based on user queries.
- * The class supports adding documents to the vectorstore, managing document status, and querying Pinecone for document chunks matching a query.
+ * It manages AI document handling, including adding documents, processing media files, combining document chunks, indexing documents,
+ * and retrieving relevant sections based on user queries.
*/
import { Index, IndexList, Pinecone, PineconeRecord, QueryResponse, RecordMetadata } from '@pinecone-database/pinecone';
@@ -34,7 +34,7 @@ export class Vectorstore {
documents: AI_Document[] = []; // Store the documents indexed in the vectorstore.
/**
- * Constructor initializes the Pinecone and Cohere clients, sets up the document ID list,
+ * Initializes the Pinecone and Cohere clients, sets up the document ID list,
* and initializes the Pinecone index.
* @param id The unique identifier for the vectorstore instance.
* @param doc_ids A function that returns a list of document IDs.
@@ -54,8 +54,8 @@ export class Vectorstore {
}
/**
- * Initializes the Pinecone index by checking if it exists, and creating it if not.
- * The index is set to use the cosine metric for vector similarity.
+ * Initializes the Pinecone index by checking if it exists and creating it if necessary.
+ * Configures the index to use the cosine metric for vector similarity.
*/
private async initializeIndex() {
const indexList: IndexList = await this.pinecone.listIndexes();
@@ -80,10 +80,10 @@ export class Vectorstore {
}
/**
- * Adds an AI document to the vectorstore, handling media files separately.
- * Preserves all existing document processing logic.
+ * Adds an AI document to the vectorstore. Handles media file processing for audio/video,
+ * and text embedding for all document types. Updates document metadata during processing.
* @param doc The document to add.
- * @param progressCallback Callback to track progress.
+ * @param progressCallback Callback to track the progress of the addition process.
*/
async addAIDoc(doc: Doc, progressCallback: (progress: number, step: string) => void) {
const ai_document_status: string = StrCast(doc.ai_document_status);
@@ -238,7 +238,8 @@ export class Vectorstore {
}
/**
- * Indexes the processed document by uploading the document's vector chunks to the Pinecone index.
+ * Uploads the document's vector chunks to the Pinecone index.
+ * Prepares the metadata for each chunk and uses Pinecone's upsert operation.
* @param document The processed document containing its chunks and metadata.
*/
private async indexDocument(document: AI_Document) {
@@ -256,9 +257,10 @@ export class Vectorstore {
}
/**
- * Combines chunks until their combined text is at least 500 words.
- * @param chunks The original chunks.
- * @returns Combined chunks.
+ * Combines document chunks until their combined text reaches a minimum word count.
+ * This is used to optimize retrieval and indexing processes.
+ * @param chunks The original chunks to combine.
+ * @returns Combined chunks with updated text and metadata.
*/
private combineChunks(chunks: RAGChunk[]): RAGChunk[] {
const combinedChunks: RAGChunk[] = [];
@@ -289,8 +291,8 @@ export class Vectorstore {
}
/**
- * Retrieves the top K document chunks relevant to the user's query.
- * This involves embedding the query using Cohere, then querying Pinecone for matching vectors.
+ * Retrieves the most relevant document chunks for a given query.
+ * Uses Cohere for embedding the query and Pinecone for vector similarity matching.
* @param query The search query string.
* @param topK The number of top results to return (default is 10).
* @returns A list of document chunks that match the query.
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index fbda74194..c5ba4b830 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -124,6 +124,8 @@ export default class AssistantManager extends ApiManager {
},
});
+ // Register an API route to retrieve web search results using Google Custom Search
+ // This route filters results by checking their x-frame-options headers for security purposes
register({
method: Method.POST,
subscription: '/getWebSearchResults',
@@ -205,6 +207,13 @@ export default class AssistantManager extends ApiManager {
}
},
});
+
+ /**
+ * Converts a video file to audio format using ffmpeg.
+ * @param videoPath The path to the input video file.
+ * @param outputAudioPath The path to the output audio file.
+ * @returns A promise that resolves when the conversion is complete.
+ */
function convertVideoToAudio(videoPath: string, outputAudioPath: string): Promise<void> {
return new Promise((resolve, reject) => {
const ffmpegProcess = spawn('ffmpeg', [
@@ -238,6 +247,8 @@ export default class AssistantManager extends ApiManager {
});
}
+ // Register an API route to process a media file (audio or video)
+ // Extracts audio from video files, transcribes the audio using OpenAI Whisper, and provides a summary
register({
method: Method.POST,
subscription: '/processMediaFile',
@@ -412,6 +423,8 @@ export default class AssistantManager extends ApiManager {
}
};
+ // Register an API route to generate an image using OpenAI's DALL-E model
+ // Uploads the generated image to the server and provides a URL for access
register({
method: Method.POST,
subscription: '/generateImage',
@@ -440,7 +453,8 @@ export default class AssistantManager extends ApiManager {
},
});
- // Register a proxy fetch API route
+ // Register an API route to fetch data from a URL using a proxy with retry logic
+ // Useful for bypassing rate limits or scraping inaccessible data
register({
method: Method.POST,
subscription: '/proxyFetch',
@@ -465,6 +479,7 @@ export default class AssistantManager extends ApiManager {
});
// Register an API route to scrape website content using Puppeteer and JSDOM
+ // Extracts and returns readable content from a given URL
register({
method: Method.POST,
subscription: '/scrapeWebsite',
@@ -505,6 +520,8 @@ export default class AssistantManager extends ApiManager {
},
});
+ // Register an API route to create a document and start a background job for processing
+ // Uses Python scripts to process files and generate document chunks for further use
register({
method: Method.POST,
subscription: '/createDocument',
@@ -536,6 +553,7 @@ export default class AssistantManager extends ApiManager {
});
// Register an API route to check the progress of a document creation job
+ // Returns the current step and progress percentage
register({
method: Method.GET,
subscription: '/getProgress/:jobId',
@@ -553,7 +571,8 @@ export default class AssistantManager extends ApiManager {
},
});
- // Register an API route to get the final result of a document creation job
+ // Register an API route to retrieve the final result of a document creation job
+ // Returns the processed data or an error status if the job is incomplete
register({
method: Method.GET,
subscription: '/getResult/:jobId',
@@ -574,7 +593,8 @@ export default class AssistantManager extends ApiManager {
},
});
- // Register an API route to format chunks (e.g., text or image chunks) for display
+ // Register an API route to format chunks of text or images for structured display
+ // Converts raw chunk data into a structured format for frontend consumption
register({
method: Method.POST,
subscription: '/formatChunks',
@@ -630,6 +650,7 @@ export default class AssistantManager extends ApiManager {
});
// Register an API route to create and save a CSV file on the server
+ // Writes the CSV content to a unique file and provides a URL for download
register({
method: Method.POST,
subscription: '/createCSV',
@@ -669,6 +690,12 @@ export default class AssistantManager extends ApiManager {
}
}
+/**
+ * Spawns a Python process to handle file processing tasks.
+ * @param jobId The job ID for tracking progress.
+ * @param file_name The name of the file to process.
+ * @param file_path The path to the file to process.
+ */
function spawnPythonProcess(jobId: string, file_name: string, file_path: string) {
const venvPath = path.join(__dirname, '../chunker/venv');
const requirementsPath = path.join(__dirname, '../chunker/requirements.txt');