diff options
author | A.J. Shulman <Shulman.aj@gmail.com> | 2025-01-23 11:51:50 -0500 |
---|---|---|
committer | A.J. Shulman <Shulman.aj@gmail.com> | 2025-01-23 11:51:50 -0500 |
commit | 99e2a1728b9b97d30f5e0fffe6a10201bdee49c2 (patch) | |
tree | 33b0e487348883582dd2479e55739cf75bf79f19 /src | |
parent | 971d107574031885c17c339d39c4fd813682cc02 (diff) |
added commenting
Diffstat (limited to 'src')
4 files changed, 49 insertions, 17 deletions
diff --git a/src/client/views/nodes/chatbot/tools/DictionaryTool.ts b/src/client/views/nodes/chatbot/tools/DictionaryTool.ts index 377101641..3493f38d7 100644 --- a/src/client/views/nodes/chatbot/tools/DictionaryTool.ts +++ b/src/client/views/nodes/chatbot/tools/DictionaryTool.ts @@ -20,6 +20,9 @@ const dictionaryToolInfo: ToolInfo<DictionaryToolParamsType> = { description: 'Fetches the definition of a given word using an open dictionary API.', }; +/** + * DictionaryTool is a tool that fetches the definition of a given word using an open dictionary API. + */ export class DictionaryTool extends BaseTool<DictionaryToolParamsType> { constructor() { super(dictionaryToolInfo); diff --git a/src/client/views/nodes/chatbot/tools/ReplicateUserTaskTool.ts b/src/client/views/nodes/chatbot/tools/ReplicateUserTaskTool.ts deleted file mode 100644 index e69de29bb..000000000 --- a/src/client/views/nodes/chatbot/tools/ReplicateUserTaskTool.ts +++ /dev/null diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts index d962b887f..5334f7df0 100644 --- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts +++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts @@ -1,8 +1,8 @@ /** * @file Vectorstore.ts * @description This file defines the Vectorstore class, which integrates with Pinecone for vector-based document indexing and Cohere for text embeddings. - * It handles tasks such as AI document management, document chunking, and retrieval of relevant document sections based on user queries. - * The class supports adding documents to the vectorstore, managing document status, and querying Pinecone for document chunks matching a query. + * It manages AI document handling, including adding documents, processing media files, combining document chunks, indexing documents, + * and retrieving relevant sections based on user queries. */ import { Index, IndexList, Pinecone, PineconeRecord, QueryResponse, RecordMetadata } from '@pinecone-database/pinecone'; @@ -34,7 +34,7 @@ export class Vectorstore { documents: AI_Document[] = []; // Store the documents indexed in the vectorstore. /** - * Constructor initializes the Pinecone and Cohere clients, sets up the document ID list, + * Initializes the Pinecone and Cohere clients, sets up the document ID list, * and initializes the Pinecone index. * @param id The unique identifier for the vectorstore instance. * @param doc_ids A function that returns a list of document IDs. @@ -54,8 +54,8 @@ export class Vectorstore { } /** - * Initializes the Pinecone index by checking if it exists, and creating it if not. - * The index is set to use the cosine metric for vector similarity. + * Initializes the Pinecone index by checking if it exists and creating it if necessary. + * Sets the index to use cosine similarity for vector similarity calculations. */ private async initializeIndex() { const indexList: IndexList = await this.pinecone.listIndexes(); @@ -80,10 +80,10 @@ export class Vectorstore { } /** - * Adds an AI document to the vectorstore, handling media files separately. - * Preserves all existing document processing logic. + * Adds an AI document to the vectorstore. Handles media file processing for audio/video, + * and text embedding for all document types. Updates document metadata during processing. * @param doc The document to add. - * @param progressCallback Callback to track progress. + * @param progressCallback Callback to track the progress of the addition process. */ async addAIDoc(doc: Doc, progressCallback: (progress: number, step: string) => void) { const ai_document_status: string = StrCast(doc.ai_document_status); @@ -238,7 +238,8 @@ export class Vectorstore { } /** - * Indexes the processed document by uploading the document's vector chunks to the Pinecone index. + * Uploads the document's vector chunks to the Pinecone index. + * Prepares the metadata for each chunk and uses Pinecone's upsert operation. * @param document The processed document containing its chunks and metadata. */ private async indexDocument(document: AI_Document) { @@ -256,9 +257,10 @@ export class Vectorstore { } /** - * Combines chunks until their combined text is at least 500 words. - * @param chunks The original chunks. - * @returns Combined chunks. + * Combines document chunks until their combined text reaches a minimum word count. + * This is used to optimize retrieval and indexing processes. + * @param chunks The original chunks to combine. + * @returns Combined chunks with updated text and metadata. */ private combineChunks(chunks: RAGChunk[]): RAGChunk[] { const combinedChunks: RAGChunk[] = []; @@ -289,8 +291,8 @@ export class Vectorstore { } /** - * Retrieves the top K document chunks relevant to the user's query. - * This involves embedding the query using Cohere, then querying Pinecone for matching vectors. + * Retrieves the most relevant document chunks for a given query. + * Uses Cohere for embedding the query and Pinecone for vector similarity matching. * @param query The search query string. * @param topK The number of top results to return (default is 10). * @returns A list of document chunks that match the query. diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index fbda74194..c5ba4b830 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -124,6 +124,8 @@ export default class AssistantManager extends ApiManager { }, }); + // Register an API route to retrieve web search results using Google Custom Search + // This route filters results by checking their x-frame-options headers for security purposes register({ method: Method.POST, subscription: '/getWebSearchResults', @@ -205,6 +207,13 @@ export default class AssistantManager extends ApiManager { } }, }); + + /** + * Converts a video file to audio format using ffmpeg. + * @param videoPath The path to the input video file. + * @param outputAudioPath The path to the output audio file. + * @returns A promise that resolves when the conversion is complete. + */ function convertVideoToAudio(videoPath: string, outputAudioPath: string): Promise<void> { return new Promise((resolve, reject) => { const ffmpegProcess = spawn('ffmpeg', [ @@ -238,6 +247,8 @@ export default class AssistantManager extends ApiManager { }); } + // Register an API route to process a media file (audio or video) + // Extracts audio from video files, transcribes the audio using OpenAI Whisper, and provides a summary register({ method: Method.POST, subscription: '/processMediaFile', @@ -412,6 +423,8 @@ export default class AssistantManager extends ApiManager { } }; + // Register an API route to generate an image using OpenAI's DALL-E model + // Uploads the generated image to the server and provides a URL for access register({ method: Method.POST, subscription: '/generateImage', @@ -440,7 +453,8 @@ export default class AssistantManager extends ApiManager { }, }); - // Register a proxy fetch API route + // Register an API route to fetch data from a URL using a proxy with retry logic + // Useful for bypassing rate limits or scraping inaccessible data register({ method: Method.POST, subscription: '/proxyFetch', @@ -465,6 +479,7 @@ export default class AssistantManager extends ApiManager { }); // Register an API route to scrape website content using Puppeteer and JSDOM + // Extracts and returns readable content from a given URL register({ method: Method.POST, subscription: '/scrapeWebsite', @@ -505,6 +520,8 @@ export default class AssistantManager extends ApiManager { }, }); + // Register an API route to create a document and start a background job for processing + // Uses Python scripts to process files and generate document chunks for further use register({ method: Method.POST, subscription: '/createDocument', @@ -536,6 +553,7 @@ export default class AssistantManager extends ApiManager { }); // Register an API route to check the progress of a document creation job + // Returns the current step and progress percentage register({ method: Method.GET, subscription: '/getProgress/:jobId', @@ -553,7 +571,8 @@ export default class AssistantManager extends ApiManager { }, }); - // Register an API route to get the final result of a document creation job + // Register an API route to retrieve the final result of a document creation job + // Returns the processed data or an error status if the job is incomplete register({ method: Method.GET, subscription: '/getResult/:jobId', @@ -574,7 +593,8 @@ export default class AssistantManager extends ApiManager { }, }); - // Register an API route to format chunks (e.g., text or image chunks) for display + // Register an API route to format chunks of text or images for structured display + // Converts raw chunk data into a structured format for frontend consumption register({ method: Method.POST, subscription: '/formatChunks', @@ -630,6 +650,7 @@ export default class AssistantManager extends ApiManager { }); // Register an API route to create and save a CSV file on the server + // Writes the CSV content to a unique file and provides a URL for download register({ method: Method.POST, subscription: '/createCSV', @@ -669,6 +690,12 @@ export default class AssistantManager extends ApiManager { } } +/** + * Spawns a Python process to handle file processing tasks. + * @param jobId The job ID for tracking progress. + * @param file_name The name of the file to process. + * @param file_path The filepath of the file to process. + */ function spawnPythonProcess(jobId: string, file_name: string, file_path: string) { const venvPath = path.join(__dirname, '../chunker/venv'); const requirementsPath = path.join(__dirname, '../chunker/requirements.txt'); |