/**
 * @file Vectorstore.ts
 * @description This file defines the Vectorstore class, which integrates with Pinecone for vector-based document indexing and OpenAI's text-embedding-3-large model for text embeddings.
 * It manages AI document handling, including adding documents, processing media files, combining document chunks, indexing documents,
 * and retrieving relevant sections based on user queries.
 */
import { Index, IndexList, Pinecone, PineconeRecord, QueryResponse, RecordMetadata } from '@pinecone-database/pinecone';
import dotenv from 'dotenv';
import path from 'path';
import { v4 as uuidv4 } from 'uuid';
import { Doc } from '../../../../../fields/Doc';
import { AudioCast, CsvCast, PDFCast, StrCast, VideoCast } from '../../../../../fields/Types';
import { Networking } from '../../../../Network';
import { AI_Document, CHUNK_TYPE, RAGChunk } from '../types/types';
import OpenAI from 'openai';
import { Embedding } from 'openai/resources';
import { AgentDocumentManager } from '../utils/AgentDocumentManager';
import { Id } from '../../../../../fields/FieldSymbols';

dotenv.config();

/**
 * The Vectorstore class integrates with Pinecone for vector-based document indexing and retrieval,
 * and OpenAI's text-embedding-3-large model for text embedding. It handles AI document management, uploads, and query-based retrieval.
 */
export class Vectorstore {
    private pinecone!: Pinecone; // Pinecone client for managing the vector index.
    private index!: Index; // The specific Pinecone index used for document chunks.
    private summaryIndex!: Index; // The Pinecone index used for file summaries.
    private openai!: OpenAI; // OpenAI client for generating embeddings.
    private indexName: string = 'pdf-chatbot'; // Default name for the index.
    private summaryIndexName: string = 'file-summaries'; // Name for the summaries index.
    private _id!: string; // Unique ID for the Vectorstore instance.
    private docManager!: AgentDocumentManager; // Document manager for handling documents.
    private summaryCacheCount: number = 0; // Cached count of indexed file summaries.
    documents: AI_Document[] = []; // Store the documents indexed in the vectorstore.
    private debug: boolean = true; // Enable debug logging.
    private initialized: boolean = false; // Set once async initialization completes.

    /**
     * Initializes the Pinecone and OpenAI clients, sets up the document ID list,
     * and initializes the Pinecone index.
     * @param id The unique identifier for the vectorstore instance.
     * @param docManager An instance of AgentDocumentManager to handle document management.
     */
    constructor(id: string, docManager: AgentDocumentManager) {
        if (this.debug) console.log(`[DEBUG] Initializing Vectorstore with ID: ${id}`);
        const pineconeApiKey = process.env.PINECONE_API_KEY; // Read from the environment; never hardcode credentials in source.
        if (!pineconeApiKey) {
            console.log('PINECONE_API_KEY is not defined - Vectorstore will be unavailable');
            return;
        }
        // Initialize Pinecone and OpenAI clients with API keys from the environment.
        this.pinecone = new Pinecone({ apiKey: pineconeApiKey });
        this.openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, dangerouslyAllowBrowser: true });
        this._id = id;
        this.docManager = docManager;
        // Run the async initialization sequence without blocking the constructor.
        this.initializeAsync(id);
    }

    /**
     * Handles async initialization of all components.
     */
    private async initializeAsync(id: string) {
        try {
            if (this.debug) console.log(`[DEBUG] Starting async initialization sequence for Vectorstore ID: ${id}`);
            // Initialize the main document index.
            await this.initializeIndex();
            // Initialize the summary index.
            await this.initializeSummaryIndex();
            this.initialized = true;
            if (this.debug) console.log(`[DEBUG] ✅ Vectorstore initialization complete, running test query...`);
            // Run a single test query instead of multiple.
            await this.runSingleTestQuery();
        } catch (error) {
            console.error('[ERROR] Failed to initialize Vectorstore:', error);
        }
    }

    /**
     * Fetches the list of known file paths from the server.
     */
    async getFileNames() {
        const response = await Networking.FetchFromServer('/getFileNames');
        const filepaths = JSON.parse(response);
        return filepaths;
    }

    /**
     * Initializes the Pinecone index by checking if it exists and creating it if necessary.
     * Sets the index to use cosine similarity for vector similarity calculations.
     */
    private async initializeIndex() {
        if (this.debug) console.log(`[DEBUG] Initializing main document index: ${this.indexName}`);
        const indexList: IndexList = await this.pinecone.listIndexes();
        if (this.debug) console.log(`[DEBUG] Available Pinecone indexes: ${indexList.indexes?.map(i => i.name).join(', ') || 'none'}`);
        // Check if the index already exists, otherwise create it.
        if (!indexList.indexes?.some(index => index.name === this.indexName)) {
            if (this.debug) console.log(`[DEBUG] Creating new index: ${this.indexName}`);
            await this.pinecone.createIndex({
                name: this.indexName,
                dimension: 3072, // Matches the output dimension of text-embedding-3-large.
                metric: 'cosine',
                spec: {
                    serverless: {
                        cloud: 'aws',
                        region: 'us-east-1',
                    },
                },
            });
            if (this.debug) console.log(`[DEBUG] ✅ Index ${this.indexName} created successfully`);
        } else {
            if (this.debug) console.log(`[DEBUG] ✅ Using existing index: ${this.indexName}`);
        }
        // Set the index for future use.
        this.index = this.pinecone.Index(this.indexName);
    }

    /**
     * Initializes the Pinecone index for file summaries.
     * Checks if it exists and creates it if necessary.
     */
    private async initializeSummaryIndex() {
        if (this.debug) console.log(`[DEBUG] Initializing file summaries index: ${this.summaryIndexName}`);
        const indexList: IndexList = await this.pinecone.listIndexes();
        // Check if the index already exists, otherwise create it.
        if (!indexList.indexes?.some(index => index.name === this.summaryIndexName)) {
            if (this.debug) console.log(`[DEBUG] Creating new summary index: ${this.summaryIndexName}`);
            await this.pinecone.createIndex({
                name: this.summaryIndexName,
                dimension: 3072,
                metric: 'cosine',
                spec: {
                    serverless: {
                        cloud: 'aws',
                        region: 'us-east-1',
                    },
                },
            });
            if (this.debug) console.log(`[DEBUG] ✅ Summary index ${this.summaryIndexName} created successfully`);
        } else {
            if (this.debug) console.log(`[DEBUG] ✅ Using existing summary index: ${this.summaryIndexName}`);
        }
        // Set the summaries index for future use.
        this.summaryIndex = this.pinecone.Index(this.summaryIndexName);
        // Check if we need to index the file summaries.
        await this.processFileSummaries();
    }
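    // A minimal usage sketch (comment only, not part of the class). Assumes an
    // existing AgentDocumentManager; `docManager` is an illustrative name.
    // Construction kicks off initializeAsync in the background, so callers
    // should wait for initialization to finish before issuing queries:
    //
    //   const store = new Vectorstore(uuidv4(), docManager);
    //   // ...once initializeAsync has completed:
    //   const hits = await store.searchFileSummaries('document indexing');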
    /**
     * Processes file summaries from the JSON file if needed.
     * Checks if the index contains the correct number of summaries before embedding.
     */
    private async processFileSummaries() {
        if (this.debug) console.log(`[DEBUG] Starting file summaries processing`);
        try {
            // Get file summaries from the server.
            if (this.debug) console.log(`[DEBUG] Fetching file summaries from server...`);
            const response = await Networking.FetchFromServer('/getFileSummaries');
            if (!response) {
                console.error('[ERROR] Failed to fetch file summaries');
                return;
            }
            if (this.debug) console.log(`[DEBUG] File summaries response received (${response.length} bytes)`);
            const summaries = JSON.parse(response);
            const filepaths = Object.keys(summaries);
            const summaryCount = filepaths.length;
            this.summaryCacheCount = summaryCount;
            if (this.debug) {
                console.log(`[DEBUG] File summaries parsed: ${summaryCount} files`);
                console.log(`[DEBUG] Sample filepaths: ${filepaths.slice(0, 3).join(', ')}...`);
                console.log(`[DEBUG] Sample summary: "${summaries[filepaths[0]].substring(0, 100)}..."`);
            }
            // Check whether the index already has the correct number of summaries.
            try {
                if (this.debug) console.log(`[DEBUG] Checking summary index stats...`);
                const indexStats = await this.summaryIndex.describeIndexStats();
                const vectorCount = indexStats.totalRecordCount;
                if (this.debug) console.log(`[DEBUG] Summary index has ${vectorCount} records, expecting ${summaryCount}`);
                if (vectorCount === summaryCount) {
                    console.log(`[DEBUG] ✅ Summary index already contains ${vectorCount} entries, skipping embedding.`);
                    return;
                }
                if (this.debug) console.log(`[DEBUG] ⚠️ Summary index contains ${vectorCount} entries, but there are ${summaryCount} summaries. Re-indexing.`);
            } catch (error) {
                console.error('[ERROR] Error checking summary index stats:', error);
            }
            // If we get here, we need to embed the summaries.
            await this.embedAndIndexFileSummaries(summaries);
        } catch (error) {
            console.error('[ERROR] Error processing file summaries:', error);
        }
    }
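    // Expected shape of the /getFileSummaries response parsed above: a flat
    // object mapping file paths to one-paragraph summaries. The concrete keys
    // below are illustrative, not taken from the real dataset:
    //
    //   {
    //     "src/client/views/MainView.tsx": "Defines the top-level layout...",
    //     "src/server/ApiManager.ts": "Registers the REST endpoints...",
    //   }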
    /**
     * Embeds and indexes file summaries into the summary index.
     * @param summaries Object mapping filepaths to summaries.
     */
    private async embedAndIndexFileSummaries(summaries: Record<string, string>) {
        if (this.debug) console.log(`[DEBUG] Starting embedding and indexing of file summaries...`);
        const filepaths = Object.keys(summaries);
        const summaryTexts = Object.values(summaries);
        // Split into batches of 100 to avoid exceeding API limits.
        const batchSize = 100;
        const totalBatches = Math.ceil(filepaths.length / batchSize);
        if (this.debug) console.log(`[DEBUG] Processing ${filepaths.length} files in ${totalBatches} batches of size ${batchSize}`);
        for (let i = 0; i < filepaths.length; i += batchSize) {
            const batchFilepaths = filepaths.slice(i, i + batchSize);
            const batchTexts = summaryTexts.slice(i, i + batchSize);
            if (this.debug) {
                console.log(`[DEBUG] Processing batch ${Math.floor(i / batchSize) + 1}/${totalBatches}`);
                console.log(`[DEBUG] First file in batch: ${batchFilepaths[0]}`);
                console.log(`[DEBUG] First summary in batch: "${batchTexts[0].substring(0, 50)}..."`);
            }
            try {
                // Generate embeddings for this batch.
                if (this.debug) console.log(`[DEBUG] Generating embeddings for batch of ${batchTexts.length} summaries...`);
                const startTime = Date.now();
                const embeddingResponse = await this.openai.embeddings.create({
                    model: 'text-embedding-3-large',
                    input: batchTexts,
                    encoding_format: 'float',
                });
                const duration = Date.now() - startTime;
                if (this.debug) console.log(`[DEBUG] ✅ Embeddings generated in ${duration}ms`);
                // Prepare Pinecone records.
                if (this.debug) console.log(`[DEBUG] Preparing Pinecone records...`);
                const pineconeRecords: PineconeRecord[] = batchTexts.map((text, index) => {
                    const embedding = (embeddingResponse.data as Embedding[])[index].embedding;
                    if (this.debug && index === 0) console.log(`[DEBUG] Sample embedding dimensions: ${embedding.length}, first few values: [${embedding.slice(0, 5).join(', ')}...]`);
                    return {
                        id: uuidv4(), // Generate a unique ID for each summary.
                        values: embedding,
                        metadata: {
                            filepath: batchFilepaths[index],
                            summary: text,
                        } as RecordMetadata,
                    };
                });
                // Upload to Pinecone.
                if (this.debug) console.log(`[DEBUG] Upserting ${pineconeRecords.length} records to Pinecone...`);
                const upsertStart = Date.now();
                try {
                    await this.summaryIndex.upsert(pineconeRecords);
                    const upsertDuration = Date.now() - upsertStart;
                    if (this.debug) console.log(`[DEBUG] ✅ Batch ${Math.floor(i / batchSize) + 1}/${totalBatches} indexed in ${upsertDuration}ms`);
                } catch (upsertError) {
                    console.error(`[ERROR] Failed to upsert batch ${Math.floor(i / batchSize) + 1}/${totalBatches} to Pinecone:`, upsertError);
                    // Try again with smaller batches.
                    if (batchTexts.length > 20) {
                        console.log(`[DEBUG] 🔄 Retrying with smaller batch size...`);
                        // Split the batch in half and retry each half.
                        const midpoint = Math.floor(batchTexts.length / 2);
                        const firstHalf = {
                            filepaths: batchFilepaths.slice(0, midpoint),
                            texts: batchTexts.slice(0, midpoint),
                        };
                        const secondHalf = {
                            filepaths: batchFilepaths.slice(midpoint),
                            texts: batchTexts.slice(midpoint),
                        };
                        // Helper function to embed and upsert a smaller batch.
                        const retryBatch = async (paths: string[], texts: string[], batchNum: string) => {
                            try {
                                if (this.debug) console.log(`[DEBUG] Generating embeddings for sub-batch ${batchNum}...`);
                                const embRes = await this.openai.embeddings.create({
                                    model: 'text-embedding-3-large',
                                    input: texts,
                                    encoding_format: 'float',
                                });
                                const records = texts.map((t, idx) => ({
                                    id: uuidv4(),
                                    values: (embRes.data as Embedding[])[idx].embedding,
                                    metadata: {
                                        filepath: paths[idx],
                                        summary: t,
                                    } as RecordMetadata,
                                }));
                                if (this.debug) console.log(`[DEBUG] Upserting sub-batch ${batchNum} (${records.length} records)...`);
                                await this.summaryIndex.upsert(records);
                                if (this.debug) console.log(`[DEBUG] ✅ Sub-batch ${batchNum} upserted successfully`);
                            } catch (retryError) {
                                console.error(`[ERROR] Failed to upsert sub-batch ${batchNum}:`, retryError);
                            }
                        };
                        await retryBatch(firstHalf.filepaths, firstHalf.texts, `${Math.floor(i / batchSize) + 1}.1`);
                        await retryBatch(secondHalf.filepaths, secondHalf.texts, `${Math.floor(i / batchSize) + 1}.2`);
                    }
                }
            } catch (error) {
                console.error('[ERROR] Error processing batch:', error);
            }
        }
        if (this.debug) console.log(`[DEBUG] ✅ File summary indexing complete for all ${filepaths.length} files`);
        // Verify the index was populated correctly.
        try {
            const indexStats = await this.summaryIndex.describeIndexStats();
            const vectorCount = indexStats.totalRecordCount;
            if (this.debug) console.log(`[DEBUG] 🔍 Final index verification: ${vectorCount} records in Pinecone index (expected ${filepaths.length})`);
        } catch (error) {
            console.error('[ERROR] Failed to verify index stats:', error);
        }
    }
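    // The retry path above halves a failed batch exactly once. A more general
    // variant would recurse until single records remain; sketched here under
    // the assumption that upsert failures are size-related (e.g. payload limits):
    //
    //   const upsertWithSplit = async (records: PineconeRecord[]): Promise<void> => {
    //       try {
    //           await this.summaryIndex.upsert(records);
    //       } catch (err) {
    //           if (records.length <= 1) throw err; // Nothing left to split.
    //           const mid = Math.floor(records.length / 2);
    //           await upsertWithSplit(records.slice(0, mid));
    //           await upsertWithSplit(records.slice(mid));
    //       }
    //   };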
    /**
     * Searches for file summaries similar to the given query.
     * @param query The search query.
     * @param topK Number of results to return (default: 5).
     * @returns Array of filepath and summary pairs with relevance scores.
     */
    async searchFileSummaries(query: string, topK: number = 5): Promise<Array<{ filepath: string; summary: string; score?: number }>> {
        if (!this.initialized) {
            console.error('[ERROR] Cannot search - Vectorstore not fully initialized');
            return [];
        }
        if (this.debug) console.log(`[DEBUG] Searching file summaries for query: "${query}" (topK=${topK})`);
        try {
            // Generate an embedding for the query.
            if (this.debug) console.log(`[DEBUG] Generating embedding for query...`);
            const startTime = Date.now();
            const queryEmbeddingResponse = await this.openai.embeddings.create({
                model: 'text-embedding-3-large',
                input: query,
                encoding_format: 'float',
            });
            const duration = Date.now() - startTime;
            const queryEmbedding = queryEmbeddingResponse.data[0].embedding;
            if (this.debug) {
                console.log(`[DEBUG] ✅ Query embedding generated in ${duration}ms`);
                console.log(`[DEBUG] Query embedding dimensions: ${queryEmbedding.length}`);
            }
            // Check whether the summary index is ready.
            try {
                const indexStats = await this.summaryIndex.describeIndexStats();
                const vectorCount = indexStats.totalRecordCount;
                if (this.debug) console.log(`[DEBUG] Summary index contains ${vectorCount} records`);
                if (vectorCount === 0) {
                    console.error('[ERROR] Summary index is empty, cannot perform search');
                    return [];
                }
            } catch (statsError) {
                console.error('[ERROR] Failed to check summary index stats:', statsError);
                console.error('[ERROR] Stats error details:', JSON.stringify(statsError));
            }
            // Test direct API access to Pinecone.
            if (this.debug) console.log(`[DEBUG] Testing Pinecone connection...`);
            try {
                const indexes = await this.pinecone.listIndexes();
                console.log(`[DEBUG] Available Pinecone indexes: ${indexes.indexes?.map(idx => idx.name).join(', ')}`);
            } catch (connectionError) {
                console.error('[ERROR] Could not connect to Pinecone:', connectionError);
            }
            // Query the summaries index.
            if (this.debug) console.log(`[DEBUG] Querying Pinecone summary index (${this.summaryIndexName})...`);
            const queryStart = Date.now();
            let queryResponse;
            try {
                // First, make sure we can access the index.
                const indexInfo = await this.summaryIndex.describeIndexStats();
                if (this.debug) console.log(`[DEBUG] Index stats:`, indexInfo);
                queryResponse = await this.summaryIndex.query({
                    vector: queryEmbedding,
                    topK,
                    includeMetadata: true,
                });
                const queryDuration = Date.now() - queryStart;
                if (this.debug) {
                    console.log(`[DEBUG] ✅ Pinecone query completed in ${queryDuration}ms`);
                    console.log(`[DEBUG] Raw Pinecone response:`, JSON.stringify(queryResponse, null, 2));
                    if (queryResponse.matches) {
                        console.log(`[DEBUG] Found ${queryResponse.matches.length} matching summaries`);
                        console.log(`[DEBUG] Match scores: ${queryResponse.matches.map(m => m.score?.toFixed(4)).join(', ')}`);
                    } else {
                        console.log(`[DEBUG] No matches in response`);
                    }
                }
            } catch (queryError) {
                console.error('[ERROR] Pinecone query failed:', queryError);
                if (typeof queryError === 'object' && queryError !== null) {
                    console.error('[ERROR] Query error details:', JSON.stringify(queryError, null, 2));
                }
                return [];
            }
            if (!queryResponse || !queryResponse.matches || queryResponse.matches.length === 0) {
                console.log('[DEBUG] ⚠️ No matches found in Pinecone for query');
                return [];
            }
            // Format the results.
            const results = queryResponse.matches.map(match => {
                if (!match.metadata) {
                    console.error('[ERROR] Match is missing metadata:', match);
                    return { filepath: 'unknown', summary: 'No summary available' };
                }
                return {
                    filepath: (match.metadata as { filepath: string }).filepath || 'unknown',
                    summary: (match.metadata as { summary: string }).summary || 'No summary available',
                    score: match.score,
                };
            });
            if (this.debug) {
                if (results.length > 0) {
                    console.log(`[DEBUG] Top result filepath: ${results[0]?.filepath}`);
                    console.log(`[DEBUG] Top result score: ${results[0]?.score}`);
                    console.log(`[DEBUG] Top result summary excerpt: "${results[0]?.summary?.substring(0, 100)}..."`);
                } else {
                    console.log(`[DEBUG] No results returned after processing`);
                }
            }
            return results;
        } catch (error) {
            console.error('[ERROR] Error searching file summaries:', error);
            if (typeof error === 'object' && error !== null) {
                console.error('[ERROR] Full error details:', JSON.stringify(error, null, 2));
            }
            return [];
        }
    }
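    // Each element returned above has the shape { filepath, summary, score? },
    // where score is Pinecone's cosine similarity (higher means closer). A
    // usage sketch with an illustrative threshold, not a tuned value:
    //
    //   const [top] = await store.searchFileSummaries('vector indexing');
    //   if (top && (top.score ?? 0) > 0.3) console.log(`Best match: ${top.filepath}`);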
    /**
     * Runs a single test query after setup to validate the file summary search functionality.
     */
    private async runSingleTestQuery() {
        console.log(`\n[TEST] Running single test query to validate file summary search functionality...`);
        // Verify the index is accessible.
        try {
            const indexStats = await this.summaryIndex.describeIndexStats();
            console.log(`[TEST] Pinecone index stats:`, JSON.stringify(indexStats, null, 2));
            console.log(`[TEST] Summary index contains ${indexStats.totalRecordCount} indexed summaries`);
        } catch (error) {
            console.error('[TEST] ❌ Failed to access Pinecone index:', error);
            return;
        }
        // Add a brief delay to ensure Pinecone has finished processing.
        console.log('[TEST] Waiting 2 seconds for Pinecone indexing to complete...');
        await new Promise(resolve => setTimeout(resolve, 2000));
        // Run a single test query.
        const query = 'React components for the UI';
        console.log(`\n[TEST] Executing query: "${query}"`);
        try {
            const results = await this.searchFileSummaries(query);
            console.log(`[TEST] Search returned ${results.length} results:`);
            results.forEach((result, i) => {
                console.log(`\n[TEST] Result ${i + 1}:`);
                console.log(`[TEST] File: ${result.filepath}`);
                console.log(`[TEST] Score: ${result.score}`);
                console.log(`[TEST] Summary: "${result.summary?.substring(0, 150)}..."`);
            });
            // If we have results, fetch the content for the first one.
            if (results.length > 0) {
                const topFilepath = results[0].filepath;
                console.log(`\n[TEST] Fetching full content for top result: ${topFilepath}`);
                const content = await this.getFileContent(topFilepath);
                if (content) {
                    console.log(`[TEST] ✅ Content retrieved successfully (${content.length} chars)`);
                    console.log(`[TEST] Content excerpt:\n---\n${content.substring(0, 300)}...\n---`);
                } else {
                    console.log(`[TEST] ❌ Failed to retrieve content for ${topFilepath}`);
                }
            } else {
                console.log(`\n[TEST] ⚠️ No results to fetch content for`);
            }
            console.log(`\n[TEST] ✅ Test query completed`);
        } catch (testError) {
            console.error(`[TEST] ❌ Test query failed:`, testError);
            if (typeof testError === 'object' && testError !== null) {
                console.error('[TEST] Full error details:', JSON.stringify(testError, null, 2));
            }
        }
    }

    /**
     * Gets the full content of a file by its filepath.
     * @param filepath The filepath to look up.
     * @returns The file content, or null if not found.
     */
    async getFileContent(filepath: string): Promise<string | null> {
        if (this.debug) console.log(`[DEBUG] Getting file content for: ${filepath}`);
        try {
            const startTime = Date.now();
            // Fetch directly rather than through the Networking utility so the
            // response can be read as raw text instead of being JSON-parsed.
            const rawResponse = await fetch('/getRawFileContent', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json',
                },
                body: JSON.stringify({ filepath }),
            });
            if (!rawResponse.ok) {
                const errorText = await rawResponse.text();
                console.error(`[ERROR] Server returned error ${rawResponse.status}: ${errorText}`);
                return null;
            }
            // Get the raw text content without JSON parsing.
            const content = await rawResponse.text();
            const duration = Date.now() - startTime;
            if (this.debug) {
                console.log(`[DEBUG] ✅ File content retrieved in ${duration}ms`);
                console.log(`[DEBUG] Content length: ${content.length} chars`);
                console.log(`[DEBUG] Content excerpt: "${content.substring(0, 100)}..."`);
            }
            return content;
        } catch (error) {
            console.error('[ERROR] Error getting file content:', error);
            if (typeof error === 'object' && error !== null) {
                console.error('[ERROR] Full error details:', JSON.stringify(error, null, 2));
            }
            return null;
        }
    }
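    // A two-step retrieval flow built from the two methods above: search the
    // summary index, then pull raw content for the best hit. Sketch only;
    // `store` stands in for an initialized Vectorstore instance:
    //
    //   const matches = await store.searchFileSummaries('vectorstore initialization', 1);
    //   if (matches.length > 0) {
    //       const text = await store.getFileContent(matches[0].filepath);
    //       if (text) console.log(text.substring(0, 200));
    //   }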
    /**
     * Adds an AI document to the vectorstore. Handles media file processing for audio/video,
     * and text embedding for all document types. Updates document metadata during processing.
     * @param doc The document to add.
     * @param progressCallback Callback to track the progress of the addition process.
     */
    async addAIDoc(doc: Doc, progressCallback: (progress: number, step: string) => void) {
        const ai_document_status: string = StrCast(doc.ai_document_status);
        // Skip if the document is already in progress or completed.
        if (ai_document_status !== undefined && ai_document_status.trim() !== '' && ai_document_status !== '{}') {
            if (ai_document_status === 'PROGRESS') {
                console.log('Already in progress.');
                return;
            } else if (ai_document_status === 'COMPLETED') {
                console.log('Already completed.');
                return;
            }
        } else {
            // Start processing the document.
            doc.ai_document_status = 'PROGRESS';
            const local_file_path = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname ?? VideoCast(doc.data)?.url?.pathname ?? AudioCast(doc.data)?.url?.pathname;
            if (!local_file_path) {
                console.log('Not adding to vectorstore. Invalid file path for vectorstore addition.');
                return;
            }
            const isAudioOrVideo = local_file_path.endsWith('.mp3') || local_file_path.endsWith('.mp4');
            let result: AI_Document & { doc_id: string };
            if (isAudioOrVideo) {
                console.log('Processing media file...');
                progressCallback(10, 'Preparing media file for transcription...');
                // Post to the processMediaFile endpoint to get the transcript.
                const response = await Networking.PostToServer('/processMediaFile', { fileName: path.basename(local_file_path) });
                progressCallback(60, 'Transcription completed. Processing transcript...');
                // Type assertion to handle the response properties.
                const typedResponse = response as {
                    condensed: Array<{ text: string; indexes: string[]; start: number; end: number }>;
                    full: unknown[]; // Raw transcript segments; stored verbatim below, so the element type is left open.
                    summary: string;
                };
                const segmentedTranscript = typedResponse.condensed;
                console.log(segmentedTranscript);
                const summary = typedResponse.summary;
                doc.summary = summary;
                // Generate embeddings for each chunk.
                const texts = segmentedTranscript.map(chunk => chunk.text);
                try {
                    const embeddingsResponse = await this.openai.embeddings.create({
                        model: 'text-embedding-3-large',
                        input: texts,
                        encoding_format: 'float',
                    });
                    progressCallback(85, 'Embeddings generated. Finalizing document...');
                    doc.original_segments = JSON.stringify(typedResponse.full);
                    const doc_id = doc[Id];
                    console.log('doc_id in vectorstore', doc_id);
                    // Generate chunk IDs upfront so we can register them.
                    const chunkIds = segmentedTranscript.map(() => uuidv4());
                    // Add the transcript and embeddings to the metadata.
                    result = {
                        doc_id,
                        purpose: '',
                        file_name: local_file_path,
                        num_pages: 0,
                        summary: summary,
                        chunks: segmentedTranscript.map((chunk, index) => ({
                            id: chunkIds[index], // Use the pre-generated chunk ID.
                            values: (embeddingsResponse.data as Embedding[])[index].embedding, // Assign the embedding.
                            metadata: {
                                indexes: chunk.indexes,
                                original_document: local_file_path,
                                doc_id: doc_id, // Ensure doc_id is consistent.
                                file_path: local_file_path,
                                start_time: chunk.start,
                                end_time: chunk.end,
                                text: chunk.text,
                                type: local_file_path.endsWith('.mp3') ? CHUNK_TYPE.AUDIO : CHUNK_TYPE.VIDEO,
                            },
                        })),
                        type: 'media',
                    };
                    progressCallback(95, 'Adding document to vectorstore...');
                } catch (error) {
                    console.error('Error generating embeddings:', error);
                    doc.ai_document_status = 'ERROR';
                    throw new Error('Embedding generation failed');
                }
                doc.segmented_transcript = JSON.stringify(segmentedTranscript);
                // Use the document manager to add simplified chunks.
                const docType = local_file_path.endsWith('.mp3') ? 'audio' : 'video';
                const simplifiedChunks = this.docManager.getSimplifiedChunks(result.chunks, docType);
                doc.chunk_simplified = JSON.stringify(simplifiedChunks);
                this.docManager.addSimplifiedChunks(simplifiedChunks);
            } else {
                // Process a regular document.
                console.log('Processing regular document...');
                const createDocumentResponse = await Networking.PostToServer('/createDocument', { file_path: local_file_path, doc_id: doc[Id] });
                // Type assertion for the response.
                const { jobId } = createDocumentResponse as { jobId: string };
                // Poll the server until the document-processing job completes.
                while (true) {
                    await new Promise(resolve => setTimeout(resolve, 2000));
                    const resultResponse = await Networking.FetchFromServer(`/getResult/${jobId}`);
                    const resultResponseJson = JSON.parse(resultResponse);
                    if (resultResponseJson.status === 'completed') {
                        result = resultResponseJson;
                        break;
                    }
                    const progressResponse = await Networking.FetchFromServer(`/getProgress/${jobId}`);
                    const progressResponseJson = JSON.parse(progressResponse);
                    if (progressResponseJson) {
                        progressCallback(progressResponseJson.progress, progressResponseJson.step);
                    }
                }
                if (result.doc_id !== doc[Id]) {
                    console.log('doc_id in vectorstore', result.doc_id, 'does not match doc_id in doc', doc[Id]);
                }
                // Use the document manager to add simplified chunks; determine the document type from the file extension.
                const fileExt = path.extname(local_file_path).toLowerCase();
                const docType = fileExt === '.pdf' ? 'pdf' : fileExt === '.csv' ? 'csv' : 'text';
                const simplifiedChunks = this.docManager.getSimplifiedChunks(result.chunks, docType);
                doc.chunk_simplified = JSON.stringify(simplifiedChunks);
                this.docManager.addSimplifiedChunks(simplifiedChunks);
                doc.summary = result.summary;
                doc.ai_purpose = result.purpose;
            }
            // Index the document.
            await this.indexDocument(result);
            progressCallback(100, 'Document added successfully!');
            // Record this vectorstore's ID on the document, preserving any existing entries.
            if (!doc.vectorstore_id) {
                doc.vectorstore_id = JSON.stringify([this._id]);
            } else {
                doc.vectorstore_id = JSON.stringify(JSON.parse(StrCast(doc.vectorstore_id)).concat([this._id]));
            }
            doc.ai_doc_id = result.doc_id;
            console.log(`Document added: ${result.file_name}`);
            doc.ai_document_status = 'COMPLETED';
        }
    }

    /**
     * Uploads the document's vector chunks to the Pinecone index.
     * Prepares the metadata for each chunk and uses Pinecone's upsert operation.
     * @param document The processed document containing its chunks and metadata.
     */
    private async indexDocument(document: AI_Document) {
        console.log('Uploading vectors to content namespace...');
        // Prepare Pinecone records for each chunk in the document.
        const pineconeRecords: PineconeRecord[] = (document.chunks as RAGChunk[]).map(chunk => ({
            id: chunk.id,
            values: chunk.values,
            metadata: { ...chunk.metadata } as RecordMetadata,
        }));
        // Upload the records to Pinecone.
        await this.index.upsert(pineconeRecords);
    }
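    // Each record upserted above follows Pinecone's { id, values, metadata }
    // shape. An illustrative PDF-chunk record (values truncated; field names
    // taken from the metadata cast used in retrieve() below):
    //
    //   {
    //       id: '3f1c...',
    //       values: [0.0123, -0.0456 /* ...3072 dimensions in total */],
    //       metadata: { text: '...', doc_id: '...', file_path: '...', start_page: 3, end_page: 4, type: 'text' },
    //   }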
    /**
     * Combines document chunks until their combined text reaches a minimum word count (500 words).
     * This is used to optimize retrieval and indexing processes.
     * @param chunks The original chunks to combine.
     * @returns Combined chunks with updated text and metadata.
     */
    private combineChunks(chunks: RAGChunk[]): RAGChunk[] {
        const combinedChunks: RAGChunk[] = [];
        let currentChunk: RAGChunk | null = null;
        let wordCount = 0;
        chunks.forEach(chunk => {
            const textWords = chunk.metadata.text.split(' ').length;
            if (!currentChunk) {
                // Start the first combined chunk.
                currentChunk = { ...chunk, metadata: { ...chunk.metadata, text: chunk.metadata.text } };
                wordCount = textWords;
            } else if (wordCount + textWords >= 500) {
                // The current chunk is large enough; emit it and start a new one.
                combinedChunks.push(currentChunk);
                currentChunk = { ...chunk, metadata: { ...chunk.metadata, text: chunk.metadata.text } };
                wordCount = textWords;
            } else {
                // Append this chunk's text to the current combined chunk.
                currentChunk.metadata.text += ` ${chunk.metadata.text}`;
                wordCount += textWords;
            }
        });
        if (currentChunk) {
            combinedChunks.push(currentChunk);
        }
        return combinedChunks;
    }

    /**
     * Retrieves the most relevant document chunks for a given query.
     * Uses OpenAI for embedding the query and Pinecone for vector similarity matching.
     * @param query The search query string.
     * @param topK The number of top results to return (default is 15).
     * @param docIds Optional list of document IDs to restrict the search to.
     * @returns A list of document chunks that match the query.
     */
    async retrieve(query: string, topK: number = 15, docIds?: string[]): Promise<RAGChunk[]> {
        console.log(`Retrieving chunks for query: ${query}`);
        try {
            // Generate an embedding for the query using OpenAI.
            const queryEmbeddingResponse = await this.openai.embeddings.create({
                model: 'text-embedding-3-large',
                input: query,
                encoding_format: 'float',
            });
            const queryEmbedding = queryEmbeddingResponse.data[0].embedding;
            // Fall back to all document IDs registered in the document manager when none are given.
            const _docIds = docIds?.length === 0 || !docIds ? this.docManager.docIds : docIds;
            console.log('Using document IDs for retrieval:', _docIds);
            // Query the Pinecone index using the embedding, filtered by document IDs.
            const queryResponse: QueryResponse = await this.index.query({
                vector: queryEmbedding,
                filter: {
                    doc_id: { $in: _docIds },
                },
                topK,
                includeValues: true,
                includeMetadata: true,
            });
            console.log(`Found ${queryResponse.matches.length} matching chunks`);
            // Map each match back into a RAGChunk so callers get a consistent shape.
            const processedMatches = queryResponse.matches.map(match => {
                const chunk = {
                    id: match.id,
                    values: match.values as number[],
                    metadata: match.metadata as {
                        text: string;
                        type: string;
                        original_document: string;
                        file_path: string;
                        doc_id: string;
                        location: string;
                        start_page: number;
                        end_page: number;
                    },
                } as RAGChunk;
                return chunk;
            });
            return processedMatches;
        } catch (error) {
            console.error(`Error retrieving chunks: ${error}`);
            return [];
        }
    }
}
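// End-to-end usage sketch (comment only). Assumes a configured
// AgentDocumentManager and a Dash Doc with a PDF attached; `docManager` and
// `pdfDoc` are illustrative names, not exports of this module:
//
//   const store = new Vectorstore(uuidv4(), docManager);
//   await store.addAIDoc(pdfDoc, (pct, step) => console.log(`${pct}% - ${step}`));
//   const chunks = await store.retrieve('what does the introduction conclude?', 5);
//   chunks.forEach(c => console.log(c.metadata.text.slice(0, 120)));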