1 files changed, 284 insertions, 166 deletions
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 43765c1ce..35dbee3e9 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -71,7 +71,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     @observable private _citationPopup: { text: string; visible: boolean } = { text: '', visible: false };
 
     // Private properties for managing OpenAI API, vector store, agent, and UI elements
-    private openai: OpenAI;
+    private openai!: OpenAI; // Using definite assignment assertion
     private vectorstore_id: string;
     private vectorstore: Vectorstore;
     private agent: Agent;
@@ -98,25 +98,34 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
      */
     constructor(props: FieldViewProps) {
         super(props);
-        makeObservable(this); // Enable MobX observables
+        makeObservable(this);
 
-        // Initialize OpenAI, vectorstore, and agent
-        this.openai = this.initializeOpenAI();
-        if (StrCast(this.dataDoc.vectorstore_id) == '') {
-            this.vectorstore_id = uuidv4();
-            this.dataDoc.vectorstore_id = this.vectorstore_id;
-        } else {
-            this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id);
-        }
-        this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds);
+        this.messagesRef = React.createRef();
         this.docManager = new AgentDocumentManager(this);
-        this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.getLinkedUrlDocIds, this.createImageInDash, this.createCSVInDash, this, this.docManager);
 
-        // Reinitialize the DocumentMetadataTool with a direct reference to this ChatBox instance
-        // This ensures the tool can properly access documents in the same Freeform view
-        this.agent.reinitializeDocumentMetadataTool();
+        // Initialize OpenAI client
+        this.initializeOpenAI();
+
+        // Create a unique vectorstore ID for this ChatBox
+        this.vectorstore_id = uuidv4();
+
+        // Initialize vectorstore with the document manager
+        this.vectorstore = new Vectorstore(this.vectorstore_id, this.docManager);
+
+        // Create an agent with the vectorstore
+        this.agent = new Agent(
+            this.vectorstore,
+            this.retrieveSummaries.bind(this),
+            this.retrieveFormattedHistory.bind(this),
+            this.retrieveCSVData.bind(this),
+            this.retrieveDocIds.bind(this),
+            this.createImageInDash.bind(this),
+            this.createCSVInDash.bind(this),
+            this.docManager
+        );
 
-        this.messagesRef = React.createRef<HTMLDivElement>();
+        // Add event listeners
+        this.addScrollListener();
 
         // Reaction to update dataDoc when chat history changes
         reaction(
@@ -140,22 +149,25 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
      */
     @action
     addDocToVectorstore = async (newLinkedDoc: Doc) => {
-        this._uploadProgress = 0;
-        this._currentStep = 'Initializing...';
-        this._isUploadingDocs = true;
-
         try {
-            // Add the document to the vectorstore
+            this._isUploadingDocs = true;
+
+            // Process the document first to ensure it has a valid ID
+            this.docManager.processDocument(newLinkedDoc);
+
+            // Add the document to the vectorstore which will also register chunks
             await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress);
-        } catch (error) {
-            console.error('Error uploading document:', error);
-            this._currentStep = 'Error during upload';
-        } finally {
-            runInAction(() => {
-                this._isUploadingDocs = false;
-                this._uploadProgress = 0;
-                this._currentStep = '';
-            });
+
+            // No longer needed as documents are tracked by the AgentDocumentManager
+            // this._linked_docs_to_add.add(newLinkedDoc);
+
+            return true;
+        } catch (err) {
+            console.error('Error adding document to vectorstore:', err);
+            return false;
+        } finally {
+            // Code after the await runs outside the @action's synchronous part, so the flag is cleared via runInAction
+            runInAction(() => { this._isUploadingDocs = false; });
         }
     };
 
@@ -238,7 +250,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
             apiKey: process.env.OPENAI_KEY,
             dangerouslyAllowBrowser: true,
         };
-        return new OpenAI(configuration);
+        this.openai = new OpenAI(configuration);
     }
 
     /**
@@ -376,49 +388,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     };
 
     /**
-     * Adds a linked document from a URL for future reference and analysis.
-     * @param url The URL of the document to add.
-     * @param id The unique identifier for the document.
-     */
-    @action
-    addLinkedUrlDoc = async (url: string, id: string) => {
-        const doc = Docs.Create.WebDocument(url, { data_useCors: true });
-        this.docManager.addCustomId(doc, id);
-        const linkDoc = Docs.Create.LinkDocument(this.Document, doc);
-        LinkManager.Instance.addLink(linkDoc);
-
-        const chunkToAdd = {
-            chunkId: id,
-            chunkType: CHUNK_TYPE.URL,
-            url: url,
-        };
-
-        doc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] });
-        this.docManager.processDocument(doc);
-    };
-
-    /**
-     * Retrieves the IDs of linked url documents.
-     * @returns An array of document IDs.
-     */
-    @action
-    getLinkedUrlDocIds = () => {
-        const linkedDocs: Doc[] = this.linkedDocs;
-        const linkedUrlDocIds: string[] = [];
-
-        for (const doc of linkedDocs) {
-            if (doc.chunk_simpl) {
-                const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] };
-                const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkType === CHUNK_TYPE.URL);
-                if (foundChunk) {
-                    linkedUrlDocIds.push(foundChunk.chunkId);
-                }
-            }
-        }
-        return linkedUrlDocIds;
-    };
-
-    /**
      * Getter to retrieve the current user's name from the client utils.
      */
     @computed
@@ -613,82 +582,224 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
      */
     @action
     handleCitationClick = async (citation: Citation) => {
-        const currentLinkedDocs: Doc[] = this.linkedDocs;
-        const chunkId = citation.chunk_id;
+        try {
+            // Extract values from MobX proxy object if needed (truthiness guard first: typeof null is also 'object')
+            const chunkId = citation.chunk_id && typeof citation.chunk_id === 'object' ? String(citation.chunk_id) : citation.chunk_id;
+
+            // For debugging
+            console.log('Citation clicked:', {
+                chunkId,
+                citation: JSON.stringify(citation, null, 2),
+            });
 
-        for (const doc of currentLinkedDocs) {
-            if (doc.chunk_simpl) {
-                const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] };
-                const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId);
+            // Try to find the document
+            const linkedDocs = this.linkedDocs;
+            let doc: Doc | undefined;
 
-                if (foundChunk) {
-                    // Handle media chunks specifically
+            // First try to find the document using the document manager's chunk ID lookup
+            const parentDocId = this.docManager.getDocIdByChunkId(chunkId);
+            if (parentDocId) {
+                doc = this.docManager.getDocument(parentDocId);
+                console.log(`Found document by chunk ID lookup: ${parentDocId}`);
+            }
 
-                    if (doc.ai_type == 'video' || doc.ai_type == 'audio') {
-                        const directMatchSegmentStart = this.getDirectMatchingSegmentStart(doc, citation.direct_text || '', foundChunk.indexes || []);
+            // If not found, fall back to searching through linked docs (maintains compatibility)
+            if (!doc) {
+                for (const linkedDoc of linkedDocs) {
+                    if (linkedDoc.chunk_simpl) {
+                        try {
+                            const docChunkSimpl = JSON.parse(StrCast(linkedDoc.chunk_simpl)) as { chunks: SimplifiedChunk[] };
+                            const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId);
+                            if (foundChunk) {
+                                doc = linkedDoc;
+                                console.log(`Found document by iterating through linked docs`);
+                                break;
+                            }
+                        } catch (e) {
+                            console.error(`Error parsing chunk_simpl for doc ${linkedDoc.id}:`, e);
+                        }
+                    }
+                }
+            }
 
-                        if (directMatchSegmentStart) {
-                            // Navigate to the segment's start time in the media player
-                            await this.goToMediaTimestamp(doc, directMatchSegmentStart, doc.ai_type);
-                        } else {
-                            console.error('No direct matching segment found for the citation.');
+            if (!doc) {
+                console.warn(`Document not found for citation with chunk_id: ${chunkId}`);
+                return;
+            }
+
+            // Process the chunk data
+            let docChunkSimpl: { chunks: SimplifiedChunk[] } = { chunks: [] };
+            try {
+                docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl) || '{"chunks":[]}');
+            } catch (e) {
+                console.error(`Error parsing chunk_simpl for the found document:`, e);
+                return;
+            }
+
+            const foundChunk = docChunkSimpl.chunks.find((chunk: SimplifiedChunk) => chunk.chunkId === chunkId);
+
+            // Handle different chunk types
+            if (foundChunk) {
+                console.log(`Found chunk in document:`, foundChunk);
+
+                // Handle video chunks
+                if (foundChunk.chunkType === CHUNK_TYPE.VIDEO) {
+                    if (foundChunk.start_time !== undefined) {
+                        await this.goToMediaTimestamp(doc, foundChunk.start_time, 'video');
+                    } else {
+                        console.warn('Video chunk missing start_time:', foundChunk);
+                    }
+                }
+                // Handle audio chunks - note that we're using string comparison since 'audio' isn't in CHUNK_TYPE enum
+                else if (String(foundChunk.chunkType).toLowerCase() === 'audio') {
+                    if (foundChunk.start_time !== undefined) {
+                        await this.goToMediaTimestamp(doc, foundChunk.start_time, 'audio');
+                    } else {
+                        console.warn('Audio chunk missing start_time:', foundChunk);
+                    }
+                }
+                // Handle table or image chunks
+                else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) {
+                    this.handleOtherChunkTypes(foundChunk, citation, doc);
+                }
+                // Handle text chunks
+                else if (foundChunk.chunkType === CHUNK_TYPE.TEXT) {
+                    // Find text from the document's chunks metadata
+                    let chunkText = '';
+
+                    try {
+                        // We already parsed the chunks earlier, so use that
+                        const matchingChunk = docChunkSimpl.chunks.find(c => c.chunkId === foundChunk.chunkId);
+                        if (matchingChunk && 'text' in matchingChunk) {
+                            // If the text property exists on the chunk (even though it's not in the type)
+                            chunkText = String(matchingChunk['text'] || '');
                         }
+                    } catch (e) {
+                        console.error('Error getting chunk text:', e);
+                    }
+
+                    // Default text if none found
+                    if (!chunkText) {
+                        chunkText = 'Text content not available';
+                    }
+
+                    this._citationPopup = {
+                        text: chunkText,
+                        visible: true,
+                    };
+                }
+                // Handle URL chunks
+                else if (foundChunk.chunkType === CHUNK_TYPE.URL) {
+                    if (foundChunk.url) {
+                        // Instead of opening the URL in a new window, show the document in the viewer
+                        DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+                        console.log(`Navigated to web document with URL: ${foundChunk.url}`);
                     } else {
-                        // Handle other chunk types as before
-                        this.handleOtherChunkTypes(foundChunk, citation, doc);
+                        console.warn('URL chunk missing URL:', foundChunk);
                     }
                 }
+            } else if (doc.original_segments) {
+                // Handle original segments for media files (parsed here only to validate the JSON; the search below re-reads it)
+                let original_segments: any[] = [];
+                try {
+                    original_segments = JSON.parse(StrCast(doc.original_segments));
+                } catch (e) {
+                    console.error(`Error parsing original_segments:`, e);
+                    return;
+                }
+
+                // Check if there's direct text to find in the segments
+                if (citation.direct_text) {
+                    // Find the segment that contains the direct text
+                    const start = this.getDirectMatchingSegmentStart(doc, citation.direct_text, []);
+                    if (start !== -1) {
+                        await this.goToMediaTimestamp(doc, start, doc.ai_type === 'audio' ? 'audio' : 'video');
+                    }
+                }
+            } else {
+                console.warn('Unable to find chunk or segments for citation', citation);
             }
+        } catch (error) {
+            console.error('Error handling citation click:', error);
         }
     };
 
+    /**
+     * Finds a matching segment in a document based on text content.
+     * @param doc The document to search in
+     * @param citationText The text to find in the document
+     * @param indexesOfSegments Optional array positions (as decimal strings) of the segments to restrict the search to
+     * @returns The start of the best matching segment (first candidate segment if no good match), or -1 without text/segments
+     */
     getDirectMatchingSegmentStart = (doc: Doc, citationText: string, indexesOfSegments: string[]): number => {
-        const originalSegments = JSON.parse(StrCast(doc.original_segments!)).map((segment: any, index: number) => ({
-            index: index.toString(),
-            text: segment.text,
-            start: segment.start,
-            end: segment.end,
-        }));
-
-        if (!Array.isArray(originalSegments) || originalSegments.length === 0 || !Array.isArray(indexesOfSegments)) {
-            return 0;
+        if (!doc || !citationText) return -1;
+
+        // Get original segments from the document
+        const original_segments = doc.original_segments ? JSON.parse(StrCast(doc.original_segments)) : [];
+
+        if (!original_segments || !Array.isArray(original_segments) || original_segments.length === 0) {
+            return -1;
         }
 
-        // Create itemsToSearch array based on indexesOfSegments
-        const itemsToSearch = indexesOfSegments.map((indexStr: string) => {
-            const index = parseInt(indexStr, 10);
-            const segment = originalSegments[index];
-            return { text: segment.text, start: segment.start };
-        });
+        let segments = original_segments;
 
-        console.log('Constructed itemsToSearch:', itemsToSearch);
+        // If specific indexes are provided, keep the segments at those array positions (no stored `index` field is assumed)
+        if (indexesOfSegments && indexesOfSegments.length > 0) {
+            segments = original_segments.filter((_segment: any, position: number) => indexesOfSegments.includes(String(position)));
+        }
+
+        // If no segments match the indexes, use all segments
+        if (segments.length === 0) {
+            segments = original_segments;
+        }
 
-        // Helper function to calculate word overlap score
+        // First try to find an exact match
+        const exactMatch = segments.find((segment: any) => segment.text && segment.text.includes(citationText));
+
+        if (exactMatch) {
+            return exactMatch.start;
+        }
+
+        // If no exact match, find segment with best word overlap
         const calculateWordOverlap = (text1: string, text2: string): number => {
-            const words1 = new Set(text1.toLowerCase().split(/\W+/));
-            const words2 = new Set(text2.toLowerCase().split(/\W+/));
-            const intersection = new Set([...words1].filter(word => words2.has(word)));
-            return intersection.size / Math.max(words1.size, words2.size); // Jaccard similarity
+            if (!text1 || !text2) return 0;
+
+            const words1 = text1.toLowerCase().split(/\s+/);
+            const words2 = text2.toLowerCase().split(/\s+/);
+            const wordSet1 = new Set(words1);
+
+            let overlap = 0;
+            for (const word of words2) {
+                if (wordSet1.has(word)) {
+                    overlap++;
+                }
+            }
+
+            // Return percentage of overlap relative to the shorter text
+            return overlap / Math.min(words1.length, words2.length);
         };
 
-        // Search for the best matching segment
-        let bestMatchStart = 0;
-        let bestScore = 0;
-
-        console.log(`Searching for best match for query: "${citationText}"`);
-        itemsToSearch.forEach(item => {
-            const score = calculateWordOverlap(citationText, item.text);
-            console.log(`Comparing query to segment: "${item.text}" | Score: ${score}`);
-            if (score > bestScore) {
-                bestScore = score;
-                bestMatchStart = item.start;
+        // Find segment with highest word overlap
+        let bestMatch = null;
+        let highestOverlap = 0;
+
+        for (const segment of segments) {
+            if (!segment.text) continue;
+
+            const overlap = calculateWordOverlap(segment.text, citationText);
+            if (overlap > highestOverlap) {
+                highestOverlap = overlap;
+                bestMatch = segment;
             }
-        });
+        }
 
-        console.log('Best match found with score:', bestScore, '| Start time:', bestMatchStart);
+        // Only return matches with significant overlap (more than 30%)
+        if (bestMatch && highestOverlap > 0.3) {
+            return bestMatch.start;
+        }
 
-        // Return the start time of the best match
-        return bestMatchStart;
+        // If no good match found, return the start of the first segment as fallback
+        return segments.length > 0 ? segments[0].start : -1;
     };
 
     /**
@@ -772,7 +883,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 break;
             case CHUNK_TYPE.CSV:
             case CHUNK_TYPE.URL:
-                DocumentManager.Instance.showDocument(doc, { willZoomCentered: true });
+                DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
+                    console.log(`Showing web document in viewer with URL: ${foundChunk.url}`);
+                });
                 break;
             default:
                 console.error('Unhandled chunk type:', foundChunk.chunkType);
@@ -879,6 +992,16 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
             }
         });
         this.addScrollListener();
+
+        // Initialize the document manager by finding existing documents
+        this.docManager.initializeFindDocsFreeform();
+
+        // If there are stored doc IDs in our list of docs to add, process them
+        if (this._linked_docs_to_add.size > 0) {
+            this._linked_docs_to_add.forEach(doc => {
+                this.docManager.processDocument(doc);
+            });
+        }
     }
 
     /**
@@ -892,28 +1015,28 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     /**
      * Getter that retrieves all linked documents for the current document.
      */
-    @computed
-    get linkedDocs() {
-        return LinkManager.Instance.getAllRelatedLinks(this.Document)
-            .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document)))
-            .map(d => DocCast(d?.annotationOn, d))
-            .filter(d => d);
+    @computed get linkedDocs(): Doc[] {
+        const docIds = this.docManager.listDocs();
+        const docs: Doc[] = [];
+
+        // Get documents from the document manager using the getDocument method
+        docIds.forEach(id => {
+            const doc = this.docManager.getDocument(id);
+            if (doc) {
+                docs.push(doc);
+            }
+        });
+
+        return docs;
     }
 
     /**
-     * Getter that retrieves document IDs of linked documents that have AI-related content.
+     * Getter that returns, as an array, every document ID reported by the document manager's listDocs() (no filtering is applied).
      */
     @computed
-    get docIds() {
-        return LinkManager.Instance.getAllRelatedLinks(this.Document)
-            .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document)))
-            .map(d => DocCast(d?.annotationOn, d))
-            .filter(d => d)
-            .filter(d => {
-                console.log(d.ai_doc_id);
-                return d.ai_doc_id;
-            })
-            .map(d => StrCast(d.ai_doc_id));
+    get docIds(): string[] {
+        // Use the document manager to get all document IDs
+        return Array.from(this.docManager.listDocs());
     }
 
     /**
@@ -921,23 +1044,18 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
      */
     @computed
     get summaries(): string {
-        return (
-            LinkManager.Instance.getAllRelatedLinks(this.Document)
-                .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document)))
-                .map(d => DocCast(d?.annotationOn, d))
-                .filter(d => d)
-                .filter(d => d.summary)
-                .map((doc, index) => {
-                    if (PDFCast(doc.data)) {
-                        return `<summary file_name="${PDFCast(doc.data).url.pathname}" applicable_tools=["rag"]>${doc.summary}</summary>`;
-                    } else if (CsvCast(doc.data)) {
-                        return `<summary file_name="${CsvCast(doc.data).url.pathname}" applicable_tools=["dataAnalysis"]>${doc.summary}</summary>`;
-                    } else {
-                        return `${index + 1}) ${doc.summary}`;
-                    }
-                })
-                .join('\n') + '\n'
-        );
+        const linkedDocs = Array.from(this.docManager.listDocs())
+            .map(id => {
+                const doc = this.docManager.extractDocumentMetadata(id); // defensive below: layout/data may be absent
+                if (doc && doc.fields && (doc.fields.layout?.summary || doc.fields.data?.summary)) {
+                    return doc.fields.layout?.summary || doc.fields.data?.summary;
+                }
+                return null;
+            })
+            .filter(Boolean)
+            .join('\n\n');
+
+        return linkedDocs;
     }
 
     /**
@@ -965,7 +1083,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
 
     // Other helper methods for retrieving document data and processing
 
-    retrieveSummaries = () => {
+    retrieveSummaries = (): string => {
         return this.summaries;
     };
 
@@ -973,12 +1091,12 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         return this.linkedCSVs;
     };
 
-    retrieveFormattedHistory = () => {
+    retrieveFormattedHistory = (): string => {
         return this.formattedHistory;
     };
 
-    retrieveDocIds = () => {
-        return this.docIds;
+    retrieveDocIds = (): string[] => {
+        return Array.from(this.docManager.listDocs());
     };
 
     /**