From 321977e670cbdf10f6c49fc9071e3260a8bd4aae Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Thu, 24 Apr 2025 12:06:11 -0400
Subject: Linking docs now works with visible docs

---
 .../nodes/chatbot/utils/AgentDocumentManager.ts    | 923 +++++++++++++++++++++
 1 file changed, 923 insertions(+)
 create mode 100644 src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts

(limited to 'src/client/views/nodes/chatbot/utils')

diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
new file mode 100644
index 000000000..c954226e4
--- /dev/null
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -0,0 +1,923 @@
+import { ChatBox } from '../chatboxcomponents/ChatBox';
+import { Doc, FieldType, Opt } from '../../../../../fields/Doc';
+import { DocData } from '../../../../../fields/DocSymbols';
+import { Observation } from '../types/types';
+import { ParametersType, ToolInfo, Parameter } from '../types/tool_types';
+import { BaseTool } from '../tools/BaseTool';
+import { Docs, DocumentOptions } from '../../../../documents/Documents';
+import { CollectionFreeFormDocumentView } from '../../CollectionFreeFormDocumentView';
+import { v4 as uuidv4 } from 'uuid';
+import { LinkManager, UPDATE_SERVER_CACHE } from '../../../../util/LinkManager';
+import { DocCast, StrCast } from '../../../../../fields/Types';
+import { supportedDocTypes } from '../types/tool_types';
+import { parsedDoc } from '../chatboxcomponents/ChatBox';
+import { faThumbTackSlash } from '@fortawesome/free-solid-svg-icons';
+import { DocumentManager } from '../../../../util/DocumentManager';
+import { DocumentView } from '../../DocumentView';
+
+/**
+ * A tracked document, held as a pair of Doc references.
+ * Entries of this shape are the values of AgentDocumentManager's id-keyed document map.
+ */
+interface AgentDocument {
+    // The Doc that was handed to the manager for registration
+    layoutDoc: Doc;
+    // The value read from `layoutDoc[DocData]` when the entry was created
+    dataDoc: Doc;
+}
+
+/**
+ * Class to manage documents in a freeform view
+ */
+export class AgentDocumentManager {
+    private documentsById: Map<string, AgentDocument>;
+    private chatBox: ChatBox;
+    private chatBoxDocument: Doc | null = null;
+    private fieldMetadata: Record<string, any> = {};
+    private readonly DOCUMENT_ID_FIELD = '_dash_document_id';
+
+    /**
+     * Binds the manager to a ChatBox, registers the ChatBox's own document as the first tracked
+     * document, and then loads the field metadata derived from DocumentOptions.
+     * @param chatBox The ChatBox component whose `Document` anchors this manager
+     */
+    constructor(chatBox: ChatBox) {
+        this.chatBox = chatBox;
+        this.documentsById = new Map<string, AgentDocument>();
+        this.chatBoxDocument = chatBox.Document;
+
+        this.processDocument(this.chatBoxDocument);
+        this.initializeFieldMetadata();
+    }
+
+    /**
+     * Populates `fieldMetadata` with one record per own enumerable property of a fresh DocumentOptions instance.
+     * Every record starts from defaults; description, type and possibleValues are taken from the
+     * property value only when that value is an object that carries a `description` property.
+     */
+    private initializeFieldMetadata() {
+        const optionsTemplate = new DocumentOptions();
+
+        Object.entries(optionsTemplate).forEach(([fieldName, fieldInfo]) => {
+            const record: Record<string, any> = {
+                name: fieldName,
+                withoutUnderscore: fieldName.startsWith('_') ? fieldName.substring(1) : fieldName,
+                description: '',
+                type: 'unknown',
+                required: false,
+                defaultValue: undefined,
+                possibleValues: [],
+            };
+
+            // Only descriptor-like values (objects exposing `description`) contribute further details
+            if (fieldInfo && typeof fieldInfo === 'object' && 'description' in fieldInfo) {
+                record.description = fieldInfo.description;
+                if ('fieldType' in fieldInfo) {
+                    record.type = fieldInfo.fieldType;
+                }
+                if ('values' in fieldInfo && Array.isArray(fieldInfo.values)) {
+                    record.possibleValues = fieldInfo.values;
+                }
+            }
+
+            this.fieldMetadata[fieldName] = record;
+        });
+    }
+
+    /**
+     * Rebuilds the tracked-document map from scratch.
+     *
+     * With a ChatBox document available, registers in order: every document reached from it through
+     * LinkManager links, the ChatBox document itself, and - when the document has a `parent` exposing
+     * a callable `childDocs` - each document that call returns. Without a ChatBox document it falls
+     * back to the ChatBox component's `linkedDocs`. Errors are logged to the console, never rethrown.
+     */
+    public initializeFindDocsFreeform() {
+        this.documentsById.clear();
+
+        try {
+            const anchorDoc = this.chatBoxDocument;
+            if (anchorDoc) {
+                console.log('Finding documents linked to ChatBox document with ID:', anchorDoc.id);
+
+                // Opposite end of every related link; presumably DocCast's second argument is the fallback
+                // used when the anchor has no `annotationOn` - TODO confirm against DocCast
+                const linkedDocs = LinkManager.Instance.getAllRelatedLinks(anchorDoc)
+                    .map(link => DocCast(LinkManager.getOppositeAnchor(link, anchorDoc)))
+                    .map(anchor => DocCast(anchor?.annotationOn, anchor))
+                    .filter(target => target);
+                console.log(`Found ${linkedDocs.length} linked documents via LinkManager`);
+                for (const linked of linkedDocs) {
+                    if (linked) this.processDocument(linked);
+                }
+
+                // The ChatBox document is tracked alongside the documents linked to it
+                this.processDocument(anchorDoc);
+
+                const parent = anchorDoc.parent;
+                if (parent) {
+                    console.log('Found parent document, checking for siblings');
+
+                    // Siblings are only reachable when the parent offers a callable childDocs
+                    if (typeof parent === 'object' && 'childDocs' in parent && typeof parent.childDocs === 'function') {
+                        try {
+                            const siblingDocs = parent.childDocs();
+                            if (Array.isArray(siblingDocs)) {
+                                console.log(`Found ${siblingDocs.length} sibling documents via parent.childDocs()`);
+                                for (const sibling of siblingDocs) {
+                                    if (sibling) this.processDocument(sibling);
+                                }
+                            }
+                        } catch (e) {
+                            console.warn('Error accessing parent.childDocs:', e);
+                        }
+                    }
+                }
+            } else if (this.chatBox && this.chatBox.linkedDocs) {
+                // No document reference: rely on what the ChatBox component itself reports
+                console.log('Using ChatBox.linkedDocs directly');
+                const componentLinkedDocs = this.chatBox.linkedDocs;
+                if (Array.isArray(componentLinkedDocs)) {
+                    console.log(`Found ${componentLinkedDocs.length} documents via ChatBox.linkedDocs`);
+                    for (const linked of componentLinkedDocs) {
+                        if (linked) this.processDocument(linked);
+                    }
+                }
+
+                if (this.chatBox.Document) {
+                    this.processDocument(this.chatBox.Document);
+                }
+            } else {
+                console.warn('No ChatBox document reference available for finding linked documents');
+            }
+
+            console.log(`DocumentMetadataTool found ${this.documentsById.size} total documents`);
+        } catch (error) {
+            console.error('Error finding documents in Freeform view:', error);
+        }
+    }
+
+    /**
+     * Registers a document under its persistent ID; a document whose ID is already tracked is left untouched.
+     * Resolving the ID goes through ensureDocumentId, which may write the ID field onto the document.
+     * @param doc The document to register (stored as layoutDoc, with `doc[DocData]` as dataDoc)
+     */
+    public processDocument(doc: Doc) {
+        const persistentId = this.ensureDocumentId(doc);
+        if (this.documentsById.has(persistentId)) {
+            return;
+        }
+        this.documentsById.set(persistentId, { layoutDoc: doc, dataDoc: doc[DocData] });
+    }
+
+    /**
+     * Resolves the persistent ID of a document, assigning one when the document has none yet.
+     *
+     * Lookup order: the custom ID field, then a string-valued `doc.id`, `doc._id` or `doc.data.id`;
+     * when none applies a fresh UUID is generated. A newly resolved ID is written back to the custom
+     * ID field on a best-effort basis - a failed write is only logged.
+     * @param doc The document to identify
+     * @returns The document's ID
+     */
+    private ensureDocumentId(doc: Doc): string {
+        // Fast path: an ID was already stored on the document
+        const storedId = doc[this.DOCUMENT_ID_FIELD];
+        if (storedId) {
+            return String(storedId);
+        }
+
+        // Picks the first usable string ID from the document, falling back to a generated UUID
+        const resolveId = (): string => {
+            if (doc.id && typeof doc.id === 'string') {
+                return doc.id;
+            }
+            if (doc._id && typeof doc._id === 'string') {
+                return doc._id;
+            }
+            if (doc.data && typeof doc.data === 'object' && 'id' in doc.data && typeof doc.data.id === 'string') {
+                return doc.data.id;
+            }
+            const generated = uuidv4();
+            console.log(`Generated new UUID for document with title: ${doc.title || 'Untitled'}`);
+            return generated;
+        };
+
+        const docId = resolveId();
+
+        // Persist the ID so later lookups take the fast path
+        try {
+            doc[this.DOCUMENT_ID_FIELD] = docId;
+        } catch (e) {
+            console.warn(`Could not assign ID to document property`, e);
+        }
+
+        return docId;
+    }
+
+    /**
+     * Builds a JSON-friendly metadata summary for one tracked document.
+     *
+     * For every field name known from DocumentOptions (and, for underscore-prefixed names, the same
+     * name without the underscore) the layout and data documents are probed. Each defined value is
+     * formatted into `fields.layout` / `fields.data`, and `fieldLocationMap` records where the
+     * un-prefixed name lives, with layout taking precedence over data.
+     * @param doc The tracked layout/data document pair; may be undefined
+     * @returns The metadata object, or null when no document was supplied
+     */
+    public extractDocumentMetadata(doc?: AgentDocument) {
+        if (!doc) return null;
+        const { layoutDoc, dataDoc } = doc;
+
+        const layoutFields: Record<string, any> = {};
+        const dataFields: Record<string, any> = {};
+        const fieldLocationMap: Record<string, string> = {};
+
+        // Probes both documents for `key`, storing formatted values and the location of `locationKey`
+        const probe = (key: string, locationKey: string) => {
+            const layoutValue = layoutDoc[key];
+            if (layoutValue !== undefined) {
+                layoutFields[key] = this.formatFieldValue(layoutValue);
+                fieldLocationMap[locationKey] = 'layout';
+            }
+
+            const dataValue = dataDoc ? dataDoc[key] : undefined;
+            if (dataValue !== undefined) {
+                dataFields[key] = this.formatFieldValue(dataValue);
+                // A layout hit, if any, wins over the data document
+                if (!fieldLocationMap[locationKey]) {
+                    fieldLocationMap[locationKey] = 'data';
+                }
+            }
+        };
+
+        for (const fieldName of Object.keys(this.fieldMetadata)) {
+            const strippedName = fieldName.startsWith('_') ? fieldName.substring(1) : fieldName;
+            probe(fieldName, strippedName);
+            // Underscore-prefixed definitions may also be stored under the bare name
+            if (strippedName !== fieldName) {
+                probe(strippedName, strippedName);
+            }
+        }
+
+        // Expose width/height under the bare name too, so both spellings are discoverable
+        for (const dimension of ['width', 'height']) {
+            const underscored = `_${dimension}`;
+            if (layoutFields[underscored] !== undefined && layoutFields[dimension] === undefined) {
+                layoutFields[dimension] = layoutFields[underscored];
+                fieldLocationMap[dimension] = 'layout';
+            }
+        }
+
+        return {
+            // Read the field ensureDocumentId writes first, so the reported id matches the key used in documentsById
+            id: layoutDoc[this.DOCUMENT_ID_FIELD] || layoutDoc.dash_document_id || layoutDoc.id || '',
+            title: layoutDoc.title || '',
+            type: layoutDoc.type || '',
+            fields: {
+                layout: layoutFields,
+                data: dataFields,
+            },
+            fieldLocationMap,
+        } as Record<string, any>;
+    }
+
+    /**
+     * Turns a field value into something that serializes cleanly to JSON.
+     *
+     *  - null/undefined become null
+     *  - a Doc becomes a small `{ type, id, title, docType }` summary
+     *  - a string that looks like serialized rich text becomes `{ type: 'RichText', text, length }`
+     *    when any text can be extracted from it
+     *  - other objects use their own `toString` when they have one, else `JSON.stringify`
+     *  - everything else is returned unchanged
+     * @param value The field value to format
+     * @returns A JSON-friendly representation of the field value
+     */
+    private formatFieldValue(value: any): any {
+        if (value === undefined || value === null) {
+            return null;
+        }
+
+        if (value instanceof Doc) {
+            const summary = {
+                type: 'Doc',
+                id: value.id || this.ensureDocumentId(value),
+                title: value.title || '',
+                docType: value.type || '',
+            };
+            return summary;
+        }
+
+        const looksLikeRichText = typeof value === 'string' && value.includes('"type":"doc"') && value.includes('"content":');
+        if (looksLikeRichText) {
+            try {
+                const rtfObj = JSON.parse(value);
+                if (rtfObj.doc && rtfObj.doc.content) {
+                    // Depth-first, document-order walk using an explicit stack instead of recursion
+                    let plainText = '';
+                    const pending: any[] = [rtfObj.doc];
+                    while (pending.length > 0) {
+                        const node = pending.pop();
+                        if (node.text) {
+                            plainText += node.text;
+                        }
+                        if (node.content && Array.isArray(node.content)) {
+                            // Push children in reverse so the first child is visited next
+                            for (let i = node.content.length - 1; i >= 0; i--) {
+                                pending.push(node.content[i]);
+                            }
+                        }
+                    }
+
+                    // The raw serialized value is deliberately left out because it can be very large
+                    if (plainText) {
+                        return {
+                            type: 'RichText',
+                            text: plainText,
+                            length: plainText.length,
+                        };
+                    }
+                }
+            } catch (e) {
+                // Malformed content: the value is treated as an ordinary string below
+            }
+        }
+
+        if (typeof value !== 'object') {
+            // Primitives need no conversion
+            return value;
+        }
+
+        // Objects with their own toString (arrays, custom field classes) know how to render themselves
+        if (value.toString && value.toString !== Object.prototype.toString) {
+            return value.toString();
+        }
+
+        try {
+            return JSON.stringify(value);
+        } catch (e) {
+            return '[Complex Object]';
+        }
+    }
+
+    /**
+     * Coerces a caller-supplied value to the type expected for a field.
+     *
+     * Rules, in order:
+     *  - `text`: the value is stringified; a string that parses as a JSON object or array is kept as
+     *    is, anything else is wrapped in a one-paragraph rich-text JSON document
+     *  - numbers and booleans pass through; the strings "true"/"false" (any case) become booleans
+     *  - otherwise the value is stringified and converted according to the `type` recorded in
+     *    `fieldMetadata` (looked up with and without the leading underscore); width/height fields
+     *    that have no metadata are treated as numbers
+     * A value that cannot be converted to the expected type is returned as a string.
+     * @param fieldName The name of the field being written
+     * @param fieldValue The value to convert
+     * @returns The converted value
+     */
+    private convertFieldValue(fieldName: string, fieldValue: any): any {
+        // Text is handled before any coercion so that plain text such as "true" or 42 is stored as
+        // rich text instead of being turned into a boolean or number
+        if (fieldName === 'text') {
+            const plainText = typeof fieldValue === 'string' ? fieldValue : String(fieldValue);
+            try {
+                // Only structured JSON counts as an already-serialized value; bare JSON primitives
+                // ("42", "null", "true") are ordinary text
+                const parsed: unknown = JSON.parse(plainText);
+                if (parsed !== null && typeof parsed === 'object') {
+                    return plainText;
+                }
+            } catch (e) {
+                // Not JSON at all: wrap it as plain text below
+            }
+            const rtf = {
+                doc: {
+                    type: 'doc',
+                    content: [
+                        {
+                            type: 'paragraph',
+                            content: [
+                                {
+                                    type: 'text',
+                                    text: plainText,
+                                },
+                            ],
+                        },
+                    ],
+                },
+            };
+            return JSON.stringify(rtf);
+        }
+
+        if (typeof fieldValue === 'number' || typeof fieldValue === 'boolean') {
+            return fieldValue;
+        }
+
+        if (typeof fieldValue === 'string') {
+            const lowered = fieldValue.toLowerCase();
+            if (lowered === 'true') return true;
+            if (lowered === 'false') return false;
+        }
+
+        const rawValue: string = typeof fieldValue === 'string' ? fieldValue : String(fieldValue);
+
+        // Number('') is 0, so blank input is returned untouched rather than silently becoming zero
+        const toNumberOrRaw = (text: string): number | string => {
+            if (text.trim() === '') return text;
+            const num = Number(text);
+            return Number.isNaN(num) ? text : num;
+        };
+
+        // Metadata may be registered under either spelling of the field name
+        const normalizedFieldName = fieldName.startsWith('_') ? fieldName : `_${fieldName}`;
+        const strippedFieldName = fieldName.startsWith('_') ? fieldName.substring(1) : fieldName;
+        const fieldMeta = this.fieldMetadata[normalizedFieldName] || this.fieldMetadata[strippedFieldName];
+
+        if (!fieldMeta) {
+            // Width and height are numeric even when DocumentOptions does not describe them
+            if (fieldName === '_width' || fieldName === '_height' || fieldName === 'width' || fieldName === 'height') {
+                return toNumberOrRaw(rawValue);
+            }
+            return rawValue;
+        }
+
+        // The recorded type is copied verbatim from DocumentOptions and is not guaranteed to be a string
+        const fieldType: string = typeof fieldMeta.type === 'string' ? fieldMeta.type : '';
+
+        if (fieldType === 'boolean') {
+            return rawValue.toLowerCase() === 'true';
+        }
+        if (fieldType === 'number') {
+            return toNumberOrRaw(rawValue);
+        }
+        if (fieldType === 'date') {
+            // An unparsable date yields NaN rather than an exception, so it has to be checked explicitly
+            const timestamp = new Date(rawValue).getTime();
+            return Number.isNaN(timestamp) ? rawValue : timestamp;
+        }
+        if (fieldType.includes('list') || fieldType.includes('array') || fieldType === 'json' || fieldType === 'object') {
+            try {
+                return JSON.parse(rawValue);
+            } catch (e) {
+                return rawValue;
+            }
+        }
+
+        // Default to string
+        return rawValue;
+    }
+
+    /**
+     * Summarizes the field metadata collected from DocumentOptions.
+     *
+     * The result contains the per-field records (`fields`), the field names grouped by primitive type
+     * (`fieldsByType`: string, number, boolean), a map from un-prefixed names to their
+     * underscore-prefixed form (`fieldNameMappings`), a rough grouping by purpose based on the field
+     * name (`commonFields`), and fixed guidance on auto-sizing and on the text field.
+     * @returns A structured object containing metadata about all available document fields
+     */
+    public getAllFieldMetadata() {
+        const result: Record<string, any> = {
+            fieldCount: Object.keys(this.fieldMetadata).length,
+            fields: {},
+            fieldsByType: {
+                string: [],
+                number: [],
+                boolean: [],
+            },
+            fieldNameMappings: {},
+            commonFields: {
+                appearance: [],
+                position: [],
+                size: [],
+                content: [],
+                behavior: [],
+                layout: [],
+            },
+        };
+
+        // x / y only indicate a position when they form a whole underscore-delimited segment of the
+        // name; a bare substring test would also match names such as "text" or "layout"
+        const axisSegment = /(^|_)[xy](_|$)/;
+        const containsAny = (name: string, needles: string[]) => needles.some(needle => name.includes(needle));
+
+        for (const [fieldName, fieldInfo] of Object.entries(this.fieldMetadata)) {
+            const strippedName = fieldName.startsWith('_') ? fieldName.substring(1) : fieldName;
+
+            if (strippedName !== fieldName) {
+                result.fieldNameMappings[strippedName] = fieldName;
+            }
+
+            // Records built by initializeFieldMetadata store `type` / `possibleValues`; the
+            // `fieldType` / `values` spellings are accepted as a fallback
+            const rawType = fieldInfo.type ?? fieldInfo.fieldType;
+            const possibleValues = fieldInfo.possibleValues ?? fieldInfo.values ?? [];
+
+            result.fields[fieldName] = {
+                name: fieldName,
+                displayName: strippedName,
+                description: fieldInfo.description || '',
+                type: rawType || 'unknown',
+                possibleValues,
+            };
+
+            // Only the primitive types have a bucket; every other type is left ungrouped
+            const typeKey = typeof rawType === 'string' ? rawType.toLowerCase() : 'unknown';
+            const typeBucket = result.fieldsByType[typeKey];
+            if (Array.isArray(typeBucket)) {
+                typeBucket.push(fieldName);
+            }
+
+            // Categorize by purpose; the first matching group wins
+            if (containsAny(fieldName, ['width', 'height', 'size'])) {
+                result.commonFields.size.push(fieldName);
+            } else if (containsAny(fieldName, ['color', 'background', 'border'])) {
+                result.commonFields.appearance.push(fieldName);
+            } else if (axisSegment.test(fieldName) || containsAny(fieldName, ['position', 'pan'])) {
+                result.commonFields.position.push(fieldName);
+            } else if (containsAny(fieldName, ['text', 'title', 'data'])) {
+                result.commonFields.content.push(fieldName);
+            } else if (containsAny(fieldName, ['action', 'click', 'event'])) {
+                result.commonFields.behavior.push(fieldName);
+            } else if (fieldName.includes('layout')) {
+                result.commonFields.layout.push(fieldName);
+            }
+        }
+
+        // Add special section for auto-sizing related fields
+        result.autoSizingFields = {
+            height: {
+                autoHeightField: '_layout_autoHeight',
+                heightField: '_height',
+                displayName: 'height',
+                usage: 'To manually set height, first set layout_autoHeight to false',
+            },
+            width: {
+                autoWidthField: '_layout_autoWidth',
+                widthField: '_width',
+                displayName: 'width',
+                usage: 'To manually set width, first set layout_autoWidth to false',
+            },
+        };
+
+        // Add special section for text field format
+        result.specialFields = {
+            text: {
+                name: 'text',
+                description: 'Document text content',
+                format: 'RichTextField',
+                note: 'When setting text, provide plain text - it will be automatically converted to the correct format',
+                example: 'For setting: "Hello world" (plain text); For getting: Will be converted to plaintext for display',
+            },
+        };
+
+        return result;
+    }
+
+    /**
+     * Edits a specific field on a tracked document.
+     *
+     * The value is first converted with convertFieldValue. It is written to the layout document when
+     * the field already exists there, when the field name starts with an underscore, or when there
+     * is no data document; otherwise it is written to the data document. `width` and `height` are
+     * treated as aliases of `_width` and `_height`.
+     * @param docId The ID of the document to edit
+     * @param fieldName The name of the field to edit
+     * @param fieldValue The new value for the field (string, number, or boolean)
+     * @returns Object with success status, message, and additional information
+     */
+    public editDocumentField(
+        docId: string,
+        fieldName: string,
+        fieldValue: string | number | boolean
+    ): {
+        success: boolean;
+        message: string;
+        fieldName?: string;
+        originalFieldName?: string;
+        newValue?: any;
+        warning?: string;
+    } {
+        // Handle common field name aliases (width -> _width, height -> _height);
+        // every other name is used exactly as given
+        const normalizedFieldName = fieldName === 'width' || fieldName === 'height' ? `_${fieldName}` : fieldName;
+
+        const entry = this.documentsById.get(docId);
+        if (!entry) {
+            return { success: false, message: `Document with ID ${docId} not found` };
+        }
+
+        const { layoutDoc, dataDoc } = entry;
+        if (!layoutDoc && !dataDoc) {
+            return { success: false, message: `Could not find layout or data document for document with ID ${docId}` };
+        }
+
+        try {
+            // Convert the field value to the appropriate type based on field metadata
+            const convertedValue = this.convertFieldValue(normalizedFieldName, fieldValue);
+
+            // Prefer the layout document when the field already lives there, when the name marks it as a
+            // layout property, or when no data document exists to receive it
+            const writeToLayout = Boolean(layoutDoc) && (Doc.Get(layoutDoc, normalizedFieldName, true) !== undefined || normalizedFieldName.startsWith('_') || !dataDoc);
+            const targetDoc = writeToLayout ? layoutDoc : dataDoc;
+            const targetLocation = writeToLayout ? 'layout' : 'data';
+
+            // Set the field value on the target document
+            targetDoc[normalizedFieldName] = convertedValue;
+
+            return {
+                success: true,
+                message: `Successfully updated field '${normalizedFieldName}' on ${targetLocation} document (ID: ${docId})`,
+                fieldName: normalizedFieldName,
+                originalFieldName: fieldName,
+                newValue: convertedValue,
+            };
+        } catch (error) {
+            console.error('Error editing document field:', error);
+            return {
+                success: false,
+                message: `Error updating field: ${error instanceof Error ? error.message : String(error)}`,
+            };
+        }
+    }
+    /**
+     * Gets metadata for a specific document or for all tracked documents.
+     *
+     * With an ID, the result is that document's metadata, or null when the ID is not tracked.
+     * Without one, the result bundles the metadata of every tracked document (keyed by document ID),
+     * the number of tracked documents, and the known field definitions.
+     * @param documentId Optional ID of a specific document to get metadata for
+     * @returns Document metadata or metadata for all documents
+     */
+    public getDocumentMetadata(documentId?: string): any {
+        if (documentId) {
+            return this.extractDocumentMetadata(this.documentsById.get(documentId));
+        }
+
+        // A plain object has no add(); entries are keyed by the same ID used to look documents up
+        const documentsMetadata: Record<string, any> = {};
+        for (const [id, doc] of this.documentsById) {
+            documentsMetadata[id] = this.extractDocumentMetadata(doc);
+        }
+
+        return {
+            documentCount: this.documentsById.size,
+            documents: documentsMetadata,
+            fieldDefinitions: this.fieldMetadata,
+        };
+    }
+
+    /**
+     * Links every pair of distinct documents named in docIds.
+     * A pair is linked at most once, and only when both IDs resolve to tracked documents.
+     * Each created link is registered with LinkManager and added to the user document's 'links' list.
+     * @param docIds Array of document IDs to link
+     * @returns Array of created link documents
+     */
+    public addLinks(docIds: string[]): Doc[] {
+        const newLinks: Doc[] = [];
+        // Order-independent "idA_idB" keys of the pairs that have been linked so far
+        const linkedPairs = new Set<string>();
+
+        // Title of doc.parent when that is an object with a 'title' entry, otherwise undefined
+        const parentTitle = (doc: Doc | null | undefined): string | undefined => {
+            const parent = doc?.parent;
+            return parent && typeof parent === 'object' && 'title' in parent ? String(parent.title) : undefined;
+        };
+
+        for (const sourceId of docIds) {
+            const source = this.documentsById.get(sourceId);
+            for (const targetId of docIds) {
+                // Never link a document to itself
+                if (sourceId === targetId) continue;
+
+                // Sorting the two IDs yields the same key for (a, b) and (b, a)
+                const pairKey = [sourceId, targetId].sort().join('_');
+                if (linkedPairs.has(pairKey)) continue;
+
+                // Both IDs must resolve to tracked documents that have a layout doc
+                const target = this.documentsById.get(targetId);
+                if (!source?.layoutDoc || !target?.layoutDoc) continue;
+
+                try {
+                    const link = Docs.Create.LinkDocument(source.layoutDoc, target.layoutDoc);
+
+                    // Apply the default color only when the link has none
+                    if (!link.color) {
+                        link.color = 'lightBlue';
+                    }
+
+                    // Flags the original author set to keep the link visible
+                    link.link_visible = true;
+                    link.link_enabled = true;
+                    link.link_autoMove = true;
+                    link.link_showDirected = true;
+
+                    // embedContainer: the chat box parent's title, else the source parent's title,
+                    // else a default tab name
+                    link.embedContainer = parentTitle(this.chatBoxDocument) ?? parentTitle(source.layoutDoc) ?? 'Untitled Tab 1';
+
+                    // Register the link with the link manager
+                    LinkManager.Instance.addLink(link);
+
+                    // Add the link to whatever DocumentView.linkCommonAncestor returns, when it returns something
+                    const commonAncestor = DocumentView.linkCommonAncestor(link);
+                    commonAncestor?.ComponentView?.addDocument?.(link);
+                    // Record the link in the user document's 'links' list
+                    Doc.AddDocToList(Doc.UserDoc(), 'links', link);
+
+                    if (this.chatBoxDocument) {
+                        // Add both endpoints through the chat box's addDocument prop first,
+                        // then ask DocumentManager to show each of them without zoom-centering
+                        const endpoints = [source.layoutDoc, target.layoutDoc];
+                        endpoints.forEach(endpoint => this.chatBox._props.addDocument?.(endpoint));
+                        endpoints.forEach(endpoint => DocumentManager.Instance.showDocument(endpoint, { willZoomCentered: false }));
+                    }
+
+                    newLinks.push(link);
+                    linkedPairs.add(pairKey);
+                } catch (error) {
+                    // A failing pair is logged and left unmarked; the remaining pairs are still attempted
+                    console.error('Error creating link between documents:', error);
+                }
+            }
+        }
+
+        // After 100 ms, call UPDATE_SERVER_CACHE; a failure there is only logged as a warning
+        setTimeout(() => {
+            try {
+                if (typeof UPDATE_SERVER_CACHE === 'function') {
+                    UPDATE_SERVER_CACHE();
+                }
+            } catch (e) {
+                console.warn('Could not update server cache after creating links:', e);
+            }
+        }, 100);
+
+        return newLinks;
+    }
+    /**
+     * Checks whether a string is one of the values of supportedDocTypes
+     * @param docType The document type to validate
+     * @returns True if docType equals a supportedDocTypes value, false otherwise
+     */
+    private isValidDocType(docType: string): boolean {
+        return Object.values(supportedDocTypes).some(knownType => knownType === docType);
+    }
+    /**
+     * Creates a document in the dashboard, links it to the chat box document and shows it.
+     *
+     * @param docType - The type of document to create; must be one of the supportedDocTypes values.
+     * @param title - The title given to the new document.
+     * @param data - The data used to generate the document.
+     * @returns The value returned by the chat box's whichDoc call.
+     * @throws Error when docType is invalid, or wrapping any failure raised while creating the document.
+     */
+    createDocInDash = (docType: string, title: string, data: string) => {
+        // Reject unknown document types before doing any work
+        if (!this.isValidDocType(docType)) {
+            throw new Error(`Invalid document type: ${docType}`);
+        }
+
+        try {
+            // Minimal description: type, title and data plus fixed position, size and layout flags
+            const docSpec: parsedDoc = {
+                doc_type: docType,
+                title,
+                data,
+                x: 0,
+                y: 0,
+                _width: 300,
+                _height: 300,
+                _layout_fitWidth: false,
+                _layout_autoHeight: true,
+            };
+
+            // The description is handed to the chat box's whichDoc, so a chat box is required
+            if (!this.chatBox) {
+                throw new Error('ChatBox instance not available for creating document');
+            }
+            const created = this.chatBox.whichDoc(docSpec, false);
+            if (created) {
+                // Link the result to the chat box document, add it via the addDocument prop, then show it
+                LinkManager.Instance.addLink(Docs.Create.LinkDocument(this.chatBoxDocument!, created));
+                this.chatBox._props.addDocument?.(created);
+                DocumentManager.Instance.showDocument(created, { willZoomCentered: true }, () => {});
+            }
+            return created;
+        } catch (error) {
+            // Any failure above (including the missing chat box) is re-thrown with a common prefix
+            throw new Error(`Error creating document: ${error}`);
+        }
+    };
+
+    public has(docId: string) {
+        return this.documentsById.has(docId); // true when docId is a key of the documentsById map
+    }
+
+    /**
+     * Lists every tracked document in a simple text format.
+     * Each document is summarised as { id, title, type }; a falsy title or type is replaced
+     * by a placeholder string.
+     * @returns A one-element array holding a { type: 'text', text } entry, where text is either
+     * a "no documents" notice or the document count followed by the summaries as indented JSON
+     */
+    public listDocs() {
+        // Map entries are [id, document] pairs
+        const summaries = Array.from(this.documentsById, ([id, tracked]) => ({
+            id,
+            title: tracked.layoutDoc.title || 'Untitled Document',
+            type: tracked.layoutDoc.type || tracked.dataDoc.type || 'Unknown Type',
+        }));
+
+        // Choose the message up front so that one return statement serves both cases
+        const text =
+            summaries.length === 0
+                ? 'No documents found in the current view.'
+                : `Found ${summaries.length} document(s) in the current view:\n${JSON.stringify(summaries, null, 2)}`;
+
+        // Same single-entry shape whether or not any documents were found
+        return [{ type: 'text', text }];
+    }
+
+    public createAgentDoc(doc: Doc) {
+        const tracked = { layoutDoc: doc, dataDoc: doc[DocData] }; // the doc paired with its DocData counterpart
+        const docId = this.ensureDocumentId(doc); // key under which the pair is stored
+        this.documentsById.set(docId, tracked); // replaces any entry already stored for that ID; no duplicate check is made
+        return tracked;
+    }
+}
-- 
cgit v1.2.3-70-g09d2


From 5ce2263849bfb901e276a4c5fc8ca2dbd8b80350 Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Thu, 24 Apr 2025 13:21:00 -0400
Subject: attempt at linking docs but listing metadata doesn't work

---
 .../views/nodes/chatbot/agentsystem/Agent.ts       |  8 +++---
 .../nodes/chatbot/chatboxcomponents/ChatBox.tsx    | 29 ++++++++++++++++++++--
 src/client/views/nodes/chatbot/tools/SearchTool.ts |  2 +-
 .../nodes/chatbot/tools/WebsiteInfoScraperTool.ts  | 11 ++++----
 .../nodes/chatbot/utils/AgentDocumentManager.ts    | 10 ++++++--
 5 files changed, 46 insertions(+), 14 deletions(-)

(limited to 'src/client/views/nodes/chatbot/utils')

diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
index 5af021dbf..c021d141e 100644
--- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
@@ -66,10 +66,12 @@ export class Agent {
         history: () => string,
         csvData: () => { filename: string; id: string; text: string }[],
         addLinkedUrlDoc: (url: string, id: string) => void,
+        getLinkedUrlDocId: (url: string) => string[],
         createImage: (result: Upload.FileInformation & Upload.InspectionResults, options: DocumentOptions) => void,
         // eslint-disable-next-line @typescript-eslint/no-unused-vars
         createCSVInDash: (url: string, title: string, id: string, data: string) => void,
-        chatBox: ChatBox
+        chatBox: ChatBox,
+        docManager: AgentDocumentManager
     ) {
         // Initialize OpenAI client with API key from environment
         this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true });
@@ -77,14 +79,14 @@ export class Agent {
         this._history = history;
         this._summaries = summaries;
         this._csvData = csvData;
-        this._docManager = new AgentDocumentManager(chatBox);
+        this._docManager = docManager;
 
         // Define available tools for the assistant
         this.tools = {
             calculate: new CalculateTool(),
             rag: new RAGTool(this.vectorstore),
             dataAnalysis: new DataAnalysisTool(csvData),
-            websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc),
+            websiteInfoScraper: new WebsiteInfoScraperTool(getLinkedUrlDocId),
             searchTool: new SearchTool(addLinkedUrlDoc),
             noTool: new NoTool(),
             //imageCreationTool: new ImageCreationTool(createImage),
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index e09b4313f..43765c1ce 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -44,6 +44,7 @@ import { ProgressBar } from './ProgressBar';
 import { OpenWhere } from '../../OpenWhere';
 import { Upload } from '../../../../../server/SharedMediaTypes';
 import { DocumentMetadataTool } from '../tools/DocumentMetadataTool';
+import { AgentDocumentManager } from '../utils/AgentDocumentManager';
 
 dotenv.config();
 
@@ -76,6 +77,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     private agent: Agent;
     private messagesRef: React.RefObject<HTMLDivElement>;
     private _textInputRef: HTMLInputElement | undefined | null;
+    private docManager: AgentDocumentManager;
 
     /**
      * Static method that returns the layout string for the field.
@@ -107,7 +109,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
             this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id);
         }
         this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds);
-        this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.createImageInDash, this.createCSVInDash, this);
+        this.docManager = new AgentDocumentManager(this);
+        this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.getLinkedUrlDocIds, this.createImageInDash, this.createCSVInDash, this, this.docManager);
 
         // Reinitialize the DocumentMetadataTool with a direct reference to this ChatBox instance
         // This ensures the tool can properly access documents in the same Freeform view
@@ -380,7 +383,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     @action
     addLinkedUrlDoc = async (url: string, id: string) => {
         const doc = Docs.Create.WebDocument(url, { data_useCors: true });
-
+        this.docManager.addCustomId(doc, id);
         const linkDoc = Docs.Create.LinkDocument(this.Document, doc);
         LinkManager.Instance.addLink(linkDoc);
 
@@ -391,6 +394,28 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         };
 
         doc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] });
+        this.docManager.processDocument(doc);
+    };
+
+    /**
+     * Retrieves the IDs of linked url documents.
+     * @returns An array of document IDs.
+     */
+    @action
+    getLinkedUrlDocIds = () => {
+        const linkedDocs: Doc[] = this.linkedDocs;
+        const linkedUrlDocIds: string[] = [];
+
+        for (const doc of linkedDocs) {
+            if (doc.chunk_simpl) {
+                const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] };
+                const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkType === CHUNK_TYPE.URL);
+                if (foundChunk) {
+                    linkedUrlDocIds.push(foundChunk.chunkId);
+                }
+            }
+        }
+        return linkedUrlDocIds;
     };
 
     /**
diff --git a/src/client/views/nodes/chatbot/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts
index 6a11407a5..2ee30f0cf 100644
--- a/src/client/views/nodes/chatbot/tools/SearchTool.ts
+++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts
@@ -28,7 +28,7 @@ export class SearchTool extends BaseTool<SearchToolParamsType> {
     private _addLinkedUrlDoc: (url: string, id: string) => void;
     private _max_results: number;
 
-    constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 4) {
+    constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 3) {
         super(searchToolInfo);
         this._addLinkedUrlDoc = addLinkedUrlDoc;
         this._max_results = max_results;
diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
index 19ccd0b36..bff38ae15 100644
--- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
+++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
@@ -66,11 +66,11 @@ const websiteInfoScraperToolInfo: ToolInfo<WebsiteInfoScraperToolParamsType> = {
 };
 
 export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParamsType> {
-    private _addLinkedUrlDoc: (url: string, id: string) => void;
+    private _getLinkedUrlDocId: (url: string) => string[];
 
-    constructor(addLinkedUrlDoc: (url: string, id: string) => void) {
+    constructor(getLinkedUrlDocIds: (url: string) => string[]) {
         super(websiteInfoScraperToolInfo);
-        this._addLinkedUrlDoc = addLinkedUrlDoc;
+        this._getLinkedUrlDocId = getLinkedUrlDocIds;
     }
 
     async execute(args: ParametersType<WebsiteInfoScraperToolParamsType>): Promise<Observation[]> {
@@ -79,9 +79,8 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
         // Create an array of promises, each one handling a website scrape for a URL
         const scrapingPromises = urls.map(async url => {
             try {
-                const { website_plain_text } = await Networking.PostToServer('/scrapeWebsite', { url });
-                const id = uuidv4();
-                this._addLinkedUrlDoc(url, id);
+                const { website_plain_text } = (await Networking.PostToServer('/scrapeWebsite', { url })) as { website_plain_text: string };
+                const id = this._getLinkedUrlDocId(url);
                 return {
                     type: 'text',
                     text: `<chunk chunk_id="${id}" chunk_type="url">\n${website_plain_text}\n</chunk>`,
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index c954226e4..4eeac3c6a 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -90,7 +90,7 @@ export class AgentDocumentManager {
      */
     public initializeFindDocsFreeform() {
         // Reset collections
-        this.documentsById.clear();
+        //this.documentsById.clear();
 
         try {
             // Use the LinkManager approach which is proven to work in ChatBox
@@ -109,6 +109,7 @@ export class AgentDocumentManager {
                 linkedDocs.forEach((doc: Doc) => {
                     if (doc) {
                         this.processDocument(doc);
+                        console.log('Processed linked document:', doc.id, doc.title, doc.type);
                     }
                 });
 
@@ -164,6 +165,11 @@ export class AgentDocumentManager {
         }
     }
 
+    public addCustomId(doc: Doc, id: string) {
+        doc.id = id; // store the caller-supplied ID on the document's `id` field
+        doc.DOCUMENT_ID_FIELD = id; // NOTE(review): writes a field literally named "DOCUMENT_ID_FIELD" — confirm that is the intended key
+    }
+
     /**
      * Process a document by ensuring it has an ID and adding it to the appropriate collections
      * @param doc The document to process
@@ -730,7 +736,7 @@ export class AgentDocumentManager {
             // Get metadata for all documents
             const documentsMetadata: Record<string, any> = {};
             for (const doc of this.documentsById.values()) {
-                documentsMetadata.add(this.extractDocumentMetadata(doc));
+                documentsMetadata.add(this.extractDocumentMetadata(doc) ?? { documentId: doc.layoutDoc.id, title: doc.layoutDoc.title, type: doc.layoutDoc.type });
             }
 
             return {
-- 
cgit v1.2.3-70-g09d2


From 3ef3d40506348d9fd537cc8f4aea975b9770689f Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Sun, 27 Apr 2025 13:14:49 -0400
Subject: new attempt with new citation unification

---
 .../views/nodes/chatbot/agentsystem/Agent.ts       |   5 +-
 .../nodes/chatbot/chatboxcomponents/ChatBox.tsx    | 450 +++++++++++++--------
 .../nodes/chatbot/tools/DocumentMetadataTool.ts    |  16 +-
 src/client/views/nodes/chatbot/tools/SearchTool.ts |  18 +-
 src/client/views/nodes/chatbot/types/types.ts      |   1 +
 .../nodes/chatbot/utils/AgentDocumentManager.ts    | 168 +++++---
 .../views/nodes/chatbot/vectorstore/Vectorstore.ts | 130 ++++--
 7 files changed, 510 insertions(+), 278 deletions(-)

(limited to 'src/client/views/nodes/chatbot/utils')

diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
index c021d141e..80fdb6533 100644
--- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
@@ -65,12 +65,9 @@ export class Agent {
         summaries: () => string,
         history: () => string,
         csvData: () => { filename: string; id: string; text: string }[],
-        addLinkedUrlDoc: (url: string, id: string) => void,
         getLinkedUrlDocId: (url: string) => string[],
         createImage: (result: Upload.FileInformation & Upload.InspectionResults, options: DocumentOptions) => void,
-        // eslint-disable-next-line @typescript-eslint/no-unused-vars
         createCSVInDash: (url: string, title: string, id: string, data: string) => void,
-        chatBox: ChatBox,
         docManager: AgentDocumentManager
     ) {
         // Initialize OpenAI client with API key from environment
@@ -87,7 +84,7 @@ export class Agent {
             rag: new RAGTool(this.vectorstore),
             dataAnalysis: new DataAnalysisTool(csvData),
             websiteInfoScraper: new WebsiteInfoScraperTool(getLinkedUrlDocId),
-            searchTool: new SearchTool(addLinkedUrlDoc),
+            searchTool: new SearchTool(this._docManager),
             noTool: new NoTool(),
             //imageCreationTool: new ImageCreationTool(createImage),
             documentMetadata: new DocumentMetadataTool(this._docManager),
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 43765c1ce..35dbee3e9 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -71,7 +71,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     @observable private _citationPopup: { text: string; visible: boolean } = { text: '', visible: false };
 
     // Private properties for managing OpenAI API, vector store, agent, and UI elements
-    private openai: OpenAI;
+    private openai!: OpenAI; // Using definite assignment assertion
     private vectorstore_id: string;
     private vectorstore: Vectorstore;
     private agent: Agent;
@@ -98,25 +98,34 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
      */
     constructor(props: FieldViewProps) {
         super(props);
-        makeObservable(this); // Enable MobX observables
+        makeObservable(this);
 
-        // Initialize OpenAI, vectorstore, and agent
-        this.openai = this.initializeOpenAI();
-        if (StrCast(this.dataDoc.vectorstore_id) == '') {
-            this.vectorstore_id = uuidv4();
-            this.dataDoc.vectorstore_id = this.vectorstore_id;
-        } else {
-            this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id);
-        }
-        this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds);
+        this.messagesRef = React.createRef();
         this.docManager = new AgentDocumentManager(this);
-        this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.getLinkedUrlDocIds, this.createImageInDash, this.createCSVInDash, this, this.docManager);
 
-        // Reinitialize the DocumentMetadataTool with a direct reference to this ChatBox instance
-        // This ensures the tool can properly access documents in the same Freeform view
-        this.agent.reinitializeDocumentMetadataTool();
+        // Initialize OpenAI client
+        this.initializeOpenAI();
+
+        // Create a unique vectorstore ID for this ChatBox
+        this.vectorstore_id = uuidv4();
+
+        // Initialize vectorstore with the document manager
+        this.vectorstore = new Vectorstore(this.vectorstore_id, this.docManager);
+
+        // Create an agent with the vectorstore
+        this.agent = new Agent(
+            this.vectorstore,
+            this.retrieveSummaries.bind(this),
+            this.retrieveFormattedHistory.bind(this),
+            this.retrieveCSVData.bind(this),
+            this.retrieveDocIds.bind(this),
+            this.createImageInDash.bind(this),
+            this.createCSVInDash.bind(this),
+            this.docManager
+        );
 
-        this.messagesRef = React.createRef<HTMLDivElement>();
+        // Add event listeners
+        this.addScrollListener();
 
         // Reaction to update dataDoc when chat history changes
         reaction(
@@ -140,22 +149,25 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
      */
     @action
     addDocToVectorstore = async (newLinkedDoc: Doc) => {
-        this._uploadProgress = 0;
-        this._currentStep = 'Initializing...';
-        this._isUploadingDocs = true;
-
         try {
-            // Add the document to the vectorstore
+            this._isUploadingDocs = true;
+
+            // Process the document first to ensure it has a valid ID
+            this.docManager.processDocument(newLinkedDoc);
+
+            // Add the document to the vectorstore which will also register chunks
             await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress);
-        } catch (error) {
-            console.error('Error uploading document:', error);
-            this._currentStep = 'Error during upload';
-        } finally {
-            runInAction(() => {
-                this._isUploadingDocs = false;
-                this._uploadProgress = 0;
-                this._currentStep = '';
-            });
+
+            // No longer needed as documents are tracked by the AgentDocumentManager
+            // this._linked_docs_to_add.add(newLinkedDoc);
+
+            this._isUploadingDocs = false;
+
+            return true;
+        } catch (err) {
+            console.error('Error adding document to vectorstore:', err);
+            this._isUploadingDocs = false;
+            return false;
         }
     };
 
@@ -238,7 +250,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
             apiKey: process.env.OPENAI_KEY,
             dangerouslyAllowBrowser: true,
         };
-        return new OpenAI(configuration);
+        this.openai = new OpenAI(configuration);
     }
 
     /**
@@ -375,49 +387,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         }
     };
 
-    /**
-     * Adds a linked document from a URL for future reference and analysis.
-     * @param url The URL of the document to add.
-     * @param id The unique identifier for the document.
-     */
-    @action
-    addLinkedUrlDoc = async (url: string, id: string) => {
-        const doc = Docs.Create.WebDocument(url, { data_useCors: true });
-        this.docManager.addCustomId(doc, id);
-        const linkDoc = Docs.Create.LinkDocument(this.Document, doc);
-        LinkManager.Instance.addLink(linkDoc);
-
-        const chunkToAdd = {
-            chunkId: id,
-            chunkType: CHUNK_TYPE.URL,
-            url: url,
-        };
-
-        doc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] });
-        this.docManager.processDocument(doc);
-    };
-
-    /**
-     * Retrieves the IDs of linked url documents.
-     * @returns An array of document IDs.
-     */
-    @action
-    getLinkedUrlDocIds = () => {
-        const linkedDocs: Doc[] = this.linkedDocs;
-        const linkedUrlDocIds: string[] = [];
-
-        for (const doc of linkedDocs) {
-            if (doc.chunk_simpl) {
-                const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] };
-                const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkType === CHUNK_TYPE.URL);
-                if (foundChunk) {
-                    linkedUrlDocIds.push(foundChunk.chunkId);
-                }
-            }
-        }
-        return linkedUrlDocIds;
-    };
-
     /**
      * Getter to retrieve the current user's name from the client utils.
      */
@@ -613,82 +582,224 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
      */
     @action
     handleCitationClick = async (citation: Citation) => {
-        const currentLinkedDocs: Doc[] = this.linkedDocs;
-        const chunkId = citation.chunk_id;
+        try {
+            // Extract values from MobX proxy object if needed
+            const chunkId = typeof citation.chunk_id === 'object' ? (citation.chunk_id as any).toString() : citation.chunk_id;
+
+            // For debugging
+            console.log('Citation clicked:', {
+                chunkId,
+                citation: JSON.stringify(citation, null, 2),
+            });
 
-        for (const doc of currentLinkedDocs) {
-            if (doc.chunk_simpl) {
-                const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] };
-                const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId);
+            // Try to find the document
+            const linkedDocs = this.linkedDocs;
+            let doc: Doc | undefined;
 
-                if (foundChunk) {
-                    // Handle media chunks specifically
+            // First try to find the document using the document manager's chunk ID lookup
+            const parentDocId = this.docManager.getDocIdByChunkId(chunkId);
+            if (parentDocId) {
+                doc = this.docManager.getDocument(parentDocId);
+                console.log(`Found document by chunk ID lookup: ${parentDocId}`);
+            }
 
-                    if (doc.ai_type == 'video' || doc.ai_type == 'audio') {
-                        const directMatchSegmentStart = this.getDirectMatchingSegmentStart(doc, citation.direct_text || '', foundChunk.indexes || []);
+            // If not found, fall back to searching through linked docs (maintains compatibility)
+            if (!doc) {
+                for (const linkedDoc of linkedDocs) {
+                    if (linkedDoc.chunk_simpl) {
+                        try {
+                            const docChunkSimpl = JSON.parse(StrCast(linkedDoc.chunk_simpl)) as { chunks: SimplifiedChunk[] };
+                            const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId);
+                            if (foundChunk) {
+                                doc = linkedDoc;
+                                console.log(`Found document by iterating through linked docs`);
+                                break;
+                            }
+                        } catch (e) {
+                            console.error(`Error parsing chunk_simpl for doc ${linkedDoc.id}:`, e);
+                        }
+                    }
+                }
+            }
 
-                        if (directMatchSegmentStart) {
-                            // Navigate to the segment's start time in the media player
-                            await this.goToMediaTimestamp(doc, directMatchSegmentStart, doc.ai_type);
-                        } else {
-                            console.error('No direct matching segment found for the citation.');
+            if (!doc) {
+                console.warn(`Document not found for citation with chunk_id: ${chunkId}`);
+                return;
+            }
+
+            // Process the chunk data
+            let docChunkSimpl: { chunks: SimplifiedChunk[] } = { chunks: [] };
+            try {
+                docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl) || '{"chunks":[]}');
+            } catch (e) {
+                console.error(`Error parsing chunk_simpl for the found document:`, e);
+                return;
+            }
+
+            const foundChunk = docChunkSimpl.chunks.find((chunk: SimplifiedChunk) => chunk.chunkId === chunkId);
+
+            // Handle different chunk types
+            if (foundChunk) {
+                console.log(`Found chunk in document:`, foundChunk);
+
+                // Handle video chunks
+                if (foundChunk.chunkType === CHUNK_TYPE.VIDEO) {
+                    if (foundChunk.start_time !== undefined) {
+                        await this.goToMediaTimestamp(doc, foundChunk.start_time, 'video');
+                    } else {
+                        console.warn('Video chunk missing start_time:', foundChunk);
+                    }
+                }
+                // Handle audio chunks - note that we're using string comparison since 'audio' isn't in CHUNK_TYPE enum
+                else if (String(foundChunk.chunkType).toLowerCase() === 'audio') {
+                    if (foundChunk.start_time !== undefined) {
+                        await this.goToMediaTimestamp(doc, foundChunk.start_time, 'audio');
+                    } else {
+                        console.warn('Audio chunk missing start_time:', foundChunk);
+                    }
+                }
+                // Handle table or image chunks
+                else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) {
+                    this.handleOtherChunkTypes(foundChunk, citation, doc);
+                }
+                // Handle text chunks
+                else if (foundChunk.chunkType === CHUNK_TYPE.TEXT) {
+                    // Find text from the document's chunks metadata
+                    let chunkText = '';
+
+                    try {
+                        // We already parsed the chunks earlier, so use that
+                        const matchingChunk = docChunkSimpl.chunks.find(c => c.chunkId === foundChunk.chunkId);
+                        if (matchingChunk && 'text' in matchingChunk) {
+                            // If the text property exists on the chunk (even though it's not in the type)
+                            chunkText = String(matchingChunk['text'] || '');
                         }
+                    } catch (e) {
+                        console.error('Error getting chunk text:', e);
+                    }
+
+                    // Default text if none found
+                    if (!chunkText) {
+                        chunkText = 'Text content not available';
+                    }
+
+                    this._citationPopup = {
+                        text: chunkText,
+                        visible: true,
+                    };
+                }
+                // Handle URL chunks
+                else if (foundChunk.chunkType === CHUNK_TYPE.URL) {
+                    if (foundChunk.url) {
+                        // Instead of opening the URL in a new window, show the document in the viewer
+                        DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+                        console.log(`Navigated to web document with URL: ${foundChunk.url}`);
                     } else {
-                        // Handle other chunk types as before
-                        this.handleOtherChunkTypes(foundChunk, citation, doc);
+                        console.warn('URL chunk missing URL:', foundChunk);
                     }
                 }
+            } else if (doc?.original_segments) {
+                // Handle original segments for media files
+                let original_segments: any[] = [];
+                try {
+                    original_segments = JSON.parse(StrCast(doc.original_segments));
+                } catch (e) {
+                    console.error(`Error parsing original_segments:`, e);
+                    return;
+                }
+
+                // Check if there's direct text to find in the segments
+                if (citation.direct_text) {
+                    // Find the segment that contains the direct text
+                    const start = this.getDirectMatchingSegmentStart(doc, citation.direct_text, []);
+                    if (start !== -1) {
+                        await this.goToMediaTimestamp(doc, start, doc.ai_type === 'audio' ? 'audio' : 'video');
+                    }
+                }
+            } else {
+                console.warn('Unable to find chunk or segments for citation', citation);
             }
+        } catch (error) {
+            console.error('Error handling citation click:', error);
         }
     };
 
+    /**
+     * Finds a matching segment in a document based on text content.
+     * @param doc The document to search in
+     * @param citationText The text to find in the document
+     * @param indexesOfSegments Optional indexes of segments to search in
+     * @returns The start time of the best-matching segment; the first candidate segment's start if no segment matches well enough; -1 if doc, citationText, or segments are missing
+     */
     getDirectMatchingSegmentStart = (doc: Doc, citationText: string, indexesOfSegments: string[]): number => {
-        const originalSegments = JSON.parse(StrCast(doc.original_segments!)).map((segment: any, index: number) => ({
-            index: index.toString(),
-            text: segment.text,
-            start: segment.start,
-            end: segment.end,
-        }));
-
-        if (!Array.isArray(originalSegments) || originalSegments.length === 0 || !Array.isArray(indexesOfSegments)) {
-            return 0;
+        if (!doc || !citationText) return -1;
+
+        // Get original segments from the document
+        const original_segments = doc.original_segments ? JSON.parse(StrCast(doc.original_segments)) : [];
+
+        if (!original_segments || !Array.isArray(original_segments) || original_segments.length === 0) {
+            return -1;
         }
 
-        // Create itemsToSearch array based on indexesOfSegments
-        const itemsToSearch = indexesOfSegments.map((indexStr: string) => {
-            const index = parseInt(indexStr, 10);
-            const segment = originalSegments[index];
-            return { text: segment.text, start: segment.start };
-        });
+        let segments = original_segments;
 
-        console.log('Constructed itemsToSearch:', itemsToSearch);
+        // If specific indexes are provided, keep only segments whose own index field is listed (NOTE(review): assumes each parsed segment carries a string index — confirm)
+        if (indexesOfSegments && indexesOfSegments.length > 0) {
+            segments = original_segments.filter((segment: any) => indexesOfSegments.includes(segment.index));
+        }
+
+        // If no segments match the indexes, use all segments
+        if (segments.length === 0) {
+            segments = original_segments;
+        }
 
-        // Helper function to calculate word overlap score
+        // First try to find a segment whose text contains the citation text verbatim
+        const exactMatch = segments.find((segment: any) => segment.text && segment.text.includes(citationText));
+
+        if (exactMatch) {
+            return exactMatch.start;
+        }
+
+        // If no exact match, find segment with best word overlap
         const calculateWordOverlap = (text1: string, text2: string): number => {
-            const words1 = new Set(text1.toLowerCase().split(/\W+/));
-            const words2 = new Set(text2.toLowerCase().split(/\W+/));
-            const intersection = new Set([...words1].filter(word => words2.has(word)));
-            return intersection.size / Math.max(words1.size, words2.size); // Jaccard similarity
+            if (!text1 || !text2) return 0;
+
+            const words1 = text1.toLowerCase().split(/\s+/);
+            const words2 = text2.toLowerCase().split(/\s+/);
+            const wordSet1 = new Set(words1);
+
+            let overlap = 0;
+            for (const word of words2) {
+                if (wordSet1.has(word)) {
+                    overlap++;
+                }
+            }
+
+            // Return the overlap count as a fraction of the shorter text's word count (repeated words in text2 each count)
+            return overlap / Math.min(words1.length, words2.length);
         };
 
-        // Search for the best matching segment
-        let bestMatchStart = 0;
-        let bestScore = 0;
-
-        console.log(`Searching for best match for query: "${citationText}"`);
-        itemsToSearch.forEach(item => {
-            const score = calculateWordOverlap(citationText, item.text);
-            console.log(`Comparing query to segment: "${item.text}" | Score: ${score}`);
-            if (score > bestScore) {
-                bestScore = score;
-                bestMatchStart = item.start;
+        // Find segment with highest word overlap
+        let bestMatch = null;
+        let highestOverlap = 0;
+
+        for (const segment of segments) {
+            if (!segment.text) continue;
+
+            const overlap = calculateWordOverlap(segment.text, citationText);
+            if (overlap > highestOverlap) {
+                highestOverlap = overlap;
+                bestMatch = segment;
             }
-        });
+        }
 
-        console.log('Best match found with score:', bestScore, '| Start time:', bestMatchStart);
+        // Only return matches with significant overlap (more than 30%)
+        if (bestMatch && highestOverlap > 0.3) {
+            return bestMatch.start;
+        }
 
-        // Return the start time of the best match
-        return bestMatchStart;
+        // If no good match found, return the start of the first segment as fallback
+        return segments.length > 0 ? segments[0].start : -1;
     };
 
     /**
@@ -772,7 +883,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 break;
             case CHUNK_TYPE.CSV:
             case CHUNK_TYPE.URL:
-                DocumentManager.Instance.showDocument(doc, { willZoomCentered: true });
+                DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
+                    console.log(`Showing web document in viewer with URL: ${foundChunk.url}`);
+                });
                 break;
             default:
                 console.error('Unhandled chunk type:', foundChunk.chunkType);
@@ -879,6 +992,16 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
             }
         });
         this.addScrollListener();
+
+        // Initialize the document manager by finding existing documents
+        this.docManager.initializeFindDocsFreeform();
+
+        // If any documents are queued in _linked_docs_to_add, register each with the document manager
+        if (this._linked_docs_to_add.size > 0) {
+            this._linked_docs_to_add.forEach(doc => {
+                this.docManager.processDocument(doc);
+            });
+        }
     }
 
     /**
@@ -892,28 +1015,28 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     /**
      * Getter that retrieves all linked documents for the current document.
      */
-    @computed
-    get linkedDocs() {
-        return LinkManager.Instance.getAllRelatedLinks(this.Document)
-            .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document)))
-            .map(d => DocCast(d?.annotationOn, d))
-            .filter(d => d);
+    @computed get linkedDocs(): Doc[] {
+        const docIds = this.docManager.listDocs();
+        const docs: Doc[] = [];
+
+        // Get documents from the document manager using the getDocument method
+        docIds.forEach(id => {
+            const doc = this.docManager.getDocument(id);
+            if (doc) {
+                docs.push(doc);
+            }
+        });
+
+        return docs;
     }
 
     /**
-     * Getter that retrieves document IDs of linked documents that have AI-related content.
+     * Getter that retrieves the IDs of all documents currently tracked by the document manager.
      */
     @computed
-    get docIds() {
-        return LinkManager.Instance.getAllRelatedLinks(this.Document)
-            .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document)))
-            .map(d => DocCast(d?.annotationOn, d))
-            .filter(d => d)
-            .filter(d => {
-                console.log(d.ai_doc_id);
-                return d.ai_doc_id;
-            })
-            .map(d => StrCast(d.ai_doc_id));
+    get docIds(): string[] {
+        // Use the document manager to get all document IDs
+        return Array.from(this.docManager.listDocs());
     }
 
     /**
@@ -921,23 +1044,18 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
      */
     @computed
     get summaries(): string {
-        return (
-            LinkManager.Instance.getAllRelatedLinks(this.Document)
-                .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document)))
-                .map(d => DocCast(d?.annotationOn, d))
-                .filter(d => d)
-                .filter(d => d.summary)
-                .map((doc, index) => {
-                    if (PDFCast(doc.data)) {
-                        return `<summary file_name="${PDFCast(doc.data).url.pathname}" applicable_tools=["rag"]>${doc.summary}</summary>`;
-                    } else if (CsvCast(doc.data)) {
-                        return `<summary file_name="${CsvCast(doc.data).url.pathname}" applicable_tools=["dataAnalysis"]>${doc.summary}</summary>`;
-                    } else {
-                        return `${index + 1}) ${doc.summary}`;
-                    }
-                })
-                .join('\n') + '\n'
-        );
+        const linkedDocs = Array.from(this.docManager.listDocs())
+            .map(id => {
+                const doc = this.docManager.extractDocumentMetadata(id);
+                if (doc && doc.fields && (doc.fields.layout.summary || doc.fields.data.summary)) {
+                    return doc.fields.layout.summary || doc.fields.data.summary;
+                }
+                return null;
+            })
+            .filter(Boolean)
+            .join('\n\n');
+
+        return linkedDocs;
     }
 
     /**
@@ -965,7 +1083,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
 
     // Other helper methods for retrieving document data and processing
 
-    retrieveSummaries = () => {
+    retrieveSummaries = (): string => {
         return this.summaries;
     };
 
@@ -973,12 +1091,12 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         return this.linkedCSVs;
     };
 
-    retrieveFormattedHistory = () => {
+    retrieveFormattedHistory = (): string => {
         return this.formattedHistory;
     };
 
-    retrieveDocIds = () => {
-        return this.docIds;
+    retrieveDocIds = (): string[] => {
+        return Array.from(this.docManager.listDocs());
     };
 
     /**
diff --git a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts
index 4b751acc0..e6c2421e5 100644
--- a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts
+++ b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts
@@ -417,9 +417,9 @@ export class DocumentMetadataTool extends BaseTool<DocumentMetadataToolParamsTyp
                     const title = String(args.title);
                     const data = String(args.data);
 
-                    const createdDoc = this._docManager.createDocInDash(docType, title, data);
+                    const id = this._docManager.createDocInDash(docType, data, { title: title });
 
-                    if (!createdDoc) {
+                    if (!id) {
                         return [
                             {
                                 type: 'text',
@@ -427,18 +427,14 @@ export class DocumentMetadataTool extends BaseTool<DocumentMetadataToolParamsTyp
                             },
                         ];
                     }
-
-                    // Update our local document maps with the new document
-                    this._docManager.processDocument(createdDoc);
-
                     // Get the created document's metadata
-                    const createdMetadata = this._docManager.extractDocumentMetadata(this._docManager.createAgentDoc(createdDoc));
+                    const createdMetadata = this._docManager.extractDocumentMetadata(id);
 
                     return [
                         {
                             type: 'text',
                             text: `Document created successfully.
-Document ID: ${createdDoc.id}
+Document ID: ${id}
 Type: ${docType}
 Title: "${title}"
 
@@ -447,9 +443,9 @@ You can now use the "edit" action to modify additional properties of this docume
 
 Next steps:
 1. Use the "getFieldOptions" action to understand available editable/addable fields/properties and their dependencies.
-2. To modify this document, use: { action: "edit", documentId: "${createdDoc.id}", fieldEdits: [{"fieldName":"property","fieldValue":"value"}] }
+2. To modify this document, use: { action: "edit", documentId: "${id}", fieldEdits: [{"fieldName":"property","fieldValue":"value"}] }
 3. To add styling, consider setting backgroundColor, fontColor, or other properties
-4. For text documents, you can edit the content with: { action: "edit", documentId: "${createdDoc.id}", fieldEdits: [{"fieldName":"text","fieldValue":"New content"}] }
+4. For text documents, you can edit the content with: { action: "edit", documentId: "${id}", fieldEdits: [{"fieldName":"text","fieldValue":"New content"}] }
 
 Full metadata for the created document:
 ${JSON.stringify(createdMetadata, null, 2)}`,
diff --git a/src/client/views/nodes/chatbot/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts
index 2ee30f0cf..53f5fc109 100644
--- a/src/client/views/nodes/chatbot/tools/SearchTool.ts
+++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts
@@ -3,6 +3,9 @@ import { Networking } from '../../../../Network';
 import { BaseTool } from './BaseTool';
 import { Observation } from '../types/types';
 import { ParametersType, ToolInfo } from '../types/tool_types';
+import { Agent } from 'http';
+import { AgentDocumentManager } from '../utils/AgentDocumentManager';
+import { StrCast } from '../../../../../fields/Types';
 
 const searchToolParams = [
     {
@@ -25,12 +28,12 @@ const searchToolInfo: ToolInfo<SearchToolParamsType> = {
 };
 
 export class SearchTool extends BaseTool<SearchToolParamsType> {
-    private _addLinkedUrlDoc: (url: string, id: string) => void;
+    private _docManager: AgentDocumentManager;
     private _max_results: number;
 
-    constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 3) {
+    constructor(docManager: AgentDocumentManager, max_results: number = 3) {
         super(searchToolInfo);
-        this._addLinkedUrlDoc = addLinkedUrlDoc;
+        this._docManager = docManager;
         this._max_results = max_results;
     }
 
@@ -46,8 +49,13 @@ export class SearchTool extends BaseTool<SearchToolParamsType> {
                     max_results: this._max_results,
                 })) as { results: { url: string; snippet: string }[] };
                 const data = results.map((result: { url: string; snippet: string }) => {
-                    const id = uuidv4();
-                    this._addLinkedUrlDoc(result.url, id);
+                    // Create a web document with the URL
+                    const id = this._docManager.createDocInDash('web', result.url, {
+                        title: `Search Result: ${result.url}`,
+                        text_html: result.snippet,
+                        data_useCors: true,
+                    });
+
                     return {
                         type: 'text' as const,
                         text: `<chunk chunk_id="${id}" chunk_type="url"><url>${result.url}</url><overview>${result.snippet}</overview></chunk>`,
diff --git a/src/client/views/nodes/chatbot/types/types.ts b/src/client/views/nodes/chatbot/types/types.ts
index 882e74ebb..dcb132ec7 100644
--- a/src/client/views/nodes/chatbot/types/types.ts
+++ b/src/client/views/nodes/chatbot/types/types.ts
@@ -108,6 +108,7 @@ export interface SimplifiedChunk {
     start_time?: number;
     end_time?: number;
     indexes?: string[];
+    text?: string;
 }
 
 export interface AI_Document {
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index 4eeac3c6a..c3beebcde 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -165,22 +165,18 @@ export class AgentDocumentManager {
         }
     }
 
-    public addCustomId(doc: Doc, id: string) {
-        doc.id = id;
-        doc.DOCUMENT_ID_FIELD = id;
-    }
-
     /**
      * Process a document by ensuring it has an ID and adding it to the appropriate collections
      * @param doc The document to process
      */
-    public processDocument(doc: Doc) {
+    public processDocument(doc: Doc): string {
         // Ensure document has a persistent ID
         const docId = this.ensureDocumentId(doc);
         // Only add if we haven't already processed this document
         if (!this.documentsById.has(docId)) {
             this.documentsById.set(docId, { layoutDoc: doc, dataDoc: doc[DocData] });
         }
+        return docId;
     }
 
     /**
@@ -232,7 +228,9 @@ export class AgentDocumentManager {
      * @param docId The ID of the document to extract metadata from
      * @returns An object containing the document's metadata
      */
-    public extractDocumentMetadata(doc?: AgentDocument) {
+    public extractDocumentMetadata(id: string) {
+        if (!id) return null;
+        const doc = this.documentsById.get(id);
         if (!doc) return null;
         const layoutDoc = doc.layoutDoc;
         const dataDoc = doc.dataDoc;
@@ -729,16 +727,14 @@ export class AgentDocumentManager {
      */
     public getDocumentMetadata(documentId?: string): any {
         if (documentId) {
-            const doc = this.documentsById.get(documentId);
-            // Get metadata for a specific document
-            return this.extractDocumentMetadata(doc);
+            console.log(`Returning document metadata for docID, ${documentId}:`, this.extractDocumentMetadata(documentId));
+            return this.extractDocumentMetadata(documentId);
         } else {
             // Get metadata for all documents
             const documentsMetadata: Record<string, any> = {};
-            for (const doc of this.documentsById.values()) {
-                documentsMetadata.add(this.extractDocumentMetadata(doc) ?? { documentId: doc.layoutDoc.id, title: doc.layoutDoc.title, type: doc.layoutDoc.type });
+            for (const documentId of this.documentsById.keys()) {
+                documentsMetadata.add(this.extractDocumentMetadata(documentId));
             }
-
             return {
                 documentCount: this.documentsById.size,
                 documents: documentsMetadata,
@@ -845,14 +841,15 @@ export class AgentDocumentManager {
         return Object.values(supportedDocTypes).includes(docType as supportedDocTypes);
     }
     /**
-     * Creates a document in the dashboard.
+     * Creates a document in the dashboard and returns its ID.
+     * This is a public API used by tools like SearchTool.
      *
-     * @param {string} doc_type - The type of document to create.
-     * @param {string} data - The data used to generate the document.
-     * @param {DocumentOptions} options - Configuration options for the document.
-     * @returns {Promise<void>} A promise that resolves once the document is created and displayed.
+     * @param docType The type of document to create
+     * @param data The data for the document
+     * @param options Optional configuration options
+     * @returns The ID of the created document
      */
-    createDocInDash = (docType: string, title: string, data: string) => {
+    public createDocInDash(docType: string, data: string, options?: any): string {
         // Validate doc_type
         if (!this.isValidDocType(docType)) {
             throw new Error(`Invalid document type: ${docType}`);
@@ -862,10 +859,10 @@ export class AgentDocumentManager {
             // Create simple document with just title and data
             const simpleDoc: parsedDoc = {
                 doc_type: docType,
-                title: title,
+                title: options?.title ?? `Untitled Document ${this.documentsById.size + 1}`,
                 data: data,
-                x: 0,
-                y: 0,
+                x: options?.x ?? 0,
+                y: options?.y ?? 0,
                 _width: 300,
                 _height: 300,
                 _layout_fitWidth: false,
@@ -884,46 +881,111 @@ export class AgentDocumentManager {
                 }
             };
             const doc = this.chatBox.whichDoc(simpleDoc, false);
-            if (doc) linkAndShowDoc(doc);
-            return doc;
+            if (doc) {
+                linkAndShowDoc(doc);
+                const id = this.processDocument(doc);
+                return id;
+            } else {
+                throw new Error(`Error creating document. Created document not found.`);
+            }
         } catch (error) {
             throw new Error(`Error creating document: ${error}`);
         }
-    };
+    }
 
     public has(docId: string) {
         return this.documentsById.has(docId);
     }
 
-    public listDocs() {
-        // List all available documents in simple format
-        const docs = Array.from(this.documentsById.entries()).map(([id, doc]) => ({
-            id,
-            title: doc.layoutDoc.title || 'Untitled Document',
-            type: doc.layoutDoc.type || doc.dataDoc.type || 'Unknown Type',
-        }));
-
-        if (docs.length === 0) {
-            return [
-                {
-                    type: 'text',
-                    text: 'No documents found in the current view.',
-                },
-            ];
-        }
-
-        return [
-            {
-                type: 'text',
-                text: `Found ${docs.length} document(s) in the current view:\n${JSON.stringify(docs, null, 2)}`,
-            },
-        ];
+    /**
+     * Returns a list of all document IDs in the manager.
+     * @returns An array of document IDs (strings).
+     */
+    public listDocs(): string[] {
+        return Array.from(this.documentsById.keys());
+    }
+
+    /**
+     * Adds a document with a custom ID to the manager
+     * @param doc The document to add
+     * @param customId The custom ID to assign to the document
+     * @returns The customId that was assigned, or an empty string if no document was given
+     */
+    public addCustomId(doc: Doc, customId: string): string {
+        if (!doc) {
+            console.error('Cannot add null document with custom ID');
+            return '';
+        }
+
+        // Set the custom ID in the document's metadata
+        doc[this.DOCUMENT_ID_FIELD] = customId;
+
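+        // Store the document in our map (NOTE(review): dataDoc is the same doc here, while processDocument stores doc[DocData] — confirm this is intended)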
+        this.documentsById.set(customId, {
+            layoutDoc: doc,
+            dataDoc: doc,
+        });
+
+        return customId;
     }
 
-    public createAgentDoc(doc: Doc) {
-        // Ideally check if Doc is already in there.
-        const agentDoc = { layoutDoc: doc, dataDoc: doc[DocData] };
-        this.documentsById.set(this.ensureDocumentId(doc), agentDoc);
-        return agentDoc;
+    /**
+     * Gets a document by its ID
+     * @param docId The ID of the document to retrieve
+     * @returns The document if found, undefined otherwise
+     */
+    public getDocument(docId: string): Doc | undefined {
+        const docInfo = this.documentsById.get(docId);
+        return docInfo?.layoutDoc;
+    }
+
+    /**
+     * Registers chunk IDs associated with a document in the manager
+     * @param docId The parent document ID
+     * @param chunkIds Array of chunk IDs associated with this document
+     */
+    public registerChunkIds(docId: string, chunkIds: string[]): void {
+        // Get the document if it exists
+        const docInfo = this.documentsById.get(docId);
+        if (!docInfo) {
+            console.warn(`Cannot register chunks for unknown document ID: ${docId}`);
+            return;
+        }
+
+        // Store chunk IDs on the document for future reference
+        const doc = docInfo.layoutDoc;
+        if (!doc.chunk_ids) {
+            doc.chunk_ids = JSON.stringify(chunkIds);
+        } else {
+            // Merge with existing chunk IDs if they exist
+            const existingIds = JSON.parse(doc.chunk_ids as string);
+            const updatedIds = [...new Set([...existingIds, ...chunkIds])]; // Remove duplicates
+            doc.chunk_ids = JSON.stringify(updatedIds);
+        }
+
+        // Ensure each chunk ID can be linked back to its parent document
+        chunkIds.forEach(chunkId => {
+            // Register the chunk ID as an extra key in documentsById pointing at the parent document's entry,
+            // so the document can be looked up by any of its chunk IDs (these keys are also returned by listDocs())
+            if (!this.documentsById.has(chunkId)) {
+                this.documentsById.set(chunkId, {
+                    layoutDoc: doc,
+                    dataDoc: docInfo.dataDoc,
+                });
+            }
+        });
+    }
+
+    /**
+     * Gets a document ID by a chunk ID
+     * @param chunkId The chunk ID to look up
+     * @returns The parent document's stored ID field if the chunk ID is registered, otherwise undefined
+     */
+    public getDocIdByChunkId(chunkId: string): string | undefined {
+        const docInfo = this.documentsById.get(chunkId);
+        if (docInfo) {
+            return docInfo.layoutDoc[this.DOCUMENT_ID_FIELD] as string;
+        }
+        return undefined;
     }
 }
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index afd34f28d..4bb61d8b2 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -15,7 +15,7 @@ import { Networking } from '../../../../Network';
 import { AI_Document, CHUNK_TYPE, RAGChunk } from '../types/types';
 import OpenAI from 'openai';
 import { Embedding } from 'openai/resources';
-import { PineconeEnvironmentVarsNotSupportedError } from '@pinecone-database/pinecone/dist/errors';
+import { AgentDocumentManager } from '../utils/AgentDocumentManager';
 
 dotenv.config();
 
@@ -29,7 +29,7 @@ export class Vectorstore {
     private openai: OpenAI; // OpenAI client for generating embeddings.
     private indexName: string = 'pdf-chatbot'; // Default name for the index.
     private _id: string; // Unique ID for the Vectorstore instance.
-    private _doc_ids: () => string[]; // List of document IDs handled by this instance.
+    private docManager: AgentDocumentManager; // Document manager for handling documents
 
     documents: AI_Document[] = []; // Store the documents indexed in the vectorstore.
 
@@ -37,9 +37,9 @@ export class Vectorstore {
      * Initializes the Pinecone and OpenAI clients, sets up the document ID list,
      * and initializes the Pinecone index.
      * @param id The unique identifier for the vectorstore instance.
-     * @param doc_ids A function that returns a list of document IDs.
+     * @param docManager An instance of AgentDocumentManager to handle document management.
      */
-    constructor(id: string, doc_ids: () => string[]) {
+    constructor(id: string, docManager: AgentDocumentManager) {
         const pineconeApiKey = process.env.PINECONE_API_KEY;
         if (!pineconeApiKey) {
             throw new Error('PINECONE_API_KEY is not defined.');
@@ -49,7 +49,7 @@ export class Vectorstore {
         this.pinecone = new Pinecone({ apiKey: pineconeApiKey });
         this.openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, dangerouslyAllowBrowser: true });
         this._id = id;
-        this._doc_ids = doc_ids;
+        this.docManager = docManager;
         this.initializeIndex();
     }
 
@@ -109,15 +109,25 @@ export class Vectorstore {
 
             const isAudioOrVideo = local_file_path.endsWith('.mp3') || local_file_path.endsWith('.mp4');
             let result: AI_Document & { doc_id: string };
+
             if (isAudioOrVideo) {
                 console.log('Processing media file...');
                 const response = await Networking.PostToServer('/processMediaFile', { fileName: path.basename(local_file_path) });
-                const segmentedTranscript = response.condensed;
+
+                // Type assertion to handle the response properties
+                const typedResponse = response as {
+                    condensed: Array<{ text: string; indexes: string[]; start: number; end: number }>;
+                    full: Array<unknown>;
+                    summary: string;
+                };
+
+                const segmentedTranscript = typedResponse.condensed;
                 console.log(segmentedTranscript);
-                const summary = response.summary;
+                const summary = typedResponse.summary;
                 doc.summary = summary;
+
                 // Generate embeddings for each chunk
-                const texts = segmentedTranscript.map((chunk: any) => chunk.text);
+                const texts = segmentedTranscript.map(chunk => chunk.text);
 
                 try {
                     const embeddingsResponse = await this.openai.embeddings.create({
@@ -126,10 +136,19 @@ export class Vectorstore {
                         encoding_format: 'float',
                     });
 
-                    doc.original_segments = JSON.stringify(response.full);
+                    doc.original_segments = JSON.stringify(typedResponse.full);
                     doc.ai_type = local_file_path.endsWith('.mp3') ? 'audio' : 'video';
                     const doc_id = uuidv4();
 
+                    // Register the document with the AgentDocumentManager
+                    this.docManager.addCustomId(doc, doc_id);
+
+                    // Generate chunk IDs upfront so we can register them
+                    const chunkIds = segmentedTranscript.map(() => uuidv4());
+
+                    // Register all chunk IDs with the document manager
+                    this.docManager.registerChunkIds(doc_id, chunkIds);
+
                     // Add transcript and embeddings to metadata
                     result = {
                         doc_id,
@@ -137,13 +156,13 @@ export class Vectorstore {
                         file_name: local_file_path,
                         num_pages: 0,
                         summary: '',
-                        chunks: segmentedTranscript.map((chunk: any, index: number) => ({
-                            id: uuidv4(),
+                        chunks: segmentedTranscript.map((chunk, index) => ({
+                            id: chunkIds[index], // Use pre-generated chunk ID
                             values: (embeddingsResponse.data as Embedding[])[index].embedding, // Assign embedding
                             metadata: {
                                 indexes: chunk.indexes,
                                 original_document: local_file_path,
-                                doc_id: doc_id,
+                                doc_id: doc_id, // Ensure doc_id is consistent
                                 file_path: local_file_path,
                                 start_time: chunk.start,
                                 end_time: chunk.end,
@@ -159,20 +178,24 @@ export class Vectorstore {
                 }
 
                 doc.segmented_transcript = JSON.stringify(segmentedTranscript);
-                // Simplify chunks for storage
+                // Simplify chunks for storage - ensure simplified chunks use EXACTLY the same IDs
                 const simplifiedChunks = result.chunks.map(chunk => ({
-                    chunkId: chunk.id,
+                    chunkId: chunk.id, // Use the exact same ID as the full chunk
                     start_time: chunk.metadata.start_time,
                     end_time: chunk.metadata.end_time,
                     indexes: chunk.metadata.indexes,
                     chunkType: CHUNK_TYPE.VIDEO,
                     text: chunk.metadata.text,
+                    doc_id: chunk.metadata.doc_id, // Include parent doc_id for completeness
                 }));
                 doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks });
             } else {
-                // Existing document processing logic remains unchanged
+                // Process regular document
                 console.log('Processing regular document...');
-                const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path });
+                const createDocumentResponse = await Networking.PostToServer('/createDocument', { file_path: local_file_path });
+
+                // Type assertion for the response
+                const { jobId } = createDocumentResponse as { jobId: string };
 
                 while (true) {
                     await new Promise(resolve => setTimeout(resolve, 2000));
@@ -188,6 +211,16 @@ export class Vectorstore {
                         progressCallback(progressResponseJson.progress, progressResponseJson.step);
                     }
                 }
+
+                // Register the document with the AgentDocumentManager
+                this.docManager.addCustomId(doc, result.doc_id);
+
+                // Collect all chunk IDs
+                const chunkIds = result.chunks.map(chunk => chunk.id);
+
+                // Register chunks with the document manager
+                this.docManager.registerChunkIds(result.doc_id, chunkIds);
+
                 if (!doc.chunk_simpl) {
                     doc.chunk_simpl = JSON.stringify({ chunks: [] });
                 }
@@ -196,12 +229,13 @@ export class Vectorstore {
 
                 result.chunks.forEach((chunk: RAGChunk) => {
                     const chunkToAdd = {
-                        chunkId: chunk.id,
+                        chunkId: chunk.id, // Ensure we use the exact same ID
                         startPage: chunk.metadata.start_page,
                         endPage: chunk.metadata.end_page,
                         location: chunk.metadata.location,
                         chunkType: chunk.metadata.type as CHUNK_TYPE,
                         text: chunk.metadata.text,
+                        doc_id: chunk.metadata.doc_id, // Include parent doc_id for consistency
                     };
                     const new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl));
                     new_chunk_simpl.chunks = new_chunk_simpl.chunks.concat(chunkToAdd);
@@ -298,39 +332,55 @@ export class Vectorstore {
 
             let queryEmbedding = queryEmbeddingResponse.data[0].embedding;
 
-            // Extract the embedding from the response.
+            // Get document IDs from the AgentDocumentManager
+            const docIds = Array.from(this.docManager.listDocs());
+            console.log('Using document IDs for retrieval:', docIds);
 
-            console.log(this._doc_ids());
             // Query the Pinecone index using the embedding and filter by document IDs.
+            // We'll query based on document IDs that are registered in the document manager
             const queryResponse: QueryResponse = await this.index.query({
                 vector: queryEmbedding,
                 filter: {
-                    doc_id: { $in: this._doc_ids() },
+                    doc_id: { $in: docIds },
                 },
                 topK,
                 includeValues: true,
                 includeMetadata: true,
             });
-            console.log(queryResponse);
-
-            // Map the results into RAGChunks and return them.
-            return queryResponse.matches.map(
-                match =>
-                    ({
-                        id: match.id,
-                        values: match.values as number[],
-                        metadata: match.metadata as {
-                            text: string;
-                            type: string;
-                            original_document: string;
-                            file_path: string;
-                            doc_id: string;
-                            location: string;
-                            start_page: number;
-                            end_page: number;
-                        },
-                    }) as RAGChunk
-            );
+            console.log(`Found ${queryResponse.matches.length} matching chunks`);
+
+            // For each retrieved chunk, make sure its chunk ID is mapped to its parent document in the
+            // document manager; this is only done when the chunk's doc_id is already tracked there
+            const processedMatches = queryResponse.matches.map(match => {
+                const chunk = {
+                    id: match.id,
+                    values: match.values as number[],
+                    metadata: match.metadata as {
+                        text: string;
+                        type: string;
+                        original_document: string;
+                        file_path: string;
+                        doc_id: string;
+                        location: string;
+                        start_page: number;
+                        end_page: number;
+                    },
+                } as RAGChunk;
+
+                // Ensure the document manager knows about this chunk
+                // This is important for maintaining backwards compatibility
+                if (chunk.id && !this.docManager.getDocIdByChunkId(chunk.id)) {
+                    // If the chunk ID isn't registered but we have a doc_id in metadata
+                    if (chunk.metadata.doc_id && this.docManager.has(chunk.metadata.doc_id)) {
+                        // Register the chunk with its parent document
+                        this.docManager.registerChunkIds(chunk.metadata.doc_id, [chunk.id]);
+                    }
+                }
+
+                return chunk;
+            });
+
+            return processedMatches;
         } catch (error) {
             console.error(`Error retrieving chunks: ${error}`);
             return [];
-- 
cgit v1.2.3-70-g09d2


From 393b7f8286422c933102449eba1ba82874a48896 Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Sun, 27 Apr 2025 14:57:39 -0400
Subject: improved consistency across doc types and parsing

---
 src/client/documents/Documents.ts                  |   1 +
 .../views/nodes/chatbot/agentsystem/Agent.ts       |  15 +-
 .../nodes/chatbot/chatboxcomponents/ChatBox.tsx    | 176 +++++++++-------
 .../chatbot/chatboxcomponents/ProgressBar.scss     |  40 +++-
 .../nodes/chatbot/utils/AgentDocumentManager.ts    | 234 ++++++++++++++++++++-
 .../views/nodes/chatbot/vectorstore/Vectorstore.ts |  49 ++---
 6 files changed, 390 insertions(+), 125 deletions(-)

(limited to 'src/client/views/nodes/chatbot/utils')

diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts
index 317bb7feb..f87bd7092 100644
--- a/src/client/documents/Documents.ts
+++ b/src/client/documents/Documents.ts
@@ -273,6 +273,7 @@ export class DocumentOptions {
     _layout_reflowHorizontal?: BOOLt = new BoolInfo('permit horizontal resizing with content reflow');
     _layout_noSidebar?: BOOLt = new BoolInfo('whether to display the sidebar toggle button');
     layout_boxShadow?: string; // box-shadow css string OR "standard" to use dash standard box shadow
+    _iframe_sandbox?: STRt = new StrInfo('sandbox attributes for iframes in web documents (e.g., allow-scripts, allow-same-origin)');
     layout_maxShown?: NUMt = new NumInfo('maximum number of children to display at one time (see multicolumnview)');
     _layout_columnWidth?: NUMt = new NumInfo('width of table column', false);
     _layout_columnCount?: NUMt = new NumInfo('number of columns in a masonry view');
diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
index 80fdb6533..24471bf5b 100644
--- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
@@ -41,7 +41,6 @@ export class Agent {
     private interMessages: AgentMessage[] = [];
     private vectorstore: Vectorstore;
     private _history: () => string;
-    private _summaries: () => string;
     private _csvData: () => { filename: string; id: string; text: string }[];
     private actionNumber: number = 0;
     private thoughtNumber: number = 0;
@@ -54,11 +53,13 @@ export class Agent {
     /**
      * The constructor initializes the agent with the vector store and toolset, and sets up the OpenAI client.
      * @param _vectorstore Vector store instance for document storage and retrieval.
-     * @param summaries A function to retrieve document summaries.
+     * @param summaries A function to retrieve document summaries (deprecated, now using docManager directly).
      * @param history A function to retrieve chat history.
      * @param csvData A function to retrieve CSV data linked to the assistant.
-     * @param addLinkedUrlDoc A function to add a linked document from a URL.
+     * @param getLinkedUrlDocId A function to get document IDs from URLs.
+     * @param createImage A function to create images in the dashboard.
      * @param createCSVInDash A function to create a CSV document in the dashboard.
+     * @param docManager The document manager instance.
      */
     constructor(
         _vectorstore: Vectorstore,
@@ -74,7 +75,6 @@ export class Agent {
         this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true });
         this.vectorstore = _vectorstore;
         this._history = history;
-        this._summaries = summaries;
         this._csvData = csvData;
         this._docManager = docManager;
 
@@ -124,7 +124,12 @@ export class Agent {
 
         // Retrieve chat history and generate system prompt
         const chatHistory = this._history();
-        const systemPrompt = getReactPrompt(Object.values(this.tools), this._summaries, chatHistory);
+        // Get document summaries directly from document manager
+        const documentSummaries = this._docManager.getAllDocumentSummaries();
+        // Create a function that returns document summaries for the prompt
+        const getSummaries = () => documentSummaries;
+        // Generate the system prompt with the summaries
+        const systemPrompt = getReactPrompt(Object.values(this.tools), getSummaries, chatHistory);
 
         // Initialize intermediate messages
         this.interMessages = [{ role: 'system', content: systemPrompt }];
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index b11bf7405..ba30cb42b 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -18,7 +18,7 @@ import { Doc, DocListCast, Opt } from '../../../../../fields/Doc';
 import { DocData, DocViews } from '../../../../../fields/DocSymbols';
 import { RichTextField } from '../../../../../fields/RichTextField';
 import { ScriptField } from '../../../../../fields/ScriptField';
-import { CsvCast, DocCast, NumCast, PDFCast, RTFCast, StrCast } from '../../../../../fields/Types';
+import { CsvCast, DocCast, NumCast, PDFCast, RTFCast, StrCast, VideoCast, AudioCast } from '../../../../../fields/Types';
 import { DocUtils } from '../../../../documents/DocUtils';
 import { CollectionViewType, DocumentType } from '../../../../documents/DocumentTypes';
 import { Docs, DocumentOptions } from '../../../../documents/Documents';
@@ -48,7 +48,14 @@ import { AgentDocumentManager } from '../utils/AgentDocumentManager';
 
 dotenv.config();
 
-export type parsedDocData = { doc_type: string; data: unknown };
+export type parsedDocData = {
+    doc_type: string;
+    data: unknown;
+    _disable_resource_loading?: boolean;
+    _sandbox_iframe?: boolean;
+    _iframe_sandbox?: string;
+    data_useCors?: boolean;
+};
 export type parsedDoc = DocumentOptions & parsedDocData;
 /**
  * ChatBox is the main class responsible for managing the interaction between the user and the assistant,
@@ -150,7 +157,14 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     @action
     addDocToVectorstore = async (newLinkedDoc: Doc) => {
         try {
-            this._isUploadingDocs = true;
+            const isAudioOrVideo = VideoCast(newLinkedDoc.data)?.url?.pathname || AudioCast(newLinkedDoc.data)?.url?.pathname;
+
+            // Set UI state to show the processing overlay
+            runInAction(() => {
+                this._isUploadingDocs = true;
+                this._uploadProgress = 0;
+                this._currentStep = isAudioOrVideo ? 'Preparing media file...' : 'Processing document...';
+            });
 
             // Process the document first to ensure it has a valid ID
             this.docManager.processDocument(newLinkedDoc);
@@ -158,15 +172,36 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
             // Add the document to the vectorstore which will also register chunks
             await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress);
 
-            // No longer needed as documents are tracked by the AgentDocumentManager
-            // this._linked_docs_to_add.add(newLinkedDoc);
+            // Give a slight delay to show the completion message
+            if (this._uploadProgress === 100) {
+                await new Promise(resolve => setTimeout(resolve, 1000));
+            }
 
-            this._isUploadingDocs = false;
+            // Reset UI state
+            runInAction(() => {
+                this._isUploadingDocs = false;
+                this._uploadProgress = 0;
+                this._currentStep = '';
+            });
 
             return true;
         } catch (err) {
             console.error('Error adding document to vectorstore:', err);
-            this._isUploadingDocs = false;
+
+            // Show error in UI
+            runInAction(() => {
+                this._currentStep = `Error: ${err instanceof Error ? err.message : 'Failed to process document'}`;
+            });
+
+            await new Promise(resolve => setTimeout(resolve, 2000));
+
+            // Reset UI state
+            runInAction(() => {
+                this._isUploadingDocs = false;
+                this._uploadProgress = 0;
+                this._currentStep = '';
+            });
+
             return false;
         }
     };
@@ -178,8 +213,15 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
      */
     @action
     updateProgress = (progress: number, step: string) => {
-        this._uploadProgress = progress;
+        // Clamp progress to the 0-100 range before storing it
+        const validProgress = Math.min(Math.max(0, progress), 100);
+        this._uploadProgress = validProgress;
         this._currentStep = step;
+
+        // Log progress for debugging; skipped when NODE_ENV is 'production'
+        if (process.env.NODE_ENV !== 'production') {
+            console.log(`Progress: ${validProgress}%, Step: ${step}`);
+        }
     };
 
     /**
@@ -453,7 +495,19 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 case supportedDocTypes.image:      return Docs.Create.ImageDocument(data as string, options);
                 case supportedDocTypes.equation:   return Docs.Create.EquationDocument(data as string, options);
                 case supportedDocTypes.notetaking: return Docs.Create.NoteTakingDocument([], options);
-                case supportedDocTypes.web:        return Docs.Create.WebDocument(data as string, { ...options, data_useCors: true });
+                case supportedDocTypes.web:        
+                    // Create web document with enhanced safety options
+                    const webOptions = { 
+                        ...options, 
+                        data_useCors: true
+                    };
+                    
+                    // If iframe_sandbox was passed from AgentDocumentManager, add it to the options
+                    if ('_iframe_sandbox' in options) {
+                        (webOptions as any)._iframe_sandbox = options._iframe_sandbox;
+                    }
+                    
+                    return Docs.Create.WebDocument(data as string, webOptions);
                 case supportedDocTypes.dataviz:    return Docs.Create.DataVizDocument('/users/rz/Downloads/addresses.csv', options);
                 case supportedDocTypes.pdf:        return Docs.Create.PdfDocument(data as string, options);
                 case supportedDocTypes.video:      return Docs.Create.VideoDocument(data as string, options);
@@ -607,65 +661,36 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 return;
             }
 
-            // Process the chunk data
-            let docChunkSimpl: { chunks: SimplifiedChunk[] } = { chunks: [] };
-            try {
-                docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl) || '{"chunks":[]}');
-            } catch (e) {
-                console.error(`Error parsing chunk_simpl for the found document:`, e);
+            // Get the simplified chunk using the document manager
+            const foundChunk = this.docManager.getSimplifiedChunkById(doc, chunkId);
+            if (!foundChunk) {
+                console.warn(`Chunk not found in document for chunk ID: ${chunkId}`);
+                DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
                 return;
             }
 
-            const foundChunk = docChunkSimpl.chunks.find((chunk: SimplifiedChunk) => chunk.chunkId === chunkId);
+            console.log(`Found chunk in document:`, foundChunk);
 
             // Handle different chunk types
-            if (foundChunk) {
-                console.log(`Found chunk in document:`, foundChunk);
-                if (foundChunk.chunkType === CHUNK_TYPE.AUDIO || foundChunk.chunkType === CHUNK_TYPE.VIDEO) {
-                    const directMatchSegmentStart = this.getDirectMatchingSegmentStart(doc, citation.direct_text || '', foundChunk.indexes || []);
-                    if (directMatchSegmentStart) {
-                        await this.goToMediaTimestamp(doc, directMatchSegmentStart, foundChunk.chunkType);
-                    } else {
-                        console.error('No direct matching segment found for the citation.');
-                    }
-                } else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) {
-                    this.handleOtherChunkTypes(foundChunk, citation, doc);
-                } else if (foundChunk.chunkType === CHUNK_TYPE.TEXT) {
-                    // Find text from the document's chunks metadata
-                    let chunkText = '';
-
-                    try {
-                        // We already parsed the chunks earlier, so use that
-                        const matchingChunk = docChunkSimpl.chunks.find(c => c.chunkId === foundChunk.chunkId);
-                        if (matchingChunk && 'text' in matchingChunk) {
-                            // If the text property exists on the chunk (even though it's not in the type)
-                            chunkText = String(matchingChunk['text'] || '');
-                        }
-                    } catch (e) {
-                        console.error('Error getting chunk text:', e);
-                    }
-
-                    // Default text if none found
-                    if (!chunkText) {
-                        chunkText = 'Text content not available';
-                    }
-
-                    this._citationPopup = {
-                        text: chunkText,
-                        visible: true,
-                    };
-                }
-                // Handle URL chunks
-                else if (foundChunk.chunkType === CHUNK_TYPE.URL) {
-                    if (foundChunk.url) {
-                        DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
-                        console.log(`Navigated to web document with URL: ${foundChunk.url}`);
-                    } else {
-                        console.warn('URL chunk missing URL:', foundChunk);
-                    }
+            if (foundChunk.chunkType === CHUNK_TYPE.AUDIO || foundChunk.chunkType === CHUNK_TYPE.VIDEO) {
+                const directMatchSegmentStart = this.getDirectMatchingSegmentStart(doc, citation.direct_text || '', foundChunk.indexes || []);
+                if (directMatchSegmentStart) {
+                    await this.goToMediaTimestamp(doc, directMatchSegmentStart, foundChunk.chunkType);
+                } else {
+                    console.error('No direct matching segment found for the citation.');
                 }
+            } else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) {
+                this.handleOtherChunkTypes(foundChunk, citation, doc);
             } else {
-                console.warn('Navigating to doc. Unable to find chunk or segments for citation', citation);
+                // Show the chunk text in citation popup
+                let chunkText = foundChunk.text || 'Text content not available';
+
+                this._citationPopup = {
+                    text: chunkText,
+                    visible: true,
+                };
+
+                // Also navigate to the document
                 DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
             }
         } catch (error) {
@@ -683,8 +708,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     getDirectMatchingSegmentStart = (doc: Doc, citationText: string, indexesOfSegments: string[]): number => {
         if (!doc || !citationText) return -1;
 
-        // Get original segments from the document
-        const original_segments = doc.original_segments ? JSON.parse(StrCast(doc.original_segments)) : [];
+        // Get original segments using document manager
+        const original_segments = this.docManager.getOriginalSegments(doc);
 
         if (!original_segments || !Array.isArray(original_segments) || original_segments.length === 0) {
             return -1;
@@ -993,18 +1018,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
      */
     @computed
     get summaries(): string {
-        const linkedDocs = Array.from(this.docManager.listDocs())
-            .map(id => {
-                const doc = this.docManager.extractDocumentMetadata(id);
-                if (doc && doc.fields && (doc.fields.layout.summary || doc.fields.data.summary)) {
-                    return doc.fields.layout.summary || doc.fields.data.summary;
-                }
-                return null;
-            })
-            .filter(Boolean)
-            .join('\n\n');
-
-        return linkedDocs;
+        // Use the document manager to get all summaries
+        return this.docManager.getAllDocumentSummaries();
     }
 
     /**
@@ -1033,7 +1048,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     // Other helper methods for retrieving document data and processing
 
     retrieveSummaries = (): string => {
-        return this.summaries;
+        return this.docManager.getAllDocumentSummaries();
     };
 
     retrieveCSVData = () => {
@@ -1068,8 +1083,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 {this._isUploadingDocs && (
                     <div className="uploading-overlay">
                         <div className="progress-container">
-                            <ProgressBar />
-                            <div className="step-name">{this._currentStep}</div>
+                            <div className="progress-bar-wrapper">
+                                <div className="progress-bar" style={{ width: `${this._uploadProgress}%` }} />
+                            </div>
+                            <div className="progress-details">
+                                <div className="progress-percentage">{Math.round(this._uploadProgress)}%</div>
+                                <div className="step-name">{this._currentStep}</div>
+                            </div>
                         </div>
                     </div>
                 )}
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss
index ff5be4a38..3a8334695 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss
@@ -58,12 +58,48 @@
     flex-direction: column;
     align-items: center;
     text-align: center;
+    width: 80%;
+    max-width: 400px;
+    background-color: white;
+    padding: 20px;
+    border-radius: 8px;
+    box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
 }
 
-.step-name {
+.progress-bar-wrapper {
+    width: 100%;
+    height: 12px;
+    background-color: #e0e0e0;
+    border-radius: 6px;
+    overflow: hidden;
+    margin-bottom: 10px;
+}
+
+.progress-bar {
+    height: 100%;
+    background-color: #4a90e2;
+    border-radius: 6px;
+    transition: width 0.5s ease;
+}
+
+.progress-details {
+    display: flex;
+    flex-direction: column;
+    align-items: center;
+    width: 100%;
+}
+
+.progress-percentage {
     font-size: 18px;
+    font-weight: bold;
     color: #333;
+    margin-bottom: 5px;
+}
+
+.step-name {
+    font-size: 16px;
+    color: #666;
     text-align: center;
     width: 100%;
-    margin-top: -10px; // Adjust to move the text closer to the spinner
+    margin-top: 5px;
 }
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index c3beebcde..cff8380db 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -14,6 +14,8 @@ import { parsedDoc } from '../chatboxcomponents/ChatBox';
 import { faThumbTackSlash } from '@fortawesome/free-solid-svg-icons';
 import { DocumentManager } from '../../../../util/DocumentManager';
 import { DocumentView } from '../../DocumentView';
+import { RAGChunk, CHUNK_TYPE } from '../types/types';
+import { runInAction } from 'mobx';
 
 /**
  * Interface representing a document in the freeform view
@@ -869,20 +871,43 @@ export class AgentDocumentManager {
                 _layout_autoHeight: true,
             };
 
-            // Use the chatBox's createDocInDash method to create and link the document
+            // Additional handling for web documents
+            if (docType === 'web') {
+                // For web documents, don't sanitize the URL here
+                // Instead, set properties to handle content safely when loaded
+                simpleDoc._disable_resource_loading = true;
+                simpleDoc._sandbox_iframe = true;
+                simpleDoc.data_useCors = true;
+
+                // Specify a more permissive sandbox to allow content to render properly
+                // but still maintain security
+                simpleDoc._iframe_sandbox = 'allow-same-origin allow-scripts allow-popups allow-forms';
+            }
+
+            // Use the chatBox's createDocInDash method to create the document
             if (!this.chatBox) {
                 throw new Error('ChatBox instance not available for creating document');
             }
-            const linkAndShowDoc = (doc: Opt<Doc>) => {
-                if (doc) {
-                    LinkManager.Instance.addLink(Docs.Create.LinkDocument(this.chatBoxDocument!, doc));
-                    this.chatBox._props.addDocument?.(doc);
-                    DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
-                }
-            };
+
             const doc = this.chatBox.whichDoc(simpleDoc, false);
             if (doc) {
-                linkAndShowDoc(doc);
+                // Use MobX runInAction to properly modify observable state
+                runInAction(() => {
+                    if (this.chatBoxDocument && doc) {
+                        // Create link and add it to the document system
+                        const linkDoc = Docs.Create.LinkDocument(this.chatBoxDocument, doc);
+                        LinkManager.Instance.addLink(linkDoc);
+
+                        // Add document to view
+                        this.chatBox._props.addDocument?.(doc);
+
+                        // Show document - defer actual display to prevent immediate resource loading
+                        setTimeout(() => {
+                            DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+                        }, 100);
+                    }
+                });
+
                 const id = this.processDocument(doc);
                 return id;
             } else {
@@ -893,6 +918,62 @@ export class AgentDocumentManager {
         }
     }
 
+    /**
+     * Rewrites fetched web markup with regular expressions so that embedding it does not
+     * eagerly pull in external resources (preloads, source maps, scripts, stylesheets).
+     * Images and iframes are marked loading="lazy"; iframes additionally get a sandbox attribute.
+     * Root-relative links (href="/...") are pointed at "#disabled-link".
+     * NOTE(review): this is pattern matching, not HTML parsing — do not treat it as a security boundary.
+     * @param content The web content to sanitize
+     * @returns The rewritten markup wrapped in a "sandboxed-web-content" div; falsy input is returned unchanged
+     */
+    private sanitizeWebContent(content: string): string {
+        if (!content) return content;
+
+        try {
+            // Replace problematic resource references that might cause errors
+            const sanitized = content
+                // Remove preload links that might cause errors
+                .replace(/<link[^>]*rel=["']preload["'][^>]*>/gi, '')
+                // Remove map file references
+                .replace(/\/\/# sourceMappingURL=.*\.map/gi, '')
+                // Remove external CSS map files references
+                .replace(/\/\*# sourceMappingURL=.*\.css\.map.*\*\//gi, '')
+                // Add sandbox to iframes
+                .replace(/<iframe/gi, '<iframe sandbox="allow-same-origin" loading="lazy"')
+                // Defer image loading instead of fetching every image immediately
+                .replace(/<img/gi, '<img loading="lazy"')
+                // Give scripts a non-executable type so they are not run
+                .replace(/<script/gi, '<script type="text/disabled"')
+                // Disable root-relative links (the pattern only ever matches values starting with "/")
+                .replace(/href=["'](\/[^"']+)["']/gi, 'href="#disabled-link"')
+                // Prevent automatic loading of CSS. The trailing [^>]*> consumes the rest of the
+                // original tag so no leftover attributes or ">" end up as stray text in the output
+                .replace(/<link[^>]*rel=["']stylesheet["'][^>]*href=["']([^"']+)["'][^>]*>/gi, (_match, href) => `<link rel="prefetch" data-original-href="${href}" />`);
+
+            // Wrap the content in a sandboxed container
+            return `
+            <div class="sandboxed-web-content">
+                <style>
+                /* Override styles to prevent external resource loading */
+                @font-face { font-family: 'disabled'; src: local('Arial'); }
+                * { font-family: system-ui, -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif !important; }
+                img, iframe, frame, embed, object { max-width: 100%; }
+                </style>
+                ${sanitized}
+            </div>`;
+        } catch (e) {
+            console.warn('Error sanitizing web content:', e);
+            // Fall back to a safe container with the content as text
+            // ('&' is escaped first so the entities produced for '<' and '>' are not escaped again)
+            return `
+            <div class="sandboxed-web-content">
+                <p>Content could not be safely displayed. Raw content:</p>
+                <pre>${content.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;')}</pre>
+            </div>`;
+        }
+    }
+
     public has(docId: string) {
         return this.documentsById.has(docId);
     }
@@ -988,4 +1069,139 @@ export class AgentDocumentManager {
         }
         return undefined;
     }
+
+    /**
+     * Stores a simplified (citation-oriented) copy of the given RAG chunks on the document's
+     * `chunk_simpl` field as a JSON string, replacing whatever was stored there before
+     * @param doc The document to add simplified chunks to
+     * @param chunks Array of full RAG chunks to simplify
+     * @param docType 'video', 'audio', 'pdf' or 'csv' select extra fields; any other value keeps only the common fields
+     * @returns The same document instance that was passed in
+     */
+    public addSimplifiedChunks(doc: Doc, chunks: RAGChunk[], docType: string): Doc {
+        if (!doc) {
+            console.error('Cannot add simplified chunks to null document');
+            return doc;
+        }
+
+        // NOTE(review): this placeholder is overwritten unconditionally below, so prior chunks are not merged
+        if (!doc.chunk_simpl) {
+            doc.chunk_simpl = JSON.stringify({ chunks: [] });
+        }
+
+        // Build one simplified record per chunk; the extra fields depend on docType
+        const simplifiedChunks = chunks.map(chunk => {
+            // Common properties across all chunk types
+            const baseChunk = {
+                chunkId: chunk.id,
+                text: chunk.metadata.text,
+                doc_id: chunk.metadata.doc_id,
+                chunkType: chunk.metadata.type || CHUNK_TYPE.TEXT,
+            };
+
+            // Add type-specific properties
+            if (docType === 'video' || docType === 'audio') {
+                return {
+                    ...baseChunk,
+                    start_time: chunk.metadata.start_time,
+                    end_time: chunk.metadata.end_time,
+                    indexes: chunk.metadata.indexes,
+                    chunkType: docType === 'video' ? CHUNK_TYPE.VIDEO : CHUNK_TYPE.AUDIO, // overrides the chunkType from baseChunk
+                };
+            } else if (docType === 'pdf') {
+                return {
+                    ...baseChunk,
+                    startPage: chunk.metadata.start_page,
+                    endPage: chunk.metadata.end_page,
+                    location: chunk.metadata.location,
+                };
+            } else if (docType === 'csv') {
+                return {
+                    ...baseChunk,
+                    rowStart: (chunk.metadata as any).row_start,
+                    rowEnd: (chunk.metadata as any).row_end,
+                    colStart: (chunk.metadata as any).col_start,
+                    colEnd: (chunk.metadata as any).col_end,
+                };
+            } else {
+                // Default for other document types
+                return baseChunk;
+            }
+        });
+
+        // Serialize all simplified chunks in a single write (this replaces the previous value)
+        doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks });
+
+        return doc;
+    }
+
+    /**
+     * Gets the simplified chunks stored as JSON in the document's `chunk_simpl` field
+     * @param doc The document to get simplified chunks from
+     * @returns Array of simplified chunks; empty when the field is missing, malformed, or has no `chunks` array
+     */
+    public getSimplifiedChunks(doc: Doc): any[] {
+        if (!doc || !doc.chunk_simpl) {
+            return [];
+        }
+        // Callers use array methods on the result, so anything that is not an array is treated as "no chunks"
+        try {
+            const parsed = JSON.parse(StrCast(doc.chunk_simpl));
+            return Array.isArray(parsed?.chunks) ? parsed.chunks : [];
+        } catch (e) {
+            console.error('Error parsing simplified chunks:', e);
+            return [];
+        }
+    }
+
+    /**
+     * Looks up one simplified chunk on a document by its chunk ID
+     * @param doc The document whose simplified chunks are searched
+     * @param chunkId The chunk ID to compare against each chunk's `chunkId`
+     * @returns The first matching simplified chunk, or undefined when there is none
+     */
+    public getSimplifiedChunkById(doc: Doc, chunkId: string): any | undefined {
+        const hasRequestedId = (candidate: any) => candidate.chunkId === chunkId;
+        return this.getSimplifiedChunks(doc).find(hasRequestedId);
+    }
+
+    /**
+     * Reads the media segments stored as JSON in the document's `original_segments` field
+     * @param doc The document containing original media segments
+     * @returns The parsed segments, or an empty array when the field is missing, empty or not valid JSON
+     */
+    public getOriginalSegments(doc: Doc): any[] {
+        if (!doc?.original_segments) return [];
+
+        let segments: any[];
+        try {
+            segments = JSON.parse(StrCast(doc.original_segments)) || [];
+        } catch (e) {
+            console.error('Error parsing original segments:', e);
+            segments = [];
+        }
+        return segments;
+    }
+
+    /**
+     * Gets all document summaries combined into a single string.
+     * Each document contributes at most once, even when several IDs map to it.
+     * @returns Non-empty summaries joined by blank lines ('' when there are none)
+     */
+    public getAllDocumentSummaries(): string {
+        const seen = new Set<unknown>();
+        const summaries: string[] = [];
+
+        for (const id of this.documentsById.keys()) {
+            const doc = this.getDocument(id);
+            // documentsById can hold several IDs for one document (registerChunkIds adds chunk-ID entries)
+            if (!doc || seen.has(doc)) continue;
+            seen.add(doc);
+
+            // Try to get summary from either the document or its data document
+            const summary = doc.summary || (doc[DocData] && doc[DocData].summary);
+            if (summary) summaries.push(StrCast(summary));
+        }
+
+        return summaries.filter(Boolean).join('\n\n');
+    }
 }
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index 4512ae3e6..4268c0180 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -103,7 +103,7 @@ export class Vectorstore {
             const local_file_path: string = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname ?? VideoCast(doc.data)?.url?.pathname ?? AudioCast(doc.data)?.url?.pathname;
 
             if (!local_file_path) {
-                console.log('Invalid file path.');
+                console.log('Not adding to vectorstore. Invalid file path for vectorstore addition.');
                 return;
             }
 
@@ -112,7 +112,11 @@ export class Vectorstore {
 
             if (isAudioOrVideo) {
                 console.log('Processing media file...');
+                progressCallback(10, 'Preparing media file for transcription...');
+
+                // Post to processMediaFile endpoint to get the transcript
                 const response = await Networking.PostToServer('/processMediaFile', { fileName: path.basename(local_file_path) });
+                progressCallback(60, 'Transcription completed. Processing transcript...');
 
                 // Type assertion to handle the response properties
                 const typedResponse = response as {
@@ -135,6 +139,7 @@ export class Vectorstore {
                         input: texts,
                         encoding_format: 'float',
                     });
+                    progressCallback(85, 'Embeddings generated. Finalizing document...');
 
                     doc.original_segments = JSON.stringify(typedResponse.full);
                     const doc_id = uuidv4();
@@ -154,7 +159,7 @@ export class Vectorstore {
                         purpose: '',
                         file_name: local_file_path,
                         num_pages: 0,
-                        summary: '',
+                        summary: summary,
                         chunks: segmentedTranscript.map((chunk, index) => ({
                             id: chunkIds[index], // Use pre-generated chunk ID
                             values: (embeddingsResponse.data as Embedding[])[index].embedding, // Assign embedding
@@ -171,23 +176,17 @@ export class Vectorstore {
                         })),
                         type: 'media',
                     };
+                    progressCallback(95, 'Adding document to vectorstore...');
                 } catch (error) {
                     console.error('Error generating embeddings:', error);
+                    doc.ai_document_status = 'ERROR';
                     throw new Error('Embedding generation failed');
                 }
 
                 doc.segmented_transcript = JSON.stringify(segmentedTranscript);
-                // Simplify chunks for storage - ensure simplified chunks use EXACTLY the same IDs
-                const simplifiedChunks = result.chunks.map(chunk => ({
-                    chunkId: chunk.id, // Use the exact same ID as the full chunk
-                    start_time: chunk.metadata.start_time,
-                    end_time: chunk.metadata.end_time,
-                    indexes: chunk.metadata.indexes,
-                    chunkType: CHUNK_TYPE.VIDEO,
-                    text: chunk.metadata.text,
-                    doc_id: chunk.metadata.doc_id, // Include parent doc_id for completeness
-                }));
-                doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks });
+                // Use doc manager to add simplified chunks
+                const docType = local_file_path.endsWith('.mp3') ? 'audio' : 'video';
+                this.docManager.addSimplifiedChunks(doc, result.chunks, docType);
             } else {
                 // Process regular document
                 console.log('Processing regular document...');
@@ -220,30 +219,18 @@ export class Vectorstore {
                 // Register chunks with the document manager
                 this.docManager.registerChunkIds(result.doc_id, chunkIds);
 
-                if (!doc.chunk_simpl) {
-                    doc.chunk_simpl = JSON.stringify({ chunks: [] });
-                }
+                // Use doc manager to add simplified chunks - determine document type from file extension
+                const fileExt = path.extname(local_file_path).toLowerCase();
+                const docType = fileExt === '.pdf' ? 'pdf' : fileExt === '.csv' ? 'csv' : 'text';
+                this.docManager.addSimplifiedChunks(doc, result.chunks, docType);
+
                 doc.summary = result.summary;
                 doc.ai_purpose = result.purpose;
-
-                result.chunks.forEach((chunk: RAGChunk) => {
-                    const chunkToAdd = {
-                        chunkId: chunk.id, // Ensure we use the exact same ID
-                        startPage: chunk.metadata.start_page,
-                        endPage: chunk.metadata.end_page,
-                        location: chunk.metadata.location,
-                        chunkType: chunk.metadata.type as CHUNK_TYPE,
-                        text: chunk.metadata.text,
-                        doc_id: chunk.metadata.doc_id, // Include parent doc_id for consistency
-                    };
-                    const new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl));
-                    new_chunk_simpl.chunks = new_chunk_simpl.chunks.concat(chunkToAdd);
-                    doc.chunk_simpl = JSON.stringify(new_chunk_simpl);
-                });
             }
 
             // Index the document
             await this.indexDocument(result);
+            progressCallback(100, 'Document added successfully!');
 
             // Preserve existing metadata updates
             if (!doc.vectorstore_id) {
-- 
cgit v1.2.3-70-g09d2


From 256cd13bd258c18a805b1e9c6a6596d8d9e0cf4b Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Thu, 8 May 2025 15:14:04 -0400
Subject: fix: fixed getDocumentMetadata to actually work correctly for getting
 all documents.

---
 src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'src/client/views/nodes/chatbot/utils')

diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index cff8380db..01815baec 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -733,14 +733,19 @@ export class AgentDocumentManager {
             return this.extractDocumentMetadata(documentId);
         } else {
             // Get metadata for all documents
-            const documentsMetadata: Record<string, any> = {};
+            const documentsMetadata: Record<string, Record<string, any>> = {};
             for (const documentId of this.documentsById.keys()) {
-                documentsMetadata.add(this.extractDocumentMetadata(documentId));
+                const metadata = this.extractDocumentMetadata(documentId);
+                if (metadata) {
+                    documentsMetadata[documentId] = metadata;
+                } else {
+                    console.warn(`No metadata found for document with ID: ${documentId}`);
+                }
             }
             return {
                 documentCount: this.documentsById.size,
                 documents: documentsMetadata,
-                fieldDefinitions: this.fieldMetadata,
+                //fieldDefinitions: this.fieldMetadata, // TODO: remove this, if fieldDefinitions are not needed.
             };
         }
     }
-- 
cgit v1.2.3-70-g09d2


From 0a05616fb9f685dc8534db4949a6f7ad6b85eadb Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Fri, 9 May 2025 15:31:16 -0400
Subject: fix: making sure the assistant document manager persists

---
 .../nodes/chatbot/chatboxcomponents/ChatBox.tsx    |  2 +-
 .../nodes/chatbot/utils/AgentDocumentManager.ts    | 65 +++++++++++++++-------
 2 files changed, 46 insertions(+), 21 deletions(-)

(limited to 'src/client/views/nodes/chatbot/utils')

diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 490739be6..d45c6c936 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -1232,5 +1232,5 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
  */
 Docs.Prototypes.TemplateMap.set(DocumentType.CHAT, {
     layout: { view: ChatBox, dataField: 'data' },
-    options: { acl: '', _layout_fitWidth: true, chat: '', chat_history: '', chat_thread_id: '', chat_assistant_id: '', chat_vector_store_id: '' },
+    options: { acl: '', _layout_fitWidth: true, chat: '', chat_history: '', chat_thread_id: '', chat_assistant_id: '', chat_vector_store_id: '', _forceActive: true },
 });
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index 01815baec..a27220898 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -1,21 +1,19 @@
-import { ChatBox } from '../chatboxcomponents/ChatBox';
-import { Doc, FieldType, Opt } from '../../../../../fields/Doc';
-import { DocData } from '../../../../../fields/DocSymbols';
-import { Observation } from '../types/types';
-import { ParametersType, ToolInfo, Parameter } from '../types/tool_types';
-import { BaseTool } from '../tools/BaseTool';
-import { Docs, DocumentOptions } from '../../../../documents/Documents';
-import { CollectionFreeFormDocumentView } from '../../CollectionFreeFormDocumentView';
+import { action, makeObservable, observable, ObservableMap, reaction, runInAction } from 'mobx';
+import { observer } from 'mobx-react';
 import { v4 as uuidv4 } from 'uuid';
-import { LinkManager, UPDATE_SERVER_CACHE } from '../../../../util/LinkManager';
+import { Doc, StrListCast } from '../../../../../fields/Doc';
+import { DocData } from '../../../../../fields/DocSymbols';
+import { Id } from '../../../../../fields/FieldSymbols';
+import { List } from '../../../../../fields/List';
 import { DocCast, StrCast } from '../../../../../fields/Types';
-import { supportedDocTypes } from '../types/tool_types';
-import { parsedDoc } from '../chatboxcomponents/ChatBox';
-import { faThumbTackSlash } from '@fortawesome/free-solid-svg-icons';
+import { DocServer } from '../../../../DocServer';
+import { Docs, DocumentOptions } from '../../../../documents/Documents';
 import { DocumentManager } from '../../../../util/DocumentManager';
+import { LinkManager, UPDATE_SERVER_CACHE } from '../../../../util/LinkManager';
 import { DocumentView } from '../../DocumentView';
-import { RAGChunk, CHUNK_TYPE } from '../types/types';
-import { runInAction } from 'mobx';
+import { ChatBox, parsedDoc } from '../chatboxcomponents/ChatBox';
+import { supportedDocTypes } from '../types/tool_types';
+import { CHUNK_TYPE, RAGChunk } from '../types/types';
 
 /**
  * Interface representing a document in the freeform view
@@ -29,7 +27,7 @@ interface AgentDocument {
  * Class to manage documents in a freeform view
  */
 export class AgentDocumentManager {
-    private documentsById: Map<string, AgentDocument>;
+    @observable private documentsById: ObservableMap<string, AgentDocument>;
     private chatBox: ChatBox;
     private chatBoxDocument: Doc | null = null;
     private fieldMetadata: Record<string, any> = {};
@@ -40,9 +38,34 @@ export class AgentDocumentManager {
      * @param templateDocument The document that serves as a template for new documents
      */
     constructor(chatBox: ChatBox) {
-        this.documentsById = new Map<string, AgentDocument>();
+        makeObservable(this);
+        const agentDoc = DocCast(chatBox.Document.agentDocument) ?? new Doc();
+        agentDoc.title = chatBox.Document.title + '_agentDocument';
+        chatBox.Document.agentDocument = agentDoc;
+        this.documentsById = StrListCast(agentDoc.mapping).reduce((mapping, content) => {
+            const [id, layoutId, docId] = content.split(':'); // persisted entry format: "id:layoutDocId:dataDocId"
+            const layoutDoc = DocServer.GetCachedRefField(layoutId);
+            const dataDoc = DocServer.GetCachedRefField(docId);
+            if (!layoutDoc || !dataDoc) {
+                console.warn(`Document with ID ${id} not found in mapping`);
+            } else {
+                mapping.set(id, { layoutDoc, dataDoc });
+            }
+            return mapping;
+        }, new ObservableMap<string, AgentDocument>());
+        console.log(`AgentDocumentManager initialized with ${this.documentsById.size} documents`);
         this.chatBox = chatBox;
         this.chatBoxDocument = chatBox.Document;
+
+        reaction(
+            () => this.documentsById.values(),
+            () => { // write entries as "id:layoutDocId:dataDocId" — must match the order parsed when restoring above
+                if (this.chatBoxDocument && DocCast(this.chatBoxDocument.agentDocument)) {
+                    DocCast(this.chatBoxDocument.agentDocument)!.mapping = new List<string>(Array.from(this.documentsById.entries()).map(([id, agent]) => `${id}:${agent.layoutDoc[Id]}:${agent.dataDoc[Id]}`));
+                }
+            }
+            //{ fireImmediately: true }
+        );
         this.processDocument(this.chatBoxDocument);
         this.initializeFieldMetadata();
     }
@@ -171,6 +194,7 @@ export class AgentDocumentManager {
      * Process a document by ensuring it has an ID and adding it to the appropriate collections
      * @param doc The document to process
      */
+    @action
     public processDocument(doc: Doc): string {
         // Ensure document has a persistent ID
         const docId = this.ensureDocumentId(doc);
@@ -997,6 +1021,7 @@ export class AgentDocumentManager {
      * @param customId The custom ID to assign to the document
      * @returns The customId that was assigned
      */
+    @action
     public addCustomId(doc: Doc, customId: string): string {
         if (!doc) {
             console.error('Cannot add null document with custom ID');
@@ -1030,6 +1055,7 @@ export class AgentDocumentManager {
      * @param docId The parent document ID
      * @param chunkIds Array of chunk IDs associated with this document
      */
+    @action
     public registerChunkIds(docId: string, chunkIds: string[]): void {
         // Get the document if it exists
         const docInfo = this.documentsById.get(docId);
@@ -1048,9 +1074,8 @@ export class AgentDocumentManager {
             const updatedIds = [...new Set([...existingIds, ...chunkIds])]; // Remove duplicates
             doc.chunk_ids = JSON.stringify(updatedIds);
         }
-
-        // Ensure each chunk ID can be linked back to its parent document
-        chunkIds.forEach(chunkId => {
+        for (const chunkId of chunkIds) {
+            // Ensure each chunk ID can be linked back to its parent document
             // Store a mapping from chunk ID to parent document ID
             // This allows us to easily find a document by any of its chunk IDs
             if (!this.documentsById.has(chunkId)) {
@@ -1059,7 +1084,7 @@ export class AgentDocumentManager {
                     dataDoc: docInfo.dataDoc,
                 });
             }
-        });
+        }
     }
 
     /**
-- 
cgit v1.2.3-70-g09d2


From 3c28aa3a706869d818bc8a089e8d1a53f7234bc0 Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Sun, 11 May 2025 11:13:09 -0400
Subject: old screenshot?

---
 src/client/views/nodes/WebBox.tsx                            | 3 ++-
 src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'src/client/views/nodes/chatbot/utils')

diff --git a/src/client/views/nodes/WebBox.tsx b/src/client/views/nodes/WebBox.tsx
index 0f0008700..045af7ecd 100644
--- a/src/client/views/nodes/WebBox.tsx
+++ b/src/client/views/nodes/WebBox.tsx
@@ -1198,7 +1198,7 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         TraceMobx();
         // const previewScale = this._previewNativeWidth ? 1 - this.sidebarWidth() / this._previewNativeWidth : 1;
         const pointerEvents = this.layoutDoc._lockedPosition ? 'none' : (this._props.pointerEvents?.() as Property.PointerEvents | undefined);
-        // const scale = previewScale * (this._props.NativeDimScaling?.() || 1);
+        const scale = this._props.NativeDimScaling?.() || 1;
         return (
             <div
                 className="webBox-outerContent"
@@ -1295,6 +1295,7 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         TraceMobx();
         const containerWidth = NumCast(this.layoutDoc._width) || this._props.PanelWidth();
         const pointerEvents = this.layoutDoc._lockedPosition ? 'none' : (this._props.pointerEvents?.() as Property.PointerEvents);
+        const scale = this._props.NativeDimScaling?.() || 1;
 
         // Force the component to be square
         this.layoutDoc._height = containerWidth;
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index a27220898..14cffcb70 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -131,7 +131,7 @@ export class AgentDocumentManager {
                 console.log(`Found ${linkedDocs.length} linked documents via LinkManager`);
 
                 // Process the linked documents
-                linkedDocs.forEach((doc: Doc) => {
+                linkedDocs.forEach((doc: Doc | undefined) => {
                     if (doc) {
                         this.processDocument(doc);
                         console.log('Processed linked document:', doc.id, doc.title, doc.type);
-- 
cgit v1.2.3-70-g09d2


From a5d7f5c38192b91b7df3bd6ecace5ba7365449a6 Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Sun, 11 May 2025 13:42:00 -0400
Subject: Made it so chunk Ids are separately managed and made sure the doc id
 is consistent and not created in python spawn

---
 src/client/views/nodes/WebBox.scss                 | 241 ++++----
 src/client/views/nodes/WebBox.tsx                  | 605 +++++++--------------
 src/client/views/nodes/WebBoxRenderer.js           | 103 ++++
 .../views/nodes/chatbot/agentsystem/prompts.ts     |   4 +-
 .../nodes/chatbot/chatboxcomponents/ChatBox.tsx    |  36 +-
 .../nodes/chatbot/tools/DocumentMetadataTool.ts    |  38 +-
 src/client/views/nodes/chatbot/tools/RAGTool.ts    |   5 +-
 .../nodes/chatbot/utils/AgentDocumentManager.ts    | 213 +++-----
 .../views/nodes/chatbot/vectorstore/Vectorstore.ts |  33 +-
 src/server/ApiManagers/AssistantManager.ts         |   8 +-
 src/server/chunker/pdf_chunker.py                  |  13 +-
 11 files changed, 548 insertions(+), 751 deletions(-)

(limited to 'src/client/views/nodes/chatbot/utils')

diff --git a/src/client/views/nodes/WebBox.scss b/src/client/views/nodes/WebBox.scss
index a1991d1d0..77d7716f4 100644
--- a/src/client/views/nodes/WebBox.scss
+++ b/src/client/views/nodes/WebBox.scss
@@ -1,9 +1,13 @@
 @use '../global/globalCssVariables.module.scss' as global;
 
 .webBox {
+    height: 100%;
+    width: 100%;
+    top: 0;
+    left: 0;
     position: relative;
+    display: flex;
     overflow: hidden;
-    aspect-ratio: 1 / 1; // Explicitly enforce square aspect ratio
 
     .webBox-sideResizer {
         position: absolute;
@@ -16,119 +20,6 @@
     .webBox-background {
         width: 100%;
         height: 100%;
-        position: absolute;
-        top: 0;
-        left: 0;
-    }
-
-    // Simple container for screenshot
-    .webBox-screenshot-container {
-        width: 100%;
-    }
-
-    .webBox-screenshot {
-        width: 100%;
-        height: auto; // Maintain aspect ratio
-        display: block;
-        pointer-events: none;
-    }
-
-    .webBox-loading {
-        padding: 20px;
-        text-align: center;
-        color: #666;
-        background-color: #f5f5f5;
-        min-height: 200px;
-        display: flex;
-        flex-direction: column;
-        align-items: center;
-        justify-content: center;
-    }
-
-    .webBox-loading-spinner {
-        margin-top: 15px;
-        color: #1976d2;
-        font-size: 24px;
-    }
-
-    .webBox-error {
-        padding: 20px;
-        color: #d32f2f;
-        text-align: center;
-        background-color: #ffebee;
-        min-height: 200px;
-        display: flex;
-        flex-direction: column;
-        align-items: center;
-        justify-content: center;
-        gap: 15px;
-    }
-
-    .webBox-placeholder {
-        padding: 20px;
-        text-align: center;
-        color: #757575;
-        background-color: #fafafa;
-        min-height: 200px;
-        display: flex;
-        align-items: center;
-        justify-content: center;
-    }
-
-    // Basic container layout
-    .webBox-container {
-        width: 100%;
-        height: 100%;
-        position: relative;
-    }
-
-    // Simple scrollable container - vertical only
-    .webBox-outerContent {
-        width: 100%;
-        position: relative;
-        overflow-y: auto;
-        overflow-x: hidden;
-        background-color: #f5f5f5;
-
-        // Improve scrollbar styling
-        &::-webkit-scrollbar-thumb {
-            background-color: #888;
-            border-radius: 6px;
-        }
-
-        &::-webkit-scrollbar {
-            width: 8px;
-            background-color: #f5f5f5;
-        }
-    }
-
-    .webBox-innerContent {
-        width: 100%;
-        background-color: #f5f5f5;
-    }
-
-    .webBox-htmlSpan {
-        position: absolute;
-        top: 0;
-        left: 0;
-        cursor: text;
-        padding: 15px;
-        width: 100%;
-        height: 100%;
-    }
-
-    .webBox-annotationLayer {
-        position: absolute;
-        transform-origin: left top;
-        top: 0;
-        width: 100%;
-        pointer-events: none;
-        mix-blend-mode: multiply;
-    }
-
-    .webBox-annotationBox {
-        position: absolute;
-        background-color: rgba(245, 230, 95, 0.616);
     }
 
     .webBox-ui {
@@ -177,14 +68,14 @@
             }
         }
 
-        .webBox-refreshButton {
+        .webBox-nextIcon,
+        .webBox-prevIcon {
             background: #121721;
+            color: white;
             height: 20px;
             width: 25px;
             display: flex;
-            position: absolute;
-            bottom: 0;
-            right: 40px;
+            position: relative;
             align-items: center;
             justify-content: center;
             border-radius: 3px;
@@ -192,6 +83,10 @@
             padding: 0px;
         }
 
+        .webBox-overlayButton:hover {
+            background: none;
+        }
+
         .webBox-overlayCont {
             position: absolute;
             width: calc(100% - 40px);
@@ -223,7 +118,8 @@
         justify-content: center;
         border-radius: 3px;
         pointer-events: all;
-        z-index: 1;
+        z-index: 1; // so it appears on top of the document's title, if shown
+
         box-shadow: global.$standard-box-shadow;
         transition: 0.2s;
 
@@ -238,6 +134,89 @@
         opacity: 0.1;
     }
 
+    .webBox-annotationLayer {
+        position: absolute;
+        transform-origin: left top;
+        top: 0;
+        width: 100%;
+        pointer-events: none;
+        mix-blend-mode: multiply; // bcz: makes text fuzzy!
+    }
+
+    .webBox-annotationBox {
+        position: absolute;
+        background-color: rgba(245, 230, 95, 0.616);
+    }
+
+    .webBox-container {
+        transform-origin: top left;
+        width: 100%;
+        height: 100%;
+        position: absolute;
+
+        .webBox-htmlSpan {
+            position: absolute;
+            top: 0;
+            left: 0;
+            cursor: text;
+            padding: 15px;
+            height: 100%;
+        }
+
+        .webBox-cont {
+            pointer-events: none;
+        }
+
+        .webBox-cont,
+        .webBox-cont-interactive {
+            padding: 0vw;
+            position: absolute;
+            top: 0;
+            left: 0;
+            width: 100%;
+            height: 100%;
+            transform-origin: top left;
+
+            .webBox-iframe {
+                width: 100%;
+                height: 100%;
+                position: absolute;
+                top: 0;
+                left: 0;
+                body {
+                    ::selection {
+                        color: white;
+                        background: orange;
+                    }
+                }
+            }
+        }
+
+        .webBox-cont-interactive {
+            span {
+                user-select: text !important;
+            }
+        }
+
+        .webBox-outerContent {
+            width: 100%;
+            height: 100%;
+            position: absolute;
+            transform-origin: top left;
+            top: 0;
+            left: 0;
+            overflow: auto;
+
+            .webBox-innerContent {
+                position: relative;
+            }
+        }
+
+        div.webBox-outerContent::-webkit-scrollbar-thumb {
+            cursor: nw-resize;
+        }
+    }
+
     .webBox-overlay {
         width: 100%;
         height: 100%;
@@ -277,13 +256,37 @@
         width: 100%;
         height: 100%;
         position: absolute;
+        pointer-events: all;
 
         .indicator {
             position: absolute;
+            transition: background-color 0.2s ease;
+            border-radius: 2px;
 
             &.active {
                 background-color: rgba(0, 0, 0, 0.1);
+                box-shadow: 0 0 2px rgba(0, 0, 0, 0.2);
             }
         }
     }
+
+    // Add styles to hide font errors and improve user experience
+    .font-error-hidden {
+        font-family:
+            system-ui,
+            -apple-system,
+            BlinkMacSystemFont,
+            'Segoe UI',
+            Roboto,
+            Arial,
+            sans-serif !important;
+    }
+
+    // Change iframe behavior when resource loading errors occur
+    iframe.webBox-iframe {
+        &.loading-error {
+            // Make full content accessible when external resources fail
+            pointer-events: all !important;
+        }
+    }
 }
diff --git a/src/client/views/nodes/WebBox.tsx b/src/client/views/nodes/WebBox.tsx
index 045af7ecd..1e158f484 100644
--- a/src/client/views/nodes/WebBox.tsx
+++ b/src/client/views/nodes/WebBox.tsx
@@ -4,7 +4,6 @@ import { htmlToText } from 'html-to-text';
 import { action, computed, IReactionDisposer, makeObservable, observable, ObservableMap, reaction, runInAction } from 'mobx';
 import { observer } from 'mobx-react';
 import * as React from 'react';
-import axios from 'axios';
 import * as WebRequest from 'web-request';
 import { addStyleSheet, addStyleSheetRule, clearStyleSheetRules, ClientUtils, DivHeight, getWordAtPoint, lightOrDark, returnFalse, returnOne, returnZero, setupMoveUpEvents, smoothScroll } from '../../../ClientUtils';
 import { Doc, DocListCast, Field, FieldType, Opt, StrListCast } from '../../../fields/Doc';
@@ -70,20 +69,23 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     private _scrollTimer: NodeJS.Timeout | undefined;
     private _getAnchor: (savedAnnotations: Opt<ObservableMap<number, HTMLDivElement[]>>, addAsAnnotation: boolean) => Opt<Doc> = () => undefined;
 
-    @observable private _webUrl = ''; // url of the page we want to display
-    @observable private _hackHide = false;
+    @observable private _webUrl = ''; // url of the src parameter of the embedded iframe but not necessarily the rendered page - eg, when following a link, the rendered page changes but we don't want the src parameter to also change as that would cause an unnecessary re-render.
+    @observable private _hackHide = false; // apparently changing the value of the 'sandbox' prop doesn't necessarily apply it to the active iframe.  so this forces the iframe to be rebuilt when allowScripts is toggled
     @observable private _searching: boolean = false;
     @observable private _showSidebar = false;
     @observable private _webPageHasBeenRendered = false;
     @observable private _marqueeing: number[] | undefined = undefined;
-    @observable private _screenshotUrl: string | null = null; // URL to the screenshot image
-    @observable private _fullHeight: number = 0; // Full height of the webpage screenshot
-    @observable private _isLoadingScreenshot: boolean = false; // Loading state for the screenshot
+    get marqueeing() {
+        return this._marqueeing;
+    }
+    set marqueeing(val) {
+        val && this._marqueeref.current?.onInitiateSelection(val);
+        !val && this._marqueeref.current?.onTerminateSelection();
+        this._marqueeing = val;
+    }
     @observable private _iframe: HTMLIFrameElement | null = null;
     @observable private _savedAnnotations = new ObservableMap<number, (HTMLDivElement & { marqueeing?: boolean })[]>();
     @observable private _scrollHeight = NumCast(this.layoutDoc.scrollHeight);
-    @observable private _screenshotError: string | null = null; // Error message if screenshot fails
-    @observable private _loadingFromCache: boolean = false;
     @computed get _url() {
         return this.webField?.toString() || '';
     }
@@ -143,38 +145,31 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     };
 
     updateIcon = async () => {
-        if (!this._screenshotUrl) {
-            // If we don't have a screenshot yet, capture one first
-            await this.captureWebScreenshot();
-        }
-
+        if (!this._iframe) return new Promise<void>(res => res());
         const scrollTop = NumCast(this.layoutDoc._layout_scrollTop);
         const nativeWidth = NumCast(this.layoutDoc.nativeWidth);
         const nativeHeight = (nativeWidth * this._props.PanelHeight()) / this._props.PanelWidth();
-
+        let htmlString = this._iframe.contentDocument && new XMLSerializer().serializeToString(this._iframe.contentDocument);
+        if (!htmlString) {
+            htmlString = await fetch(ClientUtils.CorsProxy(this.webField!.href)).then(response => response.text());
+        }
         this.layoutDoc.thumb = undefined;
         this.Document.thumbLockout = true; // lock to prevent multiple thumb updates.
-
-        try {
-            // If we have a screenshot, use it directly for the thumbnail
-            if (this._screenshotUrl) {
-                return ClientUtils.convertDataUri(this._screenshotUrl, this.layoutDoc[Id] + '_icon_' + new Date().getTime(), true, this.layoutDoc[Id] + '_icon_').then(returnedfilename => {
+        return (CreateImage(this._webUrl.endsWith('/') ? this._webUrl.substring(0, this._webUrl.length - 1) : this._webUrl, this._iframe.contentDocument?.styleSheets ?? [], htmlString, nativeWidth, nativeHeight, scrollTop) as Promise<string>)
+            .then((dataUrl: string) => {
+                if (dataUrl.includes('<!DOCTYPE')) {
+                    console.log('BAD DATA IN THUMB CREATION');
+                    return;
+                }
+                return ClientUtils.convertDataUri(dataUrl, this.layoutDoc[Id] + '_icon_' + new Date().getTime(), true, this.layoutDoc[Id] + '_icon_').then(returnedfilename => {
                     this.Document.thumbLockout = false;
                     this.layoutDoc.thumb = new ImageField(returnedfilename);
                     this.layoutDoc.thumbScrollTop = scrollTop;
                     this.layoutDoc.thumbNativeWidth = nativeWidth;
                     this.layoutDoc.thumbNativeHeight = nativeHeight;
                 });
-            } else {
-                console.log('No screenshot available for thumbnail generation');
-                this.Document.thumbLockout = false;
-                return Promise.resolve();
-            }
-        } catch (error) {
-            console.error('Error creating thumbnail:', error);
-            this.Document.thumbLockout = false;
-            return Promise.reject(error);
-        }
+            })
+            .catch((error: object) => console.error('oops, something went wrong!', error));
     };
 
     componentDidMount() {
@@ -243,64 +238,13 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
             },
             { fireImmediately: true }
         );
-
-        // Check if we have a cached screenshot URL in metadata
-        if (this._url) {
-            this._webUrl = this._url;
-            const cachedScreenshotUrl = StrCast(this.dataDoc[this.fieldKey + '_screenshotUrl']);
-            const cachedHeight = NumCast(this.dataDoc[this.fieldKey + '_screenshotHeight']);
-
-            if (cachedScreenshotUrl && cachedHeight) {
-                // Use cached screenshot
-                this._loadingFromCache = true;
-                this._isLoadingScreenshot = true;
-
-                // Verify the cached screenshot exists by loading the image
-                const img = new Image();
-                img.onload = action(() => {
-                    this._screenshotUrl = cachedScreenshotUrl;
-                    this._fullHeight = cachedHeight;
-                    this._scrollHeight = cachedHeight;
-                    this._webPageHasBeenRendered = true;
-                    this._isLoadingScreenshot = false;
-                    this._loadingFromCache = false;
-
-                    // Apply dimensions and initial scroll
-                    if (this.layoutDoc._layout_autoHeight) {
-                        this.layoutDoc._nativeHeight = this._fullHeight;
-                        this._props.setHeight?.(this._fullHeight * (this._props.NativeDimScaling?.() || 1));
-                    }
-
-                    if (this._initialScroll !== undefined) {
-                        this.setScrollPos(this._initialScroll);
-                    }
-
-                    console.log(`Loaded cached screenshot: ${this._screenshotUrl}`);
-                });
-
-                img.onerror = action(() => {
-                    // If image fails to load, capture a new screenshot
-                    console.log('Cached screenshot not found, capturing new one');
-                    this._loadingFromCache = false;
-                    this.captureWebScreenshot();
-                });
-
-                img.src = cachedScreenshotUrl;
-            } else {
-                // No cached screenshot, capture a new one
-                this.captureWebScreenshot();
-            }
-        }
     }
     componentWillUnmount() {
-        // Clean up timers
-        if (this._scrollTimer) {
-            clearTimeout(this._scrollTimer);
-            this._scrollTimer = undefined;
-        }
-
-        // Clean up reaction disposers
+        this._iframetimeout && clearTimeout(this._iframetimeout);
+        this._iframetimeout = undefined;
         Object.values(this._disposers).forEach(disposer => disposer?.());
+        // this._iframe?.removeEventListener('wheel', this.iframeWheel, true);
+        // this._iframe?.contentDocument?.removeEventListener("pointerup", this.iframeUp);
     }
 
     private _selectionText: string = '';
@@ -414,6 +358,59 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     _textAnnotationCreator: (() => ObservableMap<number, (HTMLDivElement & { marqueeing?: boolean })[]>) | undefined;
     savedAnnotationsCreator: () => ObservableMap<number, (HTMLDivElement & { marqueeing?: boolean })[]> = () => this._textAnnotationCreator?.() || this._savedAnnotations;
 
+    @action
+    iframeMove = (e: PointerEvent) => {
+        const theclick = this.props
+            .ScreenToLocalTransform()
+            .inverse()
+            .transformPoint(e.clientX, e.clientY - NumCast(this.layoutDoc.layout_scrollTop));
+        this._marqueeref.current?.onMove(theclick);
+    };
+    @action
+    iframeUp = (e: PointerEvent) => {
+        this._iframe?.contentDocument?.removeEventListener('pointermove', this.iframeMove);
+        this.marqueeing = undefined;
+        this._getAnchor = AnchorMenu.Instance?.GetAnchor; // need to save AnchorMenu's getAnchor since a subsequent selection on another doc will overwrite this value
+        this._textAnnotationCreator = undefined;
+        this.DocumentView?.()?.cleanupPointerEvents(); // pointerup events aren't generated on containing document view, so we have to invoke it here.
+        if (this._iframe?.contentWindow && this._iframe.contentDocument && !this._iframe.contentWindow.getSelection()?.isCollapsed) {
+            const mainContBounds = ClientUtils.GetScreenTransform(this._mainCont.current!);
+            const scale = (this._props.NativeDimScaling?.() || 1) * mainContBounds.scale;
+            const sel = this._iframe.contentWindow.getSelection();
+            if (sel) {
+                this._selectionText = sel.toString();
+                AnchorMenu.Instance.setSelectedText(sel.toString());
+                this._textAnnotationCreator = () => this.createTextAnnotation(sel, !sel.isCollapsed ? sel.getRangeAt(0) : undefined);
+                AnchorMenu.Instance.jumpTo(e.clientX * scale + mainContBounds.translateX, e.clientY * scale + mainContBounds.translateY - NumCast(this.layoutDoc._layout_scrollTop) * scale);
+                // Changing which document to add the annotation to (the currently selected WebBox)
+                GPTPopup.Instance.setSidebarFieldKey(`${this._props.fieldKey}_${this._urlHash ? this._urlHash + '_' : ''}sidebar`);
+                GPTPopup.Instance.addDoc = this.sidebarAddDocument;
+            }
+        } else {
+            const theclick = this.props
+                .ScreenToLocalTransform()
+                .inverse()
+                .transformPoint(e.clientX, e.clientY - NumCast(this.layoutDoc.layout_scrollTop));
+            if (!this._marqueeref.current?.isEmpty) this._marqueeref.current?.onEnd(theclick[0], theclick[1]);
+            else {
+                if (!(e.target as HTMLElement)?.tagName?.includes('INPUT')) this.finishMarquee(theclick[0], theclick[1]);
+                this._getAnchor = AnchorMenu.Instance?.GetAnchor;
+                this.marqueeing = undefined;
+            }
+
+            ContextMenu.Instance.closeMenu();
+            ContextMenu.Instance.setIgnoreEvents(false);
+            if (e?.button === 2 || e?.altKey) {
+                e?.preventDefault();
+                e?.stopPropagation();
+                setTimeout(() => {
+                    // if menu comes up right away, the down event can still be active causing a menu item to be selected
+                    this.specificContextMenu();
+                    this.DocumentView?.().onContextMenu(undefined, theclick[0], theclick[1]);
+                });
+            }
+        }
+    };
     @action
     webClipDown = (e: React.PointerEvent) => {
         e.stopPropagation();
@@ -508,6 +505,98 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         this._scrollHeight = this._iframe?.contentDocument?.body?.scrollHeight ?? 0;
         this.addWebStyleSheetRule(this.addWebStyleSheet(this._iframe?.contentDocument), '::selection', { color: 'white', background: 'orange' }, '');
 
+        // Add error handler to suppress font CORS errors
+        if (this._iframe?.contentWindow) {
+            try {
+                // Track if any resource errors occurred
+                let hasResourceErrors = false;
+
+                // Override the console.error to filter out font CORS errors
+                const win = this._iframe.contentWindow as Window & { console: Console };
+                const originalConsoleError = win.console.error;
+                win.console.error = (...args: unknown[]) => {
+                    const errorMsg = args.map(arg => String(arg)).join(' ');
+                    if (errorMsg.includes('Access to font') && errorMsg.includes('has been blocked by CORS policy')) {
+                        // Mark that we have font errors
+                        hasResourceErrors = true;
+                        // Ignore font CORS errors
+                        return;
+                    }
+                    // Also catch other resource loading errors
+                    if (errorMsg.includes('ERR_FAILED') || errorMsg.includes('ERR_BLOCKED_BY_CLIENT')) {
+                        hasResourceErrors = true;
+                    }
+                    originalConsoleError.apply(win.console, args);
+                };
+
+                // Listen for resource loading errors
+                this._iframe.contentWindow.addEventListener(
+                    'error',
+                    (e: Event) => {
+                        const target = e.target as HTMLElement;
+                        if (target instanceof HTMLElement) {
+                            // If it's a resource that failed to load
+                            if (target.tagName === 'LINK' || target.tagName === 'IMG' || target.tagName === 'SCRIPT') {
+                                hasResourceErrors = true;
+                                // Apply error class after a short delay to allow initial content to load
+                                setTimeout(() => {
+                                    if (this._iframe && hasResourceErrors) {
+                                        this._iframe.classList.add('loading-error');
+                                    }
+                                }, 1000);
+                            }
+                        }
+                    },
+                    true
+                );
+
+                // Add fallback CSS for fonts that fail to load
+                const style = this._iframe.contentDocument?.createElement('style');
+                if (style) {
+                    style.textContent = `
+                        @font-face {
+                            font-family: 'CORS-fallback-serif';
+                            src: local('Times New Roman'), local('Georgia'), serif;
+                        }
+                        @font-face {
+                            font-family: 'CORS-fallback-sans';
+                            src: local('Arial'), local('Helvetica'), sans-serif;
+                        }
+                        /* Fallback for all fonts that fail to load */
+                        @font-face {
+                            font-display: swap !important;
+                        }
+                        
+                        /* Add a script to find and fix elements with failed fonts */
+                        @font-face {
+                            font-family: '__failed_font__';
+                            src: local('Arial');
+                            unicode-range: U+0000;
+                        }
+                    `;
+                    this._iframe.contentDocument?.head.appendChild(style);
+
+                    // Add a script to detect and fix font loading issues
+                    const script = this._iframe.contentDocument?.createElement('script');
+                    if (script) {
+                        script.textContent = `
+                            // Fix font loading issues with fallbacks
+                            setTimeout(function() {
+                                document.querySelectorAll('*').forEach(function(el) {
+                                    if (window.getComputedStyle(el).fontFamily.includes('__failed_font__')) {
+                                        el.classList.add('font-error-hidden');
+                                    }
+                                });
+                            }, 1000);
+                        `;
+                        this._iframe.contentDocument?.head.appendChild(script);
+                    }
+                }
+            } catch (e) {
+                console.log('Error setting up font error handling:', e);
+            }
+        }
+
         let href: Opt<string>;
         try {
             href = iframe?.contentWindow?.location.href;
@@ -658,23 +747,15 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 this.dataDoc[this.fieldKey + '_history'] = new List<string>([...history, this._url]);
                 this.dataDoc[this.fieldKey] = new WebField(new URL(future.pop()!));
                 this._scrollHeight = 0;
-
-                // Reset screenshot state for new URL
-                this._screenshotUrl = null;
-                this._fullHeight = 0;
-                this._isLoadingScreenshot = false;
-
                 if (this._webUrl === this._url) {
                     this._webUrl = curUrl;
                     setTimeout(
                         action(() => {
                             this._webUrl = this._url;
-                            this.captureWebScreenshot(); // Capture screenshot for new URL
                         })
                     );
                 } else {
                     this._webUrl = this._url;
-                    this.captureWebScreenshot(); // Capture screenshot for new URL
                 }
                 return true;
             }
@@ -694,18 +775,11 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 else this.dataDoc[this.fieldKey + '_future'] = new List<string>([...future, this._url]);
                 this.dataDoc[this.fieldKey] = new WebField(new URL(history.pop()!));
                 this._scrollHeight = 0;
-
-                // Reset screenshot state for new URL
-                this._screenshotUrl = null;
-                this._fullHeight = 0;
-                this._isLoadingScreenshot = false;
-
                 if (this._webUrl === this._url) {
                     this._webUrl = curUrl;
                     setTimeout(action(() => (this._webUrl = this._url)));
                 } else {
                     this._webUrl = this._url;
-                    this.captureWebScreenshot(); // Capture screenshot for new URL
                 }
                 return true;
             }
@@ -724,11 +798,10 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                     this.layoutDoc.thumbNativeWidth = undefined;
                     this.layoutDoc.thumbNativeHeight = undefined;
                 }
-
+            }
+            if (!preview) {
                 if (!dontUpdateIframe) {
                     this._webUrl = this._url;
-                    // Capture screenshot when URL changes
-                    this.captureWebScreenshot();
                 }
             }
         } catch {
@@ -737,85 +810,6 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         return true;
     };
 
-    @action
-    captureWebScreenshot = async () => {
-        if (!this._url || this._loadingFromCache) return;
-
-        try {
-            this._isLoadingScreenshot = true;
-            this._screenshotError = null;
-
-            console.log(`Capturing screenshot for URL: ${this._url}`);
-
-            try {
-                const response = await axios.post('/captureWebScreenshot', {
-                    url: this._url,
-                    width: NumCast(this.Document.nativeWidth, 1200),
-                    height: NumCast(this.Document.nativeHeight, 800),
-                    fullPage: true, // Request a full page screenshot
-                });
-
-                runInAction(() => {
-                    this._screenshotUrl = response.data.screenshotUrl;
-                    this._fullHeight = response.data.fullHeight;
-                    this._scrollHeight = response.data.fullHeight;
-                    this._webPageHasBeenRendered = true;
-                    this._isLoadingScreenshot = false;
-
-                    // Store screenshot URL and height in document metadata
-                    this.dataDoc[this.fieldKey + '_screenshotUrl'] = response.data.screenshotUrl;
-                    this.dataDoc[this.fieldKey + '_screenshotHeight'] = response.data.fullHeight;
-
-                    // Update native dimensions to match the screenshot
-                    if (!this.dataDoc[this.fieldKey + '_nativeWidth']) {
-                        this.dataDoc[this.fieldKey + '_nativeWidth'] = 1200; // Default width
-                    }
-
-                    if (!this.dataDoc[this.fieldKey + '_nativeHeight']) {
-                        this.dataDoc[this.fieldKey + '_nativeHeight'] = this._fullHeight;
-                    }
-
-                    // Set document height if needed
-                    if (this.layoutDoc._layout_autoHeight) {
-                        this.layoutDoc._nativeHeight = this._fullHeight;
-                        this._props.setHeight?.(this._fullHeight * (this._props.NativeDimScaling?.() || 1));
-                    }
-
-                    // Apply initial scroll if needed
-                    if (this._initialScroll !== undefined) {
-                        this.setScrollPos(this._initialScroll);
-                    }
-
-                    console.log(`Screenshot captured successfully: ${this._screenshotUrl} with height: ${this._fullHeight}px`);
-                });
-            } catch (error: any) {
-                // Handle error from the API
-                console.error('Error capturing screenshot:', error);
-                let errorMessage = 'Failed to capture webpage screenshot';
-
-                // Try to extract detailed error message from response
-                if (error.response && error.response.data && error.response.data.error) {
-                    errorMessage = error.response.data.error;
-                } else if (error.message) {
-                    errorMessage = error.message;
-                }
-
-                runInAction(() => {
-                    this._screenshotError = errorMessage;
-                    this._isLoadingScreenshot = false;
-                });
-            }
-        } catch (error: any) {
-            // Handle unexpected errors
-            runInAction(() => {
-                console.error('Unexpected error in captureWebScreenshot:', error);
-                this._screenshotError = 'An unexpected error occurred';
-                this._isLoadingScreenshot = false;
-            });
-        }
-    };
-
-    @action
     onWebUrlDrop = (e: React.DragEvent) => {
         const { dataTransfer } = e;
         const html = dataTransfer.getData('text/html');
@@ -830,28 +824,13 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     setData = (data: FieldType | Promise<RefField | undefined>) => {
         if (!(typeof data === 'string') && !(data instanceof WebField)) return false;
         if (Field.toString(data) === this._url) return false;
-
-        // Reset state for new URL
         this._scrollHeight = 0;
-        this._screenshotUrl = null;
-        this._fullHeight = 0;
-        this._isLoadingScreenshot = false;
-
-        // Clear stored screenshot metadata for the previous URL
-        this.dataDoc[this.fieldKey + '_screenshotUrl'] = undefined;
-        this.dataDoc[this.fieldKey + '_screenshotHeight'] = undefined;
-
         const oldUrl = this._url;
         const history = Cast(this.dataDoc[this.fieldKey + '_history'], listSpec('string'), []);
         const weburl = new WebField(Field.toString(data));
         this.dataDoc[this.fieldKey + '_future'] = new List<string>([]);
         this.dataDoc[this.fieldKey + '_history'] = new List<string>([...(history || []), oldUrl]);
         this.dataDoc[this.fieldKey] = weburl;
-
-        // Capture screenshot for the new URL
-        this._webUrl = weburl.toString();
-        this.captureWebScreenshot();
-
         return true;
     };
     onWebUrlValueKeyDown = (e: React.KeyboardEvent) => {
@@ -868,14 +847,26 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                     description: (this.layoutDoc[this.fieldKey + '_useCors'] ? "Don't Use" : 'Use') + ' Cors',
                     event: () => {
                         this.layoutDoc[this.fieldKey + '_useCors'] = !this.layoutDoc[this.fieldKey + '_useCors'];
-                        // Re-capture screenshot with the new setting
-                        this.captureWebScreenshot();
                     },
                     icon: 'snowflake',
                 });
-
-            // Remove the "Allow Scripts" option since it's not relevant for screenshots
-
+            funcs.push({
+                description: (this.dataDoc[this.fieldKey + '_allowScripts'] ? 'Prevent' : 'Allow') + ' Scripts',
+                event: () => {
+                    this.dataDoc[this.fieldKey + '_allowScripts'] = !this.dataDoc[this.fieldKey + '_allowScripts'];
+                    if (this._iframe) {
+                        runInAction(() => {
+                            this._hackHide = true;
+                        });
+                        setTimeout(
+                            action(() => {
+                                this._hackHide = false;
+                            })
+                        );
+                    }
+                },
+                icon: 'snowflake',
+            });
             funcs.push({
                 description: (!this.layoutDoc.layout_reflowHorizontal ? 'Force' : 'Prevent') + ' Reflow',
                 event: () => {
@@ -887,21 +878,7 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 },
                 icon: 'snowflake',
             });
-
-            // Add a refresh option to re-capture the screenshot
-            funcs.push({
-                description: 'Refresh Screenshot',
-                event: () => this.captureWebScreenshot(),
-                icon: 'sync-alt',
-            });
-
-            !Doc.noviceMode &&
-                funcs.push({
-                    description: 'Update Icon',
-                    event: () => this.updateIcon(),
-                    icon: 'portrait',
-                });
-
+            !Doc.noviceMode && funcs.push({ description: 'Update Icon', event: () => this.updateIcon(), icon: 'portrait' });
             cm.addItem({ description: 'Options...', subitems: funcs, icon: 'asterisk' });
         }
     };
@@ -913,7 +890,7 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
      */
     @action
     onMarqueeDown = (e: React.PointerEvent) => {
-        const sel = window.document.getSelection();
+        const sel = this._url ? this._iframe?.contentDocument?.getSelection() : window.document.getSelection();
         this._textAnnotationCreator = undefined;
         if (sel?.empty)
             sel.empty(); // Chrome
@@ -948,7 +925,6 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
 
     @computed get urlContent() {
         if (this.ScreenToLocalBoxXf().Scale > 25) return <div />;
-
         setTimeout(
             action(() => {
                 if (this._initialScroll === undefined && !this._webPageHasBeenRendered) {
@@ -957,10 +933,7 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 this._webPageHasBeenRendered = true;
             })
         );
-
         const field = this.dataDoc[this._props.fieldKey];
-
-        // Handle HTML field (text content)
         if (field instanceof HtmlField) {
             return (
                 <span
@@ -977,8 +950,6 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 />
             );
         }
-
-        // Handle WebField (screenshot of webpage)
         if (field instanceof WebField) {
             const url = this.layoutDoc[this.fieldKey + '_useCors'] ? '/corsproxy/' + this._webUrl : this._webUrl;
             const scripts = this.dataDoc[this.fieldKey + '_allowScripts'] || this._webUrl.includes('wikipedia.org') || this._webUrl.includes('google.com') || this._webUrl.startsWith('https://bing');
@@ -1198,7 +1169,7 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         TraceMobx();
         // const previewScale = this._previewNativeWidth ? 1 - this.sidebarWidth() / this._previewNativeWidth : 1;
         const pointerEvents = this.layoutDoc._lockedPosition ? 'none' : (this._props.pointerEvents?.() as Property.PointerEvents | undefined);
-        const scale = this._props.NativeDimScaling?.() || 1;
+        // const scale = previewScale * (this._props.NativeDimScaling?.() || 1);
         return (
             <div
                 className="webBox-outerContent"
@@ -1207,16 +1178,11 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                     height: '100%', //`${100 / scale}%`,
                     pointerEvents,
                 }}
+                // when active, block wheel events from propagating since they're handled by the iframe
                 onWheel={this.onZoomWheel}
                 onScroll={() => this.setDashScrollTop(this._outerRef.current?.scrollTop || 0)}
                 onPointerDown={this.onMarqueeDown}>
-                <div
-                    className="webBox-innerContent"
-                    style={{
-                        width: '100%',
-                        pointerEvents,
-                        backgroundColor: '#f5f5f5',
-                    }}>
+                <div className="webBox-innerContent" style={{ height: (this._webPageHasBeenRendered && this._scrollHeight > this._props.PanelHeight() && this._scrollHeight) || '100%', pointerEvents }}>
                     {this.content}
                     <div style={{ display: SnappingManager.CanEmbed ? 'none' : undefined, mixBlendMode: 'multiply' }}>{this.renderTransparentAnnotations}</div>
                     {this.renderOpaqueAnnotations}
@@ -1258,13 +1224,6 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                         <FontAwesomeIcon icon={this._searching ? 'times' : 'search'} size="lg" />
                     </div>
                 </button>
-
-                {/* Refresh button */}
-                <button type="button" className="webBox-overlayButton webBox-refreshButton" title="Refresh webpage" onClick={() => this.captureWebScreenshot()}>
-                    <div className="webBox-overlayButton-iconCont" onPointerDown={e => e.stopPropagation()}>
-                        <FontAwesomeIcon icon="sync" size="lg" />
-                    </div>
-                </button>
             </div>
         );
     }
@@ -1293,25 +1252,16 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     annotationPointerEvents = () => (this._props.isContentActive() && (SnappingManager.IsDragging || Doc.ActiveTool !== InkTool.None) ? 'all' : 'none');
     render() {
         TraceMobx();
-        const containerWidth = NumCast(this.layoutDoc._width) || this._props.PanelWidth();
+        const previewScale = this._previewNativeWidth ? 1 - this.sidebarWidth() / this._previewNativeWidth : 1;
         const pointerEvents = this.layoutDoc._lockedPosition ? 'none' : (this._props.pointerEvents?.() as Property.PointerEvents);
-        const scale = this._props.NativeDimScaling?.() || 1;
-
-        // Force the component to be square
-        this.layoutDoc._height = containerWidth;
-        this.layoutDoc._width = containerWidth;
-        this.layoutDoc._forceActive = true;
-
+        const scale = previewScale * (this._props.NativeDimScaling?.() || 1);
         return (
             <div
                 className="webBox"
                 ref={this._mainCont}
                 style={{
-                    pointerEvents: this.pointerEvents(),
+                    pointerEvents: this.pointerEvents(), //
                     position: SnappingManager.IsDragging ? 'absolute' : undefined,
-                    width: `${containerWidth}px`,
-                    height: `${containerWidth}px`,
-                    aspectRatio: '1 / 1', // Explicitly enforce square aspect ratio
                 }}>
                 <div className="webBox-background" style={{ backgroundColor: this._props.styleProvider?.(this.layoutDoc, this._props, StyleProp.BackgroundColor) as string }} />
                 <div
@@ -1376,15 +1326,6 @@ export class WebBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
             </div>
         );
     }
-
-    get marqueeing() {
-        return this._marqueeing;
-    }
-    set marqueeing(val) {
-        val && this._marqueeref.current?.onInitiateSelection(val);
-        !val && this._marqueeref.current?.onTerminateSelection();
-        this._marqueeing = val;
-    }
 }
 // eslint-disable-next-line prefer-arrow-callback
 ScriptingGlobals.add(function urlHash(url: string) {
@@ -1395,149 +1336,3 @@ Docs.Prototypes.TemplateMap.set(DocumentType.WEB, {
     layout: { view: WebBox, dataField: 'data' },
     options: { acl: '', _height: 300, _layout_fitWidth: true, _layout_nativeDimEditable: true, _layout_reflowVertical: true, waitForDoubleClickToClick: 'always', systemIcon: 'BsGlobe' },
 });
-
-// Add CSS styles for screenshot mode
-const webBoxStyles = `
-.webBox-screenshot-container {
-    width: 100%;
-    position: relative;
-    overflow: visible;
-    display: flex;
-    align-items: flex-start;
-    justify-content: center;
-    background-color: #f5f5f5;
-}
-
-.webBox-screenshot {
-    width: 100%;
-    pointer-events: none;
-    display: block;
-    user-select: none;
-    object-fit: contain;
-    transition: opacity 0.3s ease;
-}
-
-.webBox-loading {
-    padding: 20px;
-    text-align: center;
-    color: #666;
-    background-color: #f5f5f5;
-    border-radius: 4px;
-    min-height: 200px;
-    display: flex;
-    flex-direction: column;
-    align-items: center;
-    justify-content: center;
-}
-
-.webBox-loading-message {
-    font-size: 16px;
-    margin-bottom: 15px;
-    color: #555;
-}
-
-.webBox-loading-spinner {
-    margin-top: 10px;
-    color: #1976d2;
-}
-
-.webBox-error {
-    padding: 20px;
-    color: #d32f2f;
-    text-align: center;
-    background-color: #ffebee;
-    border-radius: 4px;
-    min-height: 200px;
-    display: flex;
-    flex-direction: column;
-    align-items: center;
-    justify-content: center;
-    gap: 15px;
-}
-
-.webBox-error-icon {
-    color: #d32f2f;
-    margin-bottom: 10px;
-}
-
-.webBox-error-message {
-    color: #d32f2f;
-    font-size: 14px;
-    max-width: 80%;
-    line-height: 1.5;
-}
-
-.webBox-error-actions {
-    margin-top: 10px;
-}
-
-.webBox-retry-button {
-    background-color: #f44336;
-    color: white;
-    border: none;
-    padding: 8px 16px;
-    border-radius: 4px;
-    cursor: pointer;
-    font-size: 14px;
-    transition: background-color 0.3s;
-}
-
-.webBox-retry-button:hover {
-    background-color: #d32f2f;
-}
-
-.webBox-placeholder {
-    padding: 20px;
-    text-align: center;
-    color: #757575;
-    background-color: #fafafa;
-    border-radius: 4px;
-    min-height: 200px;
-    display: flex;
-    align-items: center;
-    justify-content: center;
-}
-
-.webBox-refreshButton {
-    margin-right: 5px;
-}
-
-.webBox-innerContent {
-    position: relative;
-    width: 100%;
-    background-color: #f5f5f5;
-    overflow: visible;
-}
-
-.webBox-outerContent {
-    overflow: auto;
-    width: 100%;
-    background-color: #f5f5f5;
-    position: relative;
-}
-
-.webBox-container {
-    position: relative;
-    display: flex;
-    flex-direction: column;
-    height: 100%;
-    background-color: white;
-    border-radius: 4px;
-    overflow: hidden;
-}
-
-.webBox {
-    position: relative;
-    height: 100%;
-    width: 100%;
-    overflow: hidden;
-    background-color: white;
-    border-radius: 4px;
-    box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12), 0 1px 2px rgba(0, 0, 0, 0.24);
-}
-`;
-
-// Add the styles to the document
-const styleEl = document.createElement('style');
-styleEl.textContent = webBoxStyles;
-document.head.appendChild(styleEl);
diff --git a/src/client/views/nodes/WebBoxRenderer.js b/src/client/views/nodes/WebBoxRenderer.js
index ef465c453..31e0ef5e4 100644
--- a/src/client/views/nodes/WebBoxRenderer.js
+++ b/src/client/views/nodes/WebBoxRenderer.js
@@ -145,6 +145,29 @@ const ForeignHtmlRenderer = function (styleSheets) {
         return urlsFound;
     };
 
+    /**
+     * Collects every url() reference that appears in a `src` descriptor
+     * of an @font-face block found in the given CSS text.
+     * @param {String} cssRuleStr - CSS text to scan
+     * @returns {String[]} the font URLs in order of appearance (empty when none)
+     */
+    const getFontFaceUrlsFromCss = function (cssRuleStr) {
+        const collected = [];
+        // Each match is one complete `@font-face { ... }` block
+        const blocks = cssRuleStr.match(/@font-face\s*{[^}]*}/g) || [];
+
+        for (const fontFaceBlock of blocks) {
+            // A block may hold several `src:` declarations; each match runs up to the next ';'
+            const srcDeclarations = fontFaceBlock.match(/src\s*:\s*[^;]*/g) || [];
+            for (const declaration of srcDeclarations) {
+                // url() extraction is delegated to getUrlsFromCssString
+                collected.push(...getUrlsFromCssString(declaration));
+            }
+        }
+
+        return collected;
+    };
+
     /**
      *
      * @param {String} html
@@ -158,6 +181,61 @@ const ForeignHtmlRenderer = function (styleSheets) {
         return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string
     };
 
+    /**
+     * Builds CSS that defines two local-only fallback families ('CORS-fallback-serif' and 'CORS-fallback-sans') plus [data-font-error] rules that force them. The `src` descriptors list only local() entries: generic keywords such as serif are not valid inside `src`; the generic fallback is supplied by the font-family lists in the attribute rules instead.
+     * @returns {String} CSS text containing the fallback @font-face rules and the [data-font-error] selectors
+     */
+    const createFallbackFontFaceRules = function () {
+        return `
+            @font-face {
+                font-family: 'CORS-fallback-serif';
+                src: local('Times New Roman'), local('Georgia');
+            }
+            @font-face {
+                font-family: 'CORS-fallback-sans';
+                src: local('Arial'), local('Helvetica');
+            }
+            /* Add fallback font handling */
+            [data-font-error] {
+                font-family: 'CORS-fallback-sans', sans-serif !important;
+            }
+            [data-font-error="serif"] {
+                font-family: 'CORS-fallback-serif', serif !important;
+            }
+        `;
+    };
+
+    /**
+     * Prepares the collected stylesheet text for rendering: appends the fallback
+     * font rules, then routes absolute http(s) font-file URLs through CorsProxy.
+     * Relative URLs and non-font URLs are left exactly as they are.
+     * @param {String} cssStyles - concatenated cssText of the collected rules
+     * @returns {String} CSS text with fallback rules added and font URLs proxied
+     */
+    const optimizeCssForRendering = function (cssStyles) {
+        // Font files are recognised by extension, optionally followed by a query string
+        const fontFilePattern = /\.(woff2?|ttf|eot|otf)(\?.*)?$/i;
+
+        // Add fallback font-face rules
+        const enhanced = cssStyles + createFallbackFontFaceRules();
+
+        // Deliberately no trailing
+        //   @supports (font-display: swap) { @font-face { font-display: swap !important; } }
+        // block: @supports only evaluates property declarations while font-display is
+        // an @font-face descriptor, a @font-face without font-family/src defines no
+        // font, and descriptors do not accept !important, so such a rule never takes
+        // effect and cannot silence CORS errors.
+
+        // Only url(...) references to absolute http(s) font files are rewritten to go
+        // through the CORS proxy; every other match is returned unchanged.
+        return enhanced.replace(/(url\(['"]?)(https?:\/\/[^)'"]+)(['"]?\))/gi, (match, prefix, url, suffix) => {
+            if (fontFilePattern.test(url)) {
+                return `${prefix}${CorsProxy(url)}${suffix}`;
+            }
+            return match;
+        });
+    };
+
     /**
      *
      * @param {String} contentHtml
@@ -175,6 +253,7 @@ const ForeignHtmlRenderer = function (styleSheets) {
         // copy styles
         let cssStyles = '';
         const urlsFoundInCss = [];
+        const fontUrlsInCss = [];
 
         for (let i = 0; i < styleSheets.length; i += 1) {
             try {
@@ -182,6 +261,7 @@ const ForeignHtmlRenderer = function (styleSheets) {
                 for (let j = 0; j < rules.length; j += 1) {
                     const cssRuleStr = rules[j].cssText;
                     urlsFoundInCss.push(...getUrlsFromCssString(cssRuleStr));
+                    fontUrlsInCss.push(...getFontFaceUrlsFromCss(cssRuleStr));
                     cssStyles += cssRuleStr;
                 }
             } catch (e) {
@@ -189,6 +269,9 @@ const ForeignHtmlRenderer = function (styleSheets) {
             }
         }
 
+        // Optimize and enhance CSS
+        cssStyles = optimizeCssForRendering(cssStyles);
+
         // const fetchedResourcesFromStylesheets = await getMultipleResourcesAsBase64(webUrl, urlsFoundInCss);
         // for (let i = 0; i < fetchedResourcesFromStylesheets.length; i++) {
         //     const r = fetchedResourcesFromStylesheets[i];
@@ -203,6 +286,26 @@ const ForeignHtmlRenderer = function (styleSheets) {
             .replace(/<div class="mediaset"><\/div>/g, '') // when scripting isn't available (ie, rendering web pages here), <noscript> tags should become <div>'s.  But for Brown CS, there's a layout problem if you leave the empty <mediaset> tag
             .replace(/<link[^>]*>/g, '') // don't need to keep any linked style sheets because we've already processed all style sheets above
             .replace(/srcset="([^ "]*)[^"]*"/g, 'src="$1"'); // instead of converting each item in the srcset to a data url, just convert the first one and use that
+
+        // Add script to handle font loading errors
+        contentHtml += `
+            <script>
+                // Handle font loading errors with fallbacks
+                document.addEventListener('DOMContentLoaded', function() {
+                    // Mark elements with font issues
+                    document.querySelectorAll('*').forEach(function(el) {
+                        const style = window.getComputedStyle(el);
+                        const fontFamily = style.getPropertyValue('font-family');
+                        if (fontFamily && !fontFamily.includes('serif') && !fontFamily.includes('sans')) {
+                            el.setAttribute('data-font-error', 'sans');
+                        } else if (fontFamily && fontFamily.includes('serif')) {
+                            el.setAttribute('data-font-error', 'serif');
+                        }
+                    });
+                });
+            </script>
+        `;
+
         const urlsFoundInHtml = getImageUrlsFromFromHtml(contentHtml).filter(url => !url.startsWith('data:'));
         return getMultipleResourcesAsBase64(webUrl, urlsFoundInHtml).then(fetchedResources => {
             for (let i = 0; i < fetchedResources.length; i += 1) {
diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
index e551ef830..fcb4ab450 100644
--- a/src/client/views/nodes/chatbot/agentsystem/prompts.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
@@ -103,9 +103,9 @@ export function getReactPrompt(tools: BaseTool<ReadonlyArray<Parameter>>[], summ
         <note>If no external tool is required, use 'no_tool', but if there might be relevant external information, use the appropriate tool.</note>
     </tools>
 
-    <summaries>
+    <available_documents>
         ${summaries()}
-    </summaries>
+    </available_documents>
 
     <chat_history>
         ${chatHistory}
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index d919b5f7f..34a1ade2e 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -650,16 +650,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 citation: JSON.stringify(citation, null, 2),
             });
 
-            // Try to find the document
-            let doc: Doc | undefined;
-
             // First try to find the document using the document manager's chunk ID lookup
-            const parentDocId = this.docManager.getDocIdByChunkId(chunkId);
-            if (parentDocId) {
-                doc = this.docManager.getDocument(parentDocId);
-                console.log(`Found document by chunk ID lookup: ${parentDocId}`);
-            }
-
+            const doc: Doc | undefined = this.docManager.getDocByChunkId(chunkId);
             if (!doc) {
                 console.warn(`Document not found for citation with chunk_id: ${chunkId}`);
                 return;
@@ -989,32 +981,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     componentWillUnmount() {
         this.removeScrollListener();
     }
-
-    /**
-     * Getter that retrieves all linked documents for the current document.
-     */
-    @computed get linkedDocs(): Doc[] {
-        const docIds = this.docManager.listDocs();
-        const docs: Doc[] = [];
-
-        // Get documents from the document manager using the getDocument method
-        docIds.forEach(id => {
-            const doc = this.docManager.getDocument(id);
-            if (doc) {
-                docs.push(doc);
-            }
-        });
-
-        return docs;
-    }
-
     /**
      * Getter that retrieves document IDs of linked documents that have PDF_chunker–parsed content.
      */
     @computed
     get docIds(): string[] {
         // Use the document manager to get all document IDs
-        return Array.from(this.docManager.listDocs());
+        return Array.from(this.docManager.docIds); // IDs come from docIds, matching retrieveDocIds; listDocs is the listing that summaries serializes
     }
 
     /**
@@ -1023,7 +996,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     @computed
     get summaries(): string {
         // Use the document manager to get all summaries
-        return this.docManager.getAllDocumentSummaries();
+        // Returns the manager's document listing as JSON; no logging here so this computed getter stays free of side effects
+        return JSON.stringify(this.docManager.listDocs);
     }
 
     /**
@@ -1064,7 +1038,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     };
 
     retrieveDocIds = (): string[] => {
-        return Array.from(this.docManager.listDocs());
+        return Array.from(this.docManager.docIds);
     };
 
     /**
diff --git a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts
index e6c2421e5..5297292bf 100644
--- a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts
+++ b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts
@@ -18,13 +18,13 @@ const parameterDefinitions: ReadonlyArray<Parameter> = [
         name: 'action',
         type: 'string',
         required: true,
-        description: 'The action to perform: "get" to retrieve metadata, "edit" to modify metadata, "list" to enumerate documents, "getFieldOptions" to retrieve all available field options, or "create" to create a new document',
+        description: 'The action to perform: "get" to retrieve metadata, "edit" to modify metadata, "getFieldOptions" to retrieve all available field options, or "create" to create a new document',
     },
     {
         name: 'documentId',
         type: 'string',
         required: false,
-        description: 'The ID of the document to get or edit metadata for. Required for "edit", optional for "get", ignored for "list", "getFieldOptions", and "create"',
+        description: 'The ID of the document to get or edit metadata for. Required for "edit", optional for "get", ignored for "getFieldOptions", and "create"',
     },
     {
         name: 'fieldEdits',
@@ -68,7 +68,6 @@ This tool provides the following capabilities:
 - Get metadata from a specific document
 - Edit metadata fields on documents (in either layout or data documents)
 - Edit multiple fields at once (useful for updating dependent fields together)
-- List all available documents in the current view
 - Retrieve all available field options with metadata (IMPORTANT: always call this before editing)
 - Understand which fields are stored where (layout vs data document)
 - Get detailed information about all available document fields
@@ -137,8 +136,8 @@ SPECIAL FIELD HANDLING:
 - Width/Height: Set layout_autoHeight/layout_autoWidth to false before editing
 
 RECOMMENDED WORKFLOW:
-1. First call action="list" to identify available documents
-2. Then call action="getFieldOptions" to understand available fields
+1. Understand the currently available documents that were provided as <available_documents> in the prompt
+2. Call action="getFieldOptions" to understand available fields
 3. Get document metadata with action="get" to see current values
 4. Edit fields with action="edit" using proper dependencies
 OR
@@ -159,10 +158,6 @@ HANDLING DEPENDENT FIELDS:
   - width → layout_autoWidth (set to false to allow manual width)
   - Other auto-sizing related properties
 
-To LIST available documents:
-- Use action="list" to get a simple list of all documents in the current view
-- This is useful when you need to identify documents before getting details or editing them
-
 Editing fields follows these rules:
 1. First checks if the field exists on the layout document using Doc.Get
 2. If it exists on the layout document, it's updated there
@@ -172,7 +167,6 @@ Editing fields follows these rules:
 
 Examples:
 - To get field options: { action: "getFieldOptions" }
-- To list all documents: { action: "list" }
 - To get all document metadata: { action: "get" }
 - To get metadata for a specific document: { action: "get", documentId: "doc123" }
 - To edit a single field: { action: "edit", documentId: "doc123", fieldEdits: [{ fieldName: "backgroundColor", fieldValue: "#ff0000" }] }
@@ -186,7 +180,8 @@ Examples:
     { fieldName: "layout_autoHeight", fieldValue: false },
     { fieldName: "height", fieldValue: 200 }
   ]}
-- IMPORTANT: MULTI STEP WORKFLOWS ARE NOT ONLY ALLOWED BUT ENCOURAGED. TAKE THINGS 1 STEP AT A TIME.`;
+- IMPORTANT: MULTI STEP WORKFLOWS ARE NOT ONLY ALLOWED BUT ENCOURAGED. TAKE THINGS 1 STEP AT A TIME.
+- IMPORTANT: WHEN CITING A DOCUMENT, MAKE THE CHUNK ID THE DOCUMENT ID. WHENEVER YOU CITE A DOCUMENT, ALWAYS MAKE THE CITATION TYPE "text", THE "direct_text" FIELD BLANK, AND THE "chunk_id" FIELD THE DOCUMENT ID.`;
 const documentMetadataToolInfo: ToolInfo<DocumentMetadataToolParamsType> = {
     name: 'documentMetadata',
     description: toolDescription,
@@ -232,11 +227,11 @@ export class DocumentMetadataTool extends BaseTool<DocumentMetadataToolParamsTyp
 
             // Ensure the action is valid and convert to string
             const action = String(args.action);
-            if (!['get', 'edit', 'list', 'getFieldOptions', 'create'].includes(action)) {
+            if (!['get', 'edit', 'getFieldOptions', 'create'].includes(action)) {
                 return [
                     {
                         type: 'text',
-                        text: 'Error: Invalid action. Valid actions are "get", "edit", "list", "getFieldOptions", or "create".',
+                        text: 'Error: Invalid action. Valid actions are "get", "edit", "getFieldOptions", or "create".',
                     },
                 ];
             }
@@ -386,10 +381,6 @@ export class DocumentMetadataTool extends BaseTool<DocumentMetadataToolParamsTyp
                     }
                 }
 
-                case 'list': {
-                    this._docManager.listDocs();
-                }
-
                 case 'getFieldOptions': {
                     // Get all available field options with metadata
                     const fieldOptions = this._docManager.getAllFieldMetadata();
@@ -457,7 +448,7 @@ ${JSON.stringify(createdMetadata, null, 2)}`,
                     return [
                         {
                             type: 'text',
-                            text: 'Error: Unknown action. Valid actions are "get", "edit", "list", "getFieldOptions", or "create".',
+                            text: 'Error: Unknown action. Valid actions are "get", "edit", "getFieldOptions", or "create".',
                         },
                     ];
             }
@@ -537,11 +528,6 @@ ${JSON.stringify(createdMetadata, null, 2)}`,
             return true;
         }
 
-        // list action doesn't require any additional parameters
-        if (params.action === 'list') {
-            return true;
-        }
-
         return true;
     }
 
@@ -552,7 +538,7 @@ ${JSON.stringify(createdMetadata, null, 2)}`,
      */
     private getParameterRequirementsByAction(action?: string): string {
         if (!action) {
-            return 'Please specify an action: "get", "edit", "list", "getFieldOptions", or "create".';
+            return 'Please specify an action: "get", "edit", "getFieldOptions", or "create".';
         }
 
         switch (action.toLowerCase()) {
@@ -560,14 +546,12 @@ ${JSON.stringify(createdMetadata, null, 2)}`,
                 return 'The "get" action accepts an optional documentId parameter.';
             case 'edit':
                 return 'The "edit" action requires documentId and fieldEdits parameters. fieldEdits must be a JSON array of field edits.';
-            case 'list':
-                return 'The "list" action does not require any additional parameters.';
             case 'getFieldOptions':
                 return 'The "getFieldOptions" action does not require any additional parameters. It returns metadata about all available document fields.';
             case 'create':
                 return 'The "create" action requires title, data, and doc_type parameters.';
             default:
-                return `Unknown action "${action}". Valid actions are "get", "edit", "list", "getFieldOptions", or "create".`;
+                return `Unknown action "${action}". Valid actions are "get", "edit", "getFieldOptions", or "create".`;
         }
     }
 }
diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts
index ef374ed22..90b803d21 100644
--- a/src/client/views/nodes/chatbot/tools/RAGTool.ts
+++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts
@@ -3,6 +3,7 @@ import { Observation, RAGChunk } from '../types/types';
 import { ParametersType, ToolInfo } from '../types/tool_types';
 import { Vectorstore } from '../vectorstore/Vectorstore';
 import { BaseTool } from './BaseTool';
+import { DocumentMetadataTool } from './DocumentMetadataTool';
 
 const ragToolParams = [
     {
@@ -17,7 +18,7 @@ type RAGToolParamsType = typeof ragToolParams;
 
 const ragToolInfo: ToolInfo<RAGToolParamsType> = {
     name: 'rag',
-    description: 'Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a set of document chunks (text or images) to provide a grounded response based on user documents.',
+    description: `Performs a RAG (Retrieval-Augmented Generation) search on user documents (only PDF, audio, and video are supported—for information about other document types, use the ${DocumentMetadataTool.name} tool) and returns a set of document chunks (text or images) to provide a grounded response based on user documents.`,
     citationRules: `When using the RAG tool, the structure must adhere to the format described in the ReAct prompt. Below are additional guidelines specifically for RAG-based responses:
 
             1. **Grounded Text Guidelines**:
@@ -75,7 +76,7 @@ export class RAGTool extends BaseTool<RAGToolParamsType> {
 
     async getFormattedChunks(relevantChunks: RAGChunk[]): Promise<Observation[]> {
         try {
-            const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks }) as { formattedChunks: Observation[]}
+            const { formattedChunks } = (await Networking.PostToServer('/formatChunks', { relevantChunks })) as { formattedChunks: Observation[] };
 
             if (!formattedChunks) {
                 throw new Error('Failed to format chunks');
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index 14cffcb70..c8a6bb16b 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -1,4 +1,4 @@
-import { action, makeObservable, observable, ObservableMap, reaction, runInAction } from 'mobx';
+import { action, computed, makeObservable, observable, ObservableMap, reaction, runInAction } from 'mobx';
 import { observer } from 'mobx-react';
 import { v4 as uuidv4 } from 'uuid';
 import { Doc, StrListCast } from '../../../../../fields/Doc';
@@ -31,7 +31,7 @@ export class AgentDocumentManager {
     private chatBox: ChatBox;
     private chatBoxDocument: Doc | null = null;
     private fieldMetadata: Record<string, any> = {};
-    private readonly DOCUMENT_ID_FIELD = '_dash_document_id';
+    @observable private documentIdsFromChunkIds: ObservableMap<string, string>;
 
     /**
      * Creates a new DocumentManager
@@ -40,8 +40,17 @@ export class AgentDocumentManager {
     constructor(chatBox: ChatBox) {
         makeObservable(this);
         const agentDoc = DocCast(chatBox.Document.agentDocument) ?? new Doc();
+        const chunkIds = DocCast(agentDoc.chunkIds) ?? new Doc();
+
         agentDoc.title = chatBox.Document.title + '_agentDocument';
+        chunkIds.title = '_chunkIds';
         chatBox.Document.agentDocument = agentDoc;
+        DocCast(chatBox.Document.agentDocument)!.chunkIds = chunkIds;
+        this.documentIdsFromChunkIds = StrListCast(chunkIds.mapping).reduce((mapping, content) => {
+            const [chunkId, docId] = content.split(':');
+            mapping.set(chunkId, docId);
+            return mapping;
+        }, new ObservableMap<string, string>());
         this.documentsById = StrListCast(agentDoc.mapping).reduce((mapping, content) => {
             const [id, layoutId, docId] = content.split(':');
             const layoutDoc = DocServer.GetCachedRefField(layoutId);
@@ -66,6 +75,19 @@ export class AgentDocumentManager {
             }
             //{ fireImmediately: true }
         );
+        reaction(
+            () => this.documentIdsFromChunkIds.values(),
+            () => {
+                if (this.chatBoxDocument && DocCast(this.chatBoxDocument.agentDocument)) {
+                    // Store the mapping with chunkId:docId format for consistency
+                    const chunkIdsDoc = DocCast(DocCast(this.chatBoxDocument.agentDocument)!.chunkIds);
+                    if (chunkIdsDoc) {
+                        chunkIdsDoc.mapping = new List<string>(Array.from(this.documentIdsFromChunkIds.entries()).map(([chunkId, docId]) => `${chunkId}:${docId}`));
+                    }
+                }
+            }
+            //{ fireImmediately: true }
+        );
         this.processDocument(this.chatBoxDocument);
         this.initializeFieldMetadata();
     }
@@ -120,7 +142,7 @@ export class AgentDocumentManager {
         try {
             // Use the LinkManager approach which is proven to work in ChatBox
             if (this.chatBoxDocument) {
-                console.log('Finding documents linked to ChatBox document with ID:', this.chatBoxDocument.id);
+                console.log('Finding documents linked to ChatBox document with ID:', this.chatBoxDocument[Id]);
 
                 // Get directly linked documents via LinkManager
                 const linkedDocs = LinkManager.Instance.getAllRelatedLinks(this.chatBoxDocument)
@@ -134,57 +156,10 @@ export class AgentDocumentManager {
                 linkedDocs.forEach((doc: Doc | undefined) => {
                     if (doc) {
                         this.processDocument(doc);
-                        console.log('Processed linked document:', doc.id, doc.title, doc.type);
+                        console.log('Processed linked document:', doc[Id], doc.title, doc.type);
                     }
                 });
-
-                // Include the ChatBox document itself
-                this.processDocument(this.chatBoxDocument);
-
-                // If we have access to the Document's parent, try to find sibling documents
-                if (this.chatBoxDocument.parent) {
-                    const parent = this.chatBoxDocument.parent;
-                    console.log('Found parent document, checking for siblings');
-
-                    // Check if parent is a Doc type and has a childDocs function
-                    if (parent && typeof parent === 'object' && 'childDocs' in parent && typeof parent.childDocs === 'function') {
-                        try {
-                            const siblingDocs = parent.childDocs();
-                            if (Array.isArray(siblingDocs)) {
-                                console.log(`Found ${siblingDocs.length} sibling documents via parent.childDocs()`);
-                                siblingDocs.forEach((doc: Doc) => {
-                                    if (doc) {
-                                        this.processDocument(doc);
-                                    }
-                                });
-                            }
-                        } catch (e) {
-                            console.warn('Error accessing parent.childDocs:', e);
-                        }
-                    }
-                }
-            } else if (this.chatBox && this.chatBox.linkedDocs) {
-                // If we have direct access to the linkedDocs computed property from ChatBox
-                console.log('Using ChatBox.linkedDocs directly');
-                const linkedDocs = this.chatBox.linkedDocs;
-                if (Array.isArray(linkedDocs)) {
-                    console.log(`Found ${linkedDocs.length} documents via ChatBox.linkedDocs`);
-                    linkedDocs.forEach((doc: Doc) => {
-                        if (doc) {
-                            this.processDocument(doc);
-                        }
-                    });
-                }
-
-                // Process the ChatBox document if available
-                if (this.chatBox.Document) {
-                    this.processDocument(this.chatBox.Document);
-                }
-            } else {
-                console.warn('No ChatBox document reference available for finding linked documents');
             }
-
-            console.log(`DocumentMetadataTool found ${this.documentsById.size} total documents`);
         } catch (error) {
             console.error('Error finding documents in Freeform view:', error);
         }
@@ -201,6 +176,7 @@ export class AgentDocumentManager {
         // Only add if we haven't already processed this document
         if (!this.documentsById.has(docId)) {
             this.documentsById.set(docId, { layoutDoc: doc, dataDoc: doc[DocData] });
+            console.log('Added document to documentsById:', doc[Id], docId, doc[Id], doc[DocData][Id]);
         }
         return docId;
     }
@@ -213,37 +189,12 @@ export class AgentDocumentManager {
     private ensureDocumentId(doc: Doc): string {
         let docId: string | undefined;
 
-        // First try to get the ID from our custom field
-        if (doc[this.DOCUMENT_ID_FIELD]) {
-            docId = String(doc[this.DOCUMENT_ID_FIELD]);
-            return docId;
-        }
-
-        // Try different ways to get a document ID
-
         // 1. Try the direct id property if it exists
-        if (doc.id && typeof doc.id === 'string') {
-            docId = doc.id;
-        }
-        // 2. Try doc._id if it exists
-        else if (doc._id && typeof doc._id === 'string') {
-            docId = doc._id;
-        }
-        // 3. Try doc.data?.id if it exists
-        else if (doc.data && typeof doc.data === 'object' && 'id' in doc.data && typeof doc.data.id === 'string') {
-            docId = doc.data.id;
-        }
-        // 4. If none of the above work, generate a UUID
-        else {
-            docId = uuidv4();
-            console.log(`Generated new UUID for document with title: ${doc.title || 'Untitled'}`);
-        }
-
-        // Store the ID in the document's metadata so it persists
-        try {
-            doc[this.DOCUMENT_ID_FIELD] = docId;
-        } catch (e) {
-            console.warn(`Could not assign ID to document property`, e);
+        if (doc[Id]) {
+            console.log('Found document ID (normal):', doc[Id]);
+            docId = doc[Id];
+        } else {
+            throw new Error('No document ID found');
         }
 
         return docId;
@@ -256,13 +207,13 @@ export class AgentDocumentManager {
      */
     public extractDocumentMetadata(id: string) {
         if (!id) return null;
-        const doc = this.documentsById.get(id);
-        if (!doc) return null;
-        const layoutDoc = doc.layoutDoc;
-        const dataDoc = doc.dataDoc;
+        const agentDoc = this.documentsById.get(id);
+        if (!agentDoc) return null;
+        const layoutDoc = agentDoc.layoutDoc;
+        const dataDoc = agentDoc.dataDoc;
 
         const metadata: Record<string, any> = {
-            id: layoutDoc.dash_document_id || layoutDoc.id || '',
+            id: layoutDoc[Id] || dataDoc[Id] || '',
             title: layoutDoc.title || '',
             type: layoutDoc.type || '',
             fields: {
@@ -355,7 +306,7 @@ export class AgentDocumentManager {
         if (value instanceof Doc) {
             return {
                 type: 'Doc',
-                id: value.id || this.ensureDocumentId(value),
+                id: value[Id] || this.ensureDocumentId(value),
                 title: value.title || '',
                 docType: value.type || '',
             };
@@ -1011,33 +962,17 @@ export class AgentDocumentManager {
      * Returns a list of all document IDs in the manager.
      * @returns An array of document IDs (strings).
      */
-    public listDocs(): string[] {
-        return Array.from(this.documentsById.keys());
+    @computed
+    public get listDocs(): string[] {
+        console.log(
+            Array.from(this.documentsById.entries()).map(([id, agentDoc]) => JSON.stringify({ id, title: agentDoc.layoutDoc.title, type: agentDoc.layoutDoc.type, summary: agentDoc.layoutDoc.summary || 'No summary available for this document.' }))
+        );
+        return Array.from(this.documentsById.entries()).map(([id, agentDoc]) => JSON.stringify({ id, title: agentDoc.layoutDoc.title, type: agentDoc.layoutDoc.type, summary: agentDoc.layoutDoc.summary || 'No summary available for this document.' }));
     }
 
-    /**
-     * Adds a document with a custom ID to the manager
-     * @param doc The document to add
-     * @param customId The custom ID to assign to the document
-     * @returns The customId that was assigned
-     */
-    @action
-    public addCustomId(doc: Doc, customId: string): string {
-        if (!doc) {
-            console.error('Cannot add null document with custom ID');
-            return '';
-        }
-
-        // Set the custom ID in the document's metadata
-        doc[this.DOCUMENT_ID_FIELD] = customId;
-
-        // Store the document in our map
-        this.documentsById.set(customId, {
-            layoutDoc: doc,
-            dataDoc: doc,
-        });
-
-        return customId;
+    @computed
+    public get docIds(): string[] {
+        return Array.from(this.documentsById.keys());
     }
 
     /**
@@ -1078,11 +1013,8 @@ export class AgentDocumentManager {
             // Ensure each chunk ID can be linked back to its parent document
             // Store a mapping from chunk ID to parent document ID
             // This allows us to easily find a document by any of its chunk IDs
-            if (!this.documentsById.has(chunkId)) {
-                this.documentsById.set(chunkId, {
-                    layoutDoc: doc,
-                    dataDoc: docInfo.dataDoc,
-                });
+            if (!this.documentIdsFromChunkIds.has(chunkId) && doc) {
+                this.documentIdsFromChunkIds.set(chunkId, doc[Id]);
             }
         }
     }
@@ -1092,11 +1024,25 @@ export class AgentDocumentManager {
      * @param chunkId The chunk ID to look up
      * @returns The parent document ID if found
      */
-    public getDocIdByChunkId(chunkId: string): string | undefined {
-        const docInfo = this.documentsById.get(chunkId);
+    public getDocByChunkId(chunkId: string): Doc | undefined {
+        // First, look up the document ID using the chunk ID mapping
+        const docId = this.documentIdsFromChunkIds.get(chunkId);
+        console.log('this.documentIdsFromChunkIds', this.documentIdsFromChunkIds);
+        console.log('docId', docId);
+        if (!docId) {
+            if (this.documentsById.has(chunkId)) {
+                return this.documentsById.get(chunkId)?.layoutDoc;
+            } else {
+                console.error('No document found for chunkId and docId', chunkId);
+                return undefined;
+            }
+        }
+        // Then get the document using the document ID
+        const docInfo = this.documentsById.get(docId);
         if (docInfo) {
-            return docInfo.layoutDoc[this.DOCUMENT_ID_FIELD] as string;
+            return docInfo.layoutDoc;
         }
+        console.error('No document found for docId', docId);
         return undefined;
     }
 
@@ -1157,7 +1103,7 @@ export class AgentDocumentManager {
                 return baseChunk;
             }
         });
-
+        console.log('simplifiedChunks', simplifiedChunks);
         // Update the document with all simplified chunks at once
         doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks });
 
@@ -1165,32 +1111,25 @@ export class AgentDocumentManager {
     }
 
     /**
-     * Gets the simplified chunks from a document
-     * @param doc The document to get simplified chunks from
-     * @returns Array of simplified chunks or empty array if none exist
+     * Gets a specific simplified chunk by ID
+     * @param doc The document containing chunks
+     * @param chunkId The ID of the chunk to retrieve
+     * @returns The simplified chunk if found, undefined otherwise
      */
-    public getSimplifiedChunks(doc: Doc): any[] {
+    public getSimplifiedChunkById(doc: Doc, chunkId: string): any | undefined {
+        let chunks: any[] = [];
         if (!doc || !doc.chunk_simpl) {
+            chunks = [];
+            console.warn('No chunk found for chunkId', chunkId, '. Checking if document exists in documentsById.');
             return [];
         }
-
         try {
             const parsed = JSON.parse(StrCast(doc.chunk_simpl));
-            return parsed.chunks || [];
+            chunks = parsed.chunks || [];
         } catch (e) {
             console.error('Error parsing simplified chunks:', e);
             return [];
         }
-    }
-
-    /**
-     * Gets a specific simplified chunk by ID
-     * @param doc The document containing chunks
-     * @param chunkId The ID of the chunk to retrieve
-     * @returns The simplified chunk if found, undefined otherwise
-     */
-    public getSimplifiedChunkById(doc: Doc, chunkId: string): any | undefined {
-        const chunks = this.getSimplifiedChunks(doc);
         return chunks.find(chunk => chunk.chunkId === chunkId);
     }
 
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index 3df1294e9..1349df483 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -16,6 +16,7 @@ import { AI_Document, CHUNK_TYPE, RAGChunk } from '../types/types';
 import OpenAI from 'openai';
 import { Embedding } from 'openai/resources';
 import { AgentDocumentManager } from '../utils/AgentDocumentManager';
+import { Id } from '../../../../../fields/FieldSymbols';
 
 dotenv.config();
 
@@ -24,13 +25,12 @@ dotenv.config();
  * and OpenAI text-embedding-3-large for text embedding. It handles AI document management, uploads, and query-based retrieval.
  */
 export class Vectorstore {
-    private pinecone: Pinecone; // Pinecone client for managing the vector index.
+    private pinecone!: Pinecone; // Pinecone client for managing the vector index.
     private index!: Index; // The specific Pinecone index used for document chunks.
-    private openai: OpenAI; // OpenAI client for generating embeddings.
+    private openai!: OpenAI; // OpenAI client for generating embeddings.
     private indexName: string = 'pdf-chatbot'; // Default name for the index.
-    private _id: string; // Unique ID for the Vectorstore instance.
-    private docManager: AgentDocumentManager; // Document manager for handling documents
-
+    private _id!: string; // Unique ID for the Vectorstore instance.
+    private docManager!: AgentDocumentManager; // Document manager for handling documents
     documents: AI_Document[] = []; // Store the documents indexed in the vectorstore.
 
     /**
@@ -143,10 +143,8 @@ export class Vectorstore {
                     progressCallback(85, 'Embeddings generated. Finalizing document...');
 
                     doc.original_segments = JSON.stringify(typedResponse.full);
-                    const doc_id = uuidv4();
-
-                    // Register the document with the AgentDocumentManager
-                    this.docManager.addCustomId(doc, doc_id);
+                    const doc_id = doc[Id];
+                    console.log('doc_id in vectorstore', doc_id);
 
                     // Generate chunk IDs upfront so we can register them
                     const chunkIds = segmentedTranscript.map(() => uuidv4());
@@ -191,7 +189,7 @@ export class Vectorstore {
             } else {
                 // Process regular document
                 console.log('Processing regular document...');
-                const createDocumentResponse = await Networking.PostToServer('/createDocument', { file_path: local_file_path });
+                const createDocumentResponse = await Networking.PostToServer('/createDocument', { file_path: local_file_path, doc_id: doc[Id] });
 
                 // Type assertion for the response
                 const { jobId } = createDocumentResponse as { jobId: string };
@@ -211,12 +209,13 @@ export class Vectorstore {
                     }
                 }
 
-                // Register the document with the AgentDocumentManager
-                this.docManager.addCustomId(doc, result.doc_id);
-
                 // Collect all chunk IDs
                 const chunkIds = result.chunks.map(chunk => chunk.id);
 
+                if (result.doc_id !== doc[Id]) {
+                    console.log('doc_id in vectorstore', result.doc_id, 'does not match doc_id in doc', doc[Id]);
+                }
+
                 // Register chunks with the document manager
                 this.docManager.registerChunkIds(result.doc_id, chunkIds);
 
@@ -319,16 +318,14 @@ export class Vectorstore {
 
             const queryEmbedding = queryEmbeddingResponse.data[0].embedding;
 
-            // Get document IDs from the AgentDocumentManager
-            const docIds = Array.from(this.docManager.listDocs());
-            console.log('Using document IDs for retrieval:', docIds);
+            console.log('Using document IDs for retrieval:', this.docManager.docIds);
 
             // Query the Pinecone index using the embedding and filter by document IDs.
             // We'll query based on document IDs that are registered in the document manager
             const queryResponse: QueryResponse = await this.index.query({
                 vector: queryEmbedding,
                 filter: {
-                    doc_id: { $in: docIds },
+                    doc_id: { $in: this.docManager.docIds },
                 },
                 topK,
                 includeValues: true,
@@ -356,7 +353,7 @@ export class Vectorstore {
 
                 // Ensure the document manager knows about this chunk
                 // This is important for maintaining backwards compatibility
-                if (chunk.id && !this.docManager.getDocIdByChunkId(chunk.id)) {
+                if (chunk.id && !this.docManager.getDocByChunkId(chunk.id)) {
                     // If the chunk ID isn't registered but we have a doc_id in metadata
                     if (chunk.metadata.doc_id && this.docManager.has(chunk.metadata.doc_id)) {
                         // Register the chunk with its parent document
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index 6d2779163..378f14094 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -559,7 +559,7 @@ export default class AssistantManager extends ApiManager {
             method: Method.POST,
             subscription: '/createDocument',
             secureHandler: async ({ req, res }) => {
-                const { file_path } = req.body;
+                const { file_path, doc_id } = req.body;
                 const public_path = path.join(publicDirectory, file_path); // Resolve the file path in the public directory
                 const file_name = path.basename(file_path); // Extract the file name from the path
 
@@ -572,7 +572,7 @@ export default class AssistantManager extends ApiManager {
 
                     // Spawn the Python process and track its progress/output
                     // eslint-disable-next-line no-use-before-define
-                    spawnPythonProcess(jobId, public_path);
+                    spawnPythonProcess(jobId, public_path, doc_id);
 
                     // Send the job ID back to the client for tracking
                     res.send({ jobId });
@@ -850,7 +850,7 @@ export default class AssistantManager extends ApiManager {
  * @param file_name The name of the file to process.
  * @param file_path The filepath of the file to process.
  */
-function spawnPythonProcess(jobId: string, file_path: string) {
+function spawnPythonProcess(jobId: string, file_path: string, doc_id: string) {
     const venvPath = path.join(__dirname, '../chunker/venv');
     const requirementsPath = path.join(__dirname, '../chunker/requirements.txt');
     const pythonScriptPath = path.join(__dirname, '../chunker/pdf_chunker.py');
@@ -860,7 +860,7 @@ function spawnPythonProcess(jobId: string, file_path: string) {
     function runPythonScript() {
         const pythonPath = process.platform === 'win32' ? path.join(venvPath, 'Scripts', 'python') : path.join(venvPath, 'bin', 'python3');
 
-        const pythonProcess = spawn(pythonPath, [pythonScriptPath, jobId, file_path, outputDirectory]);
+        const pythonProcess = spawn(pythonPath, [pythonScriptPath, jobId, file_path, outputDirectory, doc_id]);
 
         let pythonOutput = '';
         let stderrOutput = '';
diff --git a/src/server/chunker/pdf_chunker.py b/src/server/chunker/pdf_chunker.py
index e9b9ef2b3..e34753176 100644
--- a/src/server/chunker/pdf_chunker.py
+++ b/src/server/chunker/pdf_chunker.py
@@ -622,7 +622,7 @@ class Document:
     Represents a document being processed, such as a PDF, handling chunking, embedding, and summarization.
     """
 
-    def __init__(self, file_path: str, file_name: str, job_id: str, output_folder: str):
+    def __init__(self, file_path: str, file_name: str, job_id: str, output_folder: str, doc_id: str):
         """
         Initialize the Document with file data, file name, and job ID.
 
@@ -635,7 +635,7 @@ class Document:
         self.file_path = file_path
         self.job_id = job_id
         self.type = self._get_document_type(file_name)  # Determine the document type (PDF, CSV, etc.)
-        self.doc_id = job_id  # Use the job ID as the document ID
+        self.doc_id = doc_id  # Use the caller-supplied document ID (no longer the job ID)
         self.chunks = []  # List to hold text and visual chunks
         self.num_pages = 0  # Number of pages in the document (if applicable)
         self.summary = ""  # The generated summary for the document
@@ -755,7 +755,7 @@ class Document:
             "doc_id": self.doc_id
         }, indent=2)  # Convert the document's attributes to JSON format
 
-def process_document(file_path, job_id, output_folder):
+def process_document(file_path, job_id, output_folder, doc_id):
     """
     Top-level function to process a document and return the JSON output.
 
@@ -763,26 +763,27 @@ def process_document(file_path, job_id, output_folder):
     :param job_id: The job ID for this document processing task.
     :return: The processed document's data in JSON format.
     """
-    new_document = Document(file_path, file_path, job_id, output_folder)
+    new_document = Document(file_path, file_path, job_id, output_folder, doc_id)
     return new_document.to_json()
 
 def main():
     """
     Main entry point for the script, called with arguments from Node.js.
     """
-    if len(sys.argv) != 4:
+    if len(sys.argv) != 5:
         print(json.dumps({"error": "Invalid arguments"}), file=sys.stderr)
         return
 
     job_id = sys.argv[1]
     file_path = sys.argv[2]
     output_folder = sys.argv[3]  # Get the output folder from arguments
+    doc_id = sys.argv[4]
 
     try:
         os.makedirs(output_folder, exist_ok=True)
         
         # Process the document
-        document_result = process_document(file_path, job_id, output_folder)  # Pass output_folder
+        document_result = process_document(file_path, job_id, output_folder,doc_id)  # Pass output_folder
 
         # Output the final result as JSON to stdout
         print(document_result)
-- 
cgit v1.2.3-70-g09d2


From e141307dbd9b951f76c908610e7b89e296ad92b8 Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Sun, 11 May 2025 17:18:18 -0400
Subject: changed everything to be more consistent

- made both web related tools use doc manager and chunk Ids
---
 .../views/nodes/chatbot/agentsystem/Agent.ts       |   9 +-
 .../nodes/chatbot/chatboxcomponents/ChatBox.tsx    |  50 ++----
 .../nodes/chatbot/tools/DocumentMetadataTool.ts    |   2 +-
 src/client/views/nodes/chatbot/tools/SearchTool.ts |  26 +--
 .../nodes/chatbot/tools/WebsiteInfoScraperTool.ts  |  30 ++--
 .../views/nodes/chatbot/tools/WikipediaTool.ts     |   2 +-
 src/client/views/nodes/chatbot/types/types.ts      |   1 +
 .../nodes/chatbot/utils/AgentDocumentManager.ts    | 192 +++++----------------
 .../views/nodes/chatbot/vectorstore/Vectorstore.ts |  21 +--
 src/server/ApiManagers/AssistantManager.ts         | 160 ++++++++++++-----
 10 files changed, 215 insertions(+), 278 deletions(-)

(limited to 'src/client/views/nodes/chatbot/utils')

diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
index 24471bf5b..86d40864e 100644
--- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
@@ -63,10 +63,8 @@ export class Agent {
      */
     constructor(
         _vectorstore: Vectorstore,
-        summaries: () => string,
         history: () => string,
         csvData: () => { filename: string; id: string; text: string }[],
-        getLinkedUrlDocId: (url: string) => string[],
         createImage: (result: Upload.FileInformation & Upload.InspectionResults, options: DocumentOptions) => void,
         createCSVInDash: (url: string, title: string, id: string, data: string) => void,
         docManager: AgentDocumentManager
@@ -83,7 +81,7 @@ export class Agent {
             calculate: new CalculateTool(),
             rag: new RAGTool(this.vectorstore),
             dataAnalysis: new DataAnalysisTool(csvData),
-            websiteInfoScraper: new WebsiteInfoScraperTool(getLinkedUrlDocId),
+            websiteInfoScraper: new WebsiteInfoScraperTool(this._docManager),
             searchTool: new SearchTool(this._docManager),
             noTool: new NoTool(),
             //imageCreationTool: new ImageCreationTool(createImage),
@@ -125,11 +123,8 @@ export class Agent {
         // Retrieve chat history and generate system prompt
         const chatHistory = this._history();
         // Get document summaries directly from document manager
-        const documentSummaries = this._docManager.getAllDocumentSummaries();
-        // Create a function that returns document summaries for the prompt
-        const getSummaries = () => documentSummaries;
         // Generate the system prompt with the summaries
-        const systemPrompt = getReactPrompt(Object.values(this.tools), getSummaries, chatHistory);
+        const systemPrompt = getReactPrompt(Object.values(this.tools), () => JSON.stringify(this._docManager.listDocs), chatHistory);
 
         // Initialize intermediate messages
         this.interMessages = [{ role: 'system', content: systemPrompt }];
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 6349e554e..867e78860 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -121,16 +121,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         this.vectorstore = new Vectorstore(this.vectorstore_id, this.docManager);
 
         // Create an agent with the vectorstore
-        this.agent = new Agent(
-            this.vectorstore,
-            this.retrieveSummaries.bind(this),
-            this.retrieveFormattedHistory.bind(this),
-            this.retrieveCSVData.bind(this),
-            this.retrieveDocIds.bind(this),
-            this.createImageInDash.bind(this),
-            this.createCSVInDash.bind(this),
-            this.docManager
-        );
+        this.agent = new Agent(this.vectorstore, this.retrieveFormattedHistory.bind(this), this.retrieveCSVData.bind(this), this.createImageInDash.bind(this), this.createCSVInDash.bind(this), this.docManager);
 
         // Add event listeners
         this.addScrollListener();
@@ -228,6 +219,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         }
     };
 
+    //TODO: Update for new chunk_simpl on agentDocument
     /**
      * Adds a CSV file for analysis by sending it to OpenAI and generating a summary.
      * @param newLinkedDoc The linked document representing the CSV file.
@@ -650,18 +642,15 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 citation: JSON.stringify(citation, null, 2),
             });
 
-            // First try to find the document using the document manager's chunk ID lookup
-            const doc: Doc | undefined = this.docManager.getDocByChunkId(chunkId);
-            if (!doc) {
-                console.warn(`Document not found for citation with chunk_id: ${chunkId}`);
-                return;
-            }
-
             // Get the simplified chunk using the document manager
-            const foundChunk = this.docManager.getSimplifiedChunkById(doc, chunkId);
+            const { foundChunk, doc } = this.docManager.getSimplifiedChunkById(chunkId);
             if (!foundChunk) {
-                console.warn(`Chunk not found in document for chunk ID: ${chunkId}`);
-                DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+                if (doc) {
+                    console.warn(`Chunk not found in document, ${doc.id}, for chunk ID: ${chunkId}`);
+                    DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+                } else {
+                    console.warn(`Chunk not found for chunk ID: ${chunkId}`);
+                }
                 return;
             }
 
@@ -678,6 +667,10 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
             } else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) {
                 this.handleOtherChunkTypes(foundChunk, citation, doc);
             } else {
+                if (doc.type === 'web') {
+                    DocumentManager.Instance.showDocument(doc, { openLocation: OpenWhere.addRight }, () => {});
+                    return;
+                }
                 // Show the chunk text in citation popup
                 let chunkText = citation.direct_text || 'Text content not available';
                 this.showCitationPopup(chunkText);
@@ -986,16 +979,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         return Array.from(this.docManager.listDocs);
     }
 
-    /**
-     * Getter that retrieves summaries of all linked documents.
-     */
-    @computed
-    get summaries(): string {
-        // Use the document manager to get all summaries
-        console.log(this.docManager.listDocs);
-        return JSON.stringify(this.docManager.listDocs);
-    }
-
     /**
      * Getter that retrieves all linked CSV files for analysis.
      */
@@ -1022,7 +1005,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     // Other helper methods for retrieving document data and processing
 
     retrieveSummaries = (): string => {
-        return this.docManager.getAllDocumentSummaries();
+        // Despite the name, this returns docManager.listDocs serialized as JSON.
+        return JSON.stringify(this.docManager.listDocs);
     };
 
     retrieveCSVData = () => {
@@ -1033,10 +1017,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         return this.formattedHistory;
     };
 
-    retrieveDocIds = (): string[] => {
-        return Array.from(this.docManager.docIds);
-    };
-
     /**
      * Handles follow-up questions when the user clicks on them.
      * Automatically sets the input value to the clicked follow-up question.
diff --git a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts
index 5297292bf..405949c1e 100644
--- a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts
+++ b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts
@@ -408,7 +408,7 @@ export class DocumentMetadataTool extends BaseTool<DocumentMetadataToolParamsTyp
                     const title = String(args.title);
                     const data = String(args.data);
 
-                    const id = this._docManager.createDocInDash(docType, data, { title: title });
+                    const id = await this._docManager.createDocInDash(docType, data, { title: title });
 
                     if (!id) {
                         return [
diff --git a/src/client/views/nodes/chatbot/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts
index 53f5fc109..43f14ea83 100644
--- a/src/client/views/nodes/chatbot/tools/SearchTool.ts
+++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts
@@ -48,19 +48,21 @@ export class SearchTool extends BaseTool<SearchToolParamsType> {
                     query,
                     max_results: this._max_results,
                 })) as { results: { url: string; snippet: string }[] };
-                const data = results.map((result: { url: string; snippet: string }) => {
-                    // Create a web document with the URL
-                    const id = this._docManager.createDocInDash('web', result.url, {
-                        title: `Search Result: ${result.url}`,
-                        text_html: result.snippet,
-                        data_useCors: true,
-                    });
+                const data = await Promise.all(
+                    results.map(async (result: { url: string; snippet: string }) => {
+                        // Create a web document with the URL
+                        const id = await this._docManager.createDocInDash('web', result.url, {
+                            title: `Search Result: ${result.url}`,
+                            text_html: result.snippet,
+                            data_useCors: true,
+                        });
 
-                    return {
-                        type: 'text' as const,
-                        text: `<chunk chunk_id="${id}" chunk_type="url"><url>${result.url}</url><overview>${result.snippet}</overview></chunk>`,
-                    };
-                });
+                        return {
+                            type: 'text' as const,
+                            text: `<chunk chunk_id="${id}" chunk_type="url"><url>${result.url}</url><overview>${result.snippet}</overview></chunk>`,
+                        };
+                    })
+                );
                 return data;
             } catch (error) {
                 console.log(error);
diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
index 3c7b4e3db..495a985cb 100644
--- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
+++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
@@ -3,12 +3,14 @@ import { Networking } from '../../../../Network';
 import { BaseTool } from './BaseTool';
 import { Observation } from '../types/types';
 import { ParametersType, ToolInfo } from '../types/tool_types';
-
+import { AgentDocumentManager } from '../utils/AgentDocumentManager';
+import { Doc } from '../../../../../fields/Doc';
+import { StrCast, WebCast } from '../../../../../fields/Types';
 const websiteInfoScraperToolParams = [
     {
-        name: 'urls',
+        name: 'chunk_ids',
         type: 'string[]',
-        description: 'The URLs of the websites to scrape',
+        description: 'The chunk_ids of the urls to scrape from the SearchTool.',
         required: true,
         max_inputs: 3,
     },
@@ -66,11 +68,11 @@ const websiteInfoScraperToolInfo: ToolInfo<WebsiteInfoScraperToolParamsType> = {
 };
 
 export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParamsType> {
-    private _getLinkedUrlDocId: (url: string) => string[];
+    private _docManager: AgentDocumentManager;
 
-    constructor(getLinkedUrlDocIds: (url: string) => string[]) {
+    constructor(docManager: AgentDocumentManager) {
         super(websiteInfoScraperToolInfo);
-        this._getLinkedUrlDocId = getLinkedUrlDocIds;
+        this._docManager = docManager;
     }
 
     /**
@@ -79,10 +81,13 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
      * @param maxRetries Maximum number of retry attempts
      * @returns The scraped content or error message
      */
-    private async scrapeWithRetry(url: string, maxRetries = 2): Promise<Observation> {
+    private async scrapeWithRetry(chunkDoc: Doc, maxRetries = 2): Promise<Observation> {
         let lastError = '';
         let retryCount = 0;
-
+        const url = WebCast(chunkDoc.data!)!.url.href;
+        console.log(url);
+        console.log(chunkDoc);
+        console.log(chunkDoc.data);
         // Validate URL format
         try {
             new URL(url); // This will throw if URL is invalid
@@ -110,7 +115,6 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
                 }
 
                 const { website_plain_text } = response as { website_plain_text: string };
-                const id = this._getLinkedUrlDocId(url);
 
                 // Validate content quality
                 if (!website_plain_text) {
@@ -126,7 +130,7 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
                     if (retryCount === maxRetries) {
                         return {
                             type: 'text',
-                            text: `<chunk chunk_id="${id}" chunk_type="url">\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n</chunk>`,
+                            text: `<chunk chunk_id="${chunkDoc.id}" chunk_type="url">\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n</chunk>`,
                         } as Observation;
                     }
 
@@ -138,7 +142,7 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
                 // Process and return content if it looks good
                 return {
                     type: 'text',
-                    text: `<chunk chunk_id="${id}" chunk_type="url">\n${website_plain_text}\n</chunk>`,
+                    text: `<chunk chunk_id="${chunkDoc.id}" chunk_type="url">\n${website_plain_text}\n</chunk>`,
                 } as Observation;
             } catch (error) {
                 lastError = error instanceof Error ? error.message : 'Unknown error';
@@ -156,10 +160,10 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
     }
 
     async execute(args: ParametersType<WebsiteInfoScraperToolParamsType>): Promise<Observation[]> {
-        const urls = args.urls;
+        const chunk_ids = args.chunk_ids;
 
         // Create an array of promises, each one handling a website scrape for a URL
-        const scrapingPromises = urls.map(url => this.scrapeWithRetry(url));
+        const scrapingPromises = chunk_ids.map(chunk_id => this.scrapeWithRetry(this._docManager.getDocument(chunk_id)!));
 
         // Wait for all scraping promises to resolve
         const results = await Promise.all(scrapingPromises);
diff --git a/src/client/views/nodes/chatbot/tools/WikipediaTool.ts b/src/client/views/nodes/chatbot/tools/WikipediaTool.ts
index ee815532a..ec5d83e52 100644
--- a/src/client/views/nodes/chatbot/tools/WikipediaTool.ts
+++ b/src/client/views/nodes/chatbot/tools/WikipediaTool.ts
@@ -32,7 +32,7 @@ export class WikipediaTool extends BaseTool<WikipediaToolParamsType> {
 
     async execute(args: ParametersType<WikipediaToolParamsType>): Promise<Observation[]> {
         try {
-            const { text } = await Networking.PostToServer('/getWikipediaSummary', { title: args.title });
+            const { text } = (await Networking.PostToServer('/getWikipediaSummary', { title: args.title })) as { text: string };
             const id = uuidv4();
             const url = `https://en.wikipedia.org/wiki/${args.title.replace(/ /g, '_')}`;
             this._addLinkedUrlDoc(url, id);
diff --git a/src/client/views/nodes/chatbot/types/types.ts b/src/client/views/nodes/chatbot/types/types.ts
index 90b5e7e11..0d1804b2d 100644
--- a/src/client/views/nodes/chatbot/types/types.ts
+++ b/src/client/views/nodes/chatbot/types/types.ts
@@ -101,6 +101,7 @@ export interface RAGChunk {
 
 export interface SimplifiedChunk {
     chunkId: string;
+    doc_id: string;
     startPage?: number;
     endPage?: number;
     location?: string;
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index c8a6bb16b..5a09b945b 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -13,7 +13,7 @@ import { LinkManager, UPDATE_SERVER_CACHE } from '../../../../util/LinkManager';
 import { DocumentView } from '../../DocumentView';
 import { ChatBox, parsedDoc } from '../chatboxcomponents/ChatBox';
 import { supportedDocTypes } from '../types/tool_types';
-import { CHUNK_TYPE, RAGChunk } from '../types/types';
+import { CHUNK_TYPE, RAGChunk, SimplifiedChunk } from '../types/types';
 
 /**
  * Interface representing a document in the freeform view
@@ -31,7 +31,7 @@ export class AgentDocumentManager {
     private chatBox: ChatBox;
     private chatBoxDocument: Doc | null = null;
     private fieldMetadata: Record<string, any> = {};
-    @observable private documentIdsFromChunkIds: ObservableMap<string, string>;
+    @observable private simplifiedChunks: ObservableMap<string, SimplifiedChunk>;
 
     /**
      * Creates a new DocumentManager
@@ -40,17 +40,21 @@ export class AgentDocumentManager {
     constructor(chatBox: ChatBox) {
         makeObservable(this);
         const agentDoc = DocCast(chatBox.Document.agentDocument) ?? new Doc();
-        const chunkIds = DocCast(agentDoc.chunkIds) ?? new Doc();
+        const chunk_simpl = DocCast(agentDoc.chunk_simpl) ?? new Doc();
 
         agentDoc.title = chatBox.Document.title + '_agentDocument';
-        chunkIds.title = '_chunkIds';
+        chunk_simpl.title = '_chunk_simpl';
         chatBox.Document.agentDocument = agentDoc;
-        DocCast(chatBox.Document.agentDocument)!.chunkIds = chunkIds;
-        this.documentIdsFromChunkIds = StrListCast(chunkIds.mapping).reduce((mapping, content) => {
-            const [chunkId, docId] = content.split(':');
-            mapping.set(chunkId, docId);
+        DocCast(chatBox.Document.agentDocument)!.chunk_simpl = chunk_simpl;
+
+        this.simplifiedChunks = StrListCast(chunk_simpl.mapping).reduce((mapping, chunkJson) => {
+            // Each list entry is one JSON-serialized SimplifiedChunk: that is the format the
+            // persistence reaction in this constructor writes, so parse the entry directly.
+            const parsed = JSON.parse(chunkJson) as SimplifiedChunk;
+            mapping.set(parsed.chunkId, parsed);
             return mapping;
-        }, new ObservableMap<string, string>());
+        }, new ObservableMap<string, SimplifiedChunk>());
+
         this.documentsById = StrListCast(agentDoc.mapping).reduce((mapping, content) => {
             const [id, layoutId, docId] = content.split(':');
             const layoutDoc = DocServer.GetCachedRefField(layoutId);
@@ -76,14 +80,10 @@ export class AgentDocumentManager {
             //{ fireImmediately: true }
         );
         reaction(
-            () => this.documentIdsFromChunkIds.values(),
+            () => this.simplifiedChunks.values(),
             () => {
                 if (this.chatBoxDocument && DocCast(this.chatBoxDocument.agentDocument)) {
-                    // Store the mapping with chunkId:docId format for consistency
-                    const chunkIdsDoc = DocCast(DocCast(this.chatBoxDocument.agentDocument)!.chunkIds);
-                    if (chunkIdsDoc) {
-                        chunkIdsDoc.mapping = new List<string>(Array.from(this.documentIdsFromChunkIds.entries()).map(([chunkId, docId]) => `${chunkId}:${docId}`));
-                    }
+                    DocCast(DocCast(this.chatBoxDocument.agentDocument)!.chunk_simpl)!.mapping = new List<string>(Array.from(this.simplifiedChunks.values()).map(chunk => JSON.stringify(chunk)));
                 }
             }
             //{ fireImmediately: true }
@@ -831,7 +831,8 @@ export class AgentDocumentManager {
      * @param options Optional configuration options
      * @returns The ID of the created document
      */
-    public createDocInDash(docType: string, data: string, options?: any): string {
+
+    public async createDocInDash(docType: string, data: string, options?: any): Promise<string> {
         // Validate doc_type
         if (!this.isValidDocType(docType)) {
             throw new Error(`Invalid document type: ${docType}`);
@@ -877,14 +878,15 @@ export class AgentDocumentManager {
                         // Create link and add it to the document system
                         const linkDoc = Docs.Create.LinkDocument(this.chatBoxDocument, doc);
                         LinkManager.Instance.addLink(linkDoc);
-
-                        // Add document to view
-                        this.chatBox._props.addDocument?.(doc);
-
-                        // Show document - defer actual display to prevent immediate resource loading
-                        setTimeout(() => {
-                            DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
-                        }, 100);
+                        if (doc.type !== 'web') {
+                            // Add document to view
+                            this.chatBox._props.addDocument?.(doc);
+
+                            // Show document - defer actual display to prevent immediate resource loading
+                            setTimeout(() => {
+                                DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+                            }, 100);
+                        }
                     }
                 });
 
@@ -985,67 +987,6 @@ export class AgentDocumentManager {
         return docInfo?.layoutDoc;
     }
 
-    /**
-     * Registers chunk IDs associated with a document in the manager
-     * @param docId The parent document ID
-     * @param chunkIds Array of chunk IDs associated with this document
-     */
-    @action
-    public registerChunkIds(docId: string, chunkIds: string[]): void {
-        // Get the document if it exists
-        const docInfo = this.documentsById.get(docId);
-        if (!docInfo) {
-            console.warn(`Cannot register chunks for unknown document ID: ${docId}`);
-            return;
-        }
-
-        // Store chunk IDs on the document for future reference
-        const doc = docInfo.layoutDoc;
-        if (!doc.chunk_ids) {
-            doc.chunk_ids = JSON.stringify(chunkIds);
-        } else {
-            // Merge with existing chunk IDs if they exist
-            const existingIds = JSON.parse(doc.chunk_ids as string);
-            const updatedIds = [...new Set([...existingIds, ...chunkIds])]; // Remove duplicates
-            doc.chunk_ids = JSON.stringify(updatedIds);
-        }
-        for (const chunkId of chunkIds) {
-            // Ensure each chunk ID can be linked back to its parent document
-            // Store a mapping from chunk ID to parent document ID
-            // This allows us to easily find a document by any of its chunk IDs
-            if (!this.documentIdsFromChunkIds.has(chunkId) && doc) {
-                this.documentIdsFromChunkIds.set(chunkId, doc[Id]);
-            }
-        }
-    }
-
-    /**
-     * Gets a document ID by a chunk ID
-     * @param chunkId The chunk ID to look up
-     * @returns The parent document ID if found
-     */
-    public getDocByChunkId(chunkId: string): Doc | undefined {
-        // First, look up the document ID using the chunk ID mapping
-        const docId = this.documentIdsFromChunkIds.get(chunkId);
-        console.log('this.documentIdsFromChunkIds', this.documentIdsFromChunkIds);
-        console.log('docId', docId);
-        if (!docId) {
-            if (this.documentsById.has(chunkId)) {
-                return this.documentsById.get(chunkId)?.layoutDoc;
-            } else {
-                console.error('No document found for chunkId and docId', chunkId);
-                return undefined;
-            }
-        }
-        // Then get the document using the document ID
-        const docInfo = this.documentsById.get(docId);
-        if (docInfo) {
-            return docInfo.layoutDoc;
-        }
-        console.error('No document found for docId', docId);
-        return undefined;
-    }
-
     /**
      * Adds simplified chunks to a document for citation handling
      * @param doc The document to add simplified chunks to
@@ -1053,21 +994,13 @@ export class AgentDocumentManager {
      * @param docType The type of document (e.g., 'pdf', 'video', 'audio', etc.)
      * @returns The updated document with simplified chunks
      */
-    public addSimplifiedChunks(doc: Doc, chunks: RAGChunk[], docType: string): Doc {
-        if (!doc) {
-            console.error('Cannot add simplified chunks to null document');
-            return doc;
-        }
-
-        // Initialize empty chunks array if not exists
-        if (!doc.chunk_simpl) {
-            doc.chunk_simpl = JSON.stringify({ chunks: [] });
-        }
-
+    @action
+    public addSimplifiedChunks(chunks: RAGChunk[], docType: string) {
+        console.log('chunks', chunks, 'simplifiedChunks', this.simplifiedChunks);
         // Create array of simplified chunks based on document type
-        const simplifiedChunks = chunks.map(chunk => {
+        for (const chunk of chunks) {
             // Common properties across all chunk types
-            const baseChunk = {
+            const baseChunk: SimplifiedChunk = {
                 chunkId: chunk.id,
                 text: chunk.metadata.text,
                 doc_id: chunk.metadata.doc_id,
@@ -1076,38 +1009,33 @@ export class AgentDocumentManager {
 
             // Add type-specific properties
             if (docType === 'video' || docType === 'audio') {
-                return {
+                this.simplifiedChunks.set(chunk.id, {
                     ...baseChunk,
                     start_time: chunk.metadata.start_time,
                     end_time: chunk.metadata.end_time,
                     indexes: chunk.metadata.indexes,
                     chunkType: docType === 'video' ? CHUNK_TYPE.VIDEO : CHUNK_TYPE.AUDIO,
-                };
+                } as SimplifiedChunk);
             } else if (docType === 'pdf') {
-                return {
+                this.simplifiedChunks.set(chunk.id, {
                     ...baseChunk,
                     startPage: chunk.metadata.start_page,
                     endPage: chunk.metadata.end_page,
                     location: chunk.metadata.location,
-                };
+                } as SimplifiedChunk);
             } else if (docType === 'csv') {
-                return {
+                this.simplifiedChunks.set(chunk.id, {
                     ...baseChunk,
                     rowStart: (chunk.metadata as any).row_start,
                     rowEnd: (chunk.metadata as any).row_end,
                     colStart: (chunk.metadata as any).col_start,
                     colEnd: (chunk.metadata as any).col_end,
-                };
+                } as SimplifiedChunk);
             } else {
                 // Default for other document types
-                return baseChunk;
+                this.simplifiedChunks.set(chunk.id, baseChunk as SimplifiedChunk);
             }
-        });
-        console.log('simplifiedChunks', simplifiedChunks);
-        // Update the document with all simplified chunks at once
-        doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks });
-
-        return doc;
+        }
     }
 
     /**
@@ -1116,21 +1044,10 @@ export class AgentDocumentManager {
      * @param chunkId The ID of the chunk to retrieve
-     * @returns The simplified chunk if found, undefined otherwise
+     * @returns `{ foundChunk, doc }`: the stored chunk (undefined if unknown) and the result of getDocument for its doc_id
      */
-    public getSimplifiedChunkById(doc: Doc, chunkId: string): any | undefined {
-        let chunks: any[] = [];
-        if (!doc || !doc.chunk_simpl) {
-            chunks = [];
-            console.warn('No chunk found for chunkId', chunkId, '. Checking if document exists in documentsById.');
-            return [];
-        }
-        try {
-            const parsed = JSON.parse(StrCast(doc.chunk_simpl));
-            chunks = parsed.chunks || [];
-        } catch (e) {
-            console.error('Error parsing simplified chunks:', e);
-            return [];
-        }
-        return chunks.find(chunk => chunk.chunkId === chunkId);
+    public getSimplifiedChunkById(chunkId: string): any | undefined {
+        // Single map lookup; with no chunk (or no doc_id) getDocument is queried with ''.
+        const foundChunk = this.simplifiedChunks.get(chunkId);
+        return { foundChunk, doc: this.getDocument(foundChunk?.doc_id || '') };
     }
 
     /**
@@ -1150,27 +1067,4 @@ export class AgentDocumentManager {
             return [];
         }
     }
-
-    /**
-     * Gets all document summaries combined into a single string
-     * @returns String containing all document summaries
-     */
-    public getAllDocumentSummaries(): string {
-        const summaries = Array.from(this.documentsById.keys())
-            .map(id => {
-                const doc = this.getDocument(id);
-                if (doc) {
-                    // Try to get summary from either the document or its data document
-                    const summary = doc.summary || (doc[DocData] && doc[DocData].summary);
-                    if (summary) {
-                        return StrCast(summary);
-                    }
-                }
-                return null;
-            })
-            .filter(Boolean)
-            .join('\n\n');
-
-        return summaries;
-    }
 }
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index 1349df483..f1fae6f11 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -148,10 +148,6 @@ export class Vectorstore {
 
                     // Generate chunk IDs upfront so we can register them
                     const chunkIds = segmentedTranscript.map(() => uuidv4());
-
-                    // Register all chunk IDs with the document manager
-                    this.docManager.registerChunkIds(doc_id, chunkIds);
-
                     // Add transcript and embeddings to metadata
                     result = {
                         doc_id,
@@ -185,7 +181,7 @@ export class Vectorstore {
                 doc.segmented_transcript = JSON.stringify(segmentedTranscript);
                 // Use doc manager to add simplified chunks
                 const docType = local_file_path.endsWith('.mp3') ? 'audio' : 'video';
-                this.docManager.addSimplifiedChunks(doc, result.chunks, docType);
+                this.docManager.addSimplifiedChunks(result.chunks, docType);
             } else {
                 // Process regular document
                 console.log('Processing regular document...');
@@ -216,13 +212,10 @@ export class Vectorstore {
                     console.log('doc_id in vectorstore', result.doc_id, 'does not match doc_id in doc', doc[Id]);
                 }
 
-                // Register chunks with the document manager
-                this.docManager.registerChunkIds(result.doc_id, chunkIds);
-
                 // Use doc manager to add simplified chunks - determine document type from file extension
                 const fileExt = path.extname(local_file_path).toLowerCase();
                 const docType = fileExt === '.pdf' ? 'pdf' : fileExt === '.csv' ? 'csv' : 'text';
-                this.docManager.addSimplifiedChunks(doc, result.chunks, docType);
+                this.docManager.addSimplifiedChunks(result.chunks, docType);
 
                 doc.summary = result.summary;
                 doc.ai_purpose = result.purpose;
@@ -351,16 +344,6 @@ export class Vectorstore {
                     },
                 } as RAGChunk;
 
-                // Ensure the document manager knows about this chunk
-                // This is important for maintaining backwards compatibility
-                if (chunk.id && !this.docManager.getDocByChunkId(chunk.id)) {
-                    // If the chunk ID isn't registered but we have a doc_id in metadata
-                    if (chunk.metadata.doc_id && this.docManager.has(chunk.metadata.doc_id)) {
-                        // Register the chunk with its parent document
-                        this.docManager.registerChunkIds(chunk.metadata.doc_id, [chunk.id]);
-                    }
-                }
-
                 return chunk;
             });
 
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index 378f14094..b7ce4f663 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -514,30 +514,37 @@ export default class AssistantManager extends ApiManager {
                     await browser.close();
                     browser = null;
 
-                    // Use a try-catch block specifically for JSDOM parsing
+                    let extractedText = '';
+
+                    // First try with Readability
                     try {
                         // Parse HTML content using JSDOM
                         const dom = new JSDOM(htmlContent, { url });
 
                         // Extract readable content using Mozilla's Readability API
-                        const reader = new Readability(dom.window.document);
+                        const reader = new Readability(dom.window.document, {
+                            // Readability configuration to focus on text content
+                            charThreshold: 100,
+                            keepClasses: false,
+                        });
                         const article = reader.parse();
 
-                        if (article) {
-                            const plainText = article.textContent;
-                            res.send({ website_plain_text: plainText });
+                        if (article && article.textContent) {
+                            extractedText = article.textContent;
                         } else {
-                            // If Readability fails, fallback to extracting main content
-                            const mainContent = await extractMainContent(htmlContent);
-                            res.send({ website_plain_text: mainContent });
+                            // If Readability doesn't return useful content, try alternate method
+                            extractedText = await extractEnhancedContent(htmlContent);
                         }
                     } catch (parsingError) {
-                        console.error('Error parsing website content:', parsingError);
-
-                        // Fallback to a simplified extraction method
-                        const mainContent = await extractMainContent(htmlContent);
-                        res.send({ website_plain_text: mainContent });
+                        console.error('Error parsing website content with Readability:', parsingError);
+                        // Fallback to enhanced content extraction
+                        extractedText = await extractEnhancedContent(htmlContent);
                     }
+
+                    // Clean up the extracted text
+                    extractedText = cleanupText(extractedText);
+
+                    res.send({ website_plain_text: extractedText });
                 } catch (error) {
                     console.error('Error scraping website:', error);
 
@@ -985,48 +992,119 @@ function spawnPythonProcess(jobId: string, file_path: string, doc_id: string) {
 }
 
 /**
- * Extracts main content from HTML by removing scripts, styles, and non-content elements
- * Used as a fallback when Readability fails
+ * Fallback text extraction: strips non-content elements, then collects text from paragraph-like elements.
  * @param html The HTML content to process
- * @returns Extracted main text content
+ * @returns Parts joined by blank lines, the body text if fewer than 3 parts, or an error notice on failure
  */
-async function extractMainContent(html: string): Promise<string> {
+async function extractEnhancedContent(html: string): Promise<string> {
     try {
-        // Create a simple DOM to extract content
+        // Create DOM to extract content
         const dom = new JSDOM(html, { runScripts: 'outside-only' });
         const document = dom.window.document;
 
-        // Remove scripts, styles, and other non-content elements
-        const elementsToRemove = ['script', 'style', 'iframe', 'noscript', 'svg', 'header', 'footer', 'nav', 'aside', 'ads', 'banner', 'form', 'button', 'input'];
-
-        elementsToRemove.forEach(tag => {
-            const elements = document.querySelectorAll(tag);
+        // Remove all non-content elements (tags, common ad/popup classes, ARIA landmark roles)
+        const elementsToRemove = [
+            'script',
+            'style',
+            'iframe',
+            'noscript',
+            'svg',
+            'canvas',
+            'header',
+            'footer',
+            'nav',
+            'aside',
+            'form',
+            'button',
+            'input',
+            'select',
+            'textarea',
+            'meta',
+            'link',
+            'img',
+            'video',
+            'audio',
+            '.ad',
+            '.ads',
+            '.advertisement',
+            '.banner',
+            '.cookie',
+            '.popup',
+            '.modal',
+            '.newsletter',
+            '[role="banner"]',
+            '[role="navigation"]',
+            '[role="complementary"]',
+        ];
+
+        elementsToRemove.forEach(selector => {
+            const elements = document.querySelectorAll(selector);
             elements.forEach(el => el.remove());
         });
 
-        // Try to find the main content container using common selectors
-        const mainSelectors = ['main', 'article', '#content', '.content', '#main', '.main', '.post-content', '.article-content', '.entry-content'];
-
-        let mainContent = '';
-
-        // Try each selector to find main content
-        for (const selector of mainSelectors) {
-            const element = document.querySelector(selector);
-            if (element && element.textContent && element.textContent.trim().length > 100) {
-                mainContent = element.textContent;
-                break;
+        // Candidates grouped by tag, not document order. NOTE(review): article/section/div repeat nested <p>/<li> text
+        const contentElements = [
+            ...Array.from(document.querySelectorAll('p')),
+            ...Array.from(document.querySelectorAll('h1')),
+            ...Array.from(document.querySelectorAll('h2')),
+            ...Array.from(document.querySelectorAll('h3')),
+            ...Array.from(document.querySelectorAll('h4')),
+            ...Array.from(document.querySelectorAll('h5')),
+            ...Array.from(document.querySelectorAll('h6')),
+            ...Array.from(document.querySelectorAll('li')),
+            ...Array.from(document.querySelectorAll('td')),
+            ...Array.from(document.querySelectorAll('article')),
+            ...Array.from(document.querySelectorAll('section')),
+            ...Array.from(document.querySelectorAll('div:not([class]):not([id])')),
+        ];
+
+        // Extract text from content elements that have meaningful text
+        let contentParts: string[] = [];
+        contentElements.forEach(el => {
+            const text = el.textContent?.trim();
+            // Keep text longer than 10 chars; includes() only drops exact repeats (linear scan per element)
+            if (text && text.length > 10 && !contentParts.includes(text)) {
+                contentParts.push(text);
             }
-        }
+        });
 
-        // If no main content found with selectors, use body content
-        if (!mainContent || mainContent.length < 200) {
-            mainContent = document.body.textContent || '';
+        // Fewer than 3 parts counts as no significant content: fall back to the whole body text
+        if (contentParts.length < 3) {
+            return document.body.textContent || '';
         }
 
-        // Clean up the text
-        return mainContent.replace(/\s+/g, ' ').replace(/\n+/g, '\n').trim();
+        return contentParts.join('\n\n');
     } catch (error) {
-        console.error('Error extracting main content:', error);
+        console.error('Error extracting enhanced content:', error);
         return 'Failed to extract content from the webpage.';
     }
 }
+
+/**
+ * Cleans up extracted text: normalizes whitespace and strips boilerplate phrases,
+ * email addresses, URLs, @handles and leftover HTML tags.
+ * @param text The raw extracted text
+ * @returns Cleaned text with paragraph breaks kept ('' when text is empty)
+ */
+function cleanupText(text: string): string {
+    if (!text) return '';
+    return (
+        text
+            // Collapse spaces/tabs only; /\s+/ also swallowed every newline, which left the next rule dead
+            .replace(/[^\S\n]+/g, ' ')
+            .replace(/\n\s*\n\s*\n+/g, '\n\n')
+            // Remove common boilerplate phrases
+            .replace(/cookie policy|privacy policy|terms of service|all rights reserved|copyright ©/gi, '')
+            // Remove email addresses
+            .replace(/[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}/g, '')
+            // Remove URLs
+            .replace(/https?:\/\/[^\s]+/g, '')
+            // Remove social media handles
+            .replace(/@[a-zA-Z0-9_]+/g, '')
+            // Clean up any remaining HTML tags that might have been missed
+            .replace(/<[^>]*>/g, '')
+            // Fix spacing issues after cleanup
+            .replace(/ +/g, ' ')
+            .trim()
+    );
+}
-- 
cgit v1.2.3-70-g09d2


From 1ba55505d65af9b98a7a16e424d51119e4254c53 Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Sun, 11 May 2025 17:20:47 -0400
Subject: Made sure if there was no chunk in a document, that it would still
 revert back to using the chunkid as a documentID

---
 src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/client/views/nodes/chatbot/utils')

diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index 5a09b945b..82b7ed3df 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -1047,7 +1047,7 @@ export class AgentDocumentManager {
     public getSimplifiedChunkById(chunkId: string): any | undefined {
         console.log('chunkId', chunkId, 'simplifiedChunks', this.simplifiedChunks);
         console.log('doc', this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || ''));
-        return { foundChunk: this.simplifiedChunks.get(chunkId), doc: this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || '') };
+        return { foundChunk: this.simplifiedChunks.get(chunkId), doc: this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || chunkId) };
     }
 
     /**
-- 
cgit v1.2.3-70-g09d2


From dc60f3c37f72874e9bee15c3571bc50ea5826c17 Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Sun, 11 May 2025 17:30:18 -0400
Subject: cleaned up unused functions and also made available documents area
 json.

---
 .../nodes/chatbot/chatboxcomponents/ChatBox.tsx      | 13 -------------
 .../nodes/chatbot/utils/AgentDocumentManager.ts      | 20 +++++++++++++++-----
 2 files changed, 15 insertions(+), 18 deletions(-)

(limited to 'src/client/views/nodes/chatbot/utils')

diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 867e78860..00077d68d 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -970,14 +970,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     componentWillUnmount() {
         this.removeScrollListener();
     }
-    /**
-     * Getter that retrieves document IDs of linked documents that have PDF_chunker–parsed content.
-     */
-    @computed
-    get docIds(): string[] {
-        // Use the document manager to get all document IDs
-        return Array.from(this.docManager.listDocs);
-    }
 
     /**
      * Getter that retrieves all linked CSV files for analysis.
@@ -1004,11 +996,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
 
     // Other helper methods for retrieving document data and processing
 
-    retrieveSummaries = (): string => {
-        console.log(this.docManager.listDocs);
-        return JSON.stringify(this.docManager.listDocs);
-    };
-
     retrieveCSVData = () => {
         return this.linkedCSVs;
     };
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index 82b7ed3df..e9d41efbd 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -965,11 +965,21 @@ export class AgentDocumentManager {
      * @returns An array of document IDs (strings).
      */
     @computed
-    public get listDocs(): string[] {
-        console.log(
-            Array.from(this.documentsById.entries()).map(([id, agentDoc]) => JSON.stringify({ id, title: agentDoc.layoutDoc.title, type: agentDoc.layoutDoc.type, summary: agentDoc.layoutDoc.summary || 'No summary available for this document.' }))
-        );
-        return Array.from(this.documentsById.entries()).map(([id, agentDoc]) => JSON.stringify({ id, title: agentDoc.layoutDoc.title, type: agentDoc.layoutDoc.type, summary: agentDoc.layoutDoc.summary || 'No summary available for this document.' }));
+    public get listDocs(): string {
+        const xmlDocs = Array.from(this.documentsById.entries()).map(([id, agentDoc]) => {
+            return `<document>
+  <id>${id}</id>
+  <title>${this.escapeXml(StrCast(agentDoc.layoutDoc.title))}</title>
+  <type>${this.escapeXml(StrCast(agentDoc.layoutDoc.type))}</type>
+  <summary>${this.escapeXml(StrCast(agentDoc.layoutDoc.summary))}</summary>
+</document>`;
+        });
+
+        return xmlDocs.join('\n');
+    }
+
+    private escapeXml(str: string): string {
+        return str.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;').replace(/"/g, '&quot;').replace(/'/g, '&apos;');
     }
 
     @computed
-- 
cgit v1.2.3-70-g09d2


From b3aa238043d01cbc58293b45867706fa9b36cefe Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Mon, 12 May 2025 15:46:30 -0400
Subject: working better

---
 src/client/views/nodes/PDFBox.scss                 |  23 +
 src/client/views/nodes/PDFBox.tsx                  |  12 +
 .../nodes/chatbot/chatboxcomponents/ChatBox.tsx    | 143 +++++-
 .../nodes/chatbot/utils/AgentDocumentManager.ts    |  33 +-
 .../views/nodes/chatbot/vectorstore/Vectorstore.ts |  10 +-
 src/client/views/pdf/PDFViewer.tsx                 | 572 ++++++++++++++++++++-
 6 files changed, 762 insertions(+), 31 deletions(-)

(limited to 'src/client/views/nodes/chatbot/utils')

diff --git a/src/client/views/nodes/PDFBox.scss b/src/client/views/nodes/PDFBox.scss
index eaea272dc..44013a96d 100644
--- a/src/client/views/nodes/PDFBox.scss
+++ b/src/client/views/nodes/PDFBox.scss
@@ -344,3 +344,26 @@
         font-size: 30px;
     }
 }
+
+.pdfBox-fuzzy {
+    border: none;
+    background-color: #4a4a4a;
+    color: white;
+    padding: 0 8px;
+    height: 24px;
+    cursor: pointer;
+    margin-right: 4px;
+    border-radius: 3px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+
+    &.active {
+        background-color: #3498db;
+        color: white;
+    }
+
+    &:hover {
+        background-color: #2980b9;
+    }
+}
diff --git a/src/client/views/nodes/PDFBox.tsx b/src/client/views/nodes/PDFBox.tsx
index 55e6d5596..4ecbd65b6 100644
--- a/src/client/views/nodes/PDFBox.tsx
+++ b/src/client/views/nodes/PDFBox.tsx
@@ -53,6 +53,7 @@ export class PDFBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     private _sidebarRef = React.createRef<SidebarAnnos>();
 
     @observable private _searching: boolean = false;
+    @observable private _fuzzySearchEnabled: boolean = true;
     @observable private _pdf: Opt<Pdfjs.PDFDocumentProxy> = undefined;
     @observable private _pageControls = false;
 
@@ -272,6 +273,14 @@ export class PDFBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         !this.Document._layout_fitWidth && (this.Document._height = NumCast(this.Document._width) * (p.height / p.width));
     };
 
+    @action
+    toggleFuzzySearch = () => {
+        this._fuzzySearchEnabled = !this._fuzzySearchEnabled;
+        this._pdfViewer?.toggleFuzzySearch();
+        // Clear existing search results when switching modes
+        this.search('', false, true);
+    };
+
     override search = action((searchString: string, bwd?: boolean, clear: boolean = false) => {
         if (!this._searching && !clear) {
             this._searching = true;
@@ -412,6 +421,9 @@ export class PDFBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                     <button type="button" className="pdfBox-search" title="Search" onClick={e => this.search(this._searchString, e.shiftKey)}>
                         <FontAwesomeIcon icon="search" size="sm" />
                     </button>
+                    <button type="button" className={`pdfBox-fuzzy ${this._fuzzySearchEnabled ? 'active' : ''}`} title={`${this._fuzzySearchEnabled ? 'Disable' : 'Enable'} Fuzzy Search`} onClick={this.toggleFuzzySearch}>
+                        <FontAwesomeIcon icon="magic" size="sm" />
+                    </button>
                     <button type="button" className="pdfBox-prevIcon" title="Previous Annotation" onClick={this.prevAnnotation}>
                         <FontAwesomeIcon icon="arrow-up" size="lg" />
                     </button>
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 00077d68d..af689f243 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -15,7 +15,7 @@ import * as React from 'react';
 import { v4 as uuidv4 } from 'uuid';
 import { ClientUtils, OmitKeys } from '../../../../../ClientUtils';
 import { Doc, DocListCast, Opt } from '../../../../../fields/Doc';
-import { DocData, DocViews } from '../../../../../fields/DocSymbols';
+import { DocData, DocLayout, DocViews } from '../../../../../fields/DocSymbols';
 import { RichTextField } from '../../../../../fields/RichTextField';
 import { ScriptField } from '../../../../../fields/ScriptField';
 import { CsvCast, DocCast, NumCast, PDFCast, RTFCast, StrCast, VideoCast, AudioCast } from '../../../../../fields/Types';
@@ -644,6 +644,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
 
             // Get the simplified chunk using the document manager
             const { foundChunk, doc } = this.docManager.getSimplifiedChunkById(chunkId);
+            console.log('doc: ', doc);
             if (!foundChunk) {
                 if (doc) {
                     console.warn(`Chunk not found in document, ${doc.id}, for chunk ID: ${chunkId}`);
@@ -665,12 +666,14 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                     console.error('No direct matching segment found for the citation.');
                 }
             } else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) {
-                this.handleOtherChunkTypes(foundChunk, citation, doc);
+                console.log('here: ', foundChunk);
+                this.handleOtherChunkTypes(foundChunk as SimplifiedChunk, citation, doc);
             } else {
                 if (doc.type === 'web') {
                     DocumentManager.Instance.showDocument(doc, { openLocation: OpenWhere.addRight }, () => {});
                     return;
                 }
+                this.handleOtherChunkTypes(foundChunk, citation, doc);
                 // Show the chunk text in citation popup
                 let chunkText = citation.direct_text || 'Text content not available';
                 this.showCitationPopup(chunkText);
@@ -834,10 +837,45 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 this._citationPopup = { text: citation.direct_text ?? 'No text available', visible: true };
                 this.startCitationPopupTimer();
 
+                // Check if the document is a PDF (has a PDF viewer component)
+                const isPDF = PDFCast(doc.data) !== null || doc.type === DocumentType.PDF;
+
                 DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
-                    const firstView = Array.from(doc[DocViews])[0] as DocumentView;
-                    (firstView.ComponentView as PDFBox)?.gotoPage?.(foundChunk.startPage ?? 0);
-                    (firstView.ComponentView as PDFBox)?.search?.(citation.direct_text ?? '');
+                    // Add a delay to ensure document is fully loaded and rendered
+                    setTimeout(() => {
+                        try {
+                            // Safety check: ensure the document has views
+                            if (!doc[DocViews] || doc[DocViews].size === 0) {
+                                console.warn('Document views not available yet, retrying...');
+                                this.retryPdfSearch(doc, citation, foundChunk, isPDF, 1);
+                                return;
+                            }
+
+                            const views = Array.from(doc[DocViews]);
+                            if (!views.length) {
+                                console.warn('No document views found, retrying...');
+                                this.retryPdfSearch(doc, citation, foundChunk, isPDF, 1);
+                                return;
+                            }
+
+                            const firstView = views[0] as DocumentView;
+                            if (!firstView || !firstView.ComponentView) {
+                                console.warn('Component view not available yet, retrying...');
+                                this.retryPdfSearch(doc, citation, foundChunk, isPDF, 1);
+                                return;
+                            }
+
+                            const pdfComponent = firstView.ComponentView as PDFBox;
+                            if (isPDF && pdfComponent && citation.direct_text) {
+                                // Use our helper to ensure fuzzy search is enabled and execute the search
+                                this.ensureFuzzySearchAndExecute(pdfComponent, citation.direct_text.trim(), foundChunk.startPage);
+                            }
+                        } catch (error) {
+                            console.error('Error accessing PDF component:', error);
+                            // Retry with exponential backoff
+                            this.retryPdfSearch(doc, citation, foundChunk, isPDF, 1);
+                        }
+                    }, 500); // Initial delay before first attempt
                 });
                 break;
             case CHUNK_TYPE.CSV:
@@ -851,6 +889,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 break;
         }
     };
+
     /**
      * Creates an annotation highlight on a PDF document for image citations.
      * @param x1 X-coordinate of the top-left corner of the highlight.
@@ -1091,6 +1130,100 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         setTimeout(() => this.closeCitationPopup(), 5000);
     };
 
+    /**
+     * Re-checks (at most 5 attempts, growing delay) for a DocumentView with a ComponentView, then runs the citation search; does nothing for non-PDF docs or citations without direct_text
+     */
+    retryPdfSearch = (doc: Doc, citation: Citation, foundChunk: SimplifiedChunk, isPDF: boolean, attempt: number) => {
+        if (attempt > 5) {
+            console.error('Maximum retry attempts reached for PDF search');
+            return;
+        }
+
+        const delay = Math.min(2000, 500 * Math.pow(1.5, attempt)); // 500ms * 1.5^attempt, capped at 2 seconds
+
+        setTimeout(() => {
+            try {
+                if (!doc[DocViews] || doc[DocViews].size === 0) {
+                    this.retryPdfSearch(doc, citation, foundChunk, isPDF, attempt + 1);
+                    return;
+                }
+
+                const views = Array.from(doc[DocViews]);
+                if (!views.length) {
+                    this.retryPdfSearch(doc, citation, foundChunk, isPDF, attempt + 1);
+                    return;
+                }
+
+                const firstView = views[0] as DocumentView;
+                if (!firstView || !firstView.ComponentView) {
+                    this.retryPdfSearch(doc, citation, foundChunk, isPDF, attempt + 1);
+                    return;
+                }
+
+                const pdfComponent = firstView.ComponentView as PDFBox;
+                if (isPDF && pdfComponent && citation.direct_text) {
+                    console.log(`PDF component found on attempt ${attempt}, executing search...`);
+                    this.ensureFuzzySearchAndExecute(pdfComponent, citation.direct_text.trim(), foundChunk.startPage);
+                }
+            } catch (error) {
+                console.error(`Error on retry attempt ${attempt}:`, error);
+                this.retryPdfSearch(doc, citation, foundChunk, isPDF, attempt + 1);
+            }
+        }, delay);
+    };
+
+    /**
+     * Navigates a PDFBox to the cited page, clears stale search results, then searches for the text (fuzzy mode is PDFBox's default and is left untouched)
+     * @param pdfComponent The PDFBox component
+     * @param searchText The text to search for
+     * @param startPage Optional page; when it is a number, gotoPage(startPage + 1) is called before searching
+     */
+    private ensureFuzzySearchAndExecute = (pdfComponent: PDFBox, searchText: string, startPage?: number) => {
+        if (!pdfComponent) {
+            console.warn('PDF component is undefined, cannot perform search');
+            return;
+        }
+
+        if (!searchText?.trim()) {
+            console.warn('Search text is empty, skipping search');
+            return;
+        }
+
+        try {
+            // Check if the component has required methods
+            if (typeof pdfComponent.gotoPage !== 'function' || typeof pdfComponent.toggleFuzzySearch !== 'function' || typeof pdfComponent.search !== 'function') {
+                console.warn('PDF component missing required methods');
+                return;
+            }
+
+            // Navigate to the page if specified
+            if (typeof startPage === 'number') {
+                pdfComponent.gotoPage(startPage + 1);
+            }
+
+            // Do NOT call toggleFuzzySearch(): the flag starts enabled, so toggling turned it off on every other citation
+            try {
+                // Keep only the part of toggleFuzzySearch() that is wanted here:
+                // clearing results left over from a previous search
+                pdfComponent.search('', false, true);
+            } catch (clearError) {
+                console.warn('Error clearing previous search results:', clearError);
+            }
+
+            // Add a sufficient delay to ensure PDF is fully loaded before searching
+            setTimeout(() => {
+                try {
+                    console.log('Performing fuzzy search for text:', searchText);
+                    pdfComponent.search(searchText);
+                } catch (searchError) {
+                    console.error('Error performing search:', searchError);
+                }
+            }, 1000); // Increased delay for better reliability
+        } catch (error) {
+            console.error('Error in fuzzy search setup:', error);
+        }
+    };
+
     /**
      * Main render method for the ChatBox
      */
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index e9d41efbd..784e90c3c 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -173,6 +173,16 @@ export class AgentDocumentManager {
     public processDocument(doc: Doc): string {
         // Ensure document has a persistent ID
         const docId = this.ensureDocumentId(doc);
+        if (doc.chunk_simplified) {
+            // Re-register chunk metadata persisted on the doc so lookups by chunk id work for it
+            try {
+                const storedChunks: unknown = JSON.parse(StrCast(doc.chunk_simplified));
+                // Ignore non-array payloads instead of throwing while iterating them
+                if (Array.isArray(storedChunks)) this.addSimplifiedChunks(storedChunks as SimplifiedChunk[]);
+            } catch (error) {
+                console.warn('Could not parse chunk_simplified for document', docId, error);
+            }
+        }
         // Only add if we haven't already processed this document
         if (!this.documentsById.has(docId)) {
             this.documentsById.set(docId, { layoutDoc: doc, dataDoc: doc[DocData] });
@@ -1005,21 +1015,28 @@ export class AgentDocumentManager {
      * @returns The updated document with simplified chunks
      */
     @action
-    public addSimplifiedChunks(chunks: RAGChunk[], docType: string) {
+    public addSimplifiedChunks(simplifiedChunks: SimplifiedChunk[]) {
+        simplifiedChunks.forEach(chunk => {
+            this.simplifiedChunks.set(chunk.chunkId, chunk);
+        });
+    }
+
+    public getSimplifiedChunks(chunks: RAGChunk[], docType: string): SimplifiedChunk[] {
         console.log('chunks', chunks, 'simplifiedChunks', this.simplifiedChunks);
+        const simplifiedChunks: SimplifiedChunk[] = [];
         // Create array of simplified chunks based on document type
         for (const chunk of chunks) {
             // Common properties across all chunk types
             const baseChunk: SimplifiedChunk = {
                 chunkId: chunk.id,
-                text: chunk.metadata.text,
+                //text: chunk.metadata.text,
                 doc_id: chunk.metadata.doc_id,
                 chunkType: chunk.metadata.type || CHUNK_TYPE.TEXT,
             };
 
             // Add type-specific properties
             if (docType === 'video' || docType === 'audio') {
-                this.simplifiedChunks.set(chunk.id, {
+                simplifiedChunks.push({
                     ...baseChunk,
                     start_time: chunk.metadata.start_time,
                     end_time: chunk.metadata.end_time,
@@ -1027,14 +1044,14 @@ export class AgentDocumentManager {
                     chunkType: docType === 'video' ? CHUNK_TYPE.VIDEO : CHUNK_TYPE.AUDIO,
                 } as SimplifiedChunk);
             } else if (docType === 'pdf') {
-                this.simplifiedChunks.set(chunk.id, {
+                simplifiedChunks.push({
                     ...baseChunk,
                     startPage: chunk.metadata.start_page,
                     endPage: chunk.metadata.end_page,
                     location: chunk.metadata.location,
                 } as SimplifiedChunk);
             } else if (docType === 'csv') {
-                this.simplifiedChunks.set(chunk.id, {
+                simplifiedChunks.push({
                     ...baseChunk,
                     rowStart: (chunk.metadata as any).row_start,
                     rowEnd: (chunk.metadata as any).row_end,
@@ -1043,9 +1060,10 @@ export class AgentDocumentManager {
                 } as SimplifiedChunk);
             } else {
                 // Default for other document types
-                this.simplifiedChunks.set(chunk.id, baseChunk as SimplifiedChunk);
+                simplifiedChunks.push(baseChunk as SimplifiedChunk);
             }
         }
+        return simplifiedChunks;
     }
 
     /**
@@ -1054,9 +1072,8 @@ export class AgentDocumentManager {
      * @param chunkId The ID of the chunk to retrieve
      * @returns The simplified chunk if found, undefined otherwise
      */
+    @action
     public getSimplifiedChunkById(chunkId: string): any | undefined {
-        console.log('chunkId', chunkId, 'simplifiedChunks', this.simplifiedChunks);
-        console.log('doc', this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || ''));
         return { foundChunk: this.simplifiedChunks.get(chunkId), doc: this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || chunkId) };
     }
 
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index f1fae6f11..252672dfc 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -40,7 +40,7 @@ export class Vectorstore {
      * @param docManager An instance of AgentDocumentManager to handle document management.
      */
     constructor(id: string, docManager: AgentDocumentManager) {
-        const pineconeApiKey = process.env.PINECONE_API_KEY;
+        const pineconeApiKey = process.env.PINECONE_API_KEY; // keep the key in the environment, never in source
         if (!pineconeApiKey) {
             console.log('PINECONE_API_KEY is not defined - Vectorstore will be unavailable');
             return;
@@ -181,7 +181,9 @@ export class Vectorstore {
                 doc.segmented_transcript = JSON.stringify(segmentedTranscript);
                 // Use doc manager to add simplified chunks
                 const docType = local_file_path.endsWith('.mp3') ? 'audio' : 'video';
-                this.docManager.addSimplifiedChunks(result.chunks, docType);
+                const simplifiedChunks = this.docManager.getSimplifiedChunks(result.chunks, docType);
+                doc.chunk_simplified = JSON.stringify(simplifiedChunks);
+                this.docManager.addSimplifiedChunks(simplifiedChunks);
             } else {
                 // Process regular document
                 console.log('Processing regular document...');
@@ -215,7 +217,9 @@ export class Vectorstore {
                 // Use doc manager to add simplified chunks - determine document type from file extension
                 const fileExt = path.extname(local_file_path).toLowerCase();
                 const docType = fileExt === '.pdf' ? 'pdf' : fileExt === '.csv' ? 'csv' : 'text';
-                this.docManager.addSimplifiedChunks(result.chunks, docType);
+                const simplifiedChunks = this.docManager.getSimplifiedChunks(result.chunks, docType);
+                doc.chunk_simplified = JSON.stringify(simplifiedChunks);
+                this.docManager.addSimplifiedChunks(simplifiedChunks);
 
                 doc.summary = result.summary;
                 doc.ai_purpose = result.purpose;
diff --git a/src/client/views/pdf/PDFViewer.tsx b/src/client/views/pdf/PDFViewer.tsx
index fc2567fbc..a88d8b282 100644
--- a/src/client/views/pdf/PDFViewer.tsx
+++ b/src/client/views/pdf/PDFViewer.tsx
@@ -50,6 +50,15 @@ interface IViewerProps extends FieldViewProps {
     crop: (region: Doc | undefined, addCrop?: boolean) => Doc | undefined;
 }
 
+// Shape of one entry in PDFViewer's `_fuzzySearchResults` list
+interface FuzzySearchResult {
+    pageIndex: number;
+    matchIndex: number;
+    text: string;
+    score?: number;
+    isParagraph?: boolean;
+}
+
 /**
  * Handles rendering and virtualization of the pdf
  */
@@ -68,6 +77,9 @@ export class PDFViewer extends ObservableReactComponent<IViewerProps> {
     @observable _showWaiting = true;
     @observable Index: number = -1;
     @observable private _loading = false;
+    @observable private _fuzzySearchEnabled = true; // when true, search() routes queries through performFuzzySearch
+    @observable private _fuzzySearchResults: FuzzySearchResult[] = []; // hits of the latest fuzzy search, sorted by descending score
+    @observable private _currentFuzzyMatchIndex = 0; // position within _fuzzySearchResults that was last navigated to
 
     private _pdfViewer!: PDFJSViewer.PDFViewer;
     private _styleRule: number | undefined; // stylesheet rule for making hyperlinks clickable
@@ -334,27 +346,557 @@ export class PDFViewer extends ObservableReactComponent<IViewerProps> {
         return index;
     };
 
+    // Normalize text for comparison: lowercase, punctuation turned into spaces, whitespace runs collapsed, ends trimmed
+    private normalizeText(text: string): string {
+        return text
+            .toLowerCase()
+            .replace(/[^\w\s]/g, ' ') // punctuation -> space FIRST (NOTE: \w is ASCII-only, so accented letters become spaces too)
+            .replace(/\s+/g, ' ') // collapse afterwards, so "a, b" and "a b" both normalize to "a b"
+            .trim();
+    }
+
+    // Heuristic similarity of two strings after normalizeText: 1 = identical, 0 = nothing usable in common; long-text matches are capped at 0.9
+    private computeSimilarity(str1: string, str2: string): number {
+        const s1 = this.normalizeText(str1);
+        const s2 = this.normalizeText(str2);
+
+        if (s1 === s2) return 1;
+        if (s1.length === 0 || s2.length === 0) return 0;
+
+        // Long-text branch: taken when either normalized string is longer than 50 characters
+        if (s1.length > 50 || s2.length > 50) {
+            // Decide which string is searched (longer) and which one is cut into word chunks (shorter)
+            const longerText = s1.length > s2.length ? s1 : s2;
+            const shorterText = s1.length > s2.length ? s2 : s1;
+
+            // Chunks are runs of up to 5 words, but never more than half of the shorter text's words
+            const words = shorterText.split(' ');
+            const chunkSize = Math.min(5, Math.floor(words.length / 2));
+
+            if (chunkSize > 0) {
+                let maxChunkMatch = 0;
+
+                // Slide a chunkSize-word window over the shorter text and look for each window verbatim in the longer text
+                for (let i = 0; i <= words.length - chunkSize; i++) {
+                    const chunk = words.slice(i, i + chunkSize).join(' ');
+                    if (longerText.includes(chunk)) {
+                        maxChunkMatch = Math.max(maxChunkMatch, chunk.length / shorterText.length);
+                    }
+                }
+
+                if (maxChunkMatch > 0.2) {
+                    return Math.min(0.9, maxChunkMatch + 0.3); // Boost the score, max 0.9
+                }
+            }
+
+            // No chunk hit: fall back to comparing the sets of distinct words
+            const words1 = new Set(s1.split(' '));
+            const words2 = new Set(s2.split(' '));
+
+            let commonWords = 0;
+            for (const word of words1) {
+                if (word.length > 2 && words2.has(word)) {
+                    // Only count meaningful words (length > 2)
+                    commonWords++;
+                }
+            }
+
+            // Ratio is relative to the SMALLER word set. NOTE(review): a one-word string found in a long text therefore scores 0.9 - confirm this is intended
+            const overlapRatio = commonWords / Math.min(words1.size, words2.size);
+
+            // For long text, a lower match can still be significant
+            if (overlapRatio > 0.4) {
+                return Math.min(0.9, overlapRatio);
+            }
+        }
+
+        // Containment check (also reached by long texts that fell through above): score scales with the length ratio, at most 0.8
+        if (s1.includes(s2) || s2.includes(s1)) {
+            return (0.8 * Math.min(s1.length, s2.length)) / Math.max(s1.length, s2.length);
+        }
+
+        // Edit-distance comparison, only when both strings are under 100 characters (keeps the table small)
+        if (s1.length < 100 && s2.length < 100) {
+            // Levenshtein distance; dp[i][j] = edits needed to turn the first i chars of s1 into the first j chars of s2
+            const dp: number[][] = Array(s1.length + 1)
+                .fill(0)
+                .map(() => Array(s2.length + 1).fill(0));
+
+            for (let i = 0; i <= s1.length; i++) dp[i][0] = i;
+            for (let j = 0; j <= s2.length; j++) dp[0][j] = j;
+
+            for (let i = 1; i <= s1.length; i++) {
+                for (let j = 1; j <= s2.length; j++) {
+                    const cost = s1[i - 1] === s2[j - 1] ? 0 : 1;
+                    dp[i][j] = Math.min(
+                        dp[i - 1][j] + 1, // deletion
+                        dp[i][j - 1] + 1, // insertion
+                        dp[i - 1][j - 1] + cost // substitution
+                    );
+                }
+            }
+
+            const distance = dp[s1.length][s2.length];
+            return 1 - distance / Math.max(s1.length, s2.length);
+        }
+
+        return 0;
+    }
+
+    // Fuzzy-search the PDF page text for searchString, fill _fuzzySearchResults (best score first) and navigate to a hit; falls back to the viewer's 'find' event when nothing qualifies. thresholdOverride is only passed by the internal one-shot retry.
+    private async performFuzzySearch(searchString: string, bwd?: boolean, thresholdOverride?: number): Promise<boolean> {
+        if (!this._pdfViewer || !searchString.trim()) return false;
+
+        const normalizedSearch = this.normalizeText(searchString);
+        this._fuzzySearchResults = [];
+
+        // Threshold: an explicit override (retry pass) wins; otherwise be more lenient for longer text
+        let similarityThreshold = thresholdOverride ?? 0.6;
+        if (thresholdOverride === undefined && searchString.length > 100) similarityThreshold = 0.35;
+        else if (thresholdOverride === undefined && searchString.length > 50) similarityThreshold = 0.45;
+
+        console.log(`Using similarity threshold: ${similarityThreshold} for query length: ${searchString.length}`);
+
+        // For longer queries (more than 5 words of 4+ characters), also look for partial matches via rebuilt paragraphs
+        const searchWords = normalizedSearch.split(' ').filter(w => w.length > 3);
+        const isLongQuery = searchWords.length > 5;
+
+        // Track best match for debugging and for the near-miss retry below
+        let bestMatchScore = 0;
+        let bestMatchText = '';
+
+        // Fallback strategy: extract key phrases for very long search queries
+        let keyPhrases: string[] = [];
+        if (searchString.length > 200) {
+            // Extract key phrases (chunks of 3-5 words, starting at every second word) from the search string
+            const words = normalizedSearch.split(' ');
+            for (let i = 0; i < words.length - 2; i += 2) {
+                const phraseLength = Math.min(5, words.length - i);
+                if (phraseLength >= 3) {
+                    keyPhrases.push(words.slice(i, i + phraseLength).join(' '));
+                }
+            }
+            console.log(`Using ${keyPhrases.length} key phrases for long search text`);
+        }
+
+        // Process PDF in batches to avoid memory issues
+        const totalPages = this._pageSizes.length;
+        const BATCH_SIZE = 10; // Process 10 pages at a time
+
+        console.log(`Searching all ${totalPages} pages in batches of ${BATCH_SIZE}`);
+
+        // Process PDF in batches
+        for (let batchStart = 0; batchStart < totalPages; batchStart += BATCH_SIZE) {
+            const batchEnd = Math.min(batchStart + BATCH_SIZE, totalPages);
+            console.log(`Processing pages ${batchStart + 1} to ${batchEnd} of ${totalPages}`);
+
+            // Process each page in current batch
+            for (let pageIndex = batchStart; pageIndex < batchEnd; pageIndex++) {
+                try {
+                    const page = await this._props.pdf.getPage(pageIndex + 1);
+                    const textContent = await page.getTextContent();
+
+                    // For long text, try to reconstruct paragraphs first
+                    let paragraphs: string[] = [];
+
+                    try {
+                        if (isLongQuery) {
+                            // Group text items by their y position; each "paragraph" here is effectively one visual line
+                            let currentY: number | null = null;
+                            let currentParagraph = '';
+
+                            // Sort by Y position first, then X. NOTE(review): ascending y may be bottom-to-top in PDF coordinates - confirm
+                            const sortedItems = [...textContent.items].sort((a: any, b: any) => {
+                                const aTransform = (a as any).transform || [];
+                                const bTransform = (b as any).transform || [];
+                                if (Math.abs(aTransform[5] - bTransform[5]) < 5) {
+                                    return (aTransform[4] || 0) - (bTransform[4] || 0);
+                                }
+                                return (aTransform[5] || 0) - (bTransform[5] || 0);
+                            });
+
+                            // Limit paragraph size to avoid overflows
+                            const MAX_PARAGRAPH_LENGTH = 1000;
+
+                            for (const item of sortedItems) {
+                                const text = (item as any).str || '';
+                                const transform = (item as any).transform || [];
+                                const y = transform[5];
+
+                                // Start a new paragraph on the first item, on a y jump of more than 5, or when the size cap would be exceeded
+                                if (currentY === null || Math.abs(y - currentY) > 5 || currentParagraph.length + text.length > MAX_PARAGRAPH_LENGTH) {
+                                    if (currentParagraph) {
+                                        paragraphs.push(currentParagraph.trim());
+                                    }
+                                    currentParagraph = text;
+                                    currentY = y;
+                                } else {
+                                    // Continue the current paragraph
+                                    currentParagraph += ' ' + text;
+                                }
+                            }
+
+                            // Add the last paragraph
+                            if (currentParagraph) {
+                                paragraphs.push(currentParagraph.trim());
+                            }
+
+                            // Only the first few adjacent pairs are merged, which bounds the extra candidates per page
+                            const MAX_COMBINED_PARAGRAPHS = 5;
+
+                            // Also create overlapping larger paragraphs for better context, but limit size
+                            if (paragraphs.length > 1) {
+                                const combinedCount = Math.min(paragraphs.length - 1, MAX_COMBINED_PARAGRAPHS);
+                                for (let i = 0; i < combinedCount; i++) {
+                                    if (paragraphs[i].length + paragraphs[i + 1].length < MAX_PARAGRAPH_LENGTH) {
+                                        paragraphs.push(paragraphs[i] + ' ' + paragraphs[i + 1]);
+                                    }
+                                }
+                            }
+                        }
+                    } catch (paragraphError) {
+                        console.warn('Error during paragraph reconstruction:', paragraphError);
+                        // Continue with individual items if paragraph reconstruction fails
+                    }
+
+                    // For extremely long search texts, use our key phrases approach
+                    if (keyPhrases.length > 0) {
+                        // Check each paragraph for key phrases
+                        for (const paragraph of paragraphs) {
+                            let matchingPhrases = 0;
+                            let bestPhraseScore = 0;
+
+                            for (const phrase of keyPhrases) {
+                                const similarity = this.computeSimilarity(paragraph, phrase);
+                                if (similarity > 0.7) matchingPhrases++;
+                                bestPhraseScore = Math.max(bestPhraseScore, similarity);
+                            }
+
+                            // If multiple key phrases match, this is likely a good result
+                            if (matchingPhrases > 1 || bestPhraseScore > 0.8) {
+                                this._fuzzySearchResults.push({
+                                    pageIndex,
+                                    matchIndex: paragraphs.indexOf(paragraph),
+                                    text: paragraph,
+                                    score: 0.7 + matchingPhrases * 0.05,
+                                    isParagraph: true,
+                                });
+                            }
+                        }
+
+                        // Also check each item directly
+                        for (const item of textContent.items) {
+                            const text = (item as any).str || '';
+                            if (!text.trim()) continue;
+
+                            for (const phrase of keyPhrases) {
+                                const similarity = this.computeSimilarity(text, phrase);
+                                if (similarity > 0.7) {
+                                    this._fuzzySearchResults.push({
+                                        pageIndex,
+                                        matchIndex: textContent.items.indexOf(item),
+                                        text: text,
+                                        score: similarity,
+                                        isParagraph: false,
+                                    });
+                                    break; // One matching phrase is enough for direct items
+                                }
+                            }
+                        }
+
+                        continue; // Skip normal processing for this page, we've used the key phrases approach
+                    }
+
+                    // Ensure paragraphs aren't too large before checking
+                    paragraphs = paragraphs.filter(p => p.length < 5000);
+
+                    // Check both individual items and reconstructed paragraphs
+                    try {
+                        const itemsToCheck = [
+                            ...textContent.items.map((item: any, idx: number) => ({
+                                idx, // position supplied by map(); avoids an O(n) indexOf lookup per item
+                                text: (item as any).str || '',
+                                isParagraph: false,
+                            })),
+                            ...paragraphs.map((p, i) => ({
+                                idx: i,
+                                text: p,
+                                isParagraph: true,
+                            })),
+                        ];
+
+                        for (const item of itemsToCheck) {
+                            if (!item.text.trim() || item.text.length > 5000) continue;
+
+                            const similarity = this.computeSimilarity(item.text, normalizedSearch);
+
+                            // Track best match for debugging
+                            if (similarity > bestMatchScore) {
+                                bestMatchScore = similarity;
+                                bestMatchText = item.text.substring(0, 100);
+                            }
+
+                            if (similarity > similarityThreshold) {
+                                this._fuzzySearchResults.push({
+                                    pageIndex,
+                                    matchIndex: item.idx,
+                                    text: item.text,
+                                    score: similarity,
+                                    isParagraph: item.isParagraph,
+                                });
+                            }
+                        }
+                    } catch (itemCheckError) {
+                        console.warn('Error checking items on page:', itemCheckError);
+                    }
+                } catch (error) {
+                    console.error(`Error extracting text from page ${pageIndex + 1}:`, error);
+                    // Continue with other pages even if one fails
+                }
+            }
+
+            // Check if we already have good matches after each batch
+            // This allows us to stop early if we've found excellent matches
+            if (this._fuzzySearchResults.length > 0) {
+                // Sort results by similarity (descending)
+                this._fuzzySearchResults.sort((a, b) => (b.score || 0) - (a.score || 0));
+
+                // If we have an excellent match (score > 0.8), stop searching
+                if (this._fuzzySearchResults[0]?.score && this._fuzzySearchResults[0].score > 0.8) {
+                    console.log(`Found excellent match (score: ${this._fuzzySearchResults[0].score?.toFixed(2)}) - stopping early`);
+                    break;
+                }
+
+                // If we have several good matches (score > 0.6), stop searching
+                if (this._fuzzySearchResults.length >= 3 && this._fuzzySearchResults.every(r => r.score && r.score > 0.6)) {
+                    console.log(`Found ${this._fuzzySearchResults.length} good matches - stopping early`);
+                    break;
+                }
+            }
+
+            // Yield between batches so the UI thread is not blocked for the whole scan
+            if (batchEnd < totalPages) {
+                // Give the browser a moment to breathe and release memory
+                await new Promise(resolve => setTimeout(resolve, 1));
+            }
+        }
+
+        // If no results with advanced search, try standard search with key terms
+        if (this._fuzzySearchResults.length === 0 && searchWords.length > 3) {
+            // Find the most distinctive words (longer words are often more specific)
+            const distinctiveWords = searchWords
+                .filter(w => w.length > 4)
+                .sort((a, b) => b.length - a.length)
+                .slice(0, 3);
+
+            if (distinctiveWords.length > 0) {
+                console.log(`Falling back to standard search with distinctive term: ${distinctiveWords[0]}`);
+                this._pdfViewer.eventBus.dispatch('find', {
+                    query: distinctiveWords[0],
+                    phraseSearch: false,
+                    highlightAll: true,
+                    findPrevious: false,
+                });
+                return true;
+            }
+        }
+
+        console.log(`Best match (${bestMatchScore.toFixed(2)}): "${bestMatchText}"`);
+        console.log(`Found ${this._fuzzySearchResults.length} matches above threshold ${similarityThreshold}`);
+
+        // Sort results by similarity (descending)
+        this._fuzzySearchResults.sort((a, b) => (b.score || 0) - (a.score || 0));
+
+        // Navigate to the first/last result based on direction
+        if (this._fuzzySearchResults.length > 0) {
+            this._currentFuzzyMatchIndex = bwd ? this._fuzzySearchResults.length - 1 : 0;
+            this.navigateToFuzzyMatch(this._currentFuzzyMatchIndex);
+            return true;
+        } else if (bestMatchScore > 0) {
+            // Near miss: retry exactly once with a threshold just below the best score seen.
+            // The lowered value must travel as an argument: reassigning the local was lost on re-entry and recursed without end.
+            if (thresholdOverride === undefined && bestMatchScore > similarityThreshold * 0.7) {
+                console.log(`Lowering threshold to ${bestMatchScore * 0.9} and retrying search`);
+                return this.performFuzzySearch(searchString, bwd, bestMatchScore * 0.9);
+            }
+        }
+
+        // Ultimate fallback: hand a short phrase from the middle of the query to the viewer's own 'find'
+        if (this._fuzzySearchResults.length === 0) {
+            // Extract a few words from the middle of the search string
+            const words = normalizedSearch.split(' ');
+            const middleIndex = Math.floor(words.length / 2);
+            const searchPhrase = words.slice(Math.max(0, middleIndex - 1), Math.min(words.length, middleIndex + 2)).join(' ');
+
+            console.log(`Falling back to standard search with phrase: ${searchPhrase}`);
+            this._pdfViewer.eventBus.dispatch('find', {
+                query: searchPhrase,
+                phraseSearch: true,
+                highlightAll: true,
+                findPrevious: false,
+            });
+            return true;
+        }
+
+        return false;
+    }
+
+    // Scroll to the page of _fuzzySearchResults[index] and ask the viewer's 'find' to highlight (part of) its text; out-of-range indices are ignored
+    private navigateToFuzzyMatch(index: number): void {
+        if (index >= 0 && index < this._fuzzySearchResults.length) {
+            const match = this._fuzzySearchResults[index];
+            console.log(`Navigating to match: ${match.text.substring(0, 50)}... (score: ${match.score?.toFixed(2) || 'unknown'})`);
+
+            // Scroll to the page containing the match (pageIndex is zero-based, pageNumber is one-based)
+            this._pdfViewer.scrollPageIntoView({
+                pageNumber: match.pageIndex + 1,
+            });
+
+            // For paragraph matches, use a more specific approach
+            if (match.isParagraph) {
+                // Break the text into smaller chunks to improve highlighting
+                const words = match.text.split(/\s+/);
+                // Chunks are built from the raw match text; no normalization is applied here
+
+                // Try to highlight with shorter chunks to get better visual feedback
+                if (words.length > 5) {
+                    // Create 5-word chunks that start every 3 words, so neighbours overlap by 2 words
+                    const chunks: string[] = [];
+                    for (let i = 0; i < words.length - 4; i += 3) {
+                        chunks.push(words.slice(i, i + 5).join(' '));
+                    }
+
+                    // Highlight each chunk
+                    if (chunks.length > 0) {
+                        // Highlight the first chunk immediately
+                        this._pdfViewer.eventBus.dispatch('find', {
+                            query: chunks[0],
+                            phraseSearch: true,
+                            highlightAll: true,
+                            findPrevious: false,
+                        });
+
+                        // Remaining chunks go out 100ms apart. NOTE(review): each 'find' presumably replaces the previous query's highlights - confirm
+                        chunks.slice(1).forEach((chunk, i) => {
+                            setTimeout(
+                                () => {
+                                    this._pdfViewer.eventBus.dispatch('find', {
+                                        query: chunk,
+                                        phraseSearch: true,
+                                        highlightAll: true,
+                                        findPrevious: false,
+                                    });
+                                },
+                                (i + 1) * 100
+                            );
+                        });
+                        return;
+                    }
+                }
+            }
+
+            // Standard highlighting for non-paragraph matches or short text
+            if (this._pdfViewer.findController) {
+                // For longer text, try to find the most unique phrases to highlight
+                if (match.text.length > 50) {
+                    const words = match.text.split(/\s+/);
+                    // Default to the whole text; narrowed to a 5-word phrase below when there are enough words
+                    let phraseToHighlight = match.text;
+
+                    if (words.length >= 5) {
+                        // Take a phrase from the middle of the text
+                        const middleIndex = Math.floor(words.length / 2);
+                        phraseToHighlight = words.slice(middleIndex - 2, middleIndex + 3).join(' ');
+                    }
+
+                    console.log(`Highlighting phrase: "${phraseToHighlight}"`);
+
+                    this._pdfViewer.eventBus.dispatch('find', {
+                        query: phraseToHighlight,
+                        phraseSearch: true,
+                        highlightAll: true,
+                        findPrevious: false,
+                    });
+                } else {
+                    // For shorter text, use the entire match
+                    this._pdfViewer.eventBus.dispatch('find', {
+                        query: match.text,
+                        phraseSearch: true,
+                        highlightAll: true,
+                        findPrevious: false,
+                    });
+                }
+            }
+        }
+    }
+
+    // Advance to the following fuzzy match, wrapping to the first after the last; false when there are no matches
+    private nextFuzzyMatch(): boolean {
+        const total = this._fuzzySearchResults.length;
+        if (!total) return false;
+        this._currentFuzzyMatchIndex = (this._currentFuzzyMatchIndex + 1) % total;
+        this.navigateToFuzzyMatch(this._currentFuzzyMatchIndex);
+        return true;
+    }
+
+    // Step back to the preceding fuzzy match, wrapping to the last before the first; false when there are no matches
+    private prevFuzzyMatch(): boolean {
+        const total = this._fuzzySearchResults.length;
+        if (!total) return false;
+        this._currentFuzzyMatchIndex = (this._currentFuzzyMatchIndex + total - 1) % total;
+        this.navigateToFuzzyMatch(this._currentFuzzyMatchIndex);
+        return true;
+    }
+    private _lastFuzzyQuery = ''; // query that produced the hits currently cached in _fuzzySearchResults (used by search below)
     @action
     search = (searchString: string, bwd?: boolean, clear: boolean = false) => {
-        const findOpts = {
-            caseSensitive: false,
-            findPrevious: bwd,
-            highlightAll: true,
-            phraseSearch: true,
-            query: searchString,
-        };
         if (clear) {
+            this._fuzzySearchResults = [];
             this._pdfViewer?.eventBus.dispatch('findbarclose', {});
-        } else if (!searchString) {
+            return true;
+        }
+
+        if (!searchString) {
             bwd ? this.prevAnnotation() : this.nextAnnotation();
-        } else if (this._pdfViewer?.pageViewsReady) {
-            this._pdfViewer?.eventBus.dispatch('find', { ...findOpts, type: 'again' });
-        } else if (this._mainCont.current) {
-            const executeFind = () => this._pdfViewer?.eventBus.dispatch('find', findOpts);
-            this._mainCont.current.addEventListener('pagesloaded', executeFind);
-            this._mainCont.current.addEventListener('pagerendered', executeFind);
+            return true;
         }
-        return true;
+
+        // Step through cached fuzzy hits only when they belong to this same query; a different query must trigger a fresh search
+        if (this._fuzzySearchEnabled && this._fuzzySearchResults.length > 0 && searchString === this._lastFuzzyQuery) {
+            return bwd ? this.prevFuzzyMatch() : this.nextFuzzyMatch();
+        }
+
+        // For new search, decide between fuzzy and standard search
+        if (this._fuzzySearchEnabled) {
+            this._lastFuzzyQuery = searchString;
+            this.performFuzzySearch(searchString, bwd).catch(e => console.error('Fuzzy search failed:', e)); // runs asynchronously; never leave its rejection unhandled
+            return true;
+        } else {
+            // Use original PDF.js search
+            const findOpts = {
+                caseSensitive: false,
+                findPrevious: bwd,
+                highlightAll: true,
+                phraseSearch: true,
+                query: searchString,
+            };
+
+            if (this._pdfViewer?.pageViewsReady) {
+                this._pdfViewer?.eventBus.dispatch('find', { ...findOpts, type: 'again' });
+            } else if (this._mainCont.current) {
+                const executeFind = () => this._pdfViewer?.eventBus.dispatch('find', findOpts);
+                this._mainCont.current.addEventListener('pagesloaded', executeFind);
+                this._mainCont.current.addEventListener('pagerendered', executeFind);
+            }
+            return true;
+        }
+    };
+
+    // Flip fuzzy search on or off and report the new state
+    @action
+    toggleFuzzySearch = (): boolean => {
+        const enabled = !this._fuzzySearchEnabled;
+        return (this._fuzzySearchEnabled = enabled);
     };
 
     @action
-- 
cgit v1.2.3-70-g09d2


From 440042bbb2221ee5714482f9fb7ee7027d91e914 Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Mon, 12 May 2025 15:54:15 -0400
Subject: even better

---
 .../nodes/chatbot/chatboxcomponents/ChatBox.tsx    | 188 ++++++++++++++++-----
 .../nodes/chatbot/utils/AgentDocumentManager.ts    |   6 +-
 2 files changed, 152 insertions(+), 42 deletions(-)

(limited to 'src/client/views/nodes/chatbot/utils')

diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index af689f243..91a7adf24 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -16,12 +16,14 @@ import { v4 as uuidv4 } from 'uuid';
 import { ClientUtils, OmitKeys } from '../../../../../ClientUtils';
 import { Doc, DocListCast, Opt } from '../../../../../fields/Doc';
 import { DocData, DocLayout, DocViews } from '../../../../../fields/DocSymbols';
+import { Id } from '../../../../../fields/FieldSymbols';
 import { RichTextField } from '../../../../../fields/RichTextField';
 import { ScriptField } from '../../../../../fields/ScriptField';
 import { CsvCast, DocCast, NumCast, PDFCast, RTFCast, StrCast, VideoCast, AudioCast } from '../../../../../fields/Types';
 import { DocUtils } from '../../../../documents/DocUtils';
 import { CollectionViewType, DocumentType } from '../../../../documents/DocumentTypes';
 import { Docs, DocumentOptions } from '../../../../documents/Documents';
+import { DocServer } from '../../../../DocServer';
 import { DocumentManager } from '../../../../util/DocumentManager';
 import { ImageUtils } from '../../../../util/Import & Export/ImageUtils';
 import { LinkManager } from '../../../../util/LinkManager';
@@ -643,8 +645,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
             });
 
             // Get the simplified chunk using the document manager
-            const { foundChunk, doc } = this.docManager.getSimplifiedChunkById(chunkId);
+            const { foundChunk, doc, dataDoc } = this.docManager.getSimplifiedChunkById(chunkId);
             console.log('doc: ', doc);
+            console.log('dataDoc: ', dataDoc);
             if (!foundChunk) {
                 if (doc) {
                     console.warn(`Chunk not found in document, ${doc.id}, for chunk ID: ${chunkId}`);
@@ -673,7 +676,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                     DocumentManager.Instance.showDocument(doc, { openLocation: OpenWhere.addRight }, () => {});
                     return;
                 }
-                this.handleOtherChunkTypes(foundChunk, citation, doc);
+                this.handleOtherChunkTypes(foundChunk, citation, doc, dataDoc);
                 // Show the chunk text in citation popup
                 let chunkText = citation.direct_text || 'Text content not available';
                 this.showCitationPopup(chunkText);
@@ -795,7 +798,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
      * @param citation The citation object.
      * @param doc The document containing the chunk.
      */
-    handleOtherChunkTypes = (foundChunk: SimplifiedChunk, citation: Citation, doc: Doc) => {
+    handleOtherChunkTypes = (foundChunk: SimplifiedChunk, citation: Citation, doc: Doc, dataDoc?: Doc) => {
         switch (foundChunk.chunkType) {
             case CHUNK_TYPE.IMAGE:
             case CHUNK_TYPE.TABLE:
@@ -838,45 +841,10 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 this.startCitationPopupTimer();
 
                 // Check if the document is a PDF (has a PDF viewer component)
-                const isPDF = PDFCast(doc.data) !== null || doc.type === DocumentType.PDF;
+                const isPDF = PDFCast(dataDoc!.data) !== null || dataDoc!.type === DocumentType.PDF;
 
-                DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
-                    // Add a delay to ensure document is fully loaded and rendered
-                    setTimeout(() => {
-                        try {
-                            // Safety check: ensure the document has views
-                            if (!doc[DocViews] || doc[DocViews].size === 0) {
-                                console.warn('Document views not available yet, retrying...');
-                                this.retryPdfSearch(doc, citation, foundChunk, isPDF, 1);
-                                return;
-                            }
-
-                            const views = Array.from(doc[DocViews]);
-                            if (!views.length) {
-                                console.warn('No document views found, retrying...');
-                                this.retryPdfSearch(doc, citation, foundChunk, isPDF, 1);
-                                return;
-                            }
-
-                            const firstView = views[0] as DocumentView;
-                            if (!firstView || !firstView.ComponentView) {
-                                console.warn('Component view not available yet, retrying...');
-                                this.retryPdfSearch(doc, citation, foundChunk, isPDF, 1);
-                                return;
-                            }
-
-                            const pdfComponent = firstView.ComponentView as PDFBox;
-                            if (isPDF && pdfComponent && citation.direct_text) {
-                                // Use our helper to ensure fuzzy search is enabled and execute the search
-                                this.ensureFuzzySearchAndExecute(pdfComponent, citation.direct_text.trim(), foundChunk.startPage);
-                            }
-                        } catch (error) {
-                            console.error('Error accessing PDF component:', error);
-                            // Retry with exponential backoff
-                            this.retryPdfSearch(doc, citation, foundChunk, isPDF, 1);
-                        }
-                    }, 500); // Initial delay before first attempt
-                });
+                // First ensure document is fully visible before trying to access its views
+                this.ensureDocumentIsVisible(dataDoc!, isPDF, citation, foundChunk, doc);
                 break;
             case CHUNK_TYPE.CSV:
             case CHUNK_TYPE.URL:
@@ -890,6 +858,144 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         }
     };
 
+    /**
+     * Ensures a document is fully visible and rendered before performing actions on it
+     * @param doc The document to ensure is visible
+     * @param isPDF Whether this is a PDF document
+     * @param citation The citation information
+     * @param foundChunk The chunk information
+     * @param layoutDoc Optional layout document to fall back on when `doc` itself has no views
+     */
+    ensureDocumentIsVisible = (doc: Doc, isPDF: boolean, citation: Citation, foundChunk: SimplifiedChunk, layoutDoc?: Doc) => {
+        try {
+            // Check which of the two documents (if any) already has a rendered view
+            const hasViews = doc[DocViews] && doc[DocViews].size > 0;
+            const layoutHasViews = (layoutDoc?.[DocViews]?.size ?? 0) > 0;
+
+            console.log(`Document ${doc.id}: Current state - hasViews: ${hasViews}, isPDF: ${isPDF}`);
+
+            if (hasViews) {
+                // Document is already rendered, proceed with accessing its view
+                this.processPDFDocumentView(doc, isPDF, citation, foundChunk);
+                return;
+            } else if (layoutDoc && layoutHasViews) {
+                this.processPDFDocumentView(layoutDoc, isPDF, citation, foundChunk);
+                return;
+            }
+
+            // Nothing is rendered yet: show the layout document when one was supplied (otherwise
+            // `doc`) and verify its view afterwards, so an unrendered layoutDoc still gets shown
+            const target = layoutDoc ?? doc;
+            console.log(`Document ${doc.id} needs to be shown first`);
+            DocumentManager.Instance.showDocument(target, { willZoomCentered: true }, () => {
+                // Wait a bit for the view to be rendered before checking for it
+                setTimeout(() => {
+                    this.verifyAndProcessDocumentView(target, isPDF, citation, foundChunk, 1);
+                }, 800);
+            });
+        } catch (error) {
+            console.error('Error ensuring document visibility:', error);
+            // Show the document anyway as a fallback
+            DocumentManager.Instance.showDocument(doc, { willZoomCentered: true });
+        }
+    };
+
+    /**
+     * Verifies document view exists and processes it, with retries if needed
+     * @param doc The document whose view is being waited for
+     * @param isPDF Whether this is a PDF document
+     * @param citation The citation information
+     * @param foundChunk The chunk information
+     * @param attempt Attempt counter (callers start at 1); no retry is scheduled once it reaches 5
+     */
+    verifyAndProcessDocumentView = (doc: Doc, isPDF: boolean, citation: Citation, foundChunk: SimplifiedChunk, attempt: number) => {
+        const maxAttempts = 5;
+        // Backoff shared by both retry paths: grows 1.5x per attempt, capped at 2000ms
+        const nextDelay = Math.min(2000, 500 * Math.pow(1.5, attempt));
+        const retry = () => setTimeout(() => this.verifyAndProcessDocumentView(doc, isPDF, citation, foundChunk, attempt + 1), nextDelay);
+
+        // Diagnostic info
+        console.log(`Verify attempt ${attempt}: Document views for ${doc.id}:`, doc[DocViews] ? `Found ${doc[DocViews].size} views` : 'No views');
+
+        // Double-check document exists in current document system
+        const docExists = DocServer.GetCachedRefField(doc[Id]) !== undefined;
+        if (!docExists) {
+            console.warn(`Document ${doc.id} no longer exists in document system`);
+            return;
+        }
+
+        try {
+            if (!doc[DocViews] || doc[DocViews].size === 0) {
+                if (attempt >= maxAttempts) {
+                    console.error(`Maximum verification attempts (${attempt}) reached for document ${doc.id}`);
+
+                    // Last resort: request the document be shown one more time, then give up
+                    if (isPDF) {
+                        console.log('Forcing document recreation as last resort');
+                        DocumentManager.Instance.showDocument(doc, {
+                            willZoomCentered: true,
+                        });
+                    }
+                    return;
+                }
+
+                // Let's try explicitly requesting the document be shown again
+                if (attempt > 2) {
+                    console.log(`Attempt ${attempt}: Re-requesting document be shown`);
+                    DocumentManager.Instance.showDocument(doc, {
+                        willZoomCentered: true,
+                        openLocation: attempt % 2 === 0 ? OpenWhere.addRight : undefined,
+                    });
+                }
+
+                console.log(`Scheduling retry ${attempt + 1} in ${nextDelay}ms`);
+                retry();
+                return;
+            }
+
+            this.processPDFDocumentView(doc, isPDF, citation, foundChunk);
+        } catch (error) {
+            console.error(`Error on verification attempt ${attempt}:`, error);
+            if (attempt < maxAttempts) {
+                retry();
+            }
+        }
+    };
+
+    /**
+     * Processes a document's first rendered view: warns and stops if no view/component exists, else fuzzy-searches PDFs for the cited text
+     */
+    processPDFDocumentView = (doc: Doc, isPDF: boolean, citation: Citation, foundChunk: SimplifiedChunk) => {
+        try {
+            const renderedViews = Array.from(doc[DocViews] || []);
+            if (renderedViews.length === 0) {
+                console.warn('No document views found in document that should have views');
+                return;
+            }
+
+            const view = renderedViews[0] as DocumentView;
+            if (!view) {
+                console.warn('First view is invalid');
+                return;
+            }
+
+            const component = view.ComponentView;
+            console.log(`Successfully found document view for ${doc.id}:`, component ? `Component: ${component.constructor.name}` : 'No component view');
+            if (!component) {
+                console.warn('Component view not available');
+                return;
+            }
+
+            // Only PDF documents whose citation carries text are searched
+            const searchText = citation.direct_text;
+            if (isPDF && searchText) {
+                this.ensureFuzzySearchAndExecute(component as PDFBox, searchText.trim(), foundChunk.startPage);
+            }
+        } catch (error) {
+            console.error('Error processing PDF document view:', error);
+        }
+    };
+
     /**
      * Creates an annotation highlight on a PDF document for image citations.
      * @param x1 X-coordinate of the top-left corner of the highlight.
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index 784e90c3c..33eec5972 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -1007,6 +1007,10 @@ export class AgentDocumentManager {
         return docInfo?.layoutDoc;
     }
 
+    /** Returns the data document stored for `docId` in `documentsById`, or undefined when there is no entry. */
+    public getDataDocument(docId: string): Doc | undefined {
+        return this.documentsById.get(docId)?.dataDoc;
+    }
     /**
      * Adds simplified chunks to a document for citation handling
      * @param doc The document to add simplified chunks to
@@ -1074,7 +1078,7 @@ export class AgentDocumentManager {
      */
     @action
     public getSimplifiedChunkById(chunkId: string): any | undefined {
-        return { foundChunk: this.simplifiedChunks.get(chunkId), doc: this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || chunkId) };
+        return { foundChunk: this.simplifiedChunks.get(chunkId), doc: this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || chunkId), dataDoc: this.getDataDocument(this.simplifiedChunks.get(chunkId)?.doc_id || chunkId) };
     }
 
     /**
-- 
cgit v1.2.3-70-g09d2


From 0e98320d3b237f1927b9f1367494dccd7f66eda9 Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Wed, 21 May 2025 12:38:55 -0400
Subject: Added codebase search and retrieval to Vectorstore

Summary indexing: Added functionality to embed and index file summaries from file_summaries.json in Pinecone
Vector search: Implemented semantic search to find the top 5 most relevant files for a query
Content retrieval: Added method to fetch full file content from file_content.json
API endpoints:
/getFileSummaries - Retrieves all file summaries
/getFileContent - Fetches file content by path
/getRawFileContent - Returns content as plain text to avoid JSON parsing errors
Error handling: Added comprehensive error handling and debugging throughout
Initialization: Implemented proper async initialization sequence with verification
Performance: Added streaming for large files to improve memory efficiency
Testing: Added automated test queries to validate functionality
---
 .../nodes/chatbot/chatboxcomponents/ChatBox.tsx    |   8 +-
 src/client/views/nodes/chatbot/tools/RAGTool.ts    |   8 +-
 src/client/views/nodes/chatbot/tools/SearchTool.ts |   2 +-
 .../nodes/chatbot/tools/WebsiteInfoScraperTool.ts  |   6 +-
 .../nodes/chatbot/utils/AgentDocumentManager.ts    |  20 +-
 .../views/nodes/chatbot/vectorstore/Vectorstore.ts | 493 ++++++++++++++++++++-
 src/server/ApiManagers/AssistantManager.ts         | 180 ++++++++
 7 files changed, 701 insertions(+), 16 deletions(-)

(limited to 'src/client/views/nodes/chatbot/utils')

diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 91a7adf24..470f94a8d 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -164,7 +164,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
             });
 
             // Process the document first to ensure it has a valid ID
-            this.docManager.processDocument(newLinkedDoc);
+            await this.docManager.processDocument(newLinkedDoc);
 
             // Add the document to the vectorstore which will also register chunks
             await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress);
@@ -648,7 +648,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
             const { foundChunk, doc, dataDoc } = this.docManager.getSimplifiedChunkById(chunkId);
             console.log('doc: ', doc);
             console.log('dataDoc: ', dataDoc);
-            if (!foundChunk) {
+            if (!foundChunk || !doc) {
                 if (doc) {
                     console.warn(`Chunk not found in document, ${doc.id}, for chunk ID: ${chunkId}`);
                     DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
@@ -1102,8 +1102,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
 
         // If there are stored doc IDs in our list of docs to add, process them
         if (this._linked_docs_to_add.size > 0) {
-            this._linked_docs_to_add.forEach(doc => {
-                this.docManager.processDocument(doc);
+            this._linked_docs_to_add.forEach(async doc => {
+                await this.docManager.processDocument(doc);
             });
         }
     }
diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts
index 90b803d21..af44de520 100644
--- a/src/client/views/nodes/chatbot/tools/RAGTool.ts
+++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts
@@ -12,6 +12,12 @@ const ragToolParams = [
         description: "A detailed prompt representing an ideal chunk to embed and compare against document vectors to retrieve the most relevant content for answering the user's query.",
         required: true,
     },
+    {
+        name: 'doc_ids',
+        type: 'string[]',
+        description: 'An optional array of document IDs to retrieve chunks from. If you want to retrieve chunks from all documents, leave this as an empty array: [] (DO NOT LEAVE THIS EMPTY).',
+        required: false,
+    },
 ] as const;
 
 type RAGToolParamsType = typeof ragToolParams;
@@ -69,7 +75,7 @@ export class RAGTool extends BaseTool<RAGToolParamsType> {
     }
 
     async execute(args: ParametersType<RAGToolParamsType>): Promise<Observation[]> {
-        const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk);
+        const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk, undefined, args.doc_ids ?? undefined);
         const formattedChunks = await this.getFormattedChunks(relevantChunks);
         return formattedChunks;
     }
diff --git a/src/client/views/nodes/chatbot/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts
index 43f14ea83..8e6edce8c 100644
--- a/src/client/views/nodes/chatbot/tools/SearchTool.ts
+++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts
@@ -22,7 +22,7 @@ type SearchToolParamsType = typeof searchToolParams;
 
 const searchToolInfo: ToolInfo<SearchToolParamsType> = {
     name: 'searchTool',
-    citationRules: 'No citation needed. Cannot cite search results for a response. Use web scraping tools to cite specific information.',
+    citationRules: 'Always cite the search results for a response, if the search results are relevant to the response. Use the chunk_id to cite the search results. If the search results are not relevant to the response, do not cite them.   ',
     parameterRules: searchToolParams,
     description: 'Search the web to find a wide range of websites related to a query or multiple queries. Returns a list of websites and their overviews based on the search queries.',
 };
diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
index 495a985cb..727d35e2c 100644
--- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
+++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
@@ -22,6 +22,7 @@ const websiteInfoScraperToolInfo: ToolInfo<WebsiteInfoScraperToolParamsType> = {
     name: 'websiteInfoScraper',
     description: 'Scrape detailed information from specific websites relevant to the user query. Returns the text content of the webpages for further analysis and grounding.',
     citationRules: `
+      !IMPORTANT! THESE CHUNKS REPLACE THE CHUNKS THAT ARE RETURNED FROM THE SEARCHTOOL.
       Your task is to provide a comprehensive response to the user's prompt using the content scraped from relevant websites. Ensure you follow these guidelines for structuring your response:
 
       1. Grounded Text Tag Structure:
@@ -88,6 +89,7 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
         console.log(url);
         console.log(chunkDoc);
         console.log(chunkDoc.data);
+        const id = chunkDoc.id;
         // Validate URL format
         try {
             new URL(url); // This will throw if URL is invalid
@@ -130,7 +132,7 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
                     if (retryCount === maxRetries) {
                         return {
                             type: 'text',
-                            text: `<chunk chunk_id="${chunkDoc.id}" chunk_type="url">\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n</chunk>`,
+                            text: `<chunk chunk_id="${id}" chunk_type="url">\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n</chunk>`,
                         } as Observation;
                     }
 
@@ -142,7 +144,7 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
                 // Process and return content if it looks good
                 return {
                     type: 'text',
-                    text: `<chunk chunk_id="${chunkDoc.id}" chunk_type="url">\n${website_plain_text}\n</chunk>`,
+                    text: `<chunk chunk_id="${id}" chunk_type="url">\n${website_plain_text}\n</chunk>`,
                 } as Observation;
             } catch (error) {
                 lastError = error instanceof Error ? error.message : 'Unknown error';
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index 33eec5972..3c8b49f33 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -153,9 +153,9 @@ export class AgentDocumentManager {
                 console.log(`Found ${linkedDocs.length} linked documents via LinkManager`);
 
                 // Process the linked documents
-                linkedDocs.forEach((doc: Doc | undefined) => {
+                linkedDocs.forEach(async (doc: Doc | undefined) => {
                     if (doc) {
-                        this.processDocument(doc);
+                        await this.processDocument(doc);
                         console.log('Processed linked document:', doc[Id], doc.title, doc.type);
                     }
                 });
@@ -170,7 +170,7 @@ export class AgentDocumentManager {
      * @param doc The document to process
      */
     @action
-    public processDocument(doc: Doc): string {
+    public async processDocument(doc: Doc): Promise<string> {
         // Ensure document has a persistent ID
         const docId = this.ensureDocumentId(doc);
         if (doc.chunk_simplified) {
@@ -900,7 +900,7 @@ export class AgentDocumentManager {
                     }
                 });
 
-                const id = this.processDocument(doc);
+                const id = await this.processDocument(doc);
                 return id;
             } else {
                 throw new Error(`Error creating document. Created document not found.`);
@@ -1081,6 +1081,18 @@ export class AgentDocumentManager {
         return { foundChunk: this.simplifiedChunks.get(chunkId), doc: this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || chunkId), dataDoc: this.getDataDocument(this.simplifiedChunks.get(chunkId)?.doc_id || chunkId) };
     }
 
+    /**
+     * Maps each document ID to the `chunkId` of the first simplified chunk whose
+     * `doc_id` matches it; document IDs without a matching chunk are dropped.
+     * NOTE(review): only one chunk ID is returned per document - confirm that is intended.
+     */
+    public getChunkIdsFromDocIds(docIds: string[]): string[] {
+        const chunks = Array.from(this.simplifiedChunks.values());
+        return docIds
+            .map(docId => chunks.find(chunk => chunk.doc_id === docId)?.chunkId)
+            .filter(chunkId => chunkId !== undefined) as string[];
+    }
+
     /**
      * Gets the original segments from a media document
      * @param doc The document containing original media segments
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index 252672dfc..5c2d0e5ea 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -27,11 +27,16 @@ dotenv.config();
 export class Vectorstore {
     private pinecone!: Pinecone; // Pinecone client for managing the vector index.
     private index!: Index; // The specific Pinecone index used for document chunks.
+    private summaryIndex!: Index; // The Pinecone index used for file summaries.
     private openai!: OpenAI; // OpenAI client for generating embeddings.
     private indexName: string = 'pdf-chatbot'; // Default name for the index.
+    private summaryIndexName: string = 'file-summaries'; // Name for the summaries index.
     private _id!: string; // Unique ID for the Vectorstore instance.
     private docManager!: AgentDocumentManager; // Document manager for handling documents
+    private summaryCacheCount: number = 0; // Cache for the number of summaries
     documents: AI_Document[] = []; // Store the documents indexed in the vectorstore.
+    private debug: boolean = true; // Enable debugging
+    private initialized: boolean = false;
 
     /**
      * Initializes the Pinecone and OpenAI clients, sets up the document ID list,
@@ -40,6 +45,7 @@ export class Vectorstore {
      * @param docManager An instance of AgentDocumentManager to handle document management.
      */
     constructor(id: string, docManager: AgentDocumentManager) {
+        if (this.debug) console.log(`[DEBUG] Initializing Vectorstore with ID: ${id}`);
         const pineconeApiKey = 'pcsk_3txLxJ_9fxdmAph4csnq4yxoDF5De5A8bJvjWaXXigBgshy4eoXggrXcxATJiH8vzXbrKm';
         if (!pineconeApiKey) {
             console.log('PINECONE_API_KEY is not defined - Vectorstore will be unavailable');
@@ -51,7 +57,32 @@ export class Vectorstore {
         this.openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, dangerouslyAllowBrowser: true });
         this._id = id;
         this.docManager = docManager;
-        this.initializeIndex();
+
+        // Proper async initialization sequence
+        this.initializeAsync(id);
+    }
+
+    /**
+     * Runs the asynchronous part of construction: sets up the document index, then the
+     * summary index, marks the store as initialized and finally issues one test query.
+     * Any failure is logged instead of being rethrown.
+     */
+    private async initializeAsync(id: string) {
+        const debugLog = (message: string) => {
+            if (this.debug) console.log(message);
+        };
+        try {
+            debugLog(`[DEBUG] Starting async initialization sequence for Vectorstore ID: ${id}`);
+
+            // Main document index first, then the summary index
+            await this.initializeIndex();
+            await this.initializeSummaryIndex();
+            this.initialized = true;
+            debugLog(`[DEBUG] ✅ Vectorstore initialization complete, running test query...`);
+            await this.runSingleTestQuery();
+        } catch (error) {
+            console.error('[ERROR] Failed to initialize Vectorstore:', error);
+        }
     }
 
     /**
@@ -59,10 +90,13 @@ export class Vectorstore {
      * Sets the index to use cosine similarity for vector similarity calculations.
      */
     private async initializeIndex() {
+        if (this.debug) console.log(`[DEBUG] Initializing main document index: ${this.indexName}`);
         const indexList: IndexList = await this.pinecone.listIndexes();
+        if (this.debug) console.log(`[DEBUG] Available Pinecone indexes: ${indexList.indexes?.map(i => i.name).join(', ') || 'none'}`);
 
         // Check if the index already exists, otherwise create it.
         if (!indexList.indexes?.some(index => index.name === this.indexName)) {
+            if (this.debug) console.log(`[DEBUG] Creating new index: ${this.indexName}`);
             await this.pinecone.createIndex({
                 name: this.indexName,
                 dimension: 3072,
@@ -74,12 +108,462 @@ export class Vectorstore {
                     },
                 },
             });
+            if (this.debug) console.log(`[DEBUG] ✅ Index ${this.indexName} created successfully`);
+        } else {
+            if (this.debug) console.log(`[DEBUG] ✅ Using existing index: ${this.indexName}`);
         }
 
         // Set the index for future use.
         this.index = this.pinecone.Index(this.indexName);
     }
 
+    /**
+     * Ensures the Pinecone index for file summaries exists (creating it when missing), binds it to
+     * `summaryIndex`, and then hands over to `processFileSummaries` to populate it if required.
+     */
+    private async initializeSummaryIndex() {
+        if (this.debug) console.log(`[DEBUG] Initializing file summaries index: ${this.summaryIndexName}`);
+        const indexList: IndexList = await this.pinecone.listIndexes();
+
+        // Check if the index already exists, otherwise create it.
+        if (!indexList.indexes?.some(index => index.name === this.summaryIndexName)) {
+            if (this.debug) console.log(`[DEBUG] Creating new summary index: ${this.summaryIndexName}`);
+            await this.pinecone.createIndex({
+                name: this.summaryIndexName,
+                dimension: 3072,
+                metric: 'cosine',
+                spec: {
+                    serverless: { cloud: 'aws', region: 'us-east-1' },
+                },
+                // Block until the new index reports ready: the stats check and the upserts issued by
+                // processFileSummaries() run right after this call and would hit a not-yet-ready index.
+                waitUntilReady: true,
+            });
+            if (this.debug) console.log(`[DEBUG] ✅ Summary index ${this.summaryIndexName} created successfully`);
+        } else {
+            if (this.debug) console.log(`[DEBUG] ✅ Using existing summary index: ${this.summaryIndexName}`);
+        }
+
+        // Set the summaries index for future use.
+        this.summaryIndex = this.pinecone.Index(this.summaryIndexName);
+
+        // Check if we need to index the file summaries
+        await this.processFileSummaries();
+    }
+
+    /**
+     * Fetches the filepath -> summary map from `/getFileSummaries` and (re)indexes it when needed.
+     * Embedding is skipped when the summary index already holds exactly as many records as there are summaries.
+     */
+    private async processFileSummaries() {
+        if (this.debug) console.log(`[DEBUG] Starting file summaries processing`);
+        try {
+            // Get file summaries from the server
+            if (this.debug) console.log(`[DEBUG] Fetching file summaries from server...`);
+            const response = await Networking.FetchFromServer('/getFileSummaries');
+
+            if (!response) {
+                console.error('[ERROR] Failed to fetch file summaries');
+                return;
+            }
+            if (this.debug) console.log(`[DEBUG] File summaries response received (${response.length} bytes)`);
+
+            const summaries: Record<string, string> = JSON.parse(response);
+            const filepaths = Object.keys(summaries);
+            const summaryCount = filepaths.length;
+            this.summaryCacheCount = summaryCount;
+
+            if (this.debug) {
+                console.log(`[DEBUG] File summaries parsed: ${summaryCount} files`);
+                console.log(`[DEBUG] Sample filepaths: ${filepaths.slice(0, 3).join(', ')}...`);
+                console.log(`[DEBUG] Sample summary: "${(summaries[filepaths[0]] ?? '').substring(0, 100)}..."`);
+            }
+
+            // Check if index already has the correct number of summaries
+            try {
+                if (this.debug) console.log(`[DEBUG] Checking summary index stats...`);
+                const indexStats = await this.summaryIndex.describeIndexStats();
+                const vectorCount = indexStats.totalRecordCount;
+
+                if (this.debug) console.log(`[DEBUG] Summary index has ${vectorCount} records, expecting ${summaryCount}`);
+
+                if (vectorCount === summaryCount) {
+                    if (this.debug) console.log(`[DEBUG] ✅ Summary index already contains ${vectorCount} entries, skipping embedding.`);
+                    return;
+                }
+
+                if (this.debug) console.log(`[DEBUG] ⚠️ Summary index contains ${vectorCount} entries, but there are ${summaryCount} summaries. Re-indexing.`);
+            } catch (error) {
+                console.error('[ERROR] Error checking summary index stats:', error);
+            }
+
+            // Reached on a count mismatch or when the stats check itself failed: embed and index everything
+            await this.embedAndIndexFileSummaries(summaries);
+        } catch (error) {
+            console.error('[ERROR] Error processing file summaries:', error);
+        }
+    }
+
+    /**
+     * Embeds file summaries in batches of 100 and upserts them into the summary index, keyed by filepath so that re-indexing overwrites records instead of duplicating them.
+     * @param summaries Object mapping filepaths to summaries (assumes each filepath is a valid Pinecone record ID — TODO confirm ID length/charset limits)
+     */
+    private async embedAndIndexFileSummaries(summaries: Record<string, string>) {
+        if (this.debug) console.log(`[DEBUG] Starting embedding and indexing of file summaries...`);
+
+        const filepaths = Object.keys(summaries);
+        const summaryTexts = Object.values(summaries);
+
+        // Split into batches of 100 to avoid exceeding API limits
+        const batchSize = 100;
+        const totalBatches = Math.ceil(filepaths.length / batchSize);
+
+        if (this.debug) console.log(`[DEBUG] Processing ${filepaths.length} files in ${totalBatches} batches of size ${batchSize}`);
+
+        for (let i = 0; i < filepaths.length; i += batchSize) {
+            const batchFilepaths = filepaths.slice(i, i + batchSize);
+            const batchTexts = summaryTexts.slice(i, i + batchSize);
+
+            if (this.debug) {
+                console.log(`[DEBUG] Processing batch ${Math.floor(i / batchSize) + 1}/${totalBatches}`);
+                console.log(`[DEBUG] First file in batch: ${batchFilepaths[0]}`);
+                console.log(`[DEBUG] First summary in batch: "${batchTexts[0].substring(0, 50)}..."`);
+            }
+
+            try {
+                // Generate embeddings for this batch
+                if (this.debug) console.log(`[DEBUG] Generating embeddings for batch of ${batchTexts.length} summaries...`);
+                const startTime = Date.now();
+                const embeddingResponse = await this.openai.embeddings.create({
+                    model: 'text-embedding-3-large',
+                    input: batchTexts,
+                    encoding_format: 'float',
+                });
+                const duration = Date.now() - startTime;
+                if (this.debug) console.log(`[DEBUG] ✅ Embeddings generated in ${duration}ms`);
+
+                // Prepare Pinecone records
+                if (this.debug) console.log(`[DEBUG] Preparing Pinecone records...`);
+                const pineconeRecords: PineconeRecord[] = batchTexts.map((text, index) => {
+                    const embedding = (embeddingResponse.data as Embedding[])[index].embedding;
+                    if (this.debug && index === 0) console.log(`[DEBUG] Sample embedding dimensions: ${embedding.length}, first few values: [${embedding.slice(0, 5).join(', ')}...]`);
+
+                    return {
+                        id: batchFilepaths[index], // Stable per-file ID: a re-index overwrites this file's record rather than adding a duplicate
+                        values: embedding,
+                        metadata: {
+                            filepath: batchFilepaths[index],
+                            summary: text,
+                        } as RecordMetadata,
+                    };
+                });
+
+                // Upload to Pinecone
+                if (this.debug) console.log(`[DEBUG] Upserting ${pineconeRecords.length} records to Pinecone...`);
+                const upsertStart = Date.now();
+                try {
+                    await this.summaryIndex.upsert(pineconeRecords);
+                    const upsertDuration = Date.now() - upsertStart;
+                    if (this.debug) console.log(`[DEBUG] ✅ Batch ${Math.floor(i / batchSize) + 1}/${totalBatches} indexed in ${upsertDuration}ms`);
+                } catch (upsertError) {
+                    console.error(`[ERROR] Failed to upsert batch ${Math.floor(i / batchSize) + 1}/${totalBatches} to Pinecone:`, upsertError);
+                    // Try again with smaller batch
+                    if (batchTexts.length > 20) {
+                        console.log(`[DEBUG] 🔄 Retrying with smaller batch size...`);
+                        // Split the batch in half and retry each half once (halves are not split any further)
+                        const midpoint = Math.floor(batchTexts.length / 2);
+                        const firstHalf = {
+                            filepaths: batchFilepaths.slice(0, midpoint),
+                            texts: batchTexts.slice(0, midpoint),
+                        };
+                        const secondHalf = {
+                            filepaths: batchFilepaths.slice(midpoint),
+                            texts: batchTexts.slice(midpoint),
+                        };
+
+                        // Create a helper function to retry smaller batches
+                        const retryBatch = async (paths: string[], texts: string[], batchNum: string) => {
+                            try {
+                                if (this.debug) console.log(`[DEBUG] Generating embeddings for sub-batch ${batchNum}...`);
+                                const embRes = await this.openai.embeddings.create({
+                                    model: 'text-embedding-3-large',
+                                    input: texts,
+                                    encoding_format: 'float',
+                                });
+
+                                const records = texts.map((t, idx) => ({
+                                    id: paths[idx],
+                                    values: (embRes.data as Embedding[])[idx].embedding,
+                                    metadata: {
+                                        filepath: paths[idx],
+                                        summary: t,
+                                    } as RecordMetadata,
+                                }));
+
+                                if (this.debug) console.log(`[DEBUG] Upserting sub-batch ${batchNum} (${records.length} records)...`);
+                                await this.summaryIndex.upsert(records);
+                                if (this.debug) console.log(`[DEBUG] ✅ Sub-batch ${batchNum} upserted successfully`);
+                            } catch (retryError) {
+                                console.error(`[ERROR] Failed to upsert sub-batch ${batchNum}:`, retryError);
+                            }
+                        };
+
+                        await retryBatch(firstHalf.filepaths, firstHalf.texts, `${Math.floor(i / batchSize) + 1}.1`);
+                        await retryBatch(secondHalf.filepaths, secondHalf.texts, `${Math.floor(i / batchSize) + 1}.2`);
+                    }
+                }
+            } catch (error) {
+                console.error('[ERROR] Error processing batch:', error);
+            }
+        }
+
+        if (this.debug) console.log(`[DEBUG] ✅ File summary indexing complete for all ${filepaths.length} files`);
+
+        // Verify the index was populated correctly
+        try {
+            const indexStats = await this.summaryIndex.describeIndexStats();
+            const vectorCount = indexStats.totalRecordCount;
+            if (this.debug) console.log(`[DEBUG] 🔍 Final index verification: ${vectorCount} records in Pinecone index (expected ${filepaths.length})`);
+        } catch (error) {
+            console.error('[ERROR] Failed to verify index stats:', error);
+        }
+    }
+
+    /**
+     * Searches for file summaries similar to the given query (embeds the query, then queries the summary index).
+     * @param query The search query
+     * @param topK Number of results to return (default: 5)
+     * @returns Array of filepath and summary pairs with relevance scores; empty when uninitialized, on error, or when nothing matches
+     */
+    async searchFileSummaries(query: string, topK: number = 5): Promise<Array<{ filepath: string; summary: string; score?: number }>> {
+        if (!this.initialized) {
+            console.error('[ERROR] Cannot search - Vectorstore not fully initialized');
+            return [];
+        }
+
+        if (this.debug) console.log(`[DEBUG] Searching file summaries for query: "${query}" (topK=${topK})`);
+        try {
+            // Generate embedding for the query
+            if (this.debug) console.log(`[DEBUG] Generating embedding for query...`);
+            const startTime = Date.now();
+            const queryEmbeddingResponse = await this.openai.embeddings.create({
+                model: 'text-embedding-3-large',
+                input: query,
+                encoding_format: 'float',
+            });
+            const duration = Date.now() - startTime;
+
+            const queryEmbedding = queryEmbeddingResponse.data[0].embedding;
+            if (this.debug) {
+                console.log(`[DEBUG] ✅ Query embedding generated in ${duration}ms`);
+                console.log(`[DEBUG] Query embedding dimensions: ${queryEmbedding.length}`);
+            }
+
+            // Check if summary index is ready (a failed stats call is logged and the query is still attempted)
+            try {
+                const indexStats = await this.summaryIndex.describeIndexStats();
+                const vectorCount = indexStats.totalRecordCount;
+                if (this.debug) console.log(`[DEBUG] Summary index contains ${vectorCount} records`);
+
+                if (vectorCount === 0) {
+                    console.error('[ERROR] Summary index is empty, cannot perform search');
+                    return [];
+                }
+            } catch (statsError) {
+                console.error('[ERROR] Failed to check summary index stats:', statsError);
+                console.error('[ERROR] Stats error details:', statsError instanceof Error ? statsError.message : JSON.stringify(statsError));
+            }
+
+            // Diagnostic only: confirm Pinecone is reachable. Skipped unless debugging so a normal search makes no listIndexes call.
+            if (this.debug) {
+                console.log(`[DEBUG] Testing Pinecone connection...`);
+                try {
+                    const indexes = await this.pinecone.listIndexes();
+                    console.log(`[DEBUG] Available Pinecone indexes: ${indexes.indexes?.map(idx => idx.name).join(', ')}`);
+                } catch (connectionError) {
+                    console.error('[ERROR] Could not connect to Pinecone:', connectionError);
+                }
+            }
+
+            // Query the summaries index
+            if (this.debug) console.log(`[DEBUG] Querying Pinecone summary index (${this.summaryIndexName})...`);
+            const queryStart = Date.now();
+
+            let queryResponse;
+            try {
+                // Index stats were already requested for the empty-index check above, so the query is issued
+                // directly instead of paying for a second describeIndexStats round trip on every search.
+                queryResponse = await this.summaryIndex.query({
+                    vector: queryEmbedding,
+                    topK,
+                    includeMetadata: true,
+                });
+
+                const queryDuration = Date.now() - queryStart;
+
+                if (this.debug) {
+                    console.log(`[DEBUG] ✅ Pinecone query completed in ${queryDuration}ms`);
+                    console.log(`[DEBUG] Raw Pinecone response:`, JSON.stringify(queryResponse, null, 2));
+                    if (queryResponse.matches) {
+                        console.log(`[DEBUG] Found ${queryResponse.matches.length} matching summaries`);
+                        console.log(`[DEBUG] Match scores: ${queryResponse.matches.map(m => m.score?.toFixed(4)).join(', ')}`);
+                    } else {
+                        console.log(`[DEBUG] No matches in response`);
+                    }
+                }
+            } catch (queryError) {
+                console.error('[ERROR] Pinecone query failed:', queryError);
+                if (typeof queryError === 'object' && queryError !== null) {
+                    console.error('[ERROR] Query error details:', JSON.stringify(queryError, null, 2));
+                }
+                return [];
+            }
+
+            if (!queryResponse || !queryResponse.matches || queryResponse.matches.length === 0) {
+                if (this.debug) console.log('[DEBUG] ⚠️ No matches found in Pinecone for query');
+                return [];
+            }
+
+            // Format results (a match without metadata gets placeholder text but keeps its score)
+            const results = queryResponse.matches.map(match => {
+                if (!match.metadata) {
+                    console.error('[ERROR] Match is missing metadata:', match);
+                    return { filepath: 'unknown', summary: 'No summary available', score: match.score };
+                }
+
+                return {
+                    filepath: (match.metadata as { filepath: string }).filepath || 'unknown',
+                    summary: (match.metadata as { summary: string }).summary || 'No summary available',
+                    score: match.score,
+                };
+            });
+
+            if (this.debug) {
+                if (results.length > 0) {
+                    console.log(`[DEBUG] Top result filepath: ${results[0]?.filepath}`);
+                    console.log(`[DEBUG] Top result score: ${results[0]?.score}`);
+                    console.log(`[DEBUG] Top result summary excerpt: "${results[0]?.summary?.substring(0, 100)}..."`);
+                } else {
+                    console.log(`[DEBUG] No results returned after processing`);
+                }
+            }
+
+            return results;
+        } catch (error) {
+            console.error('[ERROR] Error searching file summaries:', error);
+            if (typeof error === 'object' && error !== null) {
+                console.error('[ERROR] Full error details:', JSON.stringify(error, null, 2));
+            }
+            return [];
+        }
+    }
+
+    /**
+     * Post-setup smoke test: logs summary-index stats, runs one fixed query and fetches the top hit's content.
+     */
+    private async runSingleTestQuery() {
+        console.log(`\n[TEST] Running single test query to validate file summary search functionality...`);
+
+        // Bail out early when the summary index cannot be reached
+        try {
+            const stats = await this.summaryIndex.describeIndexStats();
+            console.log(`[TEST] Pinecone index stats:`, JSON.stringify(stats, null, 2));
+            console.log(`[TEST] Summary index contains ${stats.totalRecordCount} indexed summaries`);
+        } catch (statsFailure) {
+            console.error('[TEST] ❌ Failed to access Pinecone index:', statsFailure);
+            return;
+        }
+
+        // Pause for two seconds before querying
+        console.log('[TEST] Waiting 2 seconds for Pinecone indexing to complete...');
+        await new Promise(done => setTimeout(done, 2000));
+
+        // The one fixed query used for the smoke test
+        const testQuery = 'React components for the UI';
+        console.log(`\n[TEST] Executing query: "${testQuery}"`);
+
+        try {
+            const hits = await this.searchFileSummaries(testQuery);
+            console.log(`[TEST] Search returned ${hits.length} results:`);
+
+            hits.forEach(({ filepath, score, summary }, rank) => {
+                console.log(`\n[TEST] Result ${rank + 1}:`);
+                console.log(`[TEST] File: ${filepath}`);
+                console.log(`[TEST] Score: ${score}`);
+                console.log(`[TEST] Summary: "${summary?.substring(0, 150)}..."`);
+            });
+
+            // With at least one hit, also pull the stored content of the best one
+            if (hits.length === 0) {
+                console.log(`\n[TEST] ⚠️ No results to fetch content for`);
+            } else {
+                const bestPath = hits[0].filepath;
+                console.log(`\n[TEST] Fetching full content for top result: ${bestPath}`);
+                const fileText = await this.getFileContent(bestPath);
+
+                if (!fileText) {
+                    console.log(`[TEST] ❌ Failed to retrieve content for ${bestPath}`);
+                } else {
+                    console.log(`[TEST] ✅ Content retrieved successfully (${fileText.length} chars)`);
+                    console.log(`[TEST] Content excerpt:\n---\n${fileText.substring(0, 300)}...\n---`);
+                }
+            }
+
+            console.log(`\n[TEST] ✅ Test query completed`);
+        } catch (queryFailure) {
+            console.error(`[TEST] ❌ Test query failed:`, queryFailure);
+            if (queryFailure !== null && typeof queryFailure === 'object') {
+                console.error('[TEST] Full error details:', JSON.stringify(queryFailure, null, 2));
+            }
+        }
+    }
+
+    /**
+     * Requests a file's stored content from the server's `/getRawFileContent` endpoint.
+     * @param filepath The filepath to look up
+     * @returns The response body as text, or null on a non-OK response or a thrown error
+     */
+    async getFileContent(filepath: string): Promise<string | null> {
+        if (this.debug) console.log(`[DEBUG] Getting file content for: ${filepath}`);
+        try {
+            const requestedAt = Date.now();
+
+            // Plain fetch is called directly here so the response body can be read back
+            // verbatim with .text(); nothing on this path JSON-decodes the reply.
+            const reply = await fetch('/getRawFileContent', {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json',
+                },
+                body: JSON.stringify({ filepath }),
+            });
+
+            if (!reply.ok) {
+                const serverMessage = await reply.text();
+                console.error(`[ERROR] Server returned error ${reply.status}: ${serverMessage}`);
+                return null;
+            }
+
+            // Successful reply: take the body as raw text
+            const fileText = await reply.text();
+            const elapsedMs = Date.now() - requestedAt;
+
+            if (this.debug) {
+                console.log(`[DEBUG] ✅ File content retrieved in ${elapsedMs}ms`);
+                console.log(`[DEBUG] Content length: ${fileText.length} chars`);
+                console.log(`[DEBUG] Content excerpt: "${fileText.substring(0, 100)}..."`);
+            }
+
+            return fileText;
+        } catch (failure) {
+            console.error('[ERROR] Error getting file content:', failure);
+            if (failure !== null && typeof failure === 'object') {
+                console.error('[ERROR] Full error details:', JSON.stringify(failure, null, 2));
+            }
+            return null;
+        }
+    }
+
     /**
      * Adds an AI document to the vectorstore. Handles media file processing for audio/video,
      * and text embedding for all document types. Updates document metadata during processing.
@@ -303,7 +787,7 @@ export class Vectorstore {
      * @param topK The number of top results to return (default is 10).
      * @returns A list of document chunks that match the query.
      */
-    async retrieve(query: string, topK: number = 10): Promise<RAGChunk[]> {
+    async retrieve(query: string, topK: number = 10, docIds?: string[]): Promise<RAGChunk[]> {
         console.log(`Retrieving chunks for query: ${query}`);
         try {
             // Generate an embedding for the query using OpenAI.
@@ -314,15 +798,16 @@ export class Vectorstore {
             });
 
             const queryEmbedding = queryEmbeddingResponse.data[0].embedding;
+            const _docIds = docIds?.length === 0 || !docIds ? this.docManager.docIds : docIds;
 
-            console.log('Using document IDs for retrieval:', this.docManager.docIds);
+            console.log('Using document IDs for retrieval:', _docIds);
 
             // Query the Pinecone index using the embedding and filter by document IDs.
             // We'll query based on document IDs that are registered in the document manager
             const queryResponse: QueryResponse = await this.index.query({
                 vector: queryEmbedding,
                 filter: {
-                    doc_id: { $in: this.docManager.docIds },
+                    doc_id: { $in: _docIds },
                 },
                 topK,
                 includeValues: true,
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index b7ce4f663..9d0427b52 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -39,6 +39,7 @@ export enum Directory {
     csv = 'csv',
     chunk_images = 'chunk_images',
     scrape_images = 'scrape_images',
+    vectorstore = 'vectorstore',
 }
 
 // In-memory job tracking
@@ -92,6 +93,119 @@ export default class AssistantManager extends ApiManager {
         const customsearch = google.customsearch('v1');
         const openai = new OpenAI({ apiKey: env.OPENAI_API_KEY });
 
+        // Endpoint that serves the stored file summaries JSON as-is
+        register({
+            method: Method.GET,
+            subscription: '/getFileSummaries',
+            secureHandler: async ({ res }) => {
+                try {
+                    // Location of the summaries JSON inside the vectorstore directory
+                    const summariesPath = path.join(filesDirectory, Directory.vectorstore, 'file_summaries.json');
+
+                    if (fs.existsSync(summariesPath)) {
+                        // Send the file's text unchanged
+                        const summariesJson = fs.readFileSync(summariesPath, 'utf8');
+                        res.send(summariesJson);
+                    } else {
+                        res.status(404).send({ error: 'File summaries not found' });
+                    }
+                } catch (readError) {
+                    console.error('Error retrieving file summaries:', readError);
+                    res.status(500).send({
+                        error: 'Failed to retrieve file summaries',
+                    });
+                }
+            },
+        });
+
+        // Register an endpoint to retrieve one file's content from the content json file (filepath -> content map)
+        register({
+            method: Method.POST,
+            subscription: '/getFileContent',
+            secureHandler: async ({ req, res }) => {
+                const { filepath } = req.body;
+
+                if (!filepath) {
+                    res.status(400).send({ error: 'Filepath is required' });
+                    return;
+                }
+
+                try {
+                    // Read the file content JSON file
+                    const filePath = path.join(filesDirectory, Directory.vectorstore, 'file_content.json');
+
+                    if (!fs.existsSync(filePath)) {
+                        res.status(404).send({ error: 'File content database not found' });
+                        return;
+                    }
+
+                    console.log(`[DEBUG] Retrieving content for: ${filepath}`);
+
+                    // Stream the JSON file from disk; chunks are accumulated and parsed once the stream ends
+                    const readStream = fs.createReadStream(filePath, { encoding: 'utf8' });
+                    let jsonData = '';
+
+                    readStream.on('data', chunk => {
+                        jsonData += chunk;
+                    });
+
+                    readStream.on('end', () => {
+                        try {
+                            // Parse the JSON
+                            const contentMap = JSON.parse(jsonData);
+
+                            // Own-property + string check: inherited keys (e.g. "constructor") are rejected and empty files are still served
+                            if (!Object.prototype.hasOwnProperty.call(contentMap, filepath) || typeof contentMap[filepath] !== 'string') {
+                                console.log(`[DEBUG] Content not found for: ${filepath}`);
+                                res.status(404).send({ error: `Content not found for filepath: ${filepath}` });
+                                return;
+                            }
+
+                            // Return the file content as is, not as JSON
+                            console.log(`[DEBUG] Found content for: ${filepath} (${contentMap[filepath].length} chars)`);
+                            res.send(contentMap[filepath]);
+                        } catch (parseError) {
+                            console.error('Error parsing file_content.json:', parseError);
+                            res.status(500).send({
+                                error: 'Failed to parse file content database',
+                            });
+                        }
+                    });
+
+                    readStream.on('error', streamError => {
+                        console.error('Error reading file_content.json:', streamError);
+                        res.status(500).send({
+                            error: 'Failed to read file content database',
+                        });
+                    });
+                } catch (error) {
+                    console.error('Error retrieving file content:', error);
+                    res.status(500).send({
+                        error: 'Failed to retrieve file content',
+                    });
+                }
+            },
+        });
+
+        // Placeholder endpoint for file summary search
+        register({
+            method: Method.POST,
+            subscription: '/searchFileSummaries',
+            secureHandler: async ({ req, res }) => {
+                const searchText = req.body.query;
+
+                if (searchText) {
+                    // No search is performed on the server side: this handler only
+                    // validates that a query was supplied and replies with a fixed
+                    // message pointing callers at the Vectorstore class.
+
+                    res.send({ message: 'This endpoint should be called through the Vectorstore class' });
+                } else {
+                    res.status(400).send({ error: 'Search query is required' });
+                }
+            },
+        });
+
         // Register Wikipedia summary API route
         register({
             method: Method.POST,
@@ -848,6 +962,72 @@ export default class AssistantManager extends ApiManager {
                 }
             },
         });
+
+        // Register an endpoint to retrieve raw file content as plain text (no JSON parsing).
+        // Expects body { filepath }; looks it up in vectorstore/file_content.json. Every reply is text/plain.
+        register({
+            method: Method.POST,
+            subscription: '/getRawFileContent',
+            secureHandler: async ({ req, res }) => {
+                const { filepath } = req.body;
+
+                // Set text/plain before any reply: the 404 below echoes the caller-supplied filepath,
+                // and Express's res.send labels string bodies without an explicit type as text/html.
+                res.setHeader('Content-Type', 'text/plain');
+
+                // The lookup key must be a non-empty string.
+                if (typeof filepath !== 'string' || !filepath) {
+                    res.status(400).send('Filepath is required');
+                    return;
+                }
+
+                try {
+                    // Locate the file content JSON file
+                    const filePath = path.join(filesDirectory, Directory.vectorstore, 'file_content.json');
+                    if (!fs.existsSync(filePath)) {
+                        res.status(404).send('File content database not found');
+                        return;
+                    }
+
+                    console.log(`[DEBUG] Retrieving raw content for: ${filepath}`);
+                    // Accumulate the whole JSON file in memory, then parse it once the stream ends
+                    const readStream = fs.createReadStream(filePath, { encoding: 'utf8' });
+                    let jsonData = '';
+                    readStream.on('data', chunk => {
+                        jsonData += chunk;
+                    });
+
+                    readStream.on('end', () => {
+                        try {
+                            const contentMap = JSON.parse(jsonData);
+                            // Own-property lookup so keys like "constructor" cannot resolve to members
+                            // inherited from Object.prototype; only non-empty string entries are served.
+                            const isOwnKey = Object.prototype.hasOwnProperty.call(contentMap, filepath);
+                            const content = isOwnKey ? contentMap[filepath] : undefined;
+                            if (typeof content !== 'string' || !content) {
+                                console.log(`[DEBUG] Content not found for: ${filepath}`);
+                                res.status(404).send(`Content not found for filepath: ${filepath}`);
+                                return;
+                            }
+
+                            console.log(`[DEBUG] Found content for: ${filepath} (${content.length} chars)`);
+                            res.send(content);
+                        } catch (parseError) {
+                            console.error('Error parsing file_content.json:', parseError);
+                            res.status(500).send('Failed to parse file content database');
+                        }
+                    });
+
+                    readStream.on('error', streamError => {
+                        console.error('Error reading file_content.json:', streamError);
+                        res.status(500).send('Failed to read file content database');
+                    });
+                } catch (error) {
+                    console.error('Error retrieving file content:', error);
+                    res.status(500).send('Failed to retrieve file content');
+                }
+            },
+        });
     }
 }
 
-- 
cgit v1.2.3-70-g09d2