import { ChatBox } from '../chatboxcomponents/ChatBox';
import { Doc, FieldType, Opt } from '../../../../../fields/Doc';
import { DocData } from '../../../../../fields/DocSymbols';
import { Observation } from '../types/types';
import { ParametersType, ToolInfo, Parameter } from '../types/tool_types';
import { BaseTool } from '../tools/BaseTool';
import { Docs, DocumentOptions } from '../../../../documents/Documents';
import { CollectionFreeFormDocumentView } from '../../CollectionFreeFormDocumentView';
import { v4 as uuidv4 } from 'uuid';
import { LinkManager, UPDATE_SERVER_CACHE } from '../../../../util/LinkManager';
import { DocCast, StrCast } from '../../../../../fields/Types';
import { supportedDocTypes } from '../types/tool_types';
import { parsedDoc } from '../chatboxcomponents/ChatBox';
import { faThumbTackSlash } from '@fortawesome/free-solid-svg-icons';
import { DocumentManager } from '../../../../util/DocumentManager';
import { DocumentView } from '../../DocumentView';

/**
 * A document tracked by the manager: the layout document together with the
 * data document obtained from it via `doc[DocData]`.
 */
interface AgentDocument {
    layoutDoc: Doc;
    dataDoc: Doc;
}

/**
 * Tracks the documents an agent can see and edit from a ChatBox.
 *
 * Documents are registered under a persistent string ID (stored on the document
 * in the `_dash_document_id` field) and can then be listed, inspected, edited
 * field-by-field, linked to one another, or created through the owning ChatBox.
 */
export class AgentDocumentManager {
    private documentsById: Map<string, AgentDocument>;
    private chatBox: ChatBox;
    private chatBoxDocument: Doc | null = null;
    private fieldMetadata: Record<string, any> = {};
    private readonly DOCUMENT_ID_FIELD = '_dash_document_id';

    /**
     * Creates a new manager bound to a ChatBox, registers the ChatBox's own
     * document, and loads field metadata from `DocumentOptions`.
     * @param chatBox The ChatBox whose document and links this manager works with
     */
    constructor(chatBox: ChatBox) {
        this.documentsById = new Map<string, AgentDocument>();
        this.chatBox = chatBox;
        this.chatBoxDocument = chatBox.Document;
        this.processDocument(this.chatBoxDocument);
        this.initializeFieldMetadata();
    }

    /**
     * Populates `fieldMetadata` from the own properties of a fresh `DocumentOptions` instance.
     *
     * Each entry is stored under the raw field name with the keys
     * `name`, `withoutUnderscore`, `description`, `type`, `required`,
     * `defaultValue` and `possibleValues`.
     */
    private initializeFieldMetadata() {
        const documentOptionsInstance = new DocumentOptions();

        for (const [fieldName, fieldInfo] of Object.entries(documentOptionsInstance)) {
            const fieldData: Record<string, any> = {
                name: fieldName,
                withoutUnderscore: fieldName.startsWith('_') ? fieldName.substring(1) : fieldName,
                description: '',
                type: 'unknown',
                required: false,
                defaultValue: undefined,
                possibleValues: [],
            };

            // Only values that carry a `description` are treated as field descriptors
            // (presumably FInfo instances -- not verifiable from this file).
            if (fieldInfo && typeof fieldInfo === 'object' && 'description' in fieldInfo) {
                fieldData.description = fieldInfo.description;

                if ('fieldType' in fieldInfo) {
                    // Keep the 'unknown' default when the property exists but is unset,
                    // so later string operations on `type` cannot hit undefined.
                    fieldData.type = fieldInfo.fieldType ?? 'unknown';
                }

                if ('values' in fieldInfo && Array.isArray(fieldInfo.values)) {
                    fieldData.possibleValues = fieldInfo.values;
                }
            }

            this.fieldMetadata[fieldName] = fieldData;
        }
    }

    /**
     * Registers every non-null document in `docs` (see `processDocument`).
     */
    private processDocuments(docs: readonly (Doc | undefined | null)[]) {
        docs.forEach(doc => {
            if (doc) {
                this.processDocument(doc);
            }
        });
    }

    /**
     * Rebuilds the registry from scratch.
     *
     * When the ChatBox document is known, this registers (in order) the documents
     * linked to it through the LinkManager, the ChatBox document itself, and any
     * siblings exposed by `parent.childDocs()`. Otherwise it falls back to
     * `chatBox.linkedDocs`. Errors are logged rather than thrown.
     */
    public initializeFindDocsFreeform() {
        this.documentsById.clear();

        try {
            const chatDoc = this.chatBoxDocument;
            if (chatDoc) {
                console.log('Finding documents linked to ChatBox document with ID:', chatDoc.id);

                // For each link, take the anchor opposite the ChatBox; if that anchor is
                // an annotation, use the document it annotates instead.
                const linkedDocs = LinkManager.Instance.getAllRelatedLinks(chatDoc)
                    .map(d => DocCast(LinkManager.getOppositeAnchor(d, chatDoc)))
                    .map(d => DocCast(d?.annotationOn, d))
                    .filter(d => d);

                console.log(`Found ${linkedDocs.length} linked documents via LinkManager`);
                this.processDocuments(linkedDocs);

                // Include the ChatBox document itself
                this.processDocument(chatDoc);

                // If the document exposes a parent with a childDocs() function, add the siblings too
                if (chatDoc.parent) {
                    const parent = chatDoc.parent;
                    console.log('Found parent document, checking for siblings');

                    if (parent && typeof parent === 'object' && 'childDocs' in parent && typeof parent.childDocs === 'function') {
                        try {
                            const siblingDocs = parent.childDocs();
                            if (Array.isArray(siblingDocs)) {
                                console.log(`Found ${siblingDocs.length} sibling documents via parent.childDocs()`);
                                this.processDocuments(siblingDocs);
                            }
                        } catch (e) {
                            console.warn('Error accessing parent.childDocs:', e);
                        }
                    }
                }
            } else if (this.chatBox && this.chatBox.linkedDocs) {
                console.log('Using ChatBox.linkedDocs directly');
                const linkedDocs = this.chatBox.linkedDocs;
                if (Array.isArray(linkedDocs)) {
                    console.log(`Found ${linkedDocs.length} documents via ChatBox.linkedDocs`);
                    this.processDocuments(linkedDocs);
                }

                if (this.chatBox.Document) {
                    this.processDocument(this.chatBox.Document);
                }
            } else {
                console.warn('No ChatBox document reference available for finding linked documents');
            }

            console.log(`DocumentMetadataTool found ${this.documentsById.size} total documents`);
        } catch (error) {
            console.error('Error finding documents in Freeform view:', error);
        }
    }

    /**
     * Registers a document under its persistent ID, assigning an ID if needed.
     * A document whose ID is already registered is left untouched.
     * @param doc The document to register
     */
    public processDocument(doc: Doc) {
        const docId = this.ensureDocumentId(doc);
        if (!this.documentsById.has(docId)) {
            this.documentsById.set(docId, { layoutDoc: doc, dataDoc: doc[DocData] });
        }
    }

    /**
     * Returns the document's persistent ID, creating and storing one if absent.
     *
     * Lookup order: the `_dash_document_id` field, then `doc.id`, `doc._id`,
     * `doc.data.id` (string values only), and finally a freshly generated UUID.
     * Any ID found by the fallbacks is written back to `_dash_document_id`.
     * @param doc The document to identify
     * @returns The document's ID
     */
    private ensureDocumentId(doc: Doc): string {
        if (doc[this.DOCUMENT_ID_FIELD]) {
            return String(doc[this.DOCUMENT_ID_FIELD]);
        }

        let docId: string;
        if (doc.id && typeof doc.id === 'string') {
            docId = doc.id;
        } else if (doc._id && typeof doc._id === 'string') {
            docId = doc._id;
        } else if (doc.data && typeof doc.data === 'object' && 'id' in doc.data && typeof doc.data.id === 'string') {
            docId = doc.data.id;
        } else {
            docId = uuidv4();
            console.log(`Generated new UUID for document with title: ${doc.title || 'Untitled'}`);
        }

        // Best effort: the ID is still returned when it cannot be stored on the document.
        try {
            doc[this.DOCUMENT_ID_FIELD] = docId;
        } catch (e) {
            console.warn(`Could not assign ID to document property`, e);
        }

        return docId;
    }

    /**
     * Builds a JSON-friendly description of one tracked document.
     *
     * For every known field (and, for underscore-prefixed fields, the name without
     * the underscore) the formatted value is recorded under `fields.layout` and/or
     * `fields.data`; `fieldLocationMap` records where the un-prefixed name was found,
     * with the layout document taking precedence.
     * @param doc The tracked document to describe
     * @returns The metadata object, or null when no document is given
     */
    public extractDocumentMetadata(doc?: AgentDocument) {
        if (!doc) return null;
        const { layoutDoc, dataDoc } = doc;

        const metadata: Record<string, any> = {
            // Prefer the field the manager actually writes (DOCUMENT_ID_FIELD) so the reported
            // ID matches the registry key; the un-prefixed lookup is kept as a fallback.
            id: layoutDoc[this.DOCUMENT_ID_FIELD] || layoutDoc.dash_document_id || layoutDoc.id || '',
            title: layoutDoc.title || '',
            type: layoutDoc.type || '',
            fields: {
                layout: {},
                data: {},
            },
            fieldLocationMap: {},
        };

        // Records `lookupName` from both documents; `locationKey` is the un-prefixed name.
        const collect = (lookupName: string, locationKey: string) => {
            if (layoutDoc) {
                const layoutValue = layoutDoc[lookupName];
                if (layoutValue !== undefined) {
                    metadata.fields.layout[lookupName] = this.formatFieldValue(layoutValue);
                    metadata.fieldLocationMap[locationKey] = 'layout';
                }
            }
            if (dataDoc) {
                const dataValue = dataDoc[lookupName];
                if (dataValue !== undefined) {
                    metadata.fields.data[lookupName] = this.formatFieldValue(dataValue);
                    if (!metadata.fieldLocationMap[locationKey]) {
                        metadata.fieldLocationMap[locationKey] = 'data';
                    }
                }
            }
        };

        Object.keys(this.fieldMetadata).forEach(fieldName => {
            const strippedName = fieldName.startsWith('_') ? fieldName.substring(1) : fieldName;
            collect(fieldName, strippedName);
            if (strippedName !== fieldName) {
                // Also look the field up without its leading underscore
                collect(strippedName, strippedName);
            }
        });

        // Expose width/height aliases so both `width` and `_width` are discoverable
        if (metadata.fields.layout._width !== undefined && metadata.fields.layout.width === undefined) {
            metadata.fields.layout.width = metadata.fields.layout._width;
            metadata.fieldLocationMap.width = 'layout';
        }

        if (metadata.fields.layout._height !== undefined && metadata.fields.layout.height === undefined) {
            metadata.fields.layout.height = metadata.fields.layout._height;
            metadata.fieldLocationMap.height = 'layout';
        }

        return metadata;
    }

    /**
     * Formats a field value for JSON output.
     *
     * - null/undefined become null
     * - Doc values become a `{ type: 'Doc', id, title, docType }` summary
     * - strings that look like serialized rich text become `{ type: 'RichText', text, length }`
     * - other objects use their own `toString()` if they define one, else `JSON.stringify`
     * - primitives are returned unchanged
     * @param value The field value to format
     * @returns A JSON-friendly representation of the field value
     */
    private formatFieldValue(value: any): any {
        if (value === undefined || value === null) {
            return null;
        }

        if (value instanceof Doc) {
            return {
                type: 'Doc',
                id: value.id || this.ensureDocumentId(value),
                title: value.title || '',
                docType: value.type || '',
            };
        }

        // Serialized rich text: try to pull out the plain text
        if (typeof value === 'string' && value.includes('"type":"doc"') && value.includes('"content":')) {
            try {
                const rtfObj = JSON.parse(value);
                if (rtfObj.doc && rtfObj.doc.content) {
                    let plainText = '';
                    const extractText = (node: any) => {
                        if (node.text) {
                            plainText += node.text;
                        }
                        if (node.content && Array.isArray(node.content)) {
                            node.content.forEach((child: any) => extractText(child));
                        }
                    };

                    extractText(rtfObj.doc);

                    // The full serialized value is deliberately omitted; it can be very large
                    if (plainText) {
                        return {
                            type: 'RichText',
                            text: plainText,
                            length: plainText.length,
                        };
                    }
                }
            } catch {
                // Not valid JSON after all: fall through and treat it as a regular string
            }
        }

        if (typeof value === 'object') {
            // Prefer a custom toString (this also covers arrays, which join with commas)
            if (value.toString && value.toString !== Object.prototype.toString) {
                return value.toString();
            }

            try {
                return JSON.stringify(value);
            } catch {
                return '[Complex Object]';
            }
        }

        return value;
    }

    /**
     * Serializes text content in the rich-text JSON shape used for the `text` field.
     *
     * A string that already parses to an object with a `doc` property is returned
     * unchanged; anything else is wrapped in a single-paragraph document.
     * @param value The incoming text value
     * @returns The rich-text JSON string
     */
    private toRichTextJson(value: unknown): string {
        const text = typeof value === 'string' ? value : String(value);
        try {
            const parsed: unknown = JSON.parse(text);
            if (parsed !== null && typeof parsed === 'object' && 'doc' in parsed) {
                return text;
            }
        } catch {
            // Plain text, not JSON: wrap it below
        }

        return JSON.stringify({
            doc: {
                type: 'doc',
                content: [
                    {
                        type: 'paragraph',
                        content: [
                            {
                                type: 'text',
                                text,
                            },
                        ],
                    },
                ],
            },
        });
    }

    /**
     * Converts an incoming field value to the type suggested by the field metadata.
     *
     * - `text` is always stored as rich-text JSON (see `toRichTextJson`), so plain
     *   content such as "true" or "42" is not mistaken for a boolean or for JSON.
     * - numbers and booleans pass through unchanged
     * - the strings "true"/"false" become booleans unless the metadata type is `string`
     * - width/height without metadata are parsed as numbers
     * - otherwise the metadata type decides: boolean, number, date (ms timestamp),
     *   list/array/json/object (JSON.parse); unparsable input is returned as a string
     * @param fieldName The name of the field
     * @param fieldValue The value to convert
     * @returns The converted value
     */
    private convertFieldValue(fieldName: string, fieldValue: any): any {
        if (fieldName === 'text') {
            return this.toRichTextJson(fieldValue);
        }

        if (typeof fieldValue === 'number' || typeof fieldValue === 'boolean') {
            return fieldValue;
        }

        // Metadata may be registered with or without the leading underscore
        const normalizedFieldName = fieldName.startsWith('_') ? fieldName : `_${fieldName}`;
        const strippedFieldName = fieldName.startsWith('_') ? fieldName.substring(1) : fieldName;
        const fieldMeta = this.fieldMetadata[normalizedFieldName] || this.fieldMetadata[strippedFieldName];
        const fieldType: string = fieldMeta && typeof fieldMeta.type === 'string' ? fieldMeta.type.toLowerCase() : 'unknown';

        if (typeof fieldValue === 'string' && fieldType !== 'string') {
            const lowered = fieldValue.toLowerCase();
            if (lowered === 'true') return true;
            if (lowered === 'false') return false;
        }

        const text: string = typeof fieldValue === 'string' ? fieldValue : String(fieldValue);

        if (!fieldMeta) {
            if (fieldName === '_width' || fieldName === '_height' || fieldName === 'width' || fieldName === 'height') {
                const num = Number(text);
                return isNaN(num) ? text : num;
            }
            return text;
        }

        if (fieldType === 'boolean') {
            return text.toLowerCase() === 'true';
        }
        if (fieldType === 'number') {
            const num = Number(text);
            return isNaN(num) ? text : num;
        }
        if (fieldType === 'date') {
            // An invalid date does not throw; getTime() yields NaN, so test for that explicitly
            const timestamp = new Date(text).getTime();
            return Number.isNaN(timestamp) ? text : timestamp;
        }
        if (fieldType.includes('list') || fieldType.includes('array') || fieldType === 'json' || fieldType === 'object') {
            try {
                return JSON.parse(text);
            } catch {
                return text;
            }
        }

        return text;
    }

    /**
     * Summarizes all known document fields for the agent.
     *
     * @returns An object with `fieldCount`, per-field details (`fields`), names grouped
     * by primitive type (`fieldsByType`), a map from un-prefixed to underscore-prefixed
     * names (`fieldNameMappings`), names grouped by purpose (`commonFields`), and
     * static guidance in `autoSizingFields` and `specialFields`.
     */
    public getAllFieldMetadata() {
        const result: Record<string, any> = {
            fieldCount: Object.keys(this.fieldMetadata).length,
            fields: {},
            // Only primitive buckets are reported; doc/list/date/enumeration/other are intentionally omitted
            fieldsByType: {
                string: [],
                number: [],
                boolean: [],
            },
            fieldNameMappings: {},
            commonFields: {
                appearance: [],
                position: [],
                size: [],
                content: [],
                behavior: [],
                layout: [],
            },
        };

        Object.entries(this.fieldMetadata).forEach(([fieldName, fieldInfo]) => {
            const strippedName = fieldName.startsWith('_') ? fieldName.substring(1) : fieldName;

            if (fieldName.startsWith('_')) {
                result.fieldNameMappings[strippedName] = fieldName;
            }

            // initializeFieldMetadata stores these as `type` / `possibleValues`;
            // the descriptor-style names are accepted as a fallback.
            const rawType = fieldInfo.type ?? fieldInfo.fieldType;
            const possibleValues = fieldInfo.possibleValues ?? fieldInfo.values ?? [];

            result.fields[fieldName] = {
                name: fieldName,
                displayName: strippedName,
                description: fieldInfo.description || '',
                type: rawType || 'unknown',
                possibleValues,
            };

            const type = typeof rawType === 'string' ? rawType.toLowerCase() : 'unknown';
            if (type === 'string' || type === 'number' || type === 'boolean') {
                result.fieldsByType[type].push(fieldName);
            }

            // x / y must be a whole '_'-separated token: a bare substring test would
            // classify names such as 'text' or 'type' as position fields.
            const isAxisField = strippedName
                .toLowerCase()
                .split('_')
                .some(token => token === 'x' || token === 'y');

            if (fieldName.includes('width') || fieldName.includes('height') || fieldName.includes('size')) {
                result.commonFields.size.push(fieldName);
            } else if (fieldName.includes('color') || fieldName.includes('background') || fieldName.includes('border')) {
                result.commonFields.appearance.push(fieldName);
            } else if (isAxisField || fieldName.includes('position') || fieldName.includes('pan')) {
                result.commonFields.position.push(fieldName);
            } else if (fieldName.includes('text') || fieldName.includes('title') || fieldName.includes('data')) {
                result.commonFields.content.push(fieldName);
            } else if (fieldName.includes('action') || fieldName.includes('click') || fieldName.includes('event')) {
                result.commonFields.behavior.push(fieldName);
            } else if (fieldName.includes('layout')) {
                result.commonFields.layout.push(fieldName);
            }
        });

        // Guidance on the auto-sizing flags that interact with manual width/height
        result.autoSizingFields = {
            height: {
                autoHeightField: '_layout_autoHeight',
                heightField: '_height',
                displayName: 'height',
                usage: 'To manually set height, first set layout_autoHeight to false',
            },
            width: {
                autoWidthField: '_layout_autoWidth',
                widthField: '_width',
                displayName: 'width',
                usage: 'To manually set width, first set layout_autoWidth to false',
            },
        };

        // Guidance on the text field's storage format
        result.specialFields = {
            text: {
                name: 'text',
                description: 'Document text content',
                format: 'RichTextField',
                note: 'When setting text, provide plain text - it will be automatically converted to the correct format',
                example: 'For setting: "Hello world" (plain text); For getting: Will be converted to plaintext for display',
            },
        };

        return result;
    }

    /**
     * Sets one field on a tracked document.
     *
     * `width` / `height` are mapped to `_width` / `_height`. The value is converted with
     * `convertFieldValue`, then written to the layout document when the field already
     * exists there, is underscore-prefixed, or no data document is available; otherwise
     * it is written to the data document.
     * @param docId The ID of the document to edit
     * @param fieldName The name of the field to edit
     * @param fieldValue The new value for the field (string, number, or boolean)
     * @returns Object with success status, message, and additional information
     */
    public editDocumentField(
        docId: string,
        fieldName: string,
        fieldValue: string | number | boolean
    ): {
        success: boolean;
        message: string;
        fieldName?: string;
        originalFieldName?: string;
        newValue?: any;
        warning?: string;
    } {
        // Only the width/height aliases are rewritten; every other name is used as given
        let normalizedFieldName = fieldName;
        if (fieldName === 'width') {
            normalizedFieldName = '_width';
        } else if (fieldName === 'height') {
            normalizedFieldName = '_height';
        }

        const doc = this.documentsById.get(docId);
        if (!doc) {
            return { success: false, message: `Document with ID ${docId} not found` };
        }

        const { layoutDoc, dataDoc } = doc;
        if (!layoutDoc && !dataDoc) {
            return { success: false, message: `Could not find layout or data document for document with ID ${docId}` };
        }

        try {
            const convertedValue = this.convertFieldValue(normalizedFieldName, fieldValue);

            let targetDoc: Doc;
            let targetLocation: 'layout' | 'data';

            const existsOnLayout = layoutDoc ? Doc.Get(layoutDoc, normalizedFieldName, true) !== undefined : false;
            if (layoutDoc && (existsOnLayout || normalizedFieldName.startsWith('_') || !dataDoc)) {
                targetDoc = layoutDoc;
                targetLocation = 'layout';
            } else if (dataDoc) {
                targetDoc = dataDoc;
                targetLocation = 'data';
            } else {
                return { success: false, message: `No valid document found for editing` };
            }

            targetDoc[normalizedFieldName] = convertedValue;

            return {
                success: true,
                message: `Successfully updated field '${normalizedFieldName}' on ${targetLocation} document (ID: ${docId})`,
                fieldName: normalizedFieldName,
                originalFieldName: fieldName,
                newValue: convertedValue,
            };
        } catch (error) {
            console.error('Error editing document field:', error);
            return {
                success: false,
                message: `Error updating field: ${error instanceof Error ? error.message : String(error)}`,
            };
        }
    }

    /**
     * Gets metadata for a specific document or for all tracked documents.
     * @param documentId Optional ID of a specific document to get metadata for
     * @returns For an ID: that document's metadata (null when the ID is unknown).
     * Without an ID: `{ documentCount, documents, fieldDefinitions }`, where
     * `documents` maps each document ID to its metadata.
     */
    public getDocumentMetadata(documentId?: string): any {
        if (documentId) {
            return this.extractDocumentMetadata(this.documentsById.get(documentId));
        }

        // A plain object has no add(); key each entry by its document ID instead
        const documentsMetadata: Record<string, any> = {};
        for (const [id, doc] of this.documentsById.entries()) {
            documentsMetadata[id] = this.extractDocumentMetadata(doc);
        }

        return {
            documentCount: this.documentsById.size,
            documents: documentsMetadata,
            fieldDefinitions: this.fieldMetadata,
        };
    }

    /**
     * Chooses the `embedContainer` value for a new link: the title of the ChatBox
     * document's parent, else the title of the given document's parent, else a
     * fixed default tab name.
     */
    private resolveEmbedContainer(layoutDoc: Doc): string {
        const chatParent = this.chatBoxDocument?.parent;
        if (chatParent && typeof chatParent === 'object' && 'title' in chatParent) {
            return String(chatParent.title);
        }
        const docParent = layoutDoc.parent;
        if (docParent && typeof docParent === 'object' && 'title' in docParent) {
            return String(docParent.title);
        }
        return 'Untitled Tab 1';
    }

    /**
     * Creates one link between two layout documents, registers it, and makes
     * the link and both endpoints visible.
     */
    private createVisibleLink(first: Doc, second: Doc): Doc {
        const linkDoc = Docs.Create.LinkDocument(first, second);

        if (!linkDoc.color) {
            linkDoc.color = 'lightBlue';
        }

        // Display flags set on every link created here
        linkDoc.link_visible = true;
        linkDoc.link_enabled = true;
        linkDoc.link_autoMove = true;
        linkDoc.link_showDirected = true;
        linkDoc.embedContainer = this.resolveEmbedContainer(first);

        LinkManager.Instance.addLink(linkDoc);

        const ancestor = DocumentView.linkCommonAncestor(linkDoc);
        ancestor?.ComponentView?.addDocument?.(linkDoc);
        Doc.AddDocToList(Doc.UserDoc(), 'links', linkDoc);

        if (this.chatBoxDocument) {
            this.chatBox._props.addDocument?.(first);
            this.chatBox._props.addDocument?.(second);

            DocumentManager.Instance.showDocument(first, { willZoomCentered: false });
            DocumentManager.Instance.showDocument(second, { willZoomCentered: false });
        }

        return linkDoc;
    }

    /**
     * Links every pair of the given documents.
     *
     * Each unordered pair is linked at most once; unknown IDs and self-pairs are
     * skipped, and a failure on one pair is logged without aborting the rest.
     * A server-cache update is scheduled shortly after the loop finishes.
     * @param docIds Array of document IDs to link
     * @returns Array of created link documents
     */
    public addLinks(docIds: string[]): Doc[] {
        const createdLinks: Doc[] = [];
        const alreadyLinked = new Set<string>();

        for (const docId1 of docIds) {
            const doc1 = this.documentsById.get(docId1);
            for (const docId2 of docIds) {
                if (docId1 === docId2) continue;

                // Order-independent key; JSON encoding keeps distinct ID pairs distinct
                const linkKey = JSON.stringify([docId1, docId2].sort());
                if (alreadyLinked.has(linkKey)) continue;

                const doc2 = this.documentsById.get(docId2);
                if (!doc1?.layoutDoc || !doc2?.layoutDoc) continue;

                try {
                    createdLinks.push(this.createVisibleLink(doc1.layoutDoc, doc2.layoutDoc));
                    alreadyLinked.add(linkKey);
                } catch (error) {
                    console.error('Error creating link between documents:', error);
                }
            }
        }

        // Deferred so the cache update runs after the links above have been added
        setTimeout(() => {
            try {
                if (typeof UPDATE_SERVER_CACHE === 'function') {
                    UPDATE_SERVER_CACHE();
                }
            } catch (e) {
                console.warn('Could not update server cache after creating links:', e);
            }
        }, 100);

        return createdLinks;
    }

    /**
     * Checks whether a string is one of the `supportedDocTypes` values.
     * @param docType The document type to validate
     * @returns True if the document type is valid, false otherwise
     */
    private isValidDocType(docType: string): boolean {
        return Object.values(supportedDocTypes).includes(docType as supportedDocTypes);
    }

    /**
     * Creates a document through the ChatBox, links it to the ChatBox document,
     * and shows it.
     *
     * @param docType The type of document to create; must be a supported doc type
     * @param title The title of the new document
     * @param data The data used to generate the document
     * @returns The document returned by `chatBox.whichDoc` (may be undefined)
     * @throws Error when `docType` is not supported, or when creation fails
     */
    createDocInDash = (docType: string, title: string, data: string) => {
        if (!this.isValidDocType(docType)) {
            throw new Error(`Invalid document type: ${docType}`);
        }

        try {
            const simpleDoc: parsedDoc = {
                doc_type: docType,
                title: title,
                data: data,
                x: 0,
                y: 0,
                _width: 300,
                _height: 300,
                _layout_fitWidth: false,
                _layout_autoHeight: true,
            };

            if (!this.chatBox) {
                throw new Error('ChatBox instance not available for creating document');
            }

            const doc: Opt<Doc> = this.chatBox.whichDoc(simpleDoc, false);
            if (doc) {
                if (this.chatBoxDocument) {
                    LinkManager.Instance.addLink(Docs.Create.LinkDocument(this.chatBoxDocument, doc));
                }
                this.chatBox._props.addDocument?.(doc);
                DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
            }
            return doc;
        } catch (error) {
            throw new Error(`Error creating document: ${error}`);
        }
    };

    /**
     * Reports whether a document with the given ID is tracked.
     */
    public has(docId: string) {
        return this.documentsById.has(docId);
    }

    /**
     * Lists all tracked documents as a single text observation containing
     * each document's id, title and type.
     */
    public listDocs() {
        const docs = Array.from(this.documentsById.entries()).map(([id, doc]) => ({
            id,
            title: doc.layoutDoc.title || 'Untitled Document',
            type: doc.layoutDoc.type || doc.dataDoc.type || 'Unknown Type',
        }));

        if (docs.length === 0) {
            return [
                {
                    type: 'text',
                    text: 'No documents found in the current view.',
                },
            ];
        }

        return [
            {
                type: 'text',
                text: `Found ${docs.length} document(s) in the current view:\n${JSON.stringify(docs, null, 2)}`,
            },
        ];
    }

    /**
     * Registers a document (replacing any entry with the same ID) and returns
     * its tracked layout/data pair.
     */
    public createAgentDoc(doc: Doc) {
        const agentDoc = { layoutDoc: doc, dataDoc: doc[DocData] };
        this.documentsById.set(this.ensureDocumentId(doc), agentDoc);
        return agentDoc;
    }
}
available tools for the assistant this.tools = { calculate: new CalculateTool(), rag: new RAGTool(this.vectorstore), dataAnalysis: new DataAnalysisTool(csvData), - websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc), + websiteInfoScraper: new WebsiteInfoScraperTool(getLinkedUrlDocId), searchTool: new SearchTool(addLinkedUrlDoc), noTool: new NoTool(), //imageCreationTool: new ImageCreationTool(createImage), diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index e09b4313f..43765c1ce 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -44,6 +44,7 @@ import { ProgressBar } from './ProgressBar'; import { OpenWhere } from '../../OpenWhere'; import { Upload } from '../../../../../server/SharedMediaTypes'; import { DocumentMetadataTool } from '../tools/DocumentMetadataTool'; +import { AgentDocumentManager } from '../utils/AgentDocumentManager'; dotenv.config(); @@ -76,6 +77,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { private agent: Agent; private messagesRef: React.RefObject; private _textInputRef: HTMLInputElement | undefined | null; + private docManager: AgentDocumentManager; /** * Static method that returns the layout string for the field. 
@@ -107,7 +109,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id); } this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds); - this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.createImageInDash, this.createCSVInDash, this); + this.docManager = new AgentDocumentManager(this); + this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.getLinkedUrlDocIds, this.createImageInDash, this.createCSVInDash, this, this.docManager); // Reinitialize the DocumentMetadataTool with a direct reference to this ChatBox instance // This ensures the tool can properly access documents in the same Freeform view @@ -380,7 +383,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @action addLinkedUrlDoc = async (url: string, id: string) => { const doc = Docs.Create.WebDocument(url, { data_useCors: true }); - + this.docManager.addCustomId(doc, id); const linkDoc = Docs.Create.LinkDocument(this.Document, doc); LinkManager.Instance.addLink(linkDoc); @@ -391,6 +394,28 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }; doc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] }); + this.docManager.processDocument(doc); + }; + + /** + * Retrieves the IDs of linked url documents. + * @returns An array of document IDs. 
+ */ + @action + getLinkedUrlDocIds = () => { + const linkedDocs: Doc[] = this.linkedDocs; + const linkedUrlDocIds: string[] = []; + + for (const doc of linkedDocs) { + if (doc.chunk_simpl) { + const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] }; + const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkType === CHUNK_TYPE.URL); + if (foundChunk) { + linkedUrlDocIds.push(foundChunk.chunkId); + } + } + } + return linkedUrlDocIds; }; /** diff --git a/src/client/views/nodes/chatbot/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts index 6a11407a5..2ee30f0cf 100644 --- a/src/client/views/nodes/chatbot/tools/SearchTool.ts +++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts @@ -28,7 +28,7 @@ export class SearchTool extends BaseTool { private _addLinkedUrlDoc: (url: string, id: string) => void; private _max_results: number; - constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 4) { + constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 3) { super(searchToolInfo); this._addLinkedUrlDoc = addLinkedUrlDoc; this._max_results = max_results; diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts index 19ccd0b36..bff38ae15 100644 --- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts +++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts @@ -66,11 +66,11 @@ const websiteInfoScraperToolInfo: ToolInfo = { }; export class WebsiteInfoScraperTool extends BaseTool { - private _addLinkedUrlDoc: (url: string, id: string) => void; + private _getLinkedUrlDocId: (url: string) => string[]; - constructor(addLinkedUrlDoc: (url: string, id: string) => void) { + constructor(getLinkedUrlDocIds: (url: string) => string[]) { super(websiteInfoScraperToolInfo); - this._addLinkedUrlDoc = addLinkedUrlDoc; + this._getLinkedUrlDocId = 
getLinkedUrlDocIds; } async execute(args: ParametersType): Promise { @@ -79,9 +79,8 @@ export class WebsiteInfoScraperTool extends BaseTool { try { - const { website_plain_text } = await Networking.PostToServer('/scrapeWebsite', { url }); - const id = uuidv4(); - this._addLinkedUrlDoc(url, id); + const { website_plain_text } = (await Networking.PostToServer('/scrapeWebsite', { url })) as { website_plain_text: string }; + const id = this._getLinkedUrlDocId(url); return { type: 'text', text: `\n${website_plain_text}\n`, diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts index c954226e4..4eeac3c6a 100644 --- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts +++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts @@ -90,7 +90,7 @@ export class AgentDocumentManager { */ public initializeFindDocsFreeform() { // Reset collections - this.documentsById.clear(); + //this.documentsById.clear(); try { // Use the LinkManager approach which is proven to work in ChatBox @@ -109,6 +109,7 @@ export class AgentDocumentManager { linkedDocs.forEach((doc: Doc) => { if (doc) { this.processDocument(doc); + console.log('Processed linked document:', doc.id, doc.title, doc.type); } }); @@ -164,6 +165,11 @@ export class AgentDocumentManager { } } + public addCustomId(doc: Doc, id: string) { + doc.id = id; + doc.DOCUMENT_ID_FIELD = id; + } + /** * Process a document by ensuring it has an ID and adding it to the appropriate collections * @param doc The document to process @@ -730,7 +736,7 @@ export class AgentDocumentManager { // Get metadata for all documents const documentsMetadata: Record = {}; for (const doc of this.documentsById.values()) { - documentsMetadata.add(this.extractDocumentMetadata(doc)); + documentsMetadata.add(this.extractDocumentMetadata(doc) ?? 
{ documentId: doc.layoutDoc.id, title: doc.layoutDoc.title, type: doc.layoutDoc.type }); } return { -- cgit v1.2.3-70-g09d2 From 3ef3d40506348d9fd537cc8f4aea975b9770689f Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sun, 27 Apr 2025 13:14:49 -0400 Subject: new attempt with new citation unification --- .../views/nodes/chatbot/agentsystem/Agent.ts | 5 +- .../nodes/chatbot/chatboxcomponents/ChatBox.tsx | 450 +++++++++++++-------- .../nodes/chatbot/tools/DocumentMetadataTool.ts | 16 +- src/client/views/nodes/chatbot/tools/SearchTool.ts | 18 +- src/client/views/nodes/chatbot/types/types.ts | 1 + .../nodes/chatbot/utils/AgentDocumentManager.ts | 168 +++++--- .../views/nodes/chatbot/vectorstore/Vectorstore.ts | 130 ++++-- 7 files changed, 510 insertions(+), 278 deletions(-) (limited to 'src/client/views/nodes/chatbot/utils') diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts index c021d141e..80fdb6533 100644 --- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts +++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts @@ -65,12 +65,9 @@ export class Agent { summaries: () => string, history: () => string, csvData: () => { filename: string; id: string; text: string }[], - addLinkedUrlDoc: (url: string, id: string) => void, getLinkedUrlDocId: (url: string) => string[], createImage: (result: Upload.FileInformation & Upload.InspectionResults, options: DocumentOptions) => void, - // eslint-disable-next-line @typescript-eslint/no-unused-vars createCSVInDash: (url: string, title: string, id: string, data: string) => void, - chatBox: ChatBox, docManager: AgentDocumentManager ) { // Initialize OpenAI client with API key from environment @@ -87,7 +84,7 @@ export class Agent { rag: new RAGTool(this.vectorstore), dataAnalysis: new DataAnalysisTool(csvData), websiteInfoScraper: new WebsiteInfoScraperTool(getLinkedUrlDocId), - searchTool: new SearchTool(addLinkedUrlDoc), + searchTool: new 
SearchTool(this._docManager), noTool: new NoTool(), //imageCreationTool: new ImageCreationTool(createImage), documentMetadata: new DocumentMetadataTool(this._docManager), diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index 43765c1ce..35dbee3e9 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -71,7 +71,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @observable private _citationPopup: { text: string; visible: boolean } = { text: '', visible: false }; // Private properties for managing OpenAI API, vector store, agent, and UI elements - private openai: OpenAI; + private openai!: OpenAI; // Using definite assignment assertion private vectorstore_id: string; private vectorstore: Vectorstore; private agent: Agent; @@ -98,25 +98,34 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { */ constructor(props: FieldViewProps) { super(props); - makeObservable(this); // Enable MobX observables + makeObservable(this); - // Initialize OpenAI, vectorstore, and agent - this.openai = this.initializeOpenAI(); - if (StrCast(this.dataDoc.vectorstore_id) == '') { - this.vectorstore_id = uuidv4(); - this.dataDoc.vectorstore_id = this.vectorstore_id; - } else { - this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id); - } - this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds); + this.messagesRef = React.createRef(); this.docManager = new AgentDocumentManager(this); - this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.getLinkedUrlDocIds, this.createImageInDash, this.createCSVInDash, this, this.docManager); - // Reinitialize the DocumentMetadataTool with a direct reference to this ChatBox instance - // This ensures the tool can properly access documents in the 
same Freeform view - this.agent.reinitializeDocumentMetadataTool(); + // Initialize OpenAI client + this.initializeOpenAI(); + + // Create a unique vectorstore ID for this ChatBox + this.vectorstore_id = uuidv4(); + + // Initialize vectorstore with the document manager + this.vectorstore = new Vectorstore(this.vectorstore_id, this.docManager); + + // Create an agent with the vectorstore + this.agent = new Agent( + this.vectorstore, + this.retrieveSummaries.bind(this), + this.retrieveFormattedHistory.bind(this), + this.retrieveCSVData.bind(this), + this.retrieveDocIds.bind(this), + this.createImageInDash.bind(this), + this.createCSVInDash.bind(this), + this.docManager + ); - this.messagesRef = React.createRef(); + // Add event listeners + this.addScrollListener(); // Reaction to update dataDoc when chat history changes reaction( @@ -140,22 +149,25 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { */ @action addDocToVectorstore = async (newLinkedDoc: Doc) => { - this._uploadProgress = 0; - this._currentStep = 'Initializing...'; - this._isUploadingDocs = true; - try { - // Add the document to the vectorstore + this._isUploadingDocs = true; + + // Process the document first to ensure it has a valid ID + this.docManager.processDocument(newLinkedDoc); + + // Add the document to the vectorstore which will also register chunks await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress); - } catch (error) { - console.error('Error uploading document:', error); - this._currentStep = 'Error during upload'; - } finally { - runInAction(() => { - this._isUploadingDocs = false; - this._uploadProgress = 0; - this._currentStep = ''; - }); + + // No longer needed as documents are tracked by the AgentDocumentManager + // this._linked_docs_to_add.add(newLinkedDoc); + + this._isUploadingDocs = false; + + return true; + } catch (err) { + console.error('Error adding document to vectorstore:', err); + this._isUploadingDocs = false; + return false; } }; @@ -238,7 +250,7 
@@ export class ChatBox extends ViewBoxAnnotatableComponent() { apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true, }; - return new OpenAI(configuration); + this.openai = new OpenAI(configuration); } /** @@ -375,49 +387,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } }; - /** - * Adds a linked document from a URL for future reference and analysis. - * @param url The URL of the document to add. - * @param id The unique identifier for the document. - */ - @action - addLinkedUrlDoc = async (url: string, id: string) => { - const doc = Docs.Create.WebDocument(url, { data_useCors: true }); - this.docManager.addCustomId(doc, id); - const linkDoc = Docs.Create.LinkDocument(this.Document, doc); - LinkManager.Instance.addLink(linkDoc); - - const chunkToAdd = { - chunkId: id, - chunkType: CHUNK_TYPE.URL, - url: url, - }; - - doc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] }); - this.docManager.processDocument(doc); - }; - - /** - * Retrieves the IDs of linked url documents. - * @returns An array of document IDs. - */ - @action - getLinkedUrlDocIds = () => { - const linkedDocs: Doc[] = this.linkedDocs; - const linkedUrlDocIds: string[] = []; - - for (const doc of linkedDocs) { - if (doc.chunk_simpl) { - const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] }; - const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkType === CHUNK_TYPE.URL); - if (foundChunk) { - linkedUrlDocIds.push(foundChunk.chunkId); - } - } - } - return linkedUrlDocIds; - }; - /** * Getter to retrieve the current user's name from the client utils. */ @@ -613,82 +582,224 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { */ @action handleCitationClick = async (citation: Citation) => { - const currentLinkedDocs: Doc[] = this.linkedDocs; - const chunkId = citation.chunk_id; + try { + // Extract values from MobX proxy object if needed + const chunkId = typeof citation.chunk_id === 'object' ? 
(citation.chunk_id as any).toString() : citation.chunk_id; + + // For debugging + console.log('Citation clicked:', { + chunkId, + citation: JSON.stringify(citation, null, 2), + }); - for (const doc of currentLinkedDocs) { - if (doc.chunk_simpl) { - const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] }; - const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId); + // Try to find the document + const linkedDocs = this.linkedDocs; + let doc: Doc | undefined; - if (foundChunk) { - // Handle media chunks specifically + // First try to find the document using the document manager's chunk ID lookup + const parentDocId = this.docManager.getDocIdByChunkId(chunkId); + if (parentDocId) { + doc = this.docManager.getDocument(parentDocId); + console.log(`Found document by chunk ID lookup: ${parentDocId}`); + } - if (doc.ai_type == 'video' || doc.ai_type == 'audio') { - const directMatchSegmentStart = this.getDirectMatchingSegmentStart(doc, citation.direct_text || '', foundChunk.indexes || []); + // If not found, fall back to searching through linked docs (maintains compatibility) + if (!doc) { + for (const linkedDoc of linkedDocs) { + if (linkedDoc.chunk_simpl) { + try { + const docChunkSimpl = JSON.parse(StrCast(linkedDoc.chunk_simpl)) as { chunks: SimplifiedChunk[] }; + const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId); + if (foundChunk) { + doc = linkedDoc; + console.log(`Found document by iterating through linked docs`); + break; + } + } catch (e) { + console.error(`Error parsing chunk_simpl for doc ${linkedDoc.id}:`, e); + } + } + } + } - if (directMatchSegmentStart) { - // Navigate to the segment's start time in the media player - await this.goToMediaTimestamp(doc, directMatchSegmentStart, doc.ai_type); - } else { - console.error('No direct matching segment found for the citation.'); + if (!doc) { + console.warn(`Document not found for citation with chunk_id: ${chunkId}`); + 
return; + } + + // Process the chunk data + let docChunkSimpl: { chunks: SimplifiedChunk[] } = { chunks: [] }; + try { + docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl) || '{"chunks":[]}'); + } catch (e) { + console.error(`Error parsing chunk_simpl for the found document:`, e); + return; + } + + const foundChunk = docChunkSimpl.chunks.find((chunk: SimplifiedChunk) => chunk.chunkId === chunkId); + + // Handle different chunk types + if (foundChunk) { + console.log(`Found chunk in document:`, foundChunk); + + // Handle video chunks + if (foundChunk.chunkType === CHUNK_TYPE.VIDEO) { + if (foundChunk.start_time !== undefined) { + await this.goToMediaTimestamp(doc, foundChunk.start_time, 'video'); + } else { + console.warn('Video chunk missing start_time:', foundChunk); + } + } + // Handle audio chunks - note that we're using string comparison since 'audio' isn't in CHUNK_TYPE enum + else if (String(foundChunk.chunkType).toLowerCase() === 'audio') { + if (foundChunk.start_time !== undefined) { + await this.goToMediaTimestamp(doc, foundChunk.start_time, 'audio'); + } else { + console.warn('Audio chunk missing start_time:', foundChunk); + } + } + // Handle table or image chunks + else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) { + this.handleOtherChunkTypes(foundChunk, citation, doc); + } + // Handle text chunks + else if (foundChunk.chunkType === CHUNK_TYPE.TEXT) { + // Find text from the document's chunks metadata + let chunkText = ''; + + try { + // We already parsed the chunks earlier, so use that + const matchingChunk = docChunkSimpl.chunks.find(c => c.chunkId === foundChunk.chunkId); + if (matchingChunk && 'text' in matchingChunk) { + // If the text property exists on the chunk (even though it's not in the type) + chunkText = String(matchingChunk['text'] || ''); } + } catch (e) { + console.error('Error getting chunk text:', e); + } + + // Default text if none found + if (!chunkText) { + chunkText = 'Text content 
not available'; + } + + this._citationPopup = { + text: chunkText, + visible: true, + }; + } + // Handle URL chunks + else if (foundChunk.chunkType === CHUNK_TYPE.URL) { + if (foundChunk.url) { + // Instead of opening the URL in a new window, show the document in the viewer + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); + console.log(`Navigated to web document with URL: ${foundChunk.url}`); } else { - // Handle other chunk types as before - this.handleOtherChunkTypes(foundChunk, citation, doc); + console.warn('URL chunk missing URL:', foundChunk); } } + } else if (doc?.original_segments) { + // Handle original segments for media files + let original_segments: any[] = []; + try { + original_segments = JSON.parse(StrCast(doc.original_segments)); + } catch (e) { + console.error(`Error parsing original_segments:`, e); + return; + } + + // Check if there's direct text to find in the segments + if (citation.direct_text) { + // Find the segment that contains the direct text + const start = this.getDirectMatchingSegmentStart(doc, citation.direct_text, []); + if (start !== -1) { + await this.goToMediaTimestamp(doc, start, doc.ai_type === 'audio' ? 'audio' : 'video'); + } + } + } else { + console.warn('Unable to find chunk or segments for citation', citation); } + } catch (error) { + console.error('Error handling citation click:', error); } }; + /** + * Finds a matching segment in a document based on text content. 
+ * @param doc The document to search in + * @param citationText The text to find in the document + * @param indexesOfSegments Optional indexes of segments to search in + * @returns The starting timestamp of the matching segment, or -1 if not found + */ getDirectMatchingSegmentStart = (doc: Doc, citationText: string, indexesOfSegments: string[]): number => { - const originalSegments = JSON.parse(StrCast(doc.original_segments!)).map((segment: any, index: number) => ({ - index: index.toString(), - text: segment.text, - start: segment.start, - end: segment.end, - })); - - if (!Array.isArray(originalSegments) || originalSegments.length === 0 || !Array.isArray(indexesOfSegments)) { - return 0; + if (!doc || !citationText) return -1; + + // Get original segments from the document + const original_segments = doc.original_segments ? JSON.parse(StrCast(doc.original_segments)) : []; + + if (!original_segments || !Array.isArray(original_segments) || original_segments.length === 0) { + return -1; } - // Create itemsToSearch array based on indexesOfSegments - const itemsToSearch = indexesOfSegments.map((indexStr: string) => { - const index = parseInt(indexStr, 10); - const segment = originalSegments[index]; - return { text: segment.text, start: segment.start }; - }); + let segments = original_segments; - console.log('Constructed itemsToSearch:', itemsToSearch); + // If specific indexes are provided, filter segments by those indexes + if (indexesOfSegments && indexesOfSegments.length > 0) { + segments = original_segments.filter((segment: any) => indexesOfSegments.includes(segment.index)); + } + + // If no segments match the indexes, use all segments + if (segments.length === 0) { + segments = original_segments; + } - // Helper function to calculate word overlap score + // First try to find an exact match + const exactMatch = segments.find((segment: any) => segment.text && segment.text.includes(citationText)); + + if (exactMatch) { + return exactMatch.start; + } + + // If no 
exact match, find segment with best word overlap const calculateWordOverlap = (text1: string, text2: string): number => { - const words1 = new Set(text1.toLowerCase().split(/\W+/)); - const words2 = new Set(text2.toLowerCase().split(/\W+/)); - const intersection = new Set([...words1].filter(word => words2.has(word))); - return intersection.size / Math.max(words1.size, words2.size); // Jaccard similarity + if (!text1 || !text2) return 0; + + const words1 = text1.toLowerCase().split(/\s+/); + const words2 = text2.toLowerCase().split(/\s+/); + const wordSet1 = new Set(words1); + + let overlap = 0; + for (const word of words2) { + if (wordSet1.has(word)) { + overlap++; + } + } + + // Return percentage of overlap relative to the shorter text + return overlap / Math.min(words1.length, words2.length); }; - // Search for the best matching segment - let bestMatchStart = 0; - let bestScore = 0; - - console.log(`Searching for best match for query: "${citationText}"`); - itemsToSearch.forEach(item => { - const score = calculateWordOverlap(citationText, item.text); - console.log(`Comparing query to segment: "${item.text}" | Score: ${score}`); - if (score > bestScore) { - bestScore = score; - bestMatchStart = item.start; + // Find segment with highest word overlap + let bestMatch = null; + let highestOverlap = 0; + + for (const segment of segments) { + if (!segment.text) continue; + + const overlap = calculateWordOverlap(segment.text, citationText); + if (overlap > highestOverlap) { + highestOverlap = overlap; + bestMatch = segment; } - }); + } - console.log('Best match found with score:', bestScore, '| Start time:', bestMatchStart); + // Only return matches with significant overlap (more than 30%) + if (bestMatch && highestOverlap > 0.3) { + return bestMatch.start; + } - // Return the start time of the best match - return bestMatchStart; + // If no good match found, return the start of the first segment as fallback + return segments.length > 0 ? 
segments[0].start : -1; }; /** @@ -772,7 +883,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { break; case CHUNK_TYPE.CSV: case CHUNK_TYPE.URL: - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }); + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { + console.log(`Showing web document in viewer with URL: ${foundChunk.url}`); + }); break; default: console.error('Unhandled chunk type:', foundChunk.chunkType); @@ -879,6 +992,16 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } }); this.addScrollListener(); + + // Initialize the document manager by finding existing documents + this.docManager.initializeFindDocsFreeform(); + + // If there are stored doc IDs in our list of docs to add, process them + if (this._linked_docs_to_add.size > 0) { + this._linked_docs_to_add.forEach(doc => { + this.docManager.processDocument(doc); + }); + } } /** @@ -892,28 +1015,28 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { /** * Getter that retrieves all linked documents for the current document. */ - @computed - get linkedDocs() { - return LinkManager.Instance.getAllRelatedLinks(this.Document) - .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d); + @computed get linkedDocs(): Doc[] { + const docIds = this.docManager.listDocs(); + const docs: Doc[] = []; + + // Get documents from the document manager using the getDocument method + docIds.forEach(id => { + const doc = this.docManager.getDocument(id); + if (doc) { + docs.push(doc); + } + }); + + return docs; } /** - * Getter that retrieves document IDs of linked documents that have AI-related content. + * Getter that retrieves document IDs of linked documents that have PDF_chunker–parsed content. 
*/ @computed - get docIds() { - return LinkManager.Instance.getAllRelatedLinks(this.Document) - .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d) - .filter(d => { - console.log(d.ai_doc_id); - return d.ai_doc_id; - }) - .map(d => StrCast(d.ai_doc_id)); + get docIds(): string[] { + // Use the document manager to get all document IDs + return Array.from(this.docManager.listDocs()); } /** @@ -921,23 +1044,18 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { */ @computed get summaries(): string { - return ( - LinkManager.Instance.getAllRelatedLinks(this.Document) - .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d) - .filter(d => d.summary) - .map((doc, index) => { - if (PDFCast(doc.data)) { - return `${doc.summary}`; - } else if (CsvCast(doc.data)) { - return `${doc.summary}`; - } else { - return `${index + 1}) ${doc.summary}`; - } - }) - .join('\n') + '\n' - ); + const linkedDocs = Array.from(this.docManager.listDocs()) + .map(id => { + const doc = this.docManager.extractDocumentMetadata(id); + if (doc && doc.fields && (doc.fields.layout.summary || doc.fields.data.summary)) { + return doc.fields.layout.summary || doc.fields.data.summary; + } + return null; + }) + .filter(Boolean) + .join('\n\n'); + + return linkedDocs; } /** @@ -965,7 +1083,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { // Other helper methods for retrieving document data and processing - retrieveSummaries = () => { + retrieveSummaries = (): string => { return this.summaries; }; @@ -973,12 +1091,12 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { return this.linkedCSVs; }; - retrieveFormattedHistory = () => { + retrieveFormattedHistory = (): string => { return this.formattedHistory; }; - retrieveDocIds = () => { - return this.docIds; + retrieveDocIds = (): string[] => { + return 
Array.from(this.docManager.listDocs()); }; /** diff --git a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts index 4b751acc0..e6c2421e5 100644 --- a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts +++ b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts @@ -417,9 +417,9 @@ export class DocumentMetadataTool extends BaseTool = { }; export class SearchTool extends BaseTool { - private _addLinkedUrlDoc: (url: string, id: string) => void; + private _docManager: AgentDocumentManager; private _max_results: number; - constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 3) { + constructor(docManager: AgentDocumentManager, max_results: number = 3) { super(searchToolInfo); - this._addLinkedUrlDoc = addLinkedUrlDoc; + this._docManager = docManager; this._max_results = max_results; } @@ -46,8 +49,13 @@ export class SearchTool extends BaseTool { max_results: this._max_results, })) as { results: { url: string; snippet: string }[] }; const data = results.map((result: { url: string; snippet: string }) => { - const id = uuidv4(); - this._addLinkedUrlDoc(result.url, id); + // Create a web document with the URL + const id = this._docManager.createDocInDash('web', result.url, { + title: `Search Result: ${result.url}`, + text_html: result.snippet, + data_useCors: true, + }); + return { type: 'text' as const, text: `${result.url}${result.snippet}`, diff --git a/src/client/views/nodes/chatbot/types/types.ts b/src/client/views/nodes/chatbot/types/types.ts index 882e74ebb..dcb132ec7 100644 --- a/src/client/views/nodes/chatbot/types/types.ts +++ b/src/client/views/nodes/chatbot/types/types.ts @@ -108,6 +108,7 @@ export interface SimplifiedChunk { start_time?: number; end_time?: number; indexes?: string[]; + text?: string; } export interface AI_Document { diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts 
b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts index 4eeac3c6a..c3beebcde 100644 --- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts +++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts @@ -165,22 +165,18 @@ export class AgentDocumentManager { } } - public addCustomId(doc: Doc, id: string) { - doc.id = id; - doc.DOCUMENT_ID_FIELD = id; - } - /** * Process a document by ensuring it has an ID and adding it to the appropriate collections * @param doc The document to process */ - public processDocument(doc: Doc) { + public processDocument(doc: Doc): string { // Ensure document has a persistent ID const docId = this.ensureDocumentId(doc); // Only add if we haven't already processed this document if (!this.documentsById.has(docId)) { this.documentsById.set(docId, { layoutDoc: doc, dataDoc: doc[DocData] }); } + return docId; } /** @@ -232,7 +228,9 @@ export class AgentDocumentManager { * @param docId The ID of the document to extract metadata from * @returns An object containing the document's metadata */ - public extractDocumentMetadata(doc?: AgentDocument) { + public extractDocumentMetadata(id: string) { + if (!id) return null; + const doc = this.documentsById.get(id); if (!doc) return null; const layoutDoc = doc.layoutDoc; const dataDoc = doc.dataDoc; @@ -729,16 +727,14 @@ export class AgentDocumentManager { */ public getDocumentMetadata(documentId?: string): any { if (documentId) { - const doc = this.documentsById.get(documentId); - // Get metadata for a specific document - return this.extractDocumentMetadata(doc); + console.log(`Returning document metadata for docID, ${documentId}:`, this.extractDocumentMetadata(documentId)); + return this.extractDocumentMetadata(documentId); } else { // Get metadata for all documents const documentsMetadata: Record = {}; - for (const doc of this.documentsById.values()) { - documentsMetadata.add(this.extractDocumentMetadata(doc) ?? 
{ documentId: doc.layoutDoc.id, title: doc.layoutDoc.title, type: doc.layoutDoc.type }); + for (const documentId of this.documentsById.keys()) { + documentsMetadata.add(this.extractDocumentMetadata(documentId)); } - return { documentCount: this.documentsById.size, documents: documentsMetadata, @@ -845,14 +841,15 @@ export class AgentDocumentManager { return Object.values(supportedDocTypes).includes(docType as supportedDocTypes); } /** - * Creates a document in the dashboard. + * Creates a document in the dashboard and returns its ID. + * This is a public API used by tools like SearchTool. * - * @param {string} doc_type - The type of document to create. - * @param {string} data - The data used to generate the document. - * @param {DocumentOptions} options - Configuration options for the document. - * @returns {Promise} A promise that resolves once the document is created and displayed. + * @param docType The type of document to create + * @param data The data for the document + * @param options Optional configuration options + * @returns The ID of the created document */ - createDocInDash = (docType: string, title: string, data: string) => { + public createDocInDash(docType: string, data: string, options?: any): string { // Validate doc_type if (!this.isValidDocType(docType)) { throw new Error(`Invalid document type: ${docType}`); @@ -862,10 +859,10 @@ export class AgentDocumentManager { // Create simple document with just title and data const simpleDoc: parsedDoc = { doc_type: docType, - title: title, + title: options?.title ?? `Untitled Document ${this.documentsById.size + 1}`, data: data, - x: 0, - y: 0, + x: options?.x ?? 0, + y: options?.y ?? 
0, _width: 300, _height: 300, _layout_fitWidth: false, @@ -884,46 +881,111 @@ export class AgentDocumentManager { } }; const doc = this.chatBox.whichDoc(simpleDoc, false); - if (doc) linkAndShowDoc(doc); - return doc; + if (doc) { + linkAndShowDoc(doc); + const id = this.processDocument(doc); + return id; + } else { + throw new Error(`Error creating document. Created document not found.`); + } } catch (error) { throw new Error(`Error creating document: ${error}`); } - }; + } public has(docId: string) { return this.documentsById.has(docId); } - public listDocs() { - // List all available documents in simple format - const docs = Array.from(this.documentsById.entries()).map(([id, doc]) => ({ - id, - title: doc.layoutDoc.title || 'Untitled Document', - type: doc.layoutDoc.type || doc.dataDoc.type || 'Unknown Type', - })); - - if (docs.length === 0) { - return [ - { - type: 'text', - text: 'No documents found in the current view.', - }, - ]; - } - - return [ - { - type: 'text', - text: `Found ${docs.length} document(s) in the current view:\n${JSON.stringify(docs, null, 2)}`, - }, - ]; + /** + * Returns a list of all document IDs in the manager. + * @returns An array of document IDs (strings). + */ + public listDocs(): string[] { + return Array.from(this.documentsById.keys()); + } + + /** + * Adds a document with a custom ID to the manager + * @param doc The document to add + * @param customId The custom ID to assign to the document + * @returns The customId that was assigned + */ + public addCustomId(doc: Doc, customId: string): string { + if (!doc) { + console.error('Cannot add null document with custom ID'); + return ''; + } + + // Set the custom ID in the document's metadata + doc[this.DOCUMENT_ID_FIELD] = customId; + + // Store the document in our map + this.documentsById.set(customId, { + layoutDoc: doc, + dataDoc: doc, + }); + + return customId; } - public createAgentDoc(doc: Doc) { - // Ideally check if Doc is already in there. 
- const agentDoc = { layoutDoc: doc, dataDoc: doc[DocData] }; - this.documentsById.set(this.ensureDocumentId(doc), agentDoc); - return agentDoc; + /** + * Gets a document by its ID + * @param docId The ID of the document to retrieve + * @returns The document if found, undefined otherwise + */ + public getDocument(docId: string): Doc | undefined { + const docInfo = this.documentsById.get(docId); + return docInfo?.layoutDoc; + } + + /** + * Registers chunk IDs associated with a document in the manager + * @param docId The parent document ID + * @param chunkIds Array of chunk IDs associated with this document + */ + public registerChunkIds(docId: string, chunkIds: string[]): void { + // Get the document if it exists + const docInfo = this.documentsById.get(docId); + if (!docInfo) { + console.warn(`Cannot register chunks for unknown document ID: ${docId}`); + return; + } + + // Store chunk IDs on the document for future reference + const doc = docInfo.layoutDoc; + if (!doc.chunk_ids) { + doc.chunk_ids = JSON.stringify(chunkIds); + } else { + // Merge with existing chunk IDs if they exist + const existingIds = JSON.parse(doc.chunk_ids as string); + const updatedIds = [...new Set([...existingIds, ...chunkIds])]; // Remove duplicates + doc.chunk_ids = JSON.stringify(updatedIds); + } + + // Ensure each chunk ID can be linked back to its parent document + chunkIds.forEach(chunkId => { + // Store a mapping from chunk ID to parent document ID + // This allows us to easily find a document by any of its chunk IDs + if (!this.documentsById.has(chunkId)) { + this.documentsById.set(chunkId, { + layoutDoc: doc, + dataDoc: docInfo.dataDoc, + }); + } + }); + } + + /** + * Gets a document ID by a chunk ID + * @param chunkId The chunk ID to look up + * @returns The parent document ID if found + */ + public getDocIdByChunkId(chunkId: string): string | undefined { + const docInfo = this.documentsById.get(chunkId); + if (docInfo) { + return docInfo.layoutDoc[this.DOCUMENT_ID_FIELD] as 
string; + } + return undefined; } } diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts index afd34f28d..4bb61d8b2 100644 --- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts +++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts @@ -15,7 +15,7 @@ import { Networking } from '../../../../Network'; import { AI_Document, CHUNK_TYPE, RAGChunk } from '../types/types'; import OpenAI from 'openai'; import { Embedding } from 'openai/resources'; -import { PineconeEnvironmentVarsNotSupportedError } from '@pinecone-database/pinecone/dist/errors'; +import { AgentDocumentManager } from '../utils/AgentDocumentManager'; dotenv.config(); @@ -29,7 +29,7 @@ export class Vectorstore { private openai: OpenAI; // OpenAI client for generating embeddings. private indexName: string = 'pdf-chatbot'; // Default name for the index. private _id: string; // Unique ID for the Vectorstore instance. - private _doc_ids: () => string[]; // List of document IDs handled by this instance. + private docManager: AgentDocumentManager; // Document manager for handling documents documents: AI_Document[] = []; // Store the documents indexed in the vectorstore. @@ -37,9 +37,9 @@ export class Vectorstore { * Initializes the Pinecone and OpenAI clients, sets up the document ID list, * and initializes the Pinecone index. * @param id The unique identifier for the vectorstore instance. - * @param doc_ids A function that returns a list of document IDs. + * @param docManager An instance of AgentDocumentManager to handle document management. 
*/ - constructor(id: string, doc_ids: () => string[]) { + constructor(id: string, docManager: AgentDocumentManager) { const pineconeApiKey = process.env.PINECONE_API_KEY; if (!pineconeApiKey) { throw new Error('PINECONE_API_KEY is not defined.'); @@ -49,7 +49,7 @@ export class Vectorstore { this.pinecone = new Pinecone({ apiKey: pineconeApiKey }); this.openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, dangerouslyAllowBrowser: true }); this._id = id; - this._doc_ids = doc_ids; + this.docManager = docManager; this.initializeIndex(); } @@ -109,15 +109,25 @@ export class Vectorstore { const isAudioOrVideo = local_file_path.endsWith('.mp3') || local_file_path.endsWith('.mp4'); let result: AI_Document & { doc_id: string }; + if (isAudioOrVideo) { console.log('Processing media file...'); const response = await Networking.PostToServer('/processMediaFile', { fileName: path.basename(local_file_path) }); - const segmentedTranscript = response.condensed; + + // Type assertion to handle the response properties + const typedResponse = response as { + condensed: Array<{ text: string; indexes: string[]; start: number; end: number }>; + full: Array; + summary: string; + }; + + const segmentedTranscript = typedResponse.condensed; console.log(segmentedTranscript); - const summary = response.summary; + const summary = typedResponse.summary; doc.summary = summary; + // Generate embeddings for each chunk - const texts = segmentedTranscript.map((chunk: any) => chunk.text); + const texts = segmentedTranscript.map(chunk => chunk.text); try { const embeddingsResponse = await this.openai.embeddings.create({ @@ -126,10 +136,19 @@ export class Vectorstore { encoding_format: 'float', }); - doc.original_segments = JSON.stringify(response.full); + doc.original_segments = JSON.stringify(typedResponse.full); doc.ai_type = local_file_path.endsWith('.mp3') ? 
'audio' : 'video'; const doc_id = uuidv4(); + // Register the document with the AgentDocumentManager + this.docManager.addCustomId(doc, doc_id); + + // Generate chunk IDs upfront so we can register them + const chunkIds = segmentedTranscript.map(() => uuidv4()); + + // Register all chunk IDs with the document manager + this.docManager.registerChunkIds(doc_id, chunkIds); + // Add transcript and embeddings to metadata result = { doc_id, @@ -137,13 +156,13 @@ export class Vectorstore { file_name: local_file_path, num_pages: 0, summary: '', - chunks: segmentedTranscript.map((chunk: any, index: number) => ({ - id: uuidv4(), + chunks: segmentedTranscript.map((chunk, index) => ({ + id: chunkIds[index], // Use pre-generated chunk ID values: (embeddingsResponse.data as Embedding[])[index].embedding, // Assign embedding metadata: { indexes: chunk.indexes, original_document: local_file_path, - doc_id: doc_id, + doc_id: doc_id, // Ensure doc_id is consistent file_path: local_file_path, start_time: chunk.start, end_time: chunk.end, @@ -159,20 +178,24 @@ export class Vectorstore { } doc.segmented_transcript = JSON.stringify(segmentedTranscript); - // Simplify chunks for storage + // Simplify chunks for storage - ensure simplified chunks use EXACTLY the same IDs const simplifiedChunks = result.chunks.map(chunk => ({ - chunkId: chunk.id, + chunkId: chunk.id, // Use the exact same ID as the full chunk start_time: chunk.metadata.start_time, end_time: chunk.metadata.end_time, indexes: chunk.metadata.indexes, chunkType: CHUNK_TYPE.VIDEO, text: chunk.metadata.text, + doc_id: chunk.metadata.doc_id, // Include parent doc_id for completeness })); doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks }); } else { - // Existing document processing logic remains unchanged + // Process regular document console.log('Processing regular document...'); - const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); + const createDocumentResponse = await 
Networking.PostToServer('/createDocument', { file_path: local_file_path }); + + // Type assertion for the response + const { jobId } = createDocumentResponse as { jobId: string }; while (true) { await new Promise(resolve => setTimeout(resolve, 2000)); @@ -188,6 +211,16 @@ export class Vectorstore { progressCallback(progressResponseJson.progress, progressResponseJson.step); } } + + // Register the document with the AgentDocumentManager + this.docManager.addCustomId(doc, result.doc_id); + + // Collect all chunk IDs + const chunkIds = result.chunks.map(chunk => chunk.id); + + // Register chunks with the document manager + this.docManager.registerChunkIds(result.doc_id, chunkIds); + if (!doc.chunk_simpl) { doc.chunk_simpl = JSON.stringify({ chunks: [] }); } @@ -196,12 +229,13 @@ export class Vectorstore { result.chunks.forEach((chunk: RAGChunk) => { const chunkToAdd = { - chunkId: chunk.id, + chunkId: chunk.id, // Ensure we use the exact same ID startPage: chunk.metadata.start_page, endPage: chunk.metadata.end_page, location: chunk.metadata.location, chunkType: chunk.metadata.type as CHUNK_TYPE, text: chunk.metadata.text, + doc_id: chunk.metadata.doc_id, // Include parent doc_id for consistency }; const new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); new_chunk_simpl.chunks = new_chunk_simpl.chunks.concat(chunkToAdd); @@ -298,39 +332,55 @@ export class Vectorstore { let queryEmbedding = queryEmbeddingResponse.data[0].embedding; - // Extract the embedding from the response. + // Get document IDs from the AgentDocumentManager + const docIds = Array.from(this.docManager.listDocs()); + console.log('Using document IDs for retrieval:', docIds); - console.log(this._doc_ids()); // Query the Pinecone index using the embedding and filter by document IDs. 
+ // We'll query based on document IDs that are registered in the document manager const queryResponse: QueryResponse = await this.index.query({ vector: queryEmbedding, filter: { - doc_id: { $in: this._doc_ids() }, + doc_id: { $in: docIds }, }, topK, includeValues: true, includeMetadata: true, }); - console.log(queryResponse); - - // Map the results into RAGChunks and return them. - return queryResponse.matches.map( - match => - ({ - id: match.id, - values: match.values as number[], - metadata: match.metadata as { - text: string; - type: string; - original_document: string; - file_path: string; - doc_id: string; - location: string; - start_page: number; - end_page: number; - }, - }) as RAGChunk - ); + console.log(`Found ${queryResponse.matches.length} matching chunks`); + + // For each retrieved chunk, ensure its document ID is registered in the document manager + // This maintains compatibility with existing code while ensuring consistency + const processedMatches = queryResponse.matches.map(match => { + const chunk = { + id: match.id, + values: match.values as number[], + metadata: match.metadata as { + text: string; + type: string; + original_document: string; + file_path: string; + doc_id: string; + location: string; + start_page: number; + end_page: number; + }, + } as RAGChunk; + + // Ensure the document manager knows about this chunk + // This is important for maintaining backwards compatibility + if (chunk.id && !this.docManager.getDocIdByChunkId(chunk.id)) { + // If the chunk ID isn't registered but we have a doc_id in metadata + if (chunk.metadata.doc_id && this.docManager.has(chunk.metadata.doc_id)) { + // Register the chunk with its parent document + this.docManager.registerChunkIds(chunk.metadata.doc_id, [chunk.id]); + } + } + + return chunk; + }); + + return processedMatches; } catch (error) { console.error(`Error retrieving chunks: ${error}`); return []; -- cgit v1.2.3-70-g09d2 From 393b7f8286422c933102449eba1ba82874a48896 Mon Sep 17 00:00:00 2001 
From: "A.J. Shulman" Date: Sun, 27 Apr 2025 14:57:39 -0400 Subject: improved consistency across doc types and parsing --- src/client/documents/Documents.ts | 1 + .../views/nodes/chatbot/agentsystem/Agent.ts | 15 +- .../nodes/chatbot/chatboxcomponents/ChatBox.tsx | 176 +++++++++------- .../chatbot/chatboxcomponents/ProgressBar.scss | 40 +++- .../nodes/chatbot/utils/AgentDocumentManager.ts | 234 ++++++++++++++++++++- .../views/nodes/chatbot/vectorstore/Vectorstore.ts | 49 ++--- 6 files changed, 390 insertions(+), 125 deletions(-) (limited to 'src/client/views/nodes/chatbot/utils') diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts index 317bb7feb..f87bd7092 100644 --- a/src/client/documents/Documents.ts +++ b/src/client/documents/Documents.ts @@ -273,6 +273,7 @@ export class DocumentOptions { _layout_reflowHorizontal?: BOOLt = new BoolInfo('permit horizontal resizing with content reflow'); _layout_noSidebar?: BOOLt = new BoolInfo('whether to display the sidebar toggle button'); layout_boxShadow?: string; // box-shadow css string OR "standard" to use dash standard box shadow + _iframe_sandbox?: STRt = new StrInfo('sandbox attributes for iframes in web documents (e.g., allow-scripts, allow-same-origin)'); layout_maxShown?: NUMt = new NumInfo('maximum number of children to display at one time (see multicolumnview)'); _layout_columnWidth?: NUMt = new NumInfo('width of table column', false); _layout_columnCount?: NUMt = new NumInfo('number of columns in a masonry view'); diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts index 80fdb6533..24471bf5b 100644 --- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts +++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts @@ -41,7 +41,6 @@ export class Agent { private interMessages: AgentMessage[] = []; private vectorstore: Vectorstore; private _history: () => string; - private _summaries: () => string; private _csvData: () => 
{ filename: string; id: string; text: string }[]; private actionNumber: number = 0; private thoughtNumber: number = 0; @@ -54,11 +53,13 @@ export class Agent { /** * The constructor initializes the agent with the vector store and toolset, and sets up the OpenAI client. * @param _vectorstore Vector store instance for document storage and retrieval. - * @param summaries A function to retrieve document summaries. + * @param summaries A function to retrieve document summaries (deprecated, now using docManager directly). * @param history A function to retrieve chat history. * @param csvData A function to retrieve CSV data linked to the assistant. - * @param addLinkedUrlDoc A function to add a linked document from a URL. + * @param getLinkedUrlDocId A function to get document IDs from URLs. + * @param createImage A function to create images in the dashboard. * @param createCSVInDash A function to create a CSV document in the dashboard. + * @param docManager The document manager instance. */ constructor( _vectorstore: Vectorstore, @@ -74,7 +75,6 @@ export class Agent { this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); this.vectorstore = _vectorstore; this._history = history; - this._summaries = summaries; this._csvData = csvData; this._docManager = docManager; @@ -124,7 +124,12 @@ export class Agent { // Retrieve chat history and generate system prompt const chatHistory = this._history(); - const systemPrompt = getReactPrompt(Object.values(this.tools), this._summaries, chatHistory); + // Get document summaries directly from document manager + const documentSummaries = this._docManager.getAllDocumentSummaries(); + // Create a function that returns document summaries for the prompt + const getSummaries = () => documentSummaries; + // Generate the system prompt with the summaries + const systemPrompt = getReactPrompt(Object.values(this.tools), getSummaries, chatHistory); // Initialize intermediate messages this.interMessages = [{ 
role: 'system', content: systemPrompt }]; diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index b11bf7405..ba30cb42b 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -18,7 +18,7 @@ import { Doc, DocListCast, Opt } from '../../../../../fields/Doc'; import { DocData, DocViews } from '../../../../../fields/DocSymbols'; import { RichTextField } from '../../../../../fields/RichTextField'; import { ScriptField } from '../../../../../fields/ScriptField'; -import { CsvCast, DocCast, NumCast, PDFCast, RTFCast, StrCast } from '../../../../../fields/Types'; +import { CsvCast, DocCast, NumCast, PDFCast, RTFCast, StrCast, VideoCast, AudioCast } from '../../../../../fields/Types'; import { DocUtils } from '../../../../documents/DocUtils'; import { CollectionViewType, DocumentType } from '../../../../documents/DocumentTypes'; import { Docs, DocumentOptions } from '../../../../documents/Documents'; @@ -48,7 +48,14 @@ import { AgentDocumentManager } from '../utils/AgentDocumentManager'; dotenv.config(); -export type parsedDocData = { doc_type: string; data: unknown }; +export type parsedDocData = { + doc_type: string; + data: unknown; + _disable_resource_loading?: boolean; + _sandbox_iframe?: boolean; + _iframe_sandbox?: string; + data_useCors?: boolean; +}; export type parsedDoc = DocumentOptions & parsedDocData; /** * ChatBox is the main class responsible for managing the interaction between the user and the assistant, @@ -150,7 +157,14 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @action addDocToVectorstore = async (newLinkedDoc: Doc) => { try { - this._isUploadingDocs = true; + const isAudioOrVideo = VideoCast(newLinkedDoc.data)?.url?.pathname || AudioCast(newLinkedDoc.data)?.url?.pathname; + + // Set UI state to show the processing overlay + runInAction(() => { + 
this._isUploadingDocs = true; + this._uploadProgress = 0; + this._currentStep = isAudioOrVideo ? 'Preparing media file...' : 'Processing document...'; + }); // Process the document first to ensure it has a valid ID this.docManager.processDocument(newLinkedDoc); @@ -158,15 +172,36 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { // Add the document to the vectorstore which will also register chunks await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress); - // No longer needed as documents are tracked by the AgentDocumentManager - // this._linked_docs_to_add.add(newLinkedDoc); + // Give a slight delay to show the completion message + if (this._uploadProgress === 100) { + await new Promise(resolve => setTimeout(resolve, 1000)); + } - this._isUploadingDocs = false; + // Reset UI state + runInAction(() => { + this._isUploadingDocs = false; + this._uploadProgress = 0; + this._currentStep = ''; + }); return true; } catch (err) { console.error('Error adding document to vectorstore:', err); - this._isUploadingDocs = false; + + // Show error in UI + runInAction(() => { + this._currentStep = `Error: ${err instanceof Error ? 
err.message : 'Failed to process document'}`; + }); + + await new Promise(resolve => setTimeout(resolve, 2000)); + + // Reset UI state + runInAction(() => { + this._isUploadingDocs = false; + this._uploadProgress = 0; + this._currentStep = ''; + }); + return false; } }; @@ -178,8 +213,15 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { */ @action updateProgress = (progress: number, step: string) => { - this._uploadProgress = progress; + // Ensure progress is within expected bounds + const validProgress = Math.min(Math.max(0, progress), 100); + this._uploadProgress = validProgress; this._currentStep = step; + + // Force UI update + if (process.env.NODE_ENV !== 'production') { + console.log(`Progress: ${validProgress}%, Step: ${step}`); + } }; /** @@ -453,7 +495,19 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { case supportedDocTypes.image: return Docs.Create.ImageDocument(data as string, options); case supportedDocTypes.equation: return Docs.Create.EquationDocument(data as string, options); case supportedDocTypes.notetaking: return Docs.Create.NoteTakingDocument([], options); - case supportedDocTypes.web: return Docs.Create.WebDocument(data as string, { ...options, data_useCors: true }); + case supportedDocTypes.web: + // Create web document with enhanced safety options + const webOptions = { + ...options, + data_useCors: true + }; + + // If iframe_sandbox was passed from AgentDocumentManager, add it to the options + if ('_iframe_sandbox' in options) { + (webOptions as any)._iframe_sandbox = options._iframe_sandbox; + } + + return Docs.Create.WebDocument(data as string, webOptions); case supportedDocTypes.dataviz: return Docs.Create.DataVizDocument('/users/rz/Downloads/addresses.csv', options); case supportedDocTypes.pdf: return Docs.Create.PdfDocument(data as string, options); case supportedDocTypes.video: return Docs.Create.VideoDocument(data as string, options); @@ -607,65 +661,36 @@ export class ChatBox extends 
ViewBoxAnnotatableComponent() { return; } - // Process the chunk data - let docChunkSimpl: { chunks: SimplifiedChunk[] } = { chunks: [] }; - try { - docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl) || '{"chunks":[]}'); - } catch (e) { - console.error(`Error parsing chunk_simpl for the found document:`, e); + // Get the simplified chunk using the document manager + const foundChunk = this.docManager.getSimplifiedChunkById(doc, chunkId); + if (!foundChunk) { + console.warn(`Chunk not found in document for chunk ID: ${chunkId}`); + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); return; } - const foundChunk = docChunkSimpl.chunks.find((chunk: SimplifiedChunk) => chunk.chunkId === chunkId); + console.log(`Found chunk in document:`, foundChunk); // Handle different chunk types - if (foundChunk) { - console.log(`Found chunk in document:`, foundChunk); - if (foundChunk.chunkType === CHUNK_TYPE.AUDIO || foundChunk.chunkType === CHUNK_TYPE.VIDEO) { - const directMatchSegmentStart = this.getDirectMatchingSegmentStart(doc, citation.direct_text || '', foundChunk.indexes || []); - if (directMatchSegmentStart) { - await this.goToMediaTimestamp(doc, directMatchSegmentStart, foundChunk.chunkType); - } else { - console.error('No direct matching segment found for the citation.'); - } - } else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) { - this.handleOtherChunkTypes(foundChunk, citation, doc); - } else if (foundChunk.chunkType === CHUNK_TYPE.TEXT) { - // Find text from the document's chunks metadata - let chunkText = ''; - - try { - // We already parsed the chunks earlier, so use that - const matchingChunk = docChunkSimpl.chunks.find(c => c.chunkId === foundChunk.chunkId); - if (matchingChunk && 'text' in matchingChunk) { - // If the text property exists on the chunk (even though it's not in the type) - chunkText = String(matchingChunk['text'] || ''); - } - } catch (e) { - console.error('Error 
getting chunk text:', e); - } - - // Default text if none found - if (!chunkText) { - chunkText = 'Text content not available'; - } - - this._citationPopup = { - text: chunkText, - visible: true, - }; - } - // Handle URL chunks - else if (foundChunk.chunkType === CHUNK_TYPE.URL) { - if (foundChunk.url) { - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); - console.log(`Navigated to web document with URL: ${foundChunk.url}`); - } else { - console.warn('URL chunk missing URL:', foundChunk); - } + if (foundChunk.chunkType === CHUNK_TYPE.AUDIO || foundChunk.chunkType === CHUNK_TYPE.VIDEO) { + const directMatchSegmentStart = this.getDirectMatchingSegmentStart(doc, citation.direct_text || '', foundChunk.indexes || []); + if (directMatchSegmentStart) { + await this.goToMediaTimestamp(doc, directMatchSegmentStart, foundChunk.chunkType); + } else { + console.error('No direct matching segment found for the citation.'); } + } else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) { + this.handleOtherChunkTypes(foundChunk, citation, doc); } else { - console.warn('Navigating to doc. Unable to find chunk or segments for citation', citation); + // Show the chunk text in citation popup + let chunkText = foundChunk.text || 'Text content not available'; + + this._citationPopup = { + text: chunkText, + visible: true, + }; + + // Also navigate to the document DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); } } catch (error) { @@ -683,8 +708,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { getDirectMatchingSegmentStart = (doc: Doc, citationText: string, indexesOfSegments: string[]): number => { if (!doc || !citationText) return -1; - // Get original segments from the document - const original_segments = doc.original_segments ? 
JSON.parse(StrCast(doc.original_segments)) : []; + // Get original segments using document manager + const original_segments = this.docManager.getOriginalSegments(doc); if (!original_segments || !Array.isArray(original_segments) || original_segments.length === 0) { return -1; @@ -993,18 +1018,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { */ @computed get summaries(): string { - const linkedDocs = Array.from(this.docManager.listDocs()) - .map(id => { - const doc = this.docManager.extractDocumentMetadata(id); - if (doc && doc.fields && (doc.fields.layout.summary || doc.fields.data.summary)) { - return doc.fields.layout.summary || doc.fields.data.summary; - } - return null; - }) - .filter(Boolean) - .join('\n\n'); - - return linkedDocs; + // Use the document manager to get all summaries + return this.docManager.getAllDocumentSummaries(); } /** @@ -1033,7 +1048,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { // Other helper methods for retrieving document data and processing retrieveSummaries = (): string => { - return this.summaries; + return this.docManager.getAllDocumentSummaries(); }; retrieveCSVData = () => { @@ -1068,8 +1083,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { {this._isUploadingDocs && (
- -
{this._currentStep}
+
+
+
+
+
{Math.round(this._uploadProgress)}%
+
{this._currentStep}
+
)} diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss index ff5be4a38..3a8334695 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss @@ -58,12 +58,48 @@ flex-direction: column; align-items: center; text-align: center; + width: 80%; + max-width: 400px; + background-color: white; + padding: 20px; + border-radius: 8px; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15); } -.step-name { +.progress-bar-wrapper { + width: 100%; + height: 12px; + background-color: #e0e0e0; + border-radius: 6px; + overflow: hidden; + margin-bottom: 10px; +} + +.progress-bar { + height: 100%; + background-color: #4a90e2; + border-radius: 6px; + transition: width 0.5s ease; +} + +.progress-details { + display: flex; + flex-direction: column; + align-items: center; + width: 100%; +} + +.progress-percentage { font-size: 18px; + font-weight: bold; color: #333; + margin-bottom: 5px; +} + +.step-name { + font-size: 16px; + color: #666; text-align: center; width: 100%; - margin-top: -10px; // Adjust to move the text closer to the spinner + margin-top: 5px; } diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts index c3beebcde..cff8380db 100644 --- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts +++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts @@ -14,6 +14,8 @@ import { parsedDoc } from '../chatboxcomponents/ChatBox'; import { faThumbTackSlash } from '@fortawesome/free-solid-svg-icons'; import { DocumentManager } from '../../../../util/DocumentManager'; import { DocumentView } from '../../DocumentView'; +import { RAGChunk, CHUNK_TYPE } from '../types/types'; +import { runInAction } from 'mobx'; /** * Interface representing a document in the freeform view @@ -869,20 +871,43 @@ export class 
AgentDocumentManager { _layout_autoHeight: true, }; - // Use the chatBox's createDocInDash method to create and link the document + // Additional handling for web documents + if (docType === 'web') { + // For web documents, don't sanitize the URL here + // Instead, set properties to handle content safely when loaded + simpleDoc._disable_resource_loading = true; + simpleDoc._sandbox_iframe = true; + simpleDoc.data_useCors = true; + + // Specify a more permissive sandbox to allow content to render properly + // but still maintain security + simpleDoc._iframe_sandbox = 'allow-same-origin allow-scripts allow-popups allow-forms'; + } + + // Use the chatBox's createDocInDash method to create the document if (!this.chatBox) { throw new Error('ChatBox instance not available for creating document'); } - const linkAndShowDoc = (doc: Opt) => { - if (doc) { - LinkManager.Instance.addLink(Docs.Create.LinkDocument(this.chatBoxDocument!, doc)); - this.chatBox._props.addDocument?.(doc); - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); - } - }; + const doc = this.chatBox.whichDoc(simpleDoc, false); if (doc) { - linkAndShowDoc(doc); + // Use MobX runInAction to properly modify observable state + runInAction(() => { + if (this.chatBoxDocument && doc) { + // Create link and add it to the document system + const linkDoc = Docs.Create.LinkDocument(this.chatBoxDocument, doc); + LinkManager.Instance.addLink(linkDoc); + + // Add document to view + this.chatBox._props.addDocument?.(doc); + + // Show document - defer actual display to prevent immediate resource loading + setTimeout(() => { + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); + }, 100); + } + }); + const id = this.processDocument(doc); return id; } else { @@ -893,6 +918,62 @@ export class AgentDocumentManager { } } + /** + * Sanitizes web content to prevent errors with external resources + * @param content The web content to sanitize + * @returns Sanitized 
content + */ + private sanitizeWebContent(content: string): string { + if (!content) return content; + + try { + // Replace problematic resource references that might cause errors + const sanitized = content + // Remove preload links that might cause errors + .replace(/]*rel=["']preload["'][^>]*>/gi, '') + // Remove map file references + .replace(/\/\/# sourceMappingURL=.*\.map/gi, '') + // Remove external CSS map files references + .replace(/\/\*# sourceMappingURL=.*\.css\.map.*\*\//gi, '') + // Add sandbox to iframes + .replace(/