From a99d8df26d331d87bae4f27cd52ce5ec4d97fa7a Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Thu, 3 Apr 2025 11:45:02 -0400 Subject: attempt --- .../views/nodes/chatbot/chatboxcomponents/ChatBox.tsx | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'src/client/views/nodes/chatbot/chatboxcomponents') diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index 6e9307d37..065c2780c 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -43,6 +43,7 @@ import MessageComponentBox from './MessageComponent'; import { ProgressBar } from './ProgressBar'; import { OpenWhere } from '../../OpenWhere'; import { Upload } from '../../../../../server/SharedMediaTypes'; +import { DocumentMetadataTool } from '../tools/DocumentMetadataTool'; dotenv.config(); @@ -106,7 +107,21 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id); } this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds); - this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.createImageInDash, this.createDocInDash, this.createCSVInDash); + this.agent = new Agent( + this.vectorstore, + this.retrieveSummaries, + this.retrieveFormattedHistory, + this.retrieveCSVData, + this.addLinkedUrlDoc, + this.createImageInDash, + this.createDocInDash, + this.createCSVInDash + ); + + // Reinitialize the DocumentMetadataTool with a direct reference to this ChatBox instance + // This ensures the tool can properly access documents in the same Freeform view + this.agent.reinitializeDocumentMetadataTool(this); + this.messagesRef = React.createRef(); // Reaction to update dataDoc when chat history changes @@ -408,7 +423,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { if (doc) { LinkManager.Instance.addLink(Docs.Create.LinkDocument(this.Document, doc)); this._props.addDocument?.(doc); - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}).then(() => this.addCSVForAnalysis(doc, id)); + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => this.addCSVForAnalysis(doc, id)); } }); -- cgit v1.2.3-70-g09d2 From fa0b8fcd800e5a765a6db2681807986463544405 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Fri, 4 Apr 2025 11:43:58 -0400 Subject: specifying note document and removing unnecessary tools --- .../nodes/chatbot/chatboxcomponents/ChatBox.tsx | 4 +- .../nodes/chatbot/tools/CreateDocumentTool.ts | 497 --------------------- .../nodes/chatbot/tools/CreateTextDocumentTool.ts | 57 --- .../nodes/chatbot/tools/DocumentMetadataTool.ts | 3 +- src/client/views/nodes/chatbot/types/tool_types.ts | 26 ++ 5 files changed, 30 insertions(+), 557 deletions(-) delete mode 100644 src/client/views/nodes/chatbot/tools/CreateDocumentTool.ts delete mode 100644 src/client/views/nodes/chatbot/tools/CreateTextDocumentTool.ts (limited to 'src/client/views/nodes/chatbot/chatboxcomponents') diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index 065c2780c..3152fe923 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -35,7 +35,7 @@ import { PDFBox } from '../../PDFBox'; import { ScriptingBox } from '../../ScriptingBox'; import { VideoBox } from '../../VideoBox'; import { Agent } from '../agentsystem/Agent'; -import { supportedDocTypes } from '../tools/CreateDocumentTool'; +import { supportedDocTypes } from '../types/tool_types'; import { ASSISTANT_ROLE, AssistantMessage, CHUNK_TYPE, Citation, ProcessingInfo, SimplifiedChunk, TEXT_TYPE } from '../types/types'; import { Vectorstore } from '../vectorstore/Vectorstore'; import './ChatBox.scss'; @@ -461,7 +461,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { const ndoc = (() => { switch (doc.doc_type) { default: - case supportedDocTypes.text: return Docs.Create.TextDocument(data as string, options); + case supportedDocTypes.note: return Docs.Create.TextDocument(data as string, options); case supportedDocTypes.comparison: return this.createComparison(JSON.parse(data as string) as parsedDoc[], options); case supportedDocTypes.flashcard: return this.createFlashcard(JSON.parse(data as string) as parsedDoc[], options); case supportedDocTypes.deck: return this.createDeck(JSON.parse(data as string) as parsedDoc[], options); diff --git a/src/client/views/nodes/chatbot/tools/CreateDocumentTool.ts b/src/client/views/nodes/chatbot/tools/CreateDocumentTool.ts deleted file mode 100644 index b3bf6dfb2..000000000 --- a/src/client/views/nodes/chatbot/tools/CreateDocumentTool.ts +++ /dev/null @@ -1,497 +0,0 @@ -import { BaseTool } from './BaseTool'; -import { Observation } from '../types/types'; -import { Parameter, ParametersType, ToolInfo } from '../types/tool_types'; -import { parsedDoc } from '../chatboxcomponents/ChatBox'; -import { CollectionViewType } from '../../../../documents/DocumentTypes'; - -/** - * List of supported document types that can be created via text LLM. - */ -export enum supportedDocTypes { - flashcard = 'flashcard', - note = 'note', - html = 'html', - equation = 'equation', - functionplot = 'functionplot', - dataviz = 'dataviz', - notetaking = 'notetaking', - audio = 'audio', - video = 'video', - pdf = 'pdf', - rtf = 'rtf', - message = 'message', - collection = 'collection', - image = 'image', - deck = 'deck', - web = 'web', - comparison = 'comparison', - diagram = 'diagram', - script = 'script', -} -/** - * Tthe CreateDocTool class is responsible for creating - * documents of various types (e.g., text, flashcards, collections) and organizing them in a - * structured manner. The tool supports creating dashboards with diverse document types and - * ensures proper placement of documents without overlap. - */ - -// Example document structure for various document types -const example = [ - { - doc_type: supportedDocTypes.equation, - title: 'quadratic', - data: 'x^2 + y^2 = 3', - _width: 300, - _height: 300, - x: 0, - y: 0, - }, - { - doc_type: supportedDocTypes.collection, - title: 'Advanced Biology', - data: [ - { - doc_type: supportedDocTypes.text, - title: 'Cell Structure', - data: 'Cells are the basic building blocks of all living organisms.', - _width: 300, - _height: 300, - x: 500, - y: 0, - }, - ], - backgroundColor: '#00ff00', - _width: 600, - _height: 600, - x: 600, - y: 0, - type_collection: 'tree', - }, - { - doc_type: supportedDocTypes.image, - title: 'experiment', - data: 'https://plus.unsplash.com/premium_photo-1694819488591-a43907d1c5cc?q=80&w=2628&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D', - _width: 300, - _height: 300, - x: 600, - y: 300, - }, - { - doc_type: supportedDocTypes.deck, - title: 'Chemistry', - data: [ - { - doc_type: supportedDocTypes.flashcard, - title: 'Photosynthesis', - data: [ - { - doc_type: supportedDocTypes.text, - title: 'front_Photosynthesis', - data: 'What is photosynthesis?', - _width: 300, - _height: 300, - x: 100, - y: 600, - }, - { - doc_type: supportedDocTypes.text, - title: 'back_photosynthesis', - data: 'The process by which plants make food.', - _width: 300, - _height: 300, - x: 100, - y: 700, - }, - ], - backgroundColor: '#00ff00', - _width: 300, - _height: 300, - x: 300, - y: 1000, - }, - { - doc_type: supportedDocTypes.flashcard, - title: 'Photosynthesis', - data: [ - { - doc_type: supportedDocTypes.text, - title: 'front_Photosynthesis', - data: 'What is photosynthesis?', - _width: 300, - _height: 300, - x: 200, - y: 800, - }, - { - doc_type: supportedDocTypes.text, - title: 'back_photosynthesis', - data: 'The process by which plants make food.', - _width: 300, - _height: 300, - x: 100, - y: -100, - }, - ], - backgroundColor: '#00ff00', - _width: 300, - _height: 300, - x: 10, - y: 70, - }, - ], - backgroundColor: '#00ff00', - _width: 600, - _height: 600, - x: 200, - y: 800, - }, - { - doc_type: supportedDocTypes.web, - title: 'Brown University Wikipedia', - data: 'https://en.wikipedia.org/wiki/Brown_University', - _width: 300, - _height: 300, - x: 1000, - y: 2000, - }, - { - doc_type: supportedDocTypes.comparison, - title: 'WWI vs. WWII', - data: [ - { - doc_type: supportedDocTypes.text, - title: 'WWI', - data: 'From 1914 to 1918, fighting took place across several continents, at sea and, for the first time, in the air.', - _width: 300, - _height: 300, - x: 100, - y: 100, - }, - { - doc_type: supportedDocTypes.text, - title: 'WWII', - data: 'A devastating global conflict spanning from 1939 to 1945, saw the Allied powers fight against the Axis powers.', - _width: 300, - _height: 300, - x: 100, - y: 100, - }, - ], - _width: 300, - _height: 300, - x: 100, - y: 100, - }, - { - doc_type: supportedDocTypes.collection, - title: 'Science Collection', - data: [ - { - doc_type: supportedDocTypes.flashcard, - title: 'Photosynthesis', - data: [ - { - doc_type: supportedDocTypes.text, - title: 'front_Photosynthesis', - data: 'What is photosynthesis?', - _width: 300, - _height: 300, - }, - { - doc_type: supportedDocTypes.text, - title: 'back_photosynthesis', - data: 'The process by which plants make food.', - _width: 300, - _height: 300, - }, - ], - backgroundColor: '#00ff00', - _width: 300, - _height: 300, - }, - { - doc_type: supportedDocTypes.web, - title: 'Brown University Wikipedia', - data: 'https://en.wikipedia.org/wiki/Brown_University', - _width: 300, - _height: 300, - x: 1100, - y: 1100, - }, - { - doc_type: supportedDocTypes.text, - title: 'Water Cycle', - data: 'The continuous movement of water on, above, and below the Earth’s surface.', - _width: 300, - _height: 300, - x: 1500, - y: 500, - }, - { - doc_type: supportedDocTypes.collection, - title: 'Advanced Biology', - data: [ - { - doc_type: 'text', - title: 'Cell Structure', - data: 'Cells are the basic building blocks of all living organisms.', - _width: 300, - _height: 300, - }, - ], - backgroundColor: '#00ff00', - _width: 600, - _height: 600, - x: 1100, - y: 500, - type_collection: 'stacking', - }, - ], - _width: 600, - _height: 600, - x: 500, - y: 500, - type_collection: 'carousel', - }, -]; - -// Stringify the entire structure for transmission if needed -const finalJsonString = JSON.stringify(example); - -const standardOptions = ['title', 'backgroundColor']; -/** - * Description of document options and data field for each type. - */ -const documentTypesInfo: { [key in supportedDocTypes]: { options: string[]; dataDescription: string } } = { - comparison: { - options: [...standardOptions, 'fontColor', 'text_align'], - dataDescription: 'an array of two documents of any kind that can be compared.', - }, - deck: { - options: [...standardOptions, 'fontColor', 'text_align'], - dataDescription: 'an array of flashcard docs', - }, - flashcard: { - options: [...standardOptions, 'fontColor', 'text_align'], - dataDescription: 'an array of two strings. the first string contains a question, and the second string contains an answer', - }, - text: { - options: [...standardOptions, 'fontColor', 'text_align'], - dataDescription: 'The text content of the document.', - }, - web: { - options: [], - dataDescription: 'A URL to a webpage. Example: https://en.wikipedia.org/wiki/Brown_University', - }, - html: { - options: [], - dataDescription: 'The HTML-formatted text content of the document.', - }, - equation: { - options: [...standardOptions, 'fontColor'], - dataDescription: 'The equation content represented as a MathML string.', - }, - functionplot: { - options: [...standardOptions, 'function_definition'], - dataDescription: 'The function definition(s) for plotting. Provide as a string or array of function definitions.', - }, - dataviz: { - options: [...standardOptions, 'chartType'], - dataDescription: 'A string of comma-separated values representing the CSV data.', - }, - notetaking: { - options: standardOptions, - dataDescription: 'An array of related text documents with small amounts of text.', - }, - rtf: { - options: standardOptions, - dataDescription: 'The rich text content in RTF format.', - }, - image: { - options: standardOptions, - dataDescription: `A url string that must end with '.png', '.jpeg', '.gif', or '.jpg'`, - }, - pdf: { - options: standardOptions, - dataDescription: 'the pdf content as a PDF file url.', - }, - audio: { - options: standardOptions, - dataDescription: 'The audio content as a file url.', - }, - video: { - options: standardOptions, - dataDescription: 'The video content as a file url.', - }, - message: { - options: standardOptions, - dataDescription: 'The message content of the document.', - }, - diagram: { - options: standardOptions, - dataDescription: 'diagram content as a text string in Mermaid format.', - }, - script: { - options: standardOptions, - dataDescription: 'The compilable JavaScript code. Use this for creating scripts.', - }, - collection: { - options: [...standardOptions, 'type_collection'], - dataDescription: 'A collection of Docs represented as an array.', - }, -}; - -// Parameters for creating individual documents -const createDocToolParams: { name: string; type: 'string' | 'number' | 'boolean' | 'string[]' | 'number[]'; description: string; required: boolean }[] = [ - { - name: 'data', - type: 'string', // Accepts either string or array, supporting individual and nested data - description: - 'the data that describes the Document contents. For collections this is an' + - `Array of documents in stringified JSON format. Each item in the array should be an individual stringified JSON object. ` + - `Creates any type of document with the provided options and data. Supported document types are: ${Object.keys(documentTypesInfo).join(', ')}. - dataviz is a csv table tool, so for CSVs, use dataviz. Here are the options for each type: - ` + - Object.entries(documentTypesInfo) - .map( - ([doc_type, info]) => - ` - ${info.dataDescription} - ` + - info.options.map(option => ``).join('\n') + - ` - - ` - ) - .join('\n') + - ` An example of the structure of a collection is:` + - finalJsonString, // prettier-ignore, - required: true, - }, - { - name: 'doc_type', - type: 'string', - description: `The type of the document. Options: ${Object.keys(documentTypesInfo).join(',')}.`, - required: true, - }, - { - name: 'title', - type: 'string', - description: 'The title of the document.', - required: true, - }, - { - name: 'x', - type: 'number', - description: 'The x location of the document; 0 <= x.', - required: true, - }, - { - name: 'y', - type: 'number', - description: 'The y location of the document; 0 <= y.', - required: true, - }, - { - name: 'backgroundColor', - type: 'string', - description: 'The background color of the document as a hex string.', - required: false, - }, - { - name: 'fontColor', - type: 'string', - description: 'The font color of the document as a hex string.', - required: false, - }, - { - name: '_width', - type: 'number', - description: 'The width of the document in pixels.', - required: true, - }, - { - name: '_height', - type: 'number', - description: 'The height of the document in pixels.', - required: true, - }, - { - name: 'type_collection', - type: 'string', - description: `the visual style for a collection doc. Options include: ${Object.values(CollectionViewType).join(',')}.`, - required: false, - }, -] as const; - -type CreateDocToolParamsType = typeof createDocToolParams; - -const createDocToolInfo: ToolInfo = { - name: 'createDoc', - description: `Creates one or more documents that best fit the user’s request. - If the user requests a "dashboard," first call the search tool and then generate a variety of document types individually, with absolutely a minimum of 20 documents - with two stacks of flashcards that are small and it should have a couple nested freeform collections of things, each with different content and color schemes. - For example, create multiple individual documents, including ${Object.keys(documentTypesInfo) - .map(t => '"' + t + '"') - .join(',')} - If the "doc_type" parameter is missing, set it to an empty string (""). - Use Decks instead of Flashcards for dashboards. Decks should have at least three flashcards. - Really think about what documents are useful to the user. If they ask for a dashboard about the skeletal system, include flashcards, as they would be helpful. - Arrange the documents in a grid layout, ensuring that the x and y coordinates are calculated so no documents overlap but they should be directly next to each other with 20 padding in between. - Take into account the width and height of each document, spacing them appropriately to prevent collisions. - Use a systematic approach, such as placing each document in a grid cell based on its order, where cell dimensions match the document dimensions plus a fixed margin for spacing. - Do not nest all documents within a single collection unless explicitly requested by the user. - Instead, create a set of independent documents with diverse document types. Each type should appear separately unless specified otherwise. - Use the "data" parameter for document content and include title, color, and document dimensions. - Ensure web documents use URLs from the search tool if relevant. Each document in a dashboard should be unique and well-differentiated in type and content, - without repetition of similar types in any single collection. - When creating a dashboard, ensure that it consists of a broad range of document types. - Include a variety of documents, such as text, web, deck, comparison, image, and equation documents, - each with distinct titles and colors, following the user’s preferences. - Do not overuse collections or nest all document types within a single collection; instead, represent document types individually. Use this example for reference: - ${finalJsonString} . - Which documents are created should be random with different numbers of each document type and different for each dashboard. - Must use search tool before creating a dashboard.`, - parameterRules: createDocToolParams, - citationRules: 'No citation needed.', -}; - -// Tool class for creating documents -export class CreateDocTool extends BaseTool< - { - name: string; - type: 'string' | 'number' | 'boolean' | 'string[]' | 'number[]'; - description: string; - required: boolean; - }[] -> { - private _addLinkedDoc: (doc: parsedDoc) => void; - - constructor(addLinkedDoc: (doc: parsedDoc) => void) { - super(createDocToolInfo); - this._addLinkedDoc = addLinkedDoc; - } - - override inputValidator(inputParam: ParametersType) { - return !!inputParam.data; - } - // Executes the tool logic for creating documents - async execute( - args: ParametersType< - { - name: 'string'; - type: 'string' | 'number' | 'boolean' | 'string[]' | 'number[]'; - description: 'string'; - required: boolean; - }[] - > - ): Promise { - try { - const parsedDocs = args instanceof Array ? args : Object.keys(args).length === 1 && 'data' in args ? JSON.parse(args.data as string) : [args]; - parsedDocs.forEach((pdoc: parsedDoc) => this._addLinkedDoc({ ...pdoc, _layout_fitWidth: false, _layout_autoHeight: true })); - return [{ type: 'text', text: 'Created document.' }]; - } catch (error) { - return [{ type: 'text', text: 'Error creating text document, ' + error }]; - } - } -} diff --git a/src/client/views/nodes/chatbot/tools/CreateTextDocumentTool.ts b/src/client/views/nodes/chatbot/tools/CreateTextDocumentTool.ts deleted file mode 100644 index 16dc938bb..000000000 --- a/src/client/views/nodes/chatbot/tools/CreateTextDocumentTool.ts +++ /dev/null @@ -1,57 +0,0 @@ -import { parsedDoc } from '../chatboxcomponents/ChatBox'; -import { ParametersType, ToolInfo } from '../types/tool_types'; -import { Observation } from '../types/types'; -import { BaseTool } from './BaseTool'; -const createTextDocToolParams = [ - { - name: 'text_content', - type: 'string', - description: 'The text content that the document will display', - required: true, - }, - { - name: 'title', - type: 'string', - description: 'The title of the document', - required: true, - }, - // { - // name: 'background_color', - // type: 'string', - // description: 'The background color of the document as a hex string', - // required: false, - // }, - // { - // name: 'font_color', - // type: 'string', - // description: 'The font color of the document as a hex string', - // required: false, - // }, -] as const; - -type CreateTextDocToolParamsType = typeof createTextDocToolParams; - -const createTextDocToolInfo: ToolInfo = { - name: 'createTextDoc', - description: 'Creates a text document with the provided content and title. Use if the user wants to create a textbox or text document of some sort. Can use after a search or other tool to save information.', - citationRules: 'No citation needed.', - parameterRules: createTextDocToolParams, -}; - -export class CreateTextDocTool extends BaseTool { - private _addLinkedDoc: (doc: parsedDoc) => void; - - constructor(addLinkedDoc: (doc: parsedDoc) => void) { - super(createTextDocToolInfo); - this._addLinkedDoc = addLinkedDoc; - } - - async execute(args: ParametersType): Promise { - try { - this._addLinkedDoc({ doc_type: 'text', data: args.text_content, title: args.title }); - return [{ type: 'text', text: 'Created text document.' }]; - } catch (error) { - return [{ type: 'text', text: 'Error creating text document, ' + error }]; - } - } -} diff --git a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts index c74e502e7..a9fb45b5a 100644 --- a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts +++ b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts @@ -8,9 +8,10 @@ import { CollectionFreeFormDocumentView } from '../../../nodes/CollectionFreeFor import { v4 as uuidv4 } from 'uuid'; import { LinkManager } from '../../../../util/LinkManager'; import { DocCast, StrCast } from '../../../../../fields/Types'; -import { supportedDocTypes } from './CreateDocumentTool'; +import { supportedDocTypes } from '../types/tool_types'; import { parsedDoc } from '../chatboxcomponents/ChatBox'; + // Define the parameters for the DocumentMetadataTool const parameterDefinitions: ReadonlyArray = [ { diff --git a/src/client/views/nodes/chatbot/types/tool_types.ts b/src/client/views/nodes/chatbot/types/tool_types.ts index 6ae48992d..6a0b5e708 100644 --- a/src/client/views/nodes/chatbot/types/tool_types.ts +++ b/src/client/views/nodes/chatbot/types/tool_types.ts @@ -50,3 +50,29 @@ export type ParamType

= P['type'] extends keyof TypeMap ? T export type ParametersType

> = { [K in P[number] as K['name']]: ParamType; }; + + +/** + * List of supported document types that can be created via text LLM. + */ +export enum supportedDocTypes { + flashcard = 'flashcard', + note = 'note', + html = 'html', + equation = 'equation', + functionplot = 'functionplot', + dataviz = 'dataviz', + notetaking = 'notetaking', + audio = 'audio', + video = 'video', + pdf = 'pdf', + rtf = 'rtf', + message = 'message', + collection = 'collection', + image = 'image', + deck = 'deck', + web = 'web', + comparison = 'comparison', + diagram = 'diagram', + script = 'script', +} \ No newline at end of file -- cgit v1.2.3-70-g09d2 From 8bc8bd13293e64a99b68408ec3d24a50a5dfe4bc Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Mon, 21 Apr 2025 14:23:38 -0400 Subject: feat & fix: citations now scroll to correct page and using OpenAI 4o with structured outputs --- .../nodes/chatbot/chatboxcomponents/ChatBox.tsx | 28 ++-- src/server/chunker/pdf_chunker.py | 169 ++++++++------------- 2 files changed, 77 insertions(+), 120 deletions(-) (limited to 'src/client/views/nodes/chatbot/chatboxcomponents') diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index 3152fe923..33419e559 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -107,21 +107,12 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id); } this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds); - this.agent = new Agent( - this.vectorstore, - this.retrieveSummaries, - this.retrieveFormattedHistory, - this.retrieveCSVData, - this.addLinkedUrlDoc, - this.createImageInDash, - this.createDocInDash, - this.createCSVInDash - ); - + this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.createImageInDash, this.createDocInDash, this.createCSVInDash); + // Reinitialize the DocumentMetadataTool with a direct reference to this ChatBox instance // This ensures the tool can properly access documents in the same Freeform view this.agent.reinitializeDocumentMetadataTool(this); - + this.messagesRef = React.createRef(); // Reaction to update dataDoc when chat history changes @@ -743,6 +734,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); return; } + const x1 = parseFloat(values[0]) * Doc.NativeWidth(doc); const y1 = parseFloat(values[1]) * Doc.NativeHeight(doc) + foundChunk.startPage * Doc.NativeHeight(doc); const x2 = parseFloat(values[2]) * Doc.NativeWidth(doc); @@ -751,9 +743,18 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { const annotationKey = Doc.LayoutFieldKey(doc) + '_annotations'; const existingDoc = DocListCast(doc[DocData][annotationKey]).find(d => d.citation_id === citation.citation_id); + if (existingDoc) { + existingDoc.x = x1; + existingDoc.y = y1; + existingDoc._width = x2 - x1; + existingDoc._height = y2 - y1; + } const highlightDoc = existingDoc ?? this.createImageCitationHighlight(x1, y1, x2, y2, citation, annotationKey, doc); - DocumentManager.Instance.showDocument(highlightDoc, { willZoomCentered: true }, () => {}); + //doc.layout_scroll = y1; + doc._layout_curPage = foundChunk.startPage + 1; + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); + //DocumentManager.Instance.showDocument(highlightDoc, { willZoomCentered: true }, () => {}); } break; case CHUNK_TYPE.TEXT: @@ -795,6 +796,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { backgroundColor: 'rgba(255, 255, 0, 0.5)', }); highlight_doc[DocData].citation_id = citation.citation_id; + highlight_doc.freeform_scale = 1; Doc.AddDocToList(pdfDoc[DocData], annotationKey, highlight_doc); highlight_doc.annotationOn = pdfDoc; Doc.SetContainer(highlight_doc, pdfDoc); diff --git a/src/server/chunker/pdf_chunker.py b/src/server/chunker/pdf_chunker.py index feb437f1f..e9b9ef2b3 100644 --- a/src/server/chunker/pdf_chunker.py +++ b/src/server/chunker/pdf_chunker.py @@ -276,12 +276,13 @@ class PDFChunker: :param output_folder: Folder to store the output files (extracted tables/images). :param image_batch_size: The batch size for processing visual elements. """ - self.client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) # Initialize the Anthropic API client + self.client = OpenAI() # ← replaces Anthropic() self.output_folder = output_folder self.image_batch_size = image_batch_size # Batch size for image processing self.doc_id = doc_id # Add doc_id self.element_extractor = ElementExtractor(output_folder, doc_id) + async def chunk_pdf(self, file_data: bytes, file_name: str, doc_id: str, job_id: str) -> List[Dict[str, Any]]: """ Processes a PDF file, extracting text and visual elements, and returning structured chunks. @@ -518,123 +519,77 @@ class PDFChunker: def batch_summarize_images(self, images: Dict[int, str]) -> Dict[int, str]: """ - Summarize images or tables by generating descriptive text. - - :param images: A dictionary mapping image numbers to base64-encoded image data. - :return: A dictionary mapping image numbers to their generated summaries. - """ - # Prompt for the AI model to summarize images and tables - prompt = f""" - - You are tasked with summarizing a series of {len(images)} images and tables for use in a RAG (Retrieval-Augmented Generation) system. - Your goal is to create concise, informative summaries that capture the essential content of each image or table. - These summaries will be used for embedding, so they should be descriptive and relevant. The image or table will be outlined in red on an image of the full page that it is on. Where necessary, use the context of the full page to heklp with the summary but don't summarize other content on the page. - - - - Identify whether it's an image or a table. - Examine its content carefully. - - Write a detailed summary that captures the main points or visual elements: -

- After summarizing what the table is about, include the column headers, a detailed summary of the data, and any notable data trends.
- Describe the main subjects, actions, or notable features. -
- - Focus on writing summaries that would make it easy to retrieve the content if compared to a user query using vector similarity search. - Keep summaries concise and include important words that may help with retrieval (but do not include numbers and numerical data). - - - - Avoid using special characters like &, <, >, ", ', $, %, etc. Instead, use their word equivalents: - Use "and" instead of &. - Use "dollars" instead of $. - Use "percent" instead of %. - Refrain from using quotation marks " or apostrophes ' unless absolutely necessary. - Ensure your output is in valid XML format. - - - - Enclose all summaries within a root element called <summaries>. - Use <summary> tags to enclose each individual summary. - Include an attribute 'number' in each <summary> tag to indicate the sequence, matching the provided image numbers. - Start each summary by indicating whether it's an image or a table (e.g., "This image shows..." or "The table presents..."). - If an image is completely blank, leave the summary blank (e.g., <summary number="3"></summary>). - - - - Do not replicate the example below—stay grounded to the content of the table or image and describe it completely and accurately. - - <summaries> - <summary number="1"> - The image shows two men shaking hands on stage at a formal event. The man on the left, in a dark suit and glasses, has a professional appearance, possibly an academic or business figure. The man on the right, Tim Cook, CEO of Apple, is recognizable by his silver hair and dark blue blazer. Cook holds a document titled "Tsinghua SEM EMBA," suggesting a link to Tsinghua University’s Executive MBA program. The backdrop displays English and Chinese text about business management and education, with the event dated October 23, 2014. - </summary> - <summary number="2"> - The table compares the company's assets between December 30, 2023, and September 30, 2023. Key changes include an increase in cash and cash equivalents, while marketable securities had a slight rise. Accounts receivable and vendor non-trade receivables decreased. Inventories and other current assets saw minor fluctuations. Non-current assets like marketable securities slightly declined, while property, plant, and equipment remained stable. Total assets showed minimal change, holding steady at around three hundred fifty-three billion dollars. - </summary> - <summary number="3"> - The table outlines the company's shareholders' equity as of December 30, 2023, versus September 30, 2023. Common stock and additional paid-in capital increased, and retained earnings shifted from a deficit to a positive figure. Accumulated other comprehensive loss decreased. Overall, total shareholders' equity rose significantly, while total liabilities and equity remained nearly unchanged at about three hundred fifty-three billion dollars. - </summary> - <summary number="4"> - The table details the company's liabilities as of December 30, 2023, compared to September 30, 2023. Current liabilities decreased due to lower accounts payable and other current liabilities, while deferred revenue slightly increased. Commercial paper significantly decreased, and term debt rose modestly. Non-current liabilities were stable, with minimal changes in term debt and other non-current liabilities. Total liabilities dropped from two hundred ninety billion dollars to two hundred seventy-nine billion dollars. - </summary> - <summary number="5"> - </summary> - </summaries> - - - - - Process each image or table in the order provided. - Maintain consistent formatting throughout your response. - Ensure the output is in full, valid XML format with the root <summaries> element and each summary being within a <summary> element with the summary number specified as well. - - - """ - content = [] - for number, img in images.items(): - content.append({"type": "text", "text": f"\nImage {number}:\n"}) - content.append({"type": "image", "source": {"type": "base64", "media_type": "image/png", "data": img}}) + Summarise a batch of images/tables with GPT‑4o using Structured Outputs. + :param images: {image_number: base64_png} + :return: {image_number: summary_text} + """ + # -------- 1. Build the prompt ----------- + content: list[dict] = [] + for n, b64 in images.items(): + content.append({"type": "text", + "text": f"\nImage {n} (outlined in red on the page):"}) + content.append({"type": "image_url", + "image_url": {"url": f"data:image/png;base64,{b64}"}}) messages = [ - {"role": "user", "content": content} + { + "role": "system", + "content": ( + "You are generating retrieval‑ready summaries for each highlighted " + "image or table. Start by identifying whether the element is an " + "image or a table, then write one informative sentence that a vector " + "search would find useful. Provide detail but limit to a couple of paragraphs per image." + ), + }, + {"role": "user", "content": content}, ] + schema = { + "type": "object", + "properties": { + "summaries": { + "type": "array", + "items": { + "type": "object", + "properties": { + "number": {"type": "integer"}, + "type": {"type": "string", "enum": ["image", "table"]}, + "summary": {"type": "string"} + }, + "required": ["number", "type", "summary"], + "additionalProperties": False + } + } + }, + "required": ["summaries"], + "additionalProperties": False + } + + # ---------- OpenAI call ----------------------------------------------------- try: - response = self.client.messages.create( - model='claude-3-7-sonnet-20250219', - system=prompt, - max_tokens=400 * len(images), # Increased token limit for more detailed summaries + resp = self.client.chat.completions.create( + model="gpt-4o", messages=messages, + max_tokens=400 * len(images), temperature=0, + response_format={ + "type": "json_schema", + "json_schema": { + "name": "image_batch_summaries", # ← REQUIRED + "schema": schema, # ← REQUIRED + "strict": True # ← strongly recommended + }, + }, ) - # Parse the response - text = response.content[0].text - #print(text) - # Attempt to parse and fix the XML if necessary - parser = etree.XMLParser(recover=True) - root = etree.fromstring(text, parser=parser) - # Check if there were errors corrected - # if parser.error_log: - # #print("XML Parsing Errors:") - # for error in parser.error_log: - # #print(error) - # Extract summaries - summaries = {} - for summary in root.findall('summary'): - number = int(summary.get('number')) - content = summary.text.strip() if summary.text else "" - if content: # Only include non-empty summaries - summaries[number] = content - - return summaries + parsed = json.loads(resp.choices[0].message.content) # schema‑safe + return {item["number"]: item["summary"] + for item in parsed["summaries"]} except Exception as e: - # Print errors to stderr so they don't interfere with JSON output - print(json.dumps({"error": str(e)}), file=sys.stderr) - sys.stderr.flush() - + # Log and fall back gracefully + print(json.dumps({"error": str(e)}), file=sys.stderr, flush=True) + return {} class DocumentType(Enum): """ -- cgit v1.2.3-70-g09d2 From 321977e670cbdf10f6c49fc9071e3260a8bd4aae Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Thu, 24 Apr 2025 12:06:11 -0400 Subject: Linking docs now works with visible docs --- .../views/nodes/chatbot/agentsystem/Agent.ts | 17 +- .../nodes/chatbot/chatboxcomponents/ChatBox.tsx | 28 +- .../views/nodes/chatbot/tools/CreateLinksTool.ts | 68 ++ .../nodes/chatbot/tools/DocumentMetadataTool.ts | 884 +------------------- .../nodes/chatbot/utils/AgentDocumentManager.ts | 923 +++++++++++++++++++++ 5 files changed, 1032 insertions(+), 888 deletions(-) create mode 100644 src/client/views/nodes/chatbot/tools/CreateLinksTool.ts create mode 100644 src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts (limited to 'src/client/views/nodes/chatbot/chatboxcomponents') diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts index 43faf5bf4..5af021dbf 100644 --- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts +++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts @@ -20,10 +20,12 @@ import { getReactPrompt } from './prompts'; //import { DictionaryTool } from '../tools/DictionaryTool'; import { ChatCompletionMessageParam } from 'openai/resources'; import { Doc } from '../../../../../fields/Doc'; -import { parsedDoc } from '../chatboxcomponents/ChatBox'; +import { ChatBox, parsedDoc } from '../chatboxcomponents/ChatBox'; import { WebsiteInfoScraperTool } from '../tools/WebsiteInfoScraperTool'; import { Upload } from '../../../../../server/SharedMediaTypes'; import { RAGTool } from '../tools/RAGTool'; +import { AgentDocumentManager } from '../utils/AgentDocumentManager'; +import { CreateLinksTool } from '../tools/CreateLinksTool'; //import { CreateTextDocTool } from '../tools/CreateTextDocumentTool'; dotenv.config(); @@ -47,6 +49,7 @@ export class Agent { private processingInfo: ProcessingInfo[] = []; private streamedAnswerParser: StreamedAnswerParser = new StreamedAnswerParser(); private tools: Record>>; + private _docManager: AgentDocumentManager; /** * The constructor initializes the agent with the vector store and toolset, and sets up the OpenAI client. @@ -64,9 +67,9 @@ export class Agent { csvData: () => { filename: string; id: string; text: string }[], addLinkedUrlDoc: (url: string, id: string) => void, createImage: (result: Upload.FileInformation & Upload.InspectionResults, options: DocumentOptions) => void, - addLinkedDoc: (doc: parsedDoc) => Doc | undefined, // eslint-disable-next-line @typescript-eslint/no-unused-vars - createCSVInDash: (url: string, title: string, id: string, data: string) => void + createCSVInDash: (url: string, title: string, id: string, data: string) => void, + chatBox: ChatBox ) { // Initialize OpenAI client with API key from environment this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); @@ -74,6 +77,7 @@ export class Agent { this._history = history; this._summaries = summaries; this._csvData = csvData; + this._docManager = new AgentDocumentManager(chatBox); // Define available tools for the assistant this.tools = { @@ -84,7 +88,8 @@ export class Agent { searchTool: new SearchTool(addLinkedUrlDoc), noTool: new NoTool(), //imageCreationTool: new ImageCreationTool(createImage), - documentMetadata: new DocumentMetadataTool(this), + documentMetadata: new DocumentMetadataTool(this._docManager), + createLinks: new CreateLinksTool(this._docManager), }; } @@ -541,9 +546,9 @@ export class Agent { * * @param chatBox The ChatBox instance to pass to the DocumentMetadataTool */ - public reinitializeDocumentMetadataTool(chatBox: any): void { + public reinitializeDocumentMetadataTool(): void { if (this.tools && this.tools.documentMetadata) { - this.tools.documentMetadata = new DocumentMetadataTool(chatBox); + this.tools.documentMetadata = new DocumentMetadataTool(this._docManager); console.log('Agent: Reinitialized DocumentMetadataTool with ChatBox instance'); } else { console.warn('Agent: Could not reinitialize DocumentMetadataTool - tool not found'); diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index 33419e559..e09b4313f 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -107,11 +107,11 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id); } this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds); - this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.createImageInDash, this.createDocInDash, this.createCSVInDash); + this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.createImageInDash, this.createCSVInDash, this); // Reinitialize the DocumentMetadataTool with a direct reference to this ChatBox instance // This ensures the tool can properly access documents in the same Freeform view - this.agent.reinitializeDocumentMetadataTool(this); + this.agent.reinitializeDocumentMetadataTool(); this.messagesRef = React.createRef(); @@ -446,7 +446,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { private createCollectionWithChildren = (data: parsedDoc[], insideCol: boolean): Opt[] => data.map(doc => this.whichDoc(doc, insideCol)); @action - whichDoc = (doc: parsedDoc, insideCol: boolean): Opt => { + public whichDoc = (doc: parsedDoc, insideCol: boolean): Opt => { const options = OmitKeys(doc, ['doct_type', 'data']).omit as DocumentOptions; const data = (doc as parsedDocData).data; const ndoc = (() => { @@ -515,28 +515,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { return ndoc; }; - /** - * Creates a document in the dashboard. - * - * @param {string} doc_type - The type of document to create. - * @param {string} data - The data used to generate the document. - * @param {DocumentOptions} options - Configuration options for the document. - * @returns {Promise} A promise that resolves once the document is created and displayed. - */ - @action - createDocInDash = (pdoc: parsedDoc) => { - const linkAndShowDoc = (doc: Opt) => { - if (doc) { - LinkManager.Instance.addLink(Docs.Create.LinkDocument(this.Document, doc)); - this._props.addDocument?.(doc); - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); - } - }; - const doc = this.whichDoc(pdoc, false); - if (doc) linkAndShowDoc(doc); - return doc; - }; - /** * Creates a deck of flashcards. * diff --git a/src/client/views/nodes/chatbot/tools/CreateLinksTool.ts b/src/client/views/nodes/chatbot/tools/CreateLinksTool.ts new file mode 100644 index 000000000..c2850a8ce --- /dev/null +++ b/src/client/views/nodes/chatbot/tools/CreateLinksTool.ts @@ -0,0 +1,68 @@ +import { Observation } from '../types/types'; +import { ParametersType, ToolInfo } from '../types/tool_types'; +import { BaseTool } from './BaseTool'; +import { AgentDocumentManager } from '../utils/AgentDocumentManager'; + +const createLinksToolParams = [ + { + name: 'document_ids', + type: 'string[]', + description: 'List of document IDs to create links between. All documents will be linked to each other.', + required: true, + }, +] as const; + +type CreateLinksToolParamsType = typeof createLinksToolParams; + +const createLinksToolInfo: ToolInfo = { + name: 'createLinks', + description: 'Creates visual links between multiple documents in the dashboard. This allows related documents to be connected visually with lines that users can see.', + citationRules: 'No citation needed.', + parameterRules: createLinksToolParams, +}; + +export class CreateLinksTool extends BaseTool { + private _documentManager: AgentDocumentManager; + + constructor(documentManager: AgentDocumentManager) { + super(createLinksToolInfo); + this._documentManager = documentManager; + } + + async execute(args: ParametersType): Promise { + try { + // Validate that we have at least 2 documents to link + if (args.document_ids.length < 2) { + return [{ type: 'text', text: 'Error: At least 2 document IDs are required to create links.' }]; + } + + // Validate that all documents exist + const missingDocIds = args.document_ids.filter(id => !this._documentManager.has(id)); + if (missingDocIds.length > 0) { + return [ + { + type: 'text', + text: `Error: The following document IDs were not found: ${missingDocIds.join(', ')}`, + }, + ]; + } + + // Create links between all documents with the specified relationship + const createdLinks = this._documentManager.addLinks(args.document_ids); + + return [ + { + type: 'text', + text: `Successfully created ${createdLinks.length} visual links between ${args.document_ids.length}.`, + }, + ]; + } catch (error) { + return [ + { + type: 'text', + text: `Error creating links: ${error instanceof Error ? error.message : String(error)}`, + }, + ]; + } + } +} diff --git a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts index 08351143b..4b751acc0 100644 --- a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts +++ b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts @@ -10,6 +10,7 @@ import { LinkManager } from '../../../../util/LinkManager'; import { DocCast, StrCast } from '../../../../../fields/Types'; import { supportedDocTypes } from '../types/tool_types'; import { parsedDoc } from '../chatboxcomponents/ChatBox'; +import { AgentDocumentManager } from '../utils/AgentDocumentManager'; // Define the parameters for the DocumentMetadataTool const parameterDefinitions: ReadonlyArray = [ @@ -199,746 +200,12 @@ const documentMetadataToolInfo: ToolInfo = { * and allows for editing document fields in the correct location. */ export class DocumentMetadataTool extends BaseTool { - private freeformView: any; - private chatBox: any; - private chatBoxDocument: Doc | null = null; - private documentsById: Map = new Map(); - private layoutDocsById: Map = new Map(); - private dataDocsById: Map = new Map(); - private fieldMetadata: Record = {}; - private readonly DOCUMENT_ID_FIELD = '_dash_document_id'; - - constructor(chatBox: any) { - super(documentMetadataToolInfo); - this.chatBox = chatBox; - - // Store a direct reference to the ChatBox document - if (chatBox && chatBox.Document) { - this.chatBoxDocument = chatBox.Document; - if (this.chatBoxDocument && this.chatBoxDocument.id) { - console.log('DocumentMetadataTool initialized with ChatBox Document:', this.chatBoxDocument.id); - } else { - console.log('DocumentMetadataTool initialized with ChatBox Document (no ID)'); - } - } else if (chatBox && chatBox.props && chatBox.props.Document) { - this.chatBoxDocument = chatBox.props.Document; - if (this.chatBoxDocument && this.chatBoxDocument.id) { - console.log('DocumentMetadataTool initialized with ChatBox props.Document:', this.chatBoxDocument.id); - } else { - console.log('DocumentMetadataTool initialized with ChatBox props.Document (no ID)'); - } - } else { - console.warn('DocumentMetadataTool initialized without valid ChatBox Document reference'); - } - - this.initializeFieldMetadata(); - } - - /** - * Extracts field metadata from DocumentOptions class - */ - private initializeFieldMetadata() { - // Parse DocumentOptions to extract field definitions - const documentOptionsInstance = new DocumentOptions(); - const documentOptionsEntries = Object.entries(documentOptionsInstance); - - for (const [fieldName, fieldInfo] of documentOptionsEntries) { - // Extract field information - const fieldData: Record = { - name: fieldName, - withoutUnderscore: fieldName.startsWith('_') ? fieldName.substring(1) : fieldName, - description: '', - type: 'unknown', - required: false, - defaultValue: undefined, - possibleValues: [], - }; - - // Check if fieldInfo has description property (it's likely a FInfo instance) - if (fieldInfo && typeof fieldInfo === 'object' && 'description' in fieldInfo) { - fieldData.description = fieldInfo.description; - - // Extract field type if available - if ('fieldType' in fieldInfo) { - fieldData.type = fieldInfo.fieldType; - } - - // Extract possible values if available - if ('values' in fieldInfo && Array.isArray(fieldInfo.values)) { - fieldData.possibleValues = fieldInfo.values; - } - } - - this.fieldMetadata[fieldName] = fieldData; - } - } - - /** - * Gets all documents in the same Freeform view as the ChatBox - * Uses the LinkManager to get all linked documents, similar to how ChatBox does it - */ - private findDocumentsInFreeformView() { - // Reset collections - this.documentsById.clear(); - this.layoutDocsById.clear(); - this.dataDocsById.clear(); - - try { - // Use the LinkManager approach which is proven to work in ChatBox - if (this.chatBoxDocument) { - console.log('Finding documents linked to ChatBox document with ID:', this.chatBoxDocument.id); - - // Get directly linked documents via LinkManager - const linkedDocs = LinkManager.Instance.getAllRelatedLinks(this.chatBoxDocument) - .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.chatBoxDocument!))) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d); - - console.log(`Found ${linkedDocs.length} linked documents via LinkManager`); - - // Process the linked documents - linkedDocs.forEach((doc: Doc) => { - if (doc) { - this.processDocument(doc); - } - }); - - // Include the ChatBox document itself - this.processDocument(this.chatBoxDocument); - - // If we have access to the Document's parent, try to find sibling documents - if (this.chatBoxDocument.parent) { - const parent = this.chatBoxDocument.parent; - console.log('Found parent document, checking for siblings'); - - // Check if parent is a Doc type and has a childDocs function - if (parent && typeof parent === 'object' && 'childDocs' in parent && typeof parent.childDocs === 'function') { - try { - const siblingDocs = parent.childDocs(); - if (Array.isArray(siblingDocs)) { - console.log(`Found ${siblingDocs.length} sibling documents via parent.childDocs()`); - siblingDocs.forEach((doc: Doc) => { - if (doc) { - this.processDocument(doc); - } - }); - } - } catch (e) { - console.warn('Error accessing parent.childDocs:', e); - } - } - } - } else if (this.chatBox && this.chatBox.linkedDocs) { - // If we have direct access to the linkedDocs computed property from ChatBox - console.log('Using ChatBox.linkedDocs directly'); - const linkedDocs = this.chatBox.linkedDocs; - if (Array.isArray(linkedDocs)) { - console.log(`Found ${linkedDocs.length} documents via ChatBox.linkedDocs`); - linkedDocs.forEach((doc: Doc) => { - if (doc) { - this.processDocument(doc); - } - }); - } - - // Process the ChatBox document if available - if (this.chatBox.Document) { - this.processDocument(this.chatBox.Document); - } - } else { - console.warn('No ChatBox document reference available for finding linked documents'); - } - - console.log(`DocumentMetadataTool found ${this.documentsById.size} total documents`); - - // If we didn't find any documents, try a fallback method - if (this.documentsById.size === 0 && this.chatBox) { - console.log('No documents found, trying fallback method'); - - // Try to access any field that might contain documents - if (this.chatBox.props && this.chatBox.props.documents) { - const documents = this.chatBox.props.documents; - if (Array.isArray(documents)) { - console.log(`Found ${documents.length} documents via ChatBox.props.documents`); - documents.forEach((doc: Doc) => { - if (doc) { - this.processDocument(doc); - } - }); - } - } - } - } catch (error) { - console.error('Error finding documents in Freeform view:', error); - } - } - - /** - * Process a document by ensuring it has an ID and adding it to the appropriate collections - * @param doc The document to process - */ - private processDocument(doc: Doc) { - // Ensure document has a persistent ID - const docId = this.ensureDocumentId(doc); - - // Only add if we haven't already processed this document - if (!this.documentsById.has(docId)) { - this.documentsById.set(docId, doc); - - // Get layout doc (the document itself or its layout) - // TODO: Check if this works. - const layoutDoc = doc; - if (layoutDoc) { - this.layoutDocsById.set(docId, layoutDoc); - } - - // Get data doc - const dataDoc = doc[DocData]; - if (dataDoc) { - this.dataDocsById.set(docId, dataDoc); - } - } - } - - /** - * Ensures a document has a persistent ID stored in its metadata - * @param doc The document to ensure has an ID - * @returns The document's ID - */ - private ensureDocumentId(doc: Doc): string { - let docId: string | undefined; - - // First try to get the ID from our custom field - if (doc[this.DOCUMENT_ID_FIELD]) { - docId = String(doc[this.DOCUMENT_ID_FIELD]); - return docId; - } - - // Try different ways to get a document ID - - // 1. Try the direct id property if it exists - if (doc.id && typeof doc.id === 'string') { - docId = doc.id; - } - // 2. Try doc._id if it exists - else if (doc._id && typeof doc._id === 'string') { - docId = doc._id; - } - // 3. Try doc.data?.id if it exists - else if (doc.data && typeof doc.data === 'object' && 'id' in doc.data && typeof doc.data.id === 'string') { - docId = doc.data.id; - } - // 4. If none of the above work, generate a UUID - else { - docId = uuidv4(); - console.log(`Generated new UUID for document with title: ${doc.title || 'Untitled'}`); - } - - // Store the ID in the document's metadata so it persists - try { - doc[this.DOCUMENT_ID_FIELD] = docId; - } catch (e) { - console.warn(`Could not assign ID to document property`, e); - } - - return docId; - } - - /** - * Extracts metadata from a specific document - * @param docId The ID of the document to extract metadata from - * @returns An object containing the document's metadata - */ - private extractDocumentMetadata(doc?: Doc) { - if (!doc) return null; - const layoutDoc = doc; - const dataDoc = doc[DocData]; - - const metadata: Record = { - id: doc.dash_document_id || doc.id || '', - title: doc.title || '', - type: doc.type || '', - fields: { - layout: {}, - data: {}, - }, - fieldLocationMap: {}, - }; - - // Process all known field definitions - Object.keys(this.fieldMetadata).forEach(fieldName => { - const fieldDef = this.fieldMetadata[fieldName]; - const strippedName = fieldName.startsWith('_') ? fieldName.substring(1) : fieldName; - - // Check if field exists on layout document - let layoutValue = undefined; - if (layoutDoc) { - layoutValue = layoutDoc[fieldName]; - if (layoutValue !== undefined) { - // Field exists on layout document - metadata.fields.layout[fieldName] = this.formatFieldValue(layoutValue); - metadata.fieldLocationMap[strippedName] = 'layout'; - } - } - - // Check if field exists on data document - let dataValue = undefined; - if (dataDoc) { - dataValue = dataDoc[fieldName]; - if (dataValue !== undefined) { - // Field exists on data document - metadata.fields.data[fieldName] = this.formatFieldValue(dataValue); - if (!metadata.fieldLocationMap[strippedName]) { - metadata.fieldLocationMap[strippedName] = 'data'; - } - } - } - - // For fields with stripped names (without leading underscore), - // also check if they exist on documents without the underscore - if (fieldName.startsWith('_')) { - const nonUnderscoreFieldName = fieldName.substring(1); - - if (layoutDoc) { - const nonUnderscoreLayoutValue = layoutDoc[nonUnderscoreFieldName]; - if (nonUnderscoreLayoutValue !== undefined) { - metadata.fields.layout[nonUnderscoreFieldName] = this.formatFieldValue(nonUnderscoreLayoutValue); - metadata.fieldLocationMap[nonUnderscoreFieldName] = 'layout'; - } - } - - if (dataDoc) { - const nonUnderscoreDataValue = dataDoc[nonUnderscoreFieldName]; - if (nonUnderscoreDataValue !== undefined) { - metadata.fields.data[nonUnderscoreFieldName] = this.formatFieldValue(nonUnderscoreDataValue); - if (!metadata.fieldLocationMap[nonUnderscoreFieldName]) { - metadata.fieldLocationMap[nonUnderscoreFieldName] = 'data'; - } - } - } - } - }); - - // Add common field aliases for easier discovery - // This helps users understand both width and _width refer to the same property - if (metadata.fields.layout._width !== undefined && metadata.fields.layout.width === undefined) { - metadata.fields.layout.width = metadata.fields.layout._width; - metadata.fieldLocationMap.width = 'layout'; - } - - if (metadata.fields.layout._height !== undefined && metadata.fields.layout.height === undefined) { - metadata.fields.layout.height = metadata.fields.layout._height; - metadata.fieldLocationMap.height = 'layout'; - } - - return metadata; - } + private _docManager: AgentDocumentManager; - /** - * Edits a specific field on a document - * @param docId The ID of the document to edit - * @param fieldName The name of the field to edit - * @param fieldValue The new value for the field (string, number, or boolean) - * @returns Object with success status, message, and additional information - */ - private editDocumentField( - docId: string, - fieldName: string, - fieldValue: string | number | boolean - ): { - success: boolean; - message: string; - fieldName?: string; - originalFieldName?: string; - newValue?: any; - warning?: string; - } { - // Normalize field name (handle with/without underscore) - let normalizedFieldName = fieldName.startsWith('_') ? fieldName : fieldName; - const strippedFieldName = fieldName.startsWith('_') ? fieldName.substring(1) : fieldName; - - // Handle common field name aliases (width → _width, height → _height) - // Many document fields use '_' prefix for layout properties - if (fieldName === 'width') { - normalizedFieldName = '_width'; - } else if (fieldName === 'height') { - normalizedFieldName = '_height'; - } - - // Get the documents - const doc = this.documentsById.get(docId); - if (!doc) { - return { success: false, message: `Document with ID ${docId} not found` }; - } - - const layoutDoc = this.layoutDocsById.get(docId); - const dataDoc = this.dataDocsById.get(docId); - - if (!layoutDoc && !dataDoc) { - return { success: false, message: `Could not find layout or data document for document with ID ${docId}` }; - } - - try { - // Convert the field value to the appropriate type based on field metadata - const convertedValue = this.convertFieldValue(normalizedFieldName, fieldValue); - - let targetDoc: Doc | undefined; - let targetLocation: string; - - // First, check if field exists on layout document using Doc.Get - if (layoutDoc) { - const fieldExistsOnLayout = Doc.Get(layoutDoc, normalizedFieldName, true) !== undefined; - - // If it exists on layout document, update it there - if (fieldExistsOnLayout) { - targetDoc = layoutDoc; - targetLocation = 'layout'; - } - // If it has an underscore prefix, it's likely a layout property even if not yet set - else if (normalizedFieldName.startsWith('_')) { - targetDoc = layoutDoc; - targetLocation = 'layout'; - } - // Otherwise, look for or create on data document - else if (dataDoc) { - targetDoc = dataDoc; - targetLocation = 'data'; - } - // If no data document available, default to layout - else { - targetDoc = layoutDoc; - targetLocation = 'layout'; - } - } - // If no layout document, use data document - else if (dataDoc) { - targetDoc = dataDoc; - targetLocation = 'data'; - } else { - return { success: false, message: `No valid document found for editing` }; - } - - if (!targetDoc) { - return { success: false, message: `Target document not available` }; - } - - // Set the field value on the target document - targetDoc[normalizedFieldName] = convertedValue; - - return { - success: true, - message: `Successfully updated field '${normalizedFieldName}' on ${targetLocation} document (ID: ${docId})`, - fieldName: normalizedFieldName, - originalFieldName: fieldName, - newValue: convertedValue, - }; - } catch (error) { - console.error('Error editing document field:', error); - return { - success: false, - message: `Error updating field: ${error instanceof Error ? error.message : String(error)}`, - }; - } - } - - /** - * Converts a string field value to the appropriate type based on field metadata - * @param fieldName The name of the field - * @param fieldValue The string value to convert - * @returns The converted value with the appropriate type - */ - private convertFieldValue(fieldName: string, fieldValue: any): any { - // If fieldValue is already a number or boolean, we don't need to convert it from string - if (typeof fieldValue === 'number' || typeof fieldValue === 'boolean') { - return fieldValue; - } - - // If fieldValue is a string "true" or "false", convert to boolean - if (typeof fieldValue === 'string') { - if (fieldValue.toLowerCase() === 'true') { - return true; - } - if (fieldValue.toLowerCase() === 'false') { - return false; - } - } - - // If fieldValue is not a string (and not a number or boolean), convert it to string - if (typeof fieldValue !== 'string') { - fieldValue = String(fieldValue); - } - - // Special handling for text field - convert to proper RichTextField format - if (fieldName === 'text') { - try { - // Check if it's already a valid JSON RichTextField - JSON.parse(fieldValue); - return fieldValue; - } catch (e) { - // It's a plain text string, so convert it to RichTextField format - const rtf = { - doc: { - type: 'doc', - content: [ - { - type: 'paragraph', - content: [ - { - type: 'text', - text: fieldValue, - }, - ], - }, - ], - }, - }; - return JSON.stringify(rtf); - } - } - - // Get field metadata - const normalizedFieldName = fieldName.startsWith('_') ? fieldName : `_${fieldName}`; - const strippedFieldName = fieldName.startsWith('_') ? fieldName.substring(1) : fieldName; - - // Check both versions of the field name in metadata - const fieldMeta = this.fieldMetadata[normalizedFieldName] || this.fieldMetadata[strippedFieldName]; - - // Special handling for width and height without metadata - if (!fieldMeta && (fieldName === '_width' || fieldName === '_height' || fieldName === 'width' || fieldName === 'height')) { - const num = Number(fieldValue); - return isNaN(num) ? fieldValue : num; - } - - if (!fieldMeta) { - // If no metadata found, just return the string value - return fieldValue; - } - - // Convert based on field type - const fieldType = fieldMeta.type; - - if (fieldType === 'boolean') { - // Convert to boolean - return fieldValue.toLowerCase() === 'true'; - } else if (fieldType === 'number') { - // Convert to number - const num = Number(fieldValue); - return isNaN(num) ? fieldValue : num; - } else if (fieldType === 'date') { - // Try to convert to date (stored as number timestamp) - try { - return new Date(fieldValue).getTime(); - } catch (e) { - return fieldValue; - } - } else if (fieldType.includes('list') || fieldType.includes('array')) { - // Try to parse as JSON array - try { - return JSON.parse(fieldValue); - } catch (e) { - return fieldValue; - } - } else if (fieldType === 'json' || fieldType === 'object') { - // Try to parse as JSON object - try { - return JSON.parse(fieldValue); - } catch (e) { - return fieldValue; - } - } - - // Default to string - return fieldValue; - } - - /** - * Formats a field value for JSON output - * @param value The field value to format - * @returns A JSON-friendly representation of the field value - */ - private formatFieldValue(value: any): any { - if (value === undefined || value === null) { - return null; - } - - // Handle Doc objects - if (value instanceof Doc) { - return { - type: 'Doc', - id: value.id || this.ensureDocumentId(value), - title: value.title || '', - docType: value.type || '', - }; - } - - // Handle RichTextField (try to extract plain text) - if (typeof value === 'string' && value.includes('"type":"doc"') && value.includes('"content":')) { - try { - const rtfObj = JSON.parse(value); - // If this looks like a rich text field structure - if (rtfObj.doc && rtfObj.doc.content) { - // Recursively extract text from the content - let plainText = ''; - const extractText = (node: any) => { - if (node.text) { - plainText += node.text; - } - if (node.content && Array.isArray(node.content)) { - node.content.forEach((child: any) => extractText(child)); - } - }; - - extractText(rtfObj.doc); - - // If we successfully extracted text, show it, but also preserve the original value - if (plainText) { - return { - type: 'RichText', - text: plainText, - length: plainText.length, - // Don't include the full value as it can be very large - }; - } - } - } catch (e) { - // If parsing fails, just treat as a regular string - } - } - - // Handle arrays and complex objects - if (typeof value === 'object') { - // If the object has a toString method, use it - if (value.toString && value.toString !== Object.prototype.toString) { - return value.toString(); - } - - try { - // Try to convert to JSON string - return JSON.stringify(value); - } catch (e) { - return '[Complex Object]'; - } - } - - // Return primitive values as is - return value; - } - - /** - * Extracts all field metadata from DocumentOptions - * @returns A structured object containing metadata about all available document fields - */ - private getAllFieldMetadata() { - // Start with our already populated fieldMetadata from the DocumentOptions class - const result: Record = { - fieldCount: Object.keys(this.fieldMetadata).length, - fields: {}, - fieldsByType: { - string: [], - number: [], - boolean: [], - //doc: [], - //list: [], - //date: [], - //enumeration: [], - //other: [], - }, - fieldNameMappings: {}, - commonFields: { - appearance: [], - position: [], - size: [], - content: [], - behavior: [], - layout: [], - }, - }; - - // Process each field in the metadata - Object.entries(this.fieldMetadata).forEach(([fieldName, fieldInfo]) => { - const strippedName = fieldName.startsWith('_') ? fieldName.substring(1) : fieldName; - - // Add to fieldNameMappings - if (fieldName.startsWith('_')) { - result.fieldNameMappings[strippedName] = fieldName; - } - - // Create structured field metadata - const fieldData: Record = { - name: fieldName, - displayName: strippedName, - description: fieldInfo.description || '', - type: fieldInfo.fieldType || 'unknown', - possibleValues: fieldInfo.values || [], - }; - - // Add field to fields collection - result.fields[fieldName] = fieldData; - - // Categorize by field type - const type = fieldInfo.fieldType?.toLowerCase() || 'unknown'; - if (type === 'string') { - result.fieldsByType.string.push(fieldName); - } else if (type === 'number') { - result.fieldsByType.number.push(fieldName); - } else if (type === 'boolean') { - result.fieldsByType.boolean.push(fieldName); - } else if (type === 'doc') { - //result.fieldsByType.doc.push(fieldName); - } else if (type === 'list') { - //result.fieldsByType.list.push(fieldName); - } else if (type === 'date') { - //result.fieldsByType.date.push(fieldName); - } else if (type === 'enumeration') { - //result.fieldsByType.enumeration.push(fieldName); - } else { - //result.fieldsByType.other.push(fieldName); - } - - // Categorize by field purpose - if (fieldName.includes('width') || fieldName.includes('height') || fieldName.includes('size')) { - result.commonFields.size.push(fieldName); - } else if (fieldName.includes('color') || fieldName.includes('background') || fieldName.includes('border')) { - result.commonFields.appearance.push(fieldName); - } else if (fieldName.includes('x') || fieldName.includes('y') || fieldName.includes('position') || fieldName.includes('pan')) { - result.commonFields.position.push(fieldName); - } else if (fieldName.includes('text') || fieldName.includes('title') || fieldName.includes('data')) { - result.commonFields.content.push(fieldName); - } else if (fieldName.includes('action') || fieldName.includes('click') || fieldName.includes('event')) { - result.commonFields.behavior.push(fieldName); - } else if (fieldName.includes('layout')) { - result.commonFields.layout.push(fieldName); - } - }); - - // Add special section for auto-sizing related fields - result.autoSizingFields = { - height: { - autoHeightField: '_layout_autoHeight', - heightField: '_height', - displayName: 'height', - usage: 'To manually set height, first set layout_autoHeight to false', - }, - width: { - autoWidthField: '_layout_autoWidth', - widthField: '_width', - displayName: 'width', - usage: 'To manually set width, first set layout_autoWidth to false', - }, - }; - - // Add special section for text field format - result.specialFields = { - text: { - name: 'text', - description: 'Document text content', - format: 'RichTextField', - note: 'When setting text, provide plain text - it will be automatically converted to the correct format', - example: 'For setting: "Hello world" (plain text); For getting: Will be converted to plaintext for display', - }, - }; - - return result; + constructor(docManager: AgentDocumentManager) { + super(documentMetadataToolInfo); + this._docManager = docManager; + this._docManager.initializeFindDocsFreeform(); } /** @@ -950,7 +217,7 @@ export class DocumentMetadataTool extends BaseTool ({ - id, - title: doc.title || 'Untitled Document', - type: doc.type || 'Unknown Type', - })); - - if (docs.length === 0) { - return [ - { - type: 'text', - text: 'No documents found in the current view.', - }, - ]; - } - - return [ - { - type: 'text', - text: `Found ${docs.length} document(s) in the current view:\n${JSON.stringify(docs, null, 2)}`, - }, - ]; + this._docManager.listDocs(); } case 'getFieldOptions': { // Get all available field options with metadata - const fieldOptions = this.getAllFieldMetadata(); + const fieldOptions = this._docManager.getAllFieldMetadata(); return [ { @@ -1171,61 +417,27 @@ export class DocumentMetadataTool extends BaseTool = {}; - for (const doc of this.documentsById.values()) { - documentsMetadata.add(this.extractDocumentMetadata(doc)); - } - - return { - documentCount: this.documentsById.size, - documents: documentsMetadata, - fieldDefinitions: this.fieldMetadata, - }; - } - } - - /** - * Helper method to validate a document type and ensure it's a valid supportedDocType - * @param docType The document type to validate - * @returns True if the document type is valid, false otherwise - */ - private isValidDocType(docType: string): boolean { - return Object.values(supportedDocTypes).includes(docType as supportedDocTypes); - } } diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts new file mode 100644 index 000000000..c954226e4 --- /dev/null +++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts @@ -0,0 +1,923 @@ +import { ChatBox } from '../chatboxcomponents/ChatBox'; +import { Doc, FieldType, Opt } from '../../../../../fields/Doc'; +import { DocData } from '../../../../../fields/DocSymbols'; +import { Observation } from '../types/types'; +import { ParametersType, ToolInfo, Parameter } from '../types/tool_types'; +import { BaseTool } from '../tools/BaseTool'; +import { Docs, DocumentOptions } from '../../../../documents/Documents'; +import { CollectionFreeFormDocumentView } from '../../CollectionFreeFormDocumentView'; +import { v4 as uuidv4 } from 'uuid'; +import { LinkManager, UPDATE_SERVER_CACHE } from '../../../../util/LinkManager'; +import { DocCast, StrCast } from '../../../../../fields/Types'; +import { supportedDocTypes } from '../types/tool_types'; +import { parsedDoc } from '../chatboxcomponents/ChatBox'; +import { faThumbTackSlash } from '@fortawesome/free-solid-svg-icons'; +import { DocumentManager } from '../../../../util/DocumentManager'; +import { DocumentView } from '../../DocumentView'; + +/** + * Interface representing a document in the freeform view + */ +interface AgentDocument { + layoutDoc: Doc; + dataDoc: Doc; +} + +/** + * Class to manage documents in a freeform view + */ +export class AgentDocumentManager { + private documentsById: Map; + private chatBox: ChatBox; + private chatBoxDocument: Doc | null = null; + private fieldMetadata: Record = {}; + private readonly DOCUMENT_ID_FIELD = '_dash_document_id'; + + /** + * Creates a new DocumentManager + * @param templateDocument The document that serves as a template for new documents + */ + constructor(chatBox: ChatBox) { + this.documentsById = new Map(); + this.chatBox = chatBox; + this.chatBoxDocument = chatBox.Document; + this.processDocument(this.chatBoxDocument); + this.initializeFieldMetadata(); + } + + /** + * Extracts field metadata from DocumentOptions class + */ + private initializeFieldMetadata() { + // Parse DocumentOptions to extract field definitions + const documentOptionsInstance = new DocumentOptions(); + const documentOptionsEntries = Object.entries(documentOptionsInstance); + + for (const [fieldName, fieldInfo] of documentOptionsEntries) { + // Extract field information + const fieldData: Record = { + name: fieldName, + withoutUnderscore: fieldName.startsWith('_') ? fieldName.substring(1) : fieldName, + description: '', + type: 'unknown', + required: false, + defaultValue: undefined, + possibleValues: [], + }; + + // Check if fieldInfo has description property (it's likely a FInfo instance) + if (fieldInfo && typeof fieldInfo === 'object' && 'description' in fieldInfo) { + fieldData.description = fieldInfo.description; + + // Extract field type if available + if ('fieldType' in fieldInfo) { + fieldData.type = fieldInfo.fieldType; + } + + // Extract possible values if available + if ('values' in fieldInfo && Array.isArray(fieldInfo.values)) { + fieldData.possibleValues = fieldInfo.values; + } + } + + this.fieldMetadata[fieldName] = fieldData; + } + } + + /** + * Gets all documents in the same Freeform view as the ChatBox + * Uses the LinkManager to get all linked documents, similar to how ChatBox does it + */ + public initializeFindDocsFreeform() { + // Reset collections + this.documentsById.clear(); + + try { + // Use the LinkManager approach which is proven to work in ChatBox + if (this.chatBoxDocument) { + console.log('Finding documents linked to ChatBox document with ID:', this.chatBoxDocument.id); + + // Get directly linked documents via LinkManager + const linkedDocs = LinkManager.Instance.getAllRelatedLinks(this.chatBoxDocument) + .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.chatBoxDocument!))) + .map(d => DocCast(d?.annotationOn, d)) + .filter(d => d); + + console.log(`Found ${linkedDocs.length} linked documents via LinkManager`); + + // Process the linked documents + linkedDocs.forEach((doc: Doc) => { + if (doc) { + this.processDocument(doc); + } + }); + + // Include the ChatBox document itself + this.processDocument(this.chatBoxDocument); + + // If we have access to the Document's parent, try to find sibling documents + if (this.chatBoxDocument.parent) { + const parent = this.chatBoxDocument.parent; + console.log('Found parent document, checking for siblings'); + + // Check if parent is a Doc type and has a childDocs function + if (parent && typeof parent === 'object' && 'childDocs' in parent && typeof parent.childDocs === 'function') { + try { + const siblingDocs = parent.childDocs(); + if (Array.isArray(siblingDocs)) { + console.log(`Found ${siblingDocs.length} sibling documents via parent.childDocs()`); + siblingDocs.forEach((doc: Doc) => { + if (doc) { + this.processDocument(doc); + } + }); + } + } catch (e) { + console.warn('Error accessing parent.childDocs:', e); + } + } + } + } else if (this.chatBox && this.chatBox.linkedDocs) { + // If we have direct access to the linkedDocs computed property from ChatBox + console.log('Using ChatBox.linkedDocs directly'); + const linkedDocs = this.chatBox.linkedDocs; + if (Array.isArray(linkedDocs)) { + console.log(`Found ${linkedDocs.length} documents via ChatBox.linkedDocs`); + linkedDocs.forEach((doc: Doc) => { + if (doc) { + this.processDocument(doc); + } + }); + } + + // Process the ChatBox document if available + if (this.chatBox.Document) { + this.processDocument(this.chatBox.Document); + } + } else { + console.warn('No ChatBox document reference available for finding linked documents'); + } + + console.log(`DocumentMetadataTool found ${this.documentsById.size} total documents`); + } catch (error) { + console.error('Error finding documents in Freeform view:', error); + } + } + + /** + * Process a document by ensuring it has an ID and adding it to the appropriate collections + * @param doc The document to process + */ + public processDocument(doc: Doc) { + // Ensure document has a persistent ID + const docId = this.ensureDocumentId(doc); + // Only add if we haven't already processed this document + if (!this.documentsById.has(docId)) { + this.documentsById.set(docId, { layoutDoc: doc, dataDoc: doc[DocData] }); + } + } + + /** + * Ensures a document has a persistent ID stored in its metadata + * @param doc The document to ensure has an ID + * @returns The document's ID + */ + private ensureDocumentId(doc: Doc): string { + let docId: string | undefined; + + // First try to get the ID from our custom field + if (doc[this.DOCUMENT_ID_FIELD]) { + docId = String(doc[this.DOCUMENT_ID_FIELD]); + return docId; + } + + // Try different ways to get a document ID + + // 1. Try the direct id property if it exists + if (doc.id && typeof doc.id === 'string') { + docId = doc.id; + } + // 2. Try doc._id if it exists + else if (doc._id && typeof doc._id === 'string') { + docId = doc._id; + } + // 3. Try doc.data?.id if it exists + else if (doc.data && typeof doc.data === 'object' && 'id' in doc.data && typeof doc.data.id === 'string') { + docId = doc.data.id; + } + // 4. If none of the above work, generate a UUID + else { + docId = uuidv4(); + console.log(`Generated new UUID for document with title: ${doc.title || 'Untitled'}`); + } + + // Store the ID in the document's metadata so it persists + try { + doc[this.DOCUMENT_ID_FIELD] = docId; + } catch (e) { + console.warn(`Could not assign ID to document property`, e); + } + + return docId; + } + + /** + * Extracts metadata from a specific document + * @param docId The ID of the document to extract metadata from + * @returns An object containing the document's metadata + */ + public extractDocumentMetadata(doc?: AgentDocument) { + if (!doc) return null; + const layoutDoc = doc.layoutDoc; + const dataDoc = doc.dataDoc; + + const metadata: Record = { + id: layoutDoc.dash_document_id || layoutDoc.id || '', + title: layoutDoc.title || '', + type: layoutDoc.type || '', + fields: { + layout: {}, + data: {}, + }, + fieldLocationMap: {}, + }; + + // Process all known field definitions + Object.keys(this.fieldMetadata).forEach(fieldName => { + const fieldDef = this.fieldMetadata[fieldName]; + const strippedName = fieldName.startsWith('_') ? fieldName.substring(1) : fieldName; + + // Check if field exists on layout document + let layoutValue = undefined; + if (layoutDoc) { + layoutValue = layoutDoc[fieldName]; + if (layoutValue !== undefined) { + // Field exists on layout document + metadata.fields.layout[fieldName] = this.formatFieldValue(layoutValue); + metadata.fieldLocationMap[strippedName] = 'layout'; + } + } + + // Check if field exists on data document + let dataValue = undefined; + if (dataDoc) { + dataValue = dataDoc[fieldName]; + if (dataValue !== undefined) { + // Field exists on data document + metadata.fields.data[fieldName] = this.formatFieldValue(dataValue); + if (!metadata.fieldLocationMap[strippedName]) { + metadata.fieldLocationMap[strippedName] = 'data'; + } + } + } + + // For fields with stripped names (without leading underscore), + // also check if they exist on documents without the underscore + if (fieldName.startsWith('_')) { + const nonUnderscoreFieldName = fieldName.substring(1); + + if (layoutDoc) { + const nonUnderscoreLayoutValue = layoutDoc[nonUnderscoreFieldName]; + if (nonUnderscoreLayoutValue !== undefined) { + metadata.fields.layout[nonUnderscoreFieldName] = this.formatFieldValue(nonUnderscoreLayoutValue); + metadata.fieldLocationMap[nonUnderscoreFieldName] = 'layout'; + } + } + + if (dataDoc) { + const nonUnderscoreDataValue = dataDoc[nonUnderscoreFieldName]; + if (nonUnderscoreDataValue !== undefined) { + metadata.fields.data[nonUnderscoreFieldName] = this.formatFieldValue(nonUnderscoreDataValue); + if (!metadata.fieldLocationMap[nonUnderscoreFieldName]) { + metadata.fieldLocationMap[nonUnderscoreFieldName] = 'data'; + } + } + } + } + }); + + // Add common field aliases for easier discovery + // This helps users understand both width and _width refer to the same property + if (metadata.fields.layout._width !== undefined && metadata.fields.layout.width === undefined) { + metadata.fields.layout.width = metadata.fields.layout._width; + metadata.fieldLocationMap.width = 'layout'; + } + + if (metadata.fields.layout._height !== undefined && metadata.fields.layout.height === undefined) { + metadata.fields.layout.height = metadata.fields.layout._height; + metadata.fieldLocationMap.height = 'layout'; + } + + return metadata; + } + + /** + * Formats a field value for JSON output + * @param value The field value to format + * @returns A JSON-friendly representation of the field value + */ + private formatFieldValue(value: any): any { + if (value === undefined || value === null) { + return null; + } + + // Handle Doc objects + if (value instanceof Doc) { + return { + type: 'Doc', + id: value.id || this.ensureDocumentId(value), + title: value.title || '', + docType: value.type || '', + }; + } + + // Handle RichTextField (try to extract plain text) + if (typeof value === 'string' && value.includes('"type":"doc"') && value.includes('"content":')) { + try { + const rtfObj = JSON.parse(value); + // If this looks like a rich text field structure + if (rtfObj.doc && rtfObj.doc.content) { + // Recursively extract text from the content + let plainText = ''; + const extractText = (node: any) => { + if (node.text) { + plainText += node.text; + } + if (node.content && Array.isArray(node.content)) { + node.content.forEach((child: any) => extractText(child)); + } + }; + + extractText(rtfObj.doc); + + // If we successfully extracted text, show it, but also preserve the original value + if (plainText) { + return { + type: 'RichText', + text: plainText, + length: plainText.length, + // Don't include the full value as it can be very large + }; + } + } + } catch (e) { + // If parsing fails, just treat as a regular string + } + } + + // Handle arrays and complex objects + if (typeof value === 'object') { + // If the object has a toString method, use it + if (value.toString && value.toString !== Object.prototype.toString) { + return value.toString(); + } + + try { + // Try to convert to JSON string + return JSON.stringify(value); + } catch (e) { + return '[Complex Object]'; + } + } + + // Return primitive values as is + return value; + } + + /** + * Converts a string field value to the appropriate type based on field metadata + * @param fieldName The name of the field + * @param fieldValue The string value to convert + * @returns The converted value with the appropriate type + */ + private convertFieldValue(fieldName: string, fieldValue: any): any { + // If fieldValue is already a number or boolean, we don't need to convert it from string + if (typeof fieldValue === 'number' || typeof fieldValue === 'boolean') { + return fieldValue; + } + + // If fieldValue is a string "true" or "false", convert to boolean + if (typeof fieldValue === 'string') { + if (fieldValue.toLowerCase() === 'true') { + return true; + } + if (fieldValue.toLowerCase() === 'false') { + return false; + } + } + + // If fieldValue is not a string (and not a number or boolean), convert it to string + if (typeof fieldValue !== 'string') { + fieldValue = String(fieldValue); + } + + // Special handling for text field - convert to proper RichTextField format + if (fieldName === 'text') { + try { + // Check if it's already a valid JSON RichTextField + JSON.parse(fieldValue); + return fieldValue; + } catch (e) { + // It's a plain text string, so convert it to RichTextField format + const rtf = { + doc: { + type: 'doc', + content: [ + { + type: 'paragraph', + content: [ + { + type: 'text', + text: fieldValue, + }, + ], + }, + ], + }, + }; + return JSON.stringify(rtf); + } + } + + // Get field metadata + const normalizedFieldName = fieldName.startsWith('_') ? fieldName : `_${fieldName}`; + const strippedFieldName = fieldName.startsWith('_') ? fieldName.substring(1) : fieldName; + + // Check both versions of the field name in metadata + const fieldMeta = this.fieldMetadata[normalizedFieldName] || this.fieldMetadata[strippedFieldName]; + + // Special handling for width and height without metadata + if (!fieldMeta && (fieldName === '_width' || fieldName === '_height' || fieldName === 'width' || fieldName === 'height')) { + const num = Number(fieldValue); + return isNaN(num) ? fieldValue : num; + } + + if (!fieldMeta) { + // If no metadata found, just return the string value + return fieldValue; + } + + // Convert based on field type + const fieldType = fieldMeta.type; + + if (fieldType === 'boolean') { + // Convert to boolean + return fieldValue.toLowerCase() === 'true'; + } else if (fieldType === 'number') { + // Convert to number + const num = Number(fieldValue); + return isNaN(num) ? fieldValue : num; + } else if (fieldType === 'date') { + // Try to convert to date (stored as number timestamp) + try { + return new Date(fieldValue).getTime(); + } catch (e) { + return fieldValue; + } + } else if (fieldType.includes('list') || fieldType.includes('array')) { + // Try to parse as JSON array + try { + return JSON.parse(fieldValue); + } catch (e) { + return fieldValue; + } + } else if (fieldType === 'json' || fieldType === 'object') { + // Try to parse as JSON object + try { + return JSON.parse(fieldValue); + } catch (e) { + return fieldValue; + } + } + + // Default to string + return fieldValue; + } + + /** + * Extracts all field metadata from DocumentOptions + * @returns A structured object containing metadata about all available document fields + */ + public getAllFieldMetadata() { + // Start with our already populated fieldMetadata from the DocumentOptions class + const result: Record = { + fieldCount: Object.keys(this.fieldMetadata).length, + fields: {}, + fieldsByType: { + string: [], + number: [], + boolean: [], + //doc: [], + //list: [], + //date: [], + //enumeration: [], + //other: [], + }, + fieldNameMappings: {}, + commonFields: { + appearance: [], + position: [], + size: [], + content: [], + behavior: [], + layout: [], + }, + }; + + // Process each field in the metadata + Object.entries(this.fieldMetadata).forEach(([fieldName, fieldInfo]) => { + const strippedName = fieldName.startsWith('_') ? fieldName.substring(1) : fieldName; + + // Add to fieldNameMappings + if (fieldName.startsWith('_')) { + result.fieldNameMappings[strippedName] = fieldName; + } + + // Create structured field metadata + const fieldData: Record = { + name: fieldName, + displayName: strippedName, + description: fieldInfo.description || '', + type: fieldInfo.fieldType || 'unknown', + possibleValues: fieldInfo.values || [], + }; + + // Add field to fields collection + result.fields[fieldName] = fieldData; + + // Categorize by field type + const type = fieldInfo.fieldType?.toLowerCase() || 'unknown'; + if (type === 'string') { + result.fieldsByType.string.push(fieldName); + } else if (type === 'number') { + result.fieldsByType.number.push(fieldName); + } else if (type === 'boolean') { + result.fieldsByType.boolean.push(fieldName); + } else if (type === 'doc') { + //result.fieldsByType.doc.push(fieldName); + } else if (type === 'list') { + //result.fieldsByType.list.push(fieldName); + } else if (type === 'date') { + //result.fieldsByType.date.push(fieldName); + } else if (type === 'enumeration') { + //result.fieldsByType.enumeration.push(fieldName); + } else { + //result.fieldsByType.other.push(fieldName); + } + + // Categorize by field purpose + if (fieldName.includes('width') || fieldName.includes('height') || fieldName.includes('size')) { + result.commonFields.size.push(fieldName); + } else if (fieldName.includes('color') || fieldName.includes('background') || fieldName.includes('border')) { + result.commonFields.appearance.push(fieldName); + } else if (fieldName.includes('x') || fieldName.includes('y') || fieldName.includes('position') || fieldName.includes('pan')) { + result.commonFields.position.push(fieldName); + } else if (fieldName.includes('text') || fieldName.includes('title') || fieldName.includes('data')) { + result.commonFields.content.push(fieldName); + } else if (fieldName.includes('action') || fieldName.includes('click') || fieldName.includes('event')) { + result.commonFields.behavior.push(fieldName); + } else if (fieldName.includes('layout')) { + result.commonFields.layout.push(fieldName); + } + }); + + // Add special section for auto-sizing related fields + result.autoSizingFields = { + height: { + autoHeightField: '_layout_autoHeight', + heightField: '_height', + displayName: 'height', + usage: 'To manually set height, first set layout_autoHeight to false', + }, + width: { + autoWidthField: '_layout_autoWidth', + widthField: '_width', + displayName: 'width', + usage: 'To manually set width, first set layout_autoWidth to false', + }, + }; + + // Add special section for text field format + result.specialFields = { + text: { + name: 'text', + description: 'Document text content', + format: 'RichTextField', + note: 'When setting text, provide plain text - it will be automatically converted to the correct format', + example: 'For setting: "Hello world" (plain text); For getting: Will be converted to plaintext for display', + }, + }; + + return result; + } + + /** + * Edits a specific field on a document + * @param docId The ID of the document to edit + * @param fieldName The name of the field to edit + * @param fieldValue The new value for the field (string, number, or boolean) + * @returns Object with success status, message, and additional information + */ + public editDocumentField( + docId: string, + fieldName: string, + fieldValue: string | number | boolean + ): { + success: boolean; + message: string; + fieldName?: string; + originalFieldName?: string; + newValue?: any; + warning?: string; + } { + // Normalize field name (handle with/without underscore) + let normalizedFieldName = fieldName.startsWith('_') ? fieldName : fieldName; + const strippedFieldName = fieldName.startsWith('_') ? fieldName.substring(1) : fieldName; + + // Handle common field name aliases (width → _width, height → _height) + // Many document fields use '_' prefix for layout properties + if (fieldName === 'width') { + normalizedFieldName = '_width'; + } else if (fieldName === 'height') { + normalizedFieldName = '_height'; + } + + // Get the documents + const doc = this.documentsById.get(docId); + if (!doc) { + return { success: false, message: `Document with ID ${docId} not found` }; + } + + const { layoutDoc, dataDoc } = this.documentsById.get(docId) ?? { layoutDoc: null, dataDoc: null }; + + if (!layoutDoc && !dataDoc) { + return { success: false, message: `Could not find layout or data document for document with ID ${docId}` }; + } + + try { + // Convert the field value to the appropriate type based on field metadata + const convertedValue = this.convertFieldValue(normalizedFieldName, fieldValue); + + let targetDoc: Doc | undefined; + let targetLocation: string; + + // First, check if field exists on layout document using Doc.Get + if (layoutDoc) { + const fieldExistsOnLayout = Doc.Get(layoutDoc, normalizedFieldName, true) !== undefined; + + // If it exists on layout document, update it there + if (fieldExistsOnLayout) { + targetDoc = layoutDoc; + targetLocation = 'layout'; + } + // If it has an underscore prefix, it's likely a layout property even if not yet set + else if (normalizedFieldName.startsWith('_')) { + targetDoc = layoutDoc; + targetLocation = 'layout'; + } + // Otherwise, look for or create on data document + else if (dataDoc) { + targetDoc = dataDoc; + targetLocation = 'data'; + } + // If no data document available, default to layout + else { + targetDoc = layoutDoc; + targetLocation = 'layout'; + } + } + // If no layout document, use data document + else if (dataDoc) { + targetDoc = dataDoc; + targetLocation = 'data'; + } else { + return { success: false, message: `No valid document found for editing` }; + } + + if (!targetDoc) { + return { success: false, message: `Target document not available` }; + } + + // Set the field value on the target document + targetDoc[normalizedFieldName] = convertedValue; + + return { + success: true, + message: `Successfully updated field '${normalizedFieldName}' on ${targetLocation} document (ID: ${docId})`, + fieldName: normalizedFieldName, + originalFieldName: fieldName, + newValue: convertedValue, + }; + } catch (error) { + console.error('Error editing document field:', error); + return { + success: false, + message: `Error updating field: ${error instanceof Error ? error.message : String(error)}`, + }; + } + } + /** + * Gets metadata for a specific document or all documents + * @param documentId Optional ID of a specific document to get metadata for + * @returns Document metadata or metadata for all documents + */ + public getDocumentMetadata(documentId?: string): any { + if (documentId) { + const doc = this.documentsById.get(documentId); + // Get metadata for a specific document + return this.extractDocumentMetadata(doc); + } else { + // Get metadata for all documents + const documentsMetadata: Record = {}; + for (const doc of this.documentsById.values()) { + documentsMetadata.add(this.extractDocumentMetadata(doc)); + } + + return { + documentCount: this.documentsById.size, + documents: documentsMetadata, + fieldDefinitions: this.fieldMetadata, + }; + } + } + + /** + * Adds links between documents based on their IDs + * @param docIds Array of document IDs to link + * @param relationship Optional relationship type for the links + * @returns Array of created link documents + */ + public addLinks(docIds: string[]): Doc[] { + const createdLinks: Doc[] = []; + // Use string keys for Set instead of arrays which don't work as expected as keys + const alreadyLinked = new Set(); + + // Iterate over the document IDs and add links + docIds.forEach(docId1 => { + const doc1 = this.documentsById.get(docId1); + docIds.forEach(docId2 => { + if (docId1 === docId2) return; // Skip self-linking + + // Create a consistent key regardless of document order + const linkKey = [docId1, docId2].sort().join('_'); + if (alreadyLinked.has(linkKey)) return; + + const doc2 = this.documentsById.get(docId2); + if (doc1?.layoutDoc && doc2?.layoutDoc) { + try { + // Create a link document between doc1 and doc2 + const linkDoc = Docs.Create.LinkDocument(doc1.layoutDoc, doc2.layoutDoc); + + // Set a default color if relationship doesn't specify one + if (!linkDoc.color) { + linkDoc.color = 'lightBlue'; // Default blue color + } + + // Ensure link is visible by setting essential properties + linkDoc.link_visible = true; + linkDoc.link_enabled = true; + linkDoc.link_autoMove = true; + linkDoc.link_showDirected = true; + + // Set the embedContainer to ensure visibility + // This is shown in the image as a key difference between visible/non-visible links + if (this.chatBoxDocument && this.chatBoxDocument.parent && typeof this.chatBoxDocument.parent === 'object' && 'title' in this.chatBoxDocument.parent) { + linkDoc.embedContainer = String(this.chatBoxDocument.parent.title); + } else if (doc1.layoutDoc.parent && typeof doc1.layoutDoc.parent === 'object' && 'title' in doc1.layoutDoc.parent) { + linkDoc.embedContainer = String(doc1.layoutDoc.parent.title); + } else { + // Default to a tab name if we can't find one + linkDoc.embedContainer = 'Untitled Tab 1'; + } + + // Add the link to the document system + LinkManager.Instance.addLink(linkDoc); + + const ancestor = DocumentView.linkCommonAncestor(linkDoc); + ancestor?.ComponentView?.addDocument?.(linkDoc); + // Add to user document list to make it visible in the UI + Doc.AddDocToList(Doc.UserDoc(), 'links', linkDoc); + + // Create a visual link for display + if (this.chatBoxDocument) { + // Make sure the docs are visible in the UI + this.chatBox._props.addDocument?.(doc1.layoutDoc); + this.chatBox._props.addDocument?.(doc2.layoutDoc); + + // Use DocumentManager to ensure documents are visible + DocumentManager.Instance.showDocument(doc1.layoutDoc, { willZoomCentered: false }); + DocumentManager.Instance.showDocument(doc2.layoutDoc, { willZoomCentered: false }); + } + + createdLinks.push(linkDoc); + alreadyLinked.add(linkKey); + } catch (error) { + console.error('Error creating link between documents:', error); + } + } + }); + }); + + // Force update of the UI to show new links + setTimeout(() => { + try { + // Update server cache to ensure links are persisted + UPDATE_SERVER_CACHE && typeof UPDATE_SERVER_CACHE === 'function' && UPDATE_SERVER_CACHE(); + } catch (e) { + console.warn('Could not update server cache after creating links:', e); + } + }, 100); + + return createdLinks; + } + /** + * Helper method to validate a document type and ensure it's a valid supportedDocType + * @param docType The document type to validate + * @returns True if the document type is valid, false otherwise + */ + private isValidDocType(docType: string): boolean { + return Object.values(supportedDocTypes).includes(docType as supportedDocTypes); + } + /** + * Creates a document in the dashboard. + * + * @param {string} doc_type - The type of document to create. + * @param {string} data - The data used to generate the document. + * @param {DocumentOptions} options - Configuration options for the document. + * @returns {Promise} A promise that resolves once the document is created and displayed. + */ + createDocInDash = (docType: string, title: string, data: string) => { + // Validate doc_type + if (!this.isValidDocType(docType)) { + throw new Error(`Invalid document type: ${docType}`); + } + + try { + // Create simple document with just title and data + const simpleDoc: parsedDoc = { + doc_type: docType, + title: title, + data: data, + x: 0, + y: 0, + _width: 300, + _height: 300, + _layout_fitWidth: false, + _layout_autoHeight: true, + }; + + // Use the chatBox's createDocInDash method to create and link the document + if (!this.chatBox) { + throw new Error('ChatBox instance not available for creating document'); + } + const linkAndShowDoc = (doc: Opt) => { + if (doc) { + LinkManager.Instance.addLink(Docs.Create.LinkDocument(this.chatBoxDocument!, doc)); + this.chatBox._props.addDocument?.(doc); + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); + } + }; + const doc = this.chatBox.whichDoc(simpleDoc, false); + if (doc) linkAndShowDoc(doc); + return doc; + } catch (error) { + throw new Error(`Error creating document: ${error}`); + } + }; + + public has(docId: string) { + return this.documentsById.has(docId); + } + + public listDocs() { + // List all available documents in simple format + const docs = Array.from(this.documentsById.entries()).map(([id, doc]) => ({ + id, + title: doc.layoutDoc.title || 'Untitled Document', + type: doc.layoutDoc.type || doc.dataDoc.type || 'Unknown Type', + })); + + if (docs.length === 0) { + return [ + { + type: 'text', + text: 'No documents found in the current view.', + }, + ]; + } + + return [ + { + type: 'text', + text: `Found ${docs.length} document(s) in the current view:\n${JSON.stringify(docs, null, 2)}`, + }, + ]; + } + + public createAgentDoc(doc: Doc) { + // Ideally check if Doc is already in there. + const agentDoc = { layoutDoc: doc, dataDoc: doc[DocData] }; + this.documentsById.set(this.ensureDocumentId(doc), agentDoc); + return agentDoc; + } +} -- cgit v1.2.3-70-g09d2 From 5ce2263849bfb901e276a4c5fc8ca2dbd8b80350 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Thu, 24 Apr 2025 13:21:00 -0400 Subject: attempt at linking docs but listing metadata doesn't work --- .../views/nodes/chatbot/agentsystem/Agent.ts | 8 +++--- .../nodes/chatbot/chatboxcomponents/ChatBox.tsx | 29 ++++++++++++++++++++-- src/client/views/nodes/chatbot/tools/SearchTool.ts | 2 +- .../nodes/chatbot/tools/WebsiteInfoScraperTool.ts | 11 ++++---- .../nodes/chatbot/utils/AgentDocumentManager.ts | 10 ++++++-- 5 files changed, 46 insertions(+), 14 deletions(-) (limited to 'src/client/views/nodes/chatbot/chatboxcomponents') diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts index 5af021dbf..c021d141e 100644 --- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts +++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts @@ -66,10 +66,12 @@ export class Agent { history: () => string, csvData: () => { filename: string; id: string; text: string }[], addLinkedUrlDoc: (url: string, id: string) => void, + getLinkedUrlDocId: (url: string) => string[], createImage: (result: Upload.FileInformation & Upload.InspectionResults, options: DocumentOptions) => void, // eslint-disable-next-line @typescript-eslint/no-unused-vars createCSVInDash: (url: string, title: string, id: string, data: string) => void, - chatBox: ChatBox + chatBox: ChatBox, + docManager: AgentDocumentManager ) { // Initialize OpenAI client with API key from environment this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); @@ -77,14 +79,14 @@ export class Agent { this._history = history; this._summaries = summaries; this._csvData = csvData; - this._docManager = new AgentDocumentManager(chatBox); + this._docManager = docManager; // Define available tools for the assistant this.tools = { calculate: new CalculateTool(), rag: new RAGTool(this.vectorstore), dataAnalysis: new DataAnalysisTool(csvData), - websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc), + websiteInfoScraper: new WebsiteInfoScraperTool(getLinkedUrlDocId), searchTool: new SearchTool(addLinkedUrlDoc), noTool: new NoTool(), //imageCreationTool: new ImageCreationTool(createImage), diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index e09b4313f..43765c1ce 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -44,6 +44,7 @@ import { ProgressBar } from './ProgressBar'; import { OpenWhere } from '../../OpenWhere'; import { Upload } from '../../../../../server/SharedMediaTypes'; import { DocumentMetadataTool } from '../tools/DocumentMetadataTool'; +import { AgentDocumentManager } from '../utils/AgentDocumentManager'; dotenv.config(); @@ -76,6 +77,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { private agent: Agent; private messagesRef: React.RefObject; private _textInputRef: HTMLInputElement | undefined | null; + private docManager: AgentDocumentManager; /** * Static method that returns the layout string for the field. @@ -107,7 +109,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id); } this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds); - this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.createImageInDash, this.createCSVInDash, this); + this.docManager = new AgentDocumentManager(this); + this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.getLinkedUrlDocIds, this.createImageInDash, this.createCSVInDash, this, this.docManager); // Reinitialize the DocumentMetadataTool with a direct reference to this ChatBox instance // This ensures the tool can properly access documents in the same Freeform view @@ -380,7 +383,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @action addLinkedUrlDoc = async (url: string, id: string) => { const doc = Docs.Create.WebDocument(url, { data_useCors: true }); - + this.docManager.addCustomId(doc, id); const linkDoc = Docs.Create.LinkDocument(this.Document, doc); LinkManager.Instance.addLink(linkDoc); @@ -391,6 +394,28 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }; doc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] }); + this.docManager.processDocument(doc); + }; + + /** + * Retrieves the IDs of linked url documents. + * @returns An array of document IDs. + */ + @action + getLinkedUrlDocIds = () => { + const linkedDocs: Doc[] = this.linkedDocs; + const linkedUrlDocIds: string[] = []; + + for (const doc of linkedDocs) { + if (doc.chunk_simpl) { + const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] }; + const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkType === CHUNK_TYPE.URL); + if (foundChunk) { + linkedUrlDocIds.push(foundChunk.chunkId); + } + } + } + return linkedUrlDocIds; }; /** diff --git a/src/client/views/nodes/chatbot/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts index 6a11407a5..2ee30f0cf 100644 --- a/src/client/views/nodes/chatbot/tools/SearchTool.ts +++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts @@ -28,7 +28,7 @@ export class SearchTool extends BaseTool { private _addLinkedUrlDoc: (url: string, id: string) => void; private _max_results: number; - constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 4) { + constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 3) { super(searchToolInfo); this._addLinkedUrlDoc = addLinkedUrlDoc; this._max_results = max_results; diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts index 19ccd0b36..bff38ae15 100644 --- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts +++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts @@ -66,11 +66,11 @@ const websiteInfoScraperToolInfo: ToolInfo = { }; export class WebsiteInfoScraperTool extends BaseTool { - private _addLinkedUrlDoc: (url: string, id: string) => void; + private _getLinkedUrlDocId: (url: string) => string[]; - constructor(addLinkedUrlDoc: (url: string, id: string) => void) { + constructor(getLinkedUrlDocIds: (url: string) => string[]) { super(websiteInfoScraperToolInfo); - this._addLinkedUrlDoc = addLinkedUrlDoc; + this._getLinkedUrlDocId = getLinkedUrlDocIds; } async execute(args: ParametersType): Promise { @@ -79,9 +79,8 @@ export class WebsiteInfoScraperTool extends BaseTool { try { - const { website_plain_text } = await Networking.PostToServer('/scrapeWebsite', { url }); - const id = uuidv4(); - this._addLinkedUrlDoc(url, id); + const { website_plain_text } = (await Networking.PostToServer('/scrapeWebsite', { url })) as { website_plain_text: string }; + const id = this._getLinkedUrlDocId(url); return { type: 'text', text: `\n${website_plain_text}\n`, diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts index c954226e4..4eeac3c6a 100644 --- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts +++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts @@ -90,7 +90,7 @@ export class AgentDocumentManager { */ public initializeFindDocsFreeform() { // Reset collections - this.documentsById.clear(); + //this.documentsById.clear(); try { // Use the LinkManager approach which is proven to work in ChatBox @@ -109,6 +109,7 @@ export class AgentDocumentManager { linkedDocs.forEach((doc: Doc) => { if (doc) { this.processDocument(doc); + console.log('Processed linked document:', doc.id, doc.title, doc.type); } }); @@ -164,6 +165,11 @@ export class AgentDocumentManager { } } + public addCustomId(doc: Doc, id: string) { + doc.id = id; + doc.DOCUMENT_ID_FIELD = id; + } + /** * Process a document by ensuring it has an ID and adding it to the appropriate collections * @param doc The document to process @@ -730,7 +736,7 @@ export class AgentDocumentManager { // Get metadata for all documents const documentsMetadata: Record = {}; for (const doc of this.documentsById.values()) { - documentsMetadata.add(this.extractDocumentMetadata(doc)); + documentsMetadata.add(this.extractDocumentMetadata(doc) ?? { documentId: doc.layoutDoc.id, title: doc.layoutDoc.title, type: doc.layoutDoc.type }); } return { -- cgit v1.2.3-70-g09d2 From 3ef3d40506348d9fd537cc8f4aea975b9770689f Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sun, 27 Apr 2025 13:14:49 -0400 Subject: new attempt with new citation unification --- .../views/nodes/chatbot/agentsystem/Agent.ts | 5 +- .../nodes/chatbot/chatboxcomponents/ChatBox.tsx | 450 +++++++++++++-------- .../nodes/chatbot/tools/DocumentMetadataTool.ts | 16 +- src/client/views/nodes/chatbot/tools/SearchTool.ts | 18 +- src/client/views/nodes/chatbot/types/types.ts | 1 + .../nodes/chatbot/utils/AgentDocumentManager.ts | 168 +++++--- .../views/nodes/chatbot/vectorstore/Vectorstore.ts | 130 ++++-- 7 files changed, 510 insertions(+), 278 deletions(-) (limited to 'src/client/views/nodes/chatbot/chatboxcomponents') diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts index c021d141e..80fdb6533 100644 --- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts +++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts @@ -65,12 +65,9 @@ export class Agent { summaries: () => string, history: () => string, csvData: () => { filename: string; id: string; text: string }[], - addLinkedUrlDoc: (url: string, id: string) => void, getLinkedUrlDocId: (url: string) => string[], createImage: (result: Upload.FileInformation & Upload.InspectionResults, options: DocumentOptions) => void, - // eslint-disable-next-line @typescript-eslint/no-unused-vars createCSVInDash: (url: string, title: string, id: string, data: string) => void, - chatBox: ChatBox, docManager: AgentDocumentManager ) { // Initialize OpenAI client with API key from environment @@ -87,7 +84,7 @@ export class Agent { rag: new RAGTool(this.vectorstore), dataAnalysis: new DataAnalysisTool(csvData), websiteInfoScraper: new WebsiteInfoScraperTool(getLinkedUrlDocId), - searchTool: new SearchTool(addLinkedUrlDoc), + searchTool: new SearchTool(this._docManager), noTool: new NoTool(), //imageCreationTool: new ImageCreationTool(createImage), documentMetadata: new DocumentMetadataTool(this._docManager), diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index 43765c1ce..35dbee3e9 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -71,7 +71,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @observable private _citationPopup: { text: string; visible: boolean } = { text: '', visible: false }; // Private properties for managing OpenAI API, vector store, agent, and UI elements - private openai: OpenAI; + private openai!: OpenAI; // Using definite assignment assertion private vectorstore_id: string; private vectorstore: Vectorstore; private agent: Agent; @@ -98,25 +98,34 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { */ constructor(props: FieldViewProps) { super(props); - makeObservable(this); // Enable MobX observables + makeObservable(this); - // Initialize OpenAI, vectorstore, and agent - this.openai = this.initializeOpenAI(); - if (StrCast(this.dataDoc.vectorstore_id) == '') { - this.vectorstore_id = uuidv4(); - this.dataDoc.vectorstore_id = this.vectorstore_id; - } else { - this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id); - } - this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds); + this.messagesRef = React.createRef(); this.docManager = new AgentDocumentManager(this); - this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.getLinkedUrlDocIds, this.createImageInDash, this.createCSVInDash, this, this.docManager); - // Reinitialize the DocumentMetadataTool with a direct reference to this ChatBox instance - // This ensures the tool can properly access documents in the same Freeform view - this.agent.reinitializeDocumentMetadataTool(); + // Initialize OpenAI client + this.initializeOpenAI(); + + // Create a unique vectorstore ID for this ChatBox + this.vectorstore_id = uuidv4(); + + // Initialize vectorstore with the document manager + this.vectorstore = new Vectorstore(this.vectorstore_id, this.docManager); + + // Create an agent with the vectorstore + this.agent = new Agent( + this.vectorstore, + this.retrieveSummaries.bind(this), + this.retrieveFormattedHistory.bind(this), + this.retrieveCSVData.bind(this), + this.retrieveDocIds.bind(this), + this.createImageInDash.bind(this), + this.createCSVInDash.bind(this), + this.docManager + ); - this.messagesRef = React.createRef(); + // Add event listeners + this.addScrollListener(); // Reaction to update dataDoc when chat history changes reaction( @@ -140,22 +149,25 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { */ @action addDocToVectorstore = async (newLinkedDoc: Doc) => { - this._uploadProgress = 0; - this._currentStep = 'Initializing...'; - this._isUploadingDocs = true; - try { - // Add the document to the vectorstore + this._isUploadingDocs = true; + + // Process the document first to ensure it has a valid ID + this.docManager.processDocument(newLinkedDoc); + + // Add the document to the vectorstore which will also register chunks await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress); - } catch (error) { - console.error('Error uploading document:', error); - this._currentStep = 'Error during upload'; - } finally { - runInAction(() => { - this._isUploadingDocs = false; - this._uploadProgress = 0; - this._currentStep = ''; - }); + + // No longer needed as documents are tracked by the AgentDocumentManager + // this._linked_docs_to_add.add(newLinkedDoc); + + this._isUploadingDocs = false; + + return true; + } catch (err) { + console.error('Error adding document to vectorstore:', err); + this._isUploadingDocs = false; + return false; } }; @@ -238,7 +250,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true, }; - return new OpenAI(configuration); + this.openai = new OpenAI(configuration); } /** @@ -375,49 +387,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } }; - /** - * Adds a linked document from a URL for future reference and analysis. - * @param url The URL of the document to add. - * @param id The unique identifier for the document. - */ - @action - addLinkedUrlDoc = async (url: string, id: string) => { - const doc = Docs.Create.WebDocument(url, { data_useCors: true }); - this.docManager.addCustomId(doc, id); - const linkDoc = Docs.Create.LinkDocument(this.Document, doc); - LinkManager.Instance.addLink(linkDoc); - - const chunkToAdd = { - chunkId: id, - chunkType: CHUNK_TYPE.URL, - url: url, - }; - - doc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] }); - this.docManager.processDocument(doc); - }; - - /** - * Retrieves the IDs of linked url documents. - * @returns An array of document IDs. - */ - @action - getLinkedUrlDocIds = () => { - const linkedDocs: Doc[] = this.linkedDocs; - const linkedUrlDocIds: string[] = []; - - for (const doc of linkedDocs) { - if (doc.chunk_simpl) { - const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] }; - const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkType === CHUNK_TYPE.URL); - if (foundChunk) { - linkedUrlDocIds.push(foundChunk.chunkId); - } - } - } - return linkedUrlDocIds; - }; - /** * Getter to retrieve the current user's name from the client utils. */ @@ -613,82 +582,224 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { */ @action handleCitationClick = async (citation: Citation) => { - const currentLinkedDocs: Doc[] = this.linkedDocs; - const chunkId = citation.chunk_id; + try { + // Extract values from MobX proxy object if needed + const chunkId = typeof citation.chunk_id === 'object' ? (citation.chunk_id as any).toString() : citation.chunk_id; + + // For debugging + console.log('Citation clicked:', { + chunkId, + citation: JSON.stringify(citation, null, 2), + }); - for (const doc of currentLinkedDocs) { - if (doc.chunk_simpl) { - const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] }; - const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId); + // Try to find the document + const linkedDocs = this.linkedDocs; + let doc: Doc | undefined; - if (foundChunk) { - // Handle media chunks specifically + // First try to find the document using the document manager's chunk ID lookup + const parentDocId = this.docManager.getDocIdByChunkId(chunkId); + if (parentDocId) { + doc = this.docManager.getDocument(parentDocId); + console.log(`Found document by chunk ID lookup: ${parentDocId}`); + } - if (doc.ai_type == 'video' || doc.ai_type == 'audio') { - const directMatchSegmentStart = this.getDirectMatchingSegmentStart(doc, citation.direct_text || '', foundChunk.indexes || []); + // If not found, fall back to searching through linked docs (maintains compatibility) + if (!doc) { + for (const linkedDoc of linkedDocs) { + if (linkedDoc.chunk_simpl) { + try { + const docChunkSimpl = JSON.parse(StrCast(linkedDoc.chunk_simpl)) as { chunks: SimplifiedChunk[] }; + const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId); + if (foundChunk) { + doc = linkedDoc; + console.log(`Found document by iterating through linked docs`); + break; + } + } catch (e) { + console.error(`Error parsing chunk_simpl for doc ${linkedDoc.id}:`, e); + } + } + } + } - if (directMatchSegmentStart) { - // Navigate to the segment's start time in the media player - await this.goToMediaTimestamp(doc, directMatchSegmentStart, doc.ai_type); - } else { - console.error('No direct matching segment found for the citation.'); + if (!doc) { + console.warn(`Document not found for citation with chunk_id: ${chunkId}`); + return; + } + + // Process the chunk data + let docChunkSimpl: { chunks: SimplifiedChunk[] } = { chunks: [] }; + try { + docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl) || '{"chunks":[]}'); + } catch (e) { + console.error(`Error parsing chunk_simpl for the found document:`, e); + return; + } + + const foundChunk = docChunkSimpl.chunks.find((chunk: SimplifiedChunk) => chunk.chunkId === chunkId); + + // Handle different chunk types + if (foundChunk) { + console.log(`Found chunk in document:`, foundChunk); + + // Handle video chunks + if (foundChunk.chunkType === CHUNK_TYPE.VIDEO) { + if (foundChunk.start_time !== undefined) { + await this.goToMediaTimestamp(doc, foundChunk.start_time, 'video'); + } else { + console.warn('Video chunk missing start_time:', foundChunk); + } + } + // Handle audio chunks - note that we're using string comparison since 'audio' isn't in CHUNK_TYPE enum + else if (String(foundChunk.chunkType).toLowerCase() === 'audio') { + if (foundChunk.start_time !== undefined) { + await this.goToMediaTimestamp(doc, foundChunk.start_time, 'audio'); + } else { + console.warn('Audio chunk missing start_time:', foundChunk); + } + } + // Handle table or image chunks + else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) { + this.handleOtherChunkTypes(foundChunk, citation, doc); + } + // Handle text chunks + else if (foundChunk.chunkType === CHUNK_TYPE.TEXT) { + // Find text from the document's chunks metadata + let chunkText = ''; + + try { + // We already parsed the chunks earlier, so use that + const matchingChunk = docChunkSimpl.chunks.find(c => c.chunkId === foundChunk.chunkId); + if (matchingChunk && 'text' in matchingChunk) { + // If the text property exists on the chunk (even though it's not in the type) + chunkText = String(matchingChunk['text'] || ''); } + } catch (e) { + console.error('Error getting chunk text:', e); + } + + // Default text if none found + if (!chunkText) { + chunkText = 'Text content not available'; + } + + this._citationPopup = { + text: chunkText, + visible: true, + }; + } + // Handle URL chunks + else if (foundChunk.chunkType === CHUNK_TYPE.URL) { + if (foundChunk.url) { + // Instead of opening the URL in a new window, show the document in the viewer + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); + console.log(`Navigated to web document with URL: ${foundChunk.url}`); } else { - // Handle other chunk types as before - this.handleOtherChunkTypes(foundChunk, citation, doc); + console.warn('URL chunk missing URL:', foundChunk); } } + } else if (doc?.original_segments) { + // Handle original segments for media files + let original_segments: any[] = []; + try { + original_segments = JSON.parse(StrCast(doc.original_segments)); + } catch (e) { + console.error(`Error parsing original_segments:`, e); + return; + } + + // Check if there's direct text to find in the segments + if (citation.direct_text) { + // Find the segment that contains the direct text + const start = this.getDirectMatchingSegmentStart(doc, citation.direct_text, []); + if (start !== -1) { + await this.goToMediaTimestamp(doc, start, doc.ai_type === 'audio' ? 'audio' : 'video'); + } + } + } else { + console.warn('Unable to find chunk or segments for citation', citation); } + } catch (error) { + console.error('Error handling citation click:', error); } }; + /** + * Finds a matching segment in a document based on text content. + * @param doc The document to search in + * @param citationText The text to find in the document + * @param indexesOfSegments Optional indexes of segments to search in + * @returns The starting timestamp of the matching segment, or -1 if not found + */ getDirectMatchingSegmentStart = (doc: Doc, citationText: string, indexesOfSegments: string[]): number => { - const originalSegments = JSON.parse(StrCast(doc.original_segments!)).map((segment: any, index: number) => ({ - index: index.toString(), - text: segment.text, - start: segment.start, - end: segment.end, - })); - - if (!Array.isArray(originalSegments) || originalSegments.length === 0 || !Array.isArray(indexesOfSegments)) { - return 0; + if (!doc || !citationText) return -1; + + // Get original segments from the document + const original_segments = doc.original_segments ? JSON.parse(StrCast(doc.original_segments)) : []; + + if (!original_segments || !Array.isArray(original_segments) || original_segments.length === 0) { + return -1; } - // Create itemsToSearch array based on indexesOfSegments - const itemsToSearch = indexesOfSegments.map((indexStr: string) => { - const index = parseInt(indexStr, 10); - const segment = originalSegments[index]; - return { text: segment.text, start: segment.start }; - }); + let segments = original_segments; - console.log('Constructed itemsToSearch:', itemsToSearch); + // If specific indexes are provided, filter segments by those indexes + if (indexesOfSegments && indexesOfSegments.length > 0) { + segments = original_segments.filter((segment: any) => indexesOfSegments.includes(segment.index)); + } + + // If no segments match the indexes, use all segments + if (segments.length === 0) { + segments = original_segments; + } - // Helper function to calculate word overlap score + // First try to find an exact match + const exactMatch = segments.find((segment: any) => segment.text && segment.text.includes(citationText)); + + if (exactMatch) { + return exactMatch.start; + } + + // If no exact match, find segment with best word overlap const calculateWordOverlap = (text1: string, text2: string): number => { - const words1 = new Set(text1.toLowerCase().split(/\W+/)); - const words2 = new Set(text2.toLowerCase().split(/\W+/)); - const intersection = new Set([...words1].filter(word => words2.has(word))); - return intersection.size / Math.max(words1.size, words2.size); // Jaccard similarity + if (!text1 || !text2) return 0; + + const words1 = text1.toLowerCase().split(/\s+/); + const words2 = text2.toLowerCase().split(/\s+/); + const wordSet1 = new Set(words1); + + let overlap = 0; + for (const word of words2) { + if (wordSet1.has(word)) { + overlap++; + } + } + + // Return percentage of overlap relative to the shorter text + return overlap / Math.min(words1.length, words2.length); }; - // Search for the best matching segment - let bestMatchStart = 0; - let bestScore = 0; - - console.log(`Searching for best match for query: "${citationText}"`); - itemsToSearch.forEach(item => { - const score = calculateWordOverlap(citationText, item.text); - console.log(`Comparing query to segment: "${item.text}" | Score: ${score}`); - if (score > bestScore) { - bestScore = score; - bestMatchStart = item.start; + // Find segment with highest word overlap + let bestMatch = null; + let highestOverlap = 0; + + for (const segment of segments) { + if (!segment.text) continue; + + const overlap = calculateWordOverlap(segment.text, citationText); + if (overlap > highestOverlap) { + highestOverlap = overlap; + bestMatch = segment; } - }); + } - console.log('Best match found with score:', bestScore, '| Start time:', bestMatchStart); + // Only return matches with significant overlap (more than 30%) + if (bestMatch && highestOverlap > 0.3) { + return bestMatch.start; + } - // Return the start time of the best match - return bestMatchStart; + // If no good match found, return the start of the first segment as fallback + return segments.length > 0 ? segments[0].start : -1; }; /** @@ -772,7 +883,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { break; case CHUNK_TYPE.CSV: case CHUNK_TYPE.URL: - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }); + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => { + console.log(`Showing web document in viewer with URL: ${foundChunk.url}`); + }); break; default: console.error('Unhandled chunk type:', foundChunk.chunkType); @@ -879,6 +992,16 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { } }); this.addScrollListener(); + + // Initialize the document manager by finding existing documents + this.docManager.initializeFindDocsFreeform(); + + // If there are stored doc IDs in our list of docs to add, process them + if (this._linked_docs_to_add.size > 0) { + this._linked_docs_to_add.forEach(doc => { + this.docManager.processDocument(doc); + }); + } } /** @@ -892,28 +1015,28 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { /** * Getter that retrieves all linked documents for the current document. */ - @computed - get linkedDocs() { - return LinkManager.Instance.getAllRelatedLinks(this.Document) - .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d); + @computed get linkedDocs(): Doc[] { + const docIds = this.docManager.listDocs(); + const docs: Doc[] = []; + + // Get documents from the document manager using the getDocument method + docIds.forEach(id => { + const doc = this.docManager.getDocument(id); + if (doc) { + docs.push(doc); + } + }); + + return docs; } /** - * Getter that retrieves document IDs of linked documents that have AI-related content. + * Getter that retrieves document IDs of linked documents that have PDF_chunker–parsed content. */ @computed - get docIds() { - return LinkManager.Instance.getAllRelatedLinks(this.Document) - .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d) - .filter(d => { - console.log(d.ai_doc_id); - return d.ai_doc_id; - }) - .map(d => StrCast(d.ai_doc_id)); + get docIds(): string[] { + // Use the document manager to get all document IDs + return Array.from(this.docManager.listDocs()); } /** @@ -921,23 +1044,18 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { */ @computed get summaries(): string { - return ( - LinkManager.Instance.getAllRelatedLinks(this.Document) - .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document))) - .map(d => DocCast(d?.annotationOn, d)) - .filter(d => d) - .filter(d => d.summary) - .map((doc, index) => { - if (PDFCast(doc.data)) { - return `${doc.summary}`; - } else if (CsvCast(doc.data)) { - return `${doc.summary}`; - } else { - return `${index + 1}) ${doc.summary}`; - } - }) - .join('\n') + '\n' - ); + const linkedDocs = Array.from(this.docManager.listDocs()) + .map(id => { + const doc = this.docManager.extractDocumentMetadata(id); + if (doc && doc.fields && (doc.fields.layout.summary || doc.fields.data.summary)) { + return doc.fields.layout.summary || doc.fields.data.summary; + } + return null; + }) + .filter(Boolean) + .join('\n\n'); + + return linkedDocs; } /** @@ -965,7 +1083,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { // Other helper methods for retrieving document data and processing - retrieveSummaries = () => { + retrieveSummaries = (): string => { return this.summaries; }; @@ -973,12 +1091,12 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { return this.linkedCSVs; }; - retrieveFormattedHistory = () => { + retrieveFormattedHistory = (): string => { return this.formattedHistory; }; - retrieveDocIds = () => { - return this.docIds; + retrieveDocIds = (): string[] => { + return Array.from(this.docManager.listDocs()); }; /** diff --git a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts index 4b751acc0..e6c2421e5 100644 --- a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts +++ b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts @@ -417,9 +417,9 @@ export class DocumentMetadataTool extends BaseTool = { }; export class SearchTool extends BaseTool { - private _addLinkedUrlDoc: (url: string, id: string) => void; + private _docManager: AgentDocumentManager; private _max_results: number; - constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 3) { + constructor(docManager: AgentDocumentManager, max_results: number = 3) { super(searchToolInfo); - this._addLinkedUrlDoc = addLinkedUrlDoc; + this._docManager = docManager; this._max_results = max_results; } @@ -46,8 +49,13 @@ export class SearchTool extends BaseTool { max_results: this._max_results, })) as { results: { url: string; snippet: string }[] }; const data = results.map((result: { url: string; snippet: string }) => { - const id = uuidv4(); - this._addLinkedUrlDoc(result.url, id); + // Create a web document with the URL + const id = this._docManager.createDocInDash('web', result.url, { + title: `Search Result: ${result.url}`, + text_html: result.snippet, + data_useCors: true, + }); + return { type: 'text' as const, text: `${result.url}${result.snippet}`, diff --git a/src/client/views/nodes/chatbot/types/types.ts b/src/client/views/nodes/chatbot/types/types.ts index 882e74ebb..dcb132ec7 100644 --- a/src/client/views/nodes/chatbot/types/types.ts +++ b/src/client/views/nodes/chatbot/types/types.ts @@ -108,6 +108,7 @@ export interface SimplifiedChunk { start_time?: number; end_time?: number; indexes?: string[]; + text?: string; } export interface AI_Document { diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts index 4eeac3c6a..c3beebcde 100644 --- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts +++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts @@ -165,22 +165,18 @@ export class AgentDocumentManager { } } - public addCustomId(doc: Doc, id: string) { - doc.id = id; - doc.DOCUMENT_ID_FIELD = id; - } - /** * Process a document by ensuring it has an ID and adding it to the appropriate collections * @param doc The document to process */ - public processDocument(doc: Doc) { + public processDocument(doc: Doc): string { // Ensure document has a persistent ID const docId = this.ensureDocumentId(doc); // Only add if we haven't already processed this document if (!this.documentsById.has(docId)) { this.documentsById.set(docId, { layoutDoc: doc, dataDoc: doc[DocData] }); } + return docId; } /** @@ -232,7 +228,9 @@ export class AgentDocumentManager { * @param docId The ID of the document to extract metadata from * @returns An object containing the document's metadata */ - public extractDocumentMetadata(doc?: AgentDocument) { + public extractDocumentMetadata(id: string) { + if (!id) return null; + const doc = this.documentsById.get(id); if (!doc) return null; const layoutDoc = doc.layoutDoc; const dataDoc = doc.dataDoc; @@ -729,16 +727,14 @@ export class AgentDocumentManager { */ public getDocumentMetadata(documentId?: string): any { if (documentId) { - const doc = this.documentsById.get(documentId); - // Get metadata for a specific document - return this.extractDocumentMetadata(doc); + console.log(`Returning document metadata for docID, ${documentId}:`, this.extractDocumentMetadata(documentId)); + return this.extractDocumentMetadata(documentId); } else { // Get metadata for all documents const documentsMetadata: Record = {}; - for (const doc of this.documentsById.values()) { - documentsMetadata.add(this.extractDocumentMetadata(doc) ?? { documentId: doc.layoutDoc.id, title: doc.layoutDoc.title, type: doc.layoutDoc.type }); + for (const documentId of this.documentsById.keys()) { + documentsMetadata.add(this.extractDocumentMetadata(documentId)); } - return { documentCount: this.documentsById.size, documents: documentsMetadata, @@ -845,14 +841,15 @@ export class AgentDocumentManager { return Object.values(supportedDocTypes).includes(docType as supportedDocTypes); } /** - * Creates a document in the dashboard. + * Creates a document in the dashboard and returns its ID. + * This is a public API used by tools like SearchTool. * - * @param {string} doc_type - The type of document to create. - * @param {string} data - The data used to generate the document. - * @param {DocumentOptions} options - Configuration options for the document. - * @returns {Promise} A promise that resolves once the document is created and displayed. + * @param docType The type of document to create + * @param data The data for the document + * @param options Optional configuration options + * @returns The ID of the created document */ - createDocInDash = (docType: string, title: string, data: string) => { + public createDocInDash(docType: string, data: string, options?: any): string { // Validate doc_type if (!this.isValidDocType(docType)) { throw new Error(`Invalid document type: ${docType}`); @@ -862,10 +859,10 @@ export class AgentDocumentManager { // Create simple document with just title and data const simpleDoc: parsedDoc = { doc_type: docType, - title: title, + title: options?.title ?? `Untitled Document ${this.documentsById.size + 1}`, data: data, - x: 0, - y: 0, + x: options?.x ?? 0, + y: options?.y ?? 0, _width: 300, _height: 300, _layout_fitWidth: false, @@ -884,46 +881,111 @@ export class AgentDocumentManager { } }; const doc = this.chatBox.whichDoc(simpleDoc, false); - if (doc) linkAndShowDoc(doc); - return doc; + if (doc) { + linkAndShowDoc(doc); + const id = this.processDocument(doc); + return id; + } else { + throw new Error(`Error creating document. Created document not found.`); + } } catch (error) { throw new Error(`Error creating document: ${error}`); } - }; + } public has(docId: string) { return this.documentsById.has(docId); } - public listDocs() { - // List all available documents in simple format - const docs = Array.from(this.documentsById.entries()).map(([id, doc]) => ({ - id, - title: doc.layoutDoc.title || 'Untitled Document', - type: doc.layoutDoc.type || doc.dataDoc.type || 'Unknown Type', - })); - - if (docs.length === 0) { - return [ - { - type: 'text', - text: 'No documents found in the current view.', - }, - ]; - } - - return [ - { - type: 'text', - text: `Found ${docs.length} document(s) in the current view:\n${JSON.stringify(docs, null, 2)}`, - }, - ]; + /** + * Returns a list of all document IDs in the manager. + * @returns An array of document IDs (strings). + */ + public listDocs(): string[] { + return Array.from(this.documentsById.keys()); + } + + /** + * Adds a document with a custom ID to the manager + * @param doc The document to add + * @param customId The custom ID to assign to the document + * @returns The customId that was assigned + */ + public addCustomId(doc: Doc, customId: string): string { + if (!doc) { + console.error('Cannot add null document with custom ID'); + return ''; + } + + // Set the custom ID in the document's metadata + doc[this.DOCUMENT_ID_FIELD] = customId; + + // Store the document in our map + this.documentsById.set(customId, { + layoutDoc: doc, + dataDoc: doc, + }); + + return customId; } - public createAgentDoc(doc: Doc) { - // Ideally check if Doc is already in there. - const agentDoc = { layoutDoc: doc, dataDoc: doc[DocData] }; - this.documentsById.set(this.ensureDocumentId(doc), agentDoc); - return agentDoc; + /** + * Gets a document by its ID + * @param docId The ID of the document to retrieve + * @returns The document if found, undefined otherwise + */ + public getDocument(docId: string): Doc | undefined { + const docInfo = this.documentsById.get(docId); + return docInfo?.layoutDoc; + } + + /** + * Registers chunk IDs associated with a document in the manager + * @param docId The parent document ID + * @param chunkIds Array of chunk IDs associated with this document + */ + public registerChunkIds(docId: string, chunkIds: string[]): void { + // Get the document if it exists + const docInfo = this.documentsById.get(docId); + if (!docInfo) { + console.warn(`Cannot register chunks for unknown document ID: ${docId}`); + return; + } + + // Store chunk IDs on the document for future reference + const doc = docInfo.layoutDoc; + if (!doc.chunk_ids) { + doc.chunk_ids = JSON.stringify(chunkIds); + } else { + // Merge with existing chunk IDs if they exist + const existingIds = JSON.parse(doc.chunk_ids as string); + const updatedIds = [...new Set([...existingIds, ...chunkIds])]; // Remove duplicates + doc.chunk_ids = JSON.stringify(updatedIds); + } + + // Ensure each chunk ID can be linked back to its parent document + chunkIds.forEach(chunkId => { + // Store a mapping from chunk ID to parent document ID + // This allows us to easily find a document by any of its chunk IDs + if (!this.documentsById.has(chunkId)) { + this.documentsById.set(chunkId, { + layoutDoc: doc, + dataDoc: docInfo.dataDoc, + }); + } + }); + } + + /** + * Gets a document ID by a chunk ID + * @param chunkId The chunk ID to look up + * @returns The parent document ID if found + */ + public getDocIdByChunkId(chunkId: string): string | undefined { + const docInfo = this.documentsById.get(chunkId); + if (docInfo) { + return docInfo.layoutDoc[this.DOCUMENT_ID_FIELD] as string; + } + return undefined; } } diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts index afd34f28d..4bb61d8b2 100644 --- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts +++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts @@ -15,7 +15,7 @@ import { Networking } from '../../../../Network'; import { AI_Document, CHUNK_TYPE, RAGChunk } from '../types/types'; import OpenAI from 'openai'; import { Embedding } from 'openai/resources'; -import { PineconeEnvironmentVarsNotSupportedError } from '@pinecone-database/pinecone/dist/errors'; +import { AgentDocumentManager } from '../utils/AgentDocumentManager'; dotenv.config(); @@ -29,7 +29,7 @@ export class Vectorstore { private openai: OpenAI; // OpenAI client for generating embeddings. private indexName: string = 'pdf-chatbot'; // Default name for the index. private _id: string; // Unique ID for the Vectorstore instance. - private _doc_ids: () => string[]; // List of document IDs handled by this instance. + private docManager: AgentDocumentManager; // Document manager for handling documents documents: AI_Document[] = []; // Store the documents indexed in the vectorstore. @@ -37,9 +37,9 @@ export class Vectorstore { * Initializes the Pinecone and OpenAI clients, sets up the document ID list, * and initializes the Pinecone index. * @param id The unique identifier for the vectorstore instance. - * @param doc_ids A function that returns a list of document IDs. + * @param docManager An instance of AgentDocumentManager to handle document management. */ - constructor(id: string, doc_ids: () => string[]) { + constructor(id: string, docManager: AgentDocumentManager) { const pineconeApiKey = process.env.PINECONE_API_KEY; if (!pineconeApiKey) { throw new Error('PINECONE_API_KEY is not defined.'); @@ -49,7 +49,7 @@ export class Vectorstore { this.pinecone = new Pinecone({ apiKey: pineconeApiKey }); this.openai = new OpenAI({ apiKey: process.env.OPENAI_API_KEY, dangerouslyAllowBrowser: true }); this._id = id; - this._doc_ids = doc_ids; + this.docManager = docManager; this.initializeIndex(); } @@ -109,15 +109,25 @@ export class Vectorstore { const isAudioOrVideo = local_file_path.endsWith('.mp3') || local_file_path.endsWith('.mp4'); let result: AI_Document & { doc_id: string }; + if (isAudioOrVideo) { console.log('Processing media file...'); const response = await Networking.PostToServer('/processMediaFile', { fileName: path.basename(local_file_path) }); - const segmentedTranscript = response.condensed; + + // Type assertion to handle the response properties + const typedResponse = response as { + condensed: Array<{ text: string; indexes: string[]; start: number; end: number }>; + full: Array; + summary: string; + }; + + const segmentedTranscript = typedResponse.condensed; console.log(segmentedTranscript); - const summary = response.summary; + const summary = typedResponse.summary; doc.summary = summary; + // Generate embeddings for each chunk - const texts = segmentedTranscript.map((chunk: any) => chunk.text); + const texts = segmentedTranscript.map(chunk => chunk.text); try { const embeddingsResponse = await this.openai.embeddings.create({ @@ -126,10 +136,19 @@ export class Vectorstore { encoding_format: 'float', }); - doc.original_segments = JSON.stringify(response.full); + doc.original_segments = JSON.stringify(typedResponse.full); doc.ai_type = local_file_path.endsWith('.mp3') ? 'audio' : 'video'; const doc_id = uuidv4(); + // Register the document with the AgentDocumentManager + this.docManager.addCustomId(doc, doc_id); + + // Generate chunk IDs upfront so we can register them + const chunkIds = segmentedTranscript.map(() => uuidv4()); + + // Register all chunk IDs with the document manager + this.docManager.registerChunkIds(doc_id, chunkIds); + // Add transcript and embeddings to metadata result = { doc_id, @@ -137,13 +156,13 @@ export class Vectorstore { file_name: local_file_path, num_pages: 0, summary: '', - chunks: segmentedTranscript.map((chunk: any, index: number) => ({ - id: uuidv4(), + chunks: segmentedTranscript.map((chunk, index) => ({ + id: chunkIds[index], // Use pre-generated chunk ID values: (embeddingsResponse.data as Embedding[])[index].embedding, // Assign embedding metadata: { indexes: chunk.indexes, original_document: local_file_path, - doc_id: doc_id, + doc_id: doc_id, // Ensure doc_id is consistent file_path: local_file_path, start_time: chunk.start, end_time: chunk.end, @@ -159,20 +178,24 @@ export class Vectorstore { } doc.segmented_transcript = JSON.stringify(segmentedTranscript); - // Simplify chunks for storage + // Simplify chunks for storage - ensure simplified chunks use EXACTLY the same IDs const simplifiedChunks = result.chunks.map(chunk => ({ - chunkId: chunk.id, + chunkId: chunk.id, // Use the exact same ID as the full chunk start_time: chunk.metadata.start_time, end_time: chunk.metadata.end_time, indexes: chunk.metadata.indexes, chunkType: CHUNK_TYPE.VIDEO, text: chunk.metadata.text, + doc_id: chunk.metadata.doc_id, // Include parent doc_id for completeness })); doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks }); } else { - // Existing document processing logic remains unchanged + // Process regular document console.log('Processing regular document...'); - const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); + const createDocumentResponse = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); + + // Type assertion for the response + const { jobId } = createDocumentResponse as { jobId: string }; while (true) { await new Promise(resolve => setTimeout(resolve, 2000)); @@ -188,6 +211,16 @@ export class Vectorstore { progressCallback(progressResponseJson.progress, progressResponseJson.step); } } + + // Register the document with the AgentDocumentManager + this.docManager.addCustomId(doc, result.doc_id); + + // Collect all chunk IDs + const chunkIds = result.chunks.map(chunk => chunk.id); + + // Register chunks with the document manager + this.docManager.registerChunkIds(result.doc_id, chunkIds); + if (!doc.chunk_simpl) { doc.chunk_simpl = JSON.stringify({ chunks: [] }); } @@ -196,12 +229,13 @@ export class Vectorstore { result.chunks.forEach((chunk: RAGChunk) => { const chunkToAdd = { - chunkId: chunk.id, + chunkId: chunk.id, // Ensure we use the exact same ID startPage: chunk.metadata.start_page, endPage: chunk.metadata.end_page, location: chunk.metadata.location, chunkType: chunk.metadata.type as CHUNK_TYPE, text: chunk.metadata.text, + doc_id: chunk.metadata.doc_id, // Include parent doc_id for consistency }; const new_chunk_simpl = JSON.parse(StrCast(doc.chunk_simpl)); new_chunk_simpl.chunks = new_chunk_simpl.chunks.concat(chunkToAdd); @@ -298,39 +332,55 @@ export class Vectorstore { let queryEmbedding = queryEmbeddingResponse.data[0].embedding; - // Extract the embedding from the response. + // Get document IDs from the AgentDocumentManager + const docIds = Array.from(this.docManager.listDocs()); + console.log('Using document IDs for retrieval:', docIds); - console.log(this._doc_ids()); // Query the Pinecone index using the embedding and filter by document IDs. + // We'll query based on document IDs that are registered in the document manager const queryResponse: QueryResponse = await this.index.query({ vector: queryEmbedding, filter: { - doc_id: { $in: this._doc_ids() }, + doc_id: { $in: docIds }, }, topK, includeValues: true, includeMetadata: true, }); - console.log(queryResponse); - - // Map the results into RAGChunks and return them. - return queryResponse.matches.map( - match => - ({ - id: match.id, - values: match.values as number[], - metadata: match.metadata as { - text: string; - type: string; - original_document: string; - file_path: string; - doc_id: string; - location: string; - start_page: number; - end_page: number; - }, - }) as RAGChunk - ); + console.log(`Found ${queryResponse.matches.length} matching chunks`); + + // For each retrieved chunk, ensure its document ID is registered in the document manager + // This maintains compatibility with existing code while ensuring consistency + const processedMatches = queryResponse.matches.map(match => { + const chunk = { + id: match.id, + values: match.values as number[], + metadata: match.metadata as { + text: string; + type: string; + original_document: string; + file_path: string; + doc_id: string; + location: string; + start_page: number; + end_page: number; + }, + } as RAGChunk; + + // Ensure the document manager knows about this chunk + // This is important for maintaining backwards compatibility + if (chunk.id && !this.docManager.getDocIdByChunkId(chunk.id)) { + // If the chunk ID isn't registered but we have a doc_id in metadata + if (chunk.metadata.doc_id && this.docManager.has(chunk.metadata.doc_id)) { + // Register the chunk with its parent document + this.docManager.registerChunkIds(chunk.metadata.doc_id, [chunk.id]); + } + } + + return chunk; + }); + + return processedMatches; } catch (error) { console.error(`Error retrieving chunks: ${error}`); return []; -- cgit v1.2.3-70-g09d2 From 67a7996278ce176e227393fa410e7afc80228a83 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sun, 27 Apr 2025 13:37:37 -0400 Subject: a bit more consistent --- .../nodes/chatbot/chatboxcomponents/ChatBox.tsx | 69 +++------------------- src/client/views/nodes/chatbot/types/types.ts | 3 +- .../views/nodes/chatbot/vectorstore/Vectorstore.ts | 3 +- 3 files changed, 12 insertions(+), 63 deletions(-) (limited to 'src/client/views/nodes/chatbot/chatboxcomponents') diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index 35dbee3e9..b11bf7405 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -593,7 +593,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { }); // Try to find the document - const linkedDocs = this.linkedDocs; let doc: Doc | undefined; // First try to find the document using the document manager's chunk ID lookup @@ -603,25 +602,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { console.log(`Found document by chunk ID lookup: ${parentDocId}`); } - // If not found, fall back to searching through linked docs (maintains compatibility) - if (!doc) { - for (const linkedDoc of linkedDocs) { - if (linkedDoc.chunk_simpl) { - try { - const docChunkSimpl = JSON.parse(StrCast(linkedDoc.chunk_simpl)) as { chunks: SimplifiedChunk[] }; - const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId); - if (foundChunk) { - doc = linkedDoc; - console.log(`Found document by iterating through linked docs`); - break; - } - } catch (e) { - console.error(`Error parsing chunk_simpl for doc ${linkedDoc.id}:`, e); - } - } - } - } - if (!doc) { console.warn(`Document not found for citation with chunk_id: ${chunkId}`); return; @@ -641,29 +621,16 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { // Handle different chunk types if (foundChunk) { console.log(`Found chunk in document:`, foundChunk); - - // Handle video chunks - if (foundChunk.chunkType === CHUNK_TYPE.VIDEO) { - if (foundChunk.start_time !== undefined) { - await this.goToMediaTimestamp(doc, foundChunk.start_time, 'video'); + if (foundChunk.chunkType === CHUNK_TYPE.AUDIO || foundChunk.chunkType === CHUNK_TYPE.VIDEO) { + const directMatchSegmentStart = this.getDirectMatchingSegmentStart(doc, citation.direct_text || '', foundChunk.indexes || []); + if (directMatchSegmentStart) { + await this.goToMediaTimestamp(doc, directMatchSegmentStart, foundChunk.chunkType); } else { - console.warn('Video chunk missing start_time:', foundChunk); + console.error('No direct matching segment found for the citation.'); } - } - // Handle audio chunks - note that we're using string comparison since 'audio' isn't in CHUNK_TYPE enum - else if (String(foundChunk.chunkType).toLowerCase() === 'audio') { - if (foundChunk.start_time !== undefined) { - await this.goToMediaTimestamp(doc, foundChunk.start_time, 'audio'); - } else { - console.warn('Audio chunk missing start_time:', foundChunk); - } - } - // Handle table or image chunks - else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) { + } else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) { this.handleOtherChunkTypes(foundChunk, citation, doc); - } - // Handle text chunks - else if (foundChunk.chunkType === CHUNK_TYPE.TEXT) { + } else if (foundChunk.chunkType === CHUNK_TYPE.TEXT) { // Find text from the document's chunks metadata let chunkText = ''; @@ -691,33 +658,15 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { // Handle URL chunks else if (foundChunk.chunkType === CHUNK_TYPE.URL) { if (foundChunk.url) { - // Instead of opening the URL in a new window, show the document in the viewer DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); console.log(`Navigated to web document with URL: ${foundChunk.url}`); } else { console.warn('URL chunk missing URL:', foundChunk); } } - } else if (doc?.original_segments) { - // Handle original segments for media files - let original_segments: any[] = []; - try { - original_segments = JSON.parse(StrCast(doc.original_segments)); - } catch (e) { - console.error(`Error parsing original_segments:`, e); - return; - } - - // Check if there's direct text to find in the segments - if (citation.direct_text) { - // Find the segment that contains the direct text - const start = this.getDirectMatchingSegmentStart(doc, citation.direct_text, []); - if (start !== -1) { - await this.goToMediaTimestamp(doc, start, doc.ai_type === 'audio' ? 'audio' : 'video'); - } - } } else { - console.warn('Unable to find chunk or segments for citation', citation); + console.warn('Navigating to doc. Unable to find chunk or segments for citation', citation); + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); } } catch (error) { console.error('Error handling citation click:', error); diff --git a/src/client/views/nodes/chatbot/types/types.ts b/src/client/views/nodes/chatbot/types/types.ts index dcb132ec7..90b5e7e11 100644 --- a/src/client/views/nodes/chatbot/types/types.ts +++ b/src/client/views/nodes/chatbot/types/types.ts @@ -15,8 +15,9 @@ export enum CHUNK_TYPE { TABLE = 'table', URL = 'url', CSV = 'CSV', - MEDIA = 'media', + //MEDIA = 'media', VIDEO = 'video', + AUDIO = 'audio', } export enum PROCESSING_TYPE { diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts index 4bb61d8b2..4512ae3e6 100644 --- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts +++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts @@ -137,7 +137,6 @@ export class Vectorstore { }); doc.original_segments = JSON.stringify(typedResponse.full); - doc.ai_type = local_file_path.endsWith('.mp3') ? 'audio' : 'video'; const doc_id = uuidv4(); // Register the document with the AgentDocumentManager @@ -167,7 +166,7 @@ export class Vectorstore { start_time: chunk.start, end_time: chunk.end, text: chunk.text, - type: CHUNK_TYPE.VIDEO, + type: local_file_path.endsWith('.mp3') ? CHUNK_TYPE.AUDIO : CHUNK_TYPE.VIDEO, }, })), type: 'media', -- cgit v1.2.3-70-g09d2 From 393b7f8286422c933102449eba1ba82874a48896 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sun, 27 Apr 2025 14:57:39 -0400 Subject: improved consistency across doc types and parsing --- src/client/documents/Documents.ts | 1 + .../views/nodes/chatbot/agentsystem/Agent.ts | 15 +- .../nodes/chatbot/chatboxcomponents/ChatBox.tsx | 176 +++++++++------- .../chatbot/chatboxcomponents/ProgressBar.scss | 40 +++- .../nodes/chatbot/utils/AgentDocumentManager.ts | 234 ++++++++++++++++++++- .../views/nodes/chatbot/vectorstore/Vectorstore.ts | 49 ++--- 6 files changed, 390 insertions(+), 125 deletions(-) (limited to 'src/client/views/nodes/chatbot/chatboxcomponents') diff --git a/src/client/documents/Documents.ts b/src/client/documents/Documents.ts index 317bb7feb..f87bd7092 100644 --- a/src/client/documents/Documents.ts +++ b/src/client/documents/Documents.ts @@ -273,6 +273,7 @@ export class DocumentOptions { _layout_reflowHorizontal?: BOOLt = new BoolInfo('permit horizontal resizing with content reflow'); _layout_noSidebar?: BOOLt = new BoolInfo('whether to display the sidebar toggle button'); layout_boxShadow?: string; // box-shadow css string OR "standard" to use dash standard box shadow + _iframe_sandbox?: STRt = new StrInfo('sandbox attributes for iframes in web documents (e.g., allow-scripts, allow-same-origin)'); layout_maxShown?: NUMt = new NumInfo('maximum number of children to display at one time (see multicolumnview)'); _layout_columnWidth?: NUMt = new NumInfo('width of table column', false); _layout_columnCount?: NUMt = new NumInfo('number of columns in a masonry view'); diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts index 80fdb6533..24471bf5b 100644 --- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts +++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts @@ -41,7 +41,6 @@ export class Agent { private interMessages: AgentMessage[] = []; private vectorstore: Vectorstore; private _history: () => string; - private _summaries: () => string; private _csvData: () => { filename: string; id: string; text: string }[]; private actionNumber: number = 0; private thoughtNumber: number = 0; @@ -54,11 +53,13 @@ export class Agent { /** * The constructor initializes the agent with the vector store and toolset, and sets up the OpenAI client. * @param _vectorstore Vector store instance for document storage and retrieval. - * @param summaries A function to retrieve document summaries. + * @param summaries A function to retrieve document summaries (deprecated, now using docManager directly). * @param history A function to retrieve chat history. * @param csvData A function to retrieve CSV data linked to the assistant. - * @param addLinkedUrlDoc A function to add a linked document from a URL. + * @param getLinkedUrlDocId A function to get document IDs from URLs. + * @param createImage A function to create images in the dashboard. * @param createCSVInDash A function to create a CSV document in the dashboard. + * @param docManager The document manager instance. */ constructor( _vectorstore: Vectorstore, @@ -74,7 +75,6 @@ export class Agent { this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); this.vectorstore = _vectorstore; this._history = history; - this._summaries = summaries; this._csvData = csvData; this._docManager = docManager; @@ -124,7 +124,12 @@ export class Agent { // Retrieve chat history and generate system prompt const chatHistory = this._history(); - const systemPrompt = getReactPrompt(Object.values(this.tools), this._summaries, chatHistory); + // Get document summaries directly from document manager + const documentSummaries = this._docManager.getAllDocumentSummaries(); + // Create a function that returns document summaries for the prompt + const getSummaries = () => documentSummaries; + // Generate the system prompt with the summaries + const systemPrompt = getReactPrompt(Object.values(this.tools), getSummaries, chatHistory); // Initialize intermediate messages this.interMessages = [{ role: 'system', content: systemPrompt }]; diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index b11bf7405..ba30cb42b 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -18,7 +18,7 @@ import { Doc, DocListCast, Opt } from '../../../../../fields/Doc'; import { DocData, DocViews } from '../../../../../fields/DocSymbols'; import { RichTextField } from '../../../../../fields/RichTextField'; import { ScriptField } from '../../../../../fields/ScriptField'; -import { CsvCast, DocCast, NumCast, PDFCast, RTFCast, StrCast } from '../../../../../fields/Types'; +import { CsvCast, DocCast, NumCast, PDFCast, RTFCast, StrCast, VideoCast, AudioCast } from '../../../../../fields/Types'; import { DocUtils } from '../../../../documents/DocUtils'; import { CollectionViewType, DocumentType } from '../../../../documents/DocumentTypes'; import { Docs, DocumentOptions } from '../../../../documents/Documents'; @@ -48,7 +48,14 @@ import { AgentDocumentManager } from '../utils/AgentDocumentManager'; dotenv.config(); -export type parsedDocData = { doc_type: string; data: unknown }; +export type parsedDocData = { + doc_type: string; + data: unknown; + _disable_resource_loading?: boolean; + _sandbox_iframe?: boolean; + _iframe_sandbox?: string; + data_useCors?: boolean; +}; export type parsedDoc = DocumentOptions & parsedDocData; /** * ChatBox is the main class responsible for managing the interaction between the user and the assistant, @@ -150,7 +157,14 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { @action addDocToVectorstore = async (newLinkedDoc: Doc) => { try { - this._isUploadingDocs = true; + const isAudioOrVideo = VideoCast(newLinkedDoc.data)?.url?.pathname || AudioCast(newLinkedDoc.data)?.url?.pathname; + + // Set UI state to show the processing overlay + runInAction(() => { + this._isUploadingDocs = true; + this._uploadProgress = 0; + this._currentStep = isAudioOrVideo ? 'Preparing media file...' : 'Processing document...'; + }); // Process the document first to ensure it has a valid ID this.docManager.processDocument(newLinkedDoc); @@ -158,15 +172,36 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { // Add the document to the vectorstore which will also register chunks await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress); - // No longer needed as documents are tracked by the AgentDocumentManager - // this._linked_docs_to_add.add(newLinkedDoc); + // Give a slight delay to show the completion message + if (this._uploadProgress === 100) { + await new Promise(resolve => setTimeout(resolve, 1000)); + } - this._isUploadingDocs = false; + // Reset UI state + runInAction(() => { + this._isUploadingDocs = false; + this._uploadProgress = 0; + this._currentStep = ''; + }); return true; } catch (err) { console.error('Error adding document to vectorstore:', err); - this._isUploadingDocs = false; + + // Show error in UI + runInAction(() => { + this._currentStep = `Error: ${err instanceof Error ? err.message : 'Failed to process document'}`; + }); + + await new Promise(resolve => setTimeout(resolve, 2000)); + + // Reset UI state + runInAction(() => { + this._isUploadingDocs = false; + this._uploadProgress = 0; + this._currentStep = ''; + }); + return false; } }; @@ -178,8 +213,15 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { */ @action updateProgress = (progress: number, step: string) => { - this._uploadProgress = progress; + // Ensure progress is within expected bounds + const validProgress = Math.min(Math.max(0, progress), 100); + this._uploadProgress = validProgress; this._currentStep = step; + + // Force UI update + if (process.env.NODE_ENV !== 'production') { + console.log(`Progress: ${validProgress}%, Step: ${step}`); + } }; /** @@ -453,7 +495,19 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { case supportedDocTypes.image: return Docs.Create.ImageDocument(data as string, options); case supportedDocTypes.equation: return Docs.Create.EquationDocument(data as string, options); case supportedDocTypes.notetaking: return Docs.Create.NoteTakingDocument([], options); - case supportedDocTypes.web: return Docs.Create.WebDocument(data as string, { ...options, data_useCors: true }); + case supportedDocTypes.web: + // Create web document with enhanced safety options + const webOptions = { + ...options, + data_useCors: true + }; + + // If iframe_sandbox was passed from AgentDocumentManager, add it to the options + if ('_iframe_sandbox' in options) { + (webOptions as any)._iframe_sandbox = options._iframe_sandbox; + } + + return Docs.Create.WebDocument(data as string, webOptions); case supportedDocTypes.dataviz: return Docs.Create.DataVizDocument('/users/rz/Downloads/addresses.csv', options); case supportedDocTypes.pdf: return Docs.Create.PdfDocument(data as string, options); case supportedDocTypes.video: return Docs.Create.VideoDocument(data as string, options); @@ -607,65 +661,36 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { return; } - // Process the chunk data - let docChunkSimpl: { chunks: SimplifiedChunk[] } = { chunks: [] }; - try { - docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl) || '{"chunks":[]}'); - } catch (e) { - console.error(`Error parsing chunk_simpl for the found document:`, e); + // Get the simplified chunk using the document manager + const foundChunk = this.docManager.getSimplifiedChunkById(doc, chunkId); + if (!foundChunk) { + console.warn(`Chunk not found in document for chunk ID: ${chunkId}`); + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); return; } - const foundChunk = docChunkSimpl.chunks.find((chunk: SimplifiedChunk) => chunk.chunkId === chunkId); + console.log(`Found chunk in document:`, foundChunk); // Handle different chunk types - if (foundChunk) { - console.log(`Found chunk in document:`, foundChunk); - if (foundChunk.chunkType === CHUNK_TYPE.AUDIO || foundChunk.chunkType === CHUNK_TYPE.VIDEO) { - const directMatchSegmentStart = this.getDirectMatchingSegmentStart(doc, citation.direct_text || '', foundChunk.indexes || []); - if (directMatchSegmentStart) { - await this.goToMediaTimestamp(doc, directMatchSegmentStart, foundChunk.chunkType); - } else { - console.error('No direct matching segment found for the citation.'); - } - } else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) { - this.handleOtherChunkTypes(foundChunk, citation, doc); - } else if (foundChunk.chunkType === CHUNK_TYPE.TEXT) { - // Find text from the document's chunks metadata - let chunkText = ''; - - try { - // We already parsed the chunks earlier, so use that - const matchingChunk = docChunkSimpl.chunks.find(c => c.chunkId === foundChunk.chunkId); - if (matchingChunk && 'text' in matchingChunk) { - // If the text property exists on the chunk (even though it's not in the type) - chunkText = String(matchingChunk['text'] || ''); - } - } catch (e) { - console.error('Error getting chunk text:', e); - } - - // Default text if none found - if (!chunkText) { - chunkText = 'Text content not available'; - } - - this._citationPopup = { - text: chunkText, - visible: true, - }; - } - // Handle URL chunks - else if (foundChunk.chunkType === CHUNK_TYPE.URL) { - if (foundChunk.url) { - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); - console.log(`Navigated to web document with URL: ${foundChunk.url}`); - } else { - console.warn('URL chunk missing URL:', foundChunk); - } + if (foundChunk.chunkType === CHUNK_TYPE.AUDIO || foundChunk.chunkType === CHUNK_TYPE.VIDEO) { + const directMatchSegmentStart = this.getDirectMatchingSegmentStart(doc, citation.direct_text || '', foundChunk.indexes || []); + if (directMatchSegmentStart) { + await this.goToMediaTimestamp(doc, directMatchSegmentStart, foundChunk.chunkType); + } else { + console.error('No direct matching segment found for the citation.'); } + } else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) { + this.handleOtherChunkTypes(foundChunk, citation, doc); } else { - console.warn('Navigating to doc. Unable to find chunk or segments for citation', citation); + // Show the chunk text in citation popup + let chunkText = foundChunk.text || 'Text content not available'; + + this._citationPopup = { + text: chunkText, + visible: true, + }; + + // Also navigate to the document DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); } } catch (error) { @@ -683,8 +708,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { getDirectMatchingSegmentStart = (doc: Doc, citationText: string, indexesOfSegments: string[]): number => { if (!doc || !citationText) return -1; - // Get original segments from the document - const original_segments = doc.original_segments ? JSON.parse(StrCast(doc.original_segments)) : []; + // Get original segments using document manager + const original_segments = this.docManager.getOriginalSegments(doc); if (!original_segments || !Array.isArray(original_segments) || original_segments.length === 0) { return -1; @@ -993,18 +1018,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { */ @computed get summaries(): string { - const linkedDocs = Array.from(this.docManager.listDocs()) - .map(id => { - const doc = this.docManager.extractDocumentMetadata(id); - if (doc && doc.fields && (doc.fields.layout.summary || doc.fields.data.summary)) { - return doc.fields.layout.summary || doc.fields.data.summary; - } - return null; - }) - .filter(Boolean) - .join('\n\n'); - - return linkedDocs; + // Use the document manager to get all summaries + return this.docManager.getAllDocumentSummaries(); } /** @@ -1033,7 +1048,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { // Other helper methods for retrieving document data and processing retrieveSummaries = (): string => { - return this.summaries; + return this.docManager.getAllDocumentSummaries(); }; retrieveCSVData = () => { @@ -1068,8 +1083,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent() { {this._isUploadingDocs && (
- -
{this._currentStep}
+
+
+
+
+
{Math.round(this._uploadProgress)}%
+
{this._currentStep}
+
)} diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss index ff5be4a38..3a8334695 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss @@ -58,12 +58,48 @@ flex-direction: column; align-items: center; text-align: center; + width: 80%; + max-width: 400px; + background-color: white; + padding: 20px; + border-radius: 8px; + box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15); } -.step-name { +.progress-bar-wrapper { + width: 100%; + height: 12px; + background-color: #e0e0e0; + border-radius: 6px; + overflow: hidden; + margin-bottom: 10px; +} + +.progress-bar { + height: 100%; + background-color: #4a90e2; + border-radius: 6px; + transition: width 0.5s ease; +} + +.progress-details { + display: flex; + flex-direction: column; + align-items: center; + width: 100%; +} + +.progress-percentage { font-size: 18px; + font-weight: bold; color: #333; + margin-bottom: 5px; +} + +.step-name { + font-size: 16px; + color: #666; text-align: center; width: 100%; - margin-top: -10px; // Adjust to move the text closer to the spinner + margin-top: 5px; } diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts index c3beebcde..cff8380db 100644 --- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts +++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts @@ -14,6 +14,8 @@ import { parsedDoc } from '../chatboxcomponents/ChatBox'; import { faThumbTackSlash } from '@fortawesome/free-solid-svg-icons'; import { DocumentManager } from '../../../../util/DocumentManager'; import { DocumentView } from '../../DocumentView'; +import { RAGChunk, CHUNK_TYPE } from '../types/types'; +import { runInAction } from 'mobx'; /** * Interface representing a document in the freeform view @@ -869,20 +871,43 @@ export class AgentDocumentManager { _layout_autoHeight: true, }; - // Use the chatBox's createDocInDash method to create and link the document + // Additional handling for web documents + if (docType === 'web') { + // For web documents, don't sanitize the URL here + // Instead, set properties to handle content safely when loaded + simpleDoc._disable_resource_loading = true; + simpleDoc._sandbox_iframe = true; + simpleDoc.data_useCors = true; + + // Specify a more permissive sandbox to allow content to render properly + // but still maintain security + simpleDoc._iframe_sandbox = 'allow-same-origin allow-scripts allow-popups allow-forms'; + } + + // Use the chatBox's createDocInDash method to create the document if (!this.chatBox) { throw new Error('ChatBox instance not available for creating document'); } - const linkAndShowDoc = (doc: Opt) => { - if (doc) { - LinkManager.Instance.addLink(Docs.Create.LinkDocument(this.chatBoxDocument!, doc)); - this.chatBox._props.addDocument?.(doc); - DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); - } - }; + const doc = this.chatBox.whichDoc(simpleDoc, false); if (doc) { - linkAndShowDoc(doc); + // Use MobX runInAction to properly modify observable state + runInAction(() => { + if (this.chatBoxDocument && doc) { + // Create link and add it to the document system + const linkDoc = Docs.Create.LinkDocument(this.chatBoxDocument, doc); + LinkManager.Instance.addLink(linkDoc); + + // Add document to view + this.chatBox._props.addDocument?.(doc); + + // Show document - defer actual display to prevent immediate resource loading + setTimeout(() => { + DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); + }, 100); + } + }); + const id = this.processDocument(doc); return id; } else { @@ -893,6 +918,62 @@ export class AgentDocumentManager { } } + /** + * Sanitizes web content to prevent errors with external resources + * @param content The web content to sanitize + * @returns Sanitized content + */ + private sanitizeWebContent(content: string): string { + if (!content) return content; + + try { + // Replace problematic resource references that might cause errors + const sanitized = content + // Remove preload links that might cause errors + .replace(/]*rel=["']preload["'][^>]*>/gi, '') + // Remove map file references + .replace(/\/\/# sourceMappingURL=.*\.map/gi, '') + // Remove external CSS map files references + .replace(/\/\*# sourceMappingURL=.*\.css\.map.*\*\//gi, '') + // Add sandbox to iframes + .replace(/