about | summary | refs | log | tree | commit | diff
path: root/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
diff options
context:
space:
mode:
Diffstat (limited to 'src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts')
-rw-r--r-- src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts | 192
1 files changed, 43 insertions, 149 deletions
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index c8a6bb16b..5a09b945b 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -13,7 +13,7 @@ import { LinkManager, UPDATE_SERVER_CACHE } from '../../../../util/LinkManager';
import { DocumentView } from '../../DocumentView';
import { ChatBox, parsedDoc } from '../chatboxcomponents/ChatBox';
import { supportedDocTypes } from '../types/tool_types';
-import { CHUNK_TYPE, RAGChunk } from '../types/types';
+import { CHUNK_TYPE, RAGChunk, SimplifiedChunk } from '../types/types';
/**
* Interface representing a document in the freeform view
@@ -31,7 +31,7 @@ export class AgentDocumentManager {
private chatBox: ChatBox;
private chatBoxDocument: Doc | null = null;
private fieldMetadata: Record<string, any> = {};
- @observable private documentIdsFromChunkIds: ObservableMap<string, string>;
+ @observable private simplifiedChunks: ObservableMap<string, SimplifiedChunk>;
/**
* Creates a new DocumentManager
@@ -40,17 +40,21 @@ export class AgentDocumentManager {
constructor(chatBox: ChatBox) {
makeObservable(this);
const agentDoc = DocCast(chatBox.Document.agentDocument) ?? new Doc();
- const chunkIds = DocCast(agentDoc.chunkIds) ?? new Doc();
+ const chunk_simpl = DocCast(agentDoc.chunk_simpl) ?? new Doc();
agentDoc.title = chatBox.Document.title + '_agentDocument';
- chunkIds.title = '_chunkIds';
+ chunk_simpl.title = '_chunk_simpl';
chatBox.Document.agentDocument = agentDoc;
- DocCast(chatBox.Document.agentDocument)!.chunkIds = chunkIds;
- this.documentIdsFromChunkIds = StrListCast(chunkIds.mapping).reduce((mapping, content) => {
- const [chunkId, docId] = content.split(':');
- mapping.set(chunkId, docId);
+ DocCast(chatBox.Document.agentDocument)!.chunk_simpl = chunk_simpl;
+
+ this.simplifiedChunks = StrListCast(chunk_simpl.mapping).reduce((mapping, chunks) => {
+ StrListCast(chunks).forEach(chunk => {
+ const parsed = JSON.parse(StrCast(chunk));
+ mapping.set(parsed.chunkId, parsed);
+ });
return mapping;
- }, new ObservableMap<string, string>());
+ }, new ObservableMap<string, SimplifiedChunk>());
+
this.documentsById = StrListCast(agentDoc.mapping).reduce((mapping, content) => {
const [id, layoutId, docId] = content.split(':');
const layoutDoc = DocServer.GetCachedRefField(layoutId);
@@ -76,14 +80,10 @@ export class AgentDocumentManager {
//{ fireImmediately: true }
);
reaction(
- () => this.documentIdsFromChunkIds.values(),
+ () => this.simplifiedChunks.values(),
() => {
if (this.chatBoxDocument && DocCast(this.chatBoxDocument.agentDocument)) {
- // Store the mapping with chunkId:docId format for consistency
- const chunkIdsDoc = DocCast(DocCast(this.chatBoxDocument.agentDocument)!.chunkIds);
- if (chunkIdsDoc) {
- chunkIdsDoc.mapping = new List<string>(Array.from(this.documentIdsFromChunkIds.entries()).map(([chunkId, docId]) => `${chunkId}:${docId}`));
- }
+ DocCast(DocCast(this.chatBoxDocument.agentDocument)!.chunk_simpl)!.mapping = new List<string>(Array.from(this.simplifiedChunks.values()).map(chunk => JSON.stringify(chunk)));
}
}
//{ fireImmediately: true }
@@ -831,7 +831,8 @@ export class AgentDocumentManager {
* @param options Optional configuration options
* @returns The ID of the created document
*/
- public createDocInDash(docType: string, data: string, options?: any): string {
+
+ public async createDocInDash(docType: string, data: string, options?: any): Promise<string> {
// Validate doc_type
if (!this.isValidDocType(docType)) {
throw new Error(`Invalid document type: ${docType}`);
@@ -877,14 +878,15 @@ export class AgentDocumentManager {
// Create link and add it to the document system
const linkDoc = Docs.Create.LinkDocument(this.chatBoxDocument, doc);
LinkManager.Instance.addLink(linkDoc);
-
- // Add document to view
- this.chatBox._props.addDocument?.(doc);
-
- // Show document - defer actual display to prevent immediate resource loading
- setTimeout(() => {
- DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
- }, 100);
+ if (doc.type !== 'web') {
+ // Add document to view
+ this.chatBox._props.addDocument?.(doc);
+
+ // Show document - defer actual display to prevent immediate resource loading
+ setTimeout(() => {
+ DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+ }, 100);
+ }
}
});
@@ -986,88 +988,19 @@ export class AgentDocumentManager {
}
/**
- * Registers chunk IDs associated with a document in the manager
- * @param docId The parent document ID
- * @param chunkIds Array of chunk IDs associated with this document
- */
- @action
- public registerChunkIds(docId: string, chunkIds: string[]): void {
- // Get the document if it exists
- const docInfo = this.documentsById.get(docId);
- if (!docInfo) {
- console.warn(`Cannot register chunks for unknown document ID: ${docId}`);
- return;
- }
-
- // Store chunk IDs on the document for future reference
- const doc = docInfo.layoutDoc;
- if (!doc.chunk_ids) {
- doc.chunk_ids = JSON.stringify(chunkIds);
- } else {
- // Merge with existing chunk IDs if they exist
- const existingIds = JSON.parse(doc.chunk_ids as string);
- const updatedIds = [...new Set([...existingIds, ...chunkIds])]; // Remove duplicates
- doc.chunk_ids = JSON.stringify(updatedIds);
- }
- for (const chunkId of chunkIds) {
- // Ensure each chunk ID can be linked back to its parent document
- // Store a mapping from chunk ID to parent document ID
- // This allows us to easily find a document by any of its chunk IDs
- if (!this.documentIdsFromChunkIds.has(chunkId) && doc) {
- this.documentIdsFromChunkIds.set(chunkId, doc[Id]);
- }
- }
- }
-
- /**
- * Gets a document ID by a chunk ID
- * @param chunkId The chunk ID to look up
- * @returns The parent document ID if found
- */
- public getDocByChunkId(chunkId: string): Doc | undefined {
- // First, look up the document ID using the chunk ID mapping
- const docId = this.documentIdsFromChunkIds.get(chunkId);
- console.log('this.documentIdsFromChunkIds', this.documentIdsFromChunkIds);
- console.log('docId', docId);
- if (!docId) {
- if (this.documentsById.has(chunkId)) {
- return this.documentsById.get(chunkId)?.layoutDoc;
- } else {
- console.error('No document found for chunkId and docId', chunkId);
- return undefined;
- }
- }
- // Then get the document using the document ID
- const docInfo = this.documentsById.get(docId);
- if (docInfo) {
- return docInfo.layoutDoc;
- }
- console.error('No document found for docId', docId);
- return undefined;
- }
-
- /**
* Adds simplified chunks to a document for citation handling
* @param doc The document to add simplified chunks to
* @param chunks Array of full RAG chunks to simplify
* @param docType The type of document (e.g., 'pdf', 'video', 'audio', etc.)
* @returns The updated document with simplified chunks
*/
- public addSimplifiedChunks(doc: Doc, chunks: RAGChunk[], docType: string): Doc {
- if (!doc) {
- console.error('Cannot add simplified chunks to null document');
- return doc;
- }
-
- // Initialize empty chunks array if not exists
- if (!doc.chunk_simpl) {
- doc.chunk_simpl = JSON.stringify({ chunks: [] });
- }
-
+ @action
+ public addSimplifiedChunks(chunks: RAGChunk[], docType: string) {
+ console.log('chunks', chunks, 'simplifiedChunks', this.simplifiedChunks);
// Create array of simplified chunks based on document type
- const simplifiedChunks = chunks.map(chunk => {
+ for (const chunk of chunks) {
// Common properties across all chunk types
- const baseChunk = {
+ const baseChunk: SimplifiedChunk = {
chunkId: chunk.id,
text: chunk.metadata.text,
doc_id: chunk.metadata.doc_id,
@@ -1076,38 +1009,33 @@ export class AgentDocumentManager {
// Add type-specific properties
if (docType === 'video' || docType === 'audio') {
- return {
+ this.simplifiedChunks.set(chunk.id, {
...baseChunk,
start_time: chunk.metadata.start_time,
end_time: chunk.metadata.end_time,
indexes: chunk.metadata.indexes,
chunkType: docType === 'video' ? CHUNK_TYPE.VIDEO : CHUNK_TYPE.AUDIO,
- };
+ } as SimplifiedChunk);
} else if (docType === 'pdf') {
- return {
+ this.simplifiedChunks.set(chunk.id, {
...baseChunk,
startPage: chunk.metadata.start_page,
endPage: chunk.metadata.end_page,
location: chunk.metadata.location,
- };
+ } as SimplifiedChunk);
} else if (docType === 'csv') {
- return {
+ this.simplifiedChunks.set(chunk.id, {
...baseChunk,
rowStart: (chunk.metadata as any).row_start,
rowEnd: (chunk.metadata as any).row_end,
colStart: (chunk.metadata as any).col_start,
colEnd: (chunk.metadata as any).col_end,
- };
+ } as SimplifiedChunk);
} else {
// Default for other document types
- return baseChunk;
+ this.simplifiedChunks.set(chunk.id, baseChunk as SimplifiedChunk);
}
- });
- console.log('simplifiedChunks', simplifiedChunks);
- // Update the document with all simplified chunks at once
- doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks });
-
- return doc;
+ }
}
/**
@@ -1116,21 +1044,10 @@ export class AgentDocumentManager {
* @param chunkId The ID of the chunk to retrieve
* @returns The simplified chunk if found, undefined otherwise
*/
- public getSimplifiedChunkById(doc: Doc, chunkId: string): any | undefined {
- let chunks: any[] = [];
- if (!doc || !doc.chunk_simpl) {
- chunks = [];
- console.warn('No chunk found for chunkId', chunkId, '. Checking if document exists in documentsById.');
- return [];
- }
- try {
- const parsed = JSON.parse(StrCast(doc.chunk_simpl));
- chunks = parsed.chunks || [];
- } catch (e) {
- console.error('Error parsing simplified chunks:', e);
- return [];
- }
- return chunks.find(chunk => chunk.chunkId === chunkId);
+ public getSimplifiedChunkById(chunkId: string): any | undefined {
+ console.log('chunkId', chunkId, 'simplifiedChunks', this.simplifiedChunks);
+ console.log('doc', this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || ''));
+ return { foundChunk: this.simplifiedChunks.get(chunkId), doc: this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || '') };
}
/**
@@ -1150,27 +1067,4 @@ export class AgentDocumentManager {
return [];
}
}
-
- /**
- * Gets all document summaries combined into a single string
- * @returns String containing all document summaries
- */
- public getAllDocumentSummaries(): string {
- const summaries = Array.from(this.documentsById.keys())
- .map(id => {
- const doc = this.getDocument(id);
- if (doc) {
- // Try to get summary from either the document or its data document
- const summary = doc.summary || (doc[DocData] && doc[DocData].summary);
- if (summary) {
- return StrCast(summary);
- }
- }
- return null;
- })
- .filter(Boolean)
- .join('\n\n');
-
- return summaries;
- }
}