diff options
| author | A.J. Shulman <Shulman.aj@gmail.com> | 2025-05-11 13:42:00 -0400 |
|---|---|---|
| committer | A.J. Shulman <Shulman.aj@gmail.com> | 2025-05-11 13:42:00 -0400 |
| commit | a5d7f5c38192b91b7df3bd6ecace5ba7365449a6 (patch) | |
| tree | c6be94f983b5fcc65424b81d42ddb0718127404c /src/client/views/nodes/chatbot | |
| parent | 3c28aa3a706869d818bc8a089e8d1a53f7234bc0 (diff) | |
Made it so chunk Ids are seperately managed and made sure the doc id is sonsistent and not created in python spawn
Diffstat (limited to 'src/client/views/nodes/chatbot')
6 files changed, 112 insertions, 217 deletions
diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts index e551ef830..fcb4ab450 100644 --- a/src/client/views/nodes/chatbot/agentsystem/prompts.ts +++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts @@ -103,9 +103,9 @@ export function getReactPrompt(tools: BaseTool<ReadonlyArray<Parameter>>[], summ <note>If no external tool is required, use 'no_tool', but if there might be relevant external information, use the appropriate tool.</note> </tools> - <summaries> + <available_documents> ${summaries()} - </summaries> + </available_documents> <chat_history> ${chatHistory} diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index d919b5f7f..34a1ade2e 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -650,16 +650,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { citation: JSON.stringify(citation, null, 2), }); - // Try to find the document - let doc: Doc | undefined; - // First try to find the document using the document manager's chunk ID lookup - const parentDocId = this.docManager.getDocIdByChunkId(chunkId); - if (parentDocId) { - doc = this.docManager.getDocument(parentDocId); - console.log(`Found document by chunk ID lookup: ${parentDocId}`); - } - + const doc: Doc | undefined = this.docManager.getDocByChunkId(chunkId); if (!doc) { console.warn(`Document not found for citation with chunk_id: ${chunkId}`); return; @@ -989,32 +981,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { componentWillUnmount() { this.removeScrollListener(); } - - /** - * Getter that retrieves all linked documents for the current document. - */ - @computed get linkedDocs(): Doc[] { - const docIds = this.docManager.listDocs(); - const docs: Doc[] = []; - - // Get documents from the document manager using the getDocument method - docIds.forEach(id => { - const doc = this.docManager.getDocument(id); - if (doc) { - docs.push(doc); - } - }); - - return docs; - } - /** * Getter that retrieves document IDs of linked documents that have PDF_chunker–parsed content. */ @computed get docIds(): string[] { // Use the document manager to get all document IDs - return Array.from(this.docManager.listDocs()); + return Array.from(this.docManager.listDocs); } /** @@ -1023,7 +996,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { @computed get summaries(): string { // Use the document manager to get all summaries - return this.docManager.getAllDocumentSummaries(); + console.log(this.docManager.listDocs); + return JSON.stringify(this.docManager.listDocs); } /** @@ -1064,7 +1038,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { }; retrieveDocIds = (): string[] => { - return Array.from(this.docManager.listDocs()); + return Array.from(this.docManager.docIds); }; /** diff --git a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts index e6c2421e5..5297292bf 100644 --- a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts +++ b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts @@ -18,13 +18,13 @@ const parameterDefinitions: ReadonlyArray<Parameter> = [ name: 'action', type: 'string', required: true, - description: 'The action to perform: "get" to retrieve metadata, "edit" to modify metadata, "list" to enumerate documents, "getFieldOptions" to retrieve all available field options, or "create" to create a new document', + description: 'The action to perform: "get" to retrieve metadata, "edit" to modify metadata, "getFieldOptions" to retrieve all available field options, or "create" to create a new document', }, { name: 'documentId', type: 'string', required: false, - description: 'The ID of the document to get or edit metadata for. Required for "edit", optional for "get", ignored for "list", "getFieldOptions", and "create"', + description: 'The ID of the document to get or edit metadata for. Required for "edit", optional for "get", ignored for "getFieldOptions", and "create"', }, { name: 'fieldEdits', @@ -68,7 +68,6 @@ This tool provides the following capabilities: - Get metadata from a specific document - Edit metadata fields on documents (in either layout or data documents) - Edit multiple fields at once (useful for updating dependent fields together) -- List all available documents in the current view - Retrieve all available field options with metadata (IMPORTANT: always call this before editing) - Understand which fields are stored where (layout vs data document) - Get detailed information about all available document fields @@ -137,8 +136,8 @@ SPECIAL FIELD HANDLING: - Width/Height: Set layout_autoHeight/layout_autoWidth to false before editing RECOMMENDED WORKFLOW: -1. First call action="list" to identify available documents -2. Then call action="getFieldOptions" to understand available fields +0. Understand the currently available documents that were provided as <available_documents> in the prompt +1. Call action="getFieldOptions" to understand available fields 3. Get document metadata with action="get" to see current values 4. Edit fields with action="edit" using proper dependencies OR @@ -159,10 +158,6 @@ HANDLING DEPENDENT FIELDS: - width → layout_autoWidth (set to false to allow manual width) - Other auto-sizing related properties -To LIST available documents: -- Use action="list" to get a simple list of all documents in the current view -- This is useful when you need to identify documents before getting details or editing them - Editing fields follows these rules: 1. First checks if the field exists on the layout document using Doc.Get 2. If it exists on the layout document, it's updated there @@ -172,7 +167,6 @@ Editing fields follows these rules: Examples: - To get field options: { action: "getFieldOptions" } -- To list all documents: { action: "list" } - To get all document metadata: { action: "get" } - To get metadata for a specific document: { action: "get", documentId: "doc123" } - To edit a single field: { action: "edit", documentId: "doc123", fieldEdits: [{ fieldName: "backgroundColor", fieldValue: "#ff0000" }] } @@ -186,7 +180,8 @@ Examples: { fieldName: "layout_autoHeight", fieldValue: false }, { fieldName: "height", fieldValue: 200 } ]} -- IMPORTANT: MULTI STEP WORKFLOWS ARE NOT ONLY ALLOWED BUT ENCOURAGED. TAKE THINGS 1 STEP AT A TIME.`; +- IMPORTANT: MULTI STEP WORKFLOWS ARE NOT ONLY ALLOWED BUT ENCOURAGED. TAKE THINGS 1 STEP AT A TIME. +- IMPORTANT: WHEN CITING A DOCUMENT, MAKE THE CHUNK ID THE DOCUMENT ID. WHENEVER YOU CITE A DOCUMENT, ALWAYS MAKE THE CITATION TYPE "text", THE "direct_text" FIELD BLANK, AND THE "chunk_id" FIELD THE DOCUMENT ID.`; const documentMetadataToolInfo: ToolInfo<DocumentMetadataToolParamsType> = { name: 'documentMetadata', description: toolDescription, @@ -232,11 +227,11 @@ export class DocumentMetadataTool extends BaseTool<DocumentMetadataToolParamsTyp // Ensure the action is valid and convert to string const action = String(args.action); - if (!['get', 'edit', 'list', 'getFieldOptions', 'create'].includes(action)) { + if (!['get', 'edit', 'getFieldOptions', 'create'].includes(action)) { return [ { type: 'text', - text: 'Error: Invalid action. Valid actions are "get", "edit", "list", "getFieldOptions", or "create".', + text: 'Error: Invalid action. Valid actions are "get", "edit", "getFieldOptions", or "create".', }, ]; } @@ -386,10 +381,6 @@ export class DocumentMetadataTool extends BaseTool<DocumentMetadataToolParamsTyp } } - case 'list': { - this._docManager.listDocs(); - } - case 'getFieldOptions': { // Get all available field options with metadata const fieldOptions = this._docManager.getAllFieldMetadata(); @@ -457,7 +448,7 @@ ${JSON.stringify(createdMetadata, null, 2)}`, return [ { type: 'text', - text: 'Error: Unknown action. Valid actions are "get", "edit", "list", "getFieldOptions", or "create".', + text: 'Error: Unknown action. Valid actions are "get", "edit", "getFieldOptions", or "create".', }, ]; } @@ -537,11 +528,6 @@ ${JSON.stringify(createdMetadata, null, 2)}`, return true; } - // list action doesn't require any additional parameters - if (params.action === 'list') { - return true; - } - return true; } @@ -552,7 +538,7 @@ ${JSON.stringify(createdMetadata, null, 2)}`, */ private getParameterRequirementsByAction(action?: string): string { if (!action) { - return 'Please specify an action: "get", "edit", "list", "getFieldOptions", or "create".'; + return 'Please specify an action: "get", "edit", "getFieldOptions", or "create".'; } switch (action.toLowerCase()) { @@ -560,14 +546,12 @@ ${JSON.stringify(createdMetadata, null, 2)}`, return 'The "get" action accepts an optional documentId parameter.'; case 'edit': return 'The "edit" action requires documentId and fieldEdits parameters. fieldEdits must be a JSON array of field edits.'; - case 'list': - return 'The "list" action does not require any additional parameters.'; case 'getFieldOptions': return 'The "getFieldOptions" action does not require any additional parameters. It returns metadata about all available document fields.'; case 'create': return 'The "create" action requires title, data, and doc_type parameters.'; default: - return `Unknown action "${action}". Valid actions are "get", "edit", "list", "getFieldOptions", or "create".`; + return `Unknown action "${action}". Valid actions are "get", "edit", "getFieldOptions", or "create".`; } } } diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts index ef374ed22..90b803d21 100644 --- a/src/client/views/nodes/chatbot/tools/RAGTool.ts +++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts @@ -3,6 +3,7 @@ import { Observation, RAGChunk } from '../types/types'; import { ParametersType, ToolInfo } from '../types/tool_types'; import { Vectorstore } from '../vectorstore/Vectorstore'; import { BaseTool } from './BaseTool'; +import { DocumentMetadataTool } from './DocumentMetadataTool'; const ragToolParams = [ { @@ -17,7 +18,7 @@ type RAGToolParamsType = typeof ragToolParams; const ragToolInfo: ToolInfo<RAGToolParamsType> = { name: 'rag', - description: 'Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a set of document chunks (text or images) to provide a grounded response based on user documents.', + description: `Performs a RAG (Retrieval-Augmented Generation) search on user documents (only PDF, audio, and video are supported—for information about other document types, use the ${DocumentMetadataTool.name} tool) and returns a set of document chunks (text or images) to provide a grounded response based on user documents.`, citationRules: `When using the RAG tool, the structure must adhere to the format described in the ReAct prompt. Below are additional guidelines specifically for RAG-based responses: 1. **Grounded Text Guidelines**: @@ -75,7 +76,7 @@ export class RAGTool extends BaseTool<RAGToolParamsType> { async getFormattedChunks(relevantChunks: RAGChunk[]): Promise<Observation[]> { try { - const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks }) as { formattedChunks: Observation[]} + const { formattedChunks } = (await Networking.PostToServer('/formatChunks', { relevantChunks })) as { formattedChunks: Observation[] }; if (!formattedChunks) { throw new Error('Failed to format chunks'); diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts index 14cffcb70..c8a6bb16b 100644 --- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts +++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts @@ -1,4 +1,4 @@ -import { action, makeObservable, observable, ObservableMap, reaction, runInAction } from 'mobx'; +import { action, computed, makeObservable, observable, ObservableMap, reaction, runInAction } from 'mobx'; import { observer } from 'mobx-react'; import { v4 as uuidv4 } from 'uuid'; import { Doc, StrListCast } from '../../../../../fields/Doc'; @@ -31,7 +31,7 @@ export class AgentDocumentManager { private chatBox: ChatBox; private chatBoxDocument: Doc | null = null; private fieldMetadata: Record<string, any> = {}; - private readonly DOCUMENT_ID_FIELD = '_dash_document_id'; + @observable private documentIdsFromChunkIds: ObservableMap<string, string>; /** * Creates a new DocumentManager @@ -40,8 +40,17 @@ export class AgentDocumentManager { constructor(chatBox: ChatBox) { makeObservable(this); const agentDoc = DocCast(chatBox.Document.agentDocument) ?? new Doc(); + const chunkIds = DocCast(agentDoc.chunkIds) ?? new Doc(); + agentDoc.title = chatBox.Document.title + '_agentDocument'; + chunkIds.title = '_chunkIds'; chatBox.Document.agentDocument = agentDoc; + DocCast(chatBox.Document.agentDocument)!.chunkIds = chunkIds; + this.documentIdsFromChunkIds = StrListCast(chunkIds.mapping).reduce((mapping, content) => { + const [chunkId, docId] = content.split(':'); + mapping.set(chunkId, docId); + return mapping; + }, new ObservableMap<string, string>()); this.documentsById = StrListCast(agentDoc.mapping).reduce((mapping, content) => { const [id, layoutId, docId] = content.split(':'); const layoutDoc = DocServer.GetCachedRefField(layoutId); @@ -66,6 +75,19 @@ export class AgentDocumentManager { } //{ fireImmediately: true } ); + reaction( + () => this.documentIdsFromChunkIds.values(), + () => { + if (this.chatBoxDocument && DocCast(this.chatBoxDocument.agentDocument)) { + // Store the mapping with chunkId:docId format for consistency + const chunkIdsDoc = DocCast(DocCast(this.chatBoxDocument.agentDocument)!.chunkIds); + if (chunkIdsDoc) { + chunkIdsDoc.mapping = new List<string>(Array.from(this.documentIdsFromChunkIds.entries()).map(([chunkId, docId]) => `${chunkId}:${docId}`)); + } + } + } + //{ fireImmediately: true } + ); this.processDocument(this.chatBoxDocument); this.initializeFieldMetadata(); } @@ -120,7 +142,7 @@ export class AgentDocumentManager { try { // Use the LinkManager approach which is proven to work in ChatBox if (this.chatBoxDocument) { - console.log('Finding documents linked to ChatBox document with ID:', this.chatBoxDocument.id); + console.log('Finding documents linked to ChatBox document with ID:', this.chatBoxDocument[Id]); // Get directly linked documents via LinkManager const linkedDocs = LinkManager.Instance.getAllRelatedLinks(this.chatBoxDocument) @@ -134,57 +156,10 @@ export class AgentDocumentManager { linkedDocs.forEach((doc: Doc | undefined) => { if (doc) { this.processDocument(doc); - console.log('Processed linked document:', doc.id, doc.title, doc.type); + console.log('Processed linked document:', doc[Id], doc.title, doc.type); } }); - - // Include the ChatBox document itself - this.processDocument(this.chatBoxDocument); - - // If we have access to the Document's parent, try to find sibling documents - if (this.chatBoxDocument.parent) { - const parent = this.chatBoxDocument.parent; - console.log('Found parent document, checking for siblings'); - - // Check if parent is a Doc type and has a childDocs function - if (parent && typeof parent === 'object' && 'childDocs' in parent && typeof parent.childDocs === 'function') { - try { - const siblingDocs = parent.childDocs(); - if (Array.isArray(siblingDocs)) { - console.log(`Found ${siblingDocs.length} sibling documents via parent.childDocs()`); - siblingDocs.forEach((doc: Doc) => { - if (doc) { - this.processDocument(doc); - } - }); - } - } catch (e) { - console.warn('Error accessing parent.childDocs:', e); - } - } - } - } else if (this.chatBox && this.chatBox.linkedDocs) { - // If we have direct access to the linkedDocs computed property from ChatBox - console.log('Using ChatBox.linkedDocs directly'); - const linkedDocs = this.chatBox.linkedDocs; - if (Array.isArray(linkedDocs)) { - console.log(`Found ${linkedDocs.length} documents via ChatBox.linkedDocs`); - linkedDocs.forEach((doc: Doc) => { - if (doc) { - this.processDocument(doc); - } - }); - } - - // Process the ChatBox document if available - if (this.chatBox.Document) { - this.processDocument(this.chatBox.Document); - } - } else { - console.warn('No ChatBox document reference available for finding linked documents'); } - - console.log(`DocumentMetadataTool found ${this.documentsById.size} total documents`); } catch (error) { console.error('Error finding documents in Freeform view:', error); } @@ -201,6 +176,7 @@ export class AgentDocumentManager { // Only add if we haven't already processed this document if (!this.documentsById.has(docId)) { this.documentsById.set(docId, { layoutDoc: doc, dataDoc: doc[DocData] }); + console.log('Added document to documentsById:', doc[Id], docId, doc[Id], doc[DocData][Id]); } return docId; } @@ -213,37 +189,12 @@ export class AgentDocumentManager { private ensureDocumentId(doc: Doc): string { let docId: string | undefined; - // First try to get the ID from our custom field - if (doc[this.DOCUMENT_ID_FIELD]) { - docId = String(doc[this.DOCUMENT_ID_FIELD]); - return docId; - } - - // Try different ways to get a document ID - // 1. Try the direct id property if it exists - if (doc.id && typeof doc.id === 'string') { - docId = doc.id; - } - // 2. Try doc._id if it exists - else if (doc._id && typeof doc._id === 'string') { - docId = doc._id; - } - // 3. Try doc.data?.id if it exists - else if (doc.data && typeof doc.data === 'object' && 'id' in doc.data && typeof doc.data.id === 'string') { - docId = doc.data.id; - } - // 4. If none of the above work, generate a UUID - else { - docId = uuidv4(); - console.log(`Generated new UUID for document with title: ${doc.title || 'Untitled'}`); - } - - // Store the ID in the document's metadata so it persists - try { - doc[this.DOCUMENT_ID_FIELD] = docId; - } catch (e) { - console.warn(`Could not assign ID to document property`, e); + if (doc[Id]) { + console.log('Found document ID (normal):', doc[Id]); + docId = doc[Id]; + } else { + throw new Error('No document ID found'); } return docId; @@ -256,13 +207,13 @@ export class AgentDocumentManager { */ public extractDocumentMetadata(id: string) { if (!id) return null; - const doc = this.documentsById.get(id); - if (!doc) return null; - const layoutDoc = doc.layoutDoc; - const dataDoc = doc.dataDoc; + const agentDoc = this.documentsById.get(id); + if (!agentDoc) return null; + const layoutDoc = agentDoc.layoutDoc; + const dataDoc = agentDoc.dataDoc; const metadata: Record<string, any> = { - id: layoutDoc.dash_document_id || layoutDoc.id || '', + id: layoutDoc[Id] || dataDoc[Id] || '', title: layoutDoc.title || '', type: layoutDoc.type || '', fields: { @@ -355,7 +306,7 @@ export class AgentDocumentManager { if (value instanceof Doc) { return { type: 'Doc', - id: value.id || this.ensureDocumentId(value), + id: value[Id] || this.ensureDocumentId(value), title: value.title || '', docType: value.type || '', }; @@ -1011,33 +962,17 @@ export class AgentDocumentManager { * Returns a list of all document IDs in the manager. * @returns An array of document IDs (strings). */ - public listDocs(): string[] { - return Array.from(this.documentsById.keys()); + @computed + public get listDocs(): string[] { + console.log( + Array.from(this.documentsById.entries()).map(([id, agentDoc]) => JSON.stringify({ id, title: agentDoc.layoutDoc.title, type: agentDoc.layoutDoc.type, summary: agentDoc.layoutDoc.summary || 'No summary available for this document.' })) + ); + return Array.from(this.documentsById.entries()).map(([id, agentDoc]) => JSON.stringify({ id, title: agentDoc.layoutDoc.title, type: agentDoc.layoutDoc.type, summary: agentDoc.layoutDoc.summary || 'No summary available for this document.' })); } - /** - * Adds a document with a custom ID to the manager - * @param doc The document to add - * @param customId The custom ID to assign to the document - * @returns The customId that was assigned - */ - @action - public addCustomId(doc: Doc, customId: string): string { - if (!doc) { - console.error('Cannot add null document with custom ID'); - return ''; - } - - // Set the custom ID in the document's metadata - doc[this.DOCUMENT_ID_FIELD] = customId; - - // Store the document in our map - this.documentsById.set(customId, { - layoutDoc: doc, - dataDoc: doc, - }); - - return customId; + @computed + public get docIds(): string[] { + return Array.from(this.documentsById.keys()); } /** @@ -1078,11 +1013,8 @@ export class AgentDocumentManager { // Ensure each chunk ID can be linked back to its parent document // Store a mapping from chunk ID to parent document ID // This allows us to easily find a document by any of its chunk IDs - if (!this.documentsById.has(chunkId)) { - this.documentsById.set(chunkId, { - layoutDoc: doc, - dataDoc: docInfo.dataDoc, - }); + if (!this.documentIdsFromChunkIds.has(chunkId) && doc) { + this.documentIdsFromChunkIds.set(chunkId, doc[Id]); } } } @@ -1092,11 +1024,25 @@ export class AgentDocumentManager { * @param chunkId The chunk ID to look up * @returns The parent document ID if found */ - public getDocIdByChunkId(chunkId: string): string | undefined { - const docInfo = this.documentsById.get(chunkId); + public getDocByChunkId(chunkId: string): Doc | undefined { + // First, look up the document ID using the chunk ID mapping + const docId = this.documentIdsFromChunkIds.get(chunkId); + console.log('this.documentIdsFromChunkIds', this.documentIdsFromChunkIds); + console.log('docId', docId); + if (!docId) { + if (this.documentsById.has(chunkId)) { + return this.documentsById.get(chunkId)?.layoutDoc; + } else { + console.error('No document found for chunkId and docId', chunkId); + return undefined; + } + } + // Then get the document using the document ID + const docInfo = this.documentsById.get(docId); if (docInfo) { - return docInfo.layoutDoc[this.DOCUMENT_ID_FIELD] as string; + return docInfo.layoutDoc; } + console.error('No document found for docId', docId); return undefined; } @@ -1157,7 +1103,7 @@ export class AgentDocumentManager { return baseChunk; } }); - + console.log('simplifiedChunks', simplifiedChunks); // Update the document with all simplified chunks at once doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks }); @@ -1165,32 +1111,25 @@ export class AgentDocumentManager { } /** - * Gets the simplified chunks from a document - * @param doc The document to get simplified chunks from - * @returns Array of simplified chunks or empty array if none exist + * Gets a specific simplified chunk by ID + * @param doc The document containing chunks + * @param chunkId The ID of the chunk to retrieve + * @returns The simplified chunk if found, undefined otherwise */ - public getSimplifiedChunks(doc: Doc): any[] { + public getSimplifiedChunkById(doc: Doc, chunkId: string): any | undefined { + let chunks: any[] = []; if (!doc || !doc.chunk_simpl) { + chunks = []; + console.warn('No chunk found for chunkId', chunkId, '. Checking if document exists in documentsById.'); return []; } - try { const parsed = JSON.parse(StrCast(doc.chunk_simpl)); - return parsed.chunks || []; + chunks = parsed.chunks || []; } catch (e) { console.error('Error parsing simplified chunks:', e); return []; } - } - - /** - * Gets a specific simplified chunk by ID - * @param doc The document containing chunks - * @param chunkId The ID of the chunk to retrieve - * @returns The simplified chunk if found, undefined otherwise - */ - public getSimplifiedChunkById(doc: Doc, chunkId: string): any | undefined { - const chunks = this.getSimplifiedChunks(doc); return chunks.find(chunk => chunk.chunkId === chunkId); } diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts index 3df1294e9..1349df483 100644 --- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts +++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts @@ -16,6 +16,7 @@ import { AI_Document, CHUNK_TYPE, RAGChunk } from '../types/types'; import OpenAI from 'openai'; import { Embedding } from 'openai/resources'; import { AgentDocumentManager } from '../utils/AgentDocumentManager'; +import { Id } from '../../../../../fields/FieldSymbols'; dotenv.config(); @@ -24,13 +25,12 @@ dotenv.config(); * and OpenAI text-embedding-3-large for text embedding. It handles AI document management, uploads, and query-based retrieval. */ export class Vectorstore { - private pinecone: Pinecone; // Pinecone client for managing the vector index. + private pinecone!: Pinecone; // Pinecone client for managing the vector index. private index!: Index; // The specific Pinecone index used for document chunks. - private openai: OpenAI; // OpenAI client for generating embeddings. + private openai!: OpenAI; // OpenAI client for generating embeddings. private indexName: string = 'pdf-chatbot'; // Default name for the index. - private _id: string; // Unique ID for the Vectorstore instance. - private docManager: AgentDocumentManager; // Document manager for handling documents - + private _id!: string; // Unique ID for the Vectorstore instance. + private docManager!: AgentDocumentManager; // Document manager for handling documents documents: AI_Document[] = []; // Store the documents indexed in the vectorstore. /** @@ -143,10 +143,8 @@ export class Vectorstore { progressCallback(85, 'Embeddings generated. Finalizing document...'); doc.original_segments = JSON.stringify(typedResponse.full); - const doc_id = uuidv4(); - - // Register the document with the AgentDocumentManager - this.docManager.addCustomId(doc, doc_id); + const doc_id = doc[Id]; + console.log('doc_id in vectorstore', doc_id); // Generate chunk IDs upfront so we can register them const chunkIds = segmentedTranscript.map(() => uuidv4()); @@ -191,7 +189,7 @@ export class Vectorstore { } else { // Process regular document console.log('Processing regular document...'); - const createDocumentResponse = await Networking.PostToServer('/createDocument', { file_path: local_file_path }); + const createDocumentResponse = await Networking.PostToServer('/createDocument', { file_path: local_file_path, doc_id: doc[Id] }); // Type assertion for the response const { jobId } = createDocumentResponse as { jobId: string }; @@ -211,12 +209,13 @@ export class Vectorstore { } } - // Register the document with the AgentDocumentManager - this.docManager.addCustomId(doc, result.doc_id); - // Collect all chunk IDs const chunkIds = result.chunks.map(chunk => chunk.id); + if (result.doc_id !== doc[Id]) { + console.log('doc_id in vectorstore', result.doc_id, 'does not match doc_id in doc', doc[Id]); + } + // Register chunks with the document manager this.docManager.registerChunkIds(result.doc_id, chunkIds); @@ -319,16 +318,14 @@ export class Vectorstore { const queryEmbedding = queryEmbeddingResponse.data[0].embedding; - // Get document IDs from the AgentDocumentManager - const docIds = Array.from(this.docManager.listDocs()); - console.log('Using document IDs for retrieval:', docIds); + console.log('Using document IDs for retrieval:', this.docManager.docIds); // Query the Pinecone index using the embedding and filter by document IDs. // We'll query based on document IDs that are registered in the document manager const queryResponse: QueryResponse = await this.index.query({ vector: queryEmbedding, filter: { - doc_id: { $in: docIds }, + doc_id: { $in: this.docManager.docIds }, }, topK, includeValues: true, @@ -356,7 +353,7 @@ export class Vectorstore { // Ensure the document manager knows about this chunk // This is important for maintaining backwards compatibility - if (chunk.id && !this.docManager.getDocIdByChunkId(chunk.id)) { + if (chunk.id && !this.docManager.getDocByChunkId(chunk.id)) { // If the chunk ID isn't registered but we have a doc_id in metadata if (chunk.metadata.doc_id && this.docManager.has(chunk.metadata.doc_id)) { // Register the chunk with its parent document |
