about summary refs log tree commit diff
path: root/src/client/views/nodes/chatbot
diff options
context:
space:
mode:
Diffstat (limited to 'src/client/views/nodes/chatbot')
-rw-r--r-- src/client/views/nodes/chatbot/agentsystem/prompts.ts 4
-rw-r--r-- src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx 36
-rw-r--r-- src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts 38
-rw-r--r-- src/client/views/nodes/chatbot/tools/RAGTool.ts 5
-rw-r--r-- src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts 213
-rw-r--r-- src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts 33
6 files changed, 112 insertions, 217 deletions
diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
index e551ef830..fcb4ab450 100644
--- a/src/client/views/nodes/chatbot/agentsystem/prompts.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
@@ -103,9 +103,9 @@ export function getReactPrompt(tools: BaseTool<ReadonlyArray<Parameter>>[], summ
<note>If no external tool is required, use 'no_tool', but if there might be relevant external information, use the appropriate tool.</note>
</tools>
- <summaries>
+ <available_documents>
${summaries()}
- </summaries>
+ </available_documents>
<chat_history>
${chatHistory}
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index d919b5f7f..34a1ade2e 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -650,16 +650,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
citation: JSON.stringify(citation, null, 2),
});
- // Try to find the document
- let doc: Doc | undefined;
-
// First try to find the document using the document manager's chunk ID lookup
- const parentDocId = this.docManager.getDocIdByChunkId(chunkId);
- if (parentDocId) {
- doc = this.docManager.getDocument(parentDocId);
- console.log(`Found document by chunk ID lookup: ${parentDocId}`);
- }
-
+ const doc: Doc | undefined = this.docManager.getDocByChunkId(chunkId);
if (!doc) {
console.warn(`Document not found for citation with chunk_id: ${chunkId}`);
return;
@@ -989,32 +981,13 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
componentWillUnmount() {
this.removeScrollListener();
}
-
- /**
- * Getter that retrieves all linked documents for the current document.
- */
- @computed get linkedDocs(): Doc[] {
- const docIds = this.docManager.listDocs();
- const docs: Doc[] = [];
-
- // Get documents from the document manager using the getDocument method
- docIds.forEach(id => {
- const doc = this.docManager.getDocument(id);
- if (doc) {
- docs.push(doc);
- }
- });
-
- return docs;
- }
-
/**
* Getter that retrieves document IDs of linked documents that have PDF_chunker–parsed content.
*/
@computed
get docIds(): string[] {
// Use the document manager to get all document IDs
- return Array.from(this.docManager.listDocs());
+ return Array.from(this.docManager.listDocs);
}
/**
@@ -1023,7 +996,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
@computed
get summaries(): string {
// Use the document manager to get all summaries
- return this.docManager.getAllDocumentSummaries();
+ console.log(this.docManager.listDocs);
+ return JSON.stringify(this.docManager.listDocs);
}
/**
@@ -1064,7 +1038,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
};
retrieveDocIds = (): string[] => {
- return Array.from(this.docManager.listDocs());
+ return Array.from(this.docManager.docIds);
};
/**
diff --git a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts
index e6c2421e5..5297292bf 100644
--- a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts
+++ b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts
@@ -18,13 +18,13 @@ const parameterDefinitions: ReadonlyArray<Parameter> = [
name: 'action',
type: 'string',
required: true,
- description: 'The action to perform: "get" to retrieve metadata, "edit" to modify metadata, "list" to enumerate documents, "getFieldOptions" to retrieve all available field options, or "create" to create a new document',
+ description: 'The action to perform: "get" to retrieve metadata, "edit" to modify metadata, "getFieldOptions" to retrieve all available field options, or "create" to create a new document',
},
{
name: 'documentId',
type: 'string',
required: false,
- description: 'The ID of the document to get or edit metadata for. Required for "edit", optional for "get", ignored for "list", "getFieldOptions", and "create"',
+ description: 'The ID of the document to get or edit metadata for. Required for "edit", optional for "get", ignored for "getFieldOptions", and "create"',
},
{
name: 'fieldEdits',
@@ -68,7 +68,6 @@ This tool provides the following capabilities:
- Get metadata from a specific document
- Edit metadata fields on documents (in either layout or data documents)
- Edit multiple fields at once (useful for updating dependent fields together)
-- List all available documents in the current view
- Retrieve all available field options with metadata (IMPORTANT: always call this before editing)
- Understand which fields are stored where (layout vs data document)
- Get detailed information about all available document fields
@@ -137,8 +136,8 @@ SPECIAL FIELD HANDLING:
- Width/Height: Set layout_autoHeight/layout_autoWidth to false before editing
RECOMMENDED WORKFLOW:
-1. First call action="list" to identify available documents
-2. Then call action="getFieldOptions" to understand available fields
+0. Understand the currently available documents that were provided as <available_documents> in the prompt
+1. Call action="getFieldOptions" to understand available fields
3. Get document metadata with action="get" to see current values
4. Edit fields with action="edit" using proper dependencies
OR
@@ -159,10 +158,6 @@ HANDLING DEPENDENT FIELDS:
- width → layout_autoWidth (set to false to allow manual width)
- Other auto-sizing related properties
-To LIST available documents:
-- Use action="list" to get a simple list of all documents in the current view
-- This is useful when you need to identify documents before getting details or editing them
-
Editing fields follows these rules:
1. First checks if the field exists on the layout document using Doc.Get
2. If it exists on the layout document, it's updated there
@@ -172,7 +167,6 @@ Editing fields follows these rules:
Examples:
- To get field options: { action: "getFieldOptions" }
-- To list all documents: { action: "list" }
- To get all document metadata: { action: "get" }
- To get metadata for a specific document: { action: "get", documentId: "doc123" }
- To edit a single field: { action: "edit", documentId: "doc123", fieldEdits: [{ fieldName: "backgroundColor", fieldValue: "#ff0000" }] }
@@ -186,7 +180,8 @@ Examples:
{ fieldName: "layout_autoHeight", fieldValue: false },
{ fieldName: "height", fieldValue: 200 }
]}
-- IMPORTANT: MULTI STEP WORKFLOWS ARE NOT ONLY ALLOWED BUT ENCOURAGED. TAKE THINGS 1 STEP AT A TIME.`;
+- IMPORTANT: MULTI STEP WORKFLOWS ARE NOT ONLY ALLOWED BUT ENCOURAGED. TAKE THINGS 1 STEP AT A TIME.
+- IMPORTANT: WHEN CITING A DOCUMENT, MAKE THE CHUNK ID THE DOCUMENT ID. WHENEVER YOU CITE A DOCUMENT, ALWAYS MAKE THE CITATION TYPE "text", THE "direct_text" FIELD BLANK, AND THE "chunk_id" FIELD THE DOCUMENT ID.`;
const documentMetadataToolInfo: ToolInfo<DocumentMetadataToolParamsType> = {
name: 'documentMetadata',
description: toolDescription,
@@ -232,11 +227,11 @@ export class DocumentMetadataTool extends BaseTool<DocumentMetadataToolParamsTyp
// Ensure the action is valid and convert to string
const action = String(args.action);
- if (!['get', 'edit', 'list', 'getFieldOptions', 'create'].includes(action)) {
+ if (!['get', 'edit', 'getFieldOptions', 'create'].includes(action)) {
return [
{
type: 'text',
- text: 'Error: Invalid action. Valid actions are "get", "edit", "list", "getFieldOptions", or "create".',
+ text: 'Error: Invalid action. Valid actions are "get", "edit", "getFieldOptions", or "create".',
},
];
}
@@ -386,10 +381,6 @@ export class DocumentMetadataTool extends BaseTool<DocumentMetadataToolParamsTyp
}
}
- case 'list': {
- this._docManager.listDocs();
- }
-
case 'getFieldOptions': {
// Get all available field options with metadata
const fieldOptions = this._docManager.getAllFieldMetadata();
@@ -457,7 +448,7 @@ ${JSON.stringify(createdMetadata, null, 2)}`,
return [
{
type: 'text',
- text: 'Error: Unknown action. Valid actions are "get", "edit", "list", "getFieldOptions", or "create".',
+ text: 'Error: Unknown action. Valid actions are "get", "edit", "getFieldOptions", or "create".',
},
];
}
@@ -537,11 +528,6 @@ ${JSON.stringify(createdMetadata, null, 2)}`,
return true;
}
- // list action doesn't require any additional parameters
- if (params.action === 'list') {
- return true;
- }
-
return true;
}
@@ -552,7 +538,7 @@ ${JSON.stringify(createdMetadata, null, 2)}`,
*/
private getParameterRequirementsByAction(action?: string): string {
if (!action) {
- return 'Please specify an action: "get", "edit", "list", "getFieldOptions", or "create".';
+ return 'Please specify an action: "get", "edit", "getFieldOptions", or "create".';
}
switch (action.toLowerCase()) {
@@ -560,14 +546,12 @@ ${JSON.stringify(createdMetadata, null, 2)}`,
return 'The "get" action accepts an optional documentId parameter.';
case 'edit':
return 'The "edit" action requires documentId and fieldEdits parameters. fieldEdits must be a JSON array of field edits.';
- case 'list':
- return 'The "list" action does not require any additional parameters.';
case 'getFieldOptions':
return 'The "getFieldOptions" action does not require any additional parameters. It returns metadata about all available document fields.';
case 'create':
return 'The "create" action requires title, data, and doc_type parameters.';
default:
- return `Unknown action "${action}". Valid actions are "get", "edit", "list", "getFieldOptions", or "create".`;
+ return `Unknown action "${action}". Valid actions are "get", "edit", "getFieldOptions", or "create".`;
}
}
}
diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts
index ef374ed22..90b803d21 100644
--- a/src/client/views/nodes/chatbot/tools/RAGTool.ts
+++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts
@@ -3,6 +3,7 @@ import { Observation, RAGChunk } from '../types/types';
import { ParametersType, ToolInfo } from '../types/tool_types';
import { Vectorstore } from '../vectorstore/Vectorstore';
import { BaseTool } from './BaseTool';
+import { DocumentMetadataTool } from './DocumentMetadataTool';
const ragToolParams = [
{
@@ -17,7 +18,7 @@ type RAGToolParamsType = typeof ragToolParams;
const ragToolInfo: ToolInfo<RAGToolParamsType> = {
name: 'rag',
- description: 'Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a set of document chunks (text or images) to provide a grounded response based on user documents.',
+ description: `Performs a RAG (Retrieval-Augmented Generation) search on user documents (only PDF, audio, and video are supported—for information about other document types, use the ${DocumentMetadataTool.name} tool) and returns a set of document chunks (text or images) to provide a grounded response based on user documents.`,
citationRules: `When using the RAG tool, the structure must adhere to the format described in the ReAct prompt. Below are additional guidelines specifically for RAG-based responses:
1. **Grounded Text Guidelines**:
@@ -75,7 +76,7 @@ export class RAGTool extends BaseTool<RAGToolParamsType> {
async getFormattedChunks(relevantChunks: RAGChunk[]): Promise<Observation[]> {
try {
- const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks }) as { formattedChunks: Observation[]}
+ const { formattedChunks } = (await Networking.PostToServer('/formatChunks', { relevantChunks })) as { formattedChunks: Observation[] };
if (!formattedChunks) {
throw new Error('Failed to format chunks');
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index 14cffcb70..c8a6bb16b 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -1,4 +1,4 @@
-import { action, makeObservable, observable, ObservableMap, reaction, runInAction } from 'mobx';
+import { action, computed, makeObservable, observable, ObservableMap, reaction, runInAction } from 'mobx';
import { observer } from 'mobx-react';
import { v4 as uuidv4 } from 'uuid';
import { Doc, StrListCast } from '../../../../../fields/Doc';
@@ -31,7 +31,7 @@ export class AgentDocumentManager {
private chatBox: ChatBox;
private chatBoxDocument: Doc | null = null;
private fieldMetadata: Record<string, any> = {};
- private readonly DOCUMENT_ID_FIELD = '_dash_document_id';
+ @observable private documentIdsFromChunkIds: ObservableMap<string, string>;
/**
* Creates a new DocumentManager
@@ -40,8 +40,17 @@ export class AgentDocumentManager {
constructor(chatBox: ChatBox) {
makeObservable(this);
const agentDoc = DocCast(chatBox.Document.agentDocument) ?? new Doc();
+ const chunkIds = DocCast(agentDoc.chunkIds) ?? new Doc();
+
agentDoc.title = chatBox.Document.title + '_agentDocument';
+ chunkIds.title = '_chunkIds';
chatBox.Document.agentDocument = agentDoc;
+ DocCast(chatBox.Document.agentDocument)!.chunkIds = chunkIds;
+ this.documentIdsFromChunkIds = StrListCast(chunkIds.mapping).reduce((mapping, content) => {
+ const [chunkId, docId] = content.split(':');
+ mapping.set(chunkId, docId);
+ return mapping;
+ }, new ObservableMap<string, string>());
this.documentsById = StrListCast(agentDoc.mapping).reduce((mapping, content) => {
const [id, layoutId, docId] = content.split(':');
const layoutDoc = DocServer.GetCachedRefField(layoutId);
@@ -66,6 +75,19 @@ export class AgentDocumentManager {
}
//{ fireImmediately: true }
);
+ reaction(
+ () => this.documentIdsFromChunkIds.values(),
+ () => {
+ if (this.chatBoxDocument && DocCast(this.chatBoxDocument.agentDocument)) {
+ // Store the mapping with chunkId:docId format for consistency
+ const chunkIdsDoc = DocCast(DocCast(this.chatBoxDocument.agentDocument)!.chunkIds);
+ if (chunkIdsDoc) {
+ chunkIdsDoc.mapping = new List<string>(Array.from(this.documentIdsFromChunkIds.entries()).map(([chunkId, docId]) => `${chunkId}:${docId}`));
+ }
+ }
+ }
+ //{ fireImmediately: true }
+ );
this.processDocument(this.chatBoxDocument);
this.initializeFieldMetadata();
}
@@ -120,7 +142,7 @@ export class AgentDocumentManager {
try {
// Use the LinkManager approach which is proven to work in ChatBox
if (this.chatBoxDocument) {
- console.log('Finding documents linked to ChatBox document with ID:', this.chatBoxDocument.id);
+ console.log('Finding documents linked to ChatBox document with ID:', this.chatBoxDocument[Id]);
// Get directly linked documents via LinkManager
const linkedDocs = LinkManager.Instance.getAllRelatedLinks(this.chatBoxDocument)
@@ -134,57 +156,10 @@ export class AgentDocumentManager {
linkedDocs.forEach((doc: Doc | undefined) => {
if (doc) {
this.processDocument(doc);
- console.log('Processed linked document:', doc.id, doc.title, doc.type);
+ console.log('Processed linked document:', doc[Id], doc.title, doc.type);
}
});
-
- // Include the ChatBox document itself
- this.processDocument(this.chatBoxDocument);
-
- // If we have access to the Document's parent, try to find sibling documents
- if (this.chatBoxDocument.parent) {
- const parent = this.chatBoxDocument.parent;
- console.log('Found parent document, checking for siblings');
-
- // Check if parent is a Doc type and has a childDocs function
- if (parent && typeof parent === 'object' && 'childDocs' in parent && typeof parent.childDocs === 'function') {
- try {
- const siblingDocs = parent.childDocs();
- if (Array.isArray(siblingDocs)) {
- console.log(`Found ${siblingDocs.length} sibling documents via parent.childDocs()`);
- siblingDocs.forEach((doc: Doc) => {
- if (doc) {
- this.processDocument(doc);
- }
- });
- }
- } catch (e) {
- console.warn('Error accessing parent.childDocs:', e);
- }
- }
- }
- } else if (this.chatBox && this.chatBox.linkedDocs) {
- // If we have direct access to the linkedDocs computed property from ChatBox
- console.log('Using ChatBox.linkedDocs directly');
- const linkedDocs = this.chatBox.linkedDocs;
- if (Array.isArray(linkedDocs)) {
- console.log(`Found ${linkedDocs.length} documents via ChatBox.linkedDocs`);
- linkedDocs.forEach((doc: Doc) => {
- if (doc) {
- this.processDocument(doc);
- }
- });
- }
-
- // Process the ChatBox document if available
- if (this.chatBox.Document) {
- this.processDocument(this.chatBox.Document);
- }
- } else {
- console.warn('No ChatBox document reference available for finding linked documents');
}
-
- console.log(`DocumentMetadataTool found ${this.documentsById.size} total documents`);
} catch (error) {
console.error('Error finding documents in Freeform view:', error);
}
@@ -201,6 +176,7 @@ export class AgentDocumentManager {
// Only add if we haven't already processed this document
if (!this.documentsById.has(docId)) {
this.documentsById.set(docId, { layoutDoc: doc, dataDoc: doc[DocData] });
+ console.log('Added document to documentsById:', doc[Id], docId, doc[Id], doc[DocData][Id]);
}
return docId;
}
@@ -213,37 +189,12 @@ export class AgentDocumentManager {
private ensureDocumentId(doc: Doc): string {
let docId: string | undefined;
- // First try to get the ID from our custom field
- if (doc[this.DOCUMENT_ID_FIELD]) {
- docId = String(doc[this.DOCUMENT_ID_FIELD]);
- return docId;
- }
-
- // Try different ways to get a document ID
-
// 1. Try the direct id property if it exists
- if (doc.id && typeof doc.id === 'string') {
- docId = doc.id;
- }
- // 2. Try doc._id if it exists
- else if (doc._id && typeof doc._id === 'string') {
- docId = doc._id;
- }
- // 3. Try doc.data?.id if it exists
- else if (doc.data && typeof doc.data === 'object' && 'id' in doc.data && typeof doc.data.id === 'string') {
- docId = doc.data.id;
- }
- // 4. If none of the above work, generate a UUID
- else {
- docId = uuidv4();
- console.log(`Generated new UUID for document with title: ${doc.title || 'Untitled'}`);
- }
-
- // Store the ID in the document's metadata so it persists
- try {
- doc[this.DOCUMENT_ID_FIELD] = docId;
- } catch (e) {
- console.warn(`Could not assign ID to document property`, e);
+ if (doc[Id]) {
+ console.log('Found document ID (normal):', doc[Id]);
+ docId = doc[Id];
+ } else {
+ throw new Error('No document ID found');
}
return docId;
@@ -256,13 +207,13 @@ export class AgentDocumentManager {
*/
public extractDocumentMetadata(id: string) {
if (!id) return null;
- const doc = this.documentsById.get(id);
- if (!doc) return null;
- const layoutDoc = doc.layoutDoc;
- const dataDoc = doc.dataDoc;
+ const agentDoc = this.documentsById.get(id);
+ if (!agentDoc) return null;
+ const layoutDoc = agentDoc.layoutDoc;
+ const dataDoc = agentDoc.dataDoc;
const metadata: Record<string, any> = {
- id: layoutDoc.dash_document_id || layoutDoc.id || '',
+ id: layoutDoc[Id] || dataDoc[Id] || '',
title: layoutDoc.title || '',
type: layoutDoc.type || '',
fields: {
@@ -355,7 +306,7 @@ export class AgentDocumentManager {
if (value instanceof Doc) {
return {
type: 'Doc',
- id: value.id || this.ensureDocumentId(value),
+ id: value[Id] || this.ensureDocumentId(value),
title: value.title || '',
docType: value.type || '',
};
@@ -1011,33 +962,17 @@ export class AgentDocumentManager {
* Returns a list of all document IDs in the manager.
* @returns An array of document IDs (strings).
*/
- public listDocs(): string[] {
- return Array.from(this.documentsById.keys());
+ @computed
+ public get listDocs(): string[] {
+ console.log(
+ Array.from(this.documentsById.entries()).map(([id, agentDoc]) => JSON.stringify({ id, title: agentDoc.layoutDoc.title, type: agentDoc.layoutDoc.type, summary: agentDoc.layoutDoc.summary || 'No summary available for this document.' }))
+ );
+ return Array.from(this.documentsById.entries()).map(([id, agentDoc]) => JSON.stringify({ id, title: agentDoc.layoutDoc.title, type: agentDoc.layoutDoc.type, summary: agentDoc.layoutDoc.summary || 'No summary available for this document.' }));
}
- /**
- * Adds a document with a custom ID to the manager
- * @param doc The document to add
- * @param customId The custom ID to assign to the document
- * @returns The customId that was assigned
- */
- @action
- public addCustomId(doc: Doc, customId: string): string {
- if (!doc) {
- console.error('Cannot add null document with custom ID');
- return '';
- }
-
- // Set the custom ID in the document's metadata
- doc[this.DOCUMENT_ID_FIELD] = customId;
-
- // Store the document in our map
- this.documentsById.set(customId, {
- layoutDoc: doc,
- dataDoc: doc,
- });
-
- return customId;
+ @computed
+ public get docIds(): string[] {
+ return Array.from(this.documentsById.keys());
}
/**
@@ -1078,11 +1013,8 @@ export class AgentDocumentManager {
// Ensure each chunk ID can be linked back to its parent document
// Store a mapping from chunk ID to parent document ID
// This allows us to easily find a document by any of its chunk IDs
- if (!this.documentsById.has(chunkId)) {
- this.documentsById.set(chunkId, {
- layoutDoc: doc,
- dataDoc: docInfo.dataDoc,
- });
+ if (!this.documentIdsFromChunkIds.has(chunkId) && doc) {
+ this.documentIdsFromChunkIds.set(chunkId, doc[Id]);
}
}
}
@@ -1092,11 +1024,25 @@ export class AgentDocumentManager {
* @param chunkId The chunk ID to look up
* @returns The parent document ID if found
*/
- public getDocIdByChunkId(chunkId: string): string | undefined {
- const docInfo = this.documentsById.get(chunkId);
+ public getDocByChunkId(chunkId: string): Doc | undefined {
+ // First, look up the document ID using the chunk ID mapping
+ const docId = this.documentIdsFromChunkIds.get(chunkId);
+ console.log('this.documentIdsFromChunkIds', this.documentIdsFromChunkIds);
+ console.log('docId', docId);
+ if (!docId) {
+ if (this.documentsById.has(chunkId)) {
+ return this.documentsById.get(chunkId)?.layoutDoc;
+ } else {
+ console.error('No document found for chunkId and docId', chunkId);
+ return undefined;
+ }
+ }
+ // Then get the document using the document ID
+ const docInfo = this.documentsById.get(docId);
if (docInfo) {
- return docInfo.layoutDoc[this.DOCUMENT_ID_FIELD] as string;
+ return docInfo.layoutDoc;
}
+ console.error('No document found for docId', docId);
return undefined;
}
@@ -1157,7 +1103,7 @@ export class AgentDocumentManager {
return baseChunk;
}
});
-
+ console.log('simplifiedChunks', simplifiedChunks);
// Update the document with all simplified chunks at once
doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks });
@@ -1165,32 +1111,25 @@ export class AgentDocumentManager {
}
/**
- * Gets the simplified chunks from a document
- * @param doc The document to get simplified chunks from
- * @returns Array of simplified chunks or empty array if none exist
+ * Gets a specific simplified chunk by ID
+ * @param doc The document containing chunks
+ * @param chunkId The ID of the chunk to retrieve
+ * @returns The simplified chunk if found, undefined otherwise
*/
- public getSimplifiedChunks(doc: Doc): any[] {
+ public getSimplifiedChunkById(doc: Doc, chunkId: string): any | undefined {
+ let chunks: any[] = [];
if (!doc || !doc.chunk_simpl) {
+ chunks = [];
+ console.warn('No chunk found for chunkId', chunkId, '. Checking if document exists in documentsById.');
return [];
}
-
try {
const parsed = JSON.parse(StrCast(doc.chunk_simpl));
- return parsed.chunks || [];
+ chunks = parsed.chunks || [];
} catch (e) {
console.error('Error parsing simplified chunks:', e);
return [];
}
- }
-
- /**
- * Gets a specific simplified chunk by ID
- * @param doc The document containing chunks
- * @param chunkId The ID of the chunk to retrieve
- * @returns The simplified chunk if found, undefined otherwise
- */
- public getSimplifiedChunkById(doc: Doc, chunkId: string): any | undefined {
- const chunks = this.getSimplifiedChunks(doc);
return chunks.find(chunk => chunk.chunkId === chunkId);
}
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index 3df1294e9..1349df483 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -16,6 +16,7 @@ import { AI_Document, CHUNK_TYPE, RAGChunk } from '../types/types';
import OpenAI from 'openai';
import { Embedding } from 'openai/resources';
import { AgentDocumentManager } from '../utils/AgentDocumentManager';
+import { Id } from '../../../../../fields/FieldSymbols';
dotenv.config();
@@ -24,13 +25,12 @@ dotenv.config();
* and OpenAI text-embedding-3-large for text embedding. It handles AI document management, uploads, and query-based retrieval.
*/
export class Vectorstore {
- private pinecone: Pinecone; // Pinecone client for managing the vector index.
+ private pinecone!: Pinecone; // Pinecone client for managing the vector index.
private index!: Index; // The specific Pinecone index used for document chunks.
- private openai: OpenAI; // OpenAI client for generating embeddings.
+ private openai!: OpenAI; // OpenAI client for generating embeddings.
private indexName: string = 'pdf-chatbot'; // Default name for the index.
- private _id: string; // Unique ID for the Vectorstore instance.
- private docManager: AgentDocumentManager; // Document manager for handling documents
-
+ private _id!: string; // Unique ID for the Vectorstore instance.
+ private docManager!: AgentDocumentManager; // Document manager for handling documents
documents: AI_Document[] = []; // Store the documents indexed in the vectorstore.
/**
@@ -143,10 +143,8 @@ export class Vectorstore {
progressCallback(85, 'Embeddings generated. Finalizing document...');
doc.original_segments = JSON.stringify(typedResponse.full);
- const doc_id = uuidv4();
-
- // Register the document with the AgentDocumentManager
- this.docManager.addCustomId(doc, doc_id);
+ const doc_id = doc[Id];
+ console.log('doc_id in vectorstore', doc_id);
// Generate chunk IDs upfront so we can register them
const chunkIds = segmentedTranscript.map(() => uuidv4());
@@ -191,7 +189,7 @@ export class Vectorstore {
} else {
// Process regular document
console.log('Processing regular document...');
- const createDocumentResponse = await Networking.PostToServer('/createDocument', { file_path: local_file_path });
+ const createDocumentResponse = await Networking.PostToServer('/createDocument', { file_path: local_file_path, doc_id: doc[Id] });
// Type assertion for the response
const { jobId } = createDocumentResponse as { jobId: string };
@@ -211,12 +209,13 @@ export class Vectorstore {
}
}
- // Register the document with the AgentDocumentManager
- this.docManager.addCustomId(doc, result.doc_id);
-
// Collect all chunk IDs
const chunkIds = result.chunks.map(chunk => chunk.id);
+ if (result.doc_id !== doc[Id]) {
+ console.log('doc_id in vectorstore', result.doc_id, 'does not match doc_id in doc', doc[Id]);
+ }
+
// Register chunks with the document manager
this.docManager.registerChunkIds(result.doc_id, chunkIds);
@@ -319,16 +318,14 @@ export class Vectorstore {
const queryEmbedding = queryEmbeddingResponse.data[0].embedding;
- // Get document IDs from the AgentDocumentManager
- const docIds = Array.from(this.docManager.listDocs());
- console.log('Using document IDs for retrieval:', docIds);
+ console.log('Using document IDs for retrieval:', this.docManager.docIds);
// Query the Pinecone index using the embedding and filter by document IDs.
// We'll query based on document IDs that are registered in the document manager
const queryResponse: QueryResponse = await this.index.query({
vector: queryEmbedding,
filter: {
- doc_id: { $in: docIds },
+ doc_id: { $in: this.docManager.docIds },
},
topK,
includeValues: true,
@@ -356,7 +353,7 @@ export class Vectorstore {
// Ensure the document manager knows about this chunk
// This is important for maintaining backwards compatibility
- if (chunk.id && !this.docManager.getDocIdByChunkId(chunk.id)) {
+ if (chunk.id && !this.docManager.getDocByChunkId(chunk.id)) {
// If the chunk ID isn't registered but we have a doc_id in metadata
if (chunk.metadata.doc_id && this.docManager.has(chunk.metadata.doc_id)) {
// Register the chunk with its parent document