about | summary | refs | log | tree | commit | diff
path: root/src/client
diff options
context:
space:
mode:
Diffstat (limited to 'src/client')
-rw-r--r-- src/client/views/nodes/chatbot/agentsystem/Agent.ts | 9
-rw-r--r-- src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx | 50
-rw-r--r-- src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts | 2
-rw-r--r-- src/client/views/nodes/chatbot/tools/SearchTool.ts | 26
-rw-r--r-- src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts | 30
-rw-r--r-- src/client/views/nodes/chatbot/tools/WikipediaTool.ts | 2
-rw-r--r-- src/client/views/nodes/chatbot/types/types.ts | 1
-rw-r--r-- src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts | 192
-rw-r--r-- src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts | 21
9 files changed, 96 insertions, 237 deletions
diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
index 24471bf5b..86d40864e 100644
--- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
@@ -63,10 +63,8 @@ export class Agent {
*/
constructor(
_vectorstore: Vectorstore,
- summaries: () => string,
history: () => string,
csvData: () => { filename: string; id: string; text: string }[],
- getLinkedUrlDocId: (url: string) => string[],
createImage: (result: Upload.FileInformation & Upload.InspectionResults, options: DocumentOptions) => void,
createCSVInDash: (url: string, title: string, id: string, data: string) => void,
docManager: AgentDocumentManager
@@ -83,7 +81,7 @@ export class Agent {
calculate: new CalculateTool(),
rag: new RAGTool(this.vectorstore),
dataAnalysis: new DataAnalysisTool(csvData),
- websiteInfoScraper: new WebsiteInfoScraperTool(getLinkedUrlDocId),
+ websiteInfoScraper: new WebsiteInfoScraperTool(this._docManager),
searchTool: new SearchTool(this._docManager),
noTool: new NoTool(),
//imageCreationTool: new ImageCreationTool(createImage),
@@ -125,11 +123,8 @@ export class Agent {
// Retrieve chat history and generate system prompt
const chatHistory = this._history();
// Get document summaries directly from document manager
- const documentSummaries = this._docManager.getAllDocumentSummaries();
- // Create a function that returns document summaries for the prompt
- const getSummaries = () => documentSummaries;
// Generate the system prompt with the summaries
- const systemPrompt = getReactPrompt(Object.values(this.tools), getSummaries, chatHistory);
+ const systemPrompt = getReactPrompt(Object.values(this.tools), () => JSON.stringify(this._docManager.listDocs), chatHistory);
// Initialize intermediate messages
this.interMessages = [{ role: 'system', content: systemPrompt }];
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 6349e554e..867e78860 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -121,16 +121,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
this.vectorstore = new Vectorstore(this.vectorstore_id, this.docManager);
// Create an agent with the vectorstore
- this.agent = new Agent(
- this.vectorstore,
- this.retrieveSummaries.bind(this),
- this.retrieveFormattedHistory.bind(this),
- this.retrieveCSVData.bind(this),
- this.retrieveDocIds.bind(this),
- this.createImageInDash.bind(this),
- this.createCSVInDash.bind(this),
- this.docManager
- );
+ this.agent = new Agent(this.vectorstore, this.retrieveFormattedHistory.bind(this), this.retrieveCSVData.bind(this), this.createImageInDash.bind(this), this.createCSVInDash.bind(this), this.docManager);
// Add event listeners
this.addScrollListener();
@@ -228,6 +219,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
}
};
+ //TODO: Update for new chunk_simpl on agentDocument
/**
* Adds a CSV file for analysis by sending it to OpenAI and generating a summary.
* @param newLinkedDoc The linked document representing the CSV file.
@@ -650,18 +642,15 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
citation: JSON.stringify(citation, null, 2),
});
- // First try to find the document using the document manager's chunk ID lookup
- const doc: Doc | undefined = this.docManager.getDocByChunkId(chunkId);
- if (!doc) {
- console.warn(`Document not found for citation with chunk_id: ${chunkId}`);
- return;
- }
-
// Get the simplified chunk using the document manager
- const foundChunk = this.docManager.getSimplifiedChunkById(doc, chunkId);
+ const { foundChunk, doc } = this.docManager.getSimplifiedChunkById(chunkId);
if (!foundChunk) {
- console.warn(`Chunk not found in document for chunk ID: ${chunkId}`);
- DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+ if (doc) {
+ console.warn(`Chunk not found in document, ${doc.id}, for chunk ID: ${chunkId}`);
+ DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+ } else {
+ console.warn(`Chunk not found for chunk ID: ${chunkId}`);
+ }
return;
}
@@ -678,6 +667,10 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
} else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) {
this.handleOtherChunkTypes(foundChunk, citation, doc);
} else {
+ if (doc.type === 'web') {
+ DocumentManager.Instance.showDocument(doc, { openLocation: OpenWhere.addRight }, () => {});
+ return;
+ }
// Show the chunk text in citation popup
let chunkText = citation.direct_text || 'Text content not available';
this.showCitationPopup(chunkText);
@@ -987,16 +980,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
}
/**
- * Getter that retrieves summaries of all linked documents.
- */
- @computed
- get summaries(): string {
- // Use the document manager to get all summaries
- console.log(this.docManager.listDocs);
- return JSON.stringify(this.docManager.listDocs);
- }
-
- /**
* Getter that retrieves all linked CSV files for analysis.
*/
@computed get linkedCSVs(): { filename: string; id: string; text: string }[] {
@@ -1022,7 +1005,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
// Other helper methods for retrieving document data and processing
retrieveSummaries = (): string => {
- return this.docManager.getAllDocumentSummaries();
+ console.log(this.docManager.listDocs);
+ return JSON.stringify(this.docManager.listDocs);
};
retrieveCSVData = () => {
@@ -1033,10 +1017,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
return this.formattedHistory;
};
- retrieveDocIds = (): string[] => {
- return Array.from(this.docManager.docIds);
- };
-
/**
* Handles follow-up questions when the user clicks on them.
* Automatically sets the input value to the clicked follow-up question.
diff --git a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts
index 5297292bf..405949c1e 100644
--- a/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts
+++ b/src/client/views/nodes/chatbot/tools/DocumentMetadataTool.ts
@@ -408,7 +408,7 @@ export class DocumentMetadataTool extends BaseTool<DocumentMetadataToolParamsTyp
const title = String(args.title);
const data = String(args.data);
- const id = this._docManager.createDocInDash(docType, data, { title: title });
+ const id = await this._docManager.createDocInDash(docType, data, { title: title });
if (!id) {
return [
diff --git a/src/client/views/nodes/chatbot/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts
index 53f5fc109..43f14ea83 100644
--- a/src/client/views/nodes/chatbot/tools/SearchTool.ts
+++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts
@@ -48,19 +48,21 @@ export class SearchTool extends BaseTool<SearchToolParamsType> {
query,
max_results: this._max_results,
})) as { results: { url: string; snippet: string }[] };
- const data = results.map((result: { url: string; snippet: string }) => {
- // Create a web document with the URL
- const id = this._docManager.createDocInDash('web', result.url, {
- title: `Search Result: ${result.url}`,
- text_html: result.snippet,
- data_useCors: true,
- });
+ const data = await Promise.all(
+ results.map(async (result: { url: string; snippet: string }) => {
+ // Create a web document with the URL
+ const id = await this._docManager.createDocInDash('web', result.url, {
+ title: `Search Result: ${result.url}`,
+ text_html: result.snippet,
+ data_useCors: true,
+ });
- return {
- type: 'text' as const,
- text: `<chunk chunk_id="${id}" chunk_type="url"><url>${result.url}</url><overview>${result.snippet}</overview></chunk>`,
- };
- });
+ return {
+ type: 'text' as const,
+ text: `<chunk chunk_id="${id}" chunk_type="url"><url>${result.url}</url><overview>${result.snippet}</overview></chunk>`,
+ };
+ })
+ );
return data;
} catch (error) {
console.log(error);
diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
index 3c7b4e3db..495a985cb 100644
--- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
+++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts
@@ -3,12 +3,14 @@ import { Networking } from '../../../../Network';
import { BaseTool } from './BaseTool';
import { Observation } from '../types/types';
import { ParametersType, ToolInfo } from '../types/tool_types';
-
+import { AgentDocumentManager } from '../utils/AgentDocumentManager';
+import { Doc } from '../../../../../fields/Doc';
+import { StrCast, WebCast } from '../../../../../fields/Types';
const websiteInfoScraperToolParams = [
{
- name: 'urls',
+ name: 'chunk_ids',
type: 'string[]',
- description: 'The URLs of the websites to scrape',
+ description: 'The chunk_ids of the urls to scrape from the SearchTool.',
required: true,
max_inputs: 3,
},
@@ -66,11 +68,11 @@ const websiteInfoScraperToolInfo: ToolInfo<WebsiteInfoScraperToolParamsType> = {
};
export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParamsType> {
- private _getLinkedUrlDocId: (url: string) => string[];
+ private _docManager: AgentDocumentManager;
- constructor(getLinkedUrlDocIds: (url: string) => string[]) {
+ constructor(docManager: AgentDocumentManager) {
super(websiteInfoScraperToolInfo);
- this._getLinkedUrlDocId = getLinkedUrlDocIds;
+ this._docManager = docManager;
}
/**
@@ -79,10 +81,13 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
* @param maxRetries Maximum number of retry attempts
* @returns The scraped content or error message
*/
- private async scrapeWithRetry(url: string, maxRetries = 2): Promise<Observation> {
+ private async scrapeWithRetry(chunkDoc: Doc, maxRetries = 2): Promise<Observation> {
let lastError = '';
let retryCount = 0;
-
+ const url = WebCast(chunkDoc.data!)!.url.href;
+ console.log(url);
+ console.log(chunkDoc);
+ console.log(chunkDoc.data);
// Validate URL format
try {
new URL(url); // This will throw if URL is invalid
@@ -110,7 +115,6 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
}
const { website_plain_text } = response as { website_plain_text: string };
- const id = this._getLinkedUrlDocId(url);
// Validate content quality
if (!website_plain_text) {
@@ -126,7 +130,7 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
if (retryCount === maxRetries) {
return {
type: 'text',
- text: `<chunk chunk_id="${id}" chunk_type="url">\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n</chunk>`,
+ text: `<chunk chunk_id="${chunkDoc.id}" chunk_type="url">\n${website_plain_text}\nNote: Limited content was retrieved from this URL.\n</chunk>`,
} as Observation;
}
@@ -138,7 +142,7 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
// Process and return content if it looks good
return {
type: 'text',
- text: `<chunk chunk_id="${id}" chunk_type="url">\n${website_plain_text}\n</chunk>`,
+ text: `<chunk chunk_id="${chunkDoc.id}" chunk_type="url">\n${website_plain_text}\n</chunk>`,
} as Observation;
} catch (error) {
lastError = error instanceof Error ? error.message : 'Unknown error';
@@ -156,10 +160,10 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam
}
async execute(args: ParametersType<WebsiteInfoScraperToolParamsType>): Promise<Observation[]> {
- const urls = args.urls;
+ const chunk_ids = args.chunk_ids;
// Create an array of promises, each one handling a website scrape for a URL
- const scrapingPromises = urls.map(url => this.scrapeWithRetry(url));
+ const scrapingPromises = chunk_ids.map(chunk_id => this.scrapeWithRetry(this._docManager.getDocument(chunk_id)!));
// Wait for all scraping promises to resolve
const results = await Promise.all(scrapingPromises);
diff --git a/src/client/views/nodes/chatbot/tools/WikipediaTool.ts b/src/client/views/nodes/chatbot/tools/WikipediaTool.ts
index ee815532a..ec5d83e52 100644
--- a/src/client/views/nodes/chatbot/tools/WikipediaTool.ts
+++ b/src/client/views/nodes/chatbot/tools/WikipediaTool.ts
@@ -32,7 +32,7 @@ export class WikipediaTool extends BaseTool<WikipediaToolParamsType> {
async execute(args: ParametersType<WikipediaToolParamsType>): Promise<Observation[]> {
try {
- const { text } = await Networking.PostToServer('/getWikipediaSummary', { title: args.title });
+ const { text } = (await Networking.PostToServer('/getWikipediaSummary', { title: args.title })) as { text: string };
const id = uuidv4();
const url = `https://en.wikipedia.org/wiki/${args.title.replace(/ /g, '_')}`;
this._addLinkedUrlDoc(url, id);
diff --git a/src/client/views/nodes/chatbot/types/types.ts b/src/client/views/nodes/chatbot/types/types.ts
index 90b5e7e11..0d1804b2d 100644
--- a/src/client/views/nodes/chatbot/types/types.ts
+++ b/src/client/views/nodes/chatbot/types/types.ts
@@ -101,6 +101,7 @@ export interface RAGChunk {
export interface SimplifiedChunk {
chunkId: string;
+ doc_id: string;
startPage?: number;
endPage?: number;
location?: string;
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index c8a6bb16b..5a09b945b 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -13,7 +13,7 @@ import { LinkManager, UPDATE_SERVER_CACHE } from '../../../../util/LinkManager';
import { DocumentView } from '../../DocumentView';
import { ChatBox, parsedDoc } from '../chatboxcomponents/ChatBox';
import { supportedDocTypes } from '../types/tool_types';
-import { CHUNK_TYPE, RAGChunk } from '../types/types';
+import { CHUNK_TYPE, RAGChunk, SimplifiedChunk } from '../types/types';
/**
* Interface representing a document in the freeform view
@@ -31,7 +31,7 @@ export class AgentDocumentManager {
private chatBox: ChatBox;
private chatBoxDocument: Doc | null = null;
private fieldMetadata: Record<string, any> = {};
- @observable private documentIdsFromChunkIds: ObservableMap<string, string>;
+ @observable private simplifiedChunks: ObservableMap<string, SimplifiedChunk>;
/**
* Creates a new DocumentManager
@@ -40,17 +40,21 @@ export class AgentDocumentManager {
constructor(chatBox: ChatBox) {
makeObservable(this);
const agentDoc = DocCast(chatBox.Document.agentDocument) ?? new Doc();
- const chunkIds = DocCast(agentDoc.chunkIds) ?? new Doc();
+ const chunk_simpl = DocCast(agentDoc.chunk_simpl) ?? new Doc();
agentDoc.title = chatBox.Document.title + '_agentDocument';
- chunkIds.title = '_chunkIds';
+ chunk_simpl.title = '_chunk_simpl';
chatBox.Document.agentDocument = agentDoc;
- DocCast(chatBox.Document.agentDocument)!.chunkIds = chunkIds;
- this.documentIdsFromChunkIds = StrListCast(chunkIds.mapping).reduce((mapping, content) => {
- const [chunkId, docId] = content.split(':');
- mapping.set(chunkId, docId);
+ DocCast(chatBox.Document.agentDocument)!.chunk_simpl = chunk_simpl;
+
+ this.simplifiedChunks = StrListCast(chunk_simpl.mapping).reduce((mapping, chunks) => {
+ StrListCast(chunks).forEach(chunk => {
+ const parsed = JSON.parse(StrCast(chunk));
+ mapping.set(parsed.chunkId, parsed);
+ });
return mapping;
- }, new ObservableMap<string, string>());
+ }, new ObservableMap<string, SimplifiedChunk>());
+
this.documentsById = StrListCast(agentDoc.mapping).reduce((mapping, content) => {
const [id, layoutId, docId] = content.split(':');
const layoutDoc = DocServer.GetCachedRefField(layoutId);
@@ -76,14 +80,10 @@ export class AgentDocumentManager {
//{ fireImmediately: true }
);
reaction(
- () => this.documentIdsFromChunkIds.values(),
+ () => this.simplifiedChunks.values(),
() => {
if (this.chatBoxDocument && DocCast(this.chatBoxDocument.agentDocument)) {
- // Store the mapping with chunkId:docId format for consistency
- const chunkIdsDoc = DocCast(DocCast(this.chatBoxDocument.agentDocument)!.chunkIds);
- if (chunkIdsDoc) {
- chunkIdsDoc.mapping = new List<string>(Array.from(this.documentIdsFromChunkIds.entries()).map(([chunkId, docId]) => `${chunkId}:${docId}`));
- }
+ DocCast(DocCast(this.chatBoxDocument.agentDocument)!.chunk_simpl)!.mapping = new List<string>(Array.from(this.simplifiedChunks.values()).map(chunk => JSON.stringify(chunk)));
}
}
//{ fireImmediately: true }
@@ -831,7 +831,8 @@ export class AgentDocumentManager {
* @param options Optional configuration options
* @returns The ID of the created document
*/
- public createDocInDash(docType: string, data: string, options?: any): string {
+
+ public async createDocInDash(docType: string, data: string, options?: any): Promise<string> {
// Validate doc_type
if (!this.isValidDocType(docType)) {
throw new Error(`Invalid document type: ${docType}`);
@@ -877,14 +878,15 @@ export class AgentDocumentManager {
// Create link and add it to the document system
const linkDoc = Docs.Create.LinkDocument(this.chatBoxDocument, doc);
LinkManager.Instance.addLink(linkDoc);
-
- // Add document to view
- this.chatBox._props.addDocument?.(doc);
-
- // Show document - defer actual display to prevent immediate resource loading
- setTimeout(() => {
- DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
- }, 100);
+ if (doc.type !== 'web') {
+ // Add document to view
+ this.chatBox._props.addDocument?.(doc);
+
+ // Show document - defer actual display to prevent immediate resource loading
+ setTimeout(() => {
+ DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+ }, 100);
+ }
}
});
@@ -986,88 +988,19 @@ export class AgentDocumentManager {
}
/**
- * Registers chunk IDs associated with a document in the manager
- * @param docId The parent document ID
- * @param chunkIds Array of chunk IDs associated with this document
- */
- @action
- public registerChunkIds(docId: string, chunkIds: string[]): void {
- // Get the document if it exists
- const docInfo = this.documentsById.get(docId);
- if (!docInfo) {
- console.warn(`Cannot register chunks for unknown document ID: ${docId}`);
- return;
- }
-
- // Store chunk IDs on the document for future reference
- const doc = docInfo.layoutDoc;
- if (!doc.chunk_ids) {
- doc.chunk_ids = JSON.stringify(chunkIds);
- } else {
- // Merge with existing chunk IDs if they exist
- const existingIds = JSON.parse(doc.chunk_ids as string);
- const updatedIds = [...new Set([...existingIds, ...chunkIds])]; // Remove duplicates
- doc.chunk_ids = JSON.stringify(updatedIds);
- }
- for (const chunkId of chunkIds) {
- // Ensure each chunk ID can be linked back to its parent document
- // Store a mapping from chunk ID to parent document ID
- // This allows us to easily find a document by any of its chunk IDs
- if (!this.documentIdsFromChunkIds.has(chunkId) && doc) {
- this.documentIdsFromChunkIds.set(chunkId, doc[Id]);
- }
- }
- }
-
- /**
- * Gets a document ID by a chunk ID
- * @param chunkId The chunk ID to look up
- * @returns The parent document ID if found
- */
- public getDocByChunkId(chunkId: string): Doc | undefined {
- // First, look up the document ID using the chunk ID mapping
- const docId = this.documentIdsFromChunkIds.get(chunkId);
- console.log('this.documentIdsFromChunkIds', this.documentIdsFromChunkIds);
- console.log('docId', docId);
- if (!docId) {
- if (this.documentsById.has(chunkId)) {
- return this.documentsById.get(chunkId)?.layoutDoc;
- } else {
- console.error('No document found for chunkId and docId', chunkId);
- return undefined;
- }
- }
- // Then get the document using the document ID
- const docInfo = this.documentsById.get(docId);
- if (docInfo) {
- return docInfo.layoutDoc;
- }
- console.error('No document found for docId', docId);
- return undefined;
- }
-
- /**
* Adds simplified chunks to a document for citation handling
* @param doc The document to add simplified chunks to
* @param chunks Array of full RAG chunks to simplify
* @param docType The type of document (e.g., 'pdf', 'video', 'audio', etc.)
* @returns The updated document with simplified chunks
*/
- public addSimplifiedChunks(doc: Doc, chunks: RAGChunk[], docType: string): Doc {
- if (!doc) {
- console.error('Cannot add simplified chunks to null document');
- return doc;
- }
-
- // Initialize empty chunks array if not exists
- if (!doc.chunk_simpl) {
- doc.chunk_simpl = JSON.stringify({ chunks: [] });
- }
-
+ @action
+ public addSimplifiedChunks(chunks: RAGChunk[], docType: string) {
+ console.log('chunks', chunks, 'simplifiedChunks', this.simplifiedChunks);
// Create array of simplified chunks based on document type
- const simplifiedChunks = chunks.map(chunk => {
+ for (const chunk of chunks) {
// Common properties across all chunk types
- const baseChunk = {
+ const baseChunk: SimplifiedChunk = {
chunkId: chunk.id,
text: chunk.metadata.text,
doc_id: chunk.metadata.doc_id,
@@ -1076,38 +1009,33 @@ export class AgentDocumentManager {
// Add type-specific properties
if (docType === 'video' || docType === 'audio') {
- return {
+ this.simplifiedChunks.set(chunk.id, {
...baseChunk,
start_time: chunk.metadata.start_time,
end_time: chunk.metadata.end_time,
indexes: chunk.metadata.indexes,
chunkType: docType === 'video' ? CHUNK_TYPE.VIDEO : CHUNK_TYPE.AUDIO,
- };
+ } as SimplifiedChunk);
} else if (docType === 'pdf') {
- return {
+ this.simplifiedChunks.set(chunk.id, {
...baseChunk,
startPage: chunk.metadata.start_page,
endPage: chunk.metadata.end_page,
location: chunk.metadata.location,
- };
+ } as SimplifiedChunk);
} else if (docType === 'csv') {
- return {
+ this.simplifiedChunks.set(chunk.id, {
...baseChunk,
rowStart: (chunk.metadata as any).row_start,
rowEnd: (chunk.metadata as any).row_end,
colStart: (chunk.metadata as any).col_start,
colEnd: (chunk.metadata as any).col_end,
- };
+ } as SimplifiedChunk);
} else {
// Default for other document types
- return baseChunk;
+ this.simplifiedChunks.set(chunk.id, baseChunk as SimplifiedChunk);
}
- });
- console.log('simplifiedChunks', simplifiedChunks);
- // Update the document with all simplified chunks at once
- doc.chunk_simpl = JSON.stringify({ chunks: simplifiedChunks });
-
- return doc;
+ }
}
/**
@@ -1116,21 +1044,10 @@ export class AgentDocumentManager {
* @param chunkId The ID of the chunk to retrieve
* @returns The simplified chunk if found, undefined otherwise
*/
- public getSimplifiedChunkById(doc: Doc, chunkId: string): any | undefined {
- let chunks: any[] = [];
- if (!doc || !doc.chunk_simpl) {
- chunks = [];
- console.warn('No chunk found for chunkId', chunkId, '. Checking if document exists in documentsById.');
- return [];
- }
- try {
- const parsed = JSON.parse(StrCast(doc.chunk_simpl));
- chunks = parsed.chunks || [];
- } catch (e) {
- console.error('Error parsing simplified chunks:', e);
- return [];
- }
- return chunks.find(chunk => chunk.chunkId === chunkId);
+ public getSimplifiedChunkById(chunkId: string): any | undefined {
+ console.log('chunkId', chunkId, 'simplifiedChunks', this.simplifiedChunks);
+ console.log('doc', this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || ''));
+ return { foundChunk: this.simplifiedChunks.get(chunkId), doc: this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || '') };
}
/**
@@ -1150,27 +1067,4 @@ export class AgentDocumentManager {
return [];
}
}
-
- /**
- * Gets all document summaries combined into a single string
- * @returns String containing all document summaries
- */
- public getAllDocumentSummaries(): string {
- const summaries = Array.from(this.documentsById.keys())
- .map(id => {
- const doc = this.getDocument(id);
- if (doc) {
- // Try to get summary from either the document or its data document
- const summary = doc.summary || (doc[DocData] && doc[DocData].summary);
- if (summary) {
- return StrCast(summary);
- }
- }
- return null;
- })
- .filter(Boolean)
- .join('\n\n');
-
- return summaries;
- }
}
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index 1349df483..f1fae6f11 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -148,10 +148,6 @@ export class Vectorstore {
// Generate chunk IDs upfront so we can register them
const chunkIds = segmentedTranscript.map(() => uuidv4());
-
- // Register all chunk IDs with the document manager
- this.docManager.registerChunkIds(doc_id, chunkIds);
-
// Add transcript and embeddings to metadata
result = {
doc_id,
@@ -185,7 +181,7 @@ export class Vectorstore {
doc.segmented_transcript = JSON.stringify(segmentedTranscript);
// Use doc manager to add simplified chunks
const docType = local_file_path.endsWith('.mp3') ? 'audio' : 'video';
- this.docManager.addSimplifiedChunks(doc, result.chunks, docType);
+ this.docManager.addSimplifiedChunks(result.chunks, docType);
} else {
// Process regular document
console.log('Processing regular document...');
@@ -216,13 +212,10 @@ export class Vectorstore {
console.log('doc_id in vectorstore', result.doc_id, 'does not match doc_id in doc', doc[Id]);
}
- // Register chunks with the document manager
- this.docManager.registerChunkIds(result.doc_id, chunkIds);
-
// Use doc manager to add simplified chunks - determine document type from file extension
const fileExt = path.extname(local_file_path).toLowerCase();
const docType = fileExt === '.pdf' ? 'pdf' : fileExt === '.csv' ? 'csv' : 'text';
- this.docManager.addSimplifiedChunks(doc, result.chunks, docType);
+ this.docManager.addSimplifiedChunks(result.chunks, docType);
doc.summary = result.summary;
doc.ai_purpose = result.purpose;
@@ -351,16 +344,6 @@ export class Vectorstore {
},
} as RAGChunk;
- // Ensure the document manager knows about this chunk
- // This is important for maintaining backwards compatibility
- if (chunk.id && !this.docManager.getDocByChunkId(chunk.id)) {
- // If the chunk ID isn't registered but we have a doc_id in metadata
- if (chunk.metadata.doc_id && this.docManager.has(chunk.metadata.doc_id)) {
- // Register the chunk with its parent document
- this.docManager.registerChunkIds(chunk.metadata.doc_id, [chunk.id]);
- }
- }
-
return chunk;
});