diff options
author | A.J. Shulman <Shulman.aj@gmail.com> | 2025-04-24 13:21:00 -0400 |
---|---|---|
committer | A.J. Shulman <Shulman.aj@gmail.com> | 2025-04-24 13:21:00 -0400 |
commit | 5ce2263849bfb901e276a4c5fc8ca2dbd8b80350 (patch) | |
tree | 34d4ac121cb13f141fcdb63325446d9c125813c4 /src | |
parent | 321977e670cbdf10f6c49fc9071e3260a8bd4aae (diff) |
attempt at linking docs but listing metadata doesn't work
Diffstat (limited to 'src')
5 files changed, 46 insertions, 14 deletions
diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts index 5af021dbf..c021d141e 100644 --- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts +++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts @@ -66,10 +66,12 @@ export class Agent { history: () => string, csvData: () => { filename: string; id: string; text: string }[], addLinkedUrlDoc: (url: string, id: string) => void, + getLinkedUrlDocId: (url: string) => string[], createImage: (result: Upload.FileInformation & Upload.InspectionResults, options: DocumentOptions) => void, // eslint-disable-next-line @typescript-eslint/no-unused-vars createCSVInDash: (url: string, title: string, id: string, data: string) => void, - chatBox: ChatBox + chatBox: ChatBox, + docManager: AgentDocumentManager ) { // Initialize OpenAI client with API key from environment this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); @@ -77,14 +79,14 @@ export class Agent { this._history = history; this._summaries = summaries; this._csvData = csvData; - this._docManager = new AgentDocumentManager(chatBox); + this._docManager = docManager; // Define available tools for the assistant this.tools = { calculate: new CalculateTool(), rag: new RAGTool(this.vectorstore), dataAnalysis: new DataAnalysisTool(csvData), - websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc), + websiteInfoScraper: new WebsiteInfoScraperTool(getLinkedUrlDocId), searchTool: new SearchTool(addLinkedUrlDoc), noTool: new NoTool(), //imageCreationTool: new ImageCreationTool(createImage), diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index e09b4313f..43765c1ce 100644 --- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -44,6 +44,7 @@ import { ProgressBar } from './ProgressBar'; import { OpenWhere } from '../../OpenWhere'; import { Upload } from '../../../../../server/SharedMediaTypes'; import { DocumentMetadataTool } from '../tools/DocumentMetadataTool'; +import { AgentDocumentManager } from '../utils/AgentDocumentManager'; dotenv.config(); @@ -76,6 +77,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { private agent: Agent; private messagesRef: React.RefObject<HTMLDivElement>; private _textInputRef: HTMLInputElement | undefined | null; + private docManager: AgentDocumentManager; /** * Static method that returns the layout string for the field. @@ -107,7 +109,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id); } this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds); - this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.createImageInDash, this.createCSVInDash, this); + this.docManager = new AgentDocumentManager(this); + this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.getLinkedUrlDocIds, this.createImageInDash, this.createCSVInDash, this, this.docManager); // Reinitialize the DocumentMetadataTool with a direct reference to this ChatBox instance // This ensures the tool can properly access documents in the same Freeform view @@ -380,7 +383,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { @action addLinkedUrlDoc = async (url: string, id: string) => { const doc = Docs.Create.WebDocument(url, { data_useCors: true }); - + this.docManager.addCustomId(doc, id); const linkDoc = Docs.Create.LinkDocument(this.Document, doc); LinkManager.Instance.addLink(linkDoc); @@ -391,6 +394,28 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { }; doc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] }); + this.docManager.processDocument(doc); + }; + + /** + * Retrieves the IDs of linked url documents. + * @returns An array of document IDs. + */ + @action + getLinkedUrlDocIds = () => { + const linkedDocs: Doc[] = this.linkedDocs; + const linkedUrlDocIds: string[] = []; + + for (const doc of linkedDocs) { + if (doc.chunk_simpl) { + const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] }; + const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkType === CHUNK_TYPE.URL); + if (foundChunk) { + linkedUrlDocIds.push(foundChunk.chunkId); + } + } + } + return linkedUrlDocIds; }; /** diff --git a/src/client/views/nodes/chatbot/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts index 6a11407a5..2ee30f0cf 100644 --- a/src/client/views/nodes/chatbot/tools/SearchTool.ts +++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts @@ -28,7 +28,7 @@ export class SearchTool extends BaseTool<SearchToolParamsType> { private _addLinkedUrlDoc: (url: string, id: string) => void; private _max_results: number; - constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 4) { + constructor(addLinkedUrlDoc: (url: string, id: string) => void, max_results: number = 3) { super(searchToolInfo); this._addLinkedUrlDoc = addLinkedUrlDoc; this._max_results = max_results; diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts index 19ccd0b36..bff38ae15 100644 --- a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts +++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts @@ -66,11 +66,11 @@ const websiteInfoScraperToolInfo: ToolInfo<WebsiteInfoScraperToolParamsType> = { }; export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParamsType> { - private _addLinkedUrlDoc: (url: string, id: string) => void; + private _getLinkedUrlDocId: (url: string) => string[]; - constructor(addLinkedUrlDoc: (url: string, id: string) => void) { + constructor(getLinkedUrlDocIds: (url: string) => string[]) { super(websiteInfoScraperToolInfo); - this._addLinkedUrlDoc = addLinkedUrlDoc; + this._getLinkedUrlDocId = getLinkedUrlDocIds; } async execute(args: ParametersType<WebsiteInfoScraperToolParamsType>): Promise<Observation[]> { @@ -79,9 +79,8 @@ export class WebsiteInfoScraperTool extends BaseTool<WebsiteInfoScraperToolParam // Create an array of promises, each one handling a website scrape for a URL const scrapingPromises = urls.map(async url => { try { - const { website_plain_text } = await Networking.PostToServer('/scrapeWebsite', { url }); - const id = uuidv4(); - this._addLinkedUrlDoc(url, id); + const { website_plain_text } = (await Networking.PostToServer('/scrapeWebsite', { url })) as { website_plain_text: string }; + const id = this._getLinkedUrlDocId(url); return { type: 'text', text: `<chunk chunk_id="${id}" chunk_type="url">\n${website_plain_text}\n</chunk>`, diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts index c954226e4..4eeac3c6a 100644 --- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts +++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts @@ -90,7 +90,7 @@ export class AgentDocumentManager { */ public initializeFindDocsFreeform() { // Reset collections - this.documentsById.clear(); + //this.documentsById.clear(); try { // Use the LinkManager approach which is proven to work in ChatBox @@ -109,6 +109,7 @@ export class AgentDocumentManager { linkedDocs.forEach((doc: Doc) => { if (doc) { this.processDocument(doc); + console.log('Processed linked document:', doc.id, doc.title, doc.type); } }); @@ -164,6 +165,11 @@ export class AgentDocumentManager { } } + public addCustomId(doc: Doc, id: string) { + doc.id = id; + doc.DOCUMENT_ID_FIELD = id; + } + /** * Process a document by ensuring it has an ID and adding it to the appropriate collections * @param doc The document to process @@ -730,7 +736,7 @@ export class AgentDocumentManager { // Get metadata for all documents const documentsMetadata: Record<string, any> = {}; for (const doc of this.documentsById.values()) { - documentsMetadata.add(this.extractDocumentMetadata(doc)); + documentsMetadata.add(this.extractDocumentMetadata(doc) ?? { documentId: doc.layoutDoc.id, title: doc.layoutDoc.title, type: doc.layoutDoc.type }); } return { |