about | summary | refs | log | tree | commit | diff
path: root/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
diff options
context:
space:
mode:
Diffstat (limited to 'src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx')
-rw-r--r-- src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx 450
1 files changed, 284 insertions, 166 deletions
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 43765c1ce..35dbee3e9 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -71,7 +71,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
@observable private _citationPopup: { text: string; visible: boolean } = { text: '', visible: false };
// Private properties for managing OpenAI API, vector store, agent, and UI elements
- private openai: OpenAI;
+ private openai!: OpenAI; // Using definite assignment assertion
private vectorstore_id: string;
private vectorstore: Vectorstore;
private agent: Agent;
@@ -98,25 +98,34 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
*/
constructor(props: FieldViewProps) {
super(props);
- makeObservable(this); // Enable MobX observables
+ makeObservable(this);
- // Initialize OpenAI, vectorstore, and agent
- this.openai = this.initializeOpenAI();
- if (StrCast(this.dataDoc.vectorstore_id) == '') {
- this.vectorstore_id = uuidv4();
- this.dataDoc.vectorstore_id = this.vectorstore_id;
- } else {
- this.vectorstore_id = StrCast(this.dataDoc.vectorstore_id);
- }
- this.vectorstore = new Vectorstore(this.vectorstore_id, this.retrieveDocIds);
+ this.messagesRef = React.createRef();
this.docManager = new AgentDocumentManager(this);
- this.agent = new Agent(this.vectorstore, this.retrieveSummaries, this.retrieveFormattedHistory, this.retrieveCSVData, this.addLinkedUrlDoc, this.getLinkedUrlDocIds, this.createImageInDash, this.createCSVInDash, this, this.docManager);
- // Reinitialize the DocumentMetadataTool with a direct reference to this ChatBox instance
- // This ensures the tool can properly access documents in the same Freeform view
- this.agent.reinitializeDocumentMetadataTool();
+ // Initialize OpenAI client
+ this.initializeOpenAI();
+
+ // Create a unique vectorstore ID for this ChatBox
+ this.vectorstore_id = uuidv4();
+
+ // Initialize vectorstore with the document manager
+ this.vectorstore = new Vectorstore(this.vectorstore_id, this.docManager);
+
+ // Create an agent with the vectorstore
+ this.agent = new Agent(
+ this.vectorstore,
+ this.retrieveSummaries.bind(this),
+ this.retrieveFormattedHistory.bind(this),
+ this.retrieveCSVData.bind(this),
+ this.retrieveDocIds.bind(this),
+ this.createImageInDash.bind(this),
+ this.createCSVInDash.bind(this),
+ this.docManager
+ );
- this.messagesRef = React.createRef<HTMLDivElement>();
+ // Add event listeners
+ this.addScrollListener();
// Reaction to update dataDoc when chat history changes
reaction(
@@ -140,22 +149,25 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
*/
@action
addDocToVectorstore = async (newLinkedDoc: Doc) => {
- this._uploadProgress = 0;
- this._currentStep = 'Initializing...';
- this._isUploadingDocs = true;
-
try {
- // Add the document to the vectorstore
+ this._isUploadingDocs = true;
+
+ // Process the document first to ensure it has a valid ID
+ this.docManager.processDocument(newLinkedDoc);
+
+ // Add the document to the vectorstore which will also register chunks
await this.vectorstore.addAIDoc(newLinkedDoc, this.updateProgress);
- } catch (error) {
- console.error('Error uploading document:', error);
- this._currentStep = 'Error during upload';
- } finally {
- runInAction(() => {
- this._isUploadingDocs = false;
- this._uploadProgress = 0;
- this._currentStep = '';
- });
+
+ // No longer needed as documents are tracked by the AgentDocumentManager
+ // this._linked_docs_to_add.add(newLinkedDoc);
+
+ this._isUploadingDocs = false;
+
+ return true;
+ } catch (err) {
+ console.error('Error adding document to vectorstore:', err);
+ this._isUploadingDocs = false;
+ return false;
}
};
@@ -238,7 +250,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
apiKey: process.env.OPENAI_KEY,
dangerouslyAllowBrowser: true,
};
- return new OpenAI(configuration);
+ this.openai = new OpenAI(configuration);
}
/**
@@ -376,49 +388,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
};
/**
- * Adds a linked document from a URL for future reference and analysis.
- * @param url The URL of the document to add.
- * @param id The unique identifier for the document.
- */
- @action
- addLinkedUrlDoc = async (url: string, id: string) => {
- const doc = Docs.Create.WebDocument(url, { data_useCors: true });
- this.docManager.addCustomId(doc, id);
- const linkDoc = Docs.Create.LinkDocument(this.Document, doc);
- LinkManager.Instance.addLink(linkDoc);
-
- const chunkToAdd = {
- chunkId: id,
- chunkType: CHUNK_TYPE.URL,
- url: url,
- };
-
- doc.chunk_simpl = JSON.stringify({ chunks: [chunkToAdd] });
- this.docManager.processDocument(doc);
- };
-
- /**
- * Retrieves the IDs of linked url documents.
- * @returns An array of document IDs.
- */
- @action
- getLinkedUrlDocIds = () => {
- const linkedDocs: Doc[] = this.linkedDocs;
- const linkedUrlDocIds: string[] = [];
-
- for (const doc of linkedDocs) {
- if (doc.chunk_simpl) {
- const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] };
- const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkType === CHUNK_TYPE.URL);
- if (foundChunk) {
- linkedUrlDocIds.push(foundChunk.chunkId);
- }
- }
- }
- return linkedUrlDocIds;
- };
-
- /**
* Getter to retrieve the current user's name from the client utils.
*/
@computed
@@ -613,82 +582,224 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
*/
@action
handleCitationClick = async (citation: Citation) => {
- const currentLinkedDocs: Doc[] = this.linkedDocs;
- const chunkId = citation.chunk_id;
+ try {
+ // Extract values from MobX proxy object if needed
+ const chunkId = typeof citation.chunk_id === 'object' ? (citation.chunk_id as any).toString() : citation.chunk_id;
+
+ // For debugging
+ console.log('Citation clicked:', {
+ chunkId,
+ citation: JSON.stringify(citation, null, 2),
+ });
- for (const doc of currentLinkedDocs) {
- if (doc.chunk_simpl) {
- const docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl)) as { chunks: SimplifiedChunk[] };
- const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId);
+ // Try to find the document
+ const linkedDocs = this.linkedDocs;
+ let doc: Doc | undefined;
- if (foundChunk) {
- // Handle media chunks specifically
+ // First try to find the document using the document manager's chunk ID lookup
+ const parentDocId = this.docManager.getDocIdByChunkId(chunkId);
+ if (parentDocId) {
+ doc = this.docManager.getDocument(parentDocId);
+ console.log(`Found document by chunk ID lookup: ${parentDocId}`);
+ }
- if (doc.ai_type == 'video' || doc.ai_type == 'audio') {
- const directMatchSegmentStart = this.getDirectMatchingSegmentStart(doc, citation.direct_text || '', foundChunk.indexes || []);
+ // If not found, fall back to searching through linked docs (maintains compatibility)
+ if (!doc) {
+ for (const linkedDoc of linkedDocs) {
+ if (linkedDoc.chunk_simpl) {
+ try {
+ const docChunkSimpl = JSON.parse(StrCast(linkedDoc.chunk_simpl)) as { chunks: SimplifiedChunk[] };
+ const foundChunk = docChunkSimpl.chunks.find(chunk => chunk.chunkId === chunkId);
+ if (foundChunk) {
+ doc = linkedDoc;
+ console.log(`Found document by iterating through linked docs`);
+ break;
+ }
+ } catch (e) {
+ console.error(`Error parsing chunk_simpl for doc ${linkedDoc.id}:`, e);
+ }
+ }
+ }
+ }
- if (directMatchSegmentStart) {
- // Navigate to the segment's start time in the media player
- await this.goToMediaTimestamp(doc, directMatchSegmentStart, doc.ai_type);
- } else {
- console.error('No direct matching segment found for the citation.');
+ if (!doc) {
+ console.warn(`Document not found for citation with chunk_id: ${chunkId}`);
+ return;
+ }
+
+ // Process the chunk data
+ let docChunkSimpl: { chunks: SimplifiedChunk[] } = { chunks: [] };
+ try {
+ docChunkSimpl = JSON.parse(StrCast(doc.chunk_simpl) || '{"chunks":[]}');
+ } catch (e) {
+ console.error(`Error parsing chunk_simpl for the found document:`, e);
+ return;
+ }
+
+ const foundChunk = docChunkSimpl.chunks.find((chunk: SimplifiedChunk) => chunk.chunkId === chunkId);
+
+ // Handle different chunk types
+ if (foundChunk) {
+ console.log(`Found chunk in document:`, foundChunk);
+
+ // Handle video chunks
+ if (foundChunk.chunkType === CHUNK_TYPE.VIDEO) {
+ if (foundChunk.start_time !== undefined) {
+ await this.goToMediaTimestamp(doc, foundChunk.start_time, 'video');
+ } else {
+ console.warn('Video chunk missing start_time:', foundChunk);
+ }
+ }
+ // Handle audio chunks - note that we're using string comparison since 'audio' isn't in CHUNK_TYPE enum
+ else if (String(foundChunk.chunkType).toLowerCase() === 'audio') {
+ if (foundChunk.start_time !== undefined) {
+ await this.goToMediaTimestamp(doc, foundChunk.start_time, 'audio');
+ } else {
+ console.warn('Audio chunk missing start_time:', foundChunk);
+ }
+ }
+ // Handle table or image chunks
+ else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) {
+ this.handleOtherChunkTypes(foundChunk, citation, doc);
+ }
+ // Handle text chunks
+ else if (foundChunk.chunkType === CHUNK_TYPE.TEXT) {
+ // Find text from the document's chunks metadata
+ let chunkText = '';
+
+ try {
+ // We already parsed the chunks earlier, so use that
+ const matchingChunk = docChunkSimpl.chunks.find(c => c.chunkId === foundChunk.chunkId);
+ if (matchingChunk && 'text' in matchingChunk) {
+ // If the text property exists on the chunk (even though it's not in the type)
+ chunkText = String(matchingChunk['text'] || '');
}
+ } catch (e) {
+ console.error('Error getting chunk text:', e);
+ }
+
+ // Default text if none found
+ if (!chunkText) {
+ chunkText = 'Text content not available';
+ }
+
+ this._citationPopup = {
+ text: chunkText,
+ visible: true,
+ };
+ }
+ // Handle URL chunks
+ else if (foundChunk.chunkType === CHUNK_TYPE.URL) {
+ if (foundChunk.url) {
+ // Instead of opening the URL in a new window, show the document in the viewer
+ DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
+ console.log(`Navigated to web document with URL: ${foundChunk.url}`);
} else {
- // Handle other chunk types as before
- this.handleOtherChunkTypes(foundChunk, citation, doc);
+ console.warn('URL chunk missing URL:', foundChunk);
}
}
+ } else if (doc?.original_segments) {
+ // Handle original segments for media files
+ let original_segments: any[] = [];
+ try {
+ original_segments = JSON.parse(StrCast(doc.original_segments));
+ } catch (e) {
+ console.error(`Error parsing original_segments:`, e);
+ return;
+ }
+
+ // Check if there's direct text to find in the segments
+ if (citation.direct_text) {
+ // Find the segment that contains the direct text
+ const start = this.getDirectMatchingSegmentStart(doc, citation.direct_text, []);
+ if (start !== -1) {
+ await this.goToMediaTimestamp(doc, start, doc.ai_type === 'audio' ? 'audio' : 'video');
+ }
+ }
+ } else {
+ console.warn('Unable to find chunk or segments for citation', citation);
}
+ } catch (error) {
+ console.error('Error handling citation click:', error);
}
};
+ /**
+ * Finds a matching segment in a document based on text content.
+ * @param doc The document to search in
+ * @param citationText The text to find in the document
+ * @param indexesOfSegments Segment indexes to restrict the search to; an empty list, or one that matches no segment, searches all segments
+ * @returns The start of the best-matching segment; the first segment's start when nothing matches well; -1 only when the inputs or segments are missing
+ */
getDirectMatchingSegmentStart = (doc: Doc, citationText: string, indexesOfSegments: string[]): number => {
- const originalSegments = JSON.parse(StrCast(doc.original_segments!)).map((segment: any, index: number) => ({
- index: index.toString(),
- text: segment.text,
- start: segment.start,
- end: segment.end,
- }));
-
- if (!Array.isArray(originalSegments) || originalSegments.length === 0 || !Array.isArray(indexesOfSegments)) {
- return 0;
+ if (!doc || !citationText) return -1;
+
+ // Get original segments from the document
+ const original_segments = doc.original_segments ? JSON.parse(StrCast(doc.original_segments)) : [];
+
+ if (!original_segments || !Array.isArray(original_segments) || original_segments.length === 0) {
+ return -1;
}
- // Create itemsToSearch array based on indexesOfSegments
- const itemsToSearch = indexesOfSegments.map((indexStr: string) => {
- const index = parseInt(indexStr, 10);
- const segment = originalSegments[index];
- return { text: segment.text, start: segment.start };
- });
+ let segments = original_segments;
- console.log('Constructed itemsToSearch:', itemsToSearch);
+ // If specific indexes are provided, filter segments by those indexes
+ if (indexesOfSegments && indexesOfSegments.length > 0) {
+ segments = original_segments.filter((segment: any) => indexesOfSegments.includes(segment.index));
+ }
+
+ // If no segments match the indexes, use all segments
+ if (segments.length === 0) {
+ segments = original_segments;
+ }
- // Helper function to calculate word overlap score
+ // First try to find an exact match
+ const exactMatch = segments.find((segment: any) => segment.text && segment.text.includes(citationText));
+
+ if (exactMatch) {
+ return exactMatch.start;
+ }
+
+ // If no exact match, find segment with best word overlap
const calculateWordOverlap = (text1: string, text2: string): number => {
- const words1 = new Set(text1.toLowerCase().split(/\W+/));
- const words2 = new Set(text2.toLowerCase().split(/\W+/));
- const intersection = new Set([...words1].filter(word => words2.has(word)));
- return intersection.size / Math.max(words1.size, words2.size); // Jaccard similarity
+ if (!text1 || !text2) return 0;
+
+ const words1 = text1.toLowerCase().split(/\s+/);
+ const words2 = text2.toLowerCase().split(/\s+/);
+ const wordSet1 = new Set(words1);
+
+ let overlap = 0;
+ for (const word of words2) {
+ if (wordSet1.has(word)) {
+ overlap++;
+ }
+ }
+
+ // Return the overlap as a fraction of the shorter text's word count (can exceed 1 when words repeat in text2)
+ return overlap / Math.min(words1.length, words2.length);
};
- // Search for the best matching segment
- let bestMatchStart = 0;
- let bestScore = 0;
-
- console.log(`Searching for best match for query: "${citationText}"`);
- itemsToSearch.forEach(item => {
- const score = calculateWordOverlap(citationText, item.text);
- console.log(`Comparing query to segment: "${item.text}" | Score: ${score}`);
- if (score > bestScore) {
- bestScore = score;
- bestMatchStart = item.start;
+ // Find segment with highest word overlap
+ let bestMatch = null;
+ let highestOverlap = 0;
+
+ for (const segment of segments) {
+ if (!segment.text) continue;
+
+ const overlap = calculateWordOverlap(segment.text, citationText);
+ if (overlap > highestOverlap) {
+ highestOverlap = overlap;
+ bestMatch = segment;
}
- });
+ }
- console.log('Best match found with score:', bestScore, '| Start time:', bestMatchStart);
+ // Only return matches with significant overlap (more than 30%)
+ if (bestMatch && highestOverlap > 0.3) {
+ return bestMatch.start;
+ }
- // Return the start time of the best match
- return bestMatchStart;
+ // If no good match found, return the start of the first segment as fallback
+ return segments.length > 0 ? segments[0].start : -1;
};
/**
@@ -772,7 +883,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
break;
case CHUNK_TYPE.CSV:
case CHUNK_TYPE.URL:
- DocumentManager.Instance.showDocument(doc, { willZoomCentered: true });
+ DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
+ console.log(`Showing web document in viewer with URL: ${foundChunk.url}`);
+ });
break;
default:
console.error('Unhandled chunk type:', foundChunk.chunkType);
@@ -879,6 +992,16 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
}
});
this.addScrollListener();
+
+ // Initialize the document manager by finding existing documents
+ this.docManager.initializeFindDocsFreeform();
+
+ // If any documents are queued in _linked_docs_to_add, process them with the document manager
+ if (this._linked_docs_to_add.size > 0) {
+ this._linked_docs_to_add.forEach(doc => {
+ this.docManager.processDocument(doc);
+ });
+ }
}
/**
@@ -892,28 +1015,28 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
/**
* Getter that retrieves all linked documents for the current document.
*/
- @computed
- get linkedDocs() {
- return LinkManager.Instance.getAllRelatedLinks(this.Document)
- .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document)))
- .map(d => DocCast(d?.annotationOn, d))
- .filter(d => d);
+ @computed get linkedDocs(): Doc[] {
+ const docIds = this.docManager.listDocs();
+ const docs: Doc[] = [];
+
+ // Get documents from the document manager using the getDocument method
+ docIds.forEach(id => {
+ const doc = this.docManager.getDocument(id);
+ if (doc) {
+ docs.push(doc);
+ }
+ });
+
+ return docs;
}
/**
- * Getter that retrieves document IDs of linked documents that have AI-related content.
+ * Getter that retrieves the document IDs listed by the document manager (docManager.listDocs()).
*/
@computed
- get docIds() {
- return LinkManager.Instance.getAllRelatedLinks(this.Document)
- .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document)))
- .map(d => DocCast(d?.annotationOn, d))
- .filter(d => d)
- .filter(d => {
- console.log(d.ai_doc_id);
- return d.ai_doc_id;
- })
- .map(d => StrCast(d.ai_doc_id));
+ get docIds(): string[] {
+ // Use the document manager to get all document IDs
+ return Array.from(this.docManager.listDocs());
}
/**
@@ -921,23 +1044,18 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
*/
@computed
get summaries(): string {
- return (
- LinkManager.Instance.getAllRelatedLinks(this.Document)
- .map(d => DocCast(LinkManager.getOppositeAnchor(d, this.Document)))
- .map(d => DocCast(d?.annotationOn, d))
- .filter(d => d)
- .filter(d => d.summary)
- .map((doc, index) => {
- if (PDFCast(doc.data)) {
- return `<summary file_name="${PDFCast(doc.data).url.pathname}" applicable_tools=["rag"]>${doc.summary}</summary>`;
- } else if (CsvCast(doc.data)) {
- return `<summary file_name="${CsvCast(doc.data).url.pathname}" applicable_tools=["dataAnalysis"]>${doc.summary}</summary>`;
- } else {
- return `${index + 1}) ${doc.summary}`;
- }
- })
- .join('\n') + '\n'
- );
+ const linkedDocs = Array.from(this.docManager.listDocs())
+ .map(id => {
+ const doc = this.docManager.extractDocumentMetadata(id);
+ if (doc && doc.fields && (doc.fields.layout.summary || doc.fields.data.summary)) {
+ return doc.fields.layout.summary || doc.fields.data.summary;
+ }
+ return null;
+ })
+ .filter(Boolean)
+ .join('\n\n');
+
+ return linkedDocs;
}
/**
@@ -965,7 +1083,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
// Other helper methods for retrieving document data and processing
- retrieveSummaries = () => {
+ retrieveSummaries = (): string => {
return this.summaries;
};
@@ -973,12 +1091,12 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
return this.linkedCSVs;
};
- retrieveFormattedHistory = () => {
+ retrieveFormattedHistory = (): string => {
return this.formattedHistory;
};
- retrieveDocIds = () => {
- return this.docIds;
+ retrieveDocIds = (): string[] => {
+ return Array.from(this.docManager.listDocs());
};
/**