aboutsummaryrefslogtreecommitdiff
path: root/src/client/views/nodes
diff options
context:
space:
mode:
author A.J. Shulman <Shulman.aj@gmail.com> 2025-05-12 15:46:30 -0400
committer A.J. Shulman <Shulman.aj@gmail.com> 2025-05-12 15:46:30 -0400
commit b3aa238043d01cbc58293b45867706fa9b36cefe (patch)
tree 770f07542f97e4bda2c56e00ef8118688e32fce9 /src/client/views/nodes
parent 0a6f3fc649b37e273a501302c1dd645a5e9a18ac (diff)
working better
Diffstat (limited to 'src/client/views/nodes')
-rw-r--r-- src/client/views/nodes/PDFBox.scss | 23
-rw-r--r-- src/client/views/nodes/PDFBox.tsx | 12
-rw-r--r-- src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx | 143
-rw-r--r-- src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts | 33
-rw-r--r-- src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts | 10
5 files changed, 205 insertions, 16 deletions
diff --git a/src/client/views/nodes/PDFBox.scss b/src/client/views/nodes/PDFBox.scss
index eaea272dc..44013a96d 100644
--- a/src/client/views/nodes/PDFBox.scss
+++ b/src/client/views/nodes/PDFBox.scss
@@ -344,3 +344,26 @@
font-size: 30px;
}
}
+
+// Styles for the fuzzy-search toggle button that PDFBox.tsx renders with the `pdfBox-fuzzy` class.
+.pdfBox-fuzzy {
+ border: none;
+ background-color: #4a4a4a;
+ color: white;
+ padding: 0 8px;
+ height: 24px;
+ cursor: pointer;
+ margin-right: 4px;
+ border-radius: 3px;
+ // Flex layout centers the button's content both horizontally and vertically.
+ display: flex;
+ align-items: center;
+ justify-content: center;
+
+ // PDFBox.tsx adds the `active` class while fuzzy search is enabled.
+ &.active {
+ background-color: #3498db;
+ color: white;
+ }
+
+ // Declared after `&.active`: the two selectors have equal specificity, so source order
+ // makes the hover background also apply to a button that is currently active.
+ &:hover {
+ background-color: #2980b9;
+ }
+}
diff --git a/src/client/views/nodes/PDFBox.tsx b/src/client/views/nodes/PDFBox.tsx
index 55e6d5596..4ecbd65b6 100644
--- a/src/client/views/nodes/PDFBox.tsx
+++ b/src/client/views/nodes/PDFBox.tsx
@@ -53,6 +53,7 @@ export class PDFBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
private _sidebarRef = React.createRef<SidebarAnnos>();
@observable private _searching: boolean = false;
+ @observable private _fuzzySearchEnabled: boolean = true;
@observable private _pdf: Opt<Pdfjs.PDFDocumentProxy> = undefined;
@observable private _pageControls = false;
@@ -272,6 +273,14 @@ export class PDFBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
!this.Document._layout_fitWidth && (this.Document._height = NumCast(this.Document._width) * (p.height / p.width));
};
+    /**
+     * Flips this box's fuzzy-search flag, asks the PDF viewer (when one exists) to flip its own
+     * mode as well, and then issues an empty search with the `clear` argument set.
+     */
+    @action
+    toggleFuzzySearch = () => {
+        const nextEnabled = !this._fuzzySearchEnabled;
+        this._fuzzySearchEnabled = nextEnabled;
+        // The viewer keeps its own toggle; it is skipped when no viewer is attached.
+        this._pdfViewer?.toggleFuzzySearch();
+        // Results found under the previous mode are cleared when the mode changes.
+        this.search('', false, true);
+    };
+
override search = action((searchString: string, bwd?: boolean, clear: boolean = false) => {
if (!this._searching && !clear) {
this._searching = true;
@@ -412,6 +421,9 @@ export class PDFBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
<button type="button" className="pdfBox-search" title="Search" onClick={e => this.search(this._searchString, e.shiftKey)}>
<FontAwesomeIcon icon="search" size="sm" />
</button>
+ <button type="button" className={`pdfBox-fuzzy ${this._fuzzySearchEnabled ? 'active' : ''}`} title={`${this._fuzzySearchEnabled ? 'Disable' : 'Enable'} Fuzzy Search`} onClick={this.toggleFuzzySearch}>
+ <FontAwesomeIcon icon="magic" size="sm" />
+ </button>
<button type="button" className="pdfBox-prevIcon" title="Previous Annotation" onClick={this.prevAnnotation}>
<FontAwesomeIcon icon="arrow-up" size="lg" />
</button>
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 00077d68d..af689f243 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -15,7 +15,7 @@ import * as React from 'react';
import { v4 as uuidv4 } from 'uuid';
import { ClientUtils, OmitKeys } from '../../../../../ClientUtils';
import { Doc, DocListCast, Opt } from '../../../../../fields/Doc';
-import { DocData, DocViews } from '../../../../../fields/DocSymbols';
+import { DocData, DocLayout, DocViews } from '../../../../../fields/DocSymbols';
import { RichTextField } from '../../../../../fields/RichTextField';
import { ScriptField } from '../../../../../fields/ScriptField';
import { CsvCast, DocCast, NumCast, PDFCast, RTFCast, StrCast, VideoCast, AudioCast } from '../../../../../fields/Types';
@@ -644,6 +644,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
// Get the simplified chunk using the document manager
const { foundChunk, doc } = this.docManager.getSimplifiedChunkById(chunkId);
+ console.log('doc: ', doc);
if (!foundChunk) {
if (doc) {
console.warn(`Chunk not found in document, ${doc.id}, for chunk ID: ${chunkId}`);
@@ -665,12 +666,14 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
console.error('No direct matching segment found for the citation.');
}
} else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) {
- this.handleOtherChunkTypes(foundChunk, citation, doc);
+ console.log('here: ', foundChunk);
+ this.handleOtherChunkTypes(foundChunk as SimplifiedChunk, citation, doc);
} else {
if (doc.type === 'web') {
DocumentManager.Instance.showDocument(doc, { openLocation: OpenWhere.addRight }, () => {});
return;
}
+ this.handleOtherChunkTypes(foundChunk, citation, doc);
// Show the chunk text in citation popup
let chunkText = citation.direct_text || 'Text content not available';
this.showCitationPopup(chunkText);
@@ -834,10 +837,45 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
this._citationPopup = { text: citation.direct_text ?? 'No text available', visible: true };
this.startCitationPopupTimer();
+ // Check if the document is a PDF (has a PDF viewer component)
+ const isPDF = PDFCast(doc.data) !== null || doc.type === DocumentType.PDF;
+
DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
- const firstView = Array.from(doc[DocViews])[0] as DocumentView;
- (firstView.ComponentView as PDFBox)?.gotoPage?.(foundChunk.startPage ?? 0);
- (firstView.ComponentView as PDFBox)?.search?.(citation.direct_text ?? '');
+ // Add a delay to ensure document is fully loaded and rendered
+ setTimeout(() => {
+ try {
+ // Safety check: ensure the document has views
+ if (!doc[DocViews] || doc[DocViews].size === 0) {
+ console.warn('Document views not available yet, retrying...');
+ this.retryPdfSearch(doc, citation, foundChunk, isPDF, 1);
+ return;
+ }
+
+ const views = Array.from(doc[DocViews]);
+ if (!views.length) {
+ console.warn('No document views found, retrying...');
+ this.retryPdfSearch(doc, citation, foundChunk, isPDF, 1);
+ return;
+ }
+
+ const firstView = views[0] as DocumentView;
+ if (!firstView || !firstView.ComponentView) {
+ console.warn('Component view not available yet, retrying...');
+ this.retryPdfSearch(doc, citation, foundChunk, isPDF, 1);
+ return;
+ }
+
+ const pdfComponent = firstView.ComponentView as PDFBox;
+ if (isPDF && pdfComponent && citation.direct_text) {
+ // Use our helper to ensure fuzzy search is enabled and execute the search
+ this.ensureFuzzySearchAndExecute(pdfComponent, citation.direct_text.trim(), foundChunk.startPage);
+ }
+ } catch (error) {
+ console.error('Error accessing PDF component:', error);
+ // Retry with exponential backoff
+ this.retryPdfSearch(doc, citation, foundChunk, isPDF, 1);
+ }
+ }, 500); // Initial delay before first attempt
});
break;
case CHUNK_TYPE.CSV:
@@ -851,6 +889,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
break;
}
};
+
/**
* Creates an annotation highlight on a PDF document for image citations.
* @param x1 X-coordinate of the top-left corner of the highlight.
@@ -1092,6 +1131,100 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
};
/**
+ * Retry PDF search with exponential backoff
+ */
+    retryPdfSearch = (doc: Doc, citation: Citation, foundChunk: SimplifiedChunk, isPDF: boolean, attempt: number) => {
+        // Stop once the attempt budget (5) has been used up.
+        if (attempt > 5) {
+            console.error('Maximum retry attempts reached for PDF search');
+            return;
+        }
+
+        // Every "not ready yet" outcome below reschedules with the attempt counter bumped by one.
+        const tryAgain = () => this.retryPdfSearch(doc, citation, foundChunk, isPDF, attempt + 1);
+
+        // The wait grows by a factor of 1.5 per attempt and is capped at 2 seconds.
+        const waitMs = Math.min(2000, 500 * Math.pow(1.5, attempt));
+
+        setTimeout(() => {
+            try {
+                // A missing or empty view collection yields an empty list, so the first entry is undefined.
+                const registeredViews = doc[DocViews];
+                const viewList = registeredViews && registeredViews.size !== 0 ? Array.from(registeredViews) : [];
+                const leadView = viewList[0] as DocumentView;
+
+                if (!leadView?.ComponentView) {
+                    tryAgain();
+                    return;
+                }
+
+                const pdfComponent = leadView.ComponentView as PDFBox;
+                if (isPDF && pdfComponent && citation.direct_text) {
+                    console.log(`PDF component found on attempt ${attempt}, executing search...`);
+                    this.ensureFuzzySearchAndExecute(pdfComponent, citation.direct_text.trim(), foundChunk.startPage);
+                }
+            } catch (error) {
+                console.error(`Error on retry attempt ${attempt}:`, error);
+                tryAgain();
+            }
+        }, waitMs);
+    };
+
+    /**
+     * Navigates a PDFBox to a citation's page and searches it for the cited text.
+     *
+     * This helper deliberately does NOT call `pdfComponent.toggleFuzzySearch()`: that method flips the
+     * current mode rather than setting it, and PDFBox starts with fuzzy search enabled, so a blind
+     * call would turn fuzzy search off on the first citation jump and alternate on every later one.
+     * The only other effect of the toggle - clearing the previous search results - is done explicitly.
+     *
+     * @param pdfComponent The PDFBox component
+     * @param searchText The text to search for
+     * @param startPage Optional page to navigate to before searching; `gotoPage` receives `startPage + 1`
+     *                  (presumably because chunk pages are zero-based - confirm against the chunker)
+     */
+    private ensureFuzzySearchAndExecute = (pdfComponent: PDFBox, searchText: string, startPage?: number) => {
+        if (!pdfComponent) {
+            console.warn('PDF component is undefined, cannot perform search');
+            return;
+        }
+
+        if (!searchText?.trim()) {
+            console.warn('Search text is empty, skipping search');
+            return;
+        }
+
+        try {
+            // The component view is only cast to PDFBox by the caller, so verify the methods used here exist.
+            if (typeof pdfComponent.gotoPage !== 'function' || typeof pdfComponent.search !== 'function') {
+                console.warn('PDF component missing required methods');
+                return;
+            }
+
+            // Navigate to the page if specified
+            if (typeof startPage === 'number') {
+                pdfComponent.gotoPage(startPage + 1);
+            }
+
+            // Drop results left over from an earlier search without touching the fuzzy-search mode.
+            try {
+                pdfComponent.search('', false, true);
+            } catch (clearError) {
+                console.warn('Error clearing previous search results:', clearError);
+            }
+
+            // Searching is deferred so the page navigation above has time to take effect.
+            setTimeout(() => {
+                try {
+                    console.log('Performing fuzzy search for text:', searchText);
+                    pdfComponent.search(searchText);
+                } catch (searchError) {
+                    console.error('Error performing search:', searchError);
+                }
+            }, 1000);
+        } catch (error) {
+            console.error('Error in fuzzy search setup:', error);
+        }
+    };
+
+ /**
* Main render method for the ChatBox
*/
render() {
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index e9d41efbd..784e90c3c 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -173,6 +173,16 @@ export class AgentDocumentManager {
public processDocument(doc: Doc): string {
// Ensure document has a persistent ID
const docId = this.ensureDocumentId(doc);
+ if (doc.chunk_simplified) {
+ const newChunks: SimplifiedChunk[] = [];
+ for (const chunk of JSON.parse(StrCast(doc.chunk_simplified))) {
+ console.log('chunk', chunk);
+ newChunks.push(chunk as SimplifiedChunk);
+ }
+ console.log('Added simplified chunks to simplifiedChunks:', docId, newChunks);
+ this.addSimplifiedChunks(newChunks);
+ //DocCast(DocCast(this.chatBoxDocument!.agentDocument)!.chunk_simpl)!.mapping = new List<string>(Array.from(this.simplifiedChunks.values()).map(chunk => JSON.stringify(chunk)));
+ }
// Only add if we haven't already processed this document
if (!this.documentsById.has(docId)) {
this.documentsById.set(docId, { layoutDoc: doc, dataDoc: doc[DocData] });
@@ -1005,21 +1015,28 @@ export class AgentDocumentManager {
* @returns The updated document with simplified chunks
*/
@action
- public addSimplifiedChunks(chunks: RAGChunk[], docType: string) {
+    public addSimplifiedChunks(simplifiedChunks: SimplifiedChunk[]) {
+        // Register every incoming chunk in the manager's lookup, keyed by its chunkId.
+        for (const entry of simplifiedChunks) {
+            this.simplifiedChunks.set(entry.chunkId, entry);
+        }
+    }
+
+ public getSimplifiedChunks(chunks: RAGChunk[], docType: string): SimplifiedChunk[] {
console.log('chunks', chunks, 'simplifiedChunks', this.simplifiedChunks);
+ const simplifiedChunks: SimplifiedChunk[] = [];
// Create array of simplified chunks based on document type
for (const chunk of chunks) {
// Common properties across all chunk types
const baseChunk: SimplifiedChunk = {
chunkId: chunk.id,
- text: chunk.metadata.text,
+ //text: chunk.metadata.text,
doc_id: chunk.metadata.doc_id,
chunkType: chunk.metadata.type || CHUNK_TYPE.TEXT,
};
// Add type-specific properties
if (docType === 'video' || docType === 'audio') {
- this.simplifiedChunks.set(chunk.id, {
+ simplifiedChunks.push({
...baseChunk,
start_time: chunk.metadata.start_time,
end_time: chunk.metadata.end_time,
@@ -1027,14 +1044,14 @@ export class AgentDocumentManager {
chunkType: docType === 'video' ? CHUNK_TYPE.VIDEO : CHUNK_TYPE.AUDIO,
} as SimplifiedChunk);
} else if (docType === 'pdf') {
- this.simplifiedChunks.set(chunk.id, {
+ simplifiedChunks.push({
...baseChunk,
startPage: chunk.metadata.start_page,
endPage: chunk.metadata.end_page,
location: chunk.metadata.location,
} as SimplifiedChunk);
} else if (docType === 'csv') {
- this.simplifiedChunks.set(chunk.id, {
+ simplifiedChunks.push({
...baseChunk,
rowStart: (chunk.metadata as any).row_start,
rowEnd: (chunk.metadata as any).row_end,
@@ -1043,9 +1060,10 @@ export class AgentDocumentManager {
} as SimplifiedChunk);
} else {
// Default for other document types
- this.simplifiedChunks.set(chunk.id, baseChunk as SimplifiedChunk);
+ simplifiedChunks.push(baseChunk as SimplifiedChunk);
}
}
+ return simplifiedChunks;
}
/**
@@ -1054,9 +1072,8 @@ export class AgentDocumentManager {
* @param chunkId The ID of the chunk to retrieve
* @returns The simplified chunk if found, undefined otherwise
*/
+ @action
public getSimplifiedChunkById(chunkId: string): any | undefined {
- console.log('chunkId', chunkId, 'simplifiedChunks', this.simplifiedChunks);
- console.log('doc', this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || ''));
+ // Always returns an object of the form { foundChunk, doc }, never a bare chunk or undefined.
+ // `foundChunk` is whatever simplifiedChunks.get(chunkId) yields, so callers must check it themselves.
+ // When no chunk (or no doc_id on it) is available, chunkId itself is passed to getDocument as the document ID.
return { foundChunk: this.simplifiedChunks.get(chunkId), doc: this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || chunkId) };
}
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index f1fae6f11..252672dfc 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -40,7 +40,7 @@ export class Vectorstore {
* @param docManager An instance of AgentDocumentManager to handle document management.
*/
constructor(id: string, docManager: AgentDocumentManager) {
- const pineconeApiKey = process.env.PINECONE_API_KEY;
+ const pineconeApiKey = 'pcsk_3txLxJ_9fxdmAph4csnq4yxoDF5De5A8bJvjWaXXigBgshy4eoXggrXcxATJiH8vzXbrKm';
if (!pineconeApiKey) {
console.log('PINECONE_API_KEY is not defined - Vectorstore will be unavailable');
return;
@@ -181,7 +181,9 @@ export class Vectorstore {
doc.segmented_transcript = JSON.stringify(segmentedTranscript);
// Use doc manager to add simplified chunks
const docType = local_file_path.endsWith('.mp3') ? 'audio' : 'video';
- this.docManager.addSimplifiedChunks(result.chunks, docType);
+ const simplifiedChunks = this.docManager.getSimplifiedChunks(result.chunks, docType);
+ doc.chunk_simplified = JSON.stringify(simplifiedChunks);
+ this.docManager.addSimplifiedChunks(simplifiedChunks);
} else {
// Process regular document
console.log('Processing regular document...');
@@ -215,7 +217,9 @@ export class Vectorstore {
// Use doc manager to add simplified chunks - determine document type from file extension
const fileExt = path.extname(local_file_path).toLowerCase();
const docType = fileExt === '.pdf' ? 'pdf' : fileExt === '.csv' ? 'csv' : 'text';
- this.docManager.addSimplifiedChunks(result.chunks, docType);
+ const simplifiedChunks = this.docManager.getSimplifiedChunks(result.chunks, docType);
+ doc.chunk_simplified = JSON.stringify(simplifiedChunks);
+ this.docManager.addSimplifiedChunks(simplifiedChunks);
doc.summary = result.summary;
doc.ai_purpose = result.purpose;