6 files changed, 762 insertions, 31 deletions
diff --git a/src/client/views/nodes/PDFBox.scss b/src/client/views/nodes/PDFBox.scss
index eaea272dc..44013a96d 100644
--- a/src/client/views/nodes/PDFBox.scss
+++ b/src/client/views/nodes/PDFBox.scss
@@ -344,3 +344,26 @@
         font-size: 30px;
     }
 }
+
+.pdfBox-fuzzy { // fuzzy-search toggle button, rendered by PDFBox right after the pdfBox-search button
+    border: none;
+    background-color: #4a4a4a;
+    color: white;
+    padding: 0 8px;
+    height: 24px;
+    cursor: pointer;
+    margin-right: 4px;
+    border-radius: 3px;
+    display: flex;
+    align-items: center;
+    justify-content: center;
+
+    &.active { // PDFBox adds "active" while fuzzy search is enabled
+        background-color: #3498db;
+        color: white;
+    }
+
+    &:hover { // same specificity as &.active and declared later, so hover also recolors an active button
+        background-color: #2980b9;
+    }
+}
diff --git a/src/client/views/nodes/PDFBox.tsx b/src/client/views/nodes/PDFBox.tsx
index 55e6d5596..4ecbd65b6 100644
--- a/src/client/views/nodes/PDFBox.tsx
+++ b/src/client/views/nodes/PDFBox.tsx
@@ -53,6 +53,7 @@ export class PDFBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     private _sidebarRef = React.createRef<SidebarAnnos>();
 
     @observable private _searching: boolean = false;
+    @observable private _fuzzySearchEnabled: boolean = true;
     @observable private _pdf: Opt<Pdfjs.PDFDocumentProxy> = undefined;
     @observable private _pageControls = false;
 
@@ -272,6 +273,14 @@ export class PDFBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
         !this.Document._layout_fitWidth && (this.Document._height = NumCast(this.Document._width) * (p.height / p.width));
     };
 
+    @action
+    toggleFuzzySearch = () => {
+        this._fuzzySearchEnabled = !this._fuzzySearchEnabled; // drives the 'active' class and the title of the toolbar button
+        this._pdfViewer?.toggleFuzzySearch(); // NOTE(review): skipped when _pdfViewer is unset, so the viewer's own flag could drift from this one — confirm
+        // Clear existing search results when switching modes
+        this.search('', false, true);
+    };
+
     override search = action((searchString: string, bwd?: boolean, clear: boolean = false) => {
         if (!this._searching && !clear) {
             this._searching = true;
@@ -412,6 +421,9 @@ export class PDFBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                     <button type="button" className="pdfBox-search" title="Search" onClick={e => this.search(this._searchString, e.shiftKey)}>
                         <FontAwesomeIcon icon="search" size="sm" />
                     </button>
+                    <button type="button" className={`pdfBox-fuzzy ${this._fuzzySearchEnabled ? 'active' : ''}`} title={`${this._fuzzySearchEnabled ? 'Disable' : 'Enable'} Fuzzy Search`} onClick={this.toggleFuzzySearch}>
+                        <FontAwesomeIcon icon="magic" size="sm" />
+                    </button>
                     <button type="button" className="pdfBox-prevIcon" title="Previous Annotation" onClick={this.prevAnnotation}>
                         <FontAwesomeIcon icon="arrow-up" size="lg" />
                     </button>
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 00077d68d..af689f243 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -15,7 +15,7 @@ import * as React from 'react';
 import { v4 as uuidv4 } from 'uuid';
 import { ClientUtils, OmitKeys } from '../../../../../ClientUtils';
 import { Doc, DocListCast, Opt } from '../../../../../fields/Doc';
-import { DocData, DocViews } from '../../../../../fields/DocSymbols';
+import { DocData, DocLayout, DocViews } from '../../../../../fields/DocSymbols';
 import { RichTextField } from '../../../../../fields/RichTextField';
 import { ScriptField } from '../../../../../fields/ScriptField';
 import { CsvCast, DocCast, NumCast, PDFCast, RTFCast, StrCast, VideoCast, AudioCast } from '../../../../../fields/Types';
@@ -644,6 +644,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
 
             // Get the simplified chunk using the document manager
             const { foundChunk, doc } = this.docManager.getSimplifiedChunkById(chunkId);
+            console.log('doc: ', doc);
             if (!foundChunk) {
                 if (doc) {
                     console.warn(`Chunk not found in document, ${doc.id}, for chunk ID: ${chunkId}`);
@@ -665,12 +666,14 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                     console.error('No direct matching segment found for the citation.');
                 }
             } else if (foundChunk.chunkType === CHUNK_TYPE.TABLE || foundChunk.chunkType === CHUNK_TYPE.IMAGE) {
-                this.handleOtherChunkTypes(foundChunk, citation, doc);
+                console.log('here: ', foundChunk);
+                this.handleOtherChunkTypes(foundChunk as SimplifiedChunk, citation, doc);
             } else {
                 if (doc.type === 'web') {
                     DocumentManager.Instance.showDocument(doc, { openLocation: OpenWhere.addRight }, () => {});
                     return;
                 }
+                this.handleOtherChunkTypes(foundChunk, citation, doc);
                 // Show the chunk text in citation popup
                 let chunkText = citation.direct_text || 'Text content not available';
                 this.showCitationPopup(chunkText);
@@ -834,10 +837,45 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 this._citationPopup = { text: citation.direct_text ?? 'No text available', visible: true };
                 this.startCitationPopupTimer();
 
+                // Check if the document is a PDF (has a PDF viewer component)
+                const isPDF = PDFCast(doc.data) !== null || doc.type === DocumentType.PDF;
+
                 DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
-                    const firstView = Array.from(doc[DocViews])[0] as DocumentView;
-                    (firstView.ComponentView as PDFBox)?.gotoPage?.(foundChunk.startPage ?? 0);
-                    (firstView.ComponentView as PDFBox)?.search?.(citation.direct_text ?? '');
+                    // Add a delay to ensure document is fully loaded and rendered
+                    setTimeout(() => {
+                        try {
+                            // Safety check: ensure the document has views
+                            if (!doc[DocViews] || doc[DocViews].size === 0) {
+                                console.warn('Document views not available yet, retrying...');
+                                this.retryPdfSearch(doc, citation, foundChunk, isPDF, 1);
+                                return;
+                            }
+
+                            const views = Array.from(doc[DocViews]);
+                            if (!views.length) {
+                                console.warn('No document views found, retrying...');
+                                this.retryPdfSearch(doc, citation, foundChunk, isPDF, 1);
+                                return;
+                            }
+
+                            const firstView = views[0] as DocumentView;
+                            if (!firstView || !firstView.ComponentView) {
+                                console.warn('Component view not available yet, retrying...');
+                                this.retryPdfSearch(doc, citation, foundChunk, isPDF, 1);
+                                return;
+                            }
+
+                            const pdfComponent = firstView.ComponentView as PDFBox;
+                            if (isPDF && pdfComponent && citation.direct_text) {
+                                // Use our helper to ensure fuzzy search is enabled and execute the search
+                                this.ensureFuzzySearchAndExecute(pdfComponent, citation.direct_text.trim(), foundChunk.startPage);
+                            }
+                        } catch (error) {
+                            console.error('Error accessing PDF component:', error);
+                            // Retry with exponential backoff
+                            this.retryPdfSearch(doc, citation, foundChunk, isPDF, 1);
+                        }
+                    }, 500); // Initial delay before first attempt
                 });
                 break;
             case CHUNK_TYPE.CSV:
@@ -851,6 +889,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                 break;
         }
     };
+
     /**
      * Creates an annotation highlight on a PDF document for image citations.
      * @param x1 X-coordinate of the top-left corner of the highlight.
@@ -1092,6 +1131,100 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     };
 
     /**
+     * Retry PDF search with exponential backoff
+     */
+    retryPdfSearch = (doc: Doc, citation: Citation, foundChunk: SimplifiedChunk, isPDF: boolean, attempt: number) => {
+        // Polls until `doc` has a view whose ComponentView is mounted, then runs the citation search in it.
+        //   doc        - document being shown; its DocViews set is inspected on every attempt
+        //   citation   - its direct_text (trimmed) is the search string
+        //   foundChunk - its startPage is forwarded as the page to open before searching
+        //   isPDF      - nothing is searched unless this is true
+        //   attempt    - retry counter; attempts above 5 give up with an error log
+        const searchText = citation.direct_text?.trim();
+        if (!isPDF || !searchText) {
+            // No search can ever be issued for this input, so do not poll for a view at all.
+            return;
+        }
+        if (attempt > 5) {
+            console.error('Maximum retry attempts reached for PDF search');
+            return;
+        }
+
+        // Backoff: 500ms * 1.5^attempt, capped at 2 seconds.
+        const delay = Math.min(2000, 500 * Math.pow(1.5, attempt));
+        const retry = () => this.retryPdfSearch(doc, citation, foundChunk, isPDF, attempt + 1);
+
+        setTimeout(() => {
+            try {
+                // A missing or empty view set and an unmounted component are the same "not ready yet" state.
+                const views = doc[DocViews];
+                const firstView = views ? (Array.from(views)[0] as DocumentView | undefined) : undefined;
+                const pdfComponent = firstView?.ComponentView as PDFBox | undefined;
+                if (!pdfComponent) {
+                    retry();
+                    return;
+                }
+                this.ensureFuzzySearchAndExecute(pdfComponent, searchText, foundChunk.startPage);
+            } catch (error) {
+                console.error(`Error on retry attempt ${attempt}:`, error);
+                retry();
+            }
+        }, delay);
+    };
+
+    /**
+     * Ensures fuzzy search is enabled in PDFBox and, after a 1 second delay, performs a search
+     * @param pdfComponent The PDFBox component
+     * @param searchText The text to search for (blank text is skipped with a warning)
+     * @param startPage Optional page to navigate to before searching (gotoPage is called with startPage + 1)
+     */
+    private ensureFuzzySearchAndExecute = (pdfComponent: PDFBox, searchText: string, startPage?: number) => {
+        if (!pdfComponent) {
+            console.warn('PDF component is undefined, cannot perform search');
+            return;
+        }
+
+        if (!searchText?.trim()) {
+            console.warn('Search text is empty, skipping search');
+            return;
+        }
+
+        try {
+            // Callers only cast ComponentView to PDFBox, so verify the methods really exist at runtime
+            if (typeof pdfComponent.gotoPage !== 'function' || typeof pdfComponent.toggleFuzzySearch !== 'function' || typeof pdfComponent.search !== 'function') {
+                console.warn('PDF component missing required methods');
+                return;
+            }
+
+            // Navigate to the page if specified
+            if (typeof startPage === 'number') {
+                pdfComponent.gotoPage(startPage + 1);
+            }
+
+            // toggleFuzzySearch() flips the mode, so calling it unconditionally would switch fuzzy search OFF
+            // whenever it is already on (PDFBox starts with it on). Flip only when the flag reads false.
+            // Bracket access is used because PDFBox has no public getter for its private flag.
+            try {
+                if (pdfComponent['_fuzzySearchEnabled'] === false) pdfComponent.toggleFuzzySearch();
+            } catch (toggleError) {
+                console.warn('Error toggling fuzzy search:', toggleError);
+            }
+
+            // Add a sufficient delay to ensure PDF is fully loaded before searching
+            setTimeout(() => {
+                try {
+                    console.log('Performing fuzzy search for text:', searchText);
+                    pdfComponent.search(searchText);
+                } catch (searchError) {
+                    console.error('Error performing search:', searchError);
+                }
+            }, 1000); // Increased delay for better reliability
+        } catch (error) {
+            console.error('Error in fuzzy search setup:', error);
+        }
+    };
+
+    /**
      * Main render method for the ChatBox
      */
     render() {
diff --git a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
index e9d41efbd..784e90c3c 100644
--- a/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
+++ b/src/client/views/nodes/chatbot/utils/AgentDocumentManager.ts
@@ -173,6 +173,16 @@ export class AgentDocumentManager {
     public processDocument(doc: Doc): string {
         // Ensure document has a persistent ID
         const docId = this.ensureDocumentId(doc);
+        if (doc.chunk_simplified) {
+            // Re-register chunks persisted on the doc; a corrupt value must not abort processing of the document.
+            try {
+                const parsed: unknown = JSON.parse(StrCast(doc.chunk_simplified));
+                if (Array.isArray(parsed)) this.addSimplifiedChunks(parsed as SimplifiedChunk[]);
+                else console.warn('chunk_simplified is not an array for document', docId);
+            } catch (error) {
+                console.warn('Could not parse chunk_simplified for document', docId, error);
+            }
+        }
         // Only add if we haven't already processed this document
         if (!this.documentsById.has(docId)) {
             this.documentsById.set(docId, { layoutDoc: doc, dataDoc: doc[DocData] });
@@ -1005,21 +1015,28 @@ export class AgentDocumentManager {
      * @returns The updated document with simplified chunks
      */
     @action
-    public addSimplifiedChunks(chunks: RAGChunk[], docType: string) {
+    public addSimplifiedChunks(simplifiedChunks: SimplifiedChunk[]) {
+        // Register each chunk under its chunkId, in input order, so that
+        // getSimplifiedChunkById can later look it up in this.simplifiedChunks.
+        for (const chunk of simplifiedChunks) this.simplifiedChunks.set(chunk.chunkId, chunk);
+    }
+
+    public getSimplifiedChunks(chunks: RAGChunk[], docType: string): SimplifiedChunk[] {
         console.log('chunks', chunks, 'simplifiedChunks', this.simplifiedChunks);
+        const simplifiedChunks: SimplifiedChunk[] = [];
         // Create array of simplified chunks based on document type
         for (const chunk of chunks) {
             // Common properties across all chunk types
             const baseChunk: SimplifiedChunk = {
                 chunkId: chunk.id,
-                text: chunk.metadata.text,
+                //text: chunk.metadata.text,
                 doc_id: chunk.metadata.doc_id,
                 chunkType: chunk.metadata.type || CHUNK_TYPE.TEXT,
             };
 
             // Add type-specific properties
             if (docType === 'video' || docType === 'audio') {
-                this.simplifiedChunks.set(chunk.id, {
+                simplifiedChunks.push({
                     ...baseChunk,
                     start_time: chunk.metadata.start_time,
                     end_time: chunk.metadata.end_time,
@@ -1027,14 +1044,14 @@ export class AgentDocumentManager {
                     chunkType: docType === 'video' ? CHUNK_TYPE.VIDEO : CHUNK_TYPE.AUDIO,
                 } as SimplifiedChunk);
             } else if (docType === 'pdf') {
-                this.simplifiedChunks.set(chunk.id, {
+                simplifiedChunks.push({
                     ...baseChunk,
                     startPage: chunk.metadata.start_page,
                     endPage: chunk.metadata.end_page,
                     location: chunk.metadata.location,
                 } as SimplifiedChunk);
             } else if (docType === 'csv') {
-                this.simplifiedChunks.set(chunk.id, {
+                simplifiedChunks.push({
                     ...baseChunk,
                     rowStart: (chunk.metadata as any).row_start,
                     rowEnd: (chunk.metadata as any).row_end,
@@ -1043,9 +1060,10 @@ export class AgentDocumentManager {
                 } as SimplifiedChunk);
             } else {
                 // Default for other document types
-                this.simplifiedChunks.set(chunk.id, baseChunk as SimplifiedChunk);
+                simplifiedChunks.push(baseChunk as SimplifiedChunk);
             }
         }
+        return simplifiedChunks;
     }
 
     /**
@@ -1054,9 +1072,8 @@ export class AgentDocumentManager {
      * @param chunkId The ID of the chunk to retrieve
      * @returns The simplified chunk if found, undefined otherwise
      */
+    @action
     public getSimplifiedChunkById(chunkId: string): any | undefined {
-        console.log('chunkId', chunkId, 'simplifiedChunks', this.simplifiedChunks);
-        console.log('doc', this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || ''));
         return { foundChunk: this.simplifiedChunks.get(chunkId), doc: this.getDocument(this.simplifiedChunks.get(chunkId)?.doc_id || chunkId) };
     }
 
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index f1fae6f11..252672dfc 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -40,7 +40,7 @@ export class Vectorstore {
      * @param docManager An instance of AgentDocumentManager to handle document management.
      */
     constructor(id: string, docManager: AgentDocumentManager) {
-        const pineconeApiKey = process.env.PINECONE_API_KEY;
+        const pineconeApiKey = process.env.PINECONE_API_KEY; // read from the environment; a key literal must never be committed
         if (!pineconeApiKey) {
             console.log('PINECONE_API_KEY is not defined - Vectorstore will be unavailable');
             return;
@@ -181,7 +181,9 @@ export class Vectorstore {
                 doc.segmented_transcript = JSON.stringify(segmentedTranscript);
                 // Use doc manager to add simplified chunks
                 const docType = local_file_path.endsWith('.mp3') ? 'audio' : 'video';
-                this.docManager.addSimplifiedChunks(result.chunks, docType);
+                const simplifiedChunks = this.docManager.getSimplifiedChunks(result.chunks, docType);
+                doc.chunk_simplified = JSON.stringify(simplifiedChunks);
+                this.docManager.addSimplifiedChunks(simplifiedChunks);
             } else {
                 // Process regular document
                 console.log('Processing regular document...');
@@ -215,7 +217,9 @@ export class Vectorstore {
                 // Use doc manager to add simplified chunks - determine document type from file extension
                 const fileExt = path.extname(local_file_path).toLowerCase();
                 const docType = fileExt === '.pdf' ? 'pdf' : fileExt === '.csv' ? 'csv' : 'text';
-                this.docManager.addSimplifiedChunks(result.chunks, docType);
+                const simplifiedChunks = this.docManager.getSimplifiedChunks(result.chunks, docType);
+                doc.chunk_simplified = JSON.stringify(simplifiedChunks);
+                this.docManager.addSimplifiedChunks(simplifiedChunks);
 
                 doc.summary = result.summary;
                 doc.ai_purpose = result.purpose;
diff --git a/src/client/views/pdf/PDFViewer.tsx b/src/client/views/pdf/PDFViewer.tsx
index fc2567fbc..a88d8b282 100644
--- a/src/client/views/pdf/PDFViewer.tsx
+++ b/src/client/views/pdf/PDFViewer.tsx
@@ -50,6 +50,15 @@ interface IViewerProps extends FieldViewProps {
     crop: (region: Doc | undefined, addCrop?: boolean) => Doc | undefined;
 }
 
+// One candidate match collected by PDFViewer's fuzzy text search
+interface FuzzySearchResult {
+    pageIndex: number; // zero-based index of the page the match was found on
+    matchIndex: number; // position of the match in that page's paragraph list or text-item list
+    text: string; // text of the matched paragraph or text item
+    score?: number; // similarity score assigned to the match
+    isParagraph?: boolean; // true for a reconstructed paragraph, false for a single text item
+}
+
 /**
  * Handles rendering and virtualization of the pdf
  */
@@ -68,6 +77,9 @@ export class PDFViewer extends ObservableReactComponent<IViewerProps> {
     @observable _showWaiting = true;
     @observable Index: number = -1;
     @observable private _loading = false;
+    @observable private _fuzzySearchEnabled = true;
+    @observable private _fuzzySearchResults: FuzzySearchResult[] = [];
+    @observable private _currentFuzzyMatchIndex = 0;
 
     private _pdfViewer!: PDFJSViewer.PDFViewer;
     private _styleRule: number | undefined; // stylesheet rule for making hyperlinks clickable
@@ -334,27 +346,557 @@ export class PDFViewer extends ObservableReactComponent<IViewerProps> {
         return index;
     };
 
+    // Normalize text for comparison: lowercase, non-word characters turned into spaces, whitespace runs collapsed, ends trimmed
+    private normalizeText(text: string): string {
+        return text
+            .toLowerCase()
+            .replace(/[^\w\s]/g, ' ') // punctuation first: every stripped character leaves a space behind...
+            .replace(/\s+/g, ' ') // ...so collapse whitespace afterwards; the reverse order turned "a, b" into "a  b"
+            .trim();
+    }
+
+    // Score how alike two strings are after normalizeText(): 1 when the normalized strings are equal, 0 when either is empty or no strategy below applies
+    private computeSimilarity(str1: string, str2: string): number {
+        const s1 = this.normalizeText(str1);
+        const s2 = this.normalizeText(str2);
+
+        if (s1 === s2) return 1;
+        if (s1.length === 0 || s2.length === 0) return 0;
+
+        // When either normalized string is longer than 50 characters, try the chunk and word-overlap heuristics first
+        if (s1.length > 50 || s2.length > 50) {
+            // Order the pair by length so that chunks of the shorter text are looked up in the longer one
+            const longerText = s1.length > s2.length ? s1 : s2;
+            const shorterText = s1.length > s2.length ? s2 : s1;
+
+            // Window size in words: half the shorter text's word count, but never more than 5
+            const words = shorterText.split(' ');
+            const chunkSize = Math.min(5, Math.floor(words.length / 2));
+
+            if (chunkSize > 0) {
+                let maxChunkMatch = 0;
+
+                // Slide the window over the shorter text; remember the largest fraction of it covered by a window found verbatim in the longer text
+                for (let i = 0; i <= words.length - chunkSize; i++) {
+                    const chunk = words.slice(i, i + chunkSize).join(' ');
+                    if (longerText.includes(chunk)) {
+                        maxChunkMatch = Math.max(maxChunkMatch, chunk.length / shorterText.length);
+                    }
+                }
+
+                if (maxChunkMatch > 0.2) {
+                    return Math.min(0.9, maxChunkMatch + 0.3); // Boost the score, max 0.9
+                }
+            }
+
+            // No chunk scored high enough: compare the two sets of distinct words instead
+            const words1 = new Set(s1.split(' '));
+            const words2 = new Set(s2.split(' '));
+
+            let commonWords = 0;
+            for (const word of words1) {
+                if (word.length > 2 && words2.has(word)) {
+                    // Only count meaningful words (length > 2)
+                    commonWords++;
+                }
+            }
+
+            // Shared words relative to the size of the smaller word set
+            const overlapRatio = commonWords / Math.min(words1.size, words2.size);
+
+            // An overlap above 40% counts as a match; the score is capped at 0.9
+            if (overlapRatio > 0.4) {
+                return Math.min(0.9, overlapRatio);
+            }
+        }
+
+        // Reached by short pairs and by long pairs that failed both heuristics: substring containment, scaled by the length ratio (at most 0.8)
+        if (s1.includes(s2) || s2.includes(s1)) {
+            return (0.8 * Math.min(s1.length, s2.length)) / Math.max(s1.length, s2.length);
+        }
+
+        // Edit distance is only computed when both strings are under 100 characters, which bounds the table size
+        if (s1.length < 100 && s2.length < 100) {
+            // dp[i][j] = Levenshtein distance between the first i characters of s1 and the first j characters of s2
+            const dp: number[][] = Array(s1.length + 1)
+                .fill(0)
+                .map(() => Array(s2.length + 1).fill(0));
+
+            for (let i = 0; i <= s1.length; i++) dp[i][0] = i;
+            for (let j = 0; j <= s2.length; j++) dp[0][j] = j;
+
+            for (let i = 1; i <= s1.length; i++) {
+                for (let j = 1; j <= s2.length; j++) {
+                    const cost = s1[i - 1] === s2[j - 1] ? 0 : 1;
+                    dp[i][j] = Math.min(
+                        dp[i - 1][j] + 1, // deletion
+                        dp[i][j - 1] + 1, // insertion
+                        dp[i - 1][j - 1] + cost // substitution
+                    );
+                }
+            }
+
+            const distance = dp[s1.length][s2.length];
+            return 1 - distance / Math.max(s1.length, s2.length);
+        }
+
+        return 0;
+    }
+
+    // Perform fuzzy search on PDF text content
+    private async performFuzzySearch(searchString: string, bwd?: boolean): Promise<boolean> {
+        if (!this._pdfViewer || !searchString.trim()) return false;
+
+        const normalizedSearch = this.normalizeText(searchString);
+        this._fuzzySearchResults = [];
+
+        // Adjust threshold based on text length - more lenient for longer text
+        let similarityThreshold = 0.6;
+        if (searchString.length > 100) similarityThreshold = 0.35;
+        else if (searchString.length > 50) similarityThreshold = 0.45;
+
+        console.log(`Using similarity threshold: ${similarityThreshold} for query length: ${searchString.length}`);
+
+        // For longer queries, also look for partial matches
+        const searchWords = normalizedSearch.split(' ').filter(w => w.length > 3);
+        const isLongQuery = searchWords.length > 5;
+
+        // Track best match for debugging
+        let bestMatchScore = 0;
+        let bestMatchText = '';
+
+        // Fallback strategy: extract key phrases for very long search queries
+        let keyPhrases: string[] = [];
+        if (searchString.length > 200) {
+            // Extract key phrases (chunks of 3-6 words) from the search string
+            const words = normalizedSearch.split(' ');
+            for (let i = 0; i < words.length - 2; i += 2) {
+                const phraseLength = Math.min(5, words.length - i);
+                if (phraseLength >= 3) {
+                    keyPhrases.push(words.slice(i, i + phraseLength).join(' '));
+                }
+            }
+            console.log(`Using ${keyPhrases.length} key phrases for long search text`);
+        }
+
+        // Process PDF in batches to avoid memory issues
+        const totalPages = this._pageSizes.length;
+        const BATCH_SIZE = 10; // Process 10 pages at a time
+
+        console.log(`Searching all ${totalPages} pages in batches of ${BATCH_SIZE}`);
+
+        // Process PDF in batches
+        for (let batchStart = 0; batchStart < totalPages; batchStart += BATCH_SIZE) {
+            const batchEnd = Math.min(batchStart + BATCH_SIZE, totalPages);
+            console.log(`Processing pages ${batchStart + 1} to ${batchEnd} of ${totalPages}`);
+
+            // Process each page in current batch
+            for (let pageIndex = batchStart; pageIndex < batchEnd; pageIndex++) {
+                try {
+                    const page = await this._props.pdf.getPage(pageIndex + 1);
+                    const textContent = await page.getTextContent();
+
+                    // For long text, try to reconstruct paragraphs first
+                    let paragraphs: string[] = [];
+
+                    try {
+                        if (isLongQuery) {
+                            // Group text items into paragraphs based on positions
+                            let currentY: number | null = null;
+                            let currentParagraph = '';
+
+                            // Sort by Y position first, then X
+                            const sortedItems = [...textContent.items].sort((a: any, b: any) => {
+                                const aTransform = (a as any).transform || [];
+                                const bTransform = (b as any).transform || [];
+                                if (Math.abs(aTransform[5] - bTransform[5]) < 5) {
+                                    return (aTransform[4] || 0) - (bTransform[4] || 0);
+                                }
+                                return (aTransform[5] || 0) - (bTransform[5] || 0);
+                            });
+
+                            // Limit paragraph size to avoid overflows
+                            const MAX_PARAGRAPH_LENGTH = 1000;
+
+                            for (const item of sortedItems) {
+                                const text = (item as any).str || '';
+                                const transform = (item as any).transform || [];
+                                const y = transform[5];
+
+                                // If this is a new line or first item
+                                if (currentY === null || Math.abs(y - currentY) > 5 || currentParagraph.length + text.length > MAX_PARAGRAPH_LENGTH) {
+                                    if (currentParagraph) {
+                                        paragraphs.push(currentParagraph.trim());
+                                    }
+                                    currentParagraph = text;
+                                    currentY = y;
+                                } else {
+                                    // Continue the current paragraph
+                                    currentParagraph += ' ' + text;
+                                }
+                            }
+
+                            // Add the last paragraph
+                            if (currentParagraph) {
+                                paragraphs.push(currentParagraph.trim());
+                            }
+
+                            // Limit the number of paragraph combinations to avoid exponential growth
+                            const MAX_COMBINED_PARAGRAPHS = 5;
+
+                            // Also create overlapping larger paragraphs for better context, but limit size
+                            if (paragraphs.length > 1) {
+                                const combinedCount = Math.min(paragraphs.length - 1, MAX_COMBINED_PARAGRAPHS);
+                                for (let i = 0; i < combinedCount; i++) {
+                                    if (paragraphs[i].length + paragraphs[i + 1].length < MAX_PARAGRAPH_LENGTH) {
+                                        paragraphs.push(paragraphs[i] + ' ' + paragraphs[i + 1]);
+                                    }
+                                }
+                            }
+                        }
+                    } catch (paragraphError) {
+                        console.warn('Error during paragraph reconstruction:', paragraphError);
+                        // Continue with individual items if paragraph reconstruction fails
+                    }
+
+                    // For extremely long search texts, use our key phrases approach
+                    if (keyPhrases.length > 0) {
+                        // Check each paragraph for key phrases
+                        for (const paragraph of paragraphs) {
+                            let matchingPhrases = 0;
+                            let bestPhraseScore = 0;
+
+                            for (const phrase of keyPhrases) {
+                                const similarity = this.computeSimilarity(paragraph, phrase);
+                                if (similarity > 0.7) matchingPhrases++;
+                                bestPhraseScore = Math.max(bestPhraseScore, similarity);
+                            }
+
+                            // If multiple key phrases match, this is likely a good result
+                            if (matchingPhrases > 1 || bestPhraseScore > 0.8) {
+                                this._fuzzySearchResults.push({
+                                    pageIndex,
+                                    matchIndex: paragraphs.indexOf(paragraph),
+                                    text: paragraph,
+                                    score: 0.7 + matchingPhrases * 0.05,
+                                    isParagraph: true,
+                                });
+                            }
+                        }
+
+                        // Also check each item directly
+                        for (const item of textContent.items) {
+                            const text = (item as any).str || '';
+                            if (!text.trim()) continue;
+
+                            for (const phrase of keyPhrases) {
+                                const similarity = this.computeSimilarity(text, phrase);
+                                if (similarity > 0.7) {
+                                    this._fuzzySearchResults.push({
+                                        pageIndex,
+                                        matchIndex: textContent.items.indexOf(item),
+                                        text: text,
+                                        score: similarity,
+                                        isParagraph: false,
+                                    });
+                                    break; // One matching phrase is enough for direct items
+                                }
+                            }
+                        }
+
+                        continue; // Skip normal processing for this page, we've used the key phrases approach
+                    }
+
+                    // Ensure paragraphs aren't too large before checking
+                    paragraphs = paragraphs.filter(p => p.length < 5000);
+
+                    // Check both individual items and reconstructed paragraphs
+                    try {
+                        const itemsToCheck = [
+                            ...textContent.items.map((item: any) => ({
+                                idx: textContent.items.indexOf(item),
+                                text: (item as any).str || '',
+                                isParagraph: false,
+                            })),
+                            ...paragraphs.map((p, i) => ({
+                                idx: i,
+                                text: p,
+                                isParagraph: true,
+                            })),
+                        ];
+
+                        for (const item of itemsToCheck) {
+                            if (!item.text.trim() || item.text.length > 5000) continue;
+
+                            const similarity = this.computeSimilarity(item.text, normalizedSearch);
+
+                            // Track best match for debugging
+                            if (similarity > bestMatchScore) {
+                                bestMatchScore = similarity;
+                                bestMatchText = item.text.substring(0, 100);
+                            }
+
+                            if (similarity > similarityThreshold) {
+                                this._fuzzySearchResults.push({
+                                    pageIndex,
+                                    matchIndex: item.idx,
+                                    text: item.text,
+                                    score: similarity,
+                                    isParagraph: item.isParagraph,
+                                });
+                            }
+                        }
+                    } catch (itemCheckError) {
+                        console.warn('Error checking items on page:', itemCheckError);
+                    }
+                } catch (error) {
+                    console.error(`Error extracting text from page ${pageIndex + 1}:`, error);
+                    // Continue with other pages even if one fails
+                }
+            }
+
+            // Check if we already have good matches after each batch
+            // This allows us to stop early if we've found excellent matches
+            if (this._fuzzySearchResults.length > 0) {
+                // Sort results by similarity (descending)
+                this._fuzzySearchResults.sort((a, b) => (b.score || 0) - (a.score || 0));
+
+                // If we have an excellent match (score > 0.8), stop searching
+                if (this._fuzzySearchResults[0]?.score && this._fuzzySearchResults[0].score > 0.8) {
+                    console.log(`Found excellent match (score: ${this._fuzzySearchResults[0].score?.toFixed(2)}) - stopping early`);
+                    break;
+                }
+
+                // If we have several good matches (score > 0.6), stop searching
+                if (this._fuzzySearchResults.length >= 3 && this._fuzzySearchResults.every(r => r.score && r.score > 0.6)) {
+                    console.log(`Found ${this._fuzzySearchResults.length} good matches - stopping early`);
+                    break;
+                }
+            }
+
+            // Perform cleanup between batches to avoid memory buildup
+            if (batchEnd < totalPages) {
+                // Give the browser a moment to breathe and release memory
+                await new Promise(resolve => setTimeout(resolve, 1));
+            }
+        }
+
+        // If no results with advanced search, try standard search with key terms
+        if (this._fuzzySearchResults.length === 0 && searchWords.length > 3) {
+            // Find the most distinctive words (longer words are often more specific)
+            const distinctiveWords = searchWords
+                .filter(w => w.length > 4)
+                .sort((a, b) => b.length - a.length)
+                .slice(0, 3);
+
+            if (distinctiveWords.length > 0) {
+                console.log(`Falling back to standard search with distinctive term: ${distinctiveWords[0]}`);
+                this._pdfViewer.eventBus.dispatch('find', {
+                    query: distinctiveWords[0],
+                    phraseSearch: false,
+                    highlightAll: true,
+                    findPrevious: false,
+                });
+                return true;
+            }
+        }
+
+        console.log(`Best match (${bestMatchScore.toFixed(2)}): "${bestMatchText}"`);
+        console.log(`Found ${this._fuzzySearchResults.length} matches above threshold ${similarityThreshold}`);
+
+        // Sort results by similarity (descending)
+        this._fuzzySearchResults.sort((a, b) => (b.score || 0) - (a.score || 0));
+
+        // Navigate to the first/last result based on direction
+        if (this._fuzzySearchResults.length > 0) {
+            this._currentFuzzyMatchIndex = bwd ? this._fuzzySearchResults.length - 1 : 0;
+            this.navigateToFuzzyMatch(this._currentFuzzyMatchIndex);
+            return true;
+        } else if (bestMatchScore > 0) {
+            // If we found some match but below threshold, adjust threshold and try again
+            if (bestMatchScore > similarityThreshold * 0.7) {
+                console.log(`Lowering threshold to ${bestMatchScore * 0.9} and retrying search`);
+                similarityThreshold = bestMatchScore * 0.9;
+                return this.performFuzzySearch(searchString, bwd);
+            }
+        }
+
+        // Ultimate fallback: Use standard PDF.js search with the most common words
+        if (this._fuzzySearchResults.length === 0) {
+            // Extract a few words from the middle of the search string
+            const words = normalizedSearch.split(' ');
+            const middleIndex = Math.floor(words.length / 2);
+            const searchPhrase = words.slice(Math.max(0, middleIndex - 1), Math.min(words.length, middleIndex + 2)).join(' ');
+
+            console.log(`Falling back to standard search with phrase: ${searchPhrase}`);
+            this._pdfViewer.eventBus.dispatch('find', {
+                query: searchPhrase,
+                phraseSearch: true,
+                highlightAll: true,
+                findPrevious: false,
+            });
+            return true;
+        }
+
+        return false;
+    }
+
+    /**
+     * Scrolls the viewer to the page containing a fuzzy match and requests highlighting of the
+     * match text through the viewer's 'find' event. An out-of-range index is ignored.
+     *
+     * Highlighting strategy:
+     *  - paragraph match with more than five words: the text is cut into overlapping five-word
+     *    chunks; the first is dispatched immediately and each later one 100ms after the previous.
+     *  - any other match, and only when the viewer has a findController: text longer than 50
+     *    characters is represented by five words from its middle (or by the whole text when it
+     *    has fewer than five words); shorter text is searched for as is.
+     *
+     * A delayed chunk is dispatched only while this match is still the current one, so stepping
+     * to another match (or clearing the results) abandons the chunks that have not fired yet.
+     *
+     * @param index position of the match in `_fuzzySearchResults`
+     */
+    private navigateToFuzzyMatch(index: number): void {
+        if (index < 0 || index >= this._fuzzySearchResults.length) return;
+
+        const match = this._fuzzySearchResults[index];
+        console.log(`Navigating to match: ${match.text.substring(0, 50)}... (score: ${match.score?.toFixed(2) || 'unknown'})`);
+
+        // Every highlight request below uses the same options; only the query text differs.
+        const highlight = (query: string) =>
+            this._pdfViewer.eventBus.dispatch('find', {
+                query,
+                phraseSearch: true,
+                highlightAll: true,
+                findPrevious: false,
+            });
+
+        // Scroll to the page containing the match
+        this._pdfViewer.scrollPageIntoView({
+            pageNumber: match.pageIndex + 1,
+        });
+
+        // Split once; both highlighting paths below work on whole words
+        const words = match.text.split(/\s+/);
+
+        // For paragraph matches, highlight with shorter chunks to get better visual feedback
+        if (match.isParagraph && words.length > 5) {
+            // Five-word chunks starting every third word, so neighbouring chunks overlap by two
+            // words. With more than five words the loop body runs at least once, so `chunks`
+            // is never empty here.
+            const chunks: string[] = [];
+            for (let start = 0; start < words.length - 4; start += 3) {
+                chunks.push(words.slice(start, start + 5).join(' '));
+            }
+
+            // Highlight the first chunk immediately
+            highlight(chunks[0]);
+
+            // Highlight the rest with small delays to avoid conflicts
+            chunks.slice(1).forEach((chunk, i) => {
+                setTimeout(
+                    () => {
+                        // Drop stale timers: by the time this fires the user may have moved on
+                        // to another match or cleared the results, and dispatching the old
+                        // chunk would pull the highlight back to the previous match.
+                        if (this._fuzzySearchResults[this._currentFuzzyMatchIndex] !== match) return;
+                        highlight(chunk);
+                    },
+                    (i + 1) * 100
+                );
+            });
+            return;
+        }
+
+        // Standard highlighting for non-paragraph matches or short text
+        if (!this._pdfViewer.findController) return;
+
+        if (match.text.length > 50) {
+            // For longer text, highlight a short phrase that is likely to be unique instead of
+            // the whole text: five words from the middle when there are enough of them.
+            let phraseToHighlight = match.text;
+            if (words.length >= 5) {
+                const middleIndex = Math.floor(words.length / 2);
+                phraseToHighlight = words.slice(middleIndex - 2, middleIndex + 3).join(' ');
+            }
+
+            console.log(`Highlighting phrase: "${phraseToHighlight}"`);
+
+            highlight(phraseToHighlight);
+        } else {
+            // For shorter text, use the entire match
+            highlight(match.text);
+        }
+    }
+
+    // Step to the next fuzzy match, wrapping from the last back to the first; returns false when there are no matches
+    private nextFuzzyMatch(): boolean {
+        const total = this._fuzzySearchResults.length;
+        if (!total) return false;
+        this._currentFuzzyMatchIndex = (this._currentFuzzyMatchIndex + 1) % total;
+        this.navigateToFuzzyMatch(this._currentFuzzyMatchIndex);
+        return true;
+    }
+
+    // Step to the previous fuzzy match (adding the total keeps the index non-negative, so 0 wraps to the last); false when there are no matches
+    private prevFuzzyMatch(): boolean {
+        const total = this._fuzzySearchResults.length;
+        if (!total) return false;
+        this._currentFuzzyMatchIndex = (this._currentFuzzyMatchIndex - 1 + total) % total;
+        this.navigateToFuzzyMatch(this._currentFuzzyMatchIndex);
+        return true;
+    }
+
     @action
     search = (searchString: string, bwd?: boolean, clear: boolean = false) => {
-        const findOpts = {
-            caseSensitive: false,
-            findPrevious: bwd,
-            highlightAll: true,
-            phraseSearch: true,
-            query: searchString,
-        };
         if (clear) {
+            this._fuzzySearchResults = [];
             this._pdfViewer?.eventBus.dispatch('findbarclose', {});
-        } else if (!searchString) {
+            return true;
+        }
+
+        if (!searchString) {
             bwd ? this.prevAnnotation() : this.nextAnnotation();
-        } else if (this._pdfViewer?.pageViewsReady) {
-            this._pdfViewer?.eventBus.dispatch('find', { ...findOpts, type: 'again' });
-        } else if (this._mainCont.current) {
-            const executeFind = () => this._pdfViewer?.eventBus.dispatch('find', findOpts);
-            this._mainCont.current.addEventListener('pagesloaded', executeFind);
-            this._mainCont.current.addEventListener('pagerendered', executeFind);
+            return true;
         }
-        return true;
+
+        // If we already have fuzzy search results, navigate through them
+        if (this._fuzzySearchEnabled && this._fuzzySearchResults.length > 0) {
+            return bwd ? this.prevFuzzyMatch() : this.nextFuzzyMatch();
+        }
+
+        // For new search, decide between fuzzy and standard search
+        if (this._fuzzySearchEnabled) {
+            // Start fuzzy search
+            this.performFuzzySearch(searchString, bwd);
+            return true;
+        } else {
+            // Use original PDF.js search
+            const findOpts = {
+                caseSensitive: false,
+                findPrevious: bwd,
+                highlightAll: true,
+                phraseSearch: true,
+                query: searchString,
+            };
+
+            if (this._pdfViewer?.pageViewsReady) {
+                this._pdfViewer?.eventBus.dispatch('find', { ...findOpts, type: 'again' });
+            } else if (this._mainCont.current) {
+                const executeFind = () => this._pdfViewer?.eventBus.dispatch('find', findOpts);
+                this._mainCont.current.addEventListener('pagesloaded', executeFind);
+                this._mainCont.current.addEventListener('pagerendered', executeFind);
+            }
+            return true;
+        }
+    };
+
+    // Flip fuzzy search mode on/off.
+    // Returns the mode in effect after the flip (true when fuzzy search is now enabled).
+    @action
+    toggleFuzzySearch = (): boolean => {
+        return (this._fuzzySearchEnabled = !this._fuzzySearchEnabled);
     };
 
     @action