From 0db4583914e43e6efdba3e86a614a19956e73b5e Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sat, 10 May 2025 20:30:24 -0400 Subject: feat: changed web document to display screenshot --- src/server/ApiManagers/AssistantManager.ts | 229 +++++++++++++++++++++++++++-- 1 file changed, 215 insertions(+), 14 deletions(-) (limited to 'src/server/ApiManagers') diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index af25722a4..6d2779163 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -485,36 +485,69 @@ export default class AssistantManager extends ApiManager { subscription: '/scrapeWebsite', secureHandler: async ({ req, res }) => { const { url } = req.body; + let browser = null; try { + // Set a longer timeout for slow-loading pages + const navigationTimeout = 60000; // 60 seconds + // Launch Puppeteer browser to navigate to the webpage - const browser = await puppeteer.launch({ - args: ['--no-sandbox', '--disable-setuid-sandbox'], + browser = await puppeteer.launch({ + args: ['--no-sandbox', '--disable-setuid-sandbox', '--disable-dev-shm-usage'], }); const page = await browser.newPage(); await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'); - await page.goto(url, { waitUntil: 'networkidle2' }); + + // Set timeout for navigation + page.setDefaultNavigationTimeout(navigationTimeout); + + // Navigate with timeout and wait for content to load + await page.goto(url, { + waitUntil: 'networkidle2', + timeout: navigationTimeout, + }); + + // Wait a bit longer to ensure dynamic content loads + await new Promise(resolve => setTimeout(resolve, 2000)); // Extract HTML content const htmlContent = await page.content(); await browser.close(); + browser = null; - // Parse HTML content using JSDOM - const dom = new JSDOM(htmlContent, { url }); + // Use a try-catch block specifically for JSDOM parsing + try { + // Parse HTML content using JSDOM + const dom = new JSDOM(htmlContent, { url }); - // Extract readable content using Mozilla's Readability API - const reader = new Readability(dom.window.document); - const article = reader.parse(); + // Extract readable content using Mozilla's Readability API + const reader = new Readability(dom.window.document); + const article = reader.parse(); - if (article) { - const plainText = article.textContent; - res.send({ website_plain_text: plainText }); - } else { - res.status(500).send({ error: 'Failed to extract readable content' }); + if (article) { + const plainText = article.textContent; + res.send({ website_plain_text: plainText }); + } else { + // If Readability fails, fallback to extracting main content + const mainContent = await extractMainContent(htmlContent); + res.send({ website_plain_text: mainContent }); + } + } catch (parsingError) { + console.error('Error parsing website content:', parsingError); + + // Fallback to a simplified extraction method + const mainContent = await extractMainContent(htmlContent); + res.send({ website_plain_text: mainContent }); } } catch (error) { console.error('Error scraping website:', error); + + // Clean up browser if still open + if (browser) { + await browser.close().catch(e => console.error('Error closing browser:', e)); + } + res.status(500).send({ - error: 'Failed to scrape website', + error: 'Failed to scrape website: ' + ((error as Error).message || 'Unknown error'), }); } }, @@ -687,6 +720,127 @@ export default class AssistantManager extends ApiManager { } }, }); + + // Register an API route to capture a screenshot of a webpage using Puppeteer + // and return the image URL for display in the WebBox component + register({ + method: Method.POST, + subscription: '/captureWebScreenshot', + secureHandler: async ({ req, res }) => { + const { url, width, height, fullPage } = req.body; + + if (!url) { + res.status(400).send({ error: 'URL is required' }); + return; + } + + let browser = null; + try { + // Increase timeout for websites that load slowly + const navigationTimeout = 60000; // 60 seconds + + // Launch a headless browser with additional options to improve stability + browser = await puppeteer.launch({ + headless: true, // Use headless mode + args: [ + '--no-sandbox', + '--disable-setuid-sandbox', + '--disable-dev-shm-usage', + '--disable-accelerated-2d-canvas', + '--disable-gpu', + '--window-size=1200,800', + '--disable-web-security', // Helps with cross-origin issues + '--disable-features=IsolateOrigins,site-per-process', // Helps with frames + ], + timeout: navigationTimeout, + }); + + const page = await browser.newPage(); + + // Set a larger viewport to capture more content + await page.setViewport({ + width: Number(width) || 1200, + height: Number(height) || 800, + deviceScaleFactor: 1, + }); + + // Enable request interception to speed up page loading + await page.setRequestInterception(true); + page.on('request', request => { + // Skip unnecessary resources to speed up loading + const resourceType = request.resourceType(); + if (resourceType === 'font' || resourceType === 'media' || resourceType === 'websocket' || request.url().includes('analytics') || request.url().includes('tracker')) { + request.abort(); + } else { + request.continue(); + } + }); + + // Set navigation and timeout options + console.log(`Navigating to URL: ${url}`); + + // Navigate to the URL and wait for the page to load + await page.goto(url, { + waitUntil: ['networkidle2'], + timeout: navigationTimeout, + }); + + // Wait for a short delay after navigation to allow content to render + await new Promise(resolve => setTimeout(resolve, 2000)); + + // Take a screenshot + console.log('Taking screenshot...'); + const screenshotPath = `./src/server/public/files/images/webpage_${Date.now()}.png`; + const screenshotOptions = { + path: screenshotPath, + fullPage: fullPage === true, + omitBackground: false, + type: 'png' as 'png', + clip: + fullPage !== true + ? { + x: 0, + y: 0, + width: Number(width) || 1200, + height: Number(height) || 800, + } + : undefined, + }; + + await page.screenshot(screenshotOptions); + + // Get the full height of the page + const fullHeight = await page.evaluate(() => { + return Math.max(document.body.scrollHeight, document.documentElement.scrollHeight, document.body.offsetHeight, document.documentElement.offsetHeight, document.body.clientHeight, document.documentElement.clientHeight); + }); + + console.log(`Screenshot captured successfully with height: ${fullHeight}px`); + + // Return the URL to the screenshot + const screenshotUrl = `/files/images/webpage_${Date.now()}.png`; + res.json({ + screenshotUrl, + fullHeight, + }); + } catch (error: any) { + console.error('Error capturing screenshot:', error); + res.status(500).send({ + error: `Failed to capture screenshot: ${error.message}`, + details: error.stack, + }); + } finally { + // Ensure browser is closed to free resources + if (browser) { + try { + await browser.close(); + console.log('Browser closed successfully'); + } catch (error) { + console.error('Error closing browser:', error); + } + } + } + }, + }); } } @@ -829,3 +983,50 @@ function spawnPythonProcess(jobId: string, file_path: string) { runPythonScript(); } } + +/** + * Extracts main content from HTML by removing scripts, styles, and non-content elements + * Used as a fallback when Readability fails + * @param html The HTML content to process + * @returns Extracted main text content + */ +async function extractMainContent(html: string): Promise { + try { + // Create a simple DOM to extract content + const dom = new JSDOM(html, { runScripts: 'outside-only' }); + const document = dom.window.document; + + // Remove scripts, styles, and other non-content elements + const elementsToRemove = ['script', 'style', 'iframe', 'noscript', 'svg', 'header', 'footer', 'nav', 'aside', 'ads', 'banner', 'form', 'button', 'input']; + + elementsToRemove.forEach(tag => { + const elements = document.querySelectorAll(tag); + elements.forEach(el => el.remove()); + }); + + // Try to find the main content container using common selectors + const mainSelectors = ['main', 'article', '#content', '.content', '#main', '.main', '.post-content', '.article-content', '.entry-content']; + + let mainContent = ''; + + // Try each selector to find main content + for (const selector of mainSelectors) { + const element = document.querySelector(selector); + if (element && element.textContent && element.textContent.trim().length > 100) { + mainContent = element.textContent; + break; + } + } + + // If no main content found with selectors, use body content + if (!mainContent || mainContent.length < 200) { + mainContent = document.body.textContent || ''; + } + + // Clean up the text + return mainContent.replace(/\s+/g, ' ').replace(/\n+/g, '\n').trim(); + } catch (error) { + console.error('Error extracting main content:', error); + return 'Failed to extract content from the webpage.'; + } +} -- cgit v1.2.3-70-g09d2 From a5d7f5c38192b91b7df3bd6ecace5ba7365449a6 Mon Sep 17 00:00:00 2001 From: "A.J. Shulman" Date: Sun, 11 May 2025 13:42:00 -0400 Subject: Made it so chunk Ids are seperately managed and made sure the doc id is sonsistent and not created in python spawn --- src/client/views/nodes/WebBox.scss | 241 ++++---- src/client/views/nodes/WebBox.tsx | 605 +++++++-------------- src/client/views/nodes/WebBoxRenderer.js | 103 ++++ .../views/nodes/chatbot/agentsystem/prompts.ts | 4 +- .../nodes/chatbot/chatboxcomponents/ChatBox.tsx | 36 +- .../nodes/chatbot/tools/DocumentMetadataTool.ts | 38 +- src/client/views/nodes/chatbot/tools/RAGTool.ts | 5 +- .../nodes/chatbot/utils/AgentDocumentManager.ts | 213 +++----- .../views/nodes/chatbot/vectorstore/Vectorstore.ts | 33 +- src/server/ApiManagers/AssistantManager.ts | 8 +- src/server/chunker/pdf_chunker.py | 13 +- 11 files changed, 548 insertions(+), 751 deletions(-) (limited to 'src/server/ApiManagers') diff --git a/src/client/views/nodes/WebBox.scss b/src/client/views/nodes/WebBox.scss index a1991d1d0..77d7716f4 100644 --- a/src/client/views/nodes/WebBox.scss +++ b/src/client/views/nodes/WebBox.scss @@ -1,9 +1,13 @@ @use '../global/globalCssVariables.module.scss' as global; .webBox { + height: 100%; + width: 100%; + top: 0; + left: 0; position: relative; + display: flex; overflow: hidden; - aspect-ratio: 1 / 1; // Explicitly enforce square aspect ratio .webBox-sideResizer { position: absolute; @@ -16,119 +20,6 @@ .webBox-background { width: 100%; height: 100%; - position: absolute; - top: 0; - left: 0; - } - - // Simple container for screenshot - .webBox-screenshot-container { - width: 100%; - } - - .webBox-screenshot { - width: 100%; - height: auto; // Maintain aspect ratio - display: block; - pointer-events: none; - } - - .webBox-loading { - padding: 20px; - text-align: center; - color: #666; - background-color: #f5f5f5; - min-height: 200px; - display: flex; - flex-direction: column; - align-items: center; - justify-content: center; - } - - .webBox-loading-spinner { - margin-top: 15px; - color: #1976d2; - font-size: 24px; - } - - .webBox-error { - padding: 20px; - color: #d32f2f; - text-align: center; - background-color: #ffebee; - min-height: 200px; - display: flex; - flex-direction: column; - align-items: center; - justify-content: center; - gap: 15px; - } - - .webBox-placeholder { - padding: 20px; - text-align: center; - color: #757575; - background-color: #fafafa; - min-height: 200px; - display: flex; - align-items: center; - justify-content: center; - } - - // Basic container layout - .webBox-container { - width: 100%; - height: 100%; - position: relative; - } - - // Simple scrollable container - vertical only - .webBox-outerContent { - width: 100%; - position: relative; - overflow-y: auto; - overflow-x: hidden; - background-color: #f5f5f5; - - // Improve scrollbar styling - &::-webkit-scrollbar-thumb { - background-color: #888; - border-radius: 6px; - } - - &::-webkit-scrollbar { - width: 8px; - background-color: #f5f5f5; - } - } - - .webBox-innerContent { - width: 100%; - background-color: #f5f5f5; - } - - .webBox-htmlSpan { - position: absolute; - top: 0; - left: 0; - cursor: text; - padding: 15px; - width: 100%; - height: 100%; - } - - .webBox-annotationLayer { - position: absolute; - transform-origin: left top; - top: 0; - width: 100%; - pointer-events: none; - mix-blend-mode: multiply; - } - - .webBox-annotationBox { - position: absolute; - background-color: rgba(245, 230, 95, 0.616); } .webBox-ui { @@ -177,14 +68,14 @@ } } - .webBox-refreshButton { + .webBox-nextIcon, + .webBox-prevIcon { background: #121721; + color: white; height: 20px; width: 25px; display: flex; - position: absolute; - bottom: 0; - right: 40px; + position: relative; align-items: center; justify-content: center; border-radius: 3px; @@ -192,6 +83,10 @@ padding: 0px; } + .webBox-overlayButton:hover { + background: none; + } + .webBox-overlayCont { position: absolute; width: calc(100% - 40px); @@ -223,7 +118,8 @@ justify-content: center; border-radius: 3px; pointer-events: all; - z-index: 1; + z-index: 1; // so it appears on top of the document's title, if shown + box-shadow: global.$standard-box-shadow; transition: 0.2s; @@ -238,6 +134,89 @@ opacity: 0.1; } + .webBox-annotationLayer { + position: absolute; + transform-origin: left top; + top: 0; + width: 100%; + pointer-events: none; + mix-blend-mode: multiply; // bcz: makes text fuzzy! + } + + .webBox-annotationBox { + position: absolute; + background-color: rgba(245, 230, 95, 0.616); + } + + .webBox-container { + transform-origin: top left; + width: 100%; + height: 100%; + position: absolute; + + .webBox-htmlSpan { + position: absolute; + top: 0; + left: 0; + cursor: text; + padding: 15px; + height: 100%; + } + + .webBox-cont { + pointer-events: none; + } + + .webBox-cont, + .webBox-cont-interactive { + padding: 0vw; + position: absolute; + top: 0; + left: 0; + width: 100%; + height: 100%; + transform-origin: top left; + + .webBox-iframe { + width: 100%; + height: 100%; + position: absolute; + top: 0; + left: 0; + body { + ::selection { + color: white; + background: orange; + } + } + } + } + + .webBox-cont-interactive { + span { + user-select: text !important; + } + } + + .webBox-outerContent { + width: 100%; + height: 100%; + position: absolute; + transform-origin: top left; + top: 0; + left: 0; + overflow: auto; + + .webBox-innerContent { + position: relative; + } + } + + div.webBox-outerContent::-webkit-scrollbar-thumb { + cursor: nw-resize; + } + } + .webBox-overlay { width: 100%; height: 100%; @@ -277,13 +256,37 @@ width: 100%; height: 100%; position: absolute; + pointer-events: all; .indicator { position: absolute; + transition: background-color 0.2s ease; + border-radius: 2px; &.active { background-color: rgba(0, 0, 0, 0.1); + box-shadow: 0 0 2px rgba(0, 0, 0, 0.2); } } } + + // Add styles to hide font errors and improve user experience + .font-error-hidden { + font-family: + system-ui, + -apple-system, + BlinkMacSystemFont, + 'Segoe UI', + Roboto, + Arial, + sans-serif !important; + } + + // Change iframe behavior when resource loading errors occur + iframe.webBox-iframe { + &.loading-error { + // Make full content accessible when external resources fail + pointer-events: all !important; + } + } } diff --git a/src/client/views/nodes/WebBox.tsx b/src/client/views/nodes/WebBox.tsx index 045af7ecd..1e158f484 100644 --- a/src/client/views/nodes/WebBox.tsx +++ b/src/client/views/nodes/WebBox.tsx @@ -4,7 +4,6 @@ import { htmlToText } from 'html-to-text'; import { action, computed, IReactionDisposer, makeObservable, observable, ObservableMap, reaction, runInAction } from 'mobx'; import { observer } from 'mobx-react'; import * as React from 'react'; -import axios from 'axios'; import * as WebRequest from 'web-request'; import { addStyleSheet, addStyleSheetRule, clearStyleSheetRules, ClientUtils, DivHeight, getWordAtPoint, lightOrDark, returnFalse, returnOne, returnZero, setupMoveUpEvents, smoothScroll } from '../../../ClientUtils'; import { Doc, DocListCast, Field, FieldType, Opt, StrListCast } from '../../../fields/Doc'; @@ -70,20 +69,23 @@ export class WebBox extends ViewBoxAnnotatableComponent() { private _scrollTimer: NodeJS.Timeout | undefined; private _getAnchor: (savedAnnotations: Opt>, addAsAnnotation: boolean) => Opt = () => undefined; - @observable private _webUrl = ''; // url of the page we want to display - @observable private _hackHide = false; + @observable private _webUrl = ''; // url of the src parameter of the embedded iframe but not necessarily the rendered page - eg, when following a link, the rendered page changes but we don't want the src parameter to also change as that would cause an unnecessary re-render. + @observable private _hackHide = false; // apparently changing the value of the 'sandbox' prop doesn't necessarily apply it to the active iframe. so thisforces the ifrmae to be rebuilt when allowScripts is toggled @observable private _searching: boolean = false; @observable private _showSidebar = false; @observable private _webPageHasBeenRendered = false; @observable private _marqueeing: number[] | undefined = undefined; - @observable private _screenshotUrl: string | null = null; // URL to the screenshot image - @observable private _fullHeight: number = 0; // Full height of the webpage screenshot - @observable private _isLoadingScreenshot: boolean = false; // Loading state for the screenshot + get marqueeing() { + return this._marqueeing; + } + set marqueeing(val) { + val && this._marqueeref.current?.onInitiateSelection(val); + !val && this._marqueeref.current?.onTerminateSelection(); + this._marqueeing = val; + } @observable private _iframe: HTMLIFrameElement | null = null; @observable private _savedAnnotations = new ObservableMap(); @observable private _scrollHeight = NumCast(this.layoutDoc.scrollHeight); - @observable private _screenshotError: string | null = null; // Error message if screenshot fails - @observable private _loadingFromCache: boolean = false; @computed get _url() { return this.webField?.toString() || ''; } @@ -143,38 +145,31 @@ export class WebBox extends ViewBoxAnnotatableComponent() { }; updateIcon = async () => { - if (!this._screenshotUrl) { - // If we don't have a screenshot yet, capture one first - await this.captureWebScreenshot(); - } - + if (!this._iframe) return new Promise(res => res()); const scrollTop = NumCast(this.layoutDoc._layout_scrollTop); const nativeWidth = NumCast(this.layoutDoc.nativeWidth); const nativeHeight = (nativeWidth * this._props.PanelHeight()) / this._props.PanelWidth(); - + let htmlString = this._iframe.contentDocument && new XMLSerializer().serializeToString(this._iframe.contentDocument); + if (!htmlString) { + htmlString = await fetch(ClientUtils.CorsProxy(this.webField!.href)).then(response => response.text()); + } this.layoutDoc.thumb = undefined; this.Document.thumbLockout = true; // lock to prevent multiple thumb updates. - - try { - // If we have a screenshot, use it directly for the thumbnail - if (this._screenshotUrl) { - return ClientUtils.convertDataUri(this._screenshotUrl, this.layoutDoc[Id] + '_icon_' + new Date().getTime(), true, this.layoutDoc[Id] + '_icon_').then(returnedfilename => { + return (CreateImage(this._webUrl.endsWith('/') ? this._webUrl.substring(0, this._webUrl.length - 1) : this._webUrl, this._iframe.contentDocument?.styleSheets ?? [], htmlString, nativeWidth, nativeHeight, scrollTop) as Promise) + .then((dataUrl: string) => { + if (dataUrl.includes(' { this.Document.thumbLockout = false; this.layoutDoc.thumb = new ImageField(returnedfilename); this.layoutDoc.thumbScrollTop = scrollTop; this.layoutDoc.thumbNativeWidth = nativeWidth; this.layoutDoc.thumbNativeHeight = nativeHeight; }); - } else { - console.log('No screenshot available for thumbnail generation'); - this.Document.thumbLockout = false; - return Promise.resolve(); - } - } catch (error) { - console.error('Error creating thumbnail:', error); - this.Document.thumbLockout = false; - return Promise.reject(error); - } + }) + .catch((error: object) => console.error('oops, something went wrong!', error)); }; componentDidMount() { @@ -243,64 +238,13 @@ export class WebBox extends ViewBoxAnnotatableComponent() { }, { fireImmediately: true } ); - - // Check if we have a cached screenshot URL in metadata - if (this._url) { - this._webUrl = this._url; - const cachedScreenshotUrl = StrCast(this.dataDoc[this.fieldKey + '_screenshotUrl']); - const cachedHeight = NumCast(this.dataDoc[this.fieldKey + '_screenshotHeight']); - - if (cachedScreenshotUrl && cachedHeight) { - // Use cached screenshot - this._loadingFromCache = true; - this._isLoadingScreenshot = true; - - // Verify the cached screenshot exists by loading the image - const img = new Image(); - img.onload = action(() => { - this._screenshotUrl = cachedScreenshotUrl; - this._fullHeight = cachedHeight; - this._scrollHeight = cachedHeight; - this._webPageHasBeenRendered = true; - this._isLoadingScreenshot = false; - this._loadingFromCache = false; - - // Apply dimensions and initial scroll - if (this.layoutDoc._layout_autoHeight) { - this.layoutDoc._nativeHeight = this._fullHeight; - this._props.setHeight?.(this._fullHeight * (this._props.NativeDimScaling?.() || 1)); - } - - if (this._initialScroll !== undefined) { - this.setScrollPos(this._initialScroll); - } - - console.log(`Loaded cached screenshot: ${this._screenshotUrl}`); - }); - - img.onerror = action(() => { - // If image fails to load, capture a new screenshot - console.log('Cached screenshot not found, capturing new one'); - this._loadingFromCache = false; - this.captureWebScreenshot(); - }); - - img.src = cachedScreenshotUrl; - } else { - // No cached screenshot, capture a new one - this.captureWebScreenshot(); - } - } } componentWillUnmount() { - // Clean up timers - if (this._scrollTimer) { - clearTimeout(this._scrollTimer); - this._scrollTimer = undefined; - } - - // Clean up reaction disposers + this._iframetimeout && clearTimeout(this._iframetimeout); + this._iframetimeout = undefined; Object.values(this._disposers).forEach(disposer => disposer?.()); + // this._iframe?.removeEventListener('wheel', this.iframeWheel, true); + // this._iframe?.contentDocument?.removeEventListener("pointerup", this.iframeUp); } private _selectionText: string = ''; @@ -414,6 +358,59 @@ export class WebBox extends ViewBoxAnnotatableComponent() { _textAnnotationCreator: (() => ObservableMap) | undefined; savedAnnotationsCreator: () => ObservableMap = () => this._textAnnotationCreator?.() || this._savedAnnotations; + @action + iframeMove = (e: PointerEvent) => { + const theclick = this.props + .ScreenToLocalTransform() + .inverse() + .transformPoint(e.clientX, e.clientY - NumCast(this.layoutDoc.layout_scrollTop)); + this._marqueeref.current?.onMove(theclick); + }; + @action + iframeUp = (e: PointerEvent) => { + this._iframe?.contentDocument?.removeEventListener('pointermove', this.iframeMove); + this.marqueeing = undefined; + this._getAnchor = AnchorMenu.Instance?.GetAnchor; // need to save AnchorMenu's getAnchor since a subsequent selection on another doc will overwrite this value + this._textAnnotationCreator = undefined; + this.DocumentView?.()?.cleanupPointerEvents(); // pointerup events aren't generated on containing document view, so we have to invoke it here. + if (this._iframe?.contentWindow && this._iframe.contentDocument && !this._iframe.contentWindow.getSelection()?.isCollapsed) { + const mainContBounds = ClientUtils.GetScreenTransform(this._mainCont.current!); + const scale = (this._props.NativeDimScaling?.() || 1) * mainContBounds.scale; + const sel = this._iframe.contentWindow.getSelection(); + if (sel) { + this._selectionText = sel.toString(); + AnchorMenu.Instance.setSelectedText(sel.toString()); + this._textAnnotationCreator = () => this.createTextAnnotation(sel, !sel.isCollapsed ? sel.getRangeAt(0) : undefined); + AnchorMenu.Instance.jumpTo(e.clientX * scale + mainContBounds.translateX, e.clientY * scale + mainContBounds.translateY - NumCast(this.layoutDoc._layout_scrollTop) * scale); + // Changing which document to add the annotation to (the currently selected WebBox) + GPTPopup.Instance.setSidebarFieldKey(`${this._props.fieldKey}_${this._urlHash ? this._urlHash + '_' : ''}sidebar`); + GPTPopup.Instance.addDoc = this.sidebarAddDocument; + } + } else { + const theclick = this.props + .ScreenToLocalTransform() + .inverse() + .transformPoint(e.clientX, e.clientY - NumCast(this.layoutDoc.layout_scrollTop)); + if (!this._marqueeref.current?.isEmpty) this._marqueeref.current?.onEnd(theclick[0], theclick[1]); + else { + if (!(e.target as HTMLElement)?.tagName?.includes('INPUT')) this.finishMarquee(theclick[0], theclick[1]); + this._getAnchor = AnchorMenu.Instance?.GetAnchor; + this.marqueeing = undefined; + } + + ContextMenu.Instance.closeMenu(); + ContextMenu.Instance.setIgnoreEvents(false); + if (e?.button === 2 || e?.altKey) { + e?.preventDefault(); + e?.stopPropagation(); + setTimeout(() => { + // if menu comes up right away, the down event can still be active causing a menu item to be selected + this.specificContextMenu(); + this.DocumentView?.().onContextMenu(undefined, theclick[0], theclick[1]); + }); + } + } + }; @action webClipDown = (e: React.PointerEvent) => { e.stopPropagation(); @@ -508,6 +505,98 @@ export class WebBox extends ViewBoxAnnotatableComponent() { this._scrollHeight = this._iframe?.contentDocument?.body?.scrollHeight ?? 0; this.addWebStyleSheetRule(this.addWebStyleSheet(this._iframe?.contentDocument), '::selection', { color: 'white', background: 'orange' }, ''); + // Add error handler to suppress font CORS errors + if (this._iframe?.contentWindow) { + try { + // Track if any resource errors occurred + let hasResourceErrors = false; + + // Override the console.error to filter out font CORS errors + const win = this._iframe.contentWindow as Window & { console: Console }; + const originalConsoleError = win.console.error; + win.console.error = (...args: unknown[]) => { + const errorMsg = args.map(arg => String(arg)).join(' '); + if (errorMsg.includes('Access to font') && errorMsg.includes('has been blocked by CORS policy')) { + // Mark that we have font errors + hasResourceErrors = true; + // Ignore font CORS errors + return; + } + // Also catch other resource loading errors + if (errorMsg.includes('ERR_FAILED') || errorMsg.includes('ERR_BLOCKED_BY_CLIENT')) { + hasResourceErrors = true; + } + originalConsoleError.apply(win.console, args); + }; + + // Listen for resource loading errors + this._iframe.contentWindow.addEventListener( + 'error', + (e: Event) => { + const target = e.target as HTMLElement; + if (target instanceof HTMLElement) { + // If it's a resource that failed to load + if (target.tagName === 'LINK' || target.tagName === 'IMG' || target.tagName === 'SCRIPT') { + hasResourceErrors = true; + // Apply error class after a short delay to allow initial content to load + setTimeout(() => { + if (this._iframe && hasResourceErrors) { + this._iframe.classList.add('loading-error'); + } + }, 1000); + } + } + }, + true + ); + + // Add fallback CSS for fonts that fail to load + const style = this._iframe.contentDocument?.createElement('style'); + if (style) { + style.textContent = ` + @font-face { + font-family: 'CORS-fallback-serif'; + src: local('Times New Roman'), local('Georgia'), serif; + } + @font-face { + font-family: 'CORS-fallback-sans'; + src: local('Arial'), local('Helvetica'), sans-serif; + } + /* Fallback for all fonts that fail to load */ + @font-face { + font-display: swap !important; + } + + /* Add a script to find and fix elements with failed fonts */ + @font-face { + font-family: '__failed_font__'; + src: local('Arial'); + unicode-range: U+0000; + } + `; + this._iframe.contentDocument?.head.appendChild(style); + + // Add a script to detect and fix font loading issues + const script = this._iframe.contentDocument?.createElement('script'); + if (script) { + script.textContent = ` + // Fix font loading issues with fallbacks + setTimeout(function() { + document.querySelectorAll('*').forEach(function(el) { + if (window.getComputedStyle(el).fontFamily.includes('__failed_font__')) { + el.classList.add('font-error-hidden'); + } + }); + }, 1000); + `; + this._iframe.contentDocument?.head.appendChild(script); + } + } + } catch (e) { + console.log('Error setting up font error handling:', e); + } + } + let href: Opt; try { href = iframe?.contentWindow?.location.href; @@ -658,23 +747,15 @@ export class WebBox extends ViewBoxAnnotatableComponent() { this.dataDoc[this.fieldKey + '_history'] = new List([...history, this._url]); this.dataDoc[this.fieldKey] = new WebField(new URL(future.pop()!)); this._scrollHeight = 0; - - // Reset screenshot state for new URL - this._screenshotUrl = null; - this._fullHeight = 0; - this._isLoadingScreenshot = false; - if (this._webUrl === this._url) { this._webUrl = curUrl; setTimeout( action(() => { this._webUrl = this._url; - this.captureWebScreenshot(); // Capture screenshot for new URL }) ); } else { this._webUrl = this._url; - this.captureWebScreenshot(); // Capture screenshot for new URL } return true; } @@ -694,18 +775,11 @@ export class WebBox extends ViewBoxAnnotatableComponent() { else this.dataDoc[this.fieldKey + '_future'] = new List([...future, this._url]); this.dataDoc[this.fieldKey] = new WebField(new URL(history.pop()!)); this._scrollHeight = 0; - - // Reset screenshot state for new URL - this._screenshotUrl = null; - this._fullHeight = 0; - this._isLoadingScreenshot = false; - if (this._webUrl === this._url) { this._webUrl = curUrl; setTimeout(action(() => (this._webUrl = this._url))); } else { this._webUrl = this._url; - this.captureWebScreenshot(); // Capture screenshot for new URL } return true; } @@ -724,11 +798,10 @@ export class WebBox extends ViewBoxAnnotatableComponent() { this.layoutDoc.thumbNativeWidth = undefined; this.layoutDoc.thumbNativeHeight = undefined; } - + } + if (!preview) { if (!dontUpdateIframe) { this._webUrl = this._url; - // Capture screenshot when URL changes - this.captureWebScreenshot(); } } } catch { @@ -737,85 +810,6 @@ export class WebBox extends ViewBoxAnnotatableComponent() { return true; }; - @action - captureWebScreenshot = async () => { - if (!this._url || this._loadingFromCache) return; - - try { - this._isLoadingScreenshot = true; - this._screenshotError = null; - - console.log(`Capturing screenshot for URL: ${this._url}`); - - try { - const response = await axios.post('/captureWebScreenshot', { - url: this._url, - width: NumCast(this.Document.nativeWidth, 1200), - height: NumCast(this.Document.nativeHeight, 800), - fullPage: true, // Request a full page screenshot - }); - - runInAction(() => { - this._screenshotUrl = response.data.screenshotUrl; - this._fullHeight = response.data.fullHeight; - this._scrollHeight = response.data.fullHeight; - this._webPageHasBeenRendered = true; - this._isLoadingScreenshot = false; - - // Store screenshot URL and height in document metadata - this.dataDoc[this.fieldKey + '_screenshotUrl'] = response.data.screenshotUrl; - this.dataDoc[this.fieldKey + '_screenshotHeight'] = response.data.fullHeight; - - // Update native dimensions to match the screenshot - if (!this.dataDoc[this.fieldKey + '_nativeWidth']) { - this.dataDoc[this.fieldKey + '_nativeWidth'] = 1200; // Default width - } - - if (!this.dataDoc[this.fieldKey + '_nativeHeight']) { - this.dataDoc[this.fieldKey + '_nativeHeight'] = this._fullHeight; - } - - // Set document height if needed - if (this.layoutDoc._layout_autoHeight) { - this.layoutDoc._nativeHeight = this._fullHeight; - this._props.setHeight?.(this._fullHeight * (this._props.NativeDimScaling?.() || 1)); - } - - // Apply initial scroll if needed - if (this._initialScroll !== undefined) { - this.setScrollPos(this._initialScroll); - } - - console.log(`Screenshot captured successfully: ${this._screenshotUrl} with height: ${this._fullHeight}px`); - }); - } catch (error: any) { - // Handle error from the API - console.error('Error capturing screenshot:', error); - let errorMessage = 'Failed to capture webpage screenshot'; - - // Try to extract detailed error message from response - if (error.response && error.response.data && error.response.data.error) { - errorMessage = error.response.data.error; - } else if (error.message) { - errorMessage = error.message; - } - - runInAction(() => { - this._screenshotError = errorMessage; - this._isLoadingScreenshot = false; - }); - } - } catch (error: any) { - // Handle unexpected errors - runInAction(() => { - console.error('Unexpected error in captureWebScreenshot:', error); - this._screenshotError = 'An unexpected error occurred'; - this._isLoadingScreenshot = false; - }); - } - }; - - @action onWebUrlDrop = (e: React.DragEvent) => { const { dataTransfer } = e; const html = dataTransfer.getData('text/html'); @@ -830,28 +824,13 @@ export class WebBox extends ViewBoxAnnotatableComponent() { setData = (data: FieldType | Promise) => { if (!(typeof data === 'string') && !(data instanceof WebField)) return false; if (Field.toString(data) === this._url) return false; - - // Reset state for new URL this._scrollHeight = 0; - this._screenshotUrl = null; - this._fullHeight = 0; - this._isLoadingScreenshot = false; - - // Clear stored screenshot metadata for the previous URL - this.dataDoc[this.fieldKey + '_screenshotUrl'] = undefined; - this.dataDoc[this.fieldKey + '_screenshotHeight'] = undefined; - const oldUrl = this._url; const history = Cast(this.dataDoc[this.fieldKey + '_history'], listSpec('string'), []); const weburl = new WebField(Field.toString(data)); this.dataDoc[this.fieldKey + '_future'] = new List([]); this.dataDoc[this.fieldKey + '_history'] = new List([...(history || []), oldUrl]); this.dataDoc[this.fieldKey] = weburl; - - // Capture screenshot for the new URL - this._webUrl = weburl.toString(); - this.captureWebScreenshot(); - return true; }; onWebUrlValueKeyDown = (e: React.KeyboardEvent) => { @@ -868,14 +847,26 @@ export class WebBox extends ViewBoxAnnotatableComponent() { description: (this.layoutDoc[this.fieldKey + '_useCors'] ? "Don't Use" : 'Use') + ' Cors', event: () => { this.layoutDoc[this.fieldKey + '_useCors'] = !this.layoutDoc[this.fieldKey + '_useCors']; - // Re-capture screenshot with the new setting - this.captureWebScreenshot(); }, icon: 'snowflake', }); - - // Remove the "Allow Scripts" option since it's not relevant for screenshots - + funcs.push({ + description: (this.dataDoc[this.fieldKey + '_allowScripts'] ? 'Prevent' : 'Allow') + ' Scripts', + event: () => { + this.dataDoc[this.fieldKey + '_allowScripts'] = !this.dataDoc[this.fieldKey + '_allowScripts']; + if (this._iframe) { + runInAction(() => { + this._hackHide = true; + }); + setTimeout( + action(() => { + this._hackHide = false; + }) + ); + } + }, + icon: 'snowflake', + }); funcs.push({ description: (!this.layoutDoc.layout_reflowHorizontal ? 'Force' : 'Prevent') + ' Reflow', event: () => { @@ -887,21 +878,7 @@ export class WebBox extends ViewBoxAnnotatableComponent() { }, icon: 'snowflake', }); - - // Add a refresh option to re-capture the screenshot - funcs.push({ - description: 'Refresh Screenshot', - event: () => this.captureWebScreenshot(), - icon: 'sync-alt', - }); - - !Doc.noviceMode && - funcs.push({ - description: 'Update Icon', - event: () => this.updateIcon(), - icon: 'portrait', - }); - + !Doc.noviceMode && funcs.push({ description: 'Update Icon', event: () => this.updateIcon(), icon: 'portrait' }); cm.addItem({ description: 'Options...', subitems: funcs, icon: 'asterisk' }); } }; @@ -913,7 +890,7 @@ export class WebBox extends ViewBoxAnnotatableComponent() { */ @action onMarqueeDown = (e: React.PointerEvent) => { - const sel = window.document.getSelection(); + const sel = this._url ? this._iframe?.contentDocument?.getSelection() : window.document.getSelection(); this._textAnnotationCreator = undefined; if (sel?.empty) sel.empty(); // Chrome @@ -948,7 +925,6 @@ export class WebBox extends ViewBoxAnnotatableComponent() { @computed get urlContent() { if (this.ScreenToLocalBoxXf().Scale > 25) return
; - setTimeout( action(() => { if (this._initialScroll === undefined && !this._webPageHasBeenRendered) { @@ -957,10 +933,7 @@ export class WebBox extends ViewBoxAnnotatableComponent() { this._webPageHasBeenRendered = true; }) ); - const field = this.dataDoc[this._props.fieldKey]; - - // Handle HTML field (text content) if (field instanceof HtmlField) { return ( () { /> ); } - - // Handle WebField (screenshot of webpage) if (field instanceof WebField) { const url = this.layoutDoc[this.fieldKey + '_useCors'] ? '/corsproxy/' + this._webUrl : this._webUrl; const scripts = this.dataDoc[this.fieldKey + '_allowScripts'] || this._webUrl.includes('wikipedia.org') || this._webUrl.includes('google.com') || this._webUrl.startsWith('https://bing'); @@ -1198,7 +1169,7 @@ export class WebBox extends ViewBoxAnnotatableComponent() { TraceMobx(); // const previewScale = this._previewNativeWidth ? 1 - this.sidebarWidth() / this._previewNativeWidth : 1; const pointerEvents = this.layoutDoc._lockedPosition ? 'none' : (this._props.pointerEvents?.() as Property.PointerEvents | undefined); - const scale = this._props.NativeDimScaling?.() || 1; + // const scale = previewScale * (this._props.NativeDimScaling?.() || 1); return (
() { height: '100%', //`${100 / scale}%`, pointerEvents, }} + // when active, block wheel events from propagating since they're handled by the iframe onWheel={this.onZoomWheel} onScroll={() => this.setDashScrollTop(this._outerRef.current?.scrollTop || 0)} onPointerDown={this.onMarqueeDown}> -
+
this._props.PanelHeight() && this._scrollHeight) || '100%', pointerEvents }}> {this.content}
{this.renderTransparentAnnotations}
{this.renderOpaqueAnnotations} @@ -1258,13 +1224,6 @@ export class WebBox extends ViewBoxAnnotatableComponent() {
- - {/* Refresh button */} -
); } @@ -1293,25 +1252,16 @@ export class WebBox extends ViewBoxAnnotatableComponent() { annotationPointerEvents = () => (this._props.isContentActive() && (SnappingManager.IsDragging || Doc.ActiveTool !== InkTool.None) ? 'all' : 'none'); render() { TraceMobx(); - const containerWidth = NumCast(this.layoutDoc._width) || this._props.PanelWidth(); + const previewScale = this._previewNativeWidth ? 1 - this.sidebarWidth() / this._previewNativeWidth : 1; const pointerEvents = this.layoutDoc._lockedPosition ? 'none' : (this._props.pointerEvents?.() as Property.PointerEvents); - const scale = this._props.NativeDimScaling?.() || 1; - - // Force the component to be square - this.layoutDoc._height = containerWidth; - this.layoutDoc._width = containerWidth; - this.layoutDoc._forceActive = true; - + const scale = previewScale * (this._props.NativeDimScaling?.() || 1); return (
() {
); } - - get marqueeing() { - return this._marqueeing; - } - set marqueeing(val) { - val && this._marqueeref.current?.onInitiateSelection(val); - !val && this._marqueeref.current?.onTerminateSelection(); - this._marqueeing = val; - } } // eslint-disable-next-line prefer-arrow-callback ScriptingGlobals.add(function urlHash(url: string) { @@ -1395,149 +1336,3 @@ Docs.Prototypes.TemplateMap.set(DocumentType.WEB, { layout: { view: WebBox, dataField: 'data' }, options: { acl: '', _height: 300, _layout_fitWidth: true, _layout_nativeDimEditable: true, _layout_reflowVertical: true, waitForDoubleClickToClick: 'always', systemIcon: 'BsGlobe' }, }); - -// Add CSS styles for screenshot mode -const webBoxStyles = ` -.webBox-screenshot-container { - width: 100%; - position: relative; - overflow: visible; - display: flex; - align-items: flex-start; - justify-content: center; - background-color: #f5f5f5; -} - -.webBox-screenshot { - width: 100%; - pointer-events: none; - display: block; - user-select: none; - object-fit: contain; - transition: opacity 0.3s ease; -} - -.webBox-loading { - padding: 20px; - text-align: center; - color: #666; - background-color: #f5f5f5; - border-radius: 4px; - min-height: 200px; - display: flex; - flex-direction: column; - align-items: center; - justify-content: center; -} - -.webBox-loading-message { - font-size: 16px; - margin-bottom: 15px; - color: #555; -} - -.webBox-loading-spinner { - margin-top: 10px; - color: #1976d2; -} - -.webBox-error { - padding: 20px; - color: #d32f2f; - text-align: center; - background-color: #ffebee; - border-radius: 4px; - min-height: 200px; - display: flex; - flex-direction: column; - align-items: center; - justify-content: center; - gap: 15px; -} - -.webBox-error-icon { - color: #d32f2f; - margin-bottom: 10px; -} - -.webBox-error-message { - color: #d32f2f; - font-size: 14px; - max-width: 80%; - line-height: 1.5; -} - -.webBox-error-actions { - margin-top: 10px; -} - -.webBox-retry-button { - background-color: #f44336; - color: white; - border: none; - padding: 8px 16px; - border-radius: 4px; - cursor: pointer; - font-size: 14px; - transition: background-color 0.3s; -} - -.webBox-retry-button:hover { - background-color: #d32f2f; -} - -.webBox-placeholder { - padding: 20px; - text-align: center; - color: #757575; - background-color: #fafafa; - border-radius: 4px; - min-height: 200px; - display: flex; - align-items: center; - justify-content: center; -} - -.webBox-refreshButton { - margin-right: 5px; -} - -.webBox-innerContent { - position: relative; - width: 100%; - background-color: #f5f5f5; - overflow: visible; -} - -.webBox-outerContent { - overflow: auto; - width: 100%; - background-color: #f5f5f5; - position: relative; -} - -.webBox-container { - position: relative; - display: flex; - flex-direction: column; - height: 100%; - background-color: white; - border-radius: 4px; - overflow: hidden; -} - -.webBox { - position: relative; - height: 100%; - width: 100%; - overflow: hidden; - background-color: white; - border-radius: 4px; - box-shadow: 0 1px 3px rgba(0, 0, 0, 0.12), 0 1px 2px rgba(0, 0, 0, 0.24); -} -`; - -// Add the styles to the document -const styleEl = document.createElement('style'); -styleEl.textContent = webBoxStyles; -document.head.appendChild(styleEl); diff --git a/src/client/views/nodes/WebBoxRenderer.js b/src/client/views/nodes/WebBoxRenderer.js index ef465c453..31e0ef5e4 100644 --- a/src/client/views/nodes/WebBoxRenderer.js +++ b/src/client/views/nodes/WebBoxRenderer.js @@ -145,6 +145,29 @@ const ForeignHtmlRenderer = function (styleSheets) { return urlsFound; }; + /** + * Extracts font-face URLs from CSS rules + * @param {String} cssRuleStr + * @returns {String[]} + */ + const getFontFaceUrlsFromCss = function (cssRuleStr) { + const fontFaceUrls = []; + // Find @font-face blocks + const fontFaceBlocks = cssRuleStr.match(/@font-face\s*{[^}]*}/g) || []; + + fontFaceBlocks.forEach(block => { + // Extract URLs from src properties + const urls = block.match(/src\s*:\s*[^;]*/g) || []; + urls.forEach(srcDeclaration => { + // Find all url() references in the src declaration + const fontUrls = getUrlsFromCssString(srcDeclaration); + fontFaceUrls.push(...fontUrls); + }); + }); + + return fontFaceUrls; + }; + /** * * @param {String} html @@ -158,6 +181,61 @@ const ForeignHtmlRenderer = function (styleSheets) { return string.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'); // $& means the whole matched string }; + /** + * Create a fallback font-face rule for handling CORS errors + * @returns {String} + */ + const createFallbackFontFaceRules = function () { + return ` + @font-face { + font-family: 'CORS-fallback-serif'; + src: local('Times New Roman'), local('Georgia'), serif; + } + @font-face { + font-family: 'CORS-fallback-sans'; + src: local('Arial'), local('Helvetica'), sans-serif; + } + /* Add fallback font handling */ + [data-font-error] { + font-family: 'CORS-fallback-sans', sans-serif !important; + } + [data-font-error="serif"] { + font-family: 'CORS-fallback-serif', serif !important; + } + `; + }; + + /** + * Clean up and optimize CSS for better rendering + * @param {String} cssStyles + * @returns {String} + */ + const optimizeCssForRendering = function (cssStyles) { + // Add fallback font-face rules + const enhanced = cssStyles + createFallbackFontFaceRules(); + + // Replace problematic font-face declarations with proxied versions + let optimized = enhanced.replace(/(url\(['"]?)(https?:\/\/[^)'"]+)(['"]?\))/gi, (match, prefix, url, suffix) => { + // If it's a font file, proxy it + if (url.match(/\.(woff2?|ttf|eot|otf)(\?.*)?$/i)) { + return `${prefix}${CorsProxy(url)}${suffix}`; + } + return match; + }); + + // Add error handling for fonts + optimized += ` + /* Suppress font CORS errors in console */ + @supports (font-display: swap) { + @font-face { + font-display: swap !important; + } + } + `; + + return optimized; + }; + /** * * @param {String} contentHtml @@ -175,6 +253,7 @@ const ForeignHtmlRenderer = function (styleSheets) { // copy styles let cssStyles = ''; const urlsFoundInCss = []; + const fontUrlsInCss = []; for (let i = 0; i < styleSheets.length; i += 1) { try { @@ -182,6 +261,7 @@ const ForeignHtmlRenderer = function (styleSheets) { for (let j = 0; j < rules.length; j += 1) { const cssRuleStr = rules[j].cssText; urlsFoundInCss.push(...getUrlsFromCssString(cssRuleStr)); + fontUrlsInCss.push(...getFontFaceUrlsFromCss(cssRuleStr)); cssStyles += cssRuleStr; } } catch (e) { @@ -189,6 +269,9 @@ const ForeignHtmlRenderer = function (styleSheets) { } } + // Optimize and enhance CSS + cssStyles = optimizeCssForRendering(cssStyles); + // const fetchedResourcesFromStylesheets = await getMultipleResourcesAsBase64(webUrl, urlsFoundInCss); // for (let i = 0; i < fetchedResourcesFromStylesheets.length; i++) { // const r = fetchedResourcesFromStylesheets[i]; @@ -203,6 +286,26 @@ const ForeignHtmlRenderer = function (styleSheets) { .replace(/
<\/div>/g, '') // when scripting isn't available (ie, rendering web pages here),