diff options
author | bobzel <zzzman@gmail.com> | 2025-05-10 20:26:00 -0400 |
---|---|---|
committer | bobzel <zzzman@gmail.com> | 2025-05-10 20:26:00 -0400 |
commit | b1bb206c73a0fbc4fb439cedd212565f7f85f4f8 (patch) | |
tree | 6eb38f1be57977c2c4bf91d11ba108bad8f3ad70 /src/server/server_Initialization.ts | |
parent | 534532aebbab49b00dc76ad6a0cf6c20368d818c (diff) |
Changed how we corsProxy web pages to be simpler and work better. Changed dragging off annotations after text selections to only create a text doc when the drop target is the parent collection -- otherwise links are created.
Diffstat (limited to 'src/server/server_Initialization.ts')
-rw-r--r-- | src/server/server_Initialization.ts | 220 |
1 files changed, 85 insertions, 135 deletions
diff --git a/src/server/server_Initialization.ts b/src/server/server_Initialization.ts index a56ab5d18..514e2ce1e 100644 --- a/src/server/server_Initialization.ts +++ b/src/server/server_Initialization.ts @@ -1,19 +1,15 @@ import * as bodyParser from 'body-parser'; -import * as brotli from 'brotli'; import { blue, yellow } from 'colors'; import * as flash from 'connect-flash'; import * as MongoStoreConnect from 'connect-mongo'; -import * as cors from 'cors'; import * as express from 'express'; import * as expressFlash from 'express-flash'; import * as session from 'express-session'; import { createServer } from 'https'; import * as passport from 'passport'; -import * as request from 'request'; import * as webpack from 'webpack'; import * as wdm from 'webpack-dev-middleware'; import * as whm from 'webpack-hot-middleware'; -import * as zlib from 'zlib'; import * as config from '../../webpack.config'; import { logPort } from './ActionUtilities'; import RouteManager from './RouteManager'; @@ -23,6 +19,8 @@ import { SSL } from './apis/google/CredentialsLoader'; import { getForgot, getLogin, getLogout, getReset, getSignup, postForgot, postLogin, postReset, postSignup } from './authentication/AuthenticationManager'; import { Database } from './database'; import { WebSocket } from './websocket'; +import axios from 'axios'; +import { JSDOM } from 'jsdom'; /* RouteSetter is a wrapper around the server that prevents the server from being exposed. 
*/ @@ -84,142 +82,96 @@ function buildWithMiddleware(server: express.Express) { return server; } -function registerEmbeddedBrowseRelativePathHandler(server: express.Express) { - server.use('*', (req, res) => { - // res.setHeader('Access-Control-Allow-Origin', '*'); - // res.header('Access-Control-Allow-Methods', 'GET, PUT, PATCH, POST, DELETE'); - // res.header('Access-Control-Allow-Headers', req.header('access-control-request-headers')); - const relativeUrl = req.originalUrl; - if (!res.headersSent && req.headers.referer?.includes('corsProxy')) { - if (!req.user) res.redirect('/home'); // When no user is logged in, we interpret a relative URL as being a reference to something they don't have access to and redirect to /home - // a request for something by a proxied referrer means it must be a relative reference. So construct a proxied absolute reference here. - try { - const proxiedRefererUrl = decodeURIComponent(req.headers.referer); // (e.g., http://localhost:<port>/corsProxy/https://en.wikipedia.org/wiki/Engelbart) - const dashServerUrl = proxiedRefererUrl.match(/.*corsProxy\//)![0]; // the dash server url (e.g.: http://localhost:<port>/corsProxy/ ) - const actualReferUrl = proxiedRefererUrl.replace(dashServerUrl, ''); // the url of the referer without the proxy (e.g., : https://en.wikipedia.org/wiki/Engelbart) - const absoluteTargetBaseUrl = actualReferUrl.match(/https?:\/\/[^/]*/)![0]; // the base of the original url (e.g., https://en.wikipedia.org) - const redirectedProxiedUrl = dashServerUrl + encodeURIComponent(absoluteTargetBaseUrl + relativeUrl); // the new proxied full url (e.g., http://localhost:<port>/corsProxy/https://en.wikipedia.org/<somethingelse>) - const redirectUrl = relativeUrl.startsWith('//') ? 
'http:' + relativeUrl : redirectedProxiedUrl; - res.redirect(redirectUrl); - } catch (e) { - console.log('Error embed: ', e); +function registerCorsProxy(server: express.Express) { + // .replace('<head>', '<head> <style>[id ^= "google"] { display: none; } </style>') + server.use('/corsproxy', async (req, res) => { + try { + // Extract URL from either query param or path + let targetUrl: string; + + if (req.query.url) { + // Case 1: URL passed as query parameter (/corsproxy?url=...) + targetUrl = req.query.url as string; + } else { + // Case 2: URL passed as path (/corsproxy/http://example.com) + const path = req.originalUrl.replace(/^\/corsproxy\/?/, ''); + targetUrl = decodeURIComponent(path); + + // Add protocol if missing (assuming https as default) + if (!targetUrl.startsWith('http://') && !targetUrl.startsWith('https://')) { + targetUrl = `https://${targetUrl}`; + } + } + + if (!targetUrl) { + res.send(`<html><body bgcolor="red" link="006666" alink="8B4513" vlink="006666"> + <title>Error</title> + <div align="center"><h1>Failed to load: ${targetUrl} </h1></div> + <p>URL is required</p> + </body></html>`); + // res.status(400).json({ error: 'URL is required' }); + return; } - } else if (relativeUrl.startsWith('/search') && !req.headers.referer?.includes('corsProxy')) { - // detect search query and use default search engine - res.redirect(req.headers.referer + 'corsProxy/' + encodeURIComponent('http://www.google.com' + relativeUrl)); - } else { - res.status(404).json({ error: 'no such file or endpoint: try /home /logout /login' }); - } - }); -} -// eslint-disable-next-line @typescript-eslint/no-explicit-any -function proxyServe(req: any, requrl: string, response: any) { - // eslint-disable-next-line @typescript-eslint/no-require-imports - const htmlBodyMemoryStream = new (require('memorystream'))(); - let wasinBrFormat = false; - const sendModifiedBody = () => { - const header = response.headers['content-encoding']; - const refToCors = (match: string, tag: 
string, sym: string, href: string) => `${tag}=${sym + resolvedServerUrl}/corsProxy/${href + sym}`; - // const relpathToCors = (match: any, href: string, offset: any, string: any) => `="${resolvedServerUrl + '/corsProxy/' + decodeURIComponent(req.originalUrl.split('/corsProxy/')[1].match(/https?:\/\/[^\/]*/)?.[0] ?? '') + '/' + href}"`; - if (header) { + // Validate URL format try { - const bodyStream = htmlBodyMemoryStream.read(); - if (bodyStream) { - const htmlInputText = wasinBrFormat ? Buffer.from(brotli.decompress(bodyStream)) : header.includes('gzip') ? zlib.gunzipSync(bodyStream) : bodyStream; - const htmlText = htmlInputText - .toString('utf8') - .replace('<head>', '<head> <style>[id ^= "google"] { display: none; } </style>') - .replace(/(src|href)=(['"])(https?[^\n]*)\1/g, refToCors) // replace src or href='http(s)://...' or href="http(s)://.." - // .replace(/= *"\/([^"]*)"/g, relpathToCors) - .replace(/data-srcset="[^"]*"/g, '') - .replace(/srcset="[^"]*"/g, '') - .replace(/target="_blank"/g, ''); - response.send(header?.includes('gzip') ? 
zlib.gzipSync(htmlText) : htmlText); - } else { - req.pipe(request(requrl)) - // eslint-disable-next-line @typescript-eslint/no-explicit-any - .on('error', (e: any) => console.log('requrl ', e)) - .pipe(response) - // eslint-disable-next-line @typescript-eslint/no-explicit-any - .on('error', (e: any) => console.log('response pipe error', e)); - console.log('EMPTY body:' + req.url); - } + new URL(targetUrl); } catch (e) { - console.log('ERROR?: ', e); - } - } else { - req.pipe(htmlBodyMemoryStream) - // eslint-disable-next-line @typescript-eslint/no-explicit-any - .on('error', (e: any) => console.log('html body memorystream error', e)) - .pipe(response) - // eslint-disable-next-line @typescript-eslint/no-explicit-any - .on('error', (e: any) => console.log('html body memory stream response error', e)); - } - }; - const retrieveHTTPBody = () => { - // req.headers.cookie = ''; - req.pipe(request(requrl)) - // eslint-disable-next-line @typescript-eslint/no-explicit-any - .on('error', (e: any) => { - console.log(`CORS url error: ${requrl}`, e); - response.send(`<html><body bgcolor="red" link="006666" alink="8B4513" vlink="006666"> + res.send(`<html><body bgcolor="red" link="006666" alink="8B4513" vlink="006666"> <title>Error</title> - <div align="center"><h1>Failed to load: ${requrl} </h1></div> + <div align="center"><h1>Failed to load: ${targetUrl} </h1></div> <p>${e}</p> </body></html>`); - }) - // eslint-disable-next-line @typescript-eslint/no-explicit-any - .on('response', (res: any) => { - res.headers; - const headers = Object.keys(res.headers); - const headerCharRegex = /[^\t\x20-\x7e\x80-\xff]/; - headers.forEach(headerName => { - const header = res.headers[headerName]; - if (Array.isArray(header)) { - res.headers[headerName] = header.filter(h => !headerCharRegex.test(h)); - } else if (headerCharRegex.test(header || '')) { - delete res.headers[headerName]; - } else res.headers[headerName] = header; - if (headerName === 'content-encoding') { - wasinBrFormat = 
res.headers[headerName] === 'br'; - res.headers[headerName] = 'gzip'; - } + //res.status(400).json({ error: 'Invalid URL format' }); + return; + } + + const response = await axios.get(targetUrl as string, { + headers: { 'User-Agent': req.headers['user-agent'] || 'Mozilla/5.0' }, + responseType: 'text', + }); + + const baseUrl = new URL(targetUrl as string); + + if (response.headers['content-type']?.includes('text/html')) { + const dom = new JSDOM(response.data); + const document = dom.window.document; + + // Process all elements with href/src + const elements = document.querySelectorAll('[href],[src]'); + elements.forEach(elem => { + const attrs = []; + if (elem.hasAttribute('href')) attrs.push('href'); + if (elem.hasAttribute('src')) attrs.push('src'); + + attrs.forEach(attr => { + const originalUrl = elem.getAttribute(attr); + if (!originalUrl || originalUrl.startsWith('http://') || originalUrl.startsWith('https://') || originalUrl.startsWith('data:') || /^[a-z]+:/.test(originalUrl)) { + return; + } + + const resolvedUrl = new URL(originalUrl, baseUrl).toString(); + elem.setAttribute(attr, resolvedUrl); + }); }); - res.headers['x-permitted-cross-domain-policies'] = 'all'; - res.headers['x-frame-options'] = ''; - res.headers['content-security-policy'] = ''; - response.headers = response._headers = res.headers; - }) - .on('end', sendModifiedBody) - .pipe(htmlBodyMemoryStream) - // eslint-disable-next-line @typescript-eslint/no-explicit-any - .on('error', (e: any) => console.log('http body pipe error', e)); - }; - retrieveHTTPBody(); -} -function registerCorsProxy(server: express.Express) { - server.use('/corsProxy', async (req, res) => { - res.setHeader('Access-Control-Allow-Origin', '*'); - res.header('Access-Control-Allow-Methods', 'GET, PUT, PATCH, POST, DELETE'); - res.header('Access-Control-Allow-Headers', req.header('access-control-request-headers')); - const referer = req.headers.referer ? 
decodeURIComponent(req.headers.referer) : ''; - let requrlraw = decodeURIComponent(req.url.substring(1)); - const qsplit = requrlraw.split('?q='); - const newqsplit = requrlraw.split('&q='); - if (qsplit.length > 1 && newqsplit.length > 1) { - const lastq = newqsplit[newqsplit.length - 1]; - requrlraw = qsplit[0] + '?q=' + lastq.split('&')[0] + '&' + qsplit[1].split('&')[1]; - } - const requrl = requrlraw.startsWith('/') ? referer + requrlraw : requrlraw; - // cors weirdness here... - // if the referer is a cors page and the cors() route (I think) redirected to /corsProxy/<path> and the requested url path was relative, - // then we redirect again to the cors referer and just add the relative path. - if (!requrl.startsWith('http') && req.originalUrl.startsWith('/corsProxy') && referer?.includes('corsProxy')) { - res.redirect(referer + (referer.endsWith('/') ? '' : '/') + requrl); - } else { - proxyServe(req, requrl, res); + // Handle base tag + const baseTags = document.querySelectorAll('base'); + baseTags.forEach(tag => tag.remove()); + + const newBase = document.createElement('base'); + newBase.setAttribute('href', `${baseUrl}/`); + document.head.insertBefore(newBase, document.head.firstChild); + + response.data = dom.serialize(); + } + + res.set({ + 'Access-Control-Allow-Origin': '*', + 'Content-Type': response.headers['content-type'], + }).send(response.data); + } catch (error: unknown) { + res.status(500).json({ error: 'Proxy error', details: (error as { message: string }).message }); } }); } @@ -255,13 +207,11 @@ export default async function InitializeServer(routeSetter: RouteSetter) { app.use(whm(compiler)); app.get(/^\/+$/, (req, res) => res.redirect(req.user ? 
'/home' : '/login')); // target urls that consist of one or more '/'s with nothing in between app.use(express.static(publicDirectory, { setHeaders: res => res.setHeader('Access-Control-Allow-Origin', '*') })); // all urls that start with dash's public directory: /files/ (e.g., /files/images, /files/audio, etc) - // eslint-disable-next-line @typescript-eslint/no-explicit-any - app.use(cors({ origin: (_origin: any, callback: any) => callback(null, true) })); + // app.use(cors({ origin: (_origin: any, callback: any) => callback(null, true) })); registerAuthenticationRoutes(app); // this adds routes to authenticate a user (login, etc) - registerCorsProxy(app); // this adds a /corsProxy/ route to allow clients to get to urls that would otherwise be blocked by cors policies + registerCorsProxy(app); // this adds a /corsproxy/ route to allow clients to get to urls that would otherwise be blocked by cors policies isRelease && !SSL.Loaded && SSL.exit(); routeSetter(new RouteManager(app, isRelease)); // this sets up all the regular supervised routes (things like /home, download/upload api's, pdf, search, session, etc) - registerEmbeddedBrowseRelativePathHandler(app); // this allows renered web pages which internally have relative paths to find their content isRelease && process.env.serverPort && (resolvedPorts.server = Number(process.env.serverPort)); const server = isRelease ? createServer(SSL.Credentials, app) : app; await new Promise<void>(resolve => { |