diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/client/DocServer.ts | 11 | ||||
-rw-r--r-- | src/client/views/nodes/imageEditor/ImageEditor.tsx | 4 | ||||
-rw-r--r-- | src/fields/ObjectField.ts | 6 | ||||
-rw-r--r-- | src/server/ApiManagers/AssistantManager.ts | 5 | ||||
-rw-r--r-- | src/server/ApiManagers/DownloadManager.ts | 262 | ||||
-rw-r--r-- | src/server/ApiManagers/UploadManager.ts | 21 | ||||
-rw-r--r-- | src/server/chunker/requirements.txt | 37 | ||||
-rw-r--r-- | src/server/index.ts | 2 |
8 files changed, 52 insertions, 296 deletions
diff --git a/src/client/DocServer.ts b/src/client/DocServer.ts index c644308b7..e3c4609c0 100644 --- a/src/client/DocServer.ts +++ b/src/client/DocServer.ts @@ -277,11 +277,11 @@ export namespace DocServer { const fetchDocPromises: Map<string, Promise<Opt<Doc>>> = new Map(); // { p: Promise<Doc>; id: string }[] = []; // promises to fetch the value for a requested Doc // Determine which requested documents need to be fetched - // eslint-disable-next-line no-restricted-syntax for (const id of ids.filter(filterid => filterid)) { if (_cache[id] === undefined) { // EMPTY CACHE - make promise that we resolve after all batch-requested Docs have been fetched and deserialized and we know we have this Doc - const fetchPromise = new Promise<Opt<Doc>>(res => + // eslint-disable-next-line no-loop-func + _cache[id] = new Promise<Opt<Doc>>(res => allCachesFilledPromise.then(() => { // if all Docs have been cached, then we can be sure the fetched Doc has been found and cached. So return it to anyone who had been awaiting it. const cache = _cache[id]; @@ -289,8 +289,7 @@ export namespace DocServer { res(cache instanceof Doc ? cache : undefined); }) ); - // eslint-disable-next-line no-loop-func - fetchDocPromises.set(id, (_cache[id] = fetchPromise)); + fetchDocPromises.set(id, _cache[id]); uncachedRequestedIds.push(id); // add to list of Doc requests from server } // else CACHED => do nothing, Doc or promise of Doc is already in cache @@ -307,11 +306,11 @@ export namespace DocServer { let processed = 0; console.log('Retrieved ' + serializedFields.length + ' fields'); // After the serialized Docs have been received, deserialize them into objects. - // eslint-disable-next-line no-restricted-syntax for (const field of serializedFields) { - // eslint-disable-next-line no-await-in-loop ++processed % 150 === 0 && + // eslint-disable-next-line no-await-in-loop (await new Promise<number>( + // eslint-disable-next-line no-loop-func res => setTimeout(action(() => res(FieldLoader.ServerLoadStatus.retrieved = processed))) // prettier-ignore )); // force loading to yield to splash screen rendering to update progress diff --git a/src/client/views/nodes/imageEditor/ImageEditor.tsx b/src/client/views/nodes/imageEditor/ImageEditor.tsx index 198b8e713..abe235ad5 100644 --- a/src/client/views/nodes/imageEditor/ImageEditor.tsx +++ b/src/client/views/nodes/imageEditor/ImageEditor.tsx @@ -286,8 +286,8 @@ const ImageEditor = ({ imageEditorOpen, imageEditorSource, imageRootDoc, addDoc const maskBlob = await ImageUtility.canvasToBlob(canvasMask); const imgBlob = await ImageUtility.canvasToBlob(canvasOriginalImg); const res = await ImageUtility.getEdit(imgBlob, maskBlob, input || 'Fill in the image in the same style', 2); - if ((res as any).status == 'error') { - alert((res as any).message); + if (res.status == 'error') { + alert(res.message); } // create first image diff --git a/src/fields/ObjectField.ts b/src/fields/ObjectField.ts index c533cb596..1e1dbcd62 100644 --- a/src/fields/ObjectField.ts +++ b/src/fields/ObjectField.ts @@ -3,10 +3,10 @@ import { Copy, FieldChanged, Parent, ToJavascriptString, ToScriptString, ToStrin import { RefField } from './RefField'; export type serializedFieldType = { fieldId: string; heading?: string; __type: string }; -export type serializedFieldsType = { [key: string]: { fields: serializedFieldType[] } }; +export type serializedFieldsType = { [key: string]: { fields: serializedFieldType[] } | string }; export interface serializedDoctype { readonly id: string; - readonly fields?: serializedFieldsType; + readonly fields?: serializedFieldsType | serializedFieldType; } export type serverOpType = { @@ -18,12 +18,10 @@ export type serverOpType = { export abstract class ObjectField { // prettier-ignore public [FieldChanged]?: (diff?: { op: '$addToSet' | '$remFromSet' | '$set'; - // eslint-disable-next-line no-use-before-define items: FieldType[] | undefined; length: number | undefined; hint?: { deleteCount: number, start: number} }, serverOp?: serverOpType) => void; - // eslint-disable-next-line no-use-before-define public [Parent]?: RefField | ObjectField; abstract [Copy](): ObjectField; diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 20ec0dfe6..b917f555c 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -532,7 +532,7 @@ export default class AssistantManager extends ApiManager { try { // Read the file data and encode it as base64 - const file_data: string = fs.readFileSync(public_path, { encoding: 'base64' }); + const file_data = fs.readFileSync(public_path, { encoding: 'base64' }); // Generate a unique job ID for tracking const jobId = uuid.v4(); @@ -781,7 +781,8 @@ function spawnPythonProcess(jobId: string, file_path: string) { console.log('Virtual environment not found. Creating and setting up...'); // Create venv - const createVenvProcess = spawn('python', ['-m', 'venv', venvPath]); + // const createVenvProcess = spawn('python', ['-m', 'venv', venvPath]); + const createVenvProcess = spawn('python3.10', ['-m', 'venv', venvPath]); createVenvProcess.on('close', code => { if (code !== 0) { diff --git a/src/server/ApiManagers/DownloadManager.ts b/src/server/ApiManagers/DownloadManager.ts deleted file mode 100644 index 5ee21fb44..000000000 --- a/src/server/ApiManagers/DownloadManager.ts +++ /dev/null @@ -1,262 +0,0 @@ -import * as Archiver from 'archiver'; -import * as express from 'express'; -import * as path from 'path'; -import { URL } from 'url'; -import { DashUploadUtils, SizeSuffix } from '../DashUploadUtils'; -import { Method } from '../RouteManager'; -import RouteSubscriber from '../RouteSubscriber'; -import { Directory, publicDirectory, serverPathToFile } from '../SocketData'; -import { Database } from '../database'; -import ApiManager, { Registration } from './ApiManager'; - -export type Hierarchy = { [id: string]: string | Hierarchy }; -export type ZipMutator = (file: Archiver.Archiver) => void | Promise<void>; -export interface DocumentElements { - data: string | any[]; - title: string; -} - -/** - * This is a very specific utility method to help traverse the database - * to parse data and titles out of images and collections alone. - * - * We don't know if the document id given to is corresponds to a view document or a data - * document. If it's a data document, the response from the database will have - * a data field. If not, call recursively on the proto, and resolve with *its* data - * - * @param targetId the id of the Dash document whose data is being requests - * @returns the data of the document, as well as its title - */ -async function getData(targetId: string): Promise<DocumentElements> { - return new Promise<DocumentElements>((resolve, reject) => { - Database.Instance.getDocument(targetId, async (result: any) => { - const { data, proto, title } = result.fields; - if (data) { - if (data.url) { - resolve({ data: data.url, title }); - } else if (data.fields) { - resolve({ data: data.fields, title }); - } else { - reject(); - } - } else if (proto) { - getData(proto.fieldId).then(resolve, reject); - } else { - reject(); - } - }); - }); -} - -/** - * This function starts with a single document id as a seed, - * typically that of a collection, and then descends the entire tree - * of image or collection documents that are reachable from that seed. - * @param seedId the id of the root of the subtree we're trying to capture, interesting only if it's a collection - * @param hierarchy the data structure we're going to use to record the nesting of the collections and images as we descend - -Below is an example of the JSON hierarchy built from two images contained inside a collection titled 'a nested collection', -following the general recursive structure shown immediately below -{ - "parent folder name":{ - "first child's fild name":"first child's url" - ... - "nth child's fild name":"nth child's url" - } -} -{ - "a nested collection (865c4734-c036-4d67-a588-c71bb43d1440)":{ - "an image of a cat (ace99ffd-8ed8-4026-a5d5-a353fff57bdd).jpg":"https://upload.wikimedia.org/wikipedia/commons/thumb/3/3a/Cat03.jpg/1200px-Cat03.jpg", - "1*SGJw31T5Q9Zfsk24l2yirg.gif (9321cc9b-9b3e-4cb6-b99c-b7e667340f05).gif":"https://cdn-media-1.freecodecamp.org/images/1*SGJw31T5Q9Zfsk24l2yirg.gif" - } -} -*/ -async function buildHierarchyRecursive(seedId: string, hierarchy: Hierarchy): Promise<void> { - const { title, data } = await getData(seedId); - const label = `${title} (${seedId})`; - // is the document a collection? - if (Array.isArray(data)) { - // recurse over all documents in the collection. - const local: Hierarchy = {}; // create a child hierarchy for this level, which will get passed in as the parent of the recursive call - hierarchy[label] = local; // store it at the index in the parent, so we'll end up with a map of maps of maps - await Promise.all(data.map(proxy => buildHierarchyRecursive(proxy.fieldId, local))); - } else { - // now, data can only be a string, namely the url of the image - const filename = label + path.extname(data); // this is the file name under which the output image will be stored - hierarchy[filename] = data; - } -} - -/** - * This utility function factors out the process - * of creating a zip file and sending it back to the client - * by piping it into a response. - * - * Learn more about piping and readable / writable streams here! - * https://www.freecodecamp.org/news/node-js-streams-everything-you-need-to-know-c9141306be93/ - * - * @param res the writable stream response object that will transfer the generated zip file - * @param mutator the callback function used to actually modify and insert information into the zip instance - */ -export async function BuildAndDispatchZip(res: express.Response, mutator: ZipMutator): Promise<void> { - res.set('Content-disposition', `attachment;`); - res.set('Content-Type', 'application/zip'); - const zip = Archiver('zip'); - zip.pipe(res); - await mutator(zip); - return zip.finalize(); -} - -/** - * - * @param file the zip file to which we write the files - * @param hierarchy the data structure from which we read, defining the nesting of the documents in the zip - * @param prefix lets us create nested folders in the zip file by continually appending to the end - * of the prefix with each layer of recursion. - * - * Function Call #1 => "Dash Export" - * Function Call #2 => "Dash Export/a nested collection" - * Function Call #3 => "Dash Export/a nested collection/lowest level collection" - * ... - */ -async function writeHierarchyRecursive(file: Archiver.Archiver, hierarchy: Hierarchy, prefix = 'Dash Export'): Promise<void> { - // eslint-disable-next-line no-restricted-syntax - for (const documentTitle in hierarchy) { - if (Object.prototype.hasOwnProperty.call(hierarchy, documentTitle)) { - const result = hierarchy[documentTitle]; - // base case or leaf node, we've hit a url (image) - if (typeof result === 'string') { - let fPath: string; - const matches = /:\d+\/files\/images\/(upload_[\da-z]{32}.*)/g.exec(result); - if (matches !== null) { - // image already exists on our server - fPath = serverPathToFile(Directory.images, matches[1]); - } else { - // the image doesn't already exist on our server (may have been dragged - // and dropped in the browser and thus hosted remotely) so we upload it - // to our server and point the zip file to it, so it can bundle up the bytes - // eslint-disable-next-line no-await-in-loop - const information = await DashUploadUtils.UploadImage(result); - fPath = information instanceof Error ? '' : information.accessPaths[SizeSuffix.Original].server; - } - // write the file specified by the path to the directory in the - // zip file given by the prefix. - if (fPath) { - file.file(fPath, { name: documentTitle, prefix }); - } - } else { - // we've hit a collection, so we have to recurse - // eslint-disable-next-line no-await-in-loop - await writeHierarchyRecursive(file, result, `${prefix}/${documentTitle}`); - } - } - } -} - -async function getDocs(docId: string) { - const files = new Set<string>(); - const docs: { [id: string]: any } = {}; - const fn = (doc: any): string[] => { - const { id } = doc; - if (typeof id === 'string' && id.endsWith('Proto')) { - // Skip protos - return []; - } - const ids: string[] = []; - // eslint-disable-next-line no-restricted-syntax - for (const key in doc.fields) { - // eslint-disable-next-line no-continue - if (!Object.prototype.hasOwnProperty.call(doc.fields, key)) continue; - - const field = doc.fields[key]; - // eslint-disable-next-line no-continue - if (field === undefined || field === null) continue; - - if (field.__type === 'proxy' || field.__type === 'prefetch_proxy') { - ids.push(field.fieldId); - } else if (field.__type === 'script' || field.__type === 'computed') { - field.captures && ids.push(field.captures.fieldId); - } else if (field.__type === 'list') { - ids.push(...fn(field)); - } else if (typeof field === 'string') { - const re = /"(?:dataD|d)ocumentId"\s*:\s*"([\w-]*)"/g; - for (let match = re.exec(field); match !== null; match = re.exec(field)) { - ids.push(match[1]); - } - } else if (field.__type === 'RichTextField') { - const re = /"href"\s*:\s*"(.*?)"/g; - for (let match = re.exec(field.data); match !== null; match = re.exec(field.Data)) { - const urlString = match[1]; - const split = new URL(urlString).pathname.split('doc/'); - if (split.length > 1) { - ids.push(split[split.length - 1]); - } - } - const re2 = /"src"\s*:\s*"(.*?)"/g; - for (let match = re2.exec(field.Data); match !== null; match = re2.exec(field.Data)) { - const urlString = match[1]; - const { pathname } = new URL(urlString); - files.add(pathname); - } - } else if (['audio', 'image', 'video', 'pdf', 'web', 'map'].includes(field.__type)) { - const { pathname } = new URL(field.url); - files.add(pathname); - } - } - - if (doc.id) { - docs[doc.id] = doc; - } - return ids; - }; - await Database.Instance.visit([docId], fn); - return { id: docId, docs, files }; -} - -export default class DownloadManager extends ApiManager { - protected initialize(register: Registration): void { - /** - * Let's say someone's using Dash to organize images in collections. - * This lets them export the hierarchy they've built to their - * own file system in a useful format. - * - * This handler starts with a single document id (interesting only - * if it's that of a collection). It traverses the database, captures - * the nesting of only nested images or collections, writes - * that to a zip file and returns it to the client for download. - */ - register({ - method: Method.GET, - subscription: new RouteSubscriber('imageHierarchyExport').add('docId'), - secureHandler: async ({ req, res }) => { - const id = req.params.docId; - const hierarchy: Hierarchy = {}; - await buildHierarchyRecursive(id, hierarchy); - return BuildAndDispatchZip(res, zip => writeHierarchyRecursive(zip, hierarchy)); - }, - }); - - register({ - method: Method.GET, - subscription: new RouteSubscriber('downloadId').add('docId'), - secureHandler: async ({ req, res }) => - BuildAndDispatchZip(res, async zip => { - const { id, docs, files } = await getDocs(req.params.docId); - const docString = JSON.stringify({ id, docs }); - zip.append(docString, { name: 'doc.json' }); - files.forEach(val => { - zip.file(publicDirectory + val, { name: val.substring(1) }); - }); - }), - }); - - register({ - method: Method.GET, - subscription: new RouteSubscriber('serializeDoc').add('docId'), - secureHandler: async ({ req, res }) => { - const { docs, files } = await getDocs(req.params.docId); - res.send({ docs, files: Array.from(files) }); - }, - }); - } -} diff --git a/src/server/ApiManagers/UploadManager.ts b/src/server/ApiManagers/UploadManager.ts index 1e68a4e30..5e527281f 100644 --- a/src/server/ApiManagers/UploadManager.ts +++ b/src/server/ApiManagers/UploadManager.ts @@ -131,6 +131,9 @@ export default class UploadManager extends ApiManager { }, }); + type fieldstype = string | { __type: string; Data: string } | { __type: string; id: string; fieldId: string; fields: fieldstype[]; captures: { fieldId: string } }; + type doctype = { id: string; fields: fieldstype[] }; + register({ method: Method.POST, subscription: '/uploadDoc', @@ -145,7 +148,7 @@ export default class UploadManager extends ApiManager { ids[id] = uuid.v4(); return ids[id]; }; - const mapFn = (docIn: { id: string; fields: any[] }) => { + const mapFn = (docIn: doctype) => { const doc = docIn; if (doc.id) { doc.id = getId(doc.id); @@ -156,22 +159,20 @@ export default class UploadManager extends ApiManager { const field = doc.fields[key]; if (field === undefined || field === null) continue; - if (field.__type === 'Doc') { - mapFn(field); + if (typeof field === 'string') { + const re = /("(?:dataD|d)ocumentId"\s*:\s*")([\w-]*)"/g; + doc.fields[key] = field.replace(re, (match: string, p1: string, p2: string) => `${p1}${getId(p2)}"`); + } else if ('Data' in field) { + const re = /("href"\s*:\s*")(.*?)"/g; + field.Data = field.Data.replace(re, (match: string, p1: string, p2: string) => `${p1}${getId(p2)}"`); } else if (field.__type === 'proxy' || field.__type === 'prefetch_proxy') { field.fieldId = getId(field.fieldId); } else if (field.__type === 'script' || field.__type === 'computed') { if (field.captures) { field.captures.fieldId = getId(field.captures.fieldId); } - } else if (field.__type === 'list') { + } else if (field.__type === 'list' || field.__type === 'Doc') { mapFn(field); - } else if (typeof field === 'string') { - const re = /("(?:dataD|d)ocumentId"\s*:\s*")([\w-]*)"/g; - doc.fields[key] = field.replace(re, (match: string, p1: string, p2: string) => `${p1}${getId(p2)}"`); - } else if (field.__type === 'RichTextField') { - const re = /("href"\s*:\s*")(.*?)"/g; - field.Data = field.Data.replace(re, (match: string, p1: string, p2: string) => `${p1}${getId(p2)}"`); } } }; diff --git a/src/server/chunker/requirements.txt b/src/server/chunker/requirements.txt index 20bd486e5..586bbe505 100644 --- a/src/server/chunker/requirements.txt +++ b/src/server/chunker/requirements.txt @@ -1,15 +1,36 @@ +# Prefer official CPU wheels from the PyTorch index +--extra-index-url https://download.pytorch.org/whl/cpu + +############################################################################### +# Stable env for pdf_chunker.py # +############################################################################### + +# ─── LLM clients ───────────────────────────────────────────────────────────── +openai==1.40.6 +httpx==0.27.2 # <0.28 → avoids "proxies=" crash anthropic==0.34.0 cohere==5.8.0 -python-dotenv==1.0.1 + +# ─── Torch stack (CPU) ─────────────────────────────────────────────────────── +torch<=2.7.1 +torchvision<=0.22.1 # matches torch 2.5.x +torchaudio<=2.7.1 + +# ─── Vision / OCR / PDF processing ─────────────────────────────────────────── +ultralyticsplus==0.0.28 +easyocr==1.7.0 pymupdf==1.22.2 -lxml==5.3.0 +PyPDF2==3.0.1 +pytesseract==0.3.10 +Pillow==10.4.0 layoutparser==0.3.4 +lxml==5.3.0 + +# ─── ML / maths ────────────────────────────────────────────────────────────── numpy==1.26.4 -openai==1.40.6 -Pillow==10.4.0 -pytesseract==0.3.10 -PyPDF2==3.0.1 scikit-learn==1.5.1 + +# ─── Utilities ────────────────────────────────────────────────────────────── tqdm==4.66.5 -ultralyticsplus==0.0.28 -easyocr==1.7.0
\ No newline at end of file +python-dotenv==1.0.1 +packaging==24.0
\ No newline at end of file diff --git a/src/server/index.ts b/src/server/index.ts index 3b77359ec..eb9bbaa2d 100644 --- a/src/server/index.ts +++ b/src/server/index.ts @@ -7,7 +7,6 @@ import AssistantManager from './ApiManagers/AssistantManager'; import FlashcardManager from './ApiManagers/FlashcardManager'; import DataVizManager from './ApiManagers/DataVizManager'; import DeleteManager from './ApiManagers/DeleteManager'; -import DownloadManager from './ApiManagers/DownloadManager'; import FireflyManager from './ApiManagers/FireflyManager'; import GeneralGoogleManager from './ApiManagers/GeneralGoogleManager'; import SessionManager from './ApiManagers/SessionManager'; @@ -67,7 +66,6 @@ function routeSetter({ addSupervisedRoute, logRegistrationOutcome }: RouteManage new SessionManager(), new UserManager(), new UploadManager(), - new DownloadManager(), new DeleteManager(), new UtilManager(), new GeneralGoogleManager(), |