diff options
author | Tyler Schicke <tyler_schicke@brown.edu> | 2019-07-08 18:06:38 -0400 |
---|---|---|
committer | Tyler Schicke <tyler_schicke@brown.edu> | 2019-07-08 18:06:38 -0400 |
commit | be993c7bfd441a3a7d581371337c94fe692f0c65 (patch) | |
tree | 8e0f2a70d082327744ba885d922da4fd73fcf640 | |
parent | b2dc1c1d5e31a6241851d418db3a64ae6ca4dde8 (diff) |
Added some stuff to GC and sped it up
-rw-r--r-- | src/client/views/nodes/FormattedTextBox.tsx | 5 | ||||
-rw-r--r-- | src/server/GarbageCollector.ts | 108 | ||||
-rw-r--r-- | src/server/GarbageColletor.ts | 51 |
3 files changed, 110 insertions, 54 deletions
diff --git a/src/client/views/nodes/FormattedTextBox.tsx b/src/client/views/nodes/FormattedTextBox.tsx index bf6f4c764..af11e1524 100644 --- a/src/client/views/nodes/FormattedTextBox.tsx +++ b/src/client/views/nodes/FormattedTextBox.tsx @@ -89,9 +89,8 @@ export class FormattedTextBox extends DocComponent<(FieldViewProps & FormattedTe } public static GetDocFromUrl(url: string) { if (url.startsWith(document.location.origin)) { - let start = url.indexOf(window.location.origin); - let path = url.substr(start, url.length - start); - let docid = path.replace(DocServer.prepend("/doc/"), "").split("?")[0]; + const split = new URL(url).pathname.split("doc/"); + const docid = split[split.length - 1]; return docid; } return ""; diff --git a/src/server/GarbageCollector.ts b/src/server/GarbageCollector.ts new file mode 100644 index 000000000..fa182af40 --- /dev/null +++ b/src/server/GarbageCollector.ts @@ -0,0 +1,108 @@ +import { Database } from './database'; + +import * as path from 'path'; +import * as fs from 'fs'; + +function addDoc(doc: any, ids: string[], files: { [name: string]: string[] }) { + for (const key in doc) { + if (!doc.hasOwnProperty(key)) { + continue; + } + const field = doc[key]; + if (field === undefined || field === null) { + continue; + } + if (field.__type === "proxy") { + ids.push(field.fieldId); + } else if (field.__type === "list") { + addDoc(field.fields, ids, files); + } else if (typeof field === "string") { + const re = /"(?:dataD|d)ocumentId"\s*:\s*"([\w\-]*)"/g; + let match: string[] | null; + while ((match = re.exec(field)) !== null) { + ids.push(match[1]); + } + } else if (field.__type === "RichTextField") { + const re = /"href"\s*:\s*"(.*?)"/g; + let match: string[] | null; + while ((match = re.exec(field.Data)) !== null) { + const urlString = match[1]; + const split = new URL(urlString).pathname.split("doc/"); + if (split.length > 1) { + ids.push(split[split.length - 1]); + } + } + } else if (["audio", "image", "video", "pdf", "web"].includes(field.__type)) { + const url = new URL(field.url); + const pathname = url.pathname; + const ext = path.extname(pathname); + const fileName = path.basename(pathname, ext); + let exts = files[fileName]; + if (!exts) { + files[fileName] = exts = []; + } + exts.push(ext); + } + } +} + +async function GarbageCollect() { + // await new Promise(res => setTimeout(res, 3000)); + const cursor = await Database.Instance.query({}, 'users'); + const users = await cursor.toArray(); + const ids: string[] = users.map(user => user.userDocumentId); + const visited = new Set<string>(); + const files: { [name: string]: string[] } = {}; + + while (ids.length) { + const count = Math.min(ids.length, 100); + const index = ids.length - count; + const fetchIds = ids.splice(index, count).filter(id => !visited.has(id)); + if (!fetchIds.length) { + continue; + } + const docs = await new Promise<{ [key: string]: any }[]>(res => Database.Instance.getDocuments(fetchIds, res, "newDocuments")); + for (const doc of docs) { + const id = doc.id; + if (doc === undefined) { + console.log(`Couldn't find field with Id ${id}`); + continue; + } + visited.add(id); + addDoc(doc.fields, ids, files); + } + console.log(`To Go: ${ids.length}, visited: ${visited.size}`); + } + + console.log(`Done: ${visited.size}`); + + cursor.close(); + + const toDeleteCursor = await Database.Instance.query({ _id: { $nin: Array.from(visited) } }); + const toDelete = (await toDeleteCursor.toArray()).map(doc => doc._id); + toDeleteCursor.close(); + const result = await Database.Instance.delete({ _id: { $in: toDelete } }, "newDocuments"); + console.log(`${result.deletedCount} documents deleted`); + + const folder = "./src/server/public/files/"; + fs.readdir(folder, (_, fileList) => { + const filesToDelete = fileList.filter(file => { + const ext = path.extname(file); + let base = path.basename(file, ext); + const existsInDb = (base in files || (base = base.substring(0, base.length - 2)) in files) && files[base].includes(ext); + return file !== ".gitignore" && !existsInDb; + }); + console.log(`Deleting ${filesToDelete.length} files`); + filesToDelete.forEach(file => { + console.log(`Deleting file ${file}`); + try { + fs.unlinkSync(folder + file); + } catch { + console.warn(`Couldn't delete file ${file}`); + } + }); + console.log(`Deleted ${filesToDelete.length} files`); + }); +} + +GarbageCollect(); diff --git a/src/server/GarbageColletor.ts b/src/server/GarbageColletor.ts deleted file mode 100644 index f26b0cec6..000000000 --- a/src/server/GarbageColletor.ts +++ /dev/null @@ -1,51 +0,0 @@ -import { Database } from './database'; - -function addDoc(doc: any, ids: string[]) { - for (const key in doc) { - if (!doc.hasOwnProperty(key)) { - continue; - } - const field = doc[key]; - if (!(field instanceof Object)) { - continue; - } - if (field.__type === "proxy") { - ids.push(field.fieldId); - } else if (field.__type === "list") { - addDoc(field.fields, ids); - } - } -} - -async function GarbageCollect() { - // await new Promise(res => setTimeout(res, 3000)); - const cursor = await Database.Instance.query({}, 'users'); - const users = await cursor.toArray(); - const ids: string[] = users.map(user => user.userDocumentId); - const visited = new Set<string>(); - - while (ids.length) { - const id = ids.pop()!; - if (visited.has(id)) continue; - const doc = await new Promise<{ [key: string]: any }>(res => Database.Instance.getDocument(id, res, "newDocuments")); - if (doc === undefined) { - console.log(`Couldn't find field with Id ${id}`); - continue; - } - visited.add(id); - addDoc(doc.fields, ids); - console.log(`To Go: ${ids.length}, visited: ${visited.size}`); - } - - console.log(`Done: ${visited.size}`); - - cursor.close(); - - const toDeleteCursor = await Database.Instance.query({ _id: { $nin: Array.from(visited) } }); - const toDelete = (await toDeleteCursor.toArray()).map(doc => doc._id); - toDeleteCursor.close(); - const result = await Database.Instance.delete({ _id: { $in: toDelete } }, "newDocuments"); - console.log(`${result.deletedCount} documents deleted`); -} - -GarbageCollect(); |