aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r--src/client/views/nodes/FormattedTextBox.tsx5
-rw-r--r--src/server/GarbageCollector.ts108
-rw-r--r--src/server/GarbageColletor.ts51
3 files changed, 110 insertions, 54 deletions
diff --git a/src/client/views/nodes/FormattedTextBox.tsx b/src/client/views/nodes/FormattedTextBox.tsx
index bf6f4c764..af11e1524 100644
--- a/src/client/views/nodes/FormattedTextBox.tsx
+++ b/src/client/views/nodes/FormattedTextBox.tsx
@@ -89,9 +89,8 @@ export class FormattedTextBox extends DocComponent<(FieldViewProps & FormattedTe
}
public static GetDocFromUrl(url: string) {
if (url.startsWith(document.location.origin)) {
- let start = url.indexOf(window.location.origin);
- let path = url.substr(start, url.length - start);
- let docid = path.replace(DocServer.prepend("/doc/"), "").split("?")[0];
+ const split = new URL(url).pathname.split("doc/");
+ const docid = split[split.length - 1];
return docid;
}
return "";
diff --git a/src/server/GarbageCollector.ts b/src/server/GarbageCollector.ts
new file mode 100644
index 000000000..fa182af40
--- /dev/null
+++ b/src/server/GarbageCollector.ts
@@ -0,0 +1,108 @@
+import { Database } from './database';
+
+import * as path from 'path';
+import * as fs from 'fs';
+
+function addDoc(doc: any, ids: string[], files: { [name: string]: string[] }) {
+ for (const key in doc) {
+ if (!doc.hasOwnProperty(key)) {
+ continue;
+ }
+ const field = doc[key];
+ if (field === undefined || field === null) {
+ continue;
+ }
+ if (field.__type === "proxy") {
+ ids.push(field.fieldId);
+ } else if (field.__type === "list") {
+ addDoc(field.fields, ids, files);
+ } else if (typeof field === "string") {
+ const re = /"(?:dataD|d)ocumentId"\s*:\s*"([\w\-]*)"/g;
+ let match: string[] | null;
+ while ((match = re.exec(field)) !== null) {
+ ids.push(match[1]);
+ }
+ } else if (field.__type === "RichTextField") {
+ const re = /"href"\s*:\s*"(.*?)"/g;
+ let match: string[] | null;
+ while ((match = re.exec(field.Data)) !== null) {
+ const urlString = match[1];
+ const split = new URL(urlString).pathname.split("doc/");
+ if (split.length > 1) {
+ ids.push(split[split.length - 1]);
+ }
+ }
+ } else if (["audio", "image", "video", "pdf", "web"].includes(field.__type)) {
+ const url = new URL(field.url);
+ const pathname = url.pathname;
+ const ext = path.extname(pathname);
+ const fileName = path.basename(pathname, ext);
+ let exts = files[fileName];
+ if (!exts) {
+ files[fileName] = exts = [];
+ }
+ exts.push(ext);
+ }
+ }
+}
+
+async function GarbageCollect() {
+ // await new Promise(res => setTimeout(res, 3000));
+ const cursor = await Database.Instance.query({}, 'users');
+ const users = await cursor.toArray();
+ const ids: string[] = users.map(user => user.userDocumentId);
+ const visited = new Set<string>();
+ const files: { [name: string]: string[] } = {};
+
+ while (ids.length) {
+ const count = Math.min(ids.length, 100);
+ const index = ids.length - count;
+ const fetchIds = ids.splice(index, count).filter(id => !visited.has(id));
+ if (!fetchIds.length) {
+ continue;
+ }
+ const docs = await new Promise<{ [key: string]: any }[]>(res => Database.Instance.getDocuments(fetchIds, res, "newDocuments"));
+ for (const doc of docs) {
+ const id = doc.id;
+ if (doc === undefined) {
+ console.log(`Couldn't find field with Id ${id}`);
+ continue;
+ }
+ visited.add(id);
+ addDoc(doc.fields, ids, files);
+ }
+ console.log(`To Go: ${ids.length}, visited: ${visited.size}`);
+ }
+
+ console.log(`Done: ${visited.size}`);
+
+ cursor.close();
+
+ const toDeleteCursor = await Database.Instance.query({ _id: { $nin: Array.from(visited) } });
+ const toDelete = (await toDeleteCursor.toArray()).map(doc => doc._id);
+ toDeleteCursor.close();
+ const result = await Database.Instance.delete({ _id: { $in: toDelete } }, "newDocuments");
+ console.log(`${result.deletedCount} documents deleted`);
+
+ const folder = "./src/server/public/files/";
+ fs.readdir(folder, (_, fileList) => {
+ const filesToDelete = fileList.filter(file => {
+ const ext = path.extname(file);
+ let base = path.basename(file, ext);
+ const existsInDb = (base in files || (base = base.substring(0, base.length - 2)) in files) && files[base].includes(ext);
+ return file !== ".gitignore" && !existsInDb;
+ });
+ console.log(`Deleting ${filesToDelete.length} files`);
+ filesToDelete.forEach(file => {
+ console.log(`Deleting file ${file}`);
+ try {
+ fs.unlinkSync(folder + file);
+ } catch {
+ console.warn(`Couldn't delete file ${file}`);
+ }
+ });
+ console.log(`Deleted ${filesToDelete.length} files`);
+ });
+}
+
+GarbageCollect();
diff --git a/src/server/GarbageColletor.ts b/src/server/GarbageColletor.ts
deleted file mode 100644
index f26b0cec6..000000000
--- a/src/server/GarbageColletor.ts
+++ /dev/null
@@ -1,51 +0,0 @@
-import { Database } from './database';
-
-function addDoc(doc: any, ids: string[]) {
- for (const key in doc) {
- if (!doc.hasOwnProperty(key)) {
- continue;
- }
- const field = doc[key];
- if (!(field instanceof Object)) {
- continue;
- }
- if (field.__type === "proxy") {
- ids.push(field.fieldId);
- } else if (field.__type === "list") {
- addDoc(field.fields, ids);
- }
- }
-}
-
-async function GarbageCollect() {
- // await new Promise(res => setTimeout(res, 3000));
- const cursor = await Database.Instance.query({}, 'users');
- const users = await cursor.toArray();
- const ids: string[] = users.map(user => user.userDocumentId);
- const visited = new Set<string>();
-
- while (ids.length) {
- const id = ids.pop()!;
- if (visited.has(id)) continue;
- const doc = await new Promise<{ [key: string]: any }>(res => Database.Instance.getDocument(id, res, "newDocuments"));
- if (doc === undefined) {
- console.log(`Couldn't find field with Id ${id}`);
- continue;
- }
- visited.add(id);
- addDoc(doc.fields, ids);
- console.log(`To Go: ${ids.length}, visited: ${visited.size}`);
- }
-
- console.log(`Done: ${visited.size}`);
-
- cursor.close();
-
- const toDeleteCursor = await Database.Instance.query({ _id: { $nin: Array.from(visited) } });
- const toDelete = (await toDeleteCursor.toArray()).map(doc => doc._id);
- toDeleteCursor.close();
- const result = await Database.Instance.delete({ _id: { $in: toDelete } }, "newDocuments");
- console.log(`${result.deletedCount} documents deleted`);
-}
-
-GarbageCollect();