aboutsummaryrefslogtreecommitdiff
path: root/src/server/GarbageCollector.ts
blob: 268239481a1cdb86cae90893eedc9240dd5fc4b8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import { Database } from './database';

import * as path from 'path';
import * as fs from 'fs';
import { Search } from './Search';

/**
 * Extracts the path component of a URL reference found in a document field.
 *
 * Relative references (e.g. "/files/a.png") are resolved against a dummy base
 * so they can still be parsed; absolute URLs ignore the base entirely.
 *
 * @param rawUrl value taken from a field (`url`, or an `href`/`src` match)
 * @returns the URL's pathname, or undefined when the value is missing, empty
 *          or cannot be parsed as a URL
 */
function referencedPathname(rawUrl: unknown): string | undefined {
    if (rawUrl instanceof URL) {
        return rawUrl.pathname;
    }
    if (typeof rawUrl !== "string" || rawUrl.length === 0) {
        return undefined;
    }
    try {
        return new URL(rawUrl, "http://localhost").pathname;
    } catch {
        // An unparsable link cannot point at a document or file we know about,
        // and must not abort the whole traversal.
        return undefined;
    }
}

/**
 * Records the file referenced by `rawUrl` in `files`, keyed by the file's base
 * name (without extension) with the extension appended to that key's list.
 */
function recordReferencedFile(rawUrl: unknown, files: { [name: string]: string[] }) {
    const pathname = referencedPathname(rawUrl);
    if (pathname === undefined) {
        return;
    }
    const ext = path.extname(pathname);
    const fileName = path.basename(pathname, ext);
    // Own-property check: a file called e.g. "constructor" must not resolve to
    // something inherited from Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(files, fileName)) {
        files[fileName] = [];
    }
    files[fileName].push(ext);
}

/**
 * Scans the fields of one document and collects everything they reference.
 *
 * - "proxy" fields contribute their `fieldId` to `ids`.
 * - "list" fields are scanned recursively through their `fields` member.
 * - plain strings are searched for `"documentId"` / `"dataDocumentId"` entries.
 * - "RichTextField" data is searched for `"href"` links containing `doc/`
 *   (the text after the last `doc/` is taken as a document id) and for `"src"`
 *   links, which are recorded as referenced files.
 * - "audio", "image", "video", "pdf" and "web" fields have their `url`
 *   recorded as a referenced file.
 *
 * URLs that are missing or cannot be parsed are skipped.
 *
 * @param doc   object whose own enumerable properties are the fields to scan
 * @param ids   output: referenced document ids are appended here
 * @param files output: maps a referenced file's base name to the extensions seen
 */
function addDoc(doc: any, ids: string[], files: { [name: string]: string[] }) {
    for (const key in doc) {
        // Go through Object.prototype so a field named "hasOwnProperty" cannot break the scan.
        if (!Object.prototype.hasOwnProperty.call(doc, key)) {
            continue;
        }
        const field = doc[key];
        if (field === undefined || field === null) {
            continue;
        }
        if (field.__type === "proxy") {
            ids.push(field.fieldId);
        } else if (field.__type === "list") {
            addDoc(field.fields, ids, files);
        } else if (typeof field === "string") {
            const re = /"(?:dataD|d)ocumentId"\s*:\s*"([\w\-]*)"/g;
            let match: RegExpExecArray | null;
            while ((match = re.exec(field)) !== null) {
                ids.push(match[1]);
            }
        } else if (field.__type === "RichTextField") {
            const hrefRe = /"href"\s*:\s*"(.*?)"/g;
            let match: RegExpExecArray | null;
            while ((match = hrefRe.exec(field.Data)) !== null) {
                const pathname = referencedPathname(match[1]);
                if (pathname === undefined) {
                    continue;
                }
                const split = pathname.split("doc/");
                if (split.length > 1) {
                    ids.push(split[split.length - 1]);
                }
            }
            const srcRe = /"src"\s*:\s*"(.*?)"/g;
            while ((match = srcRe.exec(field.Data)) !== null) {
                recordReferencedFile(match[1], files);
            }
        } else if (["audio", "image", "video", "pdf", "web"].includes(field.__type)) {
            recordReferencedFile(field.url, files);
        }
    }
}

/**
 * Deletes every entry of `folder` that is not listed in `files`.
 *
 * An entry is kept when its base name (or its base name with the last two
 * characters removed — presumably a size suffix such as "_s"; TODO confirm)
 * is a key of `files` and that key's extension list contains the entry's
 * extension. ".gitignore" is always kept.
 *
 * @param folder directory to clean; concatenated directly with each file name
 * @param files  map from referenced base name to the referenced extensions
 * @returns a promise that resolves once all deletions have been attempted and
 *          rejects if the directory cannot be listed
 */
async function deleteUnreferencedFiles(folder: string, files: { [name: string]: string[] }): Promise<void> {
    const fileList = await new Promise<string[]>((res, rej) =>
        fs.readdir(folder, (err, list) => (err ? rej(err) : res(list))));
    // Own-property lookups only: names like "toString" must not match Object.prototype members.
    const isKnown = (name: string) => Object.prototype.hasOwnProperty.call(files, name);

    const filesToDelete = fileList.filter(file => {
        if (file === ".gitignore") {
            return false;
        }
        const ext = path.extname(file);
        const base = path.basename(file, ext);
        const name = isKnown(base) ? base : base.substring(0, base.length - 2);
        const existsInDb = isKnown(name) && files[name].includes(ext);
        return !existsInDb;
    });

    console.log(`Deleting ${filesToDelete.length} files`);
    let deletedCount = 0;
    for (const file of filesToDelete) {
        console.log(`Deleting file ${file}`);
        try {
            fs.unlinkSync(folder + file);
            deletedCount++;
        } catch {
            // Best effort: one undeletable entry must not stop the sweep.
            console.warn(`Couldn't delete file ${file}`);
        }
    }
    // Report what was actually removed, not what was attempted.
    console.log(`Deleted ${deletedCount} files`);
}

/**
 * Mark-and-sweep cleanup of stored documents and uploaded files.
 *
 * Mark: starting from the `userDocumentId` of every entry returned by the
 * 'users' query, documents are fetched from "newDocuments" in batches of at
 * most 100 and scanned with `addDoc`, which appends newly referenced ids to
 * the work list and records referenced files.
 *
 * Sweep: documents whose `_id` was never visited are deleted from
 * "newDocuments" and from the search index, then files in
 * "./src/server/public/files/" that no visited document references are removed.
 *
 * @returns a promise that resolves when both sweeps have finished
 */
async function GarbageCollect(): Promise<void> {
    // await new Promise(res => setTimeout(res, 3000));
    const cursor = await Database.Instance.query({}, { userDocumentId: 1 }, 'users');
    const users = await cursor.toArray();
    const ids: string[] = users.map(user => user.userDocumentId);
    const visited = new Set<string>();
    const files: { [name: string]: string[] } = {};

    while (ids.length) {
        // Take up to 100 ids off the end of the work list, dropping ones already processed.
        const count = Math.min(ids.length, 100);
        const index = ids.length - count;
        const fetchIds = ids.splice(index, count).filter(id => !visited.has(id));
        if (!fetchIds.length) {
            continue;
        }
        const docs = await new Promise<{ [key: string]: any }[]>(res => Database.Instance.getDocuments(fetchIds, res, "newDocuments"));
        for (const doc of docs) {
            // Check the entry before reading anything from it.
            if (doc === undefined || doc === null) {
                console.log("Couldn't find one of the requested documents");
                continue;
            }
            visited.add(doc.id);
            addDoc(doc.fields, ids, files);
        }
        console.log(`To Go: ${ids.length}, visited: ${visited.size}`);
    }

    console.log(`Done: ${visited.size}`);

    cursor.close();

    // NOTE(review): this query relies on the default collection of Database.query while the
    // delete below targets "newDocuments" — confirm that they are the same collection.
    const toDeleteCursor = await Database.Instance.query({ _id: { $nin: Array.from(visited) } }, { _id: 1 });
    const toDelete: string[] = (await toDeleteCursor.toArray()).map(doc => doc._id);
    toDeleteCursor.close();
    const result = await Database.Instance.delete({ _id: { $in: toDelete } }, "newDocuments");
    console.log(`${result.deletedCount} documents deleted`);

    await Search.Instance.deleteDocuments(toDelete);
    console.log("Cleared search documents");

    await deleteUnreferencedFiles("./src/server/public/files/", files);
}

// Run the collector as soon as this module is loaded. The promise is handled
// explicitly so a failure is reported with context and reflected in the exit
// code instead of surfacing only as an unhandled rejection.
GarbageCollect().catch(err => {
    console.error("Garbage collection failed", err);
    process.exitCode = 1;
});