import { green, red } from 'colors';
import { ExifImage } from 'exif';
import * as exifr from 'exifr';
import { File } from 'formidable';
import { createReadStream, createWriteStream, existsSync, readFileSync, rename, unlinkSync, writeFile } from 'fs';
import * as path from 'path';
import { basename } from 'path';
import * as sharp from 'sharp';
import { Readable, Stream } from 'stream';
import { filesDirectory, publicDirectory } from '.';
import { Opt } from '../fields/Doc';
import { ParsedPDF } from '../server/PdfTypes';
import { Utils } from '../Utils';
import { createIfNotExists } from './ActionUtilities';
import { clientPathToFile, Directory, pathToDirectory, serverPathToFile } from './ApiManagers/UploadManager';
import { resolvedServerUrl } from './server_Initialization';
import { AcceptableMedia, Upload } from './SharedMediaTypes';
import request = require('request-promise');
import formidable = require('formidable');
import { AzureManager } from './ApiManagers/AzureManager';
import axios from 'axios';

const { spawn, exec } = require('child_process');
const parse = require('pdf-parse');
const ffmpeg = require('fluent-ffmpeg');
const fs = require('fs');
const requestImageSize = require('../client/util/request-image-size');
const md5File = require('md5-file');

export enum SizeSuffix {
    Small = '_s',
    Medium = '_m',
    Large = '_l',
    Original = '_o',
    None = '',
}

// Injects a size suffix immediately before the file extension, e.g. photo.png -> photo_s.png.
export function InjectSize(filename: string, size: SizeSuffix) {
    const extension = path.extname(filename).toLowerCase();
    return filename.substring(0, filename.length - extension.length) + size + extension;
}

// Returns a RegExp that matches paths served from this server's public directory
// and captures the portion of the path below public/.
function isLocal() {
    return /Dash-Web[0-9]*[\\\/]src[\\\/]server[\\\/]public[\\\/](.*)/;
}

function usingAzure() {
    return process.env.USE_AZURE === 'true';
}

export namespace DashUploadUtils {
    export interface Size {
        width: number;
        suffix: SizeSuffix;
    }

    export const Sizes: { [size: string]: Size } = {
        SMALL: { width: 100, suffix: SizeSuffix.Small },
        MEDIUM: { width: 400, suffix: SizeSuffix.Medium },
        LARGE: { width: 900, suffix: SizeSuffix.Large },
    };

    export function validateExtension(url: string) {
        return AcceptableMedia.imageFormats.includes(path.extname(url).toLowerCase());
    }

    const size = 'content-length';
    const type = 'content-type';

    const BLOBSTORE_URL = process.env.BLOBSTORE_URL;
    const RESIZE_FUNCTION_URL = process.env.RESIZE_FUNCTION_URL;

    const { imageFormats, videoFormats, applicationFormats, audioFormats } = AcceptableMedia;
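    /**
     * A quick sketch of the size-suffix convention above (the filename is hypothetical):
     * each uploaded image is written once per size, with the suffix injected just before
     * the extension, so the resized variants live alongside the original:
     *
     *   InjectSize('photo.png', SizeSuffix.Small);    // 'photo_s.png'
     *   InjectSize('photo.png', SizeSuffix.Medium);   // 'photo_m.png'
     *   InjectSize('photo.png', SizeSuffix.Original); // 'photo_o.png'
     */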
    //TODO:glr
    export async function concatVideos(filePaths: string[]) {
        // make an ordered text file listing the segment paths, as expected by ffmpeg's concat demuxer
        const inputListName = 'concat.txt';
        const textFilePath = path.join(filesDirectory, inputListName);
        const filePathsText = filePaths.map(filePath => `file '${filePath}'`).join('\n');
        // write the text file to the file system
        await new Promise<void>((res, reject) =>
            writeFile(textFilePath, filePathsText, err => {
                if (err) {
                    reject();
                    console.log(err);
                } else res();
            })
        );
        // make a unique output file name, then resolve it inside the videos directory
        const outputFileName = `output-${Utils.GenerateGuid()}.mp4`;
        const outputFilePath = path.join(pathToDirectory(Directory.videos), outputFileName);
        // concatenate the videos
        await new Promise<void>((resolve, reject) => {
            const merge = ffmpeg();
            merge
                .input(textFilePath)
                .inputOptions(['-f concat', '-safe 0'])
                // .outputOptions('-c copy') //.videoCodec("copy")
                .save(outputFilePath)
                .on('error', (err: any) => {
                    console.log(err);
                    reject();
                })
                .on('end', resolve);
        });
        // delete concat.txt from the file system
        unlinkSync(textFilePath);
        // delete the old segment videos from the server
        filePaths.forEach(filePath => unlinkSync(filePath));
        // return the path(s) to the output file
        return {
            accessPaths: getAccessPaths(Directory.videos, outputFileName),
        };
    }

    // Builds a FileResponse-shaped object for a file that already exists on the server,
    // so callers can treat reuse and fresh uploads uniformly.
    function resolveExistingFile(name: string, pat: string, directory: Directory, type?: string, duration?: number, rawText?: string) {
        const data = { size: 0, path: path.basename(pat), name, type: type ?? '' };
        const file = { ...data, toJSON: () => ({ ...data, filename: data.path.replace(/.*\//, ''), mtime: duration?.toString(), mime: '', toJson: () => undefined as any }) };
        return {
            source: file,
            result: {
                accessPaths: {
                    agnostic: getAccessPaths(directory, data.path),
                },
                rawText,
                duration,
            },
        };
    }

    export function QueryYoutubeProgress(videoId: string, user?: Express.User) {
        // console.log(`PROGRESS:${videoId}`, (user as any)?.email);
        return uploadProgress.get(videoId) ?? 'pending data upload';
    }

    // maps an upload guid (or video id) to a human-readable progress message
    export let uploadProgress = new Map<string, string>();

    export function uploadYoutube(videoId: string, overwriteId: string): Promise<Upload.FileResponse> {
        return new Promise<Upload.FileResponse>((res, rej) => {
            const name = videoId;
            const path = name.replace(/^-/, '__') + '.mp4';
            const finalPath = serverPathToFile(Directory.videos, path);
            if (existsSync(finalPath)) {
                uploadProgress.set(overwriteId, 'computing duration');
                exec(`yt-dlp -o ${finalPath} "https://www.youtube.com/watch?v=${videoId}" --get-duration`, (error: any, stdout: any, stderr: any) => {
                    // stdout is H:M:S (or M:S, or just S); reversed, time[0] is seconds, time[1] minutes, time[2] hours
                    const time = Array.from(stdout.trim().split(':')).reverse();
                    const duration = (time.length > 2 ? Number(time[2]) * 60 * 60 : 0) + (time.length > 1 ? Number(time[1]) * 60 : 0) + (time.length > 0 ? Number(time[0]) : 0);
                    res(resolveExistingFile(name, finalPath, Directory.videos, 'video/mp4', duration, undefined));
                });
            } else {
                uploadProgress.set(overwriteId, 'starting download');
                const ytdlp = spawn('yt-dlp', ['-o', path, `https://www.youtube.com/watch?v=${videoId}`, '--max-filesize', '100M', '-f', 'mp4']);
                ytdlp.stdout.on('data', (data: any) => uploadProgress.set(overwriteId, data.toString()));
                let errors = '';
                ytdlp.stderr.on('data', (data: any) => {
                    uploadProgress.set(overwriteId, 'error:' + data.toString());
                    errors = data.toString();
                });
                ytdlp.on('exit', function (code: any) {
                    if (code) {
                        res({
                            source: {
                                size: 0,
                                path,
                                name,
                                type: '',
                                toJSON: () => ({ name, path }),
                            },
                            result: { name: 'failed youtube query', message: `Could not archive video. ${code ? errors : uploadProgress.get(videoId)}` },
                        });
                    } else {
                        uploadProgress.set(overwriteId, 'computing duration');
                        exec(`yt-dlp -o ${path} "https://www.youtube.com/watch?v=${videoId}" --get-duration`, (error: any, stdout: any, stderr: any) => {
                            // same H:M:S duration parsing as above
                            const time = Array.from(stdout.trim().split(':')).reverse();
                            const duration = (time.length > 2 ? Number(time[2]) * 60 * 60 : 0) + (time.length > 1 ? Number(time[1]) * 60 : 0) + (time.length > 0 ? Number(time[0]) : 0);
                            const data = { size: 0, path, name, type: 'video/mp4' };
                            const file = { ...data, toJSON: () => ({ ...data, filename: data.path.replace(/.*\//, ''), mtime: duration.toString(), mime: '', toJson: () => undefined as any }) };
                            res(MoveParsedFile(file, Directory.videos));
                        });
                    }
                });
            }
        });
    }
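    /**
     * A usage sketch for the youtube pathway (the video id is hypothetical). The download and
     * the progress poll are keyed on the same guid, so a client can poll while yt-dlp runs:
     *
     *   const pending = DashUploadUtils.uploadYoutube('dQw4w9WgXcQ', overwriteGuid);
     *   DashUploadUtils.QueryYoutubeProgress(overwriteGuid); // e.g. 'starting download'
     *   const response = await pending;
     */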
    export async function upload(file: File, overwriteGuid?: string): Promise<Upload.FileResponse> {
        const isAzureOn = usingAzure();
        const { type, path, name } = file;
        const types = type?.split('/') ?? [];
        // If the client sent a guid it uses to track upload progress, key progress on that guid. Otherwise, use the file's name.
        uploadProgress.set(overwriteGuid ?? name, 'uploading');
        const category = types[0];
        let format = `.${types[1]}`;
        console.log(green(`Processing upload of file (${name}) and format (${format}) with upload type (${type}) in category (${category}).`));

        switch (category) {
            case 'image':
                if (imageFormats.includes(format)) {
                    const result = await UploadImage(path, basename(path));
                    return { source: file, result };
                }
                fs.unlink(path, () => {});
                return { source: file, result: { name: 'Unsupported image format', message: `Could not upload unsupported file (${name}). Please convert to a .jpg` } };
            case 'video':
                if (format.includes('x-matroska')) {
                    console.log('case video');
                    // remux the mkv container into mp4, copying the streams instead of re-encoding them
                    await new Promise<void>(res =>
                        ffmpeg(file.path)
                            .videoCodec('copy')
                            .save(file.path.replace('.mkv', '.mp4'))
                            .on('end', res)
                            .on('error', (e: any) => console.log(e))
                    );
                    file.path = file.path.replace('.mkv', '.mp4');
                    format = '.mp4';
                }
                if (format.includes('quicktime')) {
                    // probe quicktime files and reject any with an hevc stream
                    let abort = false;
                    await new Promise<void>(res =>
                        ffmpeg.ffprobe(file.path, (err: any, metadata: any) => {
                            if (metadata.streams.some((stream: any) => stream.codec_name === 'hevc')) {
                                abort = true;
                            }
                            res();
                        })
                    );
                    if (abort) {
                        // bcz: instead of aborting, we could convert the file to an mp4 using the code below. Problem is that this takes a long time and will clog up the server.
                        // await new Promise<void>(res =>
                        //     ffmpeg(file.path)
                        //         .videoCodec('libx264') // this will re-encode the data rather than copy it
                        //         .audioCodec('mp2')
                        //         .save(file.path.replace('.MOV', '.mp4').replace('.mov', '.mp4'))
                        //         .on('end', res)
                        // );
                        // file.path = file.path.replace('.mov', '.mp4').replace('.MOV', '.mp4');
                        // format = '.mp4';
                        fs.unlink(path, () => {});
                        return { source: file, result: { name: 'Unsupported video format', message: `Could not upload unsupported file (${name}). Please convert to an .mp4` } };
                    }
                }
                if (videoFormats.includes(format) || format.includes('.webm')) {
                    return MoveParsedFile(file, Directory.videos);
                }
                fs.unlink(path, () => {});
                return { source: file, result: { name: 'Unsupported video format', message: `Could not upload unsupported file (${name}). Please convert to an .mp4` } };
            case 'application':
                if (applicationFormats.includes(format)) {
                    const val = UploadPdf(file);
                    if (val) return val;
                }
            // falls through: application uploads that aren't a recognized pdf format get one more chance as audio
            case 'audio':
                // strip codec parameters, e.g. 'audio/webm;codecs=opus' -> '.webm'
                const components = format.split(';');
                if (components.length > 1) {
                    format = components[0];
                }
                if (audioFormats.includes(format)) {
                    return UploadAudio(file, format);
                }
                fs.unlink(path, () => {});
                return { source: file, result: { name: 'Unsupported audio format', message: `Could not upload unsupported file (${name}). Please convert to an .mp3` } };
            case 'text':
                if (types[1] === 'csv') {
                    return UploadCsv(file);
                }
        }

        console.log(red(`Ignoring unsupported file (${name}) with upload type (${type}).`));
        fs.unlink(path, () => {});
        return { source: file, result: new Error(`Could not upload unsupported file (${name}) with upload type (${type}).`) };
    }
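    /**
     * A minimal sketch of how upload() is typically driven (the route handler here is
     * hypothetical, not part of this module): formidable parses the multipart request, and
     * each parsed file is handed to upload(), which dispatches on its mime category:
     *
     *   new formidable.IncomingForm().parse(req, async (_err, _fields, files) => {
     *       const results = await Promise.all(Object.values(files).map(f => DashUploadUtils.upload(f as File)));
     *       res.send(results.map(response => response.result));
     *   });
     */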
    async function UploadPdf(file: File) {
        // key the stored pdf on its md5 hash, so identical uploads resolve to the same server file
        const fileKey = (await md5File(file.path)) + '.pdf';
        const textFilename = `${fileKey.substring(0, fileKey.length - 4)}.txt`;
        if (fExists(fileKey, Directory.pdfs) && fExists(textFilename, Directory.text)) {
            // this pdf (and its extracted text) already live on the server: discard the upload and reuse them
            fs.unlink(file.path, () => {});
            return new Promise<Upload.FileResponse>(res => {
                const readStream = createReadStream(serverPathToFile(Directory.text, textFilename));
                let rawText = '';
                readStream.on('data', chunk => (rawText += chunk.toString())).on('end', () => res(resolveExistingFile(file.name, fileKey, Directory.pdfs, file.type, undefined, rawText)));
            });
        }
        const dataBuffer = readFileSync(file.path);
        const result: ParsedPDF | any = await parse(dataBuffer).catch((e: any) => e);
        if (!result.code) {
            // write the extracted text to a sidecar file in the text directory
            await new Promise<void>((resolve, reject) => {
                const writeStream = createWriteStream(serverPathToFile(Directory.text, textFilename));
                writeStream.write(result?.text, error => (error ? reject(error) : resolve()));
            });
            return MoveParsedFile(file, Directory.pdfs, undefined, result?.text, undefined, fileKey);
        }
        return { source: file, result: { name: 'failed pdf upload', message: `Could not upload (${file.name}). ${result.message}` } };
    }

    async function UploadCsv(file: File) {
        const { path: sourcePath } = file;
        // read the file as a string and pass it along as the document's raw text
        const data = readFileSync(sourcePath, 'utf8');
        return MoveParsedFile(file, Directory.csv, undefined, data);
    }

    const manualSuffixes = ['.webm'];

    async function UploadAudio(file: File, format: string) {
        const suffix = manualSuffixes.includes(format) ? format : undefined;
        return MoveParsedFile(file, Directory.audio, suffix);
    }
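    /**
     * A sketch of the content-addressed naming UploadPdf relies on (the temp path and hash are
     * hypothetical): because the stored name derives only from the file's bytes, a second
     * upload of the same pdf maps onto the files already on the server:
     *
     *   const fileKey = (await md5File('/tmp/upload_3f1')) + '.pdf'; // e.g. '9e107d9d372bb6826bd81d3542a419d6.pdf'
     *   fExists(fileKey, Directory.pdfs); // true iff this exact pdf was uploaded before
     */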
    /**
     * Uploads an image specified by @param source to Dash's /public/files/
     * directory, and returns information generated during that upload.
     *
     * @param {string} source is either the absolute path of an already uploaded image or
     * the url of a remote image
     * @param {string} filename dictates what to call the image. If not specified,
     * the name becomes {@param prefix}_upload_{GUID}
     * @param {string} prefix is a string prepended to the generated image name in the
     * event that @param filename is not specified
     *
     * @returns {ImageUploadInformation | Error} This method returns
     * 1) the paths to the uploaded images (plural due to resizing)
     * 2) the exif data embedded in the image, or the error explaining why exif couldn't be parsed
     * 3) the size of the image, in bytes (e.g. 4432130)
     * 4) the content type of the image, i.e. image/(jpeg | png | ...)
     */
    export const UploadImage = async (source: string, filename?: string, prefix: string = ''): Promise<Upload.ImageInformation | Error> => {
        const metadata = await InspectImage(source);
        if (metadata instanceof Error) {
            return { name: metadata.name, message: metadata.message };
        }
        return UploadInspectedImage(metadata, filename || metadata.filename, prefix);
    };

    export async function buildFileDirectories() {
        if (!existsSync(publicDirectory)) {
            console.error('\nPlease ensure that the following directory exists...\n');
            console.log(publicDirectory);
            process.exit(0);
        }
        if (!existsSync(filesDirectory)) {
            console.error('\nPlease ensure that the following directory exists...\n');
            console.log(filesDirectory);
            process.exit(0);
        }
        const pending = Object.keys(Directory).map(sub => createIfNotExists(`${filesDirectory}/${sub}`));
        return Promise.all(pending);
    }

    export interface RequestedImageSize {
        width: number;
        height: number;
        type: string;
    }

    export interface ImageResizer {
        resizer?: sharp.Sharp;
        suffix: SizeSuffix;
    }

    /**
     * Based on the url's classification as local or remote, gleans
     * as much information as possible about the specified image
     *
     * @param source is the path or url to the image in question
     */
    export const InspectImage = async (source: string): Promise<Upload.InspectionResults | Error> => {
        let rawMatches: RegExpExecArray | null;
        let filename: string | undefined;
        /**
         * Edge case handling: this if clause covers the case where an image dropped onto the canvas
         * is represented by a base64 encoded data uri, rather than a proper file. We manually write it out
         * to the server and then carry on as if it had been put there by the Formidable form / file parser.
         */
        if ((rawMatches = /^data:image\/([a-z]+);base64,(.*)/.exec(source)) !== null) {
            const [ext, data] = rawMatches.slice(1, 3);
            const resolved = (filename = `upload_${Utils.GenerateGuid()}.${ext}`);
            if (usingAzure()) {
                await AzureManager.UploadBase64ImageBlob(resolved, data);
                source = `${AzureManager.BASE_STRING}/${resolved}`;
            } else {
                const error = await new Promise<NodeJS.ErrnoException | null>(resolve => {
                    writeFile(serverPathToFile(Directory.images, resolved), data, 'base64', resolve);
                });
                if (error !== null) {
                    return error;
                }
                source = `${resolvedServerUrl}${clientPathToFile(Directory.images, resolved)}`;
            }
        }
        let resolvedUrl: string;
        /**
         * At this point, we want to take whatever url we have and make sure it's requestable.
         * Anything that's hosted by some other website already is, but if the url is a local file url
         * (locates the file on this server machine), we have to resolve the client side url by cutting out the
         * basename subtree (i.e. /images/<filename>.<ext>) and putting it on the end of the server's url.
         *
         * This can always be localhost, regardless of whether this is on the server or not, since we (the server, not the client)
         * will be the ones making the request, and from the perspective of dash-release or dash-web, localhost:<port> refers to the same thing
         * as the full dash-release.eastus.cloudapp.azure.com:<port>.
         */
        const matches = isLocal().exec(source);
        if (matches === null) {
            resolvedUrl = source;
        } else {
            resolvedUrl = `${resolvedServerUrl}/${matches[1].split('\\').join('/')}`;
        }
        // See header comments: not all image files have exif data (I believe JPG is the only format that can have it)
        const exifData = await parseExifData(resolvedUrl);
        const results = {
            exifData,
            requestable: resolvedUrl,
        };
        // Use the request library to parse out file-level image information from the response headers
        const { headers } = await new Promise<any>((resolve, reject) => {
            return request.head(resolvedUrl, (error, res) => (error ? reject(error) : resolve(res)));
        }).catch(e => {
            console.log('Error processing headers: ', e);
        });
        try {
            // Compute the native width and height of the image with an npm module
            const { width: nativeWidth, height: nativeHeight } = await requestImageSize(resolvedUrl);
            // Bundle up the information into an object
            return {
                source,
                contentSize: parseInt(headers[size]),
                contentType: headers[type],
                nativeWidth,
                nativeHeight,
                filename,
                ...results,
            };
        } catch (e: any) {
            console.log(e);
            return e;
        }
    };
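    /**
     * A minimal usage sketch (the url is hypothetical): InspectImage accepts a remote url, a
     * locally served file url, or a base64 data uri, and yields the metadata that
     * UploadInspectedImage consumes:
     *
     *   const info = await DashUploadUtils.InspectImage('https://example.com/photo.jpg');
     *   if (!(info instanceof Error)) {
     *       console.log(info.contentType, info.nativeWidth, info.nativeHeight);
     *   }
     */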
    /**
     * Basically just a wrapper around rename, which 'deletes'
     * the file at the old path and 'moves' it to the new one. For simplicity, the
     * caller just has to pass in the name of the target directory, and this function
     * will resolve the actual target path from that.
     * @param file The file to move
     * @param destination One of the specific media asset directories into which to move it
     * @param suffix If the file doesn't have a suffix and you want to provide it one
     * to appear in the new location
     */
    export async function MoveParsedFile(file: formidable.File, destination: Directory, suffix: string | undefined = undefined, text?: string, duration?: number, targetName?: string): Promise<Upload.FileResponse> {
        const { path: sourcePath } = file;
        let name = targetName ?? path.basename(sourcePath);
        suffix && (name += suffix);
        return new Promise(resolve => {
            const destinationPath = serverPathToFile(destination, name);
            rename(sourcePath, destinationPath, error => {
                resolve({
                    source: file,
                    result: error
                        ? error
                        : {
                              accessPaths: {
                                  agnostic: getAccessPaths(destination, name),
                              },
                              rawText: text,
                              duration,
                          },
                });
            });
        });
    }

    export function fExists(name: string, destination: Directory) {
        const destinationPath = serverPathToFile(destination, name);
        return existsSync(destinationPath);
    }

    export function getAccessPaths(directory: Directory, fileName: string) {
        return {
            client: clientPathToFile(directory, fileName),
            server: serverPathToFile(directory, fileName),
        };
    }
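    /**
     * A sketch of the shape these helpers hand back (the concrete paths are hypothetical and
     * depend on how clientPathToFile/serverPathToFile resolve their directories):
     *
     *   getAccessPaths(Directory.videos, 'clip.mp4');
     *   // => { client: '/files/videos/clip.mp4', server: '/srv/dash/public/files/videos/clip.mp4' }
     */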
    /**
     * UploadInspectedImage() takes an image along with its metadata. If Azure is being used, this method calls the Azure function
     * that performs the resizing. Otherwise, the image is resized locally.
     *
     * @param metadata metadata object from InspectImage()
     * @param filename the name of the file
     * @param prefix the prefix to use, which will be set to '' if none is provided.
     * @param cleanUp a boolean indicating if the source file should be deleted after upload. True by default.
     * @returns the accessPaths for the resized files.
     */
    export const UploadInspectedImage = async (metadata: Upload.InspectionResults, filename?: string, prefix = '', cleanUp = true): Promise<Upload.ImageInformation> => {
        const { requestable, source, ...remaining } = metadata;
        const resolved = filename || `${prefix}upload_${Utils.GenerateGuid()}.${remaining.contentType.split('/')[1].toLowerCase()}`;
        const { images } = Directory;
        const information: Upload.ImageInformation = {
            accessPaths: {
                agnostic: usingAzure()
                    ? {
                          client: BLOBSTORE_URL + `/${resolved}`,
                          server: BLOBSTORE_URL + `/${resolved}`,
                      }
                    : getAccessPaths(images, resolved),
            },
            ...metadata,
        };
        let writtenFiles: { [suffix: string]: string };
        if (usingAzure()) {
            if (!RESIZE_FUNCTION_URL) {
                throw new Error('Resize function URL not provided.');
            }
            try {
                // delegate the resizing to the Azure function, which writes the size variants to blob storage
                const response = await axios.post(RESIZE_FUNCTION_URL, {
                    url: requestable,
                    filename: resolved,
                });
                writtenFiles = response.data.writtenFiles;
            } catch (err) {
                console.error(err);
                writtenFiles = {};
            }
        } else {
            writtenFiles = await outputResizedImages(() => request(requestable), resolved, pathToDirectory(Directory.images));
        }
        for (const suffix of Object.keys(writtenFiles)) {
            information.accessPaths[suffix] = getAccessPaths(images, writtenFiles[suffix]);
        }
        // if the source was a local file, clean it up now that the resized copies exist
        if (isLocal().test(source) && cleanUp) {
            unlinkSync(source);
        }
        return information;
    };

    // Recursively converts, in place, any Buffer (or numeric-array) values nested in the given object into strings.
    const bufferConverterRec = (layer: any) => {
        for (const key of Object.keys(layer)) {
            const val: any = layer[key];
            if (val instanceof Buffer) {
                layer[key] = val.toString();
            } else if (Array.isArray(val) && typeof val[0] === 'number') {
                layer[key] = Buffer.from(val).toString();
            } else if (typeof val === 'object') {
                bufferConverterRec(val);
            }
        }
    };

    const parseExifData = async (source: string) => {
        const image = await request.get(source, { encoding: null });
        // probe with ExifImage first, purely to learn whether exif data exists (and, if not, why)
        const { data, error } = await new Promise<{ data: any; error: any }>(resolve => {
            new ExifImage({ image }, (error, data) => {
                let reason: Opt<string> = undefined;
                if (error) {
                    reason = (error as any).code;
                }
                resolve({ data, error: reason });
            });
        });
        //data && bufferConverterRec(data);
        return error ? { data: undefined, error } : { data: await exifr.parse(image), error };
    };
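    /**
     * A sketch of the writtenFiles map both resize paths are expected to produce, keyed by size
     * suffix (the filenames are hypothetical); the loop in UploadInspectedImage turns each entry
     * into a client/server access-path pair:
     *
     *   { _o: 'upload_guid_o.png', _s: 'upload_guid_s.png', _m: 'upload_guid_m.png', _l: 'upload_guid_l.png' }
     */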
    const { pngs, jpgs, webps, tiffs } = AcceptableMedia;

    const pngOptions = {
        compressionLevel: 9,
        adaptiveFiltering: true,
        force: true,
    };

    /**
     * outputResizedImages streams the image once per size defined at the top of this file and
     * saves each resized copy to the server with the corresponding suffix.
     * @param streamProvider a factory for readable streams of the image to process, taken from the /parsed_files location
     * @param outputFileName the basename (no suffix) of the outputted file
     * @param outputDirectory the directory to output to, usually Directory.Images
     * @returns a map with suffixes as keys and resized filenames as values.
     */
    export async function outputResizedImages(streamProvider: () => Stream | Promise<Stream>, outputFileName: string, outputDirectory: string) {
        const writtenFiles: { [suffix: string]: string } = {};
        for (const { resizer, suffix } of resizers(path.extname(outputFileName))) {
            const outputPath = path.resolve(outputDirectory, (writtenFiles[suffix] = InjectSize(outputFileName, suffix)));
            await new Promise<void>(async (resolve, reject) => {
                const source = streamProvider();
                let readStream = source instanceof Promise ? await source : source;
                let error = false;
                if (resizer) {
                    readStream = readStream.pipe(resizer.withMetadata()).on('error', async args => {
                        error = true;
                        // the resize failed (e.g. an encoding sharp can't handle): fall back to writing the original, unresized stream
                        const source2 = streamProvider();
                        const readStream2 = source2 instanceof Promise ? await source2 : source2;
                        readStream2?.pipe(createWriteStream(outputPath)).on('error', resolve).on('close', resolve);
                    });
                }
                !error && readStream?.pipe(createWriteStream(outputPath)).on('error', resolve).on('close', resolve);
            });
        }
        return writtenFiles;
    }

    /**
     * define the resizers to use
     * @param ext the extension
     * @returns an array of resizer configurations built from sharp, one per size
     */
    function resizers(ext: string): DashUploadUtils.ImageResizer[] {
        return [
            { suffix: SizeSuffix.Original },
            ...Object.values(DashUploadUtils.Sizes).map(({ suffix, width }) => {
                let initial: sharp.Sharp | undefined = sharp({ failOnError: false }).resize(width, undefined, { withoutEnlargement: true });
                if (pngs.includes(ext)) {
                    initial = initial.png(pngOptions);
                } else if (jpgs.includes(ext)) {
                    initial = initial.jpeg();
                } else if (webps.includes(ext)) {
                    initial = initial.webp();
                } else if (tiffs.includes(ext)) {
                    initial = initial.tiff();
                } else if (ext === '.gif') {
                    // gifs skip resizing entirely, presumably to avoid flattening animated gifs
                    initial = undefined;
                }
                return {
                    resizer: suffix === SizeSuffix.Original ? undefined : initial,
                    suffix,
                };
            }),
        ];
    }
}
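/**
 * An end-to-end sketch of the typical flow through this module (the handler wiring is
 * hypothetical): a parsed formidable File goes through upload(), images are inspected and
 * resized into their _s/_m/_l/_o variants, and the caller receives access paths it can hand
 * back to the client:
 *
 *   const response = await DashUploadUtils.upload(parsedFile);
 *   if (!(response.result instanceof Error)) {
 *       console.log(response.result.accessPaths.agnostic.client);
 *   }
 */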