aboutsummaryrefslogtreecommitdiff
path: root/src/server
diff options
context:
space:
mode:
Diffstat (limited to 'src/server')
-rw-r--r--src/server/ApiManagers/AssistantManager.ts401
-rw-r--r--src/server/ApiManagers/DataVizManager.ts2
-rw-r--r--src/server/ApiManagers/FireflyManager.ts410
-rw-r--r--src/server/ApiManagers/UploadManager.ts35
-rw-r--r--src/server/DashUploadUtils.ts59
-rw-r--r--src/server/RouteManager.ts3
-rw-r--r--src/server/apis/google/GoogleApiServerUtils.ts93
-rw-r--r--src/server/authentication/DashUserModel.ts5
-rw-r--r--src/server/chunker/pdf_chunker.py122
-rw-r--r--src/server/flashcard/venv/pyvenv.cfg3
-rw-r--r--src/server/index.ts3
-rw-r--r--src/server/server_Initialization.ts10
12 files changed, 929 insertions, 217 deletions
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index 8447a4934..af25722a4 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -15,14 +15,17 @@ import * as fs from 'fs';
import { writeFile } from 'fs';
import { google } from 'googleapis';
import { JSDOM } from 'jsdom';
+import OpenAI from 'openai';
import * as path from 'path';
import * as puppeteer from 'puppeteer';
import { promisify } from 'util';
import * as uuid from 'uuid';
import { AI_Document } from '../../client/views/nodes/chatbot/types/types';
+import { DashUploadUtils } from '../DashUploadUtils';
import { Method } from '../RouteManager';
import { filesDirectory, publicDirectory } from '../SocketData';
import ApiManager, { Registration } from './ApiManager';
+import { env } from 'process';
// Enumeration of directories where different file types are stored
export enum Directory {
@@ -87,6 +90,7 @@ export default class AssistantManager extends ApiManager {
protected initialize(register: Registration): void {
// Initialize Google Custom Search API
const customsearch = google.customsearch('v1');
+ const openai = new OpenAI({ apiKey: env.OPENAI_API_KEY });
// Register Wikipedia summary API route
register({
@@ -115,29 +119,86 @@ export default class AssistantManager extends ApiManager {
},
});
- // Register Google Web Search Results API route
+ // Register an API route to retrieve web search results using Google Custom Search
+ // This route filters results by checking their x-frame-options headers for security purposes
register({
method: Method.POST,
subscription: '/getWebSearchResults',
secureHandler: async ({ req, res }) => {
const { query, max_results } = req.body;
- try {
- // Fetch search results using Google Custom Search API
- const response = await customsearch.cse.list({
+ const MIN_VALID_RESULTS_RATIO = 0.75; // 3/4 threshold
+ let startIndex = 1; // Start at the first result initially
+ const fetchSearchResults = async (start: number) => {
+ return customsearch.cse.list({
q: query,
cx: process.env._CLIENT_GOOGLE_SEARCH_ENGINE_ID,
key: process.env._CLIENT_GOOGLE_API_KEY,
safe: 'active',
num: max_results,
+ start, // This controls which result index the search starts from
});
+ };
+
+ const filterResultsByXFrameOptions = async (
+ results: {
+ url: string | null | undefined;
+ snippet: string | null | undefined;
+ }[]
+ ) => {
+ const filteredResults = await Promise.all(
+ results
+ .filter(result => result.url)
+ .map(async result => {
+ try {
+ const urlResponse = await axios.head(result.url!, { timeout: 5000 });
+ const xFrameOptions = urlResponse.headers['x-frame-options'];
+ if (xFrameOptions && xFrameOptions.toUpperCase() === 'SAMEORIGIN') {
+ return result;
+ }
+ } catch (error) {
+ console.error(`Error checking x-frame-options for URL: ${result.url}`, error);
+ }
+ return null; // Exclude the result if it doesn't match
+ })
+ );
+ return filteredResults.filter(result => result !== null); // Remove null results
+ };
- const results =
+ try {
+ // Fetch initial search results
+ let response = await fetchSearchResults(startIndex);
+ const initialResults =
response.data.items?.map(item => ({
url: item.link,
snippet: item.snippet,
})) || [];
- res.send({ results });
+ // Filter the initial results
+ let validResults = await filterResultsByXFrameOptions(initialResults);
+
+ // If valid results are less than 3/4 of max_results, fetch more results
+ while (validResults.length < max_results * MIN_VALID_RESULTS_RATIO) {
+ // Increment the start index by the max_results to fetch the next set of results
+ startIndex += max_results;
+ response = await fetchSearchResults(startIndex);
+
+ const additionalResults =
+ response.data.items?.map(item => ({
+ url: item.link,
+ snippet: item.snippet,
+ })) || [];
+
+ const additionalValidResults = await filterResultsByXFrameOptions(additionalResults);
+ validResults = [...validResults, ...additionalValidResults]; // Combine valid results
+
+ // Break if no more results are available
+ if (additionalValidResults.length === 0 || response.data.items?.length === 0) {
+ break;
+ }
+ }
+
+ // Return the filtered valid results
+ res.send({ results: validResults.slice(0, max_results) }); // Limit the results to max_results
} catch (error) {
console.error('Error performing web search:', error);
res.status(500).send({
@@ -147,6 +208,187 @@ export default class AssistantManager extends ApiManager {
},
});
+ /**
+ * Converts a video file to audio format using ffmpeg.
+ * @param videoPath The path to the input video file.
+ * @param outputAudioPath The path to the output audio file.
+ * @returns A promise that resolves when the conversion is complete.
+ */
+ function convertVideoToAudio(videoPath: string, outputAudioPath: string): Promise<void> {
+ return new Promise((resolve, reject) => {
+ const ffmpegProcess = spawn('ffmpeg', [
+ '-i',
+ videoPath, // Input file
+ '-vn', // No video
+ '-acodec',
+ 'pcm_s16le', // Audio codec
+ '-ac',
+ '1', // Number of audio channels
+ '-ar',
+ '16000', // Audio sampling frequency
+ '-f',
+ 'wav', // Output format
+ outputAudioPath, // Output file
+ ]);
+
+ ffmpegProcess.on('error', error => {
+ console.error('Error running ffmpeg:', error);
+ reject(error);
+ });
+
+ ffmpegProcess.on('close', code => {
+ if (code === 0) {
+ console.log('Audio extraction complete:', outputAudioPath);
+ resolve();
+ } else {
+ reject(new Error(`ffmpeg exited with code ${code}`));
+ }
+ });
+ });
+ }
+
+ // Register an API route to process a media file (audio or video)
+ // Extracts audio from video files, transcribes the audio using OpenAI Whisper, and provides a summary
+ register({
+ method: Method.POST,
+ subscription: '/processMediaFile',
+ secureHandler: async ({ req, res }) => {
+ const { fileName } = req.body;
+
+ // Ensure the filename is provided
+ if (!fileName) {
+ res.status(400).send({ error: 'Filename is required' });
+ return;
+ }
+
+ try {
+ // Determine the file type and location
+ const isAudio = fileName.toLowerCase().endsWith('.mp3');
+ const directory = isAudio ? Directory.audio : Directory.videos;
+ const filePath = serverPathToFile(directory, fileName);
+
+ // Check if the file exists
+ if (!fs.existsSync(filePath)) {
+ res.status(404).send({ error: 'File not found' });
+ return;
+ }
+
+ console.log(`Processing ${isAudio ? 'audio' : 'video'} file: ${fileName}`);
+
+ // Step 1: Extract audio if it's a video
+ let audioPath = filePath;
+ if (!isAudio) {
+ const audioFileName = `${path.basename(fileName, path.extname(fileName))}.wav`;
+ audioPath = path.join(pathToDirectory(Directory.audio), audioFileName);
+
+ console.log('Extracting audio from video...');
+ await convertVideoToAudio(filePath, audioPath);
+ }
+
+ // Step 2: Transcribe audio using OpenAI Whisper
+ console.log('Transcribing audio...');
+ const transcription = await openai.audio.transcriptions.create({
+ file: fs.createReadStream(audioPath),
+ model: 'whisper-1',
+ response_format: 'verbose_json',
+ timestamp_granularities: ['segment'],
+ });
+
+ console.log('Audio transcription complete.');
+
+ // Step 3: Extract concise JSON
+ console.log('Extracting concise JSON...');
+ const originalSegments = transcription.segments?.map((segment, index) => ({
+ index: index.toString(),
+ text: segment.text,
+ start: segment.start,
+ end: segment.end,
+ }));
+
+ interface ConciseSegment {
+ text: string;
+ indexes: string[];
+ start: number | null;
+ end: number | null;
+ }
+
+ const combinedSegments = [];
+ let currentGroup: ConciseSegment = { text: '', indexes: [], start: null, end: null };
+ let currentDuration = 0;
+
+ originalSegments?.forEach(segment => {
+ const segmentDuration = segment.end - segment.start;
+
+ if (currentDuration + segmentDuration <= 4000) {
+ // Add segment to the current group
+ currentGroup.text += (currentGroup.text ? ' ' : '') + segment.text;
+ currentGroup.indexes.push(segment.index);
+ if (currentGroup.start === null) {
+ currentGroup.start = segment.start;
+ }
+ currentGroup.end = segment.end;
+ currentDuration += segmentDuration;
+ } else {
+ // Push the current group and start a new one
+ combinedSegments.push({ ...currentGroup });
+ currentGroup = {
+ text: segment.text,
+ indexes: [segment.index],
+ start: segment.start,
+ end: segment.end,
+ };
+ currentDuration = segmentDuration;
+ }
+ });
+
+ // Push the final group if it has content
+ if (currentGroup.text) {
+ combinedSegments.push({ ...currentGroup });
+ }
+ const lastSegment = combinedSegments[combinedSegments.length - 1];
+
+ // Check if the last segment is too short and combine it with the second last
+ if (combinedSegments.length > 1 && lastSegment.end && lastSegment.start) {
+ const secondLastSegment = combinedSegments[combinedSegments.length - 2];
+ const lastDuration = lastSegment.end - lastSegment.start;
+
+ if (lastDuration < 30) {
+ // Combine the last segment with the second last
+ secondLastSegment.text += (secondLastSegment.text ? ' ' : '') + lastSegment.text;
+ secondLastSegment.indexes = secondLastSegment.indexes.concat(lastSegment.indexes);
+ secondLastSegment.end = lastSegment.end;
+
+ // Remove the last segment from the array
+ combinedSegments.pop();
+ }
+ }
+
+ console.log('Segments combined successfully.');
+
+ console.log('Generating summary using GPT-4...');
+ const combinedText = combinedSegments.map(segment => segment.text).join(' ');
+
+ let summary = '';
+ try {
+ const completion = await openai.chat.completions.create({
+ messages: [{ role: 'system', content: `Summarize the following text in a concise paragraph:\n\n${combinedText}` }],
+ model: 'gpt-4o',
+ });
+ console.log('Summary generation complete.');
+ summary = completion.choices[0].message.content ?? 'Summary could not be generated.';
+ } catch (summaryError) {
+ console.error('Error generating summary:', summaryError);
+ summary = 'Summary could not be generated.';
+ }
+ // Step 5: Return the JSON result
+ res.send({ full: originalSegments, condensed: combinedSegments, summary });
+ } catch (error) {
+ console.error('Error processing media file:', error);
+ res.status(500).send({ error: 'Failed to process media file' });
+ }
+ },
+ });
+
// Axios instance with custom headers for scraping
const axiosInstance = axios.create({
headers: {
@@ -181,7 +423,38 @@ export default class AssistantManager extends ApiManager {
}
};
- // Register a proxy fetch API route
+ // Register an API route to generate an image using OpenAI's DALL-E model
+ // Uploads the generated image to the server and provides a URL for access
+ register({
+ method: Method.POST,
+ subscription: '/generateImage',
+ secureHandler: async ({ req, res }) => {
+ const { image_prompt } = req.body;
+
+ if (!image_prompt) {
+ res.status(400).send({ error: 'No prompt provided' });
+ return;
+ }
+
+ try {
+ const image = await openai.images.generate({ model: 'dall-e-3', prompt: image_prompt, response_format: 'url' });
+ console.log(image);
+ const result = await DashUploadUtils.UploadImage(image.data[0].url!);
+
+ const url = image.data[0].url;
+
+ res.send({ result, url });
+ } catch (error) {
+ console.error('Error fetching the URL:', error);
+ res.status(500).send({
+ error: 'Failed to fetch the URL',
+ });
+ }
+ },
+ });
+
+ // Register an API route to fetch data from a URL using a proxy with retry logic
+ // Useful for bypassing rate limits or scraping inaccessible data
register({
method: Method.POST,
subscription: '/proxyFetch',
@@ -206,6 +479,7 @@ export default class AssistantManager extends ApiManager {
});
// Register an API route to scrape website content using Puppeteer and JSDOM
+ // Extracts and returns readable content from a given URL
register({
method: Method.POST,
subscription: '/scrapeWebsite',
@@ -246,6 +520,8 @@ export default class AssistantManager extends ApiManager {
},
});
+ // Register an API route to create a document and start a background job for processing
+ // Uses Python scripts to process files and generate document chunks for further use
register({
method: Method.POST,
subscription: '/createDocument',
@@ -263,7 +539,7 @@ export default class AssistantManager extends ApiManager {
// Spawn the Python process and track its progress/output
// eslint-disable-next-line no-use-before-define
- spawnPythonProcess(jobId, file_name, file_data);
+ spawnPythonProcess(jobId, public_path);
// Send the job ID back to the client for tracking
res.send({ jobId });
@@ -277,6 +553,7 @@ export default class AssistantManager extends ApiManager {
});
// Register an API route to check the progress of a document creation job
+ // Returns the current step and progress percentage
register({
method: Method.GET,
subscription: '/getProgress/:jobId',
@@ -294,59 +571,30 @@ export default class AssistantManager extends ApiManager {
},
});
- // Register an API route to get the final result of a document creation job
+ // Register an API route to retrieve the final result of a document creation job
+ // Returns the processed data or an error status if the job is incomplete
register({
method: Method.GET,
subscription: '/getResult/:jobId',
secureHandler: async ({ req, res }) => {
- const { jobId } = req.params; // Get the job ID from the URL parameters
- // Check if the job result is available
+ const { jobId } = req.params;
if (jobResults[jobId]) {
const result = jobResults[jobId] as AI_Document & { status: string };
- // If the result contains image or table chunks, save the base64 data as image files
if (result.chunks && Array.isArray(result.chunks)) {
- await Promise.all(
- result.chunks.map(chunk => {
- if (chunk.metadata && (chunk.metadata.type === 'image' || chunk.metadata.type === 'table')) {
- const files_directory = '/files/chunk_images/';
- const directory = path.join(publicDirectory, files_directory);
-
- // Ensure the directory exists or create it
- if (!fs.existsSync(directory)) {
- fs.mkdirSync(directory);
- }
-
- const fileName = path.basename(chunk.metadata.file_path); // Get the file name from the path
- const filePath = path.join(directory, fileName); // Create the full file path
-
- // Check if the chunk contains base64 encoded data
- if (chunk.metadata.base64_data) {
- // Decode the base64 data and write it to a file
- const buffer = Buffer.from(chunk.metadata.base64_data, 'base64');
- fs.promises.writeFile(filePath, buffer).then(() => {
- // Update the file path in the chunk's metadata
- chunk.metadata.file_path = path.join(files_directory, fileName);
- chunk.metadata.base64_data = undefined; // Remove the base64 data from the metadata
- });
- } else {
- console.warn(`No base64_data found for chunk: ${fileName}`);
- }
- }
- })
- );
result.status = 'completed';
} else {
result.status = 'pending';
}
- res.json(result); // Send the result back to the client
+ res.json(result);
} else {
res.status(202).send({ status: 'pending' });
}
},
});
- // Register an API route to format chunks (e.g., text or image chunks) for display
+ // Register an API route to format chunks of text or images for structured display
+ // Converts raw chunk data into a structured format for frontend consumption
register({
method: Method.POST,
subscription: '/formatChunks',
@@ -367,7 +615,8 @@ export default class AssistantManager extends ApiManager {
// If the chunk is an image or table, read the corresponding file and encode it as base64
if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') {
try {
- const filePath = serverPathToFile(Directory.chunk_images, chunk.metadata.file_path); // Get the file path
+ const filePath = path.join(pathToDirectory(Directory.chunk_images), chunk.metadata.file_path); // Get the file path
+ console.log(filePath);
readFileAsync(filePath).then(imageBuffer => {
const base64Image = imageBuffer.toString('base64'); // Convert the image to base64
@@ -401,6 +650,7 @@ export default class AssistantManager extends ApiManager {
});
// Register an API route to create and save a CSV file on the server
+ // Writes the CSV content to a unique file and provides a URL for download
register({
method: Method.POST,
subscription: '/createCSV',
@@ -440,15 +690,23 @@ export default class AssistantManager extends ApiManager {
}
}
-function spawnPythonProcess(jobId: string, file_name: string, file_data: string) {
+/**
+ * Spawns a Python process to handle file processing tasks.
+ * @param jobId The job ID for tracking progress.
+ * @param file_name The name of the file to process.
+ * @param file_path The filepath of the file to process.
+ */
+function spawnPythonProcess(jobId: string, file_path: string) {
const venvPath = path.join(__dirname, '../chunker/venv');
const requirementsPath = path.join(__dirname, '../chunker/requirements.txt');
const pythonScriptPath = path.join(__dirname, '../chunker/pdf_chunker.py');
+ const outputDirectory = pathToDirectory(Directory.chunk_images);
+
function runPythonScript() {
const pythonPath = process.platform === 'win32' ? path.join(venvPath, 'Scripts', 'python') : path.join(venvPath, 'bin', 'python3');
- const pythonProcess = spawn(pythonPath, [pythonScriptPath, jobId, file_name, file_data]);
+ const pythonProcess = spawn(pythonPath, [pythonScriptPath, jobId, file_path, outputDirectory]);
let pythonOutput = '';
let stderrOutput = '';
@@ -460,23 +718,30 @@ function spawnPythonProcess(jobId: string, file_name: string, file_data: string)
pythonProcess.stderr.on('data', data => {
stderrOutput += data.toString();
const lines = stderrOutput.split('\n');
+ stderrOutput = lines.pop() || ''; // Save the last partial line back to stderrOutput
lines.forEach(line => {
if (line.trim()) {
- try {
- const parsedOutput = JSON.parse(line);
- if (parsedOutput.job_id && parsedOutput.progress !== undefined) {
- jobProgress[parsedOutput.job_id] = {
- step: parsedOutput.step,
- progress: parsedOutput.progress,
- };
- } else if (parsedOutput.progress !== undefined) {
- jobProgress[jobId] = {
- step: parsedOutput.step,
- progress: parsedOutput.progress,
- };
+ if (line.startsWith('PROGRESS:')) {
+ const jsonString = line.substring('PROGRESS:'.length);
+ try {
+ const parsedOutput = JSON.parse(jsonString);
+ if (parsedOutput.job_id && parsedOutput.progress !== undefined) {
+ jobProgress[parsedOutput.job_id] = {
+ step: parsedOutput.step,
+ progress: parsedOutput.progress,
+ };
+ } else if (parsedOutput.progress !== undefined) {
+ jobProgress[jobId] = {
+ step: parsedOutput.step,
+ progress: parsedOutput.progress,
+ };
+ }
+ } catch (err) {
+ console.error('Error parsing progress JSON:', jsonString, err);
}
- } catch (err) {
- console.error('Progress log from Python:', line, err);
+ } else {
+ // Log other stderr output
+ console.error('Python stderr:', line);
}
}
});
@@ -490,10 +755,24 @@ function spawnPythonProcess(jobId: string, file_name: string, file_data: string)
jobProgress[jobId] = { step: 'Complete', progress: 100 };
} catch (err) {
console.error('Error parsing final JSON result:', err);
+ jobResults[jobId] = { error: 'Failed to parse final result' };
}
} else {
console.error(`Python process exited with code ${code}`);
- jobResults[jobId] = { error: 'Python process failed' };
+ // Check if there was an error message in stderr
+ if (stderrOutput) {
+ // Try to parse the last line as JSON
+ const lines = stderrOutput.trim().split('\n');
+ const lastLine = lines[lines.length - 1];
+ try {
+ const errorOutput = JSON.parse(lastLine);
+ jobResults[jobId] = errorOutput;
+ } catch {
+ jobResults[jobId] = { error: 'Python process failed' };
+ }
+ } else {
+ jobResults[jobId] = { error: 'Python process failed' };
+ }
}
});
}
diff --git a/src/server/ApiManagers/DataVizManager.ts b/src/server/ApiManagers/DataVizManager.ts
index 88f22992d..d2028f23b 100644
--- a/src/server/ApiManagers/DataVizManager.ts
+++ b/src/server/ApiManagers/DataVizManager.ts
@@ -9,7 +9,7 @@ export default class DataVizManager extends ApiManager {
register({
method: Method.GET,
subscription: '/csvData',
- secureHandler: async ({ req, res }) => {
+ secureHandler: ({ req, res }) => {
const uri = req.query.uri as string;
return new Promise<void>(resolve => {
diff --git a/src/server/ApiManagers/FireflyManager.ts b/src/server/ApiManagers/FireflyManager.ts
new file mode 100644
index 000000000..e75ede9df
--- /dev/null
+++ b/src/server/ApiManagers/FireflyManager.ts
@@ -0,0 +1,410 @@
+import axios from 'axios';
+import { Dropbox } from 'dropbox';
+import * as fs from 'fs';
+import * as multipart from 'parse-multipart-data';
+import * as path from 'path';
+import { DashUserModel } from '../authentication/DashUserModel';
+import { DashUploadUtils } from '../DashUploadUtils';
+import { _error, _invalid, _success, Method } from '../RouteManager';
+import { Directory, filesDirectory } from '../SocketData';
+import ApiManager, { Registration } from './ApiManager';
+
+export default class FireflyManager extends ApiManager {
+ getBearerToken = () =>
+ fetch('https://ims-na1.adobelogin.com/ims/token/v3', {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/x-www-form-urlencoded',
+ },
+ body: `grant_type=client_credentials&client_id=${process.env._CLIENT_FIREFLY_CLIENT_ID}&client_secret=${process.env._CLIENT_FIREFLY_SECRET}&scope=openid,AdobeID,session,additional_info,read_organizations,firefly_api,ff_apis`,
+ }).catch(error => {
+ console.error('Error:', error);
+ return undefined;
+ });
+
+ generateImageFromStructure = (prompt: string = 'a realistic illustration of a cat coding', width: number = 2048, height: number = 2048, structureUrl: string, strength: number = 50, styles: string[], styleUrl: string | undefined) =>
+ this.getBearerToken().then(response =>
+ response?.json().then((data: { access_token: string }) =>
+ //prettier-ignore
+ fetch('https://firefly-api.adobe.io/v3/images/generate', {
+ method: 'POST',
+ headers: [
+ ['Content-Type', 'application/json'],
+ ['Accept', 'application/json'],
+ ['x-api-key', process.env._CLIENT_FIREFLY_CLIENT_ID ?? ''],
+ ['Authorization', `Bearer ${data.access_token}`],
+ ],
+ body: JSON.stringify({
+ prompt,
+ numVariations: 4,
+ detailLevel: 'preview',
+ modelVersion: 'image3_fast',
+ size: { width, height },
+ structure: !structureUrl
+ ? undefined
+ : {
+ strength,
+ imageReference: {
+ source: { url: structureUrl },
+ },
+ },
+ // prettier-ignore
+ style: {
+ presets: styles,
+ imageReference : !styleUrl
+ ? undefined
+ : {
+ source: { url: styleUrl },
+ }
+ }
+ }),
+ })
+ .then(response2 => response2.json().then(json =>
+ {
+ if (json.outputs?.length)
+ return (json.outputs as {image: {url:string }}[]).map(output => output.image);
+ throw new Error(JSON.stringify(json));
+ })
+ )
+ )
+ );
+
+ uploadImageToDropbox = (fileUrl: string, user: DashUserModel | undefined, dbx = new Dropbox({ accessToken: user?.dropboxToken || '' })) =>
+ new Promise<string | Error>((res, rej) =>
+ fs.readFile(path.join(filesDirectory, `${Directory.images}/${path.basename(fileUrl)}`), undefined, (err, contents) => {
+ if (err) {
+ console.log('Error: ', err);
+ rej();
+ } else {
+ dbx.filesUpload({ path: `/Apps/browndash/${path.basename(fileUrl)}`, contents })
+ .then(response => {
+ dbx.filesGetTemporaryLink({ path: response.result.path_display ?? '' })
+ .then(link => res(link.result.link))
+ .catch(e => res(new Error(e.toString())));
+ })
+ .catch(e => {
+ if (user?.dropboxRefresh) {
+ console.log('*********** try refresh dropbox for: ' + user.email + ' ***********');
+ this.refreshDropboxToken(user).then(token => {
+ if (!token) {
+ console.log('Dropbox error: cannot refresh token');
+ res(new Error(e.toString()));
+ } else {
+ const dbxNew = new Dropbox({ accessToken: user.dropboxToken || '' });
+ dbxNew
+ .filesUpload({ path: `/Apps/browndash/${path.basename(fileUrl)}`, contents })
+ .then(response => {
+ dbxNew
+ .filesGetTemporaryLink({ path: response.result.path_display ?? '' })
+ .then(link => res(link.result.link))
+ .catch(linkErr => res(new Error(linkErr.toString())));
+ })
+ .catch(uploadErr => {
+ console.log('Dropbox error:', uploadErr);
+ res(new Error(uploadErr.toString()));
+ });
+ }
+ });
+ } else {
+ console.log('Dropbox error:', e);
+ res(new Error(e.toString()));
+ }
+ });
+ }
+ })
+ );
+
+ generateImage = (prompt: string = 'a realistic illustration of a cat coding', width: number = 2048, height: number = 2048, seed?: number) => {
+ let body = `{ "prompt": "${prompt}", "size": { "width": ${width}, "height": ${height}} }`;
+ if (seed) {
+ console.log('RECEIVED SEED', seed);
+ body = `{ "prompt": "${prompt}", "size": { "width": ${width}, "height": ${height}}, "seeds": [${seed}]}`;
+ }
+ const fetched = this.getBearerToken().then(response =>
+ response?.json().then((data: { access_token: string }) =>
+ fetch('https://firefly-api.adobe.io/v3/images/generate', {
+ method: 'POST',
+ headers: [
+ ['Content-Type', 'application/json'],
+ ['Accept', 'application/json'],
+ ['x-api-key', process.env._CLIENT_FIREFLY_CLIENT_ID ?? ''],
+ ['Authorization', `Bearer ${data.access_token}`],
+ ],
+ body: body,
+ })
+ .then(response2 => response2.json())
+ .then(json => (json.error_code ? json : { seed: json.outputs?.[0]?.seed, url: json.outputs?.[0]?.image?.url }))
+ .catch(error => {
+ console.error('Error:', error);
+ return undefined;
+ })
+ )
+ );
+ return fetched;
+ };
+ expandImage = (imgUrl: string, prompt?: string) => {
+ const dropboxImgUrl = imgUrl;
+ const fetched = this.getBearerToken().then(response =>
+ response
+ ?.json()
+ .then((data: { access_token: string }) => {
+ return fetch('https://firefly-api.adobe.io/v3/images/expand', {
+ method: 'POST',
+ headers: [
+ ['Content-Type', 'application/json'],
+ ['Accept', 'application/json'],
+ ['x-api-key', process.env._CLIENT_FIREFLY_CLIENT_ID ?? ''],
+ ['Authorization', `Bearer ${data.access_token}`],
+ ],
+ body: JSON.stringify({
+ image: {
+ source: {
+ url: dropboxImgUrl,
+ },
+ },
+ numVariations: 1,
+ seeds: [0],
+ size: {
+ width: 3048,
+ height: 2048,
+ },
+ prompt: prompt ?? 'cloudy skies',
+ placement: {
+ inset: {
+ left: 0,
+ top: 0,
+ right: 0,
+ bottom: 0,
+ },
+ alignment: {
+ horizontal: 'center',
+ vertical: 'center',
+ },
+ },
+ }),
+ });
+ })
+ .then(resp => resp.json())
+ );
+ return fetched;
+ };
+ getImageText = (imageBlob: Blob) => {
+ const inputFileVarName = 'infile';
+ const outputVarName = 'result';
+ const fetched = this.getBearerToken().then(response =>
+ response?.json().then((data: { access_token: string }) => {
+ return fetch('https://sensei.adobe.io/services/v2/predict', {
+ method: 'POST',
+ headers: [
+ ['Prefer', 'respond-async, wait=59'],
+ ['x-api-key', process.env._CLIENT_FIREFLY_CLIENT_ID ?? ''],
+ // ['content-type', 'multipart/form-data'], // bcz: Don't set this!! content-type will get set automatically including the Boundary string
+ ['Authorization', `Bearer ${data.access_token}`],
+ ],
+ body: ((form: FormData) => {
+ form.set(inputFileVarName, imageBlob);
+ form.set(
+ 'contentAnalyzerRequests',
+ JSON.stringify({
+ 'sensei:name': 'Feature:cintel-object-detection:Service-b9ace8b348b6433e9e7d82371aa16690',
+ 'sensei:invocation_mode': 'asynchronous',
+ 'sensei:invocation_batch': false,
+ 'sensei:engines': [
+ {
+ 'sensei:execution_info': {
+ 'sensei:engine': 'Feature:cintel-object-detection:Service-b9ace8b348b6433e9e7d82371aa16690',
+ },
+ 'sensei:inputs': {
+ documents: [
+ {
+ 'sensei:multipart_field_name': inputFileVarName,
+ 'dc:format': 'image/png',
+ },
+ ],
+ },
+ 'sensei:params': {
+ correct_with_dictionary: true,
+ },
+ 'sensei:outputs': {
+ result: {
+ 'sensei:multipart_field_name': outputVarName,
+ 'dc:format': 'application/json',
+ },
+ },
+ },
+ ],
+ })
+ );
+ return form;
+ })(new FormData()),
+ }).then(response2 => {
+ const contentType = response2.headers.get('content-type') ?? '';
+ if (contentType.includes('application/json')) {
+ return response2.json().then((json: object) => JSON.stringify(json));
+ }
+ if (contentType.includes('multipart')) {
+ return response2
+ .arrayBuffer()
+ .then(arrayBuffer =>
+ multipart
+ .parse(Buffer.from(arrayBuffer), 'Boundary' + (response2.headers.get('content-type')?.match(/=Boundary(.*);/)?.[1] ?? ''))
+ .filter(part => part.name === outputVarName)
+ .map(part => JSON.parse(part.data.toString())[0])
+ .reduce((text, json) => text + (json?.is_text_present ? json.tags.map((tag: { text: string }) => tag.text).join(' ') : ''), '')
+ )
+ .catch(error => {
+ console.error('Error:', error);
+ return '';
+ });
+ }
+ return response2.text();
+ });
+ })
+ );
+ return fetched;
+ };
+
+ refreshDropboxToken = (user: DashUserModel) =>
+ axios
+ .post(
+ 'https://api.dropbox.com/oauth2/token',
+ new URLSearchParams({
+ refresh_token: user.dropboxRefresh || '',
+ grant_type: 'refresh_token',
+ client_id: process.env._CLIENT_DROPBOX_CLIENT_ID || '',
+ client_secret: process.env._CLIENT_DROPBOX_SECRET || '',
+ }).toString()
+ )
+ .then(refresh => {
+ console.log('***** dropbox token refreshed for ' + user?.email + ' ******* ');
+ user.dropboxToken = refresh.data.access_token;
+ user.save();
+ return user.dropboxToken;
+ })
+ .catch(e => {
+ console.log(e);
+ return undefined;
+ });
+
+ protected initialize(register: Registration): void {
+ register({
+ method: Method.POST,
+ subscription: '/queryFireflyImageFromStructure',
+ secureHandler: ({ req, res }) =>
+ new Promise<void>(resolver => {
+ (req.body.styleUrl ? this.uploadImageToDropbox(req.body.styleUrl, req.user as DashUserModel) : Promise.resolve(undefined))
+ .then(styleUrl => {
+ if (styleUrl instanceof Error) {
+ _invalid(res, styleUrl.message);
+ throw new Error('Error uploading images to dropbox');
+ }
+ this.uploadImageToDropbox(req.body.structureUrl, req.user as DashUserModel)
+ .then(dropboxStructureUrl => {
+ if (dropboxStructureUrl instanceof Error) {
+ _invalid(res, dropboxStructureUrl.message);
+ throw new Error('Error uploading images to dropbox');
+ }
+ return { styleUrl, structureUrl: dropboxStructureUrl };
+ })
+ .then(uploads =>
+ this.generateImageFromStructure(req.body.prompt, req.body.width, req.body.height, uploads.structureUrl, req.body.strength, req.body.presets, uploads.styleUrl)
+ .then(images => {
+ Promise.all((images ?? [new Error('no images were generated')]).map(fire => (fire instanceof Error ? fire : DashUploadUtils.UploadImage(fire.url))))
+ .then(dashImages => {
+ if (dashImages.every(img => img instanceof Error)) _invalid(res, dashImages[0]!.message);
+ else _success(res, JSON.stringify(dashImages.filter(img => !(img instanceof Error))));
+ })
+ .then(resolver);
+ })
+ .catch(e => {
+ _invalid(res, e.message);
+ resolver();
+ })
+ );
+ })
+ .catch(() => {
+ /* do nothing */
+ resolver();
+ });
+ }),
+ });
+ register({
+ method: Method.POST,
+ subscription: '/queryFireflyImage',
+ secureHandler: ({ req, res }) =>
+ this.generateImage(req.body.prompt, req.body.width, req.body.height, req.body.seed).then(img =>
+ img.error_code
+ ? _invalid(res, img.message)
+ : DashUploadUtils.UploadImage(img?.url ?? '', undefined, img?.seed).then(info => {
+ if (info instanceof Error) _invalid(res, info.message);
+ else _success(res, info);
+ })
+ ),
+ });
+
+ register({
+ method: Method.POST,
+ subscription: '/queryFireflyImageText',
+ secureHandler: ({ req, res }) =>
+ fetch(req.body.file).then(json =>
+ json.blob().then(file =>
+ this.getImageText(file).then(text => {
+ _success(res, text);
+ })
+ )
+ ),
+ });
+ register({
+ method: Method.POST,
+ subscription: '/expandImage',
+ secureHandler: ({ req, res }) =>
+ this.uploadImageToDropbox(req.body.file, req.user as DashUserModel).then(uploadUrl =>
+ uploadUrl instanceof Error
+ ? _invalid(res, uploadUrl.message)
+ : this.expandImage(uploadUrl, req.body.prompt).then(text => {
+ if (text.error_code) _error(res, text.message);
+ else
+ DashUploadUtils.UploadImage(text.outputs[0].image.url).then(info => {
+ if (info instanceof Error) _invalid(res, info.message);
+ else _success(res, info);
+ });
+ })
+ ),
+ });
+
+ // construct this url and send user to it. It will allow them to authorize their dropbox account and will send the resulting token to our endpoint /refreshDropbox
+ // https://www.dropbox.com/oauth2/authorize?client_id=DROPBOX_CLIENT_ID&response_type=code&token_access_type=offline&redirect_uri=http://localhost:1050/refreshDropbox
+ // see: https://dropbox.tech/developers/using-oauth-2-0-with-offline-access
+ //
+ register({
+ method: Method.GET,
+ subscription: '/refreshDropbox',
+ secureHandler: ({ req, res }) => {
+ const user = req.user as DashUserModel;
+ console.log(`******************* dropbox authorized for ${user?.email} ******************`);
+ _success(res, 'dropbox authorized for ' + user?.email);
+
+ const data = new URLSearchParams({
+ code: req.query.code as string,
+ grant_type: 'authorization_code',
+ client_id: process.env._CLIENT_DROPBOX_CLIENT_ID ?? '',
+ client_secret: process.env._CLIENT_DROPBOX_SECRET ?? '',
+ redirect_uri: 'http://localhost:1050/refreshDropbox',
+ });
+ axios
+ .post('https://api.dropbox.com/oauth2/token', data.toString())
+ .then(response => {
+ console.log('***** dropbox token (and refresh) received for ' + user?.email + ' ******* ');
+ user.dropboxToken = response.data.access_token;
+ user.dropboxRefresh = response.data.refresh_token;
+ user.save();
+
+ setTimeout(() => this.refreshDropboxToken(user), response.data.expires_in - 600);
+ })
+ .catch(e => {
+ console.log(e);
+ });
+ },
+ });
+ }
+}
diff --git a/src/server/ApiManagers/UploadManager.ts b/src/server/ApiManagers/UploadManager.ts
index 868373474..c9d5df547 100644
--- a/src/server/ApiManagers/UploadManager.ts
+++ b/src/server/ApiManagers/UploadManager.ts
@@ -70,10 +70,16 @@ export default class UploadManager extends ApiManager {
]);
} else {
fileguids.split(';').map(guid => DashUploadUtils.uploadProgress.set(guid, `resampling images`));
+ // original filenames with '.'s, such as a Macbook screenshot, can be a problem - their extension is not kept in formidable's newFilename.
+ // This makes sure that the extension is preserved in the newFilename.
+ const fixNewFilename = (f: formidable.File) => {
+ if (path.extname(f.originalFilename ?? '') !== path.extname(f.newFilename)) f.newFilename = f.newFilename + path.extname(f.originalFilename ?? '');
+ return f;
+ };
const results = (
await Promise.all(
Array.from(Object.keys(files)).map(
- async key => (!files[key] ? undefined : DashUploadUtils.upload(files[key]![0] /* , key */)) // key is the guid used by the client to track upload progress.
+ async key => (!files[key] ? undefined : DashUploadUtils.upload(fixNewFilename(files[key][0]) /* , key */)) // key is the guid used by the client to track upload progress.
)
)
).filter(result => result && !(result.result instanceof Error));
@@ -147,13 +153,10 @@ export default class UploadManager extends ApiManager {
if (doc.id) {
doc.id = getId(doc.id);
}
- // eslint-disable-next-line no-restricted-syntax
for (const key in doc.fields) {
- // eslint-disable-next-line no-continue
if (!Object.prototype.hasOwnProperty.call(doc.fields, key)) continue;
const field = doc.fields[key];
- // eslint-disable-next-line no-continue
if (field === undefined || field === null) continue;
if (field.__type === 'Doc') {
@@ -182,11 +185,9 @@ export default class UploadManager extends ApiManager {
let docids: string[] = [];
let linkids: string[] = [];
try {
- // eslint-disable-next-line no-restricted-syntax
for (const name in files) {
if (Object.prototype.hasOwnProperty.call(files, name)) {
const f = files[name];
- // eslint-disable-next-line no-continue
if (!f) continue;
const path2 = f[0]; // what about the rest of the array? are we guaranteed only one value is set?
const zip = new AdmZip(path2.filepath);
@@ -273,14 +274,20 @@ export default class UploadManager extends ApiManager {
.filter(f => regex.test(f))
.map(f => fs.unlinkSync(serverPath + f));
}
- imageDataUri.outputFile(uri, serverPathToFile(Directory.images, InjectSize(filename, origSuffix))).then((savedName: string) => {
- const ext = path.extname(savedName).toLowerCase();
- const outputPath = serverPathToFile(Directory.images, filename + ext);
- if (AcceptableMedia.imageFormats.includes(ext)) {
- workerResample(savedName, outputPath, origSuffix, false);
- }
- res.send(clientPathToFile(Directory.images, filename + ext));
- });
+ imageDataUri
+ .outputFile(uri, serverPathToFile(Directory.images, InjectSize(filename, origSuffix)))
+ .then((savedName: string) => {
+ const ext = path.extname(savedName).toLowerCase();
+ const outputPath = serverPathToFile(Directory.images, filename + ext);
+ if (AcceptableMedia.imageFormats.includes(ext)) {
+ workerResample(savedName, outputPath, origSuffix, false);
+ }
+ res.send(clientPathToFile(Directory.images, filename + ext));
+ })
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
+ .catch((e: any) => {
+ res.status(404).json({ error: e.toString() });
+ });
},
});
}
diff --git a/src/server/DashUploadUtils.ts b/src/server/DashUploadUtils.ts
index 1e55a885a..a2747257a 100644
--- a/src/server/DashUploadUtils.ts
+++ b/src/server/DashUploadUtils.ts
@@ -1,3 +1,4 @@
+/* eslint-disable no-use-before-define */
import axios from 'axios';
import { exec, spawn } from 'child_process';
import { green, red } from 'colors';
@@ -21,8 +22,8 @@ import { AzureManager } from './ApiManagers/AzureManager';
import { AcceptableMedia, Upload } from './SharedMediaTypes';
import { Directory, clientPathToFile, filesDirectory, pathToDirectory, publicDirectory, serverPathToFile } from './SocketData';
import { resolvedServerUrl } from './server_Initialization';
-
import { Worker, isMainThread, parentPort } from 'worker_threads';
+import requestImageSize from '../client/util/request-image-size';
// Create an array to store worker threads
enum workertasks {
@@ -47,22 +48,21 @@ if (isMainThread) {
async function workerResampleImage(message: { imgSourcePath: string; outputPath: string; origSuffix: string; unlinkSource: boolean }) {
const { imgSourcePath, outputPath, origSuffix, unlinkSource } = message;
- const sizes = !origSuffix ? [{ width: 400, suffix: SizeSuffix.Medium }] : DashUploadUtils.imageResampleSizes(path.extname(imgSourcePath));
+ const extension = path.extname(imgSourcePath);
+ const sizes = !origSuffix ? [{ width: 400, suffix: SizeSuffix.Medium }] : DashUploadUtils.imageResampleSizes(extension === '.xml' ? '.png' : extension);
// prettier-ignore
Jimp.read(imgSourcePath)
.then(img =>
sizes.forEach(({ width, suffix }) =>
img.resize({ w: width || img.bitmap.width })
- .write(InjectSize(outputPath, suffix) as `${string}.${string}`)
+ .write(InjectSize(outputPath, suffix) as `${string}.${string}`)
+ .catch(e => console.log("Jimp error:", e))
))
.catch(e => console.log('Error Jimp:', e))
.finally(() => unlinkSource && unlinkSync(imgSourcePath));
}
}
-// eslint-disable-next-line @typescript-eslint/no-var-requires
-const requestImageSize = require('../client/util/request-image-size');
-
export enum SizeSuffix {
Small = '_s',
Medium = '_m',
@@ -221,7 +221,6 @@ export namespace DashUploadUtils {
const parseExifData = async (source: string) => {
const image = await request.get(source, { encoding: null });
const { /* data, */ error } = await new Promise<{ data: ExifData; error: string | undefined }>(resolve => {
- // eslint-disable-next-line no-new
new ExifImage({ image }, (exifError, data) => {
resolve({ data, error: exifError?.message });
});
@@ -300,7 +299,6 @@ export namespace DashUploadUtils {
// Bundle up the information into an object
return {
source,
- // eslint-disable-next-line radix
contentSize: parseInt(headers[size]),
contentType: headers[type],
nativeWidth,
@@ -343,15 +341,24 @@ export namespace DashUploadUtils {
const outputPath = path.resolve(pathToDirectory(Directory.images), outputFileName);
const sizes = imageResampleSizes(path.extname(outputFileName));
- const imgReadStream = new Duplex();
- imgReadStream.push(fs.readFileSync(imgSourcePath));
- imgReadStream.push(null);
- await Promise.all(
- sizes.map(({ suffix }) =>
- new Promise<unknown>(res =>
- imgReadStream.pipe(createWriteStream(writtenFiles[suffix] = InjectSize(outputPath, suffix))).on('close', res)
- )
- )); // prettier-ignore
+ if (unlinkSource) {
+ const imgReadStream = new Duplex();
+ imgReadStream.push(fs.readFileSync(imgSourcePath));
+ imgReadStream.push(null);
+ await Promise.all(
+ sizes.map(({ suffix }) =>
+ new Promise<void>(res =>
+ imgReadStream.pipe(createWriteStream(writtenFiles[suffix] = InjectSize(outputPath, suffix))).on('close', res)
+ )
+ )); // prettier-ignore
+ } else {
+ await Promise.all(
+ sizes.map(({ suffix }) =>
+ new Promise<void>(res =>
+ request.get(imgSourcePath).pipe(createWriteStream(writtenFiles[suffix] = InjectSize(outputPath, suffix))).on('close', res)
+ )
+ )); // prettier-ignore
+ }
workerResample(imgSourcePath, outputPath, SizeSuffix.Original, unlinkSource);
return writtenFiles;
@@ -368,8 +375,9 @@ export namespace DashUploadUtils {
* @returns the accessPaths for the resized files.
*/
export const UploadInspectedImage = async (metadata: Upload.InspectionResults, filename: string, prefix = '', cleanUp = true): Promise<Upload.ImageInformation> => {
- const { requestable, source, ...remaining } = metadata;
- const resolved = filename || `${prefix}upload_${Utils.GenerateGuid()}.${remaining.contentType.split('/')[1].toLowerCase()}`;
+ const { requestable, ...remaining } = metadata;
+ const dfltSuffix = remaining.contentType.split('/')[1].toLowerCase();
+ const resolved = filename || `${prefix}upload_${Utils.GenerateGuid()}.${dfltSuffix === 'xml' ? 'jpg' : dfltSuffix}`;
const { images } = Directory;
const information: Upload.ImageInformation = {
accessPaths: {
@@ -400,10 +408,10 @@ export namespace DashUploadUtils {
writtenFiles = {};
}
} else {
- const unlinkSrcWhenFinished = isLocal().test(source) && cleanUp;
+ const unlinkSrcWhenFinished = cleanUp; // isLocal().test(source) && cleanUp;
try {
writtenFiles = await outputResizedImages(metadata.source, resolved, unlinkSrcWhenFinished);
- } catch (e) {
+ } catch {
// input is a blob or other, try reading it to create a metadata source file.
const reqSource = request(metadata.source);
const readStream: Stream = reqSource instanceof Promise ? await reqSource : reqSource;
@@ -415,7 +423,7 @@ export namespace DashUploadUtils {
.on('error', () => rej());
});
writtenFiles = await outputResizedImages(readSource, resolved, unlinkSrcWhenFinished);
- fs.unlink(readSource, err => console.log("Couldn't unlink temporary image file:" + readSource, err));
+ //fs.unlink(readSource, err => console.log("Couldn't unlink temporary image file:" + readSource, err));
}
}
Array.from(Object.keys(writtenFiles)).forEach(suffix => {
@@ -448,8 +456,7 @@ export namespace DashUploadUtils {
return { name: result.name, message: result.message };
}
const outputFile = filename || result.filename || '';
-
- return UploadInspectedImage(result, outputFile, prefix);
+ return UploadInspectedImage(result, outputFile, prefix, isLocal().exec(source) || source.startsWith('data:') ? true : false);
};
type md5 = 'md5';
@@ -567,7 +574,9 @@ export namespace DashUploadUtils {
switch (category) {
case 'image':
if (imageFormats.includes(format)) {
- const result = await UploadImage(filepath, basename(filepath));
+ const outputName = basename(filepath);
+ const extname = path.extname(originalFilename ?? '');
+ const result = await UploadImage(filepath, outputName.endsWith(extname) ? outputName : outputName + extname, undefined);
return { source: file, result };
}
fs.unlink(filepath, () => {});
diff --git a/src/server/RouteManager.ts b/src/server/RouteManager.ts
index d8e0455f6..2f6cf80b5 100644
--- a/src/server/RouteManager.ts
+++ b/src/server/RouteManager.ts
@@ -39,8 +39,7 @@ export function _success(res: Response, body: any) {
}
export function _invalid(res: Response, message: string) {
- res.statusMessage = message;
- res.status(STATUS.BAD_REQUEST).send();
+ res.status(STATUS.BAD_REQUEST).send(message);
}
export function _permissionDenied(res: Response, message?: string) {
diff --git a/src/server/apis/google/GoogleApiServerUtils.ts b/src/server/apis/google/GoogleApiServerUtils.ts
index 21c405bee..7373df473 100644
--- a/src/server/apis/google/GoogleApiServerUtils.ts
+++ b/src/server/apis/google/GoogleApiServerUtils.ts
@@ -1,7 +1,7 @@
+/* eslint-disable no-use-before-define */
import { GaxiosResponse } from 'gaxios';
import { Credentials, OAuth2Client, OAuth2ClientOptions } from 'google-auth-library';
import { google } from 'googleapis';
-import * as qs from 'query-string';
import * as request from 'request-promise';
import { Opt } from '../../../fields/Doc';
import { Database } from '../../database';
@@ -21,7 +21,6 @@ const scope = ['documents.readonly', 'documents', 'presentations', 'presentation
* This namespace manages server side authentication for Google API queries, either
* from the standard v1 APIs or the Google Photos REST API.
*/
-
export namespace GoogleApiServerUtils {
/**
* As we expand out to more Google APIs that are accessible from
@@ -71,29 +70,29 @@ export namespace GoogleApiServerUtils {
/**
* A briefer format for the response from a 'googleapis' API request
*/
- export type ApiResponse = Promise<GaxiosResponse>;
+ export type ApiResponse = Promise<GaxiosResponse<unknown>>;
/**
* A generic form for a handler that executes some request on the endpoint
*/
- export type ApiRouter = (endpoint: Endpoint, parameters: any) => ApiResponse;
+ export type ApiRouter = (endpoint: Endpoint, parameters: Record<string, unknown>) => ApiResponse;
/**
* A generic form for the asynchronous function that actually submits the
- * request to the API and returns the corresporing response. Helpful when
+ * request to the API and returns the corresponding response. Helpful when
* making an extensible endpoint definition.
*/
- export type ApiHandler = (parameters: any, methodOptions?: any) => ApiResponse;
+ export type ApiHandler = (parameters: Record<string, unknown>, methodOptions?: Record<string, unknown>) => ApiResponse;
/**
* A literal union type indicating the valid actions for these 'googleapis'
- * requestions
+ * requests
*/
export type Action = 'create' | 'retrieve' | 'update';
/**
* An interface defining any entity on which one can invoke
- * anuy of the following handlers. All 'googleapis' wrappers
+ * any of the following handlers. All 'googleapis' wrappers
* such as google.docs().documents and google.slides().presentations
* satisfy this interface.
*/
@@ -109,7 +108,7 @@ export namespace GoogleApiServerUtils {
* of needless duplicate clients that would arise from
* making one new client instance per request.
*/
- const authenticationClients = new Map<String, OAuth2Client>();
+ const authenticationClients = new Map<string, OAuth2Client>();
/**
* This function receives the target sector ("which G-Suite app's API am I interested in?")
@@ -120,23 +119,21 @@ export namespace GoogleApiServerUtils {
* @returns the relevant 'googleapis' wrapper, if any
*/
export async function GetEndpoint(sector: string, userId: string): Promise<Endpoint | void> {
- return new Promise(async resolve => {
- const auth = await retrieveOAuthClient(userId);
- if (!auth) {
- return resolve();
- }
- let routed: Opt<Endpoint>;
- const parameters: any = { auth, version: 'v1' };
- switch (sector) {
- case Service.Documents:
- routed = google.docs(parameters).documents;
- break;
- case Service.Slides:
- routed = google.slides(parameters).presentations;
- break;
- }
- resolve(routed);
- });
+ const auth = await retrieveOAuthClient(userId);
+ if (!auth) {
+ return;
+ }
+ let routed: Opt<Endpoint>;
+ const parameters: { version: 'v1' } = { /* auth, */ version: 'v1' }; ///* auth: OAuth2Client;*/
+ switch (sector) {
+ case Service.Documents:
+ routed = google.docs(parameters).documents;
+ break;
+ case Service.Slides:
+ routed = google.slides(parameters).presentations;
+ break;
+ }
+ return routed;
}
/**
@@ -149,19 +146,17 @@ export namespace GoogleApiServerUtils {
* security.
*/
export async function retrieveOAuthClient(userId: string): Promise<OAuth2Client | void> {
- return new Promise(async resolve => {
- const { credentials, refreshed } = await retrieveCredentials(userId);
- if (!credentials) {
- return resolve();
- }
- let client = authenticationClients.get(userId);
- if (!client) {
- authenticationClients.set(userId, (client = generateClient(credentials)));
- } else if (refreshed) {
- client.setCredentials(credentials);
- }
- resolve(client);
- });
+ const { credentials, refreshed } = await retrieveCredentials(userId);
+ if (!credentials) {
+ return;
+ }
+ let client = authenticationClients.get(userId);
+ if (!client) {
+ authenticationClients.set(userId, (client = generateClient(credentials)));
+ } else if (refreshed) {
+ client.setCredentials(credentials);
+ }
+ return client;
}
/**
@@ -173,7 +168,9 @@ export namespace GoogleApiServerUtils {
*/
function generateClient(credentials?: Credentials): OAuth2Client {
const client = new google.auth.OAuth2(oAuthOptions);
- credentials && client.setCredentials(credentials);
+ if (credentials) {
+ client.setCredentials(credentials);
+ }
return client;
}
@@ -206,7 +203,7 @@ export namespace GoogleApiServerUtils {
*/
export async function processNewUser(userId: string, authenticationCode: string): Promise<EnrichedCredentials> {
const credentials = await new Promise<Credentials>((resolve, reject) => {
- worker.getToken(authenticationCode, async (err, credentials) => {
+ worker.getToken(authenticationCode, (err, credentials) => {
if (err || !credentials) {
reject(err);
return;
@@ -221,7 +218,7 @@ export namespace GoogleApiServerUtils {
/**
* This type represents the union of the full set of OAuth2 credentials
- * and all of a Google user's publically available information. This is the strucure
+ * and all of a Google user's publicly available information. This is the structure
* of the JSON object we ultimately store in the googleAuthentication table of the database.
*/
export type EnrichedCredentials = Credentials & { userInfo: UserInfo };
@@ -297,15 +294,15 @@ export namespace GoogleApiServerUtils {
async function refreshAccessToken(credentials: Credentials, userId: string): Promise<Credentials> {
const headerParameters = { headers: { 'Content-Type': 'application/x-www-form-urlencoded' } };
const { client_id, client_secret } = GoogleCredentialsLoader.ProjectCredentials;
- const url = `https://oauth2.googleapis.com/token?${qs.stringify({
- refreshToken: credentials.refresh_token,
+ const params = new URLSearchParams({
+ refresh_token: credentials.refresh_token!,
client_id,
client_secret,
grant_type: 'refresh_token',
- })}`;
- const { access_token, expires_in } = await new Promise<any>(async resolve => {
- const response = await request.post(url, headerParameters);
- resolve(JSON.parse(response));
+ });
+ const url = `https://oauth2.googleapis.com/token?${params.toString()}`;
+ const { access_token, expires_in } = await new Promise<{ access_token: string; expires_in: number }>(resolve => {
+ request.post(url, headerParameters).then(response => resolve(JSON.parse(response)));
});
// expires_in is in seconds, but we're building the new expiry date in milliseconds
const expiry_date = new Date().getTime() + expires_in * 1000;
diff --git a/src/server/authentication/DashUserModel.ts b/src/server/authentication/DashUserModel.ts
index bfa6d7bdb..debeef60c 100644
--- a/src/server/authentication/DashUserModel.ts
+++ b/src/server/authentication/DashUserModel.ts
@@ -9,6 +9,9 @@ export type DashUserModel = mongoose.Document & {
passwordResetToken?: string;
passwordResetExpires?: Date;
+ dropboxRefresh?: string;
+ dropboxToken?: string;
+
userDocumentId: string;
sharingDocumentId: string;
linkDatabaseId: string;
@@ -37,6 +40,8 @@ const userSchema = new mongoose.Schema(
passwordResetToken: String,
passwordResetExpires: Date,
+ dropboxRefresh: String,
+ dropboxToken: String,
userDocumentId: String, // id that identifies a document which hosts all of a user's account data
sharingDocumentId: String, // id that identifies a document that stores documents shared to a user, their user color, and any additional info needed to communicate between users
linkDatabaseId: String,
diff --git a/src/server/chunker/pdf_chunker.py b/src/server/chunker/pdf_chunker.py
index 4fe3b9dbf..697550f2e 100644
--- a/src/server/chunker/pdf_chunker.py
+++ b/src/server/chunker/pdf_chunker.py
@@ -21,7 +21,7 @@ import json
import os
import uuid # For generating unique IDs
from enum import Enum # Enums for types like document type and purpose
-import cohere # Embedding client
+import openai
import numpy as np
from PyPDF2 import PdfReader # PDF text extraction
from openai import OpenAI # OpenAI client for text completion
@@ -35,8 +35,8 @@ warnings.filterwarnings('ignore', message="torch.load")
dotenv.load_dotenv() # Load environment variables
# Fix for newer versions of PIL
-if parse(PIL.__version__) >= parse('10.0.0'):
- Image.LINEAR = Image.BILINEAR
+# if parse(PIL.__version__) >= parse('10.0.0'):
+# Image.LINEAR = Image.BILINEAR
# Global dictionary to track progress of document processing jobs
current_progress = {}
@@ -54,8 +54,9 @@ def update_progress(job_id, step, progress_value):
"step": step,
"progress": progress_value
}
- print(json.dumps(progress_data), file=sys.stderr) # Use stderr for progress logs
- sys.stderr.flush() # Ensure it's sent immediately
+ print(f"PROGRESS:{json.dumps(progress_data)}", file=sys.stderr)
+ sys.stderr.flush()
+
class ElementExtractor:
@@ -63,13 +64,15 @@ class ElementExtractor:
A class that uses a YOLO model to extract tables and images from a PDF page.
"""
- def __init__(self, output_folder: str):
+ def __init__(self, output_folder: str, doc_id: str):
"""
Initializes the ElementExtractor with the output folder for saving images and the YOLO model.
:param output_folder: Path to the folder where extracted elements will be saved.
"""
- self.output_folder = output_folder
+ self.doc_id = doc_id
+ self.output_folder = os.path.join(output_folder, doc_id)
+ os.makedirs(self.output_folder, exist_ok=True)
self.model = YOLO('keremberke/yolov8m-table-extraction') # Load YOLO model for table extraction
self.model.overrides['conf'] = 0.25 # Set confidence threshold for detection
self.model.overrides['iou'] = 0.45 # Set Intersection over Union (IoU) threshold
@@ -116,17 +119,16 @@ class ElementExtractor:
table_path = os.path.join(self.output_folder, table_filename)
page_with_outline.save(table_path)
- # Convert the full-page image with red outline to base64
- base64_data = self.image_to_base64(page_with_outline)
+ file_path_for_client = f"{self.doc_id}/{table_filename}"
tables.append({
'metadata': {
"type": "table",
"location": [x1 / img.width, y1 / img.height, x2 / img.width, y2 / img.height],
- "file_path": table_path,
+ "file_path": file_path_for_client,
"start_page": page_num,
"end_page": page_num,
- "base64_data": base64_data,
+ "base64_data": self.image_to_base64(page_with_outline)
}
})
@@ -175,18 +177,17 @@ class ElementExtractor:
image_path = os.path.join(self.output_folder, image_filename)
page_with_outline.save(image_path)
- # Convert the full-page image with red outline to base64
- base64_data = self.image_to_base64(page_with_outline)
+ file_path_for_client = f"{self.doc_id}/{image_filename}"
images.append({
'metadata': {
"type": "image",
"location": [x1 / page.rect.width, y1 / page.rect.height, x2 / page.rect.width,
y2 / page.rect.height],
- "file_path": image_path,
+ "file_path": file_path_for_client,
"start_page": page_num,
"end_page": page_num,
- "base64_data": base64_data,
+ "base64_data": self.image_to_base64(image)
}
})
@@ -268,7 +269,7 @@ class PDFChunker:
The main class responsible for chunking PDF files into text and visual elements (tables/images).
"""
- def __init__(self, output_folder: str = "output", image_batch_size: int = 5) -> None:
+ def __init__(self, output_folder: str = "output", doc_id: str = '', image_batch_size: int = 5) -> None:
"""
Initializes the PDFChunker with an output folder and an element extractor for visual elements.
@@ -278,7 +279,8 @@ class PDFChunker:
self.client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) # Initialize the Anthropic API client
self.output_folder = output_folder
self.image_batch_size = image_batch_size # Batch size for image processing
- self.element_extractor = ElementExtractor(output_folder) # Initialize the element extractor
+ self.doc_id = doc_id # Add doc_id
+ self.element_extractor = ElementExtractor(output_folder, doc_id)
async def chunk_pdf(self, file_data: bytes, file_name: str, doc_id: str, job_id: str) -> List[Dict[str, Any]]:
"""
@@ -363,6 +365,7 @@ class PDFChunker:
for j, elem in enumerate(batch, start=1):
if j in summaries:
elem['metadata']['text'] = re.sub(r'^(Image|Table):\s*', '', summaries[j])
+ elem['metadata']['base64_data'] = ''
processed_elements.append(elem)
progress = ((i // image_batch_size) + 1) / total_batches * 100 # Calculate progress
@@ -628,10 +631,11 @@ class PDFChunker:
return summaries
- except Exception:
- #print(f"Error in batch_summarize_images: {str(e)}")
- #print("Returning placeholder summaries")
- return {number: "Error: No summary available" for number in images}
+ except Exception as e:
+ # Print errors to stderr so they don't interfere with JSON output
+ print(json.dumps({"error": str(e)}), file=sys.stderr)
+ sys.stderr.flush()
+
class DocumentType(Enum):
"""
@@ -664,7 +668,7 @@ class Document:
Represents a document being processed, such as a PDF, handling chunking, embedding, and summarization.
"""
- def __init__(self, file_data: bytes, file_name: str, job_id: str):
+ def __init__(self, file_path: str, file_name: str, job_id: str, output_folder: str):
"""
Initialize the Document with file data, file name, and job ID.
@@ -672,28 +676,38 @@ class Document:
:param file_name: The name of the file being processed.
:param job_id: The job ID associated with this document processing task.
"""
- self.file_data = file_data
+ self.output_folder = output_folder
self.file_name = file_name
+ self.file_path = file_path
self.job_id = job_id
self.type = self._get_document_type(file_name) # Determine the document type (PDF, CSV, etc.)
self.doc_id = job_id # Use the job ID as the document ID
self.chunks = [] # List to hold text and visual chunks
self.num_pages = 0 # Number of pages in the document (if applicable)
self.summary = "" # The generated summary for the document
-
self._process() # Start processing the document
def _process(self):
"""
Process the document: extract chunks, embed them, and generate a summary.
"""
- pdf_chunker = PDFChunker(output_folder="output") # Initialize the PDF chunker
- self.chunks = asyncio.run(pdf_chunker.chunk_pdf(self.file_data, self.file_name, self.doc_id, self.job_id)) # Extract chunks
-
- self.num_pages = self._get_pdf_pages() # Get the number of pages in the document
+ with open(self.file_path, 'rb') as file:
+ pdf_data = file.read()
+ pdf_chunker = PDFChunker(output_folder=self.output_folder, doc_id=self.doc_id) # Initialize PDFChunker
+ self.chunks = asyncio.run(pdf_chunker.chunk_pdf(pdf_data, os.path.basename(self.file_path), self.doc_id, self.job_id)) # Extract chunks
+ self.num_pages = self._get_pdf_pages(pdf_data) # Get the number of pages in the document
self._embed_chunks() # Embed the text chunks into embeddings
self.summary = self._generate_summary() # Generate a summary for the document
+ def _get_pdf_pages(self, pdf_data: bytes) -> int:
+ """
+ Get the total number of pages in the PDF document.
+ """
+ pdf_file = io.BytesIO(pdf_data) # Convert the file data to an in-memory binary stream
+ pdf_reader = PdfReader(pdf_file) # Initialize PDF reader
+ return len(pdf_reader.pages) # Return the number of pages in the PDF
+
+
def _get_document_type(self, file_name: str) -> DocumentType:
"""
Determine the document type based on its file extension.
@@ -708,33 +722,24 @@ class Document:
except ValueError:
raise FileTypeNotSupportedException(extension) # Raise exception if file type is unsupported
- def _get_pdf_pages(self) -> int:
- """
- Get the total number of pages in the PDF document.
-
- :return: The number of pages in the PDF.
- """
- pdf_file = io.BytesIO(self.file_data) # Convert the file data to an in-memory binary stream
- pdf_reader = PdfReader(pdf_file) # Initialize PDF reader
- return len(pdf_reader.pages) # Return the number of pages in the PDF
def _embed_chunks(self) -> None:
"""
Embed the text chunks using the Cohere API.
"""
- co = cohere.Client(os.getenv("COHERE_API_KEY")) # Initialize Cohere client with API key
+ openai = OpenAI() # Initialize Cohere client with API key
batch_size = 90 # Batch size for embedding
chunks_len = len(self.chunks) # Total number of chunks to embed
for i in tqdm(range(0, chunks_len, batch_size), desc="Embedding Chunks"):
batch = self.chunks[i: min(i + batch_size, chunks_len)] # Get batch of chunks
texts = [chunk['metadata']['text'] for chunk in batch] # Extract text from each chunk
- chunk_embs_batch = co.embed(
- texts=texts,
- model="embed-english-v3.0", # Use Cohere's embedding model
- input_type="search_document" # Specify input type
+ chunk_embs_batch = openai.embeddings.create(
+ model="text-embedding-3-large",
+ input=texts,
+ encoding_format="float"
)
- for j, emb in enumerate(chunk_embs_batch.embeddings):
- self.chunks[i + j]['values'] = emb # Store the embeddings in the corresponding chunks
+ for j, data_val in enumerate(chunk_embs_batch.data):
+ self.chunks[i + j]['values'] = data_val.embedding # Store the embeddings in the corresponding chunks
def _generate_summary(self) -> str:
"""
@@ -796,38 +801,34 @@ class Document:
"doc_id": self.doc_id
}, indent=2) # Convert the document's attributes to JSON format
-
-def process_document(file_data, file_name, job_id):
+def process_document(file_path, job_id, output_folder):
"""
Top-level function to process a document and return the JSON output.
- :param file_data: The binary data of the file being processed.
- :param file_name: The name of the file being processed.
+ :param file_path: The path to the file being processed.
:param job_id: The job ID for this document processing task.
:return: The processed document's data in JSON format.
"""
- new_document = Document(file_data, file_name, job_id) # Create a new Document object
- return new_document.to_json() # Return the document's JSON data
-
+ new_document = Document(file_path, file_path, job_id, output_folder)
+ return new_document.to_json()
def main():
"""
Main entry point for the script, called with arguments from Node.js.
"""
if len(sys.argv) != 4:
- print(json.dumps({"error": "Invalid arguments"}), file=sys.stderr) # Print error if incorrect number of arguments
+ print(json.dumps({"error": "Invalid arguments"}), file=sys.stderr)
return
- job_id = sys.argv[1] # Get the job ID from command-line arguments
- file_name = sys.argv[2] # Get the file name from command-line arguments
- file_data = sys.argv[3] # Get the base64-encoded file data from command-line arguments
+ job_id = sys.argv[1]
+ file_path = sys.argv[2]
+ output_folder = sys.argv[3] # Get the output folder from arguments
try:
- # Decode the base64 file data
- file_bytes = base64.b64decode(file_data)
-
+ os.makedirs(output_folder, exist_ok=True)
+
# Process the document
- document_result = process_document(file_bytes, file_name, job_id)
+ document_result = process_document(file_path, job_id, output_folder) # Pass output_folder
# Output the final result as JSON to stdout
print(document_result)
@@ -838,6 +839,5 @@ def main():
print(json.dumps({"error": str(e)}), file=sys.stderr)
sys.stderr.flush()
-
if __name__ == "__main__":
- main() # Execute the main function when the script is run
+ main() # Execute the main function when the script is run \ No newline at end of file
diff --git a/src/server/flashcard/venv/pyvenv.cfg b/src/server/flashcard/venv/pyvenv.cfg
new file mode 100644
index 000000000..740014e00
--- /dev/null
+++ b/src/server/flashcard/venv/pyvenv.cfg
@@ -0,0 +1,3 @@
+home = /Library/Frameworks/Python.framework/Versions/3.10/bin
+include-system-site-packages = false
+version = 3.10.11
diff --git a/src/server/index.ts b/src/server/index.ts
index 70a34aca5..3b77359ec 100644
--- a/src/server/index.ts
+++ b/src/server/index.ts
@@ -8,6 +8,7 @@ import FlashcardManager from './ApiManagers/FlashcardManager';
import DataVizManager from './ApiManagers/DataVizManager';
import DeleteManager from './ApiManagers/DeleteManager';
import DownloadManager from './ApiManagers/DownloadManager';
+import FireflyManager from './ApiManagers/FireflyManager';
import GeneralGoogleManager from './ApiManagers/GeneralGoogleManager';
import SessionManager from './ApiManagers/SessionManager';
import UploadManager from './ApiManagers/UploadManager';
@@ -73,6 +74,7 @@ function routeSetter({ addSupervisedRoute, logRegistrationOutcome }: RouteManage
/* new GooglePhotosManager(), */ new DataVizManager(),
new AssistantManager(),
new FlashcardManager(),
+ new FireflyManager(),
];
// initialize API Managers
@@ -114,7 +116,6 @@ function routeSetter({ addSupervisedRoute, logRegistrationOutcome }: RouteManage
});
const serve: PublicHandler = ({ req, res }) => {
- // eslint-disable-next-line new-cap
const detector = new mobileDetect(req.headers['user-agent'] || '');
const filename = detector.mobile() !== null ? 'mobile/image.html' : 'index.html';
res.sendFile(path.join(__dirname, '../../deploy/' + filename));
diff --git a/src/server/server_Initialization.ts b/src/server/server_Initialization.ts
index 4dcb32f8b..a56ab5d18 100644
--- a/src/server/server_Initialization.ts
+++ b/src/server/server_Initialization.ts
@@ -108,14 +108,14 @@ function registerEmbeddedBrowseRelativePathHandler(server: express.Express) {
// detect search query and use default search engine
res.redirect(req.headers.referer + 'corsProxy/' + encodeURIComponent('http://www.google.com' + relativeUrl));
} else {
- res.end();
+ res.status(404).json({ error: 'no such file or endpoint: try /home /logout /login' });
}
});
}
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function proxyServe(req: any, requrl: string, response: any) {
- // eslint-disable-next-line global-require, @typescript-eslint/no-require-imports, @typescript-eslint/no-var-requires
+ // eslint-disable-next-line @typescript-eslint/no-require-imports
const htmlBodyMemoryStream = new (require('memorystream'))();
let wasinBrFormat = false;
const sendModifiedBody = () => {
@@ -189,7 +189,6 @@ function proxyServe(req: any, requrl: string, response: any) {
res.headers['x-permitted-cross-domain-policies'] = 'all';
res.headers['x-frame-options'] = '';
res.headers['content-security-policy'] = '';
- // eslint-disable-next-line no-multi-assign
response.headers = response._headers = res.headers;
})
.on('end', sendModifiedBody)
@@ -247,6 +246,10 @@ export default async function InitializeServer(routeSetter: RouteSetter) {
const app = buildWithMiddleware(express());
const compiler = webpack(config as webpack.Configuration);
+ // Default route
+ app.get('/', (req, res) => {
+ res.redirect(req.user ? '/home' : '/login'); //res.send('This is the default route.');
+ });
// route table managed by express. routes are tested sequentially against each of these map rules. when a match is found, the handler is called to process the request
app.use(wdm(compiler, { publicPath: config.output.publicPath }));
app.use(whm(compiler));
@@ -259,7 +262,6 @@ export default async function InitializeServer(routeSetter: RouteSetter) {
isRelease && !SSL.Loaded && SSL.exit();
routeSetter(new RouteManager(app, isRelease)); // this sets up all the regular supervised routes (things like /home, download/upload api's, pdf, search, session, etc)
registerEmbeddedBrowseRelativePathHandler(app); // this allows renered web pages which internally have relative paths to find their content
-
isRelease && process.env.serverPort && (resolvedPorts.server = Number(process.env.serverPort));
const server = isRelease ? createServer(SSL.Credentials, app) : app;
await new Promise<void>(resolve => {