aboutsummaryrefslogtreecommitdiff
path: root/src/server/ApiManagers/AssistantManager.ts
diff options
context:
space:
mode:
author A.J. Shulman <Shulman.aj@gmail.com> 2024-09-19 09:53:19 -0400
committer A.J. Shulman <Shulman.aj@gmail.com> 2024-09-19 09:53:19 -0400
commit 00b12111c4007ef6ecff645327007a67f6655d8b (patch)
tree b5ecfd18a7a7e5f2cd97b8307b248882d056c7e0 /src/server/ApiManagers/AssistantManager.ts
parent ba0520baaa1f84d9fb08d3b2880c68302d28350a (diff)
added comments and fixed some styling and error handling
Diffstat (limited to 'src/server/ApiManagers/AssistantManager.ts')
-rw-r--r-- src/server/ApiManagers/AssistantManager.ts 224
1 files changed, 161 insertions, 63 deletions
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index 88a81c5de..063ba193a 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -19,6 +19,7 @@ import * as puppeteer from 'puppeteer';
import { JSDOM } from 'jsdom';
import { Readability } from '@mozilla/readability';
+// Enumeration of directories where different file types are stored
export enum Directory {
parsed_files = 'parsed_files',
images = 'images',
@@ -32,41 +33,76 @@ export enum Directory {
scrape_images = 'scrape_images',
}
+/**
+ * Constructs a normalized path to a file in the server's file system.
+ * @param directory The directory where the file is stored.
+ * @param filename The name of the file.
+ * @returns The full normalized path to the file.
+ */
export function serverPathToFile(directory: Directory, filename: string) {
    // Build "<filesDirectory>/<directory>/<filename>" first, then hand the raw
    // string to path.normalize so it is returned in normalized form.
    const rawPath = `${filesDirectory}/${directory}/${filename}`;
    const normalizedPath = path.normalize(rawPath);
    return normalizedPath;
}
+/**
+ * Constructs a normalized path to a directory in the server's file system.
+ * @param directory The directory to access.
+ * @returns The full normalized path to the directory.
+ */
export function pathToDirectory(directory: Directory) {
    // Build "<filesDirectory>/<directory>" and return it in normalized form.
    const rawDirectoryPath = `${filesDirectory}/${directory}`;
    const normalizedDirectoryPath = path.normalize(rawDirectoryPath);
    return normalizedDirectoryPath;
}
+/**
+ * Constructs the client-accessible URL for a file.
+ * @param directory The directory where the file is stored.
+ * @param filename The name of the file.
+ * @returns The URL path to the file.
+ */
export function clientPathToFile(directory: Directory, filename: string) {
    // Client URLs always live under the "/files/" prefix, followed by the
    // directory name and then the file name; no normalization is applied.
    const directoryUrl = '/files/' + directory;
    return directoryUrl + '/' + filename;
}
+// Promisified versions of filesystem functions
// Promise-returning wrapper around `writeFile`; awaited by the /createCSV handler to save the CSV text.
const writeFileAsync = promisify(writeFile);
// Promise-returning wrapper around `fs.readFile`; awaited by the /formatChunks handler to load chunk image files.
const readFileAsync = promisify(fs.readFile);
+/**
+ * Class responsible for handling various API routes related to the Assistant functionality.
+ * This class extends `ApiManager` and handles registration of routes and secure request handlers.
+ */
export default class AssistantManager extends ApiManager {
+ /**
+ * Registers all API routes and initializes necessary services like OpenAI, Scrapfly, and UnstructuredClient.
+ * @param register The registration method to register routes and handlers.
+ */
protected initialize(register: Registration): void {
+ // Initialize OpenAI API with client key
const openai = new OpenAI({
- apiKey: process.env._CLIENT_OPENAI_KEY, // Use client key so don't have to set key seperately for client and server.
+ apiKey: process.env._CLIENT_OPENAI_KEY,
dangerouslyAllowBrowser: true,
});
+
+ // Initialize UnstructuredClient for document processing
const unstructuredClient = new UnstructuredClient({
security: {
apiKeyAuth: process.env._CLIENT_UNSTRUCTURED_API_KEY!,
},
});
+
+ // Initialize ScrapflyClient for scraping purposes
const scrapflyClient = new ScrapflyClient({ key: process.env._CLIENT_SCRAPFLY_API_KEY! });
+
+ // Initialize Google Custom Search API
const customsearch = google.customsearch('v1');
+ // Register Wikipedia summary API route
register({
method: Method.POST,
subscription: '/getWikipediaSummary',
secureHandler: async ({ req, res }) => {
const { title } = req.body;
try {
+ // Fetch summary from Wikipedia using axios
const response = await axios.get('https://en.wikipedia.org/w/api.php', {
params: {
action: 'query',
@@ -75,25 +111,26 @@ export default class AssistantManager extends ApiManager {
format: 'json',
},
});
- const summary = response.data.query.search[0].snippet;
- if (!summary || summary.length === 0 || summary === '' || summary === ' ') {
- res.send({ text: 'No article found with that title.' });
- } else {
- res.send({ text: summary });
- }
+ const summary = response.data.query.search[0]?.snippet || 'No article found with that title.';
+ res.send({ text: summary });
} catch (error: any) {
- console.error('Error retrieving article summary from Wikipedia:', error);
- res.status(500).send({ error: 'Error retrieving article summary from Wikipedia.', details: error.message });
+ console.error('Error retrieving Wikipedia summary:', error);
+ res.status(500).send({
+ error: 'Error retrieving article summary from Wikipedia.',
+ details: error.message,
+ });
}
},
});
+ // Register Google Web Search Results API route
register({
method: Method.POST,
subscription: '/getWebSearchResults',
secureHandler: async ({ req, res }) => {
const { query, max_results } = req.body;
try {
+ // Fetch search results using Google Custom Search API
const response = await customsearch.cse.list({
q: query,
cx: process.env._CLIENT_GOOGLE_SEARCH_ENGINE_ID,
@@ -111,25 +148,40 @@ export default class AssistantManager extends ApiManager {
res.send({ results });
} catch (error: any) {
console.error('Error performing web search:', error);
- res.status(500).send({ error: 'Failed to perform web search', details: error.message });
+ res.status(500).send({
+ error: 'Failed to perform web search',
+ details: error.message,
+ });
}
},
});
+ // Axios instance with custom headers for scraping
const axiosInstance = axios.create({
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
},
});
+ /**
+ * Utility function to introduce delay (used for retries).
+ * @param ms Delay in milliseconds.
+ */
const delay = (ms: number) =>
    new Promise(wake => {
        // Settle the promise (with no value) once the timer has elapsed.
        setTimeout(wake, ms);
    });
+ /**
+ * Function to fetch a URL with retry logic, handling rate limits.
+ * Retries a request if it fails due to rate limits (HTTP status 429).
+ * @param url The URL to fetch.
+ * @param retries The number of retry attempts.
+ * @param backoff Initial backoff time in milliseconds.
+ */
const fetchWithRetry = async (url: string, retries = 3, backoff = 300) => {
try {
const response = await axiosInstance.get(url);
return response.data;
} catch (error: any) {
- if (retries > 0 && error.response && error.response.status === 429) {
+ if (retries > 0 && error.response?.status === 429) {
console.log(`Rate limited. Retrying in ${backoff}ms...`);
await delay(backoff);
return fetchWithRetry(url, retries - 1, backoff * 2);
@@ -138,6 +190,7 @@ export default class AssistantManager extends ApiManager {
}
};
+ // Register a proxy fetch API route
register({
method: Method.POST,
subscription: '/proxyFetch',
@@ -154,18 +207,22 @@ export default class AssistantManager extends ApiManager {
res.send({ data });
} catch (error: any) {
console.error('Error fetching the URL:', error);
- res.status(500).send({ error: 'Failed to fetch the URL', details: error.message });
+ res.status(500).send({
+ error: 'Failed to fetch the URL',
+ details: error.message,
+ });
}
},
});
+ // Register an API route to scrape website content using Puppeteer and JSDOM
register({
method: Method.POST,
subscription: '/scrapeWebsite',
secureHandler: async ({ req, res }) => {
const { url } = req.body;
try {
- // Launch Puppeteer to navigate to the webpage
+ // Launch Puppeteer browser to navigate to the webpage
const browser = await puppeteer.launch({
args: ['--no-sandbox', '--disable-setuid-sandbox'],
});
@@ -173,45 +230,47 @@ export default class AssistantManager extends ApiManager {
await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');
await page.goto(url, { waitUntil: 'networkidle2' });
- // Get the HTML content of the page
+ // Extract HTML content
const htmlContent = await page.content();
await browser.close();
- // Use JSDOM to parse the HTML content
+ // Parse HTML content using JSDOM
const dom = new JSDOM(htmlContent, { url });
- // Use Readability to extract the readable content
+ // Extract readable content using Mozilla's Readability API
const reader = new Readability(dom.window.document);
const article = reader.parse();
if (article) {
- // Extract the plain text from the article content
const plainText = article.textContent;
-
- // Return the plain text content
res.send({ website_plain_text: plainText });
} else {
res.status(500).send({ error: 'Failed to extract readable content' });
}
} catch (error: any) {
console.error('Error scraping website:', error);
- res.status(500).send({ error: 'Failed to scrape website', details: error.message });
+ res.status(500).send({
+ error: 'Failed to scrape website',
+ details: error.message,
+ });
}
},
});
+ // Register an API route to create documents by sending files to a chatbot
register({
method: Method.POST,
subscription: '/createDocument',
secureHandler: async ({ req, res }) => {
const { file_path } = req.body;
- const public_path = path.join(publicDirectory, file_path);
- const file_name = path.basename(file_path);
+ const public_path = path.join(publicDirectory, file_path); // Resolve the file path in the public directory
+ const file_name = path.basename(file_path); // Extract the file name from the path
try {
- // Read file data and convert to base64
+ // Read the file data and encode it as base64
const file_data: string = fs.readFileSync(public_path, { encoding: 'base64' });
+ // Send the file data to a local chatbot API for document creation
const response = await axios.post(
'http://localhost:8080/createDocument',
{
@@ -225,65 +284,79 @@ export default class AssistantManager extends ApiManager {
}
);
+ // Retrieve the job ID from the response
const jobId = response.data['job_id'];
console.log('Job ID:', jobId);
+ // Send the job ID back to the client
res.send({ jobId });
} catch (error: any) {
console.error('Error communicating with chatbot:', error);
- res.status(500).send({ error: 'Failed to communicate with the chatbot', details: error.message });
+ res.status(500).send({
+ error: 'Failed to communicate with the chatbot',
+ details: error.message,
+ });
}
},
});
+ // Register an API route to check the progress of a document creation job
register({
method: Method.GET,
subscription: '/getProgress/:jobId',
secureHandler: async ({ req, res }) => {
- const { jobId } = req.params;
+ const { jobId } = req.params; // Get the job ID from the URL parameters
try {
+ // Query the local API to get the progress of the job
const progressResponse = await axios.get(`http://localhost:8080/getProgress/${jobId}`);
console.log(`Current step: ${progressResponse.data.step}, Progress within step: ${progressResponse.data.progress}%`);
- res.json(progressResponse.data);
+ res.json(progressResponse.data); // Send the progress data back to the client
} catch (error) {
console.error('Error getting progress:', error);
- res.status(500).send({ error: 'Failed to get progress', details: JSON.parse(error as string).message });
+ res.status(500).send({
+ error: 'Failed to get progress',
+ details: error,
+ });
}
},
});
+ // Register an API route to get the final result of a document creation job
register({
method: Method.GET,
subscription: '/getResult/:jobId',
secureHandler: async ({ req, res }) => {
- const { jobId } = req.params;
+ const { jobId } = req.params; // Get the job ID from the URL parameters
try {
+ // Query the local API to get the final result of the job
const finalResponse = await axios.get(`http://localhost:8080/getResult/${jobId}`);
console.log('Result:', finalResponse.data);
const result = finalResponse.data;
+ // If the result contains image or table chunks, save the base64 data as image files
if (result.chunks && Array.isArray(result.chunks)) {
for (const chunk of result.chunks) {
if (chunk.metadata && (chunk.metadata.type === 'image' || chunk.metadata.type === 'table')) {
let files_directory = '/files/chunk_images/';
const directory = path.join(publicDirectory, files_directory);
+ // Ensure the directory exists or create it
if (!fs.existsSync(directory)) {
fs.mkdirSync(directory);
}
- const fileName = path.basename(chunk.metadata.file_path);
- const filePath = path.join(directory, fileName);
+ const fileName = path.basename(chunk.metadata.file_path); // Get the file name from the path
+ const filePath = path.join(directory, fileName); // Create the full file path
- // Check if base64_data exists
+ // Check if the chunk contains base64 encoded data
if (chunk.metadata.base64_data) {
- // Decode Base64 and save as file
+ // Decode the base64 data and write it to a file
const buffer = Buffer.from(chunk.metadata.base64_data, 'base64');
await fs.promises.writeFile(filePath, buffer);
- // Update the file path in the chunk
+ // Update the file path in the chunk's metadata
chunk.metadata.file_path = path.join(files_directory, fileName);
- chunk.metadata.base64_data = undefined;
+ chunk.metadata.base64_data = undefined; // Remove the base64 data from the metadata
} else {
console.warn(`No base64_data found for chunk: ${fileName}`);
}
@@ -294,32 +367,42 @@ export default class AssistantManager extends ApiManager {
console.warn('Not ready');
result.status = 'pending';
}
- res.json(result);
+ res.json(result); // Send the result back to the client
} catch (error) {
- console.error('Error getting progress:', error);
- res.status(500).send({ error: 'Failed to get progress', details: error });
+ console.error('Error getting result:', error);
+ res.status(500).send({
+ error: 'Failed to get result',
+ details: error,
+ });
}
},
});
+ // Register an API route to format chunks (e.g., text or image chunks) for display
register({
method: Method.POST,
subscription: '/formatChunks',
secureHandler: async ({ req, res }) => {
- const { relevantChunks } = req.body;
+ const { relevantChunks } = req.body; // Get the relevant chunks from the request body
+
+ // Initialize an array to hold the formatted content
const content: { type: string; text?: string; image_url?: { url: string } }[] = [{ type: 'text', text: '<chunks>' }];
for (const chunk of relevantChunks) {
+ // Format each chunk by adding its metadata and content
content.push({
type: 'text',
- text: `<chunk chunk_id=${chunk.id} chunk_type=${chunk.metadata.type}>`,
+ text: `<chunk chunk_id=${chunk.id} chunk_type="${chunk.metadata.type}">`,
});
+ // If the chunk is an image or table, read the corresponding file and encode it as base64
if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') {
try {
- const filePath = serverPathToFile(Directory.chunk_images, chunk.metadata.file_path);
- const imageBuffer = await readFileAsync(filePath);
- const base64Image = imageBuffer.toString('base64');
+ const filePath = serverPathToFile(Directory.chunk_images, chunk.metadata.file_path); // Get the file path
+ const imageBuffer = await readFileAsync(filePath); // Read the image file
+ const base64Image = imageBuffer.toString('base64'); // Convert the image to base64
+
+ // Add the base64-encoded image to the content array
if (base64Image) {
content.push({
type: 'image_url',
@@ -334,33 +417,35 @@ export default class AssistantManager extends ApiManager {
console.error(`Error reading image file for chunk ${chunk.id}:`, error);
}
}
+
+ // Add the chunk's text content to the formatted content
content.push({ type: 'text', text: `${chunk.metadata.text}\n</chunk>\n` });
}
content.push({ type: 'text', text: '</chunks>' });
+ // Send the formatted content back to the client
res.send({ formattedChunks: content });
},
});
+ // Register an API route to create and save a CSV file on the server
register({
method: Method.POST,
subscription: '/createCSV',
secureHandler: async ({ req, res }) => {
const { filename, data } = req.body;
- // Validate input
+ // Validate that both the filename and data are provided
if (!filename || !data) {
res.status(400).send({ error: 'Filename and data fields are required.' });
return;
}
try {
- // Generate a UUID for the file
+ // Generate a UUID for the file to ensure unique naming
const uuidv4 = uuid.v4();
-
- // Construct the full filename with the UUID prefix
- const fullFilename = `${uuidv4}-${filename}`;
+ const fullFilename = `${uuidv4}-${filename}`; // Prefix the file name with the UUID
// Get the full server path where the file will be saved
const serverFilePath = serverPathToFile(Directory.csv, fullFilename);
@@ -368,60 +453,73 @@ export default class AssistantManager extends ApiManager {
// Write the CSV data (which is a raw string) to the file
await writeFileAsync(serverFilePath, data, 'utf8');
- // Construct the full client URL for accessing the file
+ // Construct the client-accessible URL for the file
const fileUrl = clientPathToFile(Directory.csv, fullFilename);
- // Return the file URL and UUID to the client
+ // Send the file URL and UUID back to the client
res.send({ fileUrl, id: uuidv4 });
} catch (error: any) {
console.error('Error creating CSV file:', error);
- res.status(500).send({ error: 'Failed to create CSV file.', details: error.message });
+ res.status(500).send({
+ error: 'Failed to create CSV file.',
+ details: error.message,
+ });
}
},
});
+ // Register an API route to chunk a document using the UnstructuredClient
register({
method: Method.POST,
subscription: '/chunkDocument',
secureHandler: async ({ req, res }) => {
- const { file_path } = req.body;
- const public_path = path.join(publicDirectory, file_path);
- const file_name = path.basename(file_path);
+ const { file_path } = req.body; // Get the file path from the request body
+ const public_path = path.join(publicDirectory, file_path); // Resolve the full path in the public directory
+ const file_name = path.basename(file_path); // Extract the file name from the path
try {
- // Read file data and convert to base64
+ // Read the file content as a Buffer
const file_data = await fs.promises.readFile(public_path);
try {
+ // Use UnstructuredClient to partition the document into chunks
const result = await unstructuredClient.general.partition({
partitionParameters: {
files: {
content: file_data,
fileName: file_name,
},
- strategy: Strategy.Auto,
- chunkingStrategy: ChunkingStrategy.ByTitle,
- extractImageBlockTypes: ['Image', 'Table'],
+strategy: Strategy.Auto, // Let the service choose the partitioning strategy automatically
+ chunkingStrategy: ChunkingStrategy.ByTitle, // Chunk by title
+ extractImageBlockTypes: ['Image', 'Table'], // Extract images and tables
},
});
if (result.statusCode === 200) {
console.log(result.elements);
const jsonElements = JSON.stringify(result.elements, null, 2);
- // Print the processed data.
- console.log(jsonElements);
- res.send({ document_json: jsonElements });
+ console.log(jsonElements); // Log the JSON result of the partitioned elements
+ res.send({ document_json: jsonElements }); // Send the partitioned data as a JSON response
} else {
console.error(`Unexpected status code: ${result.statusCode}`);
- res.status(result.statusCode).send({ error: 'Failed to process the document', details: result });
+ res.status(result.statusCode).send({
+ error: 'Failed to process the document',
+ details: result,
+ });
}
} catch (e: any) {
console.error('Error during partitioning:', e);
- res.status(500).send({ error: 'Failed to partition the document', details: e.message });
+ res.status(500).send({
+ error: 'Failed to partition the document',
+ details: e.message,
+ });
}
} catch (error: any) {
console.error('Error reading file:', error);
- res.status(500).send({ error: 'Failed to read the file', details: error.message });
+ res.status(500).send({
+ error: 'Failed to read the file',
+ details: error.message,
+ });
}
},
});