aboutsummaryrefslogtreecommitdiff
path: root/src/server/ApiManagers/AssistantManager.ts
diff options
context:
space:
mode:
Diffstat (limited to 'src/server/ApiManagers/AssistantManager.ts')
-rw-r--r--src/server/ApiManagers/AssistantManager.ts411
1 files changed, 344 insertions, 67 deletions
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index b42314e41..2ffc99e58 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -7,6 +7,17 @@ import * as uuid from 'uuid';
import { filesDirectory, publicDirectory } from '../SocketData';
import { Method } from '../RouteManager';
import ApiManager, { Registration } from './ApiManager';
+import axios from 'axios';
+import { RAGChunk } from '../../client/views/nodes/ChatBox/types';
+import { UnstructuredClient } from 'unstructured-client';
+import { PartitionResponse } from 'unstructured-client/sdk/models/operations';
+import { ChunkingStrategy, Strategy } from 'unstructured-client/sdk/models/shared';
+import * as cheerio from 'cheerio';
+import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';
+import { google } from 'googleapis';
+import * as puppeteer from 'puppeteer';
+import { JSDOM } from 'jsdom';
+import { Readability } from '@mozilla/readability';
export enum Directory {
parsed_files = 'parsed_files',
@@ -17,6 +28,8 @@ export enum Directory {
pdf_thumbnails = 'pdf_thumbnails',
audio = 'audio',
csv = 'csv',
+ chunk_images = 'chunk_images',
+ scrape_images = 'scrape_images',
}
export function serverPathToFile(directory: Directory, filename: string) {
@@ -36,94 +49,358 @@ const readFileAsync = promisify(fs.readFile);
export default class AssistantManager extends ApiManager {
protected initialize(register: Registration): void {
- const openai = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true });
+ const openai = new OpenAI({
+ apiKey: process.env._CLIENT_OPENAI_KEY, // Use the client key so we don't have to set the key separately for client and server.
+ dangerouslyAllowBrowser: true,
+ });
+ const unstructuredClient = new UnstructuredClient({
+ security: {
+ apiKeyAuth: process.env._CLIENT_UNSTRUCTURED_API_KEY!,
+ },
+ });
+ const scrapflyClient = new ScrapflyClient({ key: process.env._CLIENT_SCRAPFLY_API_KEY! });
+ const customsearch = google.customsearch('v1');
register({
method: Method.POST,
- subscription: '/uploadPDFToVectorStore',
+ subscription: '/getWikipediaSummary',
secureHandler: async ({ req, res }) => {
- const { urls, threadID, assistantID, vector_store_id } = req.body;
-
- const csvFilesIds: string[] = [];
- const otherFileIds: string[] = [];
- const allFileIds: string[] = [];
-
- const fileProcesses = urls.map(async (source: string) => {
- const fullPath = path.join(publicDirectory, source);
- const fileData = await openai.files.create({ file: createReadStream(fullPath), purpose: 'assistants' });
- allFileIds.push(fileData.id);
- if (source.endsWith('.csv')) {
- console.log(source);
- csvFilesIds.push(fileData.id);
+ const { title } = req.body;
+ try {
+ const response = await axios.get('https://en.wikipedia.org/w/api.php', {
+ params: {
+ action: 'query',
+ list: 'search',
+ srsearch: title,
+ format: 'json',
+ },
+ });
+ const summary = response.data.query.search[0].snippet;
+ if (!summary || summary.length === 0 || summary === '' || summary === ' ') {
+ res.send({ text: 'No article found with that title.' });
} else {
- openai.beta.vectorStores.files.create(vector_store_id, { file_id: fileData.id });
- otherFileIds.push(fileData.id);
+ res.send({ text: summary });
}
- });
+ } catch (error: any) {
+ console.error('Error retrieving article summary from Wikipedia:', error);
+ res.status(500).send({ error: 'Error retrieving article summary from Wikipedia.', details: error.message });
+ }
+ },
+ });
+
+ register({
+ method: Method.POST,
+ subscription: '/getWebSearchResults',
+ secureHandler: async ({ req, res }) => {
+ const { query, max_results } = req.body;
try {
- await Promise.all(fileProcesses).then(() => {
- res.send({ vector_store_id: vector_store_id, openai_file_ids: allFileIds });
+ const response = await customsearch.cse.list({
+ q: query,
+ cx: process.env._CLIENT_GOOGLE_SEARCH_ENGINE_ID,
+ key: process.env._CLIENT_GOOGLE_API_KEY,
+ safe: 'active',
+ num: max_results,
});
- } catch (error) {
- res.status(500).send({ error: 'Failed to process files' + error });
+
+ const results =
+ response.data.items?.map((item: any) => ({
+ url: item.link,
+ snippet: item.snippet,
+ })) || [];
+
+ res.send({ results });
+ } catch (error: any) {
+ console.error('Error performing web search:', error);
+ res.status(500).send({ error: 'Failed to perform web search', details: error.message });
}
},
});
+ const axiosInstance = axios.create({
+ headers: {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+ },
+ });
+
+ const delay = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
+
+ const fetchWithRetry = async (url: string, retries = 3, backoff = 300) => {
+ try {
+ const response = await axiosInstance.get(url);
+ return response.data;
+ } catch (error: any) {
+ if (retries > 0 && error.response && error.response.status === 429) {
+ console.log(`Rate limited. Retrying in ${backoff}ms...`);
+ await delay(backoff);
+ return fetchWithRetry(url, retries - 1, backoff * 2);
+ }
+ throw error;
+ }
+ };
+
register({
method: Method.POST,
- subscription: '/downloadFileFromOpenAI',
+ subscription: '/scrapeWebsite',
secureHandler: async ({ req, res }) => {
- const { file_id, file_name } = req.body;
- //let files_directory: string;
- let files_directory = '/files/openAIFiles/';
- switch (file_name.split('.').pop()) {
- case 'pdf':
- files_directory = '/files/pdfs/';
- break;
- case 'csv':
- files_directory = '/files/csv/';
- break;
- case 'png':
- case 'jpg':
- case 'jpeg':
- files_directory = '/files/images/';
- break;
- default:
- break;
+ const { url } = req.body;
+ try {
+ // Launch Puppeteer to navigate to the webpage
+ const browser = await puppeteer.launch({
+ args: ['--no-sandbox', '--disable-setuid-sandbox'],
+ });
+ const page = await browser.newPage();
+ await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');
+ await page.goto(url, { waitUntil: 'networkidle2' });
+
+ // Get the HTML content of the page
+ const htmlContent = await page.content();
+ await browser.close();
+
+ // Use JSDOM to parse the HTML content
+ const dom = new JSDOM(htmlContent, { url });
+
+ // Use Readability to extract the readable content
+ const reader = new Readability(dom.window.document);
+ const article = reader.parse();
+
+ if (article) {
+ // Extract the plain text from the article content
+ const plainText = article.textContent;
+
+ // Return the plain text content
+ res.send({ website_plain_text: plainText });
+ } else {
+ res.status(500).send({ error: 'Failed to extract readable content' });
+ }
+ } catch (error: any) {
+ console.error('Error scraping website:', error);
+ res.status(500).send({ error: 'Failed to scrape website', details: error.message });
}
+ },
+ });
+
+ register({
+ method: Method.POST,
+ subscription: '/createDocument',
+ secureHandler: async ({ req, res }) => {
+ const { file_path } = req.body;
+ const public_path = path.join(publicDirectory, file_path);
+ const file_name = path.basename(file_path);
+
+ try {
+ // Read file data and convert to base64
+ const file_data: string = fs.readFileSync(public_path, { encoding: 'base64' });
- const directory = path.join(publicDirectory, files_directory);
+ const response = await axios.post(
+ 'http://localhost:8080/createDocument',
+ {
+ file_data,
+ file_name,
+ },
+ {
+ headers: {
+ 'Content-Type': 'application/json',
+ },
+ }
+ );
- if (!fs.existsSync(directory)) {
- fs.mkdirSync(directory);
+ const jobId = response.data['job_id'];
+ console.log('Job ID:', jobId);
+
+ res.send({ jobId });
+ } catch (error: any) {
+ console.error('Error communicating with chatbot:', error);
+ res.status(500).send({ error: 'Failed to communicate with the chatbot', details: error.message });
+ }
+ },
+ });
+
+ register({
+ method: Method.GET,
+ subscription: '/getProgress/:jobId',
+ secureHandler: async ({ req, res }) => {
+ const { jobId } = req.params;
+ try {
+ const progressResponse = await axios.get(`http://localhost:8080/getProgress/${jobId}`);
+ console.log(`Current step: ${progressResponse.data.step}, Progress within step: ${progressResponse.data.progress}%`);
+ res.json(progressResponse.data);
+ } catch (error) {
+ console.error('Error getting progress:', error);
+ res.status(500).send({ error: 'Failed to get progress', details: JSON.parse(error as string).message });
}
- const file = await openai.files.content(file_id);
- const new_file_name = `${uuid.v4()}-${file_name}`;
- const file_path = path.join(directory, new_file_name);
- const file_array_buffer = await file.arrayBuffer();
- const bufferView = new Uint8Array(file_array_buffer);
+ },
+ });
+
+ register({
+ method: Method.GET,
+ subscription: '/getResult/:jobId',
+ secureHandler: async ({ req, res }) => {
+ const { jobId } = req.params;
try {
- const written_file = await writeFileAsync(file_path, bufferView);
- console.log(written_file);
- console.log(file_path);
- console.log(file_array_buffer);
- console.log(bufferView);
- const file_object = new File([bufferView], file_name);
- //DashUploadUtils.upload(file_object, 'openAIFiles');
- res.send({ file_path: path.join(files_directory, new_file_name) });
- /* res.send( {
- source: "file",
- result: {
- accessPaths: {
- agnostic: {client: path.join('/files/openAIFiles/', `${uuid.v4()}-${file_name}`)}
- },
- rawText: "",
- duration: 0,
- },
- } ); */
+ const finalResponse = await axios.get(`http://localhost:8080/getResult/${jobId}`);
+ console.log('Result:', finalResponse.data);
+ const result = finalResponse.data;
+
+ if (result.chunks && Array.isArray(result.chunks)) {
+ for (const chunk of result.chunks) {
+ if (chunk.metadata && (chunk.metadata.type === 'image' || chunk.metadata.type === 'table')) {
+ let files_directory = '/files/chunk_images/';
+ const directory = path.join(publicDirectory, files_directory);
+
+ if (!fs.existsSync(directory)) {
+ fs.mkdirSync(directory);
+ }
+
+ const fileName = path.basename(chunk.metadata.file_path);
+ const filePath = path.join(directory, fileName);
+
+ // Check if base64_data exists
+ if (chunk.metadata.base64_data) {
+ // Decode Base64 and save as file
+ const buffer = Buffer.from(chunk.metadata.base64_data, 'base64');
+ await fs.promises.writeFile(filePath, buffer);
+
+ // Update the file path in the chunk
+ chunk.metadata.file_path = path.join(files_directory, fileName);
+ chunk.metadata.base64_data = undefined;
+ } else {
+ console.warn(`No base64_data found for chunk: ${fileName}`);
+ }
+ }
+ }
+ result['status'] = 'completed';
+ } else {
+ console.warn('Not ready');
+ result.status = 'pending';
+ }
+ res.json(result);
} catch (error) {
- res.status(500).send({ error: 'Failed to write file' + error });
+ console.error('Error getting progress:', error);
+ res.status(500).send({ error: 'Failed to get progress', details: error });
+ }
+ },
+ });
+
+ register({
+ method: Method.POST,
+ subscription: '/formatChunks',
+ secureHandler: async ({ req, res }) => {
+ const { relevantChunks } = req.body;
+ const content: { type: string; text?: string; image_url?: { url: string } }[] = [{ type: 'text', text: '<chunks>' }];
+
+ for (const chunk of relevantChunks) {
+ content.push({
+ type: 'text',
+ text: `<chunk chunk_id=${chunk.id} chunk_type=${chunk.metadata.type}>`,
+ });
+
+ if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') {
+ try {
+ const filePath = serverPathToFile(Directory.chunk_images, chunk.metadata.file_path);
+ const imageBuffer = await readFileAsync(filePath);
+ const base64Image = imageBuffer.toString('base64');
+ if (base64Image) {
+ content.push({
+ type: 'image_url',
+ image_url: {
+ url: `data:image/jpeg;base64,${base64Image}`,
+ },
+ });
+ } else {
+ console.log(`Failed to encode image for chunk ${chunk.id}`);
+ }
+ } catch (error) {
+ console.error(`Error reading image file for chunk ${chunk.id}:`, error);
+ }
+ }
+ content.push({ type: 'text', text: `${chunk.metadata.text}\n</chunk>\n` });
+ }
+
+ content.push({ type: 'text', text: '</chunks>' });
+
+ res.send({ formattedChunks: content });
+ },
+ });
+
+ register({
+ method: Method.POST,
+ subscription: '/createCSV',
+ secureHandler: async ({ req, res }) => {
+ const { filename, data } = req.body;
+
+ // Validate input
+ if (!filename || !data) {
+ res.status(400).send({ error: 'Filename and data fields are required.' });
+ return;
+ }
+
+ try {
+ // Generate a UUID for the file
+ const uuidv4 = uuid.v4();
+
+ // Construct the full filename with the UUID prefix
+ const fullFilename = `${uuidv4}-${filename}`;
+
+ // Get the full server path where the file will be saved
+ const serverFilePath = serverPathToFile(Directory.csv, fullFilename);
+
+ // Write the CSV data (which is a raw string) to the file
+ await writeFileAsync(serverFilePath, data, 'utf8');
+
+ // Construct the full client URL for accessing the file
+ const fileUrl = clientPathToFile(Directory.csv, fullFilename);
+
+ // Return the file URL and UUID to the client
+ res.send({ fileUrl, id: uuidv4 });
+ } catch (error: any) {
+ console.error('Error creating CSV file:', error);
+ res.status(500).send({ error: 'Failed to create CSV file.', details: error.message });
+ }
+ },
+ });
+
+ register({
+ method: Method.POST,
+ subscription: '/chunkDocument',
+ secureHandler: async ({ req, res }) => {
+ const { file_path } = req.body;
+ const public_path = path.join(publicDirectory, file_path);
+ const file_name = path.basename(file_path);
+
+ try {
+ // Read file data and convert to base64
+ const file_data = await fs.promises.readFile(public_path);
+
+ try {
+ const result = await unstructuredClient.general.partition({
+ partitionParameters: {
+ files: {
+ content: file_data,
+ fileName: file_name,
+ },
+ strategy: Strategy.Auto,
+ chunkingStrategy: ChunkingStrategy.ByTitle,
+ extractImageBlockTypes: ['Image', 'Table'],
+ },
+ });
+
+ if (result.statusCode === 200) {
+ console.log(result.elements);
+ const jsonElements = JSON.stringify(result.elements, null, 2);
+ // Print the processed data.
+ console.log(jsonElements);
+ res.send({ document_json: jsonElements });
+ } else {
+ console.error(`Unexpected status code: ${result.statusCode}`);
+ res.status(result.statusCode).send({ error: 'Failed to process the document', details: result });
+ }
+ } catch (e: any) {
+ console.error('Error during partitioning:', e);
+ res.status(500).send({ error: 'Failed to partition the document', details: e.message });
+ }
+ } catch (error: any) {
+ console.error('Error reading file:', error);
+ res.status(500).send({ error: 'Failed to read the file', details: error.message });
}
},
});