author | A.J. Shulman <Shulman.aj@gmail.com> | 2024-08-20 15:17:25 -0400
---|---|---
committer | A.J. Shulman <Shulman.aj@gmail.com> | 2024-08-20 15:17:25 -0400
commit | 4c0c7794c85cfdbcd61a7ee5cb9a29494fd0444b (patch)
tree | abf99fc24966e65a0e0db3f8e17ccb6edcff2d4c /src/server/ApiManagers/AssistantManager.ts
parent | 4b6ce2ffcb82c1a7467ef7ed8b67b97094a8f6b6 (diff)
Better styling: thoughts and actions are now hidden, and scrolling works better.
Next steps:
- [ ] Ensure it doesn't create more web documents when one already exists
- [ ] Render citations on the same line as the text instead of on the next line
- [ ] If the XML is invalid, run GPT-3.5 to verify and fix it based on examples (see the sketch after this list)
- [ ] Make sure that asking for other information doesn't go back to the same website: provide website history in the usage rules for the search tool and website-scraper tool, or directly in the prompt
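Of these, the XML repair is the most mechanical. Here is a minimal sketch of one way it could work, assuming `fast-xml-parser` for validation and the official OpenAI Node SDK for the GPT-3.5 call (neither is confirmed to be among this repo's dependencies, and `ensureValidXml` / `FEW_SHOT_EXAMPLES` are hypothetical names):

```ts
// Hypothetical helper for the "fix invalid XML with GPT-3.5" item above.
// Assumptions: fast-xml-parser and the official OpenAI SDK are installed,
// and OPENAI_API_KEY is set; FEW_SHOT_EXAMPLES stands in for real examples.
import { XMLValidator } from 'fast-xml-parser';
import OpenAI from 'openai';

const openai = new OpenAI();

const FEW_SHOT_EXAMPLES = `
<thoughts><thought>missing close tag</thoughts>
=>
<thoughts><thought>missing close tag</thought></thoughts>`;

export async function ensureValidXml(xml: string): Promise<string> {
    // Cheap local check first; only call the model when validation fails.
    if (XMLValidator.validate(xml) === true) return xml;

    const completion = await openai.chat.completions.create({
        model: 'gpt-3.5-turbo',
        temperature: 0,
        messages: [
            { role: 'system', content: `Repair the XML. Return only corrected XML, no commentary. Examples:${FEW_SHOT_EXAMPLES}` },
            { role: 'user', content: xml },
        ],
    });
    const fixed = completion.choices[0]?.message?.content ?? xml;

    // Re-validate so a bad completion can't replace broken XML with worse XML.
    return XMLValidator.validate(fixed) === true ? fixed : xml;
}
```

Re-validating after the repair matters: if the model returns commentary or still-broken markup, falling back to the original keeps the failure mode unchanged instead of making it worse.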
Diffstat (limited to 'src/server/ApiManagers/AssistantManager.ts')
-rw-r--r-- | src/server/ApiManagers/AssistantManager.ts | 64
1 file changed, 30 insertions(+), 34 deletions(-)
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index cd26ca79b..9b85dbbe8 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -14,6 +14,8 @@ import { PartitionResponse } from 'unstructured-client/sdk/models/operations';
 import { ChunkingStrategy, Strategy } from 'unstructured-client/sdk/models/shared';
 import * as cheerio from 'cheerio';
 import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';
+import { google } from 'googleapis';
+import puppeteer from 'puppeteer';
 
 export enum Directory {
     parsed_files = 'parsed_files',
@@ -55,6 +57,7 @@
             },
         });
         const scrapflyClient = new ScrapflyClient({ key: process.env._CLIENT_SCRAPFLY_API_KEY! });
+        const customsearch = google.customsearch('v1');
 
         register({
             method: Method.POST,
@@ -89,20 +92,18 @@
             secureHandler: async ({ req, res }) => {
                 const { query } = req.body;
                 try {
-                    const response = await axios.get('http://api.serpstack.com/search', {
-                        params: {
-                            access_key: process.env._CLIENT_SERPSTACK_API_KEY,
-                            query: query,
-                        },
+                    const response = await customsearch.cse.list({
+                        q: query,
+                        cx: process.env._CLIENT_GOOGLE_SEARCH_ENGINE_ID,
+                        key: process.env._CLIENT_GOOGLE_API_KEY,
+                        safe: 'active',
                     });
-                    console.log(response.data);
-                    const results = response.data.organic_results.map((result: any) => ({
-                        url: result.url,
-                        snippet: result.snippet,
-                    }));
-
-                    console.log(results);
+                    const results =
+                        response.data.items?.map((item: any) => ({
+                            url: item.link,
+                            snippet: item.snippet,
+                        })) || [];
 
                     res.send({ results });
                 } catch (error: any) {
@@ -144,6 +145,7 @@
                 const scrapedImagesDirectory = pathToDirectory(Directory.scrape_images);
                 const filePath = serverPathToFile(Directory.scrape_images, url_filename);
 
+                // Check if the image already exists
                 if (fs.existsSync(filePath)) {
                     const imageBuffer = await readFileAsync(filePath);
                     const base64Image = imageBuffer.toString('base64');
@@ -151,33 +153,27 @@
                     return res.send({ website_image_base64: base64Image });
                 }
 
+                // Create the directory if it doesn't exist
                 if (!fs.existsSync(scrapedImagesDirectory)) {
                     fs.mkdirSync(scrapedImagesDirectory);
                 }
 
-                const result = await scrapflyClient.scrape(
-                    new ScrapeConfig({
-                        url: url,
-                        render_js: true,
-                        screenshots: { everything: 'fullpage' },
-                    })
-                );
-
-                const screenshotPromises = Object.entries(result.result.screenshots).map(async ([name, screenshot]) => {
-                    const response = await axios.get(screenshot.url, {
-                        params: {
-                            key: process.env._CLIENT_SCRAPFLY_API_KEY!,
-                            options: 'print_media_format',
-                            proxy_pool: 'public_residential_pool',
-                        },
-                        responseType: 'arraybuffer',
-                    });
-                    await fs.promises.writeFile(filePath, response.data);
-                    return response.data.toString('base64');
+                // Launch Puppeteer to take a screenshot of the webpage
+                const browser = await puppeteer.launch({
+                    args: ['--no-sandbox', '--disable-setuid-sandbox'],
                 });
-
-                const base64Screenshots = await Promise.all(screenshotPromises);
-                res.send({ website_image_base64: base64Screenshots[0] });
+                const page = await browser.newPage();
+                await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');
+                await page.goto(url, { waitUntil: 'networkidle2' });
+                const screenshotBuffer = await page.screenshot({ fullPage: true });
+                await browser.close();
+
+                // Save the screenshot to the file system
+                await writeFileAsync(filePath, screenshotBuffer);
+
+                // Return the base64-encoded image
+                const base64Image = Buffer.from(screenshotBuffer).toString('base64');
+                res.send({ website_image_base64: base64Image });
             } catch (error: any) {
                 console.error('Error scraping website:', error);
                 res.status(500).send({ error: 'Failed to scrape website', details: error.message });
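For reference, the two pieces this commit swaps in (Google Custom Search for Serpstack, Puppeteer for Scrapfly screenshots) can be exercised outside the server. The sketch below mirrors the new handler bodies as standalone functions; it is an illustration under stated assumptions, not the handler itself, and `GOOGLE_API_KEY` / `GOOGLE_SEARCH_ENGINE_ID` stand in for the `_CLIENT_`-prefixed variables the server reads:

```ts
// Standalone sketch of the new search + screenshot flow (illustrative names;
// GOOGLE_API_KEY / GOOGLE_SEARCH_ENGINE_ID stand in for the server's env vars).
import { google } from 'googleapis';
import puppeteer from 'puppeteer';

const customsearch = google.customsearch('v1');

// Mirrors the new /search handler body: one Custom Search call, mapped
// down to { url, snippet } pairs, with safe search on as in the handler.
async function search(query: string): Promise<{ url: string; snippet: string }[]> {
    const response = await customsearch.cse.list({
        q: query,
        cx: process.env.GOOGLE_SEARCH_ENGINE_ID,
        key: process.env.GOOGLE_API_KEY,
        safe: 'active',
    });
    return (
        response.data.items?.map(item => ({
            url: item.link ?? '',
            snippet: item.snippet ?? '',
        })) ?? []
    );
}

// Mirrors the new screenshot path: headless Chromium, full-page capture,
// base64-encoded like the handler's website_image_base64 field.
async function screenshotBase64(url: string): Promise<string> {
    const browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox'] });
    try {
        const page = await browser.newPage();
        await page.goto(url, { waitUntil: 'networkidle2' });
        const buffer = await page.screenshot({ fullPage: true });
        return Buffer.from(buffer).toString('base64');
    } finally {
        // Closing in finally keeps a failed navigation from leaking Chromium processes.
        await browser.close();
    }
}

(async () => {
    const results = await search('full-page screenshots with puppeteer');
    console.log(results.slice(0, 3));
})();
```

Wrapping `browser.close()` in a `finally` block is one small hardening over the handler as committed, where a failed `page.goto` would leave the browser process running.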