author | A.J. Shulman <Shulman.aj@gmail.com> | 2024-08-20 15:17:25 -0400
---|---|---
committer | A.J. Shulman <Shulman.aj@gmail.com> | 2024-08-20 15:17:25 -0400
commit | 4c0c7794c85cfdbcd61a7ee5cb9a29494fd0444b (patch)
tree | abf99fc24966e65a0e0db3f8e17ccb6edcff2d4c /src/server/ApiManagers/AssistantManager.ts
parent | 4b6ce2ffcb82c1a7467ef7ed8b67b97094a8f6b6 (diff)
Better styling: thoughts and actions are now hidden, and scrolling works better.
Next steps:
- [ ] Ensure it doesn't create more web documents when one already exists
- [ ] Render citations on the same line as the text instead of on the next line
- [ ] If the XML is invalid, run GPT-3.5 to verify and fix it based on examples (see the sketch after this list)
- [ ] Make sure that asking for other information doesn't go back to the same website: provide website history in the usage rules for the search tool and website-scraper tool, or directly in the prompt
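Of these, the XML repair is the most mechanical. Here is a minimal sketch of one way it could work, assuming `fast-xml-parser` for validation and the official OpenAI Node SDK for the GPT-3.5 call (neither is confirmed to be among this repo's dependencies, and `ensureValidXml` / `FEW_SHOT_EXAMPLES` are hypothetical names):

```ts
// Hypothetical helper for the "fix invalid XML with GPT-3.5" item above.
// Assumptions: fast-xml-parser and the official OpenAI SDK are installed,
// and OPENAI_API_KEY is set; FEW_SHOT_EXAMPLES stands in for real examples.
import { XMLValidator } from 'fast-xml-parser';
import OpenAI from 'openai';

const openai = new OpenAI();

const FEW_SHOT_EXAMPLES = `
<thoughts><thought>missing close tag</thoughts>
=>
<thoughts><thought>missing close tag</thought></thoughts>`;

export async function ensureValidXml(xml: string): Promise<string> {
    // Cheap local check first; only call the model when validation fails.
    if (XMLValidator.validate(xml) === true) return xml;

    const completion = await openai.chat.completions.create({
        model: 'gpt-3.5-turbo',
        temperature: 0,
        messages: [
            { role: 'system', content: `Repair the XML. Return only corrected XML, no commentary. Examples:${FEW_SHOT_EXAMPLES}` },
            { role: 'user', content: xml },
        ],
    });
    const fixed = completion.choices[0]?.message?.content ?? xml;

    // Re-validate so a bad completion can't replace broken XML with worse XML.
    return XMLValidator.validate(fixed) === true ? fixed : xml;
}
```

Re-validating after the repair matters: if the model returns commentary or still-broken markup, falling back to the original keeps the failure mode unchanged instead of making it worse.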
Diffstat (limited to 'src/server/ApiManagers/AssistantManager.ts')
-rw-r--r-- | src/server/ApiManagers/AssistantManager.ts | 64
1 file changed, 30 insertions(+), 34 deletions(-)
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index cd26ca79b..9b85dbbe8 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -14,6 +14,8 @@ import { PartitionResponse } from 'unstructured-client/sdk/models/operations';
 import { ChunkingStrategy, Strategy } from 'unstructured-client/sdk/models/shared';
 import * as cheerio from 'cheerio';
 import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';
+import { google } from 'googleapis';
+import puppeteer from 'puppeteer';
 
 export enum Directory {
     parsed_files = 'parsed_files',
@@ -55,6 +57,7 @@
             },
         });
         const scrapflyClient = new ScrapflyClient({ key: process.env._CLIENT_SCRAPFLY_API_KEY! });
+        const customsearch = google.customsearch('v1');
 
         register({
             method: Method.POST,
@@ -89,20 +92,18 @@
             secureHandler: async ({ req, res }) => {
                 const { query } = req.body;
                 try {
-                    const response = await axios.get('http://api.serpstack.com/search', {
-                        params: {
-                            access_key: process.env._CLIENT_SERPSTACK_API_KEY,
-                            query: query,
-                        },
+                    const response = await customsearch.cse.list({
+                        q: query,
+                        cx: process.env._CLIENT_GOOGLE_SEARCH_ENGINE_ID,
+                        key: process.env._CLIENT_GOOGLE_API_KEY,
+                        safe: 'active',
                     });
-                    console.log(response.data);
-                    const results = response.data.organic_results.map((result: any) => ({
-                        url: result.url,
-                        snippet: result.snippet,
-                    }));
-
-                    console.log(results);
+                    const results =
+                        response.data.items?.map((item: any) => ({
+                            url: item.link,
+                            snippet: item.snippet,
+                        })) || [];
 
                     res.send({ results });
                 } catch (error: any) {
@@ -144,6 +145,7 @@
                 const scrapedImagesDirectory = pathToDirectory(Directory.scrape_images);
                 const filePath = serverPathToFile(Directory.scrape_images, url_filename);
 
+                // Check if the image already exists
                 if (fs.existsSync(filePath)) {
                     const imageBuffer = await readFileAsync(filePath);
                     const base64Image = imageBuffer.toString('base64');
@@ -151,33 +153,27 @@
                     return res.send({ website_image_base64: base64Image });
                 }
 
+                // Create the directory if it doesn't exist
                 if (!fs.existsSync(scrapedImagesDirectory)) {
                     fs.mkdirSync(scrapedImagesDirectory);
                 }
 
-                const result = await scrapflyClient.scrape(
-                    new ScrapeConfig({
-                        url: url,
-                        render_js: true,
-                        screenshots: { everything: 'fullpage' },
-                    })
-                );
-
-                const screenshotPromises = Object.entries(result.result.screenshots).map(async ([name, screenshot]) => {
-                    const response = await axios.get(screenshot.url, {
-                        params: {
-                            key: process.env._CLIENT_SCRAPFLY_API_KEY!,
-                            options: 'print_media_format',
-                            proxy_pool: 'public_residential_pool',
-                        },
-                        responseType: 'arraybuffer',
-                    });
-                    await fs.promises.writeFile(filePath, response.data);
-                    return response.data.toString('base64');
+                // Launch Puppeteer to take a screenshot of the webpage
+                const browser = await puppeteer.launch({
+                    args: ['--no-sandbox', '--disable-setuid-sandbox'],
                 });
-
-                const base64Screenshots = await Promise.all(screenshotPromises);
-                res.send({ website_image_base64: base64Screenshots[0] });
+                const page = await browser.newPage();
+                await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');
+                await page.goto(url, { waitUntil: 'networkidle2' });
+                const screenshotBuffer = await page.screenshot({ fullPage: true });
+                await browser.close();
+
+                // Save the screenshot to the file system
+                await writeFileAsync(filePath, screenshotBuffer);
+
+                // Return the base64-encoded image
+                const base64Image = Buffer.from(screenshotBuffer).toString('base64');
+                res.send({ website_image_base64: base64Image });
             } catch (error: any) {
                 console.error('Error scraping website:', error);
                 res.status(500).send({ error: 'Failed to scrape website', details: error.message });
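For reference, the two pieces this commit swaps in (Google Custom Search for Serpstack, Puppeteer for Scrapfly screenshots) can be exercised outside the server. The sketch below mirrors the new handler bodies as standalone functions; it is an illustration under stated assumptions, not the handler itself, and `GOOGLE_API_KEY` / `GOOGLE_SEARCH_ENGINE_ID` stand in for the `_CLIENT_`-prefixed variables the server reads:

```ts
// Standalone sketch of the new search + screenshot flow (illustrative names;
// GOOGLE_API_KEY / GOOGLE_SEARCH_ENGINE_ID stand in for the server's env vars).
import { google } from 'googleapis';
import puppeteer from 'puppeteer';

const customsearch = google.customsearch('v1');

// Mirrors the new /search handler body: one Custom Search call, mapped
// down to { url, snippet } pairs, with safe search on as in the handler.
async function search(query: string): Promise<{ url: string; snippet: string }[]> {
    const response = await customsearch.cse.list({
        q: query,
        cx: process.env.GOOGLE_SEARCH_ENGINE_ID,
        key: process.env.GOOGLE_API_KEY,
        safe: 'active',
    });
    return (
        response.data.items?.map(item => ({
            url: item.link ?? '',
            snippet: item.snippet ?? '',
        })) ?? []
    );
}

// Mirrors the new screenshot path: headless Chromium, full-page capture,
// base64-encoded like the handler's website_image_base64 field.
async function screenshotBase64(url: string): Promise<string> {
    const browser = await puppeteer.launch({ args: ['--no-sandbox', '--disable-setuid-sandbox'] });
    try {
        const page = await browser.newPage();
        await page.goto(url, { waitUntil: 'networkidle2' });
        const buffer = await page.screenshot({ fullPage: true });
        return Buffer.from(buffer).toString('base64');
    } finally {
        // Closing in finally keeps a failed navigation from leaking Chromium processes.
        await browser.close();
    }
}

(async () => {
    const results = await search('full-page screenshots with puppeteer');
    console.log(results.slice(0, 3));
})();
```

Wrapping `browser.close()` in a `finally` block is one small hardening over the handler as committed, where a failed `page.goto` would leave the browser process running.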