Diffstat (limited to 'src/server/ApiManagers/AssistantManager.ts')
-rw-r--r--  src/server/ApiManagers/AssistantManager.ts |  64
1 file changed, 30 insertions(+), 34 deletions(-)
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index cd26ca79b..9b85dbbe8 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -14,6 +14,8 @@ import { PartitionResponse } from 'unstructured-client/sdk/models/operations';
import { ChunkingStrategy, Strategy } from 'unstructured-client/sdk/models/shared';
import * as cheerio from 'cheerio';
import { ScrapflyClient, ScrapeConfig } from 'scrapfly-sdk';
+import { google } from 'googleapis';
+import puppeteer from 'puppeteer';
export enum Directory {
parsed_files = 'parsed_files',
@@ -55,6 +57,7 @@ export default class AssistantManager extends ApiManager {
},
});
const scrapflyClient = new ScrapflyClient({ key: process.env._CLIENT_SCRAPFLY_API_KEY! });
+ const customsearch = google.customsearch('v1');
register({
method: Method.POST,
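The two new imports wire in the replacements: googleapis supplies the Custom Search client created as `customsearch`, and puppeteer takes over screenshotting from Scrapfly (see the final hunk). A minimal sketch of how that Custom Search client is typically exercised, pulled out of the route handler for clarity; the env var names here are illustrative placeholders, not the `_CLIENT_`-prefixed ones the patch reads:

import { google } from 'googleapis';

const customsearch = google.customsearch('v1');

// Hypothetical standalone helper mirroring the handler in the next hunk.
// GOOGLE_CSE_ID / GOOGLE_API_KEY are illustrative env var names.
async function searchWeb(query: string) {
    const response = await customsearch.cse.list({
        q: query,                        // search terms
        cx: process.env.GOOGLE_CSE_ID,   // Programmable Search Engine ID
        key: process.env.GOOGLE_API_KEY, // API key with Custom Search enabled
        safe: 'active',                  // filter explicit content
    });
    // `items` is absent when the query has no hits, hence the fallback
    return (response.data.items ?? []).map(item => ({
        url: item.link,
        snippet: item.snippet,
    }));
}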
@@ -89,20 +92,18 @@ export default class AssistantManager extends ApiManager {
secureHandler: async ({ req, res }) => {
const { query } = req.body;
try {
- const response = await axios.get('http://api.serpstack.com/search', {
- params: {
- access_key: process.env._CLIENT_SERPSTACK_API_KEY,
- query: query,
- },
+ const response = await customsearch.cse.list({
+ q: query,
+ cx: process.env._CLIENT_GOOGLE_SEARCH_ENGINE_ID,
+ key: process.env._CLIENT_GOOGLE_API_KEY,
+ safe: 'active',
});
- console.log(response.data);
- const results = response.data.organic_results.map((result: any) => ({
- url: result.url,
- snippet: result.snippet,
- }));
-
- console.log(results);
+ const results =
+ response.data.items?.map((item: any) => ({
+ url: item.link,
+ snippet: item.snippet,
+ })) || [];
res.send({ results });
} catch (error: any) {
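Serpstack returned `organic_results` entries with a `url` field; the Custom Search API returns `items` whose URL field is named `link`, so the mapping above renames it to preserve the response shape clients already expect. An abridged, illustrative example of one item as the API returns it (real items carry more fields than shown):

// Illustrative Custom Search result item (abridged)
const exampleItem = {
    kind: 'customsearch#result',
    title: 'Example page title',
    link: 'https://example.com/page',
    snippet: 'Excerpt of the page text that matched the query...',
};
// The handler maps this to { url: exampleItem.link, snippet: exampleItem.snippet }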
@@ -144,6 +145,7 @@ export default class AssistantManager extends ApiManager {
const scrapedImagesDirectory = pathToDirectory(Directory.scrape_images);
const filePath = serverPathToFile(Directory.scrape_images, url_filename);
+ // Check if the image already exists
if (fs.existsSync(filePath)) {
const imageBuffer = await readFileAsync(filePath);
const base64Image = imageBuffer.toString('base64');
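The cache check above short-circuits the scrape: if a screenshot for this URL was saved on a previous request, it is read back from disk and returned without launching a browser. The same branch as a hypothetical helper (the name and signature are mine, not the patch's):

import * as fs from 'fs';

// Hypothetical helper mirroring the cache-hit branch: returns the stored
// screenshot as base64, or null on a cache miss so the caller can scrape.
async function readCachedScreenshot(filePath: string): Promise<string | null> {
    if (!fs.existsSync(filePath)) return null;
    const imageBuffer = await fs.promises.readFile(filePath);
    return imageBuffer.toString('base64');
}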
@@ -151,33 +153,27 @@ export default class AssistantManager extends ApiManager {
return res.send({ website_image_base64: base64Image });
}
+ // Create the directory if it doesn't exist
if (!fs.existsSync(scrapedImagesDirectory)) {
fs.mkdirSync(scrapedImagesDirectory);
}
- const result = await scrapflyClient.scrape(
- new ScrapeConfig({
- url: url,
- render_js: true,
- screenshots: { everything: 'fullpage' },
- })
- );
-
- const screenshotPromises = Object.entries(result.result.screenshots).map(async ([name, screenshot]) => {
- const response = await axios.get(screenshot.url, {
- params: {
- key: process.env._CLIENT_SCRAPFLY_API_KEY!,
- options: 'print_media_format',
- proxy_pool: 'public_residential_pool',
- },
- responseType: 'arraybuffer',
- });
- await fs.promises.writeFile(filePath, response.data);
- return response.data.toString('base64');
+ // Launch Puppeteer to take a screenshot of the webpage
+ const browser = await puppeteer.launch({
+ args: ['--no-sandbox', '--disable-setuid-sandbox'],
});
-
- const base64Screenshots = await Promise.all(screenshotPromises);
- res.send({ website_image_base64: base64Screenshots[0] });
+ const page = await browser.newPage();
+ await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');
+ await page.goto(url, { waitUntil: 'networkidle2' });
+ const screenshotBuffer = await page.screenshot({ fullPage: true });
+ await browser.close();
+
+ // Save the screenshot to the file system
+ await writeFileAsync(filePath, screenshotBuffer);
+
+ // Return the base64-encoded image
+ const base64Image = Buffer.from(screenshotBuffer).toString('base64');
+ res.send({ website_image_base64: base64Image });
} catch (error: any) {
console.error('Error scraping website:', error);
res.status(500).send({ error: 'Failed to scrape website', details: error.message });
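The Puppeteer flow above launches headless Chromium, sets a desktop user agent, waits for the network to go quiet, and captures a full-page screenshot that is cached to disk and returned as base64. One caveat: as written, if `page.goto` or `page.screenshot` throws, `browser.close()` is never reached and the Chromium process leaks. A hedged sketch of the same capture step with a try/finally guard (the function name is mine):

import puppeteer from 'puppeteer';

// Hypothetical standalone version of the capture step, with try/finally so
// the browser is closed even when navigation or the screenshot fails.
async function captureFullPage(url: string): Promise<Buffer> {
    const browser = await puppeteer.launch({
        args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });
    try {
        const page = await browser.newPage();
        await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');
        await page.goto(url, { waitUntil: 'networkidle2' });
        const screenshot = await page.screenshot({ fullPage: true });
        return Buffer.from(screenshot); // Buffer in older puppeteer, Uint8Array in newer
    } finally {
        await browser.close();
    }
}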