aboutsummaryrefslogtreecommitdiff
path: root/src/server/ApiManagers/AssistantManager.ts
diff options
context:
space:
mode:
author	A.J. Shulman <Shulman.aj@gmail.com>	2024-08-16 13:25:36 -0400
committer	A.J. Shulman <Shulman.aj@gmail.com>	2024-08-16 13:25:36 -0400
commit	a88190885c5d22ba6a31e3a7379aaaa2440ae0d4 (patch)
tree	0b6de1e78dc093612edb032b758172aba5242749 /src/server/ApiManagers/AssistantManager.ts
parent	afb5bb72c0f85eda1f41eab0e8b76398fff7ac7f (diff)
added check for if an image for that URL already exists
Diffstat (limited to 'src/server/ApiManagers/AssistantManager.ts')
-rw-r--r--	src/server/ApiManagers/AssistantManager.ts	76
1 file changed, 43 insertions(+), 33 deletions(-)
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index c034960c9..b8f5f4241 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -24,6 +24,8 @@ export enum Directory {
pdf_thumbnails = 'pdf_thumbnails',
audio = 'audio',
csv = 'csv',
+ chunk_images = 'chunk_images',
+ scrape_images = 'scrape_images',
}
export function serverPathToFile(directory: Directory, filename: string) {
@@ -138,40 +140,48 @@ export default class AssistantManager extends ApiManager {
secureHandler: async ({ req, res }) => {
const { url } = req.body;
try {
- const result = await scrapflyClient.scrape(
- new ScrapeConfig({
- url: url,
- // enable headless browsers for screenshots
- render_js: true,
- // optional: you can wait for page to load before capturing
- screenshots: {
- // name: what-to-capture
- // fullpage - will capture everything
- // css selector (e.g. #reviews) - will capture just that element
- everything: 'fullpage',
- },
- })
- );
- console.log(result.result.screenshots);
-
- for (let [name, screenshot] of Object.entries(result.result.screenshots)) {
- let response = await axios.get(screenshot.url, {
- // note: don't forget to add your API key parameter:
- params: { key: process.env._CLIENT_SCRAPFLY_API_KEY!, options: 'print_media_format' },
- // this indicates that response is binary data:
- responseType: 'arraybuffer',
- });
- // write to screenshot data to a file in current directory:
- fs.writeFileSync(`example-screenshot-${name}.${screenshot.extension}`, response.data);
- const base64String = response.data.toString('base64');
- const directory = path.join(publicDirectory, '/files/scrape_images/');
- if (!fs.existsSync(directory)) {
- fs.mkdirSync(directory);
+ const url_filename = url.replace(/\./g, '-');
+ const scrapedImagesDirectory = pathToDirectory(Directory.scrape_images);
+ const filePath = serverPathToFile(Directory.scrape_images, url_filename);
+ if (fs.existsSync(filePath)) {
+ const imageBuffer = await readFileAsync(filePath);
+ const base64Image = imageBuffer.toString('base64');
+ res.send({ website_image_base64: base64Image });
+ return;
+ } else {
+ if (!fs.existsSync(scrapedImagesDirectory)) {
+ fs.mkdirSync(scrapedImagesDirectory);
+ }
+ const result = await scrapflyClient.scrape(
+ new ScrapeConfig({
+ url: url,
+ // enable headless browsers for screenshots
+ render_js: true,
+ // optional: you can wait for page to load before capturing
+ screenshots: {
+ // name: what-to-capture
+ // fullpage - will capture everything
+ // css selector (e.g. #reviews) - will capture just that element
+ everything: 'fullpage',
+ },
+ })
+ );
+ console.log(result.result.screenshots);
+
+ for (let [name, screenshot] of Object.entries(result.result.screenshots)) {
+ let response = await axios.get(screenshot.url, {
+ // note: don't forget to add your API key parameter:
+ params: { key: process.env._CLIENT_SCRAPFLY_API_KEY!, options: 'print_media_format' },
+ // this indicates that response is binary data:
+ responseType: 'arraybuffer',
+ });
+ // write to screenshot data to a file in current directory:
+ fs.writeFileSync(filePath, response.data);
+ const base64String = response.data.toString('base64');
+ await fs.promises.writeFile(filePath, response.data);
+
+ res.send({ website_image_base64: base64String });
}
- const filePath = path.join(directory, 'example-screenshot-' + name + '.' + screenshot.extension);
- await fs.promises.writeFile(filePath, response.data);
-
- res.send({ website_image_base64: base64String });
}
} catch (error: any) {
console.error('Error scraping website:', error);