diff options
author | A.J. Shulman <Shulman.aj@gmail.com> | 2024-08-16 13:25:36 -0400 |
---|---|---|
committer | A.J. Shulman <Shulman.aj@gmail.com> | 2024-08-16 13:25:36 -0400 |
commit | a88190885c5d22ba6a31e3a7379aaaa2440ae0d4 (patch) | |
tree | 0b6de1e78dc093612edb032b758172aba5242749 /src/server/ApiManagers/AssistantManager.ts | |
parent | afb5bb72c0f85eda1f41eab0e8b76398fff7ac7f (diff) |
added check for whether an image for that URL already exists
Diffstat (limited to 'src/server/ApiManagers/AssistantManager.ts')
-rw-r--r-- | src/server/ApiManagers/AssistantManager.ts | 76 |
1 file changed, 43 insertions, 33 deletions
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index c034960c9..b8f5f4241 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -24,6 +24,8 @@ export enum Directory { pdf_thumbnails = 'pdf_thumbnails', audio = 'audio', csv = 'csv', + chunk_images = 'chunk_images', + scrape_images = 'scrape_images', } export function serverPathToFile(directory: Directory, filename: string) { @@ -138,40 +140,48 @@ export default class AssistantManager extends ApiManager { secureHandler: async ({ req, res }) => { const { url } = req.body; try { - const result = await scrapflyClient.scrape( - new ScrapeConfig({ - url: url, - // enable headless browsers for screenshots - render_js: true, - // optional: you can wait for page to load before capturing - screenshots: { - // name: what-to-capture - // fullpage - will capture everything - // css selector (e.g. #reviews) - will capture just that element - everything: 'fullpage', - }, - }) - ); - console.log(result.result.screenshots); - - for (let [name, screenshot] of Object.entries(result.result.screenshots)) { - let response = await axios.get(screenshot.url, { - // note: don't forget to add your API key parameter: - params: { key: process.env._CLIENT_SCRAPFLY_API_KEY!, options: 'print_media_format' }, - // this indicates that response is binary data: - responseType: 'arraybuffer', - }); - // write to screenshot data to a file in current directory: - fs.writeFileSync(`example-screenshot-${name}.${screenshot.extension}`, response.data); - const base64String = response.data.toString('base64'); - const directory = path.join(publicDirectory, '/files/scrape_images/'); - if (!fs.existsSync(directory)) { - fs.mkdirSync(directory); + const url_filename = url.replace(/\./g, '-'); + const scrapedImagesDirectory = pathToDirectory(Directory.scrape_images); + const filePath = serverPathToFile(Directory.scrape_images, url_filename); + if 
(fs.existsSync(filePath)) { + const imageBuffer = await readFileAsync(filePath); + const base64Image = imageBuffer.toString('base64'); + res.send({ website_image_base64: base64Image }); + return; + } else { + if (!fs.existsSync(scrapedImagesDirectory)) { + fs.mkdirSync(scrapedImagesDirectory); + } + const result = await scrapflyClient.scrape( + new ScrapeConfig({ + url: url, + // enable headless browsers for screenshots + render_js: true, + // optional: you can wait for page to load before capturing + screenshots: { + // name: what-to-capture + // fullpage - will capture everything + // css selector (e.g. #reviews) - will capture just that element + everything: 'fullpage', + }, + }) + ); + console.log(result.result.screenshots); + + for (let [name, screenshot] of Object.entries(result.result.screenshots)) { + let response = await axios.get(screenshot.url, { + // note: don't forget to add your API key parameter: + params: { key: process.env._CLIENT_SCRAPFLY_API_KEY!, options: 'print_media_format' }, + // this indicates that response is binary data: + responseType: 'arraybuffer', + }); + // write to screenshot data to a file in current directory: + fs.writeFileSync(filePath, response.data); + const base64String = response.data.toString('base64'); + await fs.promises.writeFile(filePath, response.data); + + res.send({ website_image_base64: base64String }); } - const filePath = path.join(directory, 'example-screenshot-' + name + '.' + screenshot.extension); - await fs.promises.writeFile(filePath, response.data); - - res.send({ website_image_base64: base64String }); } } catch (error: any) { console.error('Error scraping website:', error); |