diff options
Diffstat (limited to 'src/server/ApiManagers/AssistantManager.ts')
-rw-r--r-- | src/server/ApiManagers/AssistantManager.ts | 97 |
1 files changed, 97 insertions, 0 deletions
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index 91185e042..6460edb9a 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -12,6 +12,7 @@ import { RAGChunk } from '../../client/views/nodes/ChatBox/types';
 import { UnstructuredClient } from 'unstructured-client';
 import { PartitionResponse } from 'unstructured-client/sdk/models/operations';
 import { ChunkingStrategy, Strategy } from 'unstructured-client/sdk/models/shared';
+import * as cheerio from 'cheerio';
 
 export enum Directory {
     parsed_files = 'parsed_files',
@@ -80,6 +81,102 @@ export default class AssistantManager extends ApiManager {
 
         register({
             method: Method.POST,
+            subscription: '/getWebSearchResults',
+            secureHandler: async ({ req, res }) => {
+                const { query } = req.body;
+                try {
+                    const response = await axios.get('http://api.serpstack.com/search', {
+                        params: {
+                            access_key: process.env._CLIENT_SERPSTACK_API_KEY,
+                            query: query,
+                        },
+                    });
+                    console.log(response.data);
+
+                    const results = response.data.organic_results.map((result: any) => ({
+                        url: result.url,
+                        snippet: result.snippet,
+                    }));
+
+                    console.log(results);
+
+                    res.send({ results });
+                } catch (error: any) {
+                    console.error('Error performing web search:', error);
+                    res.status(500).send({ error: 'Failed to perform web search', details: error.message });
+                }
+            },
+        });
+
+        const axiosInstance = axios.create({
+            headers: {
+                'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+            },
+        });
+
+        const delay = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
+
+        const fetchWithRetry = async (url: string, retries = 3, backoff = 300) => {
+            try {
+                const response = await axiosInstance.get(url);
+                return response.data;
+            } catch (error: any) {
+                if (retries > 0 && error.response && error.response.status === 429) {
+                    console.log(`Rate limited. Retrying in ${backoff}ms...`);
+                    await delay(backoff);
+                    return fetchWithRetry(url, retries - 1, backoff * 2);
+                }
+                throw error;
+            }
+        };
+
+        register({
+            method: Method.POST,
+            subscription: '/scrapeWebsite',
+            secureHandler: async ({ req, res }) => {
+                const { url } = req.body;
+                try {
+                    const html = await fetchWithRetry(url);
+                    const $ = cheerio.load(html);
+
+                    // Remove script, style tags, and other non-content elements
+                    $('script, style, noscript, iframe, svg, img, video, audio').remove();
+
+                    // Function to extract text from an element and its children
+                    function extractText(element: any): string {
+                        let text = '';
+                        element.contents().each((_: any, el: any) => {
+                            if (el.type === 'text') {
+                                text += $(el).text().trim() + ' ';
+                            } else if (el.type === 'tag' && !['script', 'style'].includes(el.name)) {
+                                text += extractText($(el)) + ' ';
+                            }
+                        });
+                        return text.trim();
+                    }
+
+                    // Extract all visible text from the body
+                    const bodyText = extractText($('body'));
+
+                    // Split the text into lines and remove empty lines
+                    const lines = bodyText
+                        .split('\n')
+                        .map(line => line.trim())
+                        .filter(line => line.length > 0);
+
+                    // Join the lines back together
+                    const extractedContent = lines.join('\n');
+
+                    res.send({ content: extractedContent });
+                } catch (error: any) {
+                    console.error('Error scraping website:', error);
+                    res.status(500).send({ error: 'Failed to scrape website', details: error.message });
+                }
+            },
+        });
+
+        register({
+            method: Method.POST,
             subscription: '/createDocument',
             secureHandler: async ({ req, res }) => {
                 const { file_path } = req.body;