aboutsummaryrefslogtreecommitdiff
path: root/src/server/ApiManagers/AssistantManager.ts
diff options
context:
space:
mode:
author A.J. Shulman <Shulman.aj@gmail.com> 2024-08-15 13:16:32 -0400
committer A.J. Shulman <Shulman.aj@gmail.com> 2024-08-15 13:16:32 -0400
commit 6f9b8f9b393d411a17f7954b6cc36618efe698e2 (patch)
tree 8090d9d0bafdfe3e97b8fd8914da9d1264e4172c /src/server/ApiManagers/AssistantManager.ts
parent 0c8001c61a55540cdeeb6ae249fdd2835580121c (diff)
implemented search tool and other tools but scraping doesn't work
Diffstat (limited to 'src/server/ApiManagers/AssistantManager.ts')
-rw-r--r-- src/server/ApiManagers/AssistantManager.ts 97
1 files changed, 97 insertions, 0 deletions
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index 91185e042..6460edb9a 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -12,6 +12,7 @@ import { RAGChunk } from '../../client/views/nodes/ChatBox/types';
import { UnstructuredClient } from 'unstructured-client';
import { PartitionResponse } from 'unstructured-client/sdk/models/operations';
import { ChunkingStrategy, Strategy } from 'unstructured-client/sdk/models/shared';
+import * as cheerio from 'cheerio';
export enum Directory {
parsed_files = 'parsed_files',
@@ -80,6 +81,102 @@ export default class AssistantManager extends ApiManager {
register({
method: Method.POST,
+     // POST /getWebSearchResults
+     // Forwards a search query to the serpstack API and replies with a reduced
+     // list of hits.
+     //   Request body : { query: string }
+     //   200          : { results: Array<{ url, snippet }> } (empty when nothing matched)
+     //   400          : { error } when `query` is missing or blank
+     //   500          : { error, details } when serpstack reports an error or the call fails
+     subscription: '/getWebSearchResults',
+     secureHandler: async ({ req, res }) => {
+         const { query } = req.body;
+
+         // Reject unusable input up front instead of spending an API call on it.
+         if (typeof query !== 'string' || query.trim().length === 0) {
+             res.status(400).send({ error: 'A non-empty search query is required' });
+             return;
+         }
+
+         try {
+             const response = await axios.get('http://api.serpstack.com/search', {
+                 params: {
+                     access_key: process.env._CLIENT_SERPSTACK_API_KEY,
+                     query,
+                 },
+             });
+             const payload = response.data || {};
+
+             // NOTE(review): serpstack appears to report failures (bad key, quota, ...)
+             // inside the payload as `success: false` plus an `error` object rather
+             // than through the HTTP status - confirm against the API docs.
+             if (payload.success === false || payload.error) {
+                 const info = payload.error && payload.error.info;
+                 throw new Error(info || 'serpstack returned an error response');
+             }
+
+             // A search without hits may omit `organic_results`; treat that as "no results".
+             const organicResults: any[] = Array.isArray(payload.organic_results) ? payload.organic_results : [];
+             const results = organicResults.map((result: any) => ({
+                 url: result.url,
+                 snippet: result.snippet,
+             }));
+
+             res.send({ results });
+         } catch (error: unknown) {
+             console.error('Error performing web search:', error);
+             const details = error instanceof Error ? error.message : String(error);
+             res.status(500).send({ error: 'Failed to perform web search', details });
+         }
+     },
+ });
+
+ // Headers sent with every request made through `axiosInstance`: the
+ // User-Agent string of a desktop Chrome browser on Windows.
+ const scraperRequestHeaders = {
+     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
+ };
+
+ // Dedicated axios client used by `fetchWithRetry` for outbound page fetches.
+ const axiosInstance = axios.create({ headers: scraperRequestHeaders });
+
+ // Resolves after `ms` milliseconds.
+ const delay = (ms: number) =>
+     new Promise(resolve => {
+         setTimeout(resolve, ms);
+     });
+
+ /**
+  * GETs `url` through `axiosInstance` and returns the response body.
+  *
+  * When the server answers with HTTP 429 the request is repeated, waiting
+  * `backoff` ms before the first retry and doubling the wait each time, for at
+  * most `retries` retries. Any other failure - or a 429 once the retries are
+  * used up - is rethrown to the caller unchanged.
+  */
+ const fetchWithRetry = async (url: string, retries = 3, backoff = 300) => {
+     let retriesLeft = retries;
+     let waitMs = backoff;
+
+     for (;;) {
+         try {
+             const { data } = await axiosInstance.get(url);
+             return data;
+         } catch (error: any) {
+             const shouldRetry = retriesLeft > 0 && error.response && error.response.status === 429;
+             if (!shouldRetry) {
+                 throw error;
+             }
+             console.log(`Rate limited. Retrying in ${waitMs}ms...`);
+             await delay(waitMs);
+             retriesLeft -= 1;
+             waitMs *= 2;
+         }
+     }
+ };
+
+ register({
+     method: Method.POST,
+     // POST /scrapeWebsite
+     // Downloads the page at the given URL and replies with its visible text.
+     //   Request body : { url: string } (absolute http or https URL)
+     //   200          : { content: string } non-empty text lines joined with '\n'
+     //   400          : { error } when `url` is missing, malformed or not http(s)
+     //   500          : { error, details } when the download or the parsing fails
+     subscription: '/scrapeWebsite',
+     secureHandler: async ({ req, res }) => {
+         const { url } = req.body;
+
+         // The URL comes straight from the client: accept only well-formed
+         // absolute http(s) URLs so other schemes never reach the HTTP client.
+         // NOTE(review): this still allows requests to internal/private hosts
+         // (SSRF). Consider an allow-list or rejecting private address ranges.
+         let target: URL | undefined;
+         try {
+             target = typeof url === 'string' ? new URL(url) : undefined;
+         } catch {
+             target = undefined;
+         }
+         if (!target || (target.protocol !== 'http:' && target.protocol !== 'https:')) {
+             res.status(400).send({ error: 'A valid http(s) URL is required' });
+             return;
+         }
+
+         try {
+             const html = await fetchWithRetry(target.href);
+
+             // axios parses JSON bodies into objects; only textual markup can be scraped.
+             if (typeof html !== 'string') {
+                 throw new Error('Response body is not HTML text');
+             }
+             const $ = cheerio.load(html);
+
+             // Remove script, style tags, and other non-content elements
+             $('script, style, noscript, iframe, svg, img, video, audio').remove();
+
+             // Collects the text of an element and its descendants in document
+             // order. Whitespace-only nodes are skipped so the result does not
+             // accumulate runs of padding spaces.
+             function extractText(element: any): string {
+                 const pieces: string[] = [];
+                 element.contents().each((_: any, el: any) => {
+                     if (el.type === 'text') {
+                         const piece = $(el).text().trim();
+                         if (piece.length > 0) {
+                             pieces.push(piece);
+                         }
+                     } else if (el.type === 'tag' && !['script', 'style'].includes(el.name)) {
+                         const nested = extractText($(el));
+                         if (nested.length > 0) {
+                             pieces.push(nested);
+                         }
+                     }
+                 });
+                 return pieces.join(' ');
+             }
+
+             // Extract all visible text from the body, then drop blank lines and
+             // per-line indentation left over from the markup.
+             const extractedContent = extractText($('body'))
+                 .split('\n')
+                 .map(line => line.trim())
+                 .filter(line => line.length > 0)
+                 .join('\n');
+
+             res.send({ content: extractedContent });
+         } catch (error: unknown) {
+             console.error('Error scraping website:', error);
+             const details = error instanceof Error ? error.message : String(error);
+             res.status(500).send({ error: 'Failed to scrape website', details });
+         }
+     },
+ });
+
+ register({
+ method: Method.POST,
subscription: '/createDocument',
secureHandler: async ({ req, res }) => {
const { file_path } = req.body;