diff options
-rw-r--r-- | src/client/views/Main.tsx | 2 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/ProgressBar.tsx | 13 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/prompts.ts | 465 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/tools.ts | 26 | ||||
-rw-r--r-- | src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts | 43 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/agentsystem/Agent.ts (renamed from src/client/views/nodes/ChatBox/Agent.ts) | 148 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/agentsystem/prompts.ts | 181 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss (renamed from src/client/views/nodes/ChatBox/ChatBox.scss) | 0 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx (renamed from src/client/views/nodes/ChatBox/ChatBox.tsx) | 47 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx (renamed from src/client/views/nodes/ChatBox/MessageComponent.tsx) | 57 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss (renamed from src/client/views/nodes/ChatBox/ProgressBar.scss) | 0 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx | 22 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts (renamed from src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts) | 2 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts (renamed from src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts) | 0 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/tools/BaseTool.ts (renamed from src/client/views/nodes/ChatBox/tools/BaseTool.ts) | 2 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/tools/CalculateTool.ts (renamed from src/client/views/nodes/ChatBox/tools/CalculateTool.ts) | 0 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/tools/CreateCSVTool.ts (renamed from src/client/views/nodes/ChatBox/tools/CreateCSVTool.ts) | 0 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/tools/CreateCollectionTool.ts (renamed from src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts) | 0 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/tools/DataAnalysisTool.ts (renamed from src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts) | 0 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/tools/GetDocsTool.ts (renamed from src/client/views/nodes/ChatBox/tools/GetDocsTool.ts) | 0 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/tools/NoTool.ts (renamed from src/client/views/nodes/ChatBox/tools/NoTool.ts) | 0 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/tools/RAGTool.ts (renamed from src/client/views/nodes/ChatBox/tools/RAGTool.ts) | 2 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/tools/SearchTool.ts (renamed from src/client/views/nodes/ChatBox/tools/SearchTool.ts) | 0 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts | 83 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/tools/WikipediaTool.ts (renamed from src/client/views/nodes/ChatBox/tools/WikipediaTool.ts) | 0 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/types/types.ts (renamed from src/client/views/nodes/ChatBox/types.ts) | 40 | ||||
-rw-r--r-- | src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts (renamed from src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts) | 2 | ||||
-rw-r--r-- | src/server/ApiManagers/AssistantManager.ts | 2 |
28 files changed, 445 insertions, 692 deletions
diff --git a/src/client/views/Main.tsx b/src/client/views/Main.tsx index f7cd0e925..e1f7dd233 100644 --- a/src/client/views/Main.tsx +++ b/src/client/views/Main.tsx @@ -27,7 +27,7 @@ import { CollectionSchemaView } from './collections/collectionSchema/CollectionS import { SchemaRowBox } from './collections/collectionSchema/SchemaRowBox'; import './global/globalScripts'; import { AudioBox } from './nodes/AudioBox'; -import { ChatBox } from './nodes/ChatBox/ChatBox'; +import { ChatBox } from './nodes/chatbot/chatboxcomponents/ChatBox'; import { ComparisonBox } from './nodes/ComparisonBox'; import { DataVizBox } from './nodes/DataVizBox/DataVizBox'; import { DiagramBox } from './nodes/DiagramBox'; diff --git a/src/client/views/nodes/ChatBox/ProgressBar.tsx b/src/client/views/nodes/ChatBox/ProgressBar.tsx deleted file mode 100644 index 0aa07213f..000000000 --- a/src/client/views/nodes/ChatBox/ProgressBar.tsx +++ /dev/null @@ -1,13 +0,0 @@ -import React from 'react'; -import './ProgressBar.scss'; - -export const ProgressBar: React.FC = () => { - return ( - <div className="spinner-container"> - <div className="spinner"> - <div className="double-bounce1"></div> - <div className="double-bounce2"></div> - </div> - </div> - ); -}; diff --git a/src/client/views/nodes/ChatBox/prompts.ts b/src/client/views/nodes/ChatBox/prompts.ts deleted file mode 100644 index 0a356189b..000000000 --- a/src/client/views/nodes/ChatBox/prompts.ts +++ /dev/null @@ -1,465 +0,0 @@ -// prompts.ts - -import { Tool } from './types'; - -export function getReactPrompt(tools: Tool[], summaries: () => string, chatHistory: string): string { - const toolDescriptions: string = tools - .map( - tool => ` - <tool> - <title>${tool.name}</title> - <brief_summary>${tool.briefSummary}</brief_summary> - </tool> - ` - ) - .join('\n'); - - return `<system_message> - <task> - You are an advanced AI assistant equipped with various tools to answer user queries accurately and efficiently. Your task is to provide a comprehensive response based on the user's prompt using available tools, chat history, and provided information. Follow these guidelines meticulously to ensure the accuracy and structure of your response. - </task> - - <critical_points> - <point>**MOST IMPORTANT**: Always output responses within stage number tags, using the stage number and the system role as the root tag (e.g., <stage number="2" role="system">, <stage number="4" role="system">, etc.). This is crucial and should never be overlooked.</point> - <point>**STOP after every stage and wait for the system to provide the next input (e.g., action rules or observations).</point> - <point>Only output **ONE stage at a time** in your responses. Do not skip stages or provide multiple stages at once. Thus, you should only output even stage number root tags.</point> - <point>Always structure your responses using valid, well-formed XML with properly nested tags.</point> - <point>If a tool is needed, ALWAYS select the most appropriate tool based on the user's query.</point> - <point>If the query could relate to user documents or require external information (e.g., RAG, search + website scraping, data analysis), USE the appropriate tool to gather that information.</point> - <point>If there are no user docs or the user docs have not yielded helpful information, use the search tool to find websites followed by the website scraper tool to get useful infromation from one of those websites. You can use the website scraper (or even the search tool), multiple times to find information from multiple websites either from the same search or different searches.</point> - <point>Ensure at the end of every final answer, you provide exactly three follow-up questions from the user's perspective—from the perspective that they are asking the question.</point> - <point>Always follow the response structure provided in the instructions.</point> - <point>If a tool doesn't work—or yield helpful results—after two tries, EITHER use another tool or proceed with the response and ask the user for more information or clarification or let them know you cannot answer their question and why. DO NOT CONTINUE WITH THE SAME TOOL 3 TIMES.</point> - <point>Use multiple tools in conjunction with each other to provide a comprehensive answer to the user's query, if needed (i.e. for the prompt "create a CSV showing historical bird migration trends", you could use the search tool and the webscraper tool to get the info, and then use the create CSV tool to create the CSV)</point> - </critical_points> - - <response_structure> - <instruction> - When providing your final response, use the following structure: - </instruction> - <answer> - <tag><grounded_text> - Wrap text that is derived from tool-based or chunk-based information within these tags, ensuring proper citation.</tag> - <tag><normal_text> - Wrap text that is not derived from tool-based or chunk-based information within these tags.</tag> - <citations> - <tag><citation> - Provide citations for each grounded text, referencing the tool or chunk used.</tag> - </citations> - <follow_up_questions> - <tag><question> - Include exactly three follow-up questions from the user's perspective within these tags.</tag> - </follow_up_questions> - <loop_summary> - <tag><loop_summary> - Provide a summary of the actions and tools used by the assistant throughout the interaction within these tags.</tag> - </loop_summary> - </answer> - </response_structure> - - <grounded_text_guidelines> - <step>Wrap all information derived from tools (e.g., RAG, search + website scraping, data analysis)—which will be provided in chunks—in <grounded_text> tags.</step> - <step>DO NOT PUT ANYTHING THAT IS NOT DIRECTLY DERIVED FROM TOOLS OR CHUNKS IN <grounded_text> TAGS.</step> - <step>Use a single <grounded_text> tag for sequential and closely related information that references the same citation.</step> - <step>If other citations are used sequentially, create new <grounded_text> tags.</step> - <step>Ensure each <grounded_text> tag has corresponding citations (up to three, and one is fine). Separate multiple citation indices with commas.</step> - <step>Grounded text can be as short as a few words or as long as several sentences.</step> - <step>Avoid overlapping or nesting <grounded_text> tags; use sequential tags instead.</step> - <step>Grounded text tags should always have a citation_index attribute that references a citation index number that the text is grounded in.</step> - <step>Content within the <grounded_text> tags should be in Markdown format.</step> - </grounded_text_guidelines> - - <normal_text_guidelines> - <step>Wrap all text that is not derived from tools or chunks in <normal_text> tags (any text outputted in the answer that is not in a <grounded_text> tag should be within a normal text tag).</step> - <step>Ensure that these tags are used for your reasoning, background knowledge, or general information that does not require a citation.</step> - <step>Do not use <normal_text> tags for information that needs grounding or citation.</step> - <step>Anything that is in any user docs should be grounded text and cited, not normal text, even if it is background or general information.</step> - <step>Content within the <normal_text> tags should be in Markdown format.</step> - </normal_text_guidelines> - - <citation_guidelines> - <step>Create a unique citation for each distinct piece of information from tools or chunks that is used to support <grounded_text>.</step> - <step>Ensure each citation has a unique index number.</step> - <step>Specify the correct type: "text", "image", "table", "csv", or "url".</step> - <step>For text-based information, include only the relevant subset of the original information that the <grounded_text> is based on.</step> - <step>For image, table, csv, or url citation types, leave the citation content empty.</step> - <step>ALL CITATIONS MUST use the chunk_id field to reference the source, whether it's from RAG, search + website scraping, data analysis, or any other tool.</step> - <step>One citation can be used for multiple <grounded_text> tags if they are based on the same tool or chunk information.</step> - <step>!!!DO NOT OVERCITE - only include citations for information that is directly relevant to the <grounded_text>.</step> - </citation_guidelines> - - <operational_process> - <step>Analyze the user's query carefully.</step> - <step>Determine whether a tool is required to answer the query accurately.</step> - <step>If a tool is necessary:</step> - <substeps> - <substep>Select the most appropriate tool.</substep> - <substep>Use the <action> tag to specify the tool.</substep> - <substep>End your response after the <action> tag and wait for action rules to be provided.</substep> - <substep>Based on the action rules, provide the necessary tool parameters within the <inputs>. The <inputs> tag should be within the <action_input> tag, and should follow an <action_description> tag that contains a brief description of what you're doing with the action.</substep> - <substep>For each input, you may provide as many different iterations of the same tag (i.e. to provide multiple inputs to the tool) as is specified in the input's max_input's field.</substep> - <substep>End your response again and wait for the observation from the tool.</substep> - </substeps> - <step>If no tool is needed, use the 'no_tool' action but still follow the same response structure.</step> - <step>If the query might relate to user documents or requires external information, **ALWAYS** use the appropriate tool to retrieve the information (either rag or dataAnalysis).</step> - <step>Once all observations are collected, or if no tool was needed, provide your comprehensive answer within the <answer> tag, using the <grounded_text> and <normal_text> tags as required.</step> - </operational_process> - - <final_answer_requirements> - <requirement>Your final <answer> tag must contain:</requirement> - <elements> - <element>The complete answer to the user's query, with grounded information wrapped in <grounded_text> tags and general information wrapped in <normal_text> tags.</element> - <element>Exactly three follow-up questions written from the user's perspective, enclosed within <follow_up_questions> tags.</element> - <element>A concise <loop_summary> that describes the actions and tools used throughout the interaction.</element> - </elements> - </final_answer_requirements> - - <tools> - ${toolDescriptions} - <note>If no external tool is required to answer the question, use the 'no_tool' action. However, if the query might relate to user documents or require external information, do not use 'no_tool'—instead, use the appropriate tool (RAG, search + website scraping, data analysis), even if unsure.</note> - </tools> - - <user_information> - <note>ENSURE THAT YOU ONLY USE TOOLS THAT ANALYZE OR OTHERWISE USE USER DOCS IF THE QUERY APPLIES TO ONE OF THESE USER SUMMARIES (AT LEAST SOMEWHAT). IF THERE ARE NO SUMMARIES, THERE ARE NO USER DOCUMENTS.</note> - <summaries> - ${summaries()} - </summaries> - </user_information> - - <example_interactions> - <note>These examples are not, by any means, exhaustive in terms of how tools can be used in conjunction with one another. They simply are to provide you with examples of how to structure your outputs and use some of the tools in some contexts.</note> - <reminder>YOU ONLY OUTPUT THE ASSISTANT STAGES:</reminder> - <interaction description="rag and data analysis tool example"> - <system_message> - ***SYSTEM MESSAGE ELIDED*** - </system_message> - <stage number="1" role="user"> - <query>Could you provide a detailed overview of the 2010 Vancouver Winter Olympics's impact, including the overall summary of the games, key moments from official statements, and how the medal count compared across countries?</query> - </stage> - - <stage number="2" role="assistant"> - <thought> - Since the user has Olympics related docs, I will use the RAG tool to find relevant information from the user's documents, specifically focusing on key moments and statements from an official press release. - </thought> - <action>rag</action> - </stage> - - <stage number="3" role="user"> - <action_rules>***Action rules elided***</action_rules> - </stage> - - <stage number="4" role="assistant"> - <action_input> - <action_input_description>Searching user documents for official statements and key moments of the 2010 Vancouver Winter Olympics.</action_input_description> - <inputs> - <hypothetical_document_chunk> - The user is asking for key moments and statements from official sources regarding the 2010 Vancouver Winter Olympics. Search the provided documents for any press releases or official statements that highlight significant events, achievements, or noteworthy aspects of the games. - </hypothetical_document_chunk> - </inputs> - </action_input> - </stage> - - <stage number="5" role="user"> - <chunk chunk_id="987f6543-e21b-43c9-a987-654321fedcba" chunk_type="text"> - PRESS RELEASE: 2010 VANCOUVER WINTER OLYMPICS - - Vancouver, BC - - - The 2010 Winter Olympics, officially known as the XXI Olympic Winter Games, took place in Vancouver, British Columbia, Canada, from February 12 to 28, 2010. It featured 86 events in 15 disciplines across 7 sports, with 2,566 athletes from 82 National Olympic Committees participating. This edition of the Winter Olympics was notable for being the first hosted by Canada since the 1988 Winter Olympics in Calgary. - - Overall, the 2010 Winter Olympics have been hailed as a monumental success, showcasing the best of Canadian culture and athletic prowess. The Opening Ceremony, held on February 12, 2010, was a spectacular celebration of the nation's history and diversity... - - Key Moments: - - The Canadian men's and women's ice hockey teams both claimed gold medals, an achievement that brought immense pride to the host nation. - - Tragedy struck early in the games with the death of Georgian luger Nodar Kumaritashvili during a training run. This incident led to increased scrutiny of safety measures across all events... - - ***More chunk content elided for brevity*** - - The games concluded on February 28, 2010, with a memorable Closing Ceremony that highlighted the achievements of the athletes and the spirit of unity that defined the 2010 Vancouver Winter Olympics. - </chunk> - </stage> - - <stage number="6" role="assistant"> - <thought> - With information on key moments from the press release, I will now analyze the medal count data and economic impact data to provide a comprehensive overview of the 2010 Vancouver Winter Olympics's impact. - </thought> - <action>dataAnalysis</action> - </stage> - - <stage number="7" role="user"> - <action_rules>***Action rules elided***</action_rules> - </stage> - - <stage number="8" role="assistant"> - <action_input> - <action_input_description>Analyzing the medal count data and economic impact data for the 2010 Vancouver Winter Olympics to compare country performances and assess the games' overall impact.</action_input_description> - <inputs> - <csv_file_name>2010_Vancouver_Olympics_Medal_Count.csv</csv_file_name> - <csv_file_name>2010_Vancouver_Olympics_Economic_Impact.csv</csv_file_name> - </inputs> - </action_input> - </stage> - - <stage number="9" role="user"> - <chunk chunk_id="789a1234-e567-890f-gh12-34567ijklmn8" chunk_type="csv"> - Country,Gold,Silver,Bronze,Total - USA,9,15,13,37 - Germany,10,13,7,30 - Canada,14,7,5,26 - Norway,9,8,6,23 - Russia,3,5,7,15 - South Korea,6,6,2,14 - China,5,2,4,11 - Sweden,5,2,4,11 - France,2,3,6,11 - Austria,4,6,6,16 - </chunk> - <chunk chunk_id="bcde5678-f901-2345-gh67-89ijklmnopq0" chunk_type="csv"> - Category,Value - Total Cost (CAD),7.7 billion - Revenue Generated (CAD),2.5 billion - Jobs Created,45000 - Tourism Increase (%),35 - Infrastructure Investment (CAD),1.9 billion - Post-Olympic Annual Tourism Revenue Increase (CAD),220 million - Venues Repurposed,8 - Carbon Footprint Offset (%),118 - </chunk> - </stage> - - <stage number="10" role="assistant"> - <thought> - Now that I have gathered comprehensive information from the official press release, medal count data, and economic impact data, I will provide a detailed answer to the user's query, incorporating all aspects of the 2010 Vancouver Winter Olympics's impact. - </thought> - <answer> - <grounded_text citation_index="1"> - The 2010 Winter Olympics, officially known as the XXI Olympic Winter Games, took place in Vancouver, British Columbia, Canada, from February 12 to 28, 2010. It featured 86 events in 15 disciplines across 7 sports, with 2,566 athletes from 82 National Olympic Committees participating. - </grounded_text> - - <grounded_text citation_index="2"> - The Opening Ceremony was one of the most spectacular in Olympic history, celebrating Canadian culture and heritage. A significant highlight for the host nation was the victory of the Canadian men's and women's ice hockey teams, both securing gold medals, which was a source of immense national pride. - </grounded_text> - - <grounded_text citation_index="3"> - In terms of medal performance, the United States led the overall tally with 37 medals (9 gold, 15 silver, 13 bronze), followed by Germany with 30 medals (10 gold, 13 silver, 7 bronze). Canada, while placing third in total medals with 26, achieved a record number of 14 gold medals, the highest of any country in the 2010 games. Other notable performances came from Norway (23 medals), Russia (15 medals), and South Korea (14 medals). - </grounded_text> - - <grounded_text citation_index="4"> - The economic impact of the 2010 Vancouver Winter Olympics was significant. The total cost of hosting the games was approximately 7.7 billion CAD, while generating revenue of 2.5 billion CAD. The games created about 45,000 jobs and led to a 35% increase in tourism during the event. The long-term benefits include an annual increase of 220 million CAD in tourism revenue and 1.9 billion CAD invested in infrastructure. - </grounded_text> - - <grounded_text citation_index="5"> - From an environmental perspective, the Vancouver Olympics made efforts to be sustainable, with 118% of the carbon footprint offset and 8 venues repurposed after the games, demonstrating a commitment to leaving a positive legacy. - </grounded_text> - - <grounded_text citation_index="6"> - However, the games were also marked by tragedy with the unfortunate death of Georgian luger Nodar Kumaritashvili during a training run, which deeply affected the Olympic community and led to increased scrutiny of safety measures across all events. - </grounded_text> - - <grounded_text citation_index="7"> - Despite this tragedy, the 2010 Vancouver Winter Olympics were generally hailed as a monumental success, showcasing the best of Canadian culture and athletic prowess. The games concluded on February 28, 2010, with a memorable Closing Ceremony that highlighted the achievements of the athletes and the spirit of unity that defined these Olympics. - </grounded_text> - - <citations> - <citation index="1" chunk_id="987f6543-e21b-43c9-a987-654321fedcba" type="text"></citation> - <citation index="2" chunk_id="987f6543-e21b-43c9-a987-654321fedcba" type="text"></citation> - <citation index="3" chunk_id="789a1234-e567-890f-gh12-34567ijklmn8" type="csv"></citation> - <citation index="4" chunk_id="bcde5678-f901-2345-gh67-89ijklmnopq0" type="csv"></citation> - <citation index="5" chunk_id="bcde5678-f901-2345-gh67-89ijklmnopq0" type="csv"></citation> - <citation index="6" chunk_id="987f6543-e21b-43c9-a987-654321fedcba" type="text"></citation> - <citation index="7" chunk_id="987f6543-e21b-43c9-a987-654321fedcba" type="text"></citation> - </citations> - - <follow_up_questions> - <question>How did the 2010 Vancouver Winter Olympics compare to previous Winter Olympics in terms of economic impact and sustainability efforts?</question> - <question>What specific infrastructure improvements were made in Vancouver as a result of hosting the 2010 Winter Olympics?</question> - <question>How did the performance of Canadian athletes in the 2010 Vancouver Winter Olympics compare to their performance in previous Winter Olympics?</question> - </follow_up_questions> - <loop_summary> - The assistant used the RAG tool to gather information from the user's documents, focusing on key moments and official statements. It then used the dataAnalysis tool to analyze the medal count and economic impact data, providing a comprehensive overview of the 2010 Vancouver Winter Olympics's impact. - </loop_summary> - </answer> - </stage> - </interaction> - <interaction description="multistep web-scrape example"> - <system_message> - ***SYSTEM MESSAGE ELIDED*** - </system_message> - <stage number="1" role="user"> - <query>What are the best hiking trails in Yosemite National Park? And which are the safest?</query> - </stage> - <stage number="2" role="assistant"> - <thought> - To provide comprehensive information about the best hiking trails in Yosemite National Park, I'll use the search tool to find relevant websites with hiking recommendations, safety information, and general park information. - </thought> - <action>searchTool</action> - </stage> - <stage number="3" role="user"> - <action_rules>***Action rules elided***</action_rules> - </stage> - <stage number="4" role="assistant"> - <action_input> - <action_input_description>Searching the web for information about the best and safest hiking trails in Yosemite National Park.</action_input_description> - <inputs> - <query>Best hiking trails Yosemite National Park</query> - <query>Safest hiking trails Yosemite National Park</query> - <query>Information about hiking in Yosemite National Park</query> - </inputs> - </action_input> - </stage> - <stage number="5" role="user"> - <observation> - <chunk chunk_id="a1b2c3d4-e5f6-7g8h-9i10-j11k12l13m14" chunk_type="url"> - <url>https://www.nps.gov/yose/planyourvisit/hiking.htm</url> - <overview>Official National Park Service website with hiking information for Yosemite</overview> - </chunk> - <chunk chunk_id="n15o16p17-q18r-19s20-t21u-v22w23x24y25" chunk_type="url"> - <url>https://www.alltrails.com/parks/us/california/yosemite-national-park</url> - <overview>AllTrails page featuring user-rated hiking trails in Yosemite</overview> - </chunk> - <chunk chunk_id="z26a27b28-c29d-30e31-f32g-h33i34j35k36" chunk_type="url"> - <url>https://www.yosemitehikes.com/</url> - <overview>Comprehensive guide to Yosemite hikes with detailed trail information</overview> - </chunk> - </observation> - </stage> - <stage number="6" role="assistant"> - <thought> - I've found several relevant websites about hiking in Yosemite National Park. I'll start by scraping content from the official National Park Service website for the most reliable information, then move on to AllTrails for user ratings and reviews, and finally to YosemiteHikes.com for more detailed trail descriptions. - </thought> - <action>websiteInfoScraper</action> - </stage> - <stage number="7" role="user"> - <action_rules>***Action rules elided***</action_rules> - </stage> - <stage number="8" role="assistant"> - <action_input> - <action_input_description>Scraping information from three websites about hiking trails in Yosemite National Park.</action_input_description> - <inputs> - <url>https://www.nps.gov/yose/planyourvisit/hiking.htm</url> - <url>https://www.alltrails.com/parks/us/california/yosemite-national-park</url> - <url>https://www.yosemitehikes.com/</url> - </inputs> - </action_input> - </stage> - <stage number="9" role="user"> - <observation> - <chunk chunk_id="l37m38n39-o40p-41q42-r43s-t44u45v46w47" chunk_type="url"> - ***NPS general planning website image elided*** - </chunk> - <chunk chunk_id="x48y49z50-a51b-52c53-d54e-f55g56h57i58" chunk_type="url"> - ***AllTrails website image elided*** - </chunk> - <chunk chunk_id="j59k60l61-m62n-63o64-p65q-r66s67t68u69" chunk_type="url"> - ***YosemiteHikes.com website image elided*** - </chunk> - </observation> - </stage> - <stage number="10" role="assistant"> - <thought> - The information gathered from the three websites provides a good overview of the best hiking trails and safety considerations in Yosemite National Park. However, I noticed that there's limited information about seasonal trail conditions and closures, which could be crucial for planning a safe hike. To address this, I'll scrape additional information from the Yosemite National Park's current conditions page. - </thought> - <action>websiteInfoScraper</action> - </stage> - <stage number="11" role="user"> - <action_rules>***Action rules elided***</action_rules> - </stage> - <stage number="12" role="assistant"> - <action_input> - <action_input_description>Scraping information about current trail conditions and closures in Yosemite National Park.</action_input_description> - <inputs> - <url>https://www.nps.gov/yose/planyourvisit/conditions.htm</url> - </inputs> - </action_input> - </stage> - <stage number="13" role="user"> - <observation> - <chunk chunk_id="k70l71m72-n73o-74p75-q76r-s77t78u79v80" chunk_type="url"> - ***NPS conditions website image elided*** - </chunk> - </observation> - </stage> - <stage number="14" role="assistant"> - <thought> - Now that I have additional information about current trail conditions and seasonal closures, I can provide a more comprehensive and up-to-date answer to the user's query about the best and safest hiking trails in Yosemite National Park. - </thought> - <answer> - <grounded_text citation_index="1"> - Yosemite National Park offers a wide range of hiking trails suitable for various skill levels. Some of the best hiking trails in Yosemite, based on the information gathered, include: - - 1. Mist Trail to Vernal Fall: A moderate 3-mile round-trip hike in Yosemite Valley, offering close-up views of waterfalls... ***REST ELIDED FOR BREVITY*** </grounded_text> - - <grounded_text citation_index="2"> - ***CONTENT ELIDED FOR BREVITY*** - </grounded_text> - - <grounded_text citation_index="3"> - ***CONTENT ELIDED FOR BREVITY*** - </grounded_text> - <normal_text> - Regarding safety, some of the safest trails for hikers of all skill levels include: - </normal_text> - <grounded_text citation_index="1,2"> - 1. Lower Yosemite Fall Trail: An easy 1-mile loop in Yosemite Valley... ***REST ELIDED FOR BREVITY*** - </grounded_text> - - <grounded_text citation_index="4"> - ***CONTENT ELIDED FOR BREVITY*** - </grounded_text> - <normal_text> - To ensure a safe hiking experience in Yosemite, regardless of the trail you choose, follow these safety tips: - </normal_text> - <grounded_text citation_index="1,2,3,4"> - 1. Stay on marked trails at all times... - ***REST ELIDED FOR BREVITY*** - </grounded_text> - <normal_text> - Remember that even the "safest" trails can pose risks if proper precautions are not taken. Always hike within your ability level and be prepared for the specific conditions of the trail you choose. It's essential to check for daily updates on trail conditions before starting your hike, as they can change rapidly, especially during spring and fall. - </normal_text> - <citations> - <citation index="1" chunk_id="l37m38n39-o40p-41q42-r43s-t44u45v46w47" type="url"> - https://www.nps.gov/yose/planyourvisit/hiking.htm - </citation> - <citation index="2" chunk_id="x48y49z50-a51b-52c53-d54e-f55g56h57i58" type="url"> - https://www.alltrails.com/parks/us/california/yosemite-national-park - </citation> - <citation index="3" chunk_id="j59k60l61-m62n-63o64-p65q-r66s67t68u69" type="url"> - https://www.yosemitehikes.com/ - </citation> - <citation index="4" chunk_id="k70l71m72-n73o-74p75-q76r-s77t78u79v80" type="url"> - https://www.nps.gov/yose/planyourvisit/conditions.htm - </citation> - </citations> - <follow_up_questions> - <question>What is the best time of year to hike in Yosemite National Park?</question> - <question>Are there any special permits required for hiking certain trails in Yosemite?</question> - <question>What wildlife might I encounter while hiking in Yosemite, and how should I prepare for potential encounters?</question> - </follow_up_questions> - <loop_summary> - The assistant used the search tool to find relevant websites, then scraped information from those sites using the websiteInfoScraper tool. Additional information about current trail conditions was gathered from the NPS website to provide a comprehensive and safe hiking guide. - </loop_summary> - </answer> - </stage> - </interaction> - </example_interactions> - - <chat_history> - ${chatHistory} - <note>Use this history for context when appropriate.</note> - </chat_history> - - <final_instruction> - Now, process the user's query and provide your response following the format and rules outlined above. Ensure your final answer is comprehensive, correctly cited, and entirely contained within the structured tags. Do not get stuck in infinite loops and keep responses concise, grounded, and most importantly, HELPFUL AND USEFUL! - </final_instruction> -</system_message> -`; -} - -export function getSummarizedChunksPrompt(chunks: string): string { - return `Please provide a comprehensive summary of what you think the document from which these chunks originated. - Ensure the summary captures the main ideas and key points from all provided chunks. Be concise and brief and only provide the summary in paragraph form. - - Text chunks: - \`\`\` - ${chunks} - \`\`\``; -} - -export function getSummarizedSystemPrompt(): string { - return 'You are an AI assistant tasked with summarizing a document. You are provided with important chunks from the document and provide a summary, as best you can, of what the document will contain overall. Be concise and brief with your response.'; -} diff --git a/src/client/views/nodes/ChatBox/tools.ts b/src/client/views/nodes/ChatBox/tools.ts deleted file mode 100644 index 4035280a8..000000000 --- a/src/client/views/nodes/ChatBox/tools.ts +++ /dev/null @@ -1,26 +0,0 @@ -import { DocCast } from '../../../../fields/Types'; -import { DocServer } from '../../../DocServer'; -import { Docs } from '../../../documents/Documents'; -import { DocUtils } from '../../../documents/DocUtils'; -import { TabDocView } from '../../collections/TabDocView'; -import { DocumentView } from '../DocumentView'; -import { OpenWhere } from '../OpenWhere'; - -export function retrieval(json: any): string { - return ''; -} - -export function create_collection(docView: DocumentView, document_ids: string[], title: string): string { - const docs = document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); - const collection = Docs.Create.FreeformDocument(docs, { title }); - docView._props.addDocTab(collection, OpenWhere.addRight); //in future, create popup prompting user where to add - return 'Collection created in Dash called ' + title; -} - -export function create_link(docView: DocumentView, document_ids: string[]): string { - //Make document_ids a size 2 array - const docs = document_ids.map(doc_id => DocCast(DocServer.GetCachedRefField(doc_id))); - const linkDoc = DocUtils.MakeLink(docs[0], docs[1], {})!; - DocumentView.linkCommonAncestor(linkDoc)?.ComponentView?.addDocument?.(linkDoc); - return 'Link created between ' + docs[0].title + ' and ' + docs[1].title; -} diff --git a/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts deleted file mode 100644 index 4588b5aec..000000000 --- a/src/client/views/nodes/ChatBox/tools/WebsiteInfoScraperTool.ts +++ /dev/null @@ -1,43 +0,0 @@ -import { Networking } from '../../../../Network'; -import { BaseTool } from './BaseTool'; -import { v4 as uuidv4 } from 'uuid'; - -export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> { - private _addLinkedUrlDoc: (url: string, id: string) => void; - - constructor(addLinkedUrlDoc: (url: string, id: string) => void) { - super( - 'websiteInfoScraper', - 'Scrape detailed information from specific websites identified as relevant to the user query', - { - url: { - type: 'string', - description: 'The URL(s) of the website(s) to scrape', - required: 'true', - max_inputs: '3', - }, - }, - 'Provide up to 3 URLs of websites that you have identified as the most relevant from the previous search. This tool will provide the text content of those specific websites. When providing a final response to the user based on information from these chunks, ideally cite as many of the url chunks as possible (ground your infromation from multiple sources, if possible) in order to provide a well grounded result.', - 'Returns the text content of the webpages for analysis.' - ); - this._addLinkedUrlDoc = addLinkedUrlDoc; - } - - async execute(args: { url: string | string[] }): Promise<any> { - const urls = Array.isArray(args.url) ? args.url : [args.url]; - const results = []; - - for (const url of urls) { - try { - const { website_plain_text } = await Networking.PostToServer('/scrapeWebsite', { url }); - const id = uuidv4(); - this._addLinkedUrlDoc(url, id); - results.push({ type: 'text', text: `<chunk chunk_id=${id} chunk_type=url>\n${website_plain_text}\n</chunk>\n` }); - } catch (error) { - results.push({ type: 'text', text: `An error occurred while scraping the website: ${url}` }); - } - } - - return results; - } -} diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts index 9eb069c78..180d05cf3 100644 --- a/src/client/views/nodes/ChatBox/Agent.ts +++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts @@ -1,25 +1,29 @@ import OpenAI from 'openai'; -import { Tool, AgentMessage, AssistantMessage, TEXT_TYPE, CHUNK_TYPE, ASSISTANT_ROLE, ProcessingInfo, PROCESSING_TYPE } from './types'; +import { Tool, AgentMessage, AssistantMessage, TEXT_TYPE, CHUNK_TYPE, ASSISTANT_ROLE, ProcessingInfo, PROCESSING_TYPE } from '../types/types'; import { getReactPrompt } from './prompts'; import { XMLParser, XMLBuilder } from 'fast-xml-parser'; -import { Vectorstore } from './vectorstore/Vectorstore'; +import { Vectorstore } from '../vectorstore/Vectorstore'; import { ChatCompletionMessageParam } from 'openai/resources'; import dotenv from 'dotenv'; -import { CalculateTool } from './tools/CalculateTool'; -import { RAGTool } from './tools/RAGTool'; -import { DataAnalysisTool } from './tools/DataAnalysisTool'; -import { WebsiteInfoScraperTool } from './tools/WebsiteInfoScraperTool'; -import { SearchTool } from './tools/SearchTool'; -import { NoTool } from './tools/NoTool'; -import { on } from 'events'; +import { CalculateTool } from '../tools/CalculateTool'; +import { RAGTool } from '../tools/RAGTool'; +import { DataAnalysisTool } from '../tools/DataAnalysisTool'; +import { WebsiteInfoScraperTool } from '../tools/WebsiteInfoScraperTool'; +import { SearchTool } from '../tools/SearchTool'; +import { NoTool } from '../tools/NoTool'; import { v4 as uuidv4 } from 'uuid'; -import { AnswerParser } from './response_parsers/AnswerParser'; -import { StreamedAnswerParser } from './response_parsers/StreamedAnswerParser'; -import { CreateCSVTool } from './tools/CreateCSVTool'; +import { AnswerParser } from '../response_parsers/AnswerParser'; +import { StreamedAnswerParser } from '../response_parsers/StreamedAnswerParser'; +import { CreateCSVTool } from '../tools/CreateCSVTool'; dotenv.config(); +/** + * The Agent class handles the interaction between the assistant and the tools available, + * processes user queries, and manages the communication flow between the tools and OpenAI. + */ export class Agent { + // Private properties private client: OpenAI; private tools: Record<string, Tool<any>>; private messages: AgentMessage[] = []; @@ -34,6 +38,15 @@ export class Agent { private processingInfo: ProcessingInfo[] = []; private streamedAnswerParser: StreamedAnswerParser = new StreamedAnswerParser(); + /** + * The constructor initializes the agent with the vector store and toolset, and sets up the OpenAI client. + * @param _vectorstore Vector store instance for document storage and retrieval. + * @param summaries A function to retrieve document summaries. + * @param history A function to retrieve chat history. + * @param csvData A function to retrieve CSV data linked to the assistant. + * @param addLinkedUrlDoc A function to add a linked document from a URL. + * @param createCSVInDash A function to create a CSV document in the dashboard. + */ constructor( _vectorstore: Vectorstore, summaries: () => string, @@ -42,11 +55,14 @@ export class Agent { addLinkedUrlDoc: (url: string, id: string) => void, createCSVInDash: (url: string, title: string, id: string, data: string) => void ) { + // Initialize OpenAI client with API key from environment this.client = new OpenAI({ apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true }); this.vectorstore = _vectorstore; this._history = history; this._summaries = summaries; this._csvData = csvData; + + // Define available tools for the assistant this.tools = { calculate: new CalculateTool(), rag: new RAGTool(this.vectorstore), @@ -58,56 +74,77 @@ export class Agent { }; } + /** + * This method handles the conversation flow with the assistant, processes user queries, + * and manages the assistant's decision-making process, including tool actions. + * @param question The user's question. + * @param onProcessingUpdate Callback function for processing updates. + * @param onAnswerUpdate Callback function for answer updates. + * @param maxTurns The maximum number of turns to allow in the conversation. + * @returns The final response from the assistant. + */ async askAgent(question: string, onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void, maxTurns: number = 30): Promise<AssistantMessage> { console.log(`Starting query: ${question}`); + + // Push user's question to message history this.messages.push({ role: 'user', content: question }); + + // Retrieve chat history and generate system prompt const chatHistory = this._history(); const systemPrompt = getReactPrompt(Object.values(this.tools), this._summaries, chatHistory); + + // Initialize intermediate messages this.interMessages = [{ role: 'system', content: systemPrompt }]; this.interMessages.push({ role: 'user', content: `<stage number="1" role="user"><query>${question}</query></stage>` }); + + // Setup XML parser and builder const parser = new XMLParser({ ignoreAttributes: false, attributeNamePrefix: '@_', textNodeName: '_text', - isArray: (name, jpath, isLeafNode, isAttribute) => { - // Convert tags with the same name to arrays - return ['query', 'url'].indexOf(name) !== -1; - }, + isArray: (name, jpath, isLeafNode, isAttribute) => ['query', 'url'].indexOf(name) !== -1, }); const builder = new XMLBuilder({ ignoreAttributes: false, attributeNamePrefix: '@_' }); let currentAction: string | undefined; - this.processingInfo = []; + // Conversation loop (up to maxTurns) for (let i = 2; i < maxTurns; i += 2) { console.log(this.interMessages); console.log(`Turn ${i}/${maxTurns}`); + // Execute a step in the conversation and get the result const result = await this.execute(onProcessingUpdate, onAnswerUpdate); this.interMessages.push({ role: 'assistant', content: result }); let parsedResult; try { + // Parse XML result from the assistant parsedResult = parser.parse(result); } catch (error) { throw new Error(`Error parsing response: ${error}`); } + // Extract the stage from the parsed result const stage = parsedResult.stage; - if (!stage) { throw new Error(`Error: No stage found in response`); } + // Handle different stage elements (thoughts, actions, inputs, answers) for (const key in stage) { if (key === 'thought') { + // Handle assistant's thoughts console.log(`Thought: ${stage[key]}`); this.processingNumber++; } else if (key === 'action') { + // Handle action stage currentAction = stage[key] as string; console.log(`Action: ${currentAction}`); + if (this.tools[currentAction]) { + // Prepare the next action based on the current tool const nextPrompt = [ { type: 'text', @@ -117,18 +154,19 @@ export class Agent { this.interMessages.push({ role: 'user', content: nextPrompt }); break; } else { + // Handle error in case of an invalid action console.log('Error: No valid action'); this.interMessages.push({ role: 'user', content: `<stage number="${i + 1}" role="system-error-reporter">No valid action, try again.</stage>` }); break; } } else if (key === 'action_input') { + // Handle action input stage const actionInput = stage[key]; console.log(`Action input:`, actionInput.inputs); + if (currentAction) { try { - // Parse the inputs - //const parsedInputs = this.parseActionInputs(actionInput.inputs); - //console.log(`Parsed inputs:`, parsedInputs); + // Process the action with its input const observation = await this.processAction(currentAction, actionInput.inputs); const nextPrompt = [{ type: 'text', text: `<stage number="${i + 1}" role="user"> <observation>` }, ...observation, { type: 'text', text: '</observation></stage>' }]; console.log(observation); @@ -142,6 +180,7 @@ export class Agent { throw new Error('Error: Action input without a valid action'); } } else if (key === 'answer') { + // If an answer is found, end the query console.log('Answer found. Ending query.'); this.streamedAnswerParser.reset(); const parsedAnswer = AnswerParser.parse(result, this.processingInfo); @@ -149,10 +188,18 @@ export class Agent { } } } + throw new Error('Reached maximum turns. Ending query.'); } + /** + * Executes a step in the conversation, processing the assistant's response and parsing it in real-time. + * @param onProcessingUpdate Callback for processing updates. + * @param onAnswerUpdate Callback for answer updates. + * @returns The full response from the assistant. + */ private async execute(onProcessingUpdate: (processingUpdate: ProcessingInfo[]) => void, onAnswerUpdate: (answerUpdate: string) => void): Promise<string> { + // Stream OpenAI response for real-time updates const stream = await this.client.chat.completions.create({ model: 'gpt-4o', messages: this.interMessages as ChatCompletionMessageParam[], @@ -165,37 +212,46 @@ export class Agent { let currentContent: string = ''; let isInsideTag: boolean = false; + // Process each chunk of the streamed response for await (const chunk of stream) { let content = chunk.choices[0]?.delta?.content || ''; fullResponse += content; + // Parse the streamed content character by character for (const char of content) { if (currentTag === 'answer') { + // Handle answer parsing for real-time updates currentContent += char; - //console.log(char); const streamedAnswer = this.streamedAnswerParser.parse(char); - //console.log(streamedAnswer); onAnswerUpdate(streamedAnswer); continue; } else if (char === '<') { + // Start of a new tag isInsideTag = true; currentTag = ''; currentContent = ''; } else if (char === '>') { + // End of the tag isInsideTag = false; if (currentTag.startsWith('/')) { currentTag = ''; } } else if (isInsideTag) { + // Append characters to the tag name currentTag += char; } else if (currentTag === 'thought' || currentTag === 'action_input_description') { + // Handle processing information for thought or action input description currentContent += char; const current_info = this.processingInfo.find(info => info.index === this.processingNumber); if (current_info) { current_info.content = currentContent.trim(); onProcessingUpdate(this.processingInfo); } else { - this.processingInfo.push({ index: this.processingNumber, type: currentTag === 'thought' ? PROCESSING_TYPE.THOUGHT : PROCESSING_TYPE.ACTION, content: currentContent.trim() }); + this.processingInfo.push({ + index: this.processingNumber, + type: currentTag === 'thought' ? PROCESSING_TYPE.THOUGHT : PROCESSING_TYPE.ACTION, + content: currentContent.trim(), + }); onProcessingUpdate(this.processingInfo); } } @@ -205,50 +261,18 @@ export class Agent { return fullResponse; } + /** + * Processes a specific action by invoking the appropriate tool with the provided inputs. + * @param action The action to perform. + * @param actionInput The inputs for the action. + * @returns The result of the action. + */ private async processAction(action: string, actionInput: any): Promise<any> { if (!(action in this.tools)) { throw new Error(`Unknown action: ${action}`); } const tool = this.tools[action]; - const args: Record<string, any> = {}; - - // for (const paramName in tool.parameters) { - // if (actionInput[paramName] !== undefined) { - // if (Array.isArray(actionInput[paramName])) { - // // If the input is already an array, use it as is - // args[paramName] = actionInput[paramName]; - // } else if (typeof actionInput[paramName] === 'object' && actionInput[paramName] !== null) { - // // If the input is an object, check if it has multiple of the same tag - // const values = Object.values(actionInput[paramName]); - // if (values.length > 1) { - // // If there are multiple values, convert to an array - // args[paramName] = values; - // } else { - // // If there's only one value, use it directly - // args[paramName] = values[0]; - // } - // } else { - // // For single values, use them as is - // args[paramName] = actionInput[paramName]; - // } - // } else if (tool.parameters[paramName].required === 'true') { - // throw new Error(`Missing required parameter '${paramName}' for action '${action}'`); - // } - // } - return await tool.execute(actionInput); } - - private parseActionInputs(inputs: any): Record<string, string | string[]> { - const parsedInputs: Record<string, string | string[]> = {}; - for (const key in inputs) { - if (Array.isArray(inputs[key])) { - parsedInputs[key] = inputs[key].map((item: any) => item._text); - } else { - parsedInputs[key] = inputs[key]._text; - } - } - return parsedInputs; - } } diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts new file mode 100644 index 000000000..9daabc35f --- /dev/null +++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts @@ -0,0 +1,181 @@ +// prompts.ts + +import { Tool } from '../types/types'; + +export function getReactPrompt(tools: Tool[], summaries: () => string, chatHistory: string): string { + const toolDescriptions = tools + .map( + tool => ` + <tool> + <title>${tool.name}</title> + <brief_summary>${tool.briefSummary}</brief_summary> + </tool>` + ) + .join('\n'); + + return `<system_message> + <task> + You are an advanced AI assistant equipped with tools to answer user queries efficiently. You operate in a loop that is RIGIDLY structured and requires the use of specific tags and formats for your responses. Your goal is to provide accurate and well-structured answers to user queries. Below are the guidelines and information you can use to structure your approach to accomplishing this task. + </task> + + <critical_points> + <point>**STRUCTURE**: Always use the correct stage tags (e.g., <stage number="2" role="assistant">) for every response. Use only even-numbered stages for your responses.</point> + <point>**STOP after every stage and wait for input. Do not combine multiple stages in one response.**</point> + <point>If a tool is needed, select the most appropriate tool based on the query.</point> + <point>**If one tool does not yield satisfactory results or fails twice, try another tool that might work better for the query.**</point> + <point>Ensure that **ALL answers follow the answer structure**: grounded text wrapped in <grounded_text> tags with corresponding citations, normal text in <normal_text> tags, and three follow-up questions at the end.</point> + </critical_points> + + <answer_structure> + <answer> + <grounded_text> - All information derived from tools or user documents must be wrapped in these tags with proper citation.</grounded_text> + <normal_text> - Use this tag for text not derived from tools or user documents.</normal_text> + <citations> + <citation> - Provide proper citations for each <grounded_text>, referencing the tool or document chunk used.</citation> + </citations> + <follow_up_questions> - Provide exactly three user-perspective follow-up questions.</follow_up_questions> + <loop_summary> - Summarize the actions and tools used in the conversation.</loop_summary> + </answer> + </answer_structure> + + <grounded_text_guidelines> + <step>**Wrap ALL tool-based information** in <grounded_text> tags and provide citations.</step> + <step>Use separate <grounded_text> tags for distinct information or when switching to a different tool or document.</step> + <step>Ensure that **EVERY** <grounded_text> tag includes a citation index referencing the source of the information.</step> + <step>Over-citing is discouraged—only cite the information that is directly relevant to the user's query.</step> + </grounded_text_guidelines> + + <normal_text_guidelines> + <step>Wrap general information or reasoning **not derived from tools or documents** in <normal_text> tags.</step> + <step>Never put information derived from user documents or tools in <normal_text> tags—use <grounded_text> for those.</step> + </normal_text_guidelines> + + <operational_process> + <step>Carefully analyze the user query and determine if a tool is necessary to provide an accurate answer.</step> + <step>If a tool is needed, choose the most appropriate one and **stop after the action** to wait for system input.</step> + <step>If no tool is needed, use the 'no_tool' action but follow the structure.</step> + <step>When all observations are complete, format the final answer using <grounded_text> and <normal_text> tags with appropriate citations.</step> + <step>Include exactly three follow-up questions from the user's perspective.</step> + <step>Provide a loop summary at the end of the conversation.</step> + </operational_process> + + <tools> + ${toolDescriptions} + <note>If no external tool is required, use 'no_tool', but if there might be relevant external information, use the appropriate tool.</note> + </tools> + + <summaries> + ${summaries()} + </summaries> + + <chat_history> + ${chatHistory} + </chat_history> + + <example_interaction> + <interaction description="Correct use of RAG and website scraping tools"> + <stage number="1" role="user"> + <query>Can you provide key moments from the 2022 World Cup and its impact on tourism in Qatar?</query> + </stage> + + <stage number="2" role="assistant"> + <thought> + I will use the RAG tool to retrieve key moments from the user's World Cup documents. Afterward, I will use the website scraper tool to gather tourism impact data on Qatar. + </thought> + <action>rag</action> + </stage> + + <stage number="3" role="user"> + <action_rules>***Action rules omitted***</action_rules> + </stage> + + <stage number="4" role="assistant"> + <action_input> + <action_input_description>Searching user documents for key moments from the 2022 World Cup.</action_input_description> + <inputs> + <hypothetical_document_chunk>Key moments from the 2022 World Cup. Goals, assists, big wins, big losses.</hypothetical_document_chunk> + </inputs> + </action_input> + </stage> + + <stage number="5" role="user"> + <observation> + <chunk chunk_id="1234" chunk_type="text"> + The 2022 FIFA World Cup saw Argentina win, with Lionel Messi's performance being a key highlight. It was widely celebrated as a historical moment in sports. + </chunk> + </observation> + </stage> + + <stage number="6" role="assistant"> + <thought> + With key moments from the World Cup retrieved, I will now use the website scraper tool to gather data on Qatar’s tourism impact during the World Cup. + </thought> + <action>websiteInfoScraper</action> + </stage> + + <stage number="7" role="user"> + <action_rules>***Action rules omitted***</action_rules> + </stage> + + <stage number="8" role="assistant"> + <action_input> + <action_input_description>Scraping websites for information about Qatar's tourism impact during the 2022 World Cup.</action_input_description> + <inputs> + <query>Tourism impact of the 2022 World Cup in Qatar</query> + </inputs> + </action_input> + </stage> + + <stage number="9" role="user"> + <observation> + <chunk chunk_id="5678" chunk_type="url"> + <url>https://www.qatartourism.com/world-cup-impact</url> + <overview>During the 2022 World Cup, Qatar saw a 40% increase in tourism, with over 1.5 million visitors attending.</overview> + </chunk> + </observation> + </stage> + + <stage number="10" role="assistant"> + <thought> + Now that I have gathered both key moments from the World Cup and tourism impact data from Qatar, I will summarize the information in my final response. + </thought> + <answer> + <grounded_text citation_index="1">**The 2022 World Cup** saw Argentina crowned champions, with **Lionel Messi** leading his team to victory, marking a historic moment in sports.</grounded_text> + <grounded_text citation_index="2">**Qatar** experienced a **40% increase in tourism** during the World Cup, welcoming over **1.5 million visitors**, significantly boosting its economy.</grounded_text> + <normal_text>Moments like **Messi’s triumph** often become ingrained in the legacy of World Cups, immortalizing these tournaments in both sports and cultural memory. The **long-term implications** of the World Cup on Qatar's **economy, tourism**, and **global image** remain important areas of interest as the country continues to build on the momentum generated by hosting this prestigious event.</normal_text> + <citations> + <citation index="1" chunk_id="1234" type="text">Key moments from the 2022 World Cup.</citation> + <citation index="2" chunk_id="5678" type="url"></citation> + </citations> + <follow_up_questions> + <question>What long-term effects has the World Cup had on Qatar's economy and infrastructure?</question> + <question>Can you compare Qatar's tourism numbers with previous World Cup hosts?</question> + <question>How has Qatar’s image on the global stage evolved post-World Cup?</question> + </follow_up_questions> + <loop_summary> + The assistant first used the RAG tool to extract key moments from the user documents about the 2022 World Cup. Then, the assistant utilized the website scraping tool to gather data on Qatar's tourism impact. Both tools provided valuable information, and no additional tools were needed. + </loop_summary> + </answer> + </stage> + </interaction> + </example_interaction> + + <final_instruction> + Process the user's query according to these rules. Ensure your final answer is comprehensive, well-structured, and includes citations where appropriate. + </final_instruction> +</system_message>`; +} + +export function getSummarizedChunksPrompt(chunks: string): string { + return `Please provide a comprehensive summary of what you think the document from which these chunks originated. + Ensure the summary captures the main ideas and key points from all provided chunks. Be concise and brief and only provide the summary in paragraph form. + + Text chunks: + \`\`\` + ${chunks} + \`\`\``; +} + +export function getSummarizedSystemPrompt(): string { + return 'You are an AI assistant tasked with summarizing a document. You are provided with important chunks from the document and provide a summary, as best you can, of what the document will contain overall. Be concise and brief with your response.'; +} diff --git a/src/client/views/nodes/ChatBox/ChatBox.scss b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss index 42f6a0d61..42f6a0d61 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.scss +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx index 98a2e6002..6dc691798 100644 --- a/src/client/views/nodes/ChatBox/ChatBox.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx @@ -2,27 +2,27 @@ import { action, computed, makeObservable, observable, observe, reaction, runInA import { observer } from 'mobx-react'; import OpenAI, { ClientOptions } from 'openai'; import * as React from 'react'; -import { Doc, DocListCast } from '../../../../fields/Doc'; -import { CsvCast, DocCast, PDFCast, RTFCast, StrCast } from '../../../../fields/Types'; -import { DocumentType } from '../../../documents/DocumentTypes'; -import { Docs } from '../../../documents/Documents'; -import { LinkManager } from '../../../util/LinkManager'; -import { ViewBoxAnnotatableComponent } from '../../DocComponent'; -import { FieldView, FieldViewProps } from '../FieldView'; +import { Doc, DocListCast } from '../../../../../fields/Doc'; +import { CsvCast, DocCast, PDFCast, RTFCast, StrCast } from '../../../../../fields/Types'; +import { DocumentType } from '../../../../documents/DocumentTypes'; +import { Docs } from '../../../../documents/Documents'; +import { LinkManager } from '../../../../util/LinkManager'; +import { ViewBoxAnnotatableComponent } from '../../../DocComponent'; +import { FieldView, FieldViewProps } from '../../FieldView'; import './ChatBox.scss'; import MessageComponentBox from './MessageComponent'; -import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, SimplifiedChunk, ProcessingInfo } from './types'; -import { Vectorstore } from './vectorstore/Vectorstore'; -import { Agent } from './Agent'; +import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, SimplifiedChunk, ProcessingInfo } from '../types/types'; +import { Vectorstore } from '../vectorstore/Vectorstore'; +import { Agent } from '../agentsystem/Agent'; import dotenv from 'dotenv'; -import { DocData, DocViews } from '../../../../fields/DocSymbols'; -import { DocumentManager } from '../../../util/DocumentManager'; +import { DocData, DocViews } from '../../../../../fields/DocSymbols'; +import { DocumentManager } from '../../../../util/DocumentManager'; import { v4 as uuidv4 } from 'uuid'; -import { DocUtils } from '../../../documents/DocUtils'; -import { ClientUtils } from '../../../../ClientUtils'; +import { DocUtils } from '../../../../documents/DocUtils'; +import { ClientUtils } from '../../../../../ClientUtils'; import { ProgressBar } from './ProgressBar'; -import { DocumentView } from '../DocumentView'; -import { Networking } from '../../../Network'; +import { DocumentView } from '../../DocumentView'; +import { Networking } from '../../../../Network'; dotenv.config(); @@ -74,7 +74,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { // Initialize OpenAI, vectorstore, and agent this.openai = this.initializeOpenAI(); if (StrCast(this.dataDoc.vectorstore_id) == '') { - console.log('new_id'); this.vectorstore_id = uuidv4(); this.dataDoc.vectorstore_id = this.vectorstore_id; } else { @@ -130,7 +129,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { */ @action updateProgress = (progress: number, step: string) => { - console.log('Progress:', progress, step); this.uploadProgress = progress; this.currentStep = step; }; @@ -142,11 +140,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { */ @action addCSVForAnalysis = async (newLinkedDoc: Doc, id?: string) => { - console.log('adding csv file for analysis'); if (!newLinkedDoc.chunk_simpl) { // Convert document text to CSV data const csvData: string = StrCast(newLinkedDoc.text); - console.log('CSV Data:', csvData); // Generate a summary using OpenAI API const completion = await this.openai.chat.completions.create({ @@ -201,7 +197,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { * @returns OpenAI client instance. */ initializeOpenAI() { - console.log(process.env.OPENAI_KEY); const configuration: ClientOptions = { apiKey: process.env.OPENAI_KEY, dangerouslyAllowBrowser: true, @@ -347,7 +342,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { @action addLinkedUrlDoc = async (url: string, id: string) => { const doc = Docs.Create.WebDocument(url, { data_useCors: true }); - console.log('Adding URL:', url); const linkDoc = Docs.Create.LinkDocument(this.Document, doc); LinkManager.Instance.addLink(linkDoc); @@ -361,10 +355,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { const xFrameOptions = data.headers?.['x-frame-options']; if (xFrameOptions && xFrameOptions.toUpperCase() === 'SAMEORIGIN') { - console.log('URL cannot be displayed in an iframe:', url); canDisplay = false; } else { - console.log('URL can be displayed in an iframe:', url); canDisplay = true; } } catch (error) { @@ -398,7 +390,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { */ @action createCSVInDash = async (url: string, title: string, id: string, data: string) => { - console.log('Creating CSV in Dash:', url, title); const doc = DocCast(await DocUtils.DocumentFromType('csv', url, { title: title, text: RTFCast(data) })); const linkDoc = Docs.Create.LinkDocument(this.Document, doc); @@ -416,7 +407,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { */ @action handleCitationClick = (citation: Citation) => { - console.log('Citation clicked:', citation); const currentLinkedDocs: Doc[] = this.linkedDocs; const chunkId = citation.chunk_id; @@ -470,7 +460,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {}); break; default: - console.log('Chunk type not supported', foundChunk.chunkType); + console.error('Chunk type not recognized:', foundChunk.chunkType); break; } } @@ -573,7 +563,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { this.addCSVForAnalysis(change.newValue); } } else if (change.type === 'delete') { - console.log('Deleted docs: ', change.oldValue); + // Handle document removal } }); this.addScrollListener(); @@ -685,7 +675,6 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() { */ @action handleFollowUpClick = (question: string) => { - console.log('Follow-up question clicked:', question); this.inputValue = question; }; diff --git a/src/client/views/nodes/ChatBox/MessageComponent.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx index 812e52ee0..801becb64 100644 --- a/src/client/views/nodes/ChatBox/MessageComponent.tsx +++ b/src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx @@ -1,8 +1,17 @@ import React, { useState } from 'react'; import { observer } from 'mobx-react'; -import { AssistantMessage, Citation, MessageContent, PROCESSING_TYPE, ProcessingInfo, TEXT_TYPE } from './types'; +import { AssistantMessage, Citation, MessageContent, PROCESSING_TYPE, ProcessingInfo, TEXT_TYPE } from '../types/types'; import ReactMarkdown from 'react-markdown'; +/** + * Props for the MessageComponentBox. + * @interface MessageComponentProps + * @property {AssistantMessage} message - The message data to display. + * @property {number} index - The index of the message. + * @property {Function} onFollowUpClick - Callback to handle follow-up question clicks. + * @property {Function} onCitationClick - Callback to handle citation clicks. + * @property {Function} updateMessageCitations - Function to update message citations. + */ interface MessageComponentProps { message: AssistantMessage; index: number; @@ -11,12 +20,24 @@ interface MessageComponentProps { updateMessageCitations: (index: number, citations: Citation[]) => void; } -const MessageComponentBox: React.FC<MessageComponentProps> = function ({ message, index, onFollowUpClick, onCitationClick, updateMessageCitations }) { +/** + * MessageComponentBox displays the content of an AssistantMessage including text, citations, + * processing information, and follow-up questions. + * @param {MessageComponentProps} props - The props for the component. + */ +const MessageComponentBox: React.FC<MessageComponentProps> = ({ message, index, onFollowUpClick, onCitationClick, updateMessageCitations }) => { + // State for managing whether the dropdown is open or closed for processing info const [dropdownOpen, setDropdownOpen] = useState(false); + /** + * Renders the content of the message based on the type (e.g., grounded text, normal text). + * @param {MessageContent} item - The content item to render. + * @returns {JSX.Element} JSX element rendering the content. + */ const renderContent = (item: MessageContent) => { const i = item.index; - //console.log('item', item, 'index', i); + + // Handle grounded text with citations if (item.type === TEXT_TYPE.GROUNDED) { const citation_ids = item.citation_ids || []; return ( @@ -33,19 +54,28 @@ const MessageComponentBox: React.FC<MessageComponentProps> = function ({ message })} </span> ); - } else if (item.type === TEXT_TYPE.NORMAL) { + } + + // Handle normal text + else if (item.type === TEXT_TYPE.NORMAL) { return ( <span key={i} className="normal-text"> <ReactMarkdown>{item.text}</ReactMarkdown> </span> ); - } else if ('query' in item) { + } + + // Handle query type content + else if ('query' in item) { return ( <span key={i} className="query-text"> <ReactMarkdown>{JSON.stringify(item.query)}</ReactMarkdown> </span> ); - } else { + } + + // Fallback for any other content type + else { return ( <span key={i}> <ReactMarkdown>{JSON.stringify(item)}</ReactMarkdown> @@ -54,8 +84,14 @@ const MessageComponentBox: React.FC<MessageComponentProps> = function ({ message } }; + // Check if the message contains processing information (thoughts/actions) const hasProcessingInfo = message.processing_info && message.processing_info.length > 0; + /** + * Renders processing information such as thoughts or actions during message handling. + * @param {ProcessingInfo} info - The processing information to render. + * @returns {JSX.Element | null} JSX element rendering the processing info or null. + */ const renderProcessingInfo = (info: ProcessingInfo) => { if (info.type === PROCESSING_TYPE.THOUGHT) { return ( @@ -69,13 +105,13 @@ const MessageComponentBox: React.FC<MessageComponentProps> = function ({ message <strong>Action:</strong> {info.content} </div> ); - } else { - return null; } + return null; }; return ( <div className={`message ${message.role}`}> + {/* Processing Information Dropdown */} {hasProcessingInfo && ( <div className="processing-info"> <button className="toggle-info" onClick={() => setDropdownOpen(!dropdownOpen)}> @@ -85,7 +121,11 @@ const MessageComponentBox: React.FC<MessageComponentProps> = function ({ message <br /> </div> )} + + {/* Message Content */} <div className="message-content">{message.content && message.content.map(messageFragment => <React.Fragment key={messageFragment.index}>{renderContent(messageFragment)}</React.Fragment>)}</div> + + {/* Follow-up Questions Section */} {message.follow_up_questions && message.follow_up_questions.length > 0 && ( <div className="follow-up-questions"> <h4>Follow-up Questions:</h4> @@ -102,4 +142,5 @@ const MessageComponentBox: React.FC<MessageComponentProps> = function ({ message ); }; +// Export the observer-wrapped component to allow MobX to react to state changes export default observer(MessageComponentBox); diff --git a/src/client/views/nodes/ChatBox/ProgressBar.scss b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss index ff5be4a38..ff5be4a38 100644 --- a/src/client/views/nodes/ChatBox/ProgressBar.scss +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.scss diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx new file mode 100644 index 000000000..b9fd08742 --- /dev/null +++ b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx @@ -0,0 +1,22 @@ +import React from 'react'; +import './ProgressBar.scss'; + +/** + * ProgressBar is a functional React component that displays a loading spinner + * to indicate progress or ongoing processing. It uses two bouncing elements + * to create a smooth animation that represents an active state. + * + * The animation consists of two divs (`double-bounce1` and `double-bounce2`), + * each of which will bounce in and out of view, creating a pulsating effect. + */ +export const ProgressBar: React.FC = () => { + return ( + <div className="spinner-container"> + {/* Spinner div containing two bouncing elements */} + <div className="spinner"> + <div className="double-bounce1"></div> {/* First bouncing element */} + <div className="double-bounce2"></div> {/* Second bouncing element */} + </div> + </div> + ); +}; diff --git a/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts b/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts index 79b53b0a3..3b4fdb6f5 100644 --- a/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts +++ b/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts @@ -1,4 +1,4 @@ -import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType, ProcessingInfo } from '../types'; +import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType, ProcessingInfo } from '../types/types'; import { v4 as uuid } from 'uuid'; export class AnswerParser { diff --git a/src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts b/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts index 3585cab4a..3585cab4a 100644 --- a/src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts +++ b/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts diff --git a/src/client/views/nodes/ChatBox/tools/BaseTool.ts b/src/client/views/nodes/chatbot/tools/BaseTool.ts index 2e2267653..b57f1c8e4 100644 --- a/src/client/views/nodes/ChatBox/tools/BaseTool.ts +++ b/src/client/views/nodes/chatbot/tools/BaseTool.ts @@ -1,4 +1,4 @@ -import { Tool } from '../types'; +import { Tool } from '../types/types'; export abstract class BaseTool<T extends Record<string, any> = Record<string, any>> implements Tool<T> { constructor( diff --git a/src/client/views/nodes/ChatBox/tools/CalculateTool.ts b/src/client/views/nodes/chatbot/tools/CalculateTool.ts index 74b7ca27b..74b7ca27b 100644 --- a/src/client/views/nodes/ChatBox/tools/CalculateTool.ts +++ b/src/client/views/nodes/chatbot/tools/CalculateTool.ts diff --git a/src/client/views/nodes/ChatBox/tools/CreateCSVTool.ts b/src/client/views/nodes/chatbot/tools/CreateCSVTool.ts index 55015846b..55015846b 100644 --- a/src/client/views/nodes/ChatBox/tools/CreateCSVTool.ts +++ b/src/client/views/nodes/chatbot/tools/CreateCSVTool.ts diff --git a/src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts b/src/client/views/nodes/chatbot/tools/CreateCollectionTool.ts index 573428179..573428179 100644 --- a/src/client/views/nodes/ChatBox/tools/CreateCollectionTool.ts +++ b/src/client/views/nodes/chatbot/tools/CreateCollectionTool.ts diff --git a/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts b/src/client/views/nodes/chatbot/tools/DataAnalysisTool.ts index a12ee46e5..a12ee46e5 100644 --- a/src/client/views/nodes/ChatBox/tools/DataAnalysisTool.ts +++ b/src/client/views/nodes/chatbot/tools/DataAnalysisTool.ts diff --git a/src/client/views/nodes/ChatBox/tools/GetDocsTool.ts b/src/client/views/nodes/chatbot/tools/GetDocsTool.ts index f970ca8ee..f970ca8ee 100644 --- a/src/client/views/nodes/ChatBox/tools/GetDocsTool.ts +++ b/src/client/views/nodes/chatbot/tools/GetDocsTool.ts diff --git a/src/client/views/nodes/ChatBox/tools/NoTool.ts b/src/client/views/nodes/chatbot/tools/NoTool.ts index 1f0830a77..1f0830a77 100644 --- a/src/client/views/nodes/ChatBox/tools/NoTool.ts +++ b/src/client/views/nodes/chatbot/tools/NoTool.ts diff --git a/src/client/views/nodes/ChatBox/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts index 544b9daba..c24306dcd 100644 --- a/src/client/views/nodes/ChatBox/tools/RAGTool.ts +++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts @@ -1,6 +1,6 @@ import { BaseTool } from './BaseTool'; import { Vectorstore } from '../vectorstore/Vectorstore'; -import { RAGChunk } from '../types'; +import { RAGChunk } from '../types/types'; import * as fs from 'fs'; import { Networking } from '../../../../Network'; import { file } from 'jszip'; diff --git a/src/client/views/nodes/ChatBox/tools/SearchTool.ts b/src/client/views/nodes/chatbot/tools/SearchTool.ts index b926cbadc..b926cbadc 100644 --- a/src/client/views/nodes/ChatBox/tools/SearchTool.ts +++ b/src/client/views/nodes/chatbot/tools/SearchTool.ts diff --git a/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts new file mode 100644 index 000000000..2118218f6 --- /dev/null +++ b/src/client/views/nodes/chatbot/tools/WebsiteInfoScraperTool.ts @@ -0,0 +1,83 @@ +import { Networking } from '../../../../Network'; +import { BaseTool } from './BaseTool'; +import { v4 as uuidv4 } from 'uuid'; + +export class WebsiteInfoScraperTool extends BaseTool<{ url: string | string[] }> { + private _addLinkedUrlDoc: (url: string, id: string) => void; + + constructor(addLinkedUrlDoc: (url: string, id: string) => void) { + super( + 'websiteInfoScraper', + 'Scrape detailed information from specific websites relevant to the user query', + { + url: { + type: 'string', + description: 'The URL(s) of the website(s) to scrape', + required: true, + max_inputs: 3, + }, + }, + ` + Your task is to provide a comprehensive response to the user's prompt using the content scraped from relevant websites. Ensure you follow these guidelines for structuring your response: + + 1. Grounded Text Tag Structure: + - Wrap all text derived from the scraped website(s) in <grounded_text> tags. + - **Do not include non-sourced information** in <grounded_text> tags. + - Use a single <grounded_text> tag for content derived from a single website. If citing multiple websites, create new <grounded_text> tags for each. + - Ensure each <grounded_text> tag has a citation index corresponding to the scraped URL. + + 2. Citation Tag Structure: + - Create a <citation> tag for each distinct piece of information used from the website(s). + - Each <citation> tag must reference a URL chunk using the chunk_id attribute. + - For URL-based citations, leave the citation content empty, but reference the chunk_id and type as 'url'. + + 3. Structural Integrity Checks: + - Ensure all opening and closing tags are matched properly. + - Verify that all citation_index attributes in <grounded_text> tags correspond to valid citations. + - Do not over-cite—cite only the most relevant parts of the websites. + + Example Usage: + + <answer> + <grounded_text citation_index="1"> + Based on data from the World Bank, economic growth has stabilized in recent years, following a surge in investments. + </grounded_text> + <grounded_text citation_index="2"> + According to information retrieved from the International Monetary Fund, the inflation rate has been gradually decreasing since 2020. + </grounded_text> + + <citations> + <citation index="1" chunk_id="1234" type="url"></citation> + <citation index="2" chunk_id="5678" type="url"></citation> + </citations> + + <follow_up_questions> + <question>What are the long-term economic impacts of increased investments on GDP?</question> + <question>How might inflation trends affect future monetary policy?</question> + <question>Are there additional factors that could influence economic growth beyond investments and inflation?</question> + </follow_up_questions> + </answer> + `, + 'Returns the text content of the webpages for further analysis and grounding.' + ); + this._addLinkedUrlDoc = addLinkedUrlDoc; + } + + async execute(args: { url: string | string[] }): Promise<any> { + const urls = Array.isArray(args.url) ? args.url : [args.url]; + const results = []; + + for (const url of urls) { + try { + const { website_plain_text } = await Networking.PostToServer('/scrapeWebsite', { url }); + const id = uuidv4(); + this._addLinkedUrlDoc(url, id); + results.push({ type: 'text', text: `<chunk chunk_id=${id} chunk_type=url>\n${website_plain_text}\n</chunk>\n` }); + } catch (error) { + results.push({ type: 'text', text: `An error occurred while scraping the website: ${url}` }); + } + } + + return results; + } +} diff --git a/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts b/src/client/views/nodes/chatbot/tools/WikipediaTool.ts index 143d91d80..143d91d80 100644 --- a/src/client/views/nodes/ChatBox/tools/WikipediaTool.ts +++ b/src/client/views/nodes/chatbot/tools/WikipediaTool.ts diff --git a/src/client/views/nodes/ChatBox/types.ts b/src/client/views/nodes/chatbot/types/types.ts index a12c52592..f5d14ad6a 100644 --- a/src/client/views/nodes/ChatBox/types.ts +++ b/src/client/views/nodes/chatbot/types/types.ts @@ -1,9 +1,3 @@ -import { breadcrumbsClasses } from '@mui/material'; -import { Doc } from '../../../../fields/Doc'; -import { StrCast } from '../../../../fields/Types'; -import e from 'cors'; -import { index } from 'd3'; - export enum ASSISTANT_ROLE { USER = 'user', ASSISTANT = 'assistant', @@ -133,37 +127,3 @@ export interface AgentMessage { role: 'system' | 'user' | 'assistant'; content: string | { type: string; text?: string; image_url?: { url: string } }[]; } - -// export function convertToAIDocument(json: any): AI_Document { -// if (!json) { -// throw new Error('Invalid JSON object'); -// } - -// const chunks: Chunk[] = json.chunks.map((chunk: any) => ({ -// id: chunk.id, -// values: chunk.values, -// metadata: { -// text: chunk.metadata.text, -// type: chunk.metadata.type as CHUNK_TYPE, // Ensure type casting -// original_document: chunk.metadata.original_document, -// file_path: chunk.metadata.file_path, -// location: chunk.metadata.location, -// start_page: chunk.metadata.start_page, -// end_page: chunk.metadata.end_page, -// base64_data: chunk.metadata.base64_data, -// width: chunk.metadata.width, -// height: chunk.metadata.height, -// }, -// })); - -// const aiDocument: AI_Document = { -// purpose: json.purpose, -// file_name: json.file_name, -// num_pages: json.num_pages, -// summary: json.summary, -// chunks: chunks, -// type: json.type, -// }; - -// return aiDocument; -// } diff --git a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts index cc3b1ccd5..07a2b73bc 100644 --- a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts +++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts @@ -2,7 +2,7 @@ import { Pinecone, Index, IndexList, PineconeRecord, RecordMetadata, QueryRespon import { CohereClient } from 'cohere-ai'; import { EmbedResponse } from 'cohere-ai/api'; import dotenv from 'dotenv'; -import { RAGChunk, AI_Document, CHUNK_TYPE } from '../types'; +import { RAGChunk, AI_Document, CHUNK_TYPE } from '../types/types'; import { Doc } from '../../../../../fields/Doc'; import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types'; import { Networking } from '../../../../Network'; diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 58d35ce64..88a81c5de 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -8,7 +8,7 @@ import { filesDirectory, publicDirectory } from '../SocketData'; import { Method } from '../RouteManager'; import ApiManager, { Registration } from './ApiManager'; import axios from 'axios'; -import { RAGChunk } from '../../client/views/nodes/ChatBox/types'; +import { RAGChunk } from '../../client/views/nodes/chatbot/types/types'; import { UnstructuredClient } from 'unstructured-client'; import { PartitionResponse } from 'unstructured-client/sdk/models/operations'; import { ChunkingStrategy, Strategy } from 'unstructured-client/sdk/models/shared'; |