From 4791cd23af08da70895204a3a7fbaf889d9af2d5 Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Sat, 7 Sep 2024 12:43:05 -0400
Subject: completely restructured, added comments, and significantly reduced
 the length of the prompt (~72% shorter and cheaper)

---
 .../views/nodes/chatbot/agentsystem/prompts.ts     | 181 +++++++++++++++++++++
 1 file changed, 181 insertions(+)
 create mode 100644 src/client/views/nodes/chatbot/agentsystem/prompts.ts

(limited to 'src/client/views/nodes/chatbot/agentsystem/prompts.ts')
diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
new file mode 100644
index 000000000..9daabc35f
--- /dev/null
+++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
@@ -0,0 +1,181 @@
+// prompts.ts
+
+import { Tool } from '../types/types';
+
+export function getReactPrompt(tools: Tool[], summaries: () => string, chatHistory: string): string {
+    const toolDescriptions = tools
+        .map(
+            tool => `
+        <tool>
+            <title>${tool.name}</title>
+            <brief_summary>${tool.briefSummary}</brief_summary>
+        </tool>`
+        )
+        .join('\n');
+
+    return `<system_message>
+    <task>
+        You are an advanced AI assistant equipped with tools to answer user queries efficiently. You operate in a loop that is RIGIDLY structured and requires the use of specific tags and formats for your responses. Your goal is to provide accurate and well-structured answers to user queries. Below are the guidelines and information you can use to structure your approach to accomplishing this task.
+    </task>
+
+    <critical_points>
+        <point>**STRUCTURE**: Always use the correct stage tags (e.g., <stage number="2" role="assistant">) for every response. Use only even-numbered stages for your responses.</point>
+        <point>**STOP after every stage and wait for input. Do not combine multiple stages in one response.**</point>
+        <point>If a tool is needed, select the most appropriate tool based on the query.</point>
+        <point>**If one tool does not yield satisfactory results or fails twice, try another tool that might work better for the query.**</point>
+        <point>Ensure that **ALL answers follow the answer structure**: grounded text wrapped in <grounded_text> tags with corresponding citations, normal text in <normal_text> tags, and three follow-up questions at the end.</point>
+    </critical_points>
+
+    <answer_structure>
+        <answer>
+            <grounded_text> - All information derived from tools or user documents must be wrapped in these tags with proper citation.</grounded_text>
+            <normal_text> - Use this tag for text not derived from tools or user documents.</normal_text>
+            <citations>
+                <citation> - Provide proper citations for each <grounded_text>, referencing the tool or document chunk used.</citation>
+            </citations>
+            <follow_up_questions> - Provide exactly three user-perspective follow-up questions.</follow_up_questions>
+            <loop_summary> - Summarize the actions and tools used in the conversation.</loop_summary>
+        </answer>
+    </answer_structure>
+
+    <grounded_text_guidelines>
+        <step>**Wrap ALL tool-based information** in <grounded_text> tags and provide citations.</step>
+        <step>Use separate <grounded_text> tags for distinct information or when switching to a different tool or document.</step>
+        <step>Ensure that **EVERY** <grounded_text> tag includes a citation index referencing the source of the information.</step>
+        <step>Over-citing is discouraged—only cite the information that is directly relevant to the user's query.</step>
+    </grounded_text_guidelines>
+
+    <normal_text_guidelines>
+        <step>Wrap general information or reasoning **not derived from tools or documents** in <normal_text> tags.</step>
+        <step>Never put information derived from user documents or tools in <normal_text> tags—use <grounded_text> for those.</step>
+    </normal_text_guidelines>
+
+    <operational_process>
+        <step>Carefully analyze the user query and determine if a tool is necessary to provide an accurate answer.</step>
+        <step>If a tool is needed, choose the most appropriate one and **stop after the action** to wait for system input.</step>
+        <step>If no tool is needed, use the 'no_tool' action but follow the structure.</step>
+        <step>When all observations are complete, format the final answer using <grounded_text> and <normal_text> tags with appropriate citations.</step>
+        <step>Include exactly three follow-up questions from the user's perspective.</step>
+        <step>Provide a loop summary at the end of the conversation.</step>
+    </operational_process>
+
+    <tools>
+        ${toolDescriptions}
+        <note>If no external tool is required, use 'no_tool', but if there might be relevant external information, use the appropriate tool.</note>
+    </tools>
+
+    <summaries>
+        ${summaries()}
+    </summaries>
+
+    <chat_history>
+        ${chatHistory}
+    </chat_history>
+
+    <example_interaction>
+        <interaction description="Correct use of RAG and website scraping tools">
+            <stage number="1" role="user">
+                <query>Can you provide key moments from the 2022 World Cup and its impact on tourism in Qatar?</query>
+            </stage>
+
+            <stage number="2" role="assistant">
+                <thought>
+                    I will use the RAG tool to retrieve key moments from the user's World Cup documents. Afterward, I will use the website scraper tool to gather tourism impact data on Qatar.
+                </thought>
+                <action>rag</action>
+            </stage>
+
+            <stage number="3" role="user">
+                <action_rules>***Action rules omitted***</action_rules>
+            </stage>
+
+            <stage number="4" role="assistant">
+                <action_input>
+                    <action_input_description>Searching user documents for key moments from the 2022 World Cup.</action_input_description>
+                    <inputs>
+                        <hypothetical_document_chunk>Key moments from the 2022 World Cup. Goals, assists, big wins, big losses.</hypothetical_document_chunk>
+                    </inputs>
+                </action_input>
+            </stage>
+
+            <stage number="5" role="user">
+                <observation>
+                    <chunk chunk_id="1234" chunk_type="text">
+                        The 2022 FIFA World Cup saw Argentina win, with Lionel Messi's performance being a key highlight. It was widely celebrated as a historical moment in sports.
+                    </chunk>
+                </observation>
+            </stage>
+
+            <stage number="6" role="assistant">
+                <thought>
+                    With key moments from the World Cup retrieved, I will now use the website scraper tool to gather data on Qatar’s tourism impact during the World Cup.
+                </thought>
+                <action>websiteInfoScraper</action>
+            </stage>
+
+            <stage number="7" role="user">
+                <action_rules>***Action rules omitted***</action_rules>
+            </stage>
+
+            <stage number="8" role="assistant">
+                <action_input>
+                    <action_input_description>Scraping websites for information about Qatar's tourism impact during the 2022 World Cup.</action_input_description>
+                    <inputs>
+                        <query>Tourism impact of the 2022 World Cup in Qatar</query>
+                    </inputs>
+                </action_input>
+            </stage>
+
+            <stage number="9" role="user">
+                <observation>
+                    <chunk chunk_id="5678" chunk_type="url">
+                        <url>https://www.qatartourism.com/world-cup-impact</url>
+                        <overview>During the 2022 World Cup, Qatar saw a 40% increase in tourism, with over 1.5 million visitors attending.</overview>
+                    </chunk>
+                </observation>
+            </stage>
+
+            <stage number="10" role="assistant">
+                <thought>
+                    Now that I have gathered both key moments from the World Cup and tourism impact data from Qatar, I will summarize the information in my final response.
+                </thought>
+                <answer>
+                    <grounded_text citation_index="1">**The 2022 World Cup** saw Argentina crowned champions, with **Lionel Messi** leading his team to victory, marking a historic moment in sports.</grounded_text>
+                    <grounded_text citation_index="2">**Qatar** experienced a **40% increase in tourism** during the World Cup, welcoming over **1.5 million visitors**, significantly boosting its economy.</grounded_text>
+                    <normal_text>Moments like **Messi’s triumph** often become ingrained in the legacy of World Cups, immortalizing these tournaments in both sports and cultural memory. The **long-term implications** of the World Cup on Qatar's **economy, tourism**, and **global image** remain important areas of interest as the country continues to build on the momentum generated by hosting this prestigious event.</normal_text>
+                    <citations>
+                        <citation index="1" chunk_id="1234" type="text">Key moments from the 2022 World Cup.</citation>
+                        <citation index="2" chunk_id="5678" type="url"></citation>
+                    </citations>
+                    <follow_up_questions>
+                        <question>What long-term effects has the World Cup had on Qatar's economy and infrastructure?</question>
+                        <question>Can you compare Qatar's tourism numbers with previous World Cup hosts?</question>
+                        <question>How has Qatar’s image on the global stage evolved post-World Cup?</question>
+                    </follow_up_questions>
+                    <loop_summary>
+                        The assistant first used the RAG tool to extract key moments from the user documents about the 2022 World Cup. Then, the assistant utilized the website scraping tool to gather data on Qatar's tourism impact. Both tools provided valuable information, and no additional tools were needed. 
+                    </loop_summary>
+                </answer>
+            </stage>
+        </interaction>
+    </example_interaction>
+
+    <final_instruction>
+        Process the user's query according to these rules. Ensure your final answer is comprehensive, well-structured, and includes citations where appropriate.
+    </final_instruction>
+</system_message>`;
+}
+
+export function getSummarizedChunksPrompt(chunks: string): string {
+    return `Please provide a comprehensive summary of what you think the document from which these chunks originated contains. 
+                    Ensure the summary captures the main ideas and key points from all provided chunks. Be concise and brief and only provide the summary in paragraph form.
+
+                    Text chunks:
+                    \`\`\`
+                    ${chunks}
+                    \`\`\``;
+}
+
+export function getSummarizedSystemPrompt(): string {
+    return 'You are an AI assistant tasked with summarizing a document. You are provided with important chunks from the document and provide a summary, as best you can, of what the document will contain overall. Be concise and brief with your response.';
+}
-- 
cgit v1.2.3-70-g09d2


From ba0520baaa1f84d9fb08d3b2880c68302d28350a Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Sat, 7 Sep 2024 13:07:33 -0400
Subject: added clarifying structural info to ReAct prompt (still ~69% shorter
 than previous prompt) and shortened the RAG prompt

---
 .../views/nodes/chatbot/agentsystem/prompts.ts     |  28 +++++-
 .../nodes/chatbot/chatboxcomponents/ChatBox.tsx    |   7 +-
 src/client/views/nodes/chatbot/tools/RAGTool.ts    | 104 +++++----------------
 3 files changed, 52 insertions(+), 87 deletions(-)

(limited to 'src/client/views/nodes/chatbot/agentsystem/prompts.ts')

diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
index 9daabc35f..7000d8634 100644
--- a/src/client/views/nodes/chatbot/agentsystem/prompts.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
@@ -26,12 +26,33 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto
         <point>Ensure that **ALL answers follow the answer structure**: grounded text wrapped in <grounded_text> tags with corresponding citations, normal text in <normal_text> tags, and three follow-up questions at the end.</point>
     </critical_points>
 
+    <thought_structure>
+        <thought>
+            <description>
+                Always provide a thought before each action to explain why you are choosing the next step or tool. This helps clarify your reasoning for the action you will take.
+            </description>
+        </thought>
+    </thought_structure>
+
+    <action_input_structure>
+        <action_input>
+            <action_input_description>
+                Always describe what the action will do in the <action_input_description> tag. Be clear about how the tool will process the input and why it is appropriate for this stage.
+            </action_input_description>
+            <inputs>
+                <description>
+                    Provide the actual inputs for the action in the <inputs> tag. Ensure that each input is specific to the tool being used. Inputs should match the expected parameters for the tool (e.g., a search term for the website scraper, document references for RAG).
+                </description>
+            </inputs>
+        </action_input>
+    </action_input_structure>
+
     <answer_structure>
         <answer>
             <grounded_text> - All information derived from tools or user documents must be wrapped in these tags with proper citation.</grounded_text>
-            <normal_text> - Use this tag for text not derived from tools or user documents.</normal_text>
+            <normal_text> - Use this tag for text not derived from tools or user documents. It should only be for narrative-like text or extremely common knowledge information.</normal_text>
             <citations>
-                <citation> - Provide proper citations for each <grounded_text>, referencing the tool or document chunk used.</citation>
+                <citation> - Provide proper citations for each <grounded_text>, referencing the tool or document chunk used. ENSURE THAT THERE IS A CITATION WHOSE INDEX MATCHES FOR EVERY GROUNDED TEXT CITATION INDEX. </citation>
             </citations>
             <follow_up_questions> - Provide exactly three user-perspective follow-up questions.</follow_up_questions>
             <loop_summary> - Summarize the actions and tools used in the conversation.</loop_summary>
@@ -41,7 +62,8 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto
     <grounded_text_guidelines>
         <step>**Wrap ALL tool-based information** in <grounded_text> tags and provide citations.</step>
         <step>Use separate <grounded_text> tags for distinct information or when switching to a different tool or document.</step>
-        <step>Ensure that **EVERY** <grounded_text> tag includes a citation index referencing the source of the information.</step>
+        <step>Ensure that **EVERY** <grounded_text> tag includes a citation index aligned with a citation that you provide that references the source of the information.</step>
+        <step>There should be a one-to-one relationship between <grounded_text> tags and citations.</step>
         <step>Over-citing is discouraged—only cite the information that is directly relevant to the user's query.</step>
     </grounded_text_guidelines>
 
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 6dc691798..28bfbeae3 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -51,8 +51,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     private vectorstore_id: string;
     private vectorstore: Vectorstore;
     private agent: Agent;
-    private _oldWheel: HTMLDivElement | null = null;
-    private messagesRef: React.RefObject;
+    private messagesRef: React.RefObject<HTMLDivElement>;
 
     /**
      * Static method that returns the layout string for the field.
@@ -246,12 +245,12 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
      * @param event The form submission event.
      */
     @action
-    askGPT = async (event: React.FormEvent): Promise => {
+    askGPT = async (event: React.FormEvent): Promise<any> => {
         event.preventDefault();
         this.inputValue = '';
 
         // Extract the user's message
-        const textInput = event.currentTarget.elements.namedItem('messageInput') as HTMLInputElement;
+        const textInput = (event.currentTarget as HTMLFormElement).elements.namedItem('messageInput') as HTMLInputElement;
         const trimmedText = textInput.value.trim();
 
         if (trimmedText) {
diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts
index c24306dcd..f4b7b42ea 100644
--- a/src/client/views/nodes/chatbot/tools/RAGTool.ts
+++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts
@@ -14,114 +14,58 @@ export class RAGTool extends BaseTool {
             {
                 hypothetical_document_chunk: {
                     type: 'string',
-                    description:
-                        "Detailed version of the prompt that is effectively a hypothetical document chunk that would be ideal to embed and compare to the vectors of real document chunks to fetch the most relevant document chunks to answer the user's query",
+                    description: "A detailed prompt representing an ideal chunk to embed and compare against document vectors to retrieve the most relevant content for answering the user's query.",
                     required: 'true',
                 },
             },
             `
-            Your task is to provide a comprehensive response to the user's prompt based on the given chunks and chat history. Follow these structural guidelines meticulously:
+            When using the RAG tool, the structure must adhere to the format described in the ReAct prompt. Below are additional guidelines specifically for RAG-based responses:
 
-            1. Overall Structure:
-            <answer>
-                [Main content with grounded_text tags interspersed with normal plain text (information that is not derived from chunks' information)]
-                <citations>
-                [Individual citation tags]
-                </citations>
-                <follow_up_questions>
-                [Three question tags]
-                </follow_up_questions>
-            </answer>
-
-            2. Grounded Text Tag Structure:
-            - Basic format: 
-                <grounded_text citation_index="[citation index number(s)]">
-                [Your generated text based on information from a subset of a chunk (a citation's direct text)]
-                </grounded_text>
+            1. **Grounded Text Guidelines**:
+                - Each <grounded_text> tag must correspond to exactly one citation, ensuring a one-to-one relationship.
+                - Always cite a **subset** of the chunk, never the full text. The citation should be as short as possible while providing the relevant information (typically one to two sentences).
+                - Do not paraphrase the chunk text in the citation; use the original subset directly from the chunk.
+                - If multiple citations are needed for different sections of the response, create new <grounded_text> tags for each.
 
-            3. Citation Tag Structure:
-            <citation index="[unique number]" chunk_id="[UUID v4]" type="[text/image/table]">
-                [For text: relevant subset of original chunk]
-                [For image/table: leave empty]
-            </citation>
+            2. **Citation Guidelines**:
+                - The citation must include only the relevant excerpt from the chunk being referenced.
+                - Use unique citation indices and reference the chunk_id for the source of the information.
+                - For text chunks, the citation content must reflect the **exact subset** of the original chunk that is relevant to the grounded_text tag.
 
-            4. Detailed Grounded Text Guidelines:
-                a. Wrap all information derived from chunks in grounded_text tags.
-                b. DO NOT PUT ANYTHING THAT IS NOT DIRECTLY DERIVED FROM INFORMATION FROM CHUNKS (EITHER IMAGE, TABLE, OR TEXT) IN GROUNDED_TEXT TAGS.
-                c. Use a single grounded_text tag for suquential and closely related information that references the same citation. If other citations' information are used sequentially, create new grounded_text tags.
-                d. Ensure every grounded_text tag has up to a few corresponding citations (should not be more than 3 and only 1 is fine). Multiple citation indices should be separated by commas.
-                e. Grounded text can be as short as a few words or as long as several sentences.
-                f. Avoid overlapping or nesting grounded_text tags; instead, use sequential tags.
-
-            5. Detailed Citation Guidelines:
-                a. Create a unique citation for each distinct piece of information from the chunks that is used to support grounded_text.
-                b. ALL TEXT CITATIONS must have direct text in its element content (e.g. <citation ...>DIRECT TEXT HERE</citation>) that is a relevant SUBSET of the original text chunk that is being cited specifically.
-                c. DO NOT paraphrase or summarize the text; use the original text as much as possible.
-                d. DO NOT USE THE FULL TEXT CHUNK as the citation content; only use the relevant subset of the text that the grounded_text is base. AS SHORT AS POSSIBLE WHILE PROVIDING INFORMATION (ONE TO TWO SENTENCES USUALLY)!
-                e. Ensure each citation has a unique index number.
-                f. Specify the correct type: "text", "image", or "table".
-                g. For text chunks, the content of the citation should ALWAYS have the relevant subset of the original text that the grounded_text is based on.
-                h. For image/table chunks, leave the citation content empty.
-                i. One citation can be used for multiple grounded_text tags if they are based on the same chunk information.
-                j. !!!DO NOT OVERCITE - only include citations for information that is directly relevant to the grounded_text.
-
-            6. Structural Integrity Checks:
-                a. Ensure all opening tags have corresponding closing tags.
-                b. Verify that all grounded_text tags have valid citation_index attributes (they should be equal to the associated citation(s) index field—not their chunk_id field).
-                c. Check that all cited indices in grounded_text tags have corresponding citations.
-
-            Example of grounded_text usage:
+            **Example**:
 
             <answer>
-                <grounded_text citation_index="1,2">
-                Artificial Intelligence (AI) is revolutionizing various sectors, with healthcare experiencing significant transformations in areas such as diagnosis and treatment planning.
-                </grounded_text>
-                <grounded_text citation_index="2,3,4">
-                In the field of medical diagnosis, AI has shown remarkable capabilities, particularly in radiology. For instance, AI systems have drastically improved mammogram analysis, achieving 99% accuracy at a rate 30 times faster than human radiologists.
+                <grounded_text citation_index="1">
+                    Artificial Intelligence is revolutionizing various sectors, with healthcare seeing transformations in diagnosis and treatment planning.
                 </grounded_text>
-                <grounded_text citation_index="4">
-                This advancement not only enhances the efficiency of healthcare systems but also significantly reduces the occurrence of false positives, leading to fewer unnecessary biopsies and reduced patient stress.
+                <grounded_text citation_index="2">
+                    Based on recent data, AI has drastically improved mammogram analysis, achieving 99% accuracy at a rate 30 times faster than human radiologists.
                 </grounded_text>
-            
-                <grounded_text citation_index="5,6">
-                Beyond diagnosis, AI is playing a crucial role in drug discovery and development. By analyzing vast amounts of genetic and molecular data, AI algorithms can identify potential drug candidates much faster than traditional methods.
-                </grounded_text>
-                <grounded_text citation_index="6">
-                    This could potentially reduce the time and cost of bringing new medications to market, especially for rare diseases that have historically received less attention due to limited market potential.
-                </grounded_text>
-
-                [... rest of the content ...]
 
                 <citations>
-                    <citation index="1" chunk_id="123e4567-e89b-12d3-a456-426614174000" type="text">Artificial Intelligence is revolutionizing various industries, with healthcare being one of the most profoundly affected sectors.</citation>
-                    <citation index="2" chunk_id="123e4567-e89b-12d3-a456-426614174001" type="text">AI has shown particular promise in the field of radiology, enhancing the accuracy and speed of image analysis.</citation>
-                    <citation index="3" chunk_id="123e4567-e89b-12d3-a456-426614174002" type="text">According to recent studies, AI systems have achieved 99% accuracy in mammogram analysis, performing the task 30 times faster than human radiologists.</citation>
-                    <citation index="4" chunk_id="123e4567-e89b-12d3-a456-426614174003" type="text">The improvement in mammogram accuracy has led to a significant reduction in false positives, decreasing the need for unnecessary biopsies and reducing patient anxiety.</citation>
-                    <citation index="5" chunk_id="123e4567-e89b-12d3-a456-426614174004" type="text">AI is accelerating the drug discovery process by analyzing complex molecular and genetic data to identify potential drug candidates.</citation>
-                    <citation index="6" chunk_id="123e4567-e89b-12d3-a456-426614174005" type="text">The use of AI in drug discovery could significantly reduce the time and cost associated with bringing new medications to market, particularly for rare diseases.</citation>
+                    <citation index="1" chunk_id="abc123" type="text">Artificial Intelligence is revolutionizing various industries, especially in healthcare.</citation>
+                    <citation index="2" chunk_id="abc124" type="table"></citation>
                 </citations>
 
                 <follow_up_questions>
-                    <question>How might AI-driven personalized medicine impact the cost and accessibility of healthcare in the future?</question>
-                    <question>What measures can be taken to ensure that AI systems in healthcare are free from biases and equally effective for diverse populations?</question>
-                    <question>How could the role of healthcare professionals evolve as AI becomes more integrated into medical practices?</question>
+                    <question>How can AI enhance patient outcomes in fields outside radiology?</question>
+                    <question>What are the challenges in implementing AI systems across different hospitals?</question>
+                    <question>How might AI-driven advancements impact healthcare costs?</question>
                 </follow_up_questions>
             </answer>
             `,
 
-            `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a 
-            set of document chunks (either images or text) that can be used to provide a grounded response based on 
-            user documents`
+            `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a set of document chunks (text or images) to provide a grounded response based on user documents.`
         );
     }
 
-    async execute(args: { hypothetical_document_chunk: string }): Promise {
+    async execute(args: { hypothetical_document_chunk: string }): Promise<any> {
         const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk);
         const formatted_chunks = await this.getFormattedChunks(relevantChunks);
         return formatted_chunks;
     }
 
-    async getFormattedChunks(relevantChunks: RAGChunk[]): Promise {
+    async getFormattedChunks(relevantChunks: RAGChunk[]): Promise<any> {
         try {
             const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks });
 
-- 
cgit v1.2.3-70-g09d2


From 00b12111c4007ef6ecff645327007a67f6655d8b Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Thu, 19 Sep 2024 09:53:19 -0400
Subject: added comments and fixed some styling and error handling

---
 .../views/nodes/chatbot/agentsystem/prompts.ts     |  12 +-
 .../nodes/chatbot/chatboxcomponents/ChatBox.scss   |   6 +-
 .../nodes/chatbot/chatboxcomponents/ChatBox.tsx    |  11 +-
 src/server/ApiManagers/AssistantManager.ts         | 224 +++++++++++++++------
 4 files changed, 179 insertions(+), 74 deletions(-)

(limited to 'src/client/views/nodes/chatbot/agentsystem/prompts.ts')

diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
index 7000d8634..01c30d444 100644
--- a/src/client/views/nodes/chatbot/agentsystem/prompts.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
@@ -22,8 +22,9 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto
         <point>**STRUCTURE**: Always use the correct stage tags (e.g., <stage number="2" role="assistant">) for every response. Use only even-numbered stages for your responses.</point>
         <point>**STOP after every stage and wait for input. Do not combine multiple stages in one response.**</point>
         <point>If a tool is needed, select the most appropriate tool based on the query.</point>
-        <point>**If one tool does not yield satisfactory results or fails twice, try another tool that might work better for the query.**</point>
+        <point>**If one tool does not yield satisfactory results or fails twice, try another tool that might work better for the query.** This often happens with the rag tool, which may not yield great results. If this happens, try the search tool.</point>
         <point>Ensure that **ALL answers follow the answer structure**: grounded text wrapped in <grounded_text> tags with corresponding citations, normal text in <normal_text> tags, and three follow-up questions at the end.</point>
+        <point>If you use a tool that will do something (e.g. creating a CSV), and want to also use a tool that will provide you with information (e.g. RAG), use the tool that will provide you with information first. Then proceed with the tool that will do something.</point>
     </critical_points>
 
     <thought_structure>
@@ -48,8 +49,9 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto
     </action_input_structure>
 
     <answer_structure>
+        ALL answers must follow this structure and everything must be within the <answer> tag:
         <answer>
-            <grounded_text> - All information derived from tools or user documents must be wrapped in these tags with proper citation.</grounded_text>
+            <grounded_text> - All information derived from tools or user documents must be wrapped in these tags with proper citation. This should not be word for word, but paraphrased from the text.</grounded_text>
             <normal_text> - Use this tag for text not derived from tools or user documents. It should only be for narrative-like text or extremely common knowledge information.</normal_text>
             <citations>
                 <citation> - Provide proper citations for each <grounded_text>, referencing the tool or document chunk used. ENSURE THAT THERE IS A CITATION WHOSE INDEX MATCHES FOR EVERY GROUNDED TEXT CITATION INDEX. </citation>
@@ -65,6 +67,10 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto
         <step>Ensure that **EVERY** <grounded_text> tag includes a citation index aligned with a citation that you provide that references the source of the information.</step>
         <step>There should be a one-to-one relationship between <grounded_text> tags and citations.</step>
         <step>Over-citing is discouraged—only cite the information that is directly relevant to the user's query.</step>
+        <step>Paraphrase the information in the <grounded_text> tags, but ensure that the meaning is preserved.</step>
+        <step>Do not include the full text of the chunk in the citation—only the relevant excerpt.</step>
+        <step>For text chunks, the citation content must reflect the exact subset of the original chunk that is relevant to the grounded_text tag.</step>
+        <step>Do not use citations from previous interactions. Only use citations from the current action loop.</step>
     </grounded_text_guidelines>
 
     <normal_text_guidelines>
@@ -130,7 +136,7 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto
 
             <stage number="6" role="assistant">
                 <thought>
-                    With key moments from the World Cup retrieved, I will now use the website scraper tool to gather data on Qatar’s tourism impact during the World Cup.
+                    With key moments from the World Cup retrieved, I will now use the website scraper tool to gather data on Qatar's tourism impact during the World Cup.
                 </thought>
                 <action>websiteInfoScraper</action>
             </stage>
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss
index 42f6a0d61..50111f678 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.scss
@@ -93,6 +93,7 @@ $transition: all 0.3s ease;
             display: flex;
             align-items: center;
             justify-content: center;
+            position: relative;
 
             &:hover {
                 background-color: darken($primary-color, 10%);
@@ -109,10 +110,7 @@ $transition: all 0.3s ease;
                 border: 3px solid rgba(255, 255, 255, 0.3);
                 border-top: 3px solid #fff;
                 border-radius: 50%;
-                animation: spin 1s linear infinite;
-                display: flex;
-                align-items: center;
-                justify-content: center;
+                animation: spin 2s linear infinite;
             }
         }
     }
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 28bfbeae3..d4deff78b 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -23,6 +23,8 @@ import { ClientUtils } from '../../../../../ClientUtils';
 import { ProgressBar } from './ProgressBar';
 import { DocumentView } from '../../DocumentView';
 import { Networking } from '../../../../Network';
+import { PDFViewer } from '../../../pdf/PDFViewer';
+import { PDFBox } from '../../PDFBox';
 
 dotenv.config();
 
@@ -225,9 +227,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
      * Scrolls the chat messages container to the bottom, ensuring the latest message is visible.
      */
     scrollToBottom = () => {
-        if (this.messagesRef.current) {
-            this.messagesRef.current.scrollTop = this.messagesRef.current.scrollHeight;
-        }
+        // if (this.messagesRef.current) {
+        //     this.messagesRef.current.scrollTop = this.messagesRef.current.scrollHeight;
+        // }
     };
 
     /**
@@ -445,7 +447,8 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
 
                             DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
                                 const firstView = Array.from(doc[DocViews])[0] as DocumentView;
-                                firstView.ComponentView?.search?.(citation.direct_text ?? '');
+                                (firstView.ComponentView as PDFBox)?.gotoPage?.(foundChunk.startPage);
+                                (firstView.ComponentView as PDFBox)?.search?.(citation.direct_text ?? '');
                             });
                             break;
                         case CHUNK_TYPE.URL:
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index 88a81c5de..063ba193a 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -19,6 +19,7 @@ import * as puppeteer from 'puppeteer';
 import { JSDOM } from 'jsdom';
 import { Readability } from '@mozilla/readability';
 
+// Enumeration of directories where different file types are stored
 export enum Directory {
     parsed_files = 'parsed_files',
     images = 'images',
@@ -32,41 +33,76 @@ export enum Directory {
     scrape_images = 'scrape_images',
 }
 
+/**
+ * Constructs a normalized path to a file in the server's file system.
+ * @param directory The directory where the file is stored.
+ * @param filename The name of the file.
+ * @returns The full normalized path to the file.
+ */
 export function serverPathToFile(directory: Directory, filename: string) {
     return path.normalize(`${filesDirectory}/${directory}/${filename}`);
 }
 
+/**
+ * Constructs a normalized path to a directory in the server's file system.
+ * @param directory The directory to access.
+ * @returns The full normalized path to the directory.
+ */
 export function pathToDirectory(directory: Directory) {
     return path.normalize(`${filesDirectory}/${directory}`);
 }
 
+/**
+ * Constructs the client-accessible URL for a file.
+ * @param directory The directory where the file is stored.
+ * @param filename The name of the file.
+ * @returns The URL path to the file.
+ */
 export function clientPathToFile(directory: Directory, filename: string) {
     return `/files/${directory}/${filename}`;
 }
 
+// Promisified versions of filesystem functions
 const writeFileAsync = promisify(writeFile);
 const readFileAsync = promisify(fs.readFile);
 
+/**
+ * Class responsible for handling various API routes related to the Assistant functionality.
+ * This class extends `ApiManager` and handles registration of routes and secure request handlers.
+ */
 export default class AssistantManager extends ApiManager {
+    /**
+     * Registers all API routes and initializes necessary services like OpenAI, Scrapfly, and UnstructuredClient.
+     * @param register The registration method to register routes and handlers.
+     */
     protected initialize(register: Registration): void {
+        // Initialize OpenAI API with client key
         const openai = new OpenAI({
-            apiKey: process.env._CLIENT_OPENAI_KEY, // Use client key so don't have to set key seperately for client and server.
+            apiKey: process.env._CLIENT_OPENAI_KEY,
             dangerouslyAllowBrowser: true,
         });
+
+        // Initialize UnstructuredClient for document processing
         const unstructuredClient = new UnstructuredClient({
             security: {
                 apiKeyAuth: process.env._CLIENT_UNSTRUCTURED_API_KEY!,
             },
         });
+
+        // Initialize ScrapflyClient for scraping purposes
         const scrapflyClient = new ScrapflyClient({ key: process.env._CLIENT_SCRAPFLY_API_KEY! });
+
+        // Initialize Google Custom Search API
         const customsearch = google.customsearch('v1');
 
+        // Register Wikipedia summary API route
         register({
             method: Method.POST,
             subscription: '/getWikipediaSummary',
             secureHandler: async ({ req, res }) => {
                 const { title } = req.body;
                 try {
+                    // Fetch summary from Wikipedia using axios
                     const response = await axios.get('https://en.wikipedia.org/w/api.php', {
                         params: {
                             action: 'query',
@@ -75,25 +111,26 @@ export default class AssistantManager extends ApiManager {
                             format: 'json',
                         },
                     });
-                    const summary = response.data.query.search[0].snippet;
-                    if (!summary || summary.length === 0 || summary === '' || summary === ' ') {
-                        res.send({ text: 'No article found with that title.' });
-                    } else {
-                        res.send({ text: summary });
-                    }
+                    const summary = response.data.query.search[0]?.snippet || 'No article found with that title.';
+                    res.send({ text: summary });
                 } catch (error: any) {
-                    console.error('Error retrieving article summary from Wikipedia:', error);
-                    res.status(500).send({ error: 'Error retrieving article summary from Wikipedia.', details: error.message });
+                    console.error('Error retrieving Wikipedia summary:', error);
+                    res.status(500).send({
+                        error: 'Error retrieving article summary from Wikipedia.',
+                        details: error.message,
+                    });
                 }
             },
         });
 
+        // Register Google Web Search Results API route
         register({
             method: Method.POST,
             subscription: '/getWebSearchResults',
             secureHandler: async ({ req, res }) => {
                 const { query, max_results } = req.body;
                 try {
+                    // Fetch search results using Google Custom Search API
                     const response = await customsearch.cse.list({
                         q: query,
                         cx: process.env._CLIENT_GOOGLE_SEARCH_ENGINE_ID,
@@ -111,25 +148,40 @@ export default class AssistantManager extends ApiManager {
                     res.send({ results });
                 } catch (error: any) {
                     console.error('Error performing web search:', error);
-                    res.status(500).send({ error: 'Failed to perform web search', details: error.message });
+                    res.status(500).send({
+                        error: 'Failed to perform web search',
+                        details: error.message,
+                    });
                 }
             },
         });
 
+        // Axios instance with custom headers for scraping
         const axiosInstance = axios.create({
             headers: {
                 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
             },
         });
 
+        /**
+         * Utility function to introduce delay (used for retries).
+         * @param ms Delay in milliseconds.
+         */
         const delay = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
 
+        /**
+         * Function to fetch a URL with retry logic, handling rate limits.
+         * Retries a request if it fails due to rate limits (HTTP status 429).
+         * @param url The URL to fetch.
+         * @param retries The number of retry attempts.
+         * @param backoff Initial backoff time in milliseconds.
+         */
         const fetchWithRetry = async (url: string, retries = 3, backoff = 300) => {
             try {
                 const response = await axiosInstance.get(url);
                 return response.data;
             } catch (error: any) {
-                if (retries > 0 && error.response && error.response.status === 429) {
+                if (retries > 0 && error.response?.status === 429) {
                     console.log(`Rate limited. Retrying in ${backoff}ms...`);
                     await delay(backoff);
                     return fetchWithRetry(url, retries - 1, backoff * 2);
@@ -138,6 +190,7 @@ export default class AssistantManager extends ApiManager {
             }
         };
 
+        // Register a proxy fetch API route
         register({
             method: Method.POST,
             subscription: '/proxyFetch',
@@ -154,18 +207,22 @@ export default class AssistantManager extends ApiManager {
                     res.send({ data });
                 } catch (error: any) {
                     console.error('Error fetching the URL:', error);
-                    res.status(500).send({ error: 'Failed to fetch the URL', details: error.message });
+                    res.status(500).send({
+                        error: 'Failed to fetch the URL',
+                        details: error.message,
+                    });
                 }
             },
         });
 
+        // Register an API route to scrape website content using Puppeteer and JSDOM
         register({
             method: Method.POST,
             subscription: '/scrapeWebsite',
             secureHandler: async ({ req, res }) => {
                 const { url } = req.body;
                 try {
-                    // Launch Puppeteer to navigate to the webpage
+                    // Launch Puppeteer browser to navigate to the webpage
                     const browser = await puppeteer.launch({
                         args: ['--no-sandbox', '--disable-setuid-sandbox'],
                     });
@@ -173,45 +230,47 @@ export default class AssistantManager extends ApiManager {
                     await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36');
                     await page.goto(url, { waitUntil: 'networkidle2' });
 
-                    // Get the HTML content of the page
+                    // Extract HTML content
                     const htmlContent = await page.content();
                     await browser.close();
 
-                    // Use JSDOM to parse the HTML content
+                    // Parse HTML content using JSDOM
                     const dom = new JSDOM(htmlContent, { url });
 
-                    // Use Readability to extract the readable content
+                    // Extract readable content using Mozilla's Readability API
                     const reader = new Readability(dom.window.document);
                     const article = reader.parse();
 
                     if (article) {
-                        // Extract the plain text from the article content
                         const plainText = article.textContent;
-
-                        // Return the plain text content
                         res.send({ website_plain_text: plainText });
                     } else {
                         res.status(500).send({ error: 'Failed to extract readable content' });
                     }
                 } catch (error: any) {
                     console.error('Error scraping website:', error);
-                    res.status(500).send({ error: 'Failed to scrape website', details: error.message });
+                    res.status(500).send({
+                        error: 'Failed to scrape website',
+                        details: error.message,
+                    });
                 }
             },
         });
 
+        // Register an API route to create documents by sending files to a chatbot
         register({
             method: Method.POST,
             subscription: '/createDocument',
             secureHandler: async ({ req, res }) => {
                 const { file_path } = req.body;
-                const public_path = path.join(publicDirectory, file_path);
-                const file_name = path.basename(file_path);
+                const public_path = path.join(publicDirectory, file_path); // Resolve the file path in the public directory
+                const file_name = path.basename(file_path); // Extract the file name from the path
 
                 try {
-                    // Read file data and convert to base64
+                    // Read the file data and encode it as base64
                     const file_data: string = fs.readFileSync(public_path, { encoding: 'base64' });
 
+                    // Send the file data to a local chatbot API for document creation
                     const response = await axios.post(
                         'http://localhost:8080/createDocument',
                         {
@@ -225,65 +284,79 @@ export default class AssistantManager extends ApiManager {
                         }
                     );
 
+                    // Retrieve the job ID from the response
                     const jobId = response.data['job_id'];
                     console.log('Job ID:', jobId);
 
+                    // Send the job ID back to the client
                     res.send({ jobId });
                 } catch (error: any) {
                     console.error('Error communicating with chatbot:', error);
-                    res.status(500).send({ error: 'Failed to communicate with the chatbot', details: error.message });
+                    res.status(500).send({
+                        error: 'Failed to communicate with the chatbot',
+                        details: error.message,
+                    });
                 }
             },
         });
 
+        // Register an API route to check the progress of a document creation job
         register({
             method: Method.GET,
             subscription: '/getProgress/:jobId',
             secureHandler: async ({ req, res }) => {
-                const { jobId } = req.params;
+                const { jobId } = req.params; // Get the job ID from the URL parameters
                 try {
+                    // Query the local API to get the progress of the job
                     const progressResponse = await axios.get(`http://localhost:8080/getProgress/${jobId}`);
                     console.log(`Current step: ${progressResponse.data.step}, Progress within step: ${progressResponse.data.progress}%`);
-                    res.json(progressResponse.data);
+                    res.json(progressResponse.data); // Send the progress data back to the client
                 } catch (error) {
                     console.error('Error getting progress:', error);
-                    res.status(500).send({ error: 'Failed to get progress', details: JSON.parse(error as string).message });
+                    res.status(500).send({
+                        error: 'Failed to get progress',
+                        details: error,
+                    });
                 }
             },
         });
 
+        // Register an API route to get the final result of a document creation job
         register({
             method: Method.GET,
             subscription: '/getResult/:jobId',
             secureHandler: async ({ req, res }) => {
-                const { jobId } = req.params;
+                const { jobId } = req.params; // Get the job ID from the URL parameters
                 try {
+                    // Query the local API to get the final result of the job
                     const finalResponse = await axios.get(`http://localhost:8080/getResult/${jobId}`);
                     console.log('Result:', finalResponse.data);
                     const result = finalResponse.data;
 
+                    // If the result contains image or table chunks, save the base64 data as image files
                     if (result.chunks && Array.isArray(result.chunks)) {
                         for (const chunk of result.chunks) {
                             if (chunk.metadata && (chunk.metadata.type === 'image' || chunk.metadata.type === 'table')) {
                                 let files_directory = '/files/chunk_images/';
                                 const directory = path.join(publicDirectory, files_directory);
 
+                                // Ensure the directory exists or create it
                                 if (!fs.existsSync(directory)) {
                                     fs.mkdirSync(directory);
                                 }
 
-                                const fileName = path.basename(chunk.metadata.file_path);
-                                const filePath = path.join(directory, fileName);
+                                const fileName = path.basename(chunk.metadata.file_path); // Get the file name from the path
+                                const filePath = path.join(directory, fileName); // Create the full file path
 
-                                // Check if base64_data exists
+                                // Check if the chunk contains base64 encoded data
                                 if (chunk.metadata.base64_data) {
-                                    // Decode Base64 and save as file
+                                    // Decode the base64 data and write it to a file
                                     const buffer = Buffer.from(chunk.metadata.base64_data, 'base64');
                                     await fs.promises.writeFile(filePath, buffer);
 
-                                    // Update the file path in the chunk
+                                    // Update the file path in the chunk's metadata
                                     chunk.metadata.file_path = path.join(files_directory, fileName);
-                                    chunk.metadata.base64_data = undefined;
+                                    chunk.metadata.base64_data = undefined; // Remove the base64 data from the metadata
                                 } else {
                                     console.warn(`No base64_data found for chunk: ${fileName}`);
                                 }
@@ -294,32 +367,42 @@ export default class AssistantManager extends ApiManager {
                         console.warn('Not ready');
                         result.status = 'pending';
                     }
-                    res.json(result);
+                    res.json(result); // Send the result back to the client
                 } catch (error) {
-                    console.error('Error getting progress:', error);
-                    res.status(500).send({ error: 'Failed to get progress', details: error });
+                    console.error('Error getting result:', error);
+                    res.status(500).send({
+                        error: 'Failed to get result',
+                        details: error,
+                    });
                 }
             },
         });
 
+        // Register an API route to format chunks (e.g., text or image chunks) for display
         register({
             method: Method.POST,
             subscription: '/formatChunks',
             secureHandler: async ({ req, res }) => {
-                const { relevantChunks } = req.body;
+                const { relevantChunks } = req.body; // Get the relevant chunks from the request body
+
+                // Initialize an array to hold the formatted content
                 const content: { type: string; text?: string; image_url?: { url: string } }[] = [{ type: 'text', text: '<chunks>' }];
 
                 for (const chunk of relevantChunks) {
+                    // Format each chunk by adding its metadata and content
                     content.push({
                         type: 'text',
-                        text: `<chunk chunk_id=${chunk.id} chunk_type=${chunk.metadata.type}>`,
+                        text: `<chunk chunk_id=${chunk.id} chunk_type="${chunk.metadata.type}">`,
                     });
 
+                    // If the chunk is an image or table, read the corresponding file and encode it as base64
                     if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') {
                         try {
-                            const filePath = serverPathToFile(Directory.chunk_images, chunk.metadata.file_path);
-                            const imageBuffer = await readFileAsync(filePath);
-                            const base64Image = imageBuffer.toString('base64');
+                            const filePath = serverPathToFile(Directory.chunk_images, chunk.metadata.file_path); // Get the file path
+                            const imageBuffer = await readFileAsync(filePath); // Read the image file
+                            const base64Image = imageBuffer.toString('base64'); // Convert the image to base64
+
+                            // Add the base64-encoded image to the content array
                             if (base64Image) {
                                 content.push({
                                     type: 'image_url',
@@ -334,33 +417,35 @@ export default class AssistantManager extends ApiManager {
                             console.error(`Error reading image file for chunk ${chunk.id}:`, error);
                         }
                     }
+
+                    // Add the chunk's text content to the formatted content
                     content.push({ type: 'text', text: `${chunk.metadata.text}\n</chunk>\n` });
                 }
 
                 content.push({ type: 'text', text: '</chunks>' });
 
+                // Send the formatted content back to the client
                 res.send({ formattedChunks: content });
             },
         });
 
+        // Register an API route to create and save a CSV file on the server
         register({
             method: Method.POST,
             subscription: '/createCSV',
             secureHandler: async ({ req, res }) => {
                 const { filename, data } = req.body;
 
-                // Validate input
+                // Validate that both the filename and data are provided
                 if (!filename || !data) {
                     res.status(400).send({ error: 'Filename and data fields are required.' });
                     return;
                 }
 
                 try {
-                    // Generate a UUID for the file
+                    // Generate a UUID for the file to ensure unique naming
                     const uuidv4 = uuid.v4();
-
-                    // Construct the full filename with the UUID prefix
-                    const fullFilename = `${uuidv4}-${filename}`;
+                    const fullFilename = `${uuidv4}-${filename}`; // Prefix the file name with the UUID
 
                     // Get the full server path where the file will be saved
                     const serverFilePath = serverPathToFile(Directory.csv, fullFilename);
@@ -368,60 +453,73 @@ export default class AssistantManager extends ApiManager {
                     // Write the CSV data (which is a raw string) to the file
                     await writeFileAsync(serverFilePath, data, 'utf8');
 
-                    // Construct the full client URL for accessing the file
+                    // Construct the client-accessible URL for the file
                     const fileUrl = clientPathToFile(Directory.csv, fullFilename);
 
-                    // Return the file URL and UUID to the client
+                    // Send the file URL and UUID back to the client
                     res.send({ fileUrl, id: uuidv4 });
                 } catch (error: any) {
                     console.error('Error creating CSV file:', error);
-                    res.status(500).send({ error: 'Failed to create CSV file.', details: error.message });
+                    res.status(500).send({
+                        error: 'Failed to create CSV file.',
+                        details: error.message,
+                    });
                 }
             },
         });
 
+        // Register an API route to chunk a document using the UnstructuredClient
         register({
             method: Method.POST,
             subscription: '/chunkDocument',
             secureHandler: async ({ req, res }) => {
-                const { file_path } = req.body;
-                const public_path = path.join(publicDirectory, file_path);
-                const file_name = path.basename(file_path);
+                const { file_path } = req.body; // Get the file path from the request body
+                const public_path = path.join(publicDirectory, file_path); // Resolve the full path in the public directory
+                const file_name = path.basename(file_path); // Extract the file name from the path
 
                 try {
-                    // Read file data and convert to base64
+                    // Read the file content as a Buffer
                     const file_data = await fs.promises.readFile(public_path);
 
                     try {
+                        // Use UnstructuredClient to partition the document into chunks
                         const result = await unstructuredClient.general.partition({
                             partitionParameters: {
                                 files: {
                                     content: file_data,
                                     fileName: file_name,
                                 },
-                                strategy: Strategy.Auto,
-                                chunkingStrategy: ChunkingStrategy.ByTitle,
-                                extractImageBlockTypes: ['Image', 'Table'],
+                                strategy: Strategy.Auto, // Let the API pick the partitioning strategy automatically
+                                chunkingStrategy: ChunkingStrategy.ByTitle, // Chunk by title
+                                extractImageBlockTypes: ['Image', 'Table'], // Extract images and tables
                             },
                         });
 
                         if (result.statusCode === 200) {
                             console.log(result.elements);
                             const jsonElements = JSON.stringify(result.elements, null, 2);
-                            // Print the processed data.
-                            console.log(jsonElements);
-                            res.send({ document_json: jsonElements });
+                            console.log(jsonElements); // Log the JSON result of the partitioned elements
+                            res.send({ document_json: jsonElements }); // Send the partitioned data as a JSON response
                         } else {
                             console.error(`Unexpected status code: ${result.statusCode}`);
-                            res.status(result.statusCode).send({ error: 'Failed to process the document', details: result });
+                            res.status(result.statusCode).send({
+                                error: 'Failed to process the document',
+                                details: result,
+                            });
                         }
                     } catch (e: any) {
                         console.error('Error during partitioning:', e);
-                        res.status(500).send({ error: 'Failed to partition the document', details: e.message });
+                        res.status(500).send({
+                            error: 'Failed to partition the document',
+                            details: e.message,
+                        });
                     }
                 } catch (error: any) {
                     console.error('Error reading file:', error);
-                    res.status(500).send({ error: 'Failed to read the file', details: error.message });
+                    res.status(500).send({
+                        error: 'Failed to read the file',
+                        details: error.message,
+                    });
                 }
             },
         });
-- 
cgit v1.2.3-70-g09d2


From d347fc59feefd91a796012892da57511787bb6d0 Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Thu, 10 Oct 2024 11:39:19 -0400
Subject: added new file header comments and fixed some error handling

---
 .../views/nodes/chatbot/agentsystem/prompts.ts     |  9 ++++++++-
 .../nodes/chatbot/chatboxcomponents/ChatBox.tsx    |  9 +++++++++
 .../chatbot/chatboxcomponents/MessageComponent.tsx |  9 +++++++++
 .../chatbot/chatboxcomponents/ProgressBar.tsx      |  8 ++++++++
 .../nodes/chatbot/response_parsers/AnswerParser.ts |  8 ++++++++
 .../response_parsers/StreamedAnswerParser.ts       |  8 ++++++++
 src/client/views/nodes/chatbot/tools/BaseTool.ts   |  8 ++++++++
 .../views/nodes/chatbot/vectorstore/Vectorstore.ts | 10 +++++++++-
 src/server/ApiManagers/AssistantManager.ts         | 22 ++++++++++------------
 9 files changed, 77 insertions(+), 14 deletions(-)

(limited to 'src/client/views/nodes/chatbot/agentsystem/prompts.ts')

diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
index 01c30d444..f5aec3130 100644
--- a/src/client/views/nodes/chatbot/agentsystem/prompts.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
@@ -1,4 +1,11 @@
-// prompts.ts
+/**
+ * @file prompts.ts
+ * @description This file contains functions that generate prompts for various AI tasks, including
+ * generating system messages for structured AI assistant interactions and summarizing document chunks.
+ * It defines prompt structures to ensure the AI follows specific guidelines for response formatting,
+ * tool usage, and citation rules, with a rigid structure in mind for tasks such as answering user queries
+ * and summarizing content from provided text chunks.
+ */
 
 import { Tool } from '../types/types';
 
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 613cb7078..44c231c87 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -1,3 +1,12 @@
+/**
+ * @file ChatBox.tsx
+ * @description This file defines the ChatBox component, which manages user interactions with
+ * an AI assistant. It handles document uploads, chat history, message input, and integration
+ * with the OpenAI API. The ChatBox is MobX-observable and tracks the progress of tasks such as
+ * document analysis and AI-driven summaries. It also maintains real-time chat functionality
+ * with support for follow-up questions and citation management.
+ */
+
 import dotenv from 'dotenv';
 import { ObservableSet, action, computed, makeObservable, observable, observe, reaction, runInAction } from 'mobx';
 import { observer } from 'mobx-react';
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx
index 801becb64..d48f46963 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/MessageComponent.tsx
@@ -1,3 +1,12 @@
+/**
+ * @file MessageComponent.tsx
+ * @description This file defines the MessageComponentBox component, which renders the content
+ * of an AssistantMessage. It supports rendering various message types such as grounded text,
+ * normal text, and follow-up questions. The component uses React and MobX for state management
+ * and includes functionality for handling citation and follow-up actions, as well as displaying
+ * agent processing information.
+ */
+
 import React, { useState } from 'react';
 import { observer } from 'mobx-react';
 import { AssistantMessage, Citation, MessageContent, PROCESSING_TYPE, ProcessingInfo, TEXT_TYPE } from '../types/types';
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx
index b9fd08742..240862f8b 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ProgressBar.tsx
@@ -1,3 +1,11 @@
+/**
+ * @file ProgressBar.tsx
+ * @description This file defines the ProgressBar component, which displays a loading spinner
+ * to indicate progress during ongoing tasks or processing. The animation consists of two
+ * bouncing elements that create a pulsating effect, providing a visual cue for active progress.
+ * The component is styled using the accompanying `ProgressBar.scss` for smooth animation.
+ */
+
 import React from 'react';
 import './ProgressBar.scss';
 
diff --git a/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts b/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts
index 1ac753790..ed78cc7cb 100644
--- a/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts
+++ b/src/client/views/nodes/chatbot/response_parsers/AnswerParser.ts
@@ -1,3 +1,11 @@
+/**
+ * @file AnswerParser.ts
+ * @description This file defines the AnswerParser class, which processes structured XML-like responses
+ * from the AI system, parsing grounded text, normal text, citations, follow-up questions, and loop summaries.
+ * The parser converts the XML response into an AssistantMessage format, extracting key information like
+ * citations and processing steps for further use in the assistant's workflow.
+ */
+
 import { v4 as uuid } from 'uuid';
 import { ASSISTANT_ROLE, AssistantMessage, Citation, ProcessingInfo, TEXT_TYPE, getChunkType } from '../types/types';
 
diff --git a/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts b/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts
index 4149f3da9..dbd568faa 100644
--- a/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts
+++ b/src/client/views/nodes/chatbot/response_parsers/StreamedAnswerParser.ts
@@ -1,3 +1,11 @@
+/**
+ * @file StreamedAnswerParser.ts
+ * @description This file defines the StreamedAnswerParser class, which parses incoming character streams
+ * to extract grounded or normal text based on the tags found in the input stream. It maintains state
+ * between grounded text and normal text sections, handling buffered input and ensuring proper text formatting
+ * for AI assistant responses.
+ */
+
 enum ParserState {
     Outside,
     InGroundedText,
diff --git a/src/client/views/nodes/chatbot/tools/BaseTool.ts b/src/client/views/nodes/chatbot/tools/BaseTool.ts
index 10780617b..a77f567a5 100644
--- a/src/client/views/nodes/chatbot/tools/BaseTool.ts
+++ b/src/client/views/nodes/chatbot/tools/BaseTool.ts
@@ -1,3 +1,11 @@
+/**
+ * @file BaseTool.ts
+ * @description This file defines the abstract BaseTool class, which serves as a blueprint
+ * for tool implementations in the AI assistant system. Each tool has a name, description,
+ * parameters, and citation rules. The BaseTool class provides a structure for executing actions
+ * and retrieving action rules for use within the assistant's workflow.
+ */
+
 import { Tool } from '../types/types';
 
 export abstract class BaseTool<T extends Record<string, unknown> = Record<string, unknown>> implements Tool<T> {
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index 9575277f7..f96f55997 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -1,3 +1,10 @@
+/**
+ * @file Vectorstore.ts
+ * @description This file defines the Vectorstore class, which integrates with Pinecone for vector-based document indexing and Cohere for text embeddings.
+ * It handles tasks such as AI document management, document chunking, and retrieval of relevant document sections based on user queries.
+ * The class supports adding documents to the vectorstore, managing document status, and querying Pinecone for document chunks matching a query.
+ */
+
 import { Index, IndexList, Pinecone, PineconeRecord, QueryResponse, RecordMetadata } from '@pinecone-database/pinecone';
 import { CohereClient } from 'cohere-ai';
 import { EmbedResponse } from 'cohere-ai/api';
@@ -128,7 +135,8 @@ export class Vectorstore {
                     }
                 }
                 if (!result) {
-                    throw new Error('no result received...'); // bcz: is this an Error?
+                    console.error('Error processing document.');
+                    return;
                 }
 
                 // Once completed, process the document and add it to the vectorstore.
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index b4ebb1eae..b7d4191ca 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -1,3 +1,13 @@
+/**
+ * @file AssistantManager.ts
+ * @description This file defines the AssistantManager class, responsible for managing various
+ * API routes related to the Assistant functionality. It provides features such as file handling,
+ * web scraping, and integration with third-party APIs like OpenAI and Google Custom Search.
+ * It also handles job tracking and progress reporting for tasks like document creation and web scraping.
+ * Utility functions for path manipulation and file operations are included, along with
+ * a mechanism for handling retry logic during API calls.
+ */
+
 import { Readability } from '@mozilla/readability';
 import axios from 'axios';
 import { spawn } from 'child_process';
@@ -76,12 +86,6 @@ export default class AssistantManager extends ApiManager {
      * @param register The registration method to register routes and handlers.
      */
     protected initialize(register: Registration): void {
-        // Initialize OpenAI API with client key
-        const openai = new OpenAI({ // bcz: is this needed?  variable is never used...
-            apiKey: process.env._CLIENT_OPENAI_KEY,
-            dangerouslyAllowBrowser: true,
-        }); // prettier-ignore
-
         // Initialize Google Custom Search API
         const customsearch = google.customsearch('v1');
 
@@ -107,7 +111,6 @@ export default class AssistantManager extends ApiManager {
                     console.error('Error retrieving Wikipedia summary:', error);
                     res.status(500).send({
                         error: 'Error retrieving article summary from Wikipedia.',
-                        details: (error as { message: string }).message ?? error, // bcz: don't know what the error type contains...
                     });
                 }
             },
@@ -140,7 +143,6 @@ export default class AssistantManager extends ApiManager {
                     console.error('Error performing web search:', error);
                     res.status(500).send({
                         error: 'Failed to perform web search',
-                        details: (error as { message: string }).message ?? error, // bcz: don't know wha tthe error type contains...
                     });
                 }
             },
@@ -199,7 +201,6 @@ export default class AssistantManager extends ApiManager {
                     console.error('Error fetching the URL:', error);
                     res.status(500).send({
                         error: 'Failed to fetch the URL',
-                        details: (error as { message: string }).message ?? error, // bcz: don't know wha tthe error type contains...
                     });
                 }
             },
@@ -241,7 +242,6 @@ export default class AssistantManager extends ApiManager {
                     console.error('Error scraping website:', error);
                     res.status(500).send({
                         error: 'Failed to scrape website',
-                        details: (error as { message: string }).message ?? error, // bcz: don't know wha tthe error type contains...
                     });
                 }
             },
@@ -272,7 +272,6 @@ export default class AssistantManager extends ApiManager {
                     console.error('Error initiating document creation:', error);
                     res.status(500).send({
                         error: 'Failed to initiate document creation',
-                        details: (error as { message: string }).message ?? error, // bcz: don't know wha tthe error type contains...
                     });
                 }
             },
@@ -430,7 +429,6 @@ export default class AssistantManager extends ApiManager {
                     console.error('Error creating CSV file:', error);
                     res.status(500).send({
                         error: 'Failed to create CSV file.',
-                        details: (error as { message: string }).message ?? error, // bcz: don't know what the error type contains...
                     });
                 }
             },
-- 
cgit v1.2.3-70-g09d2