Merge branch 'master' into nathan-starter

author: bobzel <zzzman@gmail.com> 2024-10-10 18:58:39 -0400
committer: bobzel <zzzman@gmail.com> 2024-10-10 18:58:39 -0400
commit: 5752dff8ff7b1b2858542feec0b1bb037461bf1a (patch)
tree: 04080d4a596b0e5199b5ec95ab625fbb590f2a75 /src/client/views/nodes/chatbot/tools/RAGTool.ts
parent: 36735ff00a55ae587af5f69eef495533a1f35393 (diff)
parent: d347fc59feefd91a796012892da57511787bb6d0 (diff)
1 files changed, 79 insertions, 0 deletions
diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts
new file mode 100644
index 000000000..4cc2f26ff
--- /dev/null
+++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts
@@ -0,0 +1,79 @@
+import { Networking } from '../../../../Network';
+import { RAGChunk } from '../types/types';
+import { Vectorstore } from '../vectorstore/Vectorstore';
+import { BaseTool } from './BaseTool';
+
+export class RAGTool extends BaseTool { // Tool that queries a Vectorstore for chunks and returns them after formatting by the server.
+    constructor(private vectorstore: Vectorstore) { // `vectorstore` is kept as a private field and queried by execute().
+        super( // Argument meanings are defined by BaseTool's constructor (not visible here); roles noted below are inferred - TODO confirm.
+            'rag', // presumably the tool's name/identifier
+            'Perform a RAG search on user documents', // presumably a short description of the tool
+            { // presumably the parameter schema; it declares the one argument that execute() reads
+                hypothetical_document_chunk: {
+                    type: 'string',
+                    description: "A detailed prompt representing an ideal chunk to embed and compare against document vectors to retrieve the most relevant content for answering the user's query.",
+                    required: 'true', // NOTE(review): this is the string 'true', not the boolean true - confirm that is what BaseTool expects
+                },
+            },
+            `
+            When using the RAG tool, the structure must adhere to the format described in the ReAct prompt. Below are additional guidelines specifically for RAG-based responses:
+
+            1. **Grounded Text Guidelines**:
+                - Each <grounded_text> tag must correspond to exactly one citation, ensuring a one-to-one relationship.
+                - Always cite a **subset** of the chunk, never the full text. The citation should be as short as possible while providing the relevant information (typically one to two sentences).
+                - Do not paraphrase the chunk text in the citation; use the original subset directly from the chunk.
+                - If multiple citations are needed for different sections of the response, create new <grounded_text> tags for each.
+
+            2. **Citation Guidelines**:
+                - The citation must include only the relevant excerpt from the chunk being referenced.
+                - Use unique citation indices and reference the chunk_id for the source of the information.
+                - For text chunks, the citation content must reflect the **exact subset** of the original chunk that is relevant to the grounded_text tag.
+
+            **Example**:
+
+            <answer>
+                <grounded_text citation_index="1">
+                    Artificial Intelligence is revolutionizing various sectors, with healthcare seeing transformations in diagnosis and treatment planning.
+                </grounded_text>
+                <grounded_text citation_index="2">
+                    Based on recent data, AI has drastically improved mammogram analysis, achieving 99% accuracy at a rate 30 times faster than human radiologists.
+                </grounded_text>
+
+                <citations>
+                    <citation index="1" chunk_id="abc123" type="text">Artificial Intelligence is revolutionizing various industries, especially in healthcare.</citation>
+                    <citation index="2" chunk_id="abc124" type="table"></citation>
+                </citations>
+
+                <follow_up_questions>
+                    <question>How can AI enhance patient outcomes in fields outside radiology?</question>
+                    <question>What are the challenges in implementing AI systems across different hospitals?</question>
+                    <question>How might AI-driven advancements impact healthcare costs?</question>
+                </follow_up_questions>
+            </answer>
+            `, // end of the grounding/citation guidelines text; it is a runtime string, so its wording is part of behavior
+
+            `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a set of document chunks (text or images) to provide a grounded response based on user documents.` // presumably a longer summary of the tool - TODO confirm against BaseTool
+        );
+    }
+
+    async execute(args: { hypothetical_document_chunk: string }): Promise<unknown> { // Retrieves chunks for the given text and resolves with their formatted form.
+        const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk); // the hypothetical chunk text is the query handed to the vectorstore
+        const formatted_chunks = await this.getFormattedChunks(relevantChunks); // formatting is delegated to the server via getFormattedChunks()
+        return formatted_chunks;
+    }
+
+    async getFormattedChunks(relevantChunks: RAGChunk[]): Promise<unknown> { // Posts the chunks to '/formatChunks' and resolves with the response's formattedChunks; rejects on any failure.
+        try {
+            const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks }); // the response is expected to carry a formattedChunks property
+
+            if (!formattedChunks) { // a missing or falsy formattedChunks is treated as a failure
+                throw new Error('Failed to format chunks');
+            }
+
+            return formattedChunks;
+        } catch (error) { // also catches the Error thrown just above
+            console.error('Error formatting chunks:', error);
+            throw error; // rethrown after logging, so the caller still sees the failure
+        }
+    }
+}
author	bobzel <zzzman@gmail.com>	2024-10-10 18:58:39 -0400
committer	bobzel <zzzman@gmail.com>	2024-10-10 18:58:39 -0400
commit	5752dff8ff7b1b2858542feec0b1bb037461bf1a (patch)
tree	04080d4a596b0e5199b5ec95ab625fbb590f2a75 /src/client/views/nodes/chatbot/tools/RAGTool.ts
parent	36735ff00a55ae587af5f69eef495533a1f35393 (diff)
parent	d347fc59feefd91a796012892da57511787bb6d0 (diff)