diff options
| author | Nathan-SR <144961007+Nathan-SR@users.noreply.github.com> | 2024-10-10 19:30:06 -0400 |
|---|---|---|
| committer | Nathan-SR <144961007+Nathan-SR@users.noreply.github.com> | 2024-10-10 19:30:06 -0400 |
| commit | 373340938a4bc48edb4b9345f28e562de41153d6 (patch) | |
| tree | d6604992d93a12920e1b62a1f906735d59434765 /src/client/views/nodes/chatbot/tools/RAGTool.ts | |
| parent | 772c7a4c4d8867cbc33a673c3e3c6f3e330d395d (diff) | |
| parent | 5752dff8ff7b1b2858542feec0b1bb037461bf1a (diff) | |
Merge branch 'nathan-starter' of https://github.com/brown-dash/Dash-Web into nathan-starter
Diffstat (limited to 'src/client/views/nodes/chatbot/tools/RAGTool.ts')
| -rw-r--r-- | src/client/views/nodes/chatbot/tools/RAGTool.ts | 79 |
1 file changed, 79 insertions, 0 deletions
// diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts
// new file mode 100644, index 000000000..4cc2f26ff (79 lines added)
import { Networking } from '../../../../Network';
import { RAGChunk } from '../types/types';
import { Vectorstore } from '../vectorstore/Vectorstore';
import { BaseTool } from './BaseTool';

/**
 * Chatbot tool that runs a retrieval step against the user's documents.
 *
 * The tool takes a "hypothetical document chunk" written by the model, asks the
 * injected vector store for the chunks it considers relevant to that text, and
 * then has the server format those chunks before handing them back.
 */
export class RAGTool extends BaseTool {
    /**
     * @param vectorstore Store that is queried in {@link RAGTool.execute}.
     *
     * The five positional arguments passed to `super` are, in order: the tool name,
     * a short description, the parameter schema, a block of citation/formatting
     * rules, and a longer summary. NOTE(review): their exact roles are defined by
     * `BaseTool`, which is not visible here - confirm against that class.
     */
    constructor(private readonly vectorstore: Vectorstore) {
        super(
            'rag',
            'Perform a RAG search on user documents',
            {
                hypothetical_document_chunk: {
                    type: 'string',
                    description: "A detailed prompt representing an ideal chunk to embed and compare against document vectors to retrieve the most relevant content for answering the user's query.",
                    // Kept as the string 'true' (not a boolean) exactly as in the original.
                    required: 'true',
                },
            },
            `
            When using the RAG tool, the structure must adhere to the format described in the ReAct prompt. Below are additional guidelines specifically for RAG-based responses:

            1. **Grounded Text Guidelines**:
               - Each <grounded_text> tag must correspond to exactly one citation, ensuring a one-to-one relationship.
               - Always cite a **subset** of the chunk, never the full text. The citation should be as short as possible while providing the relevant information (typically one to two sentences).
               - Do not paraphrase the chunk text in the citation; use the original subset directly from the chunk.
               - If multiple citations are needed for different sections of the response, create new <grounded_text> tags for each.

            2. **Citation Guidelines**:
               - The citation must include only the relevant excerpt from the chunk being referenced.
               - Use unique citation indices and reference the chunk_id for the source of the information.
               - For text chunks, the citation content must reflect the **exact subset** of the original chunk that is relevant to the grounded_text tag.

            **Example**:

            <answer>
                <grounded_text citation_index="1">
                    Artificial Intelligence is revolutionizing various sectors, with healthcare seeing transformations in diagnosis and treatment planning.
                </grounded_text>
                <grounded_text citation_index="2">
                    Based on recent data, AI has drastically improved mammogram analysis, achieving 99% accuracy at a rate 30 times faster than human radiologists.
                </grounded_text>

                <citations>
                    <citation index="1" chunk_id="abc123" type="text">Artificial Intelligence is revolutionizing various industries, especially in healthcare.</citation>
                    <citation index="2" chunk_id="abc124" type="table"></citation>
                </citations>

                <follow_up_questions>
                    <question>How can AI enhance patient outcomes in fields outside radiology?</question>
                    <question>What are the challenges in implementing AI systems across different hospitals?</question>
                    <question>How might AI-driven advancements impact healthcare costs?</question>
                </follow_up_questions>
            </answer>
            `,

            `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a set of document chunks (text or images) to provide a grounded response based on user documents.`
        );
    }

    /**
     * Retrieves chunks for the supplied text and returns them in formatted form.
     *
     * @param args.hypothetical_document_chunk Text passed to `vectorstore.retrieve`.
     * @returns Whatever {@link RAGTool.getFormattedChunks} resolves to.
     * @throws Propagates any rejection from the vector store or from formatting.
     */
    async execute(args: { hypothetical_document_chunk: string }): Promise<unknown> {
        const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk);
        return this.getFormattedChunks(relevantChunks);
    }

    /**
     * Sends the retrieved chunks to the server's `/formatChunks` route.
     *
     * @param relevantChunks Chunks to post, wrapped as `{ relevantChunks }`.
     * @returns The `formattedChunks` field of the server response.
     * @throws Error with message 'Failed to format chunks' when the response has no
     *         truthy `formattedChunks`; any other failure is logged and rethrown as is.
     */
    async getFormattedChunks(relevantChunks: RAGChunk[]): Promise<unknown> {
        try {
            const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks });

            if (!formattedChunks) {
                throw new Error('Failed to format chunks');
            }

            return formattedChunks;
        } catch (error) {
            // Log for diagnostics, then let the caller decide how to recover.
            console.error('Error formatting chunks:', error);
            throw error;
        }
    }
}
