aboutsummaryrefslogtreecommitdiff
path: root/src/client/views/nodes/chatbot/tools/RAGTool.ts
diff options
context:
space:
mode:
author A.J. Shulman <Shulman.aj@gmail.com> 2024-09-07 12:43:05 -0400
committer A.J. Shulman <Shulman.aj@gmail.com> 2024-09-07 12:43:05 -0400
commit 4791cd23af08da70895204a3a7fbaf889d9af2d5 (patch)
tree c4c2534e64724d62bae9152763f1a74cd5a963e0 /src/client/views/nodes/chatbot/tools/RAGTool.ts
parent 210f8f5f1cd19e9416a12524cce119b273334fd3 (diff)
completely restructured, added comments, and significantly reduced the length of the prompt (~72% shorter and cheaper)
Diffstat (limited to 'src/client/views/nodes/chatbot/tools/RAGTool.ts')
-rw-r--r-- src/client/views/nodes/chatbot/tools/RAGTool.ts 138
1 files changed, 138 insertions, 0 deletions
diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts
new file mode 100644
index 000000000..c24306dcd
--- /dev/null
+++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts
@@ -0,0 +1,138 @@
+import { BaseTool } from './BaseTool';
+import { Vectorstore } from '../vectorstore/Vectorstore';
+import { RAGChunk } from '../types/types';
+import * as fs from 'fs';
+import { Networking } from '../../../../Network';
+import { file } from 'jszip';
+import { ChatCompletion, ChatCompletionContentPart, ChatCompletionMessageParam } from 'openai/resources';
+
+/**
+ * Chatbot tool registered under the name 'rag'.
+ *
+ * `execute` asks the vector store for the chunks that best match a hypothetical document
+ * chunk and then has the server format them (POST to '/formatChunks').
+ *
+ * The long template string handed to the base constructor instructs the model how to
+ * structure a cited answer (answer / grounded_text / citations / follow_up_questions tags).
+ * Both template strings are runtime text: their content, including leading whitespace,
+ * must not be re-indented.
+ */
+export class RAGTool extends BaseTool {
+    /**
+     * @param vectorstore store whose `retrieve` method is queried by `execute`
+     */
+    constructor(private vectorstore: Vectorstore) {
+        // NOTE(review): the meaning of each positional argument is defined by BaseTool,
+        // which is not visible here; the labels below are inferred from the values - confirm.
+        super(
+            // tool name
+            'rag',
+            // short description
+            'Perform a RAG search on user documents',
+            // parameter schema for the tool call
+            {
+                hypothetical_document_chunk: {
+                    type: 'string',
+                    description:
+                        "Detailed version of the prompt that is effectively a hypothetical document chunk that would be ideal to embed and compare to the vectors of real document chunks to fetch the most relevant document chunks to answer the user's query",
+                    required: 'true',
+                },
+            },
+            // answer-formatting / citation rules (runtime string - keep byte-identical)
+            `
+ Your task is to provide a comprehensive response to the user's prompt based on the given chunks and chat history. Follow these structural guidelines meticulously:
+
+ 1. Overall Structure:
+ <answer>
+ [Main content with grounded_text tags interspersed with normal plain text (information that is not derived from chunks' information)]
+ <citations>
+ [Individual citation tags]
+ </citations>
+ <follow_up_questions>
+ [Three question tags]
+ </follow_up_questions>
+ </answer>
+
+ 2. Grounded Text Tag Structure:
+ - Basic format:
+ <grounded_text citation_index="[citation index number(s)]">
+ [Your generated text based on information from a subset of a chunk (a citation's direct text)]
+ </grounded_text>
+
+ 3. Citation Tag Structure:
+ <citation index="[unique number]" chunk_id="[UUID v4]" type="[text/image/table]">
+ [For text: relevant subset of original chunk]
+ [For image/table: leave empty]
+ </citation>
+
+ 4. Detailed Grounded Text Guidelines:
+ a. Wrap all information derived from chunks in grounded_text tags.
+ b. DO NOT PUT ANYTHING THAT IS NOT DIRECTLY DERIVED FROM INFORMATION FROM CHUNKS (EITHER IMAGE, TABLE, OR TEXT) IN GROUNDED_TEXT TAGS.
+ c. Use a single grounded_text tag for suquential and closely related information that references the same citation. If other citations' information are used sequentially, create new grounded_text tags.
+ d. Ensure every grounded_text tag has up to a few corresponding citations (should not be more than 3 and only 1 is fine). Multiple citation indices should be separated by commas.
+ e. Grounded text can be as short as a few words or as long as several sentences.
+ f. Avoid overlapping or nesting grounded_text tags; instead, use sequential tags.
+
+ 5. Detailed Citation Guidelines:
+ a. Create a unique citation for each distinct piece of information from the chunks that is used to support grounded_text.
+ b. ALL TEXT CITATIONS must have direct text in its element content (e.g. <citation ...>DIRECT TEXT HERE</citation>) that is a relevant SUBSET of the original text chunk that is being cited specifically.
+ c. DO NOT paraphrase or summarize the text; use the original text as much as possible.
+ d. DO NOT USE THE FULL TEXT CHUNK as the citation content; only use the relevant subset of the text that the grounded_text is base. AS SHORT AS POSSIBLE WHILE PROVIDING INFORMATION (ONE TO TWO SENTENCES USUALLY)!
+ e. Ensure each citation has a unique index number.
+ f. Specify the correct type: "text", "image", or "table".
+ g. For text chunks, the content of the citation should ALWAYS have the relevant subset of the original text that the grounded_text is based on.
+ h. For image/table chunks, leave the citation content empty.
+ i. One citation can be used for multiple grounded_text tags if they are based on the same chunk information.
+ j. !!!DO NOT OVERCITE - only include citations for information that is directly relevant to the grounded_text.
+
+ 6. Structural Integrity Checks:
+ a. Ensure all opening tags have corresponding closing tags.
+ b. Verify that all grounded_text tags have valid citation_index attributes (they should be equal to the associated citation(s) index field—not their chunk_id field).
+ c. Check that all cited indices in grounded_text tags have corresponding citations.
+
+ Example of grounded_text usage:
+
+ <answer>
+ <grounded_text citation_index="1,2">
+ Artificial Intelligence (AI) is revolutionizing various sectors, with healthcare experiencing significant transformations in areas such as diagnosis and treatment planning.
+ </grounded_text>
+ <grounded_text citation_index="2,3,4">
+ In the field of medical diagnosis, AI has shown remarkable capabilities, particularly in radiology. For instance, AI systems have drastically improved mammogram analysis, achieving 99% accuracy at a rate 30 times faster than human radiologists.
+ </grounded_text>
+ <grounded_text citation_index="4">
+ This advancement not only enhances the efficiency of healthcare systems but also significantly reduces the occurrence of false positives, leading to fewer unnecessary biopsies and reduced patient stress.
+ </grounded_text>
+
+ <grounded_text citation_index="5,6">
+ Beyond diagnosis, AI is playing a crucial role in drug discovery and development. By analyzing vast amounts of genetic and molecular data, AI algorithms can identify potential drug candidates much faster than traditional methods.
+ </grounded_text>
+ <grounded_text citation_index="6">
+ This could potentially reduce the time and cost of bringing new medications to market, especially for rare diseases that have historically received less attention due to limited market potential.
+ </grounded_text>
+
+ [... rest of the content ...]
+
+ <citations>
+ <citation index="1" chunk_id="123e4567-e89b-12d3-a456-426614174000" type="text">Artificial Intelligence is revolutionizing various industries, with healthcare being one of the most profoundly affected sectors.</citation>
+ <citation index="2" chunk_id="123e4567-e89b-12d3-a456-426614174001" type="text">AI has shown particular promise in the field of radiology, enhancing the accuracy and speed of image analysis.</citation>
+ <citation index="3" chunk_id="123e4567-e89b-12d3-a456-426614174002" type="text">According to recent studies, AI systems have achieved 99% accuracy in mammogram analysis, performing the task 30 times faster than human radiologists.</citation>
+ <citation index="4" chunk_id="123e4567-e89b-12d3-a456-426614174003" type="text">The improvement in mammogram accuracy has led to a significant reduction in false positives, decreasing the need for unnecessary biopsies and reducing patient anxiety.</citation>
+ <citation index="5" chunk_id="123e4567-e89b-12d3-a456-426614174004" type="text">AI is accelerating the drug discovery process by analyzing complex molecular and genetic data to identify potential drug candidates.</citation>
+ <citation index="6" chunk_id="123e4567-e89b-12d3-a456-426614174005" type="text">The use of AI in drug discovery could significantly reduce the time and cost associated with bringing new medications to market, particularly for rare diseases.</citation>
+ </citations>
+
+ <follow_up_questions>
+ <question>How might AI-driven personalized medicine impact the cost and accessibility of healthcare in the future?</question>
+ <question>What measures can be taken to ensure that AI systems in healthcare are free from biases and equally effective for diverse populations?</question>
+ <question>How could the role of healthcare professionals evolve as AI becomes more integrated into medical practices?</question>
+ </follow_up_questions>
+ </answer>
+ `,
+
+            // longer summary of what the tool returns (runtime string - keep byte-identical)
+            `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a
+ set of document chunks (either images or text) that can be used to provide a grounded response based on
+ user documents`
+        );
+    }
+
+    /**
+     * Retrieves the chunks matching the hypothetical chunk from the vector store and
+     * returns them after server-side formatting.
+     *
+     * @param args.hypothetical_document_chunk text passed to `vectorstore.retrieve`
+     * @returns whatever `getFormattedChunks` resolves to; the payload shape is decided by the
+     *          '/formatChunks' endpoint and is not visible here, hence `unknown`
+     * @throws rethrows any failure from retrieval or formatting
+     */
+    async execute(args: { hypothetical_document_chunk: string }): Promise<unknown> {
+        const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk);
+        return this.getFormattedChunks(relevantChunks);
+    }
+
+    /**
+     * Posts the retrieved chunks to '/formatChunks' and returns the `formattedChunks`
+     * field of the response.
+     *
+     * @param relevantChunks chunks to send, wrapped as `{ relevantChunks }` in the request body
+     * @returns the `formattedChunks` value from the server response
+     * @throws Error('Failed to format chunks') when the response has no truthy `formattedChunks`;
+     *         any error is logged with console.error and then rethrown unchanged
+     */
+    async getFormattedChunks(relevantChunks: RAGChunk[]): Promise<unknown> {
+        try {
+            const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks });
+
+            if (!formattedChunks) {
+                throw new Error('Failed to format chunks');
+            }
+
+            return formattedChunks;
+        } catch (error) {
+            // Log for diagnostics, then let the caller decide how to handle the failure.
+            console.error('Error formatting chunks:', error);
+            throw error;
+        }
+    }
+}