about | summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author: A.J. Shulman <Shulman.aj@gmail.com>, 2024-09-07 13:07:33 -0400
committer: A.J. Shulman <Shulman.aj@gmail.com>, 2024-09-07 13:07:33 -0400
commit: ba0520baaa1f84d9fb08d3b2880c68302d28350a (patch)
tree: db38b62a6942906d51a8fb7c66eee1f31fd6623f /src
parent: 4791cd23af08da70895204a3a7fbaf889d9af2d5 (diff)
added clarifying structural info to ReAct prompt (still ~69% shorter than previous prompt) and shortened the RAG prompt
Diffstat (limited to 'src')
-rw-r--r-- src/client/views/nodes/chatbot/agentsystem/prompts.ts | 28
-rw-r--r-- src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx | 7
-rw-r--r-- src/client/views/nodes/chatbot/tools/RAGTool.ts | 104
3 files changed, 52 insertions, 87 deletions
diff --git a/src/client/views/nodes/chatbot/agentsystem/prompts.ts b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
index 9daabc35f..7000d8634 100644
--- a/src/client/views/nodes/chatbot/agentsystem/prompts.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/prompts.ts
@@ -26,12 +26,33 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto
<point>Ensure that **ALL answers follow the answer structure**: grounded text wrapped in <grounded_text> tags with corresponding citations, normal text in <normal_text> tags, and three follow-up questions at the end.</point>
</critical_points>
+ <thought_structure>
+ <thought>
+ <description>
+ Always provide a thought before each action to explain why you are choosing the next step or tool. This helps clarify your reasoning for the action you will take.
+ </description>
+ </thought>
+ </thought_structure>
+
+ <action_input_structure>
+ <action_input>
+ <action_input_description>
+ Always describe what the action will do in the <action_input_description> tag. Be clear about how the tool will process the input and why it is appropriate for this stage.
+ </action_input_description>
+ <inputs>
+ <description>
+ Provide the actual inputs for the action in the <inputs> tag. Ensure that each input is specific to the tool being used. Inputs should match the expected parameters for the tool (e.g., a search term for the website scraper, document references for RAG).
+ </description>
+ </inputs>
+ </action_input>
+ </action_input_structure>
+
<answer_structure>
<answer>
<grounded_text> - All information derived from tools or user documents must be wrapped in these tags with proper citation.</grounded_text>
- <normal_text> - Use this tag for text not derived from tools or user documents.</normal_text>
+ <normal_text> - Use this tag for text not derived from tools or user documents. It should only be for narrative-like text or extremely common knowledge information.</normal_text>
<citations>
- <citation> - Provide proper citations for each <grounded_text>, referencing the tool or document chunk used.</citation>
+ <citation> - Provide proper citations for each <grounded_text>, referencing the tool or document chunk used. ENSURE THAT THERE IS A CITATION WHOSE INDEX MATCHES FOR EVERY GROUNDED TEXT CITATION INDEX. </citation>
</citations>
<follow_up_questions> - Provide exactly three user-perspective follow-up questions.</follow_up_questions>
<loop_summary> - Summarize the actions and tools used in the conversation.</loop_summary>
@@ -41,7 +62,8 @@ export function getReactPrompt(tools: Tool[], summaries: () => string, chatHisto
<grounded_text_guidelines>
<step>**Wrap ALL tool-based information** in <grounded_text> tags and provide citations.</step>
<step>Use separate <grounded_text> tags for distinct information or when switching to a different tool or document.</step>
- <step>Ensure that **EVERY** <grounded_text> tag includes a citation index referencing the source of the information.</step>
+ <step>Ensure that **EVERY** <grounded_text> tag includes a citation index aligned with a citation that you provide that references the source of the information.</step>
+ <step>There should be a one-to-one relationship between <grounded_text> tags and citations.</step>
<step>Over-citing is discouraged—only cite the information that is directly relevant to the user's query.</step>
</grounded_text_guidelines>
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 6dc691798..28bfbeae3 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -51,8 +51,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
private vectorstore_id: string;
private vectorstore: Vectorstore;
private agent: Agent;
- private _oldWheel: HTMLDivElement | null = null;
- private messagesRef: React.RefObject;
+ private messagesRef: React.RefObject<HTMLDivElement>;
/**
* Static method that returns the layout string for the field.
@@ -246,12 +245,12 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
* @param event The form submission event.
*/
@action
- askGPT = async (event: React.FormEvent): Promise => {
+ askGPT = async (event: React.FormEvent): Promise<any> => {
event.preventDefault();
this.inputValue = '';
// Extract the user's message
- const textInput = event.currentTarget.elements.namedItem('messageInput') as HTMLInputElement;
+ const textInput = (event.currentTarget as HTMLFormElement).elements.namedItem('messageInput') as HTMLInputElement;
const trimmedText = textInput.value.trim();
if (trimmedText) {
diff --git a/src/client/views/nodes/chatbot/tools/RAGTool.ts b/src/client/views/nodes/chatbot/tools/RAGTool.ts
index c24306dcd..f4b7b42ea 100644
--- a/src/client/views/nodes/chatbot/tools/RAGTool.ts
+++ b/src/client/views/nodes/chatbot/tools/RAGTool.ts
@@ -14,114 +14,58 @@ export class RAGTool extends BaseTool {
{
hypothetical_document_chunk: {
type: 'string',
- description:
- "Detailed version of the prompt that is effectively a hypothetical document chunk that would be ideal to embed and compare to the vectors of real document chunks to fetch the most relevant document chunks to answer the user's query",
+ description: "A detailed prompt representing an ideal chunk to embed and compare against document vectors to retrieve the most relevant content for answering the user's query.",
required: 'true',
},
},
`
- Your task is to provide a comprehensive response to the user's prompt based on the given chunks and chat history. Follow these structural guidelines meticulously:
+ When using the RAG tool, the structure must adhere to the format described in the ReAct prompt. Below are additional guidelines specifically for RAG-based responses:
- 1. Overall Structure:
- <answer>
- [Main content with grounded_text tags interspersed with normal plain text (information that is not derived from chunks' information)]
- <citations>
- [Individual citation tags]
- </citations>
- <follow_up_questions>
- [Three question tags]
- </follow_up_questions>
- </answer>
-
- 2. Grounded Text Tag Structure:
- - Basic format:
- <grounded_text citation_index="[citation index number(s)]">
- [Your generated text based on information from a subset of a chunk (a citation's direct text)]
- </grounded_text>
+ 1. **Grounded Text Guidelines**:
+ - Each <grounded_text> tag must correspond to exactly one citation, ensuring a one-to-one relationship.
+ - Always cite a **subset** of the chunk, never the full text. The citation should be as short as possible while providing the relevant information (typically one to two sentences).
+ - Do not paraphrase the chunk text in the citation; use the original subset directly from the chunk.
+ - If multiple citations are needed for different sections of the response, create new <grounded_text> tags for each.
- 3. Citation Tag Structure:
- <citation index="[unique number]" chunk_id="[UUID v4]" type="[text/image/table]">
- [For text: relevant subset of original chunk]
- [For image/table: leave empty]
- </citation>
+ 2. **Citation Guidelines**:
+ - The citation must include only the relevant excerpt from the chunk being referenced.
+ - Use unique citation indices and reference the chunk_id for the source of the information.
+ - For text chunks, the citation content must reflect the **exact subset** of the original chunk that is relevant to the grounded_text tag.
- 4. Detailed Grounded Text Guidelines:
- a. Wrap all information derived from chunks in grounded_text tags.
- b. DO NOT PUT ANYTHING THAT IS NOT DIRECTLY DERIVED FROM INFORMATION FROM CHUNKS (EITHER IMAGE, TABLE, OR TEXT) IN GROUNDED_TEXT TAGS.
- c. Use a single grounded_text tag for suquential and closely related information that references the same citation. If other citations' information are used sequentially, create new grounded_text tags.
- d. Ensure every grounded_text tag has up to a few corresponding citations (should not be more than 3 and only 1 is fine). Multiple citation indices should be separated by commas.
- e. Grounded text can be as short as a few words or as long as several sentences.
- f. Avoid overlapping or nesting grounded_text tags; instead, use sequential tags.
-
- 5. Detailed Citation Guidelines:
- a. Create a unique citation for each distinct piece of information from the chunks that is used to support grounded_text.
- b. ALL TEXT CITATIONS must have direct text in its element content (e.g. <citation ...>DIRECT TEXT HERE</citation>) that is a relevant SUBSET of the original text chunk that is being cited specifically.
- c. DO NOT paraphrase or summarize the text; use the original text as much as possible.
- d. DO NOT USE THE FULL TEXT CHUNK as the citation content; only use the relevant subset of the text that the grounded_text is base. AS SHORT AS POSSIBLE WHILE PROVIDING INFORMATION (ONE TO TWO SENTENCES USUALLY)!
- e. Ensure each citation has a unique index number.
- f. Specify the correct type: "text", "image", or "table".
- g. For text chunks, the content of the citation should ALWAYS have the relevant subset of the original text that the grounded_text is based on.
- h. For image/table chunks, leave the citation content empty.
- i. One citation can be used for multiple grounded_text tags if they are based on the same chunk information.
- j. !!!DO NOT OVERCITE - only include citations for information that is directly relevant to the grounded_text.
-
- 6. Structural Integrity Checks:
- a. Ensure all opening tags have corresponding closing tags.
- b. Verify that all grounded_text tags have valid citation_index attributes (they should be equal to the associated citation(s) index field—not their chunk_id field).
- c. Check that all cited indices in grounded_text tags have corresponding citations.
-
- Example of grounded_text usage:
+ **Example**:
<answer>
- <grounded_text citation_index="1,2">
- Artificial Intelligence (AI) is revolutionizing various sectors, with healthcare experiencing significant transformations in areas such as diagnosis and treatment planning.
- </grounded_text>
- <grounded_text citation_index="2,3,4">
- In the field of medical diagnosis, AI has shown remarkable capabilities, particularly in radiology. For instance, AI systems have drastically improved mammogram analysis, achieving 99% accuracy at a rate 30 times faster than human radiologists.
+ <grounded_text citation_index="1">
+ Artificial Intelligence is revolutionizing various sectors, with healthcare seeing transformations in diagnosis and treatment planning.
</grounded_text>
- <grounded_text citation_index="4">
- This advancement not only enhances the efficiency of healthcare systems but also significantly reduces the occurrence of false positives, leading to fewer unnecessary biopsies and reduced patient stress.
+ <grounded_text citation_index="2">
+ Based on recent data, AI has drastically improved mammogram analysis, achieving 99% accuracy at a rate 30 times faster than human radiologists.
</grounded_text>
-
- <grounded_text citation_index="5,6">
- Beyond diagnosis, AI is playing a crucial role in drug discovery and development. By analyzing vast amounts of genetic and molecular data, AI algorithms can identify potential drug candidates much faster than traditional methods.
- </grounded_text>
- <grounded_text citation_index="6">
- This could potentially reduce the time and cost of bringing new medications to market, especially for rare diseases that have historically received less attention due to limited market potential.
- </grounded_text>
-
- [... rest of the content ...]
<citations>
- <citation index="1" chunk_id="123e4567-e89b-12d3-a456-426614174000" type="text">Artificial Intelligence is revolutionizing various industries, with healthcare being one of the most profoundly affected sectors.</citation>
- <citation index="2" chunk_id="123e4567-e89b-12d3-a456-426614174001" type="text">AI has shown particular promise in the field of radiology, enhancing the accuracy and speed of image analysis.</citation>
- <citation index="3" chunk_id="123e4567-e89b-12d3-a456-426614174002" type="text">According to recent studies, AI systems have achieved 99% accuracy in mammogram analysis, performing the task 30 times faster than human radiologists.</citation>
- <citation index="4" chunk_id="123e4567-e89b-12d3-a456-426614174003" type="text">The improvement in mammogram accuracy has led to a significant reduction in false positives, decreasing the need for unnecessary biopsies and reducing patient anxiety.</citation>
- <citation index="5" chunk_id="123e4567-e89b-12d3-a456-426614174004" type="text">AI is accelerating the drug discovery process by analyzing complex molecular and genetic data to identify potential drug candidates.</citation>
- <citation index="6" chunk_id="123e4567-e89b-12d3-a456-426614174005" type="text">The use of AI in drug discovery could significantly reduce the time and cost associated with bringing new medications to market, particularly for rare diseases.</citation>
+ <citation index="1" chunk_id="abc123" type="text">Artificial Intelligence is revolutionizing various industries, especially in healthcare.</citation>
+ <citation index="2" chunk_id="abc124" type="table"></citation>
</citations>
<follow_up_questions>
- <question>How might AI-driven personalized medicine impact the cost and accessibility of healthcare in the future?</question>
- <question>What measures can be taken to ensure that AI systems in healthcare are free from biases and equally effective for diverse populations?</question>
- <question>How could the role of healthcare professionals evolve as AI becomes more integrated into medical practices?</question>
+ <question>How can AI enhance patient outcomes in fields outside radiology?</question>
+ <question>What are the challenges in implementing AI systems across different hospitals?</question>
+ <question>How might AI-driven advancements impact healthcare costs?</question>
</follow_up_questions>
</answer>
`,
- `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a
- set of document chunks (either images or text) that can be used to provide a grounded response based on
- user documents`
+ `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a set of document chunks (text or images) to provide a grounded response based on user documents.`
);
}
- async execute(args: { hypothetical_document_chunk: string }): Promise {
+ async execute(args: { hypothetical_document_chunk: string }): Promise<any> {
const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk);
const formatted_chunks = await this.getFormattedChunks(relevantChunks);
return formatted_chunks;
}
- async getFormattedChunks(relevantChunks: RAGChunk[]): Promise {
+ async getFormattedChunks(relevantChunks: RAGChunk[]): Promise<any> {
try {
const { formattedChunks } = await Networking.PostToServer('/formatChunks', { relevantChunks });