src/client/views/nodes/ChatBox/tools/RAGTool.ts


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134

import { BaseTool } from './BaseTool';
import { Vectorstore } from '../vectorstore/Vectorstore';
import { RAGChunk } from '../types';
import * as fs from 'fs';
import { Networking } from '../../../../Network';
import { file } from 'jszip';

/**
 * Chat tool that performs a RAG (Retrieval-Augmented Generation) search on user documents.
 *
 * Given a "hypothetical document chunk" (a detailed rewording of the user's prompt), the tool
 * asks the vector store for matching chunks and then has the server format those chunks via
 * the `/formatChunks` route so they can be handed back to the model.
 */
export class RAGTool extends BaseTool<{ hypothetical_document_chunk: string }> {
    /**
     * @param vectorstore Store queried by {@link RAGTool.execute} to retrieve document chunks.
     */
    constructor(private readonly vectorstore: Vectorstore) {
        // NOTE(review): the meaning of each positional argument is defined by BaseTool, which is
        // not visible from this file. They appear to be: tool name, short description, parameter
        // schema, response/citation rules for the model, and a longer summary — confirm against
        // BaseTool's constructor.
        super(
            'rag',
            'Perform a RAG search on user documents',
            {
                hypothetical_document_chunk: {
                    type: 'string',
                    description:
                        "Detailed version of the prompt that is effectively a hypothetical document chunk that would be ideal to embed and compare to the vectors of real document chunks to fetch the most relevant document chunks to answer the user's query",
                    // NOTE(review): this is the string 'true', not a boolean — confirm that is
                    // what BaseTool's parameter schema expects.
                    required: 'true',
                },
            },
            `
            Your task is to provide a comprehensive response to the user's prompt based on the given chunks and chat history. Follow these structural guidelines meticulously:

            1. Overall Structure:
            <answer>
                [Main content with grounded_text tags interspersed with normal plain text (information that is not derived from chunks' information)]
                <citations>
                [Individual citation tags]
                </citations>
                <follow_up_questions>
                [Three question tags]
                </follow_up_questions>
            </answer>

            2. Grounded Text Tag Structure:
            - Basic format: 
                <grounded_text citation_index="[citation index number(s)]">
                [Your generated text based on information from a subset of a chunk (a citation's direct text)]
                </grounded_text>

            3. Citation Tag Structure:
            <citation index="[unique number]" chunk_id="[UUID v4]" type="[text/image/table]">
                [For text: relevant subset of original chunk]
                [For image/table: leave empty]
            </citation>

            4. Detailed Grounded Text Guidelines:
                a. Wrap all information derived from chunks in grounded_text tags.
                b. DO NOT PUT ANYTHING THAT IS NOT DIRECTLY DERIVED FROM INFORMATION FROM CHUNKS (EITHER IMAGE, TABLE, OR TEXT) IN GROUNDED_TEXT TAGS.
                c. Use a single grounded_text tag for suquential and closely related information that references the same citation. If other citations' information are used sequentially, create new grounded_text tags.
                d. Ensure every grounded_text tag has up to a few corresponding citations (should not be more than 3 and only 1 is fine). Multiple citation indices should be separated by commas.
                e. Grounded text can be as short as a few words or as long as several sentences.
                f. Avoid overlapping or nesting grounded_text tags; instead, use sequential tags.

            5. Detailed Citation Guidelines:
                a. Create a unique citation for each distinct piece of information from the chunks that is used to support grounded_text.
                b. Ensure each citation has a unique index number.
                c. Specify the correct type: "text", "image", or "table".
                d. For text chunks, include only the relevant subset of the original text that the grounded_text is based on.
                e. For image/table chunks, leave the citation content empty.
                f. One citation can be used for multiple grounded_text tags if they are based on the same chunk information.
                g. !!!DO NOT OVERCITE - only include citations for information that is directly relevant to the grounded_text.

            6. Structural Integrity Checks:
                a. Ensure all opening tags have corresponding closing tags.
                b. Verify that all grounded_text tags have valid citation_index attributes (they should be equal to the associated citation(s) index field—not their chunk_id field).
                c. Check that all cited indices in grounded_text tags have corresponding citations.

            Example of grounded_text usage:

            <answer>
                <grounded_text citation_index="1,2">
                Artificial Intelligence (AI) is revolutionizing various sectors, with healthcare experiencing significant transformations in areas such as diagnosis and treatment planning.
                </grounded_text>
                <grounded_text citation_index="2,3,4">
                In the field of medical diagnosis, AI has shown remarkable capabilities, particularly in radiology. For instance, AI systems have drastically improved mammogram analysis, achieving 99% accuracy at a rate 30 times faster than human radiologists.
                </grounded_text>
                <grounded_text citation_index="4">
                This advancement not only enhances the efficiency of healthcare systems but also significantly reduces the occurrence of false positives, leading to fewer unnecessary biopsies and reduced patient stress.
                </grounded_text>
            
                <grounded_text citation_index="5,6">
                Beyond diagnosis, AI is playing a crucial role in drug discovery and development. By analyzing vast amounts of genetic and molecular data, AI algorithms can identify potential drug candidates much faster than traditional methods.
                </grounded_text>
                <grounded_text citation_index="6">
                    This could potentially reduce the time and cost of bringing new medications to market, especially for rare diseases that have historically received less attention due to limited market potential.
                </grounded_text>

                [... rest of the content ...]

                <citations>
                    <citation index="1" chunk_id="123e4567-e89b-12d3-a456-426614174000" type="text">Artificial Intelligence is revolutionizing various industries, with healthcare being one of the most profoundly affected sectors.</citation>
                    <citation index="2" chunk_id="123e4567-e89b-12d3-a456-426614174001" type="text">AI has shown particular promise in the field of radiology, enhancing the accuracy and speed of image analysis.</citation>
                    <citation index="3" chunk_id="123e4567-e89b-12d3-a456-426614174002" type="text">According to recent studies, AI systems have achieved 99% accuracy in mammogram analysis, performing the task 30 times faster than human radiologists.</citation>
                    <citation index="4" chunk_id="123e4567-e89b-12d3-a456-426614174003" type="text">The improvement in mammogram accuracy has led to a significant reduction in false positives, decreasing the need for unnecessary biopsies and reducing patient anxiety.</citation>
                    <citation index="5" chunk_id="123e4567-e89b-12d3-a456-426614174004" type="text">AI is accelerating the drug discovery process by analyzing complex molecular and genetic data to identify potential drug candidates.</citation>
                    <citation index="6" chunk_id="123e4567-e89b-12d3-a456-426614174005" type="text">The use of AI in drug discovery could significantly reduce the time and cost associated with bringing new medications to market, particularly for rare diseases.</citation>
                </citations>

                <follow_up_questions>
                    <question>How might AI-driven personalized medicine impact the cost and accessibility of healthcare in the future?</question>
                    <question>What measures can be taken to ensure that AI systems in healthcare are free from biases and equally effective for diverse populations?</question>
                    <question>How could the role of healthcare professionals evolve as AI becomes more integrated into medical practices?</question>
                </follow_up_questions>
            </answer>
            `,

            `Performs a RAG (Retrieval-Augmented Generation) search on user documents and returns a 
            set of document chunks (either images or text) that can be used to provide a grounded response based on 
            user documents`
        );
    }

    /**
     * Retrieves chunks from the vector store for the given hypothetical document chunk and
     * returns them in the server-formatted form produced by {@link RAGTool.getFormattedChunks}.
     *
     * The declared return type is kept as `Promise<any>` so the method's external signature is
     * unchanged; the resolved value is the array returned by `getFormattedChunks`.
     *
     * @param args Tool arguments; `hypothetical_document_chunk` is passed to `vectorstore.retrieve`.
     * @returns The formatted chunks.
     * @throws Whatever `vectorstore.retrieve` or `getFormattedChunks` throws.
     */
    async execute(args: { hypothetical_document_chunk: string }): Promise<any> {
        const relevantChunks = await this.vectorstore.retrieve(args.hypothetical_document_chunk);
        return this.getFormattedChunks(relevantChunks);
    }

    /**
     * Posts the retrieved chunks to the server's `/formatChunks` route and returns the
     * `formattedChunks` array from the response.
     *
     * @param relevantChunks Chunks to send to the server for formatting.
     * @returns The `formattedChunks` array from the server response.
     * @throws Error with message 'Failed to format chunks' when the response is empty or does not
     *         carry a `formattedChunks` array; any error raised by the request itself is logged
     *         and rethrown unchanged.
     */
    async getFormattedChunks(relevantChunks: RAGChunk[]): Promise<{ type: string; text?: string; image_url?: { url: string } }[]> {
        try {
            const response = await Networking.PostToServer('/formatChunks', { relevantChunks });
            // Optional chaining: a null/undefined response should surface as the descriptive
            // error below rather than as a "cannot destructure" TypeError.
            const formattedChunks: unknown = response?.formattedChunks;

            // Enforce the declared array contract instead of accepting any truthy value
            // (an empty array is still a valid result).
            if (!Array.isArray(formattedChunks)) {
                throw new Error('Failed to format chunks');
            }

            return formattedChunks;
        } catch (error) {
            // Log with context for debugging, then let the caller decide how to handle it.
            console.error('Error formatting chunks:', error);
            throw error;
        }
    }
}