From 210f8f5f1cd19e9416a12524cce119b273334fd3 Mon Sep 17 00:00:00 2001
From: "A.J. Shulman" <Shulman.aj@gmail.com>
Date: Sat, 7 Sep 2024 11:48:36 -0400
Subject: reorganized parsers, added comments to vectorstore, and added
 citation popup for text citations

---
 src/client/views/nodes/ChatBox/Agent.ts            |   4 +-
 src/client/views/nodes/ChatBox/AnswerParser.ts     | 125 ---------------------
 src/client/views/nodes/ChatBox/ChatBox.scss        |  27 +++++
 src/client/views/nodes/ChatBox/ChatBox.tsx         |  12 ++
 src/client/views/nodes/ChatBox/ChunkManager.ts     |  24 ----
 .../views/nodes/ChatBox/StreamedAnswerParser.ts    |  73 ------------
 .../nodes/ChatBox/response_parsers/AnswerParser.ts | 125 +++++++++++++++++++++
 .../response_parsers/StreamedAnswerParser.ts       |  73 ++++++++++++
 .../views/nodes/ChatBox/vectorstore/Vectorstore.ts | 125 ++++++++++++++-------
 9 files changed, 326 insertions(+), 262 deletions(-)
 delete mode 100644 src/client/views/nodes/ChatBox/AnswerParser.ts
 delete mode 100644 src/client/views/nodes/ChatBox/ChunkManager.ts
 delete mode 100644 src/client/views/nodes/ChatBox/StreamedAnswerParser.ts
 create mode 100644 src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts
 create mode 100644 src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts

(limited to 'src')
diff --git a/src/client/views/nodes/ChatBox/Agent.ts b/src/client/views/nodes/ChatBox/Agent.ts
index eaa17d283..9eb069c78 100644
--- a/src/client/views/nodes/ChatBox/Agent.ts
+++ b/src/client/views/nodes/ChatBox/Agent.ts
@@ -13,8 +13,8 @@ import { SearchTool } from './tools/SearchTool';
 import { NoTool } from './tools/NoTool';
 import { on } from 'events';
 import { v4 as uuidv4 } from 'uuid';
-import { AnswerParser } from './AnswerParser';
-import { StreamedAnswerParser } from './StreamedAnswerParser';
+import { AnswerParser } from './response_parsers/AnswerParser';
+import { StreamedAnswerParser } from './response_parsers/StreamedAnswerParser';
 import { CreateCSVTool } from './tools/CreateCSVTool';
 
 dotenv.config();
diff --git a/src/client/views/nodes/ChatBox/AnswerParser.ts b/src/client/views/nodes/ChatBox/AnswerParser.ts
deleted file mode 100644
index 885114195..000000000
--- a/src/client/views/nodes/ChatBox/AnswerParser.ts
+++ /dev/null
@@ -1,125 +0,0 @@
-import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType, ProcessingInfo } from './types';
-import { v4 as uuid } from 'uuid';
-
-export class AnswerParser {
-    static parse(xml: string, processingInfo: ProcessingInfo[]): AssistantMessage {
-        const answerRegex = /<answer>([\s\S]*?)<\/answer>/;
-        const citationsRegex = /<citations>([\s\S]*?)<\/citations>/;
-        const citationRegex = /<citation index="([^"]+)" chunk_id="([^"]+)" type="([^"]+)">([\s\S]*?)<\/citation>/g;
-        const followUpQuestionsRegex = /<follow_up_questions>([\s\S]*?)<\/follow_up_questions>/;
-        const questionRegex = /<question>(.*?)<\/question>/g;
-        const groundedTextRegex = /<grounded_text citation_index="([^"]+)">([\s\S]*?)<\/grounded_text>/g;
-        const normalTextRegex = /<normal_text>([\s\S]*?)<\/normal_text>/g;
-        const loopSummaryRegex = /<loop_summary>([\s\S]*?)<\/loop_summary>/;
-
-        const answerMatch = answerRegex.exec(xml);
-        const citationsMatch = citationsRegex.exec(xml);
-        const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml);
-        const loopSummaryMatch = loopSummaryRegex.exec(xml);
-
-        if (!answerMatch) {
-            throw new Error('Invalid XML: Missing <answer> tag.');
-        }
-
-        let rawTextContent = answerMatch[1].trim();
-        let content: AssistantMessage['content'] = [];
-        let citations: Citation[] = [];
-        let contentIndex = 0;
-
-        // Remove citations and follow-up questions from rawTextContent
-        if (citationsMatch) {
-            rawTextContent = rawTextContent.replace(citationsMatch[0], '').trim();
-        }
-        if (followUpQuestionsMatch) {
-            rawTextContent = rawTextContent.replace(followUpQuestionsMatch[0], '').trim();
-        }
-        if (loopSummaryMatch) {
-            rawTextContent = rawTextContent.replace(loopSummaryMatch[0], '').trim();
-        }
-
-        // Parse citations
-        let citationMatch;
-        const citationMap = new Map<string, string>();
-        if (citationsMatch) {
-            const citationsContent = citationsMatch[1];
-            while ((citationMatch = citationRegex.exec(citationsContent)) !== null) {
-                const [_, index, chunk_id, type, direct_text] = citationMatch;
-                const citation_id = uuid();
-                citationMap.set(index, citation_id);
-                citations.push({
-                    direct_text: direct_text.trim(),
-                    type: getChunkType(type),
-                    chunk_id,
-                    citation_id,
-                });
-            }
-        }
-
-        rawTextContent = rawTextContent.replace(normalTextRegex, '$1');
-
-        // Parse text content (normal and grounded)
-        let lastIndex = 0;
-        let match;
-
-        while ((match = groundedTextRegex.exec(rawTextContent)) !== null) {
-            const [fullMatch, citationIndex, groundedText] = match;
-
-            // Add normal text that is before the grounded text
-            if (match.index > lastIndex) {
-                const normalText = rawTextContent.slice(lastIndex, match.index).trim();
-                if (normalText) {
-                    content.push({
-                        index: contentIndex++,
-                        type: TEXT_TYPE.NORMAL,
-                        text: normalText,
-                        citation_ids: null,
-                    });
-                }
-            }
-
-            // Add grounded text
-            const citation_ids = citationIndex.split(',').map(index => citationMap.get(index) || '');
-            content.push({
-                index: contentIndex++,
-                type: TEXT_TYPE.GROUNDED,
-                text: groundedText.trim(),
-                citation_ids,
-            });
-
-            lastIndex = match.index + fullMatch.length;
-        }
-
-        // Add any remaining normal text after the last grounded text
-        if (lastIndex < rawTextContent.length) {
-            const remainingText = rawTextContent.slice(lastIndex).trim();
-            if (remainingText) {
-                content.push({
-                    index: contentIndex++,
-                    type: TEXT_TYPE.NORMAL,
-                    text: remainingText,
-                    citation_ids: null,
-                });
-            }
-        }
-
-        let followUpQuestions: string[] = [];
-        if (followUpQuestionsMatch) {
-            const questionsText = followUpQuestionsMatch[1];
-            let questionMatch;
-            while ((questionMatch = questionRegex.exec(questionsText)) !== null) {
-                followUpQuestions.push(questionMatch[1].trim());
-            }
-        }
-
-        const assistantResponse: AssistantMessage = {
-            role: ASSISTANT_ROLE.ASSISTANT,
-            content,
-            follow_up_questions: followUpQuestions,
-            citations,
-            processing_info: processingInfo,
-            loop_summary: loopSummaryMatch ? loopSummaryMatch[1].trim() : undefined,
-        };
-
-        return assistantResponse;
-    }
-}
diff --git a/src/client/views/nodes/ChatBox/ChatBox.scss b/src/client/views/nodes/ChatBox/ChatBox.scss
index adb0663c3..42f6a0d61 100644
--- a/src/client/views/nodes/ChatBox/ChatBox.scss
+++ b/src/client/views/nodes/ChatBox/ChatBox.scss
@@ -116,6 +116,33 @@ $transition: all 0.3s ease;
             }
         }
     }
+    .citation-popup { // Transient overlay that shows the quoted text of a clicked text citation
+        position: fixed;
+        bottom: 50px;
+        left: 50%; // left: 50% plus translateX(-50%) centers the popup horizontally
+        transform: translateX(-50%);
+        background-color: rgba(0, 0, 0, 0.8);
+        color: white;
+        padding: 10px 20px;
+        border-radius: 10px;
+        box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);
+        z-index: 1000;
+        animation: fadeIn 0.3s ease-in-out; // fade-in only; there is no matching fade-out animation here
+
+        p {
+            margin: 0;
+            font-size: 14px;
+        }
+
+        @keyframes fadeIn { // opacity 0 -> 1, referenced by the animation property above
+            from {
+                opacity: 0;
+            }
+            to {
+                opacity: 1;
+            }
+        }
+    }
 }
 
 .message {
diff --git a/src/client/views/nodes/ChatBox/ChatBox.tsx b/src/client/views/nodes/ChatBox/ChatBox.tsx
index ff699aab3..98a2e6002 100644
--- a/src/client/views/nodes/ChatBox/ChatBox.tsx
+++ b/src/client/views/nodes/ChatBox/ChatBox.tsx
@@ -44,6 +44,7 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
     @observable private linked_docs_to_add: ObservableSet = observable.set();
     @observable private linked_csv_files: { filename: string; id: string; text: string }[] = [];
     @observable private isUploadingDocs: boolean = false;
+    @observable private citationPopup: { text: string; visible: boolean } = { text: '', visible: false };
 
     // Private properties for managing OpenAI API, vector store, agent, and UI elements
     private openai: OpenAI;
@@ -450,6 +451,9 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                             DocumentManager.Instance.showDocument(highlightDoc, { willZoomCentered: true }, () => {});
                             break;
                         case CHUNK_TYPE.TEXT:
+                            this.citationPopup = { text: citation.direct_text ?? 'No text available', visible: true };
+                            setTimeout(() => (this.citationPopup.visible = false), 3000); // Hide after 3 seconds
+
                             DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {
                                 const firstView = Array.from(doc[DocViews])[0] as DocumentView;
                                 firstView.ComponentView?.search?.(citation.direct_text ?? '');
@@ -730,6 +734,14 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
                         )}
                     </button>
                 </form>
+                {/* Popup for citation */}
+                {this.citationPopup.visible && (
+                    <div className="citation-popup">
+                        <p>
+                            <strong>Text from your document: </strong> {this.citationPopup.text}
+                        </p>
+                    </div>
+                )}
             </div>
         );
     }
diff --git a/src/client/views/nodes/ChatBox/ChunkManager.ts b/src/client/views/nodes/ChatBox/ChunkManager.ts
deleted file mode 100644
index 64c073640..000000000
--- a/src/client/views/nodes/ChatBox/ChunkManager.ts
+++ /dev/null
@@ -1,24 +0,0 @@
-import { SimplifiedChunk } from './types';
-
-class ChunkManager {
-    private chunks: SimplifiedChunk[];
-
-    constructor() {
-        this.chunks = [];
-    }
-
-    addChunk(chunk: SimplifiedChunk) {
-        this.chunks.push(chunk);
-    }
-
-    removeChunk(chunk: SimplifiedChunk) {
-        const index = this.chunks.indexOf(chunk);
-        if (index !== -1) {
-            this.chunks.splice(index, 1);
-        }
-    }
-
-    getChunks() {
-        return this.chunks;
-    }
-}
diff --git a/src/client/views/nodes/ChatBox/StreamedAnswerParser.ts b/src/client/views/nodes/ChatBox/StreamedAnswerParser.ts
deleted file mode 100644
index 3585cab4a..000000000
--- a/src/client/views/nodes/ChatBox/StreamedAnswerParser.ts
+++ /dev/null
@@ -1,73 +0,0 @@
-import { threadId } from 'worker_threads';
-
-enum ParserState {
-    Outside,
-    InGroundedText,
-    InNormalText,
-}
-
-export class StreamedAnswerParser {
-    private state: ParserState = ParserState.Outside;
-    private buffer: string = '';
-    private result: string = '';
-    private isStartOfLine: boolean = true;
-
-    public parse(char: string): string {
-        switch (this.state) {
-            case ParserState.Outside:
-                if (char === '<') {
-                    this.buffer = '<';
-                } else if (char === '>') {
-                    if (this.buffer.startsWith('<grounded_text')) {
-                        this.state = ParserState.InGroundedText;
-                    } else if (this.buffer.startsWith('<normal_text')) {
-                        this.state = ParserState.InNormalText;
-                    }
-                    this.buffer = '';
-                } else {
-                    this.buffer += char;
-                }
-                break;
-
-            case ParserState.InGroundedText:
-            case ParserState.InNormalText:
-                if (char === '<') {
-                    this.buffer = '<';
-                } else if (this.buffer.startsWith('</grounded_text') && char === '>') {
-                    this.state = ParserState.Outside;
-                    this.buffer = '';
-                } else if (this.buffer.startsWith('</normal_text') && char === '>') {
-                    this.state = ParserState.Outside;
-                    this.buffer = '';
-                } else if (this.buffer.startsWith('<')) {
-                    this.buffer += char;
-                } else {
-                    this.processChar(char);
-                }
-                break;
-        }
-
-        return this.result.trim();
-    }
-
-    private processChar(char: string): void {
-        if (this.isStartOfLine && char === ' ') {
-            // Skip leading spaces
-            return;
-        }
-        if (char === '\n') {
-            this.result += char;
-            this.isStartOfLine = true;
-        } else {
-            this.result += char;
-            this.isStartOfLine = false;
-        }
-    }
-
-    public reset(): void {
-        this.state = ParserState.Outside;
-        this.buffer = '';
-        this.result = '';
-        this.isStartOfLine = true;
-    }
-}
diff --git a/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts b/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts
new file mode 100644
index 000000000..79b53b0a3
--- /dev/null
+++ b/src/client/views/nodes/ChatBox/response_parsers/AnswerParser.ts
@@ -0,0 +1,125 @@
+import { ASSISTANT_ROLE, AssistantMessage, Citation, CHUNK_TYPE, TEXT_TYPE, getChunkType, ProcessingInfo } from '../types';
+import { v4 as uuid } from 'uuid';
+
+export class AnswerParser { // Turns the model's XML-tagged final answer into an AssistantMessage.
+    static parse(xml: string, processingInfo: ProcessingInfo[]): AssistantMessage { // Throws an Error when no <answer> element is found.
+        const answerRegex = /<answer>([\s\S]*?)<\/answer>/;
+        const citationsRegex = /<citations>([\s\S]*?)<\/citations>/;
+        const citationRegex = /<citation index="([^"]+)" chunk_id="([^"]+)" type="([^"]+)">([\s\S]*?)<\/citation>/g; // attributes must appear in exactly this order
+        const followUpQuestionsRegex = /<follow_up_questions>([\s\S]*?)<\/follow_up_questions>/;
+        const questionRegex = /<question>(.*?)<\/question>/g; // `.` does not match newlines, so each question must sit on one line
+        const groundedTextRegex = /<grounded_text citation_index="([^"]+)">([\s\S]*?)<\/grounded_text>/g;
+        const normalTextRegex = /<normal_text>([\s\S]*?)<\/normal_text>/g;
+        const loopSummaryRegex = /<loop_summary>([\s\S]*?)<\/loop_summary>/;
+
+        const answerMatch = answerRegex.exec(xml);
+        const citationsMatch = citationsRegex.exec(xml); // this and the next two searches run over the whole input, not only <answer>
+        const followUpQuestionsMatch = followUpQuestionsRegex.exec(xml);
+        const loopSummaryMatch = loopSummaryRegex.exec(xml);
+
+        if (!answerMatch) {
+            throw new Error('Invalid XML: Missing <answer> tag.');
+        }
+
+        let rawTextContent = answerMatch[1].trim();
+        let content: AssistantMessage['content'] = [];
+        let citations: Citation[] = [];
+        let contentIndex = 0; // running position assigned to each content segment
+
+        // Remove the citations, follow-up questions and loop summary sections from rawTextContent
+        if (citationsMatch) {
+            rawTextContent = rawTextContent.replace(citationsMatch[0], '').trim();
+        }
+        if (followUpQuestionsMatch) {
+            rawTextContent = rawTextContent.replace(followUpQuestionsMatch[0], '').trim();
+        }
+        if (loopSummaryMatch) {
+            rawTextContent = rawTextContent.replace(loopSummaryMatch[0], '').trim();
+        }
+
+        // Parse citations, recording index -> generated citation_id for the grounded-text pass below
+        let citationMatch;
+        const citationMap = new Map<string, string>();
+        if (citationsMatch) {
+            const citationsContent = citationsMatch[1];
+            while ((citationMatch = citationRegex.exec(citationsContent)) !== null) {
+                const [_, index, chunk_id, type, direct_text] = citationMatch;
+                const citation_id = uuid(); // fresh id per citation; the index from the XML is only a lookup key
+                citationMap.set(index, citation_id);
+                citations.push({
+                    direct_text: direct_text.trim(),
+                    type: getChunkType(type),
+                    chunk_id,
+                    citation_id,
+                });
+            }
+        }
+
+        rawTextContent = rawTextContent.replace(normalTextRegex, '$1'); // unwrap <normal_text> tags, keeping their contents
+
+        // Split the remaining text into ordered NORMAL and GROUNDED segments
+        let lastIndex = 0;
+        let match;
+
+        while ((match = groundedTextRegex.exec(rawTextContent)) !== null) {
+            const [fullMatch, citationIndex, groundedText] = match;
+
+            // Add normal text that is before the grounded text
+            if (match.index > lastIndex) {
+                const normalText = rawTextContent.slice(lastIndex, match.index).trim();
+                if (normalText) {
+                    content.push({
+                        index: contentIndex++,
+                        type: TEXT_TYPE.NORMAL,
+                        text: normalText,
+                        citation_ids: null,
+                    });
+                }
+            }
+
+            // Add grounded text; indices are split on ',' without trimming and an unknown index becomes ''
+            const citation_ids = citationIndex.split(',').map(index => citationMap.get(index) || '');
+            content.push({
+                index: contentIndex++,
+                type: TEXT_TYPE.GROUNDED,
+                text: groundedText.trim(),
+                citation_ids,
+            });
+
+            lastIndex = match.index + fullMatch.length;
+        }
+
+        // Add any remaining normal text after the last grounded text
+        if (lastIndex < rawTextContent.length) {
+            const remainingText = rawTextContent.slice(lastIndex).trim();
+            if (remainingText) {
+                content.push({
+                    index: contentIndex++,
+                    type: TEXT_TYPE.NORMAL,
+                    text: remainingText,
+                    citation_ids: null,
+                });
+            }
+        }
+
+        let followUpQuestions: string[] = [];
+        if (followUpQuestionsMatch) {
+            const questionsText = followUpQuestionsMatch[1];
+            let questionMatch;
+            while ((questionMatch = questionRegex.exec(questionsText)) !== null) {
+                followUpQuestions.push(questionMatch[1].trim());
+            }
+        }
+
+        const assistantResponse: AssistantMessage = {
+            role: ASSISTANT_ROLE.ASSISTANT,
+            content,
+            follow_up_questions: followUpQuestions,
+            citations,
+            processing_info: processingInfo, // passed through unchanged from the caller
+            loop_summary: loopSummaryMatch ? loopSummaryMatch[1].trim() : undefined,
+        };
+
+        return assistantResponse;
+    }
+}
diff --git a/src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts b/src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts
new file mode 100644
index 000000000..3585cab4a
--- /dev/null
+++ b/src/client/views/nodes/ChatBox/response_parsers/StreamedAnswerParser.ts
@@ -0,0 +1,73 @@
+import { threadId } from 'worker_threads';
+
+enum ParserState { // Position of the streamed parser relative to the answer's text elements.
+    Outside, // not inside a <grounded_text> or <normal_text> element
+    InGroundedText,
+    InNormalText,
+}
+
+export class StreamedAnswerParser { // Incrementally extracts the text inside <grounded_text>/<normal_text> from a character stream.
+    private state: ParserState = ParserState.Outside;
+    private buffer: string = ''; // pending characters, used to recognize opening and closing tags
+    private result: string = ''; // text emitted so far
+    private isStartOfLine: boolean = true; // true until a character other than a leading space is emitted on the current line
+
+    public parse(char: string): string { // Feed one character; returns all text emitted so far, trimmed.
+        switch (this.state) {
+            case ParserState.Outside:
+                if (char === '<') {
+                    this.buffer = '<';
+                } else if (char === '>') { // end of a tag: enter a text state if it opened one of the two text elements
+                    if (this.buffer.startsWith('<grounded_text')) {
+                        this.state = ParserState.InGroundedText;
+                    } else if (this.buffer.startsWith('<normal_text')) {
+                        this.state = ParserState.InNormalText;
+                    }
+                    this.buffer = '';
+                } else {
+                    this.buffer += char;
+                }
+                break;
+
+            case ParserState.InGroundedText:
+            case ParserState.InNormalText: // both text states are handled identically
+                if (char === '<') { // NOTE(review): any '<', even a literal one in the text, starts tag buffering
+                    this.buffer = '<';
+                } else if (this.buffer.startsWith('</grounded_text') && char === '>') {
+                    this.state = ParserState.Outside;
+                    this.buffer = '';
+                } else if (this.buffer.startsWith('</normal_text') && char === '>') {
+                    this.state = ParserState.Outside;
+                    this.buffer = '';
+                } else if (this.buffer.startsWith('<')) { // still inside a tag: buffer the character instead of emitting it
+                    this.buffer += char;
+                } else {
+                    this.processChar(char);
+                }
+                break;
+        }
+
+        return this.result.trim();
+    }
+
+    private processChar(char: string): void { // Appends one text character to the result, dropping leading spaces on each line.
+        if (this.isStartOfLine && char === ' ') {
+            // Skip leading spaces (only ' ' is skipped, not tabs)
+            return;
+        }
+        if (char === '\n') {
+            this.result += char;
+            this.isStartOfLine = true;
+        } else {
+            this.result += char;
+            this.isStartOfLine = false;
+        }
+    }
+
+    public reset(): void { // Restores the initial state so the instance can parse a new stream.
+        this.state = ParserState.Outside;
+        this.buffer = '';
+        this.result = '';
+        this.isStartOfLine = true;
+    }
+}
diff --git a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts
index b5145c1f7..cc3b1ccd5 100644
--- a/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/ChatBox/vectorstore/Vectorstore.ts
@@ -2,47 +2,55 @@ import { Pinecone, Index, IndexList, PineconeRecord, RecordMetadata, QueryRespon
 import { CohereClient } from 'cohere-ai';
 import { EmbedResponse } from 'cohere-ai/api';
 import dotenv from 'dotenv';
-import axios from 'axios';
-import { SimplifiedChunk } from '../types';
-
 import { RAGChunk, AI_Document, CHUNK_TYPE } from '../types';
 import { Doc } from '../../../../../fields/Doc';
-import { DocData } from '../../../../../fields/DocSymbols';
 import { CsvCast, PDFCast, StrCast } from '../../../../../fields/Types';
 import { Networking } from '../../../../Network';
 
 dotenv.config();
 
+/**
+ * The Vectorstore class integrates with Pinecone for vector-based document indexing and retrieval,
+ * and Cohere for text embedding. It handles AI document management, uploads, and query-based retrieval.
+ */
 export class Vectorstore {
-    private pinecone: Pinecone;
-    private index!: Index;
-    private cohere: CohereClient;
-    private indexName: string = 'pdf-chatbot';
-    private _id: string;
-    private _doc_ids: string[] = [];
+    private pinecone: Pinecone; // Pinecone client for managing the vector index.
+    private index!: Index; // The specific Pinecone index used for document chunks.
+    private cohere: CohereClient; // Cohere client for generating embeddings.
+    private indexName: string = 'pdf-chatbot'; // Default name for the index.
+    private _id: string; // Unique ID for the Vectorstore instance.
+    private _doc_ids: string[] = []; // List of document IDs handled by this instance.
 
-    documents: AI_Document[] = [];
+    documents: AI_Document[] = []; // Store the documents indexed in the vectorstore.
 
+    /**
+     * Constructor initializes the Pinecone and Cohere clients, sets up the document ID list,
+     * and starts asynchronous initialization of the Pinecone index (the call is not awaited).
+     * @param id The unique identifier for the vectorstore instance.
+     * @param doc_ids A function returning the initial list of document IDs; it is called once, in the constructor.
+     */
     constructor(id: string, doc_ids: () => string[]) {
         const pineconeApiKey = process.env.PINECONE_API_KEY;
         if (!pineconeApiKey) {
             throw new Error('PINECONE_API_KEY is not defined.');
         }
 
-        this.pinecone = new Pinecone({
-            apiKey: pineconeApiKey,
-        });
-        this.cohere = new CohereClient({
-            token: process.env.COHERE_API_KEY,
-        });
+        // Initialize Pinecone and Cohere clients with API keys from the environment.
+        this.pinecone = new Pinecone({ apiKey: pineconeApiKey });
+        this.cohere = new CohereClient({ token: process.env.COHERE_API_KEY });
         this._id = id;
         this._doc_ids = doc_ids();
         this.initializeIndex();
     }
 
+    /**
+     * Initializes the Pinecone index by checking if it exists, and creating it if not.
+     * The index is set to use the cosine metric for vector similarity.
+     */
     private async initializeIndex() {
         const indexList: IndexList = await this.pinecone.listIndexes();
 
+        // Check if the index already exists, otherwise create it.
         if (!indexList.indexes?.some(index => index.name === this.indexName)) {
             await this.pinecone.createIndex({
                 name: this.indexName,
@@ -57,62 +65,76 @@ export class Vectorstore {
             });
         }
 
+        // Set the index for future use.
         this.index = this.pinecone.Index(this.indexName);
     }
 
+    /**
+     * Adds an AI document to the vectorstore. The file path is posted to the server's /createDocument job, the
+     * job is polled (reporting progress through the callback) until it completes, and the result is then indexed.
+     * @param doc The document to be added to the vectorstore.
+     * @param progressCallback Callback to update the progress of the upload.
+     */
     async addAIDoc(doc: Doc, progressCallback: (progress: number, step: string) => void) {
         console.log('Adding AI Document:', doc);
         const ai_document_status: string = StrCast(doc.ai_document_status);
 
-        if (ai_document_status !== undefined && ai_document_status !== null && ai_document_status.trim() !== '' && ai_document_status !== '{}') {
+        // Skip if the document already has a status. NOTE(review): this tests 'IN PROGRESS' but the method writes 'PROGRESS'.
+        if (ai_document_status !== undefined && ai_document_status.trim() !== '' && ai_document_status !== '{}') {
             if (ai_document_status === 'IN PROGRESS') {
                 console.log('Already in progress.');
                 return;
             }
-            if (!this._doc_ids.includes(StrCast(doc.ai_doc_id))) this._doc_ids.push(StrCast(doc.ai_doc_id));
+            if (!this._doc_ids.includes(StrCast(doc.ai_doc_id))) {
+                this._doc_ids.push(StrCast(doc.ai_doc_id));
+            }
         } else {
+            // Start processing the document.
             doc.ai_document_status = 'PROGRESS';
             console.log(doc);
+
+            // Get the local file path (CSV or PDF).
             const local_file_path: string = CsvCast(doc.data)?.url?.pathname ?? PDFCast(doc.data)?.url?.pathname;
             console.log('Local File Path:', local_file_path);
 
             if (local_file_path) {
                 console.log('Creating AI Document...');
-                // Start the document creation process
+                // Start the document creation process by sending the file to the server.
                 const { jobId } = await Networking.PostToServer('/createDocument', { file_path: local_file_path });
 
-                // Poll the server for progress updates
+                // Poll the server for progress updates.
                 let inProgress: boolean = true;
                 let result: any = null;
                 while (inProgress) {
-                    await new Promise(resolve => setTimeout(resolve, 2000)); // Polling interval
+                    // Polling interval for status updates.
+                    await new Promise(resolve => setTimeout(resolve, 2000));
 
+                    // Check if the job is completed.
                     const resultResponse = await Networking.FetchFromServer(`/getResult/${jobId}`);
                     const resultResponseJson = JSON.parse(resultResponse);
-                    //console.log('Result Response:', resultResponseJson);
                     if (resultResponseJson.status === 'completed') {
                         console.log('Result here:', resultResponseJson);
                         result = resultResponseJson;
                         break;
                     }
 
+                    // Fetch progress information and update the progress callback.
                     const progressResponse = await Networking.FetchFromServer(`/getProgress/${jobId}`);
                     const progressResponseJson = JSON.parse(progressResponse);
-                    //console.log('Progress Response:', progressResponseJson);
-
                     if (progressResponseJson) {
-                        console.log('Progress:', progressResponseJson);
                         const progress = progressResponseJson.progress;
                         const step = progressResponseJson.step;
                         progressCallback(progress, step);
                     }
                 }
 
-                // Process the final document result
+                // Once completed, index the result as-is. NOTE(review): null/undefined fields are no longer stripped first; confirm Pinecone accepts the chunk metadata.
                 console.log('Document JSON:', result);
                 this.documents.push(result);
-                await this.indexDocument(JSON.parse(JSON.stringify(result, (key, value) => (value === null || value === undefined ? undefined : value))));
+                await this.indexDocument(result);
                 console.log(`Document added: ${result.file_name}`);
+
+                // Update document metadata such as summary, purpose, and vectorstore ID.
                 doc.summary = result.summary;
                 doc.ai_doc_id = result.doc_id;
                 this._doc_ids.push(result.doc_id);
@@ -128,6 +150,7 @@ export class Vectorstore {
                     doc.chunk_simpl = JSON.stringify({ chunks: [] });
                 }
 
+                // Process each chunk of the document and update the document's chunk_simpl field.
                 result.chunks.forEach((chunk: RAGChunk) => {
                     const chunkToAdd = {
                         chunkId: chunk.id,
@@ -142,27 +165,41 @@ export class Vectorstore {
                     doc.chunk_simpl = JSON.stringify(new_chunk_simpl);
                 });
 
+                // Mark the document status as completed.
                 doc.ai_document_status = 'COMPLETED';
             }
         }
     }
 
+    /**
+     * Indexes the processed document by uploading the document's vector chunks to the Pinecone index.
+     * @param document The processed document containing its chunks and metadata.
+     */
     private async indexDocument(document: any) {
         console.log('Uploading vectors to content namespace...');
-        const pineconeRecords: PineconeRecord[] = (document.chunks as RAGChunk[]).map(
-            chunk =>
-                ({
-                    id: chunk.id,
-                    values: chunk.values,
-                    metadata: { ...chunk.metadata } as RecordMetadata,
-                }) as PineconeRecord
-        );
+
+        // Prepare Pinecone records for each chunk in the document.
+        const pineconeRecords: PineconeRecord[] = (document.chunks as RAGChunk[]).map(chunk => ({
+            id: chunk.id,
+            values: chunk.values,
+            metadata: { ...chunk.metadata } as RecordMetadata,
+        }));
+
+        // Upload the records to Pinecone.
         await this.index.upsert(pineconeRecords);
     }
 
-    async retrieve(query: string, topK: number = 10): Promise {
+    /**
+     * Retrieves the top K document chunks relevant to the user's query.
+     * This involves embedding the query using Cohere, then querying Pinecone for matching vectors.
+     * @param query The search query string.
+     * @param topK The number of top results to return (default is 10).
+     * @returns A list of document chunks that match the query.
+     */
+    async retrieve(query: string, topK: number = 10): Promise<RAGChunk[]> {
         console.log(`Retrieving chunks for query: ${query}`);
         try {
+            // Generate an embedding for the query using Cohere.
             const queryEmbeddingResponse: EmbedResponse = await this.cohere.embed({
                 texts: [query],
                 model: 'embed-english-v3.0',
@@ -171,6 +208,7 @@ export class Vectorstore {
 
             let queryEmbedding: number[];
 
+            // Extract the embedding from the response.
             if (Array.isArray(queryEmbeddingResponse.embeddings)) {
                 queryEmbedding = queryEmbeddingResponse.embeddings[0];
             } else if (queryEmbeddingResponse.embeddings && 'embeddings' in queryEmbeddingResponse.embeddings) {
@@ -183,6 +221,7 @@ export class Vectorstore {
                 throw new Error('Query embedding is not an array');
             }
 
+            // Query the Pinecone index using the embedding and filter by document IDs.
             const queryResponse: QueryResponse = await this.index.query({
                 vector: queryEmbedding,
                 filter: {
@@ -193,12 +232,22 @@ export class Vectorstore {
                 includeMetadata: true,
             });
 
+            // Map the results into RAGChunks and return them.
             return queryResponse.matches.map(
                 match =>
                     ({
                         id: match.id,
                         values: match.values as number[],
-                        metadata: match.metadata as { text: string; type: string; original_document: string; file_path: string; doc_id: string; location: string; start_page: number; end_page: number },
+                        metadata: match.metadata as {
+                            text: string;
+                            type: string;
+                            original_document: string;
+                            file_path: string;
+                            doc_id: string;
+                            location: string;
+                            start_page: number;
+                            end_page: number;
+                        },
                     }) as RAGChunk
             );
         } catch (error) {
-- 
cgit v1.2.3-70-g09d2