author    alyssaf16 <alyssa_feinberg@brown.edu>   2024-11-12 01:01:23 -0500
committer alyssaf16 <alyssa_feinberg@brown.edu>   2024-11-12 01:01:23 -0500
commit    c358fba1ee2aa54a97373d07e7b218c74dfd9bf0 (patch)
tree      a3f7dea094e70a5f9549b43c21eadc23e0be8ab6 /src
parent    1e4909f04fdcc4c0b3a60b8c75e8b687e2b63b8e (diff)
flashcards w assistant finally work
Diffstat (limited to 'src')
-rw-r--r--  src/client/views/nodes/chatbot/agentsystem/Agent.ts            |   4
-rw-r--r--  src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx   |  89
-rw-r--r--  src/client/views/nodes/chatbot/tools/CreateDocumentTool.ts     |  82
-rw-r--r--  src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts      |   2
-rw-r--r--  src/server/flashcard/labels.py                                 | 285
-rw-r--r--  src/server/flashcard/requirements.txt                          |  12
-rw-r--r--  src/server/flashcard/venv/pyvenv.cfg                           |   3
7 files changed, 450 insertions(+), 27 deletions(-)
diff --git a/src/client/views/nodes/chatbot/agentsystem/Agent.ts b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
index 05d13d1db..0b0e211eb 100644
--- a/src/client/views/nodes/chatbot/agentsystem/Agent.ts
+++ b/src/client/views/nodes/chatbot/agentsystem/Agent.ts
@@ -69,9 +69,9 @@ export class Agent {
// Define available tools for the assistant
this.tools = {
calculate: new CalculateTool(),
- rag: new RAGTool(this.vectorstore),
+ // rag: new RAGTool(this.vectorstore),
dataAnalysis: new DataAnalysisTool(csvData),
- websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc),
+ // websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc),
searchTool: new SearchTool(addLinkedUrlDoc),
createCSV: new CreateCSVTool(createCSVInDash),
noTool: new NoTool(),
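Commenting tools out of the registry disables them but leaves no trace of intent. One alternative is gating the optional entries behind flags; a minimal sketch, assuming hypothetical ENABLE_RAG and ENABLE_SCRAPER environment variables (they are not part of this commit):

    // Sketch only: ENABLE_RAG / ENABLE_SCRAPER are assumed flags, not existing config.
    this.tools = {
        calculate: new CalculateTool(),
        dataAnalysis: new DataAnalysisTool(csvData),
        searchTool: new SearchTool(addLinkedUrlDoc),
        createCSV: new CreateCSVTool(createCSVInDash),
        noTool: new NoTool(),
        ...(process.env.ENABLE_RAG ? { rag: new RAGTool(this.vectorstore) } : {}),
        ...(process.env.ENABLE_SCRAPER ? { websiteInfoScraper: new WebsiteInfoScraperTool(addLinkedUrlDoc) } : {}),
    };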
diff --git a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
index 68d4383e7..95f3fbc5d 100644
--- a/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
+++ b/src/client/views/nodes/chatbot/chatboxcomponents/ChatBox.tsx
@@ -463,9 +463,11 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
doc = DocCast(Docs.Create.TextDocument(data, options));
break;
case 'flashcard':
- // doc = this.createSingleFlashcard(data, options);
doc = this.createFlashcard(data, options);
break;
+ case 'deck':
+ doc = this.createDeck(data, options);
+ break;
case 'image':
doc = DocCast(Docs.Create.ImageDocument(data, options));
break;
@@ -551,31 +553,82 @@ export class ChatBox extends ViewBoxAnnotatableComponent<FieldViewProps>() {
await DocumentManager.Instance.showDocument(doc, { willZoomCentered: true }, () => {});
};
- // TODO: DELEGATE TO DIFFERENT CLASS
@action
- createFlashcard = (data: string, options: DocumentOptions) => {
+ createDeck = (data: any, options: DocumentOptions) => {
const flashcardDeck: Doc[] = [];
- const parsedItems: { [key: string]: string } = JSON.parse(data);
- Object.entries(parsedItems).forEach(([key, val]) => {
- console.log('key' + key);
- console.log('key' + val);
-
- const side1 = Docs.Create.CenteredTextCreator('question', key, options);
- const side2 = Docs.Create.CenteredTextCreator('answer', val, options);
- const doc = DocCast(Docs.Create.FlashcardDocument(data, side1, side2, { _width: 300, _height: 300 }));
- this._props.addDocument?.(doc);
- flashcardDeck.push(doc);
+
+ // Parse `data` only if it’s a string
+ const deckData = typeof data === 'string' ? JSON.parse(data) : data;
+ console.log('Parsed Deck Data:', deckData);
+ const flashcardArray = Array.isArray(deckData) ? deckData : Object.values(deckData);
+ console.log(typeof flashcardArray);
+ // Process each flashcard document in the `deckData` array
+ flashcardArray.forEach(doc => {
+ const flashcardDoc = this.createFlashcard(doc, options);
+ if (flashcardDoc) flashcardDeck.push(flashcardDoc);
});
- const col = DocCast(
+
+ // Create a carousel to contain the flashcard deck
+ const carouselDoc = DocCast(
Docs.Create.CarouselDocument(flashcardDeck, {
- title: options.title,
- _width: 300,
- _height: 300,
+ title: options.title || 'Flashcard Deck',
+ _width: options._width || 300,
+ _height: options._height || 300,
_layout_fitWidth: false,
_layout_autoHeight: true,
})
);
- return col;
+
+ return carouselDoc;
+ };
+ @action
+ createFlashcard = (data: any, options: any) => {
+ // const flashcardDeck: Doc[] = [];
+
+ // Process each flashcard item in the data array
+ // const p = JSON.parse(data);
+ const deckData = typeof data === 'string' ? JSON.parse(data) : data;
+ const flashcardArray = Array.isArray(deckData) ? deckData : Object.values(deckData)[2]; // non-array input is a flashcard spec; its third value (after doc_type and title) is the [front, back] data array
+ console.log(typeof flashcardArray);
+
+ const [front, back] = flashcardArray;
+
+ // Check that both front and back are text documents
+ console.log('DATA' + data);
+ console.log('front' + front);
+ console.log('back' + back);
+ console.log(front.doc_type);
+ console.log(back.doc_type);
+ if (front.doc_type === 'text' && back.doc_type === 'text') {
+ const sideOptions: DocumentOptions = {
+ backgroundColor: options.backgroundColor,
+ _width: options._width,
+ _height: options._height,
+ };
+
+ // Create front and back text documents
+ const side1 = Docs.Create.CenteredTextCreator(front.title, front.data, sideOptions);
+ const side2 = Docs.Create.CenteredTextCreator(back.title, back.data, sideOptions);
+
+ // Create the flashcard document with both sides
+ const flashcardDoc = DocCast(Docs.Create.FlashcardDocument(data.title, side1, side2, sideOptions));
+ return flashcardDoc;
+ // this._props.addDocument?.(flashcardDoc);
+ // flashcardDeck.push(flashcardDoc);
+ }
+
+ // Create a carousel to contain the flashcard deck
+ // const carouselDoc = DocCast(
+ // Docs.Create.CarouselDocument(flashcardDeck, {
+ // title: options.title || data.title,
+ // _width: data.width || 300,
+ // _height: data.height || 300,
+ // _layout_fitWidth: false,
+ // _layout_autoHeight: true,
+ // })
+ // );
+
+ // return carouselDoc;
};
/**
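The deck/flashcard shape these two methods consume can be reconstructed from the example payload in CreateDocumentTool.ts below. A sketch of that contract as TypeScript interfaces (the interface names are illustrative, not from the codebase, which passes these values around as `any`):

    // Illustrative types only, inferred from the example payload below.
    interface TextSpec { doc_type: 'text'; title: string; data: string; width: number; height: number }
    interface FlashcardSpec {
        doc_type: 'flashcard';
        title: string;
        data: [TextSpec, TextSpec]; // [front, back]
        backgroundColor?: string;
        width: number;
        height: number;
    }
    interface DeckSpec {
        doc_type: 'deck';
        title: string;
        data: FlashcardSpec[];
        backgroundColor?: string;
        width: number;
        height: number;
    }

Note that createFlashcard's Object.values(deckData)[2] depends on data being the third key of a FlashcardSpec (after doc_type and title); that holds for the example payload but not for arbitrary key order.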
diff --git a/src/client/views/nodes/chatbot/tools/CreateDocumentTool.ts b/src/client/views/nodes/chatbot/tools/CreateDocumentTool.ts
index b14a57779..ebe0448aa 100644
--- a/src/client/views/nodes/chatbot/tools/CreateDocumentTool.ts
+++ b/src/client/views/nodes/chatbot/tools/CreateDocumentTool.ts
@@ -6,16 +6,85 @@ import { DocumentOptions } from '../../../../documents/Documents';
const example = [
{
+ doc_type: 'deck',
+ title: 'Chemistry',
+ data: [
+ {
+ doc_type: 'flashcard',
+ title: 'Photosynthesis',
+ data: [
+ {
+ doc_type: 'text',
+ title: 'front_Photosynthesis',
+ data: 'What is photosynthesis?',
+ width: 300,
+ height: 300,
+ },
+ {
+ doc_type: 'text',
+ title: 'back_photosynthesis',
+ data: 'The process by which plants make food.',
+ width: 300,
+ height: 300,
+ },
+ ],
+ backgroundColor: '#00ff00',
+ width: 300,
+ height: 300,
+ },
+ {
+ doc_type: 'flashcard',
+ title: 'Photosynthesis',
+ data: [
+ {
+ doc_type: 'text',
+ title: 'front_Photosynthesis',
+ data: 'What is photosynthesis?',
+ width: 300,
+ height: 300,
+ },
+ {
+ doc_type: 'text',
+ title: 'back_photosynthesis',
+ data: 'The process by which plants make food.',
+ width: 300,
+ height: 300,
+ },
+ ],
+ backgroundColor: '#00ff00',
+ width: 300,
+ height: 300,
+ },
+ ],
+ backgroundColor: '#00ff00',
+ width: 600,
+ height: 600,
+ },
+ {
+ doc_type: 'web',
+ title: 'Brown University Wikipedia',
+ data: 'https://en.wikipedia.org/wiki/Brown_University',
+ width: 300,
+ height: 300,
+ },
+ {
doc_type: 'collection',
title: 'Science Collection',
data: [
{
+ doc_type: 'web',
+ title: 'Brown University Wikipedia',
+ data: 'https://en.wikipedia.org/wiki/Brown_University',
+ width: 300,
+ height: 300,
+ },
+ {
doc_type: 'flashcard',
title: 'Photosynthesis',
data: [
{
doc_type: 'text',
- title: 'Front Photosynthesis',
+ title: 'front_Photosynthesis',
data: 'What is photosynthesis?',
width: 300,
height: 300,
@@ -72,9 +141,9 @@ const docInstructions = {
},
text: 'Provide text content as a plain string. Example: "This is a standalone text document."',
flashcard: 'Two text documents with content for the front and back.',
- flashcardDeck: 'A collection of flashcards under a common theme.',
+ deck: 'A deck\'s data is an array of flashcards.',
image: 'A URL to an image for data. Example: "https://example.com/image.jpg"',
- web: 'A URL to a webpage. Example: "https://example.com"',
+ web: 'A URL to a webpage. Example: https://en.wikipedia.org/wiki/Brown_University',
equation: 'Create an equation document.',
noteboard: 'Create a noteboard document',
comparison: 'Create a comparison document',
@@ -148,12 +217,13 @@ export class CreateDocTool extends BaseTool<CreateListDocToolParamsType> {
constructor(addLinkedDoc: (doc_type: string, data: string, options: DocumentOptions, id: string) => void) {
super(
'createDoc',
- 'Creates one or more documents that best fit users request',
+ 'Creates one or more documents that best fit the user\'s request, with input following the example below, or creates a dashboard of many documents/collections.',
createListDocToolParams,
- 'Modify the data parameter and include title (and optionally color) for the document.',
+ 'Modify the data parameter and include a title (and optionally a color) for the document. A web doc\'s data must be a URL returned by the search tool.',
'Creates one or more documents represented by an array of strings with the provided content based on the instructions ' +
docInstructions +
- 'Use if the user wants to create something that aligns with a document type in dash like a flashcard, flashcard deck/stack, or textbox or text document of some sort. Can use after a search or other tool to save information.'
+ 'Use if the user wants to create something that aligns with a Dash document type, such as a flashcard, a flashcard deck/stack, or a text document. Can be used after the search tool to save information. ' +
+ 'When the user asks for a dashboard, first use the search tool, then create many documents/collections with varied colors and text, following the user\'s preferences.'
);
this._addLinkedDoc = addLinkedDoc;
}
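Put together, the smallest deck payload these instructions describe looks like this (abridged from the example array above; no new fields):

    const deck = {
        doc_type: 'deck',
        title: 'Chemistry',
        data: [{
            doc_type: 'flashcard',
            title: 'Photosynthesis',
            data: [
                { doc_type: 'text', title: 'front_Photosynthesis', data: 'What is photosynthesis?', width: 300, height: 300 },
                { doc_type: 'text', title: 'back_photosynthesis', data: 'The process by which plants make food.', width: 300, height: 300 },
            ],
            width: 300,
            height: 300,
        }],
        width: 600,
        height: 600,
    };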
diff --git a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
index 5ed784559..cf7fa0ff3 100644
--- a/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
+++ b/src/client/views/nodes/chatbot/vectorstore/Vectorstore.ts
@@ -44,7 +44,7 @@ export class Vectorstore {
// Initialize Pinecone and Cohere clients with API keys from the environment.
this.pinecone = new Pinecone({ apiKey: pineconeApiKey });
- this.cohere = new CohereClient({ token: process.env.COHERE_API_KEY });
+ // this.cohere = new CohereClient({ token: process.env.COHERE_API_KEY });
this._id = id;
this._doc_ids = doc_ids();
this.initializeIndex();
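If the Cohere client is only needed for reranking, constructing it conditionally keeps the dependency optional without dead code. A sketch, assuming downstream callers tolerate an undefined client (that tolerance is an assumption, not something this diff shows):

    // Sketch: only build the client when a key is configured; callers must handle undefined.
    this.cohere = process.env.COHERE_API_KEY ? new CohereClient({ token: process.env.COHERE_API_KEY }) : undefined;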
diff --git a/src/server/flashcard/labels.py b/src/server/flashcard/labels.py
new file mode 100644
index 000000000..546fc4bd3
--- /dev/null
+++ b/src/server/flashcard/labels.py
@@ -0,0 +1,285 @@
+import base64
+import json
+import sys
+from io import BytesIO
+
+import easyocr
+import numpy as np
+import requests
+from PIL import Image
+
+class BoundingBoxUtils:
+ """Utility class for bounding box operations and OCR result corrections."""
+
+ @staticmethod
+ def is_close(box1, box2, x_threshold=20, y_threshold=20):
+ """
+ Determines if two bounding boxes are horizontally and vertically close.
+
+ Parameters:
+ box1, box2 (list): The bounding boxes to compare.
+ x_threshold (int): The threshold for horizontal proximity.
+ y_threshold (int): The threshold for vertical proximity.
+
+ Returns:
+ bool: True if boxes are close, False otherwise.
+ """
+ horizontally_close = (abs(box1[2] - box2[0]) < x_threshold or # Right edge of box1 and left edge of box2
+ abs(box2[2] - box1[0]) < x_threshold or # Right edge of box2 and left edge of box1
+ abs(box1[2] - box2[2]) < x_threshold or
+ abs(box2[0] - box1[0]) < x_threshold)
+
+ vertically_close = (abs(box1[3] - box2[1]) < y_threshold or # Bottom edge of box1 and top edge of box2
+ abs(box2[3] - box1[1]) < y_threshold or
+ box1[1] == box2[1] or box1[3] == box2[3])
+
+ return horizontally_close and vertically_close
+
+ @staticmethod
+ def adjust_bounding_box(bbox, original_text, corrected_text):
+ """
+ Applies a small fixed x-offset to the bounding box after text correction (the text arguments are currently unused).
+
+ Parameters:
+ bbox (list): The original bounding box coordinates.
+ original_text (str): The original text detected by OCR.
+ corrected_text (str): The corrected text after cleaning.
+
+ Returns:
+ list: The adjusted bounding box.
+ """
+ if not bbox or len(bbox) != 4:
+ return bbox
+
+ # Adjust the x-coordinates slightly to account for text correction
+ x_adjustment = 5
+ adjusted_bbox = [
+ [bbox[0][0] + x_adjustment, bbox[0][1]],
+ [bbox[1][0], bbox[1][1]],
+ [bbox[2][0] + x_adjustment, bbox[2][1]],
+ [bbox[3][0], bbox[3][1]]
+ ]
+ return adjusted_bbox
+
+ @staticmethod
+ def correct_ocr_results(results):
+ """
+ Corrects common OCR misinterpretations in the detected text and adjusts bounding boxes accordingly.
+
+ Parameters:
+ results (list): A list of OCR results, each containing bounding box, text, and confidence score.
+
+ Returns:
+ list: Corrected OCR results with adjusted bounding boxes.
+ """
+ corrections = {
+ "~": "", # Replace '~' with empty string
+ "-": "" # Replace '-' with empty string
+ }
+
+ corrected_results = []
+ for (bbox, text, prob) in results:
+ corrected_text = ''.join(corrections.get(char, char) for char in text)
+ adjusted_bbox = BoundingBoxUtils.adjust_bounding_box(bbox, text, corrected_text)
+ corrected_results.append((adjusted_bbox, corrected_text, prob))
+
+ return corrected_results
+
+ @staticmethod
+ def convert_to_json_serializable(data):
+ """
+ Converts a list containing various types, including numpy types, to a JSON-serializable format.
+
+ Parameters:
+ data (list): A list containing numpy or other non-serializable types.
+
+ Returns:
+ list: A JSON-serializable version of the input list.
+ """
+ def convert_element(element):
+ if isinstance(element, list):
+ return [convert_element(e) for e in element]
+ elif isinstance(element, tuple):
+ return tuple(convert_element(e) for e in element)
+ elif isinstance(element, np.integer):
+ return int(element)
+ elif isinstance(element, np.floating):
+ return float(element)
+ elif isinstance(element, np.ndarray):
+ return element.tolist()
+ else:
+ return element
+
+ return convert_element(data)
+
+class ImageLabelProcessor:
+ """Class to process images and perform OCR with EasyOCR."""
+
+ VERTICAL_THRESHOLD = 20
+ HORIZONTAL_THRESHOLD = 8
+
+ def __init__(self, img_source, source_type, smart_mode):
+ self.img_source = img_source
+ self.source_type = source_type
+ self.smart_mode = smart_mode
+ self.img_val = self.load_image()
+
+ def load_image(self):
+ """Load image from either a base64 string or URL."""
+ if self.source_type == 'drag':
+ return self._load_base64_image()
+ else:
+ return self._load_url_image()
+
+ def _load_base64_image(self):
+ """Decode and save the base64 image."""
+ base64_string = self.img_source
+ if base64_string.startswith("data:image"):
+ base64_string = base64_string.split(",")[1]
+
+
+ # Decode the base64 string
+ image_data = base64.b64decode(base64_string)
+ image = Image.open(BytesIO(image_data)).convert('RGB')
+ image.save("temp_image.jpg")
+ return "temp_image.jpg"
+
+ def _load_url_image(self):
+ """Download image from URL and return it in byte format."""
+ url = self.img_source
+ response = requests.get(url)
+ image = Image.open(BytesIO(response.content)).convert('RGB')
+
+ image_bytes = BytesIO()
+ image.save(image_bytes, format='PNG')
+ return image_bytes.getvalue()
+
+ def process_image(self):
+ """Process the image and return the OCR results."""
+ if self.smart_mode:
+ return self._process_smart_mode()
+ else:
+ return self._process_standard_mode()
+
+ def _process_smart_mode(self):
+ """Process the image in smart mode using EasyOCR."""
+ reader = easyocr.Reader(['en'])
+ result = reader.readtext(self.img_val, detail=1, paragraph=True)
+
+ all_boxes = [bbox for bbox, text in result]
+ all_texts = [text for bbox, text in result]
+
+ response_data = {
+ 'status': 'success',
+ 'message': 'Data received',
+ 'boxes': BoundingBoxUtils.convert_to_json_serializable(all_boxes),
+ 'text': BoundingBoxUtils.convert_to_json_serializable(all_texts),
+ }
+
+ return response_data
+
+ def _process_standard_mode(self):
+ """Process the image in standard mode using EasyOCR."""
+ reader = easyocr.Reader(['en'])
+ results = reader.readtext(self.img_val)
+
+ filtered_results = BoundingBoxUtils.correct_ocr_results([
+ (bbox, text, prob) for bbox, text, prob in results if prob >= 0.7
+ ])
+
+ return self._merge_and_prepare_response(filtered_results)
+
+ def are_vertically_close(self, box1, box2):
+ """Check if two bounding boxes are vertically close."""
+ box1_bottom = max(box1[2][1], box1[3][1])
+ box2_top = min(box2[0][1], box2[1][1])
+ vertical_distance = box2_top - box1_bottom
+
+ box1_left = box1[0][0]
+ box2_left = box2[0][0]
+ box1_right = box1[1][0]
+ box2_right = box2[1][0]
+ hori_close = abs(box2_left - box1_left) <= self.HORIZONTAL_THRESHOLD or abs(box2_right - box1_right) <= self.HORIZONTAL_THRESHOLD
+
+ return vertical_distance <= self.VERTICAL_THRESHOLD and hori_close
+
+ def merge_boxes(self, boxes, texts):
+ """Merge multiple bounding boxes and their associated text."""
+ x_coords = []
+ y_coords = []
+
+ # Collect all x and y coordinates
+ for box in boxes:
+ for point in box:
+ x_coords.append(point[0])
+ y_coords.append(point[1])
+
+ # Create the merged bounding box
+ merged_box = [
+ [min(x_coords), min(y_coords)],
+ [max(x_coords), min(y_coords)],
+ [max(x_coords), max(y_coords)],
+ [min(x_coords), max(y_coords)]
+ ]
+
+ # Combine the texts
+ merged_text = ' '.join(texts)
+
+ return merged_box, merged_text
+
+ def _merge_and_prepare_response(self, filtered_results):
+ """Merge vertically close boxes and prepare the final response."""
+ current_boxes, current_texts = [], []
+ all_boxes, all_texts = [], []
+
+ for ind in range(len(filtered_results) - 1):
+ if not current_boxes:
+ current_boxes.append(filtered_results[ind][0])
+ current_texts.append(filtered_results[ind][1])
+
+ if self.are_vertically_close(filtered_results[ind][0], filtered_results[ind + 1][0]):
+ current_boxes.append(filtered_results[ind + 1][0])
+ current_texts.append(filtered_results[ind + 1][1])
+ else:
+ merged = self.merge_boxes(current_boxes, current_texts)
+ all_boxes.append(merged[0])
+ all_texts.append(merged[1])
+ current_boxes, current_texts = [], []
+
+ if current_boxes:
+ merged = self.merge_boxes(current_boxes, current_texts)
+ all_boxes.append(merged[0])
+ all_texts.append(merged[1])
+
+ if not current_boxes and filtered_results:
+ merged = self.merge_boxes([filtered_results[-1][0]], [filtered_results[-1][1]])
+ all_boxes.append(merged[0])
+ all_texts.append(merged[1])
+
+ response = {
+ 'status': 'success',
+ 'message': 'Data received',
+ 'boxes': BoundingBoxUtils.convert_to_json_serializable(all_boxes),
+ 'text': BoundingBoxUtils.convert_to_json_serializable(all_texts),
+ }
+
+ return response
+
+# Main execution function
+def labels():
+ """Main function to handle image OCR processing based on input arguments."""
+ source_type = sys.argv[2]
+ smart_mode = (sys.argv[3] == 'smart')
+ with open(sys.argv[1], 'r') as f:
+ img_source = f.read()
+ # Create ImageLabelProcessor instance
+ processor = ImageLabelProcessor(img_source, source_type, smart_mode)
+ response = processor.process_image()
+
+ # Print and return the response
+ print(json.dumps(response)) # emit valid JSON rather than a Python dict repr
+ return response
+
+
+if __name__ == '__main__':
+ labels()
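The script's CLI contract, read off the argv handling above: argv[1] is a file whose contents are the image source (base64 data or a URL), argv[2] is the source type ('drag' for base64, anything else for URL), and argv[3] selects 'smart' or standard mode. A sketch of how the Node server might shell out to it (the wrapper name, temp path, and python binary are assumptions, not part of this commit):

    import { spawn } from 'child_process';
    import { writeFileSync } from 'fs';

    // Hypothetical wrapper; labels.py prints its response to stdout.
    function runLabels(imgSource: string, sourceType: 'drag' | 'url', mode: 'smart' | 'standard'): Promise<string> {
        const tmp = '/tmp/labels_input.txt'; // assumed scratch location
        writeFileSync(tmp, imgSource);
        return new Promise((resolve, reject) => {
            const py = spawn('python', ['src/server/flashcard/labels.py', tmp, sourceType, mode]);
            let out = '';
            py.stdout.on('data', d => (out += d));
            py.on('close', code => (code === 0 ? resolve(out) : reject(new Error(`labels.py exited with ${code}`))));
        });
    }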
diff --git a/src/server/flashcard/requirements.txt b/src/server/flashcard/requirements.txt
new file mode 100644
index 000000000..eb92a819b
--- /dev/null
+++ b/src/server/flashcard/requirements.txt
@@ -0,0 +1,12 @@
+easyocr==1.7.1
+requests==2.32.3
+pillow==10.4.0
+numpy==1.26.4
+tqdm==4.66.4
+Werkzeug==3.0.3
+python-dateutil==2.9.0.post0
+six==1.16.0
+certifi==2024.6.2
+charset-normalizer==3.3.2
+idna==3.7
+urllib3==1.26.19
\ No newline at end of file
diff --git a/src/server/flashcard/venv/pyvenv.cfg b/src/server/flashcard/venv/pyvenv.cfg
new file mode 100644
index 000000000..740014e00
--- /dev/null
+++ b/src/server/flashcard/venv/pyvenv.cfg
@@ -0,0 +1,3 @@
+home = /Library/Frameworks/Python.framework/Versions/3.10/bin
+include-system-site-packages = false
+version = 3.10.11