aboutsummaryrefslogtreecommitdiff
path: root/src/server
diff options
context:
space:
mode:
Diffstat (limited to 'src/server')
-rw-r--r--src/server/ApiManagers/AssistantManager.ts157
-rw-r--r--src/server/chunker/pdf_chunker.py70
-rw-r--r--src/server/flashcard/labels.py285
-rw-r--r--src/server/flashcard/requirements.txt12
-rw-r--r--src/server/flashcard/venv/pyvenv.cfg3
5 files changed, 438 insertions, 89 deletions
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts
index 8447a4934..4d2068014 100644
--- a/src/server/ApiManagers/AssistantManager.ts
+++ b/src/server/ApiManagers/AssistantManager.ts
@@ -9,7 +9,7 @@
*/
import { Readability } from '@mozilla/readability';
-import axios from 'axios';
+import axios, { AxiosResponse } from 'axios';
import { spawn } from 'child_process';
import * as fs from 'fs';
import { writeFile } from 'fs';
@@ -23,6 +23,7 @@ import { AI_Document } from '../../client/views/nodes/chatbot/types/types';
import { Method } from '../RouteManager';
import { filesDirectory, publicDirectory } from '../SocketData';
import ApiManager, { Registration } from './ApiManager';
+import { getServerPath } from '../../client/util/reportManager/reportManagerUtils';
// Enumeration of directories where different file types are stored
export enum Directory {
@@ -115,29 +116,79 @@ export default class AssistantManager extends ApiManager {
},
});
- // Register Google Web Search Results API route
register({
method: Method.POST,
subscription: '/getWebSearchResults',
secureHandler: async ({ req, res }) => {
const { query, max_results } = req.body;
- try {
- // Fetch search results using Google Custom Search API
- const response = await customsearch.cse.list({
+ const MIN_VALID_RESULTS_RATIO = 0.75; // 3/4 threshold
+ let startIndex = 1; // Start at the first result initially
+ let validResults: any[] = [];
+
+ const fetchSearchResults = async (start: number) => {
+ return customsearch.cse.list({
q: query,
cx: process.env._CLIENT_GOOGLE_SEARCH_ENGINE_ID,
key: process.env._CLIENT_GOOGLE_API_KEY,
safe: 'active',
num: max_results,
+ start, // This controls which result index the search starts from
});
+ };
+
+ const filterResultsByXFrameOptions = async (results: any[]) => {
+ const filteredResults = await Promise.all(
+ results.map(async result => {
+ try {
+ const urlResponse: AxiosResponse = await axios.head(result.url, { timeout: 5000 });
+ const xFrameOptions = urlResponse.headers['x-frame-options'];
+ if (xFrameOptions && xFrameOptions.toUpperCase() === 'SAMEORIGIN') {
+ return result;
+ }
+ } catch (error) {
+ console.error(`Error checking x-frame-options for URL: ${result.url}`, error);
+ }
+ return null; // Exclude the result if it doesn't match
+ })
+ );
+ return filteredResults.filter(result => result !== null); // Remove null results
+ };
- const results =
+ try {
+ // Fetch initial search results
+ let response = await fetchSearchResults(startIndex);
+ let initialResults =
response.data.items?.map(item => ({
url: item.link,
snippet: item.snippet,
})) || [];
- res.send({ results });
+ // Filter the initial results
+ validResults = await filterResultsByXFrameOptions(initialResults);
+
+ // If valid results are less than 3/4 of max_results, fetch more results
+ while (validResults.length < max_results * MIN_VALID_RESULTS_RATIO) {
+ // Increment the start index by the max_results to fetch the next set of results
+ startIndex += max_results;
+ response = await fetchSearchResults(startIndex);
+
+ const additionalResults =
+ response.data.items?.map(item => ({
+ url: item.link,
+ snippet: item.snippet,
+ })) || [];
+
+ const additionalValidResults = await filterResultsByXFrameOptions(additionalResults);
+ validResults = [...validResults, ...additionalValidResults]; // Combine valid results
+
+ // Break if no more results are available
+ if (additionalValidResults.length === 0 || response.data.items?.length === 0) {
+ break;
+ }
+ }
+
+ // Return the filtered valid results
+ res.send({ results: validResults.slice(0, max_results) }); // Limit the results to max_results
} catch (error) {
console.error('Error performing web search:', error);
res.status(500).send({
@@ -299,47 +350,16 @@ export default class AssistantManager extends ApiManager {
method: Method.GET,
subscription: '/getResult/:jobId',
secureHandler: async ({ req, res }) => {
- const { jobId } = req.params; // Get the job ID from the URL parameters
- // Check if the job result is available
+ const { jobId } = req.params;
if (jobResults[jobId]) {
const result = jobResults[jobId] as AI_Document & { status: string };
- // If the result contains image or table chunks, save the base64 data as image files
if (result.chunks && Array.isArray(result.chunks)) {
- await Promise.all(
- result.chunks.map(chunk => {
- if (chunk.metadata && (chunk.metadata.type === 'image' || chunk.metadata.type === 'table')) {
- const files_directory = '/files/chunk_images/';
- const directory = path.join(publicDirectory, files_directory);
-
- // Ensure the directory exists or create it
- if (!fs.existsSync(directory)) {
- fs.mkdirSync(directory);
- }
-
- const fileName = path.basename(chunk.metadata.file_path); // Get the file name from the path
- const filePath = path.join(directory, fileName); // Create the full file path
-
- // Check if the chunk contains base64 encoded data
- if (chunk.metadata.base64_data) {
- // Decode the base64 data and write it to a file
- const buffer = Buffer.from(chunk.metadata.base64_data, 'base64');
- fs.promises.writeFile(filePath, buffer).then(() => {
- // Update the file path in the chunk's metadata
- chunk.metadata.file_path = path.join(files_directory, fileName);
- chunk.metadata.base64_data = undefined; // Remove the base64 data from the metadata
- });
- } else {
- console.warn(`No base64_data found for chunk: ${fileName}`);
- }
- }
- })
- );
result.status = 'completed';
} else {
result.status = 'pending';
}
- res.json(result); // Send the result back to the client
+ res.json(result);
} else {
res.status(202).send({ status: 'pending' });
}
@@ -367,7 +387,7 @@ export default class AssistantManager extends ApiManager {
// If the chunk is an image or table, read the corresponding file and encode it as base64
if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') {
try {
- const filePath = serverPathToFile(Directory.chunk_images, chunk.metadata.file_path); // Get the file path
+ const filePath = path.join(pathToDirectory(Directory.chunk_images), chunk.metadata.file_path); // Get the file path
readFileAsync(filePath).then(imageBuffer => {
const base64Image = imageBuffer.toString('base64'); // Convert the image to base64
@@ -445,10 +465,12 @@ function spawnPythonProcess(jobId: string, file_name: string, file_data: string)
const requirementsPath = path.join(__dirname, '../chunker/requirements.txt');
const pythonScriptPath = path.join(__dirname, '../chunker/pdf_chunker.py');
+ const outputDirectory = pathToDirectory(Directory.chunk_images);
+
function runPythonScript() {
const pythonPath = process.platform === 'win32' ? path.join(venvPath, 'Scripts', 'python') : path.join(venvPath, 'bin', 'python3');
- const pythonProcess = spawn(pythonPath, [pythonScriptPath, jobId, file_name, file_data]);
+ const pythonProcess = spawn(pythonPath, [pythonScriptPath, jobId, file_name, file_data, outputDirectory]);
let pythonOutput = '';
let stderrOutput = '';
@@ -460,23 +482,30 @@ function spawnPythonProcess(jobId: string, file_name: string, file_data: string)
pythonProcess.stderr.on('data', data => {
stderrOutput += data.toString();
const lines = stderrOutput.split('\n');
+ stderrOutput = lines.pop() || ''; // Save the last partial line back to stderrOutput
lines.forEach(line => {
if (line.trim()) {
- try {
- const parsedOutput = JSON.parse(line);
- if (parsedOutput.job_id && parsedOutput.progress !== undefined) {
- jobProgress[parsedOutput.job_id] = {
- step: parsedOutput.step,
- progress: parsedOutput.progress,
- };
- } else if (parsedOutput.progress !== undefined) {
- jobProgress[jobId] = {
- step: parsedOutput.step,
- progress: parsedOutput.progress,
- };
+ if (line.startsWith('PROGRESS:')) {
+ const jsonString = line.substring('PROGRESS:'.length);
+ try {
+ const parsedOutput = JSON.parse(jsonString);
+ if (parsedOutput.job_id && parsedOutput.progress !== undefined) {
+ jobProgress[parsedOutput.job_id] = {
+ step: parsedOutput.step,
+ progress: parsedOutput.progress,
+ };
+ } else if (parsedOutput.progress !== undefined) {
+ jobProgress[jobId] = {
+ step: parsedOutput.step,
+ progress: parsedOutput.progress,
+ };
+ }
+ } catch (err) {
+ console.error('Error parsing progress JSON:', jsonString, err);
}
- } catch (err) {
- console.error('Progress log from Python:', line, err);
+ } else {
+ // Log other stderr output
+ console.error('Python stderr:', line);
}
}
});
@@ -490,10 +519,24 @@ function spawnPythonProcess(jobId: string, file_name: string, file_data: string)
jobProgress[jobId] = { step: 'Complete', progress: 100 };
} catch (err) {
console.error('Error parsing final JSON result:', err);
+ jobResults[jobId] = { error: 'Failed to parse final result' };
}
} else {
console.error(`Python process exited with code ${code}`);
- jobResults[jobId] = { error: 'Python process failed' };
+ // Check if there was an error message in stderr
+ if (stderrOutput) {
+ // Try to parse the last line as JSON
+ const lines = stderrOutput.trim().split('\n');
+ const lastLine = lines[lines.length - 1];
+ try {
+ const errorOutput = JSON.parse(lastLine);
+ jobResults[jobId] = errorOutput;
+ } catch (err) {
+ jobResults[jobId] = { error: 'Python process failed' };
+ }
+ } else {
+ jobResults[jobId] = { error: 'Python process failed' };
+ }
}
});
}
diff --git a/src/server/chunker/pdf_chunker.py b/src/server/chunker/pdf_chunker.py
index 4fe3b9dbf..48b2dbf97 100644
--- a/src/server/chunker/pdf_chunker.py
+++ b/src/server/chunker/pdf_chunker.py
@@ -54,8 +54,9 @@ def update_progress(job_id, step, progress_value):
"step": step,
"progress": progress_value
}
- print(json.dumps(progress_data), file=sys.stderr) # Use stderr for progress logs
- sys.stderr.flush() # Ensure it's sent immediately
+ print(f"PROGRESS:{json.dumps(progress_data)}", file=sys.stderr)
+ sys.stderr.flush()
+
class ElementExtractor:
@@ -63,13 +64,15 @@ class ElementExtractor:
A class that uses a YOLO model to extract tables and images from a PDF page.
"""
- def __init__(self, output_folder: str):
+ def __init__(self, output_folder: str, doc_id: str):
"""
Initializes the ElementExtractor with the output folder for saving images and the YOLO model.
:param output_folder: Path to the folder where extracted elements will be saved.
"""
- self.output_folder = output_folder
+ self.doc_id = doc_id
+ self.output_folder = os.path.join(output_folder, doc_id)
+ os.makedirs(self.output_folder, exist_ok=True)
self.model = YOLO('keremberke/yolov8m-table-extraction') # Load YOLO model for table extraction
self.model.overrides['conf'] = 0.25 # Set confidence threshold for detection
self.model.overrides['iou'] = 0.45 # Set Intersection over Union (IoU) threshold
@@ -116,17 +119,16 @@ class ElementExtractor:
table_path = os.path.join(self.output_folder, table_filename)
page_with_outline.save(table_path)
- # Convert the full-page image with red outline to base64
- base64_data = self.image_to_base64(page_with_outline)
+ file_path_for_client = f"{self.doc_id}/{table_filename}"
tables.append({
'metadata': {
"type": "table",
"location": [x1 / img.width, y1 / img.height, x2 / img.width, y2 / img.height],
- "file_path": table_path,
+ "file_path": file_path_for_client,
"start_page": page_num,
"end_page": page_num,
- "base64_data": base64_data,
+ "base64_data": self.image_to_base64(page_with_outline)
}
})
@@ -175,18 +177,17 @@ class ElementExtractor:
image_path = os.path.join(self.output_folder, image_filename)
page_with_outline.save(image_path)
- # Convert the full-page image with red outline to base64
- base64_data = self.image_to_base64(page_with_outline)
+ file_path_for_client = f"{self.doc_id}/{image_filename}"
images.append({
'metadata': {
"type": "image",
"location": [x1 / page.rect.width, y1 / page.rect.height, x2 / page.rect.width,
y2 / page.rect.height],
- "file_path": image_path,
+ "file_path": file_path_for_client,
"start_page": page_num,
"end_page": page_num,
- "base64_data": base64_data,
+ "base64_data": self.image_to_base64(image)
}
})
@@ -268,7 +269,7 @@ class PDFChunker:
The main class responsible for chunking PDF files into text and visual elements (tables/images).
"""
- def __init__(self, output_folder: str = "output", image_batch_size: int = 5) -> None:
+ def __init__(self, output_folder: str = "output", doc_id: str = '', image_batch_size: int = 5) -> None:
"""
Initializes the PDFChunker with an output folder and an element extractor for visual elements.
@@ -278,7 +279,8 @@ class PDFChunker:
self.client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) # Initialize the Anthropic API client
self.output_folder = output_folder
self.image_batch_size = image_batch_size # Batch size for image processing
- self.element_extractor = ElementExtractor(output_folder) # Initialize the element extractor
+ self.doc_id = doc_id # Add doc_id
+ self.element_extractor = ElementExtractor(output_folder, doc_id)
async def chunk_pdf(self, file_data: bytes, file_name: str, doc_id: str, job_id: str) -> List[Dict[str, Any]]:
"""
@@ -363,6 +365,7 @@ class PDFChunker:
for j, elem in enumerate(batch, start=1):
if j in summaries:
elem['metadata']['text'] = re.sub(r'^(Image|Table):\s*', '', summaries[j])
+ elem['metadata']['base64_data'] = ''
processed_elements.append(elem)
progress = ((i // image_batch_size) + 1) / total_batches * 100 # Calculate progress
@@ -628,10 +631,11 @@ class PDFChunker:
return summaries
- except Exception:
- #print(f"Error in batch_summarize_images: {str(e)}")
- #print("Returning placeholder summaries")
- return {number: "Error: No summary available" for number in images}
+ except Exception as e:
+ # Print errors to stderr so they don't interfere with JSON output
+ print(json.dumps({"error": str(e)}), file=sys.stderr)
+ sys.stderr.flush()
+
class DocumentType(Enum):
"""
@@ -664,7 +668,7 @@ class Document:
Represents a document being processed, such as a PDF, handling chunking, embedding, and summarization.
"""
- def __init__(self, file_data: bytes, file_name: str, job_id: str):
+ def __init__(self, file_data: bytes, file_name: str, job_id: str, output_folder: str):
"""
Initialize the Document with file data, file name, and job ID.
@@ -672,6 +676,7 @@ class Document:
:param file_name: The name of the file being processed.
:param job_id: The job ID associated with this document processing task.
"""
+ self.output_folder = output_folder
self.file_data = file_data
self.file_name = file_name
self.job_id = job_id
@@ -680,14 +685,13 @@ class Document:
self.chunks = [] # List to hold text and visual chunks
self.num_pages = 0 # Number of pages in the document (if applicable)
self.summary = "" # The generated summary for the document
-
self._process() # Start processing the document
def _process(self):
"""
Process the document: extract chunks, embed them, and generate a summary.
"""
- pdf_chunker = PDFChunker(output_folder="output") # Initialize the PDF chunker
+ pdf_chunker = PDFChunker(output_folder=self.output_folder, doc_id=self.doc_id) # Initialize PDFChunker
self.chunks = asyncio.run(pdf_chunker.chunk_pdf(self.file_data, self.file_name, self.doc_id, self.job_id)) # Extract chunks
self.num_pages = self._get_pdf_pages() # Get the number of pages in the document
@@ -796,8 +800,7 @@ class Document:
"doc_id": self.doc_id
}, indent=2) # Convert the document's attributes to JSON format
-
-def process_document(file_data, file_name, job_id):
+def process_document(file_data, file_name, job_id, output_folder):
"""
Top-level function to process a document and return the JSON output.
@@ -806,28 +809,30 @@ def process_document(file_data, file_name, job_id):
:param job_id: The job ID for this document processing task.
:return: The processed document's data in JSON format.
"""
- new_document = Document(file_data, file_name, job_id) # Create a new Document object
- return new_document.to_json() # Return the document's JSON data
-
+ new_document = Document(file_data, file_name, job_id, output_folder)
+ return new_document.to_json()
def main():
"""
Main entry point for the script, called with arguments from Node.js.
"""
- if len(sys.argv) != 4:
- print(json.dumps({"error": "Invalid arguments"}), file=sys.stderr) # Print error if incorrect number of arguments
+ if len(sys.argv) != 5:
+ print(json.dumps({"error": "Invalid arguments"}), file=sys.stderr)
return
- job_id = sys.argv[1] # Get the job ID from command-line arguments
- file_name = sys.argv[2] # Get the file name from command-line arguments
- file_data = sys.argv[3] # Get the base64-encoded file data from command-line arguments
+ job_id = sys.argv[1]
+ file_name = sys.argv[2]
+ file_data = sys.argv[3]
+ output_folder = sys.argv[4] # Get the output folder from arguments
try:
+ os.makedirs(output_folder, exist_ok=True)
+
# Decode the base64 file data
file_bytes = base64.b64decode(file_data)
# Process the document
- document_result = process_document(file_bytes, file_name, job_id)
+ document_result = process_document(file_bytes, file_name, job_id, output_folder) # Pass output_folder
# Output the final result as JSON to stdout
print(document_result)
@@ -839,5 +844,6 @@ def main():
sys.stderr.flush()
+
if __name__ == "__main__":
main() # Execute the main function when the script is run
diff --git a/src/server/flashcard/labels.py b/src/server/flashcard/labels.py
new file mode 100644
index 000000000..546fc4bd3
--- /dev/null
+++ b/src/server/flashcard/labels.py
@@ -0,0 +1,285 @@
+import base64
+import numpy as np
+import base64
+import easyocr
+import sys
+from PIL import Image
+from io import BytesIO
+import requests
+import json
+import numpy as np
+
class BoundingBoxUtils:
    """Static helpers for comparing, adjusting, and serializing OCR bounding boxes."""

    @staticmethod
    def is_close(box1, box2, x_threshold=20, y_threshold=20):
        """
        Determine whether two boxes are both horizontally and vertically near.

        Parameters:
            box1, box2 (list): Flat [x1, y1, x2, y2] boxes.
                NOTE(review): other helpers in this class use 4-point
                [[x,y],...] boxes — confirm callers pass the flat form here.
            x_threshold (int): Maximum horizontal edge distance.
            y_threshold (int): Maximum vertical edge distance.

        Returns:
            bool: True when the boxes are close on both axes.
        """
        # Horizontal: any of these edge pairings within threshold.
        edge_pairs = (
            (box1[2], box2[0]),  # right of box1 vs left of box2
            (box2[2], box1[0]),  # right of box2 vs left of box1
            (box1[2], box2[2]),  # right edges
            (box2[0], box1[0]),  # left edges
        )
        x_near = any(abs(a - b) < x_threshold for a, b in edge_pairs)

        # Vertical: bottom/top edge proximity, or exactly aligned tops/bottoms.
        y_near = (abs(box1[3] - box2[1]) < y_threshold
                  or abs(box2[3] - box1[1]) < y_threshold
                  or box1[1] == box2[1]
                  or box1[3] == box2[3])

        return x_near and y_near

    @staticmethod
    def adjust_bounding_box(bbox, original_text, corrected_text):
        """
        Nudge a 4-point bounding box to account for text correction.

        Parameters:
            bbox (list): 4-point box [[x, y], ...] (top-left, top-right,
                bottom-right, bottom-left order assumed).
            original_text (str): Text before cleaning.
            corrected_text (str): Text after cleaning.
                NOTE(review): both text arguments are currently unused — the
                shift is a fixed 5px regardless of length change.

        Returns:
            list: The adjusted box, or the input unchanged if malformed.
        """
        if not bbox or len(bbox) != 4:
            return bbox

        x_shift = 5  # fixed horizontal nudge applied to 1st and 3rd corners
        p0, p1, p2, p3 = bbox
        return [
            [p0[0] + x_shift, p0[1]],
            [p1[0], p1[1]],
            [p2[0] + x_shift, p2[1]],
            [p3[0], p3[1]],
        ]

    @staticmethod
    def correct_ocr_results(results):
        """
        Strip common OCR artifacts ('~', '-') from detected text and adjust
        each result's bounding box accordingly.

        Parameters:
            results (list): (bbox, text, confidence) triples from EasyOCR.

        Returns:
            list: (adjusted_bbox, cleaned_text, confidence) triples.
        """
        replacements = {"~": "", "-": ""}

        cleaned = []
        for bbox, text, prob in results:
            fixed_text = ''.join(replacements.get(ch, ch) for ch in text)
            fixed_bbox = BoundingBoxUtils.adjust_bounding_box(bbox, text, fixed_text)
            cleaned.append((fixed_bbox, fixed_text, prob))
        return cleaned

    @staticmethod
    def convert_to_json_serializable(data):
        """
        Recursively convert numpy scalars/arrays inside nested lists/tuples
        into plain Python values so the structure can be json-encoded.

        Parameters:
            data (list): Possibly nested structure with numpy types.

        Returns:
            list: Same structure with only JSON-friendly element types.
        """
        def _convert(value):
            if isinstance(value, list):
                return [_convert(v) for v in value]
            if isinstance(value, tuple):
                return tuple(_convert(v) for v in value)
            if isinstance(value, np.integer):
                return int(value)
            if isinstance(value, np.floating):
                return float(value)
            if isinstance(value, np.ndarray):
                return value.tolist()
            return value

        return _convert(data)
+
class ImageLabelProcessor:
    """Loads an image from base64 data or a URL and runs EasyOCR over it.

    Two modes:
      * smart mode    — paragraph-grouped OCR, boxes/text returned as-is;
      * standard mode — per-word OCR filtered by confidence, then vertically
        close boxes are merged into multi-line labels.
    """

    # Max vertical gap (px) for two boxes to be merged into one label.
    VERTICAL_THRESHOLD = 20
    # Max left- or right-edge misalignment (px) for merging.
    HORIZONTAL_THRESHOLD = 8

    def __init__(self, img_source, source_type, smart_mode):
        """
        :param img_source: base64 image data (source_type 'drag') or a URL.
        :param source_type: 'drag' selects base64 decoding; anything else URL.
        :param smart_mode: True for paragraph-grouped OCR.
        """
        self.img_source = img_source
        self.source_type = source_type
        self.smart_mode = smart_mode
        # Either a temp-file path (base64 path) or PNG bytes (URL path);
        # both forms are later handed to easyocr.Reader.readtext.
        self.img_val = self.load_image()

    def load_image(self):
        """Load image from either a base64 string or URL."""
        if self.source_type == 'drag':
            return self._load_base64_image()
        else:
            return self._load_url_image()

    def _load_base64_image(self):
        """Decode the base64 image, save it to disk, and return the file path.

        NOTE(review): writes a fixed "temp_image.jpg" in the working
        directory — concurrent invocations would clobber each other's file.
        """
        base64_string = self.img_source
        # Strip a data-URI prefix ("data:image/...;base64,") if present.
        if base64_string.startswith("data:image"):
            base64_string = base64_string.split(",")[1]


        # Decode the base64 string
        image_data = base64.b64decode(base64_string)
        image = Image.open(BytesIO(image_data)).convert('RGB')
        image.save("temp_image.jpg")
        return "temp_image.jpg"

    def _load_url_image(self):
        """Download image from URL and return it as PNG-encoded bytes.

        NOTE(review): requests.get has no timeout here — a hung host would
        block this call indefinitely; consider adding one.
        """
        url = self.img_source
        response = requests.get(url)
        image = Image.open(BytesIO(response.content)).convert('RGB')

        # Re-encode to PNG bytes so the OCR reader gets a uniform format.
        image_bytes = BytesIO()
        image.save(image_bytes, format='PNG')
        return image_bytes.getvalue()

    def process_image(self):
        """Run OCR and return a JSON-serializable response dict."""
        if self.smart_mode:
            return self._process_smart_mode()
        else:
            return self._process_standard_mode()

    def _process_smart_mode(self):
        """Paragraph-grouped OCR: EasyOCR merges lines into paragraphs itself.

        With paragraph=True each result is a (box, text) pair — no
        per-detection confidence is available in this mode.
        """
        reader = easyocr.Reader(['en'])
        result = reader.readtext(self.img_val, detail=1, paragraph=True)

        all_boxes = [bbox for bbox, text in result]
        all_texts = [text for bbox, text in result]

        response_data = {
            'status': 'success',
            'message': 'Data received',
            'boxes': BoundingBoxUtils.convert_to_json_serializable(all_boxes),
            'text': BoundingBoxUtils.convert_to_json_serializable(all_texts),
        }

        return response_data

    def _process_standard_mode(self):
        """Per-detection OCR: keep confident results (>= 0.7), clean the text,
        then merge vertically adjacent boxes into multi-line labels."""
        reader = easyocr.Reader(['en'])
        results = reader.readtext(self.img_val)

        filtered_results = BoundingBoxUtils.correct_ocr_results([
            (bbox, text, prob) for bbox, text, prob in results if prob >= 0.7
        ])

        return self._merge_and_prepare_response(filtered_results)

    def are_vertically_close(self, box1, box2):
        """True when box2 sits directly below box1 and is roughly aligned.

        Boxes are 4-point [[x,y],...] quads in (top-left, top-right,
        bottom-right, bottom-left) order — assumed from the indexing here;
        TODO confirm against EasyOCR's output order.
        """
        # Vertical gap between box1's bottom edge and box2's top edge.
        box1_bottom = max(box1[2][1], box1[3][1])
        box2_top = min(box2[0][1], box2[1][1])
        vertical_distance = box2_top - box1_bottom

        # Aligned if either the left or the right edges nearly line up.
        box1_left = box1[0][0]
        box2_left = box2[0][0]
        box1_right = box1[1][0]
        box2_right = box2[1][0]
        hori_close = abs(box2_left - box1_left) <= self.HORIZONTAL_THRESHOLD or abs(box2_right - box1_right) <= self.HORIZONTAL_THRESHOLD

        return vertical_distance <= self.VERTICAL_THRESHOLD and hori_close

    def merge_boxes(self, boxes, texts):
        """Merge several boxes into one axis-aligned bounding box and join
        their texts with spaces."""
        x_coords = []
        y_coords = []

        # Collect all x and y coordinates
        for box in boxes:
            for point in box:
                x_coords.append(point[0])
                y_coords.append(point[1])

        # Create the merged bounding box
        merged_box = [
            [min(x_coords), min(y_coords)],
            [max(x_coords), min(y_coords)],
            [max(x_coords), max(y_coords)],
            [min(x_coords), max(y_coords)]
        ]

        # Combine the texts
        merged_text = ' '.join(texts)

        return merged_box, merged_text

    def _merge_and_prepare_response(self, filtered_results):
        """Group runs of consecutive, vertically-close detections, merge each
        run into one box+text, and build the response dict.

        The grouping compares each detection only with its immediate
        successor, so a group grows while the chain of pairwise closeness
        holds and is flushed when it breaks.
        """
        current_boxes, current_texts = [], []
        all_boxes, all_texts = [], []

        for ind in range(len(filtered_results) - 1):
            # Start a new group with the current detection if none is open.
            if not current_boxes:
                current_boxes.append(filtered_results[ind][0])
                current_texts.append(filtered_results[ind][1])

            if self.are_vertically_close(filtered_results[ind][0], filtered_results[ind + 1][0]):
                current_boxes.append(filtered_results[ind + 1][0])
                current_texts.append(filtered_results[ind + 1][1])
            else:
                # Chain broken: flush the accumulated group.
                merged = self.merge_boxes(current_boxes, current_texts)
                all_boxes.append(merged[0])
                all_texts.append(merged[1])
                current_boxes, current_texts = [], []

        # Flush the trailing group (it already includes the last detection
        # when the final pair was close).
        if current_boxes:
            merged = self.merge_boxes(current_boxes, current_texts)
            all_boxes.append(merged[0])
            all_texts.append(merged[1])

        # If the final pair was not close (or there was only one detection),
        # the last detection never joined a group — emit it on its own.
        if not current_boxes and filtered_results:
            merged = self.merge_boxes([filtered_results[-1][0]], [filtered_results[-1][1]])
            all_boxes.append(merged[0])
            all_texts.append(merged[1])

        response = {
            'status': 'success',
            'message': 'Data received',
            'boxes': BoundingBoxUtils.convert_to_json_serializable(all_boxes),
            'text': BoundingBoxUtils.convert_to_json_serializable(all_texts),
        }

        return response
+
# Main execution function
def labels():
    """CLI entry point: run OCR labelling based on command-line arguments.

    Expected argv: [script, source_file, source_type, mode]
      * argv[1]: path to a file whose contents are the image source
                 (base64 data or a URL, depending on argv[2])
      * argv[2]: 'drag' selects base64 decoding; anything else treats the
                 source as a URL
      * argv[3]: 'smart' enables paragraph-grouped OCR

    Prints the OCR response as JSON on stdout and returns the response dict.
    On bad arguments, prints a JSON error to stderr and returns it instead
    of crashing with IndexError.
    """
    if len(sys.argv) != 4:
        error = {'status': 'error',
                 'message': 'Usage: labels.py <source_file> <source_type> <mode>'}
        print(json.dumps(error), file=sys.stderr)
        return error

    source_type = sys.argv[2]
    smart_mode = sys.argv[3] == 'smart'
    # The image payload is passed via a file to avoid OS argv size limits.
    with open(sys.argv[1], 'r') as f:
        img_source = f.read()

    # Create ImageLabelProcessor instance and run OCR
    processor = ImageLabelProcessor(img_source, source_type, smart_mode)
    response = processor.process_image()

    # BUG FIX: emit JSON, not the Python dict repr. repr() uses single
    # quotes, which JSON.parse on the consuming side would reject.
    print(json.dumps(response))
    return response


if __name__ == "__main__":
    labels()
diff --git a/src/server/flashcard/requirements.txt b/src/server/flashcard/requirements.txt
new file mode 100644
index 000000000..eb92a819b
--- /dev/null
+++ b/src/server/flashcard/requirements.txt
@@ -0,0 +1,12 @@
+easyocr==1.7.1
+requests==2.32.3
+pillow==10.4.0
+numpy==1.26.4
+tqdm==4.66.4
+Werkzeug==3.0.3
+python-dateutil==2.9.0.post0
+six==1.16.0
+certifi==2024.6.2
+charset-normalizer==3.3.2
+idna==3.7
+urllib3==1.26.19 \ No newline at end of file
diff --git a/src/server/flashcard/venv/pyvenv.cfg b/src/server/flashcard/venv/pyvenv.cfg
new file mode 100644
index 000000000..740014e00
--- /dev/null
+++ b/src/server/flashcard/venv/pyvenv.cfg
@@ -0,0 +1,3 @@
+home = /Library/Frameworks/Python.framework/Versions/3.10/bin
+include-system-site-packages = false
+version = 3.10.11