diff options
Diffstat (limited to 'src/server')
| -rw-r--r-- | src/server/ApiManagers/AssistantManager.ts | 157 | ||||
| -rw-r--r-- | src/server/chunker/pdf_chunker.py | 70 | ||||
| -rw-r--r-- | src/server/flashcard/labels.py | 285 | ||||
| -rw-r--r-- | src/server/flashcard/requirements.txt | 12 | ||||
| -rw-r--r-- | src/server/flashcard/venv/pyvenv.cfg | 3 |
5 files changed, 438 insertions, 89 deletions
diff --git a/src/server/ApiManagers/AssistantManager.ts b/src/server/ApiManagers/AssistantManager.ts index 8447a4934..4d2068014 100644 --- a/src/server/ApiManagers/AssistantManager.ts +++ b/src/server/ApiManagers/AssistantManager.ts @@ -9,7 +9,7 @@ */ import { Readability } from '@mozilla/readability'; -import axios from 'axios'; +import axios, { AxiosResponse } from 'axios'; import { spawn } from 'child_process'; import * as fs from 'fs'; import { writeFile } from 'fs'; @@ -23,6 +23,7 @@ import { AI_Document } from '../../client/views/nodes/chatbot/types/types'; import { Method } from '../RouteManager'; import { filesDirectory, publicDirectory } from '../SocketData'; import ApiManager, { Registration } from './ApiManager'; +import { getServerPath } from '../../client/util/reportManager/reportManagerUtils'; // Enumeration of directories where different file types are stored export enum Directory { @@ -115,29 +116,79 @@ export default class AssistantManager extends ApiManager { }, }); - // Register Google Web Search Results API route register({ method: Method.POST, subscription: '/getWebSearchResults', secureHandler: async ({ req, res }) => { const { query, max_results } = req.body; - try { - // Fetch search results using Google Custom Search API - const response = await customsearch.cse.list({ + const MIN_VALID_RESULTS_RATIO = 0.75; // 3/4 threshold + let startIndex = 1; // Start at the first result initially + let validResults: any[] = []; + + const fetchSearchResults = async (start: number) => { + return customsearch.cse.list({ q: query, cx: process.env._CLIENT_GOOGLE_SEARCH_ENGINE_ID, key: process.env._CLIENT_GOOGLE_API_KEY, safe: 'active', num: max_results, + start, // This controls which result index the search starts from }); + }; + + const filterResultsByXFrameOptions = async (results: any[]) => { + const filteredResults = await Promise.all( + results.map(async result => { + try { + const urlResponse: AxiosResponse = await axios.head(result.url, { timeout: 5000 }); + const xFrameOptions = urlResponse.headers['x-frame-options']; + if (xFrameOptions && xFrameOptions.toUpperCase() === 'SAMEORIGIN') { + return result; + } + } catch (error) { + console.error(`Error checking x-frame-options for URL: ${result.url}`, error); + } + return null; // Exclude the result if it doesn't match + }) + ); + return filteredResults.filter(result => result !== null); // Remove null results + }; - const results = + try { + // Fetch initial search results + let response = await fetchSearchResults(startIndex); + let initialResults = response.data.items?.map(item => ({ url: item.link, snippet: item.snippet, })) || []; - res.send({ results }); + // Filter the initial results + validResults = await filterResultsByXFrameOptions(initialResults); + + // If valid results are less than 3/4 of max_results, fetch more results + while (validResults.length < max_results * MIN_VALID_RESULTS_RATIO) { + // Increment the start index by the max_results to fetch the next set of results + startIndex += max_results; + response = await fetchSearchResults(startIndex); + + const additionalResults = + response.data.items?.map(item => ({ + url: item.link, + snippet: item.snippet, + })) || []; + + const additionalValidResults = await filterResultsByXFrameOptions(additionalResults); + validResults = [...validResults, ...additionalValidResults]; // Combine valid results + + // Break if no more results are available + if (additionalValidResults.length === 0 || response.data.items?.length === 0) { + break; + } + } + + // Return the filtered valid results + res.send({ results: validResults.slice(0, max_results) }); // Limit the results to max_results } catch (error) { console.error('Error performing web search:', error); res.status(500).send({ @@ -299,47 +350,16 @@ export default class AssistantManager extends ApiManager { method: Method.GET, subscription: '/getResult/:jobId', secureHandler: async ({ req, res }) => { - const { jobId } = req.params; // Get the job ID from the URL parameters - // Check if the job result is available + const { jobId } = req.params; if (jobResults[jobId]) { const result = jobResults[jobId] as AI_Document & { status: string }; - // If the result contains image or table chunks, save the base64 data as image files if (result.chunks && Array.isArray(result.chunks)) { - await Promise.all( - result.chunks.map(chunk => { - if (chunk.metadata && (chunk.metadata.type === 'image' || chunk.metadata.type === 'table')) { - const files_directory = '/files/chunk_images/'; - const directory = path.join(publicDirectory, files_directory); - - // Ensure the directory exists or create it - if (!fs.existsSync(directory)) { - fs.mkdirSync(directory); - } - - const fileName = path.basename(chunk.metadata.file_path); // Get the file name from the path - const filePath = path.join(directory, fileName); // Create the full file path - - // Check if the chunk contains base64 encoded data - if (chunk.metadata.base64_data) { - // Decode the base64 data and write it to a file - const buffer = Buffer.from(chunk.metadata.base64_data, 'base64'); - fs.promises.writeFile(filePath, buffer).then(() => { - // Update the file path in the chunk's metadata - chunk.metadata.file_path = path.join(files_directory, fileName); - chunk.metadata.base64_data = undefined; // Remove the base64 data from the metadata - }); - } else { - console.warn(`No base64_data found for chunk: ${fileName}`); - } - } - }) - ); result.status = 'completed'; } else { result.status = 'pending'; } - res.json(result); // Send the result back to the client + res.json(result); } else { res.status(202).send({ status: 'pending' }); } @@ -367,7 +387,7 @@ export default class AssistantManager extends ApiManager { // If the chunk is an image or table, read the corresponding file and encode it as base64 if (chunk.metadata.type === 'image' || chunk.metadata.type === 'table') { try { - const filePath = serverPathToFile(Directory.chunk_images, chunk.metadata.file_path); // Get the file path + const filePath = path.join(pathToDirectory(Directory.chunk_images), chunk.metadata.file_path); // Get the file path readFileAsync(filePath).then(imageBuffer => { const base64Image = imageBuffer.toString('base64'); // Convert the image to base64 @@ -445,10 +465,12 @@ function spawnPythonProcess(jobId: string, file_name: string, file_data: string) const requirementsPath = path.join(__dirname, '../chunker/requirements.txt'); const pythonScriptPath = path.join(__dirname, '../chunker/pdf_chunker.py'); + const outputDirectory = pathToDirectory(Directory.chunk_images); + function runPythonScript() { const pythonPath = process.platform === 'win32' ? path.join(venvPath, 'Scripts', 'python') : path.join(venvPath, 'bin', 'python3'); - const pythonProcess = spawn(pythonPath, [pythonScriptPath, jobId, file_name, file_data]); + const pythonProcess = spawn(pythonPath, [pythonScriptPath, jobId, file_name, file_data, outputDirectory]); let pythonOutput = ''; let stderrOutput = ''; @@ -460,23 +482,30 @@ function spawnPythonProcess(jobId: string, file_name: string, file_data: string) pythonProcess.stderr.on('data', data => { stderrOutput += data.toString(); const lines = stderrOutput.split('\n'); + stderrOutput = lines.pop() || ''; // Save the last partial line back to stderrOutput lines.forEach(line => { if (line.trim()) { - try { - const parsedOutput = JSON.parse(line); - if (parsedOutput.job_id && parsedOutput.progress !== undefined) { - jobProgress[parsedOutput.job_id] = { - step: parsedOutput.step, - progress: parsedOutput.progress, - }; - } else if (parsedOutput.progress !== undefined) { - jobProgress[jobId] = { - step: parsedOutput.step, - progress: parsedOutput.progress, - }; + if (line.startsWith('PROGRESS:')) { + const jsonString = line.substring('PROGRESS:'.length); + try { + const parsedOutput = JSON.parse(jsonString); + if (parsedOutput.job_id && parsedOutput.progress !== undefined) { + jobProgress[parsedOutput.job_id] = { + step: parsedOutput.step, + progress: parsedOutput.progress, + }; + } else if (parsedOutput.progress !== undefined) { + jobProgress[jobId] = { + step: parsedOutput.step, + progress: parsedOutput.progress, + }; + } + } catch (err) { + console.error('Error parsing progress JSON:', jsonString, err); } - } catch (err) { - console.error('Progress log from Python:', line, err); + } else { + // Log other stderr output + console.error('Python stderr:', line); } } }); @@ -490,10 +519,24 @@ function spawnPythonProcess(jobId: string, file_name: string, file_data: string) jobProgress[jobId] = { step: 'Complete', progress: 100 }; } catch (err) { console.error('Error parsing final JSON result:', err); + jobResults[jobId] = { error: 'Failed to parse final result' }; } } else { console.error(`Python process exited with code ${code}`); - jobResults[jobId] = { error: 'Python process failed' }; + // Check if there was an error message in stderr + if (stderrOutput) { + // Try to parse the last line as JSON + const lines = stderrOutput.trim().split('\n'); + const lastLine = lines[lines.length - 1]; + try { + const errorOutput = JSON.parse(lastLine); + jobResults[jobId] = errorOutput; + } catch (err) { + jobResults[jobId] = { error: 'Python process failed' }; + } + } else { + jobResults[jobId] = { error: 'Python process failed' }; + } } }); } diff --git a/src/server/chunker/pdf_chunker.py b/src/server/chunker/pdf_chunker.py index 4fe3b9dbf..48b2dbf97 100644 --- a/src/server/chunker/pdf_chunker.py +++ b/src/server/chunker/pdf_chunker.py @@ -54,8 +54,9 @@ def update_progress(job_id, step, progress_value): "step": step, "progress": progress_value } - print(json.dumps(progress_data), file=sys.stderr) # Use stderr for progress logs - sys.stderr.flush() # Ensure it's sent immediately + print(f"PROGRESS:{json.dumps(progress_data)}", file=sys.stderr) + sys.stderr.flush() + class ElementExtractor: @@ -63,13 +64,15 @@ class ElementExtractor: A class that uses a YOLO model to extract tables and images from a PDF page. """ - def __init__(self, output_folder: str): + def __init__(self, output_folder: str, doc_id: str): """ Initializes the ElementExtractor with the output folder for saving images and the YOLO model. :param output_folder: Path to the folder where extracted elements will be saved. """ - self.output_folder = output_folder + self.doc_id = doc_id + self.output_folder = os.path.join(output_folder, doc_id) + os.makedirs(self.output_folder, exist_ok=True) self.model = YOLO('keremberke/yolov8m-table-extraction') # Load YOLO model for table extraction self.model.overrides['conf'] = 0.25 # Set confidence threshold for detection self.model.overrides['iou'] = 0.45 # Set Intersection over Union (IoU) threshold @@ -116,17 +119,16 @@ class ElementExtractor: table_path = os.path.join(self.output_folder, table_filename) page_with_outline.save(table_path) - # Convert the full-page image with red outline to base64 - base64_data = self.image_to_base64(page_with_outline) + file_path_for_client = f"{self.doc_id}/{table_filename}" tables.append({ 'metadata': { "type": "table", "location": [x1 / img.width, y1 / img.height, x2 / img.width, y2 / img.height], - "file_path": table_path, + "file_path": file_path_for_client, "start_page": page_num, "end_page": page_num, - "base64_data": base64_data, + "base64_data": self.image_to_base64(page_with_outline) } }) @@ -175,18 +177,17 @@ class ElementExtractor: image_path = os.path.join(self.output_folder, image_filename) page_with_outline.save(image_path) - # Convert the full-page image with red outline to base64 - base64_data = self.image_to_base64(page_with_outline) + file_path_for_client = f"{self.doc_id}/{image_filename}" images.append({ 'metadata': { "type": "image", "location": [x1 / page.rect.width, y1 / page.rect.height, x2 / page.rect.width, y2 / page.rect.height], - "file_path": image_path, + "file_path": file_path_for_client, "start_page": page_num, "end_page": page_num, - "base64_data": base64_data, + "base64_data": self.image_to_base64(image) } }) @@ -268,7 +269,7 @@ class PDFChunker: The main class responsible for chunking PDF files into text and visual elements (tables/images). """ - def __init__(self, output_folder: str = "output", image_batch_size: int = 5) -> None: + def __init__(self, output_folder: str = "output", doc_id: str = '', image_batch_size: int = 5) -> None: """ Initializes the PDFChunker with an output folder and an element extractor for visual elements. @@ -278,7 +279,8 @@ class PDFChunker: self.client = Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY")) # Initialize the Anthropic API client self.output_folder = output_folder self.image_batch_size = image_batch_size # Batch size for image processing - self.element_extractor = ElementExtractor(output_folder) # Initialize the element extractor + self.doc_id = doc_id # Add doc_id + self.element_extractor = ElementExtractor(output_folder, doc_id) async def chunk_pdf(self, file_data: bytes, file_name: str, doc_id: str, job_id: str) -> List[Dict[str, Any]]: """ @@ -363,6 +365,7 @@ class PDFChunker: for j, elem in enumerate(batch, start=1): if j in summaries: elem['metadata']['text'] = re.sub(r'^(Image|Table):\s*', '', summaries[j]) + elem['metadata']['base64_data'] = '' processed_elements.append(elem) progress = ((i // image_batch_size) + 1) / total_batches * 100 # Calculate progress @@ -628,10 +631,11 @@ class PDFChunker: return summaries - except Exception: - #print(f"Error in batch_summarize_images: {str(e)}") - #print("Returning placeholder summaries") - return {number: "Error: No summary available" for number in images} + except Exception as e: + # Print errors to stderr so they don't interfere with JSON output + print(json.dumps({"error": str(e)}), file=sys.stderr) + sys.stderr.flush() + class DocumentType(Enum): """ @@ -664,7 +668,7 @@ class Document: Represents a document being processed, such as a PDF, handling chunking, embedding, and summarization. """ - def __init__(self, file_data: bytes, file_name: str, job_id: str): + def __init__(self, file_data: bytes, file_name: str, job_id: str, output_folder: str): """ Initialize the Document with file data, file name, and job ID. @@ -672,6 +676,7 @@ class Document: :param file_name: The name of the file being processed. :param job_id: The job ID associated with this document processing task. """ + self.output_folder = output_folder self.file_data = file_data self.file_name = file_name self.job_id = job_id @@ -680,14 +685,13 @@ class Document: self.chunks = [] # List to hold text and visual chunks self.num_pages = 0 # Number of pages in the document (if applicable) self.summary = "" # The generated summary for the document - self._process() # Start processing the document def _process(self): """ Process the document: extract chunks, embed them, and generate a summary. """ - pdf_chunker = PDFChunker(output_folder="output") # Initialize the PDF chunker + pdf_chunker = PDFChunker(output_folder=self.output_folder, doc_id=self.doc_id) # Initialize PDFChunker self.chunks = asyncio.run(pdf_chunker.chunk_pdf(self.file_data, self.file_name, self.doc_id, self.job_id)) # Extract chunks self.num_pages = self._get_pdf_pages() # Get the number of pages in the document @@ -796,8 +800,7 @@ class Document: "doc_id": self.doc_id }, indent=2) # Convert the document's attributes to JSON format - -def process_document(file_data, file_name, job_id): +def process_document(file_data, file_name, job_id, output_folder): """ Top-level function to process a document and return the JSON output. @@ -806,28 +809,30 @@ def process_document(file_data, file_name, job_id): :param job_id: The job ID for this document processing task. :return: The processed document's data in JSON format. """ - new_document = Document(file_data, file_name, job_id) # Create a new Document object - return new_document.to_json() # Return the document's JSON data - + new_document = Document(file_data, file_name, job_id, output_folder) + return new_document.to_json() def main(): """ Main entry point for the script, called with arguments from Node.js. """ - if len(sys.argv) != 4: - print(json.dumps({"error": "Invalid arguments"}), file=sys.stderr) # Print error if incorrect number of arguments + if len(sys.argv) != 5: + print(json.dumps({"error": "Invalid arguments"}), file=sys.stderr) return - job_id = sys.argv[1] # Get the job ID from command-line arguments - file_name = sys.argv[2] # Get the file name from command-line arguments - file_data = sys.argv[3] # Get the base64-encoded file data from command-line arguments + job_id = sys.argv[1] + file_name = sys.argv[2] + file_data = sys.argv[3] + output_folder = sys.argv[4] # Get the output folder from arguments try: + os.makedirs(output_folder, exist_ok=True) + # Decode the base64 file data file_bytes = base64.b64decode(file_data) # Process the document - document_result = process_document(file_bytes, file_name, job_id) + document_result = process_document(file_bytes, file_name, job_id, output_folder) # Pass output_folder # Output the final result as JSON to stdout print(document_result) @@ -839,5 +844,6 @@ def main(): sys.stderr.flush() + if __name__ == "__main__": main() # Execute the main function when the script is run diff --git a/src/server/flashcard/labels.py b/src/server/flashcard/labels.py new file mode 100644 index 000000000..546fc4bd3 --- /dev/null +++ b/src/server/flashcard/labels.py @@ -0,0 +1,285 @@ +import base64 +import numpy as np +import base64 +import easyocr +import sys +from PIL import Image +from io import BytesIO +import requests +import json +import numpy as np + +class BoundingBoxUtils: + """Utility class for bounding box operations and OCR result corrections.""" + + @staticmethod + def is_close(box1, box2, x_threshold=20, y_threshold=20): + """ + Determines if two bounding boxes are horizontally and vertically close. + + Parameters: + box1, box2 (list): The bounding boxes to compare. + x_threshold (int): The threshold for horizontal proximity. + y_threshold (int): The threshold for vertical proximity. + + Returns: + bool: True if boxes are close, False otherwise. + """ + horizontally_close = (abs(box1[2] - box2[0]) < x_threshold or # Right edge of box1 and left edge of box2 + abs(box2[2] - box1[0]) < x_threshold or # Right edge of box2 and left edge of box1 + abs(box1[2] - box2[2]) < x_threshold or + abs(box2[0] - box1[0]) < x_threshold) + + vertically_close = (abs(box1[3] - box2[1]) < y_threshold or # Bottom edge of box1 and top edge of box2 + abs(box2[3] - box1[1]) < y_threshold or + box1[1] == box2[1] or box1[3] == box2[3]) + + return horizontally_close and vertically_close + + @staticmethod + def adjust_bounding_box(bbox, original_text, corrected_text): + """ + Adjusts a bounding box based on differences in text length. + + Parameters: + bbox (list): The original bounding box coordinates. + original_text (str): The original text detected by OCR. + corrected_text (str): The corrected text after cleaning. + + Returns: + list: The adjusted bounding box. + """ + if not bbox or len(bbox) != 4: + return bbox + + # Adjust the x-coordinates slightly to account for text correction + x_adjustment = 5 + adjusted_bbox = [ + [bbox[0][0] + x_adjustment, bbox[0][1]], + [bbox[1][0], bbox[1][1]], + [bbox[2][0] + x_adjustment, bbox[2][1]], + [bbox[3][0], bbox[3][1]] + ] + return adjusted_bbox + + @staticmethod + def correct_ocr_results(results): + """ + Corrects common OCR misinterpretations in the detected text and adjusts bounding boxes accordingly. + + Parameters: + results (list): A list of OCR results, each containing bounding box, text, and confidence score. + + Returns: + list: Corrected OCR results with adjusted bounding boxes. + """ + corrections = { + "~": "", # Replace '~' with empty string + "-": "" # Replace '-' with empty string + } + + corrected_results = [] + for (bbox, text, prob) in results: + corrected_text = ''.join(corrections.get(char, char) for char in text) + adjusted_bbox = BoundingBoxUtils.adjust_bounding_box(bbox, text, corrected_text) + corrected_results.append((adjusted_bbox, corrected_text, prob)) + + return corrected_results + + @staticmethod + def convert_to_json_serializable(data): + """ + Converts a list containing various types, including numpy types, to a JSON-serializable format. + + Parameters: + data (list): A list containing numpy or other non-serializable types. + + Returns: + list: A JSON-serializable version of the input list. + """ + def convert_element(element): + if isinstance(element, list): + return [convert_element(e) for e in element] + elif isinstance(element, tuple): + return tuple(convert_element(e) for e in element) + elif isinstance(element, np.integer): + return int(element) + elif isinstance(element, np.floating): + return float(element) + elif isinstance(element, np.ndarray): + return element.tolist() + else: + return element + + return convert_element(data) + +class ImageLabelProcessor: + """Class to process images and perform OCR with EasyOCR.""" + + VERTICAL_THRESHOLD = 20 + HORIZONTAL_THRESHOLD = 8 + + def __init__(self, img_source, source_type, smart_mode): + self.img_source = img_source + self.source_type = source_type + self.smart_mode = smart_mode + self.img_val = self.load_image() + + def load_image(self): + """Load image from either a base64 string or URL.""" + if self.source_type == 'drag': + return self._load_base64_image() + else: + return self._load_url_image() + + def _load_base64_image(self): + """Decode and save the base64 image.""" + base64_string = self.img_source + if base64_string.startswith("data:image"): + base64_string = base64_string.split(",")[1] + + + # Decode the base64 string + image_data = base64.b64decode(base64_string) + image = Image.open(BytesIO(image_data)).convert('RGB') + image.save("temp_image.jpg") + return "temp_image.jpg" + + def _load_url_image(self): + """Download image from URL and return it in byte format.""" + url = self.img_source + response = requests.get(url) + image = Image.open(BytesIO(response.content)).convert('RGB') + + image_bytes = BytesIO() + image.save(image_bytes, format='PNG') + return image_bytes.getvalue() + + def process_image(self): + """Process the image and return the OCR results.""" + if self.smart_mode: + return self._process_smart_mode() + else: + return self._process_standard_mode() + + def _process_smart_mode(self): + """Process the image in smart mode using EasyOCR.""" + reader = easyocr.Reader(['en']) + result = reader.readtext(self.img_val, detail=1, paragraph=True) + + all_boxes = [bbox for bbox, text in result] + all_texts = [text for bbox, text in result] + + response_data = { + 'status': 'success', + 'message': 'Data received', + 'boxes': BoundingBoxUtils.convert_to_json_serializable(all_boxes), + 'text': BoundingBoxUtils.convert_to_json_serializable(all_texts), + } + + return response_data + + def _process_standard_mode(self): + """Process the image in standard mode using EasyOCR.""" + reader = easyocr.Reader(['en']) + results = reader.readtext(self.img_val) + + filtered_results = BoundingBoxUtils.correct_ocr_results([ + (bbox, text, prob) for bbox, text, prob in results if prob >= 0.7 + ]) + + return self._merge_and_prepare_response(filtered_results) + + def are_vertically_close(self, box1, box2): + """Check if two bounding boxes are vertically close.""" + box1_bottom = max(box1[2][1], box1[3][1]) + box2_top = min(box2[0][1], box2[1][1]) + vertical_distance = box2_top - box1_bottom + + box1_left = box1[0][0] + box2_left = box2[0][0] + box1_right = box1[1][0] + box2_right = box2[1][0] + hori_close = abs(box2_left - box1_left) <= self.HORIZONTAL_THRESHOLD or abs(box2_right - box1_right) <= self.HORIZONTAL_THRESHOLD + + return vertical_distance <= self.VERTICAL_THRESHOLD and hori_close + + def merge_boxes(self, boxes, texts): + """Merge multiple bounding boxes and their associated text.""" + x_coords = [] + y_coords = [] + + # Collect all x and y coordinates + for box in boxes: + for point in box: + x_coords.append(point[0]) + y_coords.append(point[1]) + + # Create the merged bounding box + merged_box = [ + [min(x_coords), min(y_coords)], + [max(x_coords), min(y_coords)], + [max(x_coords), max(y_coords)], + [min(x_coords), max(y_coords)] + ] + + # Combine the texts + merged_text = ' '.join(texts) + + return merged_box, merged_text + + def _merge_and_prepare_response(self, filtered_results): + """Merge vertically close boxes and prepare the final response.""" + current_boxes, current_texts = [], [] + all_boxes, all_texts = [], [] + + for ind in range(len(filtered_results) - 1): + if not current_boxes: + current_boxes.append(filtered_results[ind][0]) + current_texts.append(filtered_results[ind][1]) + + if self.are_vertically_close(filtered_results[ind][0], filtered_results[ind + 1][0]): + current_boxes.append(filtered_results[ind + 1][0]) + current_texts.append(filtered_results[ind + 1][1]) + else: + merged = self.merge_boxes(current_boxes, current_texts) + all_boxes.append(merged[0]) + all_texts.append(merged[1]) + current_boxes, current_texts = [], [] + + if current_boxes: + merged = self.merge_boxes(current_boxes, current_texts) + all_boxes.append(merged[0]) + all_texts.append(merged[1]) + + if not current_boxes and filtered_results: + merged = self.merge_boxes([filtered_results[-1][0]], [filtered_results[-1][1]]) + all_boxes.append(merged[0]) + all_texts.append(merged[1]) + + response = { + 'status': 'success', + 'message': 'Data received', + 'boxes': BoundingBoxUtils.convert_to_json_serializable(all_boxes), + 'text': BoundingBoxUtils.convert_to_json_serializable(all_texts), + } + + return response + +# Main execution function +def labels(): + """Main function to handle image OCR processing based on input arguments.""" + source_type = sys.argv[2] + smart_mode = (sys.argv[3] == 'smart') + with open(sys.argv[1], 'r') as f: + img_source = f.read() + # Create ImageLabelProcessor instance + processor = ImageLabelProcessor(img_source, source_type, smart_mode) + response = processor.process_image() + + # Print and return the response + print(response) + return response + + +labels() diff --git a/src/server/flashcard/requirements.txt b/src/server/flashcard/requirements.txt new file mode 100644 index 000000000..eb92a819b --- /dev/null +++ b/src/server/flashcard/requirements.txt @@ -0,0 +1,12 @@ +easyocr==1.7.1 +requests==2.32.3 +pillow==10.4.0 +numpy==1.26.4 +tqdm==4.66.4 +Werkzeug==3.0.3 +python-dateutil==2.9.0.post0 +six==1.16.0 +certifi==2024.6.2 +charset-normalizer==3.3.2 +idna==3.7 +urllib3==1.26.19
\ No newline at end of file diff --git a/src/server/flashcard/venv/pyvenv.cfg b/src/server/flashcard/venv/pyvenv.cfg new file mode 100644 index 000000000..740014e00 --- /dev/null +++ b/src/server/flashcard/venv/pyvenv.cfg @@ -0,0 +1,3 @@ +home = /Library/Frameworks/Python.framework/Versions/3.10/bin +include-system-site-packages = false +version = 3.10.11 |
