diff --git a/src/beaky/image_classifier/classifier.py b/src/beaky/image_classifier/classifier.py index a1bc8d4..5db2579 100644 --- a/src/beaky/image_classifier/classifier.py +++ b/src/beaky/image_classifier/classifier.py @@ -1,7 +1,9 @@ import datetime +import logging +import re from pathlib import Path -import pytesseract +from pytesseract import pytesseract from beaky.datamodels.ticket import ( Advance, @@ -23,7 +25,7 @@ def img_to_text(path: str) -> str: Bypasses PIL and lets Tesseract read the file directly. """ try: - text = pytesseract.image_to_string(path) + text = pytesseract.image_to_string(path, lang="ces") return text.strip() except pytesseract.TesseractNotFoundError: print("Error: Tesseract executable not found on your system.") @@ -37,6 +39,7 @@ def classify(text: str) -> Bet: """Given text extracted from an image and a date, return a Bet object that is relevant to that text.""" + logger = logging.getLogger(__name__) if not text: return UnknownTicket( ticketType=BetType.UNKNOWN, @@ -149,7 +152,19 @@ def img_classify(path: str, ticket_id: int) -> Ticket: print(extracted_text) # 2. Classify based on the extracted text (called separately) - result = classify(extracted_text) + try: + result = classify(extracted_text) + except Exception as exc: # pragma: no cover - defensive fallback + # Ensure result is always defined so downstream code cannot reference an unbound name + print(f"classify() raised an exception: {exc}") + result = UnknownTicket( + ticketType=BetType.UNKNOWN, + team1Name="N/A", + team2Name="N/A", + league="N/A", + raw_text=extracted_text, + date=datetime.datetime.now(), + ) # 3. Add the resulting tickets to our main list # Support classifier returning either a single Bet or a list of Bet