Created image classifier v.2

This commit is contained in:
Chlupaty
2026-03-22 11:18:15 +01:00
parent 8f8190b734
commit 1742a43d49

View File

@@ -1,7 +1,9 @@
import datetime
import logging
import re
from pathlib import Path
import pytesseract
from pytesseract import pytesseract
from beaky.datamodels.ticket import (
Advance,
@@ -23,7 +25,7 @@ def img_to_text(path: str) -> str:
Bypasses PIL and lets Tesseract read the file directly.
"""
try:
text = pytesseract.image_to_string(path)
text = pytesseract.image_to_string(path, lang="ces")
return text.strip()
except pytesseract.TesseractNotFoundError:
print("Error: Tesseract executable not found on your system.")
@@ -37,6 +39,7 @@ def classify(text: str) -> Bet:
"""Given text extracted from an image, return a Bet object that is
relevant to that text."""
logger = logging.getLogger(__name__)
if not text:
return UnknownTicket(
ticketType=BetType.UNKNOWN,
@@ -149,7 +152,19 @@ def img_classify(path: str, ticket_id: int) -> Ticket:
print(extracted_text)
# 2. Classify based on the extracted text (called separately)
try:
result = classify(extracted_text)
except Exception as exc: # pragma: no cover - defensive fallback
# Ensure result is always defined so downstream code cannot reference an unbound name
print(f"classify() raised an exception: {exc}")
result = UnknownTicket(
ticketType=BetType.UNKNOWN,
team1Name="N/A",
team2Name="N/A",
league="N/A",
raw_text=extracted_text,
date=datetime.datetime.now(),
)
# 3. Add the resulting tickets to our main list
# Support classifier returning either a single Bet or a list of Bet