Created image classifier v.2
This commit is contained in:
@@ -1,7 +1,9 @@
|
||||
import datetime
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import pytesseract
|
||||
from pytesseract import pytesseract
|
||||
|
||||
from beaky.datamodels.ticket import (
|
||||
Advance,
|
||||
@@ -23,7 +25,7 @@ def img_to_text(path: str) -> str:
|
||||
Bypasses PIL and lets Tesseract read the file directly.
|
||||
"""
|
||||
try:
|
||||
text = pytesseract.image_to_string(path)
|
||||
text = pytesseract.image_to_string(path, lang="ces")
|
||||
return text.strip()
|
||||
except pytesseract.TesseractNotFoundError:
|
||||
print("Error: Tesseract executable not found on your system.")
|
||||
@@ -37,6 +39,7 @@ def classify(text: str) -> Bet:
|
||||
"""Given text extracted from an image, return a Bet object that is
|
||||
relevant to that text."""
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
if not text:
|
||||
return UnknownTicket(
|
||||
ticketType=BetType.UNKNOWN,
|
||||
@@ -149,7 +152,19 @@ def img_classify(path: str, ticket_id: int) -> Ticket:
|
||||
print(extracted_text)
|
||||
|
||||
# 2. Classify based on the extracted text (called separately)
|
||||
try:
|
||||
result = classify(extracted_text)
|
||||
except Exception as exc: # pragma: no cover - defensive fallback
|
||||
# Ensure result is always defined so downstream code cannot reference an unbound name
|
||||
print(f"classify() raised an exception: {exc}")
|
||||
result = UnknownTicket(
|
||||
ticketType=BetType.UNKNOWN,
|
||||
team1Name="N/A",
|
||||
team2Name="N/A",
|
||||
league="N/A",
|
||||
raw_text=extracted_text,
|
||||
date=datetime.datetime.now(),
|
||||
)
|
||||
|
||||
# 3. Add the resulting tickets to our main list
|
||||
# Support classifier returning either a single Bet or a list of Bet
|
||||
|
||||
Reference in New Issue
Block a user