Created image classifier v.2
This commit is contained in:
@@ -1,7 +1,9 @@
|
|||||||
import datetime
|
import datetime
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytesseract
|
from pytesseract import pytesseract
|
||||||
|
|
||||||
from beaky.datamodels.ticket import (
|
from beaky.datamodels.ticket import (
|
||||||
Advance,
|
Advance,
|
||||||
@@ -23,7 +25,7 @@ def img_to_text(path: str) -> str:
|
|||||||
Bypasses PIL and lets Tesseract read the file directly.
|
Bypasses PIL and lets Tesseract read the file directly.
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
text = pytesseract.image_to_string(path)
|
text = pytesseract.image_to_string(path, lang="ces")
|
||||||
return text.strip()
|
return text.strip()
|
||||||
except pytesseract.TesseractNotFoundError:
|
except pytesseract.TesseractNotFoundError:
|
||||||
print("Error: Tesseract executable not found on your system.")
|
print("Error: Tesseract executable not found on your system.")
|
||||||
@@ -37,6 +39,7 @@ def classify(text: str) -> Bet:
|
|||||||
"""Given text extracted from an image and a date, return a Bet object that is
|
"""Given text extracted from an image and a date, return a Bet object that is
|
||||||
relevant to that text."""
|
relevant to that text."""
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
if not text:
|
if not text:
|
||||||
return UnknownTicket(
|
return UnknownTicket(
|
||||||
ticketType=BetType.UNKNOWN,
|
ticketType=BetType.UNKNOWN,
|
||||||
@@ -149,7 +152,19 @@ def img_classify(path: str, ticket_id: int) -> Ticket:
|
|||||||
print(extracted_text)
|
print(extracted_text)
|
||||||
|
|
||||||
# 2. Classify based on the extracted text (called separately)
|
# 2. Classify based on the extracted text (called separately)
|
||||||
result = classify(extracted_text)
|
try:
|
||||||
|
result = classify(extracted_text)
|
||||||
|
except Exception as exc: # pragma: no cover - defensive fallback
|
||||||
|
# Ensure result is always defined so downstream code cannot reference an unbound name
|
||||||
|
print(f"classify() raised an exception: {exc}")
|
||||||
|
result = UnknownTicket(
|
||||||
|
ticketType=BetType.UNKNOWN,
|
||||||
|
team1Name="N/A",
|
||||||
|
team2Name="N/A",
|
||||||
|
league="N/A",
|
||||||
|
raw_text=extracted_text,
|
||||||
|
date=datetime.datetime.now(),
|
||||||
|
)
|
||||||
|
|
||||||
# 3. Add the resulting tickets to our main list
|
# 3. Add the resulting tickets to our main list
|
||||||
# Support classifier returning either a single Bet or a list of Bet
|
# Support classifier returning either a single Bet or a list of Bet
|
||||||
|
|||||||
Reference in New Issue
Block a user