Created image classifier v.1
This commit is contained in:
@@ -2,3 +2,5 @@ path: data/odkazy.xlsx
|
|||||||
|
|
||||||
screenshotter:
|
screenshotter:
|
||||||
target_path: data/screenshots/
|
target_path: data/screenshots/
|
||||||
|
img_classifier:
|
||||||
|
target_path: "data/screenshots/"
|
||||||
@@ -1,9 +1,11 @@
|
|||||||
from pydantic.dataclasses import dataclass
|
from pydantic.dataclasses import dataclass
|
||||||
|
|
||||||
|
from beaky.image_classifier.config import ImgClassifierConfig
|
||||||
from beaky.screenshotter.config import ScreenshotterConfig
|
from beaky.screenshotter.config import ScreenshotterConfig
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class Config:
    """Top-level application configuration, validated by pydantic.

    Groups the spreadsheet path with the per-component configuration objects.
    """

    # Path to the input spreadsheet (the YAML sets this to data/odkazy.xlsx).
    path: str
    # Settings for the screenshotter component.
    screenshotter: ScreenshotterConfig
    # Settings for the image classifier component.
    # NOTE(review): the attribute is spelled "img_classifer" while the YAML
    # section is named "img_classifier" - confirm how the YAML is mapped onto
    # this dataclass; if it is passed as keyword arguments the two names will
    # not line up. Left unchanged here so existing attribute access keeps
    # working.
    img_classifer: ImgClassifierConfig
|
||||||
|
|||||||
@@ -1,7 +1,183 @@
|
|||||||
from datetime import datetime
|
import datetime
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
from beaky.datamodels.ticket import Ticket
|
import pytesseract
|
||||||
|
|
||||||
|
from beaky.datamodels.ticket import (
|
||||||
|
Advance,
|
||||||
|
Bet,
|
||||||
|
BetType,
|
||||||
|
BothTeamScored,
|
||||||
|
GoalAmount,
|
||||||
|
GoalHandicap,
|
||||||
|
Ticket,
|
||||||
|
UnknownTicket,
|
||||||
|
WinDrawLose,
|
||||||
|
WinDrawLoseDouble,
|
||||||
|
WinLose,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def f(path:str, date:datetime) -> list[Ticket]:
|
def img_to_text(path: str) -> str:
    """Run OCR on the image file at ``path`` and return the recognised text.

    The path string is handed to ``pytesseract.image_to_string`` unchanged;
    the image is not opened by this function itself.

    Returns:
        The recognised text with surrounding whitespace stripped, or an empty
        string when Tesseract is not installed or any other error occurs (the
        error is printed, never raised).
    """
    recognised = ""
    try:
        recognised = pytesseract.image_to_string(path).strip()
    except pytesseract.TesseractNotFoundError:
        # The Tesseract binary itself is missing; retrying other files will not help.
        print("Error: Tesseract executable not found on your system.")
    except Exception as exc:
        # Best effort: one unreadable file must not abort the caller.
        print(f"Error processing {path}: {exc}")
    return recognised
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_teams(text: str) -> tuple[str, str]:
    """Return the first "Team A - Team B" style pair in ``text``, or ("Unknown", "Unknown")."""
    import re  # local import: the module's import block does not provide ``re``

    for raw_line in text.split("\n"):
        line = raw_line.strip()
        if not line:
            continue
        if " - " not in line and " vs " not in line and " v " not in line:
            continue
        # A hyphen between digits is a score or a date, not a team separator.
        if re.search(r"\d\s*-\s*\d", line):
            continue
        parts = re.split(r" - | vs | v ", line)
        if len(parts) >= 2:
            return parts[0].strip(), parts[1].strip()
    return "Unknown", "Unknown"


def _extract_date(text: str, default: datetime.datetime) -> datetime.datetime:
    """Return the first valid "DD.MM.[YYYY] [HH:MM]" date in ``text``, else ``default``.

    A missing year falls back to ``default.year``; a two-digit year is read as
    20YY; a missing time becomes 00:00.
    """
    import re  # local import: the module's import block does not provide ``re``

    pattern = (
        r"(?<!\d)(\d{1,2})\.\s*(\d{1,2})\."  # day and month, each followed by a dot
        r"(?:\s*(\d{4}|\d{2})(?![\d:]))?"    # optional year - must not be the hour of HH:MM
        r"(?:\s*(\d{1,2}):(\d{2}))?"         # optional time of day
    )
    for match in re.finditer(pattern, text):
        day, month, year, hour, minute = match.groups()
        if year is None:
            year_number = default.year
        elif len(year) == 2:
            year_number = 2000 + int(year)
        else:
            year_number = int(year)
        try:
            return datetime.datetime(
                year_number, int(month), int(day), int(hour or 0), int(minute or 0)
            )
        except ValueError:
            # Not a real calendar date (e.g. month 85); try the next candidate.
            continue
    return default


def _double_chance_token(text_lower: str) -> str:
    """Return the stand-alone double-chance pick ("1x", "x2" or "12") in ``text_lower``, or ""."""
    import re  # local import: the module's import block does not provide ``re``

    # The lookarounds reject picks that are part of a longer word ("1x2"),
    # a clock time ("12:30") or a dotted/decimal number ("12.3.", "1.12").
    match = re.search(
        r"(?<![\w:])(?<!\d[.,])(?<!\d[.,]\s)(1x|x2|12)(?![\w:])(?![.,]\s*\d)",
        text_lower,
    )
    return match.group(1) if match else ""


def classify(text: str) -> Bet:
    """Classify OCR text from a ticket screenshot into a Bet object.

    Teams and date are extracted heuristically, then the bet type is chosen by
    the first keyword group that matches, in this order: advance, both teams
    to score, goal amount, goal handicap, double chance, win/lose, win/draw/lose.

    Args:
        text: Text extracted from the image; may be empty.

    Returns:
        A Bet subclass instance. An ``UnknownTicket`` is returned when ``text``
        is empty or no keyword group matches. Teams and league default to
        "Unknown" and the date defaults to the current time when they cannot
        be extracted (the league is never extracted here).
    """
    import re  # local import: the module's import block does not provide ``re``

    if not text:
        return UnknownTicket(
            ticketType=BetType.UNKNOWN,
            team1Name="N/A",
            team2Name="N/A",
            date=datetime.datetime.now(),
            league="N/A",
            raw_text="No text extracted",
        )

    # 1. Normalization and shared fields
    text_lower = text.lower()
    team1, team2 = _extract_teams(text)
    date_obj = _extract_date(text, datetime.datetime.now())
    league = "Unknown"
    base_args = {"team1Name": team1, "team2Name": team2, "date": date_obj, "league": league}

    # 2. Classification by keywords - the order below decides ties.

    # Advance / Postup
    if any(kw in text_lower for kw in ["postup", "postoupí", "advance"]):
        return Advance(ticketType=BetType.ADVANCED, **base_args)

    # Both Teams to Score / Oba dají gól
    if any(kw in text_lower for kw in ["oba dají gól", "btts", "oba týmy dají gól"]):
        return BothTeamScored(ticketType=BetType.BOTH_TEAM_SCORED, **base_args)

    # Goal Amount (Over/Under)
    if any(kw in text_lower for kw in ["počet gólů", "více než", "méně než", "over", "under"]):
        # The first decimal number in the text is taken as the goal line; 2.5 if none.
        line_match = re.search(r"(\d+\.\d+)", text)
        line_val = float(line_match.group(1)) if line_match else 2.5
        is_over = any(kw in text_lower for kw in ["více", "over", "+"])
        return GoalAmount(ticketType=BetType.GOAL_AMOUNT, line=line_val, over=is_over, **base_args)

    # Goal Handicap
    if any(kw in text_lower for kw in ["handicap", "hcp"]):
        hcp_match = re.search(r"([+-]?\d+\.\d+)", text)
        hcp_val = float(hcp_match.group(1)) if hcp_match else 0.0
        # Simplistic logic: guess team 1 unless an isolated " 2 " appears.
        team_bet = "2" if " 2 " in text else "1"
        return GoalHandicap(
            ticketType=BetType.GOAL_HANDICAP, team_bet=team_bet, handicap_amount=hcp_val, **base_args
        )

    # Win Draw Lose Double (1X, X2, 12)
    double_pick = _double_chance_token(text_lower)
    if double_pick or any(kw in text_lower for kw in ["dvojitá šance", "neprohra"]):
        # Defaults to "12" when only the phrase, not an explicit pick, was found.
        bet_type = {"1x": "01", "x2": "02"}.get(double_pick, "12")
        return WinDrawLoseDouble(ticketType=BetType.WIN_DRAW_LOSE_DOUBLE, betType=bet_type, **base_args)

    # Win Lose (Draw no bet / Vítěz do rozhodnutí)
    if any(kw in text_lower for kw in ["bez remízy", "vítěz do rozhodnutí", "konečný vítěz"]):
        bet_type = "2" if re.search(r"\b2\b", text) else "1"
        return WinLose(ticketType=BetType.WIN_LOSE, betType=bet_type, **base_args)

    # Win Draw Lose (Standard Match Odds)
    if any(kw in text_lower for kw in ["zápas", "výsledek zápasu", "1x2"]):
        # Look for an isolated 1, X (or 0), or 2; a draw is normalised to "0".
        match_pick = re.search(r"\b(1|x|0|2)\b", text_lower)
        bet_type = match_pick.group(1).upper() if match_pick else "1"
        if bet_type == "X":
            bet_type = "0"
        return WinDrawLose(ticketType=BetType.WIN_DRAW_LOSE, betType=bet_type, **base_args)

    # Fallback Unknown
    return UnknownTicket(ticketType=BetType.UNKNOWN, raw_text=text, **base_args)
|
||||||
|
|
||||||
|
|
||||||
|
def img_classify(path: str, ticket_id: int) -> Ticket:
    """Build one Ticket from all images found directly inside a directory.

    Each image file is OCR'd with ``img_to_text`` and the text is classified
    with ``classify``; the resulting bets are collected on a single Ticket.
    The extracted text and a summary of every bet are printed along the way.

    Args:
        path: Directory containing the screenshot images (not searched recursively).
        ticket_id: Identifier stored on the returned Ticket.

    Returns:
        A Ticket with id ``ticket_id``. Its ``bets`` list is empty when
        ``path`` is not a directory (an error is printed) or holds no images.
    """
    ticket = Ticket(id=ticket_id, bets=[])
    # Only files with these suffixes are processed, which skips system files
    # and text documents.
    valid_extensions = {".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".webp"}
    path_obj = Path(path)

    if not path_obj.is_dir():
        print(f"Error: The path '{path}' is not a valid directory.")
        return ticket

    # Sorted so the order of bets does not depend on directory listing order.
    for file_path in sorted(path_obj.iterdir()):
        if not file_path.is_file() or file_path.suffix.lower() not in valid_extensions:
            continue

        # 1. Extract the text
        extracted_text = img_to_text(str(file_path))
        print(extracted_text)

        # 2. Classify based on the extracted text
        result = classify(extracted_text)
        if result is None:
            continue

        # 3. Collect the bets; a classifier result may be one Bet or a list of Bets.
        bets = result if isinstance(result, list) else [result]
        for bet in bets:
            print(
                bet.date,
                getattr(bet, "ticketType", None),
                bet.team1Name,
                bet.team2Name,
                bet.league,
            )
        ticket.bets.extend(bets)

    return ticket
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Manual smoke run: classify the screenshots in the default output folder
    # as ticket 1 (the path is hard-coded here, not read from the config).
    img_classify("./data/screenshots/", ticket_id=1)
|
||||||
|
|||||||
@@ -3,4 +3,4 @@ from pydantic.dataclasses import dataclass
|
|||||||
|
|
||||||
@dataclass
class ImgClassifierConfig:
    """Configuration section for the image classifier, validated by pydantic."""

    # Set to "data/screenshots/" in the YAML; presumably the directory of
    # screenshots to classify - TODO confirm against the code that reads it.
    target_path: str
|
||||||
|
|||||||
@@ -21,6 +21,7 @@ class Link:
|
|||||||
url: str
|
url: str
|
||||||
date: Optional[datetime] = None
|
date: Optional[datetime] = None
|
||||||
|
|
||||||
|
|
||||||
class Links:
|
class Links:
|
||||||
def __init__(self, path: str | Config):
|
def __init__(self, path: str | Config):
|
||||||
if isinstance(path, Config):
|
if isinstance(path, Config):
|
||||||
@@ -123,10 +124,6 @@ class Links:
|
|||||||
return len(self.links)
|
return len(self.links)
|
||||||
|
|
||||||
|
|
||||||
# Backwards-compatible alias in case other modules referenced Linker
|
|
||||||
Linker = Links
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
links_obj = Links("data/odkazy.xlsx")
|
links_obj = Links("data/odkazy.xlsx")
|
||||||
links = links_obj.ret_links()
|
links = links_obj.ret_links()
|
||||||
|
|||||||
Reference in New Issue
Block a user