Compare commits

...

3 Commits

Author SHA1 Message Date
Chlupaty
f33de1073f Created image classifier v.1 2026-03-22 01:23:16 +01:00
Chlupaty
98a22e2593 Created image classifier config file 2026-03-22 01:21:41 +01:00
7269b2d68f Mypy + ruff 2026-03-22 01:06:12 +01:00
11 changed files with 229 additions and 40 deletions

View File

@@ -5,3 +5,6 @@ screenshotter:
resolver:
# NOTE(review): this API key is committed in plain text — rotate it and load it from an environment variable or secret store instead of the repository.
api_key: 733f6882605be2de8980bbd074091ee4
img_classifier:
target_path: "data/screenshots/"

View File

@@ -21,7 +21,9 @@ dependencies = [
dev = [
"pytest>=9.0.2",
"ruff==0.15.5",
"pytz"
"pytz",
"types-requests",
"types-PyYAML",
# "playwright==1.58.0" # only dev because it cant be installed in a pipeline, just locally
]
@@ -37,6 +39,7 @@ lint.select = ["E", "F", "I"]
python_version = "3.12"
strict = true
ignore_missing_imports = true
plugins = ["pydantic.mypy"]
[tool.pytest.ini_options]
testpaths = ["test"]

View File

@@ -4,11 +4,10 @@ import yaml
from pydantic import ValidationError
from beaky.config import Config
from beaky.link_classifier.classifier import LinkClassifier
from beaky.resolvers.resolver import _B, _GRAY, _GREEN, _R, _RED, _YELLOW, TicketResolver, TicketVerdict
from beaky.scanner.scanner import Links
from beaky.screenshotter.screenshotter import Screenshotter
from beaky.link_classifier.classifier import LinkClassifier
from beaky.resolvers.resolver import TicketResolver
from beaky.resolvers.resolver import TicketVerdict, _R, _B, _GREEN, _RED, _YELLOW, _GRAY
_VERDICT_COLOR = {
TicketVerdict.TRUTHFUL: _GREEN,

View File

@@ -1,5 +1,6 @@
from pydantic.dataclasses import dataclass
from beaky.image_classifier.config import ImgClassifierConfig
from beaky.resolvers.config import ResolverConfig
from beaky.screenshotter.config import ScreenshotterConfig
@@ -8,4 +9,5 @@ from beaky.screenshotter.config import ScreenshotterConfig
class Config:
    """Top-level application configuration with one nested config per component."""

    path: str
    screenshotter: ScreenshotterConfig
    resolver: ResolverConfig
    # Spelled `img_classifier` so the field name matches the `img_classifier`
    # section of the YAML config (it was previously misspelled `img_classifer`).
    img_classifier: ImgClassifierConfig

View File

@@ -26,38 +26,38 @@ class Bet(ABC):
date: datetime
league: str
@abstractmethod
def resolve(self): pass
def resolve(self) -> None: pass
@dataclass
class WinDrawLose(Bet):
"""Výsledek zápasu 1X2"""
betType: Literal["X", "0", "1", "2"] = "0"
def resolve(self):
def resolve(self) -> None:
...
@dataclass
class Advance(Bet):
    """What team advances to next round"""

    def resolve(self) -> None:
        # Resolution of "advance" bets is not supported; fail loudly with a
        # message that tells the caller what is missing.
        raise NotImplementedError("Advance.resolve is not implemented yet")
@dataclass
class WinDrawLoseDouble(Bet):
"""Výsledek zápasu - double"""
betType: Literal["01", "12", "02"] = "01"
def resolve(self):
def resolve(self) -> None:
...
@dataclass
class WinLose(Bet):
"""Výsledek zápasu bez remízy"""
betType: Literal["1", "2"] = "1"
def resolve(self):
def resolve(self) -> None:
...
@dataclass
class BothTeamScored(Bet):
def resolve(self):
def resolve(self) -> None:
...
@dataclass
@@ -65,7 +65,7 @@ class GoalAmount(Bet):
"""Počet gólů v zápasu — over/under total goals"""
line: float = 0.0 # goal line, e.g. 2.5
over: bool = True # True = more than line, False = less than line
def resolve(self):
def resolve(self) -> None:
...
@dataclass
@@ -73,14 +73,14 @@ class GoalHandicap(Bet):
"""Goal handicap for a specific team — add handicap_amount to team's score, team wins = you win"""
team_bet: Literal["1", "2"] = "1" # which team the handicap is applied to
handicap_amount: float = 0.0 # e.g. +1.5 or -0.5
def resolve(self):
def resolve(self) -> None:
...
@dataclass
class UnknownTicket(Bet):
"""Bet type that could not be classified"""
raw_text: str = ""
def resolve(self):
def resolve(self) -> None:
...

View File

@@ -1,7 +1,183 @@
from datetime import datetime
import datetime
from pathlib import Path
from beaky.datamodels.ticket import Ticket
import pytesseract
from beaky.datamodels.ticket import (
Advance,
Bet,
BetType,
BothTeamScored,
GoalAmount,
GoalHandicap,
Ticket,
UnknownTicket,
WinDrawLose,
WinDrawLoseDouble,
WinLose,
)
def f(path:str, date:datetime) -> list[Ticket]:
...
def img_to_text(path: str) -> str:
    """Run OCR on the image at ``path`` and return the recognised text.

    The path string is handed straight to ``pytesseract.image_to_string``, so
    the image is not opened with PIL here.

    Args:
        path: Location of the image file.

    Returns:
        The recognised text with surrounding whitespace stripped, or an empty
        string when Tesseract is missing or any other error occurs (the error
        is printed, never raised).
    """
    recognised = ""
    try:
        recognised = pytesseract.image_to_string(path).strip()
    except pytesseract.TesseractNotFoundError:
        print("Error: Tesseract executable not found on your system.")
    except Exception as exc:
        # Best effort: one unreadable image must not abort the caller.
        print(f"Error processing {path}: {exc}")
    return recognised
def classify(text: str) -> Bet:
    """Turn OCR text from a ticket screenshot into one Bet using keyword heuristics.

    Args:
        text: Raw text extracted from the image; may be empty.

    Returns:
        The first bet type whose keywords match, checked in this order:
        Advance, BothTeamScored, GoalAmount, GoalHandicap, WinDrawLoseDouble,
        WinLose, WinDrawLose. ``UnknownTicket`` is returned when nothing
        matches or when ``text`` is empty. Team names default to "Unknown" and
        the date to the current time when they cannot be extracted; the league
        is always "Unknown".
    """
    # Imported locally because the module-level imports do not provide `re`;
    # without it every non-empty input raised NameError.
    import re

    if not text:
        return UnknownTicket(
            ticketType=BetType.UNKNOWN,
            team1Name="N/A",
            team2Name="N/A",
            date=datetime.datetime.now(),
            league="N/A",
            raw_text="No text extracted",
        )

    # 1. Defaults & normalization
    text_lower = text.lower()
    now = datetime.datetime.now()
    date_obj = now
    team1, team2 = "Unknown", "Unknown"
    league = "Unknown"

    # 2. Teams: first line shaped like "Team A - Team B" / "A vs B" / "A v B"
    lines = [line.strip() for line in text.split("\n") if line.strip()]
    for line in lines:
        if " - " in line or " vs " in line or " v " in line:
            # Skip lines where the hyphen sits between digits (scores, ranges)
            if not re.search(r"\d\s*-\s*\d", line):
                parts = re.split(r" - | vs | v ", line)
                if len(parts) >= 2:
                    team1, team2 = parts[0].strip(), parts[1].strip()
                    break

    # 3. Date: "D.M." optionally followed by a 2- or 4-digit year and "HH:MM".
    # Both dots are required so decimal odds such as "1.85" are not read as a
    # date, and the year must not be followed by ":" or "." so that the hour
    # of "22.3. 18:30" is not mistaken for a year.
    date_pattern = (
        r"(?<![\d.])(\d{1,2})\.\s*(\d{1,2})\."
        r"(?:\s*(\d{4}|\d{2})(?![\d:.]))?"
        r"(?:\s*(\d{1,2}):(\d{2}))?"
    )
    for found in re.finditer(date_pattern, text):
        day, month, year, hour, minute = found.groups()
        if year is None:
            full_year = now.year  # ticket omitted the year: assume the current one
        elif len(year) == 2:
            full_year = 2000 + int(year)
        else:
            full_year = int(year)
        try:
            date_obj = datetime.datetime(
                full_year, int(month), int(day), int(hour or 0), int(minute or 0)
            )
        except ValueError:
            # Not a real calendar date/time (e.g. "1.85."): try the next candidate
            continue
        break

    # 4. Classification by keyword
    base_args = {"team1Name": team1, "team2Name": team2, "date": date_obj, "league": league}

    def has_token(token: str) -> bool:
        """Report whether `token` occurs on its own, not inside a word, number, date or time."""
        pattern = rf"(?<![\w.,])(?<!\d:){re.escape(token)}(?![\w.:,])"
        return re.search(pattern, text_lower) is not None

    # Advance / Postup
    if any(kw in text_lower for kw in ["postup", "postoupí", "advance"]):
        return Advance(ticketType=BetType.ADVANCED, **base_args)

    # Both Teams to Score / Oba dají gól
    if any(kw in text_lower for kw in ["oba dají gól", "btts", "oba týmy dají gól"]):
        return BothTeamScored(ticketType=BetType.BOTH_TEAM_SCORED, **base_args)

    # Goal Amount (Over/Under)
    if any(kw in text_lower for kw in ["počet gólů", "více než", "méně než", "over", "under"]):
        # First decimal number in the text is taken as the goal line (e.g. 2.5)
        line_match = re.search(r"(\d+\.\d+)", text)
        line_val = float(line_match.group(1)) if line_match else 2.5
        is_over = any(kw in text_lower for kw in ["více", "over", "+"])
        return GoalAmount(ticketType=BetType.GOAL_AMOUNT, line=line_val, over=is_over, **base_args)

    # Goal Handicap
    if any(kw in text_lower for kw in ["handicap", "hcp"]):
        hcp_match = re.search(r"([+-]?\d+\.\d+)", text)
        hcp_val = float(hcp_match.group(1)) if hcp_match else 0.0
        # Simplistic: team 2 only when an isolated " 2 " appears, otherwise team 1
        team_bet = "2" if " 2 " in text else "1"
        return GoalHandicap(
            ticketType=BetType.GOAL_HANDICAP,
            team_bet=team_bet,
            handicap_amount=hcp_val,
            **base_args,
        )

    # Win Draw Lose Double (1X, X2, 12).
    # The picks are matched as standalone tokens: as plain substrings, "12"
    # matched dates and times and "1x" matched the "1x2" market label, which
    # wrongly routed ordinary 1X2 tickets into this branch.
    double_picks = [
        code for token, code in (("1x", "01"), ("x2", "02"), ("12", "12")) if has_token(token)
    ]
    if double_picks or any(kw in text_lower for kw in ["dvojitá šance", "neprohra"]):
        bet_type = double_picks[0] if double_picks else "12"
        return WinDrawLoseDouble(ticketType=BetType.WIN_DRAW_LOSE_DOUBLE, betType=bet_type, **base_args)

    # Win Lose (Draw no bet / Vítěz do rozhodnutí)
    if any(kw in text_lower for kw in ["bez remízy", "vítěz do rozhodnutí", "konečný vítěz"]):
        bet_type = "2" if re.search(r"\b2\b", text) else "1"
        return WinLose(ticketType=BetType.WIN_LOSE, betType=bet_type, **base_args)

    # Win Draw Lose (Standard Match Odds)
    if any(kw in text_lower for kw in ["zápas", "výsledek zápasu", "1x2"]):
        # Look for an isolated 1, X (or 0), or 2; a draw is stored as "0"
        match_pick = re.search(r"\b(1|x|0|2)\b", text_lower)
        bet_type = match_pick.group(1).upper() if match_pick else "1"
        if bet_type == "X":
            bet_type = "0"
        return WinDrawLose(ticketType=BetType.WIN_DRAW_LOSE, betType=bet_type, **base_args)

    # Fallback: nothing recognised
    return UnknownTicket(ticketType=BetType.UNKNOWN, raw_text=text, **base_args)
def img_classify(path: str, ticket_id: int) -> Ticket:
    """OCR and classify every image in a directory, collecting the bets in one Ticket.

    Args:
        path: Directory containing the ticket screenshots.
        ticket_id: Identifier stored on the returned Ticket.

    Returns:
        A Ticket whose ``bets`` holds the classification result of each image
        file, in file-name order. ``bets`` is empty when ``path`` is not a
        directory (an error is printed in that case).
    """
    ticket = Ticket(id=ticket_id, bets=[])
    # Only these suffixes are processed, so system files and text documents are ignored
    valid_extensions = {".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".webp"}

    path_obj = Path(path)
    if not path_obj.is_dir():
        print(f"Error: The path '{path}' is not a valid directory.")
        return ticket

    # iterdir() yields entries in arbitrary order; sorting keeps the order of
    # the collected bets stable between runs and machines.
    for file_path in sorted(path_obj.iterdir()):
        if not (file_path.is_file() and file_path.suffix.lower() in valid_extensions):
            continue

        # 1. Extract the text
        extracted_text = img_to_text(str(file_path))
        print(extracted_text)

        # 2. Classify based on the extracted text
        result = classify(extracted_text)
        if result is None:
            continue

        # 3. Accept either a single Bet or a list of Bets from the classifier
        bets = result if isinstance(result, list) else [result]
        for bet in bets:
            print(
                bet.date,
                getattr(bet, "ticketType", None),
                bet.team1Name,
                bet.team2Name,
                bet.league,
            )
        ticket.bets.extend(bets)

    return ticket
# Manual run: classify the screenshots under ./data/screenshots/ into a ticket with id 1.
if __name__ == "__main__":
    img_classify("./data/screenshots/", ticket_id=1)

View File

@@ -0,0 +1,6 @@
from pydantic.dataclasses import dataclass
@dataclass
class ImgClassifierConfig:
    """Settings for the image classifier."""

    # Presumably the directory holding the screenshots to classify — TODO confirm against the caller.
    target_path: str

View File

@@ -1,19 +1,20 @@
import re
from datetime import datetime
from typing import Any
from playwright.sync_api import Page, sync_playwright
from beaky.datamodels.ticket import (
Bet,
BetType,
BothTeamScored,
GoalAmount,
GoalHandicap,
Ticket,
BetType,
UnknownTicket,
WinDrawLose,
WinDrawLoseDouble,
WinLose,
Bet
)
from beaky.scanner.scanner import Link
@@ -37,24 +38,24 @@ def _parse_teams(title: str) -> tuple[str, str]:
def _classify_bet(bet_text: str, team1: str, team2: str, date: datetime, league: str) -> Bet:
common = dict(team1Name=team1, team2Name=team2, date=date, league=league)
common: dict[str, Any] = dict(team1Name=team1, team2Name=team2, date=date, league=league)
# WinDrawLose double: "Výsledek zápasu - dvojtip: 10"
m = re.search(r"Výsledek zápasu - dvojtip:\s*(\d+)", bet_text)
if m:
# normalize order: "10" -> "01", "02" -> "02", "12" -> "12"
bet_type = "".join(sorted(m.group(1)))
return WinDrawLoseDouble(ticketType=BetType.WIN_DRAW_LOSE_DOUBLE, betType=bet_type, **common)
return WinDrawLoseDouble(ticketType=BetType.WIN_DRAW_LOSE_DOUBLE, betType=bet_type, **common) # type: ignore[arg-type]
# WinLose (no draw): "Výsledek bez remízy: 1"
m = re.search(r"bez rem[ií]zy:\s*([12])", bet_text)
if m:
return WinLose(ticketType=BetType.WIN_LOSE, betType=m.group(1), **common)
return WinLose(ticketType=BetType.WIN_LOSE, betType=m.group(1), **common) # type: ignore[arg-type]
# WinDrawLose: "Výsledek zápasu: 1"
m = re.search(r"Výsledek zápasu:\s*([012X])\s*$", bet_text.strip())
if m:
return WinDrawLose(ticketType=BetType.WIN_DRAW_LOSE, betType=m.group(1), **common)
return WinDrawLose(ticketType=BetType.WIN_DRAW_LOSE, betType=m.group(1), **common) # type: ignore[arg-type]
# BothTeamScored: "Každý z týmů dá gól v zápasu: Ano"
if "dá gól" in bet_text or "oba týmy" in bet_text.lower():
@@ -77,7 +78,7 @@ def _classify_bet(bet_text: str, team1: str, team2: str, date: datetime, league:
return UnknownTicket(ticketType=BetType.UNKNOWN, raw_text=bet_text, **common)
sign = 1.0 if m.group(1) == "+" else -1.0
handicap = sign * float(m.group(2))
return GoalHandicap(ticketType=BetType.GOAL_HANDICAP, team_bet=team_bet, handicap_amount=handicap, **common)
return GoalHandicap(ticketType=BetType.GOAL_HANDICAP, team_bet=team_bet, handicap_amount=handicap, **common) # type: ignore[arg-type]
return UnknownTicket(ticketType=BetType.UNKNOWN, raw_text=bet_text, **common)

View File

@@ -1,8 +1,9 @@
import time
from dataclasses import dataclass, field
from datetime import datetime, timedelta
from datetime import date, datetime, timedelta
from difflib import SequenceMatcher
from enum import Enum
from typing import Any
import requests
@@ -149,7 +150,7 @@ class ResolvedTicket:
return TicketVerdict.TRUTHFUL
def _get(url: str, headers: dict, params: dict, retries: int = 3, backoff: float = 60.0) -> requests.Response:
def _get(url: str, headers: dict[str, str], params: dict[str, str | int], retries: int = 3, backoff: float = 60.0) -> requests.Response:
for attempt in range(retries):
resp = requests.get(url, headers=headers, params=params)
if resp.status_code == 429:
@@ -166,7 +167,7 @@ class TicketResolver:
def __init__(self, config: ResolverConfig):
self._headers = {"x-apisports-key": config.api_key}
# Cache maps (center_date_str, league_id | None) -> list of fixture dicts
self._fixture_cache: dict[tuple[str, int | None], list[dict]] = {}
self._fixture_cache: dict[tuple[str, int | None], list[dict[str, Any]]] = {}
# Cache maps league name -> (league_id, confidence)
self._league_cache: dict[str, tuple[int | None, float]] = {}
@@ -215,7 +216,7 @@ class TicketResolver:
match_finished=finished,
)
def _find_fixture(self, bet: Bet) -> tuple[dict | None, float, float, float]:
def _find_fixture(self, bet: Bet) -> tuple[dict[str, Any] | None, float, float, float]:
"""Returns (fixture, name_match, date_proximity, league_confidence)."""
center = bet.date.date()
date_str = center.strftime("%Y-%m-%d")
@@ -225,7 +226,7 @@ class TicketResolver:
if cache_key not in self._fixture_cache:
date_from = (center - timedelta(days=_DATE_WINDOW)).strftime("%Y-%m-%d")
date_to = (center + timedelta(days=_DATE_WINDOW)).strftime("%Y-%m-%d")
params: dict = {"from": date_from, "to": date_to}
params: dict[str, str | int] = {"from": date_from, "to": date_to}
if league_id is not None:
params["league"] = league_id
params["season"] = center.year if center.month >= 7 else center.year - 1
@@ -273,14 +274,14 @@ def _similarity(a: str, b: str) -> float:
return SequenceMatcher(None, a.lower(), b.lower()).ratio()
def _date_proximity(fixture: dict, center) -> float:
def _date_proximity(fixture: dict[str, Any], center: date) -> float:
"""1.0 on exact date, linear decay to 0.0 at _DATE_WINDOW days away."""
fixture_date = datetime.fromisoformat(fixture["fixture"]["date"].replace("Z", "+00:00")).date()
days_off = abs((fixture_date - center).days)
return max(0.0, 1.0 - days_off / _DATE_WINDOW)
def _best_fixture_match(fixtures: list[dict], team1: str, team2: str, center) -> tuple[dict | None, float, float]:
def _best_fixture_match(fixtures: list[dict[str, Any]], team1: str, team2: str, center: date) -> tuple[dict[str, Any] | None, float, float]:
"""Returns (best_fixture, name_score, date_proximity) or (None, 0, 0) if no good match."""
best, best_combined, best_name, best_date = None, 0.0, 0.0, 0.0
for f in fixtures:
@@ -302,12 +303,12 @@ def _best_fixture_match(fixtures: list[dict], team1: str, team2: str, center) ->
return (best, best_name, best_date) if best_name > 0.5 else (None, best_name, best_date)
def _is_finished(fixture: dict) -> float:
def _is_finished(fixture: dict[str, Any]) -> float:
status = fixture.get("fixture", {}).get("status", {}).get("short", "")
return 1.0 if status in ("FT", "AET", "PEN", "AWD", "WO") else 0.0
def _evaluate_bet(bet: Bet, fixture: dict) -> BetOutcome:
def _evaluate_bet(bet: Bet, fixture: dict[str, Any]) -> BetOutcome:
goals = fixture.get("goals", {})
home = goals.get("home")
away = goals.get("away")

View File

@@ -21,6 +21,7 @@ class Link:
url: str
date: Optional[datetime] = None
class Links:
def __init__(self, path: str | Config):
if isinstance(path, Config):
@@ -123,10 +124,6 @@ class Links:
return len(self.links)
# Backwards-compatible alias in case other modules referenced Linker
Linker = Links
if __name__ == "__main__":
links_obj = Links("data/odkazy.xlsx")
links = links_obj.ret_links()

View File

@@ -1,4 +1,5 @@
from pathlib import Path
from typing import Any
from playwright.sync_api import sync_playwright
@@ -11,7 +12,7 @@ class Screenshotter:
self.config = config
def capture_tickets(self, links: list[Link]):
def capture_tickets(self, links: list[Link]) -> None:
with sync_playwright() as p:
browser = p.chromium.launch(headless=True)
context = browser.new_context()
@@ -24,7 +25,7 @@ class Screenshotter:
browser.close()
def capture_ticket(self,page, url, target_path, ticket_selector=".betslip-history-detail__left-panel"):
def capture_ticket(self, page: Any, url: str, target_path: Path, ticket_selector: str = ".betslip-history-detail__left-panel") -> None:
page.goto(url)
page.wait_for_selector(ticket_selector, timeout=10000)
page.wait_for_timeout(1000)