diff --git a/src/beaky/cli.py b/src/beaky/cli.py
index ff91798..c2d05b6 100644
--- a/src/beaky/cli.py
+++ b/src/beaky/cli.py
@@ -6,6 +6,7 @@ from pydantic import ValidationError
 from beaky.config import Config
 from beaky.scanner.scanner import Links
 from beaky.screenshotter.screenshotter import Screenshotter
+from beaky.link_classifier.classifier import LinkClassifier
 
 
 def load_config(path: str) -> Config | None:
@@ -46,7 +47,16 @@ def main() -> None:
             print(link)
 
     if args.mode == "class":
-        pass
+        classifier = LinkClassifier()
+        for link in data.links:
+            ticket = classifier.classify(link)
+            # Report the number of bets on this ticket, not the number of links.
+            print(f"\n=== Link {link.id} ({len(ticket.bets)} bets) ===")
+            # ticket.bets is a list, so every bet is dumped individually.
+            for bet in ticket.bets:
+                print(f"  [{type(bet).__name__}]")
+                for k, v in vars(bet).items():
+                    print(f"    {k}: {v}")
 
 if __name__ == "__main__":
     main()
diff --git a/src/beaky/datamodels/scan.py b/src/beaky/datamodels/scan.py
deleted file mode 100644
index 07bb433..0000000
--- a/src/beaky/datamodels/scan.py
+++ /dev/null
@@ -1,11 +0,0 @@
-from datetime import datetime
-
-from pydantic.dataclasses import dataclass
-
-
-@dataclass
-class Scan:
-    id: int
-    date: datetime
-    event_name: str
-
diff --git a/src/beaky/datamodels/ticket.py b/src/beaky/datamodels/ticket.py
index 2c2959a..b57be5d 100644
--- a/src/beaky/datamodels/ticket.py
+++ b/src/beaky/datamodels/ticket.py
@@ -6,7 +6,7 @@ from typing import Literal
 from pydantic.dataclasses import dataclass
 
 
-class TicketType(str, Enum):
+class BetType(str, Enum):
     WIN_DRAW_LOSE = "win_draw_lose"
     ADVANCED = "advance"
     WIN_DRAW_LOSE_DOUBLE = "win_draw_lose_double"
@@ -14,62 +14,79 @@ class TicketType(str, Enum):
     BOTH_TEAM_SCORED = "both_team_scored"
     GOAL_AMOUNT = "goal_amount"
     GOAL_HANDICAP = "goal_handicap"
+    UNKNOWN = "unknown"
     ...
+
 @dataclass
-class Ticket(ABC):
-    ticketType: TicketType
+class Bet(ABC):
+    """Fields shared by every classified bet (one leg of a ticket)."""
+    ticketType: BetType
     team1Name: str
     team2Name: str
     date: datetime
-    # other fields that might be interesting for resolving (like bet type in WinDrawLose)
+    league: str
     @abstractmethod
     def resolve(self):
         pass
 
-class WinDrawLose(Ticket):
-    """Výsledek zápasu 1X2"""
-    ticketType: TicketType.WIN_DRAW_LOSE
-    betType: Literal["X", "0", "1", "2"]
+@dataclass
+class WinDrawLose(Bet):
+    """Match result (1X2)"""
+    betType: Literal["X", "0", "1", "2"] = "0"
     def resolve(self):
         ...
 
-class Advance(Ticket):
+@dataclass
+class Advance(Bet):
     """What team advances to next round"""
-    ticketType: TicketType.WIN_DRAW_LOSE
     def resolve(self):
         raise NotImplementedError("Vyser si voko vine")
 
-class WinDrawLoseDouble(Ticket):
-    """Výsledek zápasu - double"""
-    ticketType: TicketType.WIN_DRAW_LOSE_DOUBLE
-    betType: Literal["01", "12", "02"]
+@dataclass
+class WinDrawLoseDouble(Bet):
+    """Match result - double chance"""
+    betType: Literal["01", "12", "02"] = "01"
     def resolve(self):
         ...
 
-class WinLose(Ticket):
-    """Výsledek zápasu bez remízy"""
-    ticketType: TicketType.WIN_LOSE
-    betType: Literal["1", "2"]
+@dataclass
+class WinLose(Bet):
+    """Match result, draw excluded"""
+    betType: Literal["1", "2"] = "1"
     def resolve(self):
         ...
 
-class BothTeamScored(Ticket):
-    ticketType: TicketType.BOTH_TEAM_SCORED
+@dataclass
+class BothTeamScored(Bet):
+    """Both teams to score"""
     def resolve(self):
         ...
 
-class GoalAmount(Ticket):
-    """Počet gólů v zápasu — over/under total goals"""
-    ticketType: TicketType.GOAL_AMOUNT
-    line: float  # goal line, e.g. 2.5
-    over: bool  # True = more than line, False = less than line
+@dataclass
+class GoalAmount(Bet):
+    """Number of goals in the match - over/under total goals"""
+    line: float = 0.0  # goal line, e.g. 2.5
+    over: bool = True  # True = more than line, False = less than line
     def resolve(self):
         ...
 
-class GoalHandicap(Ticket):
+@dataclass
+class GoalHandicap(Bet):
     """Goal handicap for a specific team — add handicap_amount to team's score, team wins = you win"""
-    ticketType: TicketType.GOAL_HANDICAP
-    team_bet: Literal["1", "2"]  # which team the handicap is applied to
-    handicap_amount: float  # e.g. +1.5 or -0.5
+    team_bet: Literal["1", "2"] = "1"  # which team the handicap is applied to
+    handicap_amount: float = 0.0  # e.g. +1.5 or -0.5
     def resolve(self):
         ...
+
+@dataclass
+class UnknownTicket(Bet):
+    """Bet type that could not be classified"""
+    raw_text: str = ""
+    def resolve(self):
+        ...
+
+@dataclass
+class Ticket:
+    """A betting ticket: the bets (legs) scraped from one link."""
+    id: int
+    bets: list[Bet]
diff --git a/src/beaky/link_classifier/classifier.py b/src/beaky/link_classifier/classifier.py
index e69de29..549d8d3 100644
--- a/src/beaky/link_classifier/classifier.py
+++ b/src/beaky/link_classifier/classifier.py
@@ -0,0 +1,155 @@
+import re
+from datetime import datetime
+
+from playwright.sync_api import Page, sync_playwright
+
+from beaky.datamodels.ticket import (
+    BothTeamScored,
+    GoalAmount,
+    GoalHandicap,
+    Ticket,
+    BetType,
+    UnknownTicket,
+    WinDrawLose,
+    WinDrawLoseDouble,
+    WinLose,
+    Bet
+)
+from beaky.scanner.scanner import Link
+
+_TICKET_SELECTOR = ".betslip-history-detail__left-panel"
+_LEG_SELECTOR = '[data-test="betslip-leg"]'
+# Double-chance selections accepted by WinDrawLoseDouble.betType.
+_DOUBLE_CHANCE_TYPES = ("01", "12", "02")
+
+
+def _parse_czech_date(text: str) -> datetime | None:
+    """Parse a "D. M. YYYY HH:MM" timestamp found anywhere in *text*.
+
+    Returns None when no timestamp is present or when its numbers do not
+    form a valid date.
+    """
+    m = re.search(r"(\d+)\.\s*(\d+)\.\s*(\d+)\s+(\d+):(\d+)", text)
+    if not m:
+        return None
+    day, month, year, hour, minute = map(int, m.groups())
+    try:
+        return datetime(year, month, day, hour, minute)
+    except ValueError:
+        # e.g. "31. 2. 2024 10:00" - let the caller fall back to another date
+        return None
+
+
+def _parse_teams(title: str) -> tuple[str, str]:
+    """Split a "Team A - Team B" title into its two team names.
+
+    The second name is "" when the separator is missing.
+    """
+    parts = title.split(" - ", 1)
+    if len(parts) == 2:
+        return parts[0].strip(), parts[1].strip()
+    return title.strip(), ""
+
+
+def _classify_bet(bet_text: str, team1: str, team2: str, date: datetime, league: str) -> Bet:
+    """Map the selection text of one leg to a Bet subclass.
+
+    Patterns are tried in the order written below; text matching none of
+    them is returned as an UnknownTicket carrying the raw text.
+    """
+    common = dict(team1Name=team1, team2Name=team2, date=date, league=league)
+
+    # WinDrawLose double: "Výsledek zápasu - dvojtip: 10"
+    m = re.search(r"Výsledek zápasu - dvojtip:\s*(\d+)", bet_text)
+    if m:
+        # normalize order: "10" -> "01", "02" -> "02", "12" -> "12"
+        bet_type = "".join(sorted(m.group(1)))
+        if bet_type not in _DOUBLE_CHANCE_TYPES:
+            # any other digits would be rejected by the betType Literal
+            return UnknownTicket(ticketType=BetType.UNKNOWN, raw_text=bet_text, **common)
+        return WinDrawLoseDouble(ticketType=BetType.WIN_DRAW_LOSE_DOUBLE, betType=bet_type, **common)
+
+    # WinLose (no draw): "Výsledek bez remízy: 1"
+    m = re.search(r"bez rem[ií]zy:\s*([12])", bet_text)
+    if m:
+        return WinLose(ticketType=BetType.WIN_LOSE, betType=m.group(1), **common)
+
+    # WinDrawLose: "Výsledek zápasu: 1"
+    m = re.search(r"Výsledek zápasu:\s*([012X])\s*$", bet_text.strip())
+    if m:
+        return WinDrawLose(ticketType=BetType.WIN_DRAW_LOSE, betType=m.group(1), **common)
+
+    # BothTeamScored: "Každý z týmů dá gól v zápasu: Ano"
+    if "dá gól" in bet_text or "oba týmy" in bet_text.lower():
+        return BothTeamScored(ticketType=BetType.BOTH_TEAM_SCORED, **common)
+
+    # GoalAmount: "Počet gólů v zápasu 2.5: + 2.5" / "Počet gólů v zápasu 4: - 4"
+    m = re.search(r"Počet gólů v zápasu\s+(\d+(?:\.\d+)?):\s*([+-])", bet_text)
+    if m:
+        return GoalAmount(ticketType=BetType.GOAL_AMOUNT, line=float(m.group(1)), over=m.group(2) == "+", **common)
+
+    # GoalHandicap: "[Team] počet gólů ...: +1.5" — team name in bet text determines team_bet
+    m = re.search(r"([+-])\s*(\d+(?:\.\d+)?)\s*$", bet_text.strip())
+    if m and "gólů" in bet_text:
+        bet_lower = bet_text.lower()
+        # An empty name is a substring of every string, so it must never count as a match.
+        if team1 and team1.lower() in bet_lower:
+            team_bet = "1"
+        elif team2 and team2.lower() in bet_lower:
+            team_bet = "2"
+        else:
+            return UnknownTicket(ticketType=BetType.UNKNOWN, raw_text=bet_text, **common)
+        sign = 1.0 if m.group(1) == "+" else -1.0
+        handicap = sign * float(m.group(2))
+        return GoalHandicap(ticketType=BetType.GOAL_HANDICAP, team_bet=team_bet, handicap_amount=handicap, **common)
+
+    return UnknownTicket(ticketType=BetType.UNKNOWN, raw_text=bet_text, **common)
+
+
+def _extract_legs(page: Page, fallback_date: datetime | None) -> list[Bet]:
+    """Classify every bet leg found on *page*.
+
+    A leg whose date cannot be parsed uses *fallback_date*, or the current
+    time when that is None.
+    """
+    bets: list[Bet] = []
+    for leg in page.locator(_LEG_SELECTOR).all():
+        title = leg.locator("h3").first.get_attribute("title") or ""
+        date_text = leg.locator(".betslip-leg-date span").first.inner_text()
+        bet_text = leg.locator("[data-selection-id]").first.inner_text()
+        league = leg.locator(".f-mt-1.f-leading-tight.f-line-clamp-2").first.inner_text()
+
+        team1, team2 = _parse_teams(title)
+        date = _parse_czech_date(date_text) or fallback_date or datetime.now()
+
+        bets.append(_classify_bet(bet_text, team1, team2, date, league))
+    return bets
+
+
+class LinkClassifier:
+    """Opens a ticket link in headless Chromium and classifies its bets."""
+
+    def classify(self, link: Link) -> Ticket:
+        """Scrape and classify the bets behind *link*.
+
+        Failures are reported on stdout and yield a Ticket with no bets, so
+        one broken link does not abort the caller's loop.
+        """
+        # Fallback returned on failure; without it the final return raised
+        # UnboundLocalError and hid the original error.
+        result = Ticket(id=link.id, bets=[])
+        with sync_playwright() as p:
+            browser = p.chromium.launch(headless=True)
+            try:
+                context = browser.new_context()
+                page = context.new_page()
+                page.goto(link.url)
+                page.wait_for_selector(_LEG_SELECTOR, timeout=15000)
+                page.wait_for_timeout(500)
+                result = Ticket(id=link.id, bets=_extract_legs(page, link.date))
+            except Exception as e:
+                print(f"Error classifying link {link.id}: {e}")
+            finally:
+                # closing the browser also closes its contexts and pages
+                browser.close()
+        return result
diff --git a/src/beaky/scanner/scanner.py b/src/beaky/scanner/scanner.py
index 70e2180..8d9bf2e 100644
--- a/src/beaky/scanner/scanner.py
+++ b/src/beaky/scanner/scanner.py
@@ -21,7 +21,6 @@ class Link:
     url: str
     date: Optional[datetime] = None
 
-
 class Links:
     def __init__(self, path: str | Config):
         if isinstance(path, Config):