diff --git a/config/application.yml b/config/application.yml
index dbd40c4..46548bc 100644
--- a/config/application.yml
+++ b/config/application.yml
@@ -2,3 +2,7 @@ path: data/odkazy.xlsx
 
 screenshotter:
   target_path: data/screenshots/
+
+resolver:
+  # SECURITY: never commit a real key. Fill this in locally and rotate any key that was ever committed.
+  api_key: "REPLACE_WITH_API_FOOTBALL_KEY"
diff --git a/pyproject.toml b/pyproject.toml
index 9d31262..4c2af6a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "beaky"
-version = "0.0.1"
+version = "0.1.0"
 description = "Scan tickets and decide"
 requires-python = ">=3.12"
 dependencies = [
@@ -13,7 +13,8 @@ dependencies = [
     "pandas==3.0.1",
     "openpyxl>=3.1.0",
     "PyYaml==6.0.3",
-    "playwright==1.58.0"
+    "playwright==1.58.0",
+    "requests>=2.32.0"
 ]
 
 [project.optional-dependencies]
diff --git a/src/beaky/cli.py b/src/beaky/cli.py
index c774b34..654cbcf 100644
--- a/src/beaky/cli.py
+++ b/src/beaky/cli.py
@@ -7,7 +7,15 @@ from beaky.config import Config
 from beaky.scanner.scanner import Links
 from beaky.screenshotter.screenshotter import Screenshotter
 from beaky.link_classifier.classifier import LinkClassifier
+from beaky.resolvers.resolver import _B, _GRAY, _GREEN, _R, _RED, _YELLOW, TicketResolver, TicketVerdict
 
+# ANSI color used when printing each ticket verdict in resolve mode
+_VERDICT_COLOR = {
+    TicketVerdict.TRUTHFUL: _GREEN,
+    TicketVerdict.NOT_TRUTHFUL: _RED,
+    TicketVerdict.POSSIBLY_TRUTHFUL: _YELLOW,
+    TicketVerdict.UNKNOWN: _GRAY,
+}
 
 def load_config(path: str) -> Config | None:
     with open(path) as f:
@@ -22,7 +30,8 @@ def load_config(path: str) -> Config | None:
 def main() -> None:
     parser = argparse.ArgumentParser(prog="beaky")
     parser.add_argument("--config", help="Path to config file.", default="config/application.yml")
-    parser.add_argument("mode", choices=["screenshotter", "parser", "class"], help="Mode of operation.")
+    parser.add_argument("--id", type=int, help="Resolve a single ticket by id (only used with resolve mode).")
+    parser.add_argument("mode", choices=["screenshotter", "parser", "class", "resolve"], help="Mode of operation.")
     args = parser.parse_args()
 
     config = load_config(args.config)
@@ -58,5 +67,25 @@ def main() -> None:
             for k, v in vars(bet).items():
                 print(f" {k}: {v}")
 
+    if args.mode == "resolve":
+        classifier = LinkClassifier()
+        resolver = TicketResolver(config.resolver)
+        links = data.links
+        if args.id is not None:
+            # --id narrows the run to one ticket; an unknown id is reported instead of silently doing nothing
+            links = [candidate for candidate in data.links if candidate.id == args.id]
+            if not links:
+                print(f"ERROR: ticket id {args.id} not found")
+                return
+        for link in links:
+            print(f"\n=== Classifying ticket {link.id} ===")
+            ticket = classifier.classify(link)
+            for bet in ticket.bets:
+                print(f" [{type(bet).__name__}] {bet.team1Name} vs {bet.team2Name} | {bet.date.date()} | {bet.league}")
+
+            print(f"\n--- Resolving ticket {link.id} ---")
+            resolved = resolver.resolve(ticket)
+            color = _VERDICT_COLOR.get(resolved.verdict, "")
+            print(f"\n {color}{_B}VERDICT: {resolved.verdict.value.upper()}{_R}")
+
 if __name__ == "__main__":
     main()
diff --git a/src/beaky/config.py b/src/beaky/config.py
index 45dbe12..41b863d 100644
--- a/src/beaky/config.py
+++ b/src/beaky/config.py
@@ -1,9 +1,11 @@
 from pydantic.dataclasses import dataclass
 
+from beaky.resolvers.config import ResolverConfig
 from beaky.screenshotter.config import ScreenshotterConfig
 
 
 @dataclass
 class Config:
     path: str
-    screenshotter: ScreenshotterConfig
\ No newline at end of file
+    screenshotter: ScreenshotterConfig
+    resolver: ResolverConfig
diff --git a/src/beaky/datamodels/ticket.py b/src/beaky/datamodels/ticket.py
index b57be5d..f2b0ea1 100644
--- a/src/beaky/datamodels/ticket.py
+++ b/src/beaky/datamodels/ticket.py
@@ -83,6 +83,8 @@ class UnknownTicket(Bet):
     def resolve(self): ...
 
 
+
+
 @dataclass
 class Ticket:
     id: int
diff --git a/src/beaky/resolvers/__init__.py b/src/beaky/resolvers/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/beaky/resolvers/config.py b/src/beaky/resolvers/config.py
new file mode 100644
index 0000000..3825a73
--- /dev/null
+++ b/src/beaky/resolvers/config.py
@@ -0,0 +1,9 @@
+from pydantic.dataclasses import dataclass
+
+
+@dataclass
+class ResolverConfig:
+    """Settings for the ticket resolver."""
+
+    # Key the resolver sends in the x-apisports-key header of its API requests
+    api_key: str
diff --git a/src/beaky/resolvers/resolver.py b/src/beaky/resolvers/resolver.py
new file mode 100644
index 0000000..9e87636
--- /dev/null
+++ b/src/beaky/resolvers/resolver.py
@@ -0,0 +1,445 @@
+import time
+from dataclasses import dataclass, field
+from datetime import date, datetime, timedelta
+from difflib import SequenceMatcher
+from enum import Enum
+
+import requests
+
+from beaky.datamodels.ticket import (
+    Bet,
+    BothTeamScored,
+    GoalAmount,
+    GoalHandicap,
+    Ticket,
+    UnknownTicket,
+    WinDrawLose,
+    WinDrawLoseDouble,
+    WinLose,
+)
+from beaky.resolvers.config import ResolverConfig
+
+_API_BASE = "https://v3.football.api-sports.io"
+
+# Fortuna league strings (lowercased substring match) -> api-football league ID
+_LEAGUE_MAP: dict[str, int] = {
+    # European cups
+    "liga mistrů": 2,
+    "champions league": 2,
+    "evropská liga": 3,
+    "europa league": 3,
+    "konferenční liga": 848,
+    "conference league": 848,
+    # Top flights
+    "1. anglie": 39,
+    "1. belgie": 144,
+    "1. česko": 345,
+    "1. dánsko": 119,
+    "1. francie": 61,
+    "1. itálie": 135,
+    "1. itálie - ženy": 794,
+    "1. německo": 78,
+    "1. nizozemsko": 88,
+    "1. polsko": 106,
+    "1. portugalsko": 94,
+    "1. rakousko": 218,
+    "1. rumunsko": 283,
+    "1. skotsko": 179,
+    "1. slovensko": 332,
+    "1. španělsko": 140,
+    "1. wales": 771,
+    # Second divisions
+    "2. anglie": 40,
+    "2. česko": 346,
+    "2. francie": 62,
+    "2. itálie": 136,
+    "2. německo": 79,
+    "2. nizozemsko": 89,
+    "2. rakousko": 219,
+    "2. slovensko": 333,
+    "2. španělsko": 141,
+    # Third divisions
+    "3. francie": 63,
+    "3. česko msfl": 349,
+    "3. česko čfl": 348,
+    # Fourth divisions
+    "4. česko - sk. a": 350,
+    "4. česko - sk. b": 351,
+    "4. česko - sk. c": 352,
+    "4. česko - sk. d": 353,
+    "4. česko - sk. e": 354,
+    "4. česko - sk. f": 686,
+    # Women
+    "1. česko - ženy": 669,
+    "fortuna=liga ženy": 669,
+    # Domestic cups
+    "anglie - fa cup": 45,
+    "anglie - efl cup": 48,
+    "česko - pohár": 347,
+}
+
+# Patterns ordered longest-first so a specific entry ("1. itálie - ženy") is tried
+# before a shorter entry that is a substring of it ("1. itálie").
+_LEAGUE_PATTERNS: list[tuple[str, int]] = sorted(
+    _LEAGUE_MAP.items(), key=lambda item: len(item[0]), reverse=True
+)
+
+_DATE_WINDOW = 3  # days either side of the bet date to search
+
+# ANSI color helpers
+_R = "\033[0m"
+_B = "\033[1m"
+_DIM = "\033[2m"
+_GREEN = "\033[32m"
+_RED = "\033[31m"
+_YELLOW = "\033[33m"
+_CYAN = "\033[36m"
+_GRAY = "\033[90m"
+
+_OUTCOME_COLOR = {
+    "win": _GREEN,
+    "lose": _RED,
+    "void": _YELLOW,
+    "unknown": _GRAY,
+}
+
+
+class TicketVerdict(str, Enum):
+    """Overall judgement of a ticket, derived from the outcomes of its bets."""
+
+    TRUTHFUL = "truthful"
+    NOT_TRUTHFUL = "not truthful"
+    POSSIBLY_TRUTHFUL = "possibly truthful — unresolvable bets remain, check manually"
+    UNKNOWN = "unknown — could not resolve enough bets to decide"
+
+
+class BetOutcome(str, Enum):
+    """Result of a single bet once it has been compared with a fixture."""
+
+    WIN = "win"
+    LOSE = "lose"
+    VOID = "void"  # stake returned (e.g. WinLose on draw, integer goal line hit)
+    UNKNOWN = "unknown"  # fixture not found or unclassified bet
+
+
+@dataclass
+class ResolvedBet:
+    """A bet together with its outcome and the confidence of the fixture match."""
+
+    bet: Bet
+    outcome: BetOutcome
+    fixture_id: int | None = None
+    # Confidence breakdown (each component 0.0–1.0):
+    #   name_match     — how well team names matched (SequenceMatcher score)
+    #   date_proximity — 1.0 exact date, linear decay to 0.0 at _DATE_WINDOW days away
+    #   league_found   — 1.0 static map hit, 0.7 API fallback, 0.3 not found
+    #   match_finished — 1.0 if fixture status is terminal, 0.0 otherwise
+    confidence: float = 0.0
+    name_match: float = 0.0
+    date_proximity: float = 0.0
+    league_found: float = 0.0
+    match_finished: float = 0.0
+
+
+@dataclass
+class ResolvedTicket:
+    """All resolved bets of one ticket plus the verdict derived from them."""
+
+    ticket_id: int
+    bets: list[ResolvedBet] = field(default_factory=list)
+
+    @property
+    def verdict(self) -> TicketVerdict:
+        """Summarise the bet outcomes into a single verdict.
+
+        Bets classified as ``UnknownTicket`` cannot be evaluated; they never make the
+        ticket NOT_TRUTHFUL but downgrade an otherwise clean ticket to POSSIBLY_TRUTHFUL.
+        """
+        resolvable = [b for b in self.bets if not isinstance(b.bet, UnknownTicket)]
+        if not resolvable:
+            return TicketVerdict.UNKNOWN
+        outcomes = {b.outcome for b in resolvable}
+        if BetOutcome.LOSE in outcomes:
+            return TicketVerdict.NOT_TRUTHFUL
+        if BetOutcome.UNKNOWN in outcomes:
+            return TicketVerdict.UNKNOWN
+        if len(resolvable) < len(self.bets):  # some bets were UnknownTicket
+            return TicketVerdict.POSSIBLY_TRUTHFUL
+        return TicketVerdict.TRUTHFUL
+
+
+def _get(
+    url: str,
+    headers: dict,
+    params: dict,
+    retries: int = 3,
+    backoff: float = 60.0,
+    timeout: float = 30.0,
+) -> requests.Response:
+    """GET ``url`` and retry with linear back-off while the API answers HTTP 429.
+
+    Args:
+        url: Absolute URL to request.
+        headers: HTTP headers sent with every attempt.
+        params: Query-string parameters.
+        retries: Maximum number of attempts; values below 1 still make one attempt.
+        backoff: Base wait in seconds; attempt ``k`` waits ``backoff * k`` before retrying.
+        timeout: Seconds ``requests`` may wait on the server before raising.
+
+    Returns:
+        The first non-429 response, or the last 429 response when every attempt
+        was rate limited.
+
+    Raises:
+        requests.RequestException: Network failures and timeouts are not caught here.
+    """
+    attempts = max(1, retries)  # guarantees ``resp`` is bound even for retries <= 0
+    for attempt in range(1, attempts + 1):
+        resp = requests.get(url, headers=headers, params=params, timeout=timeout)
+        if resp.status_code != 429:
+            return resp
+        if attempt == attempts:
+            break  # nothing left to retry, so do not sleep before giving up
+        wait = backoff * attempt
+        print(f" !! rate limited — waiting {wait:.0f}s before retry ({attempt}/{retries})")
+        time.sleep(wait)
+    print(f" !! still rate limited after {retries} retries, giving up")
+    return resp
+
+
+def _response_items(resp: requests.Response) -> list[dict]:
+    """Return the ``response`` list of an API payload, printing API-level errors.
+
+    Without this, a payload that carries an ``errors`` entry and an empty
+    ``response`` would look exactly like "nothing found".
+    """
+    payload = resp.json()
+    errors = payload.get("errors")
+    if errors:
+        print(f" {_YELLOW}│ API reported errors: {errors}{_R}")
+    return payload.get("response", [])
+
+
+class TicketResolver:
+    """Resolves the bets of a ticket against fixtures fetched from api-football."""
+
+    def __init__(self, config: ResolverConfig):
+        self._headers = {"x-apisports-key": config.api_key}
+        # Cache maps (center_date_str, league_id | None) -> list of fixture dicts
+        self._fixture_cache: dict[tuple[str, int | None], list[dict]] = {}
+        # Cache maps league name -> (league_id, confidence)
+        self._league_cache: dict[str, tuple[int | None, float]] = {}
+
+    def resolve(self, ticket: Ticket) -> ResolvedTicket:
+        """Resolve every bet of ``ticket`` and return them with the ticket id."""
+        return ResolvedTicket(ticket_id=ticket.id, bets=[self._resolve_bet(bet) for bet in ticket.bets])
+
+    def _resolve_bet(self, bet: Bet) -> ResolvedBet:
+        """Find the fixture for ``bet``, evaluate it and print a progress trace."""
+        bet_type = type(bet).__name__
+        print(f"\n {_B}{_CYAN}┌─ [{bet_type}]{_R} {_B}{bet.team1Name} vs {bet.team2Name}{_R}"
+              f" {_DIM}{bet.date.strftime('%Y-%m-%d')} | {bet.league}{_R}")
+
+        if isinstance(bet, UnknownTicket):
+            print(f" {_GRAY}│ skipping — not implemented: {bet.raw_text!r}{_R}")
+            print(f" {_GRAY}└─ UNKNOWN{_R}")
+            return ResolvedBet(bet=bet, outcome=BetOutcome.UNKNOWN)
+
+        fixture, name_match, date_prox, league_conf = self._find_fixture(bet)
+        if fixture is None:
+            print(f" {_GRAY}└─ UNKNOWN — no fixture found{_R}")
+            return ResolvedBet(bet=bet, outcome=BetOutcome.UNKNOWN, league_found=league_conf)
+
+        home_name = fixture["teams"]["home"]["name"]
+        away_name = fixture["teams"]["away"]["name"]
+        finished = _is_finished(fixture)
+        confidence = round((name_match + date_prox + league_conf + finished) / 4, 3)
+        outcome = _evaluate_bet(bet, fixture) if finished == 1.0 else BetOutcome.UNKNOWN
+
+        goals = fixture["goals"]
+        color = _OUTCOME_COLOR.get(outcome.value, _GRAY)
+        print(f" {_DIM}│ matched #{fixture['fixture']['id']}: {home_name} vs {away_name}"
+              f" | {goals['home']}:{goals['away']} | {fixture['fixture']['status']['short']}"
+              f" | confidence {confidence} (name={name_match:.2f} date={date_prox:.2f} league={league_conf} finished={finished}){_R}")
+        print(f" {color}{_B}└─ {outcome.value.upper()}{_R}")
+
+        return ResolvedBet(
+            bet=bet,
+            outcome=outcome,
+            fixture_id=fixture["fixture"]["id"],
+            confidence=confidence,
+            name_match=round(name_match, 3),
+            date_proximity=round(date_prox, 3),
+            league_found=league_conf,
+            match_finished=finished,
+        )
+
+    def _find_fixture(self, bet: Bet) -> tuple[dict | None, float, float, float]:
+        """Returns (fixture, name_match, date_proximity, league_confidence)."""
+        center = bet.date.date()
+        date_str = center.strftime("%Y-%m-%d")
+        league_id, league_conf = self._resolve_league(bet.league)
+        cache_key = (date_str, league_id)
+
+        if cache_key not in self._fixture_cache:
+            date_from = (center - timedelta(days=_DATE_WINDOW)).strftime("%Y-%m-%d")
+            date_to = (center + timedelta(days=_DATE_WINDOW)).strftime("%Y-%m-%d")
+            params: dict = {"from": date_from, "to": date_to}
+            if league_id is not None:
+                params["league"] = league_id
+                # Assumes an autumn–spring season identified by its starting year (rollover in July)
+                params["season"] = center.year if center.month >= 7 else center.year - 1
+            # NOTE(review): without a league this sends only from/to — confirm the API
+            # accepts a date range on its own, otherwise this branch never finds anything.
+            print(f" {_GRAY}│ GET /fixtures {params}{_R}")
+            resp = _get(f"{_API_BASE}/fixtures", headers=self._headers, params=params)
+            resp.raise_for_status()
+            self._fixture_cache[cache_key] = _response_items(resp)
+            print(f" {_GRAY}│ {len(self._fixture_cache[cache_key])} fixtures returned (cached){_R}")
+        else:
+            print(f" {_GRAY}│ /fixtures (±{_DATE_WINDOW}d of {date_str}, league={league_id}) served from cache{_R}")
+
+        fixture, name_match, date_prox = _best_fixture_match(
+            self._fixture_cache[cache_key], bet.team1Name, bet.team2Name, center
+        )
+        return fixture, name_match, date_prox, league_conf
+
+    def _resolve_league(self, league_name: str) -> tuple[int | None, float]:
+        """Map a league name to ``(league_id, confidence)``.
+
+        Tries the static map first (confidence 1.0), then the first hit of the
+        ``/leagues`` search (0.7); returns ``(None, 0.3)`` when neither matches.
+        Results are cached per normalised league name.
+        """
+        key = league_name.lower().strip()
+        if key in self._league_cache:
+            return self._league_cache[key]
+
+        for pattern, league_id in _LEAGUE_PATTERNS:
+            if pattern in key:
+                print(f" {_GRAY}│ league {league_name!r} -> id={league_id} (static map){_R}")
+                self._league_cache[key] = (league_id, 1.0)
+                return league_id, 1.0
+
+        # Fall back to API search — lower confidence since first result is taken unverified
+        print(f" {_GRAY}│ GET /leagues search={league_name!r}{_R}")
+        resp = _get(f"{_API_BASE}/leagues", headers=self._headers, params={"search": league_name[:20]})
+        # Fail loudly like _find_fixture does, instead of caching an HTTP failure as "no league"
+        resp.raise_for_status()
+        results = _response_items(resp)
+        if results:
+            league_id = results[0]["league"]["id"]
+            league_found_name = results[0]["league"]["name"]
+            print(f" {_GRAY}│ matched {league_found_name!r} id={league_id} (API fallback, confidence=0.7){_R}")
+            self._league_cache[key] = (league_id, 0.7)
+            return league_id, 0.7
+
+        print(f" {_GRAY}│ no league found, searching fixtures by date only (confidence=0.3){_R}")
+        self._league_cache[key] = (None, 0.3)
+        return None, 0.3
+
+
+def _similarity(a: str, b: str) -> float:
+    """Case-insensitive SequenceMatcher ratio of two strings (0.0–1.0)."""
+    return SequenceMatcher(None, a.lower(), b.lower()).ratio()
+
+
+def _date_proximity(fixture: dict, center: date) -> float:
+    """1.0 on exact date, linear decay to 0.0 at _DATE_WINDOW days away."""
+    fixture_date = datetime.fromisoformat(fixture["fixture"]["date"].replace("Z", "+00:00")).date()
+    days_off = abs((fixture_date - center).days)
+    return max(0.0, 1.0 - days_off / _DATE_WINDOW)
+
+
+def _best_fixture_match(
+    fixtures: list[dict], team1: str, team2: str, center: date
+) -> tuple[dict | None, float, float]:
+    """Pick the fixture whose teams and date best match the bet.
+
+    Returns:
+        ``(fixture, name_score, date_proximity)`` of the best candidate. ``fixture``
+        is None when the best name score is not above 0.5; the two scores of that
+        rejected candidate are still returned (0.0 when ``fixtures`` is empty).
+    """
+    best, best_combined, best_name, best_date = None, 0.0, 0.0, 0.0
+    for candidate in fixtures:
+        home = candidate["teams"]["home"]["name"]
+        away = candidate["teams"]["away"]["name"]
+        # Score both orientations, since the ticket may list the teams the other way round
+        # NOTE(review): a fixture matched in swapped orientation is still evaluated with
+        # team1 treated as the home side — confirm whether that can happen in practice.
+        name_score = max(
+            _similarity(team1, home) + _similarity(team2, away),
+            _similarity(team1, away) + _similarity(team2, home),
+        ) / 2
+        date_prox = _date_proximity(candidate, center)
+        # Name similarity is the primary signal; date proximity is a tiebreaker
+        combined = name_score * 0.8 + date_prox * 0.2
+        if combined > best_combined:
+            best, best_combined, best_name, best_date = candidate, combined, name_score, date_prox
+    # Require minimum name similarity — date alone cannot rescue a bad name match
+    if best_name > 0.5:
+        return best, best_name, best_date
+    return None, best_name, best_date
+
+
+def _is_finished(fixture: dict) -> float:
+    """1.0 when the fixture's short status is one of FT/AET/PEN/AWD/WO, else 0.0."""
+    status = fixture.get("fixture", {}).get("status", {}).get("short", "")
+    return 1.0 if status in ("FT", "AET", "PEN", "AWD", "WO") else 0.0
+
+
+def _evaluate_bet(bet: Bet, fixture: dict) -> BetOutcome:
+    """Compare ``bet`` with the goals of ``fixture``.
+
+    Returns UNKNOWN when either goal count is missing or the bet type is not handled.
+    A draw may be spelled "X" or "0" in ``betType``.
+    """
+    goals = fixture.get("goals", {})
+    home = goals.get("home")
+    away = goals.get("away")
+
+    if home is None or away is None:
+        return BetOutcome.UNKNOWN
+
+    # NOTE(review): for AET/PEN fixtures these goals presumably include extra time —
+    # confirm whether bets should be settled on the 90-minute score instead.
+    is_draw = home == away
+    winner = "1" if home > away else "2"  # only meaningful when not a draw
+
+    if isinstance(bet, WinDrawLose):
+        if bet.betType in ("X", "0"):
+            return BetOutcome.WIN if is_draw else BetOutcome.LOSE
+        return BetOutcome.WIN if not is_draw and winner == bet.betType else BetOutcome.LOSE
+
+    if isinstance(bet, WinDrawLoseDouble):
+        if is_draw:
+            # Accept both draw spellings, matching the WinDrawLose branch, so a drawn
+            # "1X"/"X2" bet wins just like "10"/"02"
+            return BetOutcome.WIN if ("X" in bet.betType or "0" in bet.betType) else BetOutcome.LOSE
+        return BetOutcome.WIN if winner in bet.betType else BetOutcome.LOSE
+
+    if isinstance(bet, WinLose):
+        if is_draw:
+            return BetOutcome.VOID
+        return BetOutcome.WIN if winner == bet.betType else BetOutcome.LOSE
+
+    if isinstance(bet, BothTeamScored):
+        return BetOutcome.WIN if home > 0 and away > 0 else BetOutcome.LOSE
+
+    if isinstance(bet, GoalAmount):
+        total = home + away
+        if total == bet.line:
+            return BetOutcome.VOID
+        won = total > bet.line if bet.over else total < bet.line
+        return BetOutcome.WIN if won else BetOutcome.LOSE
+
+    if isinstance(bet, GoalHandicap):
+        h_home = home + (bet.handicap_amount if bet.team_bet == "1" else 0.0)
+        h_away = away + (bet.handicap_amount if bet.team_bet == "2" else 0.0)
+        if h_home == h_away:
+            return BetOutcome.VOID
+        actual_winner = "1" if h_home > h_away else "2"
+        return BetOutcome.WIN if actual_winner == bet.team_bet else BetOutcome.LOSE
+
+    return BetOutcome.UNKNOWN