From 569b8ee4f8e48ea311f10ee815e7ff20c9d05bde Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janek=20Hlavat=C3=BD?= Date: Wed, 25 Mar 2026 20:25:34 +0100 Subject: [PATCH] Fuzzy match leagues --- beaky-backend/config/application.yml | 2 +- beaky-backend/src/beaky/cli.py | 4 ++ beaky-backend/src/beaky/resolvers/resolver.py | 66 +++++++++++-------- 3 files changed, 42 insertions(+), 30 deletions(-) diff --git a/beaky-backend/config/application.yml b/beaky-backend/config/application.yml index efa7724..3085b93 100644 --- a/beaky-backend/config/application.yml +++ b/beaky-backend/config/application.yml @@ -63,7 +63,7 @@ resolver: img_classifier: target_path: data/screenshots/ -log_level: INFO # set to DEBUG to see raw classifier and resolver output +log_level: DEBUG # set to DEBUG to see raw classifier and resolver output api: host: 0.0.0.0 diff --git a/beaky-backend/src/beaky/cli.py b/beaky-backend/src/beaky/cli.py index 6ae9a7b..8254834 100644 --- a/beaky-backend/src/beaky/cli.py +++ b/beaky-backend/src/beaky/cli.py @@ -1,4 +1,5 @@ import argparse +import logging import re as _re import shutil from datetime import datetime @@ -205,6 +206,9 @@ def main() -> None: print(e) return + log_level = getattr(logging, config.log_level.upper(), logging.INFO) + logging.basicConfig(level=log_level, format="%(message)s") + # always load testing data, we will modify that later data = Links(config) data.ret_links() diff --git a/beaky-backend/src/beaky/resolvers/resolver.py b/beaky-backend/src/beaky/resolvers/resolver.py index 0538cea..0f1511b 100644 --- a/beaky-backend/src/beaky/resolvers/resolver.py +++ b/beaky-backend/src/beaky/resolvers/resolver.py @@ -27,6 +27,7 @@ logger = logging.getLogger(__name__) _API_BASE = "https://v3.football.api-sports.io" _DATE_WINDOW = 3 # days either side of the bet date to search +_NAME_THRESHOLD = 0.5 class TicketVerdict(str, Enum): @@ -211,28 +212,28 @@ class TicketResolver: if key in self._league_cache: return self._league_cache[key] - # Use longest-match so "1. itálie - ženy" beats "1. itálie" - best_pattern, best_id = max( - ((p, lid) for p, lid in self._league_map.items() if p in key), - key=lambda t: len(t[0]), - default=(None, None), - ) - if best_id is not None: - _ansi.log(_ansi.gray(f" │ league {league_name!r} -> id={best_id} (static map, pattern={best_pattern!r})")) + # Static map — fuzzy match + patterns = list(self._league_map.keys()) + idx, score = _best_match(key, patterns) + if idx is not None: + best_id = self._league_map[patterns[idx]] + _ansi.log(_ansi.gray(f" │ league {league_name!r} -> id={best_id} (static map, pattern={patterns[idx]!r}, score={score:.2f})")) self._league_cache[key] = (best_id, 1.0) return best_id, 1.0 - # Fall back to API search — lower confidence since first result is taken unverified + # API fallback — fuzzy match all results _ansi.log(_ansi.gray(f" │ GET /leagues search={league_name!r}")) resp = _get(f"{_API_BASE}/leagues", headers=self._headers, params={"search": league_name[:20]}) results = resp.json().get("response", []) if results: - league_id = results[0]["league"]["id"] - league_found_name = results[0]["league"]["name"] - _ansi.log( - _ansi.gray(f" │ matched {league_found_name!r} id={league_id} (API fallback, confidence=0.7)")) - self._league_cache[key] = (league_id, 0.7) - return league_id, 0.7 + names = [r["league"]["name"].lower() for r in results] + idx, score = _best_match(key, names) + if idx is not None: + league_id = results[idx]["league"]["id"] + league_found_name = results[idx]["league"]["name"] + _ansi.log(_ansi.gray(f" │ matched {league_found_name!r} id={league_id} (API fallback, score={score:.2f}, confidence=0.7)")) + self._league_cache[key] = (league_id, 0.7) + return league_id, 0.7 _ansi.log(_ansi.gray(" │ no league found, searching fixtures by date only (confidence=0.3)")) self._league_cache[key] = (None, 0.3) @@ -282,6 +283,16 @@ def _similarity(a: str, b: str) -> float: return SequenceMatcher(None, a.lower(), b.lower()).ratio() +def _best_match(query: str, candidates: list[str], threshold: float = _NAME_THRESHOLD) -> tuple[int | None, float]: + """Return (index, score) of the best fuzzy match, or (None, score) if below threshold.""" + if not candidates: + return None, 0.0 + scores = [_similarity(query, c) for c in candidates] + best_idx = max(range(len(scores)), key=lambda i: scores[i]) + score = scores[best_idx] + return (best_idx, score) if score >= threshold else (None, score) + + def _date_proximity(fixture: dict[str, Any], center: date) -> float: """1.0 on exact date, linear decay to 0.0 at _DATE_WINDOW days away.""" fixture_date = datetime.fromisoformat(fixture["fixture"]["date"].replace("Z", "+00:00")).date() @@ -291,21 +302,18 @@ def _date_proximity(fixture: dict[str, Any], center: date) -> float: def _best_fixture_match(fixtures: list[dict[str, Any]], team1: str, team2: str, center: date) -> tuple[dict[str, Any] | None, float, float]: """Returns (best_fixture, name_score, date_proximity) or (None, 0, 0) if no good match.""" - best, best_combined, best_name, best_date = None, 0.0, 0.0, 0.0 - for f in fixtures: - home = f["teams"]["home"]["name"] - away = f["teams"]["away"]["name"] - name_score = (_similarity(team1, home) + _similarity(team2, away)) / 2 - date_prox = _date_proximity(f, center) - # Name similarity is the primary signal; date proximity is a tiebreaker - combined = name_score * 0.8 + date_prox * 0.2 - if combined > best_combined: - best_combined = combined - best_name = name_score - best_date = date_prox - best = f + if not fixtures: + return None, 0.0, 0.0 + # Name similarity is the primary signal; date proximity is a tiebreaker + home_names = [f["teams"]["home"]["name"] for f in fixtures] + away_names = [f["teams"]["away"]["name"] for f in fixtures] + name_scores = [(_similarity(team1, h) + _similarity(team2, a)) / 2 for h, a in zip(home_names, away_names)] + date_proxies = [_date_proximity(f, center) for f in fixtures] + combined = [n * 0.8 + d * 0.2 for n, d in zip(name_scores, date_proxies)] + best_idx = max(range(len(combined)), key=lambda i: combined[i]) + name, date = name_scores[best_idx], date_proxies[best_idx] # Require minimum name similarity — date alone cannot rescue a bad name match - return (best, best_name, best_date) if best_name > 0.5 else (None, best_name, best_date) + return (fixtures[best_idx], name, date) if name >= _NAME_THRESHOLD else (None, name, date) def _is_finished(fixture: dict[str, Any]) -> float: