Fuzzy match leagues

This commit is contained in:
2026-03-25 20:25:34 +01:00
parent 5de0d57612
commit 569b8ee4f8
3 changed files with 42 additions and 30 deletions

View File

@@ -63,7 +63,7 @@ resolver:
img_classifier: img_classifier:
target_path: data/screenshots/ target_path: data/screenshots/
log_level: INFO # set to DEBUG to see raw classifier and resolver output log_level: DEBUG # set to DEBUG to see raw classifier and resolver output
api: api:
host: 0.0.0.0 host: 0.0.0.0

View File

@@ -1,4 +1,5 @@
import argparse import argparse
import logging
import re as _re import re as _re
import shutil import shutil
from datetime import datetime from datetime import datetime
@@ -205,6 +206,9 @@ def main() -> None:
print(e) print(e)
return return
log_level = getattr(logging, config.log_level.upper(), logging.INFO)
logging.basicConfig(level=log_level, format="%(message)s")
# always load testing data, we will modify that later # always load testing data, we will modify that later
data = Links(config) data = Links(config)
data.ret_links() data.ret_links()

View File

@@ -27,6 +27,7 @@ logger = logging.getLogger(__name__)
_API_BASE = "https://v3.football.api-sports.io" _API_BASE = "https://v3.football.api-sports.io"
_DATE_WINDOW = 3 # days either side of the bet date to search _DATE_WINDOW = 3 # days either side of the bet date to search
_NAME_THRESHOLD = 0.5
class TicketVerdict(str, Enum): class TicketVerdict(str, Enum):
@@ -211,28 +212,28 @@ class TicketResolver:
if key in self._league_cache: if key in self._league_cache:
return self._league_cache[key] return self._league_cache[key]
# Use longest-match so "1. itálie - ženy" beats "1. itálie" # Static map — fuzzy match
best_pattern, best_id = max( patterns = list(self._league_map.keys())
((p, lid) for p, lid in self._league_map.items() if p in key), idx, score = _best_match(key, patterns)
key=lambda t: len(t[0]), if idx is not None:
default=(None, None), best_id = self._league_map[patterns[idx]]
) _ansi.log(_ansi.gray(f" │ league {league_name!r} -> id={best_id} (static map, pattern={patterns[idx]!r}, score={score:.2f})"))
if best_id is not None:
_ansi.log(_ansi.gray(f" │ league {league_name!r} -> id={best_id} (static map, pattern={best_pattern!r})"))
self._league_cache[key] = (best_id, 1.0) self._league_cache[key] = (best_id, 1.0)
return best_id, 1.0 return best_id, 1.0
# Fall back to API search — lower confidence since first result is taken unverified # API fallback — fuzzy match all results
_ansi.log(_ansi.gray(f" │ GET /leagues search={league_name!r}")) _ansi.log(_ansi.gray(f" │ GET /leagues search={league_name!r}"))
resp = _get(f"{_API_BASE}/leagues", headers=self._headers, params={"search": league_name[:20]}) resp = _get(f"{_API_BASE}/leagues", headers=self._headers, params={"search": league_name[:20]})
results = resp.json().get("response", []) results = resp.json().get("response", [])
if results: if results:
league_id = results[0]["league"]["id"] names = [r["league"]["name"].lower() for r in results]
league_found_name = results[0]["league"]["name"] idx, score = _best_match(key, names)
_ansi.log( if idx is not None:
_ansi.gray(f" │ matched {league_found_name!r} id={league_id} (API fallback, confidence=0.7)")) league_id = results[idx]["league"]["id"]
self._league_cache[key] = (league_id, 0.7) league_found_name = results[idx]["league"]["name"]
return league_id, 0.7 _ansi.log(_ansi.gray(f" │ matched {league_found_name!r} id={league_id} (API fallback, score={score:.2f}, confidence=0.7)"))
self._league_cache[key] = (league_id, 0.7)
return league_id, 0.7
_ansi.log(_ansi.gray(" │ no league found, searching fixtures by date only (confidence=0.3)")) _ansi.log(_ansi.gray(" │ no league found, searching fixtures by date only (confidence=0.3)"))
self._league_cache[key] = (None, 0.3) self._league_cache[key] = (None, 0.3)
@@ -282,6 +283,16 @@ def _similarity(a: str, b: str) -> float:
return SequenceMatcher(None, a.lower(), b.lower()).ratio() return SequenceMatcher(None, a.lower(), b.lower()).ratio()
def _best_match(query: str, candidates: list[str], threshold: float = _NAME_THRESHOLD) -> tuple[int | None, float]:
    """Pick the candidate string most similar to *query*.

    Every candidate is scored with ``_similarity``. The result is
    ``(index, score)`` for the top-scoring candidate when that score reaches
    *threshold*, and ``(None, score)`` when it does not. With no candidates
    the result is ``(None, 0.0)``. When several candidates tie for the top
    score, the earliest one wins.
    """
    top_idx: int | None = None
    top_score = 0.0
    for pos, candidate in enumerate(candidates):
        current = _similarity(query, candidate)
        # Strict ">" keeps the first of several equally good candidates.
        if top_idx is None or current > top_score:
            top_idx, top_score = pos, current
    if top_idx is not None and top_score >= threshold:
        return top_idx, top_score
    return None, top_score
def _date_proximity(fixture: dict[str, Any], center: date) -> float: def _date_proximity(fixture: dict[str, Any], center: date) -> float:
"""1.0 on exact date, linear decay to 0.0 at _DATE_WINDOW days away.""" """1.0 on exact date, linear decay to 0.0 at _DATE_WINDOW days away."""
fixture_date = datetime.fromisoformat(fixture["fixture"]["date"].replace("Z", "+00:00")).date() fixture_date = datetime.fromisoformat(fixture["fixture"]["date"].replace("Z", "+00:00")).date()
@@ -291,21 +302,18 @@ def _date_proximity(fixture: dict[str, Any], center: date) -> float:
def _best_fixture_match(fixtures: list[dict[str, Any]], team1: str, team2: str, center: date) -> tuple[dict[str, Any] | None, float, float]:
    """Select the fixture that best matches the two team names and the date.

    Each fixture gets a name score (mean of ``_similarity(team1, home)`` and
    ``_similarity(team2, away)``) and a date score from ``_date_proximity``.
    Fixtures are ranked by ``name * 0.8 + date * 0.2``; the first fixture with
    the highest combined value is the candidate.

    Returns:
        ``(fixture, name_score, date_proximity)`` for the candidate when its
        name score is at least ``_NAME_THRESHOLD``; otherwise
        ``(None, name_score, date_proximity)``. An empty *fixtures* list
        yields ``(None, 0.0, 0.0)``.
    """
    if not fixtures:
        return None, 0.0, 0.0

    # Name similarity is the primary signal; date proximity is a tiebreaker
    name_scores = [
        (_similarity(team1, f["teams"]["home"]["name"]) + _similarity(team2, f["teams"]["away"]["name"])) / 2
        for f in fixtures
    ]
    date_scores = [_date_proximity(f, center) for f in fixtures]

    ranked = [
        (name_score * 0.8 + date_score * 0.2, name_score, date_score, fixture)
        for name_score, date_score, fixture in zip(name_scores, date_scores, fixtures)
    ]
    # max() returns the first maximal element, so ties go to the earliest fixture.
    # Locals are named best_* so they do not shadow the imported `date` type.
    _, best_name, best_date, best_fixture = max(ranked, key=lambda entry: entry[0])

    # Require minimum name similarity — date alone cannot rescue a bad name match
    if best_name >= _NAME_THRESHOLD:
        return best_fixture, best_name, best_date
    return None, best_name, best_date
def _is_finished(fixture: dict[str, Any]) -> float: def _is_finished(fixture: dict[str, Any]) -> float: