Fuzzy match leagues

This commit is contained in:
2026-03-25 20:25:34 +01:00
parent 5de0d57612
commit 569b8ee4f8
3 changed files with 42 additions and 30 deletions

View File

@@ -63,7 +63,7 @@ resolver:
img_classifier:
target_path: data/screenshots/
log_level: INFO # set to DEBUG to see raw classifier and resolver output
log_level: DEBUG # set to DEBUG to see raw classifier and resolver output
api:
host: 0.0.0.0

View File

@@ -1,4 +1,5 @@
import argparse
import logging
import re as _re
import shutil
from datetime import datetime
@@ -205,6 +206,9 @@ def main() -> None:
print(e)
return
log_level = getattr(logging, config.log_level.upper(), logging.INFO)
logging.basicConfig(level=log_level, format="%(message)s")
# Always load the testing data for now; this will be changed later.
data = Links(config)
data.ret_links()

View File

@@ -27,6 +27,7 @@ logger = logging.getLogger(__name__)
_API_BASE = "https://v3.football.api-sports.io"
_DATE_WINDOW = 3 # days either side of the bet date to search
_NAME_THRESHOLD = 0.5
class TicketVerdict(str, Enum):
@@ -211,26 +212,26 @@ class TicketResolver:
if key in self._league_cache:
return self._league_cache[key]
# Use longest-match so "1. itálie - ženy" beats "1. itálie"
best_pattern, best_id = max(
((p, lid) for p, lid in self._league_map.items() if p in key),
key=lambda t: len(t[0]),
default=(None, None),
)
if best_id is not None:
_ansi.log(_ansi.gray(f" │ league {league_name!r} -> id={best_id} (static map, pattern={best_pattern!r})"))
# Static map — fuzzy match
patterns = list(self._league_map.keys())
idx, score = _best_match(key, patterns)
if idx is not None:
best_id = self._league_map[patterns[idx]]
_ansi.log(_ansi.gray(f" │ league {league_name!r} -> id={best_id} (static map, pattern={patterns[idx]!r}, score={score:.2f})"))
self._league_cache[key] = (best_id, 1.0)
return best_id, 1.0
# Fall back to API search — lower confidence since first result is taken unverified
# API fallback — fuzzy match all results
_ansi.log(_ansi.gray(f" │ GET /leagues search={league_name!r}"))
resp = _get(f"{_API_BASE}/leagues", headers=self._headers, params={"search": league_name[:20]})
results = resp.json().get("response", [])
if results:
league_id = results[0]["league"]["id"]
league_found_name = results[0]["league"]["name"]
_ansi.log(
_ansi.gray(f" │ matched {league_found_name!r} id={league_id} (API fallback, confidence=0.7)"))
names = [r["league"]["name"].lower() for r in results]
idx, score = _best_match(key, names)
if idx is not None:
league_id = results[idx]["league"]["id"]
league_found_name = results[idx]["league"]["name"]
_ansi.log(_ansi.gray(f" │ matched {league_found_name!r} id={league_id} (API fallback, score={score:.2f}, confidence=0.7)"))
self._league_cache[key] = (league_id, 0.7)
return league_id, 0.7
@@ -282,6 +283,16 @@ def _similarity(a: str, b: str) -> float:
return SequenceMatcher(None, a.lower(), b.lower()).ratio()
def _best_match(query: str, candidates: list[str], threshold: float = _NAME_THRESHOLD) -> tuple[int | None, float]:
    """Locate the candidate that is most similar to ``query``.

    Every candidate is scored with ``_similarity``. The result is
    ``(index, score)`` for the highest-scoring candidate when that score
    reaches ``threshold``; otherwise it is ``(None, score)``. An empty
    candidate list gives ``(None, 0.0)``. On equal scores the earliest
    candidate wins.
    """
    winner: int | None = None
    top_score = 0.0
    for position, candidate in enumerate(candidates):
        ratio = _similarity(query, candidate)
        # Strict ">" keeps the earliest candidate when scores tie.
        if winner is None or ratio > top_score:
            winner, top_score = position, ratio

    if winner is not None and top_score >= threshold:
        return winner, top_score
    return None, top_score
def _date_proximity(fixture: dict[str, Any], center: date) -> float:
"""1.0 on exact date, linear decay to 0.0 at _DATE_WINDOW days away."""
fixture_date = datetime.fromisoformat(fixture["fixture"]["date"].replace("Z", "+00:00")).date()
@@ -291,21 +302,18 @@ def _date_proximity(fixture: dict[str, Any], center: date) -> float:
def _best_fixture_match(fixtures: list[dict[str, Any]], team1: str, team2: str, center: date) -> tuple[dict[str, Any] | None, float, float]:
    """Pick the fixture that best matches the two team names and the bet date.

    Each fixture gets a name score (the mean of ``_similarity(team1, home)``
    and ``_similarity(team2, away)``) and a date proximity taken from
    ``_date_proximity(fixture, center)``. Fixtures are ranked by
    ``name_score * 0.8 + date_proximity * 0.2``; on equal rank the earliest
    fixture in the list wins.

    Returns:
        ``(fixture, name_score, date_proximity)`` for the top-ranked fixture.
        The fixture slot is ``None`` when its name score is below
        ``_NAME_THRESHOLD``; the two scores are still reported. An empty
        ``fixtures`` list yields ``(None, 0.0, 0.0)``.
    """
    if not fixtures:
        return None, 0.0, 0.0

    best_fixture: dict[str, Any] | None = None
    best_combined = best_name = best_date = 0.0
    for fixture in fixtures:
        teams = fixture["teams"]
        name_score = (
            _similarity(team1, teams["home"]["name"])
            + _similarity(team2, teams["away"]["name"])
        ) / 2
        date_prox = _date_proximity(fixture, center)
        # Name similarity is the primary signal; date proximity is a tiebreaker
        combined = name_score * 0.8 + date_prox * 0.2
        # Strict ">" keeps the earliest fixture when combined scores tie.
        if best_fixture is None or combined > best_combined:
            best_fixture = fixture
            best_combined = combined
            best_name = name_score
            best_date = date_prox

    # Require minimum name similarity — date alone cannot rescue a bad name match
    if best_name >= _NAME_THRESHOLD:
        return best_fixture, best_name, best_date
    return None, best_name, best_date
def _is_finished(fixture: dict[str, Any]) -> float: