Fuzzy match leagues
This commit is contained in:
@@ -63,7 +63,7 @@ resolver:
|
||||
img_classifier:
|
||||
target_path: data/screenshots/
|
||||
|
||||
log_level: INFO # set to DEBUG to see raw classifier and resolver output
|
||||
log_level: DEBUG # set to DEBUG to see raw classifier and resolver output
|
||||
|
||||
api:
|
||||
host: 0.0.0.0
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import argparse
|
||||
import logging
|
||||
import re as _re
|
||||
import shutil
|
||||
from datetime import datetime
|
||||
@@ -205,6 +206,9 @@ def main() -> None:
|
||||
print(e)
|
||||
return
|
||||
|
||||
log_level = getattr(logging, config.log_level.upper(), logging.INFO)
|
||||
logging.basicConfig(level=log_level, format="%(message)s")
|
||||
|
||||
# always load testing data, we will modify that later
|
||||
data = Links(config)
|
||||
data.ret_links()
|
||||
|
||||
@@ -27,6 +27,7 @@ logger = logging.getLogger(__name__)
|
||||
_API_BASE = "https://v3.football.api-sports.io"
|
||||
|
||||
_DATE_WINDOW = 3 # days either side of the bet date to search
|
||||
_NAME_THRESHOLD = 0.5
|
||||
|
||||
|
||||
class TicketVerdict(str, Enum):
|
||||
@@ -211,28 +212,28 @@ class TicketResolver:
|
||||
if key in self._league_cache:
|
||||
return self._league_cache[key]
|
||||
|
||||
# Use longest-match so "1. itálie - ženy" beats "1. itálie"
|
||||
best_pattern, best_id = max(
|
||||
((p, lid) for p, lid in self._league_map.items() if p in key),
|
||||
key=lambda t: len(t[0]),
|
||||
default=(None, None),
|
||||
)
|
||||
if best_id is not None:
|
||||
_ansi.log(_ansi.gray(f" │ league {league_name!r} -> id={best_id} (static map, pattern={best_pattern!r})"))
|
||||
# Static map — fuzzy match
|
||||
patterns = list(self._league_map.keys())
|
||||
idx, score = _best_match(key, patterns)
|
||||
if idx is not None:
|
||||
best_id = self._league_map[patterns[idx]]
|
||||
_ansi.log(_ansi.gray(f" │ league {league_name!r} -> id={best_id} (static map, pattern={patterns[idx]!r}, score={score:.2f})"))
|
||||
self._league_cache[key] = (best_id, 1.0)
|
||||
return best_id, 1.0
|
||||
|
||||
# Fall back to API search — lower confidence since first result is taken unverified
|
||||
# API fallback — fuzzy match all results
|
||||
_ansi.log(_ansi.gray(f" │ GET /leagues search={league_name!r}"))
|
||||
resp = _get(f"{_API_BASE}/leagues", headers=self._headers, params={"search": league_name[:20]})
|
||||
results = resp.json().get("response", [])
|
||||
if results:
|
||||
league_id = results[0]["league"]["id"]
|
||||
league_found_name = results[0]["league"]["name"]
|
||||
_ansi.log(
|
||||
_ansi.gray(f" │ matched {league_found_name!r} id={league_id} (API fallback, confidence=0.7)"))
|
||||
self._league_cache[key] = (league_id, 0.7)
|
||||
return league_id, 0.7
|
||||
names = [r["league"]["name"].lower() for r in results]
|
||||
idx, score = _best_match(key, names)
|
||||
if idx is not None:
|
||||
league_id = results[idx]["league"]["id"]
|
||||
league_found_name = results[idx]["league"]["name"]
|
||||
_ansi.log(_ansi.gray(f" │ matched {league_found_name!r} id={league_id} (API fallback, score={score:.2f}, confidence=0.7)"))
|
||||
self._league_cache[key] = (league_id, 0.7)
|
||||
return league_id, 0.7
|
||||
|
||||
_ansi.log(_ansi.gray(" │ no league found, searching fixtures by date only (confidence=0.3)"))
|
||||
self._league_cache[key] = (None, 0.3)
|
||||
@@ -282,6 +283,16 @@ def _similarity(a: str, b: str) -> float:
|
||||
return SequenceMatcher(None, a.lower(), b.lower()).ratio()
|
||||
|
||||
|
||||
def _best_match(query: str, candidates: list[str], threshold: float = _NAME_THRESHOLD) -> tuple[int | None, float]:
    """Pick the candidate that is most similar to ``query``.

    Each candidate is scored with ``_similarity``; when several candidates
    share the top score, the earliest one wins.

    Args:
        query: The string to look up.
        candidates: Strings to compare against ``query``.
        threshold: Minimum score the winner must reach to be accepted.

    Returns:
        ``(index, score)`` of the winner when its score is at least
        ``threshold``; ``(None, score)`` when the best score falls short;
        ``(None, 0.0)`` when ``candidates`` is empty.
    """
    top_idx: int | None = None
    top_score = 0.0
    for pos, candidate in enumerate(candidates):
        current = _similarity(query, candidate)
        # Strict ">" keeps the first of several equally good candidates.
        if top_idx is None or current > top_score:
            top_idx, top_score = pos, current

    if top_idx is not None and top_score >= threshold:
        return top_idx, top_score
    return None, top_score
|
||||
|
||||
|
||||
def _date_proximity(fixture: dict[str, Any], center: date) -> float:
|
||||
"""1.0 on exact date, linear decay to 0.0 at _DATE_WINDOW days away."""
|
||||
fixture_date = datetime.fromisoformat(fixture["fixture"]["date"].replace("Z", "+00:00")).date()
|
||||
@@ -291,21 +302,18 @@ def _date_proximity(fixture: dict[str, Any], center: date) -> float:
|
||||
|
||||
def _best_fixture_match(fixtures: list[dict[str, Any]], team1: str, team2: str, center: date) -> tuple[dict[str, Any] | None, float, float]:
|
||||
"""Return (best_fixture, name_score, date_proximity); best_fixture is None when there are no fixtures or the best name similarity is too low (the scores are still returned)."""
|
||||
best, best_combined, best_name, best_date = None, 0.0, 0.0, 0.0
|
||||
for f in fixtures:
|
||||
home = f["teams"]["home"]["name"]
|
||||
away = f["teams"]["away"]["name"]
|
||||
name_score = (_similarity(team1, home) + _similarity(team2, away)) / 2
|
||||
date_prox = _date_proximity(f, center)
|
||||
# Name similarity is the primary signal; date proximity is a tiebreaker
|
||||
combined = name_score * 0.8 + date_prox * 0.2
|
||||
if combined > best_combined:
|
||||
best_combined = combined
|
||||
best_name = name_score
|
||||
best_date = date_prox
|
||||
best = f
|
||||
if not fixtures:
|
||||
return None, 0.0, 0.0
|
||||
# Name similarity is the primary signal; date proximity is a tiebreaker
|
||||
home_names = [f["teams"]["home"]["name"] for f in fixtures]
|
||||
away_names = [f["teams"]["away"]["name"] for f in fixtures]
|
||||
name_scores = [(_similarity(team1, h) + _similarity(team2, a)) / 2 for h, a in zip(home_names, away_names)]
|
||||
date_proxies = [_date_proximity(f, center) for f in fixtures]
|
||||
combined = [n * 0.8 + d * 0.2 for n, d in zip(name_scores, date_proxies)]
|
||||
best_idx = max(range(len(combined)), key=lambda i: combined[i])
|
||||
name, date = name_scores[best_idx], date_proxies[best_idx]
|
||||
# Require minimum name similarity — date alone cannot rescue a bad name match
|
||||
return (best, best_name, best_date) if best_name > 0.5 else (None, best_name, best_date)
|
||||
return (fixtures[best_idx], name, date) if name >= _NAME_THRESHOLD else (None, name, date)
|
||||
|
||||
|
||||
def _is_finished(fixture: dict[str, Any]) -> float:
|
||||
|
||||
Reference in New Issue
Block a user