From d5211572a514850c1a9de2073e33d9643ed5ee3e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janek=20Hlavat=C3=BD?= Date: Wed, 25 Mar 2026 19:04:17 +0100 Subject: [PATCH] Logging --- config/application.yml | 2 + src/beaky/_ansi.py | 9 ++++ src/beaky/config.py | 1 + src/beaky/image_classifier/classifier.py | 19 ++++---- src/beaky/link_classifier/classifier.py | 5 +- src/beaky/resolvers/resolver.py | 58 +++++++++++++----------- src/beaky/scanner/scanner.py | 7 ++- src/beaky/screenshotter/screenshotter.py | 5 +- 8 files changed, 67 insertions(+), 39 deletions(-) diff --git a/config/application.yml b/config/application.yml index 93d6049..b79a200 100644 --- a/config/application.yml +++ b/config/application.yml @@ -62,3 +62,5 @@ resolver: img_classifier: target_path: data/screenshots/ + +log_level: INFO # set to DEBUG to see raw classifier and resolver output diff --git a/src/beaky/_ansi.py b/src/beaky/_ansi.py index 1b032c5..5c1429d 100644 --- a/src/beaky/_ansi.py +++ b/src/beaky/_ansi.py @@ -1,5 +1,14 @@ from __future__ import annotations +import logging + +_logger = logging.getLogger("beaky") + + +def log(text: str) -> None: + """Emit a (possibly ANSI-colored) message at DEBUG level.""" + _logger.debug("%s", text) + def bold(text: str) -> str: return f"\033[1m{text}\033[0m" diff --git a/src/beaky/config.py b/src/beaky/config.py index adfcf32..560bae0 100644 --- a/src/beaky/config.py +++ b/src/beaky/config.py @@ -11,3 +11,4 @@ class Config: screenshotter: ScreenshotterConfig resolver: ResolverConfig img_classifier: ImgClassifierConfig + log_level: str = "INFO" diff --git a/src/beaky/image_classifier/classifier.py b/src/beaky/image_classifier/classifier.py index cfd2041..099760f 100644 --- a/src/beaky/image_classifier/classifier.py +++ b/src/beaky/image_classifier/classifier.py @@ -1,9 +1,12 @@ import datetime +import logging import re from pathlib import Path import pytesseract +logger = logging.getLogger(__name__) + from beaky.datamodels.ticket import ( Advance, Bet, @@ -24,7 +27,7 @@ def img_to_text(path: str) -> str: try: return pytesseract.image_to_string(path, lang="ces").strip() except Exception as e: - print(f"Error processing {path}: {e}") + logger.error("Error processing %s: %s", path, e) return "" @@ -151,7 +154,7 @@ def classify(text: str) -> list[Bet]: sport_end_pattern = re.compile(r"^(Fotbal|Hokej|Tenis|Basketbal|Florbal|Volejbal|E-sport|Šipky)\s*/", re.IGNORECASE) for ln in lines: - print(f"Processing line: '{ln}'") + logger.debug("Processing line: '%s'", ln) is_start = date_start_pattern.search(ln) is_end = sport_end_pattern.match(ln) @@ -159,7 +162,7 @@ def classify(text: str) -> list[Bet]: # If we somehow hit a start while already in a block (missing end marker fallback), # save the current block before starting a new one. if current_block: - print(f"ERROR: SUS block was not properly ended and new block start detected: '{ln}'") + logger.warning("Block not properly ended, new block start detected: '%s'", ln) blocks.append(current_block) current_block = [ln] in_block = True @@ -178,7 +181,7 @@ def classify(text: str) -> list[Bet]: else: # We are outside a block. This is noise (e.g. "© osmifinále / 2.zápas 0:1" or "170"). # We simply ignore it and do nothing. - print(f"INFO: Ignoring line outside of any block: '{ln}'") + logger.debug("Ignoring line outside of any block: '%s'", ln) pass # Catch any dangling block at the very end of the document @@ -201,18 +204,16 @@ def img_classify(paths: list[str], ticket_id: int) -> Ticket: for file in paths: file_path = Path(file) if not (file_path.is_file() and file_path.suffix.lower() in valid_extensions): - print(f"Skipping invalid file: {file}") + logger.warning("Skipping invalid file: %s", file) continue extracted_text = img_to_text(str(file_path)) - print(f"--- Extracted Text from {file_path.name} ---") - # print(extracted_text) + logger.debug("Extracted text from %s", file_path.name) try: - print("--- START OF Classification Result ---") result = classify(extracted_text) except Exception as exc: - print(f"classify() error for {file_path}: {exc}") + logger.error("classify() error for %s: %s", file_path, exc) result = [ UnknownBet( ticketType=BetType.UNKNOWN, diff --git a/src/beaky/link_classifier/classifier.py b/src/beaky/link_classifier/classifier.py index 4eea04e..8df65f2 100644 --- a/src/beaky/link_classifier/classifier.py +++ b/src/beaky/link_classifier/classifier.py @@ -1,9 +1,12 @@ +import logging import re from datetime import datetime from typing import Any from playwright.sync_api import Page, sync_playwright +logger = logging.getLogger(__name__) + from beaky.datamodels.ticket import ( Bet, BetType, @@ -151,7 +154,7 @@ class LinkClassifier: page.wait_for_timeout(500) result = Ticket(id=link.id, bets=_extract_legs(page, link.date)) except Exception as e: - print(f"Error classifying link {link.id}: {e}") + logger.error("Error classifying link %d: %s", link.id, e) finally: page.close() browser.close() diff --git a/src/beaky/resolvers/resolver.py b/src/beaky/resolvers/resolver.py index 7a3fac5..638f145 100644 --- a/src/beaky/resolvers/resolver.py +++ b/src/beaky/resolvers/resolver.py @@ -1,10 +1,14 @@ +import logging import time -from dataclasses import dataclass, field +from dataclasses import field from datetime import date, datetime, timedelta from difflib import SequenceMatcher from enum import Enum from typing import Any +from pydantic import ConfigDict, SerializeAsAny +from pydantic.dataclasses import dataclass + import diskcache import requests @@ -18,6 +22,8 @@ from beaky.datamodels.ticket import ( ) from beaky.resolvers.config import ResolverConfig +logger = logging.getLogger(__name__) + _API_BASE = "https://v3.football.api-sports.io" _DATE_WINDOW = 3 # days either side of the bet date to search @@ -30,9 +36,9 @@ class TicketVerdict(str, Enum): UNKNOWN = "unknown — could not resolve enough bets to decide" -@dataclass +@dataclass(config=ConfigDict(arbitrary_types_allowed=True)) class ResolvedBet: - bet: Bet + bet: SerializeAsAny[Bet] outcome: BetOutcome fixture_id: int | None = None # Confidence breakdown (each component 0.0–1.0): @@ -73,11 +79,11 @@ def _get(url: str, headers: dict[str, str], params: dict[str, str | int], retrie resp = requests.get(url, headers=headers, params=params) if resp.status_code == 429: wait = backoff * (attempt + 1) - print(f" !! rate limited — waiting {wait:.0f}s before retry ({attempt + 1}/{retries})") + logger.warning("rate limited — waiting %.0fs before retry (%d/%d)", wait, attempt + 1, retries) time.sleep(wait) continue return resp - print(f" !! still rate limited after {retries} retries, giving up") + logger.warning("still rate limited after %d retries, giving up", retries) return resp @@ -99,17 +105,17 @@ class TicketResolver: def _resolve_bet(self, bet: Bet) -> ResolvedBet: bet_type = type(bet).__name__ - print(f"\n {_ansi.bold(_ansi.cyan(f'┌─ [{bet_type}]'))} {_ansi.bold(f'{bet.team1Name} vs {bet.team2Name}')}" - f" {_ansi.dim(f'{bet.date.strftime('%Y-%m-%d')} | {bet.league}')}") + _ansi.log(f"\n {_ansi.bold(_ansi.cyan(f'┌─ [{bet_type}]'))} {_ansi.bold(f'{bet.team1Name} vs {bet.team2Name}')}" + f" {_ansi.dim(f'{bet.date.strftime("%Y-%m-%d")} | {bet.league}')}") if isinstance(bet, UnknownBet): - print(_ansi.gray(f" │ skipping — not implemented: {bet.raw_text!r}")) - print(_ansi.gray(" └─ UNKNOWN")) + _ansi.log(_ansi.gray(f" │ skipping — not implemented: {bet.raw_text!r}")) + _ansi.log(_ansi.gray(" └─ UNKNOWN")) return ResolvedBet(bet=bet, outcome=BetOutcome.UNKNOWN) fixture, name_match, date_prox, league_conf = self._find_fixture(bet) if fixture is None: - print(_ansi.gray(" └─ UNKNOWN — no fixture found")) + _ansi.log(_ansi.gray(" └─ UNKNOWN — no fixture found")) return ResolvedBet(bet=bet, outcome=BetOutcome.UNKNOWN, league_found=league_conf) home_name = fixture["teams"]["home"]["name"] @@ -126,15 +132,15 @@ class TicketResolver: outcome = BetOutcome.UNKNOWN goals = fixture["goals"] - print(_ansi.dim( + _ansi.log(_ansi.dim( f" │ matched #{fixture['fixture']['id']}: {home_name} vs {away_name}" f" | {goals['home']}:{goals['away']} | {fixture['fixture']['status']['short']}" f" | confidence {confidence} (name={name_match:.2f} date={date_prox:.2f} league={league_conf} finished={finished})" )) - print(_ansi.bold(_ansi.green(f" └─ {outcome.value.upper()}") if outcome == BetOutcome.WIN - else _ansi.red(f" └─ {outcome.value.upper()}") if outcome == BetOutcome.LOSE - else _ansi.yellow(f" └─ {outcome.value.upper()}") if outcome == BetOutcome.VOID - else _ansi.gray(f" └─ {outcome.value.upper()}"))) + _ansi.log(_ansi.bold(_ansi.green(f" └─ {outcome.value.upper()}") if outcome == BetOutcome.WIN + else _ansi.red(f" └─ {outcome.value.upper()}") if outcome == BetOutcome.LOSE + else _ansi.yellow(f" └─ {outcome.value.upper()}") if outcome == BetOutcome.VOID + else _ansi.gray(f" └─ {outcome.value.upper()}"))) return ResolvedBet( bet=bet, @@ -151,9 +157,9 @@ class TicketResolver: def _get_statistics(self, fixture_id: int) -> list[dict[str, Any]]: cache_key = ("stats", fixture_id) if cache_key in self._disk_cache: - print(_ansi.gray(f" │ /fixtures/statistics served from disk cache (fixture={fixture_id})")) + _ansi.log(_ansi.gray(f" │ /fixtures/statistics served from disk cache (fixture={fixture_id})")) return self._disk_cache[cache_key] # type: ignore[no-any-return] - print(_ansi.gray(f" │ GET /fixtures/statistics fixture={fixture_id}")) + _ansi.log(_ansi.gray(f" │ GET /fixtures/statistics fixture={fixture_id}")) resp = _get(f"{_API_BASE}/fixtures/statistics", headers=self._headers, params={"fixture": fixture_id}) resp.raise_for_status() stats = resp.json().get("response", []) @@ -173,7 +179,7 @@ class TicketResolver: if cache_key not in self._fixture_cache: if cache_key in self._disk_cache and not cache_may_be_stale: self._fixture_cache[cache_key] = self._disk_cache[cache_key] - print(_ansi.gray(f" │ /fixtures served from disk cache ({len(self._fixture_cache[cache_key])} fixtures)")) + _ansi.log(_ansi.gray(f" │ /fixtures served from disk cache ({len(self._fixture_cache[cache_key])} fixtures)")) else: date_from = (center - timedelta(days=_DATE_WINDOW)).strftime("%Y-%m-%d") date_to = (center + timedelta(days=_DATE_WINDOW)).strftime("%Y-%m-%d") @@ -181,17 +187,17 @@ class TicketResolver: if league_id is not None: params["league"] = league_id params["season"] = center.year if center.month >= 7 else center.year - 1 - print(_ansi.gray(f" │ GET /fixtures {params}")) + _ansi.log(_ansi.gray(f" │ GET /fixtures {params}")) resp = _get(f"{_API_BASE}/fixtures", headers=self._headers, params=params) resp.raise_for_status() self._fixture_cache[cache_key] = resp.json().get("response", []) - print(_ansi.gray(f" │ {len(self._fixture_cache[cache_key])} fixtures returned")) + _ansi.log(_ansi.gray(f" │ {len(self._fixture_cache[cache_key])} fixtures returned")) cacheable = [f for f in self._fixture_cache[cache_key] if f.get("fixture", {}).get("status", {}).get("short") != "NS"] if cacheable: self._disk_cache[cache_key] = cacheable - print(_ansi.gray(f" │ {len(cacheable)} non-NS fixture(s) written to disk cache")) + _ansi.log(_ansi.gray(f" │ {len(cacheable)} non-NS fixture(s) written to disk cache")) else: - print(_ansi.gray(f" │ /fixtures (±{_DATE_WINDOW}d of {date_str}, league={league_id}) served from memory")) + _ansi.log(_ansi.gray(f" │ /fixtures (±{_DATE_WINDOW}d of {date_str}, league={league_id}) served from memory")) fixture, name_match, date_prox = _best_fixture_match( self._fixture_cache[cache_key], bet.team1Name, bet.team2Name, center @@ -210,22 +216,22 @@ class TicketResolver: default=(None, None), ) if best_id is not None: - print(_ansi.gray(f" │ league {league_name!r} -> id={best_id} (static map, pattern={best_pattern!r})")) + _ansi.log(_ansi.gray(f" │ league {league_name!r} -> id={best_id} (static map, pattern={best_pattern!r})")) self._league_cache[key] = (best_id, 1.0) return best_id, 1.0 # Fall back to API search — lower confidence since first result is taken unverified - print(_ansi.gray(f" │ GET /leagues search={league_name!r}")) + _ansi.log(_ansi.gray(f" │ GET /leagues search={league_name!r}")) resp = _get(f"{_API_BASE}/leagues", headers=self._headers, params={"search": league_name[:20]}) results = resp.json().get("response", []) if results: league_id = results[0]["league"]["id"] league_found_name = results[0]["league"]["name"] - print(_ansi.gray(f" │ matched {league_found_name!r} id={league_id} (API fallback, confidence=0.7)")) + _ansi.log(_ansi.gray(f" │ matched {league_found_name!r} id={league_id} (API fallback, confidence=0.7)")) self._league_cache[key] = (league_id, 0.7) return league_id, 0.7 - print(_ansi.gray(" │ no league found, searching fixtures by date only (confidence=0.3)")) + _ansi.log(_ansi.gray(" │ no league found, searching fixtures by date only (confidence=0.3)")) self._league_cache[key] = (None, 0.3) return None, 0.3 diff --git a/src/beaky/scanner/scanner.py b/src/beaky/scanner/scanner.py index 2d61f32..f82f8de 100644 --- a/src/beaky/scanner/scanner.py +++ b/src/beaky/scanner/scanner.py @@ -1,3 +1,4 @@ +import logging from datetime import datetime from typing import Any, Iterator, List, Optional @@ -6,6 +7,8 @@ from pydantic.dataclasses import dataclass from beaky.config import Config +logger = logging.getLogger(__name__) + @dataclass class Link: @@ -37,7 +40,7 @@ class Links: at least: 'id', 'link' (or 'url'), and optionally 'date' (case-insensitive). Returns the list of Link objects (also stored in self.links). """ - print("started ret_links()") + logger.debug("started ret_links()") wb = load_workbook(filename=self._path, read_only=True, data_only=True) ws = wb.active @@ -84,7 +87,7 @@ class Links: if id_idx is None or url_idx is None: # Required columns missing - print(f"Required 'id' or 'url' column missing in header. Found headers: {list(header_map.keys())}") + logger.warning("Required 'id' or 'url' column missing in header. Found headers: %s", list(header_map.keys())) return [] for row in rows: diff --git a/src/beaky/screenshotter/screenshotter.py b/src/beaky/screenshotter/screenshotter.py index 77e6748..c54ebfe 100644 --- a/src/beaky/screenshotter/screenshotter.py +++ b/src/beaky/screenshotter/screenshotter.py @@ -1,3 +1,4 @@ +import logging from pathlib import Path from typing import Any @@ -6,6 +7,8 @@ from playwright.sync_api import sync_playwright from beaky.config import Config from beaky.scanner.scanner import Link +logger = logging.getLogger(__name__) + class Screenshotter: def __init__(self, config: Config): @@ -18,7 +21,7 @@ class Screenshotter: context = browser.new_context() for link in links: - print("capturing link:", link) + logger.debug("capturing link: %s", link) page = context.new_page() target_path = Path(self.config.screenshotter.target_path) / f"{link.id}.png" self.capture_ticket(page, link.url, target_path)