Logging
This commit is contained in:
@@ -62,3 +62,5 @@ resolver:
|
||||
|
||||
img_classifier:
|
||||
target_path: data/screenshots/
|
||||
|
||||
log_level: INFO # set to DEBUG to see raw classifier and resolver output
|
||||
|
||||
@@ -1,5 +1,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
|
||||
_logger = logging.getLogger("beaky")
|
||||
|
||||
|
||||
def log(text: str) -> None:
    """Forward ``text`` to the package logger as a DEBUG record.

    The message may carry ANSI color/style escape sequences; it is handed
    to the logger as the single ``%s`` argument, so any ``%`` characters
    inside ``text`` are never interpreted as format directives.
    """
    _logger.log(logging.DEBUG, "%s", text)
|
||||
|
||||
|
||||
def bold(text: str) -> str:
    """Return ``text`` wrapped between the ESC[1m and ESC[0m escape sequences."""
    return "\033[1m{}\033[0m".format(text)
|
||||
|
||||
@@ -11,3 +11,4 @@ class Config:
|
||||
screenshotter: ScreenshotterConfig
|
||||
resolver: ResolverConfig
|
||||
img_classifier: ImgClassifierConfig
|
||||
log_level: str = "INFO"
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
import datetime
|
||||
import logging
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
import pytesseract
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from beaky.datamodels.ticket import (
|
||||
Advance,
|
||||
Bet,
|
||||
@@ -24,7 +27,7 @@ def img_to_text(path: str) -> str:
|
||||
try:
|
||||
return pytesseract.image_to_string(path, lang="ces").strip()
|
||||
except Exception as e:
|
||||
print(f"Error processing {path}: {e}")
|
||||
logger.error("Error processing %s: %s", path, e)
|
||||
return ""
|
||||
|
||||
|
||||
@@ -151,7 +154,7 @@ def classify(text: str) -> list[Bet]:
|
||||
sport_end_pattern = re.compile(r"^(Fotbal|Hokej|Tenis|Basketbal|Florbal|Volejbal|E-sport|Šipky)\s*/", re.IGNORECASE)
|
||||
|
||||
for ln in lines:
|
||||
print(f"Processing line: '{ln}'")
|
||||
logger.debug("Processing line: '%s'", ln)
|
||||
is_start = date_start_pattern.search(ln)
|
||||
is_end = sport_end_pattern.match(ln)
|
||||
|
||||
@@ -159,7 +162,7 @@ def classify(text: str) -> list[Bet]:
|
||||
# If we somehow hit a start while already in a block (missing end marker fallback),
|
||||
# save the current block before starting a new one.
|
||||
if current_block:
|
||||
print(f"ERROR: SUS block was not properly ended and new block start detected: '{ln}'")
|
||||
logger.warning("Block not properly ended, new block start detected: '%s'", ln)
|
||||
blocks.append(current_block)
|
||||
current_block = [ln]
|
||||
in_block = True
|
||||
@@ -178,7 +181,7 @@ def classify(text: str) -> list[Bet]:
|
||||
else:
|
||||
# We are outside a block. This is noise (e.g. "© osmifinále / 2.zápas 0:1" or "170").
|
||||
# We simply ignore it and do nothing.
|
||||
print(f"INFO: Ignoring line outside of any block: '{ln}'")
|
||||
logger.debug("Ignoring line outside of any block: '%s'", ln)
|
||||
pass
|
||||
|
||||
# Catch any dangling block at the very end of the document
|
||||
@@ -201,18 +204,16 @@ def img_classify(paths: list[str], ticket_id: int) -> Ticket:
|
||||
for file in paths:
|
||||
file_path = Path(file)
|
||||
if not (file_path.is_file() and file_path.suffix.lower() in valid_extensions):
|
||||
print(f"Skipping invalid file: {file}")
|
||||
logger.warning("Skipping invalid file: %s", file)
|
||||
continue
|
||||
|
||||
extracted_text = img_to_text(str(file_path))
|
||||
print(f"--- Extracted Text from {file_path.name} ---")
|
||||
# print(extracted_text)
|
||||
logger.debug("Extracted text from %s", file_path.name)
|
||||
|
||||
try:
|
||||
print("--- START OF Classification Result ---")
|
||||
result = classify(extracted_text)
|
||||
except Exception as exc:
|
||||
print(f"classify() error for {file_path}: {exc}")
|
||||
logger.error("classify() error for %s: %s", file_path, exc)
|
||||
result = [
|
||||
UnknownBet(
|
||||
ticketType=BetType.UNKNOWN,
|
||||
|
||||
@@ -1,9 +1,12 @@
|
||||
import logging
|
||||
import re
|
||||
from datetime import datetime
|
||||
from typing import Any
|
||||
|
||||
from playwright.sync_api import Page, sync_playwright
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
from beaky.datamodels.ticket import (
|
||||
Bet,
|
||||
BetType,
|
||||
@@ -151,7 +154,7 @@ class LinkClassifier:
|
||||
page.wait_for_timeout(500)
|
||||
result = Ticket(id=link.id, bets=_extract_legs(page, link.date))
|
||||
except Exception as e:
|
||||
print(f"Error classifying link {link.id}: {e}")
|
||||
logger.error("Error classifying link %d: %s", link.id, e)
|
||||
finally:
|
||||
page.close()
|
||||
browser.close()
|
||||
|
||||
@@ -1,10 +1,14 @@
|
||||
import logging
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from dataclasses import field
|
||||
from datetime import date, datetime, timedelta
|
||||
from difflib import SequenceMatcher
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
from pydantic import ConfigDict, SerializeAsAny
|
||||
from pydantic.dataclasses import dataclass
|
||||
|
||||
import diskcache
|
||||
import requests
|
||||
|
||||
@@ -18,6 +22,8 @@ from beaky.datamodels.ticket import (
|
||||
)
|
||||
from beaky.resolvers.config import ResolverConfig
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_API_BASE = "https://v3.football.api-sports.io"
|
||||
|
||||
_DATE_WINDOW = 3 # days either side of the bet date to search
|
||||
@@ -30,9 +36,9 @@ class TicketVerdict(str, Enum):
|
||||
UNKNOWN = "unknown — could not resolve enough bets to decide"
|
||||
|
||||
|
||||
@dataclass
|
||||
@dataclass(config=ConfigDict(arbitrary_types_allowed=True))
|
||||
class ResolvedBet:
|
||||
bet: Bet
|
||||
bet: SerializeAsAny[Bet]
|
||||
outcome: BetOutcome
|
||||
fixture_id: int | None = None
|
||||
# Confidence breakdown (each component 0.0–1.0):
|
||||
@@ -73,11 +79,11 @@ def _get(url: str, headers: dict[str, str], params: dict[str, str | int], retrie
|
||||
resp = requests.get(url, headers=headers, params=params)
|
||||
if resp.status_code == 429:
|
||||
wait = backoff * (attempt + 1)
|
||||
print(f" !! rate limited — waiting {wait:.0f}s before retry ({attempt + 1}/{retries})")
|
||||
logger.warning("rate limited — waiting %.0fs before retry (%d/%d)", wait, attempt + 1, retries)
|
||||
time.sleep(wait)
|
||||
continue
|
||||
return resp
|
||||
print(f" !! still rate limited after {retries} retries, giving up")
|
||||
logger.warning("still rate limited after %d retries, giving up", retries)
|
||||
return resp
|
||||
|
||||
|
||||
@@ -99,17 +105,17 @@ class TicketResolver:
|
||||
|
||||
def _resolve_bet(self, bet: Bet) -> ResolvedBet:
|
||||
bet_type = type(bet).__name__
|
||||
print(f"\n {_ansi.bold(_ansi.cyan(f'┌─ [{bet_type}]'))} {_ansi.bold(f'{bet.team1Name} vs {bet.team2Name}')}"
|
||||
f" {_ansi.dim(f'{bet.date.strftime('%Y-%m-%d')} | {bet.league}')}")
|
||||
_ansi.log(f"\n {_ansi.bold(_ansi.cyan(f'┌─ [{bet_type}]'))} {_ansi.bold(f'{bet.team1Name} vs {bet.team2Name}')}"
|
||||
f" {_ansi.dim(f'{bet.date.strftime("%Y-%m-%d")} | {bet.league}')}")
|
||||
|
||||
if isinstance(bet, UnknownBet):
|
||||
print(_ansi.gray(f" │ skipping — not implemented: {bet.raw_text!r}"))
|
||||
print(_ansi.gray(" └─ UNKNOWN"))
|
||||
_ansi.log(_ansi.gray(f" │ skipping — not implemented: {bet.raw_text!r}"))
|
||||
_ansi.log(_ansi.gray(" └─ UNKNOWN"))
|
||||
return ResolvedBet(bet=bet, outcome=BetOutcome.UNKNOWN)
|
||||
|
||||
fixture, name_match, date_prox, league_conf = self._find_fixture(bet)
|
||||
if fixture is None:
|
||||
print(_ansi.gray(" └─ UNKNOWN — no fixture found"))
|
||||
_ansi.log(_ansi.gray(" └─ UNKNOWN — no fixture found"))
|
||||
return ResolvedBet(bet=bet, outcome=BetOutcome.UNKNOWN, league_found=league_conf)
|
||||
|
||||
home_name = fixture["teams"]["home"]["name"]
|
||||
@@ -126,12 +132,12 @@ class TicketResolver:
|
||||
outcome = BetOutcome.UNKNOWN
|
||||
|
||||
goals = fixture["goals"]
|
||||
print(_ansi.dim(
|
||||
_ansi.log(_ansi.dim(
|
||||
f" │ matched #{fixture['fixture']['id']}: {home_name} vs {away_name}"
|
||||
f" | {goals['home']}:{goals['away']} | {fixture['fixture']['status']['short']}"
|
||||
f" | confidence {confidence} (name={name_match:.2f} date={date_prox:.2f} league={league_conf} finished={finished})"
|
||||
))
|
||||
print(_ansi.bold(_ansi.green(f" └─ {outcome.value.upper()}") if outcome == BetOutcome.WIN
|
||||
_ansi.log(_ansi.bold(_ansi.green(f" └─ {outcome.value.upper()}") if outcome == BetOutcome.WIN
|
||||
else _ansi.red(f" └─ {outcome.value.upper()}") if outcome == BetOutcome.LOSE
|
||||
else _ansi.yellow(f" └─ {outcome.value.upper()}") if outcome == BetOutcome.VOID
|
||||
else _ansi.gray(f" └─ {outcome.value.upper()}")))
|
||||
@@ -151,9 +157,9 @@ class TicketResolver:
|
||||
def _get_statistics(self, fixture_id: int) -> list[dict[str, Any]]:
|
||||
cache_key = ("stats", fixture_id)
|
||||
if cache_key in self._disk_cache:
|
||||
print(_ansi.gray(f" │ /fixtures/statistics served from disk cache (fixture={fixture_id})"))
|
||||
_ansi.log(_ansi.gray(f" │ /fixtures/statistics served from disk cache (fixture={fixture_id})"))
|
||||
return self._disk_cache[cache_key] # type: ignore[no-any-return]
|
||||
print(_ansi.gray(f" │ GET /fixtures/statistics fixture={fixture_id}"))
|
||||
_ansi.log(_ansi.gray(f" │ GET /fixtures/statistics fixture={fixture_id}"))
|
||||
resp = _get(f"{_API_BASE}/fixtures/statistics", headers=self._headers, params={"fixture": fixture_id})
|
||||
resp.raise_for_status()
|
||||
stats = resp.json().get("response", [])
|
||||
@@ -173,7 +179,7 @@ class TicketResolver:
|
||||
if cache_key not in self._fixture_cache:
|
||||
if cache_key in self._disk_cache and not cache_may_be_stale:
|
||||
self._fixture_cache[cache_key] = self._disk_cache[cache_key]
|
||||
print(_ansi.gray(f" │ /fixtures served from disk cache ({len(self._fixture_cache[cache_key])} fixtures)"))
|
||||
_ansi.log(_ansi.gray(f" │ /fixtures served from disk cache ({len(self._fixture_cache[cache_key])} fixtures)"))
|
||||
else:
|
||||
date_from = (center - timedelta(days=_DATE_WINDOW)).strftime("%Y-%m-%d")
|
||||
date_to = (center + timedelta(days=_DATE_WINDOW)).strftime("%Y-%m-%d")
|
||||
@@ -181,17 +187,17 @@ class TicketResolver:
|
||||
if league_id is not None:
|
||||
params["league"] = league_id
|
||||
params["season"] = center.year if center.month >= 7 else center.year - 1
|
||||
print(_ansi.gray(f" │ GET /fixtures {params}"))
|
||||
_ansi.log(_ansi.gray(f" │ GET /fixtures {params}"))
|
||||
resp = _get(f"{_API_BASE}/fixtures", headers=self._headers, params=params)
|
||||
resp.raise_for_status()
|
||||
self._fixture_cache[cache_key] = resp.json().get("response", [])
|
||||
print(_ansi.gray(f" │ {len(self._fixture_cache[cache_key])} fixtures returned"))
|
||||
_ansi.log(_ansi.gray(f" │ {len(self._fixture_cache[cache_key])} fixtures returned"))
|
||||
cacheable = [f for f in self._fixture_cache[cache_key] if f.get("fixture", {}).get("status", {}).get("short") != "NS"]
|
||||
if cacheable:
|
||||
self._disk_cache[cache_key] = cacheable
|
||||
print(_ansi.gray(f" │ {len(cacheable)} non-NS fixture(s) written to disk cache"))
|
||||
_ansi.log(_ansi.gray(f" │ {len(cacheable)} non-NS fixture(s) written to disk cache"))
|
||||
else:
|
||||
print(_ansi.gray(f" │ /fixtures (±{_DATE_WINDOW}d of {date_str}, league={league_id}) served from memory"))
|
||||
_ansi.log(_ansi.gray(f" │ /fixtures (±{_DATE_WINDOW}d of {date_str}, league={league_id}) served from memory"))
|
||||
|
||||
fixture, name_match, date_prox = _best_fixture_match(
|
||||
self._fixture_cache[cache_key], bet.team1Name, bet.team2Name, center
|
||||
@@ -210,22 +216,22 @@ class TicketResolver:
|
||||
default=(None, None),
|
||||
)
|
||||
if best_id is not None:
|
||||
print(_ansi.gray(f" │ league {league_name!r} -> id={best_id} (static map, pattern={best_pattern!r})"))
|
||||
_ansi.log(_ansi.gray(f" │ league {league_name!r} -> id={best_id} (static map, pattern={best_pattern!r})"))
|
||||
self._league_cache[key] = (best_id, 1.0)
|
||||
return best_id, 1.0
|
||||
|
||||
# Fall back to API search — lower confidence since first result is taken unverified
|
||||
print(_ansi.gray(f" │ GET /leagues search={league_name!r}"))
|
||||
_ansi.log(_ansi.gray(f" │ GET /leagues search={league_name!r}"))
|
||||
resp = _get(f"{_API_BASE}/leagues", headers=self._headers, params={"search": league_name[:20]})
|
||||
results = resp.json().get("response", [])
|
||||
if results:
|
||||
league_id = results[0]["league"]["id"]
|
||||
league_found_name = results[0]["league"]["name"]
|
||||
print(_ansi.gray(f" │ matched {league_found_name!r} id={league_id} (API fallback, confidence=0.7)"))
|
||||
_ansi.log(_ansi.gray(f" │ matched {league_found_name!r} id={league_id} (API fallback, confidence=0.7)"))
|
||||
self._league_cache[key] = (league_id, 0.7)
|
||||
return league_id, 0.7
|
||||
|
||||
print(_ansi.gray(" │ no league found, searching fixtures by date only (confidence=0.3)"))
|
||||
_ansi.log(_ansi.gray(" │ no league found, searching fixtures by date only (confidence=0.3)"))
|
||||
self._league_cache[key] = (None, 0.3)
|
||||
return None, 0.3
|
||||
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Any, Iterator, List, Optional
|
||||
|
||||
@@ -6,6 +7,8 @@ from pydantic.dataclasses import dataclass
|
||||
|
||||
from beaky.config import Config
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@dataclass
|
||||
class Link:
|
||||
@@ -37,7 +40,7 @@ class Links:
|
||||
at least: 'id', 'link' (or 'url'), and optionally 'date' (case-insensitive).
|
||||
Returns the list of Link objects (also stored in self.links).
|
||||
"""
|
||||
print("started ret_links()")
|
||||
logger.debug("started ret_links()")
|
||||
wb = load_workbook(filename=self._path, read_only=True, data_only=True)
|
||||
ws = wb.active
|
||||
|
||||
@@ -84,7 +87,7 @@ class Links:
|
||||
|
||||
if id_idx is None or url_idx is None:
|
||||
# Required columns missing
|
||||
print(f"Required 'id' or 'url' column missing in header. Found headers: {list(header_map.keys())}")
|
||||
logger.warning("Required 'id' or 'url' column missing in header. Found headers: %s", list(header_map.keys()))
|
||||
return []
|
||||
|
||||
for row in rows:
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
@@ -6,6 +7,8 @@ from playwright.sync_api import sync_playwright
|
||||
from beaky.config import Config
|
||||
from beaky.scanner.scanner import Link
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class Screenshotter:
|
||||
def __init__(self, config: Config):
|
||||
@@ -18,7 +21,7 @@ class Screenshotter:
|
||||
context = browser.new_context()
|
||||
|
||||
for link in links:
|
||||
print("capturing link:", link)
|
||||
logger.debug("capturing link: %s", link)
|
||||
page = context.new_page()
|
||||
target_path = Path(self.config.screenshotter.target_path) / f"{link.id}.png"
|
||||
self.capture_ticket(page, link.url, target_path)
|
||||
|
||||
Reference in New Issue
Block a user