Created image classifier v.1

Created image classifier config file
2026-03-22 01:17:21 +01:00 · 2026-03-21 22:24:13 +01:00
5 changed files with 192 additions and 9 deletions
--- a/config/application.yml
+++ b/config/application.yml
@@ -2,3 +2,5 @@ path: data/odkazy.xlsx

 screenshotter:
  target_path: data/screenshots/
+img_classifier:
+  target_path: "data/screenshots/"
--- a/src/beaky/config.py
+++ b/src/beaky/config.py
@@ -1,5 +1,6 @@
 from pydantic.dataclasses import dataclass

+from beaky.image_classifier.config import ImgClassifierConfig
 from beaky.screenshotter.config import ScreenshotterConfig


@@ -7,3 +8,4 @@ from beaky.screenshotter.config import ScreenshotterConfig
 class Config:
    path: str
    screenshotter: ScreenshotterConfig
+    # Field name must match the `img_classifier` section in config/application.yml.
+    img_classifier: ImgClassifierConfig
--- a/src/beaky/image_classifier/classifier.py
+++ b/src/beaky/image_classifier/classifier.py
@@ -1,7 +1,183 @@
-from datetime import datetime
+import datetime
+import re
+from pathlib import Path

-from beaky.datamodels.ticket import Ticket
+import pytesseract
+
+from beaky.datamodels.ticket import (
+    Advance,
+    Bet,
+    BetType,
+    BothTeamScored,
+    GoalAmount,
+    GoalHandicap,
+    Ticket,
+    UnknownTicket,
+    WinDrawLose,
+    WinDrawLoseDouble,
+    WinLose,
+)


-def f(path:str, date:datetime) -> list[Ticket]:
-    ...
+def img_to_text(path: str) -> str:
+    """Run Tesseract OCR on the image file at *path* and return its text.
+
+    The path string is handed straight to ``pytesseract.image_to_string``,
+    so the image is not opened here. The result is stripped of surrounding
+    whitespace. Any failure is reported on stdout and yields an empty string
+    instead of raising.
+    """
+    extracted = ""
+    try:
+        extracted = pytesseract.image_to_string(path).strip()
+    except pytesseract.TesseractNotFoundError:
+        # The OCR binary itself is missing; nothing can be read.
+        print("Error: Tesseract executable not found on your system.")
+    except Exception as exc:
+        # Best-effort: one unreadable file must not abort the caller.
+        print(f"Error processing {path}: {exc}")
+    return extracted
+
+
+# Numeric date "DD.MM." with an optional 4-digit year and an optional "HH:MM" time.
+# The lookbehind stops the pattern from starting in the middle of a longer number.
+_DATE_PATTERN = re.compile(
+    r"(?<![\d.])(\d{1,2})\.\s*(\d{1,2})\.(?:\s*(\d{4})(?!\d))?(?:\s*(\d{1,2}):(\d{2}))?"
+)
+
+
+def _extract_teams(text: str) -> tuple[str, str]:
+    """Return the first "Team A - Team B" style pair in *text*, else two "Unknown"s."""
+    for raw_line in text.split("\n"):
+        line = raw_line.strip()
+        if not any(sep in line for sep in (" - ", " vs ", " v ")):
+            continue
+        # Avoid splitting on hyphens in dates, scores or other numbers.
+        if re.search(r"\d\s*-\s*\d", line):
+            continue
+        parts = re.split(r" - | vs | v ", line)
+        if len(parts) >= 2:
+            return parts[0].strip(), parts[1].strip()
+    return "Unknown", "Unknown"
+
+
+def _extract_date(text: str, default: datetime.datetime) -> datetime.datetime:
+    """Return the first valid date found in *text*, or *default* if there is none.
+
+    A missing year falls back to ``default.year`` and a missing time to 00:00.
+    Candidates that are not real dates (e.g. month 85) are skipped so that a
+    stray number cannot hide a genuine date later in the text.
+    """
+    for match in _DATE_PATTERN.finditer(text):
+        day, month, year, hour, minute = match.groups()
+        try:
+            return datetime.datetime(
+                int(year) if year else default.year,
+                int(month),
+                int(day),
+                int(hour) if hour else 0,
+                int(minute) if minute else 0,
+            )
+        except ValueError:
+            continue  # Out-of-range component: try the next candidate.
+    return default
+
+
+def _double_chance_pick(text_lower: str) -> str | None:
+    """Return "01", "02" or "12" if a standalone 1X / X2 / 12 token is present."""
+    for token, code in (("1x", "01"), ("x2", "02"), ("12", "12")):
+        # Require a standalone token: not part of "1x2", a time such as
+        # "12:30", a date such as "21.12." or a decimal such as "12.5".
+        if re.search(rf"(?<![\w.,:]){token}(?!\w|[.,:]\d)", text_lower):
+            return code
+    return None
+
+
+def classify(text: str) -> Bet:
+    """Classify OCR text from a ticket screenshot into a Bet object.
+
+    Teams and the match date are extracted heuristically from *text*; the bet
+    type is chosen by keyword matching, checked in a fixed order (advance,
+    both teams to score, goal amount, handicap, double chance, win/lose,
+    win/draw/lose). The league is always "Unknown". When no date can be
+    parsed, the current local time is used.
+
+    Returns an ``UnknownTicket`` when *text* is empty or no keyword matches.
+    """
+    now = datetime.datetime.now()
+
+    if not text:
+        return UnknownTicket(
+            ticketType=BetType.UNKNOWN,
+            team1Name="N/A",
+            team2Name="N/A",
+            date=now,
+            league="N/A",
+            raw_text="No text extracted",
+        )
+
+    text_lower = text.lower()
+    team1, team2 = _extract_teams(text)
+    base_args = {
+        "team1Name": team1,
+        "team2Name": team2,
+        "date": _extract_date(text, default=now),
+        "league": "Unknown",
+    }
+
+    def mentions(*keywords: str) -> bool:
+        return any(kw in text_lower for kw in keywords)
+
+    # Advance / Postup
+    if mentions("postup", "postoupí", "advance"):
+        return Advance(ticketType=BetType.ADVANCED, **base_args)
+
+    # Both Teams to Score / Oba dají gól
+    if mentions("oba dají gól", "btts", "oba týmy dají gól"):
+        return BothTeamScored(ticketType=BetType.BOTH_TEAM_SCORED, **base_args)
+
+    # Goal Amount (Over/Under)
+    if mentions("počet gólů", "více než", "méně než", "over", "under"):
+        # First decimal number in the text is taken as the goal line (e.g. 2.5).
+        line_match = re.search(r"(\d+\.\d+)", text)
+        line_val = float(line_match.group(1)) if line_match else 2.5
+        is_over = mentions("více", "over", "+")
+        return GoalAmount(ticketType=BetType.GOAL_AMOUNT, line=line_val, over=is_over, **base_args)
+
+    # Goal Handicap
+    if mentions("handicap", "hcp"):
+        hcp_match = re.search(r"([+-]?\d+\.\d+)", text)
+        hcp_val = float(hcp_match.group(1)) if hcp_match else 0.0
+        # Simplistic logic: guess team 1 unless an isolated " 2 " appears.
+        team_bet = "2" if " 2 " in text else "1"
+        return GoalHandicap(ticketType=BetType.GOAL_HANDICAP, team_bet=team_bet, handicap_amount=hcp_val, **base_args)
+
+    # Win Draw Lose Double (1X, X2, 12)
+    double_pick = _double_chance_pick(text_lower)
+    if double_pick is not None or mentions("dvojitá šance", "neprohra"):
+        # Phrase-only matches keep the previous default of "12".
+        return WinDrawLoseDouble(
+            ticketType=BetType.WIN_DRAW_LOSE_DOUBLE, betType=double_pick or "12", **base_args
+        )
+
+    # Win Lose (Draw no bet / Vítěz do rozhodnutí)
+    if mentions("bez remízy", "vítěz do rozhodnutí", "konečný vítěz"):
+        bet_type = "2" if re.search(r"\b2\b", text) else "1"
+        return WinLose(ticketType=BetType.WIN_LOSE, betType=bet_type, **base_args)
+
+    # Win Draw Lose (Standard Match Odds)
+    if mentions("zápas", "výsledek zápasu", "1x2"):
+        # Look for an isolated 1, X (or 0), or 2; X is normalised to "0".
+        match_pick = re.search(r"\b(1|x|0|2)\b", text_lower)
+        bet_type = match_pick.group(1).upper() if match_pick else "1"
+        if bet_type == "X":
+            bet_type = "0"
+        return WinDrawLose(ticketType=BetType.WIN_DRAW_LOSE, betType=bet_type, **base_args)
+
+    # Fallback Unknown
+    return UnknownTicket(ticketType=BetType.UNKNOWN, raw_text=text, **base_args)
+
+
+def _print_bet_summary(bet: Bet) -> None:
+    """Print the one-line debug summary of a classified bet."""
+    print(bet.date, getattr(bet, "ticketType", None), bet.team1Name, bet.team2Name, bet.league)
+
+
+def img_classify(path: str, ticket_id: int) -> Ticket:
+    """Classify every image in the directory *path* into one Ticket.
+
+    Each file with a recognised image extension is OCR'd with ``img_to_text``
+    and classified with ``classify``; the resulting bets are collected on a
+    ``Ticket`` with id *ticket_id*. The extracted text and a summary of each
+    bet are printed to stdout.
+
+    Returns the ticket; it has no bets when *path* is not a directory (an
+    error is printed) or the directory holds no matching images.
+    """
+    ticket = Ticket(id=ticket_id, bets=[])
+    folder = Path(path)
+
+    if not folder.is_dir():
+        print(f"Error: The path '{path}' is not a valid directory.")
+        return ticket
+
+    # Only these suffixes are treated as images; everything else is ignored.
+    valid_extensions = {".png", ".jpg", ".jpeg", ".bmp", ".tiff", ".webp"}
+
+    # iterdir() yields entries in arbitrary order; sort so that the order of
+    # ticket.bets is reproducible between runs and machines.
+    for file_path in sorted(folder.iterdir()):
+        if not file_path.is_file() or file_path.suffix.lower() not in valid_extensions:
+            continue
+
+        extracted_text = img_to_text(str(file_path))
+        print(extracted_text)
+
+        result = classify(extracted_text)
+        if result is None:
+            continue
+
+        # Support a classifier returning either a single Bet or a list of Bet.
+        bets = result if isinstance(result, list) else [result]
+        for bet in bets:
+            _print_bet_summary(bet)
+        ticket.bets.extend(bets)
+
+    return ticket
+
+
+if __name__ == "__main__":
+    # Manual run: classify every screenshot in ./data/screenshots/ into ticket 1.
+    img_classify("./data/screenshots/", ticket_id=1)
--- a/src/beaky/image_classifier/config.py
+++ b/src/beaky/image_classifier/config.py
@@ -0,0 +1,6 @@
+from pydantic.dataclasses import dataclass
+
+
+@dataclass
+class ImgClassifierConfig:
+    """Settings for the image classifier section of the application config."""
+
+    # Set to "data/screenshots/" in config/application.yml; presumably the
+    # directory of screenshots to classify (not read by the classifier in this change).
+    target_path: str
--- a/src/beaky/scanner/scanner.py
+++ b/src/beaky/scanner/scanner.py
@@ -21,6 +21,7 @@ class Link:
    url: str
    date: Optional[datetime] = None

+
 class Links:
    def __init__(self, path: str | Config):
        if isinstance(path, Config):
@@ -123,10 +124,6 @@ class Links:
        return len(self.links)


-# Backwards-compatible alias in case other modules referenced Linker
-Linker = Links
-
-
 if __name__ == "__main__":
    links_obj = Links("data/odkazy.xlsx")
    links = links_obj.ret_links()
Author	SHA1	Message	Date
Chlupaty	6914b620f1	Created image classifier v.1	2026-03-22 01:17:21 +01:00
Chlupaty	b2c32d5a23	Created image classifier config file	2026-03-21 22:24:13 +01:00