Compare commits
33 Commits
aec5dfcacd
...
0.1
| Author | SHA1 | Date | |
|---|---|---|---|
| 78d9fab189 | |||
| a6deeeaebf | |||
| 5fc7bfafad | |||
| 7cd45f497a | |||
| f40a7911ca | |||
| 57ad6c71f8 | |||
| f4475ef1d4 | |||
| 5add445949 | |||
| 987bdb2b63 | |||
| 2b29a1c662 | |||
| 697fe2548c | |||
| 770966e21f | |||
|
|
abb59aabe3 | ||
| 8adc374408 | |||
| 1b2fee9b8d | |||
|
|
1f9997b430 | ||
| 6c096e4300 | |||
| e94d96f153 | |||
| 86e0bc8e51 | |||
| 5126a985bf | |||
| f7369e29f2 | |||
|
|
b6fc78e038 | ||
| 96c75ea0cc | |||
| 922d0499fc | |||
| 5704329f04 | |||
| ed599e7d49 | |||
|
|
c504860b69 | ||
|
|
47a41828c6 | ||
|
|
e5c31ee0a3 | ||
|
|
03cd2714db | ||
| 96c64eb5a9 | |||
| 865706d587 | |||
| 8b91cdd147 |
220
.gitignore
vendored
220
.gitignore
vendored
@@ -1,2 +1,220 @@
|
|||||||
|
.idea/
|
||||||
|
data/
|
||||||
|
report.xml
|
||||||
|
|
||||||
|
# Byte-compiled / optimized / DLL files
|
||||||
|
__pycache__/
|
||||||
|
*.py[codz]
|
||||||
|
*$py.class
|
||||||
|
|
||||||
|
# C extensions
|
||||||
|
*.so
|
||||||
|
|
||||||
|
# Distribution / packaging
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
share/python-wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
MANIFEST
|
||||||
|
|
||||||
|
# PyInstaller
|
||||||
|
# Usually these files are written by a python script from a template
|
||||||
|
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||||
|
*.manifest
|
||||||
|
*.spec
|
||||||
|
|
||||||
|
# Installer logs
|
||||||
|
pip-log.txt
|
||||||
|
pip-delete-this-directory.txt
|
||||||
|
|
||||||
|
# Unit test / coverage reports
|
||||||
|
htmlcov/
|
||||||
|
.tox/
|
||||||
|
.nox/
|
||||||
|
.coverage
|
||||||
|
.coverage.*
|
||||||
|
.cache
|
||||||
|
nosetests.xml
|
||||||
|
coverage.xml
|
||||||
|
*.cover
|
||||||
|
*.py.cover
|
||||||
|
.hypothesis/
|
||||||
|
.pytest_cache/
|
||||||
|
cover/
|
||||||
|
|
||||||
|
# Translations
|
||||||
|
*.mo
|
||||||
|
*.pot
|
||||||
|
|
||||||
|
# Django stuff:
|
||||||
|
*.log
|
||||||
|
local_settings.py
|
||||||
|
db.sqlite3
|
||||||
|
db.sqlite3-journal
|
||||||
|
|
||||||
|
# Flask stuff:
|
||||||
|
instance/
|
||||||
|
.webassets-cache
|
||||||
|
|
||||||
|
# Scrapy stuff:
|
||||||
|
.scrapy
|
||||||
|
|
||||||
|
# Sphinx documentation
|
||||||
|
docs/_build/
|
||||||
|
|
||||||
|
# PyBuilder
|
||||||
|
.pybuilder/
|
||||||
|
target/
|
||||||
|
|
||||||
|
# Jupyter Notebook
|
||||||
|
.ipynb_checkpoints
|
||||||
|
|
||||||
|
# IPython
|
||||||
|
profile_default/
|
||||||
|
ipython_config.py
|
||||||
|
|
||||||
|
# pyenv
|
||||||
|
# For a library or package, you might want to ignore these files since the code is
|
||||||
|
# intended to run in multiple environments; otherwise, check them in:
|
||||||
|
# .python-version
|
||||||
|
|
||||||
|
# pipenv
|
||||||
|
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
||||||
|
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
||||||
|
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
||||||
|
# install all needed dependencies.
|
||||||
|
# Pipfile.lock
|
||||||
|
|
||||||
|
# UV
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# uv.lock
|
||||||
|
|
||||||
|
# poetry
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
|
||||||
|
# This is especially recommended for binary packages to ensure reproducibility, and is more
|
||||||
|
# commonly ignored for libraries.
|
||||||
|
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||||
|
# poetry.lock
|
||||||
|
# poetry.toml
|
||||||
|
|
||||||
|
# pdm
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||||
|
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
||||||
|
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
||||||
|
# pdm.lock
|
||||||
|
# pdm.toml
|
||||||
|
.pdm-python
|
||||||
|
.pdm-build/
|
||||||
|
|
||||||
|
# pixi
|
||||||
|
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
||||||
|
# pixi.lock
|
||||||
|
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
||||||
|
# in the .venv directory. It is recommended not to include this directory in version control.
|
||||||
|
.pixi
|
||||||
|
|
||||||
|
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||||
|
__pypackages__/
|
||||||
|
|
||||||
|
# Celery stuff
|
||||||
|
celerybeat-schedule
|
||||||
|
celerybeat.pid
|
||||||
|
|
||||||
|
# Redis
|
||||||
|
*.rdb
|
||||||
|
*.aof
|
||||||
|
*.pid
|
||||||
|
|
||||||
|
# RabbitMQ
|
||||||
|
mnesia/
|
||||||
|
rabbitmq/
|
||||||
|
rabbitmq-data/
|
||||||
|
|
||||||
|
# ActiveMQ
|
||||||
|
activemq-data/
|
||||||
|
|
||||||
|
# SageMath parsed files
|
||||||
|
*.sage.py
|
||||||
|
|
||||||
|
# Environments
|
||||||
|
.env
|
||||||
|
.envrc
|
||||||
|
.venv
|
||||||
|
env/
|
||||||
venv/
|
venv/
|
||||||
.venv/
|
ENV/
|
||||||
|
env.bak/
|
||||||
|
venv.bak/
|
||||||
|
|
||||||
|
# Spyder project settings
|
||||||
|
.spyderproject
|
||||||
|
.spyproject
|
||||||
|
|
||||||
|
# Rope project settings
|
||||||
|
.ropeproject
|
||||||
|
|
||||||
|
# mkdocs documentation
|
||||||
|
/site
|
||||||
|
|
||||||
|
# mypy
|
||||||
|
.mypy_cache/
|
||||||
|
.dmypy.json
|
||||||
|
dmypy.json
|
||||||
|
|
||||||
|
# Pyre type checker
|
||||||
|
.pyre/
|
||||||
|
|
||||||
|
# pytype static type analyzer
|
||||||
|
.pytype/
|
||||||
|
|
||||||
|
# Cython debug symbols
|
||||||
|
cython_debug/
|
||||||
|
|
||||||
|
# PyCharm
|
||||||
|
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
|
||||||
|
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||||
|
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||||
|
# .idea/
|
||||||
|
|
||||||
|
# Abstra
|
||||||
|
# Abstra is an AI-powered process automation framework.
|
||||||
|
# Ignore directories containing user credentials, local state, and settings.
|
||||||
|
# Learn more at https://abstra.io/docs
|
||||||
|
.abstra/
|
||||||
|
|
||||||
|
# Visual Studio Code
|
||||||
|
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
||||||
|
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
||||||
|
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
||||||
|
# you could uncomment the following to ignore the entire vscode folder
|
||||||
|
# .vscode/
|
||||||
|
|
||||||
|
# Ruff stuff:
|
||||||
|
.ruff_cache/
|
||||||
|
|
||||||
|
# PyPI configuration file
|
||||||
|
.pypirc
|
||||||
|
|
||||||
|
# Marimo
|
||||||
|
marimo/_static/
|
||||||
|
marimo/_lsp/
|
||||||
|
__marimo__/
|
||||||
|
|
||||||
|
# Streamlit
|
||||||
|
.streamlit/secrets.toml
|
||||||
|
|||||||
40
.gitlab-ci.yml
Normal file
40
.gitlab-ci.yml
Normal file
@@ -0,0 +1,40 @@
|
|||||||
|
image: python:3.12-slim
|
||||||
|
|
||||||
|
cache:
|
||||||
|
paths:
|
||||||
|
- .cache/pip
|
||||||
|
- venv/
|
||||||
|
|
||||||
|
variables:
|
||||||
|
PIP_CACHE_DIR: "$CI_PROJECT_DIR/.cache/pip"
|
||||||
|
|
||||||
|
before_script:
|
||||||
|
- python -V
|
||||||
|
- python -m venv venv
|
||||||
|
- source venv/bin/activate
|
||||||
|
- pip install --upgrade pip
|
||||||
|
- pip install ruff mypy pytest
|
||||||
|
- pip install .
|
||||||
|
|
||||||
|
stages:
|
||||||
|
- lint
|
||||||
|
- test
|
||||||
|
|
||||||
|
run_ruff:
|
||||||
|
stage: lint
|
||||||
|
script:
|
||||||
|
- ruff check .
|
||||||
|
|
||||||
|
run_mypy:
|
||||||
|
stage: lint
|
||||||
|
script:
|
||||||
|
- mypy src
|
||||||
|
|
||||||
|
run_pytest:
|
||||||
|
stage: test
|
||||||
|
script:
|
||||||
|
- pytest --junit-xml=report.xml
|
||||||
|
artifacts:
|
||||||
|
when: always
|
||||||
|
reports:
|
||||||
|
junit: report.xml
|
||||||
7
config/application.yml
Normal file
7
config/application.yml
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
path: data/odkazy.xlsx
|
||||||
|
|
||||||
|
screenshotter:
|
||||||
|
target_path: data/screenshots/
|
||||||
|
|
||||||
|
resolver:
|
||||||
|
# NOTE(review): this looks like a real API key committed to version control — rotate it and load it from an environment variable or an untracked file instead.
api_key: 733f6882605be2de8980bbd074091ee4
|
||||||
88
data/extract_to_excel.py
Normal file
88
data/extract_to_excel.py
Normal file
@@ -0,0 +1,88 @@
|
|||||||
|
import os
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
import argparse
|
||||||
|
from datetime import datetime
|
||||||
|
import pytz
|
||||||
|
from openpyxl import Workbook
|
||||||
|
|
||||||
|
|
||||||
|
def process_files(starting_id, output_filename="output.xlsx"):
    """Extract (URL, timestamp) pairs from ``.txt`` files in the current directory into an Excel file.

    Every ``*.txt`` file in the working directory is scanned for bracketed
    timestamps of the form ``[... M D, YYYY at HH:MM]`` and for ``http(s)``
    URLs. Within one file the n-th timestamp is paired with the n-th URL;
    surplus timestamps or URLs are ignored. Each pair becomes one row
    ``[ID, URL, Date_UTC]``; IDs count up from ``starting_id``. Timestamps are
    interpreted as Europe/Prague local time and written as ISO 8601 UTC
    (``YYYY-MM-DDTHH:MM:SSZ``).

    A file contributes rows only if it was read and parsed completely, so a
    file that fails half-way leaves no partial rows behind. After the workbook
    has been saved, every successfully processed file is deleted; a failed
    deletion is reported per file and does not stop the clean-up.

    Args:
        starting_id: ID assigned to the first extracted row.
        output_filename: Path of the ``.xlsx`` file to write.

    Returns:
        None. Progress is printed to stdout, errors to stderr.
    """
    # Find all txt files in the current directory
    txt_files = [f for f in os.listdir('.') if f.endswith('.txt')]

    if not txt_files:
        print("No .txt files found in the current directory.")
        return

    # Regex patterns for input data
    date_pattern = re.compile(r'\[.*?(\d{1,2})\s+(\d{1,2}),\s+(\d{4})\s+at\s+(\d{1,2}:\d{2})\]')
    url_pattern = re.compile(r'(https?://[^\s]+)')

    # Source timestamps are local Prague time
    local_tz = pytz.timezone("Europe/Prague")

    # Set up the Excel Workbook
    wb = Workbook()
    ws = wb.active
    ws.title = "Fortuna Data"
    ws.append(["ID", "URL", "Date_UTC"])  # Add headers

    current_id = starting_id
    success_files = []

    for filename in txt_files:
        try:
            with open(filename, 'r', encoding='utf-8') as f:
                content = f.read()

            dates = date_pattern.findall(content)
            urls = url_pattern.findall(content)

            # Buffer this file's rows so that a parse error cannot leave a
            # half-imported file in the sheet (it would be re-imported later).
            file_rows = []
            for (month, day, year, time_str), url in zip(dates, urls):
                local_dt = datetime.strptime(f"{year}-{month}-{day} {time_str}", "%Y-%m-%d %H:%M")
                utc_dt = local_tz.localize(local_dt).astimezone(pytz.utc)
                file_rows.append((url, utc_dt.strftime("%Y-%m-%dT%H:%M:%SZ")))
        except Exception as e:
            print(f"Error processing {filename}: {e}", file=sys.stderr)
            continue

        for url, formatted_date in file_rows:
            ws.append([current_id, url, formatted_date])
            current_id += 1

        # Queue file for deletion
        success_files.append(filename)

    # Save the Excel file; nothing is deleted unless this succeeds.
    try:
        wb.save(output_filename)
    except Exception as e:
        print(f"Failed to save {output_filename}. No text files were deleted. Error: {e}", file=sys.stderr)
        return
    print(f"Successfully saved data to {output_filename}")

    # Clean up. Handled separately from the save so that a delete failure is
    # not misreported as a save failure.
    for filename in success_files:
        try:
            os.remove(filename)
        except OSError as e:
            print(f"Could not delete {filename}: {e}", file=sys.stderr)
        else:
            print(f"Deleted: {filename}")
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: a required starting ID plus an optional output path.
    cli = argparse.ArgumentParser(description="Extract URLs to an Excel file with ISO UTC dates.")
    cli.add_argument("start_id", type=int, help="Starting ID for the output")
    cli.add_argument(
        "--output",
        type=str,
        default="extracted_data.xlsx",
        help="Output Excel filename (default: extracted_data.xlsx)",
    )
    options = cli.parse_args()

    process_files(options.start_id, options.output)
|
||||||
64
knowledge_base/tickety.md
Normal file
64
knowledge_base/tickety.md
Normal file
@@ -0,0 +1,64 @@
|
|||||||
|
# Druhy ticketů
|
||||||
|
|
||||||
|
Výsledek zápasu - dvojtip: 02
|
||||||
|
|
||||||
|
význam?
|
||||||
|
|
||||||
|
Výsledek 1. poločasu: 1
|
||||||
|
|
||||||
|
význam?
|
||||||
|
|
||||||
|
|
||||||
|
# Fortuna scrape
|
||||||
|
- Projel jsem nějaké zápasy a zapsal druhy ticketů, na které se dá vsadit
|
||||||
|
- Výsledek zápasu (1X2):
|
||||||
|
- Jedná se o sázku na výsledek v základní hrací době
|
||||||
|
- Tým 1/ Remíza / Tým 2
|
||||||
|
- Kdo postoupí
|
||||||
|
- Objevuje se jen občas
|
||||||
|
- nechceme rozhodovat, obsahuje různé logiky daných lig
|
||||||
|
- Výsledek zápasu - dvojtip (sázíme na dvě varianty najednou)
|
||||||
|
- sémanticky je to bezpečnější sázka než 1X2
|
||||||
|
- 1X - neprohra týmu 1
|
||||||
|
- 12 - neremíza
|
||||||
|
- X2 - neprohra týmu 2
|
||||||
|
- Výsledek zápasu bez remízy:
|
||||||
|
- v případě remízy *je ticket neplatný* a vrací se peníze
|
||||||
|
- 1 - výhra týmu 1
|
||||||
|
- 2 - výhra týmu 2
|
||||||
|
- Každý z týmů dá gól v zápasu
|
||||||
|
- Ano / Ne
|
||||||
|
- Počet gólů v zápasu:
|
||||||
|
- Lookup Asijský handicap
|
||||||
|
- Méně/Více než \*.5 je jasná, prostě prohra či výhra
|
||||||
|
- Pokud je sázka na celé číslo, je ticket stornován (vyhodnocen s kurzem 1) pokud se člověk trefí přesně
|
||||||
|
- Příklad:
|
||||||
|
- Zápas dopadl 1:2
|
||||||
|
- Sázka na více než 2.5 gólů: výhra
|
||||||
|
- Sázka na méně než 3.5 gólů: výhra
|
||||||
|
- Sázka na více než 2 góly: výhra
|
||||||
|
- Sázka na více než 4 góly: prohra
|
||||||
|
- Sázka na více/méně než 3 góly: storno
|
||||||
|
- [Tým] počet gólů (ano ta sázka se tak jmenuje)
|
||||||
|
- +/- v tomto kontextu znamená větší/menší než. Tedy sázíme, zda daný tým dal méně/více než nějaký počet gólů
|
||||||
|
- příklad, tým dal 3 góly
|
||||||
|
- sázka -3.5: výhra
|
||||||
|
- sázka +2.5: výhra
|
||||||
|
- sázka -2.5: prohra
|
||||||
|
|
||||||
|
- Handicap v zápasu:
|
||||||
|
- k reálnému konečnému skóre týmu se přičte (či odečte) číslo které je v sázce
|
||||||
|
- takže třeba sázka Bologna -0.5, reálný výsledek je 2:1, přepočtený je 1.5:1.
|
||||||
|
- pak se sází na to kdo *vyhrál*, pokud je výsledek remíza, vrací se peníze
|
||||||
|
- příklad:
|
||||||
|
- Sázka +0.5 je ekvivalentní s neprohrou (protože když tým remizuje, tak +0,5 zařídí výhru)
|
||||||
|
- Tohle mi na fortuně sedí
|
||||||
|
- Sázka -0.5 je ekvivalentní s ostrou výhrou (protože remíza -> prohra, je to vlastně inverze )
|
||||||
|
- Chat říká, že to Fortuna má blbě, že si prostě na tomhle bere větší marži (kurz je nižší), ale mně se to nějak nezdá. Je potřeba se podívat, jestli nám to sedí.
|
||||||
|
- Zápas skončí Bologna 2:1 AS Řím (výhra domácích o 1 gól)
|
||||||
|
- Sázka Bologna -1: storno (virtuální skóre 1 : 1, vrací se vklad)
|
||||||
|
- Sázka Bologna -0.5: výhra (virtuální skóre 1.5 : 1)
|
||||||
|
- Sázka AS Roma +0.5: prohra (virtuální skóre 2 : 1.5)
|
||||||
|
- Sázka AS Roma +1: storno (virtuální skóre 2 : 2, vrací se vklad)
|
||||||
|
|
||||||
|
|
||||||
@@ -4,19 +4,40 @@ build-backend = "setuptools.build_meta"
|
|||||||
|
|
||||||
[project]
|
[project]
|
||||||
name = "beaky"
|
name = "beaky"
|
||||||
version = "0.0.1"
|
version = "0.1.0"
|
||||||
description = "Scan tickets and decide"
|
description = "Scan tickets and decide"
|
||||||
requires-python = ">=3.12"
|
requires-python = ">=3.12"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"pillow==12.1.1",
|
"pillow==12.1.1",
|
||||||
"pydantic==2.12.5"
|
"pydantic==2.12.5",
|
||||||
|
"pandas==3.0.1",
|
||||||
|
"openpyxl>=3.1.0",
|
||||||
|
"PyYaml==6.0.3",
|
||||||
|
"playwright==1.58.0",
|
||||||
|
"requests>=2.32.0"
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.optional-dependencies]
|
[project.optional-dependencies]
|
||||||
dev = [
|
dev = [
|
||||||
"pytest>=9.0.2",
|
"pytest>=9.0.2",
|
||||||
"ruff==0.15.5",
|
"ruff==0.15.5",
|
||||||
|
"pytz"
|
||||||
|
# "playwright==1.58.0" # only dev because it cant be installed in a pipeline, just locally
|
||||||
]
|
]
|
||||||
|
|
||||||
[project.scripts]
|
[project.scripts]
|
||||||
beaky = "beaky.cli:main"
|
beaky = "beaky.cli:main"
|
||||||
|
|
||||||
|
|
||||||
|
[tool.ruff]
|
||||||
|
line-length = 120
|
||||||
|
lint.select = ["E", "F", "I"]
|
||||||
|
|
||||||
|
[tool.mypy]
|
||||||
|
python_version = "3.12"
|
||||||
|
strict = true
|
||||||
|
ignore_missing_imports = true
|
||||||
|
|
||||||
|
[tool.pytest.ini_options]
|
||||||
|
testpaths = ["test"]
|
||||||
|
|
||||||
|
|||||||
@@ -1,25 +1,89 @@
|
|||||||
import argparse
|
import argparse
|
||||||
|
|
||||||
|
import yaml
|
||||||
from pydantic import ValidationError
|
from pydantic import ValidationError
|
||||||
|
|
||||||
from beaky.config import Config
|
from beaky.config import Config
|
||||||
from beaky.scanner.scanner import Scanner
|
from beaky.scanner.scanner import Links
|
||||||
|
from beaky.screenshotter.screenshotter import Screenshotter
|
||||||
|
from beaky.link_classifier.classifier import LinkClassifier
|
||||||
|
from beaky.resolvers.resolver import TicketResolver
|
||||||
|
from beaky.resolvers.resolver import TicketVerdict, _R, _B, _GREEN, _RED, _YELLOW, _GRAY
|
||||||
|
|
||||||
|
_VERDICT_COLOR = {
|
||||||
|
TicketVerdict.TRUTHFUL: _GREEN,
|
||||||
|
TicketVerdict.NOT_TRUTHFUL: _RED,
|
||||||
|
TicketVerdict.POSSIBLY_TRUTHFUL: _YELLOW,
|
||||||
|
TicketVerdict.UNKNOWN: _GRAY,
|
||||||
|
}
|
||||||
|
|
||||||
def main():
|
def load_config(path: str) -> Config | None:
|
||||||
parser = argparse.ArgumentParser(
|
with open(path) as f:
|
||||||
prog="beaky"
|
config_dict = yaml.safe_load(f)
|
||||||
)
|
|
||||||
parser.add_argument("path", help="Path to config file.")
|
|
||||||
args = parser.parse_args()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
config = Config(**vars(args))
|
return Config(**config_dict)
|
||||||
except ValidationError as e:
|
except ValidationError as e:
|
||||||
print("Bad arguments")
|
print("Bad config")
|
||||||
print(e)
|
print(e)
|
||||||
|
return None
|
||||||
|
|
||||||
|
def main() -> None:
|
||||||
|
parser = argparse.ArgumentParser(prog="beaky")
|
||||||
|
parser.add_argument("--config", help="Path to config file.", default="config/application.yml")
|
||||||
|
parser.add_argument("--id", type=int, help="Resolve a single ticket by id (only used with resolve mode).")
|
||||||
|
parser.add_argument("mode", choices=["screenshotter", "parser", "class", "resolve"], help="Mode of operation.")
|
||||||
|
|
||||||
|
args = parser.parse_args()
|
||||||
|
config = load_config(args.config)
|
||||||
|
if config is None:
|
||||||
return
|
return
|
||||||
|
|
||||||
Scanner(config)
|
# always load testing data, we will modify that later
|
||||||
|
data = Links(config)
|
||||||
|
data.ret_links()
|
||||||
|
link_amount = len(data.links)
|
||||||
|
print(f"We found {link_amount} links")
|
||||||
|
if link_amount == 0:
|
||||||
|
print("ERROR, no links found")
|
||||||
|
return
|
||||||
|
|
||||||
|
if args.mode == "screenshotter":
|
||||||
|
screenshotter = Screenshotter(config)
|
||||||
|
screenshotter.capture_tickets(data.links)
|
||||||
|
|
||||||
|
if args.mode == "parser":
|
||||||
|
for link in data.links:
|
||||||
|
print(link)
|
||||||
|
|
||||||
|
if args.mode == "class":
|
||||||
|
classifier = LinkClassifier()
|
||||||
|
results = []
|
||||||
|
for link in data.links:
|
||||||
|
results.append(classifier.classify(link))
|
||||||
|
ticket = results[-1]
|
||||||
|
print(f"\n=== Link {ticket.id} ({len(ticket.bets)} bets) ===")
|
||||||
|
for bet in ticket.bets:
|
||||||
|
print(f" [{type(bet).__name__}]")
|
||||||
|
for k, v in vars(bet).items():
|
||||||
|
print(f" {k}: {v}")
|
||||||
|
|
||||||
|
if args.mode == "resolve":
|
||||||
|
classifier = LinkClassifier()
|
||||||
|
resolver = TicketResolver(config.resolver)
|
||||||
|
links = [l for l in data.links if l.id == args.id] if args.id is not None else data.links
|
||||||
|
if args.id is not None and not links:
|
||||||
|
print(f"ERROR: ticket id {args.id} not found")
|
||||||
|
return
|
||||||
|
for link in links:
|
||||||
|
print(f"\n=== Classifying ticket {link.id} ===")
|
||||||
|
ticket = classifier.classify(link)
|
||||||
|
for bet in ticket.bets:
|
||||||
|
print(f" [{type(bet).__name__}] {bet.team1Name} vs {bet.team2Name} | {bet.date.date()} | {bet.league}")
|
||||||
|
|
||||||
|
print(f"\n--- Resolving ticket {link.id} ---")
|
||||||
|
resolved = resolver.resolve(ticket)
|
||||||
|
color = _VERDICT_COLOR.get(resolved.verdict, "")
|
||||||
|
print(f"\n {color}{_B}VERDICT: {resolved.verdict.value.upper()}{_R}")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
|||||||
@@ -1,5 +1,11 @@
|
|||||||
from pydantic.dataclasses import dataclass
|
from pydantic.dataclasses import dataclass
|
||||||
|
|
||||||
|
from beaky.resolvers.config import ResolverConfig
|
||||||
|
from beaky.screenshotter.config import ScreenshotterConfig
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Config:
|
class Config:
|
||||||
path: str
|
path: str
|
||||||
|
screenshotter: ScreenshotterConfig
|
||||||
|
resolver: ResolverConfig
|
||||||
@@ -1,8 +0,0 @@
|
|||||||
from datetime import datetime
|
|
||||||
|
|
||||||
from pydantic.dataclasses import dataclass
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class Scan:
|
|
||||||
date: datetime
|
|
||||||
event_name: str
|
|
||||||
91
src/beaky/datamodels/ticket.py
Normal file
91
src/beaky/datamodels/ticket.py
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from datetime import datetime
|
||||||
|
from enum import Enum
|
||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
from pydantic.dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
class BetType(str, Enum):
    """Tag naming the kind of bet carried by one leg of a ticket."""

    WIN_DRAW_LOSE = "win_draw_lose"
    ADVANCED = "advance"
    WIN_DRAW_LOSE_DOUBLE = "win_draw_lose_double"
    WIN_LOSE = "win_lose"
    BOTH_TEAM_SCORED = "both_team_scored"
    GOAL_AMOUNT = "goal_amount"
    GOAL_HANDICAP = "goal_handicap"
    UNKNOWN = "unknown"
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Bet(ABC):
    """Fields shared by every bet kind; concrete kinds subclass this and implement ``resolve``."""

    ticketType: BetType  # tag identifying the concrete bet kind
    team1Name: str
    team2Name: str
    date: datetime
    league: str

    @abstractmethod
    def resolve(self):
        """Resolve this bet; each concrete bet kind provides its own implementation."""
|
||||||
|
|
||||||
|
@dataclass
class WinDrawLose(Bet):
    """Match result bet (1X2)."""

    # NOTE(review): both "X" and "0" are accepted; presumably both denote a draw — confirm.
    betType: Literal["X", "0", "1", "2"] = "0"

    def resolve(self):
        """Placeholder; not implemented yet."""
        return None
|
||||||
|
|
||||||
|
@dataclass
class Advance(Bet):
    """Bet on which team advances to the next round."""

    def resolve(self):
        """Always raises: this bet kind is deliberately not resolved.

        Raises:
            NotImplementedError: on every call.
        """
        # Who advances depends on competition-specific rules, which this
        # project chose not to model.
        raise NotImplementedError("Advance bets are not resolved: advancement rules differ between competitions")
|
||||||
|
|
||||||
|
@dataclass
class WinDrawLoseDouble(Bet):
    """Match result double tip: one bet covering two of the possible outcomes."""

    # Two outcome digits in ascending order.
    betType: Literal["01", "12", "02"] = "01"

    def resolve(self):
        """Placeholder; not implemented yet."""
        return None
|
||||||
|
|
||||||
|
@dataclass
class WinLose(Bet):
    """Match result bet without the draw option."""

    betType: Literal["1", "2"] = "1"  # "1" = team 1 wins, "2" = team 2 wins

    def resolve(self):
        """Placeholder; not implemented yet."""
        return None
|
||||||
|
|
||||||
|
@dataclass
class BothTeamScored(Bet):
    """Bet on whether each team scores in the match."""

    # NOTE(review): no field records the chosen answer (yes/no) — confirm whether that is intended.

    def resolve(self):
        """Placeholder; not implemented yet."""
        return None
|
||||||
|
|
||||||
|
@dataclass
class GoalAmount(Bet):
    """Over/under bet on the total number of goals in the match."""

    line: float = 0.0  # goal line, e.g. 2.5
    over: bool = True  # True = more than line, False = less than line

    def resolve(self):
        """Placeholder; not implemented yet."""
        return None
|
||||||
|
|
||||||
|
@dataclass
class GoalHandicap(Bet):
    """Goal handicap for one team: ``handicap_amount`` is added to that team's score and the bet wins if the team then wins."""

    team_bet: Literal["1", "2"] = "1"  # which team the handicap is applied to
    handicap_amount: float = 0.0  # e.g. +1.5 or -0.5

    def resolve(self):
        """Placeholder; not implemented yet."""
        return None
|
||||||
|
|
||||||
|
@dataclass
class UnknownTicket(Bet):
    """Bet whose kind could not be classified."""

    raw_text: str = ""  # unclassified bet text, kept for inspection

    def resolve(self):
        """Placeholder; not implemented yet."""
        return None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Ticket:
    """A ticket: its numeric id together with the bets placed on it."""

    id: int
    bets: list[Bet]
|
||||||
0
src/beaky/image_classifier/__init__.py
Normal file
0
src/beaky/image_classifier/__init__.py
Normal file
7
src/beaky/image_classifier/classifier.py
Normal file
7
src/beaky/image_classifier/classifier.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from beaky.datamodels.ticket import Ticket
|
||||||
|
|
||||||
|
|
||||||
|
def f(path: str, date: datetime) -> list[Ticket]:
    """Stub for the image-based classifier; has no implementation yet and returns ``None``."""
    return None
|
||||||
0
src/beaky/link_classifier/__init__.py
Normal file
0
src/beaky/link_classifier/__init__.py
Normal file
116
src/beaky/link_classifier/classifier.py
Normal file
116
src/beaky/link_classifier/classifier.py
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
import re
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from playwright.sync_api import Page, sync_playwright
|
||||||
|
|
||||||
|
from beaky.datamodels.ticket import (
|
||||||
|
BothTeamScored,
|
||||||
|
GoalAmount,
|
||||||
|
GoalHandicap,
|
||||||
|
Ticket,
|
||||||
|
BetType,
|
||||||
|
UnknownTicket,
|
||||||
|
WinDrawLose,
|
||||||
|
WinDrawLoseDouble,
|
||||||
|
WinLose,
|
||||||
|
Bet
|
||||||
|
)
|
||||||
|
from beaky.scanner.scanner import Link
|
||||||
|
|
||||||
|
_TICKET_SELECTOR = ".betslip-history-detail__left-panel"
|
||||||
|
_LEG_SELECTOR = '[data-test="betslip-leg"]'
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_czech_date(text: str) -> datetime | None:
|
||||||
|
m = re.search(r"(\d+)\.\s*(\d+)\.\s*(\d+)\s+(\d+):(\d+)", text)
|
||||||
|
if not m:
|
||||||
|
return None
|
||||||
|
day, month, year, hour, minute = map(int, m.groups())
|
||||||
|
return datetime(year, month, day, hour, minute)
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_teams(title: str) -> tuple[str, str]:
|
||||||
|
parts = title.split(" - ", 1)
|
||||||
|
if len(parts) == 2:
|
||||||
|
return parts[0].strip(), parts[1].strip()
|
||||||
|
return title.strip(), ""
|
||||||
|
|
||||||
|
|
||||||
|
def _classify_bet(bet_text: str, team1: str, team2: str, date: datetime, league: str) -> Bet:
    """Turn the text of one bet selection into the matching ``Bet`` object.

    The patterns are tried in a fixed order (double tip, no-draw result,
    plain result, both teams score, total goals, per-team goal line); the
    first that matches decides the bet kind.

    Args:
        bet_text: Text of the selection as shown on the ticket.
        team1: Name of the first team of the match (may be empty).
        team2: Name of the second team of the match (may be empty).
        date: Match date stored on the resulting bet.
        league: League text stored on the resulting bet.

    Returns:
        A concrete ``Bet``; ``UnknownTicket`` carrying ``bet_text`` when no
        pattern applies or the matched value is not one the bet kind accepts.
    """
    common = dict(team1Name=team1, team2Name=team2, date=date, league=league)

    def unknown() -> Bet:
        return UnknownTicket(ticketType=BetType.UNKNOWN, raw_text=bet_text, **common)

    # WinDrawLose double: "Výsledek zápasu - dvojtip: 10"
    m = re.search(r"Výsledek zápasu - dvojtip:\s*(\d+)", bet_text)
    if m:
        # normalize order: "10" -> "01", "02" -> "02", "12" -> "12"
        bet_type = "".join(sorted(m.group(1)))
        if bet_type not in ("01", "02", "12"):
            # Unexpected digits would fail the field's Literal validation;
            # report the bet as unclassified instead of raising.
            return unknown()
        return WinDrawLoseDouble(ticketType=BetType.WIN_DRAW_LOSE_DOUBLE, betType=bet_type, **common)

    # WinLose (no draw): "Výsledek bez remízy: 1"
    m = re.search(r"bez rem[ií]zy:\s*([12])", bet_text)
    if m:
        return WinLose(ticketType=BetType.WIN_LOSE, betType=m.group(1), **common)

    # WinDrawLose: "Výsledek zápasu: 1"
    m = re.search(r"Výsledek zápasu:\s*([012X])\s*$", bet_text.strip())
    if m:
        return WinDrawLose(ticketType=BetType.WIN_DRAW_LOSE, betType=m.group(1), **common)

    # BothTeamScored: "Každý z týmů dá gól v zápasu: Ano"
    # NOTE(review): the Ano/Ne part of the text is not captured here — confirm that is intended.
    if "dá gól" in bet_text or "oba týmy" in bet_text.lower():
        return BothTeamScored(ticketType=BetType.BOTH_TEAM_SCORED, **common)

    # GoalAmount: "Počet gólů v zápasu 2.5: + 2.5" / "Počet gólů v zápasu 4: - 4"
    m = re.search(r"Počet gólů v zápasu\s+(\d+(?:\.\d+)?):\s*([+-])", bet_text)
    if m:
        return GoalAmount(ticketType=BetType.GOAL_AMOUNT, line=float(m.group(1)), over=m.group(2) == "+", **common)

    # GoalHandicap: "[Team] počet gólů ...: +1.5" — team name in bet text determines team_bet
    # NOTE(review): "[Tým] počet gólů" may be an over/under on that team's goals
    # rather than a handicap — confirm that GoalHandicap is the right model.
    m = re.search(r"([+-])\s*(\d+(?:\.\d+)?)\s*$", bet_text.strip())
    if m and "gólů" in bet_text:
        bet_lower = bet_text.lower()
        # Skip empty names ("" is a substring of everything). When both names
        # occur, take the longer one so that "Sparta" does not shadow
        # "Sparta B"; on equal length the first team wins, as before.
        candidates = [
            (name, code)
            for name, code in ((team1, "1"), (team2, "2"))
            if name and name.lower() in bet_lower
        ]
        if not candidates:
            return unknown()
        team_bet = max(candidates, key=lambda candidate: len(candidate[0]))[1]
        sign = 1.0 if m.group(1) == "+" else -1.0
        handicap = sign * float(m.group(2))
        return GoalHandicap(ticketType=BetType.GOAL_HANDICAP, team_bet=team_bet, handicap_amount=handicap, **common)

    return unknown()
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_legs(page: Page, fallback_date: datetime | None) -> list[Bet]:
    """Read every bet leg found on *page* and classify it.

    For each element matching ``_LEG_SELECTOR`` this reads the match title,
    date text, selection text and league text, then hands them to
    ``_classify_bet``. If the date text cannot be parsed, *fallback_date* is
    used, and the current time when that is missing too.
    """
    classified: list[Bet] = []
    for leg_node in page.locator(_LEG_SELECTOR).all():
        match_title = leg_node.locator("h3").first.get_attribute("title") or ""
        raw_date = leg_node.locator(".betslip-leg-date span").first.inner_text()
        selection = leg_node.locator("[data-selection-id]").first.inner_text()
        league_name = leg_node.locator(".f-mt-1.f-leading-tight.f-line-clamp-2").first.inner_text()

        first_team, second_team = _parse_teams(match_title)
        when = _parse_czech_date(raw_date)
        if not when:
            when = fallback_date or datetime.now()

        classified.append(_classify_bet(selection, first_team, second_team, when, league_name))
    return classified
|
||||||
|
|
||||||
|
|
||||||
|
class LinkClassifier:
    """Loads a ticket link in headless Chromium and converts the rendered bet legs into a ``Ticket``."""

    def classify(self, link: Link) -> Ticket:
        """Open ``link.url`` and classify every bet leg on the page.

        Args:
            link: Link whose ``id``, ``url`` and ``date`` are used; ``date`` is
                the fallback for legs whose own date cannot be parsed.

        Returns:
            A ``Ticket`` with ``link.id`` and the classified bets. If loading
            or parsing the page fails, the error is printed and a ticket with
            an empty ``bets`` list is returned.
        """
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            context = browser.new_context()
            page = context.new_page()
            try:
                page.goto(link.url)
                page.wait_for_selector(_LEG_SELECTOR, timeout=15000)
                # Short pause after the first leg appears before reading the page.
                page.wait_for_timeout(500)
                return Ticket(id=link.id, bets=_extract_legs(page, link.date))
            except Exception as e:
                print(f"Error classifying link {link.id}: {e}")
                # Previously execution reached `return result` with `result`
                # never assigned, turning the handled error into an
                # UnboundLocalError. Return an empty ticket instead.
                return Ticket(id=link.id, bets=[])
            finally:
                page.close()
                browser.close()
|
||||||
0
src/beaky/resolvers/__init__.py
Normal file
0
src/beaky/resolvers/__init__.py
Normal file
6
src/beaky/resolvers/config.py
Normal file
6
src/beaky/resolvers/config.py
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
from pydantic.dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ResolverConfig:
    """Configuration for the ticket resolver."""

    # NOTE(review): presumably the key for the football results API used by the resolver — confirm.
    api_key: str
|
||||||
353
src/beaky/resolvers/resolver.py
Normal file
353
src/beaky/resolvers/resolver.py
Normal file
@@ -0,0 +1,353 @@
|
|||||||
|
import time
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
from difflib import SequenceMatcher
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
import requests
|
||||||
|
|
||||||
|
from beaky.datamodels.ticket import (
|
||||||
|
Bet,
|
||||||
|
BothTeamScored,
|
||||||
|
GoalAmount,
|
||||||
|
GoalHandicap,
|
||||||
|
Ticket,
|
||||||
|
UnknownTicket,
|
||||||
|
WinDrawLose,
|
||||||
|
WinDrawLoseDouble,
|
||||||
|
WinLose,
|
||||||
|
)
|
||||||
|
from beaky.resolvers.config import ResolverConfig
|
||||||
|
|
||||||
|
# Base URL of the api-football v3 REST API; endpoint paths are appended to it.
_API_BASE = "https://v3.football.api-sports.io"

# Fortuna league strings (lowercased substring match) -> api-football league ID.
# Some keys contain other keys (e.g. "1. itálie" is a substring of
# "1. itálie - ženy", "1. česko" of "1. česko - ženy"), so any substring lookup
# over this table has to prefer the most specific (longest) matching key.
_LEAGUE_MAP: dict[str, int] = {
    # European cups
    "liga mistrů": 2,
    "champions league": 2,
    "evropská liga": 3,
    "europa league": 3,
    "konferenční liga": 848,
    "conference league": 848,
    # Top flights
    "1. anglie": 39,
    "1. belgie": 144,
    "1. česko": 345,
    "1. dánsko": 119,
    "1. francie": 61,
    "1. itálie": 135,
    "1. itálie - ženy": 794,
    "1. německo": 78,
    "1. nizozemsko": 88,
    "1. polsko": 106,
    "1. portugalsko": 94,
    "1. rakousko": 218,
    "1. rumunsko": 283,
    "1. skotsko": 179,
    "1. slovensko": 332,
    "1. španělsko": 140,
    "1. wales": 771,
    # Second divisions
    "2. anglie": 40,
    "2. česko": 346,
    "2. francie": 62,
    "2. itálie": 136,
    "2. německo": 79,
    "2. nizozemsko": 89,
    "2. rakousko": 219,
    "2. slovensko": 333,
    "2. španělsko": 141,
    # Third divisions
    "3. francie": 63,
    "3. česko msfl": 349,
    "3. česko čfl": 348,
    # Fourth divisions
    "4. česko - sk. a": 350,
    "4. česko - sk. b": 351,
    "4. česko - sk. c": 352,
    "4. česko - sk. d": 353,
    "4. česko - sk. e": 354,
    "4. česko - sk. f": 686,
    # Women
    "1. česko - ženy": 669,
    "fortuna=liga ženy": 669,
    # Domestic cups
    "anglie - fa cup": 45,
    "anglie - efl cup": 48,
    "česko - pohár": 347,
}
|
||||||
|
|
||||||
|
# Half-width of the fixture search window; also the distance at which the
# date-proximity score reaches 0.0.
_DATE_WINDOW = 3  # days either side of the bet date to search

# ANSI color helpers (SGR escape sequences used in the console trace output)
_R = "\033[0m"  # reset all attributes
_B = "\033[1m"  # bold
_DIM= "\033[2m"  # dim / faint
_GREEN = "\033[32m"
_RED = "\033[31m"
_YELLOW = "\033[33m"
_CYAN = "\033[36m"
_GRAY = "\033[90m"

# Color used when printing a bet outcome; looked up by the outcome's string value.
_OUTCOME_COLOR = {
    "win": _GREEN,
    "lose": _RED,
    "void": _YELLOW,
    "unknown": _GRAY,
}
|
||||||
|
|
||||||
|
|
||||||
|
class TicketVerdict(str, Enum):
    """Overall verdict for a resolved ticket; each value is a human-readable label."""

    TRUTHFUL = "truthful"
    NOT_TRUTHFUL = "not truthful"
    POSSIBLY_TRUTHFUL = "possibly truthful — unresolvable bets remain, check manually"
    UNKNOWN = "unknown — could not resolve enough bets to decide"
|
||||||
|
|
||||||
|
|
||||||
|
class BetOutcome(str, Enum):
    """Result of evaluating a single bet against a fixture."""

    WIN = "win"
    LOSE = "lose"
    VOID = "void"  # stake returned (e.g. WinLose on draw, integer goal line hit)
    UNKNOWN = "unknown"  # fixture not found or unclassified bet
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ResolvedBet:
    """A bet together with its evaluated outcome and match-confidence details."""

    bet: Bet
    outcome: BetOutcome
    # api-football fixture the bet was matched to; None when no fixture matched.
    fixture_id: int | None = None
    # Confidence breakdown (each component 0.0–1.0):
    #   name_match     — how well team names matched (SequenceMatcher score)
    #   date_proximity — 1.0 exact date, linear decay to 0.0 at _DATE_WINDOW days away
    #   league_found   — 1.0 static map hit, 0.7 API fallback, 0.3 not found
    #   match_finished — 1.0 if fixture status is terminal, 0.0 otherwise
    confidence: float = 0.0
    name_match: float = 0.0
    date_proximity: float = 0.0
    league_found: float = 0.0
    match_finished: float = 0.0
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ResolvedTicket:
    """All resolved bets of one ticket plus the verdict derived from them."""

    ticket_id: int
    bets: list[ResolvedBet] = field(default_factory=list)

    @property
    def verdict(self) -> TicketVerdict:
        """Derive the ticket verdict from the individual bet outcomes.

        Bets whose type is ``UnknownTicket`` are treated as unresolvable and are
        kept apart from the rest. The checks run in this order:

        * no resolvable bet at all            -> UNKNOWN
        * any resolvable bet lost             -> NOT_TRUTHFUL
        * any resolvable bet has no outcome   -> UNKNOWN
        * unresolvable bets are present       -> POSSIBLY_TRUTHFUL
        * otherwise                           -> TRUTHFUL
        """
        has_unresolvable = False
        outcomes = []
        for resolved in self.bets:
            if isinstance(resolved.bet, UnknownTicket):
                has_unresolvable = True
            else:
                outcomes.append(resolved.outcome)

        if not outcomes:
            return TicketVerdict.UNKNOWN
        if BetOutcome.LOSE in outcomes:
            return TicketVerdict.NOT_TRUTHFUL
        if BetOutcome.UNKNOWN in outcomes:
            return TicketVerdict.UNKNOWN
        return TicketVerdict.POSSIBLY_TRUTHFUL if has_unresolvable else TicketVerdict.TRUTHFUL
|
||||||
|
|
||||||
|
|
||||||
|
def _get(
    url: str,
    headers: dict,
    params: dict,
    retries: int = 3,
    backoff: float = 60.0,
    timeout: float = 30.0,
) -> requests.Response:
    """GET ``url`` and retry with a growing pause while the server answers 429.

    Args:
        url: Full request URL.
        headers: Request headers.
        params: Query-string parameters.
        retries: Maximum number of attempts; values below 1 are treated as 1.
        backoff: Base pause in seconds; attempt ``k`` (1-based) is followed by
            a pause of ``backoff * k`` before the next attempt.
        timeout: Per-request timeout in seconds, so a stalled connection cannot
            block the run forever.

    Returns:
        The first non-429 response, or the last 429 response once all attempts
        are used up. The status is not otherwise checked here.
    """
    attempts = max(1, retries)  # guarantees ``resp`` is bound before the final return
    for attempt in range(attempts):
        resp = requests.get(url, headers=headers, params=params, timeout=timeout)
        if resp.status_code != 429:
            return resp
        if attempt + 1 < attempts:
            # Only sleep when another attempt follows; waiting after the last
            # attempt would just delay giving up.
            wait = backoff * (attempt + 1)
            print(f" !! rate limited — waiting {wait:.0f}s before retry ({attempt + 1}/{retries})")
            time.sleep(wait)
    print(f" !! still rate limited after {retries} retries, giving up")
    return resp
|
||||||
|
|
||||||
|
|
||||||
|
class TicketResolver:
    """Resolve the bets of a ticket against fixtures fetched from api-football.

    Fixture lists and league lookups are cached on the instance, so several bets
    on the same date/league cause a single HTTP request.
    """

    def __init__(self, config: ResolverConfig):
        """Store the API key header and create the empty caches."""
        self._headers = {"x-apisports-key": config.api_key}
        # Cache maps (center_date_str, league_id | None) -> list of fixture dicts
        self._fixture_cache: dict[tuple[str, int | None], list[dict]] = {}
        # Cache maps league name -> (league_id, confidence)
        self._league_cache: dict[str, tuple[int | None, float]] = {}

    def resolve(self, ticket: Ticket) -> ResolvedTicket:
        """Resolve every bet of ``ticket`` in order and collect the results."""
        result = ResolvedTicket(ticket_id=ticket.id)
        for bet in ticket.bets:
            result.bets.append(self._resolve_bet(bet))
        return result

    def _resolve_bet(self, bet: Bet) -> ResolvedBet:
        """Match ``bet`` to a fixture, evaluate it and print a trace of the steps.

        Returns a ``ResolvedBet`` whose outcome is UNKNOWN when the bet type is
        not implemented, no fixture matches, or the matched fixture is not in a
        terminal state.
        """
        bet_type = type(bet).__name__
        print(f"\n {_B}{_CYAN}┌─ [{bet_type}]{_R} {_B}{bet.team1Name} vs {bet.team2Name}{_R}"
              f" {_DIM}{bet.date.strftime('%Y-%m-%d')} | {bet.league}{_R}")

        if isinstance(bet, UnknownTicket):
            print(f" {_GRAY}│ skipping — not implemented: {bet.raw_text!r}{_R}")
            print(f" {_GRAY}└─ UNKNOWN{_R}")
            return ResolvedBet(bet=bet, outcome=BetOutcome.UNKNOWN)

        fixture, name_match, date_prox, league_conf = self._find_fixture(bet)
        if fixture is None:
            print(f" {_GRAY}└─ UNKNOWN — no fixture found{_R}")
            return ResolvedBet(bet=bet, outcome=BetOutcome.UNKNOWN, league_found=league_conf)

        home_name = fixture["teams"]["home"]["name"]
        away_name = fixture["teams"]["away"]["name"]
        finished = _is_finished(fixture)
        # Overall confidence is the plain average of the four components.
        confidence = round((name_match + date_prox + league_conf + finished) / 4, 3)
        # An unfinished match has no final score yet, so it is never evaluated.
        outcome = _evaluate_bet(bet, fixture) if finished == 1.0 else BetOutcome.UNKNOWN

        goals = fixture["goals"]
        color = _OUTCOME_COLOR.get(outcome.value, _GRAY)
        print(f" {_DIM}│ matched #{fixture['fixture']['id']}: {home_name} vs {away_name}"
              f" | {goals['home']}:{goals['away']} | {fixture['fixture']['status']['short']}"
              f" | confidence {confidence} (name={name_match:.2f} date={date_prox:.2f} league={league_conf} finished={finished}){_R}")
        print(f" {color}{_B}└─ {outcome.value.upper()}{_R}")

        return ResolvedBet(
            bet=bet,
            outcome=outcome,
            fixture_id=fixture["fixture"]["id"],
            confidence=confidence,
            name_match=round(name_match, 3),
            date_proximity=round(date_prox, 3),
            league_found=league_conf,
            match_finished=finished,
        )

    def _find_fixture(self, bet: Bet) -> tuple[dict | None, float, float, float]:
        """Returns (fixture, name_match, date_proximity, league_confidence).

        Fetches all fixtures within ``_DATE_WINDOW`` days of the bet date
        (restricted to the resolved league when one is known), caches them, and
        picks the best team-name match.

        Raises:
            requests.HTTPError: if the /fixtures request returns an error status.
        """
        center = bet.date.date()
        date_str = center.strftime("%Y-%m-%d")
        league_id, league_conf = self._resolve_league(bet.league)
        cache_key = (date_str, league_id)

        if cache_key not in self._fixture_cache:
            date_from = (center - timedelta(days=_DATE_WINDOW)).strftime("%Y-%m-%d")
            date_to = (center + timedelta(days=_DATE_WINDOW)).strftime("%Y-%m-%d")
            params: dict = {"from": date_from, "to": date_to}
            if league_id is not None:
                params["league"] = league_id
                # Dates before July are assigned to the season that started the
                # previous calendar year.
                # NOTE(review): calendar-year leagues may need a different rule — confirm.
                params["season"] = center.year if center.month >= 7 else center.year - 1
            print(f" {_GRAY}│ GET /fixtures {params}{_R}")
            resp = _get(f"{_API_BASE}/fixtures", headers=self._headers, params=params)
            resp.raise_for_status()
            self._fixture_cache[cache_key] = resp.json().get("response", [])
            print(f" {_GRAY}│ {len(self._fixture_cache[cache_key])} fixtures returned (cached){_R}")
        else:
            print(f" {_GRAY}│ /fixtures (±{_DATE_WINDOW}d of {date_str}, league={league_id}) served from cache{_R}")

        fixture, name_match, date_prox = _best_fixture_match(
            self._fixture_cache[cache_key], bet.team1Name, bet.team2Name, center
        )
        return fixture, name_match, date_prox, league_conf

    def _resolve_league(self, league_name: str) -> tuple[int | None, float]:
        """Map a Fortuna league string to ``(api-football league id, confidence)``.

        Confidence is 1.0 for a static-map hit, 0.7 for the first result of the
        /leagues search, and 0.3 (with id ``None``) when nothing was found.
        """
        key = league_name.lower().strip()
        if key in self._league_cache:
            return self._league_cache[key]

        # Several map keys are substrings of longer keys ("1. česko" inside
        # "1. česko - ženy"), so the most specific (longest) hit must win;
        # taking the first hit sent women's leagues to the men's league id.
        hits = [pattern for pattern in _LEAGUE_MAP if pattern in key]
        if hits:
            league_id = _LEAGUE_MAP[max(hits, key=len)]
            print(f" {_GRAY}│ league {league_name!r} -> id={league_id} (static map){_R}")
            self._league_cache[key] = (league_id, 1.0)
            return league_id, 1.0

        # Fall back to API search — lower confidence since first result is taken unverified
        print(f" {_GRAY}│ GET /leagues search={league_name!r}{_R}")
        resp = _get(f"{_API_BASE}/leagues", headers=self._headers, params={"search": league_name[:20]})
        if not resp.ok:
            # A failed request says nothing about the league, so it is not
            # cached; a later bet in the same league may still succeed.
            print(f" {_GRAY}│ league search failed (HTTP {resp.status_code}),"
                  f" searching fixtures by date only (confidence=0.3){_R}")
            return None, 0.3

        results = resp.json().get("response", [])
        if results:
            league_id = results[0]["league"]["id"]
            league_found_name = results[0]["league"]["name"]
            print(f" {_GRAY}│ matched {league_found_name!r} id={league_id} (API fallback, confidence=0.7){_R}")
            self._league_cache[key] = (league_id, 0.7)
            return league_id, 0.7

        print(f" {_GRAY}│ no league found, searching fixtures by date only (confidence=0.3){_R}")
        self._league_cache[key] = (None, 0.3)
        return None, 0.3
|
||||||
|
|
||||||
|
|
||||||
|
def _similarity(a: str, b: str) -> float:
    """Return the case-insensitive ``SequenceMatcher`` ratio of *a* and *b* (0.0–1.0)."""
    left, right = a.lower(), b.lower()
    matcher = SequenceMatcher(None, left, right)
    return matcher.ratio()
|
||||||
|
|
||||||
|
|
||||||
|
def _date_proximity(fixture: dict, center) -> float:
    """Score how close the fixture's date is to ``center``.

    Returns 1.0 when the dates are equal and falls linearly to 0.0 at
    ``_DATE_WINDOW`` days away; it never goes below 0.0.
    """
    raw_timestamp = fixture["fixture"]["date"]
    # A trailing "Z" is rewritten as an explicit UTC offset before parsing.
    kickoff_day = datetime.fromisoformat(raw_timestamp.replace("Z", "+00:00")).date()
    gap_days = abs((kickoff_day - center).days)
    score = 1.0 - gap_days / _DATE_WINDOW
    return score if score > 0.0 else 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def _best_fixture_match(fixtures: list[dict], team1: str, team2: str, center) -> tuple[dict | None, float, float]:
    """Pick the fixture whose teams and date fit the bet best.

    Each fixture gets a name score (the better of the straight and the swapped
    home/away pairing, averaged over both teams) and a date-proximity score,
    combined as 80 % name and 20 % date. The first fixture with the highest
    combined score is chosen.

    Returns:
        ``(fixture, name_score, date_proximity)`` of the chosen fixture, with
        the fixture replaced by ``None`` when its name score is not above 0.5.
    """
    winner = None
    top_combined = top_name = top_date = 0.0

    for candidate in fixtures:
        teams = candidate["teams"]
        home_name = teams["home"]["name"]
        away_name = teams["away"]["name"]

        straight = _similarity(team1, home_name) + _similarity(team2, away_name)
        swapped = _similarity(team1, away_name) + _similarity(team2, home_name)
        name_score = max(straight, swapped) / 2
        proximity = _date_proximity(candidate, center)

        # Names carry most of the weight; the date only separates close calls.
        combined = name_score * 0.8 + proximity * 0.2
        if combined <= top_combined:
            continue
        winner, top_combined, top_name, top_date = candidate, combined, name_score, proximity

    # A close date must not rescue a poor name match.
    if top_name > 0.5:
        return winner, top_name, top_date
    return None, top_name, top_date
|
||||||
|
|
||||||
|
|
||||||
|
def _is_finished(fixture: dict) -> float:
    """Return 1.0 when the fixture's short status code is FT, AET, PEN, AWD or WO, else 0.0."""
    fixture_info = fixture.get("fixture", {})
    short_code = fixture_info.get("status", {}).get("short", "")
    if short_code in ("FT", "AET", "PEN", "AWD", "WO"):
        return 1.0
    return 0.0
|
||||||
|
|
||||||
|
|
||||||
|
def _evaluate_bet(bet: Bet, fixture: dict) -> BetOutcome:
    """Decide whether ``bet`` won, lost or is void given the fixture's goals.

    Args:
        bet: The classified bet.
        fixture: Fixture dict; only ``fixture["goals"]["home"/"away"]`` is read.

    Returns:
        WIN / LOSE / VOID for supported bet types; UNKNOWN when a goal count is
        missing or the bet type is not handled here.

    NOTE(review): the ``goals`` totals are used as-is for every terminal status;
    whether they include extra time for AET/PEN fixtures is not visible here —
    confirm against the API before relying on cup-tie results.
    """
    goals = fixture.get("goals", {})
    home = goals.get("home")
    away = goals.get("away")
    if home is None or away is None:
        return BetOutcome.UNKNOWN

    # Match result in ticket notation: "1" home win, "0" draw, "2" away win.
    if home > away:
        actual = "1"
    elif home == away:
        actual = "0"
    else:
        actual = "2"

    if isinstance(bet, WinDrawLose):
        # A draw pick may be written as either "X" or "0".
        picked = "0" if bet.betType in ("X", "0") else bet.betType
        return BetOutcome.WIN if picked == actual else BetOutcome.LOSE

    if isinstance(bet, WinDrawLoseDouble):
        # Accept "X" for the draw here as well, consistent with WinDrawLose;
        # otherwise a draw against "1X" / "X2" would be scored as a loss.
        covered = actual in bet.betType or (actual == "0" and "X" in bet.betType)
        return BetOutcome.WIN if covered else BetOutcome.LOSE

    if isinstance(bet, WinLose):
        if actual == "0":
            return BetOutcome.VOID  # draw: stake returned
        return BetOutcome.WIN if actual == bet.betType else BetOutcome.LOSE

    if isinstance(bet, BothTeamScored):
        return BetOutcome.WIN if home > 0 and away > 0 else BetOutcome.LOSE

    if isinstance(bet, GoalAmount):
        total = home + away
        if total == bet.line:
            return BetOutcome.VOID  # total landed exactly on the line
        won = total > bet.line if bet.over else total < bet.line
        return BetOutcome.WIN if won else BetOutcome.LOSE

    if isinstance(bet, GoalHandicap):
        # The handicap is added to the score of the team the bet is on.
        h_home = home + (bet.handicap_amount if bet.team_bet == "1" else 0.0)
        h_away = away + (bet.handicap_amount if bet.team_bet == "2" else 0.0)
        if h_home == h_away:
            return BetOutcome.VOID
        actual_winner = "1" if h_home > h_away else "2"
        return BetOutcome.WIN if actual_winner == bet.team_bet else BetOutcome.LOSE

    return BetOutcome.UNKNOWN
|
||||||
@@ -1,20 +1,138 @@
|
|||||||
from pydantic.dataclasses import dataclass
|
|
||||||
from beaky.config import Config
|
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from beaky.datamodels.scan import Scan
|
from typing import Any, Iterator, List, Optional
|
||||||
|
|
||||||
|
from openpyxl import load_workbook
|
||||||
|
from pydantic.dataclasses import dataclass
|
||||||
|
|
||||||
|
from beaky.config import Config
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
class Link:
    """Represents a single link row from an Excel sheet.

    Attributes:
        id: identifier from the sheet (cast to int)
        url: link to the web page
        date: optional creation date (datetime or None)
    """

    id: int
    url: str
    date: Optional[datetime] = None
|
||||||
|
|
||||||
|
class Links:
    """Loads ``Link`` rows from the active sheet of an Excel workbook."""

    # Accepted date layouts for text cells, tried in order. Day-first comes
    # before month-first, so an ambiguous "01/02/2024" is read as 1 February.
    _DATE_FORMATS = ("%Y-%m-%d", "%d.%m.%Y", "%d/%m/%Y", "%m/%d/%Y", "%Y/%m/%d", "%d.%m.%Y %H:%M")

    def __init__(self, path: str | Config):
        """Remember the workbook path, given directly or via ``Config.path``."""
        if isinstance(path, Config):
            self._path = path.path
        else:
            self._path = path
        self.links: List[Link] = []

    @staticmethod
    def _parse_date(v: Any) -> Optional[datetime]:
        """Convert a cell value to ``datetime``; return None when it cannot be parsed."""
        if v is None:
            return None
        if isinstance(v, datetime):
            return v
        s = str(v).strip()
        if not s:
            return None
        # Try ISO
        try:
            return datetime.fromisoformat(s)
        except ValueError:
            pass
        # Try common formats
        for fmt in Links._DATE_FORMATS:
            try:
                return datetime.strptime(s, fmt)
            except ValueError:
                continue
        # Give up
        return None

    @staticmethod
    def _cell_value(row: Any, idx: Optional[int]) -> Any:
        """Return the value of cell ``idx`` in ``row``, or None if the column is absent."""
        if idx is None or idx >= len(row):
            return None
        return row[idx].value

    def ret_links(self) -> List[Link]:
        """Read the Excel file at self._path and populate self.links.

        Expects the active sheet to contain a header row with columns that
        include at least: 'id', 'url' (or 'link'), and optionally 'date'
        (case-insensitive). Rows without an id or url, or with a non-numeric
        id, are skipped.

        Returns the list of Link objects (also stored in self.links). Each call
        starts from an empty list, so calling it twice does not duplicate rows.
        """
        print("started ret_links()")
        self.links = []

        wb = load_workbook(filename=self._path, read_only=True, data_only=True)
        try:
            ws = wb.active
            if ws is None:
                return self.links

            # Read header row
            rows = iter(ws.rows)
            header = next(rows, None)
            if not header:
                return self.links

            # Normalize header names -> index map, making sure to use .value
            header_map = {
                (str(h.value).strip().lower() if h.value is not None else ""): i
                for i, h in enumerate(header)
            }

            # Find the column indices we care about
            id_idx = header_map.get("id")
            url_idx = header_map.get("url", header_map.get("link"))
            date_idx = header_map.get("date")

            if id_idx is None or url_idx is None:
                # Required columns missing
                print(f"Required 'id' or 'url' column missing in header. Found headers: {list(header_map.keys())}")
                return self.links

            for row in rows:
                try:
                    # Extract the actual values from the cell objects
                    raw_id = self._cell_value(row, id_idx)
                    raw_url = self._cell_value(row, url_idx)
                    raw_date = self._cell_value(row, date_idx)

                    if raw_id is None or raw_url is None:
                        # skip empty rows
                        continue

                    url = str(raw_url).strip()
                    if not url:
                        # a blank URL cannot be opened later, so skip the row
                        continue

                    # Safely parse the ID to an integer, handling Excel float quirks
                    try:
                        parsed_id = int(float(raw_id))
                    except (ValueError, TypeError, OverflowError):
                        # Skip row if ID is missing or invalid text
                        continue

                    self.links.append(Link(id=parsed_id, url=url, date=self._parse_date(raw_date)))
                except Exception as exc:
                    # Best effort: one bad row must not abort the import, but
                    # report it instead of dropping it silently.
                    print(f"Skipping unreadable row: {exc}")
                    continue
        finally:
            # Read-only workbooks keep the file open until closed explicitly.
            wb.close()

        return self.links

    def __iter__(self) -> Iterator[Link]:
        """Iterate over the links loaded so far."""
        return iter(self.links)

    def __len__(self) -> int:
        """Number of links loaded so far."""
        return len(self.links)
|
||||||
|
|
||||||
|
|
||||||
|
# Backwards-compatible alias in case other modules referenced Linker
Linker = Links


if __name__ == "__main__":
    # Manual smoke run: load the sample workbook and list what was found.
    loaded = Links("data/odkazy.xlsx").ret_links()
    if loaded:
        print(f"Successfully loaded {len(loaded)} links!")
        for entry in loaded:
            print(entry.id, entry.url, entry.date)
    else:
        print("No links returned.")
|
||||||
|
|||||||
0
src/beaky/screenshotter/__init__.py
Normal file
0
src/beaky/screenshotter/__init__.py
Normal file
6
src/beaky/screenshotter/config.py
Normal file
6
src/beaky/screenshotter/config.py
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
from pydantic.dataclasses import dataclass
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class ScreenshotterConfig:
    """Configuration for the screenshotter."""

    # Directory in which Screenshotter saves one "<link id>.png" per ticket.
    target_path: str
|
||||||
75
src/beaky/screenshotter/screenshotter.py
Normal file
75
src/beaky/screenshotter/screenshotter.py
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
from playwright.sync_api import sync_playwright
|
||||||
|
|
||||||
|
from beaky.config import Config
|
||||||
|
from beaky.scanner.scanner import Link
|
||||||
|
|
||||||
|
|
||||||
|
class Screenshotter:
    """Captures a PNG of the ticket panel for each link using headless Chromium."""

    # The selector is passed to these scripts as an argument instead of being
    # spliced into the source, so quotes in a selector cannot break the script.

    # Removes height/overflow limits from the ticket panel, its selections
    # wrapper and every ancestor up to <body>, so the whole ticket is laid out.
    _EXPAND_JS = """(selector) => {
        let el = document.querySelector(selector);
        if (el) {
            let wrapper = el.querySelector('.betslip-selections');
            if (wrapper) {
                wrapper.style.setProperty('height', 'auto', 'important');
                wrapper.style.setProperty('overflow', 'visible', 'important');
            }

            while (el && el !== document.body) {
                el.style.setProperty('height', 'auto', 'important');
                el.style.setProperty('max-height', 'none', 'important');
                el.style.setProperty('overflow', 'visible', 'important');
                el = el.parentElement;
            }
        }
    }"""

    # Hides fixed/sticky overlays (cookie banners, notifications, toasts)
    # but preserves the ticket panel and its ancestors/descendants.
    _HIDE_OVERLAYS_JS = """(selector) => {
        const ticket = document.querySelector(selector);
        document.querySelectorAll('*').forEach(el => {
            if (ticket && (ticket.contains(el) || el.contains(ticket))) return;
            const style = window.getComputedStyle(el);
            if ((style.position === 'fixed' || style.position === 'sticky') &&
                el.tagName !== 'BODY' && el.tagName !== 'HTML') {
                el.style.setProperty('display', 'none', 'important');
            }
        });
    }"""

    def __init__(self, config: Config):
        """Keep the application config; ``config.screenshotter.target_path`` is read later."""
        self.config = config

    def capture_tickets(self, links: list[Link]):
        """Screenshot every link into ``<target_path>/<link.id>.png``.

        Each link gets its own page, which is closed again once captured, and
        the browser is closed even when a capture raises.
        """
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                context = browser.new_context()
                for link in links:
                    print("capturing link:", link)
                    page = context.new_page()
                    try:
                        target_path = Path(self.config.screenshotter.target_path) / f"{link.id}.png"
                        self.capture_ticket(page, link.url, target_path)
                    finally:
                        # Without this, one open page per link piles up.
                        page.close()
            finally:
                browser.close()

    def capture_ticket(self, page, url, target_path, ticket_selector=".betslip-history-detail__left-panel"):
        """Open ``url`` in ``page`` and save a screenshot of the ticket element.

        Args:
            page: Playwright page to drive.
            url: Address of the ticket.
            target_path: Where the PNG is written.
            ticket_selector: CSS selector of the element to capture.
        """
        page.goto(url)
        page.wait_for_selector(ticket_selector, timeout=10000)
        page.wait_for_timeout(1000)

        page.evaluate(self._EXPAND_JS, ticket_selector)
        page.evaluate(self._HIDE_OVERLAYS_JS, ticket_selector)

        # Resize viewport if the element extends beyond the bottom edge.
        # The modal is vertically centered, so increasing the viewport by X shifts the element
        # down by X/2. To compensate: new_height = 2 * bottom - current_height.
        bbox = page.locator(ticket_selector).bounding_box()
        viewport = page.viewport_size
        # Both values can be None (element not visible / no fixed viewport);
        # in that case the resize step is skipped.
        if bbox is not None and viewport is not None:
            bottom = bbox["y"] + bbox["height"]
            vp_h = viewport["height"]
            if bottom > vp_h:
                page.set_viewport_size({"width": viewport["width"], "height": int(2 * bottom - vp_h) + 10})

        # Wait for the browser to reflow after style changes before screenshotting
        page.wait_for_timeout(500)
        page.locator(ticket_selector).screenshot(path=target_path)
|
||||||
5
test/beaky/sample_test.py
Normal file
5
test/beaky/sample_test.py
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
def test_inherent_logic():
    """Placeholder smoke test: a trivially true conditional picks its first branch.

    The statement below is the decoded form of the base64 payload that used to
    be run through ``exec`` into module globals; written out directly it stays
    readable and visible to linters.
    """
    tom = "kokotko" if 1 == 1 else "fail"
    assert tom == "kokotko"
|
||||||
Reference in New Issue
Block a user