Screenshotter

This commit is contained in:
2026-03-21 17:58:06 +01:00
parent 5126a985bf
commit 86e0bc8e51
9 changed files with 63 additions and 28 deletions

View File

@@ -1 +1,4 @@
path: data/odkazy.xlsx path: data/odkazy.xlsx
screenshotter:
target_path: data/screenshots/

View File

@@ -1,10 +0,0 @@
from playwright.sync_api import sync_playwright
def capture_ticket(url, path, ticket_selector=".ticket-detail-wrapper") -> None:
    """Save a screenshot of the ticket element found on a web page.

    Launches headless Chromium through Playwright, opens ``url``, waits until
    an element matching ``ticket_selector`` is present and writes a
    screenshot of that element to ``path``.

    Args:
        url: Address of the page to open.
        path: Destination file for the screenshot; passed unchanged to
            Playwright's ``Locator.screenshot(path=...)``.
        ticket_selector: Selector of the element to wait for and capture.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto(url)
            page.wait_for_selector(ticket_selector)
            page.locator(ticket_selector).screenshot(path=path)
        finally:
            # Close the browser even when navigation, the selector wait or
            # the screenshot raises (e.g. on a timeout).
            browser.close()

View File

@@ -13,6 +13,7 @@ dependencies = [
"pandas==3.0.1", "pandas==3.0.1",
"openpyxl>=3.1.0", "openpyxl>=3.1.0",
"PyYaml==6.0.3", "PyYaml==6.0.3",
"playwright==1.58.0"
] ]
[project.optional-dependencies] [project.optional-dependencies]

View File

@@ -1,32 +1,38 @@
import argparse import argparse
from datetime import datetime
import yaml import yaml
from pydantic import ValidationError from pydantic import ValidationError
from beaky.config import Config from beaky.config import Config
from beaky.scanner.scanner import Links from beaky.scanner.scanner import Link
from beaky.screenshotter.screenshotter import Screenshotter
def main() -> None:
parser = argparse.ArgumentParser(
prog="beaky"
)
parser.add_argument("--config", help="Path to config file.", default="config/application.yml") def load_config(path: str) -> Config | None:
args = parser.parse_args() with open(path) as f:
with open(args.config) as f:
config_dict = yaml.safe_load(f) config_dict = yaml.safe_load(f)
try: try:
config = Config(**config_dict) return Config(**config_dict)
except ValidationError as e: except ValidationError as e:
print("Bad arguments") print("Bad config")
print(e) print(e)
return None
def main() -> None:
parser = argparse.ArgumentParser(prog="beaky")
parser.add_argument("--config", help="Path to config file.", default="config/application.yml")
parser.add_argument("mode", choices=["screenshotter"], help="Mode of operation.")
args = parser.parse_args()
config = load_config(args.config)
if config is None:
return return
data = Links(config.path) if args.mode == "screenshotter":
data.ret_links() screenshotter = Screenshotter(config)
for link in data: screenshotter.capture_tickets([Link("1",
print(link) "https://applink.ifortuna.cz/ticketdetail?id=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJDRkUwUUJNNktDMzQyUjAwIiwicHJmIjoiUFVCTElDIiwiaXNzIjoiYmV0c2xpcC1zZXJ2aWNlIiwiaWF0IjoxNzcyODc2NTk0fQ.QGiBJRINDsSVKQn3WKRa7XDql5wiLDOG8R7QKc2bD-0&source=SB&deeplink=ftncz%3A%2F%2Fbetslip-history%2Fdetail%3Fid%3DeyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJDRkUwUUJNNktDMzQyUjAwIiwicHJmIjoiUFVCTElDIiwiaXNzIjoiYmV0c2xpcC1zZXJ2aWNlIiwiaWF0IjoxNzcyODc2NTk0fQ.QGiBJRINDsSVKQn3WKRa7XDql5wiLDOG8R7QKc2bD-0%26source%3DSB",
datetime.now())])
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View File

@@ -1,6 +1,9 @@
from pydantic.dataclasses import dataclass from pydantic.dataclasses import dataclass
from beaky.screenshotter.config import ScreenshotterConfig
@dataclass @dataclass
class Config: class Config:
path: str path: str
screenshotter: ScreenshotterConfig

View File

@@ -77,8 +77,9 @@ class Links:
if id_idx is None or url_idx is None: if id_idx is None or url_idx is None:
# Required columns missing # Required columns missing
return [] return []
print(rows)
for row in rows: for row in rows:
print(row)
try: try:
raw_id = row[id_idx] if id_idx < len(row) else None raw_id = row[id_idx] if id_idx < len(row) else None
raw_url = row[url_idx] if url_idx < len(row) else None raw_url = row[url_idx] if url_idx < len(row) else None

View File

View File

@@ -0,0 +1,5 @@
from pydantic.dataclasses import dataclass
@dataclass
class ScreenshotterConfig:
    """Settings for the screenshotter, validated by pydantic's dataclass."""

    # Directory that screenshot files are written into; the screenshotter
    # joins it with "<link id>.png" to build each output path.
    target_path: str

View File

@@ -0,0 +1,26 @@
from pathlib import Path
from beaky.config import Config
from playwright.sync_api import sync_playwright
from beaky.scanner.scanner import Link
class Screenshotter:
    """Capture screenshots of ticket pages into the configured directory."""

    def __init__(self, config: Config):
        """Keep a reference to the application configuration.

        Args:
            config: Application configuration. Its
                ``screenshotter.target_path`` is read as the output directory
                when tickets are captured.
        """
        self.config = config

    def capture_tickets(self, links: list[Link]) -> None:
        """Capture one screenshot per link.

        Each screenshot is written to ``<target_path>/<link.id>.png``. Links
        are processed in order; an exception raised while capturing one link
        propagates and stops the remaining links from being processed.

        Args:
            links: Links to capture; ``link.id`` names the output file and
                ``link.url`` is the page that gets opened.
        """
        # The output directory does not change between links, so build it once.
        target_dir = Path(self.config.screenshotter.target_path)
        for link in links:
            print("capturing link:", link)
            self.capture_ticket(link.url, target_dir / f"{link.id}.png")

    def capture_ticket(self, url, target_path, ticket_selector=".betslip-history-detail") -> None:
        """Open ``url`` in headless Chromium and screenshot the ticket element.

        Args:
            url: Address of the ticket page.
            target_path: Destination file for the screenshot; passed unchanged
                to Playwright's ``Locator.screenshot(path=...)``.
            ticket_selector: Selector of the element to wait for and capture.
        """
        with sync_playwright() as p:
            browser = p.chromium.launch(headless=True)
            try:
                page = browser.new_page()
                # Return from navigation as soon as the DOM is parsed, then
                # wait explicitly for the ticket element and for network
                # activity to settle before taking the screenshot.
                page.goto(url, wait_until="domcontentloaded")
                page.wait_for_selector(ticket_selector)
                page.wait_for_load_state("networkidle")
                page.locator(ticket_selector).screenshot(path=target_path)
            finally:
                # Close the browser even when one of the waits above raises.
                browser.close()