Scanner debug part 1

This commit is contained in:
Chlupaty
2026-03-21 18:26:29 +01:00
parent 6c096e4300
commit 1f9997b430

View File

@@ -1,5 +1,5 @@
from datetime import datetime from datetime import datetime
from typing import Iterator, List, Optional from typing import Any, Iterator, List, Optional
from openpyxl import load_workbook from openpyxl import load_workbook
from pydantic.dataclasses import dataclass from pydantic.dataclasses import dataclass
@@ -9,7 +9,15 @@ from beaky.config import Config
@dataclass @dataclass
class Link: class Link:
id: str """Represents a single link row from an Excel sheet.
Attributes:
id: identifier from the sheet (typed as int)
url: link to the web page
date: optional creation date (datetime or None)
"""
id: int
url: str url: str
date: Optional[datetime] = None date: Optional[datetime] = None
@@ -26,15 +34,16 @@ class Links:
"""Read the Excel file at self._path and populate self.links. """Read the Excel file at self._path and populate self.links.
Expects the first sheet to contain a header row with columns that include Expects the first sheet to contain a header row with columns that include
at least: 'id', 'link' (or 'url'), and 'date' (case-insensitive). The at least: 'id', 'link' (or 'url'), and optionally 'date' (case-insensitive).
method will attempt to parse dates and will store them as datetime when Returns the list of Link objects (also stored in self.links).
possible; missing or unparsable dates become None.
""" """
print("started ret_links()")
wb = load_workbook(filename=self._path, read_only=True, data_only=True) wb = load_workbook(filename=self._path, read_only=True, data_only=True)
ws = wb.active ws = wb.active
# Read header row # Read header row
rows = ws.iter_rows(values_only=True) rows = ws.rows
try: try:
header = next(rows) header = next(rows)
except StopIteration: except StopIteration:
@@ -44,10 +53,9 @@ class Links:
return [] return []
# Normalize header names -> index map # Normalize header names -> index map
header_map = {(str(h).strip().lower() if h is not None else ""): i for i, h in enumerate(header) } header_map = {(str(h).strip().lower() if h is not None else ""): i for i, h in enumerate(header)}
# Helper to parse date-like values def parse_date(v: Any) -> Optional[datetime]:
def parse_date(v: None | datetime) -> Optional[datetime]:
if v is None: if v is None:
return None return None
if isinstance(v, datetime): if isinstance(v, datetime):
@@ -71,12 +79,14 @@ class Links:
# Find the column indices we care about # Find the column indices we care about
id_idx = header_map.get("id") id_idx = header_map.get("id")
url_idx = header_map.get("link") url_idx = header_map.get("url")
date_idx = header_map.get("date") date_idx = header_map.get("date")
if id_idx is None or url_idx is None: if id_idx is None or url_idx is None:
# Required columns missing # Required columns missing
print("Required 'id' or 'url' column missing in header")
return [] return []
print(rows) print(rows)
for row in rows: for row in rows:
print(row) print(row)
@@ -89,8 +99,11 @@ class Links:
# skip empty rows # skip empty rows
continue continue
link = Link(id=str(raw_id).strip() if raw_id is not None else "", link = Link(
url=str(raw_url).strip() if raw_url is not None else "", date=parse_date(raw_date)) id=str(raw_id).strip() if raw_id is not None else "",
url=str(raw_url).strip() if raw_url is not None else "",
date=parse_date(raw_date),
)
self.links.append(link) self.links.append(link)
except Exception: except Exception:
# Skip problematic rows silently # Skip problematic rows silently
@@ -106,4 +119,11 @@ class Links:
# Backwards-compatible alias in case other modules referenced Linker # Backwards-compatible alias in case other modules referenced Linker
Linker = Links Linker = Links
if __name__ == "__main__":
links_obj = Links("data/odkazy.xlsx")
links = links_obj.ret_links()
if not links:
print("No links returned.")