From 1f9997b430b9ff5437272cba247bfe0651aa7156 Mon Sep 17 00:00:00 2001 From: Chlupaty Date: Sat, 21 Mar 2026 18:26:29 +0100 Subject: [PATCH] Scanner debug part 1 --- src/beaky/scanner/scanner.py | 46 ++++++++++++++++++++++++++---------- 1 file changed, 33 insertions(+), 13 deletions(-) diff --git a/src/beaky/scanner/scanner.py b/src/beaky/scanner/scanner.py index a55de15..dd10937 100644 --- a/src/beaky/scanner/scanner.py +++ b/src/beaky/scanner/scanner.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import Iterator, List, Optional +from typing import Any, Iterator, List, Optional from openpyxl import load_workbook from pydantic.dataclasses import dataclass @@ -9,7 +9,15 @@ from beaky.config import Config @dataclass class Link: - id: str + """Represents a single link row from an Excel sheet. + + Attributes: + id: integer identifier from the sheet + url: link to the web page + date: optional creation date (datetime or None) + """ + + id: int url: str date: Optional[datetime] = None @@ -26,15 +34,16 @@ class Links: """Read the Excel file at self._path and populate self.links. Expects the first sheet to contain a header row with columns that include - at least: 'id', 'link' (or 'url'), and 'date' (case-insensitive). The - method will attempt to parse dates and will store them as datetime when - possible; missing or unparsable dates become None. + at least: 'id' and 'url', and optionally 'date' (case-insensitive). + Returns the list of Link objects (also stored in self.links). 
""" + print("started ret_links()") wb = load_workbook(filename=self._path, read_only=True, data_only=True) ws = wb.active # Read header row - rows = ws.iter_rows(values_only=True) + rows = ws.rows + try: header = next(rows) except StopIteration: @@ -44,10 +53,9 @@ class Links: return [] # Normalize header names -> index map - header_map = {(str(h).strip().lower() if h is not None else ""): i for i, h in enumerate(header) } + header_map = {(str(h).strip().lower() if h is not None else ""): i for i, h in enumerate(header)} - # Helper to parse date-like values - def parse_date(v: None | datetime) -> Optional[datetime]: + def parse_date(v: Any) -> Optional[datetime]: if v is None: return None if isinstance(v, datetime): @@ -71,12 +79,14 @@ class Links: # Find the column indices we care about id_idx = header_map.get("id") - url_idx = header_map.get("link") + url_idx = header_map.get("url") date_idx = header_map.get("date") if id_idx is None or url_idx is None: # Required columns missing + print("Required 'id' or 'url' column missing in header") return [] + print(rows) for row in rows: print(row) @@ -89,8 +99,11 @@ class Links: # skip empty rows continue - link = Link(id=str(raw_id).strip() if raw_id is not None else "", - url=str(raw_url).strip() if raw_url is not None else "", date=parse_date(raw_date)) + link = Link( + id=str(raw_id).strip() if raw_id is not None else "", + url=str(raw_url).strip() if raw_url is not None else "", + date=parse_date(raw_date), + ) self.links.append(link) except Exception: # Skip problematic rows silently @@ -106,4 +119,11 @@ class Links: # Backwards-compatible alias in case other modules referenced Linker -Linker = Links \ No newline at end of file +Linker = Links + + +if __name__ == "__main__": + links_obj = Links("data/odkazy.xlsx") + links = links_obj.ret_links() + if not links: + print("No links returned.")