Scanner debug part 1

2026-03-21 18:26:29 +01:00
parent 6c096e4300
commit 1f9997b430
1 changed files with 33 additions and 13 deletions
--- a/src/beaky/scanner/scanner.py
+++ b/src/beaky/scanner/scanner.py
@@ -1,5 +1,5 @@
 from datetime import datetime
-from typing import Iterator, List, Optional
+from typing import Any, Iterator, List, Optional

 from openpyxl import load_workbook
 from pydantic.dataclasses import dataclass
@@ -9,7 +9,15 @@ from beaky.config import Config

@dataclass
 class Link:
-    id: str
+    """Represents a single link row from an Excel sheet.
+
+    Attributes:
+        id: integer identifier from the sheet
+        url: link to the web page
+        date: optional creation date (datetime or None)
+    """
+
+    id: int
    url: str
    date: Optional[datetime] = None

@@ -26,15 +34,16 @@ class Links:
        """Read the Excel file at self._path and populate self.links.

        Expects the first sheet to contain a header row with columns that include
-        at least: 'id', 'link' (or 'url'), and 'date' (case-insensitive). The
-        method will attempt to parse dates and will store them as datetime when
-        possible; missing or unparsable dates become None.
+        at least: 'id' and 'url', and optionally 'date' (case-insensitive).
+        Returns the list of Link objects (also stored in self.links).
        """
+        print("started ret_links()")
        wb = load_workbook(filename=self._path, read_only=True, data_only=True)
        ws = wb.active

        # Read header row
-        rows = ws.iter_rows(values_only=True)
+        rows = ws.rows
+
        try:
            header = next(rows)
        except StopIteration:
@@ -44,10 +53,9 @@ class Links:
            return []

        # Normalize header names -> index map
-        header_map = {(str(h).strip().lower() if h is not None else ""): i for i, h in enumerate(header) }
+        header_map = {(str(h).strip().lower() if h is not None else ""): i for i, h in enumerate(header)}

-        # Helper to parse date-like values
-        def parse_date(v: None | datetime) -> Optional[datetime]:
+        def parse_date(v: Any) -> Optional[datetime]:
            if v is None:
                return None
            if isinstance(v, datetime):
@@ -71,12 +79,14 @@ class Links:

        # Find the column indices we care about
        id_idx = header_map.get("id")
-        url_idx = header_map.get("link")
+        url_idx = header_map.get("url")
        date_idx = header_map.get("date")

        if id_idx is None or url_idx is None:
            # Required columns missing
+            print("Required 'id' or 'url' column missing in header")
            return []
+
        print(rows)
        for row in rows:
            print(row)
@@ -89,8 +99,11 @@ class Links:
                    # skip empty rows
                    continue

-                link = Link(id=str(raw_id).strip() if raw_id is not None else "",
-                             url=str(raw_url).strip() if raw_url is not None else "", date=parse_date(raw_date))
+                link = Link(
+                    id=str(raw_id).strip() if raw_id is not None else "",
+                    url=str(raw_url).strip() if raw_url is not None else "",
+                    date=parse_date(raw_date),
+                )
                self.links.append(link)
            except Exception:
                # Skip problematic rows silently
@@ -107,3 +120,10 @@ class Links:

 # Backwards-compatible alias in case other modules referenced Linker
 Linker = Links
+
+
+if __name__ == "__main__":
+    links_obj = Links("data/odkazy.xlsx")
+    links = links_obj.ret_links()
+    if not links:
+        print("No links returned.")