Scanner debug part 1
@@ -1,5 +1,5 @@
 from datetime import datetime
-from typing import Iterator, List, Optional
+from typing import Any, Iterator, List, Optional
 
 from openpyxl import load_workbook
 from pydantic.dataclasses import dataclass
@@ -9,7 +9,15 @@ from beaky.config import Config
 
 @dataclass
 class Link:
-    id: str
+    """Represents a single link row from an Excel sheet.
+
+    Attributes:
+        id: identifier from the sheet (kept as string)
+        url: link to the web page
+        date: optional creation date (datetime or None)
+    """
+
+    id: int
     url: str
     date: Optional[datetime] = None
 
@@ -26,15 +34,16 @@ class Links:
         """Read the Excel file at self._path and populate self.links.
 
         Expects the first sheet to contain a header row with columns that include
-        at least: 'id', 'link' (or 'url'), and 'date' (case-insensitive). The
-        method will attempt to parse dates and will store them as datetime when
-        possible; missing or unparsable dates become None.
+        at least: 'id', 'link' (or 'url'), and optionally 'date' (case-insensitive).
+        Returns the list of Link objects (also stored in self.links).
         """
+        print("started ret_links()")
         wb = load_workbook(filename=self._path, read_only=True, data_only=True)
         ws = wb.active
 
         # Read header row
-        rows = ws.iter_rows(values_only=True)
+        rows = ws.rows
 
         try:
             header = next(rows)
         except StopIteration:
@@ -44,10 +53,9 @@ class Links:
             return []
 
         # Normalize header names -> index map
-        header_map = {(str(h).strip().lower() if h is not None else ""): i for i, h in enumerate(header) }
+        header_map = {(str(h).strip().lower() if h is not None else ""): i for i, h in enumerate(header)}
 
-        # Helper to parse date-like values
-        def parse_date(v: None | datetime) -> Optional[datetime]:
+        def parse_date(v: Any) -> Optional[datetime]:
             if v is None:
                 return None
             if isinstance(v, datetime):
@@ -71,12 +79,14 @@ class Links:
 
         # Find the column indices we care about
         id_idx = header_map.get("id")
-        url_idx = header_map.get("link")
+        url_idx = header_map.get("url")
         date_idx = header_map.get("date")
 
         if id_idx is None or url_idx is None:
             # Required columns missing
+            print("Required 'id' or 'url' column missing in header")
             return []
 
         print(rows)
         for row in rows:
             print(row)
@@ -89,8 +99,11 @@ class Links:
                     # skip empty rows
                     continue
 
-                link = Link(id=str(raw_id).strip() if raw_id is not None else "",
-                            url=str(raw_url).strip() if raw_url is not None else "", date=parse_date(raw_date))
+                link = Link(
+                    id=str(raw_id).strip() if raw_id is not None else "",
+                    url=str(raw_url).strip() if raw_url is not None else "",
+                    date=parse_date(raw_date),
+                )
                 self.links.append(link)
             except Exception:
                 # Skip problematic rows silently
@@ -107,3 +120,10 @@ class Links:
 
 # Backwards-compatible alias in case other modules referenced Linker
 Linker = Links
+
+
+if __name__ == "__main__":
+    links_obj = Links("data/odkazy.xlsx")
+    links = links_obj.ret_links()
+    if not links:
+        print("No links returned.")
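For reference, a minimal standalone sketch (not part of the commit) of the two openpyxl row-iteration styles touched by this change: iter_rows(values_only=True) yields tuples of plain values, while ws.rows yields tuples of Cell objects whose .value must be read before the string normalization in ret_links() can work. The path data/odkazy.xlsx comes from the __main__ block above; the header names are an assumption.

    from openpyxl import load_workbook

    # Illustrative sketch only; assumes a workbook at data/odkazy.xlsx whose
    # first sheet has a header row such as ("ID", "URL", "Date").
    wb = load_workbook(filename="data/odkazy.xlsx", read_only=True, data_only=True)
    ws = wb.active

    # iter_rows(values_only=True) yields tuples of plain cell values ...
    rows = ws.iter_rows(values_only=True)
    header = next(rows)

    # ... while ws.rows would yield tuples of Cell objects, so each entry
    # would need .value before str()/strip()/lower() could normalize it.

    # Normalize header names -> column index map, as ret_links() does
    header_map = {(str(h).strip().lower() if h is not None else ""): i
                  for i, h in enumerate(header)}
    print(header_map)  # e.g. {'id': 0, 'url': 1, 'date': 2}

    wb.close()

Pairing values_only=True with the value-based parsing in ret_links() keeps the row tuples as plain Python objects, which is what parse_date() and the str() conversions expect.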