Scanner debug part 2 (done)

This commit is contained in:
Chlupaty
2026-03-21 18:48:54 +01:00
parent 8adc374408
commit abb59aabe3

View File

@@ -12,7 +12,7 @@ class Link:
     """Represents a single link row from an Excel sheet.
 
     Attributes:
-        id: identifier from the sheet (kept as string)
+        id: identifier from the sheet (cast to int)
         url: link to the web page
         date: optional creation date (datetime or None)
     """
@@ -52,8 +52,8 @@ class Links:
         if not header:
             return []
 
-        # Normalize header names -> index map
-        header_map = {(str(h).strip().lower() if h is not None else ""): i for i, h in enumerate(header)}
+        # Normalize header names -> index map, making sure to use .value
+        header_map = {(str(h.value).strip().lower() if h.value is not None else ""): i for i, h in enumerate(header)}
 
         def parse_date(v: Any) -> Optional[datetime]:
             if v is None:
@@ -84,29 +84,35 @@ class Links:
         if id_idx is None or url_idx is None:
             # Required columns missing
-            print("Required 'id' or 'url' column missing in header")
+            print(f"Required 'id' or 'url' column missing in header. Found headers: {list(header_map.keys())}")
             return []
-        print(rows)
         for row in rows:
-            print(row)
             try:
-                raw_id = row[id_idx] if id_idx < len(row) else None
-                raw_url = row[url_idx] if url_idx < len(row) else None
-                raw_date = row[date_idx] if (date_idx is not None and date_idx < len(row)) else None
+                # Extract the actual values from the cell objects
+                raw_id = row[id_idx].value if id_idx < len(row) else None
+                raw_url = row[url_idx].value if url_idx < len(row) else None
+                raw_date = row[date_idx].value if (date_idx is not None and date_idx < len(row)) else None
-                if raw_id is None and raw_url is None:
+                if raw_id is None or raw_url is None:
                     # skip empty rows
                     continue
+                # Safely parse the ID to an integer, handling Excel float quirks
+                try:
+                    parsed_id = int(float(raw_id))
+                except (ValueError, TypeError):
+                    # Skip row if ID is missing or invalid text
+                    continue
                 link = Link(
-                    id=str(raw_id).strip() if raw_id is not None else "",
+                    id=parsed_id,
                     url=str(raw_url).strip() if raw_url is not None else "",
                     date=parse_date(raw_date),
                 )
                 self.links.append(link)
             except Exception:
-                # Skip problematic rows silently
+                # Skip problematic rows silently (or print(e) for debugging)
                 continue
         return self.links
@@ -127,3 +133,7 @@ if __name__ == "__main__":
     links = links_obj.ret_links()
     if not links:
         print("No links returned.")
+    else:
+        print(f"Successfully loaded {len(links)} links!")
+        for link in links:
+            print(link.id, link.url, link.date)