Scanner debug part 2 (done)

This commit is contained in:
Chlupaty
2026-03-21 18:48:54 +01:00
parent 8adc374408
commit abb59aabe3

View File

@@ -12,7 +12,7 @@ class Link:
"""Represents a single link row from an Excel sheet.
Attributes:
id: identifier from the sheet (kept as string)
id: identifier from the sheet (cast to int)
url: link to the web page
date: optional creation date (datetime or None)
"""
@@ -52,8 +52,8 @@ class Links:
if not header:
return []
# Normalize header names -> index map
header_map = {(str(h).strip().lower() if h is not None else ""): i for i, h in enumerate(header)}
# Normalize header names -> index map, making sure to use .value
header_map = {(str(h.value).strip().lower() if h.value is not None else ""): i for i, h in enumerate(header)}
def parse_date(v: Any) -> Optional[datetime]:
if v is None:
@@ -84,29 +84,35 @@ class Links:
if id_idx is None or url_idx is None:
# Required columns missing
print("Required 'id' or 'url' column missing in header")
print(f"Required 'id' or 'url' column missing in header. Found headers: {list(header_map.keys())}")
return []
print(rows)
for row in rows:
print(row)
try:
raw_id = row[id_idx] if id_idx < len(row) else None
raw_url = row[url_idx] if url_idx < len(row) else None
raw_date = row[date_idx] if (date_idx is not None and date_idx < len(row)) else None
# Extract the actual values from the cell objects
raw_id = row[id_idx].value if id_idx < len(row) else None
raw_url = row[url_idx].value if url_idx < len(row) else None
raw_date = row[date_idx].value if (date_idx is not None and date_idx < len(row)) else None
if raw_id is None and raw_url is None:
if raw_id is None or raw_url is None:
# skip empty rows
continue
# Safely parse the ID to an integer, handling Excel float quirks
try:
parsed_id = int(float(raw_id))
except (ValueError, TypeError):
# Skip row if ID is missing or invalid text
continue
link = Link(
id=str(raw_id).strip() if raw_id is not None else "",
id=parsed_id,
url=str(raw_url).strip() if raw_url is not None else "",
date=parse_date(raw_date),
)
self.links.append(link)
except Exception:
# Skip problematic rows silently
# Skip problematic rows silently (or print(e) for debugging)
continue
return self.links
@@ -127,3 +133,7 @@ if __name__ == "__main__":
links = links_obj.ret_links()
if not links:
print("No links returned.")
else:
print(f"Successfully loaded {len(links)} links!")
for link in links:
print(link.id, link.url, link.date)