Scanner debug part 2 (done)
This commit is contained in:
@@ -12,7 +12,7 @@ class Link:
|
||||
"""Represents a single link row from an Excel sheet.
|
||||
|
||||
Attributes:
|
||||
id: identifier from the sheet (kept as string)
|
||||
id: identifier from the sheet (cast to int)
|
||||
url: link to the web page
|
||||
date: optional creation date (datetime or None)
|
||||
"""
|
||||
@@ -52,8 +52,8 @@ class Links:
|
||||
if not header:
|
||||
return []
|
||||
|
||||
# Normalize header names -> index map
|
||||
header_map = {(str(h).strip().lower() if h is not None else ""): i for i, h in enumerate(header)}
|
||||
# Normalize header names -> index map, making sure to use .value
|
||||
header_map = {(str(h.value).strip().lower() if h.value is not None else ""): i for i, h in enumerate(header)}
|
||||
|
||||
def parse_date(v: Any) -> Optional[datetime]:
|
||||
if v is None:
|
||||
@@ -84,29 +84,35 @@ class Links:
|
||||
|
||||
if id_idx is None or url_idx is None:
|
||||
# Required columns missing
|
||||
print("Required 'id' or 'url' column missing in header")
|
||||
print(f"Required 'id' or 'url' column missing in header. Found headers: {list(header_map.keys())}")
|
||||
return []
|
||||
|
||||
print(rows)
|
||||
for row in rows:
|
||||
print(row)
|
||||
try:
|
||||
raw_id = row[id_idx] if id_idx < len(row) else None
|
||||
raw_url = row[url_idx] if url_idx < len(row) else None
|
||||
raw_date = row[date_idx] if (date_idx is not None and date_idx < len(row)) else None
|
||||
# Extract the actual values from the cell objects
|
||||
raw_id = row[id_idx].value if id_idx < len(row) else None
|
||||
raw_url = row[url_idx].value if url_idx < len(row) else None
|
||||
raw_date = row[date_idx].value if (date_idx is not None and date_idx < len(row)) else None
|
||||
|
||||
if raw_id is None and raw_url is None:
|
||||
if raw_id is None or raw_url is None:
|
||||
# skip empty rows
|
||||
continue
|
||||
|
||||
# Safely parse the ID to an integer, handling Excel float quirks
|
||||
try:
|
||||
parsed_id = int(float(raw_id))
|
||||
except (ValueError, TypeError):
|
||||
# Skip row if ID is missing or invalid text
|
||||
continue
|
||||
|
||||
link = Link(
|
||||
id=str(raw_id).strip() if raw_id is not None else "",
|
||||
id=parsed_id,
|
||||
url=str(raw_url).strip() if raw_url is not None else "",
|
||||
date=parse_date(raw_date),
|
||||
)
|
||||
self.links.append(link)
|
||||
except Exception:
|
||||
# Skip problematic rows silently
|
||||
# Skip problematic rows silently (or print(e) for debugging)
|
||||
continue
|
||||
|
||||
return self.links
|
||||
@@ -127,3 +133,7 @@ if __name__ == "__main__":
|
||||
links = links_obj.ret_links()
|
||||
if not links:
|
||||
print("No links returned.")
|
||||
else:
|
||||
print(f"Successfully loaded {len(links)} links!")
|
||||
for link in links:
|
||||
print(link.id, link.url, link.date)
|
||||
|
||||
Reference in New Issue
Block a user