Scanner debug part 2 (done)
This commit is contained in:
@@ -12,7 +12,7 @@ class Link:
|
|||||||
"""Represents a single link row from an Excel sheet.
|
"""Represents a single link row from an Excel sheet.
|
||||||
|
|
||||||
Attributes:
|
Attributes:
|
||||||
id: identifier from the sheet (kept as string)
|
id: identifier from the sheet (cast to int)
|
||||||
url: link to the web page
|
url: link to the web page
|
||||||
date: optional creation date (datetime or None)
|
date: optional creation date (datetime or None)
|
||||||
"""
|
"""
|
||||||
@@ -52,8 +52,8 @@ class Links:
|
|||||||
if not header:
|
if not header:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Normalize header names -> index map
|
# Normalize header names -> index map, making sure to use .value
|
||||||
header_map = {(str(h).strip().lower() if h is not None else ""): i for i, h in enumerate(header)}
|
header_map = {(str(h.value).strip().lower() if h.value is not None else ""): i for i, h in enumerate(header)}
|
||||||
|
|
||||||
def parse_date(v: Any) -> Optional[datetime]:
|
def parse_date(v: Any) -> Optional[datetime]:
|
||||||
if v is None:
|
if v is None:
|
||||||
@@ -84,29 +84,35 @@ class Links:
|
|||||||
|
|
||||||
if id_idx is None or url_idx is None:
|
if id_idx is None or url_idx is None:
|
||||||
# Required columns missing
|
# Required columns missing
|
||||||
print("Required 'id' or 'url' column missing in header")
|
print(f"Required 'id' or 'url' column missing in header. Found headers: {list(header_map.keys())}")
|
||||||
return []
|
return []
|
||||||
|
|
||||||
print(rows)
|
|
||||||
for row in rows:
|
for row in rows:
|
||||||
print(row)
|
|
||||||
try:
|
try:
|
||||||
raw_id = row[id_idx] if id_idx < len(row) else None
|
# Extract the actual values from the cell objects
|
||||||
raw_url = row[url_idx] if url_idx < len(row) else None
|
raw_id = row[id_idx].value if id_idx < len(row) else None
|
||||||
raw_date = row[date_idx] if (date_idx is not None and date_idx < len(row)) else None
|
raw_url = row[url_idx].value if url_idx < len(row) else None
|
||||||
|
raw_date = row[date_idx].value if (date_idx is not None and date_idx < len(row)) else None
|
||||||
|
|
||||||
if raw_id is None and raw_url is None:
|
if raw_id is None or raw_url is None:
|
||||||
# skip empty rows
|
# skip empty rows
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# Safely parse the ID to an integer, handling Excel float quirks
|
||||||
|
try:
|
||||||
|
parsed_id = int(float(raw_id))
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
# Skip row if ID is missing or invalid text
|
||||||
|
continue
|
||||||
|
|
||||||
link = Link(
|
link = Link(
|
||||||
id=str(raw_id).strip() if raw_id is not None else "",
|
id=parsed_id,
|
||||||
url=str(raw_url).strip() if raw_url is not None else "",
|
url=str(raw_url).strip() if raw_url is not None else "",
|
||||||
date=parse_date(raw_date),
|
date=parse_date(raw_date),
|
||||||
)
|
)
|
||||||
self.links.append(link)
|
self.links.append(link)
|
||||||
except Exception:
|
except Exception:
|
||||||
# Skip problematic rows silently
|
# Skip problematic rows silently (or print(e) for debugging)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
return self.links
|
return self.links
|
||||||
@@ -127,3 +133,7 @@ if __name__ == "__main__":
|
|||||||
links = links_obj.ret_links()
|
links = links_obj.ret_links()
|
||||||
if not links:
|
if not links:
|
||||||
print("No links returned.")
|
print("No links returned.")
|
||||||
|
else:
|
||||||
|
print(f"Successfully loaded {len(links)} links!")
|
||||||
|
for link in links:
|
||||||
|
print(link.id, link.url, link.date)
|
||||||
|
|||||||
Reference in New Issue
Block a user