- need to skip invalid table headers; they should be introduced with <thead>
and a <th> for each column, but some websites may use <tr> instead of <thead>
- strip (trim) strings
tds = line.find_all("td")
logger.debug("tds[%s]()=%d", type(tds), len(tds))
+ if len(tds) == 0:
+ logger.warning("No 'td' tag found in line[]='%s' - SKIPPED!", type(line))
+ continue
+
blocked = tds[0].text.strip()
reason = tds[1].text.strip()
tds = line.find_all("td")
logger.debug("tds[%s]()=%d", type(tds), len(tds))
- blocked = tds[0].text
- reason = tds[1].text
+ if len(tds) == 0:
+ logger.warning("No 'td' tag found in line[]='%s' - SKIPPED!", type(line))
+ continue
+
+ blocked = tds[0].text.strip()
+ reason = tds[1].text.strip()
logger.debug("blocked='%s',reason='%s' - BEFORE!", blocked, reason)
blocked = tidyup.domain(blocked) if blocked != "" else None