logger.debug("doc[]='%s'", type(doc))
found = None
- for container in [{"class": "home-instances container-lg"}, {"class": "container"}]:
- logger.debug("container='%s'", container)
- headers = doc.findAll("div", container)
-
- logger.debug("Checking %d header(s) ...", len(headers))
- for header in headers:
- logger.debug("header[]='%s'", type(header))
- for content in header.find_all(["h2", "h3", "h4", "h5"]):
- logger.debug("content[%s]='%s' - BEFORE!", type(content), content)
- if content is not None:
- content = str(content.contents[0])
+ for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
+ logger.debug("criteria='%s'", criteria)
+ containers = doc.findAll("div", criteria)
+
+ logger.debug("Checking %d containers ...", len(containers))
+ for container in containers:
+ logger.debug("container[]='%s'", type(container))
+ for header in container.find_all(["h2", "h3", "h4", "h5"]):
+ content = header
+ logger.debug("header[%s]='%s' - BEFORE!", type(header), header)
+ if header is not None:
+ content = str(header.contents[0])
logger.debug("content[%s]='%s' - AFTER!", type(content), content)
if content is None:
logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
continue
elif content.lower() in translations:
- logger.debug("Found header with blocked instances - BREAK!")
+ logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header)
found = header
break
+ logger.debug("found[]='%s'", type(found))
+ if found is not None:
+ logger.debug("Found header with blocked instances - BREAK(2) !")
+ break
+
+ logger.debug("found[]='%s'", type(found))
+ if found is not None:
+ logger.debug("Found header with blocked instances - BREAK(1) !")
+ break
+
logger.debug("found[]='%s'", type(found))
if found is None:
logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
logger.debug("blocklist()=%d - EXIT!", len(blocklist))
return blocklist
- blocking = found.find_next(["ul","table"]).findAll("a")
+ blocking = found.find_next(["ul", "table"]).findAll("a")
logger.debug("Found %d blocked instance(s) ...", len(blocking))
for tag in blocking:
logger.debug("tag[]='%s'", type(tag))
doc = bs4.BeautifulSoup(response.text, "html.parser")
logger.debug("doc[]='%s'", type(doc))
- for container in [{"class": "home-instances container-lg"}, {"class": "container"}]:
- logger.debug("container='%s'", container)
- headers = doc.findAll("div", container)
+ for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
+ logger.debug("criteria='%s'", criteria)
+ containers = doc.findAll("div", criteria)
- logger.debug("Checking %d headers ...", len(headers))
- for header in headers:
+ logger.debug("Checking %d containers ...", len(containers))
+ for header in containers:
logger.debug("header[%s]='%s'", type(header), header)
rows = header.find_next(["ul","table"]).findAll("a")
- logger.debug("Found %d blocked instance(s) ...", len(rows))
+ logger.debug("Found %d instance(s) ...", len(rows))
for tag in rows:
logger.debug("tag[]='%s'", type(tag))
text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text