logger.debug("Parsing %d tags ...", len(tags))
for tag in tags:
logger.debug("tag[]='%s'", type(tag))
- domain = tidyup.domain(tag.find(search).contents[0])
- logger.debug("domain='%s' - AFTER!", domain)
+ domain = tag.find(search).contents[0]
+ logger.debug("domain='%s' - BEFORE! #1", domain)
+ if domain not in ["", None]:
+ domain = tidyup.domain(domain)
+ logger.debug("domain='%s' - AFTER! #2", domain)
if domain == "":
logger.debug("tag='%s' has no domain, trying <em> ...", tag)
- domain = tidyup.domain(tag.find("em").contents[0])
- logger.debug("domain='%s' - AFTER!", domain)
+ domain = tag.find("em").contents[0]
+ logger.debug("domain='%s' - BEFORE! #2", domain)
+ if domain not in ["", None]:
+ domain = tidyup.domain(domain)
+ logger.debug("domain='%s' - AFTER! #2", domain)
logger.debug("domain='%s' - AFTER2!", domain)
if domain == "":
logger.warning("Empty domain after checking search='%s' and <em> tags - SKIPPED!", search)
continue
+ elif domain == "noagendasocial.com/noagenda.social":
+ logger.debug("domain='%s' is a double-domain entry, adding all ...", domain)
+ add_all_to_list(domains, domain, "/")
+
+ logger.debug("domain='%s' - SKIPPING!", domain)
+ continue
+ elif "," in domain:
+ logger.debug("domain='%s' contains a comma-separated list of domains, adding all ...", domain)
+ add_all_to_list(domains, domain, ",")
+
+ logger.debug("domain='%s' - SKIPPING!", domain)
+ continue
+ elif not validators.domain(domain, rfc_2782=True):
+ logger.warning("domain='%s' is not a valid domain - SKIPPED!", domain)
+ continue
logger.debug("domain='%s' - BEFORE!", domain)
domain = domain_helper.encode_idna(domain)
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
- logger.debug("Appending domain='%s'", domain)
+ logger.debug("Appending domain='%s' ...", domain)
domains.append(domain)
logger.debug("domains()=%d - EXIT!", len(domains))
return domains
def add_all_to_list(domains: list, source: str, splitter: str) -> None:
    """Split ``source`` on ``splitter`` and append each wanted part to ``domains``.

    Every fragment is stripped of surrounding whitespace. Fragments that are
    empty after stripping, or that ``domain_helper.is_wanted()`` rejects, are
    skipped. The list is modified in place; nothing is returned.

    Args:
        domains: List that accepted domain names are appended to.
        source: Non-empty string holding one or more domains separated by
            ``splitter``.
        splitter: Non-empty separator string, e.g. ``","`` or ``"/"``.

    Raises:
        TypeError: If ``domains`` is not a list, or ``source`` / ``splitter``
            is not a string.
        ValueError: If ``source`` or ``splitter`` is an empty string.
    """
    # Validate before logging: the entry log line needs len(domains), which
    # is only safe to evaluate once 'domains' is known to be a list.
    if not isinstance(domains, list):
        raise TypeError(f"Parameter domains[]='{type(domains)}' is not type 'list'")
    if not isinstance(source, str):
        raise TypeError(f"Parameter source[]='{type(source)}' is not type 'str'")
    if source == "":
        raise ValueError("Parameter 'source' is empty")
    if not isinstance(splitter, str):
        raise TypeError(f"Parameter splitter[]='{type(splitter)}' is not type 'str'")
    if splitter == "":
        raise ValueError("Parameter 'splitter' is empty")

    logger.debug("domains()=%d,source='%s',splitter='%s' - CALLED!", len(domains), source, splitter)

    for domain in source.split(splitter):
        logger.debug("domain='%s' - LOOP!", domain)
        domain = domain.strip()

        # A trailing or doubled splitter ("a.tld,,b.tld,") yields empty
        # fragments; these can never be a domain, so drop them up front.
        if domain == "":
            logger.debug("Empty fragment in source='%s' - SKIPPED!", source)
            continue

        if not domain_helper.is_wanted(domain):
            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            continue

        logger.debug("Appending domain='%s' ...", domain)
        domains.append(domain)

    logger.debug("EXIT!")
+
def deobfuscate(domain: str, blocker: str, domain_hash: str = None) -> str:
logger.debug("domain='%s',blocker='%s',domain_hash='%s' - CALLED!", domain, blocker, domain_hash)
domain_helper.raise_on(blocker)