import bs4
-from fba import utils
-
from fba.helpers import config
from fba.helpers import domain as domain_helper
from fba.helpers import tidyup
#logger.setLevel(logging.DEBUG)
def fetch_blocks(domain: str) -> list:
- logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
+ logger.debug("domain='%s' - CALLED!", domain)
domain_helper.raise_on(domain)
+ if not instances.is_registered(domain):
+ raise Exception(f"domain='{domain}' is not registered but function is invoked.")
+
blocklist = list()
block_tag = None
try:
logger.debug("Fetching friendica blocks from domain='%s'", domain)
- doc = bs4.BeautifulSoup(
- network.fetch_response(
- domain,
- "/friendica",
- network.web_headers,
- (config.get("connection_timeout"), config.get("read_timeout"))
- ).text,
- "html.parser",
- )
+ raw = network.fetch_response(
+ domain,
+ "/friendica",
+ network.web_headers,
+ (config.get("connection_timeout"), config.get("read_timeout"))
+ ).text
+ logger.debug("Parsing %d Bytes ...", len(raw))
+
+ doc = bs4.BeautifulSoup(raw, "html.parser",)
logger.debug("doc[]='%s'", type(doc))
block_tag = doc.find(id="about_blocklist")
+ logger.debug("block_tag[%s]='%s'", type(block_tag), block_tag)
except network.exceptions as exception:
logger.warning("Exception '%s' during fetching instances from domain='%s'", type(exception), domain)
instances.set_last_error(domain, exception)
+
+ logger.debug("Returning empty list ... - EXIT!")
return list()
- # Prevents exceptions:
+ logger.debug("block_tag[%s]='%s'", type(block_tag), block_tag)
if block_tag is None:
- logger.debug("Instance has no block list: domain='%s'", domain)
+ logger.debug("Instance has no block list: domain='%s' - EXIT!", domain)
return list()
table = block_tag.find("table")
logger.debug("table[]='%s'", type(table))
- if table.find("tbody"):
+ if table is None:
+ logger.warning("domain='%s' has no table tag - EXIT !", domain)
+ return list()
+ elif table.find("tbody"):
rows = table.find("tbody").find_all("tr")
else:
rows = table.find_all("tr")
logger.debug("Found rows()=%d", len(rows))
for line in rows:
logger.debug("line='%s'", line)
- blocked = tidyup.domain(line.find_all("td")[0].text)
+ blocked = line.find_all("td")[0].text
+ logger.debug("blocked='%s'", blocked)
+
+ blocked = tidyup.domain(blocked) if blocked != "" else None
reason = tidyup.reason(line.find_all("td")[1].text)
- logger.debug("blocked='%s',reason='%s'", blocked, reason)
+ logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)
- if blocked == "":
- logger.debug("line[]='%s' returned empty blocked domain - SKIPPED!")
+ if blocked is None or blocked == "":
+ logger.warning("line[]='%s' returned empty blocked domain - SKIPPED!", type(line))
continue
- elif blocked.count("*") > 0:
- logger.debug("domain='%s' uses obfuscated domains, marking ...", domain)
- instances.set_has_obfuscation(domain, True)
-
- # Obscured domain name with no hash
- row = instances.deobfuscate("*", blocked)
-
- logger.debug("row[]='%s'", type(row))
- if row is None:
- logger.warning("Cannot deobfuscate blocked='%s',domain='%s' - SKIPPED!", blocked, domain)
- continue
-
- logger.debug("blocked='%s' de-obscured to '%s'", blocked, row[0])
- blocked = row[0]
- elif blocked.count("?") > 0:
- logger.debug("domain='%s' uses obfuscated domains, marking ...", domain)
- instances.set_has_obfuscation(domain, True)
-
- # Obscured domain name with no hash
- row = instances.deobfuscate("?", blocked)
-
- logger.debug("row[]='%s'", type(row))
- if row is None:
- logger.warning("Cannot deobfuscate blocked='%s',domain='%s' - SKIPPED!", blocked, domain)
- continue
-
- logger.debug("blocked='%s' de-obscured to '%s'", blocked, row[0])
- blocked = row[0]
-
- logger.debug("blocked[%s]='%s'", type(blocked), blocked)
- if not utils.is_domain_wanted(blocked):
+ elif not domain_helper.is_wanted(blocked):
logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
continue
- logger.debug(f"Appending blocked='{blocked}',reason='{reason}'")
+ logger.debug("Appending blocked='%s',reason='%s'", blocked, reason)
blocklist.append({
"blocker" : domain,
- "blocked" : tidyup.domain(blocked),
- "reason" : tidyup.reason(reason),
+ "blocked" : blocked,
+ "reason" : reason,
"block_level": "reject",
})