import logging
import bs4
-import validators
from fba import utils
from fba.helpers import config
+from fba.helpers import domain as domain_helper
from fba.helpers import tidyup
from fba.http import network
+# Configure root logging at INFO level and create the logger for this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
+# Uncomment the following line to enable DEBUG output for this module's logger:
+#logger.setLevel(logging.DEBUG)
-def fetch_blocks(domain: str) -> dict:
+def fetch_blocks(domain: str) -> list:
+ """Fetch the block list a Friendica instance publishes and return it as rows.
+
+ The parameter is first handed to domain_helper.raise_on(), which replaces
+ the inline checks that raised ValueError (presumably it raises on invalid
+ input in the same way - confirm against that helper).
+
+ The fetched page is parsed with BeautifulSoup and the element with the id
+ "about_blocklist" is looked up. From its table, the first cell of each row
+ is taken as the blocked domain and the second cell as the reason.
+
+ Blocked names containing "*" or "?" are treated as obfuscated: the
+ instance is flagged through instances.set_has_obfuscation() and the name
+ is passed to instances.deobfuscate(). Rows that cannot be resolved, rows
+ with an empty blocked name and rows rejected by utils.is_domain_wanted()
+ are skipped.
+
+ Returns a list of dicts with the keys "blocker", "blocked", "reason" and
+ "block_level" (always "reject"). An empty list is returned when fetching
+ raises one of network.exceptions or when no block list element is found.
+ """
logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
- if not isinstance(domain, str):
- raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
- elif domain == "":
- raise ValueError("Parameter 'domain' is empty")
- elif domain.lower() != domain:
- raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
- elif not validators.domain(domain.split("/")[0]):
- raise ValueError(f"domain='{domain}' is not a valid domain")
- elif domain.endswith(".arpa"):
- raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
- elif domain.endswith(".tld"):
- raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
+ domain_helper.raise_on(domain)
blocklist = list()
block_tag = None
try:
- logger.debug("Fetching friendica blocks from domain:", domain)
+ logger.debug("Fetching friendica blocks from domain='%s'", domain)
doc = bs4.BeautifulSoup(
network.fetch_response(
domain,
block_tag = doc.find(id="about_blocklist")
except network.exceptions as exception:
- logger.warning(f"Exception '{type(exception)}' during fetching instances (friendica) from domain='{domain}'")
+ logger.warning("Exception '%s' during fetching instances from domain='%s'", type(exception), domain)
instances.set_last_error(domain, exception)
- return dict()
+ return list()
# Prevents exceptions:
if block_tag is None:
- logger.debug("Instance has no block list:", domain)
- return dict()
+ logger.debug("Instance has no block list: domain='%s'", domain)
+ return list()
table = block_tag.find("table")
- logger.debug(f"table[]='{type(table)}'")
+ logger.debug("table[]='%s'", type(table))
if table.find("tbody"):
rows = table.find("tbody").find_all("tr")
else:
rows = table.find_all("tr")
- logger.debug(f"Found rows()={len(rows)}")
+ logger.debug("Found rows()=%d", len(rows))
for line in rows:
- logger.debug(f"line='{line}'")
+ logger.debug("line='%s'", line)
blocked = tidyup.domain(line.find_all("td")[0].text)
reason = tidyup.reason(line.find_all("td")[1].text)
- logger.debug(f"blocked='{blocked}',reason='{reason}'")
+ logger.debug("blocked='%s',reason='%s'", blocked, reason)
+ if blocked == "":
+ logger.debug("line[]='%s' returned empty blocked domain - SKIPPED!", type(line))
+ continue
+ elif blocked.count("*") > 0:
+ logger.debug("domain='%s' uses obfuscated domains, marking ...", domain)
+ instances.set_has_obfuscation(domain, True)
+
+ # Obscured domain name with no hash
+ row = instances.deobfuscate("*", blocked)
+
+ logger.debug("row[]='%s'", type(row))
+ if row is None:
+ logger.warning("Cannot deobfuscate blocked='%s',domain='%s' - SKIPPED!", blocked, domain)
+ continue
+
+ logger.debug("blocked='%s' de-obscured to '%s'", blocked, row[0])
+ blocked = row[0]
+ elif blocked.count("?") > 0:
+ logger.debug("domain='%s' uses obfuscated domains, marking ...", domain)
+ instances.set_has_obfuscation(domain, True)
+
+ # Obscured domain name with no hash
+ row = instances.deobfuscate("?", blocked)
+
+ logger.debug("row[]='%s'", type(row))
+ if row is None:
+ logger.warning("Cannot deobfuscate blocked='%s',domain='%s' - SKIPPED!", blocked, domain)
+ continue
+
+ logger.debug("blocked='%s' de-obscured to '%s'", blocked, row[0])
+ blocked = row[0]
+
+ logger.debug("blocked[%s]='%s'", type(blocked), blocked)
if not utils.is_domain_wanted(blocked):
logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
continue
- logger.debug(f"Appending blocked='{blocked}',reason='{reason}'")
+ logger.debug("Appending blocked='%s',reason='%s'", blocked, reason)
blocklist.append({
- "domain": tidyup.domain(blocked),
- "reason": tidyup.reason(reason)
+ "blocker" : domain,
+ "blocked" : tidyup.domain(blocked),
+ "reason" : tidyup.reason(reason),
+ "block_level": "reject",
})
- logger.debug("Next!")
- logger.debug("Returning blocklist() for domain:", domain, len(blocklist))
- return {
- "reject": blocklist
- }
+ logger.debug("blocklist()=%d - EXIT!", len(blocklist))
+ return blocklist