]> git.mxchange.org Git - fba.git/blobdiff - fba/networks/friendica.py
Continued:
[fba.git] / fba / networks / friendica.py
index 39b81d37483c56fe779f60f71a6ad4e9cf17faa2..c4a90c16b3413216aeb744cac6bf9e66aa4b12b8 100644 (file)
 import logging
 
 import bs4
-import validators
 
 from fba import utils
 
 from fba.helpers import config
+from fba.helpers import domain as domain_helper
 from fba.helpers import tidyup
 
 from fba.http import network
@@ -30,27 +30,17 @@ from fba.models import instances
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+#logger.setLevel(logging.DEBUG)
 
-def fetch_blocks(domain: str) -> dict:
+def fetch_blocks(domain: str) -> list:
     logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
-    if not isinstance(domain, str):
-        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
-    elif domain == "":
-        raise ValueError("Parameter 'domain' is empty")
-    elif domain.lower() != domain:
-        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
-    elif not validators.domain(domain.split("/")[0]):
-        raise ValueError(f"domain='{domain}' is not a valid domain")
-    elif domain.endswith(".arpa"):
-        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
-    elif domain.endswith(".tld"):
-        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
+    domain_helper.raise_on(domain)
 
     blocklist = list()
     block_tag = None
 
     try:
-        logger.debug("Fetching friendica blocks from domain:", domain)
+        logger.debug("Fetching friendica blocks from domain='%s'", domain)
         doc = bs4.BeautifulSoup(
             network.fetch_response(
                 domain,
@@ -64,42 +54,74 @@ def fetch_blocks(domain: str) -> dict:
 
         block_tag = doc.find(id="about_blocklist")
     except network.exceptions as exception:
-        logger.warning(f"Exception '{type(exception)}' during fetching instances (friendica) from domain='{domain}'")
+        logger.warning("Exception '%s' during fetching instances from domain='%s'", type(exception), domain)
         instances.set_last_error(domain, exception)
-        return dict()
+        return list()
 
     # Prevents exceptions:
     if block_tag is None:
-        logger.debug("Instance has no block list:", domain)
-        return dict()
+        logger.debug("Instance has no block list: domain='%s'", domain)
+        return list()
 
     table = block_tag.find("table")
 
-    logger.debug(f"table[]='{type(table)}'")
+    logger.debug("table[]='%s'", type(table))
     if table.find("tbody"):
         rows = table.find("tbody").find_all("tr")
     else:
         rows = table.find_all("tr")
 
-    logger.debug(f"Found rows()={len(rows)}")
+    logger.debug("Found rows()=%d", len(rows))
     for line in rows:
-        logger.debug(f"line='{line}'")
+        logger.debug("line='%s'", line)
         blocked = tidyup.domain(line.find_all("td")[0].text)
         reason  = tidyup.reason(line.find_all("td")[1].text)
-        logger.debug(f"blocked='{blocked}',reason='{reason}'")
+        logger.debug("blocked='%s',reason='%s'", blocked, reason)
 
+        if blocked == "":
+            logger.debug("line[]='%s' returned empty blocked domain - SKIPPED!", type(line))
+            continue
+        elif blocked.count("*") > 0:
+            logger.debug("domain='%s' uses obfuscated domains, marking ...", domain)
+            instances.set_has_obfuscation(domain, True)
+
+            # Obscured domain name with no hash
+            row = instances.deobfuscate("*", blocked)
+
+            logger.debug("row[]='%s'", type(row))
+            if row is None:
+                logger.warning("Cannot deobfuscate blocked='%s',domain='%s' - SKIPPED!", blocked, domain)
+                continue
+
+            logger.debug("blocked='%s' de-obscured to '%s'", blocked, row[0])
+            blocked = row[0]
+        elif blocked.count("?") > 0:
+            logger.debug("domain='%s' uses obfuscated domains, marking ...", domain)
+            instances.set_has_obfuscation(domain, True)
+
+            # Obscured domain name with no hash
+            row = instances.deobfuscate("?", blocked)
+
+            logger.debug("row[]='%s'", type(row))
+            if row is None:
+                logger.warning("Cannot deobfuscate blocked='%s',domain='%s' - SKIPPED!", blocked, domain)
+                continue
+
+            logger.debug("blocked='%s' de-obscured to '%s'", blocked, row[0])
+            blocked = row[0]
+
+        logger.debug("blocked[%s]='%s'", type(blocked), blocked)
         if not utils.is_domain_wanted(blocked):
             logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
             continue
 
         logger.debug(f"Appending blocked='{blocked}',reason='{reason}'")
         blocklist.append({
-            "domain": tidyup.domain(blocked),
-            "reason": tidyup.reason(reason)
+            "blocker"    : domain,
+            "blocked"    : tidyup.domain(blocked),
+            "reason"     : tidyup.reason(reason),
+            "block_level": "reject",
         })
-        logger.debug("Next!")
 
-    logger.debug("Returning blocklist() for domain:", domain, len(blocklist))
-    return {
-        "reject": blocklist
-    }
+    logger.debug("blocklist()=%d - EXIT!", len(blocklist))
+    return blocklist