git.mxchange.org Git - fba.git/commitdiff
Continued:
author    Roland Häder <roland@mxchange.org>
          Thu, 23 Nov 2023 01:27:30 +0000 (02:27 +0100)
committer Roland Häder <roland@mxchange.org>
          Thu, 23 Nov 2023 01:29:26 +0000 (02:29 +0100)
- check config key "allow_i2p_domain" against "true" (see the sketch below)
- improved logger messages
- added some debug log messages and hoisted some lists to module-level constants
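
The `== "true"` comparison implies the setting is stored as a string, and in Python any non-empty string is truthy, so a bare config.get("allow_i2p_domain") check would also fire when the key is set to "false". A minimal sketch of the pitfall, with a plain dict standing in for fba's config helper:

    # Plain dict standing in for fba's config helper (illustrative only).
    config = {"allow_i2p_domain": "false"}

    # Truthiness check: any non-empty string passes, even "false".
    if config.get("allow_i2p_domain"):
        print("bug: branch taken although the setting is 'false'")

    # Explicit comparison: only the exact string "true" passes.
    if config.get("allow_i2p_domain") == "true":
        print("not reached while the setting is 'false'")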

fba/commands.py
fba/helpers/cache.py
fba/helpers/domain.py
fba/helpers/locking.py
fba/helpers/processing.py
fba/helpers/software.py
fba/http/csrf.py
fba/http/federation.py
fba/http/nodeinfo.py
fba/models/blocks.py

diff --git a/fba/commands.py b/fba/commands.py
index 7fc983652881fda72fd4b9d8d64620436089be6c..7886d379a30c87f1c0dd0189560690a64ff9cba5 100644
@@ -371,7 +371,7 @@ def fetch_blocks(args: argparse.Namespace) -> int:
             elif block["blocked"].endswith(".onion"):
                 logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
                 continue
-            elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain"):
+            elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain") == "true":
                 logger.debug("blocked='%s' is an I2P .onion domain - SKIPPED", block["blocked"])
                 continue
             elif block["blocked"].endswith(".arpa"):
@@ -1321,7 +1321,7 @@ def recheck_obfuscation(args: argparse.Namespace) -> int:
             elif block["blocked"].endswith(".onion"):
                 logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                 continue
-            elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain"):
+            elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain") == "true":
                 logger.debug("blocked='%s' is an I2P onion domain name - SKIPPED!", block["blocked"])
                 continue
             elif block["blocked"].endswith(".arpa"):
diff --git a/fba/helpers/cache.py b/fba/helpers/cache.py
index f15d56f21aef88a76773f882aef5e77d4fa0f969..325e58337649ed543344d2f0706f81c7360f9344 100644
@@ -26,7 +26,11 @@ _cache = {}
 ##### Cache #####
 
 def key_exists(key: str) -> bool:
-    return key in _cache
+    logger.debug("key='%s' - CALLED!", key)
+    exists = key in _cache
+
+    logger.debug("exists='%s' - EXIT!", exists)
+    return exists
 
 def set_all(key: str, rows: list, value: any):
     logger.debug("key='%s',rows()=%d,value[]='%s' - CALLED!", key, len(rows), type(value))
@@ -37,6 +41,7 @@ def set_all(key: str, rows: list, value: any):
         logger.debug("Cache for key='%s' not initialized.", key)
         _cache[key] = dict()
 
+    logger.debug("Setting %d row(s) for key='%s',value[%s]='%s' ...", len(rows), key, type(value), value)
     for sub in rows:
         logger.debug("Setting key='%s',sub[%s]='%s'", key, type(sub), sub)
         if isinstance(sub, sqlite3.Row):
@@ -57,7 +62,7 @@ def set_sub_key(key: str, sub: str, value: any):
     elif not key_exists(key):
         raise Exception(f"Cache for key='{key}' is not initialized, but function invoked")
 
-    logger.debug("Setting key='%s',sub='%s',value[]='%s' ...", key, sub, type(value))
+    logger.debug("Setting key='%s',sub='%s',value[%s]='%s' ...", key, sub, type(value), value)
     _cache[key][sub] = value
 
     logger.debug("EXIT!")
diff --git a/fba/helpers/domain.py b/fba/helpers/domain.py
index da39c80c43b7982e6a7eda04ab50c9ab739482ba..00bf9384aee437d3d9b2fc715fa0ba0968584369 100644
@@ -41,7 +41,7 @@ def raise_on(domain: str):
         raise ValueError(f"domain='{domain}' is not a valid domain")
     elif domain.endswith(".onion"):
         raise ValueError(f"domain='{domain}' is a TOR, please don't crawl them!")
-    elif domain.endswith(".i2p") and config.get("allow_i2p_domain"):
+    elif domain.endswith(".i2p") and config.get("allow_i2p_domain") == "true":
         raise ValueError(f"domain='{domain}' is an I2P, please don't crawl them!")
     elif domain.endswith(".arpa"):
         raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
@@ -90,7 +90,7 @@ def is_wanted(domain: str) -> bool:
     elif domain.endswith(".onion"):
         logger.debug("domain='%s' is a TOR .onion domain - setting False ...", domain)
         wanted = False
-    elif domain.endswith(".i2p") and config.get("allow_i2p_domain"):
+    elif domain.endswith(".i2p") and config.get("allow_i2p_domain") == "true":
         logger.debug("domain='%s' is an I2P domain - setting False ...", domain)
         wanted = False
     elif domain.endswith(".tld"):
diff --git a/fba/helpers/locking.py b/fba/helpers/locking.py
index 0fc18b967bd37fb988c4d8ebaee4c44edd4cf5ee..f553d0435a6f9068a10c7786a4e22dccb58cb5ea 100644
@@ -47,6 +47,7 @@ def release():
     if LOCK is not None:
         logger.debug("Releasing lock ...")
         LOCK.close()
+
         logger.debug("Deleting lockfile='%s' ...", lockfile)
         os.remove(lockfile)
 
diff --git a/fba/helpers/processing.py b/fba/helpers/processing.py
index dfb6388ff192f3a38e091558aadc2c6bb380ca82..defabdd8c9d3c1a28e90aa038a8f1fb4f3c79b15 100644
@@ -199,7 +199,7 @@ def csv_block(blocker: str, url: str, command: str):
         elif domain.endswith(".onion"):
             logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
             continue
-        elif domain.endswith(".i2p") and config.get("allow_i2p_domain"):
+        elif domain.endswith(".i2p") and config.get("allow_i2p_domain") == "true":
             logger.debug("domain='%s' is an I2P .onion domain - SKIPPED", domain)
             continue
         elif domain.endswith(".arpa"):
diff --git a/fba/helpers/software.py b/fba/helpers/software.py
index be0bbb292d1a2ecd3d6fa62b54bcdb1c433ad3ce..f224ba4e478da7e9b86d7a3aac54dc58599c7e04 100644
@@ -208,5 +208,6 @@ def is_relay(software: str) -> bool:
         raise ValueError(f"software[]='{type(software)}' is not type 'str'")
 
     found = software in relays
+
     logger.debug("found='%s' - EXIT!", found)
     return found
diff --git a/fba/http/csrf.py b/fba/http/csrf.py
index f629cb8f976caa538fc5111e7541af05018e7ff3..9f349e7fdcfe85d72394e4ef4e3a854442bf98d0 100644
@@ -70,6 +70,7 @@ def determine(domain: str, headers: dict) -> dict:
             reqheaders["X-CSRF-Token"] = tag["content"]
     elif not domain_helper.is_in_url(domain, response.url):
         logger.warning("domain='%s' doesn't match with response.url='%s', maybe redirect to other domain?", domain, response.url)
+
         message = f"Redirect from domain='{domain}' to response.url='{response.url}'"
         instances.set_last_error(domain, message)
         raise requests.exceptions.TooManyRedirects(message)
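
For context, the tag consulted by determine() is the csrf-token meta element that Rails-based servers such as Mastodon embed in their HTML. A hedged sketch of that lookup with requests and BeautifulSoup (fba's actual parsing code may differ in details):

    import requests
    from bs4 import BeautifulSoup

    # Fetch the landing page and look for <meta name="csrf-token" content="...">.
    response = requests.get("https://example.com/", timeout=10)
    tag = BeautifulSoup(response.text, "html.parser").find(
        "meta", attrs={"name": "csrf-token"}
    )

    reqheaders = {}
    if tag is not None and tag.has_attr("content"):
        # Mirrors the assignment in the hunk above.
        reqheaders["X-CSRF-Token"] = tag["content"]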
diff --git a/fba/http/federation.py b/fba/http/federation.py
index 49eee69bd013b43be40d0675b615ef92712a4785..fdb18285a5bf91fea0948a32c82a34d70a6ee3af 100644
@@ -42,6 +42,12 @@ from fba.networks import peertube
 # Depth counter, being raised and lowered
 _DEPTH = 0
 
+# API paths
+_api_paths = [
+    "/api/v1/instance/peers",
+    "/api/v3/site",
+]
+
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
@@ -218,16 +224,11 @@ def fetch_peers(domain: str, software: str, origin: str) -> list:
         logger.debug("Returning empty list ... - EXIT!")
         return list()
 
-    paths = [
-        "/api/v1/instance/peers",
-        "/api/v3/site",
-    ]
-
     # Init peers variable
     peers = list()
 
-    logger.debug("Checking %d paths ...", len(paths))
-    for path in paths:
+    logger.debug("Checking %d API paths ...", len(_api_paths))
+    for path in _api_paths:
         logger.debug("Fetching path='%s' from domain='%s',software='%s' ...", path, domain, software)
         data = network.get_json_api(
             domain,
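
Hoisting the path list out of fetch_peers() builds it once at import time instead of on every call; the two entries are the Mastodon-style peers endpoint and Lemmy's site endpoint. The same pattern in isolation (the helper function is invented for illustration):

    # Built once at module import, shared by every call.
    _api_paths = [
        "/api/v1/instance/peers",  # Mastodon and compatible servers
        "/api/v3/site",            # Lemmy
    ]

    def peer_api_urls(domain: str) -> list:
        # Reuses the module-level list instead of rebuilding it per call.
        return [f"https://{domain}{path}" for path in _api_paths]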
diff --git a/fba/http/nodeinfo.py b/fba/http/nodeinfo.py
index 5810c568b4a32e3ea247a37893cd5de93aeef184..b31e2acd836b3af0770662969bbca8a815f4593c 100644
@@ -28,6 +28,29 @@ from fba.models import instances
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
+# Request paths
+_request_paths = [
+    "/nodeinfo/2.1.json",
+    "/nodeinfo/2.1",
+    "/nodeinfo/2.0.json",
+    "/nodeinfo/2.0",
+    "/nodeinfo/1.0.json",
+    "/nodeinfo/1.0",
+    "/api/v1/instance",
+]
+
+# "rel" identifiers (no real URLs)
+_nodeinfo_identifier = [
+    "https://nodeinfo.diaspora.software/ns/schema/2.1",
+    "http://nodeinfo.diaspora.software/ns/schema/2.1",
+    "https://nodeinfo.diaspora.software/ns/schema/2.0",
+    "http://nodeinfo.diaspora.software/ns/schema/2.0",
+    "https://nodeinfo.diaspora.software/ns/schema/1.1",
+    "http://nodeinfo.diaspora.software/ns/schema/1.1",
+    "https://nodeinfo.diaspora.software/ns/schema/1.0",
+    "http://nodeinfo.diaspora.software/ns/schema/1.0",
+]
+
 def fetch(domain: str, path: str = None, update_mode: bool = True) -> dict:
     logger.debug("domain='%s',path='%s',update_mode='%s' - CALLED!", domain, path, update_mode)
     domain_helper.raise_on(domain)
@@ -59,6 +82,7 @@ def fetch(domain: str, path: str = None, update_mode: bool = True) -> dict:
     try:
         logger.debug("Checking CSRF for domain='%s'", domain)
         headers = csrf.determine(domain, dict())
+        logger.debug("headers()=%d", len(headers))
     except network.exceptions as exception:
         logger.warning("Exception '%s' during checking CSRF (nodeinfo,%s) - EXIT!", type(exception), __name__)
         instances.set_last_error(domain, exception)
@@ -68,17 +92,8 @@ def fetch(domain: str, path: str = None, update_mode: bool = True) -> dict:
             "exception"    : exception,
         }
 
-    request_paths = [
-       "/nodeinfo/2.1.json",
-       "/nodeinfo/2.1",
-       "/nodeinfo/2.0.json",
-       "/nodeinfo/2.0",
-       "/nodeinfo/1.0.json",
-       "/nodeinfo/1.0",
-       "/api/v1/instance",
-    ]
-
-    for request in request_paths:
+    logger.debug("Checking %d request paths ...", len(_request_paths))
+    for request in _request_paths:
         logger.debug("request='%s'", request)
         http_url  = f"http://{domain}{str(path) if path is not None else '/'}"
         https_url = f"https://{domain}{str(path) if path is not None else '/'}"
@@ -105,6 +120,8 @@ def fetch(domain: str, path: str = None, update_mode: bool = True) -> dict:
 
                     logger.debug("domain='%s',request='%s'", domain, request)
                     instances.set_nodeinfo_url(domain, "https://{domain}{request}")
+
+                logger.debug("BREAK!")
                 break
 
             logger.warning("Failed fetching nodeinfo from domain='%s',status_code='%s',error_message='%s'", domain, data['status_code'], data['error_message'])
@@ -116,18 +133,6 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict:
     logger.debug("domain='%s' - CALLED!", domain)
     domain_helper.raise_on(domain)
 
-    # "rel" identifiers (no real URLs)
-    nodeinfo_identifier = [
-        "https://nodeinfo.diaspora.software/ns/schema/2.1",
-        "http://nodeinfo.diaspora.software/ns/schema/2.1",
-        "https://nodeinfo.diaspora.software/ns/schema/2.0",
-        "http://nodeinfo.diaspora.software/ns/schema/2.0",
-        "https://nodeinfo.diaspora.software/ns/schema/1.1",
-        "http://nodeinfo.diaspora.software/ns/schema/1.1",
-        "https://nodeinfo.diaspora.software/ns/schema/1.0",
-        "http://nodeinfo.diaspora.software/ns/schema/1.0",
-    ]
-
     # No CSRF by default, you don't have to add network.api_headers by yourself here
     headers = tuple()
 
@@ -157,29 +162,29 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict:
         logger.debug("data[]='%s'", type(data))
 
         if "error_message" not in data and "json" in data and len(data["json"]) > 0:
-            logger.debug("path='%s' returned valid json()=%d", path, len(data["json"]))
+            logger.debug("path='%s' returned valid json()=%d - BREAK!", path, len(data["json"]))
             break
 
     logger.debug("data[]='%s'", type(data))
     if "exception" in data:
-        logger.warning("domain='%s' returned exception '%s'", domain, str(data["exception"]))
+        logger.warning("domain='%s' returned exception '%s' - RAISE!", domain, str(data["exception"]))
         raise data["exception"]
     elif "error_message" in data:
-        logger.warning("domain='%s' returned error message: '%s'", domain, data["error_message"])
+        logger.warning("domain='%s' returned error message: '%s' - EXIT!", domain, data["error_message"])
         return data
     elif "json" not in data:
-        logger.warning("domain='%s' returned no 'json' key", domain)
+        logger.warning("domain='%s' returned no 'json' key - EXIT!", domain)
         return dict()
 
     infos = data["json"]
-    logger.debug("infos()=%d has been returned", len(infos))
+    logger.debug("infos(%d)[]='%s' has been returned", len(infos), type(infos))
 
     if "links" in infos:
         logger.debug("Marking domain='%s' as successfully handled ...", domain)
         instances.set_success(domain)
 
         logger.debug("Found infos[links]()=%d record(s),", len(infos["links"]))
-        for niid in nodeinfo_identifier:
+        for niid in _nodeinfo_identifier:
             data = dict()
 
             logger.debug("Checking niid='%s' ...", niid)
@@ -231,7 +236,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict:
                     instances.set_detection_mode(domain, "AUTO_DISCOVERY")
                     instances.set_nodeinfo_url(domain, link["href"])
 
-                    logger.debug("Marking domain='%s' as successfully handled ...", domain)
+                    logger.debug("Marking domain='%s' as successfully handled - BREAK!", domain)
                     instances.set_success(domain)
                     break
                 else:
@@ -240,7 +245,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict:
 
             logger.debug("data()=%d", len(data))
             if "error_message" not in data and "json" in data:
-                logger.debug("Auto-discovery successful: domain='%s'", domain)
+                logger.debug("Auto-discovery successful: domain='%s' - BREAK!", domain)
                 break
     elif "server" in infos:
         logger.debug("Found infos[server][software]='%s'", infos["server"]["software"])
diff --git a/fba/models/blocks.py b/fba/models/blocks.py
index 1b61887309c9c145367a7da686b3cddbdba4bb20..70e6eb3f83f12133b0a9b421b40814fe1d75e53e 100644
@@ -144,7 +144,9 @@ def add(blocker: str, blocked: str, reason: str, block_level: str):
 
     if reason is not None:
         # Maybe needs cleaning
+        logger.debug("reason='%s' - BEFORE!")
         reason = tidyup.reason(reason)
+        logger.debug("reason='%s' - AFTER!")
 
     logger.info("New block: blocker='%s',blocked='%s',reason='%s',block_level='%s'", blocker, blocked, reason, block_level)