git.mxchange.org Git - fba.git/commitdiff
Continued:
author    Roland Häder <roland@mxchange.org>
          Tue, 7 Jan 2025 03:31:37 +0000 (04:31 +0100)
committer Roland Häder <roland@mxchange.org>
          Wed, 8 Jan 2025 12:08:55 +0000 (13:08 +0100)
- local "caching" of configuration values to speedup code execution and lowering
  massive debug logging
- combined None and "" together
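
Both changes boil down to two small patterns; a minimal, self-contained sketch
(hypothetical names throughout, only the config key names are taken from the diff below):

    # Illustrative stand-in for the project's config.get() helper and its keys
    _config = {"connection_timeout": 5, "read_timeout": 10, "bot_enabled": False}

    def config_get(key: str):
        return _config[key]

    # Module-level "cache": each value is looked up once at import time instead
    # of on every call, which also keeps repeated config lookups out of the
    # debug log output.
    _timeout     = (config_get("connection_timeout"), config_get("read_timeout"))
    _bot_enabled = config_get("bot_enabled")

    def fetch_example(domain: str) -> None:
        # 'domain is not None and domain != ""' collapsed into one membership test
        if domain not in [None, ""]:
            print(f"would fetch https://{domain} timeout={_timeout} bot_enabled={_bot_enabled}")

The trade-off of such module-level values is that they are read once at import
time, so configuration changes made after startup are not picked up.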

fba/commands.py
fba/helpers/blacklist.py
fba/helpers/processing.py
fba/http/csrf.py
fba/http/federation.py
fba/http/network.py
fba/http/nodeinfo.py

fba/commands.py
index 5dfdcedb104a31a5756ee16fd7b9f0b869f2e20a..f8dcd747b5ea7c35138d1f462f3e6c1fd3ddb634 100644 (file)
@@ -59,6 +59,10 @@ from fba.networks import mastodon
 from fba.networks import misskey
 from fba.networks import pleroma
 
+# Locally "cached" values to speedup code and keep massive debug log shorter
+_timeout = (config.get("connection_timeout"), config.get("read_timeout"))
+_bot_enabled = config.get("bot_enabled")
+
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 #logger.setLevel(logging.DEBUG)
@@ -266,7 +270,7 @@ def fetch_bkali(args: argparse.Namespace) -> int:
 
 def fetch_blocks(args: argparse.Namespace) -> int:
     logger.debug("args[]='%s' - CALLED!", type(args))
-    if args.domain is not None and args.domain != "":
+    if args.domain not in [None, ""]:
         logger.debug("args.domain='%s' - checking ...", args.domain)
         if not validators.domain(args.domain, rfc_2782=True):
             logger.warning("args.domain='%s' is not valid.", args.domain)
@@ -281,11 +285,11 @@ def fetch_blocks(args: argparse.Namespace) -> int:
     logger.debug("Invoking locking.acquire() ...")
     locking.acquire()
 
-    if args.domain is not None and args.domain != "":
+    if args.domain not in [None, ""]:
         # Re-check single domain
         logger.debug("Querying database for args.domain='%s' ...", args.domain)
         database.cursor.execute("SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ? LIMIT 1", [args.domain])
-    elif args.software is not None and args.software != "":
+    elif args.software not in [None, ""]:
         # Re-check single software
         logger.debug("Querying database for args.software='%s' ...", args.software)
         database.cursor.execute("SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? ORDER BY last_blocked ASC, total_blocks DESC", [args.software])
@@ -452,7 +456,7 @@ def fetch_blocks(args: argparse.Namespace) -> int:
             block["block_level"] = blocks.alias_block_level(block["block_level"])
             logger.debug("block[block_level]='%s' - AFTER!", block["block_level"])
 
-            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] in ["rejected", "suspended"] and config.get("bot_enabled"):
+            if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] in ["rejected", "suspended"] and _bot_enabled:
                 logger.debug("Appending block[blocked]'%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
                 blockdict.append({
                     "blocked": block["blocked"],
@@ -474,8 +478,8 @@ def fetch_blocks(args: argparse.Namespace) -> int:
         logger.debug("Invoking cookies.clear(%s) ...", blocker)
         cookies.clear(blocker)
 
-        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d'", config.get("bot_enabled"), len(blockdict))
-        if config.get("bot_enabled") and len(blockdict) > 0:
+        logger.debug("_bot_enabled='%s',blockdict()=%d'", _bot_enabled, len(blockdict))
+        if _bot_enabled and len(blockdict) > 0:
             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
             network.send_bot_post(blocker, blockdict)
 
@@ -502,7 +506,7 @@ def fetch_observer(args: argparse.Namespace) -> int:
         raw = network.fetch_url(
             f"https://{source_domain}",
             network.web_headers,
-            (config.get("connection_timeout"), config.get("read_timeout"))
+            timeout=_timeout
         ).text
         logger.debug("raw[%s]()=%d", type(raw), len(raw))
 
@@ -632,7 +636,7 @@ def fetch_todon_wiki(args: argparse.Namespace) -> int:
     raw = network.fetch_url(
         f"https://{source_domain}/todon/domainblocks",
         network.web_headers,
-        (config.get("connection_timeout"), config.get("read_timeout"))
+        timeout=_timeout
     ).text
     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
 
@@ -656,18 +660,16 @@ def fetch_todon_wiki(args: argparse.Namespace) -> int:
 
     blockdict = list()
     for block_level in blocklist:
+        logger.debug("block_level='%s'", block_level)
         blockers = blocklist[block_level]
 
-        logger.debug("block_level='%s',blockers()=%d'", block_level, len(blockers))
+        logger.debug("Checking %d blocker entries for block_level='%s' ...", len(blockers), block_level)
         for blocked in blockers:
             logger.debug("blocked='%s'", blocked)
 
             if not domain_helper.is_wanted(blocked):
                 logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                 continue
-            elif not domain_helper.is_wanted(blocker):
-                logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
-                continue
             elif blocks.is_instance_blocked(blocker, blocked, block_level):
                 logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
                 continue
@@ -681,7 +683,7 @@ def fetch_todon_wiki(args: argparse.Namespace) -> int:
 
 
             logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
-            if processing.block(blocker, blocked, None, block_level) and block_level in ["suspended", "rejected"] and config.get("bot_enabled"):
+            if processing.block(blocker, blocked, None, block_level) and block_level in ["suspended", "rejected"] and _bot_enabled:
                 logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
                 blockdict.append({
                     "blocked": blocked,
@@ -691,8 +693,8 @@ def fetch_todon_wiki(args: argparse.Namespace) -> int:
         logger.debug("Invoking commit() ...")
         database.connection.commit()
 
-        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
-        if config.get("bot_enabled") and len(blockdict) > 0:
+        logger.debug("_bot_enabled='%s',blockdict()=%d", _bot_enabled, len(blockdict))
+        if _bot_enabled and len(blockdict) > 0:
             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
             network.send_bot_post(blocker, blockdict)
 
@@ -747,7 +749,7 @@ def fetch_cs(args: argparse.Namespace):
     raw = network.fetch_url(
         f"https://{source_domain}/federation",
         network.web_headers,
-        (config.get("connection_timeout"), config.get("read_timeout"))
+        timeout=_timeout
     ).text
     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
 
@@ -788,7 +790,7 @@ def fetch_cs(args: argparse.Namespace):
                         logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
                         instances.set_last_error(row["domain"], exception)
 
-                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level in ["suspended", "rejected"] and config.get("bot_enabled"):
+                if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level in ["suspended", "rejected"] and _bot_enabled:
                     logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
                     blockdict.append({
                         "blocked": row["domain"],
@@ -798,8 +800,8 @@ def fetch_cs(args: argparse.Namespace):
         logger.debug("Invoking commit() ...")
         database.connection.commit()
 
-        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
-        if config.get("bot_enabled") and len(blockdict) > 0:
+        logger.debug("_bot_enabled='%s',blockdict()=%d", _bot_enabled, len(blockdict))
+        if _bot_enabled and len(blockdict) > 0:
             logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
             network.send_bot_post(blocker, blockdict)
 
@@ -831,7 +833,7 @@ def fetch_fba_rss(args: argparse.Namespace) -> int:
         sources.update(domain)
 
     logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
-    response = network.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
+    response = network.fetch_url(args.feed, network.web_headers, _timeout)
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
     if response.ok and response.status_code == 200 and len(response.text) > 0:
@@ -910,7 +912,7 @@ def fetch_fbabot_atom(args: argparse.Namespace) -> int:
     domains = list()
 
     logger.info("Fetching ATOM feed='%s' from FBA bot account ...", feed)
-    response = network.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
+    response = network.fetch_url(feed, network.web_headers, _timeout)
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
     if response.ok and response.status_code == 200 and len(response.text) > 0:
@@ -1137,7 +1139,7 @@ def fetch_txt(args: argparse.Namespace) -> int:
     logger.info("Checking %d text file(s) ...", len(blocklists.txt_files))
     for row in blocklists.txt_files:
         logger.debug("Fetching row[url]='%s' ...", row["url"])
-        response = network.fetch_url(row["url"], network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
+        response = network.fetch_url(row["url"], network.web_headers, _timeout)
 
         logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
         if response.ok and response.status_code == 200 and response.text != "":
@@ -1185,7 +1187,7 @@ def fetch_fedipact(args: argparse.Namespace) -> int:
     response = network.fetch_url(
         f"https://{source_domain}",
         network.web_headers,
-        (config.get("connection_timeout"), config.get("read_timeout"))
+        timeout=_timeout
     )
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
@@ -1244,7 +1246,7 @@ def fetch_joinmobilizon(args: argparse.Namespace) -> int:
     raw = network.fetch_url(
         f"https://{source_domain}/api/v1/instances",
         network.web_headers,
-        (config.get("connection_timeout"), config.get("read_timeout"))
+        timeout=_timeout
     ).text
     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
 
@@ -1295,7 +1297,7 @@ def fetch_joinmisskey(args: argparse.Namespace) -> int:
     raw = network.fetch_url(
         f"https://{source_domain}/instances.json",
         network.web_headers,
-        (config.get("connection_timeout"), config.get("read_timeout"))
+        timeout=_timeout
     ).text
     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
 
@@ -1445,7 +1447,7 @@ def recheck_obfuscation(args: argparse.Namespace) -> int:
                 block["block_level"] = blocks.alias_block_level(block["block_level"])
                 logger.debug("block[block_level]='%s' - AFTER!", block["block_level"])
 
-                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] in ["suspended", "rejected"] and config.get("bot_enabled"):
+                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] in ["suspended", "rejected"] and _bot_enabled:
                     logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block["block_level"], row["domain"])
                     blockdict.append({
                         "blocked": blocked,
@@ -1471,8 +1473,8 @@ def recheck_obfuscation(args: argparse.Namespace) -> int:
         logger.debug("Invoking commit() ...")
         database.connection.commit()
 
-        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
-        if config.get("bot_enabled") and len(blockdict) > 0:
+        logger.debug("_bot_enabled='%s',blockdict()=%d", _bot_enabled, len(blockdict))
+        if _bot_enabled and len(blockdict) > 0:
             logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
             network.send_bot_post(row["domain"], blockdict)
 
@@ -1494,7 +1496,7 @@ def fetch_fedilist(args: argparse.Namespace) -> int:
         sources.update(source_domain)
 
     url = f"http://{source_domain}/instance/csv?onion=not"
-    if args.software is not None and args.software != "":
+    if args.software not in [None, ""]:
         logger.debug("args.software='%s'", args.software)
         url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
 
@@ -1502,7 +1504,7 @@ def fetch_fedilist(args: argparse.Namespace) -> int:
     response = reqto.get(
         url,
         headers=network.web_headers,
-        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
+        timeout=_timeout,
         allow_redirects=False
     )
 
@@ -1561,13 +1563,13 @@ def update_nodeinfo(args: argparse.Namespace) -> int:
     logger.debug("Invoking locking.acquire() ...")
     locking.acquire()
 
-    if args.domain is not None and args.domain != "":
+    if args.domain not in [None, ""]:
         logger.debug("Fetching args.domain='%s'", args.domain)
         database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ? LIMIT 1", [args.domain])
-    elif args.software is not None and args.software != "":
+    elif args.software not in [None, ""]:
         logger.info("Fetching domains for args.software='%s'", args.software)
         database.cursor.execute("SELECT domain, software FROM instances WHERE software = ? ORDER BY last_updated ASC", [args.software])
-    elif args.mode is not None and args.mode != "":
+    elif args.mode not in [None, ""]:
         logger.info("Fetching domains for args.mode='%s'", args.mode.upper())
         database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode = ? ORDER BY last_updated ASC", [args.mode])
     elif args.no_software:
@@ -1655,7 +1657,7 @@ def fetch_instances_social(args: argparse.Namespace) -> int:
     logger.info("Fetching list from source_domain='%s' ...", source_domain)
     rows = network.fetch_json_rows(
         source_domain,
-        "/api/1.0/instances/list?count=0&sort_by=name", 
+        "/api/1.0/instances/list?count=0&sort_by=name",
         {
             "Authorization": f"Bearer {config.get('instances_social_api_key')}",
         },
@@ -1664,7 +1666,7 @@ def fetch_instances_social(args: argparse.Namespace) -> int:
 
     logger.info("Checking %d row(s) ...", len(rows))
     for row in rows:
-        logger.debug("row[]='%s'", type(row))
+        logger.debug("row[]='%s' - BEFORE!", type(row))
         domain = tidyup.domain(row["name"]) if row["name"] not in [None, ""] else None
         logger.debug("domain='%s' - AFTER!", domain)
 
@@ -1720,7 +1722,7 @@ def fetch_relaylist(args: argparse.Namespace) -> int:
 
     logger.info("Checking %d row(s) ...", len(rows))
     for row in rows:
-        logger.debug("row[]='%s'", type(row))
+        logger.debug("row[%s]='%s' - BEFORE!", type(row), row)
         domain = urlparse(row["url"]).netloc.lower().split(":")[0]
         logger.debug("domain='%s' - AFTER!", domain)
 
@@ -1757,10 +1759,10 @@ def fetch_relays(args: argparse.Namespace) -> int:
     logger.debug("Invoking locking.acquire() ...")
     locking.acquire()
 
-    if args.domain is not None and args.domain != "":
+    if args.domain not in [None, ""]:
         logger.debug("Fetching instances record for args.domain='%s' ...", args.domain)
         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
-    elif args.software is not None and args.software != "":
+    elif args.software not in [None, ""]:
         logger.debug("Fetching instances records for args.software='%s' ...", args.software)
         database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND nodeinfo_url IS NOT NULL AND software = ? ORDER BY last_updated DESC", [args.software])
     else:
@@ -1787,7 +1789,7 @@ def fetch_relays(args: argparse.Namespace) -> int:
                 logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
                 raw = network.fetch_api_url(
                     row["nodeinfo_url"],
-                    (config.get("connection_timeout"), config.get("read_timeout"))
+                    timeout=_timeout
                 )
 
                 logger.debug("raw[%s]()=%d", type(raw), len(raw))
@@ -1814,7 +1816,7 @@ def fetch_relays(args: argparse.Namespace) -> int:
                 raw = network.fetch_url(
                     f"https://{row['domain']}",
                     network.web_headers,
-                    (config.get("connection_timeout"), config.get("read_timeout"))
+                    timeout=_timeout
                 ).text
                 logger.debug("raw[%s]()=%d", type(raw), len(raw))
 
fba/helpers/blacklist.py
index 723bf65257280dbeb64f5e10fccce429a7965dc4..760875680d82d21d1e2cd88073a63ce5c0bb1584 100644 (file)
@@ -79,6 +79,8 @@ _blacklist = {
     "static.sl-reverse.messenger.com": "Please get yourself a proper domain name, no static-IP host names",
     "documentation.on.seirdy.one"    : "Just ignore such lines!",
     "drankdrankdrank"                : "Mass flooding of instances",
+    "cn24tv.it/page/"                : "Useless massive pages",
+    "youtube.com/channel/"           : "Useless massive YT channels",
 }
 
 @lru_cache
fba/helpers/processing.py
index 8326d8ebd04b7607593186648fcdffbdeb8b1389..463978b7e229ac87615a31101c19d33fd7bd8a76 100644 (file)
@@ -202,14 +202,18 @@ def csv_block(blocker: str, url: str, command: str) -> None:
             reject_media = True
         elif "reject_media" in row and row["reject_media"].lower() == "true":
             reject_media = True
+        else:
+            logger.debug("row='%s' for domain='%s' does not contain key '[#]reject_media'", row, domain)
 
         if "#reject_reports" in row and row["#reject_reports"].lower() == "true":
             reject_reports = True
         elif "reject_reports" in row and row["reject_reports"].lower() == "true":
             reject_reports = True
+        else:
+            logger.debug("row='%s' for domain='%s' does not contain key '[#]reject_reports'", row, domain)
 
         cnt = cnt + 1
-        logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
+        logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s',cnt=%d", domain, severity, reject_media, reject_reports, cnt)
         if domain in [None, ""]:
             logger.debug("domain='%s' is empty - SKIPPED!", domain)
             continue
fba/http/csrf.py
index d85445bfec98ce35f13a6e06f358e8231923281a..96c02f262b6bae3518f6d86b7c80ef37c365fb8d 100644 (file)
@@ -32,6 +32,7 @@ from fba.models import instances
 
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+#logger.setLevel(logging.DEBUG)
 
 def determine(domain: str, headers: dict) -> dict:
     logger.debug("domain='%s',headers()=%d - CALLED!", domain, len(headers))
fba/http/federation.py
index 15c67997a95120b24f4b15a2dae65babbd60a727..c24ca632ef41e03826bd6099e4257fe7d062bcc6 100644 (file)
@@ -49,8 +49,13 @@ _api_paths = [
     "/api/v3/site",
 ]
 
+# Local "cache" to shorten intense debug output
+_max_crawl_depth  = config.get("max_crawl_depth")
+_min_peers_length = config.get("min_peers_length")
+
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+#logger.setLevel(logging.DEBUG)
 
 def fetch_instances(domain: str, origin: str, software: str, command: str, path: str = None) -> None:
     global _DEPTH
@@ -200,7 +205,7 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path:
                 instances.update(domain)
 
             logger.debug("instance='%s',origin='%s',_DEPTH=%d reached!", instance, origin, _DEPTH)
-            if _DEPTH <= config.get("max_crawl_depth") and len(peerlist) >= config.get("min_peers_length"):
+            if _DEPTH <= _max_crawl_depth and len(peerlist) >= _min_peers_length:
                 logger.debug("Fetching instance='%s',origin='%s',command='%s',path='%s',_DEPTH=%d ...", instance, domain, command, path, _DEPTH)
                 fetch_instances(instance, domain, None, command, path)
             else:
fba/http/network.py
index a2e3b430fd85343cb1c07983c305dd7cf572bf13..5ca77bc1b4c5c1cf8ac9faee64e5006814cfcbea 100644 (file)
@@ -35,9 +35,6 @@ from fba.helpers import json as json_helper
 
 from fba.models import instances
 
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
 # HTTP headers for non-API requests
 web_headers = {
     "User-Agent": config.get("useragent"),
@@ -64,6 +61,10 @@ exceptions = (
     urllib3.exceptions.LocationParseError
 )
 
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+#logger.setLevel(logging.DEBUG)
+
 def post_json_api(domain: str, path: str, data: str = "", headers: dict = dict()) -> dict:
     logger.debug("domain='%s',path='%s',data='%s',headers()=%d - CALLED!", domain, path, data, len(headers))
     domain_helper.raise_on(domain)
fba/http/nodeinfo.py
index db89b212c956e36375878d50a93a1b03a4b56bb2..9e81bb97a0fc9e9105cdbfc7a75e9b8247c431d3 100644 (file)
@@ -27,9 +27,6 @@ from fba.http import network
 
 from fba.models import instances
 
-logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-
 # Well-known URLs for nodeinfo informations
 _well_known_nodeinfo_urls = [
     "/.well-known/x-nodeinfo2",
@@ -59,6 +56,13 @@ _nodeinfo_identifier = [
     "http://nodeinfo.diaspora.software/ns/schema/1.0",
 ]
 
+# Locally "cached" values to speedup code and keep massive debug log shorter
+_timeout = (config.get("connection_timeout"), config.get("read_timeout"))
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+#logger.setLevel(logging.DEBUG)
+
 def fetch(domain: str, path: str = None, update_mode: bool = True) -> dict:
     logger.debug("domain='%s',path='%s',update_mode='%s' - CALLED!", domain, path, update_mode)
     domain_helper.raise_on(domain)
@@ -247,7 +251,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict:
                 logger.debug("Fetching nodeinfo from url='%s' ...", url)
                 data = network.fetch_api_url(
                     url,
-                    (config.get("connection_timeout"), config.get("read_timeout"))
+                    timeout=_timeout
                  )
 
                 logger.debug("link[href]='%s',data[]='%s'", link["href"], type(data))