From 9ea84728dbdd696a3cc09130440971e5eaf006fb Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Fri, 2 Jun 2023 15:44:15 +0200 Subject: [PATCH] Continued: - first thanks to `activitypub-trolls.cf` we have tons of registered "instances" which need to be fetched and then these trolls need to be ignored - then commented out some debug lines - also fixed some code - also let */? pass as obfuscations --- config.defaults.json | 2 +- fba.py | 51 ++++++++----- fetch_blocks.py | 178 +++++++++++++++++++++++++------------------ 3 files changed, 137 insertions(+), 94 deletions(-) diff --git a/config.defaults.json b/config.defaults.json index e9843bd..f6de1c0 100644 --- a/config.defaults.json +++ b/config.defaults.json @@ -4,7 +4,7 @@ "host" : "127.0.0.1", "port" : 8069, "useragent" : "Mozilla/5.0 (Windows NT 10.0; rv:102.0) Gecko/20100101 Firefox/102.0", - "connection_timeout": 2, + "connection_timeout": 30, "read_timeout" : 5, "bot_enabled" : false, "bot_instance" : "https://example.com", diff --git a/fba.py b/fba.py index 37e7384..0b58c53 100644 --- a/fba.py +++ b/fba.py @@ -191,11 +191,11 @@ def is_cache_key_set(key: str, sub: str) -> bool: ##### Other functions ##### def is_primitive(var: any) -> bool: - #print(f"DEBUG: var[]='{type(var)}' - CALLED!") + # NOISY-DEBUG: print(f"DEBUG: var[]='{type(var)}' - CALLED!") return type(var) in {int, str, float, bool} or var == None def set_instance_data(key: str, domain: str, value: any): - #print(f"DEBUG: key='{key}',domain='{domain}',value[]='{type(value)}' - CALLED!") + # NOISY-DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',value[]='{type(value)}' - CALLED!") if type(key) != str: raise ValueError("Parameter key[]='{type(key)}' is not 'str'") elif key == "": @@ -438,12 +438,12 @@ def update_instance_data(domain: str): raise ValueError(f"No fields have been set, but method invoked, domain='{domain}'") # DEBUG: print(f"DEBUG: sql_string='{sql_string}',fields()={len(fields)}") - sql = "UPDATE instances SET" + 
sql_string + " last_updated = TIME() WHERE domain = ? LIMIT 1" - # DEBUG: print("DEBUG: sql:", sql) + sql_string = "UPDATE instances SET" + sql_string + " last_updated = TIME() WHERE domain = ? LIMIT 1" + # DEBUG: print("DEBUG: sql_string:", sql_string) try: - # DEBUG: print("DEBUG: Executing SQL:", sql) - cursor.execute(sql, fields) + # DEBUG: print("DEBUG: Executing SQL:", sql_string) + cursor.execute(sql_string, fields) # DEBUG: print(f"DEBUG: Success! (rowcount={cursor.rowcount })") if cursor.rowcount == 0: @@ -461,7 +461,7 @@ def update_instance_data(domain: str): pass except BaseException as e: - print(f"ERROR: failed SQL query: domain='{domain}',sql='{sql}',exception[{type(e)}]:'{str(e)}'") + print(f"ERROR: failed SQL query: domain='{domain}',sql_string='{sql_string}',exception[{type(e)}]:'{str(e)}'") sys.exit(255) # DEBUG: print("DEBUG: EXIT!") @@ -1039,6 +1039,15 @@ def determine_software(domain: str, path: str = None) -> str: return software def update_block_reason(reason: str, blocker: str, blocked: str, block_level: str): + if type(reason) != str and reason != None: + raise ValueError(f"Parameter reason[]='{type(reason)}' is not 'str'") + elif type(blocker) != str: + raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not 'str'") + elif type(blocked) != str: + raise ValueError(f"Parameter blocked[]='{type(blocked)}' is not 'str'") + elif type(block_level) != str: + raise ValueError(f"Parameter block_level[]='{type(block_level)}' is not 'str'") + # DEBUG: print("DEBUG: Updating block reason:", reason, blocker, blocked, block_level) try: cursor.execute( @@ -1054,10 +1063,11 @@ def update_block_reason(reason: str, blocker: str, blocked: str, block_level: st # DEBUG: print(f"DEBUG: cursor.rowcount={cursor.rowcount}") if cursor.rowcount == 0: - print("WARNING: Did not update any rows:", domain) + print("WARNING: Did not update any rows:", blocker, blocked) + return except BaseException as e: - print(f"ERROR: failed SQL query: 
reason='{reason}',blocker='{blocker}',blocked='{blocked}',block_level='{block_level}',sql='{sql}',exception[{type(e)}]:'{str(e)}'") + print(f"ERROR: failed SQL query: reason='{reason}',blocker='{blocker}',blocked='{blocked}',block_level='{block_level}',exception[{type(e)}]:'{str(e)}'") sys.exit(255) # DEBUG: print("DEBUG: EXIT!") @@ -1077,6 +1087,7 @@ def update_last_seen(blocker: str, blocked: str, block_level: str): if cursor.rowcount == 0: print("WARNING: Did not update any rows:", domain) + return except BaseException as e: print(f"ERROR: failed SQL query: last_seen='{last_seen}',blocker='{blocker}',blocked='{blocked}',block_level='{block_level}',exception[{type(e)}]:'{str(e)}'") @@ -1098,8 +1109,12 @@ def block_instance(blocker: str, blocked: str, reason: str, block_level: str): raise ValueError(f"Parameter 'blocked' cannot be empty") elif not validators.domain(blocked.split("/")[0]): raise ValueError(f"Bad blocked='{blocked}'") + elif is_blacklisted(blocker): + raise Exception(f"blocker='{blocker}' is blacklisted but function invoked") + elif is_blacklisted(blocked): + raise Exception(f"blocked='{blocked}' is blacklisted but function invoked") - print("INFO: New block:", blocker, blocked, reason, block_level, first_seen, last_seen) + print("INFO: New block:", blocker, blocked, reason, block_level) try: cursor.execute( "INSERT INTO blocks (blocker, blocked, reason, block_level, first_seen, last_seen) VALUES(?, ?, ?, ?, ?, ?)", @@ -1112,7 +1127,6 @@ def block_instance(blocker: str, blocked: str, reason: str, block_level: str): time.time() ), ) - except BaseException as e: print(f"ERROR: failed SQL query: blocker='{blocker}',blocked='{blocked}',reason='{reason}',block_level='{block_level}',exception[{type(e)}]:'{str(e)}'") sys.exit(255) @@ -1125,9 +1139,9 @@ def is_instance_registered(domain: str) -> bool: elif domain == "": raise ValueError(f"Parameter 'domain' cannot be empty") - # NOISY-DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + # NOISY-DEBUG: # 
DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if not is_cache_initialized("is_registered"): - # NOISY-DEBUG: print(f"DEBUG: Cache for 'is_registered' not initialized, fetching all rows ...") + # NOISY-DEBUG: # DEBUG: print(f"DEBUG: Cache for 'is_registered' not initialized, fetching all rows ...") try: cursor.execute("SELECT domain FROM instances") @@ -1140,7 +1154,7 @@ def is_instance_registered(domain: str) -> bool: # Is cache found? registered = is_cache_key_set("is_registered", domain) - # NOISY-DEBUG: print(f"DEBUG: registered='{registered}' - EXIT!") + # NOISY-DEBUG: # DEBUG: print(f"DEBUG: registered='{registered}' - EXIT!") return registered def add_instance(domain: str, origin: str, originator: str, path: str = None): @@ -1154,13 +1168,14 @@ def add_instance(domain: str, origin: str, originator: str, path: str = None): raise ValueError(f"originator[]={type(originator)} is not 'str'") elif originator == "": raise ValueError(f"originator cannot be empty") - - # DEBUG: print("DEBUG: domain,origin,originator,path:", domain, origin, originator, path) - if not validators.domain(domain.split("/")[0]): + elif not validators.domain(domain.split("/")[0]): raise ValueError(f"Bad domain name='{domain}'") elif origin is not None and not validators.domain(origin.split("/")[0]): raise ValueError(f"Bad origin name='{origin}'") + elif is_blacklisted(domain): + raise Exception(f"domain='{domain}' is blacklisted, but method invoked") + # DEBUG: print("DEBUG: domain,origin,originator,path:", domain, origin, originator, path) software = determine_software(domain, path) # DEBUG: print("DEBUG: Determined software:", software) @@ -1252,7 +1267,7 @@ def get_mastodon_blocks(domain: str) -> dict: try: doc = bs4.BeautifulSoup( - reqto.get(f"https://{domain}/about/more", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])).text, + reqto.get(f"https://{domain}/about", headers=headers, timeout=(config["connection_timeout"], 
config["read_timeout"])).text, "html.parser", ) except BaseException as e: diff --git a/fetch_blocks.py b/fetch_blocks.py index 4702736..c72d56e 100755 --- a/fetch_blocks.py +++ b/fetch_blocks.py @@ -22,6 +22,7 @@ import time import bs4 import itertools import re +import validators import fba fba.cursor.execute( @@ -31,10 +32,10 @@ fba.cursor.execute( rows = fba.cursor.fetchall() print(f"INFO: Checking {len(rows)} entries ...") for blocker, software, origin, nodeinfo_url in rows: - # NOISY-DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url) + # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url) blockdict = [] blocker = fba.tidyup(blocker) - # NOISY-DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software) + # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software) if blocker == "": print("WARNING: blocker is now empty!") @@ -43,7 +44,7 @@ for blocker, software, origin, nodeinfo_url in rows: print(f"WARNING: blocker='{blocker}' is blacklisted now!") continue - # NOISY-DEBUG: print(f"DEBUG: blocker='{blocker}'") + # DEBUG: print(f"DEBUG: blocker='{blocker}'") fba.update_last_blocked(blocker) if software == "pleroma": @@ -55,13 +56,13 @@ for blocker, software, origin, nodeinfo_url in rows: print("WARNING: Could not fetch nodeinfo from blocker:", blocker) continue - print("DEBUG: Updating nodeinfo:", blocker) + # DEBUG: print("DEBUG: Updating nodeinfo:", blocker) fba.update_last_nodeinfo(blocker) federation = json["metadata"]["federation"] if "enabled" in federation: - # NOISY-DEBUG: print("DEBUG: Instance has no block list to analyze:", blocker) + # DEBUG: print("DEBUG: Instance has no block list to analyze:", blocker) continue if "mrf_simple" in federation: @@ -69,38 +70,43 @@ for blocker, software, origin, nodeinfo_url in rows: {**federation["mrf_simple"], **{"quarantined_instances": federation["quarantined_instances"]}} ).items(): - # 
NOISY-DEBUG: print("DEBUG: block_level, blocks():", block_level, len(blocks)) + # DEBUG: print("DEBUG: block_level, blocks():", block_level, len(blocks)) block_level = fba.tidyup(block_level) - # NOISY-DEBUG: print("DEBUG: BEFORE block_level:", block_level) + # DEBUG: print("DEBUG: BEFORE block_level:", block_level) if block_level == "": print("WARNING: block_level is now empty!") continue for blocked in blocks: - # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked) + # DEBUG: print("DEBUG: BEFORE blocked:", blocked) blocked = fba.tidyup(blocked) - # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked) + # DEBUG: print("DEBUG: AFTER blocked:", blocked) if blocked == "": print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level) continue - - if blocked.count("*") > 1: + elif fba.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue + elif blocked.count("*") > 1: # -ACK!-oma also started obscuring domains without hash fba.cursor.execute( "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? 
ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] ) searchres = fba.cursor.fetchone() - # NOISY-DEBUG: print("DEBUG: searchres[]:", type(searchres)) + # DEBUG: print("DEBUG: searchres[]:", type(searchres)) if searchres != None: blocked = searchres[0] nodeinfo_url = searchres[1] - # NOISY-DEBUG: print("DEBUG: Looked up domain:", blocked) + # DEBUG: print("DEBUG: Looked up domain:", blocked) + elif not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}' is not a valid domai name - skipped!") + continue - # NOISY-DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) if not fba.is_instance_registered(blocked): - # NOISY-DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") fba.add_instance(blocked, blocker, origin, nodeinfo_url) fba.cursor.execute( @@ -113,47 +119,50 @@ for blocker, software, origin, nodeinfo_url in rows: ) if fba.cursor.fetchone() == None: - # NOISY-DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level) + # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level) fba.block_instance(blocker, blocked, "unknown", block_level) if block_level == "reject": - # NOISY-DEBUG: print("DEBUG: Adding to blockdict:", blocked) + # DEBUG: print("DEBUG: Adding to blockdict:", blocked) blockdict.append( { "blocked": blocked, "reason" : None }) else: - # NOISY-DEBUG: print("DEBUG: Updating last_seen:", blocker, blocked, block_level) + # DEBUG: print("DEBUG: Updating last_seen:", blocker, blocked, block_level) fba.update_last_seen(blocker, blocked, block_level) fba.connection.commit() # Reasons if "mrf_simple_info" in federation: - # NOISY-DEBUG: print("DEBUG: Found mrf_simple_info:", blocker) + # DEBUG: print("DEBUG: Found 
mrf_simple_info:", blocker) for block_level, info in ( {**federation["mrf_simple_info"], **(federation["quarantined_instances_info"] if "quarantined_instances_info" in federation else {})} ).items(): - # NOISY-DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items())) + # DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items())) block_level = fba.tidyup(block_level) - # NOISY-DEBUG: print("DEBUG: BEFORE block_level:", block_level) + # DEBUG: print("DEBUG: BEFORE block_level:", block_level) if block_level == "": print("WARNING: block_level is now empty!") continue for blocked, reason in info.items(): - # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked) + # DEBUG: print("DEBUG: BEFORE blocked:", blocked) blocked = fba.tidyup(blocked) - # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked) + # DEBUG: print("DEBUG: AFTER blocked:", blocked) if blocked == "": print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level) continue + elif fba.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue elif blocked.count("*") > 1: # same domain guess as above, but for reasons field fba.cursor.execute( @@ -165,18 +174,21 @@ for blocker, software, origin, nodeinfo_url in rows: blocked = searchres[0] origin = searchres[1] nodeinfo_url = searchres[2] + elif not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}' is not a valid domai name - skipped!") + continue - # NOISY-DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) if not fba.is_instance_registered(blocked): - # NOISY-DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") 
fba.add_instance(blocked, blocker, origin, nodeinfo_url) - # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"]) + # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"]) fba.update_block_reason(reason["reason"], blocker, blocked, block_level) for entry in blockdict: if entry["blocked"] == blocked: - # NOISY-DEBUG: print("DEBUG: Updating entry reason:", blocked) + # DEBUG: print("DEBUG: Updating entry reason:", blocked) entry["reason"] = reason["reason"] fba.connection.commit() @@ -195,23 +207,23 @@ for blocker, software, origin, nodeinfo_url in rows: } # handling CSRF, I've saw at least one server requiring it to access the endpoint - # NOISY-DEBUG: print("DEBUG: Fetching meta:", blocker) + # DEBUG: print("DEBUG: Fetching meta:", blocker) meta = bs4.BeautifulSoup( - reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=(fba.config["connection_timeout"], config["read_timeout"])).text, + reqto.get(f"https://{blocker}/", headers=fba.headers, timeout=(fba.config["connection_timeout"], fba.config["read_timeout"])).text, "html.parser", ) try: csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"] - # NOISY-DEBUG: print("DEBUG: Adding CSRF token:", blocker, csrf) + # DEBUG: print("DEBUG: Adding CSRF token:", blocker, csrf) reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}} - except: - # NOISY-DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker) + except BaseException as e: + # DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker, e) reqheaders = fba.api_headers - # NOISY-DEBUG: print("DEBUG: Quering API domain_blocks:", blocker) - blocks = reqto.get(f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json() + # DEBUG: print("DEBUG: Querying API domain_blocks:", blocker) + blocks = reqto.get(f"https://{blocker}/api/v1/instance/domain_blocks", 
headers=reqheaders, timeout=(fba.config["connection_timeout"], fba.config["read_timeout"])).json() - # NOISY-DEBUG: print("DEBUG: blocks():", len(blocks)) + print(f"INFO: Checking {len(blocks)} entries from blocker='{blocker}' ...") for block in blocks: entry = { 'domain': block['domain'], @@ -219,7 +231,7 @@ for blocker, software, origin, nodeinfo_url in rows: 'reason': block['comment'] } - # NOISY-DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment']) + # DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment']) if block['severity'] == 'suspend': json['reject'].append(entry) elif block['severity'] == 'silence': @@ -230,38 +242,32 @@ for blocker, software, origin, nodeinfo_url in rows: json['report_removal'].append(entry) else: print("WARNING: Unknown severity:", block['severity'], block['domain']) - except: - # NOISY-DEBUG: print("DEBUG: Failed, Trying mastodon-specific fetches:", blocker) + except BaseException as e: + # DEBUG: print(f"DEBUG: Failed, trying mastodon-specific fetches: blocker='{blocker}',exception[{type(e)}]={str(e)}") json = fba.get_mastodon_blocks(blocker) - # NOISY-DEBUG: print("DEBUG: json.items():", blocker, len(json.items())) + print(f"INFO: Checking {len(json.items())} entries from blocker='{blocker}' ...") for block_level, blocks in json.items(): - # NOISY-DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks)) + # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks)) block_level = fba.tidyup(block_level) - # NOISY-DEBUG: print("DEBUG: AFTER-block_level:", block_level) + # DEBUG: print("DEBUG: AFTER-block_level:", block_level) if block_level == "": print("WARNING: block_level is empty, blocker:", blocker) continue for instance in blocks: blocked, blocked_hash, reason = instance.values() - # NOISY-DEBUG: print("DEBUG: blocked,hash,reason:", 
blocked, blocked_hash, reason) + # DEBUG: print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason) blocked = fba.tidyup(blocked) - # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked) + # DEBUG: print("DEBUG: AFTER-blocked:", blocked) if blocked == "": print("WARNING: blocked is empty:", blocker) continue - elif blocked.count("*") < 1: - # No obsfucation for this instance - fba.cursor.execute( - "SELECT hash FROM instances WHERE domain = ? LIMIT 1", [blocked] - ) - - if fba.cursor.fetchone() == None: - # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker, origin, nodeinfo_url) - else: + elif fba.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue + elif blocked.count("*") > 0: # Doing the hash search for instance names as well to tidy up DB fba.cursor.execute( "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash] @@ -269,15 +275,23 @@ for blocker, software, origin, nodeinfo_url in rows: searchres = fba.cursor.fetchone() if searchres != None: - # NOISY-DEBUG: print("DEBUG: Updating domain: ", searchres[0]) + # DEBUG: print("DEBUG: Updating domain: ", searchres[0]) blocked = searchres[0] origin = searchres[1] nodeinfo_url = searchres[2] - # NOISY-DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) if not fba.is_instance_registered(blocked): - # NOISY-DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") fba.add_instance(blocked, blocker, origin, nodeinfo_url) + elif not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}' is not a valid domai name - skipped!") + continue + + 
# DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + if not fba.is_instance_registered(blocked): + # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) + fba.add_instance(blocked, blocker, origin, nodeinfo_url) fba.cursor.execute( "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1", @@ -289,6 +303,7 @@ for blocker, software, origin, nodeinfo_url in rows: ) if fba.cursor.fetchone() == None: + # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level) fba.block_instance(blocker, blocked if blocked.count("*") <= 1 else blocked_hash, reason, block_level) if block_level == "reject": @@ -301,7 +316,7 @@ for blocker, software, origin, nodeinfo_url in rows: fba.update_last_seen(blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level) if reason != "": - # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason) + # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason) fba.update_block_reason(reason, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level) fba.connection.commit() @@ -321,23 +336,27 @@ for blocker, software, origin, nodeinfo_url in rows: print("WARNING: takahe is not fully supported for fetching blacklist!", blocker) #json = fba.get_takahe_blocks(blocker) + print(f"INFO: Checking {len(json.items())} entries from blocker='{blocker}' ...") for block_level, blocks in json.items(): - # NOISY-DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks)) + # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks)) block_level = fba.tidyup(block_level) - # NOISY-DEBUG: print("DEBUG: AFTER-block_level:", block_level) + # DEBUG: print("DEBUG: AFTER-block_level:", block_level) if block_level == "": print("WARNING: block_level is empty, blocker:", blocker) continue for instance in blocks: blocked, reason = instance.values() - # NOISY-DEBUG: print("DEBUG: BEFORE 
blocked:", blocked) + # DEBUG: print("DEBUG: BEFORE blocked:", blocked) blocked = fba.tidyup(blocked) - # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked) + # DEBUG: print("DEBUG: AFTER blocked:", blocked) if blocked == "": print("WARNING: blocked is empty:", blocker) continue + elif fba.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue elif blocked.count("*") > 0: # Some friendica servers also obscure domains without hash fba.cursor.execute( @@ -350,8 +369,7 @@ for blocker, software, origin, nodeinfo_url in rows: blocked = searchres[0] origin = searchres[1] nodeinfo_url = searchres[2] - - if blocked.count("?") > 0: + elif blocked.count("?") > 0: # Some obscure them with question marks, not sure if that's dependent on version or not fba.cursor.execute( "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")] @@ -361,14 +379,17 @@ for blocker, software, origin, nodeinfo_url in rows: blocked = searchres[0] origin = searchres[1] nodeinfo_url = searchres[2] + elif not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}' is not a valid domai name - skipped!") + continue - # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked) + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) if not fba.is_instance_registered(blocked): - # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) + # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) fba.add_instance(blocked, blocker, origin, nodeinfo_url) fba.cursor.execute( - "SELECT * FROM blocks WHERE blocker = ? AND blocked = ?", + "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? 
LIMIT 1", (blocker, blocked), ) @@ -385,7 +406,7 @@ for blocker, software, origin, nodeinfo_url in rows: fba.update_last_seen(blocker, blocked, block_level) if reason != '': - # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason) + # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason) fba.update_block_reason(reason, blocker, blocked, block_level) fba.connection.commit() @@ -395,23 +416,26 @@ for blocker, software, origin, nodeinfo_url in rows: print("INFO: blocker:", blocker) try: # Blocks - federation = reqto.get(f"https://{blocker}{get_peers_url}?filter=suspended", headers=fba.api_headers, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json() + federation = reqto.get(f"https://{blocker}{fba.get_peers_url}?filter=suspended", headers=fba.api_headers, timeout=(fba.config["connection_timeout"], fba.config["read_timeout"])).json() if (federation == None): print("WARNING: No valid response:", blocker); elif "error" in federation: print("WARNING: API returned error:", federation["error"]) else: - # NOISY-DEBUG: print("DEBUG: Checking fenderation():", len(federation)) + print(f"INFO: Checking {len(federation)} entries from blocker='{blocker}' ...") for peer in federation: blocked = peer["domain"].lower() - # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked) + # DEBUG: print("DEBUG: BEFORE blocked:", blocked) blocked = fba.tidyup(blocked) - # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked) + # DEBUG: print("DEBUG: AFTER blocked:", blocked) if blocked == "": print("WARNING: blocked is empty:", blocker) continue + elif fba.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue elif blocked.count("*") > 0: # GTS does not have hashes for obscured domains, so we have to guess it fba.cursor.execute( @@ -423,9 +447,13 @@ for blocker, software, origin, nodeinfo_url in rows: blocked = searchres[0] origin = searchres[1] nodeinfo_url = searchres[2] + 
elif not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}' is not a valid domai name - skipped!") + continue + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) if not fba.is_instance_registered(blocked): - # NOISY-DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") fba.add_instance(blocked, blocker, origin, nodeinfo_url) fba.cursor.execute( @@ -438,7 +466,7 @@ for blocker, software, origin, nodeinfo_url in rows: ) if fba.cursor.fetchone() == None: - # NOISY-DEBUG: print(f"DEBUG: blocker='{blocker}' is blocking '{blocked}' for unknown reason at this point") + # DEBUG: print(f"DEBUG: blocker='{blocker}' is blocking '{blocked}' for unknown reason at this point") fba.block_instance(blocker, blocked, "unknown", "reject") blockdict.append( @@ -450,12 +478,12 @@ for blocker, software, origin, nodeinfo_url in rows: fba.update_last_seen(blocker, blocked, "reject") if "public_comment" in peer: - # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, peer["public_comment"]) + # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, peer["public_comment"]) fba.update_block_reason(peer["public_comment"], blocker, blocked, "reject") for entry in blockdict: if entry["blocked"] == blocked: - # NOISY-DEBUG: print(f"DEBUG: Setting block reason for blocked='{blocked}':'{peer['public_comment']}'") + # DEBUG: print(f"DEBUG: Setting block reason for blocked='{blocked}':'{peer['public_comment']}'") entry["reason"] = peer["public_comment"] fba.connection.commit() -- 2.39.5