From 2d589b91e9eb2a98c5861e8bd1dbbb45e38b2af8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Wed, 14 Jun 2023 00:43:29 +0200 Subject: [PATCH] Continued: - reformatted ({}) block - commented out debug line - converted print("", foo, bar) to masked (f) version - fixed logic bug when not all needed steps were executed --- fba/networks/mastodon.py | 57 ++++++++++++++++++++++------------------ 1 file changed, 31 insertions(+), 26 deletions(-) diff --git a/fba/networks/mastodon.py b/fba/networks/mastodon.py index 3f2de7c..cdfccc8 100644 --- a/fba/networks/mastodon.py +++ b/fba/networks/mastodon.py @@ -112,13 +112,11 @@ def fetch_blocks_from_about(domain: str) -> dict: if header_text in blocklist or header_text.lower() in blocklist: # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu for line in header.find_all_next("table")[0].find_all("tr")[1:]: - blocklist[header_text].append( - { - "domain": tidyup.domain(line.find("span").text), - "hash" : tidyup.domain(line.find("span")["title"][9:]), - "reason": tidyup.reason(line.find_all("td")[1].text), - } - ) + blocklist[header_text].append({ + "domain": tidyup.domain(line.find("span").text), + "hash" : tidyup.domain(line.find("span")["title"][9:]), + "reason": tidyup.reason(line.find_all("td")[1].text), + }) else: print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}") @@ -193,7 +191,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # Check type # DEBUG: print(f"DEBUG: block[]='{type(block)}'") if not isinstance(block, dict): - print(f"DEBUG: block[]='{type(block)}' is of type 'dict' - SKIPPED!") + # DEBUG: print(f"DEBUG: block[]='{type(block)}' is of type 'dict' - SKIPPED!") continue # Map block -> entry @@ -218,7 +216,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") rows['report_removal'].append(entry) else: - print("WARNING: Unknown severity:", block['severity'], block['domain']) + print(f"WARNING: Unknown severity='{block['severity']}', domain='{block['domain']}'") else: # DEBUG: print(f"DEBUG: domain='{domain}' has returned zero rows, trying /about/more page ...") rows = fetch_blocks_from_about(domain) @@ -261,20 +259,30 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): blocked = row[0] origin = row[1] nodeinfo_url = row[2] + elif blocked.count("?") > 0: + # Doing the hash search for instance names as well to tidy up DB + row = instances.deobscure("?", blocked, blocked_hash) - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!") - continue - elif blocked.endswith(".arpa"): - print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + if row is None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!") continue - elif not instances.is_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) - elif not validators.domain(blocked): + + # DEBUG: print("DEBUG: Updating domain: ", row[0]) + blocked = row[0] + origin = row[1] + nodeinfo_url = row[2] + + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + if not validators.domain(blocked): print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!") continue + elif blocked.endswith(".arpa"): + print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") + continue + elif not instances.is_registered(blocked): + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) if not validators.domain(blocked): @@ -287,12 +295,9 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain) instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) - blocking = blocked if blocked.count("*") <= 1 else blocked_hash - # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'") - if not blocks.is_instance_blocked(domain, blocked, block_level): # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level) - blocks.add_instance(domain, blocking, reason, block_level) + blocks.add_instance(domain, blocked, reason, block_level) if block_level == "reject": found_blocks.append({ @@ -300,9 +305,9 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): "reason" : reason }) else: - # DEBUG: print(f"DEBUG: Updating block last seen and reason for domain='{domain}',blocking='{blocking}' ...") - blocks.update_last_seen(domain, blocking, block_level) - blocks.update_reason(reason, domain, blocking, block_level) + # DEBUG: print(f"DEBUG: Updating block last seen and reason for domain='{domain}',blocked='{blocked}' ...") + blocks.update_last_seen(domain, blocked, block_level) + blocks.update_reason(reason, domain, blocked, block_level) # DEBUG: print("DEBUG: Committing changes ...") fba.connection.commit() -- 2.39.5