From c92c41f9a988e8c09219e8b05e82677e77d6fd87 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Mon, 12 Jun 2023 18:09:07 +0200 Subject: [PATCH] Continued: - duplicated some code for when no "mrf_simple" is returned but only "quarantined_instances" --- fba/networks/mastodon.py | 2 +- fba/networks/pleroma.py | 173 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 173 insertions(+), 2 deletions(-) diff --git a/fba/networks/mastodon.py b/fba/networks/mastodon.py index 04a935b..6fa4605 100644 --- a/fba/networks/mastodon.py +++ b/fba/networks/mastodon.py @@ -95,7 +95,7 @@ def fetch_blocks_from_about(domain: str) -> dict: # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'") if doc is None: - print(f"WARNING: Cannot find any 'h3' tags for domain='{domain}' - EXIT!") + print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!") return blocklist for header in doc.find_all("h3"): diff --git a/fba/networks/pleroma.py b/fba/networks/pleroma.py index 9b70245..e38041c 100644 --- a/fba/networks/pleroma.py +++ b/fba/networks/pleroma.py @@ -15,6 +15,7 @@ # along with this program. If not, see <https://www.gnu.org/licenses/>.
import inspect + import validators from fba import blacklist @@ -45,6 +46,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): blockdict = list() rows = None try: + # DEBUG: print(f"DEBUG: Fetching nodeinfo: domain='{domain}',nodeinfo_url='{nodeinfo_url}'") rows = federation.fetch_nodeinfo(domain, nodeinfo_url) except network.exceptions as exception: print(f"WARNING: Exception '{type(exception)}' during fetching nodeinfo") @@ -99,6 +101,19 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # Obscured domain name with no hash row = instances.deobscure("*", blocked) + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + if row is None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + continue + + # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + blocked = row[0] + origin = row[1] + nodeinfo_url = row[2] + elif blocked.count("?") > 0: + # Obscured domain name with no hash + row = instances.deobscure("?", blocked) + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") if row is None: print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") @@ -136,6 +151,77 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): else: # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...") blocks.update_last_seen(domain, blocked, block_level) + elif "quarantined_instances" in data: + # DEBUG: print(f"DEBUG: Found 'quarantined_instances' in JSON response: domain='{domain}'") + block_level = "quarantined" + + for blocked in data["quarantined_instances"]: + # DEBUG: print("DEBUG: BEFORE blocked:", blocked) + blocked = tidyup.domain(blocked) + # DEBUG: print("DEBUG: AFTER blocked:", blocked) + + if blocked == "": + print("WARNING: blocked is empty after tidyup.domain():", domain, block_level) + continue + elif blacklist.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is 
blacklisted - skipping!") + continue + elif blocked.count("*") > 0: + # Obscured domain name with no hash + row = instances.deobscure("*", blocked) + + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + if row is None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + continue + + # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + blocked = row[0] + origin = row[1] + nodeinfo_url = row[2] + elif blocked.count("?") > 0: + # Obscured domain name with no hash + row = instances.deobscure("?", blocked) + + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + if row is None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + continue + + # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + blocked = row[0] + origin = row[1] + nodeinfo_url = row[2] + + # DEBUG: print(f"DEBUG: blocked='{blocked}'") + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!") + continue + elif blocked.endswith(".arpa"): + print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") + continue + elif not instances.is_registered(blocked): + # Commit changes + fba.connection.commit() + + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) + + if not blocks.is_instance_blocked(domain, blocked, block_level): + # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level) + blocks.add_instance(domain, blocked, "unknown", block_level) + + if block_level == "reject": + # DEBUG: print("DEBUG: Adding to blockdict:", blocked) + blockdict.append({ + "blocked": blocked, + "reason" : None + }) + else: + # DEBUG: print(f"DEBUG: Updating block last seen for 
domain='{domain}',blocked='{blocked}' ...") + blocks.update_last_seen(domain, blocked, block_level) + else: + print(f"WARNING: Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='{domain}'") # DEBUG: print("DEBUG: Committing changes ...") fba.connection.commit() @@ -183,6 +269,19 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # Obscured domain name with no hash row = instances.deobscure("*", blocked) + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + if row is None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + continue + + # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + blocked = row[0] + origin = row[1] + nodeinfo_url = row[2] + elif blocked.count("?") > 0: + # Obscured domain name with no hash + row = instances.deobscure("?", blocked) + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") if row is None: print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") @@ -201,7 +300,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") continue elif not instances.is_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodein + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) # DEBUG: print(f"DEBUG: Updating block reason: reason='{reason}',domain='{domain}',blocked='{blocked}',block_level='{block_level}'") @@ -212,6 +311,78 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): if entry["blocked"] == blocked: # DEBUG: print(f"DEBUG: Updating entry reason: blocked='{blocked}',reason='{reason}'") entry["reason"] = reason + 
elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]: + # DEBUG: print(f"DEBUG: Found 'quarantined_instances_info' in JSON response: domain='{domain}'") + block_level = "quarantined" + + #print(data["quarantined_instances_info"]) + rows = data["quarantined_instances_info"]["quarantined_instances"] + for blocked in rows: + # DEBUG: print("DEBUG: BEFORE blocked:", blocked) + blocked = tidyup.domain(blocked) + # DEBUG: print("DEBUG: AFTER blocked:", blocked) + + if blocked not in rows or "reason" not in rows[blocked]: + print(f"WARNING: Cannot find blocked='{blocked}' in rows()={len(rows)},domain='{domain}'") + break + + reason = rows[blocked]["reason"] + # DEBUG: print(f"DEBUG: reason='{reason}'") + + if blocked == "": + print("WARNING: blocked is empty after tidyup.domain():", domain, block_level) + continue + elif blacklist.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue + elif blocked.count("*") > 0: + # Obscured domain name with no hash + row = instances.deobscure("*", blocked) + + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + if row is None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + continue + + # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + blocked = row[0] + origin = row[1] + nodeinfo_url = row[2] + elif blocked.count("?") > 0: + # Obscured domain name with no hash + row = instances.deobscure("?", blocked) + + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + if row is None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + continue + + # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + blocked = row[0] + origin = row[1] + nodeinfo_url = row[2] + + # DEBUG: print(f"DEBUG: blocked='{blocked}'") + if not validators.domain(blocked): + print(f"WARNING: 
blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!") + continue + elif blocked.endswith(".arpa"): + print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") + continue + elif not instances.is_registered(blocked): + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) + + # DEBUG: print(f"DEBUG: Updating block reason: reason='{reason}',domain='{domain}',blocked='{blocked}',block_level='{block_level}'") + blocks.update_reason(reason, domain, blocked, block_level) + + # DEBUG: print(f"DEBUG: blockdict()={len(blockdict)}") + for entry in blockdict: + if entry["blocked"] == blocked: + # DEBUG: print(f"DEBUG: Updating entry reason: blocked='{blocked}',reason='{reason}'") + entry["reason"] = reason + else: + print(f"WARNING: Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='{domain}'") fba.connection.commit() # DEBUG: print("DEBUG: EXIT!") -- 2.39.5