From d30a841e8f6a7217da892b0771add5d7b4ffafb2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Tue, 13 Jun 2023 00:01:25 +0200 Subject: [PATCH] Continued: - moved blocks, instances to new package fba.models as these modules are only around a database table with the same name - moved+renamed fba.log_error() to fba.models.error_log.add() - pleroma has a /instance/about/index.html "API" which can be parsed for block lists if the JSON has returned no 'mrf_simple' and 'quarantined_instances' --- fba/__init__.py | 3 +- fba/commands.py | 5 +- fba/fba.py | 41 +------ fba/federation.py | 3 +- fba/helpers/tidyup.py | 3 - fba/models/__init__.py | 20 ++++ fba/{ => models}/blocks.py | 4 +- fba/models/error_log.py | 55 ++++++++++ fba/{ => models}/instances.py | 4 +- fba/network.py | 3 +- fba/networks/friendica.py | 3 +- fba/networks/lemmy.py | 3 +- fba/networks/mastodon.py | 21 ++-- fba/networks/misskey.py | 15 ++- fba/networks/peertube.py | 3 +- fba/networks/pleroma.py | 199 +++++++++++++++++++++++++++++++--- 16 files changed, 300 insertions(+), 85 deletions(-) create mode 100644 fba/models/__init__.py rename fba/{ => models}/blocks.py (98%) create mode 100644 fba/models/error_log.py rename fba/{ => models}/instances.py (99%) diff --git a/fba/__init__.py b/fba/__init__.py index 0230fa3..ceb63ed 100644 --- a/fba/__init__.py +++ b/fba/__init__.py @@ -15,7 +15,6 @@ __all__ = [ 'blacklist', - 'blocks', 'boot', 'commands', 'config', @@ -23,8 +22,8 @@ __all__ = [ 'federation', 'fba', 'helpers', - 'instances', 'locking', + 'model', 'network', 'networks', ] diff --git a/fba/commands.py b/fba/commands.py index 2c50c68..ad5b7ca 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -27,16 +27,17 @@ import reqto import validators from fba import blacklist -from fba import blocks from fba import config from fba import federation from fba import fba -from fba import instances from fba import locking from fba import network from fba.helpers import tidyup +from fba.models
import blocks +from fba.models import instances + from fba.networks import friendica from fba.networks import mastodon from fba.networks import misskey diff --git a/fba/fba.py b/fba/fba.py index 6273e45..3cfc925 100644 --- a/fba/fba.py +++ b/fba/fba.py @@ -14,9 +14,7 @@ # along with this program. If not, see . import hashlib -import json import sqlite3 -import time from urllib.parse import urlparse @@ -24,11 +22,11 @@ import requests import validators from fba import blacklist -from fba import config from fba import federation -from fba import instances from fba import network +from fba.models import instances + # Connect to database connection = sqlite3.connect("blocks.db") cursor = connection.cursor() @@ -47,41 +45,6 @@ def get_hash(domain: str) -> str: return hashlib.sha256(domain.encode("utf-8")).hexdigest() -def log_error(domain: str, error: dict): - # DEBUG: print("DEBUG: domain,error[]:", domain, type(error)) - if not isinstance(domain, str): - raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") - elif domain == "": - raise ValueError("Parameter 'domain' is empty") - elif config.get("write_error_log").lower() != "true": - # DEBUG: print(f"DEBUG: Writing to error_log is disabled in configuruation file - EXIT!") - return - - # DEBUG: print("DEBUG: BEFORE error[]:", type(error)) - if isinstance(error, BaseException, error, json.decoder.JSONDecodeError): - error = f"error[{type(error)}]='{str(error)}'" - - # DEBUG: print("DEBUG: AFTER error[]:", type(error)) - if isinstance(error, str): - cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, 999, ?, ?)",[ - domain, - error, - time.time() - ]) - else: - cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, ?, ?, ?)",[ - domain, - error["status_code"], - error["error_message"], - time.time() - ]) - - # Cleanup old entries - # DEBUG: print(f"DEBUG: Purging old records (distance: {config.get('error_log_cleanup')})") - 
cursor.execute("DELETE FROM error_log WHERE created < ?", [time.time() - config.get("error_log_cleanup")]) - - # DEBUG: print("DEBUG: EXIT!") - def fetch_url(url: str, headers: dict, timeout: tuple) -> requests.models.Response: # DEBUG: print(f"DEBUG: url='{url}',headers()={len(headers)},timeout={timeout} - CALLED!") if not isinstance(url, str): diff --git a/fba/federation.py b/fba/federation.py index 54157b6..9e0922d 100644 --- a/fba/federation.py +++ b/fba/federation.py @@ -19,12 +19,13 @@ import validators from fba import blacklist from fba import config from fba import csrf -from fba import instances from fba import network from fba.helpers import tidyup from fba.helpers import version +from fba.models import instances + from fba.networks import lemmy from fba.networks import misskey from fba.networks import peertube diff --git a/fba/helpers/tidyup.py b/fba/helpers/tidyup.py index b784eeb..32f3e9e 100644 --- a/fba/helpers/tidyup.py +++ b/fba/helpers/tidyup.py @@ -23,9 +23,6 @@ def reason(string: str) -> str: # Strip string string = string.strip() - # Replace â with " - string = re.sub("â", "\"", string) - # DEBUG: print(f"DEBUG: string='{string}' - EXIT!") return string diff --git a/fba/models/__init__.py b/fba/models/__init__.py new file mode 100644 index 0000000..bc2afbe --- /dev/null +++ b/fba/models/__init__.py @@ -0,0 +1,20 @@ +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. 
+# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +__all__ = [ + 'blocks', + 'error_log', + 'instances', +] diff --git a/fba/blocks.py b/fba/models/blocks.py similarity index 98% rename from fba/blocks.py rename to fba/models/blocks.py index d9ab70d..8b6c745 100644 --- a/fba/blocks.py +++ b/fba/models/blocks.py @@ -156,10 +156,10 @@ def add_instance(blocker: str, blocked: str, reason: str, block_level: str): # Maybe needs cleaning reason = tidyup.reason(reason) - print(f"INFO: New block: blocker='{blocker}',blocked='{blocked}', reason='{reason}', block_level='{block_level}'") + print(f"INFO: New block: blocker='{blocker}',blocked='{blocked}',reason='{reason}',block_level='{block_level}'") try: fba.cursor.execute( - "INSERT INTO blocks (blocker, blocked, reason, block_level, first_seen, last_seen) VALUES(?, ?, ?, ?, ?, ?)", + "INSERT INTO blocks (blocker, blocked, reason, block_level, first_seen, last_seen) VALUES (?, ?, ?, ?, ?, ?)", ( blocker, blocked, diff --git a/fba/models/error_log.py b/fba/models/error_log.py new file mode 100644 index 0000000..6602406 --- /dev/null +++ b/fba/models/error_log.py @@ -0,0 +1,55 @@ +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +import json +import time + +from fba import config +from fba import fba + +def add(domain: str, error: dict): + # DEBUG: print("DEBUG: domain,error[]:", domain, type(error)) + if not isinstance(domain, str): + raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") + elif domain == "": + raise ValueError("Parameter 'domain' is empty") + elif config.get("write_error_log").lower() != "true": + # DEBUG: print(f"DEBUG: Writing to error_log is disabled in configuruation file - EXIT!") + return + + # DEBUG: print("DEBUG: BEFORE error[]:", type(error)) + if isinstance(error, BaseException, error, json.decoder.JSONDecodeError): + error = f"error[{type(error)}]='{str(error)}'" + + # DEBUG: print("DEBUG: AFTER error[]:", type(error)) + if isinstance(error, str): + fba.cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, 999, ?, ?)",[ + domain, + error, + time.time() + ]) + else: + fba.cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, ?, ?, ?)",[ + domain, + error["status_code"], + error["error_message"], + time.time() + ]) + + # Cleanup old entries + # DEBUG: print(f"DEBUG: Purging old records (distance: {config.get('error_log_cleanup')})") + fba.cursor.execute("DELETE FROM error_log WHERE created < ?", [time.time() - config.get("error_log_cleanup")]) + + # DEBUG: print("DEBUG: EXIT!") diff --git a/fba/instances.py b/fba/models/instances.py similarity index 99% rename from fba/instances.py rename to fba/models/instances.py index a6109df..99e61ec 100644 --- a/fba/instances.py +++ b/fba/models/instances.py @@ -29,6 +29,8 @@ from fba import network from fba.helpers import cache +from fba.models import error_log + # Found info from node, such as nodeinfo URL, detection mode that needs to be # written to database. 
Both arrays must be filled at the same time or else # update_data() will fail @@ -292,7 +294,7 @@ def update_last_error(domain: str, error: dict): # DEBUG: print(f"DEBUG: Invoking update_data({domain}) ...") update_data(domain) - fba.log_error(domain, error) + error_log.add(domain, error) # DEBUG: print("DEBUG: EXIT!") diff --git a/fba/network.py b/fba/network.py index 5b87c90..a86d6d4 100644 --- a/fba/network.py +++ b/fba/network.py @@ -20,7 +20,8 @@ import requests from fba import config from fba import fba -from fba import instances + +from fba.models import instances # HTTP headers for non-API requests web_headers = { diff --git a/fba/networks/friendica.py b/fba/networks/friendica.py index dc63120..6941dab 100644 --- a/fba/networks/friendica.py +++ b/fba/networks/friendica.py @@ -17,11 +17,12 @@ import bs4 from fba import config -from fba import instances from fba import network from fba.helpers import tidyup +from fba.models import instances + def fetch_blocks(domain: str) -> dict: # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if not isinstance(domain, str): diff --git a/fba/networks/lemmy.py b/fba/networks/lemmy.py index cba46b1..8a0a962 100644 --- a/fba/networks/lemmy.py +++ b/fba/networks/lemmy.py @@ -17,9 +17,10 @@ from fba import config from fba import csrf from fba import federation -from fba import instances from fba import network +from fba.models import instances + def fetch_peers(domain: str) -> list: # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},software='lemmy' - CALLED!") if not isinstance(domain, str): diff --git a/fba/networks/mastodon.py b/fba/networks/mastodon.py index 6fa4605..6470292 100644 --- a/fba/networks/mastodon.py +++ b/fba/networks/mastodon.py @@ -20,15 +20,16 @@ import bs4 import validators from fba import blacklist -from fba import blocks from fba import config from fba import csrf from fba import fba -from fba import instances from fba import network from fba.helpers import tidyup +from fba.models import blocks 
+from fba.models import instances + language_mapping = { # English -> English "Silenced instances" : "Silenced servers", @@ -63,15 +64,8 @@ def fetch_blocks_from_about(domain: str) -> dict: raise ValueError("Parameter 'domain' is empty") # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain) - blocklist = { - "Suspended servers": [], - "Filtered media" : [], - "Limited servers" : [], - "Silenced servers" : [], - } - doc = None - for path in ("/about/more", "/about"): + for path in ["/about/more", "/about"]: try: # DEBUG: print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...") doc = bs4.BeautifulSoup( @@ -98,6 +92,13 @@ def fetch_blocks_from_about(domain: str) -> dict: print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!") return blocklist + blocklist = { + "Suspended servers": [], + "Filtered media" : [], + "Limited servers" : [], + "Silenced servers" : [], + } + for header in doc.find_all("h3"): header_text = tidyup.reason(header.text) diff --git a/fba/networks/misskey.py b/fba/networks/misskey.py index 68e5396..1dedde7 100644 --- a/fba/networks/misskey.py +++ b/fba/networks/misskey.py @@ -19,12 +19,13 @@ import json from fba import blacklist from fba import config from fba import csrf -from fba import instances from fba import network from fba.helpers import dicts from fba.helpers import tidyup +from fba.models import instances + def fetch_peers(domain: str) -> list: # DEBUG: print(f"DEBUG: domain({len(domain)})={domain} - CALLED!") if not isinstance(domain, str): @@ -202,13 +203,11 @@ def fetch_blocks(domain: str) -> dict: # DEBUG: print(f"DEBUG: instance[{type(instance)}]='{instance}' - suspend") if "isSuspended" in instance and instance["isSuspended"] and not dicts.has_key(blocklist["suspended"], "domain", instance["host"]): count = count + 1 - blocklist["suspended"].append( - { - "domain": tidyup.domain(instance["host"]), - # no reason field, nothing - "reason": None - } - ) + blocklist["suspended"].append({ 
+ "domain": tidyup.domain(instance["host"]), + # no reason field, nothing + "reason": None + }) # DEBUG: print(f"DEBUG: count={count}") if count == 0: diff --git a/fba/networks/peertube.py b/fba/networks/peertube.py index c25cf8c..793fb45 100644 --- a/fba/networks/peertube.py +++ b/fba/networks/peertube.py @@ -16,9 +16,10 @@ from fba import config from fba import csrf -from fba import instances from fba import network +from fba.models import instances + def fetch_peers(domain: str) -> list: print(f"DEBUG: domain({len(domain)})={domain},software='peertube' - CALLED!") if not isinstance(domain, str): diff --git a/fba/networks/pleroma.py b/fba/networks/pleroma.py index 6822495..b32c327 100644 --- a/fba/networks/pleroma.py +++ b/fba/networks/pleroma.py @@ -16,17 +16,25 @@ import inspect +import bs4 import validators from fba import blacklist -from fba import blocks +from fba import config from fba import fba from fba import federation -from fba import instances from fba import network from fba.helpers import tidyup +from fba.models import blocks +from fba.models import instances + +language_mapping = { + # English -> English + "Reject": "Suspended servers", +} + def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!") if not isinstance(domain, str): @@ -42,7 +50,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): elif nodeinfo_url == "": raise ValueError("Parameter 'nodeinfo_url' is empty") - # Blocks + # @TODO Unused blockdict blockdict = list() rows = None try: @@ -65,9 +73,11 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): instances.update_last_nodeinfo(domain) data = rows["metadata"]["federation"] + found = False if "mrf_simple" in data: # DEBUG: print("DEBUG: Found mrf_simple:", domain) + found = True for block_level, blocklist in ( { **data["mrf_simple"], @@ -145,14 +155,15 @@ def fetch_blocks(domain: str, origin: str, 
nodeinfo_url: str): if block_level == "reject": # DEBUG: print("DEBUG: Adding to blockdict:", blocked) blockdict.append({ - "blocked": blocked, - "reason" : None + "blocked": blocked, + "reason" : None }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...") - blocks.update_last_seen(domain, blocked, block_level) + else: + # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...") + blocks.update_last_seen(domain, blocked, block_level) elif "quarantined_instances" in data: # DEBUG: print(f"DEBUG: Found 'quarantined_instances' in JSON response: domain='{domain}'") + found = True block_level = "quarantined" for blocked in data["quarantined_instances"]: @@ -214,12 +225,12 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): if block_level == "reject": # DEBUG: print("DEBUG: Adding to blockdict:", blocked) blockdict.append({ - "blocked": blocked, - "reason" : None + "blocked": blocked, + "reason" : None }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...") - blocks.update_last_seen(domain, blocked, block_level) + else: + # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...") + blocks.update_last_seen(domain, blocked, block_level) else: print(f"WARNING: Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='{domain}'") @@ -229,6 +240,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # Reasons if "mrf_simple_info" in data: # DEBUG: print("DEBUG: Found mrf_simple_info:", domain) + found = True for block_level, info in ( { **data["mrf_simple_info"], @@ -314,6 +326,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]: # DEBUG: print(f"DEBUG: Found 'quarantined_instances_info' in JSON response: domain='{domain}'") + 
found = True block_level = "quarantined" #print(data["quarantined_instances_info"]) @@ -385,5 +398,165 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): else: print(f"WARNING: Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='{domain}'") + if not found: + # DEBUG: print(f"DEBUG: Did not find any useable JSON elements, domain='{domain}', continuing with /about page ...") + blocklist = fetch_blocks_from_about(domain) + + # DEBUG: print(f"DEBUG: blocklist()={len(blocklist)}") + if len(blocklist) > 0: + print(f"INFO: Checking {len(blocklist)} record(s) ...") + for block_level in blocklist: + # DEBUG: print(f"DEBUG: block_level='{block_level}'") + rows = blocklist[block_level] + # DEBUG: print(f"DEBUG: rows['{type(rows)}]()={len(rows)}'") + for record in rows: + # DEBUG: print(f"DEBUG: record[]='{type(record)}'") + blocked = tidyup.domain(record["blocked"]) + reason = tidyup.reason(record["reason"]) + # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!") + + if blocked == "": + print("WARNING: blocked is empty after tidyup.domain():", domain, block_level) + continue + elif blacklist.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue + elif blocked.count("*") > 0: + # Obscured domain name with no hash + row = instances.deobscure("*", blocked) + + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + if row is None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + continue + + # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + blocked = row[0] + origin = row[1] + nodeinfo_url = row[2] + elif blocked.count("?") > 0: + # Obscured domain name with no hash + row = instances.deobscure("?", blocked) + + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + if row is None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + 
continue + + # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + blocked = row[0] + origin = row[1] + nodeinfo_url = row[2] + + # DEBUG: print(f"DEBUG: blocked='{blocked}'") + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!") + continue + elif blocked.endswith(".arpa"): + print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") + continue + elif not instances.is_registered(blocked): + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) + + if not blocks.is_instance_blocked(domain, blocked, block_level): + # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level) + blocks.add_instance(domain, blocked, reason, block_level) + + if block_level == "reject": + # DEBUG: print("DEBUG: Adding to blockdict:", blocked) + blockdict.append({ + "blocked": blocked, + "reason" : reason + }) + else: + # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...") + blocks.update_reason(reason, domain, blocked, block_level) + fba.connection.commit() # DEBUG: print("DEBUG: EXIT!") + +def fetch_blocks_from_about(domain: str) -> dict: + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + if not isinstance(domain, str): + raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") + elif domain == "": + raise ValueError("Parameter 'domain' is empty") + + # DEBUG: print(f"DEBUG: Fetching mastodon blocks from domain='{domain}'") + doc = None + for path in ["/instance/about/index.html"]: + try: + # Resetting doc type + doc = None + + # DEBUG: print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...") + response = network.fetch_response( + domain, + path, + network.web_headers, + 
(config.get("connection_timeout"), config.get("read_timeout")) + ) + + # DEBUG: print(f"DEBUG: response.ok='{response.ok}',response.status_code='{response.status_code}',response.text()={len(response.text)}") + if not response.ok or response.text.strip() == "": + print(f"WARNING: path='{path}' does not exist on domain='{domain}' - SKIPPED!") + continue + + # DEBUG: print(f"DEBUG: Parsing response.text()={len(response.text)} Bytes ...") + doc = bs4.BeautifulSoup( + response.text, + "html.parser", + ) + + # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'") + if doc.find("h2") is not None: + # DEBUG: print(f"DEBUG: Found 'h2' header in path='{path}' - BREAK!") + break + + except BaseException as exception: + print("ERROR: Cannot fetch from domain:", domain, exception) + instances.update_last_error(domain, exception) + break + + # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'") + if doc is None: + print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!") + return blocklist + + blocklist = { + "Suspended servers": [], + "Filtered media" : [], + "Limited servers" : [], + "Silenced servers" : [], + } + + for header in doc.find_all("h2"): + header_text = tidyup.reason(header.text) + + # DEBUG: print(f"DEBUG: header_text='{header_text}' - BEFORE!") + if header_text in language_mapping: + # DEBUG: print(f"DEBUG: header_text='{header_text}' - FOUND!") + header_text = language_mapping[header_text] + else: + print(f"WARNING: header_text='{header_text}' not found in language mapping table") + + # DEBUG: print(f"DEBUG: header_text='{header_text} - AFTER!'") + if header_text in blocklist or header_text.lower() in blocklist: + # replaced find_next_siblings with find_all_next to account for instances that e.g. 
hide lists in dropdown menu + # DEBUG: print(f"DEBUG: Found header_text='{header_text}', importing domain blocks ...") + for line in header.find_next("table").find_all("tr")[1:]: + # DEBUG: print(f"DEBUG: line[]='{type(line)}'") + blocklist[header_text].append({ + "blocked": tidyup.domain(line.find_all("td")[0].text), + "reason" : tidyup.reason(line.find_all("td")[1].text), + }) + else: + print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}") + + # DEBUG: print(f"DEBUG: Returning blocklist for domain='{domain}'") + return { + "reject" : blocklist["Suspended servers"], + "media_removal" : blocklist["Filtered media"], + "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"], + } -- 2.39.5