From 1dfdc71033c155a4b7e7d5e32a40b354f381c231 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Mon, 21 Apr 2025 03:06:54 +0200 Subject: [PATCH] Continued: - shorthand "e.g." replaced by "for example" - removed if() block as a loop on an empty list is still not doing anything and the else block only contained a debug line --- fba/boot.py | 22 +++---- fba/commands.py | 4 +- fba/helpers/version.py | 6 +- fba/networks/mastodon.py | 133 +++++++++++++++++++-------------------- fba/networks/pleroma.py | 2 +- 5 files changed, 83 insertions(+), 84 deletions(-) diff --git a/fba/boot.py b/fba/boot.py index d8f290a..01bfe66 100644 --- a/fba/boot.py +++ b/fba/boot.py @@ -79,7 +79,7 @@ def init_parser() -> None: ) parser.set_defaults(command=commands.recheck_obfuscation) parser.add_argument("--domain", help="Instance name (aka. domain)") - parser.add_argument("--software", help="Name of software, e.g. 'lemmy'") + parser.add_argument("--software", help="Name of software, for example 'lemmy'") parser.add_argument("--force-all", action="store_true", help="Include also already existing instances, otherwise only new are checked") parser.add_argument("--delete-unwanted", action="store_true", help="Whether delete or keep (default) unwanted domains") parser.add_argument("--no-obfuscation", action="store_true", help="Check instances with no obfuscation of blocked instances") @@ -91,7 +91,7 @@ def init_parser() -> None: ) parser.set_defaults(command=commands.fetch_blocks) parser.add_argument("--domain", help="Instance name (aka. domain)") - parser.add_argument("--software", help="Name of software, e.g. 'lemmy'") + parser.add_argument("--software", help="Name of software, for example 'lemmy'") parser.add_argument("--only-none", action="store_true", help="Checks only entries which has never been checked.") parser.add_argument("--force-all", action="store_true", help="Forces update of data, no matter what.") @@ -115,7 +115,7 @@ def init_parser() -> None: help="Fetches domains from a FBA-specific RSS feed.", ) parser.set_defaults(command=commands.fetch_fba_rss) - parser.add_argument("--feed", required=True, help="RSS feed to fetch domains from (e.g. https://fba.ryoma.agency/rss?domain=foo.bar).") + parser.add_argument("--feed", required=True, help="RSS feed to fetch domains from (for example https://fba.ryoma.agency/rss?domain=foo.bar).") ### Fetch blocks from FBA's bot account ### parser = subparser_command.add_parser( @@ -123,7 +123,7 @@ def init_parser() -> None: help="Fetches ATOM feed with domains from FBA's bot account.", ) parser.set_defaults(command=commands.fetch_fbabot_atom) - parser.add_argument("--feed", required=True, help="RSS feed to fetch domains from (e.g. https://fba.ryoma.agency/rss?domain=foo.bar).") + parser.add_argument("--feed", required=True, help="RSS feed to fetch domains from (for example https://fba.ryoma.agency/rss?domain=foo.bar).") ### Fetch blocks from oliphant's GIT repository ### parser = subparser_command.add_parser( @@ -147,10 +147,10 @@ def init_parser() -> None: help="Fetches instances (aka. \"domains\") from an initial instance. You may want to re-run this command several times (at least 3 with big instances) to have a decent amount of valid instances.", ) parser.set_defaults(command=commands.fetch_instances) - parser.add_argument("--domain", help="Instance name (aka. domain) to fetch further instances from. Start with a large instance, e.g. mastodon.social .") + parser.add_argument("--domain", help="Instance name (aka. domain) to fetch further instances from. 
Start with a large instance, for example mastodon.social .") parser.add_argument("--force-all", action="store_true", help="Include also already existing instances, otherwise only new are checked") parser.add_argument("--single", action="store_true", help="Only fetch given instance.") - parser.add_argument("--software", help="Name of software, e.g. 'lemmy'") + parser.add_argument("--software", help="Name of software, for example 'lemmy'") ### Fetch blocks from static text file(s) ### parser = subparser_command.add_parser( @@ -187,7 +187,7 @@ def init_parser() -> None: help="Fetches blocks from fediverse.observer.", ) parser.set_defaults(command=commands.fetch_observer) - parser.add_argument("--software", help="Name of software, e.g. 'lemmy'") + parser.add_argument("--software", help="Name of software, for example 'lemmy'") ### Fetch instances from fedipact.online ### parser = subparser_command.add_parser( @@ -216,7 +216,7 @@ def init_parser() -> None: help="Fetches CSV from fedilist.com", ) parser.set_defaults(command=commands.fetch_fedilist) - parser.add_argument("--software", help="Name of software, e.g. 'lemmy'") + parser.add_argument("--software", help="Name of software, for example 'lemmy'") parser.add_argument("--force-all", action="store_true", help="Include also already existing instances, otherwise only new are checked") ### Update nodeinfo ### @@ -226,8 +226,8 @@ def init_parser() -> None: ) parser.set_defaults(command=commands.update_nodeinfo) parser.add_argument("--domain", help="Instance name (aka. domain)") - parser.add_argument("--software", help="Name of software, e.g. 'lemmy'") - parser.add_argument("--mode", help="Name of detection mode, e.g. 'auto_discovery'") + parser.add_argument("--software", help="Name of software, for example 'lemmy'") + parser.add_argument("--mode", help="Name of detection mode, for example 'auto_discovery'") parser.add_argument("--force-all", action="store_true", help="Forces update of data, no matter what. Replaces all force parameters below.") parser.add_argument("--force-recrawl", action="store_true", help="Forces recrawling all found instances. Can still be limited by --software or any --no-* parameters.") parser.add_argument("--force-update-none", action="store_true", help="Forces updating 'None' value for software (else it won't be updated).") @@ -259,7 +259,7 @@ def init_parser() -> None: ) parser.set_defaults(command=commands.fetch_relays) parser.add_argument("--domain", help="Instance name (aka. 'relay')") - parser.add_argument("--software", help="Name of software, e.g. 'lemmy'") + parser.add_argument("--software", help="Name of software, for example 'lemmy'") parser.add_argument("--force-all", action="store_true", help="Forces update of data, no matter what.") ### Fetches relay list from relaylist.com diff --git a/fba/commands.py b/fba/commands.py index 0cc1777..f73bfb2 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -1054,8 +1054,8 @@ def fetch_instances(args: argparse.Namespace) -> int: if row["software"] is None: logger.warning("row[domain]='%s' has no software detected. You can try to run ./fba.py update_nodeinfo --domain=%s --force-all to get it updated - SKIPPED!", row["domain"], row["domain"]) continue - elif row["software"] is not None and software_helper.is_relay(row["software"]): - logger.warning("row[domain]='%s' is of software type '%s' which is not supported by this command. 
Please invoke fetch_relays instead - SKIPPED!", row["domain"], row["software"]) + elif software_helper.is_relay(row["software"]): + logger.warning("row[domain]='%s' is a relay of type '%s' which is not supported by this command. Please invoke fetch_relays instead - SKIPPED!", row["domain"], row["software"]) continue elif not args.force_all and not args.software in [None, ""] and instances.is_recent(row["domain"]): logger.debug("row[domain]='%s' has recently been crawled - SKIPPED!", row["domain"]) diff --git a/fba/helpers/version.py b/fba/helpers/version.py index 4de65e5..9a08415 100644 --- a/fba/helpers/version.py +++ b/fba/helpers/version.py @@ -24,11 +24,11 @@ from fba.helpers import software as software_helper _patterns = [ # semantic version number (with v|V) prefix) re.compile(r"^(?Pv|V{0,1})(\.{0,1})(?P0|[1-9]\d*)\.(?P0+|[1-9]\d*)(\.(?P0+|[1-9]\d*)(?:-(?P(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?)?$"), - # non-sematic, e.g. 1.2.3.4 + # non-sematic, for example 1.2.3.4 re.compile(r"^(?Pv|V{0,1})(\.{0,1})(?P0|[1-9]\d*)\.(?P0+|[1-9]\d*)(\.(?P0+|[1-9]\d*)(\.(?P0|[1-9]\d*))?)$"), - # non-sematic, e.g. 2023-05[-dev] + # non-sematic, for example 2023-05[-dev] re.compile(r"^(?P[1-9]{1}[0-9]{3})\.(?P[0-9]{2})(-[a-z]+){0,1}$"), - # non-semantic, e.g. abcdef0 + # non-semantic, for example abcdef0 re.compile("^[a-f0-9]{7}$"), ] diff --git a/fba/networks/mastodon.py b/fba/networks/mastodon.py index c74c289..f4fb3f3 100644 --- a/fba/networks/mastodon.py +++ b/fba/networks/mastodon.py @@ -35,29 +35,29 @@ logger = logging.getLogger(__name__) # Language mapping X -> English _language_mapping = { # English -> English - "Silenced instances" : "Silenced servers", - "Suspended instances" : "Suspended servers", - "Suspended servers" : "Suspended servers", - "Limited instances" : "Limited servers", - "Filtered media" : "Filtered media", + "silenced instances" : "silenced servers", + "suspended instances" : "suspended servers", + "suspended servers" : "suspended servers", + "limited instances" : "limited servers", + "filtered media" : "filtered media", # German -> English - "Gesperrte Server" : "Suspended servers", - "Gefilterte Medien" : "Filtered media", - "Stummgeschaltete Server" : "Silenced servers", + "Gesperrte Server" : "suspended servers", + "Gefilterte Medien" : "filtered media", + "Stummgeschaltete Server" : "silenced servers", # Japanese -> English - "停止済みのサーバー" : "Suspended servers", - "制限中のサーバー" : "Limited servers", - "メディアを拒否しているサーバー": "Filtered media", - "サイレンス済みのサーバー" : "Silenced servers", + "停止済みのサーバー" : "suspended servers", + "制限中のサーバー" : "limited servers", + "メディアを拒否しているサーバー": "filtered media", + "サイレンス済みのサーバー" : "silenced servers", # ??? 
-> English - "שרתים מושעים" : "Suspended servers", - "מדיה מסוננת" : "Filtered media", - "שרתים מוגבלים" : "Silenced servers", + "שרתים מושעים" : "suspended servers", + "מדיה מסוננת" : "filtered media", + "שרתים מוגבלים" : "silenced servers", # French -> English - "Serveurs suspendus" : "Suspended servers", - "Médias filtrés" : "Filtered media", - "Serveurs limités" : "Limited servers", - "Serveurs modérés" : "Limited servers", + "Serveurs suspendus" : "suspended servers", + "Médias filtrés" : "filtered media", + "Serveurs limités" : "limited servers", + "Serveurs modérés" : "limited servers", } # Paths to check @@ -101,10 +101,10 @@ def fetch_blocks_from_about(domain: str) -> dict: break blocklist = { - "Suspended servers": [], - "Filtered media" : [], - "Limited servers" : [], - "Silenced servers" : [], + "suspended servers": [], + "filtered media" : [], + "limited servers" : [], + "silenced servers" : [], } logger.debug("doc[]='%s'", type(doc)) @@ -112,18 +112,23 @@ def fetch_blocks_from_about(domain: str) -> dict: logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain) return [] - for header in doc.find_all("h3"): - header_text = tidyup.reason(header.text) + headers = doc.find_all("h3") - logger.debug("header_text='%s'", header_text) + logger.info("Checking %d h3 headers ...", len(headers)) + for header in headers: + logger.debug("header[]='%s'", type(header)) + header_text = tidyup.reason(header.text.lower()) + + logger.debug("header_text='%s' - BEFORE!", header_text) if header_text in _language_mapping: logger.debug("Translating header_text='%s' ...", header_text) header_text = _language_mapping[header_text] else: logger.warning("header_text='%s' not found in language mapping table", header_text) - if header_text in blocklist or header_text.lower() in blocklist: - # replaced find_next_siblings with find_all_next to account for instances that e.g. 
hide lists in dropdown menu
+        logger.debug("header_text='%s' - AFTER!", header_text)
+        if header_text in blocklist:
+            # replaced find_next_siblings with find_all_next to account for instances that for example hide lists in dropdown menu
             for line in header.find_all_next("table")[0].find_all("tr")[1:]:
                 domain = line.find("span").text
                 reason = line.find_all("td")[1].text
@@ -157,9 +162,9 @@ def fetch_blocks_from_about(domain: str) -> dict:
 
     logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
     return {
-        "rejected"      : blocklist["Suspended servers"],
-        "media_removal" : blocklist["Filtered media"],
-        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
+        "rejected"      : blocklist["suspended servers"],
+        "media_removal" : blocklist["filtered media"],
+        "followers_only": blocklist["limited servers"] + blocklist["silenced servers"],
     }
 
 def fetch_blocks(domain: str) -> list:
@@ -177,42 +182,36 @@ def fetch_blocks(domain: str) -> list:
     logger.debug("Invoking fetch_blocks_from_about(%s) ...", domain)
     rows = fetch_blocks_from_about(domain)
 
-    logger.debug("rows[%s]()=%d", type(rows), len(rows))
-    if len(rows) > 0:
-        logger.debug("Checking %d entries from domain='%s' ...", len(rows), domain)
-        for block_level in rows:
-            logger.debug("block_level='%s'", block_level)
-            blocklists = rows[block_level]
-
-            logger.debug("block_level='%s',blocklists()=%d", block_level, len(blocklists))
-            for block in blocklists:
-                # Check type
-                logger.debug("block[]='%s'", type(block))
-                if not isinstance(block, dict):
-                    logger.debug("block[]='%s' is of type 'dict' - SKIPPED!", type(block))
-                    continue
-                elif not domain_helper.is_wanted(block["domain"]):
-                    logger.debug("block[domain]='%s' is not wanted - SKIPPED!", block["domain"])
-                    continue
-                elif block_level in ["accept", "accepted"]:
-                    logger.debug("block[domain]='%s' has unwanted severity level '%s' - SKIPPED!", block["domain"], block_level)
-                    continue
-                elif "digest" in block and not block["digest"] is None and not validators.hashes.sha256(block["digest"]):
-                    logger.warning("block[domain]='%s' has invalid block[digest]='%s' - SKIPPED!", block["domain"], block["digest"])
-                    continue
-
-                reason = tidyup.reason(block["reason"]) if "reason" in block and block["reason"] is not None and block["reason"] != "" else None
-
-                logger.debug("Appending blocker='%s',blocked='%s',reason='%s',block_level='%s' ...", domain, block["domain"], reason, block_level)
-                blocklist.append({
-                    "blocker"    : domain,
-                    "blocked"    : block["domain"],
-                    "digest"     : block["digest"] if "digest" in block else None,
-                    "reason"     : reason,
-                    "block_level": blocks_helper.alias_block_level(block_level),
-                })
-    else:
-        logger.debug("domain='%s' has no block list", domain)
+    logger.debug("Checking %d entries from domain='%s' ...", len(rows), domain)
+    for block_level in rows:
+        logger.debug("rows[%s]()=%d", block_level, len(rows[block_level]))
+        for block in rows[block_level]:
+            # Check type
+            logger.debug("block[]='%s'", type(block))
+            if not isinstance(block, dict):
+                logger.debug("block[]='%s' is not of type 'dict' - SKIPPED!", type(block))
+                continue
+            elif not domain_helper.is_wanted(block["domain"]):
+                logger.debug("block[domain]='%s' is not wanted - SKIPPED!", block["domain"])
+                continue
+            elif block_level in ["accept", "accepted"]:
+                logger.debug("block[domain]='%s' has unwanted severity level '%s' - SKIPPED!", block["domain"], block_level)
+                continue
+            elif "digest" in block and not block["digest"] is None and not validators.hashes.sha256(block["digest"]):
+                logger.warning("block[domain]='%s' has invalid block[digest]='%s' - SKIPPED!", block["domain"], block["digest"])
+                continue
+
+            logger.debug("block[reason]='%s' - BEFORE!", block["reason"])
+            reason = tidyup.reason(block["reason"]) if "reason" in block and block["reason"] not in [None, ""] else None
+
+            logger.debug("Appending domain='%s',block[domain]='%s',reason='%s',block_level='%s' ...", domain, block["domain"], reason, block_level)
+            blocklist.append({
+                "blocker"    : domain,
+                "blocked"    : block["domain"],
+                "digest"     : block["digest"] if "digest" in block else None,
+                "reason"     : reason,
+                "block_level": blocks_helper.alias_block_level(block_level),
+            })
 
     logger.debug("blocklist()=%d - EXIT!", len(blocklist))
     return blocklist
diff --git a/fba/networks/pleroma.py b/fba/networks/pleroma.py
index fea777b..200fa3b 100644
--- a/fba/networks/pleroma.py
+++ b/fba/networks/pleroma.py
@@ -369,7 +369,7 @@ def fetch_blocks_from_about(domain: str) -> dict:
         logger.debug("block_level='%s - AFTER!'", block_level)
 
         if block_level in blocklist:
-            # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
+            # replaced find_next_siblings with find_all_next to account for instances that for example hide lists in dropdown menu
             logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
             for line in header.find_next("table").find_all("tr")[1:]:
                 logger.debug("line[]='%s'", type(line))
-- 
2.39.5