From: Roland Häder Date: Mon, 5 Jun 2023 21:11:29 +0000 (+0200) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=ba2046941e0c9353a171d05d73a5f4397388d971;p=fba.git Continued: - renamed command.py -> commands.py --- diff --git a/fba/__init__.py b/fba/__init__.py index 888b8c3..70dc665 100644 --- a/fba/__init__.py +++ b/fba/__init__.py @@ -1,7 +1,7 @@ __all__ = [ 'boot', 'cache', - 'command', + 'commands', 'config', 'fba', 'instances' diff --git a/fba/boot.py b/fba/boot.py index b121562..05f9546 100644 --- a/fba/boot.py +++ b/fba/boot.py @@ -19,7 +19,7 @@ import os import sys import tempfile import zc.lockfile -from fba import command +from fba import commands from fba import fba # Lock file @@ -60,14 +60,14 @@ def init_parser(): help="Checks given instance if it exists and returns proper exit code" ) parser.add_argument("--domain", required=True, help="Instance name (aka. domain) to check") - parser.set_defaults(command=command.check_instance) + parser.set_defaults(command=commands.check_instance) ### Fetch from bka.li ### parser = subparser_command.add_parser( "fetch_bkali", help="Fetches domain names from bka.li API", ) - parser.set_defaults(command=command.fetch_bkali) + parser.set_defaults(command=commands.fetch_bkali) ### Fetch blocks from registered instances or given ### parser = subparser_command.add_parser( @@ -75,14 +75,14 @@ def init_parser(): help="Fetches blocks from registered instances (run command fetch_instances first!).", ) parser.add_argument("--domain", help="Instance name (aka. 
domain) to fetch blocks from") - parser.set_defaults(command=command.fetch_blocks) + parser.set_defaults(command=commands.fetch_blocks) ### Fetch blocks from chaos.social ### parser = subparser_command.add_parser( "fetch_cs", help="Fetches blocks from chaos.social's meta sub domain.", ) - parser.set_defaults(command=command.fetch_cs) + parser.set_defaults(command=commands.fetch_cs) ### Fetch blocks from a FBA-specific RSS feed ### parser = subparser_command.add_parser( @@ -90,14 +90,14 @@ def init_parser(): help="Fetches domains from a FBA-specific RSS feed.", ) parser.add_argument("--feed", required=True, help="RSS feed to fetch domains from (e.g. https://fba.ryoma.agency/rss?domain=foo.bar).") - parser.set_defaults(command=command.fetch_fba_rss) + parser.set_defaults(command=commands.fetch_fba_rss) ### Fetch blocks from FBA's bot account ### parser = subparser_command.add_parser( "fetch_fbabot_atom", help="Fetches ATOM feed with domains from FBA's bot account. You may wish to re-run this command several times (at least 3 with big instances) to have a decent amount of valid instances.", ) - parser.set_defaults(command=command.fetch_fbabot_atom) + parser.set_defaults(command=commands.fetch_fbabot_atom) ### Fetch instances from given initial instance ### parser = subparser_command.add_parser( @@ -105,7 +105,7 @@ def init_parser(): help="Fetches instances (aka. \"domains\") from an initial instance.", ) parser.add_argument("--domain", help="Instance name (aka. domain) to fetch further instances from. Start with a large instance, e.g. 
mastodon.social .") - parser.set_defaults(command=command.fetch_instances) + parser.set_defaults(command=commands.fetch_instances) # DEBUG: print("DEBUG: init_parser(): EXIT!") diff --git a/fba/command.py b/fba/command.py deleted file mode 100644 index 3bad9dd..0000000 --- a/fba/command.py +++ /dev/null @@ -1,787 +0,0 @@ -# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes -# Copyright (C) 2023 Free Software Foundation -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>.
- -import argparse -import atoma -import bs4 -import itertools -import json -import re -import reqto -import sys -import time -import validators - -from fba import boot -from fba import config -from fba import fba - -def check_instance(args: argparse.Namespace) -> int: - # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!") - status = 0 - if not validators.domain(args.domain): - print(f"WARNING: args.domain='{args.domain}' is not valid") - status = 100 - elif fba.is_blacklisted(args.domain): - print(f"WARNING: args.domain='{args.domain}' is blacklisted") - status = 101 - elif fba.is_instance_registered(args.domain): - print(f"WARNING: args.domain='{args.domain}' is already registered") - staus = 102 - else: - print(f"INFO: args.domain='{args.domain}' is not known") - - # DEBUG: print(f"DEBUG: status={status} - EXIT!") - return status - -def fetch_bkali(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!") - domains = list() - try: - fetched = fba.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({ - "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}" - })) - - # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'") - if len(fetched) == 0: - raise Exception("WARNING: Returned no records") - elif not "data" in fetched: - raise Exception(f"WARNING: fetched()={len(fetched)} does not contain element 'data'") - elif not "nodeinfo" in fetched["data"]: - raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain element 'nodeinfo'") - - for entry in fetched["data"]["nodeinfo"]: - # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'") - if not "domain" in entry: - print(f"WARNING: entry does not contain 'domain' - SKIPPED!") - continue - elif not validators.domain(entry["domain"]): - print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!") - continue - elif fba.is_blacklisted(entry["domain"]): - # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is 
blacklisted - SKIPPED!") - continue - elif fba.is_instance_registered(entry["domain"]): - # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!") - continue - - # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...") - domains.append(entry["domain"]) - - except BaseException as e: - print(f"ERROR: Cannot fetch graphql,exception[{type(e)}]:'{str(e)}'") - sys.exit(255) - - # DEBUG: print(f"DEBUG: domains()={len(domains)}") - if len(domains) > 0: - boot.acquire_lock() - - print(f"INFO: Adding {len(domains)} new instances ...") - for domain in domains: - print(f"INFO: Fetching instances from domain='{domain}' ...") - fba.fetch_instances(domain, None, None, sys.argv[0]) - - # DEBUG: print("DEBUG: EXIT!") - -def fetch_blocks(args: argparse.Namespace): - print(f"DEBUG: args[]={type(args)} - CALLED!") - if args.domain != None and args.domain != "": - if not validators.domain(args.domain): - print(f"WARNING: domain='{args.domain}' is not valid.") - return - elif fba.is_blacklisted(args.domain): - print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!") - return - elif not fba.is_instance_registered(args.domain): - print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.") - return - - boot.acquire_lock() - - if args.domain != None and args.domain != "": - fba.cursor.execute( - "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND domain = ?", [args.domain] - ) - else: - fba.cursor.execute( - "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) 
ORDER BY rowid DESC", [time.time() - config.get("recheck_block")] - ) - - rows = fba.cursor.fetchall() - print(f"INFO: Checking {len(rows)} entries ...") - for blocker, software, origin, nodeinfo_url in rows: - # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url) - blockdict = [] - blocker = fba.tidyup_domain(blocker) - # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software) - - if blocker == "": - print("WARNING: blocker is now empty!") - continue - elif fba.is_blacklisted(blocker): - print(f"WARNING: blocker='{blocker}' is blacklisted now!") - continue - - # DEBUG: print(f"DEBUG: blocker='{blocker}'") - fba.update_last_blocked(blocker) - - if software == "pleroma": - print("INFO: blocker:", blocker) - try: - # Blocks - json = fba.fetch_nodeinfo(blocker, nodeinfo_url) - if json is None: - print("WARNING: Could not fetch nodeinfo from blocker:", blocker) - continue - elif not "metadata" in json: - print(f"WARNING: json()={len(json)} does not have key 'metadata', blocker='{blocker}'") - continue - elif not "federation" in json["metadata"]: - print(f"WARNING: json()={len(json['metadata'])} does not have key 'federation', blocker='{blocker}'") - continue - - # DEBUG: print("DEBUG: Updating nodeinfo:", blocker) - fba.update_last_nodeinfo(blocker) - - federation = json["metadata"]["federation"] - - if "enabled" in federation: - # DEBUG: print("DEBUG: Instance has no block list to analyze:", blocker) - continue - - if "mrf_simple" in federation: - for block_level, blocks in ( - {**federation["mrf_simple"], - **{"quarantined_instances": federation["quarantined_instances"]}} - ).items(): - # DEBUG: print("DEBUG: block_level, blocks():", block_level, len(blocks)) - block_level = fba.tidyup_domain(block_level) - # DEBUG: print("DEBUG: BEFORE block_level:", block_level) - - if block_level == "": - print("WARNING: block_level is now empty!") - continue - - # DEBUG: print(f"DEBUG: Checking {len(blocks)} 
entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") - for blocked in blocks: - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty after fba.tidyup_domain():", blocker, block_level) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 1: - # -ACK!-oma also started obscuring domains without hash - fba.cursor.execute( - "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] - ) - searchres = fba.cursor.fetchone() - # DEBUG: print("DEBUG: searchres[]:", type(searchres)) - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - nodeinfo_url = searchres[1] - # DEBUG: print("DEBUG: Looked up domain:", blocked) - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - - if not fba.is_instance_blocked(blocker, blocked, block_level): - # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level) - fba.block_instance(blocker, blocked, "unknown", block_level) - - if block_level == "reject": - # DEBUG: print("DEBUG: Adding to blockdict:", blocked) - blockdict.append( - { - "blocked": 
blocked, - "reason" : None - }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen for blocker='{blocker}',blocked='{blocked}' ...") - fba.update_last_seen(blocker, blocked, block_level) - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - - # Reasons - if "mrf_simple_info" in federation: - # DEBUG: print("DEBUG: Found mrf_simple_info:", blocker) - for block_level, info in ( - {**federation["mrf_simple_info"], - **(federation["quarantined_instances_info"] - if "quarantined_instances_info" in federation - else {})} - ).items(): - # DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items())) - block_level = fba.tidyup_domain(block_level) - # DEBUG: print("DEBUG: BEFORE block_level:", block_level) - - if block_level == "": - print("WARNING: block_level is now empty!") - continue - - # DEBUG: print(f"DEBUG: Checking {len(info.items())} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") - for blocked, reason in info.items(): - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty after fba.tidyup_domain():", blocker, block_level) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 1: - # same domain guess as above, but for reasons field - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? 
ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] - ) - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - - # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"]) - fba.update_block_reason(reason["reason"], blocker, blocked, block_level) - - for entry in blockdict: - if entry["blocked"] == blocked: - # DEBUG: print("DEBUG: Updating entry reason:", blocked) - entry["reason"] = reason["reason"] - - fba.connection.commit() - except Exception as e: - print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") - elif software == "mastodon": - print("INFO: blocker:", blocker) - try: - # json endpoint for newer mastodongs - try: - json = { - "reject" : [], - "media_removal" : [], - "followers_only": [], - "report_removal": [] - } - - # handling CSRF, I've saw at least one server requiring it to access the endpoint - # DEBUG: print("DEBUG: Fetching meta:", blocker) - meta = bs4.BeautifulSoup( - fba.get_response(blocker, "/", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text, - "html.parser", - ) - try: - csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"] - # DEBUG: 
print("DEBUG: Adding CSRF token:", blocker, csrf) - reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}} - except BaseException as e: - # DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker, e) - reqheaders = fba.api_headers - - # DEBUG: print("DEBUG: Querying API domain_blocks:", blocker) - blocks = fba.get_response(blocker, "/api/v1/instance/domain_blocks", reqheaders, (config.get("connection_timeout"), config.get("read_timeout"))).json() - - print(f"INFO: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}' ...") - for block in blocks: - entry = { - 'domain': block['domain'], - 'hash' : block['digest'], - 'reason': block['comment'] - } - - # DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment']) - if block['severity'] == 'suspend': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") - json['reject'].append(entry) - elif block['severity'] == 'silence': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") - json['followers_only'].append(entry) - elif block['severity'] == 'reject_media': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") - json['media_removal'].append(entry) - elif block['severity'] == 'reject_reports': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") - json['report_removal'].append(entry) - else: - print("WARNING: Unknown severity:", block['severity'], block['domain']) - except BaseException as e: - # DEBUG: print(f"DEBUG: Failed, trying mastodon-specific fetches: blocker='{blocker}',exception[{type(e)}]={str(e)}") - json = fba.get_mastodon_blocks(blocker) - - print(f"INFO: Checking {len(json.items())} entries from blocker='{blocker}',software='{software}' ...") - for block_level, blocks in json.items(): - # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, 
block_level, len(blocks)) - block_level = fba.tidyup_domain(block_level) - # DEBUG: print("DEBUG: AFTER-block_level:", block_level) - if block_level == "": - print("WARNING: block_level is empty, blocker:", blocker) - continue - - # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") - for block in blocks: - blocked, blocked_hash, reason = block.values() - # DEBUG: print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER-blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty:", blocker) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 0: - # Doing the hash search for instance names as well to tidy up DB - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash] - ) - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!") - continue - - # DEBUG: print("DEBUG: Updating domain: ", searchres[0]) - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - - # DEBUG: print("DEBUG: 
Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - - blocking = blocked if blocked.count("*") <= 1 else blocked_hash - # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'") - - if not fba.is_instance_blocked(blocker, blocked, block_level): - # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level) - fba.block_instance(blocker, blocking, reason, block_level) - - if block_level == "reject": - blockdict.append({ - "blocked": blocked, - "reason" : reason - }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocking='{blocking}' ...") - fba.update_last_seen(blocker, blocking, block_level) - fba.update_block_reason(reason, blocker, blocking, block_level) - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - except Exception as e: - print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") - elif software == "friendica" or software == "misskey" or software == "bookwyrm" or software == "takahe": - print("INFO: blocker:", blocker) - try: - if software == "friendica": - json = fba.get_friendica_blocks(blocker) - elif software == "misskey": - json = fba.get_misskey_blocks(blocker) - elif software == "bookwyrm": - print("WARNING: bookwyrm is not fully supported for fetching blacklist!", blocker) - #json = fba.get_bookwyrm_blocks(blocker) - continue - elif software == "takahe": - print("WARNING: takahe is not fully supported for fetching blacklist!", blocker) - #json = fba.get_takahe_blocks(blocker) - continue - - print(f"INFO: Checking {len(json.items())} entries from 
blocker='{blocker}',software='{software}' ...") - for block_level, blocks in json.items(): - # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks)) - block_level = fba.tidyup_domain(block_level) - # DEBUG: print("DEBUG: AFTER-block_level:", block_level) - if block_level == "": - print("WARNING: block_level is empty, blocker:", blocker) - continue - - # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") - for block in blocks: - blocked, reason = block.values() - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty:", blocker) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 0: - # Some friendica servers also obscure domains without hash - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] - ) - - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - elif blocked.count("?") > 0: - # Some obscure them with question marks, not sure if that's dependent on version or not - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? 
ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")] - ) - - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - - if not fba.is_instance_blocked(blocker, blocked, block_level): - fba.block_instance(blocker, blocked, reason, block_level) - - if block_level == "reject": - blockdict.append({ - "blocked": blocked, - "reason" : reason - }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...") - fba.update_last_seen(blocker, blocked, block_level) - fba.update_block_reason(reason, blocker, blocked, block_level) - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - except Exception as e: - print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") - elif software == "gotosocial": - print("INFO: blocker:", blocker) - try: - # Blocks - federation = fba.get_response(blocker, "{fba.get_peers_url}?filter=suspended", fba.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))).json() - - if (federation == None): - print("WARNING: No valid response:", blocker); - elif "error" in federation: - print("WARNING: API returned error:", federation["error"]) - else: - print(f"INFO: Checking {len(federation)} entries from 
blocker='{blocker}',software='{software}' ...") - for peer in federation: - blocked = peer["domain"].lower() - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty:", blocker) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 0: - # GTS does not have hashes for obscured domains, so we have to guess it - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] - ) - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - - if not fba.is_instance_blocked(blocker, blocked, "reject"): - # DEBUG: print(f"DEBUG: blocker='{blocker}' is blocking '{blocked}' for unknown reason at this point") - fba.block_instance(blocker, blocked, "unknown", "reject") - - blockdict.append({ - "blocked": blocked, - "reason" : None - }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen for blocker='{blocker}',blocked='{blocked}' ...") - 
fba.update_last_seen(blocker, blocked, "reject") - - if "public_comment" in peer: - # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, peer["public_comment"]) - fba.update_block_reason(peer["public_comment"], blocker, blocked, "reject") - - for entry in blockdict: - if entry["blocked"] == blocked: - # DEBUG: print(f"DEBUG: Setting block reason for blocked='{blocked}':'{peer['public_comment']}'") - entry["reason"] = peer["public_comment"] - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - except Exception as e: - print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") - else: - print("WARNING: Unknown software:", blocker, software) - - if config.get("bot_enabled") and len(blockdict) > 0: - send_bot_post(blocker, blockdict) - - blockdict = [] - - # DEBUG: print("DEBUG: EXIT!") - -def fetch_cs(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!") - domains = { - "silenced": list(), - "blocked": list(), - } - - try: - doc = bs4.BeautifulSoup( - reqto.get("https://meta.chaos.social/federation", headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))).text, - "html.parser", - ) - # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}") - silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table") - - # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}") - domains["silenced"] = domains["silenced"] + find_domains(silenced) - blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table") - - # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}") - domains["blocked"] = domains["blocked"] + find_domains(blocked) - - except BaseException as e: - print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(e)}]:'{str(e)}'") - sys.exit(255) - - # DEBUG: print(f"DEBUG: domains()={len(domains)}") - if len(domains) > 0: - boot.acquire_lock() - - print(f"INFO: Adding {len(domains)} new instances ...") - for 
block_level in domains: - # DEBUG: print(f"DEBUG: block_level='{block_level}'") - - for row in domains[block_level]: - # DEBUG: print(f"DEBUG: row='{row}'") - if not fba.is_instance_registered(row["domain"]): - print(f"INFO: Fetching instances from domain='{row['domain']}' ...") - fba.fetch_instances(row["domain"], None, None, sys.argv[0]) - - if not fba.is_instance_blocked('chaos.social', row["domain"], block_level): - # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...") - fba.block_instance('chaos.social', row["domain"], row["reason"], block_level) - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - - # DEBUG: print("DEBUG: EXIT!") - -def fetch_fba_rss(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!") - domains = list() - - try: - print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...") - response = reqto.get(args.feed, headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))) - - # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}") - if response.ok and response.status_code < 300 and len(response.text) > 0: - # DEBUG: print(f"DEBUG: Parsing RSS feed ...") - rss = atoma.parse_rss_bytes(response.content) - - # DEBUG: print(f"DEBUG: rss[]={type(rss)}") - for item in rss.items: - # DEBUG: print(f"DEBUG: item={item}") - domain = item.link.split("=")[1] - - if fba.is_blacklisted(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") - continue - elif domain in domains: - # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!") - continue - elif fba.is_instance_registered(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!") - continue - - # DEBUG: print(f"DEBUG: domain='{domain}'") - domains.append(domain) - - except BaseException as e: - 
print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'") - sys.exit(255) - - # DEBUG: print(f"DEBUG: domains()={len(domains)}") - if len(domains) > 0: - boot.acquire_lock() - - print(f"INFO: Adding {len(domains)} new instances ...") - for domain in domains: - print(f"INFO: Fetching instances from domain='{domain}' ...") - fba.fetch_instances(domain, None, None, sys.argv[0]) - - # DEBUG: print("DEBUG: EXIT!") - -def fetch_fbabot_atom(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!") - feed = "https://ryona.agency/users/fba/feed.atom" - - domains = list() - try: - print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...") - response = reqto.get(feed, headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))) - - # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}") - if response.ok and response.status_code < 300 and len(response.text) > 0: - # DEBUG: print(f"DEBUG: Parsing ATOM feed ...") - atom = atoma.parse_atom_bytes(response.content) - - # DEBUG: print(f"DEBUG: atom[]={type(atom)}") - for entry in atom.entries: - # DEBUG: print(f"DEBUG: entry[]={type(entry)}") - doc = bs4.BeautifulSoup(entry.content.value, "html.parser") - # DEBUG: print(f"DEBUG: doc[]={type(doc)}") - for element in doc.findAll("a"): - # DEBUG: print(f"DEBUG: element[{type(element)}]={element}") - domain = fba.tidyup_domain(element["href"]) - - # DEBUG: print(f"DEBUG: domain='{domain}'") - if fba.is_blacklisted(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") - continue - elif domain in domains: - # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!") - continue - elif fba.is_instance_registered(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!") - continue - - # DEBUG: print(f"DEBUG: domain='{domain}'") - domains.append(domain) - - 
except BaseException as e: - print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'") - sys.exit(255) - - # DEBUG: print(f"DEBUG: domains()={len(domains)}") - if len(domains) > 0: - boot.acquire_lock() - - print(f"INFO: Adding {len(domains)} new instances ...") - for domain in domains: - print(f"INFO: Fetching instances from domain='{domain}' ...") - fba.fetch_instances(domain, None, None, sys.argv[0]) - - # DEBUG: print("DEBUG: EXIT!") - -def fetch_instances(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!") - boot.acquire_lock() - - # Initial fetch - fba.fetch_instances(args.domain, None, None, sys.argv[0]) - - # Loop through some instances - fba.cursor.execute( - "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")] - ) - - rows = fba.cursor.fetchall() - print(f"INFO: Checking {len(rows)} entries ...") - for row in rows: - # DEBUG: print("DEBUG: domain:", row[0]) - if fba.is_blacklisted(row[0]): - print("WARNING: domain is blacklisted:", row[0]) - continue - - print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'") - fba.fetch_instances(row[0], row[1], row[2], sys.argv[0], row[3]) - - # DEBUG: print("DEBUG: EXIT!") diff --git a/fba/commands.py b/fba/commands.py new file mode 100644 index 0000000..3bad9dd --- /dev/null +++ b/fba/commands.py @@ -0,0 +1,787 @@ +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# 
(at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import argparse +import atoma +import bs4 +import itertools +import json +import re +import reqto +import sys +import time +import validators + +from fba import boot +from fba import config +from fba import fba + +def check_instance(args: argparse.Namespace) -> int: + # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!") + status = 0 + if not validators.domain(args.domain): + print(f"WARNING: args.domain='{args.domain}' is not valid") + status = 100 + elif fba.is_blacklisted(args.domain): + print(f"WARNING: args.domain='{args.domain}' is blacklisted") + status = 101 + elif fba.is_instance_registered(args.domain): + print(f"WARNING: args.domain='{args.domain}' is already registered") + staus = 102 + else: + print(f"INFO: args.domain='{args.domain}' is not known") + + # DEBUG: print(f"DEBUG: status={status} - EXIT!") + return status + +def fetch_bkali(args: argparse.Namespace): + # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!") + domains = list() + try: + fetched = fba.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({ + "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}" + })) + + # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'") + if len(fetched) == 0: + raise Exception("WARNING: Returned no records") + elif not "data" in fetched: + raise Exception(f"WARNING: fetched()={len(fetched)} does not contain element 'data'") + elif not "nodeinfo" in fetched["data"]: + raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain element 'nodeinfo'") + + for entry in 
fetched["data"]["nodeinfo"]: + # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'") + if not "domain" in entry: + print(f"WARNING: entry does not contain 'domain' - SKIPPED!") + continue + elif not validators.domain(entry["domain"]): + print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!") + continue + elif fba.is_blacklisted(entry["domain"]): + # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!") + continue + elif fba.is_instance_registered(entry["domain"]): + # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!") + continue + + # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...") + domains.append(entry["domain"]) + + except BaseException as e: + print(f"ERROR: Cannot fetch graphql,exception[{type(e)}]:'{str(e)}'") + sys.exit(255) + + # DEBUG: print(f"DEBUG: domains()={len(domains)}") + if len(domains) > 0: + boot.acquire_lock() + + print(f"INFO: Adding {len(domains)} new instances ...") + for domain in domains: + print(f"INFO: Fetching instances from domain='{domain}' ...") + fba.fetch_instances(domain, None, None, sys.argv[0]) + + # DEBUG: print("DEBUG: EXIT!") + +def fetch_blocks(args: argparse.Namespace): + print(f"DEBUG: args[]={type(args)} - CALLED!") + if args.domain != None and args.domain != "": + if not validators.domain(args.domain): + print(f"WARNING: domain='{args.domain}' is not valid.") + return + elif fba.is_blacklisted(args.domain): + print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!") + return + elif not fba.is_instance_registered(args.domain): + print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.") + return + + boot.acquire_lock() + + if args.domain != None and args.domain != "": + fba.cursor.execute( + "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') 
AND domain = ?", [args.domain] + ) + else: + fba.cursor.execute( + "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")] + ) + + rows = fba.cursor.fetchall() + print(f"INFO: Checking {len(rows)} entries ...") + for blocker, software, origin, nodeinfo_url in rows: + # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url) + blockdict = [] + blocker = fba.tidyup_domain(blocker) + # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software) + + if blocker == "": + print("WARNING: blocker is now empty!") + continue + elif fba.is_blacklisted(blocker): + print(f"WARNING: blocker='{blocker}' is blacklisted now!") + continue + + # DEBUG: print(f"DEBUG: blocker='{blocker}'") + fba.update_last_blocked(blocker) + + if software == "pleroma": + print("INFO: blocker:", blocker) + try: + # Blocks + json = fba.fetch_nodeinfo(blocker, nodeinfo_url) + if json is None: + print("WARNING: Could not fetch nodeinfo from blocker:", blocker) + continue + elif not "metadata" in json: + print(f"WARNING: json()={len(json)} does not have key 'metadata', blocker='{blocker}'") + continue + elif not "federation" in json["metadata"]: + print(f"WARNING: json()={len(json['metadata'])} does not have key 'federation', blocker='{blocker}'") + continue + + # DEBUG: print("DEBUG: Updating nodeinfo:", blocker) + fba.update_last_nodeinfo(blocker) + + federation = json["metadata"]["federation"] + + if "enabled" in federation: + # DEBUG: print("DEBUG: Instance has no block list to analyze:", blocker) + continue + + if "mrf_simple" in federation: + for block_level, blocks in ( + {**federation["mrf_simple"], + **{"quarantined_instances": federation["quarantined_instances"]}} + ).items(): + # DEBUG: print("DEBUG: block_level, 
blocks():", block_level, len(blocks)) + block_level = fba.tidyup_domain(block_level) + # DEBUG: print("DEBUG: BEFORE block_level:", block_level) + + if block_level == "": + print("WARNING: block_level is now empty!") + continue + + # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") + for blocked in blocks: + # DEBUG: print("DEBUG: BEFORE blocked:", blocked) + blocked = fba.tidyup_domain(blocked) + # DEBUG: print("DEBUG: AFTER blocked:", blocked) + + if blocked == "": + print("WARNING: blocked is empty after fba.tidyup_domain():", blocker, block_level) + continue + elif fba.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue + elif blocked.count("*") > 1: + # -ACK!-oma also started obscuring domains without hash + fba.cursor.execute( + "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] + ) + searchres = fba.cursor.fetchone() + # DEBUG: print("DEBUG: searchres[]:", type(searchres)) + + if searchres == None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") + continue + + blocked = searchres[0] + nodeinfo_url = searchres[1] + # DEBUG: print("DEBUG: Looked up domain:", blocked) + elif not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + elif not fba.is_instance_registered(blocked): + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) + + if not fba.is_instance_blocked(blocker, 
blocked, block_level): + # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level) + fba.block_instance(blocker, blocked, "unknown", block_level) + + if block_level == "reject": + # DEBUG: print("DEBUG: Adding to blockdict:", blocked) + blockdict.append( + { + "blocked": blocked, + "reason" : None + }) + else: + # DEBUG: print(f"DEBUG: Updating block last seen for blocker='{blocker}',blocked='{blocked}' ...") + fba.update_last_seen(blocker, blocked, block_level) + + # DEBUG: print("DEBUG: Committing changes ...") + fba.connection.commit() + + # Reasons + if "mrf_simple_info" in federation: + # DEBUG: print("DEBUG: Found mrf_simple_info:", blocker) + for block_level, info in ( + {**federation["mrf_simple_info"], + **(federation["quarantined_instances_info"] + if "quarantined_instances_info" in federation + else {})} + ).items(): + # DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items())) + block_level = fba.tidyup_domain(block_level) + # DEBUG: print("DEBUG: BEFORE block_level:", block_level) + + if block_level == "": + print("WARNING: block_level is now empty!") + continue + + # DEBUG: print(f"DEBUG: Checking {len(info.items())} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") + for blocked, reason in info.items(): + # DEBUG: print("DEBUG: BEFORE blocked:", blocked) + blocked = fba.tidyup_domain(blocked) + # DEBUG: print("DEBUG: AFTER blocked:", blocked) + + if blocked == "": + print("WARNING: blocked is empty after fba.tidyup_domain():", blocker, block_level) + continue + elif fba.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue + elif blocked.count("*") > 1: + # same domain guess as above, but for reasons field + fba.cursor.execute( + "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? 
ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] + ) + searchres = fba.cursor.fetchone() + + if searchres == None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") + continue + + blocked = searchres[0] + origin = searchres[1] + nodeinfo_url = searchres[2] + elif not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + elif not fba.is_instance_registered(blocked): + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) + + # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"]) + fba.update_block_reason(reason["reason"], blocker, blocked, block_level) + + for entry in blockdict: + if entry["blocked"] == blocked: + # DEBUG: print("DEBUG: Updating entry reason:", blocked) + entry["reason"] = reason["reason"] + + fba.connection.commit() + except Exception as e: + print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") + elif software == "mastodon": + print("INFO: blocker:", blocker) + try: + # json endpoint for newer mastodongs + try: + json = { + "reject" : [], + "media_removal" : [], + "followers_only": [], + "report_removal": [] + } + + # handling CSRF, I've saw at least one server requiring it to access the endpoint + # DEBUG: print("DEBUG: Fetching meta:", blocker) + meta = bs4.BeautifulSoup( + fba.get_response(blocker, "/", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text, + "html.parser", + ) + try: + csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"] + # DEBUG: 
print("DEBUG: Adding CSRF token:", blocker, csrf) + reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}} + except BaseException as e: + # DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker, e) + reqheaders = fba.api_headers + + # DEBUG: print("DEBUG: Querying API domain_blocks:", blocker) + blocks = fba.get_response(blocker, "/api/v1/instance/domain_blocks", reqheaders, (config.get("connection_timeout"), config.get("read_timeout"))).json() + + print(f"INFO: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}' ...") + for block in blocks: + entry = { + 'domain': block['domain'], + 'hash' : block['digest'], + 'reason': block['comment'] + } + + # DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment']) + if block['severity'] == 'suspend': + # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") + json['reject'].append(entry) + elif block['severity'] == 'silence': + # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") + json['followers_only'].append(entry) + elif block['severity'] == 'reject_media': + # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") + json['media_removal'].append(entry) + elif block['severity'] == 'reject_reports': + # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") + json['report_removal'].append(entry) + else: + print("WARNING: Unknown severity:", block['severity'], block['domain']) + except BaseException as e: + # DEBUG: print(f"DEBUG: Failed, trying mastodon-specific fetches: blocker='{blocker}',exception[{type(e)}]={str(e)}") + json = fba.get_mastodon_blocks(blocker) + + print(f"INFO: Checking {len(json.items())} entries from blocker='{blocker}',software='{software}' ...") + for block_level, blocks in json.items(): + # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, 
block_level, len(blocks)) + block_level = fba.tidyup_domain(block_level) + # DEBUG: print("DEBUG: AFTER-block_level:", block_level) + if block_level == "": + print("WARNING: block_level is empty, blocker:", blocker) + continue + + # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") + for block in blocks: + blocked, blocked_hash, reason = block.values() + # DEBUG: print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason) + blocked = fba.tidyup_domain(blocked) + # DEBUG: print("DEBUG: AFTER-blocked:", blocked) + + if blocked == "": + print("WARNING: blocked is empty:", blocker) + continue + elif fba.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue + elif blocked.count("*") > 0: + # Doing the hash search for instance names as well to tidy up DB + fba.cursor.execute( + "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash] + ) + searchres = fba.cursor.fetchone() + + if searchres == None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!") + continue + + # DEBUG: print("DEBUG: Updating domain: ", searchres[0]) + blocked = searchres[0] + origin = searchres[1] + nodeinfo_url = searchres[2] + + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + elif not fba.is_instance_registered(blocked): + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) + elif not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + + # DEBUG: print("DEBUG: 
Looking up instance by domain:", blocked) + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + elif not fba.is_instance_registered(blocked): + # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) + fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) + + blocking = blocked if blocked.count("*") <= 1 else blocked_hash + # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'") + + if not fba.is_instance_blocked(blocker, blocked, block_level): + # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level) + fba.block_instance(blocker, blocking, reason, block_level) + + if block_level == "reject": + blockdict.append({ + "blocked": blocked, + "reason" : reason + }) + else: + # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocking='{blocking}' ...") + fba.update_last_seen(blocker, blocking, block_level) + fba.update_block_reason(reason, blocker, blocking, block_level) + + # DEBUG: print("DEBUG: Committing changes ...") + fba.connection.commit() + except Exception as e: + print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") + elif software == "friendica" or software == "misskey" or software == "bookwyrm" or software == "takahe": + print("INFO: blocker:", blocker) + try: + if software == "friendica": + json = fba.get_friendica_blocks(blocker) + elif software == "misskey": + json = fba.get_misskey_blocks(blocker) + elif software == "bookwyrm": + print("WARNING: bookwyrm is not fully supported for fetching blacklist!", blocker) + #json = fba.get_bookwyrm_blocks(blocker) + continue + elif software == "takahe": + print("WARNING: takahe is not fully supported for fetching blacklist!", blocker) + #json = fba.get_takahe_blocks(blocker) + continue + + print(f"INFO: Checking {len(json.items())} entries from 
blocker='{blocker}',software='{software}' ...") + for block_level, blocks in json.items(): + # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks)) + block_level = fba.tidyup_domain(block_level) + # DEBUG: print("DEBUG: AFTER-block_level:", block_level) + if block_level == "": + print("WARNING: block_level is empty, blocker:", blocker) + continue + + # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") + for block in blocks: + blocked, reason = block.values() + # DEBUG: print("DEBUG: BEFORE blocked:", blocked) + blocked = fba.tidyup_domain(blocked) + # DEBUG: print("DEBUG: AFTER blocked:", blocked) + + if blocked == "": + print("WARNING: blocked is empty:", blocker) + continue + elif fba.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue + elif blocked.count("*") > 0: + # Some friendica servers also obscure domains without hash + fba.cursor.execute( + "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] + ) + + searchres = fba.cursor.fetchone() + + if searchres == None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") + continue + + blocked = searchres[0] + origin = searchres[1] + nodeinfo_url = searchres[2] + elif blocked.count("?") > 0: + # Some obscure them with question marks, not sure if that's dependent on version or not + fba.cursor.execute( + "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? 
ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")] + ) + + searchres = fba.cursor.fetchone() + + if searchres == None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") + continue + + blocked = searchres[0] + origin = searchres[1] + nodeinfo_url = searchres[2] + elif not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + elif not fba.is_instance_registered(blocked): + # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) + fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) + + if not fba.is_instance_blocked(blocker, blocked, block_level): + fba.block_instance(blocker, blocked, reason, block_level) + + if block_level == "reject": + blockdict.append({ + "blocked": blocked, + "reason" : reason + }) + else: + # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...") + fba.update_last_seen(blocker, blocked, block_level) + fba.update_block_reason(reason, blocker, blocked, block_level) + + # DEBUG: print("DEBUG: Committing changes ...") + fba.connection.commit() + except Exception as e: + print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") + elif software == "gotosocial": + print("INFO: blocker:", blocker) + try: + # Blocks + federation = fba.get_response(blocker, "{fba.get_peers_url}?filter=suspended", fba.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))).json() + + if (federation == None): + print("WARNING: No valid response:", blocker); + elif "error" in federation: + print("WARNING: API returned error:", federation["error"]) + else: + print(f"INFO: Checking {len(federation)} entries from 
blocker='{blocker}',software='{software}' ...") + for peer in federation: + blocked = peer["domain"].lower() + # DEBUG: print("DEBUG: BEFORE blocked:", blocked) + blocked = fba.tidyup_domain(blocked) + # DEBUG: print("DEBUG: AFTER blocked:", blocked) + + if blocked == "": + print("WARNING: blocked is empty:", blocker) + continue + elif fba.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue + elif blocked.count("*") > 0: + # GTS does not have hashes for obscured domains, so we have to guess it + fba.cursor.execute( + "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] + ) + searchres = fba.cursor.fetchone() + + if searchres == None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") + continue + + blocked = searchres[0] + origin = searchres[1] + nodeinfo_url = searchres[2] + elif not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + elif not fba.is_instance_registered(blocked): + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) + + if not fba.is_instance_blocked(blocker, blocked, "reject"): + # DEBUG: print(f"DEBUG: blocker='{blocker}' is blocking '{blocked}' for unknown reason at this point") + fba.block_instance(blocker, blocked, "unknown", "reject") + + blockdict.append({ + "blocked": blocked, + "reason" : None + }) + else: + # DEBUG: print(f"DEBUG: Updating block last seen for blocker='{blocker}',blocked='{blocked}' ...") + 
fba.update_last_seen(blocker, blocked, "reject") + + if "public_comment" in peer: + # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, peer["public_comment"]) + fba.update_block_reason(peer["public_comment"], blocker, blocked, "reject") + + for entry in blockdict: + if entry["blocked"] == blocked: + # DEBUG: print(f"DEBUG: Setting block reason for blocked='{blocked}':'{peer['public_comment']}'") + entry["reason"] = peer["public_comment"] + + # DEBUG: print("DEBUG: Committing changes ...") + fba.connection.commit() + except Exception as e: + print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") + else: + print("WARNING: Unknown software:", blocker, software) + + if config.get("bot_enabled") and len(blockdict) > 0: + send_bot_post(blocker, blockdict) + + blockdict = [] + + # DEBUG: print("DEBUG: EXIT!") + +def fetch_cs(args: argparse.Namespace): + # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!") + domains = { + "silenced": list(), + "blocked": list(), + } + + try: + doc = bs4.BeautifulSoup( + reqto.get("https://meta.chaos.social/federation", headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))).text, + "html.parser", + ) + # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}") + silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table") + + # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}") + domains["silenced"] = domains["silenced"] + find_domains(silenced) + blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table") + + # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}") + domains["blocked"] = domains["blocked"] + find_domains(blocked) + + except BaseException as e: + print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(e)}]:'{str(e)}'") + sys.exit(255) + + # DEBUG: print(f"DEBUG: domains()={len(domains)}") + if len(domains) > 0: + boot.acquire_lock() + + print(f"INFO: Adding {len(domains)} new instances ...") + for 
block_level in domains: + # DEBUG: print(f"DEBUG: block_level='{block_level}'") + + for row in domains[block_level]: + # DEBUG: print(f"DEBUG: row='{row}'") + if not fba.is_instance_registered(row["domain"]): + print(f"INFO: Fetching instances from domain='{row['domain']}' ...") + fba.fetch_instances(row["domain"], None, None, sys.argv[0]) + + if not fba.is_instance_blocked('chaos.social', row["domain"], block_level): + # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...") + fba.block_instance('chaos.social', row["domain"], row["reason"], block_level) + + # DEBUG: print("DEBUG: Committing changes ...") + fba.connection.commit() + + # DEBUG: print("DEBUG: EXIT!") + +def fetch_fba_rss(args: argparse.Namespace): + # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!") + domains = list() + + try: + print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...") + response = reqto.get(args.feed, headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))) + + # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}") + if response.ok and response.status_code < 300 and len(response.text) > 0: + # DEBUG: print(f"DEBUG: Parsing RSS feed ...") + rss = atoma.parse_rss_bytes(response.content) + + # DEBUG: print(f"DEBUG: rss[]={type(rss)}") + for item in rss.items: + # DEBUG: print(f"DEBUG: item={item}") + domain = item.link.split("=")[1] + + if fba.is_blacklisted(domain): + # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") + continue + elif domain in domains: + # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!") + continue + elif fba.is_instance_registered(domain): + # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!") + continue + + # DEBUG: print(f"DEBUG: domain='{domain}'") + domains.append(domain) + + except BaseException as e: + 
print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'") + sys.exit(255) + + # DEBUG: print(f"DEBUG: domains()={len(domains)}") + if len(domains) > 0: + boot.acquire_lock() + + print(f"INFO: Adding {len(domains)} new instances ...") + for domain in domains: + print(f"INFO: Fetching instances from domain='{domain}' ...") + fba.fetch_instances(domain, None, None, sys.argv[0]) + + # DEBUG: print("DEBUG: EXIT!") + +def fetch_fbabot_atom(args: argparse.Namespace): + # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!") + feed = "https://ryona.agency/users/fba/feed.atom" + + domains = list() + try: + print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...") + response = reqto.get(feed, headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))) + + # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}") + if response.ok and response.status_code < 300 and len(response.text) > 0: + # DEBUG: print(f"DEBUG: Parsing ATOM feed ...") + atom = atoma.parse_atom_bytes(response.content) + + # DEBUG: print(f"DEBUG: atom[]={type(atom)}") + for entry in atom.entries: + # DEBUG: print(f"DEBUG: entry[]={type(entry)}") + doc = bs4.BeautifulSoup(entry.content.value, "html.parser") + # DEBUG: print(f"DEBUG: doc[]={type(doc)}") + for element in doc.findAll("a"): + # DEBUG: print(f"DEBUG: element[{type(element)}]={element}") + domain = fba.tidyup_domain(element["href"]) + + # DEBUG: print(f"DEBUG: domain='{domain}'") + if fba.is_blacklisted(domain): + # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") + continue + elif domain in domains: + # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!") + continue + elif fba.is_instance_registered(domain): + # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!") + continue + + # DEBUG: print(f"DEBUG: domain='{domain}'") + domains.append(domain) + + 
except BaseException as e: + print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'") + sys.exit(255) + + # DEBUG: print(f"DEBUG: domains()={len(domains)}") + if len(domains) > 0: + boot.acquire_lock() + + print(f"INFO: Adding {len(domains)} new instances ...") + for domain in domains: + print(f"INFO: Fetching instances from domain='{domain}' ...") + fba.fetch_instances(domain, None, None, sys.argv[0]) + + # DEBUG: print("DEBUG: EXIT!") + +def fetch_instances(args: argparse.Namespace): + # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!") + boot.acquire_lock() + + # Initial fetch + fba.fetch_instances(args.domain, None, None, sys.argv[0]) + + # Loop through some instances + fba.cursor.execute( + "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")] + ) + + rows = fba.cursor.fetchall() + print(f"INFO: Checking {len(rows)} entries ...") + for row in rows: + # DEBUG: print("DEBUG: domain:", row[0]) + if fba.is_blacklisted(row[0]): + print("WARNING: domain is blacklisted:", row[0]) + continue + + print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'") + fba.fetch_instances(row[0], row[1], row[2], sys.argv[0], row[3]) + + # DEBUG: print("DEBUG: EXIT!")