From a0a56bec8f9487ee226bbd9317d9d083838fa4c2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Mon, 5 Jun 2023 22:09:42 +0200 Subject: [PATCH] Continued: - introduced argparse which is a more flexible way of handling command-line arguments - moved all commands to fba/command.py you can now access them through $ ./fba.py - please use --help to see which commands are all supported, you can also use it on a single command to get all supported arguments --- check_instance.py => fba.py | 18 +- fba/__init__.py | 9 +- fba/boot.py | 83 ++++ fba/command.py | 787 ++++++++++++++++++++++++++++++++++++ fba/fba.py | 56 +++ fetch_bkali.py | 70 ---- fetch_blocks.py | 527 ------------------------ fetch_cs.py | 124 ------ fetch_fba_rss.py | 67 --- fetch_instances.py | 50 --- requirements.txt | 1 + 11 files changed, 940 insertions(+), 852 deletions(-) rename check_instance.py => fba.py (67%) create mode 100644 fba/command.py delete mode 100755 fetch_bkali.py delete mode 100755 fetch_blocks.py delete mode 100755 fetch_cs.py delete mode 100755 fetch_fba_rss.py delete mode 100755 fetch_instances.py diff --git a/check_instance.py b/fba.py similarity index 67% rename from check_instance.py rename to fba.py index cea34c8..d7d40c7 100755 --- a/check_instance.py +++ b/fba.py @@ -18,21 +18,13 @@ # along with this program. If not, see . 
import sys -import validators from fba import * -domain = sys.argv[1] +# Init parser +boot.init_parser() -if not validators.domain(domain): - print(f"WARNING: domain='{domain}' is not valid") - sys.exit(100) -elif fba.is_blacklisted(domain): - print(f"WARNING: domain='{domain}' is blacklisted") - sys.exit(101) -elif fba.is_instance_registered(domain): - print(f"WARNING: domain='{domain}' is already registered") - sys.exit(102) - -print(f"INFO: domain='{domain}' is not known") +# Run command +boot.run_command() +# Shutdown again boot.shutdown() diff --git a/fba/__init__.py b/fba/__init__.py index 2df28f7..888b8c3 100644 --- a/fba/__init__.py +++ b/fba/__init__.py @@ -1 +1,8 @@ -__all__ = ['boot', 'cache', 'config', 'fba', 'instances'] +__all__ = [ + 'boot', + 'cache', + 'command', + 'config', + 'fba', + 'instances' +] diff --git a/fba/boot.py b/fba/boot.py index dfe7ae4..b121562 100644 --- a/fba/boot.py +++ b/fba/boot.py @@ -14,15 +14,18 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
+import argparse import os import sys import tempfile import zc.lockfile +from fba import command from fba import fba # Lock file lockfile = tempfile.gettempdir() + '/fba.lock' LOCK = None +_PARSER = None def acquire_lock(): global LOCK @@ -35,12 +38,92 @@ def acquire_lock(): print(f"ERROR: Cannot aquire lock: '{lockfile}'") sys.exit(100) +def init_parser(): + # DEBUG: print("DEBUG: init_parser(): CALLED!") + global _PARSER + + print("DEBUG: Initializing parser ...") + _PARSER = argparse.ArgumentParser( + prog="Fedi API Block", + description="Fetches block reasons from the fediverse" + ) + subparser_command = _PARSER.add_subparsers( + dest="command", + title="Commands to execute", + required=True, + help="Command to perform", + ) + + ### Check instance ### + parser = subparser_command.add_parser( + "check_instance", + help="Checks given instance if it exists and returns proper exit code" + ) + parser.add_argument("--domain", required=True, help="Instance name (aka. domain) to check") + parser.set_defaults(command=command.check_instance) + + ### Fetch from bka.li ### + parser = subparser_command.add_parser( + "fetch_bkali", + help="Fetches domain names from bka.li API", + ) + parser.set_defaults(command=command.fetch_bkali) + + ### Fetch blocks from registered instances or given ### + parser = subparser_command.add_parser( + "fetch_blocks", + help="Fetches blocks from registered instances (run command fetch_instances first!).", + ) + parser.add_argument("--domain", help="Instance name (aka. 
domain) to fetch blocks from") + parser.set_defaults(command=command.fetch_blocks) + + ### Fetch blocks from chaos.social ### + parser = subparser_command.add_parser( + "fetch_cs", + help="Fetches blocks from chaos.social's meta sub domain.", + ) + parser.set_defaults(command=command.fetch_cs) + + ### Fetch blocks from a FBA-specific RSS feed ### + parser = subparser_command.add_parser( + "fetch_fba_rss", + help="Fetches domains from a FBA-specific RSS feed.", + ) + parser.add_argument("--feed", required=True, help="RSS feed to fetch domains from (e.g. https://fba.ryoma.agency/rss?domain=foo.bar).") + parser.set_defaults(command=command.fetch_fba_rss) + + ### Fetch blocks from FBA's bot account ### + parser = subparser_command.add_parser( + "fetch_fbabot_atom", + help="Fetches ATOM feed with domains from FBA's bot account. You may wish to re-run this command several times (at least 3 with big instances) to have a decent amount of valid instances.", + ) + parser.set_defaults(command=command.fetch_fbabot_atom) + + ### Fetch instances from given initial instance ### + parser = subparser_command.add_parser( + "fetch_instances", + help="Fetches instances (aka. \"domains\") from an initial instance.", + ) + parser.add_argument("--domain", help="Instance name (aka. domain) to fetch further instances from. Start with a large instance, e.g. 
mastodon.social .") + parser.set_defaults(command=command.fetch_instances) + + # DEBUG: print("DEBUG: init_parser(): EXIT!") + +def run_command(): + # DEBUG: print("DEBUG: run_command(): CALLED!") + args = _PARSER.parse_args() + print(f"DEBUG: args[{type(args)}]={args}") + args.command(args) + # DEBUG: print("DEBUG: run_command(): EXIT!") + def shutdown(): print("DEBUG: Closing database connection ...") fba.connection.close() + if LOCK != None: print("DEBUG: Releasing lock ...") LOCK.close() print(f"DEBUG: Deleting lockfile='{lockfile}' ...") os.remove(lockfile) + print("DEBUG: Shutdown completed.") diff --git a/fba/command.py b/fba/command.py new file mode 100644 index 0000000..3bad9dd --- /dev/null +++ b/fba/command.py @@ -0,0 +1,787 @@ +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+ +import argparse +import atoma +import bs4 +import itertools +import json +import re +import reqto +import sys +import time +import validators + +from fba import boot +from fba import config +from fba import fba + +def check_instance(args: argparse.Namespace) -> int: + # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!") + status = 0 + if not validators.domain(args.domain): + print(f"WARNING: args.domain='{args.domain}' is not valid") + status = 100 + elif fba.is_blacklisted(args.domain): + print(f"WARNING: args.domain='{args.domain}' is blacklisted") + status = 101 + elif fba.is_instance_registered(args.domain): + print(f"WARNING: args.domain='{args.domain}' is already registered") + staus = 102 + else: + print(f"INFO: args.domain='{args.domain}' is not known") + + # DEBUG: print(f"DEBUG: status={status} - EXIT!") + return status + +def fetch_bkali(args: argparse.Namespace): + # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!") + domains = list() + try: + fetched = fba.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({ + "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}" + })) + + # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'") + if len(fetched) == 0: + raise Exception("WARNING: Returned no records") + elif not "data" in fetched: + raise Exception(f"WARNING: fetched()={len(fetched)} does not contain element 'data'") + elif not "nodeinfo" in fetched["data"]: + raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain element 'nodeinfo'") + + for entry in fetched["data"]["nodeinfo"]: + # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'") + if not "domain" in entry: + print(f"WARNING: entry does not contain 'domain' - SKIPPED!") + continue + elif not validators.domain(entry["domain"]): + print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!") + continue + elif fba.is_blacklisted(entry["domain"]): + # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is 
blacklisted - SKIPPED!") + continue + elif fba.is_instance_registered(entry["domain"]): + # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!") + continue + + # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...") + domains.append(entry["domain"]) + + except BaseException as e: + print(f"ERROR: Cannot fetch graphql,exception[{type(e)}]:'{str(e)}'") + sys.exit(255) + + # DEBUG: print(f"DEBUG: domains()={len(domains)}") + if len(domains) > 0: + boot.acquire_lock() + + print(f"INFO: Adding {len(domains)} new instances ...") + for domain in domains: + print(f"INFO: Fetching instances from domain='{domain}' ...") + fba.fetch_instances(domain, None, None, sys.argv[0]) + + # DEBUG: print("DEBUG: EXIT!") + +def fetch_blocks(args: argparse.Namespace): + print(f"DEBUG: args[]={type(args)} - CALLED!") + if args.domain != None and args.domain != "": + if not validators.domain(args.domain): + print(f"WARNING: domain='{args.domain}' is not valid.") + return + elif fba.is_blacklisted(args.domain): + print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!") + return + elif not fba.is_instance_registered(args.domain): + print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.") + return + + boot.acquire_lock() + + if args.domain != None and args.domain != "": + fba.cursor.execute( + "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND domain = ?", [args.domain] + ) + else: + fba.cursor.execute( + "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) 
ORDER BY rowid DESC", [time.time() - config.get("recheck_block")] + ) + + rows = fba.cursor.fetchall() + print(f"INFO: Checking {len(rows)} entries ...") + for blocker, software, origin, nodeinfo_url in rows: + # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url) + blockdict = [] + blocker = fba.tidyup_domain(blocker) + # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software) + + if blocker == "": + print("WARNING: blocker is now empty!") + continue + elif fba.is_blacklisted(blocker): + print(f"WARNING: blocker='{blocker}' is blacklisted now!") + continue + + # DEBUG: print(f"DEBUG: blocker='{blocker}'") + fba.update_last_blocked(blocker) + + if software == "pleroma": + print("INFO: blocker:", blocker) + try: + # Blocks + json = fba.fetch_nodeinfo(blocker, nodeinfo_url) + if json is None: + print("WARNING: Could not fetch nodeinfo from blocker:", blocker) + continue + elif not "metadata" in json: + print(f"WARNING: json()={len(json)} does not have key 'metadata', blocker='{blocker}'") + continue + elif not "federation" in json["metadata"]: + print(f"WARNING: json()={len(json['metadata'])} does not have key 'federation', blocker='{blocker}'") + continue + + # DEBUG: print("DEBUG: Updating nodeinfo:", blocker) + fba.update_last_nodeinfo(blocker) + + federation = json["metadata"]["federation"] + + if "enabled" in federation: + # DEBUG: print("DEBUG: Instance has no block list to analyze:", blocker) + continue + + if "mrf_simple" in federation: + for block_level, blocks in ( + {**federation["mrf_simple"], + **{"quarantined_instances": federation["quarantined_instances"]}} + ).items(): + # DEBUG: print("DEBUG: block_level, blocks():", block_level, len(blocks)) + block_level = fba.tidyup_domain(block_level) + # DEBUG: print("DEBUG: BEFORE block_level:", block_level) + + if block_level == "": + print("WARNING: block_level is now empty!") + continue + + # DEBUG: print(f"DEBUG: Checking {len(blocks)} 
entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") + for blocked in blocks: + # DEBUG: print("DEBUG: BEFORE blocked:", blocked) + blocked = fba.tidyup_domain(blocked) + # DEBUG: print("DEBUG: AFTER blocked:", blocked) + + if blocked == "": + print("WARNING: blocked is empty after fba.tidyup_domain():", blocker, block_level) + continue + elif fba.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue + elif blocked.count("*") > 1: + # -ACK!-oma also started obscuring domains without hash + fba.cursor.execute( + "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] + ) + searchres = fba.cursor.fetchone() + # DEBUG: print("DEBUG: searchres[]:", type(searchres)) + + if searchres == None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") + continue + + blocked = searchres[0] + nodeinfo_url = searchres[1] + # DEBUG: print("DEBUG: Looked up domain:", blocked) + elif not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + elif not fba.is_instance_registered(blocked): + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) + + if not fba.is_instance_blocked(blocker, blocked, block_level): + # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level) + fba.block_instance(blocker, blocked, "unknown", block_level) + + if block_level == "reject": + # DEBUG: print("DEBUG: Adding to blockdict:", blocked) + blockdict.append( + { + "blocked": 
blocked, + "reason" : None + }) + else: + # DEBUG: print(f"DEBUG: Updating block last seen for blocker='{blocker}',blocked='{blocked}' ...") + fba.update_last_seen(blocker, blocked, block_level) + + # DEBUG: print("DEBUG: Committing changes ...") + fba.connection.commit() + + # Reasons + if "mrf_simple_info" in federation: + # DEBUG: print("DEBUG: Found mrf_simple_info:", blocker) + for block_level, info in ( + {**federation["mrf_simple_info"], + **(federation["quarantined_instances_info"] + if "quarantined_instances_info" in federation + else {})} + ).items(): + # DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items())) + block_level = fba.tidyup_domain(block_level) + # DEBUG: print("DEBUG: BEFORE block_level:", block_level) + + if block_level == "": + print("WARNING: block_level is now empty!") + continue + + # DEBUG: print(f"DEBUG: Checking {len(info.items())} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") + for blocked, reason in info.items(): + # DEBUG: print("DEBUG: BEFORE blocked:", blocked) + blocked = fba.tidyup_domain(blocked) + # DEBUG: print("DEBUG: AFTER blocked:", blocked) + + if blocked == "": + print("WARNING: blocked is empty after fba.tidyup_domain():", blocker, block_level) + continue + elif fba.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue + elif blocked.count("*") > 1: + # same domain guess as above, but for reasons field + fba.cursor.execute( + "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? 
ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] + ) + searchres = fba.cursor.fetchone() + + if searchres == None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") + continue + + blocked = searchres[0] + origin = searchres[1] + nodeinfo_url = searchres[2] + elif not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + elif not fba.is_instance_registered(blocked): + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) + + # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"]) + fba.update_block_reason(reason["reason"], blocker, blocked, block_level) + + for entry in blockdict: + if entry["blocked"] == blocked: + # DEBUG: print("DEBUG: Updating entry reason:", blocked) + entry["reason"] = reason["reason"] + + fba.connection.commit() + except Exception as e: + print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") + elif software == "mastodon": + print("INFO: blocker:", blocker) + try: + # json endpoint for newer mastodongs + try: + json = { + "reject" : [], + "media_removal" : [], + "followers_only": [], + "report_removal": [] + } + + # handling CSRF, I've saw at least one server requiring it to access the endpoint + # DEBUG: print("DEBUG: Fetching meta:", blocker) + meta = bs4.BeautifulSoup( + fba.get_response(blocker, "/", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text, + "html.parser", + ) + try: + csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"] + # DEBUG: 
print("DEBUG: Adding CSRF token:", blocker, csrf) + reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}} + except BaseException as e: + # DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker, e) + reqheaders = fba.api_headers + + # DEBUG: print("DEBUG: Querying API domain_blocks:", blocker) + blocks = fba.get_response(blocker, "/api/v1/instance/domain_blocks", reqheaders, (config.get("connection_timeout"), config.get("read_timeout"))).json() + + print(f"INFO: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}' ...") + for block in blocks: + entry = { + 'domain': block['domain'], + 'hash' : block['digest'], + 'reason': block['comment'] + } + + # DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment']) + if block['severity'] == 'suspend': + # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") + json['reject'].append(entry) + elif block['severity'] == 'silence': + # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") + json['followers_only'].append(entry) + elif block['severity'] == 'reject_media': + # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") + json['media_removal'].append(entry) + elif block['severity'] == 'reject_reports': + # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") + json['report_removal'].append(entry) + else: + print("WARNING: Unknown severity:", block['severity'], block['domain']) + except BaseException as e: + # DEBUG: print(f"DEBUG: Failed, trying mastodon-specific fetches: blocker='{blocker}',exception[{type(e)}]={str(e)}") + json = fba.get_mastodon_blocks(blocker) + + print(f"INFO: Checking {len(json.items())} entries from blocker='{blocker}',software='{software}' ...") + for block_level, blocks in json.items(): + # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, 
block_level, len(blocks)) + block_level = fba.tidyup_domain(block_level) + # DEBUG: print("DEBUG: AFTER-block_level:", block_level) + if block_level == "": + print("WARNING: block_level is empty, blocker:", blocker) + continue + + # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") + for block in blocks: + blocked, blocked_hash, reason = block.values() + # DEBUG: print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason) + blocked = fba.tidyup_domain(blocked) + # DEBUG: print("DEBUG: AFTER-blocked:", blocked) + + if blocked == "": + print("WARNING: blocked is empty:", blocker) + continue + elif fba.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue + elif blocked.count("*") > 0: + # Doing the hash search for instance names as well to tidy up DB + fba.cursor.execute( + "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash] + ) + searchres = fba.cursor.fetchone() + + if searchres == None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!") + continue + + # DEBUG: print("DEBUG: Updating domain: ", searchres[0]) + blocked = searchres[0] + origin = searchres[1] + nodeinfo_url = searchres[2] + + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + elif not fba.is_instance_registered(blocked): + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) + elif not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + + # DEBUG: print("DEBUG: 
Looking up instance by domain:", blocked) + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + elif not fba.is_instance_registered(blocked): + # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) + fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) + + blocking = blocked if blocked.count("*") <= 1 else blocked_hash + # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'") + + if not fba.is_instance_blocked(blocker, blocked, block_level): + # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level) + fba.block_instance(blocker, blocking, reason, block_level) + + if block_level == "reject": + blockdict.append({ + "blocked": blocked, + "reason" : reason + }) + else: + # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocking='{blocking}' ...") + fba.update_last_seen(blocker, blocking, block_level) + fba.update_block_reason(reason, blocker, blocking, block_level) + + # DEBUG: print("DEBUG: Committing changes ...") + fba.connection.commit() + except Exception as e: + print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") + elif software == "friendica" or software == "misskey" or software == "bookwyrm" or software == "takahe": + print("INFO: blocker:", blocker) + try: + if software == "friendica": + json = fba.get_friendica_blocks(blocker) + elif software == "misskey": + json = fba.get_misskey_blocks(blocker) + elif software == "bookwyrm": + print("WARNING: bookwyrm is not fully supported for fetching blacklist!", blocker) + #json = fba.get_bookwyrm_blocks(blocker) + continue + elif software == "takahe": + print("WARNING: takahe is not fully supported for fetching blacklist!", blocker) + #json = fba.get_takahe_blocks(blocker) + continue + + print(f"INFO: Checking {len(json.items())} entries from 
blocker='{blocker}',software='{software}' ...") + for block_level, blocks in json.items(): + # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks)) + block_level = fba.tidyup_domain(block_level) + # DEBUG: print("DEBUG: AFTER-block_level:", block_level) + if block_level == "": + print("WARNING: block_level is empty, blocker:", blocker) + continue + + # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") + for block in blocks: + blocked, reason = block.values() + # DEBUG: print("DEBUG: BEFORE blocked:", blocked) + blocked = fba.tidyup_domain(blocked) + # DEBUG: print("DEBUG: AFTER blocked:", blocked) + + if blocked == "": + print("WARNING: blocked is empty:", blocker) + continue + elif fba.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue + elif blocked.count("*") > 0: + # Some friendica servers also obscure domains without hash + fba.cursor.execute( + "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] + ) + + searchres = fba.cursor.fetchone() + + if searchres == None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") + continue + + blocked = searchres[0] + origin = searchres[1] + nodeinfo_url = searchres[2] + elif blocked.count("?") > 0: + # Some obscure them with question marks, not sure if that's dependent on version or not + fba.cursor.execute( + "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? 
ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")] + ) + + searchres = fba.cursor.fetchone() + + if searchres == None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") + continue + + blocked = searchres[0] + origin = searchres[1] + nodeinfo_url = searchres[2] + elif not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + elif not fba.is_instance_registered(blocked): + # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) + fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) + + if not fba.is_instance_blocked(blocker, blocked, block_level): + fba.block_instance(blocker, blocked, reason, block_level) + + if block_level == "reject": + blockdict.append({ + "blocked": blocked, + "reason" : reason + }) + else: + # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...") + fba.update_last_seen(blocker, blocked, block_level) + fba.update_block_reason(reason, blocker, blocked, block_level) + + # DEBUG: print("DEBUG: Committing changes ...") + fba.connection.commit() + except Exception as e: + print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") + elif software == "gotosocial": + print("INFO: blocker:", blocker) + try: + # Blocks + federation = fba.get_response(blocker, "{fba.get_peers_url}?filter=suspended", fba.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))).json() + + if (federation == None): + print("WARNING: No valid response:", blocker); + elif "error" in federation: + print("WARNING: API returned error:", federation["error"]) + else: + print(f"INFO: Checking {len(federation)} entries from 
blocker='{blocker}',software='{software}' ...") + for peer in federation: + blocked = peer["domain"].lower() + # DEBUG: print("DEBUG: BEFORE blocked:", blocked) + blocked = fba.tidyup_domain(blocked) + # DEBUG: print("DEBUG: AFTER blocked:", blocked) + + if blocked == "": + print("WARNING: blocked is empty:", blocker) + continue + elif fba.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + continue + elif blocked.count("*") > 0: + # GTS does not have hashes for obscured domains, so we have to guess it + fba.cursor.execute( + "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] + ) + searchres = fba.cursor.fetchone() + + if searchres == None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") + continue + + blocked = searchres[0] + origin = searchres[1] + nodeinfo_url = searchres[2] + elif not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + continue + elif not fba.is_instance_registered(blocked): + # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) + + if not fba.is_instance_blocked(blocker, blocked, "reject"): + # DEBUG: print(f"DEBUG: blocker='{blocker}' is blocking '{blocked}' for unknown reason at this point") + fba.block_instance(blocker, blocked, "unknown", "reject") + + blockdict.append({ + "blocked": blocked, + "reason" : None + }) + else: + # DEBUG: print(f"DEBUG: Updating block last seen for blocker='{blocker}',blocked='{blocked}' ...") + 
def _is_new_domain(domain: str, collected: list) -> bool:
    """Tell whether ``domain`` should be queued for an instance fetch.

    A domain is rejected when it is blacklisted, was already collected
    during this run, or is already registered. The checks run in that order
    and short-circuit, so the registration lookup only happens when needed.
    """
    return not (
        fba.is_blacklisted(domain)
        or domain in collected
        or fba.is_instance_registered(domain)
    )


def _fetch_collected_domains(domains: list):
    """Fetch instances from every collected domain.

    Does nothing (and does not take the lock) when ``domains`` is empty.
    """
    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
    if not domains:
        return

    boot.acquire_lock()

    print(f"INFO: Adding {len(domains)} new instances ...")
    for domain in domains:
        print(f"INFO: Fetching instances from domain='{domain}' ...")
        fba.fetch_instances(domain, None, None, sys.argv[0])


def fetch_cs(args: argparse.Namespace):
    """Import the silenced/blocked instance tables from meta.chaos.social.

    Downloads the federation page, extracts both tables, registers unknown
    instances and records chaos.social's blocks for them. Exits the process
    with status 255 when the page cannot be fetched or parsed.

    Args:
        args: Parsed command-line arguments (not read by this command).
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = {
        "silenced": [],
        "blocked": [],
    }

    try:
        doc = bs4.BeautifulSoup(
            reqto.get(
                "https://meta.chaos.social/federation",
                headers=fba.headers,
                timeout=(config.get("connection_timeout"), config.get("read_timeout")),
            ).text,
            "html.parser",
        )
        # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")

        # The page marks each table with a heading "<block_level>-instances".
        for block_level in ("silenced", "blocked"):
            table = doc.find("h2", {"id": f"{block_level}-instances"}).findNext("table")

            # DEBUG: print(f"DEBUG: table[]={type(table)}")
            # find_domains() lives in fba/fba.py, so it must be qualified.
            domains[block_level] = domains[block_level] + fba.find_domains(table)

    except Exception as e:
        # Exception (not BaseException) so Ctrl-C and sys.exit() still propagate.
        print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    # Count the rows, not the dict keys: the dict always has two keys, so
    # len(domains) could never signal "nothing found".
    total = sum(len(rows) for rows in domains.values())

    # DEBUG: print(f"DEBUG: total={total}")
    if total > 0:
        boot.acquire_lock()

        print(f"INFO: Adding {total} new instances ...")
        for block_level, rows in domains.items():
            # DEBUG: print(f"DEBUG: block_level='{block_level}'")

            for row in rows:
                # DEBUG: print(f"DEBUG: row='{row}'")
                if not fba.is_instance_registered(row["domain"]):
                    print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
                    fba.fetch_instances(row["domain"], None, None, sys.argv[0])

                if not fba.is_instance_blocked('chaos.social', row["domain"], block_level):
                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
                    fba.block_instance('chaos.social', row["domain"], row["reason"], block_level)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")


def fetch_fba_rss(args: argparse.Namespace):
    """Fetch new instances announced in an FBA-specific RSS feed.

    Each item link is expected to carry the domain after the first ``=``.
    Exits the process with status 255 when the feed cannot be fetched or
    parsed.

    Args:
        args: Parsed command-line arguments; ``args.feed`` is the feed URL.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    domains = []

    try:
        print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
        response = reqto.get(args.feed, headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print(f"DEBUG: Parsing RSS feed ...")
            rss = atoma.parse_rss_bytes(response.content)

            # DEBUG: print(f"DEBUG: rss[]={type(rss)}")
            for item in rss.items:
                # DEBUG: print(f"DEBUG: item={item}")
                parts = (item.link or "").split("=")
                if len(parts) < 2:
                    # One malformed item must not abort the whole import.
                    print(f"WARNING: item.link='{item.link}' does not contain a domain - SKIPPED!")
                    continue

                domain = parts[1]
                if _is_new_domain(domain, domains):
                    # DEBUG: print(f"DEBUG: domain='{domain}'")
                    domains.append(domain)

    except Exception as e:
        # The feed URL only exists as args.feed in this function.
        print(f"ERROR: Cannot fetch feed='{args.feed}',exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    _fetch_collected_domains(domains)

    # DEBUG: print("DEBUG: EXIT!")


def fetch_fbabot_atom(args: argparse.Namespace):
    """Fetch new instances linked from the FBA bot account's ATOM feed.

    Every ``<a>`` element in each entry's HTML content is treated as a
    candidate domain. Exits the process with status 255 when the feed cannot
    be fetched or parsed.

    Args:
        args: Parsed command-line arguments (not read by this command).
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    feed = "https://ryona.agency/users/fba/feed.atom"

    domains = []
    try:
        print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
        response = reqto.get(feed, headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout")))

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
        if response.ok and response.status_code < 300 and len(response.text) > 0:
            # DEBUG: print(f"DEBUG: Parsing ATOM feed ...")
            atom = atoma.parse_atom_bytes(response.content)

            # DEBUG: print(f"DEBUG: atom[]={type(atom)}")
            for entry in atom.entries:
                # DEBUG: print(f"DEBUG: entry[]={type(entry)}")
                doc = bs4.BeautifulSoup(entry.content.value, "html.parser")

                # DEBUG: print(f"DEBUG: doc[]={type(doc)}")
                for element in doc.findAll("a"):
                    # DEBUG: print(f"DEBUG: element[{type(element)}]={element}")
                    domain = fba.tidyup_domain(element["href"])

                    if _is_new_domain(domain, domains):
                        # DEBUG: print(f"DEBUG: domain='{domain}'")
                        domains.append(domain)

    except Exception as e:
        print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'")
        sys.exit(255)

    _fetch_collected_domains(domains)

    # DEBUG: print("DEBUG: EXIT!")


def fetch_instances(args: argparse.Namespace):
    """Fetch instances from ``args.domain`` and re-check stale known ones.

    After the initial fetch, every registered instance running a supported
    software whose last instance fetch is missing or older than the
    ``recheck_instance`` config value is fetched again.

    Args:
        args: Parsed command-line arguments; ``args.domain`` is the
            starting domain.
    """
    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
    boot.acquire_lock()

    # Initial fetch
    fba.fetch_instances(args.domain, None, None, sys.argv[0])

    # Loop through some instances
    fba.cursor.execute(
        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
    )

    rows = fba.cursor.fetchall()
    print(f"INFO: Checking {len(rows)} entries ...")
    for domain, origin, software, nodeinfo_url in rows:
        # DEBUG: print("DEBUG: domain:", domain)
        if fba.is_blacklisted(domain):
            print("WARNING: domain is blacklisted:", domain)
            continue

        print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
        fba.fetch_instances(domain, origin, software, sys.argv[0], nodeinfo_url)

    # DEBUG: print("DEBUG: EXIT!")
def find_domains(tag: bs4.element.Tag) -> list:
    """Extract domain/reason pairs from an HTML table of instances.

    The first cell of each row is taken as the domain and the second, when
    present, as the block reason. Rows without any ``<td>`` (e.g. a header
    row) are skipped, as are blacklisted and invalid domains.

    Args:
        tag: The ``<table>`` element to scan.

    Returns:
        A list of ``{"domain": ..., "reason": ...}`` dicts in table order.
        ``reason`` is ``None`` for a row that has no second cell.

    Raises:
        ValueError: If ``tag`` is not a ``bs4.element.Tag``.
        KeyError: If the table contains no rows at all.
    """
    # DEBUG: print(f"DEBUG: tag[]={type(tag)} - CALLED!")
    if not isinstance(tag, bs4.element.Tag):
        raise ValueError(f"Parameter tag[]={type(tag)} is not type of bs4.element.Tag")

    rows = tag.select("tr")
    if len(rows) == 0:
        raise KeyError("No table rows found in table!")

    domains = []
    for element in rows:
        # DEBUG: print(f"DEBUG: element[]={type(element)}")
        cells = element.findAll("td")
        if len(cells) == 0:
            # DEBUG: print("DEBUG: Skipping element, no <td> found")
            continue

        domain = tidyup_domain(cells[0].text)
        # A single-cell row has no reason; None is accepted by block_instance().
        reason = tidyup_reason(cells[1].text) if len(cells) > 1 else None

        # DEBUG: print(f"DEBUG: domain='{domain}',reason='{reason}'")

        if is_blacklisted(domain):
            print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
            continue
        elif domain == "gab.com/.ai, develop.gab.com":
            # This one table cell lists three domains sharing the same reason.
            print("DEBUG: Multiple domains detected in one row")
            for gab_domain in ("gab.com", "gab.ai", "develop.gab.com"):
                domains.append({
                    "domain": gab_domain,
                    "reason": reason,
                })
            continue
        elif not validators.domain(domain):
            print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
            continue

        # DEBUG: print(f"DEBUG: Adding domain='{domain}' ...")
        domains.append({
            "domain": domain,
            "reason": reason,
        })

    # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!")
    return domains
fediverse nodes -# Copyright (C) 2023 Free Software Foundation -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import json -import sys -import validators -from fba import * - -domains = list() -try: - fetched = fba.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({ - "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}" - })) - - # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'") - if len(fetched) == 0: - raise Exception("WARNING: Returned no records") - elif not "data" in fetched: - raise Exception(f"WARNING: fetched()={len(fetched)} does not contain element 'data'") - elif not "nodeinfo" in fetched["data"]: - raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain element 'nodeinfo'") - - for entry in fetched["data"]["nodeinfo"]: - # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'") - if not "domain" in entry: - print(f"WARNING: entry does not contain 'domain' - SKIPPED!") - continue - elif not validators.domain(entry["domain"]): - print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!") - continue - elif fba.is_blacklisted(entry["domain"]): - # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!") - continue - elif fba.is_instance_registered(entry["domain"]): - # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!") - continue - - # 
DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...") - domains.append(entry["domain"]) - -except BaseException as e: - print(f"ERROR: Cannot fetch graphql,exception[{type(e)}]:'{str(e)}'") - sys.exit(255) - -# DEBUG: print(f"DEBUG: domains()={len(domains)}") -if len(domains) > 0: - boot.acquire_lock() - - print(f"INFO: Adding {len(domains)} new instances ...") - for domain in domains: - print(f"INFO: Fetching instances from domain='{domain}' ...") - fba.fetch_instances(domain, None, None, sys.argv[0]) - -boot.shutdown() diff --git a/fetch_blocks.py b/fetch_blocks.py deleted file mode 100755 index c12c26f..0000000 --- a/fetch_blocks.py +++ /dev/null @@ -1,527 +0,0 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- - -# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes -# Copyright (C) 2023 Free Software Foundation -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import bs4 -import itertools -import re -import reqto -import sys -import time -import validators -from fba import * - -boot.acquire_lock() - -fba.cursor.execute( - "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) 
ORDER BY rowid DESC", [time.time() - config.get("recheck_block")] -) - -rows = fba.cursor.fetchall() -print(f"INFO: Checking {len(rows)} entries ...") -for blocker, software, origin, nodeinfo_url in rows: - # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url) - blockdict = [] - blocker = fba.tidyup_domain(blocker) - # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software) - - if blocker == "": - print("WARNING: blocker is now empty!") - continue - elif fba.is_blacklisted(blocker): - print(f"WARNING: blocker='{blocker}' is blacklisted now!") - continue - - # DEBUG: print(f"DEBUG: blocker='{blocker}'") - fba.update_last_blocked(blocker) - - if software == "pleroma": - print("INFO: blocker:", blocker) - try: - # Blocks - json = fba.fetch_nodeinfo(blocker, nodeinfo_url) - if json is None: - print("WARNING: Could not fetch nodeinfo from blocker:", blocker) - continue - elif not "metadata" in json: - print(f"WARNING: json()={len(json)} does not have key 'metadata', blocker='{blocker}'") - continue - elif not "federation" in json["metadata"]: - print(f"WARNING: json()={len(json['metadata'])} does not have key 'federation', blocker='{blocker}'") - continue - - # DEBUG: print("DEBUG: Updating nodeinfo:", blocker) - fba.update_last_nodeinfo(blocker) - - federation = json["metadata"]["federation"] - - if "enabled" in federation: - # DEBUG: print("DEBUG: Instance has no block list to analyze:", blocker) - continue - - if "mrf_simple" in federation: - for block_level, blocks in ( - {**federation["mrf_simple"], - **{"quarantined_instances": federation["quarantined_instances"]}} - ).items(): - # DEBUG: print("DEBUG: block_level, blocks():", block_level, len(blocks)) - block_level = fba.tidyup_domain(block_level) - # DEBUG: print("DEBUG: BEFORE block_level:", block_level) - - if block_level == "": - print("WARNING: block_level is now empty!") - continue - - # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries 
from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") - for blocked in blocks: - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty after fba.tidyup_domain():", blocker, block_level) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 1: - # -ACK!-oma also started obscuring domains without hash - fba.cursor.execute( - "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] - ) - searchres = fba.cursor.fetchone() - # DEBUG: print("DEBUG: searchres[]:", type(searchres)) - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - nodeinfo_url = searchres[1] - # DEBUG: print("DEBUG: Looked up domain:", blocked) - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - - if not fba.is_instance_blocked(blocker, blocked, block_level): - # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level) - fba.block_instance(blocker, blocked, "unknown", block_level) - - if block_level == "reject": - # DEBUG: print("DEBUG: Adding to blockdict:", blocked) - blockdict.append( - { - "blocked": blocked, - 
"reason" : None - }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen for blocker='{blocker}',blocked='{blocked}' ...") - fba.update_last_seen(blocker, blocked, block_level) - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - - # Reasons - if "mrf_simple_info" in federation: - # DEBUG: print("DEBUG: Found mrf_simple_info:", blocker) - for block_level, info in ( - {**federation["mrf_simple_info"], - **(federation["quarantined_instances_info"] - if "quarantined_instances_info" in federation - else {})} - ).items(): - # DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items())) - block_level = fba.tidyup_domain(block_level) - # DEBUG: print("DEBUG: BEFORE block_level:", block_level) - - if block_level == "": - print("WARNING: block_level is now empty!") - continue - - # DEBUG: print(f"DEBUG: Checking {len(info.items())} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") - for blocked, reason in info.items(): - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty after fba.tidyup_domain():", blocker, block_level) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 1: - # same domain guess as above, but for reasons field - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? 
ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] - ) - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - - # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"]) - fba.update_block_reason(reason["reason"], blocker, blocked, block_level) - - for entry in blockdict: - if entry["blocked"] == blocked: - # DEBUG: print("DEBUG: Updating entry reason:", blocked) - entry["reason"] = reason["reason"] - - fba.connection.commit() - except Exception as e: - print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") - elif software == "mastodon": - print("INFO: blocker:", blocker) - try: - # json endpoint for newer mastodongs - try: - json = { - "reject" : [], - "media_removal" : [], - "followers_only": [], - "report_removal": [] - } - - # handling CSRF, I've saw at least one server requiring it to access the endpoint - # DEBUG: print("DEBUG: Fetching meta:", blocker) - meta = bs4.BeautifulSoup( - fba.get_response(blocker, "/", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text, - "html.parser", - ) - try: - csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"] - # DEBUG: 
print("DEBUG: Adding CSRF token:", blocker, csrf) - reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}} - except BaseException as e: - # DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker, e) - reqheaders = fba.api_headers - - # DEBUG: print("DEBUG: Querying API domain_blocks:", blocker) - blocks = fba.get_response(blocker, "/api/v1/instance/domain_blocks", reqheaders, (config.get("connection_timeout"), config.get("read_timeout"))).json() - - print(f"INFO: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}' ...") - for block in blocks: - entry = { - 'domain': block['domain'], - 'hash' : block['digest'], - 'reason': block['comment'] - } - - # DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment']) - if block['severity'] == 'suspend': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") - json['reject'].append(entry) - elif block['severity'] == 'silence': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") - json['followers_only'].append(entry) - elif block['severity'] == 'reject_media': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") - json['media_removal'].append(entry) - elif block['severity'] == 'reject_reports': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") - json['report_removal'].append(entry) - else: - print("WARNING: Unknown severity:", block['severity'], block['domain']) - except BaseException as e: - # DEBUG: print(f"DEBUG: Failed, trying mastodon-specific fetches: blocker='{blocker}',exception[{type(e)}]={str(e)}") - json = fba.get_mastodon_blocks(blocker) - - print(f"INFO: Checking {len(json.items())} entries from blocker='{blocker}',software='{software}' ...") - for block_level, blocks in json.items(): - # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, 
block_level, len(blocks)) - block_level = fba.tidyup_domain(block_level) - # DEBUG: print("DEBUG: AFTER-block_level:", block_level) - if block_level == "": - print("WARNING: block_level is empty, blocker:", blocker) - continue - - # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") - for block in blocks: - blocked, blocked_hash, reason = block.values() - # DEBUG: print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER-blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty:", blocker) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 0: - # Doing the hash search for instance names as well to tidy up DB - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash] - ) - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!") - continue - - # DEBUG: print("DEBUG: Updating domain: ", searchres[0]) - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - - # DEBUG: print("DEBUG: 
Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - - blocking = blocked if blocked.count("*") <= 1 else blocked_hash - # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'") - - if not fba.is_instance_blocked(blocker, blocked, block_level): - # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level) - fba.block_instance(blocker, blocking, reason, block_level) - - if block_level == "reject": - blockdict.append({ - "blocked": blocked, - "reason" : reason - }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocking='{blocking}' ...") - fba.update_last_seen(blocker, blocking, block_level) - fba.update_block_reason(reason, blocker, blocking, block_level) - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - except Exception as e: - print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") - elif software == "friendica" or software == "misskey" or software == "bookwyrm" or software == "takahe": - print("INFO: blocker:", blocker) - try: - if software == "friendica": - json = fba.get_friendica_blocks(blocker) - elif software == "misskey": - json = fba.get_misskey_blocks(blocker) - elif software == "bookwyrm": - print("WARNING: bookwyrm is not fully supported for fetching blacklist!", blocker) - #json = fba.get_bookwyrm_blocks(blocker) - continue - elif software == "takahe": - print("WARNING: takahe is not fully supported for fetching blacklist!", blocker) - #json = fba.get_takahe_blocks(blocker) - continue - - print(f"INFO: Checking {len(json.items())} entries from 
blocker='{blocker}',software='{software}' ...") - for block_level, blocks in json.items(): - # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks)) - block_level = fba.tidyup_domain(block_level) - # DEBUG: print("DEBUG: AFTER-block_level:", block_level) - if block_level == "": - print("WARNING: block_level is empty, blocker:", blocker) - continue - - # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") - for block in blocks: - blocked, reason = block.values() - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty:", blocker) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 0: - # Some friendica servers also obscure domains without hash - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] - ) - - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - elif blocked.count("?") > 0: - # Some obscure them with question marks, not sure if that's dependent on version or not - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? 
ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")] - ) - - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - - if not fba.is_instance_blocked(blocker, blocked, block_level): - fba.block_instance(blocker, blocked, reason, block_level) - - if block_level == "reject": - blockdict.append({ - "blocked": blocked, - "reason" : reason - }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...") - fba.update_last_seen(blocker, blocked, block_level) - fba.update_block_reason(reason, blocker, blocked, block_level) - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - except Exception as e: - print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") - elif software == "gotosocial": - print("INFO: blocker:", blocker) - try: - # Blocks - federation = fba.get_response(blocker, "{fba.get_peers_url}?filter=suspended", fba.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))).json() - - if (federation == None): - print("WARNING: No valid response:", blocker); - elif "error" in federation: - print("WARNING: API returned error:", federation["error"]) - else: - print(f"INFO: Checking {len(federation)} entries from 
blocker='{blocker}',software='{software}' ...") - for peer in federation: - blocked = peer["domain"].lower() - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty:", blocker) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 0: - # GTS does not have hashes for obscured domains, so we have to guess it - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] - ) - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - - if not fba.is_instance_blocked(blocker, blocked, "reject"): - # DEBUG: print(f"DEBUG: blocker='{blocker}' is blocking '{blocked}' for unknown reason at this point") - fba.block_instance(blocker, blocked, "unknown", "reject") - - blockdict.append({ - "blocked": blocked, - "reason" : None - }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen for blocker='{blocker}',blocked='{blocked}' ...") - 
fba.update_last_seen(blocker, blocked, "reject") - - if "public_comment" in peer: - # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, peer["public_comment"]) - fba.update_block_reason(peer["public_comment"], blocker, blocked, "reject") - - for entry in blockdict: - if entry["blocked"] == blocked: - # DEBUG: print(f"DEBUG: Setting block reason for blocked='{blocked}':'{peer['public_comment']}'") - entry["reason"] = peer["public_comment"] - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - except Exception as e: - print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") - else: - print("WARNING: Unknown software:", blocker, software) - - if config.get("bot_enabled") and len(blockdict) > 0: - send_bot_post(blocker, blockdict) - - blockdict = [] - -boot.shutdown() diff --git a/fetch_cs.py b/fetch_cs.py deleted file mode 100755 index ec7b6de..0000000 --- a/fetch_cs.py +++ /dev/null @@ -1,124 +0,0 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- - -# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes -# Copyright (C) 2023 Free Software Foundation -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . 
- -import bs4 -import json -import reqto -import sys -import validators -from fba import * - -def find_domains(tag: bs4.element.Tag) -> list: - # DEBUG: print(f"DEBUG: tag[]={type(tag)} - CALLED!") - if not isinstance(tag, bs4.element.Tag): - raise ValueError(f"Parameter tag[]={type(tag)} is not type of bs4.element.Tag") - elif not isinstance(tag, bs4.element.Tag): - raise KeyError("Cannot find table with instances!") - elif len(tag.select("tr")) == 0: - raise KeyError("No table rows found in table!") - - domains = list() - for element in tag.select("tr"): - # DEBUG: print(f"DEBUG: element[]={type(element)}") - if not element.find("td"): - # DEBUG: print("DEBUG: Skipping element, no found") - continue - - domain = fba.tidyup_domain(element.find("td").text) - reason = fba.tidyup_reason(element.findAll("td")[1].text) - - # DEBUG: print(f"DEBUG: domain='{domain}',reason='{reason}'") - - if fba.is_blacklisted(domain): - print(f"WARNING: domain='{domain}' is blacklisted - skipped!") - continue - elif domain == "gab.com/.ai, develop.gab.com": - print(f"DEBUG: Multiple domains detected in one row") - domains.append({ - "domain": "gab.com", - "reason": reason, - }) - domains.append({ - "domain": "gab.ai", - "reason": reason, - }) - domains.append({ - "domain": "develop.gab.com", - "reason": reason, - }) - continue - elif not validators.domain(domain): - print(f"WARNING: domain='{domain}' is not a valid domain - skipped!") - continue - - # DEBUG: print(f"DEBUG: Adding domain='{domain}' ...") - domains.append({ - "domain": domain, - "reason": reason, - }) - - # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!") - return domains - -domains = { - "silenced": list(), - "blocked": list(), -} - -try: - doc = bs4.BeautifulSoup( - reqto.get("https://meta.chaos.social/federation", headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))).text, - "html.parser", - ) - # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}") - silenced = 
doc.find("h2", {"id": "silenced-instances"}).findNext("table") - - # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}") - domains["silenced"] = domains["silenced"] + find_domains(silenced) - blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table") - - # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}") - domains["blocked"] = domains["blocked"] + find_domains(blocked) - -except BaseException as e: - print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(e)}]:'{str(e)}'") - sys.exit(255) - -# DEBUG: print(f"DEBUG: domains()={len(domains)}") -if len(domains) > 0: - boot.acquire_lock() - - print(f"INFO: Adding {len(domains)} new instances ...") - for block_level in domains: - # DEBUG: print(f"DEBUG: block_level='{block_level}'") - - for row in domains[block_level]: - # DEBUG: print(f"DEBUG: row='{row}'") - if not fba.is_instance_registered(row["domain"]): - print(f"INFO: Fetching instances from domain='{row['domain']}' ...") - fba.fetch_instances(row["domain"], None, None, sys.argv[0]) - - if not fba.is_instance_blocked('chaos.social', row["domain"], block_level): - # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...") - fba.block_instance('chaos.social', row["domain"], row["reason"], block_level) - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - -boot.shutdown() diff --git a/fetch_fba_rss.py b/fetch_fba_rss.py deleted file mode 100755 index 614b10e..0000000 --- a/fetch_fba_rss.py +++ /dev/null @@ -1,67 +0,0 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- - -# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes -# Copyright (C) 2023 Free Software Foundation -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. 
-# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import reqto -import atoma -import sys -from fba import * - -feed = sys.argv[1] - -domains = list() -try: - print(f"INFO: Fetch FBA-specific RSS feed='{feed}' ...") - response = reqto.get(feed, headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))) - - # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}") - if response.ok and response.status_code < 300 and len(response.text) > 0: - # DEBUG: print(f"DEBUG: Parsing RSS feed ...") - rss = atoma.parse_rss_bytes(response.content) - - # DEBUG: print(f"DEBUG: rss[]={type(rss)}") - for item in rss.items: - # DEBUG: print(f"DEBUG: item={item}") - domain = item.link.split("=")[1] - - if fba.is_blacklisted(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") - continue - elif domain in domains: - # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!") - continue - elif fba.is_instance_registered(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!") - continue - - # DEBUG: print(f"DEBUG: domain='{domain}'") - domains.append(domain) - -except BaseException as e: - print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'") - -# DEBUG: print(f"DEBUG: domains()={len(domains)}") -if len(domains) > 0: - boot.acquire_lock() - - print(f"INFO: Adding {len(domains)} new instances ...") - for domain in domains: - print(f"INFO: Fetching instances from domain='{domain}' ...") - fba.fetch_instances(domain, None, None, sys.argv[0]) - -boot.shutdown() diff 
--git a/fetch_instances.py b/fetch_instances.py deleted file mode 100755 index 079c1a6..0000000 --- a/fetch_instances.py +++ /dev/null @@ -1,50 +0,0 @@ -#!/usr/bin/python3 -# -*- coding: utf-8 -*- - -# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes -# Copyright (C) 2023 Free Software Foundation -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see . - -import sqlite3 -import sys -import json -import time -import validators -from fba import * - -boot.acquire_lock() - -instance = sys.argv[1] - -# Initial fetch -fba.fetch_instances(instance, None, None, sys.argv[0]) - -# Loop through some instances -fba.cursor.execute( - "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) 
ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")] -) - -rows = fba.cursor.fetchall() -print(f"INFO: Checking {len(rows)} entries ...") -for row in rows: - # DEBUG: print("DEBUG: domain:", row[0]) - if fba.is_blacklisted(row[0]): - print("WARNING: domain is blacklisted:", row[0]) - continue - - print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'") - fba.fetch_instances(row[0], row[1], row[2], sys.argv[0], row[3]) - -boot.shutdown() diff --git a/requirements.txt b/requirements.txt index a02a02a..cc8c92e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +argparse atoma beautifulsoup4 fastapi -- 2.39.5