]> git.mxchange.org Git - fba.git/commitdiff
Continued:
authorRoland Häder <roland@mxchange.org>
Mon, 5 Jun 2023 20:09:42 +0000 (22:09 +0200)
committerRoland Häder <roland@mxchange.org>
Mon, 5 Jun 2023 21:08:45 +0000 (23:08 +0200)
- introduced argparse, which is a more flexible way of handling command-line
  arguments
- moved all commands to fba/command.py; you can now access them through
  $ ./fba.py <command>
- please use --help to see which commands are supported; you can also use it
  on a single command to see all of that command's supported arguments

12 files changed:
check_instance.py [deleted file]
fba.py [new file with mode: 0755]
fba/__init__.py
fba/boot.py
fba/command.py [new file with mode: 0644]
fba/fba.py
fetch_bkali.py [deleted file]
fetch_blocks.py [deleted file]
fetch_cs.py [deleted file]
fetch_fba_rss.py [deleted file]
fetch_instances.py [deleted file]
requirements.txt

diff --git a/check_instance.py b/check_instance.py
deleted file mode 100755 (executable)
index cea34c8..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-#!/usr/bin/python3
-# -*- coding: utf-8 -*-
-
-# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
-# Copyright (C) 2023 Free Software Foundation
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published
-# by the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
-import sys
-import validators
-from fba import *
-
-domain = sys.argv[1]
-
-if not validators.domain(domain):
-    print(f"WARNING: domain='{domain}' is not valid")
-    sys.exit(100)
-elif fba.is_blacklisted(domain):
-    print(f"WARNING: domain='{domain}' is blacklisted")
-    sys.exit(101)
-elif fba.is_instance_registered(domain):
-    print(f"WARNING: domain='{domain}' is already registered")
-    sys.exit(102)
-
-print(f"INFO: domain='{domain}' is not known")
-
-boot.shutdown()
diff --git a/fba.py b/fba.py
new file mode 100755 (executable)
index 0000000..d7d40c7
--- /dev/null
+++ b/fba.py
@@ -0,0 +1,30 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
+# Copyright (C) 2023 Free Software Foundation
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+import sys
+from fba import *
+
+# Init parser
+boot.init_parser()
+
+try:
+    # Run command
+    boot.run_command()
+finally:
+    # Shutdown again. This sits in a finally clause so the database
+    # connection is closed and the lock file removed even when the command
+    # raises or leaves through sys.exit() (argparse does so on invalid
+    # arguments, fetch_bkali does so on fetch errors).
+    boot.shutdown()
index 2df28f755b9005690e89d9922d3432f028998554..888b8c34b4c12fd077dd964330abaa5aeb266c97 100644 (file)
@@ -1 +1,8 @@
-__all__ = ['boot', 'cache', 'config', 'fba', 'instances']
+# Submodules pulled in by "from fba import *"; the ./fba.py launcher uses
+# that form and then refers to them by name (e.g. boot.init_parser()).
+__all__ = [
+    'boot',
+    'cache',
+    'command',
+    'config',
+    'fba',
+    'instances'
+]
index dfe7ae4edeb612d1cb578255b30ac8cc6483dd21..b121562c0504075c7f4a9c8591b375b4f0a61b30 100644 (file)
 # You should have received a copy of the GNU Affero General Public License
 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
 
+import argparse
 import os
 import sys
 import tempfile
 import zc.lockfile
+from fba import command
 from fba import fba
 
 # Lock file
 lockfile = tempfile.gettempdir() + '/fba.lock'
 LOCK = None
+_PARSER = None
 
 def acquire_lock():
     global LOCK
@@ -35,12 +38,92 @@ def acquire_lock():
         print(f"ERROR: Cannot aquire lock: '{lockfile}'")
         sys.exit(100)
 
+def init_parser():
+    """Build the command-line parser and store it in the module-level _PARSER.
+
+    Registers one sub-command per function in fba.command: check_instance,
+    fetch_bkali, fetch_blocks, fetch_cs, fetch_fba_rss, fetch_fbabot_atom and
+    fetch_instances. Each sub-parser stores its handler function under the
+    "command" attribute via set_defaults(); run_command() later calls
+    args.command(args).
+
+    Nothing is parsed here; this only constructs the parser.
+    """
+    # DEBUG: print("DEBUG: init_parser(): CALLED!")
+    global _PARSER
+
+    print("DEBUG: Initializing parser ...")
+    _PARSER = argparse.ArgumentParser(
+        prog="Fedi API Block",
+        description="Fetches block reasons from the fediverse"
+    )
+    # required=True makes argparse report an error when no sub-command is given
+    subparser_command = _PARSER.add_subparsers(
+        dest="command",
+        title="Commands to execute",
+        required=True,
+        help="Command to perform",
+    )
+
+    ### Check instance ###
+    parser = subparser_command.add_parser(
+        "check_instance",
+        help="Checks given instance if it exists and returns proper exit code"
+    )
+    parser.add_argument("--domain", required=True, help="Instance name (aka. domain) to check")
+    # The handler is stored under the same attribute name as the sub-command
+    # dest, because run_command() invokes args.command(args)
+    parser.set_defaults(command=command.check_instance)
+
+    ### Fetch from bka.li ###
+    parser = subparser_command.add_parser(
+        "fetch_bkali",
+        help="Fetches domain names from bka.li API",
+    )
+    parser.set_defaults(command=command.fetch_bkali)
+
+    ### Fetch blocks from registered instances or given ###
+    parser = subparser_command.add_parser(
+        "fetch_blocks",
+        help="Fetches blocks from registered instances (run command fetch_instances first!).",
+    )
+    # --domain is optional for this command
+    parser.add_argument("--domain", help="Instance name (aka. domain) to fetch blocks from")
+    parser.set_defaults(command=command.fetch_blocks)
+
+    ### Fetch blocks from chaos.social ###
+    parser = subparser_command.add_parser(
+        "fetch_cs",
+        help="Fetches blocks from chaos.social's meta sub domain.",
+    )
+    parser.set_defaults(command=command.fetch_cs)
+
+    ### Fetch blocks from a FBA-specific RSS feed  ###
+    parser = subparser_command.add_parser(
+        "fetch_fba_rss",
+        help="Fetches domains from a FBA-specific RSS feed.",
+    )
+    parser.add_argument("--feed", required=True, help="RSS feed to fetch domains from (e.g. https://fba.ryoma.agency/rss?domain=foo.bar).")
+    parser.set_defaults(command=command.fetch_fba_rss)
+
+    ### Fetch blocks from FBA's bot account ###
+    parser = subparser_command.add_parser(
+        "fetch_fbabot_atom",
+        help="Fetches ATOM feed with domains from FBA's bot account. You may wish to re-run this command several times (at least 3 with big instances) to have a decent amount of valid instances.",
+    )
+    parser.set_defaults(command=command.fetch_fbabot_atom)
+
+    ### Fetch instances from given initial instance ###
+    parser = subparser_command.add_parser(
+        "fetch_instances",
+        help="Fetches instances (aka. \"domains\") from an initial instance.",
+    )
+    # NOTE(review): --domain is not marked required here although the help
+    # text describes it as the starting instance - confirm this is intended
+    parser.add_argument("--domain", help="Instance name (aka. domain) to fetch further instances from. Start with a large instance, e.g. mastodon.social .")
+    parser.set_defaults(command=command.fetch_instances)
+
+    # DEBUG: print("DEBUG: init_parser(): EXIT!")
+
+def run_command():
+    """Parse the command line and run the selected command handler.
+
+    init_parser() must have been called before. The handler stored in
+    args.command is invoked with the parsed namespace as its only argument.
+
+    Returns:
+        Whatever the handler returns (check_instance returns an integer
+        status, handlers without a return statement yield None), so the
+        caller may turn it into a process exit code.
+
+    Raises:
+        RuntimeError: if the parser has not been initialized yet.
+    """
+    # DEBUG: print("DEBUG: run_command(): CALLED!")
+    if _PARSER is None:
+        raise RuntimeError("init_parser() must be called before run_command()")
+
+    args = _PARSER.parse_args()
+    print(f"DEBUG: args[{type(args)}]={args}")
+
+    # Hand the handler's status back instead of silently discarding it
+    status = args.command(args)
+
+    # DEBUG: print(f"DEBUG: run_command(): status={status} - EXIT!")
+    return status
+
 def shutdown():
+    """Close the database connection and release the lock, if one is held.
+
+    When the module-level LOCK is set, the lock is closed and the lock file
+    at the module-level path "lockfile" is deleted afterwards.
+    """
     print("DEBUG: Closing database connection ...")
     fba.connection.close()
+
+    # LOCK keeps its initial value None unless a lock was stored in it
     if LOCK != None:
         print("DEBUG: Releasing lock ...")
         LOCK.close()
         print(f"DEBUG: Deleting lockfile='{lockfile}' ...")
         os.remove(lockfile)
+
     print("DEBUG: Shutdown completed.")
diff --git a/fba/command.py b/fba/command.py
new file mode 100644 (file)
index 0000000..3bad9dd
--- /dev/null
@@ -0,0 +1,787 @@
+# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
+# Copyright (C) 2023 Free Software Foundation
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+import argparse
+import atoma
+import bs4
+import itertools
+import json
+import re
+import reqto
+import sys
+import time
+import validators
+
+from fba import boot
+from fba import config
+from fba import fba
+
+def check_instance(args: argparse.Namespace) -> int:
+    """Check whether args.domain is a valid instance that is not known yet.
+
+    Args:
+        args: Parsed command-line arguments; only args.domain is read.
+
+    Returns:
+        0 if the domain is valid, not blacklisted and not registered yet,
+        100 if it is not a valid domain name, 101 if it is blacklisted and
+        102 if it is already registered.
+    """
+    # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
+    status = 0
+    if not validators.domain(args.domain):
+        print(f"WARNING: args.domain='{args.domain}' is not valid")
+        status = 100
+    elif fba.is_blacklisted(args.domain):
+        print(f"WARNING: args.domain='{args.domain}' is blacklisted")
+        status = 101
+    elif fba.is_instance_registered(args.domain):
+        print(f"WARNING: args.domain='{args.domain}' is already registered")
+        # Was misspelled as "staus", which left the returned status at 0
+        status = 102
+    else:
+        print(f"INFO: args.domain='{args.domain}' is not known")
+
+    # DEBUG: print(f"DEBUG: status={status} - EXIT!")
+    return status
+
+def fetch_bkali(args: argparse.Namespace):
+    """Fetch the domain list from the bka.li GraphQL API and crawl new domains.
+
+    Every returned entry whose domain is valid, not blacklisted and not yet
+    registered is collected. If at least one such domain was found, the lock
+    is acquired and fba.fetch_instances() is invoked for each of them.
+
+    Args:
+        args: Parsed command-line arguments (not read by this command).
+
+    Any error while fetching or checking the response is printed and the
+    process exits with status 255.
+    """
+    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
+    domains = []
+    try:
+        fetched = fba.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
+            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
+        }))
+
+        # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'")
+        if len(fetched) == 0:
+            raise Exception("WARNING: Returned no records")
+        elif "data" not in fetched:
+            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain element 'data'")
+        elif "nodeinfo" not in fetched["data"]:
+            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain element 'nodeinfo'")
+
+        for entry in fetched["data"]["nodeinfo"]:
+            # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
+            if "domain" not in entry:
+                print("WARNING: entry does not contain 'domain' - SKIPPED!")
+                continue
+            elif not validators.domain(entry["domain"]):
+                print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
+                continue
+            elif fba.is_blacklisted(entry["domain"]):
+                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
+                continue
+            elif fba.is_instance_registered(entry["domain"]):
+                # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
+                continue
+
+            # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
+            domains.append(entry["domain"])
+
+    # Only real errors are handled here: catching BaseException would also
+    # report KeyboardInterrupt and SystemExit as a failed GraphQL fetch.
+    except Exception as e:
+        print(f"ERROR: Cannot fetch graphql,exception[{type(e)}]:'{str(e)}'")
+        sys.exit(255)
+
+    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
+    if len(domains) > 0:
+        # The lock is taken only when there is something to add
+        boot.acquire_lock()
+
+        print(f"INFO: Adding {len(domains)} new instances ...")
+        for domain in domains:
+            print(f"INFO: Fetching instances from domain='{domain}' ...")
+            fba.fetch_instances(domain, None, None, sys.argv[0])
+
+    # DEBUG: print("DEBUG: EXIT!")
+
+def fetch_blocks(args: argparse.Namespace):
+    print(f"DEBUG: args[]={type(args)} - CALLED!")
+    if args.domain != None and args.domain != "":
+        if not validators.domain(args.domain):
+            print(f"WARNING: domain='{args.domain}' is not valid.")
+            return
+        elif fba.is_blacklisted(args.domain):
+            print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
+            return
+        elif not fba.is_instance_registered(args.domain):
+            print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
+            return
+
+    boot.acquire_lock()
+
+    if args.domain != None and args.domain != "":
+        fba.cursor.execute(
+            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
+        )
+    else:
+        fba.cursor.execute(
+            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
+        )
+
+    rows = fba.cursor.fetchall()
+    print(f"INFO: Checking {len(rows)} entries ...")
+    for blocker, software, origin, nodeinfo_url in rows:
+        # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
+        blockdict = []
+        blocker = fba.tidyup_domain(blocker)
+        # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)
+
+        if blocker == "":
+            print("WARNING: blocker is now empty!")
+            continue
+        elif fba.is_blacklisted(blocker):
+            print(f"WARNING: blocker='{blocker}' is blacklisted now!")
+            continue
+
+        # DEBUG: print(f"DEBUG: blocker='{blocker}'")
+        fba.update_last_blocked(blocker)
+
+        if software == "pleroma":
+            print("INFO: blocker:", blocker)
+            try:
+                # Blocks
+                json = fba.fetch_nodeinfo(blocker, nodeinfo_url)
+                if json is None:
+                    print("WARNING: Could not fetch nodeinfo from blocker:", blocker)
+                    continue
+                elif not "metadata" in json:
+                    print(f"WARNING: json()={len(json)} does not have key 'metadata', blocker='{blocker}'")
+                    continue
+                elif not "federation" in json["metadata"]:
+                    print(f"WARNING: json()={len(json['metadata'])} does not have key 'federation', blocker='{blocker}'")
+                    continue
+
+                # DEBUG: print("DEBUG: Updating nodeinfo:", blocker)
+                fba.update_last_nodeinfo(blocker)
+
+                federation = json["metadata"]["federation"]
+
+                if "enabled" in federation:
+                    # DEBUG: print("DEBUG: Instance has no block list to analyze:", blocker)
+                    continue
+
+                if "mrf_simple" in federation:
+                    for block_level, blocks in (
+                        {**federation["mrf_simple"],
+                        **{"quarantined_instances": federation["quarantined_instances"]}}
+                    ).items():
+                        # DEBUG: print("DEBUG: block_level, blocks():", block_level, len(blocks))
+                        block_level = fba.tidyup_domain(block_level)
+                        # DEBUG: print("DEBUG: BEFORE block_level:", block_level)
+
+                        if block_level == "":
+                            print("WARNING: block_level is now empty!")
+                            continue
+
+                        # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
+                        for blocked in blocks:
+                            # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
+                            blocked = fba.tidyup_domain(blocked)
+                            # DEBUG: print("DEBUG: AFTER blocked:", blocked)
+
+                            if blocked == "":
+                                print("WARNING: blocked is empty after fba.tidyup_domain():", blocker, block_level)
+                                continue
+                            elif fba.is_blacklisted(blocked):
+                                # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
+                                continue
+                            elif blocked.count("*") > 1:
+                                # -ACK!-oma also started obscuring domains without hash
+                                fba.cursor.execute(
+                                    "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
+                                )
+                                searchres = fba.cursor.fetchone()
+                                # DEBUG: print("DEBUG: searchres[]:", type(searchres))
+
+                                if searchres == None:
+                                    print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
+                                    continue
+
+                                blocked = searchres[0]
+                                nodeinfo_url = searchres[1]
+                                # DEBUG: print("DEBUG: Looked up domain:", blocked)
+                            elif not validators.domain(blocked):
+                                print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
+                                continue
+
+                            # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
+                            if not validators.domain(blocked):
+                                print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
+                                continue
+                            elif not fba.is_instance_registered(blocked):
+                                # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
+                                fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url)
+
+                            if not fba.is_instance_blocked(blocker, blocked, block_level):
+                                # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level)
+                                fba.block_instance(blocker, blocked, "unknown", block_level)
+
+                                if block_level == "reject":
+                                    # DEBUG: print("DEBUG: Adding to blockdict:", blocked)
+                                    blockdict.append(
+                                        {
+                                            "blocked": blocked,
+                                            "reason" : None
+                                        })
+                            else:
+                                # DEBUG: print(f"DEBUG: Updating block last seen for blocker='{blocker}',blocked='{blocked}' ...")
+                                fba.update_last_seen(blocker, blocked, block_level)
+
+                # DEBUG: print("DEBUG: Committing changes ...")
+                fba.connection.commit()
+
+                # Reasons
+                if "mrf_simple_info" in federation:
+                    # DEBUG: print("DEBUG: Found mrf_simple_info:", blocker)
+                    for block_level, info in (
+                        {**federation["mrf_simple_info"],
+                        **(federation["quarantined_instances_info"]
+                        if "quarantined_instances_info" in federation
+                        else {})}
+                    ).items():
+                        # DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items()))
+                        block_level = fba.tidyup_domain(block_level)
+                        # DEBUG: print("DEBUG: BEFORE block_level:", block_level)
+
+                        if block_level == "":
+                            print("WARNING: block_level is now empty!")
+                            continue
+
+                        # DEBUG: print(f"DEBUG: Checking {len(info.items())} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
+                        for blocked, reason in info.items():
+                            # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
+                            blocked = fba.tidyup_domain(blocked)
+                            # DEBUG: print("DEBUG: AFTER blocked:", blocked)
+
+                            if blocked == "":
+                                print("WARNING: blocked is empty after fba.tidyup_domain():", blocker, block_level)
+                                continue
+                            elif fba.is_blacklisted(blocked):
+                                # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
+                                continue
+                            elif blocked.count("*") > 1:
+                                # same domain guess as above, but for reasons field
+                                fba.cursor.execute(
+                                    "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
+                                )
+                                searchres = fba.cursor.fetchone()
+
+                                if searchres == None:
+                                    print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
+                                    continue
+
+                                blocked = searchres[0]
+                                origin = searchres[1]
+                                nodeinfo_url = searchres[2]
+                            elif not validators.domain(blocked):
+                                print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
+                                continue
+
+                            # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
+                            if not validators.domain(blocked):
+                                print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
+                                continue
+                            elif not fba.is_instance_registered(blocked):
+                                # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
+                                fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url)
+
+                            # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"])
+                            fba.update_block_reason(reason["reason"], blocker, blocked, block_level)
+
+                            for entry in blockdict:
+                                if entry["blocked"] == blocked:
+                                    # DEBUG: print("DEBUG: Updating entry reason:", blocked)
+                                    entry["reason"] = reason["reason"]
+
+                fba.connection.commit()
+            except Exception as e:
+                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
+        elif software == "mastodon":
+            print("INFO: blocker:", blocker)
+            try:
+                # json endpoint for newer mastodongs
+                try:
+                    json = {
+                        "reject"        : [],
+                        "media_removal" : [],
+                        "followers_only": [],
+                        "report_removal": []
+                    }
+
+                    # handling CSRF, I've saw at least one server requiring it to access the endpoint
+                    # DEBUG: print("DEBUG: Fetching meta:", blocker)
+                    meta = bs4.BeautifulSoup(
+                        fba.get_response(blocker, "/", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
+                        "html.parser",
+                    )
+                    try:
+                        csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
+                        # DEBUG: print("DEBUG: Adding CSRF token:", blocker, csrf)
+                        reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}}
+                    except BaseException as e:
+                        # DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker, e)
+                        reqheaders = fba.api_headers
+
+                    # DEBUG: print("DEBUG: Querying API domain_blocks:", blocker)
+                    blocks = fba.get_response(blocker, "/api/v1/instance/domain_blocks", reqheaders, (config.get("connection_timeout"), config.get("read_timeout"))).json()
+
+                    print(f"INFO: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}' ...")
+                    for block in blocks:
+                        entry = {
+                            'domain': block['domain'],
+                            'hash'  : block['digest'],
+                            'reason': block['comment']
+                        }
+
+                        # DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
+                        if block['severity'] == 'suspend':
+                            # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
+                            json['reject'].append(entry)
+                        elif block['severity'] == 'silence':
+                            # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
+                            json['followers_only'].append(entry)
+                        elif block['severity'] == 'reject_media':
+                            # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
+                            json['media_removal'].append(entry)
+                        elif block['severity'] == 'reject_reports':
+                            # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
+                            json['report_removal'].append(entry)
+                        else:
+                            print("WARNING: Unknown severity:", block['severity'], block['domain'])
+                except BaseException as e:
+                    # DEBUG: print(f"DEBUG: Failed, trying mastodon-specific fetches: blocker='{blocker}',exception[{type(e)}]={str(e)}")
+                    json = fba.get_mastodon_blocks(blocker)
+
+                print(f"INFO: Checking {len(json.items())} entries from blocker='{blocker}',software='{software}' ...")
+                for block_level, blocks in json.items():
+                    # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
+                    block_level = fba.tidyup_domain(block_level)
+                    # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
+                    if block_level == "":
+                        print("WARNING: block_level is empty, blocker:", blocker)
+                        continue
+
+                    # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
+                    for block in blocks:
+                        blocked, blocked_hash, reason = block.values()
+                        # DEBUG: print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason)
+                        blocked = fba.tidyup_domain(blocked)
+                        # DEBUG: print("DEBUG: AFTER-blocked:", blocked)
+
+                        if blocked == "":
+                            print("WARNING: blocked is empty:", blocker)
+                            continue
+                        elif fba.is_blacklisted(blocked):
+                            # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
+                            continue
+                        elif blocked.count("*") > 0:
+                            # Doing the hash search for instance names as well to tidy up DB
+                            fba.cursor.execute(
+                                "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
+                            )
+                            searchres = fba.cursor.fetchone()
+
+                            if searchres == None:
+                                print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
+                                continue
+
+                            # DEBUG: print("DEBUG: Updating domain: ", searchres[0])
+                            blocked = searchres[0]
+                            origin = searchres[1]
+                            nodeinfo_url = searchres[2]
+
+                            # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
+                            if not validators.domain(blocked):
+                                print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
+                                continue
+                            elif not fba.is_instance_registered(blocked):
+                                # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
+                                fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url)
+                        elif not validators.domain(blocked):
+                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
+                            continue
+
+                        # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
+                        if not validators.domain(blocked):
+                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
+                            continue
+                        elif not fba.is_instance_registered(blocked):
+                            # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
+                            fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url)
+
+                        blocking = blocked if blocked.count("*") <= 1 else blocked_hash
+                        # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'")
+
+                        if not fba.is_instance_blocked(blocker, blocked, block_level):
+                            # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level)
+                            fba.block_instance(blocker, blocking, reason, block_level)
+
+                            if block_level == "reject":
+                                blockdict.append({
+                                    "blocked": blocked,
+                                    "reason" : reason
+                                })
+                        else:
+                            # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocking='{blocking}' ...")
+                            fba.update_last_seen(blocker, blocking, block_level)
+                            fba.update_block_reason(reason, blocker, blocking, block_level)
+
+                # DEBUG: print("DEBUG: Committing changes ...")
+                fba.connection.commit()
+            except Exception as e:
+                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
+        elif software == "friendica" or software == "misskey" or software == "bookwyrm" or software == "takahe":
+            print("INFO: blocker:", blocker)
+            try:
+                if software == "friendica":
+                    json = fba.get_friendica_blocks(blocker)
+                elif software == "misskey":
+                    json = fba.get_misskey_blocks(blocker)
+                elif software == "bookwyrm":
+                    print("WARNING: bookwyrm is not fully supported for fetching blacklist!", blocker)
+                    #json = fba.get_bookwyrm_blocks(blocker)
+                    continue
+                elif software == "takahe":
+                    print("WARNING: takahe is not fully supported for fetching blacklist!", blocker)
+                    #json = fba.get_takahe_blocks(blocker)
+                    continue
+
+                print(f"INFO: Checking {len(json.items())} entries from blocker='{blocker}',software='{software}' ...")
+                for block_level, blocks in json.items():
+                    # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
+                    block_level = fba.tidyup_domain(block_level)
+                    # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
+                    if block_level == "":
+                        print("WARNING: block_level is empty, blocker:", blocker)
+                        continue
+
+                    # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
+                    for block in blocks:
+                        blocked, reason = block.values()
+                        # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
+                        blocked = fba.tidyup_domain(blocked)
+                        # DEBUG: print("DEBUG: AFTER blocked:", blocked)
+
+                        if blocked == "":
+                            print("WARNING: blocked is empty:", blocker)
+                            continue
+                        elif fba.is_blacklisted(blocked):
+                            # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
+                            continue
+                        elif blocked.count("*") > 0:
+                            # Some friendica servers also obscure domains without hash
+                            fba.cursor.execute(
+                                "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
+                            )
+
+                            searchres = fba.cursor.fetchone()
+
+                            if searchres == None:
+                                print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
+                                continue
+
+                            blocked = searchres[0]
+                            origin = searchres[1]
+                            nodeinfo_url = searchres[2]
+                        elif blocked.count("?") > 0:
+                            # Some obscure them with question marks, not sure if that's dependent on version or not
+                            fba.cursor.execute(
+                                "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")]
+                            )
+
+                            searchres = fba.cursor.fetchone()
+
+                            if searchres == None:
+                                print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
+                                continue
+
+                            blocked = searchres[0]
+                            origin = searchres[1]
+                            nodeinfo_url = searchres[2]
+                        elif not validators.domain(blocked):
+                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
+                            continue
+
+                        # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
+                        if not validators.domain(blocked):
+                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
+                            continue
+                        elif not fba.is_instance_registered(blocked):
+                            # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
+                            fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url)
+
+                        if not fba.is_instance_blocked(blocker, blocked, block_level):
+                            fba.block_instance(blocker, blocked, reason, block_level)
+
+                            if block_level == "reject":
+                                blockdict.append({
+                                    "blocked": blocked,
+                                    "reason" : reason
+                                })
+                        else:
+                            # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
+                            fba.update_last_seen(blocker, blocked, block_level)
+                            fba.update_block_reason(reason, blocker, blocked, block_level)
+
+                # DEBUG: print("DEBUG: Committing changes ...")
+                fba.connection.commit()
+            except Exception as e:
+                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
+        elif software == "gotosocial":
+            print("INFO: blocker:", blocker)
+            try:
+                # Blocks
+                federation = fba.get_response(blocker, "{fba.get_peers_url}?filter=suspended", fba.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))).json()
+
+                if (federation == None):
+                    print("WARNING: No valid response:", blocker);
+                elif "error" in federation:
+                    print("WARNING: API returned error:", federation["error"])
+                else:
+                    print(f"INFO: Checking {len(federation)} entries from blocker='{blocker}',software='{software}' ...")
+                    for peer in federation:
+                        blocked = peer["domain"].lower()
+                        # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
+                        blocked = fba.tidyup_domain(blocked)
+                        # DEBUG: print("DEBUG: AFTER blocked:", blocked)
+
+                        if blocked == "":
+                            print("WARNING: blocked is empty:", blocker)
+                            continue
+                        elif fba.is_blacklisted(blocked):
+                            # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
+                            continue
+                        elif blocked.count("*") > 0:
+                            # GTS does not have hashes for obscured domains, so we have to guess it
+                            fba.cursor.execute(
+                                "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
+                            )
+                            searchres = fba.cursor.fetchone()
+
+                            if searchres == None:
+                                print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
+                                continue
+
+                            blocked = searchres[0]
+                            origin = searchres[1]
+                            nodeinfo_url = searchres[2]
+                        elif not validators.domain(blocked):
+                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
+                            continue
+
+                        # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
+                        if not validators.domain(blocked):
+                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
+                            continue
+                        elif not fba.is_instance_registered(blocked):
+                            # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
+                            fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url)
+
+                        if not fba.is_instance_blocked(blocker, blocked, "reject"):
+                            # DEBUG: print(f"DEBUG: blocker='{blocker}' is blocking '{blocked}' for unknown reason at this point")
+                            fba.block_instance(blocker, blocked, "unknown", "reject")
+
+                            blockdict.append({
+                                "blocked": blocked,
+                                "reason" : None
+                            })
+                        else:
+                            # DEBUG: print(f"DEBUG: Updating block last seen for blocker='{blocker}',blocked='{blocked}' ...")
+                            fba.update_last_seen(blocker, blocked, "reject")
+
+                        if "public_comment" in peer:
+                            # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, peer["public_comment"])
+                            fba.update_block_reason(peer["public_comment"], blocker, blocked, "reject")
+
+                            for entry in blockdict:
+                                if entry["blocked"] == blocked:
+                                    # DEBUG: print(f"DEBUG: Setting block reason for blocked='{blocked}':'{peer['public_comment']}'")
+                                    entry["reason"] = peer["public_comment"]
+
+                    # DEBUG: print("DEBUG: Committing changes ...")
+                    fba.connection.commit()
+            except Exception as e:
+                print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
+        else:
+            print("WARNING: Unknown software:", blocker, software)
+
+        if config.get("bot_enabled") and len(blockdict) > 0:
+            send_bot_post(blocker, blockdict)
+
+        blockdict = []
+
+    # DEBUG: print("DEBUG: EXIT!")
+
+def fetch_cs(args: argparse.Namespace):
+    """Import the block list published at https://meta.chaos.social/federation.
+
+    Parses the tables following the "silenced-instances" and
+    "blocked-instances" headings, fetches instances from every listed domain
+    that is not registered yet and stores a block by 'chaos.social' at the
+    matching block level for every domain that is not recorded as blocked.
+
+    Exits the process with status 255 when the page cannot be fetched or
+    parsed.
+    """
+    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
+    domains = {
+        "silenced": list(),
+        "blocked": list(),
+    }
+
+    try:
+        doc = bs4.BeautifulSoup(
+            reqto.get("https://meta.chaos.social/federation", headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))).text,
+            "html.parser",
+        )
+        # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
+        silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table")
+
+        # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}")
+        # find_domains() is a helper of the fba module, so reach it through the module
+        domains["silenced"] = domains["silenced"] + fba.find_domains(silenced)
+        blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table")
+
+        # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}")
+        domains["blocked"] = domains["blocked"] + fba.find_domains(blocked)
+
+    except Exception as e:
+        # Only real errors are reported here; KeyboardInterrupt/SystemExit pass through
+        print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(e)}]:'{str(e)}'")
+        sys.exit(255)
+
+    # The dict always holds its two keys, so count the parsed rows instead
+    total = sum(len(rows) for rows in domains.values())
+
+    # DEBUG: print(f"DEBUG: total={total}")
+    if total > 0:
+        boot.acquire_lock()
+
+        print(f"INFO: Adding {total} new instances ...")
+        for block_level in domains:
+            # DEBUG: print(f"DEBUG: block_level='{block_level}'")
+
+            for row in domains[block_level]:
+                # DEBUG: print(f"DEBUG: row='{row}'")
+                if not fba.is_instance_registered(row["domain"]):
+                    print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
+                    fba.fetch_instances(row["domain"], None, None, sys.argv[0])
+
+                if not fba.is_instance_blocked('chaos.social', row["domain"], block_level):
+                    # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
+                    fba.block_instance('chaos.social', row["domain"], row["reason"], block_level)
+
+        # DEBUG: print("DEBUG: Committing changes ...")
+        fba.connection.commit()
+
+    # DEBUG: print("DEBUG: EXIT!")
+
+def fetch_fba_rss(args: argparse.Namespace):
+    """Fetch the FBA-specific RSS feed at args.feed and add the domains it lists.
+
+    The domain of each item is the text following the first "=" in the
+    item's link. Blacklisted, duplicate and already registered domains are
+    skipped; instances are then fetched from every remaining domain.
+
+    Exits the process with status 255 when the feed cannot be fetched or
+    parsed.
+    """
+    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
+    domains = list()
+
+    try:
+        print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
+        response = reqto.get(args.feed, headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout")))
+
+        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
+        if response.ok and response.status_code < 300 and len(response.text) > 0:
+            # DEBUG: print(f"DEBUG: Parsing RSS feed ...")
+            rss = atoma.parse_rss_bytes(response.content)
+
+            # DEBUG: print(f"DEBUG: rss[]={type(rss)}")
+            for item in rss.items:
+                # DEBUG: print(f"DEBUG: item={item}")
+                domain = item.link.split("=")[1]
+
+                if fba.is_blacklisted(domain):
+                    # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
+                    continue
+                elif domain in domains:
+                    # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
+                    continue
+                elif fba.is_instance_registered(domain):
+                    # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
+                    continue
+
+                # DEBUG: print(f"DEBUG: domain='{domain}'")
+                domains.append(domain)
+
+    except Exception as e:
+        # The feed URL only exists as args.feed; there is no local named 'feed' here
+        print(f"ERROR: Cannot fetch feed='{args.feed}',exception[{type(e)}]:'{str(e)}'")
+        sys.exit(255)
+
+    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
+    if len(domains) > 0:
+        boot.acquire_lock()
+
+        print(f"INFO: Adding {len(domains)} new instances ...")
+        for domain in domains:
+            print(f"INFO: Fetching instances from domain='{domain}' ...")
+            fba.fetch_instances(domain, None, None, sys.argv[0])
+
+    # DEBUG: print("DEBUG: EXIT!")
+
+def fetch_fbabot_atom(args: argparse.Namespace):
+    """Fetch the ATOM feed of the FBA bot account and add the domains it links to.
+
+    Every <a> element found in an entry's HTML content is passed through
+    fba.tidyup_domain(). Blacklisted, duplicate and already registered
+    domains are skipped; instances are then fetched from every remaining
+    domain.
+
+    Exits the process with status 255 when the feed cannot be fetched or
+    parsed.
+    """
+    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
+    feed = "https://ryona.agency/users/fba/feed.atom"
+
+    domains = list()
+    try:
+        print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
+        response = reqto.get(feed, headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout")))
+
+        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
+        if response.ok and response.status_code < 300 and len(response.text) > 0:
+            # DEBUG: print(f"DEBUG: Parsing ATOM feed ...")
+            atom = atoma.parse_atom_bytes(response.content)
+
+            # DEBUG: print(f"DEBUG: atom[]={type(atom)}")
+            for entry in atom.entries:
+                # DEBUG: print(f"DEBUG: entry[]={type(entry)}")
+                doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
+                # DEBUG: print(f"DEBUG: doc[]={type(doc)}")
+                for element in doc.findAll("a"):
+                    # DEBUG: print(f"DEBUG: element[{type(element)}]={element}")
+                    domain = fba.tidyup_domain(element["href"])
+
+                    # DEBUG: print(f"DEBUG: domain='{domain}'")
+                    if fba.is_blacklisted(domain):
+                        # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
+                        continue
+                    elif domain in domains:
+                        # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
+                        continue
+                    elif fba.is_instance_registered(domain):
+                        # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
+                        continue
+
+                    # DEBUG: print(f"DEBUG: domain='{domain}'")
+                    domains.append(domain)
+
+    except Exception as e:
+        # Only real errors are reported as a feed failure; KeyboardInterrupt/SystemExit pass through
+        print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'")
+        sys.exit(255)
+
+    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
+    if len(domains) > 0:
+        boot.acquire_lock()
+
+        print(f"INFO: Adding {len(domains)} new instances ...")
+        for domain in domains:
+            print(f"INFO: Fetching instances from domain='{domain}' ...")
+            fba.fetch_instances(domain, None, None, sys.argv[0])
+
+    # DEBUG: print("DEBUG: EXIT!")
+
+def fetch_instances(args: argparse.Namespace):
+    """Fetch instances from args.domain, then from every known instance that is due.
+
+    An instance is due when it runs one of the software types named in the
+    query and its last_instance_fetch is NULL or older than the configured
+    "recheck_instance" interval. Blacklisted domains are only reported.
+    """
+    # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!")
+    boot.acquire_lock()
+
+    # Start with the domain given by the caller
+    fba.fetch_instances(args.domain, None, None, sys.argv[0])
+
+    # Then select the instances whose last fetch is missing or too old, newest rows first
+    fba.cursor.execute(
+        "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
+    )
+
+    due = fba.cursor.fetchall()
+    print(f"INFO: Checking {len(due)} entries ...")
+    for domain, origin, software, nodeinfo_url in due:
+        # DEBUG: print("DEBUG: domain:", domain)
+        if not fba.is_blacklisted(domain):
+            print(f"INFO: Fetching instances for instance '{domain}' ('{software}') of origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
+            fba.fetch_instances(domain, origin, software, sys.argv[0], nodeinfo_url)
+        else:
+            print("WARNING: domain is blacklisted:", domain)
+
+    # DEBUG: print("DEBUG: EXIT!")
index a7b1b38c88659c12e23f9b73de83d4ab2716ad38..bac2d5d80ed5e8ab5d8e92c820d7848d752fa238 100644 (file)
@@ -1074,6 +1074,10 @@ def block_instance(blocker: str, blocked: str, reason: str, block_level: str):
     elif is_blacklisted(blocked):
         raise Exception(f"blocked='{blocked}' is blacklisted but function invoked")
 
+    if reason != None:
+        # Maybe needs cleaning
+        reason = tidyup_reason(reason)
+
     print(f"INFO: New block: blocker='{blocker}',blocked='{blocked}', reason='{reason}', block_level='{block_level}'")
     try:
         cursor.execute(
@@ -1531,3 +1535,55 @@ def has_element(elements: list, key: str, value: any) -> bool:
 
     # DEBUG: print(f"DEBUG: has={has} - EXIT!")
     return has
+
+def find_domains(tag: bs4.element.Tag) -> list:
+    """Extract domain/reason pairs from the rows of an HTML table.
+
+    For every <tr> that contains at least one <td>, the first cell is taken
+    as the domain and the second cell (when present) as the reason; both are
+    passed through tidyup_domain()/tidyup_reason(). Blacklisted and invalid
+    domains are skipped, and the combined "gab.com/.ai, develop.gab.com" row
+    is split into its three domains.
+
+    Returns a list of dicts with the keys "domain" and "reason"; the reason
+    is None for a row that has no second cell.
+
+    Raises ValueError when tag is not a bs4.element.Tag and KeyError when it
+    contains no <tr> elements.
+    """
+    # DEBUG: print(f"DEBUG: tag[]={type(tag)} - CALLED!")
+    if not isinstance(tag, bs4.element.Tag):
+        raise ValueError(f"Parameter tag[]={type(tag)} is not type of bs4.element.Tag")
+
+    # Select the rows once and reuse them for the check and the loop
+    rows = tag.select("tr")
+    if len(rows) == 0:
+        raise KeyError("No table rows found in table!")
+
+    domains = list()
+    for element in rows:
+        # DEBUG: print(f"DEBUG: element[]={type(element)}")
+        cells = element.findAll("td")
+        if len(cells) == 0:
+            # DEBUG: print("DEBUG: Skipping element, no <td> found")
+            continue
+
+        domain = tidyup_domain(cells[0].text)
+        # A row with a single cell has no reason column, don't fail on it
+        reason = tidyup_reason(cells[1].text) if len(cells) > 1 else None
+
+        # DEBUG: print(f"DEBUG: domain='{domain}',reason='{reason}'")
+
+        if is_blacklisted(domain):
+            print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
+            continue
+        elif domain == "gab.com/.ai, develop.gab.com":
+            print("DEBUG: Multiple domains detected in one row")
+            for single in ("gab.com", "gab.ai", "develop.gab.com"):
+                domains.append({
+                    "domain": single,
+                    "reason": reason,
+                })
+            continue
+        elif not validators.domain(domain):
+            print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
+            continue
+
+        # DEBUG: print(f"DEBUG: Adding domain='{domain}' ...")
+        domains.append({
+            "domain": domain,
+            "reason": reason,
+        })
+
+    # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!")
+    return domains
diff --git a/fetch_bkali.py b/fetch_bkali.py
deleted file mode 100755 (executable)
index 1a1aafc..0000000
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/python3
-# -*- coding: utf-8 -*-
-
-# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
-# Copyright (C) 2023 Free Software Foundation
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published
-# by the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
-import json
-import sys
-import validators
-from fba import *
-
-domains = list()
-try:
-    fetched = fba.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
-        "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
-    }))
-
-    # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'")
-    if len(fetched) == 0:
-        raise Exception("WARNING: Returned no records")
-    elif not "data" in fetched:
-        raise Exception(f"WARNING: fetched()={len(fetched)} does not contain element 'data'")
-    elif not "nodeinfo" in fetched["data"]:
-        raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain element 'nodeinfo'")
-
-    for entry in fetched["data"]["nodeinfo"]:
-        # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
-        if not "domain" in entry:
-            print(f"WARNING: entry does not contain 'domain' - SKIPPED!")
-            continue
-        elif not validators.domain(entry["domain"]):
-            print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
-            continue
-        elif fba.is_blacklisted(entry["domain"]):
-            # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
-            continue
-        elif fba.is_instance_registered(entry["domain"]):
-            # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
-            continue
-
-        # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
-        domains.append(entry["domain"])
-
-except BaseException as e:
-    print(f"ERROR: Cannot fetch graphql,exception[{type(e)}]:'{str(e)}'")
-    sys.exit(255)
-
-# DEBUG: print(f"DEBUG: domains()={len(domains)}")
-if len(domains) > 0:
-    boot.acquire_lock()
-
-    print(f"INFO: Adding {len(domains)} new instances ...")
-    for domain in domains:
-        print(f"INFO: Fetching instances from domain='{domain}' ...")
-        fba.fetch_instances(domain, None, None, sys.argv[0])
-
-boot.shutdown()
diff --git a/fetch_blocks.py b/fetch_blocks.py
deleted file mode 100755 (executable)
index c12c26f..0000000
+++ /dev/null
@@ -1,527 +0,0 @@
-#!/usr/bin/python3
-# -*- coding: utf-8 -*-
-
-# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
-# Copyright (C) 2023 Free Software Foundation
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published
-# by the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
-import bs4
-import itertools
-import re
-import reqto
-import sys
-import time
-import validators
-from fba import *
-
-boot.acquire_lock()
-
-fba.cursor.execute(
-    "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")]
-)
-
-rows = fba.cursor.fetchall()
-print(f"INFO: Checking {len(rows)} entries ...")
-for blocker, software, origin, nodeinfo_url in rows:
-    # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
-    blockdict = []
-    blocker = fba.tidyup_domain(blocker)
-    # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)
-
-    if blocker == "":
-        print("WARNING: blocker is now empty!")
-        continue
-    elif fba.is_blacklisted(blocker):
-        print(f"WARNING: blocker='{blocker}' is blacklisted now!")
-        continue
-
-    # DEBUG: print(f"DEBUG: blocker='{blocker}'")
-    fba.update_last_blocked(blocker)
-
-    if software == "pleroma":
-        print("INFO: blocker:", blocker)
-        try:
-            # Blocks
-            json = fba.fetch_nodeinfo(blocker, nodeinfo_url)
-            if json is None:
-                print("WARNING: Could not fetch nodeinfo from blocker:", blocker)
-                continue
-            elif not "metadata" in json:
-                print(f"WARNING: json()={len(json)} does not have key 'metadata', blocker='{blocker}'")
-                continue
-            elif not "federation" in json["metadata"]:
-                print(f"WARNING: json()={len(json['metadata'])} does not have key 'federation', blocker='{blocker}'")
-                continue
-
-            # DEBUG: print("DEBUG: Updating nodeinfo:", blocker)
-            fba.update_last_nodeinfo(blocker)
-
-            federation = json["metadata"]["federation"]
-
-            if "enabled" in federation:
-                # DEBUG: print("DEBUG: Instance has no block list to analyze:", blocker)
-                continue
-
-            if "mrf_simple" in federation:
-                for block_level, blocks in (
-                    {**federation["mrf_simple"],
-                    **{"quarantined_instances": federation["quarantined_instances"]}}
-                ).items():
-                    # DEBUG: print("DEBUG: block_level, blocks():", block_level, len(blocks))
-                    block_level = fba.tidyup_domain(block_level)
-                    # DEBUG: print("DEBUG: BEFORE block_level:", block_level)
-
-                    if block_level == "":
-                        print("WARNING: block_level is now empty!")
-                        continue
-
-                    # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
-                    for blocked in blocks:
-                        # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
-                        blocked = fba.tidyup_domain(blocked)
-                        # DEBUG: print("DEBUG: AFTER blocked:", blocked)
-
-                        if blocked == "":
-                            print("WARNING: blocked is empty after fba.tidyup_domain():", blocker, block_level)
-                            continue
-                        elif fba.is_blacklisted(blocked):
-                            # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
-                            continue
-                        elif blocked.count("*") > 1:
-                            # -ACK!-oma also started obscuring domains without hash
-                            fba.cursor.execute(
-                                "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
-                            )
-                            searchres = fba.cursor.fetchone()
-                            # DEBUG: print("DEBUG: searchres[]:", type(searchres))
-
-                            if searchres == None:
-                                print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
-                                continue
-
-                            blocked = searchres[0]
-                            nodeinfo_url = searchres[1]
-                            # DEBUG: print("DEBUG: Looked up domain:", blocked)
-                        elif not validators.domain(blocked):
-                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
-                            continue
-
-                        # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
-                        if not validators.domain(blocked):
-                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
-                            continue
-                        elif not fba.is_instance_registered(blocked):
-                            # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
-                            fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url)
-
-                        if not fba.is_instance_blocked(blocker, blocked, block_level):
-                            # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level)
-                            fba.block_instance(blocker, blocked, "unknown", block_level)
-
-                            if block_level == "reject":
-                                # DEBUG: print("DEBUG: Adding to blockdict:", blocked)
-                                blockdict.append(
-                                    {
-                                        "blocked": blocked,
-                                        "reason" : None
-                                    })
-                        else:
-                            # DEBUG: print(f"DEBUG: Updating block last seen for blocker='{blocker}',blocked='{blocked}' ...")
-                            fba.update_last_seen(blocker, blocked, block_level)
-
-            # DEBUG: print("DEBUG: Committing changes ...")
-            fba.connection.commit()
-
-            # Reasons
-            if "mrf_simple_info" in federation:
-                # DEBUG: print("DEBUG: Found mrf_simple_info:", blocker)
-                for block_level, info in (
-                    {**federation["mrf_simple_info"],
-                    **(federation["quarantined_instances_info"]
-                    if "quarantined_instances_info" in federation
-                    else {})}
-                ).items():
-                    # DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items()))
-                    block_level = fba.tidyup_domain(block_level)
-                    # DEBUG: print("DEBUG: BEFORE block_level:", block_level)
-
-                    if block_level == "":
-                        print("WARNING: block_level is now empty!")
-                        continue
-
-                    # DEBUG: print(f"DEBUG: Checking {len(info.items())} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
-                    for blocked, reason in info.items():
-                        # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
-                        blocked = fba.tidyup_domain(blocked)
-                        # DEBUG: print("DEBUG: AFTER blocked:", blocked)
-
-                        if blocked == "":
-                            print("WARNING: blocked is empty after fba.tidyup_domain():", blocker, block_level)
-                            continue
-                        elif fba.is_blacklisted(blocked):
-                            # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
-                            continue
-                        elif blocked.count("*") > 1:
-                            # same domain guess as above, but for reasons field
-                            fba.cursor.execute(
-                                "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
-                            )
-                            searchres = fba.cursor.fetchone()
-
-                            if searchres == None:
-                                print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
-                                continue
-
-                            blocked = searchres[0]
-                            origin = searchres[1]
-                            nodeinfo_url = searchres[2]
-                        elif not validators.domain(blocked):
-                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
-                            continue
-
-                        # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
-                        if not validators.domain(blocked):
-                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
-                            continue
-                        elif not fba.is_instance_registered(blocked):
-                            # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
-                            fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url)
-
-                        # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"])
-                        fba.update_block_reason(reason["reason"], blocker, blocked, block_level)
-
-                        for entry in blockdict:
-                            if entry["blocked"] == blocked:
-                                # DEBUG: print("DEBUG: Updating entry reason:", blocked)
-                                entry["reason"] = reason["reason"]
-
-            fba.connection.commit()
-        except Exception as e:
-            print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
-    elif software == "mastodon":
-        print("INFO: blocker:", blocker)
-        try:
-            # json endpoint for newer mastodongs
-            try:
-                json = {
-                    "reject"        : [],
-                    "media_removal" : [],
-                    "followers_only": [],
-                    "report_removal": []
-                }
-
-                # handling CSRF, I've saw at least one server requiring it to access the endpoint
-                # DEBUG: print("DEBUG: Fetching meta:", blocker)
-                meta = bs4.BeautifulSoup(
-                    fba.get_response(blocker, "/", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
-                    "html.parser",
-                )
-                try:
-                    csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
-                    # DEBUG: print("DEBUG: Adding CSRF token:", blocker, csrf)
-                    reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}}
-                except BaseException as e:
-                    # DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker, e)
-                    reqheaders = fba.api_headers
-
-                # DEBUG: print("DEBUG: Querying API domain_blocks:", blocker)
-                blocks = fba.get_response(blocker, "/api/v1/instance/domain_blocks", reqheaders, (config.get("connection_timeout"), config.get("read_timeout"))).json()
-
-                print(f"INFO: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}' ...")
-                for block in blocks:
-                    entry = {
-                        'domain': block['domain'],
-                        'hash'  : block['digest'],
-                        'reason': block['comment']
-                    }
-
-                    # DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
-                    if block['severity'] == 'suspend':
-                        # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
-                        json['reject'].append(entry)
-                    elif block['severity'] == 'silence':
-                        # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
-                        json['followers_only'].append(entry)
-                    elif block['severity'] == 'reject_media':
-                        # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
-                        json['media_removal'].append(entry)
-                    elif block['severity'] == 'reject_reports':
-                        # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
-                        json['report_removal'].append(entry)
-                    else:
-                        print("WARNING: Unknown severity:", block['severity'], block['domain'])
-            except BaseException as e:
-                # DEBUG: print(f"DEBUG: Failed, trying mastodon-specific fetches: blocker='{blocker}',exception[{type(e)}]={str(e)}")
-                json = fba.get_mastodon_blocks(blocker)
-
-            print(f"INFO: Checking {len(json.items())} entries from blocker='{blocker}',software='{software}' ...")
-            for block_level, blocks in json.items():
-                # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
-                block_level = fba.tidyup_domain(block_level)
-                # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
-                if block_level == "":
-                    print("WARNING: block_level is empty, blocker:", blocker)
-                    continue
-
-                # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
-                for block in blocks:
-                    blocked, blocked_hash, reason = block.values()
-                    # DEBUG: print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason)
-                    blocked = fba.tidyup_domain(blocked)
-                    # DEBUG: print("DEBUG: AFTER-blocked:", blocked)
-
-                    if blocked == "":
-                        print("WARNING: blocked is empty:", blocker)
-                        continue
-                    elif fba.is_blacklisted(blocked):
-                        # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
-                        continue
-                    elif blocked.count("*") > 0:
-                        # Doing the hash search for instance names as well to tidy up DB
-                        fba.cursor.execute(
-                            "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
-                        )
-                        searchres = fba.cursor.fetchone()
-
-                        if searchres == None:
-                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
-                            continue
-
-                        # DEBUG: print("DEBUG: Updating domain: ", searchres[0])
-                        blocked = searchres[0]
-                        origin = searchres[1]
-                        nodeinfo_url = searchres[2]
-
-                        # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
-                        if not validators.domain(blocked):
-                            print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
-                            continue
-                        elif not fba.is_instance_registered(blocked):
-                            # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
-                            fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url)
-                    elif not validators.domain(blocked):
-                        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
-                        continue
-
-                    # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
-                    if not validators.domain(blocked):
-                        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
-                        continue
-                    elif not fba.is_instance_registered(blocked):
-                        # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
-                        fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url)
-
-                    blocking = blocked if blocked.count("*") <= 1 else blocked_hash
-                    # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'")
-
-                    if not fba.is_instance_blocked(blocker, blocked, block_level):
-                        # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level)
-                        fba.block_instance(blocker, blocking, reason, block_level)
-
-                        if block_level == "reject":
-                            blockdict.append({
-                                "blocked": blocked,
-                                "reason" : reason
-                            })
-                    else:
-                        # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocking='{blocking}' ...")
-                        fba.update_last_seen(blocker, blocking, block_level)
-                        fba.update_block_reason(reason, blocker, blocking, block_level)
-
-            # DEBUG: print("DEBUG: Committing changes ...")
-            fba.connection.commit()
-        except Exception as e:
-            print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
-    elif software == "friendica" or software == "misskey" or software == "bookwyrm" or software == "takahe":
-        print("INFO: blocker:", blocker)
-        try:
-            if software == "friendica":
-                json = fba.get_friendica_blocks(blocker)
-            elif software == "misskey":
-                json = fba.get_misskey_blocks(blocker)
-            elif software == "bookwyrm":
-                print("WARNING: bookwyrm is not fully supported for fetching blacklist!", blocker)
-                #json = fba.get_bookwyrm_blocks(blocker)
-                continue
-            elif software == "takahe":
-                print("WARNING: takahe is not fully supported for fetching blacklist!", blocker)
-                #json = fba.get_takahe_blocks(blocker)
-                continue
-
-            print(f"INFO: Checking {len(json.items())} entries from blocker='{blocker}',software='{software}' ...")
-            for block_level, blocks in json.items():
-                # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
-                block_level = fba.tidyup_domain(block_level)
-                # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
-                if block_level == "":
-                    print("WARNING: block_level is empty, blocker:", blocker)
-                    continue
-
-                # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
-                for block in blocks:
-                    blocked, reason = block.values()
-                    # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
-                    blocked = fba.tidyup_domain(blocked)
-                    # DEBUG: print("DEBUG: AFTER blocked:", blocked)
-
-                    if blocked == "":
-                        print("WARNING: blocked is empty:", blocker)
-                        continue
-                    elif fba.is_blacklisted(blocked):
-                        # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
-                        continue
-                    elif blocked.count("*") > 0:
-                        # Some friendica servers also obscure domains without hash
-                        fba.cursor.execute(
-                            "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
-                        )
-
-                        searchres = fba.cursor.fetchone()
-
-                        if searchres == None:
-                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
-                            continue
-
-                        blocked = searchres[0]
-                        origin = searchres[1]
-                        nodeinfo_url = searchres[2]
-                    elif blocked.count("?") > 0:
-                        # Some obscure them with question marks, not sure if that's dependent on version or not
-                        fba.cursor.execute(
-                            "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")]
-                        )
-
-                        searchres = fba.cursor.fetchone()
-
-                        if searchres == None:
-                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
-                            continue
-
-                        blocked = searchres[0]
-                        origin = searchres[1]
-                        nodeinfo_url = searchres[2]
-                    elif not validators.domain(blocked):
-                        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
-                        continue
-
-                    # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
-                    if not validators.domain(blocked):
-                        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
-                        continue
-                    elif not fba.is_instance_registered(blocked):
-                        # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
-                        fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url)
-
-                    if not fba.is_instance_blocked(blocker, blocked, block_level):
-                        fba.block_instance(blocker, blocked, reason, block_level)
-
-                        if block_level == "reject":
-                            blockdict.append({
-                                "blocked": blocked,
-                                "reason" : reason
-                            })
-                    else:
-                        # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
-                        fba.update_last_seen(blocker, blocked, block_level)
-                        fba.update_block_reason(reason, blocker, blocked, block_level)
-
-            # DEBUG: print("DEBUG: Committing changes ...")
-            fba.connection.commit()
-        except Exception as e:
-            print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
-    elif software == "gotosocial":
-        print("INFO: blocker:", blocker)
-        try:
-            # Blocks
-            federation = fba.get_response(blocker, "{fba.get_peers_url}?filter=suspended", fba.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))).json()
-
-            if (federation == None):
-                print("WARNING: No valid response:", blocker);
-            elif "error" in federation:
-                print("WARNING: API returned error:", federation["error"])
-            else:
-                print(f"INFO: Checking {len(federation)} entries from blocker='{blocker}',software='{software}' ...")
-                for peer in federation:
-                    blocked = peer["domain"].lower()
-                    # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
-                    blocked = fba.tidyup_domain(blocked)
-                    # DEBUG: print("DEBUG: AFTER blocked:", blocked)
-
-                    if blocked == "":
-                        print("WARNING: blocked is empty:", blocker)
-                        continue
-                    elif fba.is_blacklisted(blocked):
-                        # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
-                        continue
-                    elif blocked.count("*") > 0:
-                        # GTS does not have hashes for obscured domains, so we have to guess it
-                        fba.cursor.execute(
-                            "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
-                        )
-                        searchres = fba.cursor.fetchone()
-
-                        if searchres == None:
-                            print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
-                            continue
-
-                        blocked = searchres[0]
-                        origin = searchres[1]
-                        nodeinfo_url = searchres[2]
-                    elif not validators.domain(blocked):
-                        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
-                        continue
-
-                    # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
-                    if not validators.domain(blocked):
-                        print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!")
-                        continue
-                    elif not fba.is_instance_registered(blocked):
-                        # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
-                        fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url)
-
-                    if not fba.is_instance_blocked(blocker, blocked, "reject"):
-                        # DEBUG: print(f"DEBUG: blocker='{blocker}' is blocking '{blocked}' for unknown reason at this point")
-                        fba.block_instance(blocker, blocked, "unknown", "reject")
-
-                        blockdict.append({
-                            "blocked": blocked,
-                            "reason" : None
-                        })
-                    else:
-                        # DEBUG: print(f"DEBUG: Updating block last seen for blocker='{blocker}',blocked='{blocked}' ...")
-                        fba.update_last_seen(blocker, blocked, "reject")
-
-                    if "public_comment" in peer:
-                        # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, peer["public_comment"])
-                        fba.update_block_reason(peer["public_comment"], blocker, blocked, "reject")
-
-                        for entry in blockdict:
-                            if entry["blocked"] == blocked:
-                                # DEBUG: print(f"DEBUG: Setting block reason for blocked='{blocked}':'{peer['public_comment']}'")
-                                entry["reason"] = peer["public_comment"]
-
-                # DEBUG: print("DEBUG: Committing changes ...")
-                fba.connection.commit()
-        except Exception as e:
-            print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
-    else:
-        print("WARNING: Unknown software:", blocker, software)
-
-    if config.get("bot_enabled") and len(blockdict) > 0:
-        send_bot_post(blocker, blockdict)
-
-    blockdict = []
-
-boot.shutdown()
diff --git a/fetch_cs.py b/fetch_cs.py
deleted file mode 100755 (executable)
index ec7b6de..0000000
+++ /dev/null
@@ -1,124 +0,0 @@
-#!/usr/bin/python3
-# -*- coding: utf-8 -*-
-
-# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
-# Copyright (C) 2023 Free Software Foundation
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published
-# by the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
-import bs4
-import json
-import reqto
-import sys
-import validators
-from fba import *
-
-def find_domains(tag: bs4.element.Tag) -> list:
-    # DEBUG: print(f"DEBUG: tag[]={type(tag)} - CALLED!")
-    if not isinstance(tag, bs4.element.Tag):
-        raise ValueError(f"Parameter tag[]={type(tag)} is not type of bs4.element.Tag")
-    elif not isinstance(tag, bs4.element.Tag):
-        raise KeyError("Cannot find table with instances!")
-    elif len(tag.select("tr")) == 0:
-        raise KeyError("No table rows found in table!")
-
-    domains = list()
-    for element in tag.select("tr"):
-        # DEBUG: print(f"DEBUG: element[]={type(element)}")
-        if not element.find("td"):
-            # DEBUG: print("DEBUG: Skipping element, no <td> found")
-            continue
-
-        domain = fba.tidyup_domain(element.find("td").text)
-        reason = fba.tidyup_reason(element.findAll("td")[1].text)
-
-        # DEBUG: print(f"DEBUG: domain='{domain}',reason='{reason}'")
-
-        if fba.is_blacklisted(domain):
-            print(f"WARNING: domain='{domain}' is blacklisted - skipped!")
-            continue
-        elif domain == "gab.com/.ai, develop.gab.com":
-            print(f"DEBUG: Multiple domains detected in one row")
-            domains.append({
-                "domain": "gab.com",
-                "reason": reason,
-            })
-            domains.append({
-                "domain": "gab.ai",
-                "reason": reason,
-            })
-            domains.append({
-                "domain": "develop.gab.com",
-                "reason": reason,
-            })
-            continue
-        elif not validators.domain(domain):
-            print(f"WARNING: domain='{domain}' is not a valid domain - skipped!")
-            continue
-
-        # DEBUG: print(f"DEBUG: Adding domain='{domain}' ...")
-        domains.append({
-            "domain": domain,
-            "reason": reason,
-        })
-
-    # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!")
-    return domains
-
-domains = {
-    "silenced": list(),
-    "blocked": list(),
-}
-
-try:
-    doc = bs4.BeautifulSoup(
-        reqto.get("https://meta.chaos.social/federation", headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))).text,
-        "html.parser",
-    )
-    # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}")
-    silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table")
-
-    # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}")
-    domains["silenced"] = domains["silenced"] + find_domains(silenced)
-    blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table")
-
-    # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}")
-    domains["blocked"] = domains["blocked"] + find_domains(blocked)
-
-except BaseException as e:
-    print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(e)}]:'{str(e)}'")
-    sys.exit(255)
-
-# DEBUG: print(f"DEBUG: domains()={len(domains)}")
-if len(domains) > 0:
-    boot.acquire_lock()
-
-    print(f"INFO: Adding {len(domains)} new instances ...")
-    for block_level in domains:
-        # DEBUG: print(f"DEBUG: block_level='{block_level}'")
-
-        for row in domains[block_level]:
-            # DEBUG: print(f"DEBUG: row='{row}'")
-            if not fba.is_instance_registered(row["domain"]):
-                print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
-                fba.fetch_instances(row["domain"], None, None, sys.argv[0])
-
-            if not fba.is_instance_blocked('chaos.social', row["domain"], block_level):
-                # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
-                fba.block_instance('chaos.social', row["domain"], row["reason"], block_level)
-
-    # DEBUG: print("DEBUG: Committing changes ...")
-    fba.connection.commit()
-
-boot.shutdown()
diff --git a/fetch_fba_rss.py b/fetch_fba_rss.py
deleted file mode 100755 (executable)
index 614b10e..0000000
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/usr/bin/python3
-# -*- coding: utf-8 -*-
-
-# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
-# Copyright (C) 2023 Free Software Foundation
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published
-# by the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
-import reqto
-import atoma
-import sys
-from fba import *
-
-feed = sys.argv[1]
-
-domains = list()
-try:
-    print(f"INFO: Fetch FBA-specific RSS feed='{feed}' ...")
-    response = reqto.get(feed, headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout")))
-
-    # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
-    if response.ok and response.status_code < 300 and len(response.text) > 0:
-        # DEBUG: print(f"DEBUG: Parsing RSS feed ...")
-        rss = atoma.parse_rss_bytes(response.content)
-
-        # DEBUG: print(f"DEBUG: rss[]={type(rss)}")
-        for item in rss.items:
-            # DEBUG: print(f"DEBUG: item={item}")
-            domain = item.link.split("=")[1]
-
-            if fba.is_blacklisted(domain):
-                # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
-                continue
-            elif domain in domains:
-                # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
-                continue
-            elif fba.is_instance_registered(domain):
-                # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
-                continue
-
-            # DEBUG: print(f"DEBUG: domain='{domain}'")
-            domains.append(domain)
-
-except BaseException as e:
-    print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'")
-
-# DEBUG: print(f"DEBUG: domains()={len(domains)}")
-if len(domains) > 0:
-    boot.acquire_lock()
-
-    print(f"INFO: Adding {len(domains)} new instances ...")
-    for domain in domains:
-        print(f"INFO: Fetching instances from domain='{domain}' ...")
-        fba.fetch_instances(domain, None, None, sys.argv[0])
-
-boot.shutdown()
diff --git a/fetch_instances.py b/fetch_instances.py
deleted file mode 100755 (executable)
index 079c1a6..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/python3
-# -*- coding: utf-8 -*-
-
-# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
-# Copyright (C) 2023 Free Software Foundation
-#
-# This program is free software: you can redistribute it and/or modify
-# it under the terms of the GNU Affero General Public License as published
-# by the Free Software Foundation, either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU Affero General Public License for more details.
-#
-# You should have received a copy of the GNU Affero General Public License
-# along with this program.  If not, see <https://www.gnu.org/licenses/>.
-
-import sqlite3
-import sys
-import json
-import time
-import validators
-from fba import *
-
-boot.acquire_lock()
-
-instance = sys.argv[1]
-
-# Initial fetch
-fba.fetch_instances(instance, None, None, sys.argv[0])
-
-# Loop through some instances
-fba.cursor.execute(
-    "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
-)
-
-rows = fba.cursor.fetchall()
-print(f"INFO: Checking {len(rows)} entries ...")
-for row in rows:
-    # DEBUG: print("DEBUG: domain:", row[0])
-    if fba.is_blacklisted(row[0]):
-        print("WARNING: domain is blacklisted:", row[0])
-        continue
-
-    print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'")
-    fba.fetch_instances(row[0], row[1], row[2], sys.argv[0], row[3])
-
-boot.shutdown()
index a02a02ac25248bd92c1d17fbdbdca306621beafd..cc8c92e120e05cd6db0cb8be85f2b481ef8b3307 100644 (file)
@@ -1,3 +1,4 @@
+argparse
 atoma
 beautifulsoup4
 fastapi