]> git.mxchange.org Git - fba.git/blobdiff - fetch_blocks.py
Continued:
[fba.git] / fetch_blocks.py
old mode 100644 (file)
new mode 100755 (executable)
index 2a13023..ebf3697
@@ -1,3 +1,22 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
+# Copyright (C) 2023 Free Software Foundation
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
 import reqto
 import time
 import bs4
@@ -5,31 +24,40 @@ import itertools
 import re
 import fba
 
-fba.c.execute(
-    "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_block"]]
+fba.cursor.execute(
+    "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_block"]]
 )
 
-for blocker, software in fba.c.fetchall():
-    # NOISY-DEBUG: print("DEBUG: BEFORE-blocker,software:", blocker, software)
+rows = fba.cursor.fetchall()
+print(f"INFO: Checking {len(rows)} entries ...")
+for blocker, software, origin, nodeinfo_url in rows:
+    # NOISY-DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
     blockdict = []
     blocker = fba.tidyup(blocker)
-    # NOISY-DEBUG: print("DEBUG: AFTER-blocker,software:", blocker, software)
+    # NOISY-DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)
 
     if blocker == "":
         print("WARNING: blocker is now empty!")
         continue
+    elif fba.is_blacklisted(blocker):
+        print(f"WARNING: blocker='{blocker}' is blacklisted now!")
+        continue
 
+    # NOISY-DEBUG: print(f"DEBUG: blocker='{blocker}'")
     fba.update_last_blocked(blocker)
 
     if software == "pleroma":
         print("INFO: blocker:", blocker)
         try:
             # Blocks
-            json = fba.fetch_nodeinfo(blocker)
+            json = fba.fetch_nodeinfo(blocker, nodeinfo_url)
             if json is None:
                 print("WARNING: Could not fetch nodeinfo from blocker:", blocker)
                 continue
 
+            print("DEBUG: Updating nodeinfo:", blocker)
+            fba.update_last_nodeinfo(blocker)
+
             federation = json["metadata"]["federation"]
 
             if "enabled" in federation:
@@ -60,27 +88,23 @@ for blocker, software in fba.c.fetchall():
 
                         if blocked.count("*") > 1:
                             # -ACK!-oma also started obscuring domains without hash
-                            fba.c.execute(
-                                "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
+                            fba.cursor.execute(
+                                "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
                             )
-                            searchres = fba.c.fetchone()
+                            searchres = fba.cursor.fetchone()
                             # NOISY-DEBUG: print("DEBUG: searchres[]:", type(searchres))
                             if searchres != None:
                                 blocked = searchres[0]
+                                nodeinfo_url = searchres[1]
                                 # NOISY-DEBUG: print("DEBUG: Looked up domain:", blocked)
 
                         # NOISY-DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
-                        fba.c.execute(
-                            "SELECT domain FROM instances WHERE domain = ?", [blocked]
-                        )
+                        if not fba.is_instance_registered(blocked):
+                            # NOISY-DEBUG: print("DEBUG: Domain wasn't found, adding:", blocked, blocker)
+                            fba.add_instance(blocked, blocker, origin, nodeinfo_url)
 
-                        if fba.c.fetchone() == None:
-                            # NOISY-DEBUG: print("DEBUG: Domain wasn't found, adding:", blocked)
-                            fba.add_instance(blocked)
-
-                        timestamp = int(time.time())
-                        fba.c.execute(
-                            "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
+                        fba.cursor.execute(
+                            "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
                             (
                                blocker,
                                blocked,
@@ -88,22 +112,22 @@ for blocker, software in fba.c.fetchall():
                            ),
                         )
 
-                        if fba.c.fetchone() == None:
+                        if fba.cursor.fetchone() == None:
                             # NOISY-DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level)
-                            fba.block_instance(blocker, blocked, "unknown", block_level, timestamp, timestamp)
+                            fba.block_instance(blocker, blocked, "unknown", block_level)
 
                             if block_level == "reject":
                                 # NOISY-DEBUG: print("DEBUG: Adding to blockdict:", blocked)
                                 blockdict.append(
                                     {
                                         "blocked": blocked,
-                                        "reason": None
+                                        "reason" : None
                                     })
                         else:
                             # NOISY-DEBUG: print("DEBUG: Updating last_seen:", blocker, blocked, block_level)
-                            fba.update_last_seen(timestamp, blocker, blocked, block_level)
+                            fba.update_last_seen(blocker, blocked, block_level)
 
-            fba.conn.commit()
+            fba.connection.commit()
 
             # Reasons
             if "mrf_simple_info" in federation:
@@ -132,10 +156,10 @@ for blocker, software in fba.c.fetchall():
                             continue
                         elif blocked.count("*") > 1:
                             # same domain guess as above, but for reasons field
-                            fba.c.execute(
+                            fba.cursor.execute(
                                 "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
                             )
-                            searchres = fba.c.fetchone()
+                            searchres = fba.cursor.fetchone()
 
                             if searchres != None:
                                 blocked = searchres[0]
@@ -148,17 +172,17 @@ for blocker, software in fba.c.fetchall():
                                 # NOISY-DEBUG: print("DEBUG: Updating entry reason:", blocked)
                                 entry["reason"] = reason["reason"]
 
-            fba.conn.commit()
+            fba.connection.commit()
         except Exception as e:
-            print("error:", e, blocker, software)
+            print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
     elif software == "mastodon":
         print("INFO: blocker:", blocker)
         try:
             # json endpoint for newer mastodongs
             try:
                 json = {
-                    "reject": [],
-                    "media_removal": [],
+                    "reject"        : [],
+                    "media_removal" : [],
                     "followers_only": [],
                     "report_removal": []
                 }
@@ -166,25 +190,25 @@ for blocker, software in fba.c.fetchall():
                 # handling CSRF, I've saw at least one server requiring it to access the endpoint
                 # NOISY-DEBUG: print("DEBUG: Fetching meta:", blocker)
                 meta = bs4.BeautifulSoup(
-                    reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=5).text,
+                    reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=(fba.config["connection_timeout"], fba.config["read_timeout"])).text,
                     "html.parser",
                 )
                 try:
                     csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
                     # NOISY-DEBUG: print("DEBUG: Adding CSRF token:", blocker, csrf)
-                    reqheaders = {**fba.headers, **{"x-csrf-token": csrf}}
+                    reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}}
                 except:
                     # NOISY-DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker)
-                    reqheaders = fba.headers
+                    reqheaders = fba.api_headers
 
                 # NOISY-DEBUG: print("DEBUG: Quering API domain_blocks:", blocker)
-                blocks = reqto.get(f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5).json()
+                blocks = reqto.get(f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=(fba.config["connection_timeout"], fba.config["read_timeout"])).json()
 
                 # NOISY-DEBUG: print("DEBUG: blocks():", len(blocks))
                 for block in blocks:
                     entry = {
                         'domain': block['domain'],
-                        'hash': block['digest'],
+                        'hash'  : block['digest'],
                         'reason': block['comment']
                     }
 
@@ -223,27 +247,26 @@ for blocker, software in fba.c.fetchall():
                         continue
                     elif blocked.count("*") < 1:
                         # No obsfucation for this instance
-                        fba.c.execute(
+                        fba.cursor.execute(
                             "SELECT hash FROM instances WHERE domain = ? LIMIT 1", [blocked]
                         )
 
-                        if fba.c.fetchone() == None:
-                            # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked)
-                            fba.add_instance(blocked)
+                        if fba.cursor.fetchone() == None:
+                            # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
+                            fba.add_instance(blocked, blocker, origin)
                     else:
                         # Doing the hash search for instance names as well to tidy up DB
-                        fba.c.execute(
+                        fba.cursor.execute(
                             "SELECT domain FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
                         )
-                        searchres = fba.c.fetchone()
+                        searchres = fba.cursor.fetchone()
 
                         if searchres != None:
                             # NOISY-DEBUG: print("DEBUG: Updating domain: ", searchres[0])
                             blocked = searchres[0]
 
-                    timestamp = int(time.time())
-                    fba.c.execute(
-                        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
+                    fba.cursor.execute(
+                        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
                         (
                             blocker,
                             blocked if blocked.count("*") <= 1 else blocked_hash,
@@ -251,32 +274,38 @@ for blocker, software in fba.c.fetchall():
                         ),
                     )
 
-                    if fba.c.fetchone() == None:
-                        fba.block_instance(blocker, blocked if blocked.count("*") <= 1 else blocked_hash, reason, block_level, timestamp, timestamp)
+                    if fba.cursor.fetchone() == None:
+                        fba.block_instance(blocker, blocked if blocked.count("*") <= 1 else blocked_hash, reason, block_level)
 
                         if block_level == "reject":
                             blockdict.append(
                                 {
                                     "blocked": blocked,
-                                    "reason": reason
+                                    "reason" : reason
                                 })
                     else:
-                        fba.update_last_seen(timestamp, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level)
+                        fba.update_last_seen(blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level)
 
-                    if reason != '':
+                    if reason != "":
                         # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason)
                         fba.update_block_reason(reason, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level)
 
-            fba.conn.commit()
+            fba.connection.commit()
         except Exception as e:
-            print("error:", e, blocker, software)
-    elif software == "friendica" or software == "misskey":
+            print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
+    elif software == "friendica" or software == "misskey" or software == "bookwyrm" or software == "takahe":
         print("INFO: blocker:", blocker)
         try:
             if software == "friendica":
                 json = fba.get_friendica_blocks(blocker)
             elif software == "misskey":
                 json = fba.get_misskey_blocks(blocker)
+            elif software == "bookwyrm":
+                print("WARNING: bookwyrm is not fully supported for fetching blacklist!", blocker)
+                json = {}  # no bookwyrm block fetcher yet; empty dict so json from a previous blocker is never reused
+            elif software == "takahe":
+                print("WARNING: takahe is not fully supported for fetching blacklist!", blocker)
+                json = {}  # no takahe block fetcher yet; empty dict so json from a previous blocker is never reused
 
             for block_level, blocks in json.items():
                 # NOISY-DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
@@ -288,69 +317,67 @@ for blocker, software in fba.c.fetchall():
 
                 for instance in blocks:
                     blocked, reason = instance.values()
-                    # NOISY-DEBUG: print("DEBUG: BEFORE-blocked:", blocked)
+                    # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
                     blocked = fba.tidyup(blocked)
-                    # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)
+                    # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)
 
                     if blocked == "":
                         print("WARNING: blocked is empty:", blocker)
                         continue
                     if blocked.count("*") > 0:
                         # Some friendica servers also obscure domains without hash
-                        fba.c.execute(
+                        fba.cursor.execute(
                             "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
                         )
-                        searchres = fba.c.fetchone()
+                        searchres = fba.cursor.fetchone()
                         if searchres != None:
                             blocked = searchres[0]
 
                     if blocked.count("?") > 0:
                         # Some obscure them with question marks, not sure if that's dependent on version or not
-                        fba.c.execute(
-                            "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")]
+                        fba.cursor.execute(
+                            "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")]
                         )
-                        searchres = fba.c.fetchone()
+                        searchres = fba.cursor.fetchone()
                         if searchres != None:
                             blocked = searchres[0]
+                            origin = searchres[1]
+                            nodeinfo_url = searchres[2]
 
                     # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)
-                    fba.c.execute(
-                        "SELECT domain FROM instances WHERE domain = ?", [blocked]
-                    )
+                    if not fba.is_instance_registered(blocked):
+                        # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
+                        fba.add_instance(blocked, blocker, origin, nodeinfo_url)
 
-                    if fba.c.fetchone() == None:
-                        # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked)
-                        fba.add_instance(blocked)
-
-                    timestamp = int(time.time())
-                    fba.c.execute(
+                    fba.cursor.execute(
                         "SELECT * FROM blocks WHERE blocker = ? AND blocked = ?",
                         (blocker, blocked),
                     )
-                    if fba.c.fetchone() == None:
-                        fba.block_instance(blocker, blocked, reason, block_level, timestamp, timestamp)
+
+                    if fba.cursor.fetchone() == None:
+                        fba.block_instance(blocker, blocked, reason, block_level)
 
                         if block_level == "reject":
                             blockdict.append(
                                 {
                                     "blocked": blocked,
-                                    "reason": reason
+                                    "reason" : reason
                                 })
                     else:
-                        fba.update_last_seen(timestamp, blocker, blocked, block_level)
+                        fba.update_last_seen(blocker, blocked, block_level)
 
                     if reason != '':
                         # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason)
                         fba.update_block_reason(reason, blocker, blocked, block_level)
 
-            fba.conn.commit()
+            fba.connection.commit()
         except Exception as e:
-            print("error:", e, blocker, software)
+            print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
     elif software == "gotosocial":
         print("INFO: blocker:", blocker)
         try:
             # Blocks
-            federation = reqto.get(f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=fba.headers, timeout=5).json()
+            federation = reqto.get(f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=fba.api_headers, timeout=(fba.config["connection_timeout"], fba.config["read_timeout"])).json()
 
             if (federation == None):
                 print("WARNING: No valid response:", blocker);
@@ -360,70 +387,68 @@ for blocker, software in fba.c.fetchall():
                 # NOISY-DEBUG: print("DEBUG: Checking fenderation():", len(federation))
                 for peer in federation:
                     blocked = peer["domain"].lower()
-                    # NOISY-DEBUG: print("DEBUG: BEFORE-blocked:", blocked)
+                    # NOISY-DEBUG: print("DEBUG: BEFORE blocked:", blocked)
                     blocked = fba.tidyup(blocked)
-                    # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)
+                    # NOISY-DEBUG: print("DEBUG: AFTER blocked:", blocked)
 
                     if blocked == "":
                         print("WARNING: blocked is empty:", blocker)
                         continue
-                    if blocked.count("*") > 0:
+                    elif blocked.count("*") > 0:
                         # GTS does not have hashes for obscured domains, so we have to guess it
-                        fba.c.execute(
-                            "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
+                        fba.cursor.execute(
+                            "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
                         )
-                        searchres = fba.c.fetchone()
+                        searchres = fba.cursor.fetchone()
 
                         if searchres != None:
                             blocked = searchres[0]
+                            origin = searchres[1]
+                            nodeinfo_url = searchres[2]
 
-                    fba.c.execute(
-                        "SELECT domain FROM instances WHERE domain = ?", [blocked]
-                    )
-
-                    if fba.c.fetchone() == None:
-                        # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked)
-                        fba.add_instance(blocked)
+                    if not fba.is_instance_registered(blocked):
+                        # NOISY-DEBUG: print("DEBUG: Domain wasn't found, adding:", blocked, blocker)
+                        fba.add_instance(blocked, blocker, origin, nodeinfo_url)
 
-                    fba.c.execute(
-                        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
+                    fba.cursor.execute(
+                        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
                         (
                             blocker,
                             blocked,
                             "reject"
                         ),
                     )
-                    timestamp = int(time.time())
 
-                    if fba.c.fetchone() == None:
-                        fba.block_instance(blocker, blocked, "unknown", "reject", timestamp, timestamp)
+                    if fba.cursor.fetchone() == None:
+                        # NOISY-DEBUG: print(f"DEBUG: blocker='{blocker}' is blocking '{blocked}' for unknown reason at this point")
+                        fba.block_instance(blocker, blocked, "unknown", "reject")
 
                         blockdict.append(
                             {
                                 "blocked": blocked,
-                                "reason": None
+                                "reason" : None
                             })
                     else:
-                        fba.update_last_seen(timestamp, blocker, blocked, "reject")
+                        fba.update_last_seen(blocker, blocked, "reject")
 
                     if "public_comment" in peer:
-                        reason = peer["public_comment"]
-                        # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason)
-                        fba.update_block_reason(reason, blocker, blocked, "reject")
+                        # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, peer["public_comment"])
+                        fba.update_block_reason(peer["public_comment"], blocker, blocked, "reject")
 
                         for entry in blockdict:
                             if entry["blocked"] == blocked:
-                                entry["reason"] = reason
+                                # NOISY-DEBUG: print(f"DEBUG: Setting block reason for blocked='{blocked}':'{peer['public_comment']}'")
+                                entry["reason"] = peer["public_comment"]
 
-                fba.conn.commit()
+                fba.connection.commit()
         except Exception as e:
-            print("error:", e, blocker, software)
+            print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
     else:
-        print("WARNING: Unknown software:", software)
+        print("WARNING: Unknown software:", blocker, software)
 
     if fba.config["bot_enabled"] and len(blockdict) > 0:
         send_bot_post(blocker, blockdict)
 
     blockdict = []
 
-fba.conn.close()
+fba.connection.close()