git.mxchange.org Git - fba.git/blobdiff - fetch_blocks.py
Continued:
[fba.git] / fetch_blocks.py
old mode 100644 (file)
new mode 100755 (executable)
index 71cf74f..ebf3697
@@ -1,3 +1,22 @@
+#!/usr/bin/python3
+# -*- coding: utf-8 -*-
+
+# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
+# Copyright (C) 2023 Free Software Foundation
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
 import reqto
 import time
 import bs4
@@ -6,13 +25,13 @@ import re
 import fba
 
 fba.cursor.execute(
-    "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_block"]]
+    "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_block"]]
 )
 
 rows = fba.cursor.fetchall()
 print(f"INFO: Checking {len(rows)} entries ...")
-for blocker, software in rows:
-    # NOISY-DEBUG: print("DEBUG: BEFORE blocker,software:", blocker, software)
+for blocker, software, origin, nodeinfo_url in rows:
+    # NOISY-DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
     blockdict = []
     blocker = fba.tidyup(blocker)
     # NOISY-DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)
@@ -31,7 +50,7 @@ for blocker, software in rows:
         print("INFO: blocker:", blocker)
         try:
             # Blocks
-            json = fba.fetch_nodeinfo(blocker)
+            json = fba.fetch_nodeinfo(blocker, nodeinfo_url)
             if json is None:
                 print("WARNING: Could not fetch nodeinfo from blocker:", blocker)
                 continue
@@ -70,18 +89,19 @@ for blocker, software in rows:
                         if blocked.count("*") > 1:
                             # -ACK!-oma also started obscuring domains without hash
                             fba.cursor.execute(
-                                "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
+                                "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
                             )
                             searchres = fba.cursor.fetchone()
                             # NOISY-DEBUG: print("DEBUG: searchres[]:", type(searchres))
                             if searchres != None:
                                 blocked = searchres[0]
+                                nodeinfo_url = searchres[1]
                                 # NOISY-DEBUG: print("DEBUG: Looked up domain:", blocked)
 
                         # NOISY-DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                         if not fba.is_instance_registered(blocked):
                             # NOISY-DEBUG: print("DEBUG: Domain wasn't found, adding:", blocked, blocker)
-                            fba.add_instance(blocked, blocker, argv[0])
+                            fba.add_instance(blocked, blocker, origin, nodeinfo_url)
 
                         fba.cursor.execute(
                             "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
@@ -101,7 +121,7 @@ for blocker, software in rows:
                                 blockdict.append(
                                     {
                                         "blocked": blocked,
-                                        "reason": None
+                                        "reason" : None
                                     })
                         else:
                             # NOISY-DEBUG: print("DEBUG: Updating last_seen:", blocker, blocked, block_level)
@@ -154,15 +174,15 @@ for blocker, software in rows:
 
             fba.connection.commit()
         except Exception as e:
-            print(f"ERROR: blocker='{blocker}',software='{software}',exception='{str(e)}'")
+            print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
     elif software == "mastodon":
         print("INFO: blocker:", blocker)
         try:
             # json endpoint for newer mastodongs
             try:
                 json = {
-                    "reject": [],
-                    "media_removal": [],
+                    "reject"        : [],
+                    "media_removal" : [],
                     "followers_only": [],
                     "report_removal": []
                 }
@@ -188,7 +208,7 @@ for blocker, software in rows:
                 for block in blocks:
                     entry = {
                         'domain': block['domain'],
-                        'hash': block['digest'],
+                        'hash'  : block['digest'],
                         'reason': block['comment']
                     }
 
@@ -233,7 +253,7 @@ for blocker, software in rows:
 
                         if fba.cursor.fetchone() == None:
                             # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
-                            fba.add_instance(blocked, blocker, argv[0])
+                            fba.add_instance(blocked, blocker, origin)
                     else:
                         # Doing the hash search for instance names as well to tidy up DB
                         fba.cursor.execute(
@@ -261,7 +281,7 @@ for blocker, software in rows:
                             blockdict.append(
                                 {
                                     "blocked": blocked,
-                                    "reason": reason
+                                    "reason" : reason
                                 })
                     else:
                         fba.update_last_seen(blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level)
@@ -272,7 +292,7 @@ for blocker, software in rows:
 
             fba.connection.commit()
         except Exception as e:
-            print(f"ERROR: blocker='{blocker}',software='{software}',exception='{str(e)}'")
+            print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
     elif software == "friendica" or software == "misskey" or software == "bookwyrm" or software == "takahe":
         print("INFO: blocker:", blocker)
         try:
@@ -316,16 +336,18 @@ for blocker, software in rows:
                     if blocked.count("?") > 0:
                         # Some obscure them with question marks, not sure if that's dependent on version or not
                         fba.cursor.execute(
-                            "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")]
+                            "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")]
                         )
                         searchres = fba.cursor.fetchone()
                         if searchres != None:
                             blocked = searchres[0]
+                            origin = searchres[1]
+                            nodeinfo_url = searchres[2]
 
                     # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked)
                     if not fba.is_instance_registered(blocked):
                         # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
-                        fba.add_instance(blocked, blocker)
+                        fba.add_instance(blocked, blocker, origin, nodeinfo_url)
 
                     fba.cursor.execute(
                         "SELECT * FROM blocks WHERE blocker = ? AND blocked = ?",
@@ -339,7 +361,7 @@ for blocker, software in rows:
                             blockdict.append(
                                 {
                                     "blocked": blocked,
-                                    "reason": reason
+                                    "reason" : reason
                                 })
                     else:
                         fba.update_last_seen(blocker, blocked, block_level)
@@ -350,7 +372,7 @@ for blocker, software in rows:
 
             fba.connection.commit()
         except Exception as e:
-            print(f"ERROR: blocker='{blocker}',software='{software}',exception='{str(e)}'")
+            print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
     elif software == "gotosocial":
         print("INFO: blocker:", blocker)
         try:
@@ -375,16 +397,18 @@ for blocker, software in rows:
                     elif blocked.count("*") > 0:
                         # GTS does not have hashes for obscured domains, so we have to guess it
                         fba.cursor.execute(
-                            "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
+                            "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")]
                         )
                         searchres = fba.cursor.fetchone()
 
                         if searchres != None:
                             blocked = searchres[0]
+                            origin = searchres[1]
+                            nodeinfo_url = searchres[2]
 
                     if not fba.is_instance_registered(blocked):
                         # NOISY-DEBUG: print("DEBUG: Domain wasn't found, adding:", blocked, blocker)
-                        fba.add_instance(blocked, blocker)
+                        fba.add_instance(blocked, blocker, origin, nodeinfo_url)
 
                     fba.cursor.execute(
                         "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1",
@@ -402,7 +426,7 @@ for blocker, software in rows:
                         blockdict.append(
                             {
                                 "blocked": blocked,
-                                "reason": None
+                                "reason" : None
                             })
                     else:
                         fba.update_last_seen(blocker, blocked, "reject")
@@ -418,7 +442,7 @@ for blocker, software in rows:
 
                 fba.connection.commit()
         except Exception as e:
-            print(f"ERROR: blocker='{blocker}',software='{software}',exception='{str(e)}'")
+            print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'")
     else:
         print("WARNING: Unknown software:", blocker, software)