X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=fetch_blocks.py;h=ebf3697eea7340935352befca654994bdbc540b0;hb=0fe16980ab997a2671416b0ac31c5326b368188c;hp=b8cab866b83c3bac6f7ae9bf50d68478224c4e51;hpb=359e6d23b07cab89cbf0c7da40f6c5f516a0e6da;p=fba.git diff --git a/fetch_blocks.py b/fetch_blocks.py old mode 100644 new mode 100755 index b8cab86..ebf3697 --- a/fetch_blocks.py +++ b/fetch_blocks.py @@ -1,3 +1,22 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + import reqto import time import bs4 @@ -6,11 +25,13 @@ import re import fba fba.cursor.execute( - "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_block"]] + "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) 
ORDER BY rowid DESC", [time.time() - fba.config["recheck_block"]] ) -for blocker, software in fba.cursor.fetchall(): - # NOISY-DEBUG: print("DEBUG: BEFORE blocker,software:", blocker, software) +rows = fba.cursor.fetchall() +print(f"INFO: Checking {len(rows)} entries ...") +for blocker, software, origin, nodeinfo_url in rows: + # NOISY-DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url) blockdict = [] blocker = fba.tidyup(blocker) # NOISY-DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software) @@ -18,14 +39,18 @@ for blocker, software in fba.cursor.fetchall(): if blocker == "": print("WARNING: blocker is now empty!") continue + elif fba.is_blacklisted(blocker): + print(f"WARNING: blocker='{blocker}' is blacklisted now!") + continue + # NOISY-DEBUG: print(f"DEBUG: blocker='{blocker}'") fba.update_last_blocked(blocker) if software == "pleroma": print("INFO: blocker:", blocker) try: # Blocks - json = fba.fetch_nodeinfo(blocker) + json = fba.fetch_nodeinfo(blocker, nodeinfo_url) if json is None: print("WARNING: Could not fetch nodeinfo from blocker:", blocker) continue @@ -64,25 +89,22 @@ for blocker, software in fba.cursor.fetchall(): if blocked.count("*") > 1: # -ACK!-oma also started obscuring domains without hash fba.cursor.execute( - "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] + "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? 
ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] ) searchres = fba.cursor.fetchone() # NOISY-DEBUG: print("DEBUG: searchres[]:", type(searchres)) if searchres != None: blocked = searchres[0] + nodeinfo_url = searchres[1] # NOISY-DEBUG: print("DEBUG: Looked up domain:", blocked) # NOISY-DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - fba.cursor.execute( - "SELECT domain FROM instances WHERE domain = ?", [blocked] - ) - - if fba.cursor.fetchone() == None: + if not fba.is_instance_registered(blocked): # NOISY-DEBUG: print("DEBUG: Domain wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker, argv[0]) + fba.add_instance(blocked, blocker, origin, nodeinfo_url) fba.cursor.execute( - "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?", + "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1", ( blocker, blocked, @@ -99,7 +121,7 @@ for blocker, software in fba.cursor.fetchall(): blockdict.append( { "blocked": blocked, - "reason": None + "reason" : None }) else: # NOISY-DEBUG: print("DEBUG: Updating last_seen:", blocker, blocked, block_level) @@ -152,15 +174,15 @@ for blocker, software in fba.cursor.fetchall(): fba.connection.commit() except Exception as e: - print("error:", e, blocker, software) + print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") elif software == "mastodon": print("INFO: blocker:", blocker) try: # json endpoint for newer mastodongs try: json = { - "reject": [], - "media_removal": [], + "reject" : [], + "media_removal" : [], "followers_only": [], "report_removal": [] } @@ -174,10 +196,10 @@ for blocker, software in fba.cursor.fetchall(): try: csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"] # NOISY-DEBUG: print("DEBUG: Adding CSRF token:", blocker, csrf) - reqheaders = {**fba.headers, **{"x-csrf-token": csrf}} + reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}} except: # NOISY-DEBUG: 
print("DEBUG: No CSRF token found, using normal headers:", blocker) - reqheaders = fba.headers + reqheaders = fba.api_headers # NOISY-DEBUG: print("DEBUG: Quering API domain_blocks:", blocker) blocks = reqto.get(f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json() @@ -186,7 +208,7 @@ for blocker, software in fba.cursor.fetchall(): for block in blocks: entry = { 'domain': block['domain'], - 'hash': block['digest'], + 'hash' : block['digest'], 'reason': block['comment'] } @@ -226,12 +248,12 @@ for blocker, software in fba.cursor.fetchall(): elif blocked.count("*") < 1: # No obsfucation for this instance fba.cursor.execute( - "SELECT hash FROM instances WHERE domain = ?", [blocked] + "SELECT hash FROM instances WHERE domain = ? LIMIT 1", [blocked] ) if fba.cursor.fetchone() == None: # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker, argv[0]) + fba.add_instance(blocked, blocker, origin) else: # Doing the hash search for instance names as well to tidy up DB fba.cursor.execute( @@ -244,7 +266,7 @@ for blocker, software in fba.cursor.fetchall(): blocked = searchres[0] fba.cursor.execute( - "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?", + "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? 
LIMIT 1", ( blocker, blocked if blocked.count("*") <= 1 else blocked_hash, @@ -259,18 +281,18 @@ for blocker, software in fba.cursor.fetchall(): blockdict.append( { "blocked": blocked, - "reason": reason + "reason" : reason }) else: fba.update_last_seen(blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level) - if reason != '': + if reason != "": # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason) fba.update_block_reason(reason, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level) fba.connection.commit() except Exception as e: - print("error:", e, blocker, software) + print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") elif software == "friendica" or software == "misskey" or software == "bookwyrm" or software == "takahe": print("INFO: blocker:", blocker) try: @@ -314,25 +336,24 @@ for blocker, software in fba.cursor.fetchall(): if blocked.count("?") > 0: # Some obscure them with question marks, not sure if that's dependent on version or not fba.cursor.execute( - "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")] + "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")] ) searchres = fba.cursor.fetchone() if searchres != None: blocked = searchres[0] + origin = searchres[1] + nodeinfo_url = searchres[2] # NOISY-DEBUG: print("DEBUG: AFTER-blocked:", blocked) - fba.cursor.execute( - "SELECT domain FROM instances WHERE domain = ?", [blocked] - ) - - if fba.cursor.fetchone() == None: + if not fba.is_instance_registered(blocked): # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker) + fba.add_instance(blocked, blocker, origin, nodeinfo_url) fba.cursor.execute( "SELECT * FROM blocks WHERE blocker = ? 
AND blocked = ?", (blocker, blocked), ) + if fba.cursor.fetchone() == None: fba.block_instance(blocker, blocked, reason, block_level) @@ -340,7 +361,7 @@ for blocker, software in fba.cursor.fetchall(): blockdict.append( { "blocked": blocked, - "reason": reason + "reason" : reason }) else: fba.update_last_seen(blocker, blocked, block_level) @@ -351,12 +372,12 @@ for blocker, software in fba.cursor.fetchall(): fba.connection.commit() except Exception as e: - print("error:", e, blocker, software) + print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") elif software == "gotosocial": print("INFO: blocker:", blocker) try: # Blocks - federation = reqto.get(f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=fba.headers, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json() + federation = reqto.get(f"https://{blocker}{get_peers_url}?filter=suspended", headers=fba.api_headers, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json() if (federation == None): print("WARNING: No valid response:", blocker); @@ -376,23 +397,21 @@ for blocker, software in fba.cursor.fetchall(): elif blocked.count("*") > 0: # GTS does not have hashes for obscured domains, so we have to guess it fba.cursor.execute( - "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] + "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? 
ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] ) searchres = fba.cursor.fetchone() if searchres != None: blocked = searchres[0] + origin = searchres[1] + nodeinfo_url = searchres[2] - fba.cursor.execute( - "SELECT domain FROM instances WHERE domain = ?", [blocked] - ) - - if fba.cursor.fetchone() == None: - # NOISY-DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason) - fba.update_block_reason(reason, blocker, blocked, block_level) + if not fba.is_instance_registered(blocked): + # NOISY-DEBUG: print("DEBUG: Domain wasn't found, adding:", blocked, blocker) + fba.add_instance(blocked, blocker, origin, nodeinfo_url) fba.cursor.execute( - "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?", + "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ? LIMIT 1", ( blocker, blocked, @@ -401,12 +420,13 @@ for blocker, software in fba.cursor.fetchall(): ) if fba.cursor.fetchone() == None: + # NOISY-DEBUG: print(f"DEBUG: blocker='{blocker}' is blocking '{blocked}' for unknown reason at this point") fba.block_instance(blocker, blocked, "unknown", "reject") blockdict.append( { "blocked": blocked, - "reason": None + "reason" : None }) else: fba.update_last_seen(blocker, blocked, "reject") @@ -417,11 +437,12 @@ for blocker, software in fba.cursor.fetchall(): for entry in blockdict: if entry["blocked"] == blocked: + # NOISY-DEBUG: print(f"DEBUG: Setting block reason for blocked='{blocked}':'{peer['public_comment']}'") entry["reason"] = peer["public_comment"] fba.connection.commit() except Exception as e: - print("error:", e, blocker, software) + print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") else: print("WARNING: Unknown software:", blocker, software)