From 59bdb75fa3253fc3ec2c4e81895fa95cb5d144b0 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Tue, 6 Jun 2023 22:07:09 +0200 Subject: [PATCH] Continued: - moved blacklist-related stuff to fba/blacklist.py - and now, the word 'blacklist' is not racism, words are innocent! --- fba/__init__.py | 1 + fba/blacklist.py | 50 ++++++++++++++++++++++++++++++ fba/blocks.py | 5 +-- fba/commands.py | 21 +++++++------ fba/fba.py | 68 ++++++++--------------------------------- fba/instances.py | 31 +++++++++++++++++++ fba/network/lemmy.py | 2 +- fba/network/mastodon.py | 3 +- fba/network/misskey.py | 5 +-- fba/network/peertube.py | 2 +- fba/network/pleroma.py | 5 +-- 11 files changed, 118 insertions(+), 75 deletions(-) create mode 100644 fba/blacklist.py diff --git a/fba/__init__.py b/fba/__init__.py index 5694777..9844f11 100644 --- a/fba/__init__.py +++ b/fba/__init__.py @@ -1,4 +1,5 @@ __all__ = [ + 'blacklist', 'blocks', 'boot', 'cache', diff --git a/fba/blacklist.py b/fba/blacklist.py new file mode 100644 index 0000000..e208840 --- /dev/null +++ b/fba/blacklist.py @@ -0,0 +1,50 @@ +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>.
+ +# Don't check these, known trolls/flooders/testing/developing +blacklist = [ + # Floods network with fake nodes as "research" project + "activitypub-troll.cf", + # Similar troll + "gab.best", + # Similar troll + "4chan.icu", + # Flooder (?) + "social.shrimpcam.pw", + # Flooder (?) + "mastotroll.netz.org", + # Testing/developing installations + "ngrok.io", + "ngrok-free.app", + "misskeytest.chn.moe", +] + +def is_blacklisted(domain: str) -> bool: + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + if type(domain) != str: + raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"Parameter 'domain' is empty") + + blacklisted = False + for peer in blacklist: + # DEBUG: print(f"DEBUG: Checking peer='{peer}' ...") + if peer in domain: + # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted.") + blacklisted = True + + # DEBUG: print(f"DEBUG: blacklisted='{blacklisted}' - EXIT!") + return blacklisted diff --git a/fba/blocks.py b/fba/blocks.py index 1a4c077..dac50f2 100644 --- a/fba/blocks.py +++ b/fba/blocks.py @@ -18,6 +18,7 @@ import sys import time import validators +from fba import blacklist from fba import fba def update_reason(reason: str, blocker: str, blocked: str, block_level: str): @@ -141,9 +142,9 @@ def add_instance(blocker: str, blocked: str, reason: str, block_level: str): raise ValueError(f"Parameter 'blocked' is empty") elif not validators.domain(blocked.split("/")[0]): raise ValueError(f"Bad blocked='{blocked}'") - elif fba.is_blacklisted(blocker): + elif blacklist.is_blacklisted(blocker): raise Exception(f"blocker='{blocker}' is blacklisted but function invoked") - elif fba.is_blacklisted(blocked): + elif blacklist.is_blacklisted(blocked): raise Exception(f"blocked='{blocked}' is blacklisted but function invoked") if reason != None: diff --git a/fba/commands.py b/fba/commands.py index da62402..84ae5ed 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -26,6 +26,7 @@ import sys import 
time import validators +from fba import blacklist from fba import blocks from fba import boot from fba import config @@ -38,7 +39,7 @@ def check_instance(args: argparse.Namespace) -> int: if not validators.domain(args.domain): print(f"WARNING: args.domain='{args.domain}' is not valid") status = 100 - elif fba.is_blacklisted(args.domain): + elif blacklist.is_blacklisted(args.domain): print(f"WARNING: args.domain='{args.domain}' is blacklisted") status = 101 elif fba.is_instance_registered(args.domain): @@ -74,7 +75,7 @@ def fetch_bkali(args: argparse.Namespace): elif not validators.domain(entry["domain"]): print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!") continue - elif fba.is_blacklisted(entry["domain"]): + elif blacklist.is_blacklisted(entry["domain"]): # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!") continue elif fba.is_instance_registered(entry["domain"]): @@ -105,7 +106,7 @@ def fetch_blocks(args: argparse.Namespace): if not validators.domain(args.domain): print(f"WARNING: domain='{args.domain}' is not valid.") return - elif fba.is_blacklisted(args.domain): + elif blacklist.is_blacklisted(args.domain): print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!") return elif not fba.is_instance_registered(args.domain): @@ -134,12 +135,12 @@ def fetch_blocks(args: argparse.Namespace): if blocker == "": print("WARNING: blocker is now empty!") continue - elif fba.is_blacklisted(blocker): + elif blacklist.is_blacklisted(blocker): print(f"WARNING: blocker='{blocker}' is blacklisted now!") continue # DEBUG: print(f"DEBUG: blocker='{blocker}'") - fba.update_last_blocked(blocker) + instances.update_last_blocked(blocker) if software == "pleroma": print(f"INFO: blocker='{blocker}',software='{software}'") @@ -174,7 +175,7 @@ def fetch_blocks(args: argparse.Namespace): if blocked == "": print("WARNING: blocked is empty:", blocker) continue - elif fba.is_blacklisted(blocked): + elif 
blacklist.is_blacklisted(blocked): # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") continue elif blocked.count("*") > 0: @@ -257,7 +258,7 @@ def fetch_blocks(args: argparse.Namespace): if blocked == "": print("WARNING: blocked is empty:", blocker) continue - elif fba.is_blacklisted(blocked): + elif blacklist.is_blacklisted(blocked): # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") continue elif blocked.count("*") > 0: @@ -388,7 +389,7 @@ def fetch_fba_rss(args: argparse.Namespace): # DEBUG: print(f"DEBUG: item={item}") domain = item.link.split("=")[1] - if fba.is_blacklisted(domain): + if blacklist.is_blacklisted(domain): # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") continue elif domain in domains: @@ -441,7 +442,7 @@ def fetch_fbabot_atom(args: argparse.Namespace): domain = fba.tidyup_domain(href) # DEBUG: print(f"DEBUG: domain='{domain}'") - if fba.is_blacklisted(domain): + if blacklist.is_blacklisted(domain): # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") continue elif domain in domains: @@ -489,7 +490,7 @@ def fetch_instances(args: argparse.Namespace): print(f"INFO: Checking {len(rows)} entries ...") for row in rows: # DEBUG: print("DEBUG: domain:", row[0]) - if fba.is_blacklisted(row[0]): + if blacklist.is_blacklisted(row[0]): print("WARNING: domain is blacklisted:", row[0]) continue diff --git a/fba/fba.py b/fba/fba.py index 1cc62a4..53e788c 100644 --- a/fba/fba.py +++ b/fba/fba.py @@ -27,6 +27,7 @@ import validators from urllib.parse import urlparse +from fba import blacklist from fba import cache from fba import config from fba import instances @@ -34,24 +35,6 @@ from fba import instances from fba.network import lemmy from fba.network import misskey -# Don't check these, known trolls/flooders/testing/developing -blacklist = [ - # Floods network with fake nodes as "research" project - "activitypub-troll.cf", - # Similar troll - "gab.best", - # Similar troll - 
"4chan.icu", - # Flooder (?) - "social.shrimpcam.pw", - # Flooder (?) - "mastotroll.netz.org", - # Testing/developing installations - "ngrok.io", - "ngrok-free.app", - "misskeytest.chn.moe", -] - # Array with pending errors needed to be written to database pending_errors = { } @@ -147,7 +130,7 @@ def fetch_instances(domain: str, origin: str, software: str, script: str, path: elif not validators.domain(instance.split("/")[0]): print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}',software='{software}'") continue - elif is_blacklisted(instance): + elif blacklist.is_blacklisted(instance): # DEBUG: print("DEBUG: instance is blacklisted:", instance) continue @@ -174,7 +157,7 @@ def add_peers(rows: dict) -> list: peer = tidyup_domain(peer) # DEBUG: print(f"DEBUG: peer='{peer}' - AFTER!") - if is_blacklisted(peer): + if blacklist.is_blacklisted(peer): # DEBUG: print(f"DEBUG: peer='{peer}' is blacklisted, skipped!") continue @@ -300,7 +283,8 @@ def strip_until(software: str, until: str) -> str: # DEBUG: print(f"DEBUG: software='{software}' - EXIT!") return software -def is_blacklisted(domain: str) -> bool: +def blacklist.is_blacklisted(domain: str) -> bool: + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if type(domain) != str: raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") elif domain == "": @@ -308,9 +292,12 @@ def is_blacklisted(domain: str) -> bool: blacklisted = False for peer in blacklist: + # DEBUG: print(f"DEBUG: Checking peer='{peer}' ...") if peer in domain: + # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted.") blacklisted = True + # DEBUG: print(f"DEBUG: blacklisted='{blacklisted}' - EXIT!") return blacklisted def remove_pending_error(domain: str): @@ -336,21 +323,6 @@ def get_hash(domain: str) -> str: return hashlib.sha256(domain.encode("utf-8")).hexdigest() -def update_last_blocked(domain: str): - if type(domain) != str: - raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") - 
elif domain == "": - raise ValueError(f"Parameter 'domain' is empty") - - # DEBUG: print("DEBUG: Updating last_blocked for domain", domain) - instances.set("last_blocked", domain, time.time()) - - # Running pending updated - # DEBUG: print(f"DEBUG: Invoking instances.update_instance_data({domain}) ...") - instances.update_instance_data(domain) - - # DEBUG: print("DEBUG: EXIT!") - def log_error(domain: str, response: requests.models.Response): # DEBUG: print("DEBUG: domain,response[]:", domain, type(response)) if type(domain) != str: @@ -416,22 +388,6 @@ def update_last_error(domain: str, response: requests.models.Response): # DEBUG: print("DEBUG: EXIT!") -def update_last_instance_fetch(domain: str): - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") - if type(domain) != str: - raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") - elif domain == "": - raise ValueError(f"Parameter 'domain' is empty") - - # DEBUG: print("DEBUG: Updating last_instance_fetch for domain:", domain) - instances.set("last_instance_fetch", domain, time.time()) - - # Running pending updated - # DEBUG: print(f"DEBUG: Invoking instances.update_instance_data({domain}) ...") - instances.update_instance_data(domain) - - # DEBUG: print("DEBUG: EXIT!") - def update_last_nodeinfo(domain: str): # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if type(domain) != str: @@ -507,7 +463,7 @@ def get_peers(domain: str, software: str) -> list: instances.set("total_peers", domain, len(peers)) # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") - update_last_instance_fetch(domain) + instances.update_last_instance_fetch(domain) # DEBUG: print("DEBUG: Returning peers[]:", type(peers)) return peers @@ -848,7 +804,7 @@ def add_instance(domain: str, origin: str, originator: str, path: str = None): raise ValueError(f"Bad domain name='{domain}'") elif origin is not None and not validators.domain(origin.split("/")[0]): raise ValueError(f"Bad origin 
name='{origin}'") - elif is_blacklisted(domain): + elif blacklist.is_blacklisted(domain): raise Exception(f"domain='{domain}' is blacklisted, but method invoked") # DEBUG: print("DEBUG: domain,origin,originator,path:", domain, origin, originator, path) @@ -1112,7 +1068,7 @@ def fetch_misskey_blocks(domain: str) -> dict: break # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") - update_last_instance_fetch(domain) + instances.update_last_instance_fetch(domain) # DEBUG: print("DEBUG: Returning for domain,blocked(),suspended():", domain, len(blocks["blocked"]), len(blocks["suspended"])) return { @@ -1247,7 +1203,7 @@ def find_domains(tag: bs4.element.Tag) -> list: # DEBUG: print(f"DEBUG: domain='{domain}',reason='{reason}'") - if is_blacklisted(domain): + if blacklist.is_blacklisted(domain): print(f"WARNING: domain='{domain}' is blacklisted - skipped!") continue elif domain == "gab.com/.ai, develop.gab.com": diff --git a/fba/instances.py b/fba/instances.py index 9f1606a..77c4ded 100644 --- a/fba/instances.py +++ b/fba/instances.py @@ -135,3 +135,34 @@ def update_instance_data(domain: str): sys.exit(255) # DEBUG: print("DEBUG: EXIT!") + +def update_last_instance_fetch(domain: str): + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + if type(domain) != str: + raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"Parameter 'domain' is empty") + + # DEBUG: print("DEBUG: Updating last_instance_fetch for domain:", domain) + set("last_instance_fetch", domain, time.time()) + + # Running pending updated + # DEBUG: print(f"DEBUG: Invoking update_instance_data({domain}) ...") + update_instance_data(domain) + + # DEBUG: print("DEBUG: EXIT!") + +def update_last_blocked(domain: str): + if type(domain) != str: + raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") + elif domain == "": + raise ValueError(f"Parameter 'domain' is empty") + + # DEBUG: print("DEBUG: Updating 
last_blocked for domain", domain) + set("last_blocked", domain, time.time()) + + # Running pending updated + # DEBUG: print(f"DEBUG: Invoking update_instance_data({domain}) ...") + update_instance_data(domain) + + # DEBUG: print("DEBUG: EXIT!") diff --git a/fba/network/lemmy.py b/fba/network/lemmy.py index c75e6ae..9f9ebf9 100644 --- a/fba/network/lemmy.py +++ b/fba/network/lemmy.py @@ -56,7 +56,7 @@ def get_peers(domain: str) -> list: instances.set("total_peers", domain, len(peers)) # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") - fba.update_last_instance_fetch(domain) + instances.update_last_instance_fetch(domain) # DEBUG: print("DEBUG: Returning peers[]:", type(peers)) return peers diff --git a/fba/network/mastodon.py b/fba/network/mastodon.py index 3de8a57..1449fbe 100644 --- a/fba/network/mastodon.py +++ b/fba/network/mastodon.py @@ -17,6 +17,7 @@ import bs4 import validators +from fba import blacklist from fba import blocks from fba import config from fba import fba @@ -193,7 +194,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): if blocked == "": print("WARNING: blocked is empty:", domain) continue - elif fba.is_blacklisted(blocked): + elif blacklist.is_blacklisted(blocked): # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") continue elif blocked.count("*") > 0: diff --git a/fba/network/misskey.py b/fba/network/misskey.py index 3444db7..d77e37c 100644 --- a/fba/network/misskey.py +++ b/fba/network/misskey.py @@ -16,6 +16,7 @@ import json +from fba import blacklist from fba import config from fba import fba from fba import instances @@ -82,7 +83,7 @@ def get_peers(domain: str) -> list: elif type(row["host"]) != str: print(f"WARNING: row[host][]={type(row['host'])} is not 'str'") continue - elif fba.is_blacklisted(row["host"]): + elif blacklist.is_blacklisted(row["host"]): # DEBUG: print(f"DEBUG: row[host]='{row['host']}' is blacklisted. 
domain='{domain}'") continue elif row["host"] in peers: @@ -101,7 +102,7 @@ def get_peers(domain: str) -> list: instances.set("total_peers", domain, len(peers)) # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") - fba.update_last_instance_fetch(domain) + instances.update_last_instance_fetch(domain) # DEBUG: print("DEBUG: Returning peers[]:", type(peers)) return peers diff --git a/fba/network/peertube.py b/fba/network/peertube.py index 36d9fc4..dc54da5 100644 --- a/fba/network/peertube.py +++ b/fba/network/peertube.py @@ -62,7 +62,7 @@ def get_peers(domain: str) -> list: instances.set("total_peers", domain, len(peers)) # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") - update_last_instance_fetch(domain) + instances.update_last_instance_fetch(domain) # DEBUG: print("DEBUG: Returning peers[]:", type(peers)) return peers diff --git a/fba/network/pleroma.py b/fba/network/pleroma.py index a6a41b5..b24b6d9 100644 --- a/fba/network/pleroma.py +++ b/fba/network/pleroma.py @@ -17,6 +17,7 @@ import inspect import validators +from fba import blacklist from fba import blocks from fba import fba @@ -81,7 +82,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): if blocked == "": print("WARNING: blocked is empty after fba.tidyup_domain():", domain, block_level) continue - elif fba.is_blacklisted(blocked): + elif blacklist.is_blacklisted(blocked): # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") continue elif blocked.count("*") > 1: @@ -155,7 +156,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): if blocked == "": print("WARNING: blocked is empty after fba.tidyup_domain():", domain, block_level) continue - elif fba.is_blacklisted(blocked): + elif blacklist.is_blacklisted(blocked): # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") continue elif blocked.count("*") > 1: -- 2.39.5