From a5f277263433428d10ae367421e5aaa27d3d6a0e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Wed, 21 Jun 2023 03:12:21 +0200 Subject: [PATCH] Continued: - checks against blacklist added --- fba/networks/lemmy.py | 8 ++++---- fba/networks/mastodon.py | 6 ++++++ fba/networks/pleroma.py | 12 ++++++++++++ 3 files changed, 22 insertions(+), 4 deletions(-) diff --git a/fba/networks/lemmy.py b/fba/networks/lemmy.py index a7079d0..e05c2cc 100644 --- a/fba/networks/lemmy.py +++ b/fba/networks/lemmy.py @@ -183,10 +183,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # DEBUG: print(f"DEBUG: blocked='{blocked}'") if not validators.domain(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is not a valid domain - SKIPPED!") - continue - elif blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!") + print(f"WARNING: blocked='{blocked}' is not a valid domain - SKIPPED!") continue elif blocked.endswith(".arpa"): print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") @@ -194,6 +191,9 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): elif blocked.endswith(".tld"): print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!") continue + elif blacklist.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!") + continue elif not instances.is_registered(blocked): # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain) instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) diff --git a/fba/networks/mastodon.py b/fba/networks/mastodon.py index dd21519..61971af 100644 --- a/fba/networks/mastodon.py +++ b/fba/networks/mastodon.py @@ -287,6 +287,9 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): elif blocked.endswith(".tld"): print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!") continue + elif blacklist.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!") + continue elif not instances.is_registered(blocked): # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) @@ -301,6 +304,9 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): elif blocked.endswith(".tld"): print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!") continue + elif blacklist.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!") + continue elif not instances.is_registered(blocked): # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain) instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) diff --git a/fba/networks/pleroma.py b/fba/networks/pleroma.py index f23393c..546d7c5 100644 --- a/fba/networks/pleroma.py +++ b/fba/networks/pleroma.py @@ -146,6 +146,9 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): elif blocked.endswith(".tld"): print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!") continue + elif blacklist.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!") + continue elif not instances.is_registered(blocked): # Commit changes fba.connection.commit() @@ -219,6 +222,9 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): elif blocked.endswith(".tld"): print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!") continue + elif blacklist.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!") + continue elif not instances.is_registered(blocked): # Commit changes fba.connection.commit() @@ -325,6 +331,9 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): elif blocked.endswith(".tld"): print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!") continue + elif blacklist.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!") + continue elif not instances.is_registered(blocked): # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) @@ -400,6 +409,9 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): elif blocked.endswith(".tld"): print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!") continue + elif blacklist.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!") + continue elif not instances.is_registered(blocked): # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) -- 2.39.5