From e1b83a193367bc494bbe8549ed9d82816a811634 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Tue, 23 May 2023 21:08:39 +0200 Subject: [PATCH] Continued: - use validators.domain() for checking if it is a valid domain (and host name) --- fba.py | 26 +++++++++++++++----------- fetch_instances.py | 6 +++--- requirements.txt | 1 + 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/fba.py b/fba.py index fea6af2..210abab 100644 --- a/fba.py +++ b/fba.py @@ -7,6 +7,7 @@ import sqlite3 import json import sys import time +import validators with open("config.json") as f: config = json.loads(f.read()) @@ -194,7 +195,7 @@ def get_peers(domain: str, software: str) -> list: try: res = reqto.get(f"https://{domain}/api/v3/site", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) - if res.ok and res.json() is not None: + if res.ok and isinstance(res.json(), dict): # NOISY-DEBUG: print("DEBUG: Success, res.json():", len(res.json())) json = res.json() @@ -205,6 +206,8 @@ def get_peers(domain: str, software: str) -> list: except BaseException as e: print("WARNING: Exception during fetching JSON:", domain, e) + update_last_nodeinfo(domain) + # NOISY-DEBUG: print("DEBUG: Returning peers[]:", type(peers)) return peers @@ -279,7 +282,7 @@ def fetch_nodeinfo(domain: str) -> list: res = reqto.get(request, headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) # NOISY-DEBUG: print("DEBUG: res.ok,res.json[]:", res.ok, type(res.json())) - if res.ok and res.json() is dict: + if res.ok and isinstance(res.json(), dict): # NOISY-DEBUG: print("DEBUG: Success:", request) json = res.json() nodeinfos["detection_mode"][domain] = "STATIC_CHECK" @@ -296,7 +299,7 @@ def fetch_nodeinfo(domain: str) -> list: pass # NOISY-DEBUG: print("DEBUG: json[]:", type(json)) - if json is None or len(json) == 0: + if not isinstance(json, dict) or len(json) == 0: print("WARNING: Failed fetching nodeinfo from domain:", domain) # NOISY-DEBUG: print("DEBUG: Returning json[]:", type(json)) @@ -309,7 +312,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> list: try: res = reqto.get(f"https://{domain}/.well-known/nodeinfo", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) # NOISY-DEBUG: print("DEBUG: domain,res.ok,res.json[]:", domain, res.ok, type(res.json())) - if res.ok and res.json() is dict: + if res.ok and isinstance(res.json(), dict): nodeinfo = res.json() # NOISY-DEBUG: print("DEBUG: Found entries:", len(nodeinfo), domain) if "links" in nodeinfo: @@ -320,7 +323,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> list: # NOISY-DEBUG: print("DEBUG: Fetching nodeinfo from:", link["href"]) res = reqto.get(link["href"]) # NOISY-DEBUG: print("DEBUG: href,res.ok,res.status_code:", link["href"], res.ok, res.status_code) - if res.ok and res.json() is dict: + if res.ok and isinstance(res.json(), dict): # NOISY-DEBUG: print("DEBUG: Found JSON nodeinfo():", len(res.json())) json = res.json() nodeinfos["detection_mode"][domain] = "AUTO_DISCOVERY" @@ -346,7 +349,7 @@ def determine_software(domain: str) -> str: json = fetch_nodeinfo(domain) # NOISY-DEBUG: print("DEBUG: json[]:", type(json)) - if json is None or len(json) == 0: + if not isinstance(json, dict) or len(json) == 0: # NOISY-DEBUG: print("DEBUG: Could not determine software type:", domain) return None @@ -442,10 +445,10 @@ def update_last_seen(blocker: str, blocked: str, block_level: str): def block_instance(blocker: str, blocked: str, reason: str, block_level: str): # NOISY-DEBUG: print("DEBUG: blocker,blocked,reason,block_level:", blocker, blocked, reason, block_level) - if blocker.find("@") > 0: + if not validators.domain(blocker): print("WARNING: Bad blocker:", blocker) raise - elif blocked.find("@") > 0: + elif not validators.domain(blocked): print("WARNING: Bad blocked:", blocked) raise @@ -471,10 +474,10 @@ def block_instance(blocker: str, blocked: str, reason: str, block_level: str): def add_instance(domain: str, origin: str, originator: str): # NOISY-DEBUG: print("DEBUG: domain,origin:", domain, origin, originator) - if domain.find("@") > 0: + if not validators.domain(domain): print("WARNING: Bad domain name:", domain) raise - elif origin is not None and origin.find("@") > 0: + elif origin is not None and not validators.domain(origin): print("WARNING: Bad origin name:", origin) raise @@ -565,7 +568,8 @@ def get_mastodon_blocks(domain: str) -> dict: "שרתים מוגבלים" : "Silenced servers", "Serveurs suspendus" : "Suspended servers", "Médias filtrés" : "Filtered media", - "Serveurs limités" : "Silenced servers", + "Serveurs limités" : "Limited servers", + "Serveurs modérés" : "Limited servers", } try: diff --git a/fetch_instances.py b/fetch_instances.py index 285e8e3..635cc1c 100644 --- a/fetch_instances.py +++ b/fetch_instances.py @@ -2,6 +2,7 @@ import sqlite3 import sys import json import time +import validators import fba def fetch_instances(domain: str, origin: str, software: str): @@ -28,11 +29,10 @@ def fetch_instances(domain: str, origin: str, software: str): for instance in peerlist: instance = instance.lower() - if instance.find("@") > 0: + if not validators.domain(instance): print("WARNING: Bad instance name,domain:", instance, domain) continue - - if fba.is_blacklisted(instance): + elif fba.is_blacklisted(instance): # NOISY-DEBUG: print("DEBUG: instance is blacklisted:", instance) continue diff --git a/requirements.txt b/requirements.txt index c5e5818..36b0f93 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,3 +5,4 @@ requests jinja2 eventlet reqto +validators -- 2.39.5