From 553707551e66d6b0352c64bf6113149445d32cb7 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Tue, 6 Jun 2023 09:19:16 +0200 Subject: [PATCH] Continued: - renamed has_element() to has_key() - fixed bad handling of has_key() - also need to count added names and exit loop on zero --- fba/boot.py | 4 +- fba/commands.py | 41 ++++++++++---------- fba/fba.py | 99 +++++++++++++++++++++++++++++------------------- fba/instances.py | 2 +- 4 files changed, 83 insertions(+), 63 deletions(-) diff --git a/fba/boot.py b/fba/boot.py index 01fda7b..13a233d 100644 --- a/fba/boot.py +++ b/fba/boot.py @@ -42,7 +42,7 @@ def init_parser(): # DEBUG: print("DEBUG: init_parser(): CALLED!") global _PARSER - print("DEBUG: Initializing parser ...") + # DEBUG: print("DEBUG: Initializing parser ...") _PARSER = argparse.ArgumentParser( description="Fetches block reasons from the fediverse", epilog="Please note that some commands have optional arguments, you may want to try fba.py --help to find them out.", @@ -113,7 +113,7 @@ def init_parser(): def run_command(): # DEBUG: print("DEBUG: run_command(): CALLED!") args = _PARSER.parse_args() - print(f"DEBUG: args[{type(args)}]={args}") + # DEBUG: print(f"DEBUG: args[{type(args)}]={args}") status = args.command(args) # DEBUG: print("DEBUG: status={status} - EXIT!") return status if type(status) == int else 0 diff --git a/fba/commands.py b/fba/commands.py index c702e0c..6596045 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -59,9 +59,9 @@ def fetch_bkali(args: argparse.Namespace): if len(fetched) == 0: raise Exception("WARNING: Returned no records") elif not "data" in fetched: - raise Exception(f"WARNING: fetched()={len(fetched)} does not contain element 'data'") + raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'") elif not "nodeinfo" in fetched["data"]: - raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain element 'nodeinfo'") + raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'") for entry in fetched["data"]["nodeinfo"]: # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'") @@ -533,7 +533,7 @@ def fetch_blocks(args: argparse.Namespace): print("INFO: blocker:", blocker) try: # Blocks - federation = fba.get_response(blocker, "{fba.get_peers_url}?filter=suspended", fba.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))).json() + federation = fba.get_response(blocker, f"{fba.get_peers_url}?filter=suspended", fba.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))).json() if (federation == None): print("WARNING: No valid response:", blocker); @@ -669,7 +669,7 @@ def fetch_fba_rss(args: argparse.Namespace): try: print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...") - response = fba.get_url(args.feed, fba.headers, config.get("connection_timeout"), config.get("read_timeout")) + response = fba.get_url(args.feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))) # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}") if response.ok and response.status_code < 300 and len(response.text) > 0: @@ -691,7 +691,7 @@ def fetch_fba_rss(args: argparse.Namespace): # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!") continue - # DEBUG: print(f"DEBUG: domain='{domain}'") + # DEBUG: print(f"DEBUG: Adding domain='{domain}'") domains.append(domain) except BaseException as e: @@ -729,28 +729,29 @@ def fetch_fbabot_atom(args: argparse.Namespace): doc = bs4.BeautifulSoup(entry.content.value, "html.parser") # DEBUG: print(f"DEBUG: doc[]={type(doc)}") for element in doc.findAll("a"): - # DEBUG: print(f"DEBUG: element[{type(element)}]={element}") - domain = fba.tidyup_domain(element["href"]) + for href in element["href"].split(","): + # DEBUG: print(f"DEBUG: href[{type(href)}]={href}") + domain = fba.tidyup_domain(href) - # DEBUG: print(f"DEBUG: domain='{domain}'") - if fba.is_blacklisted(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") - continue - elif domain in domains: - # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!") - continue - elif fba.is_instance_registered(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!") - continue + # DEBUG: print(f"DEBUG: domain='{domain}'") + if fba.is_blacklisted(domain): + # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") + continue + elif domain in domains: + # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!") + continue + elif fba.is_instance_registered(domain): + # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!") + continue - # DEBUG: print(f"DEBUG: domain='{domain}'") - domains.append(domain) + # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}") + domains.append(domain) except BaseException as e: print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'") sys.exit(255) - # DEBUG: print(f"DEBUG: domains()={len(domains)}") + # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}") if len(domains) > 0: boot.acquire_lock() diff --git a/fba/fba.py b/fba/fba.py index d4df7c8..ba1b52f 100644 --- a/fba/fba.py +++ b/fba/fba.py @@ -23,9 +23,10 @@ import json import sqlite3 import sys import time -import urllib import validators +from urllib.parse import urlparse + from fba import cache from fba import config from fba import instances @@ -122,11 +123,11 @@ patterns = [ ##### Other functions ##### def is_primitive(var: any) -> bool: - # NOISY-DEBUG: # DEBUG: print(f"DEBUG: var[]='{type(var)}' - CALLED!") + # DEBUG: print(f"DEBUG: var[]='{type(var)}' - CALLED!") return type(var) in {int, str, float, bool} or var == None def fetch_instances(domain: str, origin: str, software: str, script: str, path: str = None): - # DEBUG: print(f"DEBUG: domain={domain},origin={origin},software={software},path={path} - CALLED!") + # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',software='{software}',path='{path}' - CALLED!") if type(domain) != str: raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") elif domain == "": @@ -186,11 +187,11 @@ def fetch_instances(domain: str, origin: str, software: str, script: str, path: def add_peers(rows: dict) -> list: # DEBUG: print(f"DEBUG: rows()={len(rows)} - CALLED!") peers = list() - for element in ["linked", "allowed", "blocked"]: - # DEBUG: print(f"DEBUG: Checking element='{element}'") - if element in rows and rows[element] != None: - # DEBUG: print(f"DEBUG: Adding {len(rows[element])} peer(s) to peers list ...") - for peer in rows[element]: + for key in ["linked", "allowed", "blocked"]: + # DEBUG: print(f"DEBUG: Checking key='{key}'") + if key in rows and rows[key] != None: + # DEBUG: print(f"DEBUG: Adding {len(rows[key])} peer(s) to peers list ...") + for peer in rows[key]: # DEBUG: print(f"DEBUG: peer='{peer}' - BEFORE!") peer = tidyup_domain(peer) @@ -438,7 +439,7 @@ def update_last_error(domain: str, response: requests.models.Response): # DEBUG: print("DEBUG: EXIT!") def update_last_instance_fetch(domain: str): - # DEBUG: print(f"DEBUG: domain={domain} - CALLED!") + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if type(domain) != str: raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") elif domain == "": @@ -454,7 +455,7 @@ def update_last_instance_fetch(domain: str): # DEBUG: print("DEBUG: EXIT!") def update_last_nodeinfo(domain: str): - # DEBUG: print(f"DEBUG: domain={domain} - CALLED!") + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if type(domain) != str: raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") elif domain == "": @@ -531,7 +532,7 @@ def get_peers(domain: str, software: str) -> list: for row in fetched: # DEBUG: print(f"DEBUG: row():{len(row)}") if not "host" in row: - print(f"WARNING: row()={len(row)} does not contain element 'host': {row},domain='{domain}'") + print(f"WARNING: row()={len(row)} does not contain key 'host': {row},domain='{domain}'") continue elif type(row["host"]) != str: print(f"WARNING: row[host][]={type(row['host'])} is not 'str'") @@ -772,7 +773,7 @@ def fetch_nodeinfo(domain: str, path: str = None) -> list: return data def fetch_wellknown_nodeinfo(domain: str) -> list: - # DEBUG: print(f"DEBUG: domain={domain} - CALLED!") + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if type(domain) != str: raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") elif domain == "": @@ -795,7 +796,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> list: # DEBUG: print("DEBUG: rel,href:", link["rel"], link["href"]) if link["rel"] in nodeinfo_identifier: # DEBUG: print("DEBUG: Fetching nodeinfo from:", link["href"]) - response = get_url(link["href"]) + response = get_url(link["href"], api_headers, (config.get("connection_timeout"), config.get("read_timeout"))) data = json_from_response(response) # DEBUG: print("DEBUG: href,response.ok,response.status_code:", link["href"], response.ok, response.status_code) @@ -1104,7 +1105,7 @@ def block_instance(blocker: str, blocked: str, reason: str, block_level: str): # DEBUG: print("DEBUG: EXIT!") def is_instance_registered(domain: str) -> bool: - # DEBUG: print(f"DEBUG: domain={domain} - CALLED!") + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if type(domain) != str: raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") elif domain == "": @@ -1129,7 +1130,7 @@ def is_instance_registered(domain: str) -> bool: return registered def add_instance(domain: str, origin: str, originator: str, path: str = None): - # DEBUG: print(f"DEBUG: domain={domain},origin={origin},originator={originator},path={path} - CALLED!") + # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',originator='{originator}',path='{path}' - CALLED!") if type(domain) != str: raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") elif domain == "": @@ -1225,7 +1226,7 @@ def send_bot_post(instance: str, blocks: dict): return True def get_mastodon_blocks(domain: str) -> dict: - # DEBUG: print(f"DEBUG: domain={domain} - CALLED!") + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if type(domain) != str: raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") elif domain == "": @@ -1275,7 +1276,7 @@ def get_mastodon_blocks(domain: str) -> dict: } def get_friendica_blocks(domain: str) -> dict: - # DEBUG: print(f"DEBUG: domain={domain} - CALLED!") + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if type(domain) != str: raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") elif domain == "": @@ -1314,7 +1315,7 @@ def get_friendica_blocks(domain: str) -> dict: } def get_misskey_blocks(domain: str) -> dict: - # DEBUG: print(f"DEBUG: domain={domain} - CALLED!") + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if type(domain) != str: raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") elif domain == "": @@ -1367,9 +1368,11 @@ def get_misskey_blocks(domain: str) -> dict: # DEBUG: print("DEBUG: Raising offset by step:", step) offset = offset + step + count = 0 for instance in fetched: - # just in case - if instance["isSuspended"] and not has_element(blocks["suspended"], "domain", instance): + # Is it there? + if instance["isSuspended"] and not has_key(blocks["suspended"], "domain", instance): + count = count + 1 blocks["suspended"].append( { "domain": tidyup_domain(instance["host"]), @@ -1378,6 +1381,11 @@ def get_misskey_blocks(domain: str) -> dict: } ) + # DEBUG: print(f"DEBUG: count={count}") + if count == 0: + # DEBUG: print(f"DEBUG: API is no more returning new instances, aborting loop!") + break + except BaseException as e: print("WARNING: Caught error, exiting loop:", domain, e) update_last_error(domain, e) @@ -1420,13 +1428,21 @@ def get_misskey_blocks(domain: str) -> dict: # DEBUG: print("DEBUG: Raising offset by step:", step) offset = offset + step + count = 0 for instance in fetched: - if instance["isBlocked"] and not has_element(blocks["blocked"], "domain", instance): + # Is it there? + if instance["isBlocked"] and not has_key(blocks["blocked"], "domain", instance): + count = count + 1 blocks["blocked"].append({ "domain": tidyup_domain(instance["host"]), "reason": None }) + # DEBUG: print(f"DEBUG: count={count}") + if count == 0: + # DEBUG: print(f"DEBUG: API is no more returning new instances, aborting loop!") + break + except BaseException as e: print("ERROR: Exception during POST:", domain, e) update_last_error(domain, e) @@ -1524,22 +1540,24 @@ def get_response(domain: str, path: str, headers: dict, timeout: list) -> reques # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXXIT!") return response -def has_element(elements: list, key: str, value: any) -> bool: - # DEBUG: print(f"DEBUG: element()={len(element)},key='{key}',value[]='{type(value)}' - CALLED!") - if type(key) != str: - raise ValueError(f"Parameter key[]='{type(key)}' is not 'str'") - elif key == "": - raise ValueError("Parameter 'key' cannot be empty") +def has_key(keys: list, search: str, value: any) -> bool: + # DEBUG: print(f"DEBUG: keys()={len(keys)},search='{search}',value[]='{type(value)}' - CALLED!") + if type(keys) != list: + raise ValueError(f"Parameter keys[]='{type(keys)}' is not 'list'") + elif type(search) != str: + raise ValueError(f"Parameter search[]='{type(search)}' is not 'str'") + elif search == "": + raise ValueError("Parameter 'search' cannot be empty") has = False - # DEBUG: print(f"DEBUG: Checking elements()={len(elements)} ...") - for element in elements: - # DEBUG: print(f"DEBUG: element[]='{type(element)}'") - if type(element) != dict: - raise ValueError(f"element[]='{type(element)}' is not 'dict'") - elif not key in element: - raise KeyError(f"Cannot find key='{key}'") - elif element[key] == value: + # DEBUG: print(f"DEBUG: Checking keys()={len(keys)} ...") + for key in keys: + # DEBUG: print(f"DEBUG: key['{type(key)}']={key}") + if type(key) != dict: + raise ValueError(f"key[]='{type(key)}' is not 'dict'") + elif not search in key: + raise KeyError(f"Cannot find search='{search}'") + elif key[search] == value: has = True break @@ -1598,21 +1616,22 @@ def find_domains(tag: bs4.element.Tag) -> list: # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!") return domains -def get_url(url: str) -> requests.models.Response: - # DEBUG: print(f"DEBUG: url='{url}' - CALLED!") +def get_url(url: str, headers: dict, timeout: list) -> requests.models.Response: + # DEBUG: print(f"DEBUG: url='{url}',headers()={len(headers)},timeout={timeout} - CALLED!") if type(url) != str: raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'") elif url == "": raise ValueError("Parameter 'url' cannot be empty") # DEBUG: print(f"DEBUG: Parsing url='{url}'") - components = urllib.parse(url) + components = urlparse(url) # Invoke other function, avoid trailing ? + # DEBUG: print(f"DEBUG: components[{type(components)}]={components}") if components.query != "": - response = get_response(components.hostname, f"{components.path}?{components.query}") + response = get_response(components.hostname, f"{components.path}?{components.query}", headers, timeout) else: - response = get_response(components.hostname, f"{components.path}") + response = get_response(components.hostname, f"{components.path}", headers, timeout) # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXXIT!") return response diff --git a/fba/instances.py b/fba/instances.py index af018e4..be8cd71 100644 --- a/fba/instances.py +++ b/fba/instances.py @@ -82,7 +82,7 @@ def has_pending_instance_data(domain: str) -> bool: return has_pending def update_instance_data(domain: str): - # DEBUG: print(f"DEBUG: domain={domain} - CALLED!") + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if type(domain) != str: raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") elif domain == "": -- 2.39.5