git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Tue, 6 Jun 2023 07:19:16 +0000 (09:19 +0200)
committer Roland Häder <roland@mxchange.org>
Tue, 6 Jun 2023 08:02:55 +0000 (10:02 +0200)
- renamed has_element() to has_key()
- fixed bad handling of has_key()
- also need to count added names and exit loop on zero

fba/boot.py
fba/commands.py
fba/fba.py
fba/instances.py

index 01fda7bcea3ceeebbbb81629a823f72ad91dfb64..13a233d5b3e86ad8f3b2bbd9b5b893378ce4179a 100644 (file)
@@ -42,7 +42,7 @@ def init_parser():
     # DEBUG: print("DEBUG: init_parser(): CALLED!")
     global _PARSER
 
-    print("DEBUG: Initializing parser ...")
+    # DEBUG: print("DEBUG: Initializing parser ...")
     _PARSER = argparse.ArgumentParser(
         description="Fetches block reasons from the fediverse",
         epilog="Please note that some commands have optional arguments, you may want to try fba.py <command> --help to find them out.",
@@ -113,7 +113,7 @@ def init_parser():
 def run_command():
     # DEBUG: print("DEBUG: run_command(): CALLED!")
     args = _PARSER.parse_args()
-    print(f"DEBUG: args[{type(args)}]={args}")
+    # DEBUG: print(f"DEBUG: args[{type(args)}]={args}")
     status = args.command(args)
     # DEBUG: print("DEBUG: status={status} - EXIT!")
     return status if type(status) == int else 0
index c702e0c18b10888dcc963c7d185a75b63ca73f14..6596045e09fa12e64975da89d73d6a61d1941de8 100644 (file)
@@ -59,9 +59,9 @@ def fetch_bkali(args: argparse.Namespace):
         if len(fetched) == 0:
             raise Exception("WARNING: Returned no records")
         elif not "data" in fetched:
-            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain element 'data'")
+            raise Exception(f"WARNING: fetched()={len(fetched)} does not contain key 'data'")
         elif not "nodeinfo" in fetched["data"]:
-            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain element 'nodeinfo'")
+            raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain key 'nodeinfo'")
 
         for entry in fetched["data"]["nodeinfo"]:
             # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
@@ -533,7 +533,7 @@ def fetch_blocks(args: argparse.Namespace):
             print("INFO: blocker:", blocker)
             try:
                 # Blocks
-                federation = fba.get_response(blocker, "{fba.get_peers_url}?filter=suspended", fba.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))).json()
+                federation = fba.get_response(blocker, f"{fba.get_peers_url}?filter=suspended", fba.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))).json()
 
                 if (federation == None):
                     print("WARNING: No valid response:", blocker);
@@ -669,7 +669,7 @@ def fetch_fba_rss(args: argparse.Namespace):
 
     try:
         print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
-        response = fba.get_url(args.feed, fba.headers, config.get("connection_timeout"), config.get("read_timeout"))
+        response = fba.get_url(args.feed, fba.headers, (config.get("connection_timeout"), config.get("read_timeout")))
 
         # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
         if response.ok and response.status_code < 300 and len(response.text) > 0:
@@ -691,7 +691,7 @@ def fetch_fba_rss(args: argparse.Namespace):
                     # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
                     continue
 
-                # DEBUG: print(f"DEBUG: domain='{domain}'")
+                # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
                 domains.append(domain)
 
     except BaseException as e:
@@ -729,28 +729,29 @@ def fetch_fbabot_atom(args: argparse.Namespace):
                 doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
                 # DEBUG: print(f"DEBUG: doc[]={type(doc)}")
                 for element in doc.findAll("a"):
-                    # DEBUG: print(f"DEBUG: element[{type(element)}]={element}")
-                    domain = fba.tidyup_domain(element["href"])
+                    for href in element["href"].split(","):
+                        # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
+                        domain = fba.tidyup_domain(href)
 
-                    # DEBUG: print(f"DEBUG: domain='{domain}'")
-                    if fba.is_blacklisted(domain):
-                        # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
-                        continue
-                    elif domain in domains:
-                        # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
-                        continue
-                    elif fba.is_instance_registered(domain):
-                        # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
-                        continue
+                        # DEBUG: print(f"DEBUG: domain='{domain}'")
+                        if fba.is_blacklisted(domain):
+                            # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
+                            continue
+                        elif domain in domains:
+                            # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
+                            continue
+                        elif fba.is_instance_registered(domain):
+                            # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
+                            continue
 
-                    # DEBUG: print(f"DEBUG: domain='{domain}'")
-                    domains.append(domain)
+                        # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
+                        domains.append(domain)
 
     except BaseException as e:
         print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'")
         sys.exit(255)
 
-    # DEBUG: print(f"DEBUG: domains()={len(domains)}")
+    # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
     if len(domains) > 0:
         boot.acquire_lock()
 
index d4df7c8037334ae26e593279a43b679b66eed26a..ba1b52f4f4d2da0c83b73ecced070cdd1ae48044 100644 (file)
@@ -23,9 +23,10 @@ import json
 import sqlite3
 import sys
 import time
-import urllib
 import validators
 
+from urllib.parse import urlparse
+
 from fba import cache
 from fba import config
 from fba import instances
@@ -122,11 +123,11 @@ patterns = [
 ##### Other functions #####
 
 def is_primitive(var: any) -> bool:
-    # NOISY-DEBUG: # DEBUG: print(f"DEBUG: var[]='{type(var)}' - CALLED!")
+    # DEBUG: print(f"DEBUG: var[]='{type(var)}' - CALLED!")
     return type(var) in {int, str, float, bool} or var == None
 
 def fetch_instances(domain: str, origin: str, software: str, script: str, path: str = None):
-    # DEBUG: print(f"DEBUG: domain={domain},origin={origin},software={software},path={path} - CALLED!")
+    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',software='{software}',path='{path}' - CALLED!")
     if type(domain) != str:
         raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
     elif domain == "":
@@ -186,11 +187,11 @@ def fetch_instances(domain: str, origin: str, software: str, script: str, path:
 def add_peers(rows: dict) -> list:
     # DEBUG: print(f"DEBUG: rows()={len(rows)} - CALLED!")
     peers = list()
-    for element in ["linked", "allowed", "blocked"]:
-        # DEBUG: print(f"DEBUG: Checking element='{element}'")
-        if element in rows and rows[element] != None:
-            # DEBUG: print(f"DEBUG: Adding {len(rows[element])} peer(s) to peers list ...")
-            for peer in rows[element]:
+    for key in ["linked", "allowed", "blocked"]:
+        # DEBUG: print(f"DEBUG: Checking key='{key}'")
+        if key in rows and rows[key] != None:
+            # DEBUG: print(f"DEBUG: Adding {len(rows[key])} peer(s) to peers list ...")
+            for peer in rows[key]:
                 # DEBUG: print(f"DEBUG: peer='{peer}' - BEFORE!")
                 peer = tidyup_domain(peer)
 
@@ -438,7 +439,7 @@ def update_last_error(domain: str, response: requests.models.Response):
     # DEBUG: print("DEBUG: EXIT!")
 
 def update_last_instance_fetch(domain: str):
-    # DEBUG: print(f"DEBUG: domain={domain} - CALLED!")
+    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
     if type(domain) != str:
         raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
     elif domain == "":
@@ -454,7 +455,7 @@ def update_last_instance_fetch(domain: str):
     # DEBUG: print("DEBUG: EXIT!")
 
 def update_last_nodeinfo(domain: str):
-    # DEBUG: print(f"DEBUG: domain={domain} - CALLED!")
+    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
     if type(domain) != str:
         raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
     elif domain == "":
@@ -531,7 +532,7 @@ def get_peers(domain: str, software: str) -> list:
             for row in fetched:
                 # DEBUG: print(f"DEBUG: row():{len(row)}")
                 if not "host" in row:
-                    print(f"WARNING: row()={len(row)} does not contain element 'host': {row},domain='{domain}'")
+                    print(f"WARNING: row()={len(row)} does not contain key 'host': {row},domain='{domain}'")
                     continue
                 elif type(row["host"]) != str:
                     print(f"WARNING: row[host][]={type(row['host'])} is not 'str'")
@@ -772,7 +773,7 @@ def fetch_nodeinfo(domain: str, path: str = None) -> list:
     return data
 
 def fetch_wellknown_nodeinfo(domain: str) -> list:
-    # DEBUG: print(f"DEBUG: domain={domain} - CALLED!")
+    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
     if type(domain) != str:
         raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
     elif domain == "":
@@ -795,7 +796,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> list:
                     # DEBUG: print("DEBUG: rel,href:", link["rel"], link["href"])
                     if link["rel"] in nodeinfo_identifier:
                         # DEBUG: print("DEBUG: Fetching nodeinfo from:", link["href"])
-                        response = get_url(link["href"])
+                        response = get_url(link["href"], api_headers, (config.get("connection_timeout"), config.get("read_timeout")))
 
                         data = json_from_response(response)
                         # DEBUG: print("DEBUG: href,response.ok,response.status_code:", link["href"], response.ok, response.status_code)
@@ -1104,7 +1105,7 @@ def block_instance(blocker: str, blocked: str, reason: str, block_level: str):
     # DEBUG: print("DEBUG: EXIT!")
 
 def is_instance_registered(domain: str) -> bool:
-    # DEBUG: print(f"DEBUG: domain={domain} - CALLED!")
+    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
     if type(domain) != str:
         raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
     elif domain == "":
@@ -1129,7 +1130,7 @@ def is_instance_registered(domain: str) -> bool:
     return registered
 
 def add_instance(domain: str, origin: str, originator: str, path: str = None):
-    # DEBUG: print(f"DEBUG: domain={domain},origin={origin},originator={originator},path={path} - CALLED!")
+    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',originator='{originator}',path='{path}' - CALLED!")
     if type(domain) != str:
         raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
     elif domain == "":
@@ -1225,7 +1226,7 @@ def send_bot_post(instance: str, blocks: dict):
     return True
 
 def get_mastodon_blocks(domain: str) -> dict:
-    # DEBUG: print(f"DEBUG: domain={domain} - CALLED!")
+    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
     if type(domain) != str:
         raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
     elif domain == "":
@@ -1275,7 +1276,7 @@ def get_mastodon_blocks(domain: str) -> dict:
     }
 
 def get_friendica_blocks(domain: str) -> dict:
-    # DEBUG: print(f"DEBUG: domain={domain} - CALLED!")
+    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
     if type(domain) != str:
         raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
     elif domain == "":
@@ -1314,7 +1315,7 @@ def get_friendica_blocks(domain: str) -> dict:
     }
 
 def get_misskey_blocks(domain: str) -> dict:
-    # DEBUG: print(f"DEBUG: domain={domain} - CALLED!")
+    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
     if type(domain) != str:
         raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
     elif domain == "":
@@ -1367,9 +1368,11 @@ def get_misskey_blocks(domain: str) -> dict:
                 # DEBUG: print("DEBUG: Raising offset by step:", step)
                 offset = offset + step
 
+            count = 0
             for instance in fetched:
-                # just in case
-                if instance["isSuspended"] and not has_element(blocks["suspended"], "domain", instance):
+                # Is it there?
+                if instance["isSuspended"] and not has_key(blocks["suspended"], "domain", instance):
+                    count = count + 1
                     blocks["suspended"].append(
                         {
                             "domain": tidyup_domain(instance["host"]),
@@ -1378,6 +1381,11 @@ def get_misskey_blocks(domain: str) -> dict:
                         }
                     )
 
+            # DEBUG: print(f"DEBUG: count={count}")
+            if count == 0:
+                # DEBUG: print(f"DEBUG: API is no more returning new instances, aborting loop!")
+                break
+
         except BaseException as e:
             print("WARNING: Caught error, exiting loop:", domain, e)
             update_last_error(domain, e)
@@ -1420,13 +1428,21 @@ def get_misskey_blocks(domain: str) -> dict:
                 # DEBUG: print("DEBUG: Raising offset by step:", step)
                 offset = offset + step
 
+            count = 0
             for instance in fetched:
-                if instance["isBlocked"] and not has_element(blocks["blocked"], "domain", instance):
+                # Is it there?
+                if instance["isBlocked"] and not has_key(blocks["blocked"], "domain", instance):
+                    count = count + 1
                     blocks["blocked"].append({
                         "domain": tidyup_domain(instance["host"]),
                         "reason": None
                     })
 
+            # DEBUG: print(f"DEBUG: count={count}")
+            if count == 0:
+                # DEBUG: print(f"DEBUG: API is no more returning new instances, aborting loop!")
+                break
+
         except BaseException as e:
             print("ERROR: Exception during POST:", domain, e)
             update_last_error(domain, e)
@@ -1524,22 +1540,24 @@ def get_response(domain: str, path: str, headers: dict, timeout: list) -> reques
     # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXXIT!")
     return response
 
-def has_element(elements: list, key: str, value: any) -> bool:
-    # DEBUG: print(f"DEBUG: element()={len(element)},key='{key}',value[]='{type(value)}' - CALLED!")
-    if type(key) != str:
-        raise ValueError(f"Parameter key[]='{type(key)}' is not 'str'")
-    elif key == "":
-        raise ValueError("Parameter 'key' cannot be empty")
+def has_key(keys: list, search: str, value: any) -> bool:
+    # DEBUG: print(f"DEBUG: keys()={len(keys)},search='{search}',value[]='{type(value)}' - CALLED!")
+    if type(keys) != list:
+        raise ValueError(f"Parameter keys[]='{type(keys)}' is not 'list'")
+    elif type(search) != str:
+        raise ValueError(f"Parameter search[]='{type(search)}' is not 'str'")
+    elif search == "":
+        raise ValueError("Parameter 'search' cannot be empty")
 
     has = False
-    # DEBUG: print(f"DEBUG: Checking elements()={len(elements)} ...")
-    for element in elements:
-        # DEBUG: print(f"DEBUG: element[]='{type(element)}'")
-        if type(element) != dict:
-            raise ValueError(f"element[]='{type(element)}' is not 'dict'")
-        elif not key in element:
-            raise KeyError(f"Cannot find key='{key}'")
-        elif element[key] == value:
+    # DEBUG: print(f"DEBUG: Checking keys()={len(keys)} ...")
+    for key in keys:
+        # DEBUG: print(f"DEBUG: key['{type(key)}']={key}")
+        if type(key) != dict:
+            raise ValueError(f"key[]='{type(key)}' is not 'dict'")
+        elif not search in key:
+            raise KeyError(f"Cannot find search='{search}'")
+        elif key[search] == value:
             has = True
             break
 
@@ -1598,21 +1616,22 @@ def find_domains(tag: bs4.element.Tag) -> list:
     # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!")
     return domains
 
-def get_url(url: str) -> requests.models.Response:
-    # DEBUG: print(f"DEBUG: url='{url}' - CALLED!")
+def get_url(url: str, headers: dict, timeout: list) -> requests.models.Response:
+    # DEBUG: print(f"DEBUG: url='{url}',headers()={len(headers)},timeout={timeout} - CALLED!")
     if type(url) != str:
         raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
     elif url == "":
         raise ValueError("Parameter 'url' cannot be empty")
 
     # DEBUG: print(f"DEBUG: Parsing url='{url}'")
-    components = urllib.parse(url)
+    components = urlparse(url)
 
     # Invoke other function, avoid trailing ?
+    # DEBUG: print(f"DEBUG: components[{type(components)}]={components}")
     if components.query != "":
-        response = get_response(components.hostname, f"{components.path}?{components.query}")
+        response = get_response(components.hostname, f"{components.path}?{components.query}", headers, timeout)
     else:
-        response = get_response(components.hostname, f"{components.path}")
+        response = get_response(components.hostname, f"{components.path}", headers, timeout)
 
     # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXXIT!")
     return response
index af018e4fabdac74769ee97488b31aa04d585a8a4..be8cd719a5cd6cc0d975fb8d142ebfa3f1a546ad 100644 (file)
@@ -82,7 +82,7 @@ def has_pending_instance_data(domain: str) -> bool:
     return has_pending
 
 def update_instance_data(domain: str):
-    # DEBUG: print(f"DEBUG: domain={domain} - CALLED!")
+    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
     if type(domain) != str:
         raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
     elif domain == "":