git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Tue, 23 May 2023 18:02:26 +0000 (20:02 +0200)
committer Roland Häder <roland@mxchange.org>
Tue, 23 May 2023 18:02:26 +0000 (20:02 +0200)
- Lemmy has another API URL and also a different JSON output for getting more peers
- Unfinished: Fetching blocks from Lemmy

fba.py
fetch_blocks.py
fetch_instances.py

diff --git a/fba.py b/fba.py
index 5de6ba35287ae6da3753b56dc400528afb096fda..fea6af29b903d5bfc24311bb31ecba3c4add7c1f 100644 (file)
--- a/fba.py
+++ b/fba.py
@@ -122,7 +122,7 @@ def update_nodeinfos(domain: str):
         print(f"ERROR: failed SQL query: domain='{domain}',sql='{sql}',exception='{e}'")
         sys.exit(255)
 
-    # NOISY-DEBUG: # NOISY-DEBUG: print("DEBUG: Deleting nodeinfos for domain:", domain)
+    # NOISY-DEBUG: print("DEBUG: Deleting nodeinfos for domain:", domain)
     for key in nodeinfos:
         try:
             # NOISY-DEBUG: print("DEBUG: Deleting key:", key)
@@ -133,12 +133,13 @@ def update_nodeinfos(domain: str):
     # NOISY-DEBUG: print("DEBUG: EXIT!")
 
 def update_last_error(domain: str, res: any):
-    # NOISY-DEBUG: print("DEBUG: domain,res.status_code:", domain, res.status_code, res.reason)
+    # NOISY-DEBUG: print("DEBUG: domain,res[]:", domain, type(res))
     try:
-        # NOISY-DEBUG: print("DEBUG: res[]:", type(res))
-        if isinstance(res, BaseException):
+        # NOISY-DEBUG: print("DEBUG: BEFORE res[]:", type(res))
+        if isinstance(res, BaseException) or isinstance(res, json.JSONDecodeError):
             res = str(res)
 
+        # NOISY-DEBUG: print("DEBUG: AFTER res[]:", type(res))
         if type(res) is str:
             cursor.execute("UPDATE instances SET last_status_code = 999, last_error_details = ?, last_updated = ? WHERE domain = ? LIMIT 1", [
                 res,
@@ -184,23 +185,49 @@ def update_last_nodeinfo(domain: str):
     connection.commit()
     # NOISY-DEBUG: print("DEBUG: EXIT!")
 
-def get_peers(domain: str) -> list:
-    # NOISY-DEBUG: print("DEBUG: Getting peers for domain:", domain)
+def get_peers(domain: str, software: str) -> list:
+    # NOISY-DEBUG: print("DEBUG: Getting peers for domain:", domain, software)
     peers = None
 
+    if software == "lemmy":
+        # NOISY-DEBUG: print(f"DEBUG: domain='{domain}' is Lemmy. fetching JSON ...")
+        try:
+            res = reqto.get(f"https://{domain}/api/v3/site", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
+
+            if res.ok and res.json() is not None:
+                # NOISY-DEBUG: print("DEBUG: Success, res.json():", len(res.json()))
+                json = res.json()
+
+                if "federated_instances" in json and "linked" in json["federated_instances"]:
+                    # NOISY-DEBUG: print("DEBUG: Found federated_instances", domain)
+                    peers = json["federated_instances"]["linked"] + json["federated_instances"]["allowed"] + json["federated_instances"]["blocked"]
+
+        except BaseException as e:
+            print("WARNING: Exception during fetching JSON:", domain, e)
+
+        # NOISY-DEBUG: print("DEBUG: Returning peers[]:", type(peers))
+        return peers
+
     try:
-        res = reqto.get(f"https://{domain}{get_peers_url}", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
+        res = reqto.get(f"https://{domain}/api/v1/instance/peers", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
 
         if not res.ok or res.status_code >= 400:
-            print("WARNING: Cannot fetch peers:", domain)
-            update_last_error(domain, res)
+            res = reqto.get(f"https://{domain}/api/v3/site", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
+
+            if "federated_instances" in json and "linked" in json["federated_instances"]:
+                # NOISY-DEBUG: print("DEBUG: Found federated_instances", domain)
+                peers = json["federated_instances"]["linked"] + json["federated_instances"]["allowed"] + json["federated_instances"]["blocked"]
+            else:
+                print("WARNING: Could not reach any JSON API:", domain)
+                update_last_error(domain, res)
         else:
             # NOISY-DEBUG: print("DEBUG: Querying API was successful:", domain, len(res.json()))
             peers = res.json()
             nodeinfos["get_peers_url"][domain] = get_peers_url
 
-    except:
-        print("WARNING: Some error during get():", domain)
+    except BaseException as e:
+        print("WARNING: Some error during get():", domain, e)
+        update_last_error(domain, e)
 
     update_last_nodeinfo(domain)
 
@@ -220,8 +247,8 @@ def post_json_api(domain: str, path: str, data: str) -> list:
 
         update_last_nodeinfo(domain)
         json = res.json()
-    except:
-        print("WARNING: Some error during post():", domain, path, data)
+    except BaseException as e:
+        print("WARNING: Some error during post():", domain, path, data, e)
 
     # NOISY-DEBUG: print("DEBUG: Returning json():", len(json))
     return json
@@ -252,7 +279,7 @@ def fetch_nodeinfo(domain: str) -> list:
             res = reqto.get(request, headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
 
             # NOISY-DEBUG: print("DEBUG: res.ok,res.json[]:", res.ok, type(res.json()))
-            if res.ok and res.json() is not None:
+            if res.ok and res.json() is dict:
                 # NOISY-DEBUG: print("DEBUG: Success:", request)
                 json = res.json()
                 nodeinfos["detection_mode"][domain] = "STATIC_CHECK"
@@ -281,8 +308,8 @@ def fetch_wellknown_nodeinfo(domain: str) -> list:
 
     try:
         res = reqto.get(f"https://{domain}/.well-known/nodeinfo", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
-        # NOISY-DEBUG: print("DEBUG: domain,res.ok:", domain, res.ok)
-        if res.ok and res.json() is not None:
+        # NOISY-DEBUG: print("DEBUG: domain,res.ok,res.json[]:", domain, res.ok, type(res.json()))
+        if res.ok and res.json() is dict:
             nodeinfo = res.json()
             # NOISY-DEBUG: print("DEBUG: Found entries:", len(nodeinfo), domain)
             if "links" in nodeinfo:
@@ -293,7 +320,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> list:
                         # NOISY-DEBUG: print("DEBUG: Fetching nodeinfo from:", link["href"])
                         res = reqto.get(link["href"])
                         # NOISY-DEBUG: print("DEBUG: href,res.ok,res.status_code:", link["href"], res.ok, res.status_code)
-                        if res.ok and res.json() is not None:
+                        if res.ok and res.json() is dict:
                             # NOISY-DEBUG: print("DEBUG: Found JSON nodeinfo():", len(res.json()))
                             json = res.json()
                             nodeinfos["detection_mode"][domain] = "AUTO_DISCOVERY"
@@ -387,8 +414,8 @@ def update_block_reason(reason: str, blocker: str, blocked: str, block_level: st
         if cursor.rowcount == 0:
             print("WARNING: Did not update any rows:", domain)
 
-    except:
-        print("ERROR: failed SQL query:", reason, blocker, blocked, block_level)
+    except baseException as e:
+        print("ERROR: failed SQL query:", reason, blocker, blocked, block_level, e)
         sys.exit(255)
 
 def update_last_seen(blocker: str, blocked: str, block_level: str):
@@ -407,8 +434,8 @@ def update_last_seen(blocker: str, blocked: str, block_level: str):
         if cursor.rowcount == 0:
             print("WARNING: Did not update any rows:", domain)
 
-    except:
-        print("ERROR: failed SQL query:", last_seen, blocker, blocked, block_level)
+    except BaseException as e:
+        print("ERROR: failed SQL query:", last_seen, blocker, blocked, block_level, e)
         sys.exit(255)
 
     # NOISY-DEBUG: print("DEBUG: EXIT!")
@@ -436,8 +463,8 @@ def block_instance(blocker: str, blocked: str, reason: str, block_level: str):
              ),
         )
 
-    except:
-        print("ERROR: failed SQL query:", blocker, blocked, reason, block_level, first_added, last_seen)
+    except BaseException as e:
+        print("ERROR: failed SQL query:", blocker, blocked, reason, block_level, first_added, last_seen, e)
         sys.exit(255)
 
     # NOISY-DEBUG: print("DEBUG: EXIT!")
@@ -469,7 +496,7 @@ def add_instance(domain: str, origin: str, originator: str):
         )
 
         if domain in nodeinfos["nodeinfo_url"]:
-            # NOISY-DEBUG print("DEBUG: domain has pending nodeinfo being updated:", domain)
+            # NOISY-DEBUG # NOISY-DEBUG: print("DEBUG: domain has pending nodeinfo being updated:", domain)
             update_nodeinfos(domain)
 
         if domain in pending_errors:
@@ -546,8 +573,9 @@ def get_mastodon_blocks(domain: str) -> dict:
             reqto.get(f"https://{domain}/about/more", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])).text,
             "html.parser",
         )
-    except:
-        print("ERROR: Cannot fetch from domain:", domain)
+    except BaseException as e:
+        print("ERROR: Cannot fetch from domain:", domain, e)
+        update_last_error(domain, e)
         return {}
 
     for header in doc.find_all("h3"):
@@ -583,8 +611,9 @@ def get_friendica_blocks(domain: str) -> dict:
             reqto.get(f"https://{domain}/friendica", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])).text,
             "html.parser",
         )
-    except:
-        print("WARNING: Failed to fetch /friendica from domain:", domain)
+    except baseException as e:
+        print("WARNING: Failed to fetch /friendica from domain:", domain, e)
+        update_last_error(domain, e)
         return {}
 
     blocklist = doc.find(id="about_blocklist")
@@ -612,113 +641,111 @@ def get_misskey_blocks(domain: str) -> dict:
         "blocked"  : []
     }
 
-    try:
-        counter = 0
-        step = 99
-        while True:
-            # iterating through all "suspended" (follow-only in its terminology)
-            # instances page-by-page, since that troonware doesn't support
-            # sending them all at once
-            try:
-                if counter == 0:
-                    # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
-                    doc = post_json_api(domain, "/api/federation/instances/", json.dumps({
-                        "sort"     : "+caughtAt",
-                        "host"     : None,
-                        "suspended": True,
-                        "limit"    : step
-                    }))
-                else:
-                    # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
-                    doc = post_json_api(domain, "/api/federation/instances/", json.dumps({
-                        "sort"     : "+caughtAt",
-                        "host"     : None,
-                        "suspended": True,
-                        "limit"    : step,
-                        "offset"   : counter-1
-                    }))
-
-                # NOISY-DEBUG: print("DEBUG: doc():", len(doc))
-                if len(doc) == 0:
-                    # NOISY-DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
-                    break
-
-                for instance in doc:
-                    # just in case
-                    if instance["isSuspended"]:
-                        blocks["suspended"].append(
-                            {
-                                "domain": tidyup(instance["host"]),
-                                # no reason field, nothing
-                                "reason": ""
-                            }
-                        )
-
-                if len(doc) < step:
-                    # NOISY-DEBUG: print("DEBUG: End of request:", len(doc), step)
-                    break
-
-                # NOISY-DEBUG: print("DEBUG: Raising counter by step:", step)
-                counter = counter + step
-
-            except:
-                print("WARNING: Caught error, exiting loop:", domain)
-                counter = 0
+    counter = 0
+    step = 99
+    while True:
+        # iterating through all "suspended" (follow-only in its terminology)
+        # instances page-by-page, since that troonware doesn't support
+        # sending them all at once
+        try:
+            if counter == 0:
+                # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
+                doc = post_json_api(domain, "/api/federation/instances/", json.dumps({
+                    "sort"     : "+caughtAt",
+                    "host"     : None,
+                    "suspended": True,
+                    "limit"    : step
+                }))
+            else:
+                # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
+                doc = post_json_api(domain, "/api/federation/instances/", json.dumps({
+                    "sort"     : "+caughtAt",
+                    "host"     : None,
+                    "suspended": True,
+                    "limit"    : step,
+                    "offset"   : counter-1
+                }))
+
+            # NOISY-DEBUG: print("DEBUG: doc():", len(doc))
+            if len(doc) == 0:
+                # NOISY-DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
                 break
 
-        while True:
-            # same shit, different asshole ("blocked" aka full suspend)
-            try:
-                if counter == 0:
-                    # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
-                    doc = post_json_api(domain,"/api/federation/instances", json.dumps({
-                        "sort"   : "+caughtAt",
-                        "host"   : None,
-                        "blocked": True,
-                        "limit"  : step
-                    }))
-                else:
-                    # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
-                    doc = post_json_api(domain,"/api/federation/instances", json.dumps({
-                        "sort"   : "+caughtAt",
-                        "host"   : None,
-                        "blocked": True,
-                        "limit"  : step,
-                        "offset" : counter-1
-                    }))
-
-                # NOISY-DEBUG: print("DEBUG: doc():", len(doc))
-                if len(doc) == 0:
-                    # NOISY-DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
-                    break
-
-                for instance in doc:
-                    if instance["isBlocked"]:
-                        blocks["blocked"].append({
+            for instance in doc:
+                # just in case
+                if instance["isSuspended"]:
+                    blocks["suspended"].append(
+                        {
                             "domain": tidyup(instance["host"]),
+                            # no reason field, nothing
                             "reason": ""
-                        })
+                        }
+                    )
+
+            if len(doc) < step:
+                # NOISY-DEBUG: print("DEBUG: End of request:", len(doc), step)
+                break
+
+            # NOISY-DEBUG: print("DEBUG: Raising counter by step:", step)
+            counter = counter + step
+
+        except BaseException as e:
+            print("WARNING: Caught error, exiting loop:", domain, e)
+            update_last_error(domain, e)
+            counter = 0
+            break
 
-                if len(doc) < step:
-                    # NOISY-DEBUG: print("DEBUG: End of request:", len(doc), step)
-                    break
+    while True:
+        # same shit, different asshole ("blocked" aka full suspend)
+        try:
+            if counter == 0:
+                # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
+                doc = post_json_api(domain,"/api/federation/instances", json.dumps({
+                    "sort"   : "+caughtAt",
+                    "host"   : None,
+                    "blocked": True,
+                    "limit"  : step
+                }))
+            else:
+                # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
+                doc = post_json_api(domain,"/api/federation/instances", json.dumps({
+                    "sort"   : "+caughtAt",
+                    "host"   : None,
+                    "blocked": True,
+                    "limit"  : step,
+                    "offset" : counter-1
+                }))
+
+            # NOISY-DEBUG: print("DEBUG: doc():", len(doc))
+            if len(doc) == 0:
+                # NOISY-DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
+                break
 
-                # NOISY-DEBUG: print("DEBUG: Raising counter by step:", step)
-                counter = counter + step
+            for instance in doc:
+                if instance["isBlocked"]:
+                    blocks["blocked"].append({
+                        "domain": tidyup(instance["host"]),
+                        "reason": ""
+                    })
 
-            except:
-                counter = 0
+            if len(doc) < step:
+                # NOISY-DEBUG: print("DEBUG: End of request:", len(doc), step)
                 break
 
-        # NOISY-DEBUG: print("DEBUG: Returning for domain,blocked(),suspended():", domain, len(blocks["blocked"]), len(blocks["suspended"]))
-        return {
-            "reject"        : blocks["blocked"],
-            "followers_only": blocks["suspended"]
-        }
+            # NOISY-DEBUG: print("DEBUG: Raising counter by step:", step)
+            counter = counter + step
 
-    except:
-        print("WARNING: API request failed for domain:", domain)
-        return {}
+        except BaseException as e:
+            print("ERROR: Exception during POST:", domain, e)
+            update_last_error(domain, e)
+            counter = 0
+            break
+
+    # NOISY-DEBUG: print("DEBUG: Returning for domain,blocked(),suspended():", domain, len(blocks["blocked"]), len(blocks["suspended"]))
+    return {
+        "reject"        : blocks["blocked"],
+        "followers_only": blocks["suspended"]
+    }
 
 def tidyup(string: str) -> str:
     # some retards put their blocks in variable case
diff --git a/fetch_blocks.py b/fetch_blocks.py
index a5aa9ea76e014e462c36dacd9875803e9d436049..4b4aba4dc1c19a09323e49dffadea5aa8b1ec6e4 100644 (file)
--- a/fetch_blocks.py
+++ b/fetch_blocks.py
@@ -360,7 +360,7 @@ for blocker, software in fba.cursor.fetchall():
         print("INFO: blocker:", blocker)
         try:
             # Blocks
-            federation = reqto.get(f"https://{blocker}{get_peers_url}?filter=suspended", headers=fba.headers, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json()
+            federation = reqto.get(f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=fba.headers, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json()
 
             if (federation == None):
                 print("WARNING: No valid response:", blocker);
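diff --git a/fetch_instances.py b/fetch_instances.py
index 9aab60c1a15dedcf9e9a5b633c968a2c58197e53..285e8e33643953bf83f177253ee1e1e86e7b83c6 100644 (file)
--- a/fetch_instances.py
+++ b/fetch_instances.py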
@@ -4,8 +4,8 @@ import json
 import time
 import fba
 
-def fetch_instances(domain: str, origin: str):
-    # NOISY-DEBUG: print("DEBUG: domain,origin:", domain, origin)
+def fetch_instances(domain: str, origin: str, software: str):
+    # NOISY-DEBUG: print("DEBUG: domain,origin,software:", domain, origin, software)
     fba.cursor.execute(
         "SELECT domain FROM instances WHERE domain = ? LIMIT 1", [domain]
     )
@@ -14,8 +14,8 @@ def fetch_instances(domain: str, origin: str):
         # NOISY-DEBUG: print("DEBUG: Adding new domain:", domain, origin)
         fba.add_instance(domain, origin, sys.argv[0])
 
-    # NOISY-DEBUG: print("DEBUG: Fetching instances for domain:", domain, origin)
-    peerlist = fba.get_peers(domain)
+    # NOISY-DEBUG: print("DEBUG: Fetching instances for domain:", domain, software)
+    peerlist = fba.get_peers(domain, software)
 
     if (peerlist is None):
         print("ERROR: Cannot fetch peers:", domain)
@@ -55,21 +55,20 @@ def fetch_instances(domain: str, origin: str):
 instance = sys.argv[1]
 
 # Initial fetch
-fetch_instances(instance, None)
+fetch_instances(instance, None, None)
 
 # Loop through some instances
 fba.cursor.execute(
-    "SELECT domain FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_nodeinfo IS NULL OR last_nodeinfo < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_instance"]]
+    "SELECT domain,software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_nodeinfo IS NULL OR last_nodeinfo < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_instance"]]
 )
 
 for row in fba.cursor.fetchall():
-    domain = row[0]
-    # NOISY-DEBUG: print("DEBUG: domain:", domain)
-    if fba.is_blacklisted(domain):
-        print("WARNING: domain is blacklisted:", domain)
+    # NOISY-DEBUG: print("DEBUG: domain:", row[0])
+    if fba.is_blacklisted(row[0]):
+        print("WARNING: domain is blacklisted:", row[0])
         continue
 
-    print("INFO: Fetching instances for instance:", domain)
-    fetch_instances(domain, None)
+    print("INFO: Fetching instances for instance:", row[0])
+    fetch_instances(row[0], None, row[1])
 
 fba.connection.close()