git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Sun, 28 May 2023 12:06:53 +0000 (14:06 +0200)
committer Roland Häder <roland@mxchange.org>
Sun, 28 May 2023 12:20:02 +0000 (14:20 +0200)
- encapsulated into function add_peers()
- need to add "Content-Type: application/json" for API requests, thanks to Kromonos
- introduced 'api_headers' for JSON API requests

fba.py
fetch_blocks.py

diff --git a/fba.py b/fba.py
index 82bf0290aa9e4147f25657a5987c4bb039eeb9c2..5ea42a05e4d4edd5f7d53cb84cf6e52ff3e681b5 100644 (file)
--- a/fba.py
+++ b/fba.py
@@ -43,9 +43,14 @@ nodeinfo_identifier = [
     "http://nodeinfo.diaspora.software/ns/schema/1.0",
 ]
 
-# HTTP headers for requests
+# HTTP headers for non-API requests
 headers = {
-    "user-agent": config["useragent"],
+    "User-Agent": config["useragent"],
+}
+# HTTP headers for API requests
+api_headers = {
+    "User-Agent": config["useragent"],
+    "Content-Type": "application/json",
 }
 
 # Found info from node, such as nodeinfo URL, detection mode that needs to be
@@ -105,6 +110,18 @@ patterns = [
     re.compile("^[a-f0-9]{7}$"),
 ]
 
+def add_peers(rows: dict) -> list:
+    # Flattens the "linked", "allowed" and "blocked" entries of rows into one
+    # list; a list is returned because callers do 'peers = peers + add_peers(...)'.
+    # DEBUG: print(f"DEBUG: rows()={len(rows)} - CALLED!")
+    peers = []
+    for element in ["linked", "allowed", "blocked"]:
+        # DEBUG: print(f"DEBUG: Checking element='{element}'")
+        if rows.get(element) is not None:
+            peers.extend(rows[element])
+
+    return peers
+
 def remove_version(software: str) -> str:
     # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
     if not "." in software and " " not in software:
@@ -342,23 +359,26 @@ def get_peers(domain: str, software: str) -> list:
         while True:
             if counter == 0:
                 fetched = post_json_api(domain, "/api/federation/instances", json.dumps({
-                    "sort" : "+caughtAt",
+                    "sort" : "+pubAt",
                     "host" : None,
                     "limit": step
-                }))
+                }), {"Origin": domain})
             else:
                 fetched = post_json_api(domain, "/api/federation/instances", json.dumps({
-                    "sort"  : "+caughtAt",
+                    "sort"  : "+pubAt",
                     "host"  : None,
                     "limit" : step,
                     "offset": counter - 1
-                }))
+                }), {"Origin": domain})
 
             # DEBUG: print("DEBUG: fetched():", len(fetched))
             if len(fetched) == 0:
                 # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
                 break
 
+            # DEBUG: print("DEBUG: Raising counter by step:", step)
+            counter = counter + step
+
             # Check records
             for row in fetched:
                 # DEBUG: print(f"DEBUG: row():{len(row)}")
@@ -371,19 +391,21 @@ def get_peers(domain: str, software: str) -> list:
     elif software == "lemmy":
         # DEBUG: print(f"DEBUG: domain='{domain}' is Lemmy, fetching JSON ...")
         try:
-            res = reqto.get(f"https://{domain}/api/v3/site", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
+            res = reqto.get(f"https://{domain}/api/v3/site", headers=api_headers, timeout=(config["connection_timeout"], config["read_timeout"]))
 
             # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code='{res.status_code}'")
-            if res.ok and isinstance(res.json(), dict):
-                # DEBUG: print("DEBUG: Success, res.json():", len(res.json()))
-                data = res.json()
-
-                if "federated_instances" in data and "linked" in data["federated_instances"]:
-                    # DEBUG: print("DEBUG: Found federated_instances", domain)
-                    peers = data["federated_instances"]["linked"] + data["federated_instances"]["allowed"] + data["federated_instances"]["blocked"]
+            if not res.ok or res.status_code >= 400:
+                print("WARNING: Could not reach any JSON API:", domain)
+                update_last_error(domain, res)
+            elif "federated_instances" in res.json():
+                # DEBUG: print("DEBUG: Found federated_instances", domain)
+                peers = peers + add_peers(res.json()["federated_instances"])
+            else:
+                print("WARNING: JSON response does not contain 'federated_instances':", domain)
+                update_last_error(domain, res)
 
         except BaseException as e:
-            print("WARNING: Exception during fetching JSON:", domain, e)
+            print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception:'{e}'")
 
         update_last_nodeinfo(domain)
 
@@ -397,7 +419,7 @@ def get_peers(domain: str, software: str) -> list:
             # DEBUG: print(f"DEBUG: domain='{domain}',mode='{mode}'")
             while True:
                 try:
-                    res = reqto.get(f"https://{domain}/api/v1/server/{mode}?start={start}&count=100", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
+                    res = reqto.get(f"https://{domain}/api/v1/server/{mode}?start={start}&count=100", headers=api_headers, timeout=(config["connection_timeout"], config["read_timeout"]))
 
                     # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code='{res.status_code}'")
                     if res.ok and isinstance(res.json(), dict):
@@ -422,7 +444,7 @@ def get_peers(domain: str, software: str) -> list:
                         start = start + 100
 
                 except BaseException as e:
-                    print("WARNING: Exception during fetching JSON:", domain, e)
+                    print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception:'{e}'")
 
             update_last_nodeinfo(domain)
 
@@ -431,25 +453,19 @@ def get_peers(domain: str, software: str) -> list:
 
     # DEBUG: print(f"DEBUG: Fetching get_peers_url='{get_peers_url}' from '{domain}' ...")
     try:
-        res = reqto.get(f"https://{domain}{get_peers_url}", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
+        res = reqto.get(f"https://{domain}{get_peers_url}", headers=api_headers, timeout=(config["connection_timeout"], config["read_timeout"]))
 
         # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code={res.status_code}")
         if not res.ok or res.status_code >= 400:
             # DEBUG: print(f"DEBUG: Was not able to fetch '{get_peers_url}', trying alternative ...")
-            res = reqto.get(f"https://{domain}/api/v3/site", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
+            res = reqto.get(f"https://{domain}/api/v3/site", headers=api_headers, timeout=(config["connection_timeout"], config["read_timeout"]))
 
             if not res.ok or res.status_code >= 400:
                 print("WARNING: Could not reach any JSON API:", domain)
                 update_last_error(domain, res)
-            elif "federated_instances" in res.json() and "linked" in res.json()["federated_instances"]:
+            elif "federated_instances" in res.json():
                 # DEBUG: print("DEBUG: Found federated_instances", domain)
-                data = res.json()
-
-                for element in ["linked", "allowed", "blocked"]:
-                    # DEBUG: print(f"DEBUG: Checking element='{element}'")
-                    if element in data["federated_instances"] and data["federated_instances"][element] != None:
-                        print(f"DEBUG Adding {len(data['federated_instances'][element])} peer(s) to peers list ...")
-                        peers = peers + data["federated_instances"][element]
+                peers = peers + add_peers(res.json()["federated_instances"])
             else:
                 print("WARNING: JSON response does not contain 'federated_instances':", domain)
                 update_last_error(domain, res)
@@ -467,11 +483,11 @@ def get_peers(domain: str, software: str) -> list:
     # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
     return peers
 
-def post_json_api(domain: str, path: str, parameter: str) -> list:
-    # DEBUG: print("DEBUG: Sending POST to domain,path,parameter:", domain, path, parameter)
+def post_json_api(domain: str, path: str, parameter: str, extra_headers: dict = {}) -> list:
+    # DEBUG: print("DEBUG: Sending POST to domain,path,parameter:", domain, path, parameter, extra_headers)
     data = {}
     try:
-        res = reqto.post(f"https://{domain}{path}", data=parameter, headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
+        res = reqto.post(f"https://{domain}{path}", data=parameter, headers={**api_headers, **extra_headers}, timeout=(config["connection_timeout"], config["read_timeout"]))
 
         # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code={res.status_code}")
         if not res.ok or res.status_code >= 400:
@@ -510,7 +526,7 @@ def fetch_nodeinfo(domain: str) -> list:
     for request in requests:
         try:
             # DEBUG: print("DEBUG: Fetching request:", request)
-            res = reqto.get(request, headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
+            res = reqto.get(request, headers=api_headers, timeout=(config["connection_timeout"], config["read_timeout"]))
 
             # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code={res.status_code}")
             if res.ok and isinstance(res.json(), dict):
@@ -537,7 +553,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> list:
     data = {}
 
     try:
-        res = reqto.get(f"https://{domain}/.well-known/nodeinfo", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
+        res = reqto.get(f"https://{domain}/.well-known/nodeinfo", headers=api_headers, timeout=(config["connection_timeout"], config["read_timeout"]))
         # DEBUG: print("DEBUG: domain,res.ok,res.json[]:", domain, res.ok, type(res.json()))
         if res.ok and isinstance(res.json(), dict):
             nodeinfo = res.json()
@@ -815,9 +831,9 @@ def add_instance(domain: str, origin: str, originator: str):
         )
 
         for key in nodeinfos:
-            p# DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',nodeinfos[key]={nodeinfos[key]}")
+            # DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',nodeinfos[key]={nodeinfos[key]}")
             if domain in nodeinfos[key]:
-                p# DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo being updated ...")
+                # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo being updated ...")
                 update_nodeinfos(domain)
                 remove_pending_error(domain)
                 break
@@ -856,7 +872,7 @@ def send_bot_post(instance: str, blocks: dict):
     if truncated:
         message = message + "(the list has been truncated to the first 20 entries)"
 
-    botheaders = {**headers, **{"Authorization": "Bearer " + config["bot_token"]}}
+    botheaders = {**api_headers, **{"Authorization": "Bearer " + config["bot_token"]}}
 
     req = reqto.post(
         f"{config['bot_instance']}/api/v1/statuses",
@@ -963,21 +979,21 @@ def get_misskey_blocks(domain: str) -> dict:
         try:
             if counter == 0:
                 # DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
-                doc = post_json_api(domain, "/api/federation/instances/", json.dumps({
-                    "sort"     : "+caughtAt",
+                doc = post_json_api(domain, "/api/federation/instances", json.dumps({
+                    "sort"     : "+pubAt",
                     "host"     : None,
                     "suspended": True,
                     "limit"    : step
-                }))
+                }), {"Origin": domain})
             else:
                 # DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
-                doc = post_json_api(domain, "/api/federation/instances/", json.dumps({
-                    "sort"     : "+caughtAt",
+                doc = post_json_api(domain, "/api/federation/instances", json.dumps({
+                    "sort"     : "+pubAt",
                     "host"     : None,
                     "suspended": True,
                     "limit"    : step,
-                    "offset"   : counter-1
-                }))
+                    "offset"   : counter - 1
+                }), {"Origin": domain})
 
             # DEBUG: print("DEBUG: doc():", len(doc))
             if len(doc) == 0:
@@ -1014,20 +1030,20 @@ def get_misskey_blocks(domain: str) -> dict:
             if counter == 0:
                 # DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
                 doc = post_json_api(domain,"/api/federation/instances", json.dumps({
-                    "sort"   : "+caughtAt",
+                    "sort"   : "+pubAt",
                     "host"   : None,
                     "blocked": True,
                     "limit"  : step
-                }))
+                }), {"Origin": domain})
             else:
                 # DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
                 doc = post_json_api(domain,"/api/federation/instances", json.dumps({
-                    "sort"   : "+caughtAt",
+                    "sort"   : "+pubAt",
                     "host"   : None,
                     "blocked": True,
                     "limit"  : step,
                     "offset" : counter-1
-                }))
+                }), {"Origin": domain})
 
             # DEBUG: print("DEBUG: doc():", len(doc))
             if len(doc) == 0:
diff --git a/fetch_blocks.py b/fetch_blocks.py
index ca6dc1d17c2144259abd43a0b2f1702aa35215d7..5cda8284cb155e0cecd8fd51d74b1ae1a7ef0658 100644 (file)
--- a/fetch_blocks.py
+++ b/fetch_blocks.py
@@ -176,10 +176,10 @@ for blocker, software in rows:
                 try:
                     csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
                     # NOISY-DEBUG: print("DEBUG: Adding CSRF token:", blocker, csrf)
-                    reqheaders = {**fba.headers, **{"x-csrf-token": csrf}}
+                    reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}}
                 except:
                     # NOISY-DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker)
-                    reqheaders = fba.headers
+                    reqheaders = fba.api_headers
 
                 # NOISY-DEBUG: print("DEBUG: Quering API domain_blocks:", blocker)
                 blocks = reqto.get(f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json()
@@ -355,7 +355,7 @@ for blocker, software in rows:
         print("INFO: blocker:", blocker)
         try:
             # Blocks
-            federation = reqto.get(f"https://{blocker}{get_peers_url}?filter=suspended", headers=fba.headers, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json()
+            federation = reqto.get(f"https://{blocker}{get_peers_url}?filter=suspended", headers=fba.api_headers, timeout=(fba.config["connection_timeout"], config["read_timeout"])).json()
 
             if (federation == None):
                 print("WARNING: No valid response:", blocker);