git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Sun, 28 May 2023 13:19:41 +0000 (15:19 +0200)
committer Roland Häder <roland@mxchange.org>
Sun, 28 May 2023 13:19:41 +0000 (15:19 +0200)
- 100 rows should work! (the fail-safe check "fetched versus expected" will
  kick in here)
- also read origin and pass over 'origin' during fetching instances

config.defaults.json
fba.py
fetch_instances.py

index c5787e6b2369b96bfaf705708b7615931a8abda2..ca48eef9a11a09ecbc1b08d490de85e268c6ae1b 100644 (file)
@@ -12,5 +12,5 @@
     "slogan"            : "### Your footer slogan ###",
     "recheck_instance"  : 3600,
     "recheck_block"     : 3600,
-    "misskey_offset"    : 10
+    "misskey_offset"    : 100
 }
diff --git a/fba.py b/fba.py
index 5ea42a05e4d4edd5f7d53cb84cf6e52ff3e681b5..f92258048e9e5ad2f1a654acf9662b0d47a58359 100644 (file)
--- a/fba.py
+++ b/fba.py
@@ -354,10 +354,14 @@ def get_peers(domain: str, software: str) -> list:
     if software == "misskey":
         # DEBUG: print(f"DEBUG: domain='{domain}' is misskey, sending API POST request ...")
 
-        counter = 0
+        offset = 0
         step = config["misskey_offset"]
+        # iterating through all "suspended" (follow-only in its terminology)
+        # instances page-by-page, since that troonware doesn't support
+        # sending them all at once
         while True:
-            if counter == 0:
+            # DEBUG: print(f"DEBUG: Fetching offset='{offset}' from '{domain}' ...")
+            if offset == 0:
                 fetched = post_json_api(domain, "/api/federation/instances", json.dumps({
                     "sort" : "+pubAt",
                     "host" : None,
@@ -368,23 +372,30 @@ def get_peers(domain: str, software: str) -> list:
                     "sort"  : "+pubAt",
                     "host"  : None,
                     "limit" : step,
-                    "offset": counter - 1
+                    "offset": offset - 1
                 }), {"Origin": domain})
 
             # DEBUG: print("DEBUG: fetched():", len(fetched))
             if len(fetched) == 0:
                 # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
                 break
-
-            # DEBUG: print("DEBUG: Raising counter by step:", step)
-            counter = counter + step
+            elif len(fetched) != config["misskey_offset"]:
+                print(f"WARNING: Fetched '{len(fetched)}' row(s) but expected: '{config['misskey_offset']}'")
+                offset = offset + (config["misskey_offset"] - len(fetched))
+            else:
+                # DEBUG: print("DEBUG: Raising offset by step:", step)
+                offset = offset + step
 
             # Check records
             for row in fetched:
                 # DEBUG: print(f"DEBUG: row():{len(row)}")
-                if "host" in row:
+                if "host" in row and is_blacklisted(row["host"]):
+                    print(f"WARNING: row[host]='{row['host']}' is blacklisted. domain='{domain}'")
+                elif "host" in row:
                     # DEBUG: print(f"DEBUG: Adding peer: '{row['host']}'")
                     peers.append(row["host"])
+                else:
+                    print(f"WARNING: row()={len(row)} does not contain element 'host': {row},domain='{domain}'")
 
         # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
         return peers
@@ -498,7 +509,7 @@ def post_json_api(domain: str, path: str, parameter: str, extra_headers: dict =
             data = res.json()
 
     except BaseException as e:
-        print("WARNING: Some error during post():", domain, path, parameter, e)
+        print(f"WARNING: Some error during post(): domain='{domain},path='{path}',parameter()={len(parameter)},exception:'{e}'")
 
     # DEBUG: print("DEBUG: Returning data():", len(data))
     return data
@@ -507,7 +518,7 @@ def fetch_nodeinfo(domain: str) -> list:
     # DEBUG: print("DEBUG: Fetching nodeinfo from domain:", domain)
 
     nodeinfo = fetch_wellknown_nodeinfo(domain)
-    # DEBUG: print("DEBUG:nodeinfo:", len(nodeinfo))
+    # DEBUG: print("DEBUG: nodeinfo:", len(nodeinfo))
 
     if len(nodeinfo) > 0:
         # DEBUG: print("DEBUG: Returning auto-discovered nodeinfo:", len(nodeinfo))
@@ -953,6 +964,7 @@ def get_friendica_blocks(domain: str) -> dict:
         return {}
 
     for line in blocklist.find("table").find_all("tr")[1:]:
+        # DEBUG: print(f"DEBUG: line='{line}'")
         blocks.append({
             "domain": tidyup(line.find_all("td")[0].text),
             "reason": tidyup(line.find_all("td")[1].text)
@@ -970,37 +982,44 @@ def get_misskey_blocks(domain: str) -> dict:
         "blocked"  : []
     }
 
-    counter = 0
+    offset = 0
     step = config["misskey_offset"]
     while True:
         # iterating through all "suspended" (follow-only in its terminology)
         # instances page-by-page, since that troonware doesn't support
         # sending them all at once
         try:
-            if counter == 0:
-                # DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
-                doc = post_json_api(domain, "/api/federation/instances", json.dumps({
+            print(f"DEBUG: Fetching offset='{offset}' from '{domain}' ...")
+            if offset == 0:
+                # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset)
+                fetched = post_json_api(domain, "/api/federation/instances", json.dumps({
                     "sort"     : "+pubAt",
                     "host"     : None,
                     "suspended": True,
                     "limit"    : step
                 }), {"Origin": domain})
             else:
-                # DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
-                doc = post_json_api(domain, "/api/federation/instances", json.dumps({
+                # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset)
+                fetched = post_json_api(domain, "/api/federation/instances", json.dumps({
                     "sort"     : "+pubAt",
                     "host"     : None,
                     "suspended": True,
                     "limit"    : step,
-                    "offset"   : counter - 1
+                    "offset"   : offset - 1
                 }), {"Origin": domain})
 
-            # DEBUG: print("DEBUG: doc():", len(doc))
-            if len(doc) == 0:
+            print("DEBUG: fetched():", len(fetched))
+            if len(fetched) == 0:
                 # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
                 break
+            elif len(fetched) != config["misskey_offset"]:
+                print(f"WARNING: Fetched '{len(fetched)}' row(s) but expected: '{config['misskey_offset']}'")
+                offset = offset + (config["misskey_offset"] - len(fetched))
+            else:
+                # DEBUG: print("DEBUG: Raising offset by step:", step)
+                offset = offset + step
 
-            for instance in doc:
+            for instance in fetched:
                 # just in case
                 if instance["isSuspended"]:
                     blocks["suspended"].append(
@@ -1011,63 +1030,55 @@ def get_misskey_blocks(domain: str) -> dict:
                         }
                     )
 
-            if len(doc) < step:
-                # DEBUG: print("DEBUG: End of request:", len(doc), step)
-                break
-
-            # DEBUG: print("DEBUG: Raising counter by step:", step)
-            counter = counter + step
-
         except BaseException as e:
             print("WARNING: Caught error, exiting loop:", domain, e)
             update_last_error(domain, e)
-            counter = 0
+            offset = 0
             break
 
     while True:
         # same shit, different asshole ("blocked" aka full suspend)
         try:
-            if counter == 0:
-                # DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
-                doc = post_json_api(domain,"/api/federation/instances", json.dumps({
+            if offset == 0:
+                # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset)
+                fetched = post_json_api(domain,"/api/federation/instances", json.dumps({
                     "sort"   : "+pubAt",
                     "host"   : None,
                     "blocked": True,
                     "limit"  : step
                 }), {"Origin": domain})
             else:
-                # DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
-                doc = post_json_api(domain,"/api/federation/instances", json.dumps({
+                # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset)
+                fetched = post_json_api(domain,"/api/federation/instances", json.dumps({
                     "sort"   : "+pubAt",
                     "host"   : None,
                     "blocked": True,
                     "limit"  : step,
-                    "offset" : counter-1
+                    "offset" : offset-1
                 }), {"Origin": domain})
 
-            # DEBUG: print("DEBUG: doc():", len(doc))
-            if len(doc) == 0:
+            print("DEBUG: fetched():", len(fetched))
+            if len(fetched) == 0:
                 # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
                 break
+            elif len(fetched) != config["misskey_offset"]:
+                print(f"WARNING: Fetched '{len(fetched)}' row(s) but expected: '{config['misskey_offset']}'")
+                offset = offset + (config["misskey_offset"] - len(fetched))
+            else:
+                # DEBUG: print("DEBUG: Raising offset by step:", step)
+                offset = offset + step
 
-            for instance in doc:
+            for instance in fetched:
                 if instance["isBlocked"]:
                     blocks["blocked"].append({
                         "domain": tidyup(instance["host"]),
                         "reason": None
                     })
 
-            if len(doc) < step:
-                # DEBUG: print("DEBUG: End of request:", len(doc), step)
-                break
-
-            # DEBUG: print("DEBUG: Raising counter by step:", step)
-            counter = counter + step
-
         except BaseException as e:
             print("ERROR: Exception during POST:", domain, e)
             update_last_error(domain, e)
-            counter = 0
+            offset = 0
             break
 
     # DEBUG: print("DEBUG: Returning for domain,blocked(),suspended():", domain, len(blocks["blocked"]), len(blocks["suspended"]))
index a53ef2213ef826c2b96a5858f168cd54d69daad4..fbd113afaafc6579a475a0ba3defba4fb79737b6 100644 (file)
@@ -53,7 +53,7 @@ fetch_instances(instance, None, None)
 
 # Loop through some instances
 fba.cursor.execute(
-    "SELECT domain,software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_nodeinfo IS NULL OR last_nodeinfo < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_instance"]]
+    "SELECT domain,origin,software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_nodeinfo IS NULL OR last_nodeinfo < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_instance"]]
 )
 
 rows = fba.cursor.fetchall()
@@ -64,7 +64,7 @@ for row in rows:
         print("WARNING: domain is blacklisted:", row[0])
         continue
 
-    print("INFO: Fetching instances for instance:", row[0])
-    fetch_instances(row[0], None, row[1])
+    print(f"INFO: Fetching instances for instance '{row[0]}'('{row[2]}') of origin '{row[1]}'")
+    fetch_instances(row[0], row[1], row[2])
 
 fba.connection.close()