]> git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Fri, 8 Aug 2025 16:46:22 +0000 (18:46 +0200)
committer Roland Häder <roland@mxchange.org>
Fri, 8 Aug 2025 17:03:03 +0000 (19:03 +0200)
- misskey needs some random sleep, too, so let's externalize those hardcoded
  values first and then apply them, too
- if any other software can be such a mess, then it is misskey and its derivatives

blocks_empty.db
config.defaults.json
fba/commands.py
fba/models/instances.py
fba/networks/misskey.py

index 716df9923b70e41700066e566a0d6922a9c412ed..28875db8a72fb8a6de2d16d65ed47aa47e1875ca 100644 (file)
Binary files a/blocks_empty.db and b/blocks_empty.db differ
index a616c72d6e67113c03fe742809561fbf57ac5da7..28180d06e44def40eff45a47959c3fe56ed18bbc 100644 (file)
@@ -28,6 +28,8 @@
     "theme"                   : "light",
     "allow_i2p_domain"        : false,
     "instances_social_api_key": "",
+    "low_sleep"               : 3,
+    "high_sleep"              : 5,
     "max_crawl_depth"         : 2000,
     "min_peers_length"        : 1000
 }
index 8610bf0023a655d333999d2318df8005aa3d4b91..756134863cf32c2f056694aefb9b2f3a103c0d51 100644 (file)
@@ -19,9 +19,9 @@ import json
 import logging
 
 import argparse
+import numpy
 import time
 import urllib
-import numpy
 
 import atoma
 import bs4
@@ -628,8 +628,9 @@ def fetch_observer(args: argparse.Namespace) -> int:
             logger.info("Fetching instances for domain='%s',software='%s' ...", domain, software)
             federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
 
-        logger.debug("Random sleep to mitigate source's automatic blocking (sorry) ...")
-        time.sleep(default_rng.integers(low=1, high=3))
+        sleep = default_rng.integers(low=config.get("low_sleep"), high=config.get("high_sleep"))
+        logger.debug("Random sleep of %d seconds to mitigate source's automatic blocking (sorry) ...", sleep)
+        time.sleep(sleep)
 
     logger.debug("Success! - EXIT!")
     return 0
index 3ac33dc55b4548bfdf8709c45758be622a2ffaa9..50789fc6e456b29f832ec998d74dab4d4898ec57 100644 (file)
@@ -69,6 +69,8 @@ _pending = {
     "last_status_code"   : {},
     # Last error details
     "last_error_details" : {},
+    # Last offset
+    "last_offset"        : {},
     # Wether obfuscation has been used
     "has_obfuscation"    : {},
     # Original software
@@ -456,6 +458,36 @@ def set_last_response_time(domain: str, response_time: float) -> None:
     _set_pending_data("last_response_time", domain, response_time)
     logger.debug("EXIT!")
 
+def get_last_offset(domain: str) -> int:
+    """Return the 'last_offset' stored in table 'instances' for `domain`, or 0 when no usable value exists."""
+    logger.debug("domain='%s' - CALLED!", domain)
+    domain_helper.raise_on(domain)
+
+    database.cursor.execute("SELECT last_offset FROM instances WHERE domain = ? LIMIT 1", [domain])
+    row = database.cursor.fetchone()
+    logger.debug("row[%s]='%s'", type(row), row)
+
+    # Fall back to 0 for an unknown domain or a NULL column, callers do arithmetic on the result
+    offset = 0
+    if row is not None and row["last_offset"] is not None:
+        offset = row["last_offset"]
+
+    logger.debug("offset=%d - EXIT!", offset)
+    return offset
+
+def set_last_offset(domain: str, offset: int) -> None:
+    """Hand `offset` to _set_pending_data() as 'last_offset' of `domain`; raises TypeError (non-int) or ValueError (negative)."""
+    logger.debug("domain='%s',offset=%d - CALLED!", domain, offset)
+    domain_helper.raise_on(domain)
+
+    if not isinstance(offset, int):
+        raise TypeError(f"offset[]='{type(offset)}' has not expected type 'int'")
+    elif offset < 0:
+        raise ValueError(f"offset={offset} is below zero")
+
+    _set_pending_data("last_offset", domain, offset)
+    logger.debug("EXIT!")
+
 def set_last_requested_path(domain: str, path: float) -> None:
     logger.debug("domain='%s',path=%s - CALLED!", domain, path)
     domain_helper.raise_on(domain)
index 4c4b99d135c58aefc63a90b2df008574295890e3..279e326443d38e9ce3c346a96e73899271af8210 100644 (file)
@@ -17,6 +17,9 @@
 import json
 import logging
 
+import numpy
+import time
+
 from fba.helpers import blacklist
 from fba.helpers import config
 from fba.helpers import dicts as dict_helper
@@ -32,6 +35,9 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 #logger.setLevel(logging.DEBUG)
 
+# Default random number generator
+default_rng = numpy.random.default_rng()
+
 def fetch_peers(domain: str) -> list:
     logger.debug("domain='%s' - CALLED!", domain)
     domain_helper.raise_on(domain)
@@ -66,16 +72,18 @@ def fetch_peers(domain: str) -> list:
         logger.debug("Fetching offset=%d from domain='%s' ...", offset, domain)
         if offset == 0:
             fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
-                "sort" : "+pubSub",
-                "host" : None,
-                "limit": step
+                "allowPartial": True,
+                "sort"        : "+pubSub",
+                "host"        : None,
+                "limit"       : step
             }), headers)
         else:
             fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
-                "sort"  : "+pubSub",
-                "host"  : None,
-                "limit" : step,
-                "offset": offset - 1
+                "allowPartial": True,
+                "sort"        : "+pubSub",
+                "host"        : None,
+                "limit"       : step,
+                "offset"      : offset - 1
             }), headers)
 
         # Check records
@@ -157,7 +165,7 @@ def fetch_blocks(domain: str) -> list:
         return []
 
     blocklist = []
-    offset    = 0
+    offset    = instances.get_last_offset(domain)
     step      = config.get("misskey_limit")
 
     # iterating through all "suspended" (follow-only in its terminology)
@@ -170,17 +178,19 @@ def fetch_blocks(domain: str) -> list:
             if offset == 0:
                 logger.debug("Sending JSON API request to domain='%s',step=%d ...", domain, step)
                 fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
-                    "sort"     : "+pubSub",
-                    "host"     : None,
-                    "limit"    : step
+                    "allowPartial": True,
+                    "sort"        : "+pubSub",
+                    "host"        : None,
+                    "limit"       : step
                 }), headers)
             else:
                 logger.debug("Sending JSON API request to domain='%s',step=%d,offset=%d ...", domain, step, offset)
                 fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
-                    "sort"     : "+pubSub",
-                    "host"     : None,
-                    "limit"    : step,
-                    "offset"   : offset - 1
+                    "allowPartial": True,
+                    "sort"        : "+pubSub",
+                    "host"        : None,
+                    "limit"       : step,
+                    "offset"      : offset - 1
                 }), headers)
 
             logger.debug("fetched[]='%s'", type(fetched))
@@ -259,19 +269,25 @@ def fetch_blocks(domain: str) -> list:
                         "block_level": "silenced",
                     })
                 else:
-                    logger.debug("domain='%s',blocked='%s' is not marked suspended - SKIPPED!", domain, blocked)
+                    count = count + 1
+                    logger.debug("domain='%s',blocked='%s' is not marked suspended, blocked or silenced - SKIPPED!", domain, blocked)
                     continue
 
             logger.debug("count=%d", count)
             if count == 0:
                 logger.debug("API is no more returning new instances, aborting loop! domain='%s'", domain)
+                instances.set_last_offset(domain, 0)
                 break
 
         except network.exceptions as exception:
             logger.warning("Caught error, exiting loop: domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
+            instances.set_last_offset(domain, offset)
             instances.set_last_error(domain, exception)
-            offset = 0
             break
 
+        sleep = default_rng.integers(low=config.get("low_sleep"), high=config.get("high_sleep"))
+        logger.debug("Random sleep of %d seconds to mitigate source's automatic blocking, offset=%d ...", sleep, offset)
+        time.sleep(sleep)
+
     logger.debug("blocklist()=%d - EXIT!", len(blocklist))
     return blocklist