From: Roland Häder Date: Fri, 8 Aug 2025 16:46:22 +0000 (+0200) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=a2c9819f72fdc1c88f3d109f0f915ae69674587a;p=fba.git Continued: - misskey needs some random sleep, too, so let's externalize those hardcoded values first and then apply them, too - if any other software can be such a mess, then it is misskey and its deviates --- diff --git a/blocks_empty.db b/blocks_empty.db index 716df99..28875db 100644 Binary files a/blocks_empty.db and b/blocks_empty.db differ diff --git a/config.defaults.json b/config.defaults.json index a616c72..28180d0 100644 --- a/config.defaults.json +++ b/config.defaults.json @@ -28,6 +28,8 @@ "theme" : "light", "allow_i2p_domain" : false, "instances_social_api_key": "", + "low_sleep" : 3, + "high_sleep" : 5, "max_crawl_depth" : 2000, "min_peers_length" : 1000 } diff --git a/fba/commands.py b/fba/commands.py index 8610bf0..7561348 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -19,9 +19,9 @@ import json import logging import argparse +import numpy import time import urllib -import numpy import atoma import bs4 @@ -628,8 +628,9 @@ def fetch_observer(args: argparse.Namespace) -> int: logger.info("Fetching instances for domain='%s',software='%s' ...", domain, software) federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name) - logger.debug("Random sleep to mitigate source's automatic blocking (sorry) ...") - time.sleep(default_rng.integers(low=1, high=3)) + sleep = default_rng.integers(low=config.get("low_sleep"), high=config.get("high_sleep")) + logger.debug("Random sleep of %d seconds to mitigate source's automatic blocking (sorry) ...", sleep) + time.sleep(sleep) logger.debug("Success! 
- EXIT!")
     return 0
diff --git a/fba/models/instances.py b/fba/models/instances.py
index 3ac33dc..50789fc 100644
--- a/fba/models/instances.py
+++ b/fba/models/instances.py
@@ -69,6 +69,8 @@ _pending = {
     "last_status_code" : {},
     # Last error details
     "last_error_details" : {},
+    # Last offset
+    "last_offset" : {},
     # Wether obfuscation has been used
     "has_obfuscation" : {},
     # Original software
@@ -456,6 +458,49 @@ def set_last_response_time(domain: str, response_time: float) -> None:
     _set_pending_data("last_response_time", domain, response_time)
     logger.debug("EXIT!")
 
+def get_last_offset(domain: str) -> int:
+    """
+    Return the last offset stored for the given domain in the instances table.
+
+    Falls back to 0 when there is no row for the domain or the stored value
+    is None, so the default is returned instead of None.
+    """
+    logger.debug("domain='%s' - CALLED!", domain)
+    domain_helper.raise_on(domain)
+
+    database.cursor.execute("SELECT last_offset FROM instances WHERE domain = ? LIMIT 1", [domain])
+
+    offset = 0
+
+    row = database.cursor.fetchone()
+    logger.debug("row[%s]='%s'", type(row), row)
+
+    # Keep the default of zero for a missing row or an unset column
+    if row is not None and row["last_offset"] is not None:
+        offset = row["last_offset"]
+
+    logger.debug("offset=%d - EXIT!", offset)
+    return offset
+
+def set_last_offset(domain: str, offset: int) -> None:
+    """
+    Validate the given offset and hand it to _set_pending_data() under the
+    "last_offset" key for the given domain.
+
+    Raises TypeError when offset is not an int and ValueError when it is
+    negative.
+    """
+    logger.debug("domain='%s',offset=%d - CALLED!", domain, offset)
+    domain_helper.raise_on(domain)
+
+    if not isinstance(offset, int):
+        raise TypeError(f"offset[]='{type(offset)}' has not expected type 'int'")
+    elif offset < 0:
+        raise ValueError(f"offset={offset} is below zero")
+
+    # Set last offset
+    _set_pending_data("last_offset", domain, offset)
+    logger.debug("EXIT!")
+
 def set_last_requested_path(domain: str, path: float) -> None:
     logger.debug("domain='%s',path=%s - CALLED!", domain, path)
     domain_helper.raise_on(domain)
diff --git a/fba/networks/misskey.py b/fba/networks/misskey.py
index 4c4b99d..279e326 100644
--- a/fba/networks/misskey.py
+++ b/fba/networks/misskey.py
@@ -17,6 +17,9 @@
 import json
 import logging
 
+import numpy
+import time
+
 from fba.helpers import blacklist
 from fba.helpers import config
 from fba.helpers import dicts as dict_helper
@@ -32,6 +35,9 @@ logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__) #logger.setLevel(logging.DEBUG) +# Default random number generator +default_rng = numpy.random.default_rng() + def fetch_peers(domain: str) -> list: logger.debug("domain='%s' - CALLED!", domain) domain_helper.raise_on(domain) @@ -66,16 +72,18 @@ def fetch_peers(domain: str) -> list: logger.debug("Fetching offset=%d from domain='%s' ...", offset, domain) if offset == 0: fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({ - "sort" : "+pubSub", - "host" : None, - "limit": step + "allowPartial": True, + "sort" : "+pubSub", + "host" : None, + "limit" : step }), headers) else: fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({ - "sort" : "+pubSub", - "host" : None, - "limit" : step, - "offset": offset - 1 + "allowPartial": True, + "sort" : "+pubSub", + "host" : None, + "limit" : step, + "offset" : offset - 1 }), headers) # Check records @@ -157,7 +165,7 @@ def fetch_blocks(domain: str) -> list: return [] blocklist = [] - offset = 0 + offset = instances.get_last_offset(domain) step = config.get("misskey_limit") # iterating through all "suspended" (follow-only in its terminology) @@ -170,17 +178,19 @@ def fetch_blocks(domain: str) -> list: if offset == 0: logger.debug("Sending JSON API request to domain='%s',step=%d ...", domain, step) fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({ - "sort" : "+pubSub", - "host" : None, - "limit" : step + "allowPartial": True, + "sort" : "+pubSub", + "host" : None, + "limit" : step }), headers) else: logger.debug("Sending JSON API request to domain='%s',step=%d,offset=%d ...", domain, step, offset) fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({ - "sort" : "+pubSub", - "host" : None, - "limit" : step, - "offset" : offset - 1 + "allowPartial": True, + "sort" : "+pubSub", + "host" : None, + "limit" : step, + "offset" : offset - 1 }), headers) logger.debug("fetched[]='%s'", 
type(fetched)) @@ -259,19 +269,25 @@ def fetch_blocks(domain: str) -> list: "block_level": "silenced", }) else: - logger.debug("domain='%s',blocked='%s' is not marked suspended - SKIPPED!", domain, blocked) + count = count + 1 + logger.debug("domain='%s',blocked='%s' is not marked suspended, blocked or silenced - SKIPPED!", domain, blocked) continue logger.debug("count=%d", count) if count == 0: logger.debug("API is no more returning new instances, aborting loop! domain='%s'", domain) + instances.set_last_offset(domain, 0) break except network.exceptions as exception: logger.warning("Caught error, exiting loop: domain='%s',exception[%s]='%s'", domain, type(exception), str(exception)) + instances.set_last_offset(domain, offset) instances.set_last_error(domain, exception) - offset = 0 break + sleep = default_rng.integers(low=config.get("low_sleep"), high=config.get("high_sleep")) + logger.debug("Random sleep of %d seconds to mitigate source's automatic blocking, offset=%d ...", sleep, offset) + time.sleep(sleep) + logger.debug("blocklist()=%d - EXIT!", len(blocklist)) return blocklist