From a50f503902489e80a441c67de9e31ff20df174e6 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Tue, 4 Jul 2023 12:03:00 +0200 Subject: [PATCH] Continued: - first (if needed) acquire lock, then check (if needed) api_domain - also check host name (components.netloc) for feed in fetch_fba_rss command --- fba/commands.py | 78 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 57 insertions(+), 21 deletions(-) diff --git a/fba/commands.py b/fba/commands.py index 45bc929..e6cfd2f 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -167,6 +167,9 @@ def fetch_pixelfed_api(args: argparse.Namespace) -> int: def fetch_bkali(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) + logger.debug("Invoking locking.acquire() ...") + locking.acquire() + api_domain = "gql.apis.bka.li" if apis.is_recent(api_domain): logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain) @@ -231,8 +234,6 @@ def fetch_bkali(args: argparse.Namespace) -> int: logger.debug("domains()=%d", len(domains)) if len(domains) > 0: - locking.acquire() - logger.info("Adding %d new instances ...", len(domains)) for domain in domains: try: @@ -260,6 +261,7 @@ def fetch_blocks(args: argparse.Namespace) -> int: logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain) return 102 + logger.debug("Invoking locking.acquire() ...") locking.acquire() if args.domain is not None and args.domain != "": @@ -429,6 +431,9 @@ def fetch_blocks(args: argparse.Namespace) -> int: def fetch_observer(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) + logger.debug("Invoking locking.acquire() ...") + locking.acquire() + api_domain = "fediverse.observer" if apis.is_recent(api_domain): logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain) @@ -437,9 +442,6 @@ def fetch_observer(args: argparse.Namespace) -> int: logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain) apis.update(api_domain) - # Acquire lock - locking.acquire() - types = list() if args.software is None: logger.info("Fetching software list ...") @@ -522,6 +524,9 @@ def fetch_observer(args: argparse.Namespace) -> int: def fetch_todon_wiki(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) + logger.debug("Invoking locking.acquire() ...") + locking.acquire() + api_domain = "wiki.todon.eu" if apis.is_recent(api_domain): logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain) @@ -530,8 +535,6 @@ def fetch_todon_wiki(args: argparse.Namespace) -> int: logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain) apis.update(api_domain) - locking.acquire() - blocklist = { "silenced": list(), "reject": list(), @@ -603,6 +606,10 @@ def fetch_todon_wiki(args: argparse.Namespace) -> int: def fetch_cs(args: argparse.Namespace): logger.debug("args[]='%s' - CALLED!", type(args)) + + logger.debug("Invoking locking.acquire() ...") + locking.acquire() + extensions = [ "extra", "abbr", @@ -659,7 +666,6 @@ def fetch_cs(args: argparse.Namespace): logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"])) blockdict = list() if len(domains) > 0: - locking.acquire() for block_level in domains: logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level])) @@ -702,8 +708,21 @@ def fetch_cs(args: argparse.Namespace): def fetch_fba_rss(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) + domains = list() + logger.debug("Invoking locking.acquire() ...") + locking.acquire() + + components = urlparse(args.feed) + + if apis.is_recent(components.netloc): + logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc) + return 0 + else: + logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc) + apis.update(components.netloc) + logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed) response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))) @@ -739,8 +758,6 @@ def fetch_fba_rss(args: argparse.Namespace) -> int: logger.debug("domains()=%d", len(domains)) if len(domains) > 0: - locking.acquire() - logger.info("Adding %d new instances ...", len(domains)) for domain in domains: try: @@ -757,6 +774,9 @@ def fetch_fba_rss(args: argparse.Namespace) -> int: def fetch_fbabot_atom(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) + logger.debug("Invoking locking.acquire() ...") + locking.acquire() + api_domain = "ryana.agency" if apis.is_recent(api_domain): logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain) @@ -810,8 +830,6 @@ def fetch_fbabot_atom(args: argparse.Namespace) -> int: logger.debug("domains()=%d", len(domains)) if len(domains) > 0: - locking.acquire() - logger.info("Adding %d new instances ...", len(domains)) for domain in domains: logger.debug("domain='%s'", domain) @@ -829,6 +847,15 @@ def fetch_fbabot_atom(args: argparse.Namespace) -> int: def fetch_instances(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) + logger.debug("args.domain='%s' - checking ...", args.domain) + if not validators.domain(args.domain): + logger.warning("args.domain='%s' is not valid.", args.domain) + return 100 + elif blacklist.is_blacklisted(args.domain): + logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain) + return 101 + + logger.debug("Invoking locking.acquire() ...") locking.acquire() # Initial fetch @@ -874,6 +901,9 @@ def fetch_instances(args: argparse.Namespace) -> int: def fetch_oliphant(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) + logger.debug("Invoking locking.acquire() ...") + locking.acquire() + api_domain = "codeberg.org" if apis.is_recent(api_domain): logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain) @@ -882,8 +912,6 @@ def fetch_oliphant(args: argparse.Namespace) -> int: logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain) apis.update(api_domain) - locking.acquire() - # Base URL base_url = f"https://{api_domain}/oliphant/blocklists/raw/branch/main/blocklists" @@ -1040,6 +1068,7 @@ def fetch_oliphant(args: argparse.Namespace) -> int: def fetch_txt(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) + logger.debug("Invoking locking.acquire() ...") locking.acquire() # Static URLs @@ -1088,6 +1117,9 @@ def fetch_txt(args: argparse.Namespace) -> int: def fetch_fedipact(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) + logger.debug("Invoking locking.acquire() ...") + locking.acquire() + api_domain = "fedipact.online" if apis.is_recent(api_domain): logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain) @@ -1096,8 +1128,6 @@ def fetch_fedipact(args: argparse.Namespace) -> int: logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain) apis.update(api_domain) - locking.acquire() - response = utils.fetch_url( f"https://{api_domain}", network.web_headers, @@ -1140,6 +1170,9 @@ def fetch_fedipact(args: argparse.Namespace) -> int: def fetch_joinfediverse(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) + logger.debug("Invoking locking.acquire() ...") + locking.acquire() + api_domain = "joinfediverse.wiki" if apis.is_recent(api_domain): logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain) @@ -1148,8 +1181,6 @@ def fetch_joinfediverse(args: argparse.Namespace) -> int: logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain) apis.update(api_domain) - locking.acquire() - raw = utils.fetch_url( f"https://{api_domain}/FediBlock", network.web_headers, @@ -1311,6 +1342,7 @@ def fetch_joinfediverse(args: argparse.Namespace) -> int: def recheck_obfuscation(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) + logger.debug("Invoking locking.acquire() ...") locking.acquire() if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain): @@ -1421,6 +1453,9 @@ def recheck_obfuscation(args: argparse.Namespace) -> int: def fetch_fedilist(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) + logger.debug("Invoking locking.acquire() ...") + locking.acquire() + api_domain = "demo.fedilist.com" if apis.is_recent(api_domain): logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain) @@ -1434,8 +1469,6 @@ def fetch_fedilist(args: argparse.Namespace) -> int: logger.debug("args.software='%s'", args.software) url = f"http://{api_domain}/instance/csv?software={args.software}&onion=not" - locking.acquire() - logger.info("Fetching url='%s' ...", url) response = reqto.get( url, @@ -1480,6 +1513,7 @@ def fetch_fedilist(args: argparse.Namespace) -> int: def update_nodeinfo(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) + logger.debug("Invoking locking.acquire() ...") locking.acquire() if args.domain is not None and args.domain != "": @@ -1522,6 +1556,9 @@ def update_nodeinfo(args: argparse.Namespace) -> int: def fetch_instances_social(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) + logger.debug("Invoking locking.acquire() ...") + locking.acquire() + api_domain = "instances.social" if config.get("instances_social_api_key") == "": @@ -1534,7 +1571,6 @@ def fetch_instances_social(args: argparse.Namespace) -> int: logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain) apis.update(api_domain) - locking.acquire() headers = { "Authorization": f"Bearer {config.get('instances_social_api_key')}", } -- 2.39.5