From 3b9a8d2b9719b00c40056217217542f2995d3a0f Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Sat, 23 Dec 2023 17:10:59 +0100 Subject: [PATCH] Continued: - fediverse.observer has changed their API to GraphQL (JSON POST) - domain singleuser.club blacklisted; this domain started flooding with sub-domains which have wwXX as another sub-domain --- fba/commands.py | 44 ++++++++++++++++++++++++++++------------ fba/helpers/blacklist.py | 1 + 2 files changed, 32 insertions(+), 13 deletions(-) diff --git a/fba/commands.py b/fba/commands.py index 5ddb44a..4772276 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -525,29 +525,47 @@ def fetch_observer(args: argparse.Namespace) -> int: logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software) continue - doc = None + items = list() try: logger.debug("Fetching table data for software='%s' ...", software) - raw = utils.fetch_url( - f"https://{source_domain}/app/views/tabledata.php?software={software}", - network.web_headers, - (config.get("connection_timeout"), config.get("read_timeout")) - ).text + raw = network.post_json_api( + f"api.{source_domain}", + "/", + json.dumps({ + "query": "{nodes(softwarename:\"" + software + "\"){domain}}" + }) + ) + logger.debug("raw[%s]()=%d", type(raw), len(raw)) + if "exception" in raw: + logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"])) + raise raw["exception"] + elif "error_message" in raw: + logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"]) + continue + elif not "data" in raw["json"]: + logger.warning("Cannot find key 'nodes' in raw[json]()=%d", len(raw["json"])) + continue + elif not "nodes" in raw["json"]["data"]: + logger.warning("Cannot find key 'nodes' in raw[json][data]()=%d", len(raw["json"]["data"])) + continue + + items = raw["json"]["data"]["nodes"] + logger.debug("items()=%d", 
len(items)) - doc = bs4.BeautifulSoup(raw, features="html.parser") - logger.debug("doc[]='%s'", type(doc)) except network.exceptions as exception: logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception)) continue - items = doc.findAll("a", {"class": "url"}) logger.info("Checking %d items,software='%s' ...", len(items), software) for item in items: logger.debug("item[]='%s'", type(item)) - domain = item.decode_contents() - logger.debug("domain[%s]='%s'", type(domain), domain) - domain = tidyup.domain(domain) if domain not in [None, ""] else None + if not "domain" in item: + logger.debug("item()=%d has not element 'domain'", len(item)) + continue + + logger.debug("item[domain]='%s' - BEFORE!", item["domain"]) + domain = tidyup.domain(item["domain"]) if item["domain"] not in [None, ""] else None logger.debug("domain='%s' - AFTER!", domain) if domain in [None, ""]: @@ -565,7 +583,7 @@ def fetch_observer(args: argparse.Namespace) -> int: logger.debug("domain='%s' is already registered - SKIPPED!", domain) continue - logger.info("Fetching instances for domain='%s' ...", domain) + logger.info("Fetching instances for domain='%s',software='%s' ...", domain, software) federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name) logger.debug("Success! - EXIT!") diff --git a/fba/helpers/blacklist.py b/fba/helpers/blacklist.py index 6d0c6ff..dc3bb84 100644 --- a/fba/helpers/blacklist.py +++ b/fba/helpers/blacklist.py @@ -65,6 +65,7 @@ _blacklist = { "free-pic.org" : "Parked domain, no fediverse instance", "co-mastdn.ga" : "Parked domain, no fediverse instance", "chocoflan.net" : "Parked domain, no fediverse instance", + "singleuser.club" : "Parked domain, no fediverse instance", "qwest.net" : "Dynamic IP address hosts should not be used for fediverse instances", } -- 2.39.5