From d70e579cedc97e3439a6abe7f2fd97bdb96ce61c Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Thu, 1 May 2025 13:55:23 +0200 Subject: [PATCH] Continued: - skip wordpress.com instances as the public API is always different to the "instance" - skip empty doc (BeautifulSoup4) result (HTML parser failed) - typo fixed --- fba/commands.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/fba/commands.py b/fba/commands.py index adc2c87..31d216d 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -94,7 +94,7 @@ def check_nodeinfo(args: argparse.Namespace) -> int: # Fetch rows database.cursor.execute("SELECT domain, software, nodeinfo_url \ FROM instances \ -WHERE nodeinfo_url IS NOT NULL \ +WHERE nodeinfo_url IS NOT NULL AND nodeinfo_url NOT LIKE '%public-api.wordpress.com%' \ ORDER BY domain ASC") cnt = 0 @@ -107,7 +107,7 @@ ORDER BY domain ASC") logger.debug("punycode='%s' - AFTER!", punycode) if row["nodeinfo_url"].startswith("/"): - logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches - SKIP!", row["nodeinfo_url"]) + logger.debug("row[nodeinfo_url]='%s' is a relative URL and matches always - SKIP!", row["nodeinfo_url"]) continue elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1: logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"]) @@ -1835,7 +1835,10 @@ def fetch_relays(args: argparse.Namespace) -> int: continue logger.debug("row[software]='%s'", row["software"]) - if row["software"] == "activityrelay": + if doc is None: + logger.debug("row[domain]='%s' does not return valid HTML - SKIPPED!", row["domain"]) + continue + elif row["software"] == "activityrelay": logger.debug("Checking row[domain]='%s' ...", row["domain"]) tags = doc.findAll("p") -- 2.39.5