From: Roland Häder Date: Thu, 1 May 2025 11:55:23 +0000 (+0200) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=d70e579cedc97e3439a6abe7f2fd97bdb96ce61c;p=fba.git Continued: - skip wordpress.com instances as the public API is always different to the "instance" - skip empty doc (BeautifulSoup4) result (HTML parser failed) - typo fixed --- diff --git a/fba/commands.py b/fba/commands.py index adc2c87..31d216d 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -94,7 +94,7 @@ def check_nodeinfo(args: argparse.Namespace) -> int: # Fetch rows database.cursor.execute("SELECT domain, software, nodeinfo_url \ FROM instances \ -WHERE nodeinfo_url IS NOT NULL \ +WHERE nodeinfo_url IS NOT NULL AND nodeinfo_url NOT LIKE '%public-api.wordpress.com%' \ ORDER BY domain ASC") cnt = 0 @@ -107,7 +107,7 @@ ORDER BY domain ASC") logger.debug("punycode='%s' - AFTER!", punycode) if row["nodeinfo_url"].startswith("/"): - logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches - SKIP!", row["nodeinfo_url"]) + logger.debug("row[nodeinfo_url]='%s' is a relative URL and matches always - SKIP!", row["nodeinfo_url"]) continue elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1: logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"]) @@ -1835,7 +1835,10 @@ def fetch_relays(args: argparse.Namespace) -> int: continue logger.debug("row[software]='%s'", row["software"]) - if row["software"] == "activityrelay": + if doc is None: + logger.debug("row[domain]='%s' does not return valid HTML - SKIPPED!", row["domain"]) + continue + elif row["software"] == "activityrelay": logger.debug("Checking row[domain]='%s' ...", row["domain"]) tags = doc.findAll("p")