# Fetch rows
database.cursor.execute("SELECT domain, software, nodeinfo_url \
FROM instances \
-WHERE nodeinfo_url IS NOT NULL \
+WHERE nodeinfo_url IS NOT NULL AND nodeinfo_url NOT LIKE '%public-api.wordpress.com%' \
ORDER BY domain ASC")
cnt = 0
logger.debug("punycode='%s' - AFTER!", punycode)
if row["nodeinfo_url"].startswith("/"):
- logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches - SKIP!", row["nodeinfo_url"])
+ logger.debug("row[nodeinfo_url]='%s' is a relative URL and matches always - SKIP!", row["nodeinfo_url"])
continue
elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
continue
logger.debug("row[software]='%s'", row["software"])
- if row["software"] == "activityrelay":
+ if doc is None:
+ logger.debug("row[domain]='%s' does not return valid HTML - SKIPPED!", row["domain"])
+ continue
+ elif row["software"] == "activityrelay":
logger.debug("Checking row[domain]='%s' ...", row["domain"])
tags = doc.findAll("p")