for row in rows:
logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
punycode = domain_helper.encode_idna(row["domain"])
+ logger.debug("punycode='%s' - AFTER!", punycode)
if row["nodeinfo_url"].startswith("/"):
- logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
+ logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches - SKIP!", row["nodeinfo_url"])
continue
elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
sources.update(source_domain)
logger.info("Fetching instances from source_domain='%s' ...", source_domain)
- raw = network.fetch_url(
- f"https://{source_domain}/api/v1/instances",
- headers=network.web_headers,
- timeout=config.timeout
- ).text
- logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
-
- parsed = json.loads(raw)
- logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
-
- if "data" not in parsed:
- logger.warning("parsed()=%d does not contain key 'data'")
- return 1
+ rows = network.fetch_json_rows(
+ source_domain,
+ "/api/v1/instances",
+ network.web_headers,
+ "data"
+ )
- logger.info("Checking %d instances ...", len(parsed["data"]))
- for row in parsed["data"]:
+ logger.info("Checking %d instances ...", len(rows))
+ for row in rows:
logger.debug("row[]='%s'", type(row))
if "host" not in row:
logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
sources.update(source_domain)
- logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
- raw = network.fetch_url(
- f"https://{source_domain}/instances.json",
- headers=network.web_headers,
- timeout=config.timeout
- ).text
- logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
-
- parsed = json.loads(raw)
- logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
-
- if "instancesInfos" not in parsed:
- logger.warning("parsed()=%d does not contain element 'instancesInfos'")
- return 1
+ logger.info("Fetching /instances.json from source_domain='%s' ...", source_domain)
+ rows = network.fetch_json_rows(
+ source_domain,
+ "/instances.json",
+ network.web_headers,
+ "instancesInfos"
+ )
- logger.info("Checking %d instane(s) ...", len(parsed["instancesInfos"]))
- for row in parsed["instancesInfos"]:
+ logger.info("Checking %d instane(s) ...", len(rows))
+ for row in rows:
logger.debug("row[%s]='%s'", type(row), row)
if "url" not in row:
logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif (args.force_all is None or not args.force_all) and instances.is_registered(domain):
- logger.debug("domain='%s' is already registered, --force-all not specified: args.force_all[]='%s'", domain, type(args.force_all))
+ logger.debug("domain='%s' is already registered, --force-all not specified: args.force_all[]='%s' - SKIPPED!", domain, type(args.force_all))
continue
elif instances.is_recent(domain):
logger.debug("domain='%s' has recently been crawled - SKIPPED!", domain)
logger.info("Checking %d row(s) ...", len(rows))
for row in rows:
- logger.debug("row[%s]='%s' - BEFORE!", type(row), row)
+ logger.debug("row[%s]='%s'", type(row), row)
if "url" not in row:
logger.warning("row='%s' has no required element 'url' - SKIPPED!", row)
continue
+ logger.debug("row[url]='%s' - BEFORE!", row["url"])
domain = urlparse(row["url"]).netloc.lower().split(":")[0]
logger.debug("domain='%s' - AFTER!", domain)
logger.warning("raw()=%d does not contain key 'json' in response - SKIPPED!", len(raw))
continue
elif "metadata" not in raw["json"]:
- logger.warning("raw[json]()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]))
+ logger.warning("raw[json]()=%d does not contain key 'metadata' in response - SKIPPED!", len(raw["json"]))
continue
elif "peers" not in raw["json"]["metadata"]:
- logger.warning("raw[json][metadata()=%d does not contain key 'json' in response - SKIPPED!", len(raw["json"]["metadata"]))
+ logger.warning("raw[json][metadata()=%d does not contain key 'peers' in response - SKIPPED!", len(raw["json"]["metadata"]))
continue
else:
logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
continue
elif "registered instances" not in tag.contents[0]:
- logger.debug("Skipping paragraph, text not found.")
+ logger.debug("Skipping paragraph, text not found - SKIPPED!")
continue
logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
logger.debug("domains()=%d,domain='%s'", len(domains), domain)
if dict_helper.has_key(domains, "domain", domain):
- logger.debug("domain='%s' already added", domain)
+ logger.debug("domain='%s' already added - SKIPPED!", domain)
continue
logger.debug("Appending domain='%s',row[domain]='%s',row[software]='%s' ...", domain, row["domain"], row["software"])