- # DEBUG: print("DEBUG: rel,href:", link["rel"], link["href"])
- if link["rel"] in nodeinfo_identifier:
- # DEBUG: print("DEBUG: Fetching nodeinfo from:", link["href"])
+ # DEBUG: print(f"DEBUG: link[{type(link)}]='{link}'")
+ if not isinstance(link, dict) or not "rel" in link:
+ print(f"WARNING: link[]='{type(link)}' is not 'dict' or no element 'rel' found")
+ elif link["rel"] in nodeinfo_identifier:
+ # Default is that 'href' has a complete URL, but some hosts don't send that
+ url = link["href"]
+ components = urlparse(link["href"])
+
+ # DEBUG: print(f"DEBUG: components[{type(components)}]='{components}'")
+ if components.scheme == "" and components.netloc == "":
+ # DEBUG: print(f"DEBUG: link[href]='{link['href']}' has no scheme and host name in it, prepending from domain='{domain}'")
+ url = f"https://{domain}{url}"
+ components = urlparse(url)
+
+ if blacklist.is_blacklisted(components.netloc):
+ print(f"WARNING: components.netloc='{components.netloc}' is blacklisted - SKIPPED!")
+ continue
+ elif not validators.domain(components.netloc):
+ print(f"WARNING: components.netloc='{components.netloc}' is not a valid domain - SKIPPED!")
+ continue
+
+ # DEBUG: print("DEBUG: Fetching nodeinfo from:", url)