if "domain" not in row:
logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
continue
- elif row["domain"] is None or row["domain"] == "":
+ elif row["domain"] in [None, ""]:
logger.debug("row[domain]='%s' is empty - SKIPPED!", row["domain"])
continue
if "domain" not in entry:
logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
continue
- elif entry["domain"] is None or entry["domain"] == "":
+ elif entry["domain"] in [None, ""]:
logger.debug("entry[domain]='%s' is empty - SKIPPED!", entry["domain"])
continue
elif not domain_helper.is_wanted(entry["domain"]):
try:
logger.info("Fetching instances from domain='%s' ...", domain)
- federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
+ federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
instances.set_last_error(domain, exception)
block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
- if block["blocked"] is None or block["blocked"] == "":
+ if block["blocked"] in [None, ""]:
logger.warning("block[blocked]='%s' is empty, blocker='%s'", block["blocked"], blocker)
continue
elif block["blocked"].endswith(".onion"):
nodeinfo_url = row["nodeinfo_url"]
logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
- if block["blocked"] is None or block["blocked"] == "":
+ if block["blocked"] in [None, ""]:
logger.debug("block[blocked]='%s' is empty - SKIPPED!", block["blocked"])
continue
logger.debug("args.software='%s' does not match software='%s' - SKIPPED!", args.software, software)
continue
- doc = None
+ items = list()
try:
logger.debug("Fetching table data for software='%s' ...", software)
- raw = utils.fetch_url(
- f"https://{source_domain}/app/views/tabledata.php?software={software}",
- network.web_headers,
- (config.get("connection_timeout"), config.get("read_timeout"))
- ).text
+ raw = network.post_json_api(
+ f"api.{source_domain}",
+ "/",
+ json.dumps({
+ "query": "{nodes(softwarename:\"" + software + "\"){domain}}"
+ })
+ )
+
logger.debug("raw[%s]()=%d", type(raw), len(raw))
+ if "exception" in raw:
+ logger.warning("row[domain]='%s' has caused an exception: '%s' - raising again ...", row["domain"], type(raw["exception"]))
+ raise raw["exception"]
+ elif "error_message" in raw:
+ logger.warning("row[domain]='%s' has caused error message: '%s' - SKIPPED!", row["domain"], raw["error_message"])
+ continue
+ elif not "data" in raw["json"]:
+ logger.warning("Cannot find key 'data' in raw[json]()=%d", len(raw["json"]))
+ continue
+ elif not "nodes" in raw["json"]["data"]:
+ logger.warning("Cannot find key 'nodes' in raw[json][data]()=%d", len(raw["json"]["data"]))
+ continue
+
+ items = raw["json"]["data"]["nodes"]
+ logger.debug("items()=%d", len(items))
- doc = bs4.BeautifulSoup(raw, features="html.parser")
- logger.debug("doc[]='%s'", type(doc))
except network.exceptions as exception:
logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
continue
- items = doc.findAll("a", {"class": "url"})
logger.info("Checking %d items,software='%s' ...", len(items), software)
for item in items:
logger.debug("item[]='%s'", type(item))
- domain = item.decode_contents()
- logger.debug("domain[%s]='%s'", type(domain), domain)
- domain = tidyup.domain(domain) if domain not in [None, ""] else None
+ if not "domain" in item:
+ logger.debug("item()=%d has no element 'domain'", len(item))
+ continue
+
+ logger.debug("item[domain]='%s' - BEFORE!", item["domain"])
+ domain = tidyup.domain(item["domain"]) if item["domain"] not in [None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
continue
logger.debug("domain='%s' is already registered - SKIPPED!", domain)
continue
- logger.info("Fetching instances for domain='%s'", domain)
+ logger.info("Fetching instances for domain='%s',software='%s' ...", domain, software)
federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
logger.debug("Success! - EXIT!")
domain = tidyup.domain(domain) if domain not in[None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
continue
logger.debug("entry[]='%s'", type(entry))
doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
logger.debug("doc[]='%s'", type(doc))
+ elements = doc.findAll("a")
- for element in doc.findAll("a"):
- logger.debug("element[]='%s'", type(element))
+ logger.debug("Checking %d element(s) ...", len(elements))
+ for element in elements:
+ logger.debug("element[%s]='%s'", type(element), element)
for href in element["href"].split(","):
logger.debug("href[%s]='%s' - BEFORE!", type(href), href)
domain = tidyup.domain(href) if href not in [None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
continue
origin = row["origin"]
software = row["software"]
- if software_helper.is_relay(software):
- logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
+ logger.debug("software='%s'", software)
+ if software is None:
+ logger.warning("args.domain='%s' has no software detected. You can try to run ./fba.py update_nodeinfo --domain=%s --force to get it updated.", args.domain, args.domain)
return 102
+ elif software_helper.is_relay(software):
+ logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
+ return 103
# Initial fetch
try:
logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
instances.set_last_error(args.domain, exception)
instances.update(args.domain)
- return 100
+ return 104
if args.single:
logger.debug("Not fetching more instances - EXIT!")
# Loop through some instances
database.cursor.execute(
- "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
+ "SELECT domain, origin, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
)
rows = database.cursor.fetchall()
logger.info("Checking %d entries ...", len(rows))
for row in rows:
- logger.debug("row[domain]='%s'", row["domain"])
- if row["domain"] == "":
- logger.debug("row[domain] is empty - SKIPPED!")
- continue
-
logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
domain = row["domain"].encode("idna").decode("utf-8")
logger.debug("domain='%s' - AFTER!", domain)
continue
try:
- logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
- federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
+ logger.info("Fetching instances for domain='%s',origin='%s',software='%s' ...", domain, row["origin"], row["software"])
+ federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
instances.set_last_error(domain, exception)
for domain in domains:
logger.debug("domain='%s' - BEFORE!", domain)
domain = tidyup.domain(domain) if domain not in[None, ""] else None
-
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
continue
elif not domain_helper.is_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
- elif instances.is_recent(domain):
- logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+ elif not args.force and instances.is_registered(domain):
+ logger.debug("domain='%s' is already registered - SKIPPED!", domain)
continue
- logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
- processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name)
-
+ logger.debug("Processing domain='%s',row[blocker]='%s' ...", domain, row["blocker"])
+ processed = processing.instance(domain, row["blocker"], inspect.currentframe().f_code.co_name, force=args.force)
logger.debug("processed='%s'", processed)
- if not processed:
- logger.debug("domain='%s' was not generically processed - SKIPPED!", domain)
- continue
logger.debug("Success! - EXIT!")
return 0
domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
continue
logger.info("Checking %d domains ...", len(rows))
for row in rows:
logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
- if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
+ if blacklist.is_blacklisted(row["domain"]):
+ logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
+ continue
+ elif (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
continue
logger.debug("blocking()=%d", len(blocking))
if len(blocking) == 0:
+ logger.debug("Empty blocking list, trying individual fetch_blocks() for row[software]='%s' ...", row["software"])
if row["software"] == "pleroma":
logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
blocking = pleroma.fetch_blocks(row["domain"])
# c.s isn't part of oliphant's "hidden" blocklists
logger.debug("row[domain]='%s'", row["domain"])
- if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
+ if row["domain"] != "chaos.social" and row["software"] is not None and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
instances.set_last_blocked(row["domain"])
instances.set_total_blocks(row["domain"], blocking)
domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
continue
elif args.no_software:
logger.info("Fetching domains with no software type detected ...")
database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL ORDER BY last_updated ASC")
+ elif args.with_software:
+ logger.info("Fetching domains with any software type detected ...")
+ database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NOT NULL ORDER BY last_updated ASC")
elif args.no_auto:
logger.info("Fetching domains with other detection mode than AUTO_DISOVERY being set ...")
database.cursor.execute("SELECT domain, software FROM instances WHERE detection_mode IS NOT NULL AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC")
cnt = 0
for row in domains:
logger.debug("row[]='%s'", type(row))
- if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
+ if blacklist.is_blacklisted(row["domain"]):
+ logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
+ continue
+ elif not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
continue
logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
continue
- logger.info("Fetching instances from domain='%s'", domain)
+ logger.info("Fetching instances from domain='%s' ...", domain)
federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
logger.debug("Success! - EXIT!")
locking.acquire()
if args.domain is not None and args.domain != "":
+ logger.debug("Fetching instances record for args.domain='%s' ...", args.domain)
database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND domain = ? LIMIT 1", [args.domain])
elif args.software is not None and args.software != "":
- database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND software = ?", [args.software])
+ logger.debug("Fetching instances records for args.software='%s' ...", args.software)
+ database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND nodeinfo_url IS NOT NULL AND software = ? ORDER BY last_updated DESC", [args.software])
else:
- database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay')")
+ logger.debug("Fetch all relay instances ...")
+ database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay', 'pub-relay') AND nodeinfo_url IS NOT NULL ORDER BY last_updated DESC")
domains = list()
rows = database.cursor.fetchall()
logger.info("Checking %d relays ...", len(rows))
for row in rows:
- logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
- peers = list()
+ logger.debug("row[domain]='%s',row[software]='%s'", row["domain"], row["software"])
if not args.force and instances.is_recent(row["domain"]):
logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
continue
+ elif row["nodeinfo_url"] is None:
+ logger.warning("row[domain]='%s' has empty nodeinfo_url but this is required - SKIPPED!", row["domain"])
+ continue
+ peers = list()
try:
+ logger.debug("row[domain]='%s',row[software]='%s' - checking ....", row["domain"], row["software"])
if row["software"] == "pub-relay":
logger.info("Fetching row[nodeinfo_url]='%s' from relay row[domain]='%s',row[software]='%s' ...", row["nodeinfo_url"], row["domain"], row["software"])
raw = network.fetch_api_url(
domain = tidyup.domain(domain) if domain not in[None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
continue
elif domain not in peers:
domain = tidyup.domain(domain) if domain not in[None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
continue
elif domain not in peers:
domain = tidyup.domain(domain) if domain not in[None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
continue
elif domain not in peers: