(config.get("connection_timeout"), config.get("read_timeout"))
)
- logger.debug("JSON API returned %d elements", len(fetched))
+ logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
if "error_message" in fetched:
logger.warning("API returned error_message='%s' - EXIT!", fetched["error_message"])
return 101
if "domain" not in row:
logger.warning("row='%s' does not contain element 'domain' - SKIPPED!", row)
continue
- elif row["domain"] == "":
- logger.debug("row[domain] is empty - SKIPPED!")
+ elif row["domain"] in [None, ""]:
+ logger.debug("row[domain]='%s' is empty - SKIPPED!", row["domain"])
continue
logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
if "domain" not in entry:
logger.warning("entry()=%d does not contain 'domain' - SKIPPED!", len(entry))
continue
- elif entry["domain"] == "":
- logger.debug("entry[domain] is empty - SKIPPED!")
+ elif entry["domain"] in [None, ""]:
+ logger.debug("entry[domain]='%s' is empty - SKIPPED!", entry["domain"])
continue
elif not domain_helper.is_wanted(entry["domain"]):
logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
try:
logger.info("Fetching instances from domain='%s' ...", domain)
- federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
+ federation.fetch_instances(domain, "tak.teleyal.blog", None, inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
logger.warning("Exception '%s' during fetching instances (fetch_bkali) from domain='%s'", type(exception), domain)
instances.set_last_error(domain, exception)
database.cursor.execute(
"SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [args.software]
)
- elif args.force:
- # Re-check all
- logger.debug("Re-checking all instances ...")
+ elif args.only_none:
+ # Check only entries with total_blocked=None
database.cursor.execute(
- "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
+ "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND total_blocks IS NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
)
else:
# Re-check after "timeout" (aka. minimum interval)
database.cursor.execute(
- "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND (last_blocked IS NULL OR last_blocked < ?) AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_block")]
+ "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY total_blocks DESC, last_response_time ASC, last_updated ASC"
)
rows = database.cursor.fetchall()
if not domain_helper.is_wanted(blocker):
logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
continue
+ elif not args.force and instances.is_recent(blocker, "last_blocked"):
+ logger.debug("blocker='%s' has been recently accessed - SKIPPED!", blocker)
+ continue
logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
instances.set_last_blocked(blocker)
block["reason"] = tidyup.reason(block["reason"]) if block["reason"] is not None and block["reason"] != "" else None
logger.debug("blocked='%s',reason='%s' - AFTER!", block["blocked"], block["reason"])
- if block["blocked"] == "":
- logger.warning("blocked is empty, blocker='%s'", blocker)
+ if block["blocked"] in [None, ""]:
+ logger.warning("block[blocked]='%s' is empty, blocker='%s'", block["blocked"], blocker)
continue
elif block["blocked"].endswith(".onion"):
logger.debug("blocked='%s' is a TOR .onion domain - SKIPPED", block["blocked"])
continue
+ elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain") == "true":
+ logger.debug("blocked='%s' is an I2P .onion domain - SKIPPED", block["blocked"])
+ continue
elif block["blocked"].endswith(".arpa"):
logger.debug("blocked='%s' is a reverse IP address - SKIPPED", block["blocked"])
continue
origin = row["origin"]
nodeinfo_url = row["nodeinfo_url"]
- logger.debug("Looking up instance by domainm, blocked='%s'", block["blocked"])
- if block["blocked"] == "":
- logger.debug("block[blocked] is empty - SKIPPED!")
+ logger.debug("Looking up instance by domain, blocked='%s'", block["blocked"])
+ if block["blocked"] in [None, ""]:
+ logger.debug("block[blocked]='%s' is empty - SKIPPED!", block["blocked"])
continue
logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
for item in items:
logger.debug("item[]='%s'", type(item))
domain = item.decode_contents()
+ logger.debug("domain[%s]='%s'", type(domain), domain)
domain = tidyup.domain(domain) if domain not in [None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
continue
domain = tidyup.domain(domain) if domain not in[None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
continue
logger.debug("entry[]='%s'", type(entry))
doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
logger.debug("doc[]='%s'", type(doc))
+
for element in doc.findAll("a"):
logger.debug("element[]='%s'", type(element))
for href in element["href"].split(","):
domain = tidyup.domain(href) if href not in [None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
continue
origin = row["origin"]
software = row["software"]
- if software_helper.is_relay(software):
- logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
+ if software is None:
+ logger.warning("args.domain='%s' has no software detected. You can try to run ./fba.py update_nodeinfo --domain=%s --force to get it updated.", args.domain, args.domain)
return 102
+ elif software_helper.is_relay(software):
+ logger.warning("args.domain='%s' is of software type '%s' which is not supported by this command. Please invoke fetch_relays instead.", args.domain, software)
+ return 103
# Initial fetch
try:
logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
instances.set_last_error(args.domain, exception)
instances.update(args.domain)
- return 100
+ return 104
if args.single:
logger.debug("Not fetching more instances - EXIT!")
# Loop through some instances
database.cursor.execute(
- "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
+ "SELECT domain, origin, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus', 'neodb') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY total_peers DESC, last_response_time ASC, last_updated ASC", [time.time() - config.get("recheck_instance")]
)
rows = database.cursor.fetchall()
continue
try:
- logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
- federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
+ logger.info("Fetching instances for domain='%s',origin='%s',software='%s' ...", domain, row["origin"], row["software"])
+ federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
instances.set_last_error(domain, exception)
logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
for block in blocklists.oliphant_blocklists:
# Is domain given and not equal blocker?
+ logger.debug("block[blocker]='%s',block[csv_url]='%s'", block["blocker"], block["csv_url"])
if isinstance(args.domain, str) and args.domain != block["blocker"]:
logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
continue
domain = tidyup.domain(domain) if domain not in[None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
continue
elif not domain_helper.is_wanted(domain):
domain = tidyup.domain(row.contents[0]) if row.contents[0] not in [None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain() - SKIPPED!", domain)
continue
locking.acquire()
if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
- database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
+ database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND domain = ?", [args.domain])
elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
- database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
+ database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE (has_obfuscation = 1 OR has_obfuscation IS NULL) AND software = ?", [args.software])
else:
- database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")
+ database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 OR has_obfuscation IS NULL")
rows = database.cursor.fetchall()
logger.info("Checking %d domains ...", len(rows))
logger.debug("blocking()=%d", len(blocking))
if len(blocking) == 0:
+ logger.debug("Empty blocking list, trying individual fetch_blocks() for row[software]='%s' ...", row["software"])
if row["software"] == "pleroma":
logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
blocking = pleroma.fetch_blocks(row["domain"])
# c.s isn't part of oliphant's "hidden" blocklists
logger.debug("row[domain]='%s'", row["domain"])
- if row["domain"] != "chaos.social" and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
+ if row["domain"] != "chaos.social" and row["software"] is not None and not software_helper.is_relay(row["software"]) and not blocklists.has(row["domain"]):
logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
instances.set_last_blocked(row["domain"])
instances.set_total_blocks(row["domain"], blocking)
if block["blocked"] == "":
logger.debug("block[blocked] is empty - SKIPPED!")
continue
+ elif block["blocked"].endswith(".onion"):
+ logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
+ continue
+ elif block["blocked"].endswith(".i2p") and config.get("allow_i2p_domain") == "true":
+ logger.debug("blocked='%s' is an I2P onion domain name - SKIPPED!", block["blocked"])
+ continue
elif block["blocked"].endswith(".arpa"):
logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
continue
elif block["blocked"].endswith(".tld"):
logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
continue
- elif block["blocked"].endswith(".onion"):
- logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
- continue
elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
logger.debug("block='%s' is obfuscated.", block["blocked"])
obfuscated = obfuscated + 1
})
logger.debug("Setting obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
+ instances.set_has_obfuscation(row["domain"], (obfuscated > 0))
instances.set_obfuscated_blocks(row["domain"], obfuscated)
logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
domain = tidyup.domain(row["hostname"]) if row["hostname"] not in [None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain(): row[hostname]='%s' - SKIPPED!", domain, row["hostname"])
continue
if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
continue
+ elif blacklist.is_blacklisted(row["domain"]):
+ logger.debug("row[domain]='%s' is blacklisted - SKIPPED!", row["domain"])
+ continue
try:
logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
fetched = network.get_json_api(
source_domain,
"/api/1.0/instances/list?count=0&sort_by=name",
- headers,
- (config.get("connection_timeout"), config.get("read_timeout"))
+ headers=headers,
+ timeout=(config.get("connection_timeout"), config.get("read_timeout"))
)
- logger.debug("fetched[]='%s'", type(fetched))
+ logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
if "error_message" in fetched:
logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
logger.debug("Success! - EXIT!")
return 0
+def fetch_relaylist(args: argparse.Namespace) -> int:
+    """Fetch the relay list from api.relaylist.com and crawl each new relay domain.
+
+    The list is requested from the "/relays" endpoint. For every row the host
+    part of row["url"] is extracted, IDNA-encoded and, unless it is unwanted,
+    already seen in this run, already registered or recently crawled, handed
+    to federation.fetch_instances().
+
+    Parameters:
+        args: Parsed command-line arguments (only logged by this command).
+
+    Returns:
+        0 on success,
+        1 if the source has been accessed recently,
+        2 if the API response contains 'error_message',
+        3 if the API response contains 'exception',
+        4 if the API response has no 'json' element.
+    """
+    logger.debug("args[]='%s' - CALLED!", type(args))
+
+    logger.debug("Invoking locking.acquire() ...")
+    locking.acquire()
+
+    source_domain = "api.relaylist.com"
+
+    if sources.is_recent(source_domain):
+        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+        return 1
+    else:
+        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+        sources.update(source_domain)
+
+    logger.info("Fetching list from source_domain='%s' ...", source_domain)
+    fetched = network.get_json_api(
+        source_domain,
+        "/relays",
+        {},
+        (config.get("connection_timeout"), config.get("read_timeout"))
+    )
+    logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))
+
+    if "error_message" in fetched:
+        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
+        return 2
+    elif "exception" in fetched:
+        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
+        return 3
+    elif "json" not in fetched:
+        logger.warning("fetched has no element 'json' - EXIT!")
+        return 4
+
+    # Domains already handled in this run, so a relay listed twice is crawled once
+    domains = set()
+
+    logger.info("Checking %d row(s) ...", len(fetched["json"]))
+    for row in fetched["json"]:
+        logger.debug("row[]='%s'", type(row))
+
+        # NOTE(review): assumes each row is a dict with a 'url' key - confirm against the API
+        if "url" not in row:
+            logger.warning("row='%s' does not contain element 'url' - SKIPPED!", row)
+            continue
+        elif row["url"] in [None, ""]:
+            logger.debug("row[url]='%s' is empty - SKIPPED!", row["url"])
+            continue
+
+        # .hostname is lower-cased, strips port and user info, and is None when no host is present
+        domain = urlparse(row["url"]).hostname
+        logger.debug("domain='%s' - AFTER!", domain)
+
+        if domain in [None, ""]:
+            logger.debug("domain='%s' is empty after parsing row[url]='%s' - SKIPPED!", domain, row["url"])
+            continue
+
+        logger.debug("domain='%s' - BEFORE!", domain)
+        domain = domain.encode("idna").decode("utf-8")
+        logger.debug("domain='%s' - AFTER!", domain)
+
+        if not domain_helper.is_wanted(domain):
+            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
+            continue
+        elif domain in domains:
+            logger.debug("domain='%s' is already added - SKIPPED!", domain)
+            continue
+        elif instances.is_registered(domain):
+            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
+            continue
+        elif instances.is_recent(domain):
+            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+            continue
+
+        # Remember the domain before crawling so the duplicate check above takes effect
+        domains.add(domain)
+
+        logger.info("Fetching instances from domain='%s'", domain)
+        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
+
+    logger.debug("Success! - EXIT!")
+    return 0
+
def fetch_relays(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
domain = tidyup.domain(domain) if domain not in[None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
continue
elif domain not in peers:
logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
peers.append(domain)
+ logger.debug("domains()=%d,domain='%s'", len(domains), domain)
if dict_helper.has_key(domains, "domain", domain):
logger.debug("domain='%s' already added", domain)
continue
domain = tidyup.domain(domain) if domain not in[None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
continue
elif domain not in peers:
logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
peers.append(domain)
+ logger.debug("domains()=%d,domain='%s'", len(domains), domain)
if dict_helper.has_key(domains, "domain", domain):
logger.debug("domain='%s' already added", domain)
continue
domain = tidyup.domain(domain) if domain not in[None, ""] else None
logger.debug("domain='%s' - AFTER!", domain)
- if domain is None or domain == "":
+ if domain in [None, ""]:
logger.debug("domain='%s' is empty after tidyup.domain() from origin='%s' - SKIPPED!", domain, row["domain"])
continue
elif domain not in peers:
logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
peers.append(domain)
+ logger.debug("domains()=%d,domain='%s'", len(domains), domain)
if dict_helper.has_key(domains, "domain", domain):
logger.debug("domain='%s' already added", domain)
continue
- logger.debug("Appending domain='%s',origin='%s',software='%s'", domain, row["domain"], row["software"])
+ logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
domains.append({
"domain": domain,
"origin": row["domain"],