from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
+from fba.helpers import dicts as dict_helper
+from fba.helpers import domain as domain_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
return list()
try:
- logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
+ logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
fetched = network.get_json_api(
source_domain,
"/api/v1/servers/all.json?scope=All&country=all&language=all",
domain = row["domain"].encode("idna").decode("utf-8")
logger.debug("domain='%s' - AFTER!", domain)
- if not utils.is_domain_wanted(domain):
+ if not domain_helper.is_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif instances.is_registered(domain):
elif entry["domain"] == "":
logger.debug("entry[domain] is empty - SKIPPED!")
continue
- elif not utils.is_domain_wanted(entry["domain"]):
+ elif not domain_helper.is_wanted(entry["domain"]):
logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
continue
elif instances.is_registered(entry["domain"]):
if args.domain is not None and args.domain != "":
# Re-check single domain
- logger.debug("Querying database for single args.domain='%s' ...", args.domain)
+ logger.debug("Querying database for args.domain='%s' ...", args.domain)
database.cursor.execute(
"SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
)
database.cursor.execute(
"SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
)
+ elif args.force:
+ # Re-check all
+ logger.debug("Re-checking all instances ...")
+ database.cursor.execute(
+ "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC"
+ )
else:
# Re-check after "timeout" (aka. minimum interval)
database.cursor.execute(
elif nodeinfo_url is None or nodeinfo_url == "":
logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
continue
- elif not utils.is_domain_wanted(blocker):
+ elif not domain_helper.is_wanted(blocker):
logger.debug("blocker='%s' is not wanted - SKIPPED!", blocker)
continue
if software == "pleroma":
logger.info("blocker='%s',software='%s'", blocker, software)
blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
+ logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
elif software == "mastodon":
logger.info("blocker='%s',software='%s'", blocker, software)
blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
+ logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
elif software == "lemmy":
logger.info("blocker='%s',software='%s'", blocker, software)
blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
+ logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
elif software == "friendica":
logger.info("blocker='%s',software='%s'", blocker, software)
blocking = friendica.fetch_blocks(blocker)
+ logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
elif software == "misskey":
logger.info("blocker='%s',software='%s'", blocker, software)
blocking = misskey.fetch_blocks(blocker)
+ logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
else:
logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
- if not utils.is_domain_wanted(block["blocked"]):
+ if not domain_helper.is_wanted(block["blocked"]):
logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
continue
elif block["block_level"] in ["accept", "accepted"]:
domain = domain.encode("idna").decode("utf-8")
logger.debug("domain='%s' - AFTER!", domain)
- if not utils.is_domain_wanted(domain):
+ if not domain_helper.is_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif instances.is_registered(domain):
"reject": list(),
}
- raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
+ logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
+ raw = utils.fetch_url(
+ f"https://{source_domain}/todon/domainblocks",
+ network.web_headers,
+ (config.get("connection_timeout"), config.get("read_timeout"))
+ ).text
logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
doc = bs4.BeautifulSoup(raw, "html.parser")
blocker = "todon.eu"
logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
+ instances.set_last_blocked(blocker)
instances.set_total_blocks(blocker, blocking)
blockdict = list()
logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
sources.update(source_domain)
- raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
+ logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
+ raw = utils.fetch_url(
+ f"https://{source_domain}/chaossocial/meta/master/federation.md",
+ network.web_headers,
+ (config.get("connection_timeout"), config.get("read_timeout"))
+ ).text
logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
blocker = "chaos.social"
logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
+ instances.set_last_blocked(blocker)
instances.set_total_blocks(blocker, blocking)
logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
domain = domain.encode("idna").decode("utf-8")
logger.debug("domain='%s' - AFTER!", domain)
- if not utils.is_domain_wanted(domain):
+ if not domain_helper.is_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif domain in domains:
domain = domain.encode("idna").decode("utf-8")
logger.debug("domain='%s' - AFTER!", domain)
- if not utils.is_domain_wanted(domain):
+ if not domain_helper.is_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif domain in domains:
domain = row["domain"].encode("idna").decode("utf-8")
logger.debug("domain='%s' - AFTER!", domain)
- if not utils.is_domain_wanted(domain):
+ if not domain_helper.is_wanted(domain):
logger.debug("Domain domain='%s' is not wanted - SKIPPED!", domain)
continue
logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
continue
+ instances.set_last_blocked(block["blocker"])
+
# Fetch this URL
logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
if domain == "":
logger.debug("domain is empty - SKIPPED!")
continue
- elif not utils.is_domain_wanted(domain):
+ elif not domain_helper.is_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif instances.is_recent(domain):
logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
sources.update(source_domain)
+ logger.info("Fetching / from source_domain='%s' ...", source_domain)
response = utils.fetch_url(
f"https://{source_domain}",
network.web_headers,
domain = domain.encode("idna").decode("utf-8")
logger.debug("domain='%s' - AFTER!", domain)
- if not utils.is_domain_wanted(domain):
+ if not domain_helper.is_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif instances.is_registered(domain):
logger.debug("Success! - EXIT!")
return 0
+def fetch_joinmobilizon(args: argparse.Namespace) -> int:
+    """
+    Fetch the instance list from instances.joinmobilizon.org and pass every
+    wanted, not yet registered host to federation.fetch_instances().
+
+    Args:
+        args: Parsed command-line arguments. Only the type is logged here.
+
+    Returns:
+        0 when the list was processed or the source was accessed recently,
+        1 when the decoded JSON does not contain the key 'data'.
+    """
+    logger.debug("args[]='%s' - CALLED!", type(args))
+
+    # NOTE(review): no matching release is visible on any return path of this
+    # function - presumably handled elsewhere; confirm.
+    logger.debug("Invoking locking.acquire() ...")
+    locking.acquire()
+
+    source_domain = "instances.joinmobilizon.org"
+    if sources.is_recent(source_domain):
+        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+        return 0
+    else:
+        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+        sources.update(source_domain)
+
+    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
+    raw = utils.fetch_url(
+        f"https://{source_domain}/api/v1/instances",
+        network.web_headers,
+        (config.get("connection_timeout"), config.get("read_timeout"))
+    ).text
+    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
+
+    parsed = json.loads(raw)
+    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
+
+    if "data" not in parsed:
+        # The '%d' placeholder needs its argument, otherwise the logging
+        # module reports a formatting error instead of this message.
+        logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
+        return 1
+
+    logger.info("Checking %d instances ...", len(parsed["data"]))
+    for row in parsed["data"]:
+        logger.debug("row[]='%s'", type(row))
+        if "host" not in row:
+            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
+            continue
+        elif not domain_helper.is_wanted(row["host"]):
+            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
+            continue
+        elif instances.is_registered(row["host"]):
+            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
+            continue
+
+        # NOTE(review): the second argument is presumably the origin recorded
+        # for the new instance - confirm against federation.fetch_instances().
+        logger.info("Fetching row[host]='%s' ...", row["host"])
+        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, inspect.currentframe().f_code.co_name)
+
+    logger.debug("Success! - EXIT!")
+    return 0
+
def fetch_joinmisskey(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
sources.update(source_domain)
+ logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
raw = utils.fetch_url(
f"https://{source_domain}/instances.json",
network.web_headers,
if "url" not in row:
logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
continue
- elif not utils.is_domain_wanted(row["url"]):
+ elif not domain_helper.is_wanted(row["url"]):
logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
continue
elif instances.is_registered(row["url"]):
logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
sources.update(source_domain)
+ logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
raw = utils.fetch_url(
f"https://{source_domain}/FediBlock",
network.web_headers,
if block["blocked"] == "":
logger.debug("block[blocked] is empty - SKIPPED!")
continue
- elif not utils.is_domain_wanted(block["blocked"]):
+ elif not domain_helper.is_wanted(block["blocked"]):
logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
continue
elif instances.is_recent(block["blocked"]):
logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
continue
- logger.info("Proccessing blocked='%s' ...", block["blocked"])
+ logger.debug("Proccessing blocked='%s' ...", block["blocked"])
processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
blockdict = list()
for blocker in domains:
blocker = blocker[0]
logger.debug("blocker[%s]='%s'", type(blocker), blocker)
+ instances.set_last_blocked(blocker)
for block in blocking:
logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
if block["blocked"] == "":
logger.debug("block[blocked] is empty - SKIPPED!")
continue
- elif not utils.is_domain_wanted(block["blocked"]):
+ elif not domain_helper.is_wanted(block["blocked"]):
logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
continue
logger.debug("Invoking locking.acquire() ...")
locking.acquire()
- if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
+ if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
blocking = misskey.fetch_blocks(row["domain"])
else:
- logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])
+ logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
logger.debug("row[domain]='%s'", row["domain"])
# chaos.social requires special care ...
if row["domain"] != "chaos.social":
logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
+ instances.set_last_blocked(row["domain"])
instances.set_total_blocks(row["domain"], blocking)
obfuscated = 0
logger.debug("block='%s' is obfuscated.", block["blocked"])
obfuscated = obfuscated + 1
blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
- elif not utils.is_domain_wanted(block["blocked"]):
+ elif not domain_helper.is_wanted(block["blocked"]):
logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
continue
elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
if blocked is not None and blocked != block["blocked"]:
logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
obfuscated = obfuscated - 1
+
if blocks.is_instance_blocked(row["domain"], blocked):
logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
continue
+ elif blacklist.is_blacklisted(blocked):
+ logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
+ continue
block["block_level"] = blocks.alias_block_level(block["block_level"])
"reason" : block["reason"],
})
+ logger.debug("Settings obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
+ instances.set_obfuscated_blocks(row["domain"], obfuscated)
+
logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
if obfuscated == 0 and len(blocking) > 0:
logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
logger.debug("reader[]='%s'", type(reader))
- for row in reader:
+ if reader is None:
+ logger.warning("Failed parsing response.content()=%d as CSV content", len(response.content))
+ return 2
+
+ rows = list(reader)
+
+ logger.info("Checking %d rows ...", len(rows))
+ for row in rows:
logger.debug("row[]='%s'", type(row))
if "hostname" not in row:
logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
domain = domain.encode("idna").decode("utf-8")
logger.debug("domain='%s' - AFTER!", domain)
- if not utils.is_domain_wanted(domain):
+ if not domain_helper.is_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif (args.force is None or not args.force) and instances.is_registered(domain):
cnt = 0
for row in domains:
logger.debug("row[]='%s'", type(row))
+ if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
+ logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
+ continue
+
try:
logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(cnt / len(domains) * 100))
software = federation.determine_software(row["domain"])
logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
instances.set_software(row["domain"], software)
- instances.set_success(row["domain"])
+ if software is not None:
+ logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
+ instances.set_success(row["domain"])
except network.exceptions as exception:
logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
instances.set_last_error(row["domain"], exception)
"Authorization": f"Bearer {config.get('instances_social_api_key')}",
}
+ logger.info("Fetching list from source_domain='%s' ...", source_domain)
fetched = network.get_json_api(
source_domain,
"/api/1.0/instances/list?count=0&sort_by=name",
domain = domain.encode("idna").decode("utf-8")
logger.debug("domain='%s' - AFTER!", domain)
- if not utils.is_domain_wanted(domain):
+ if not domain_helper.is_wanted(domain):
logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif domain in domains:
logger.debug("Success! - EXIT!")
return 0
+def _add_relay_peer(domains: list, domain: str, origin: str, software: str) -> None:
+    """
+    Tidy up a peer domain found on a relay's front page and append it to
+    `domains` unless it is empty, not wanted, already registered or already
+    queued.
+
+    Args:
+        domains: List of {"domain", "origin"} dicts collected so far; it is
+            modified in place.
+        domain: Raw domain string as extracted from the page.
+        origin: Domain of the relay the peer was found on.
+        software: Software name of that relay, used for logging only.
+    """
+    # Normalise first so the checks below see the cleaned-up value and never
+    # a raw text node such as surrounding whitespace.
+    logger.debug("domain='%s' - BEFORE!", domain)
+    domain = tidyup.domain(domain)
+    logger.debug("domain='%s' - AFTER!", domain)
+
+    if domain == "":
+        logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", origin)
+        return
+    elif not domain_helper.is_wanted(domain):
+        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
+        return
+    elif instances.is_registered(domain):
+        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
+        return
+    elif dict_helper.has_key(domains, "domain", domain):
+        logger.debug("domain='%s' already added", domain)
+        return
+
+    logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, origin, software)
+    domains.append({
+        "domain": domain,
+        "origin": origin,
+    })
+
+def fetch_relays(args: argparse.Namespace) -> int:
+    """
+    Fetch the front page of every known relay instance (software
+    'activityrelay', 'aoderelay' or 'selective-relay'), extract the peer
+    domains listed there and pass the new ones to federation.fetch_instances().
+
+    Args:
+        args: Parsed command-line arguments. `args.domain` restricts the run
+            to one relay; a true `args.force` disables the
+            instances.is_recent() skip.
+
+    Returns:
+        Always 0.
+    """
+    logger.debug("args[]='%s' - CALLED!", type(args))
+
+    # NOTE(review): no matching release is visible in this function -
+    # presumably handled elsewhere; confirm.
+    logger.debug("Invoking locking.acquire() ...")
+    locking.acquire()
+
+    if args.domain is not None and args.domain != "":
+        database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay') AND domain = ? LIMIT 1", [args.domain])
+    else:
+        database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay')")
+
+    domains = list()
+    rows = database.cursor.fetchall()
+
+    logger.info("Checking %d relays ...", len(rows))
+    for row in rows:
+        logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
+        if not args.force and instances.is_recent(row["domain"]):
+            logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
+            continue
+
+        try:
+            logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
+            raw = utils.fetch_url(
+                f"https://{row['domain']}",
+                network.web_headers,
+                (config.get("connection_timeout"), config.get("read_timeout"))
+            ).text
+            logger.debug("raw[%s]()=%d", type(raw), len(raw))
+        except network.exceptions as exception:
+            # Record the failure and still mark the relay as fetched before
+            # moving on to the next one.
+            logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
+            instances.set_last_error(row["domain"], exception)
+            instances.set_last_instance_fetch(row["domain"])
+            instances.update_data(row["domain"])
+            continue
+
+        doc = bs4.BeautifulSoup(raw, features="html.parser")
+        logger.debug("doc[]='%s'", type(doc))
+
+        logger.debug("row[software]='%s'", row["software"])
+        if row["software"] == "activityrelay":
+            # Peers are read from the text nodes of the paragraph whose first
+            # child contains "registered instances".
+            logger.debug("Checking row[domain]='%s' ...", row["domain"])
+            tags = doc.findAll("p")
+
+            logger.debug("Checking %d paragraphs ...", len(tags))
+            for tag in tags:
+                logger.debug("tag[]='%s'", type(tag))
+                if len(tag.contents) == 0:
+                    logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
+                    continue
+                elif "registered instances" not in tag.contents[0]:
+                    logger.debug("Skipping paragraph, text not found.")
+                    continue
+
+                logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
+                for domain in tag.contents:
+                    logger.debug("domain[%s]='%s'", type(domain), domain)
+                    if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
+                        continue
+
+                    _add_relay_peer(domains, str(domain), row["domain"], row["software"])
+        elif row["software"] in ["aoderelay", "selective-relay"]:
+            # Peers are read from the first link inside each listed element.
+            logger.debug("Checking row[domain]='%s' ...", row["domain"])
+            if row["software"] == "aoderelay":
+                tags = doc.findAll("section", {"class": "instance"})
+            else:
+                # find() returns None when the page has no such element, so
+                # guard before calling findAll() on the result.
+                container = doc.find("div", {"id": "instances"})
+                if container is None:
+                    logger.warning("row[domain]='%s' has no div with id 'instances' - no peers found", row["domain"])
+                    tags = list()
+                else:
+                    tags = container.findAll("li")
+
+            logger.debug("Checking %d tags ...", len(tags))
+            for tag in tags:
+                logger.debug("tag[]='%s'", type(tag))
+
+                link = tag.find("a")
+                logger.debug("link[%s]='%s'", type(link), link)
+                if link is None:
+                    logger.warning("tag='%s' has no a-tag ...", tag)
+                    continue
+                elif not link.has_attr("href"):
+                    logger.warning("link='%s' has no href attribute - SKIPPED!", link)
+                    continue
+
+                components = urlparse(link["href"])
+                _add_relay_peer(domains, components.netloc.lower(), row["domain"], row["software"])
+        else:
+            logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
+
+        logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
+        instances.set_last_instance_fetch(row["domain"])
+        instances.update_data(row["domain"])
+
+    logger.info("Found %d domains to add ...", len(domains))
+    for row in domains:
+        logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
+        federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
+
+    logger.debug("Success! - EXIT!")
+    return 0
+
def convert_idna(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))