+
def fetch_joinfediverse(args: argparse.Namespace) -> int:
    """Scrape the FediBlock wiki tables on joinfediverse.wiki and import the
    listed blocks for all registered climatejustice.* instances.

    Parameters:
        args - parsed command-line arguments (only logged here)

    Returns:
        0 on success (also when the source was fetched too recently).
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "joinfediverse.wiki"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    raw = utils.fetch_url(
        f"https://{source_domain}/FediBlock",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    tables = doc.findAll("table", {"class": "wikitable"})

    logger.info("Analyzing %d table(s) ...", len(tables))
    blocklist = list()
    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.findAll("tr")
        logger.info("Checking %d row(s) ...", len(rows))
        block_headers = dict()
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)

            headers = row.findAll("th")
            logger.debug("Found headers()=%d header(s)", len(headers))
            if len(headers) > 1:
                # Header row: remember which (1-based) column carries which field.
                block_headers = dict()
                cnt = 0
                for header in headers:
                    cnt = cnt + 1
                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
                    text = header.contents[0]

                    logger.debug("text[]='%s'", type(text))
                    if not isinstance(text, str):
                        # Nested markup (e.g. links) instead of plain text
                        logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
                        continue
                    elif validators.domain(text.strip()):
                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
                        continue

                    text = tidyup.domain(text.strip())
                    logger.debug("text='%s' - AFTER!", text)
                    if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
                        logger.debug("Found header: '%s'=%d", text, cnt)
                        block_headers[cnt] = text

            elif len(block_headers) == 0:
                logger.debug("row is not scrapable - SKIPPED!")
                continue
            elif len(block_headers) > 0:
                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
                cnt = 0
                block = dict()

                for element in row.find_all(["th", "td"]):
                    cnt = cnt + 1
                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
                    if cnt in block_headers:
                        logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])

                        text = element.text.strip()
                        # Both "domain" and "instance" columns are stored as "blocked".
                        key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"

                        logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
                        if key == "subdomain(s)":
                            # Multiple subdomains are "/"-separated; empty cell -> empty list.
                            # (Removed dead branches for key in ["domain","instance"]
                            # and key == "reason": key can only be "blocked",
                            # "subdomain(s)" or "block reason(s)" at this point.)
                            block[key] = text.split("/") if text != "" else list()
                        else:
                            logger.debug("key='%s'", key)
                            block[key] = text

                logger.debug("block()=%d ...", len(block))
                if len(block) > 0:
                    logger.debug("Appending block()=%d ...", len(block))
                    blocklist.append(block)

    logger.debug("blocklist()=%d", len(blocklist))

    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()

    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
    blocking = list()
    for block in blocklist:
        logger.debug("block='%s'", block)
        if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
            origin = block["blocked"]
            logger.debug("origin='%s'", origin)
            for subdomain in block["subdomain(s)"]:
                # BUGFIX: append a *copy* per subdomain. The old code appended
                # the same dict repeatedly while mutating block["blocked"], so
                # every entry ended up with the last subdomain only.
                entry = dict(block)
                entry["blocked"] = subdomain + "." + origin
                logger.debug("block[blocked]='%s'", entry["blocked"])
                blocking.append(entry)
        else:
            blocking.append(block)

    # BUGFIX: was "%d" with the list itself, not its length
    logger.debug("blocking()=%d", len(blocking))
    for block in blocking:
        logger.debug("block[]='%s'", type(block))
        if "blocked" not in block:
            raise KeyError(f"block()={len(block)} does not have element 'blocked'")

        # Normalize and IDNA-encode the blocked domain before any checks
        block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
        logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

        if block["blocked"] == "":
            logger.debug("block[blocked] is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(block["blocked"]):
            logger.warning("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
            continue
        elif instances.is_recent(block["blocked"]):
            logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
            continue

        logger.info("Proccessing blocked='%s' ...", block["blocked"])
        processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)

    blockdict = list()
    for blocker in domains:
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)

        for block in blocking:
            logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
            if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                # BUGFIX: log block["reason"]; these dicts never contain a
                # "block_level" element, so the old code raised KeyError here.
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update_data(blocker)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
    if config.get("bot_enabled") and len(blockdict) > 0:
        # BUGFIX: closing quote after '%s' was missing in the format string
        logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
        network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0
+
def recheck_obfuscation(args: argparse.Namespace) -> int:
    """Re-fetch block lists of instances flagged has_obfuscation=1 and try to
    deobfuscate entries (e.g. "exam*le.com"); clears the flag for instances
    whose list could be fully deobfuscated.

    Parameters:
        args - parsed command-line arguments (--domain, --software, --all)

    Returns:
        0 on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    # Narrow the candidate set by --domain or --software when given,
    # otherwise process every instance with obfuscated blocks.
    if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
    elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
    else:
        database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1")

    rows = database.cursor.fetchall()
    logger.info("Checking %d domains ...", len(rows))
    for row in rows:
        logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
        # Recently-checked domains are only skipped when no explicit
        # --all/--domain/--software selection was made.
        if (args.all is None or not args.all) and instances.is_recent(row["domain"]) and args.domain is None and args.software is None:
            logger.debug("row[domain]='%s' has been recently checked, args.all[]='%s' - SKIPPED!", row["domain"], type(args.all))
            continue

        # Fetch the block list with the software-specific scraper.
        blocking = list()
        if row["software"] == "pleroma":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = pleroma.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "mastodon":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = mastodon.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "lemmy":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = lemmy.fetch_blocks(row["domain"], row["nodeinfo_url"])
        elif row["software"] == "friendica":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = friendica.fetch_blocks(row["domain"])
        elif row["software"] == "misskey":
            logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
            blocking = misskey.fetch_blocks(row["domain"])
        else:
            logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])

        logger.debug("row[domain]='%s'", row["domain"])
        # chaos.social is deliberately excluded from total-block bookkeeping
        # here -- NOTE(review): reason not visible in this file, confirm.
        if row["domain"] != "chaos.social":
            logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
            instances.set_total_blocks(row["domain"], blocking)

        # Counter of still-obfuscated entries and collected bot notifications
        obfuscated = 0
        blockdict = list()

        logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
        for block in blocking:
            logger.debug("block[blocked]='%s'", block["blocked"])
            blocked = None

            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif block["blocked"].endswith(".arpa"):
                logger.debug("blocked='%s' is a reversed IP address - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].endswith(".onion"):
                logger.debug("blocked='%s' is a TOR onion domain name - SKIPPED!", block["blocked"])
                continue
            elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
                # Wildcard characters mark an obfuscated entry; count it and
                # attempt to resolve it back to a real domain.
                logger.debug("block='%s' is obfuscated.", block["blocked"])
                obfuscated = obfuscated + 1
                blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
            elif not utils.is_domain_wanted(block["blocked"]):
                logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue
            elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
                logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked[%s]='%s',block[blocked]='%s'", type(blocked), blocked, block["blocked"])
            if blocked is not None and blocked != block["blocked"]:
                logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
                # Successfully deobfuscated: take it out of the counter again
                obfuscated = obfuscated - 1
                if blocks.is_instance_blocked(row["domain"], blocked):
                    logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
                    continue

                block["block_level"] = blocks.alias_block_level(block["block_level"])

                logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
                if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
                    logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
                    blockdict.append({
                        "blocked": blocked,
                        "reason" : block["reason"],
                    })

        logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
        if obfuscated == 0 and len(blocking) > 0:
            # Everything resolved: this instance no longer obfuscates
            logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
            instances.set_has_obfuscation(row["domain"], False)

        if instances.has_pending(row["domain"]):
            logger.debug("Flushing updates for blocker='%s' ...", row["domain"])
            instances.update_data(row["domain"])

        # Commit and (optionally) notify the bot per processed domain
        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", row["domain"], len(blockdict))
            network.send_bot_post(row["domain"], blockdict)

    logger.debug("Success! - EXIT!")
    return 0
+
def fetch_fedilist(args: argparse.Namespace) -> int:
    """Download the instance list (CSV) from demo.fedilist.com, optionally
    filtered by --software, and queue every wanted domain for crawling.

    Parameters:
        args - parsed command-line arguments (--software, --all)

    Returns:
        0 on success, 1 when the download failed.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "demo.fedilist.com"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    # Add the software filter to the CSV endpoint when requested
    if args.software is not None and args.software != "":
        logger.debug("args.software='%s'", args.software)
        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
    else:
        url = f"http://{source_domain}/instance/csv?onion=not"

    logger.info("Fetching url='%s' ...", url)
    response = reqto.get(
        url,
        headers=network.web_headers,
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code >= 300 or len(response.content) == 0:
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.text))
        return 1

    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")

    logger.debug("reader[]='%s'", type(reader))
    for record in reader:
        logger.debug("row[]='%s'", type(record))
        if "hostname" not in record:
            logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(record))
            continue

        logger.debug("row[hostname]='%s' - BEFORE!", record["hostname"])
        domain = tidyup.domain(record["hostname"])
        logger.debug("domain='%s' - AFTER!", domain)

        if domain == "":
            logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", record["hostname"])
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        # Guard clauses: unwanted, already known (unless --all), or too recent
        if not utils.is_domain_wanted(domain):
            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        if (args.all is None or not args.all) and instances.is_registered(domain):
            logger.debug("domain='%s' is already registered, --all not specified: args.all[]='%s'", domain, type(args.all))
            continue
        if instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s' ...", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
+
def update_nodeinfo(args: argparse.Namespace) -> int:
    """Re-determine the software type of instances via their nodeinfo and
    persist any change.

    Selection: a single --domain, all of one --software, or (default) every
    instance whose nodeinfo data is older than the configured recheck period.

    Returns:
        0 on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        logger.debug("Fetching args.domain='%s'", args.domain)
        database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
    elif args.software is not None and args.software != "":
        logger.info("Fetching domains for args.software='%s'", args.software)
        database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
    else:
        logger.info("Fetching domains for recently updated ...")
        database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])

    domains = database.cursor.fetchall()

    logger.info("Checking %d domain(s) ...", len(domains))
    for done, entry in enumerate(domains):
        logger.debug("row[]='%s'", type(entry))
        try:
            # "done" counts completed entries, hence the progress percentage
            logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", entry["domain"], entry["software"], "{:5.1f}".format(done / len(domains) * 100))
            detected = federation.determine_software(entry["domain"])

            logger.debug("Determined software='%s'", detected)
            if args.force is True or (detected is not None and detected != entry["software"]):
                logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", entry["domain"], entry["software"], detected)
                instances.set_software(entry["domain"], detected)

            instances.set_success(entry["domain"])
        except network.exceptions as exception:
            logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), entry["domain"])
            instances.set_last_error(entry["domain"], exception)

        # Always stamp the check time and flush, even after an exception
        instances.set_last_nodeinfo(entry["domain"])
        instances.update_data(entry["domain"])

    logger.debug("Success! - EXIT!")
    return 0
+
def fetch_instances_social(args: argparse.Namespace) -> int:
    """Fetch the full instance list from the instances.social API and queue
    every new, wanted domain for crawling.

    Requires config key "instances_social_api_key" to be set.

    Returns:
        0 on success, 1 when no API key is configured, 2-5 on API errors.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "instances.social"

    if config.get("instances_social_api_key") == "":
        logger.error("API key not set. Please set in your config.json file.")
        return 1
    elif sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 0
    else:
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)

    headers = {
        "Authorization": f"Bearer {config.get('instances_social_api_key')}",
    }

    fetched = network.get_json_api(
        source_domain,
        "/api/1.0/instances/list?count=0&sort_by=name",
        headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("fetched[]='%s'", type(fetched))

    # Bail out on any transport or payload problem with a distinct exit code
    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
        return 2
    if "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
        return 3
    if "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
        return 4
    if "instances" not in fetched["json"]:
        logger.warning("fetched[row] has no element 'instances' - EXIT!")
        return 5

    seen = list()
    records = fetched["json"]["instances"]

    logger.info("Checking %d row(s) ...", len(records))
    for record in records:
        logger.debug("row[]='%s'", type(record))
        domain = tidyup.domain(record["name"])
        logger.debug("domain='%s' - AFTER!", domain)

        if domain == "":
            logger.debug("domain is empty - SKIPPED!")
            continue

        logger.debug("domain='%s' - BEFORE!", domain)
        domain = domain.encode("idna").decode("utf-8")
        logger.debug("domain='%s' - AFTER!", domain)

        if not utils.is_domain_wanted(domain):
            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        if domain in seen:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
            continue
        if instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            continue
        if instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        logger.info("Fetching instances from domain='%s'", domain)
        federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
+
def convert_idna(args: argparse.Namespace) -> int:
    """Convert all non-punycode domain columns to their IDNA (xn--) form.

    Walks the instances.domain, instances.origin, blocks.blocker and
    blocks.blocked columns; rows already containing 'xn--' are skipped by
    the SQL filter. The previous copy-pasted four-stanza version is folded
    into one data-driven loop producing identical SQL and identical calls.

    Parameters:
        args - parsed command-line arguments (unused except for logging)

    Returns:
        0 on success.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # (module providing translate_idnas, table name, column name)
    targets = (
        (instances, "instances", "domain"),
        (instances, "instances", "origin"),
        (blocks, "blocks", "blocker"),
        (blocks, "blocks", "blocked"),
    )

    for module, table, column in targets:
        # table/column come from the fixed tuple above, never from user input,
        # so interpolating them into the SQL is safe here.
        database.cursor.execute(f"SELECT {column} FROM {table} WHERE {column} NOT LIKE '%xn--%' ORDER BY {column} ASC")
        rows = database.cursor.fetchall()

        logger.debug("rows[]='%s'", type(rows))
        module.translate_idnas(rows, column)

    logger.debug("Success! - EXIT!")
    return 0