import logging
import time
+from urllib.parse import urlparse
+
import argparse
import atoma
import bs4
from fba import utils
from fba.helpers import blacklist
+from fba.helpers import blocklists
from fba.helpers import config
from fba.helpers import cookies
+from fba.helpers import dicts as dict_helper
+from fba.helpers import domain as domain_helper
from fba.helpers import locking
+from fba.helpers import processing
from fba.helpers import software as software_helper
from fba.helpers import tidyup
from fba.models import blocks
from fba.models import instances
+from fba.models import sources
from fba.networks import friendica
from fba.networks import lemmy
logger.debug("args[]='%s' - CALLED!", type(args))
# Fetch rows
- database.cursor.execute("SELECT domain, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
+ database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE nodeinfo_url IS NOT NULL ORDER BY domain ASC")
cnt = 0
for row in database.cursor.fetchall():
- logger.debug("Checking row[domain]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["nodeinfo_url"])
+ logger.debug("Checking row[domain]='%s',row[software]='%s',row[nodeinfo_url]='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
punycode = row["domain"].encode("idna").decode("utf-8")
if row["nodeinfo_url"].startswith("/"):
logger.debug("row[nodeinfo_url]='%s' is a relative URL and always matches", row["nodeinfo_url"])
continue
elif row["nodeinfo_url"].find(punycode) == -1 and row["nodeinfo_url"].find(row["domain"]) == -1:
- logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s'", punycode, row["nodeinfo_url"])
+ logger.warning("punycode='%s' is not found in row[nodeinfo_url]='%s',row[software]='%s'", punycode, row["nodeinfo_url"], row["software"])
cnt = cnt + 1
logger.info("Found %d row(s)", cnt)
def fetch_pixelfed_api(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
- # No CSRF by default, you don't have to add network.api_headers by yourself here
+ # No CSRF by default, you don't have to add network.source_headers by yourself here
headers = tuple()
+ source_domain = "pixelfed.org"
+
+ if sources.is_recent(source_domain):
+ logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+ return 0
+ else:
+ logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+ sources.update(source_domain)
try:
- logger.debug("Checking CSRF from pixelfed.org")
- headers = csrf.determine("pixelfed.org", dict())
+ logger.debug("Checking CSRF from source_domain='%s' ...", source_domain)
+ headers = csrf.determine(source_domain, dict())
except network.exceptions as exception:
- logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
- return list()
+ # Name the command that is actually running; "fetch_peers" was carried over from another function.
+ logger.warning("Exception '%s' during checking CSRF (fetch_pixelfed_api,%s) - EXIT!", type(exception), __name__)
+ # This command is declared "-> int" and returns 0 on success, so signal the
+ # failure with a non-zero integer instead of an empty list.
+ return 100
try:
- logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
+ logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
fetched = network.get_json_api(
- "pixelfed.org",
+ source_domain,
"/api/v1/servers/all.json?scope=All&country=all&language=all",
headers,
(config.get("connection_timeout"), config.get("read_timeout"))
elif row["domain"] == "":
logger.debug("row[domain] is empty - SKIPPED!")
continue
- elif not utils.is_domain_wanted(row["domain"]):
- logger.warning("row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
+
+ logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
+ domain = row["domain"].encode("idna").decode("utf-8")
+ logger.debug("domain='%s' - AFTER!", domain)
+
+ if not domain_helper.is_wanted(domain):
+ logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
- elif instances.is_registered(row["domain"]):
- logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
+ elif instances.is_registered(domain):
+ logger.debug("domain='%s' is already registered - SKIPPED!", domain)
continue
- elif instances.is_recent(row["domain"]):
- logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
+ elif instances.is_recent(domain):
+ logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
continue
- logger.debug("Fetching instances from row[domain]='%s' ...", row["domain"])
- federation.fetch_instances(row["domain"], None, None, inspect.currentframe().f_code.co_name)
+ logger.debug("Fetching instances from domain='%s' ...", domain)
+ federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
logger.warning("Cannot fetch graphql,exception[%s]:'%s' - EXIT!", type(exception), str(exception))
def fetch_bkali(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+
+ logger.debug("Invoking locking.acquire() ...")
+ locking.acquire()
+
+ source_domain = "gql.api.bka.li"
+ if sources.is_recent(source_domain):
+ logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+ return 0
+ else:
+ logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+ sources.update(source_domain)
+
domains = list()
try:
- fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
- "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
- }))
+ logger.info("Fetching domainlist from source_domain='%s' ...", source_domain)
+ fetched = network.post_json_api(
+ source_domain,
+ "/v1/graphql",
+ json.dumps({
+ "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
+ })
+ )
logger.debug("fetched[]='%s'", type(fetched))
if "error_message" in fetched:
- logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s", fetched["error_message"])
+ # Log the host that was really queried instead of a hard-coded (and mistyped) name.
+ logger.warning("post_json_api() for source_domain='%s' returned error message='%s'", source_domain, fetched["error_message"])
return 100
elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
- logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
+ # The condition above only guarantees fetched["json"]["error"]["message"];
+ # reading fetched["error"] here would raise KeyError while reporting the error.
+ logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
elif entry["domain"] == "":
logger.debug("entry[domain] is empty - SKIPPED!")
continue
- elif not utils.is_domain_wanted(entry["domain"]):
- logger.warning("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
+ elif not domain_helper.is_wanted(entry["domain"]):
+ logger.debug("entry[domain]='%s' is not wanted - SKIPPED!", entry["domain"])
continue
elif instances.is_registered(entry["domain"]):
logger.debug("entry[domain]='%s' is already registered - SKIPPED!", entry["domain"])
logger.debug("domains()=%d", len(domains))
if len(domains) > 0:
- locking.acquire()
-
logger.info("Adding %d new instances ...", len(domains))
for domain in domains:
+ logger.debug("domain='%s' - BEFORE!", domain)
+ domain = domain.encode("idna").decode("utf-8")
+ logger.debug("domain='%s' - AFTER!", domain)
+
try:
logger.info("Fetching instances from domain='%s' ...", domain)
federation.fetch_instances(domain, 'tak.teleyal.blog', None, inspect.currentframe().f_code.co_name)
logger.warning("args.domain='%s' is not registered, please run ./utils.py fetch_instances '%s' first.", args.domain, args.domain)
return 102
+ logger.debug("Invoking locking.acquire() ...")
locking.acquire()
if args.domain is not None and args.domain != "":
# Re-check single domain
- logger.debug("Querying database for single args.domain='%s' ...", args.domain)
+ logger.debug("Querying database for args.domain='%s' ...", args.domain)
database.cursor.execute(
"SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
)
database.cursor.execute(
"SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ? AND nodeinfo_url IS NOT NULL", [args.software]
)
+ elif args.force:
+ # Re-check all
+ logger.debug("Re-checking all instances ...")
+ database.cursor.execute(
+ "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey') AND nodeinfo_url IS NOT NULL ORDER BY rowid DESC"
+ )
else:
# Re-check after "timeout" (aka. minimum interval)
database.cursor.execute(
logger.info("Checking %d entries ...", len(rows))
for blocker, software, origin, nodeinfo_url in rows:
logger.debug("blocker='%s',software='%s',origin='%s',nodeinfo_url='%s'", blocker, software, origin, nodeinfo_url)
- blocker = tidyup.domain(blocker)
- logger.debug("blocker='%s' - AFTER!", blocker)
- if blocker == "":
- logger.warning("blocker is now empty!")
- continue
- elif nodeinfo_url is None or nodeinfo_url == "":
- logger.debug("blocker='%s',software='%s' has empty nodeinfo_url", blocker, software)
+ if nodeinfo_url is None:
+ logger.debug("blocker='%s',software='%s' has no nodeinfo_url set - SKIPPED!", blocker, software)
continue
- elif not utils.is_domain_wanted(blocker):
+ elif not domain_helper.is_wanted(blocker):
logger.warning("blocker='%s' is not wanted - SKIPPED!", blocker)
continue
- logger.debug("blocker='%s'", blocker)
+ logger.debug("Setting last_blocked,has_obfuscation=false for blocker='%s' ...", blocker)
instances.set_last_blocked(blocker)
instances.set_has_obfuscation(blocker, False)
blocking = list()
- blockdict = list()
- if software == "pleroma":
- logger.info("blocker='%s',software='%s'", blocker, software)
- blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
- elif software == "mastodon":
- logger.info("blocker='%s',software='%s'", blocker, software)
- blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
- elif software == "lemmy":
- logger.info("blocker='%s',software='%s'", blocker, software)
- blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
- elif software == "friendica":
- logger.info("blocker='%s',software='%s'", blocker, software)
- blocking = friendica.fetch_blocks(blocker)
- elif software == "misskey":
- logger.info("blocker='%s',software='%s'", blocker, software)
- blocking = misskey.fetch_blocks(blocker)
+
+ if blocker != "chaos.social" and not blocklists.is_excluded(blocker):
+ logger.debug("blocker='%s',software='%s'", blocker, software)
+ if software == "pleroma":
+ logger.info("blocker='%s',software='%s'", blocker, software)
+ blocking = pleroma.fetch_blocks(blocker, nodeinfo_url)
+ logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
+ elif software == "mastodon":
+ logger.info("blocker='%s',software='%s'", blocker, software)
+ blocking = mastodon.fetch_blocks(blocker, nodeinfo_url)
+ logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
+ elif software == "lemmy":
+ logger.info("blocker='%s',software='%s'", blocker, software)
+ blocking = lemmy.fetch_blocks(blocker, nodeinfo_url)
+ logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
+ elif software == "friendica":
+ logger.info("blocker='%s',software='%s'", blocker, software)
+ blocking = friendica.fetch_blocks(blocker)
+ logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
+ elif software == "misskey":
+ logger.info("blocker='%s',software='%s'", blocker, software)
+ blocking = misskey.fetch_blocks(blocker)
+ logger.debug("blocker='%s' returned %d entries,software='%s'", blocker, len(blocking), software)
+ else:
+ logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
+
+ logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
+ instances.set_total_blocks(blocker, blocking)
else:
- logger.warning("Unknown software: blocker='%s',software='%s'", blocker, software)
+ logger.debug("Skipping blocker='%s', run ./fba.py fetch_cs or fetch_oliphant instead!", blocker)
logger.info("Checking %d entries from blocker='%s',software='%s' ...", len(blocking), blocker, software)
blockdict = list()
if block["blocked"] == "":
logger.debug("block[blocked] is empty - SKIPPED!")
continue
- elif not utils.is_domain_wanted(block["blocked"]):
- logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
+
+ logger.debug("block[blocked]='%s' - BEFORE!", block["blocked"])
+ block["blocked"] = block["blocked"].lstrip(".").encode("idna").decode("utf-8")
+ logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
+
+ if not domain_helper.is_wanted(block["blocked"]):
+ logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
continue
elif block["block_level"] in ["accept", "accepted"]:
logger.debug("blocked='%s' is accepted, not wanted here - SKIPPED!", block["blocked"])
logger.debug("Hash wasn't found, adding: blocked='%s',blocker='%s'", block["blocked"], blocker)
federation.fetch_instances(block["blocked"], blocker, None, inspect.currentframe().f_code.co_name)
- block["block_level"] = utils.alias_block_level(block["block_level"])
+ block["block_level"] = blocks.alias_block_level(block["block_level"])
- if utils.process_block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
+ if processing.block(blocker, block["blocked"], block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
blockdict.append({
"blocked": block["blocked"],
def fetch_observer(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
- # Acquire lock
+ logger.debug("Invoking locking.acquire() ...")
locking.acquire()
+ source_domain = "fediverse.observer"
+ if sources.is_recent(source_domain):
+ logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+ return 0
+ else:
+ logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+ sources.update(source_domain)
+
types = list()
if args.software is None:
logger.info("Fetching software list ...")
raw = utils.fetch_url(
- "https://fediverse.observer",
+ f"https://{source_domain}",
network.web_headers,
(config.get("connection_timeout"), config.get("read_timeout"))
).text
doc = bs4.BeautifulSoup(raw, features="html.parser")
logger.debug("doc[]='%s'", type(doc))
- items = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"}).findAll("a", {"class": "dropdown-item"})
+ navbar = doc.find("div", {"aria-labelledby": "navbarDropdownMenuSoftwares"})
+ logger.debug("navbar[]='%s'", type(navbar))
+ if navbar is None:
+ logger.warning("Cannot find navigation bar, cannot continue!")
+ return 1
+
+ items = navbar.findAll("a", {"class": "dropdown-item"})
logger.debug("items[]='%s'", type(items))
logger.info("Checking %d menu items ...", len(items))
try:
logger.debug("Fetching table data for software='%s' ...", software)
raw = utils.fetch_url(
- f"https://fediverse.observer/app/views/tabledata.php?software={software}",
+ f"https://{source_domain}/app/views/tabledata.php?software={software}",
network.web_headers,
(config.get("connection_timeout"), config.get("read_timeout"))
).text
doc = bs4.BeautifulSoup(raw, features="html.parser")
logger.debug("doc[]='%s'", type(doc))
except network.exceptions as exception:
- logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
+ logger.warning("Cannot fetch software='%s' from source_domain='%s': '%s'", software, source_domain, type(exception))
continue
items = doc.findAll("a", {"class": "url"})
for item in items:
logger.debug("item[]='%s'", type(item))
domain = item.decode_contents()
-
logger.debug("domain='%s' - AFTER!", domain)
+
if domain == "":
logger.debug("domain is empty - SKIPPED!")
continue
- elif not utils.is_domain_wanted(domain):
- logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
+
+ logger.debug("domain='%s' - BEFORE!", domain)
+ domain = domain.encode("idna").decode("utf-8")
+ logger.debug("domain='%s' - AFTER!", domain)
+
+ if not domain_helper.is_wanted(domain):
+ logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif instances.is_registered(domain):
logger.debug("domain='%s' is already registered - SKIPPED!", domain)
continue
- elif instances.is_recent(domain):
- logger.debug("domain='%s' is recently being handled - SKIPPED!", domain)
- continue
software = software_helper.alias(software)
logger.info("Fetching instances for domain='%s'", domain)
def fetch_todon_wiki(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+ logger.debug("Invoking locking.acquire() ...")
locking.acquire()
+
+ source_domain = "wiki.todon.eu"
+ if sources.is_recent(source_domain):
+ logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+ return 0
+ else:
+ logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+ sources.update(source_domain)
+
blocklist = {
"silenced": list(),
"reject": list(),
}
- raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
+ logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
+ raw = utils.fetch_url(
+ f"https://{source_domain}/todon/domainblocks",
+ network.web_headers,
+ (config.get("connection_timeout"), config.get("read_timeout"))
+ ).text
logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
doc = bs4.BeautifulSoup(raw, "html.parser")
logger.info("Checking %d suspended entries ...", len(suspended))
blocklist["reject"] = utils.find_domains(suspended, "div")
+ blocking = blocklist["silenced"] + blocklist["reject"]
+ blocker = "todon.eu"
+
+ logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
+ instances.set_last_blocked(blocker)
+ instances.set_total_blocks(blocker, blocking)
+
blockdict = list()
for block_level in blocklist:
blockers = blocklist[block_level]
if not instances.is_registered(blocked):
try:
logger.info("Fetching instances from domain='%s' ...", blocked)
- federation.fetch_instances(blocked, 'chaos.social', None, inspect.currentframe().f_code.co_name)
+ federation.fetch_instances(blocked, blocker, None, inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
- logger.warning("Exception '%s' during fetching instances (fetch_cs) from blocked='%s'", type(exception), blocked)
+ # Report the command that is really running; "fetch_cs" was copied from the sibling command.
+ logger.warning("Exception '%s' during fetching instances (fetch_todon_wiki) from blocked='%s'", type(exception), blocked)
instances.set_last_error(blocked, exception)
- if blocks.is_instance_blocked("todon.eu", blocked, block_level):
+ if blocks.is_instance_blocked(blocker, blocked, block_level):
logger.debug("blocked='%s',block_level='%s' is already blocked - SKIPPED!", blocked, block_level)
continue
logger.info("Adding new block: blocked='%s',block_level='%s'", blocked, block_level)
- if utils.process_block("todon.eu", blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
- logger.debug("Appending blocked='%s',reason='%s' for blocker='todon.eu' ...", blocked, block_level)
+ if processing.block(blocker, blocked, None, block_level) and block_level == "reject" and config.get("bot_enabled"):
+ logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", blocked, block_level, blocker)
blockdict.append({
"blocked": blocked,
"reason" : None,
logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
if config.get("bot_enabled") and len(blockdict) > 0:
- logger.info("Sending bot POST for blocker='todon.eu',blockdict()=%d ...", len(blockdict))
- network.send_bot_post("todon.eu", blockdict)
+ logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
+ network.send_bot_post(blocker, blockdict)
+
+ logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
+ if instances.has_pending(blocker):
+ logger.debug("Flushing updates for blocker='%s' ...", blocker)
+ instances.update_data(blocker)
logger.debug("Success! - EXIT!")
return 0
- def fetch_cs(args: argparse.Namespace):
+ # Returns an int status (0 on success) like the other fetch_* commands.
+ def fetch_cs(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+
+ logger.debug("Invoking locking.acquire() ...")
+ locking.acquire()
+
extensions = [
"extra",
"abbr",
"wikilinks"
]
- domains = {
+ blocklist = {
"silenced": list(),
"reject" : list(),
}
- raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
+ source_domain = "raw.githubusercontent.com"
+ if sources.is_recent(source_domain):
+ logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+ return 0
+ else:
+ logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+ sources.update(source_domain)
+
+ logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
+ raw = utils.fetch_url(
+ f"https://{source_domain}/chaossocial/meta/master/federation.md",
+ network.web_headers,
+ (config.get("connection_timeout"), config.get("read_timeout"))
+ ).text
logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
logger.debug("silenced[%s]()=%d", type(silenced), len(silenced))
- domains["silenced"] = federation.find_domains(silenced)
+ blocklist["silenced"] = federation.find_domains(silenced)
blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
logger.debug("blocked[%s]()=%d", type(blocked), len(blocked))
- domains["reject"] = federation.find_domains(blocked)
+ blocklist["reject"] = federation.find_domains(blocked)
- logger.debug("domains[silenced]()=%d,domains[reject]()=%d", len(domains["silenced"]), len(domains["reject"]))
- blockdict = list()
- if len(domains) > 0:
- locking.acquire()
+ blocking = blocklist["silenced"] + blocklist["reject"]
+ blocker = "chaos.social"
+
+ logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", blocker, len(blocking))
+ instances.set_last_blocked(blocker)
+ instances.set_total_blocks(blocker, blocking)
- for block_level in domains:
- logger.info("block_level='%s' has %d row(s)", block_level, len(domains[block_level]))
+ logger.debug("blocklist[silenced]()=%d,blocklist[reject]()=%d", len(blocklist["silenced"]), len(blocklist["reject"]))
+ if len(blocking) > 0:
+ blockdict = list()
+ for block_level in blocklist:
+ logger.info("block_level='%s' has %d row(s)", block_level, len(blocklist[block_level]))
- for row in domains[block_level]:
+ for row in blocklist[block_level]:
logger.debug("row[%s]='%s'", type(row), row)
- if instances.is_recent(row["domain"], "last_blocked"):
- logger.debug("row[domain]='%s' has been recently crawled - SKIPPED!", row["domain"])
+ if not "domain" in row:
+ logger.warning("row[]='%s' has no element 'domain' - SKIPPED!", type(row))
continue
elif not instances.is_registered(row["domain"]):
try:
logger.info("Fetching instances from domain='%s' ...", row["domain"])
- federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
+ federation.fetch_instances(row["domain"], blocker, None, inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
logger.warning("Exception '%s' during fetching instances (fetch_cs) from row[domain]='%s'", type(exception), row["domain"])
instances.set_last_error(row["domain"], exception)
- if utils.process_block("chaos.social", row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
- logger.debug("Appending blocked='%s',reason='%s' for blocker='chaos.social' ...", row["domain"], block_level)
+ if processing.block(blocker, row["domain"], row["reason"], block_level) and block_level == "reject" and config.get("bot_enabled"):
+ logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", row["domain"], block_level, blocker)
blockdict.append({
"blocked": row["domain"],
"reason" : row["reason"],
logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
if config.get("bot_enabled") and len(blockdict) > 0:
- logger.info("Sending bot POST for blocker='chaos.social',blockdict()=%d ...", len(blockdict))
- network.send_bot_post("chaos.social", blockdict)
+ logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
+ network.send_bot_post(blocker, blockdict)
+
+ logger.debug("Checking if blocker='%s' has pending updates ...", blocker)
+ if instances.has_pending(blocker):
+ logger.debug("Flushing updates for blocker='%s' ...", blocker)
+ instances.update_data(blocker)
logger.debug("Success! - EXIT!")
return 0
def fetch_fba_rss(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+
domains = list()
+ logger.debug("Invoking locking.acquire() ...")
+ locking.acquire()
+
+ components = urlparse(args.feed)
+
+ if sources.is_recent(components.netloc):
+ logger.info("API from components.netloc='%s' has recently being accessed - EXIT!", components.netloc)
+ return 0
+ else:
+ logger.debug("components.netloc='%s' has not been recently used, marking ...", components.netloc)
+ sources.update(components.netloc)
+
logger.info("Fetch FBA-specific RSS args.feed='%s' ...", args.feed)
response = utils.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
logger.debug("rss[]='%s'", type(rss))
for item in rss.items:
- logger.debug("item='%s'", item)
+ logger.debug("item[%s]='%s'", type(item), item)
domain = tidyup.domain(item.link.split("=")[1])
logger.debug("domain='%s' - AFTER!", domain)
if domain == "":
logger.debug("domain is empty - SKIPPED!")
continue
- elif not utils.is_domain_wanted(domain):
- logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
+
+ logger.debug("domain='%s' - BEFORE!", domain)
+ domain = domain.encode("idna").decode("utf-8")
+ logger.debug("domain='%s' - AFTER!", domain)
+
+ if not domain_helper.is_wanted(domain):
+ logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif domain in domains:
logger.debug("domain='%s' is already added - SKIPPED!", domain)
logger.debug("domains()=%d", len(domains))
if len(domains) > 0:
- locking.acquire()
-
logger.info("Adding %d new instances ...", len(domains))
for domain in domains:
+ logger.debug("domain='%s'", domain)
try:
logger.info("Fetching instances from domain='%s' ...", domain)
federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
- feed = "https://ryona.agency/users/fba/feed.atom"
+
+ logger.debug("Invoking locking.acquire() ...")
+ locking.acquire()
+
+ source_domain = "ryona.agency"
+ feed = f"https://{source_domain}/users/fba/feed.atom"
+
+ logger.debug("args.feed[%s]='%s'", type(args.feed), args.feed)
+ if args.feed is not None and validators.url(args.feed):
+ logger.debug("Setting feed='%s' ...", args.feed)
+ feed = str(args.feed)
+ source_domain = urlparse(args.feed).netloc
+
+ if sources.is_recent(source_domain):
+ logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+ return 0
+ else:
+ logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+ sources.update(source_domain)
domains = list()
if domain == "":
logger.debug("domain is empty - SKIPPED!")
continue
- elif not utils.is_domain_wanted(domain):
- logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
+
+ logger.debug("domain='%s' - BEFORE!", domain)
+ domain = domain.encode("idna").decode("utf-8")
+ logger.debug("domain='%s' - AFTER!", domain)
+
+ if not domain_helper.is_wanted(domain):
+ logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif domain in domains:
logger.debug("domain='%s' is already added - SKIPPED!", domain)
logger.debug("domains()=%d", len(domains))
if len(domains) > 0:
- locking.acquire()
-
logger.info("Adding %d new instances ...", len(domains))
for domain in domains:
+ logger.debug("domain='%s'", domain)
try:
logger.info("Fetching instances from domain='%s' ...", domain)
- federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
+ federation.fetch_instances(domain, source_domain, None, inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
instances.set_last_error(domain, exception)
def fetch_instances(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+
+ logger.debug("args.domain='%s' - checking ...", args.domain)
+ if not validators.domain(args.domain):
+ logger.warning("args.domain='%s' is not valid.", args.domain)
+ return 100
+ elif blacklist.is_blacklisted(args.domain):
+ logger.warning("args.domain='%s' is blacklisted, won't check it!", args.domain)
+ return 101
+
+ logger.debug("Invoking locking.acquire() ...")
locking.acquire()
+ # Initialize values
+ domain = tidyup.domain(args.domain)
+ origin = software = None
+
+ # Fetch record, keyed by the tidied domain: that is the value handed to
+ # federation.fetch_instances() below, so the lookup must use the same key.
+ database.cursor.execute("SELECT origin, software FROM instances WHERE domain = ? LIMIT 1", [domain])
+ row = database.cursor.fetchone()
+ if row is not None:
+ origin = row["origin"]
+ software = row["software"]
+
# Initial fetch
try:
- logger.info("Fetching instances from args.domain='%s' ...", args.domain)
- federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
+ logger.info("Fetching instances from args.domain='%s',origin='%s',software='%s' ...", domain, origin, software)
+ federation.fetch_instances(domain, origin, software, inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
logger.warning("Exception '%s' during fetching instances (fetch_instances) from args.domain='%s'", type(exception), args.domain)
instances.set_last_error(args.domain, exception)
# Loop through some instances
database.cursor.execute(
- "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
+ "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'lemmy', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")]
)
rows = database.cursor.fetchall()
if row["domain"] == "":
logger.debug("row[domain] is empty - SKIPPED!")
continue
- elif not utils.is_domain_wanted(row["domain"]):
- logger.warning("Domain row[domain]='%s' is not wanted - SKIPPED!", row["domain"])
+
+ logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
+ domain = row["domain"].encode("idna").decode("utf-8")
+ logger.debug("domain='%s' - AFTER!", domain)
+
+ if not domain_helper.is_wanted(domain):
+ logger.debug("Domain domain='%s' is not wanted - SKIPPED!", domain)
continue
try:
- logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row["domain"], row["origin"], row["software"], row["nodeinfo_url"])
- federation.fetch_instances(row["domain"], row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
+ logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", domain, row["origin"], row["software"], row["nodeinfo_url"])
+ federation.fetch_instances(domain, row["origin"], row["software"], inspect.currentframe().f_code.co_name, row["nodeinfo_url"])
except network.exceptions as exception:
- logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[domain]='%s'", type(exception), row["domain"])
- instances.set_last_error(row["domain"], exception)
+ logger.warning("Exception '%s' during fetching instances (fetch_instances) from domain='%s'", type(exception), domain)
+ instances.set_last_error(domain, exception)
logger.debug("Success - EXIT!")
return 0
def fetch_oliphant(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+
+ logger.debug("Invoking locking.acquire() ...")
locking.acquire()
+ source_domain = "codeberg.org"
+ if sources.is_recent(source_domain):
+ logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+ return 0
+ else:
+ logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+ sources.update(source_domain)
+
# Base URL
- base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"
-
- # URLs to fetch
- blocklists = (
- {
- "blocker": "artisan.chat",
- "csv_url": "mastodon/artisan.chat.csv",
- },{
- "blocker": "mastodon.art",
- "csv_url": "mastodon/mastodon.art.csv",
- },{
- "blocker": "pleroma.envs.net",
- "csv_url": "mastodon/pleroma.envs.net.csv",
- },{
- "blocker": "oliphant.social",
- "csv_url": "mastodon/_unified_tier3_blocklist.csv",
- },{
- "blocker": "mastodon.online",
- "csv_url": "mastodon/mastodon.online.csv",
- },{
- "blocker": "mastodon.social",
- "csv_url": "mastodon/mastodon.social.csv",
- },{
- "blocker": "mastodon.social",
- "csv_url": "other/missing-tier0-mastodon.social.csv",
- },{
- "blocker": "rage.love",
- "csv_url": "mastodon/rage.love.csv",
- },{
- "blocker": "sunny.garden",
- "csv_url": "mastodon/sunny.garden.csv",
- },{
- "blocker": "solarpunk.moe",
- "csv_url": "mastodon/solarpunk.moe.csv",
- },{
- "blocker": "toot.wales",
- "csv_url": "mastodon/toot.wales.csv",
- },{
- "blocker": "union.place",
- "csv_url": "mastodon/union.place.csv",
- }
- )
+ base_url = f"https://{source_domain}/oliphant/blocklists/raw/branch/main/blocklists"
domains = list()
- logger.debug("Downloading %d files ...", len(blocklists))
- for block in blocklists:
+ logger.debug("Downloading %d files ...", len(blocklists.oliphant_blocklists))
+ for block in blocklists.oliphant_blocklists:
# Is domain given and not equal blocker?
if isinstance(args.domain, str) and args.domain != block["blocker"]:
logger.debug("Skipping blocker='%s', not matching args.domain='%s'", block["blocker"], args.domain)
elif args.domain in domains:
logger.debug("args.domain='%s' already handled - SKIPPED!", args.domain)
continue
- elif instances.is_recent(block["blocker"]):
- logger.debug("block[blocker]='%s' has been recently crawled - SKIPPED!", block["blocker"])
- continue
+
+ instances.set_last_blocked(block["blocker"])
# Fetch this URL
logger.info("Fetching csv_url='%s' for blocker='%s' ...", block["csv_url"], block["blocker"])
response = utils.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
- logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
- if not response.ok or response.status_code > 399 or response.content == "":
+ logger.debug("response.ok='%s',response.status_code=%d,response.content()=%d", response.ok, response.status_code, len(response.content))
+ # response.content is bytes (it is decoded below), so test its length instead of comparing with a str
+ if not response.ok or response.status_code >= 300 or len(response.content) == 0:
logger.warning("Could not fetch csv_url='%s' for blocker='%s' - SKIPPED!", block["csv_url"], block["blocker"])
continue
logger.debug("Fetched %d Bytes, parsing CSV ...", len(response.content))
- reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
+ reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
- logger.debug("reader[]='%s'", type(reader))
blockdict = list()
+
+ cnt = 0
for row in reader:
logger.debug("row[%s]='%s'", type(row), row)
domain = severity = None
reject_media = reject_reports = False
+
if "#domain" in row:
domain = row["#domain"]
elif "domain" in row:
continue
if "#severity" in row:
- severity = row["#severity"]
+ severity = blocks.alias_block_level(row["#severity"])
elif "severity" in row:
- severity = row["severity"]
+ severity = blocks.alias_block_level(row["severity"])
else:
logger.debug("row='%s' does not contain severity column", row)
continue
elif "reject_reports" in row and row["reject_reports"].lower() == "true":
reject_reports = True
+ cnt = cnt + 1
logger.debug("domain='%s',severity='%s',reject_media='%s',reject_reports='%s'", domain, severity, reject_media, reject_reports)
if domain == "":
logger.debug("domain is empty - SKIPPED!")
continue
- elif not utils.is_domain_wanted(domain):
- logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
+ elif domain.endswith(".onion"):
+ logger.debug("domain='%s' is a TOR .onion domain - SKIPPED", domain)
+ continue
+ elif domain.endswith(".arpa"):
+ logger.debug("domain='%s' is a reverse IP address - SKIPPED", domain)
+ continue
+ elif domain.endswith(".tld"):
+ logger.debug("domain='%s' is a fake domain - SKIPPED", domain)
+ continue
+ elif domain.find("*") >= 0 or domain.find("?") >= 0:
+ logger.debug("domain='%s' is obfuscated - Invoking utils.deobfuscate(%s, %s) ...", domain, domain, block["blocker"])
+ domain = utils.deobfuscate(domain, block["blocker"])
+ logger.debug("domain='%s' - AFTER!", domain)
+
+ if not validators.domain(domain):
+ # Supply the value for the '%s' placeholder, otherwise logging reports a formatting error
+ logger.debug("domain='%s' is not a valid domain - SKIPPED!", domain)
+ continue
+ elif blacklist.is_blacklisted(domain):
+ logger.warning("domain='%s' is blacklisted - SKIPPED!", domain)
+ continue
+ elif blocks.is_instance_blocked(block["blocker"], domain, severity):
+ logger.debug("block[blocker]='%s' has already blocked domain='%s' with severity='%s' - SKIPPED!", block["blocker"], domain, severity)
continue
logger.debug("Marking domain='%s' as handled", domain)
domains.append(domain)
logger.debug("Processing domain='%s' ...", domain)
- processed = utils.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
+ processed = processing.domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
logger.debug("processed='%s'", processed)
- if utils.process_block(block["blocker"], domain, None, "reject") and config.get("bot_enabled"):
+ if processing.block(block["blocker"], domain, None, severity) and config.get("bot_enabled"):
logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", domain, block["block_level"], block["blocker"])
blockdict.append({
"blocked": domain,
})
if reject_media:
- utils.process_block(block["blocker"], domain, None, "reject_media")
+ processing.block(block["blocker"], domain, None, "reject_media")
if reject_reports:
- utils.process_block(block["blocker"], domain, None, "reject_reports")
+ processing.block(block["blocker"], domain, None, "reject_reports")
+
+ logger.debug("block[blocker]='%s'", block["blocker"])
+ if not blocklists.is_excluded(block["blocker"]):
+ logger.debug("Invoking instances.set_total_blocks(%s, domains()=%d) ...", block["blocker"], len(domains))
+ instances.set_total_blocks(block["blocker"], domains)
+
+ logger.debug("Checking if blocker='%s' has pending updates ...", block["blocker"])
+ if instances.has_pending(block["blocker"]):
+ logger.debug("Flushing updates for block[blocker]='%s' ...", block["blocker"])
+ instances.update_data(block["blocker"])
logger.debug("Invoking commit() ...")
database.connection.commit()
def fetch_txt(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+
+ logger.debug("Invoking locking.acquire() ...")
locking.acquire()
# Static URLs
if domain == "":
logger.debug("domain is empty - SKIPPED!")
continue
- elif not utils.is_domain_wanted(domain):
- logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
+ elif not domain_helper.is_wanted(domain):
+ logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif instances.is_recent(domain):
logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
continue
logger.debug("Processing domain='%s',row[blocker]='%s'", domain, row["blocker"])
- processed = utils.process_domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
+ processed = processing.domain(domain, row["blocker"], inspect.currentframe().f_code.co_name)
logger.debug("processed='%s'", processed)
if not processed:
def fetch_fedipact(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+
+ logger.debug("Invoking locking.acquire() ...")
locking.acquire()
- response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
+ source_domain = "fedipact.online"
+ if sources.is_recent(source_domain):
+ logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+ return 0
+ else:
+ logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+ sources.update(source_domain)
+
+ logger.info("Fetching / from source_domain='%s' ...", source_domain)
+ response = utils.fetch_url(
+ f"https://{source_domain}",
+ network.web_headers,
+ (config.get("connection_timeout"), config.get("read_timeout"))
+ )
logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
if response.ok and response.status_code < 300 and response.text != "":
if domain == "":
logger.debug("domain is empty - SKIPPED!")
continue
- elif not utils.is_domain_wanted(domain):
- logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
+
+ logger.debug("domain='%s' - BEFORE!", domain)
+ domain = domain.encode("idna").decode("utf-8")
+ logger.debug("domain='%s' - AFTER!", domain)
+
+ if not domain_helper.is_wanted(domain):
+ logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
continue
elif instances.is_registered(domain):
logger.debug("domain='%s' is already registered - SKIPPED!", domain)
continue
logger.info("Fetching domain='%s' ...", domain)
- federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
+ federation.fetch_instances(domain, "beach.city", None, inspect.currentframe().f_code.co_name)
+
+ logger.debug("Success! - EXIT!")
+ return 0
+
+def fetch_joinmobilizon(args: argparse.Namespace) -> int:
+    """
+    Fetches the instance list from instances.joinmobilizon.org and crawls
+    every listed host that is wanted and not yet registered.
+
+    Returns 0 on success or when the source has been accessed recently,
+    1 when the decoded API response has no 'data' key.
+    """
+    logger.debug("args[]='%s' - CALLED!", type(args))
+
+    logger.debug("Invoking locking.acquire() ...")
+    locking.acquire()
+
+    source_domain = "instances.joinmobilizon.org"
+    if sources.is_recent(source_domain):
+        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+        return 0
+    else:
+        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+        sources.update(source_domain)
+
+    logger.info("Fetching instances from source_domain='%s' ...", source_domain)
+    raw = utils.fetch_url(
+        f"https://{source_domain}/api/v1/instances",
+        network.web_headers,
+        (config.get("connection_timeout"), config.get("read_timeout"))
+    ).text
+    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
+
+    parsed = json.loads(raw)
+    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
+
+    if "data" not in parsed:
+        # The '%d' placeholder needs its argument, otherwise logging reports a formatting error
+        logger.warning("parsed()=%d does not contain key 'data'", len(parsed))
+        return 1
+
+    # Name of this function, handed to fetch_instances() with every host
+    command = inspect.currentframe().f_code.co_name
+
+    logger.info("Checking %d instances ...", len(parsed["data"]))
+    for row in parsed["data"]:
+        logger.debug("row[]='%s'", type(row))
+        if "host" not in row:
+            logger.warning("row='%s' does not contain key 'host' - SKIPPED!", row)
+            continue
+        elif not domain_helper.is_wanted(row["host"]):
+            logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
+            continue
+        elif instances.is_registered(row["host"]):
+            logger.debug("row[host]='%s' is already registered - SKIPPED!", row["host"])
+            continue
+
+        logger.info("Fetching row[host]='%s' ...", row["host"])
+        federation.fetch_instances(row["host"], "demo.mobilizon.org", None, command)
+
+    logger.debug("Success! - EXIT!")
+    return 0
+
+def fetch_joinmisskey(args: argparse.Namespace) -> int:
+    """
+    Fetches instances.json from instanceapp.misskey.page and crawls every
+    listed entry whose 'url' value is wanted and not yet registered.
+
+    Returns 0 on success or when the source has been accessed recently,
+    1 when the decoded response has no 'instancesInfos' element.
+    """
+    logger.debug("args[]='%s' - CALLED!", type(args))
+
+    logger.debug("Invoking locking.acquire() ...")
+    locking.acquire()
+
+    source_domain = "instanceapp.misskey.page"
+    if sources.is_recent(source_domain):
+        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+        return 0
+    else:
+        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+        sources.update(source_domain)
+
+    logger.info("Fetching instances.json from source_domain='%s' ...", source_domain)
+    raw = utils.fetch_url(
+        f"https://{source_domain}/instances.json",
+        network.web_headers,
+        (config.get("connection_timeout"), config.get("read_timeout"))
+    ).text
+    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
+
+    parsed = json.loads(raw)
+    logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
+
+    if "instancesInfos" not in parsed:
+        # The '%d' placeholder needs its argument, otherwise logging reports a formatting error
+        logger.warning("parsed()=%d does not contain element 'instancesInfos'", len(parsed))
+        return 1
+
+    # Name of this function, handed to fetch_instances() with every entry
+    command = inspect.currentframe().f_code.co_name
+
+    logger.info("Checking %d instane(s) ...", len(parsed["instancesInfos"]))
+    for row in parsed["instancesInfos"]:
+        logger.debug("row[%s]='%s'", type(row), row)
+        if "url" not in row:
+            logger.warning("row()=%d does not have element 'url' - SKIPPED!", len(row))
+            continue
+        elif not domain_helper.is_wanted(row["url"]):
+            logger.debug("row[url]='%s' is not wanted - SKIPPED!", row["url"])
+            continue
+        elif instances.is_registered(row["url"]):
+            logger.debug("row[url]='%s' is already registered - SKIPPED!", row["url"])
+            continue
+
+        logger.info("Fetching row[url]='%s' ...", row["url"])
+        federation.fetch_instances(row["url"], "misskey.io", None, command)
     logger.debug("Success! - EXIT!")
     return 0
def fetch_joinfediverse(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+
+ logger.debug("Invoking locking.acquire() ...")
locking.acquire()
- raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
+ source_domain = "joinfediverse.wiki"
+ if sources.is_recent(source_domain):
+ logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+ return 0
+ else:
+ logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+ sources.update(source_domain)
+
+ logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
+ raw = utils.fetch_url(
+ f"https://{source_domain}/FediBlock",
+ network.web_headers,
+ (config.get("connection_timeout"), config.get("read_timeout"))
+ ).text
logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
doc = bs4.BeautifulSoup(raw, "html.parser")
logger.debug("text[]='%s'", type(text))
if not isinstance(text, str):
- logger.debug("text[]='%s' is not 'str' - SKIPPED!", type(text))
+ logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
continue
elif validators.domain(text.strip()):
logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
continue
text = tidyup.domain(text.strip())
- logger.debug("text='%s'", text)
+ logger.debug("text='%s' - AFTER!", text)
if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
logger.debug("Found header: '%s'=%d", text, cnt)
block_headers[cnt] = text
logger.debug("block='%s'", block)
if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
origin = block["blocked"]
+ logger.debug("origin='%s'", origin)
for subdomain in block["subdomain(s)"]:
block["blocked"] = subdomain + "." + origin
+ logger.debug("block[blocked]='%s'", block["blocked"])
blocking.append(block)
else:
blocking.append(block)
logger.debug("blocking()=%d", blocking)
for block in blocking:
logger.debug("block[]='%s'", type(block))
- block["blocked"] = tidyup.domain(block["blocked"])
+ if "blocked" not in block:
+ raise KeyError(f"block()={len(block)} does not have element 'blocked'")
+ block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
+
if block["blocked"] == "":
logger.debug("block[blocked] is empty - SKIPPED!")
continue
- elif not utils.is_domain_wanted(block["blocked"]):
- logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
+ elif not domain_helper.is_wanted(block["blocked"]):
+ logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
continue
elif instances.is_recent(block["blocked"]):
- logger.debug("blocked='%s' has been recently checked - SKIPPED!", block["blocked"])
+ logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
continue
- logger.info("Proccessing blocked='%s' ...", block["blocked"])
- utils.process_domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
+ logger.debug("Proccessing blocked='%s' ...", block["blocked"])
+ processing.domain(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
blockdict = list()
for blocker in domains:
blocker = blocker[0]
logger.debug("blocker[%s]='%s'", type(blocker), blocker)
+ instances.set_last_blocked(blocker)
for block in blocking:
- logger.debug("block[blocked]='%s',block[reason]='%s' - BEFORE!", block["blocked"], block["reason"])
+ logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
if block["blocked"] == "":
logger.debug("block[blocked] is empty - SKIPPED!")
continue
- elif not utils.is_domain_wanted(block["blocked"]):
- logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
+ elif not domain_helper.is_wanted(block["blocked"]):
+ logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
continue
logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
- if utils.process_block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
+ if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
blockdict.append({
"blocked": block["blocked"],
def recheck_obfuscation(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+ logger.debug("Invoking locking.acquire() ...")
locking.acquire()
- if isinstance(args.domain, str) and args.domain != "" and utils.is_domain_wanted(args.domain):
+ if isinstance(args.domain, str) and args.domain != "" and domain_helper.is_wanted(args.domain):
database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND domain = ?", [args.domain])
elif isinstance(args.software, str) and args.software != "" and validators.domain(args.software) == args.software:
database.cursor.execute("SELECT domain, software, nodeinfo_url FROM instances WHERE has_obfuscation = 1 AND software = ?", [args.software])
logger.info("Checking %d domains ...", len(rows))
for row in rows:
logger.debug("Fetching peers from domain='%s',software='%s',nodeinfo_url='%s' ...", row["domain"], row["software"], row["nodeinfo_url"])
+ if (args.force is None or not args.force) and args.domain is None and args.software is None and instances.is_recent(row["domain"], "last_blocked"):
+ logger.debug("row[domain]='%s' has been recently checked, args.force[]='%s' - SKIPPED!", row["domain"], type(args.force))
+ continue
blocking = list()
if row["software"] == "pleroma":
logger.debug("domain='%s',software='%s'", row["domain"], row["software"])
blocking = misskey.fetch_blocks(row["domain"])
else:
- logger.warning("Unknown sofware: domain='%s',software='%s'", row["domain"], row["software"])
+ logger.warning("Unknown software: domain='%s',software='%s'", row["domain"], row["software"])
+
+ logger.debug("row[domain]='%s'", row["domain"])
+
+ # chaos.social requires special care ...
+ if row["domain"] != "chaos.social" and not blocklists.is_excluded(row["domain"]):
+ logger.debug("Invoking instances.set_total_blocks(%s, %d) ...", row["domain"], len(blocking))
+ instances.set_last_blocked(row["domain"])
+ instances.set_total_blocks(row["domain"], blocking)
- logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
obfuscated = 0
blockdict = list()
+
+ logger.info("Checking %d block(s) from domain='%s' ...", len(blocking), row["domain"])
for block in blocking:
logger.debug("block[blocked]='%s'", block["blocked"])
blocked = None
elif block["blocked"].find("*") >= 0 or block["blocked"].find("?") >= 0:
logger.debug("block='%s' is obfuscated.", block["blocked"])
obfuscated = obfuscated + 1
- blocked = utils.deobfuscate_domain(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
- elif not utils.is_domain_wanted(block["blocked"]):
- logger.warning("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
+ blocked = utils.deobfuscate(block["blocked"], row["domain"], block["hash"] if "hash" in block else None)
+ elif not domain_helper.is_wanted(block["blocked"]):
+ logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
continue
elif blocks.is_instance_blocked(row["domain"], block["blocked"]):
logger.debug("blocked='%s' is already blocked - SKIPPED!", block["blocked"])
if blocked is not None and blocked != block["blocked"]:
logger.debug("blocked='%s' was deobfuscated to blocked='%s'", block["blocked"], blocked)
obfuscated = obfuscated - 1
+
if blocks.is_instance_blocked(row["domain"], blocked):
logger.debug("blocked='%s' is already blocked by domain='%s' - SKIPPED!", blocked, row["domain"])
continue
+ elif blacklist.is_blacklisted(blocked):
+ logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
+ continue
- block["block_level"] = utils.alias_block_level(block["block_level"])
+ block["block_level"] = blocks.alias_block_level(block["block_level"])
logger.info("blocked='%s' has been deobfuscated to blocked='%s', adding ...", block["blocked"], blocked)
- if utils.process_block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
+ if processing.block(row["domain"], blocked, block["reason"], block["block_level"]) and block["block_level"] == "reject" and config.get("bot_enabled"):
logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], row["domain"])
blockdict.append({
"blocked": blocked,
"reason" : block["reason"],
})
+ logger.debug("Settings obfuscated=%d for row[domain]='%s' ...", obfuscated, row["domain"])
+ instances.set_obfuscated_blocks(row["domain"], obfuscated)
+
logger.info("domain='%s' has %d obfuscated domain(s)", row["domain"], obfuscated)
if obfuscated == 0 and len(blocking) > 0:
logger.info("Block list from domain='%s' has been fully deobfuscated.", row["domain"])
logger.debug("Success! - EXIT!")
return 0
+
+def fetch_fedilist(args: argparse.Namespace) -> int:
+    """
+    Downloads the instance CSV from demo.fedilist.com (optionally limited to
+    args.software) and crawls every listed hostname that is wanted, not
+    recently crawled and - unless args.force is set - not yet registered.
+
+    Returns 0 on success or when the source has been accessed recently,
+    1 when the CSV could not be fetched.
+    """
+    logger.debug("args[]='%s' - CALLED!", type(args))
+
+    logger.debug("Invoking locking.acquire() ...")
+    locking.acquire()
+
+    source_domain = "demo.fedilist.com"
+    if sources.is_recent(source_domain):
+        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+        return 0
+    else:
+        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+        sources.update(source_domain)
+
+    url = f"http://{source_domain}/instance/csv?onion=not"
+    if args.software is not None and args.software != "":
+        logger.debug("args.software='%s'", args.software)
+        url = f"http://{source_domain}/instance/csv?software={args.software}&onion=not"
+
+    logger.info("Fetching url='%s' ...", url)
+    response = reqto.get(
+        url,
+        headers=network.web_headers,
+        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
+        allow_redirects=False
+    )
+
+    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
+    if not response.ok or response.status_code >= 300 or len(response.content) == 0:
+        # Report the length of response.content, which is what the message names and what was checked
+        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
+        return 1
+
+    # csv.DictReader() either returns a reader or raises, so no None check is needed
+    reader = csv.DictReader(response.content.decode("utf-8").splitlines(), dialect="unix")
+    logger.debug("reader[]='%s'", type(reader))
+
+    rows = list(reader)
+
+    # Name of this function, handed to fetch_instances() with every domain
+    command = inspect.currentframe().f_code.co_name
+
+    logger.info("Checking %d rows ...", len(rows))
+    for row in rows:
+        logger.debug("row[]='%s'", type(row))
+        if "hostname" not in row:
+            logger.warning("row()=%d has no element 'hostname' - SKIPPED!", len(row))
+            continue
+
+        logger.debug("row[hostname]='%s' - BEFORE!", row["hostname"])
+        domain = tidyup.domain(row["hostname"])
+        logger.debug("domain='%s' - AFTER!", domain)
+
+        if domain == "":
+            logger.debug("domain is empty after tidyup: row[hostname]='%s' - SKIPPED!", row["hostname"])
+            continue
+
+        logger.debug("domain='%s' - BEFORE!", domain)
+        domain = domain.encode("idna").decode("utf-8")
+        logger.debug("domain='%s' - AFTER!", domain)
+
+        if not domain_helper.is_wanted(domain):
+            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
+            continue
+        elif not args.force and instances.is_registered(domain):
+            # A missing (None) or False args.force both mean "do not re-crawl registered domains"
+            logger.debug("domain='%s' is already registered, --force not specified: args.force[]='%s'", domain, type(args.force))
+            continue
+        elif instances.is_recent(domain):
+            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+            continue
+
+        logger.info("Fetching instances from domain='%s' ...", domain)
+        federation.fetch_instances(domain, None, None, command)
+
+    logger.debug("Success! - EXIT!")
+    return 0
+
+def update_nodeinfo(args: argparse.Namespace) -> int:
+    """
+    Re-determines the software type of instances and stores the result.
+
+    Rows are selected by args.domain, else by args.software, else (with
+    args.no_software) those without a software type whose nodeinfo check is
+    due, else all rows whose nodeinfo check is due. Unless args.force is
+    set, rows that were recently checked are skipped.
+
+    Always returns 0.
+    """
+    logger.debug("args[]='%s' - CALLED!", type(args))
+
+    logger.debug("Invoking locking.acquire() ...")
+    locking.acquire()
+
+    if args.domain is not None and args.domain != "":
+        logger.debug("Fetching args.domain='%s'", args.domain)
+        database.cursor.execute("SELECT domain, software FROM instances WHERE domain = ?", [args.domain])
+    elif args.software is not None and args.software != "":
+        logger.info("Fetching domains for args.software='%s'", args.software)
+        database.cursor.execute("SELECT domain, software FROM instances WHERE software = ?", [args.software])
+    elif args.no_software:
+        logger.info("Fetching domains with no software type detected ...")
+        database.cursor.execute("SELECT domain, software FROM instances WHERE software IS NULL AND (last_nodeinfo < ? OR last_nodeinfo IS NULL)", [time.time() - config.get("recheck_nodeinfo")])
+    else:
+        logger.info("Fetching domains for recently updated ...")
+        database.cursor.execute("SELECT domain, software FROM instances WHERE last_nodeinfo < ? OR last_nodeinfo IS NULL", [time.time() - config.get("recheck_nodeinfo")])
+
+    domains = database.cursor.fetchall()
+
+    logger.info("Checking %d domain(s) ...", len(domains))
+    # The row position drives the progress display, so skipped rows count towards it as well
+    for index, row in enumerate(domains):
+        logger.debug("row[]='%s'", type(row))
+        if not args.force and instances.is_recent(row["domain"], "last_nodeinfo"):
+            logger.debug("row[domain]='%s' has been recently checked - SKIPPED!", row["domain"])
+            continue
+
+        try:
+            logger.info("Checking nodeinfo for row[domain]='%s',row[software]='%s' (%s%%) ...", row["domain"], row["software"], "{:5.1f}".format(index / len(domains) * 100))
+            software = federation.determine_software(row["domain"])
+
+            logger.debug("Determined software='%s'", software)
+            if (software != row["software"] and software is not None) or args.force is True:
+                logger.warning("Software type for row[domain]='%s' has changed from '%s' to '%s'!", row["domain"], row["software"], software)
+                instances.set_software(row["domain"], software)
+
+            if software is not None:
+                logger.debug("Setting row[domain]='%s' as successfully determined ...", row["domain"])
+                instances.set_success(row["domain"])
+        except network.exceptions as exception:
+            logger.warning("Exception '%s' during updating nodeinfo for row[domain]='%s'", type(exception), row["domain"])
+            instances.set_last_error(row["domain"], exception)
+
+        # Record the check and flush pending data whether or not the lookup succeeded
+        instances.set_last_nodeinfo(row["domain"])
+        instances.update_data(row["domain"])
+
+    logger.debug("Success! - EXIT!")
+    return 0
+
+def fetch_instances_social(args: argparse.Namespace) -> int:
+    """
+    Fetches the instance list from the instances.social API and crawls every
+    listed domain that is wanted, not yet registered and not recently crawled.
+
+    Returns 0 on success or when the source has been accessed recently,
+    1 when no API key is configured, 2 when the API reported an error
+    message, 3 when fetching raised an exception, 4 when the result has no
+    'json' element and 5 when the JSON has no 'instances' element.
+    """
+    logger.debug("args[]='%s' - CALLED!", type(args))
+
+    logger.debug("Invoking locking.acquire() ...")
+    locking.acquire()
+
+    source_domain = "instances.social"
+
+    if config.get("instances_social_api_key") == "":
+        logger.error("API key not set. Please set in your config.json file.")
+        return 1
+    elif sources.is_recent(source_domain):
+        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+        return 0
+    else:
+        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+        sources.update(source_domain)
+
+    headers = {
+        "Authorization": f"Bearer {config.get('instances_social_api_key')}",
+    }
+
+    logger.info("Fetching list from source_domain='%s' ...", source_domain)
+    fetched = network.get_json_api(
+        source_domain,
+        "/api/1.0/instances/list?count=0&sort_by=name",
+        headers,
+        (config.get("connection_timeout"), config.get("read_timeout"))
+    )
+    logger.debug("fetched[]='%s'", type(fetched))
+
+    if "error_message" in fetched:
+        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
+        return 2
+    elif "exception" in fetched:
+        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
+        return 3
+    elif "json" not in fetched:
+        logger.warning("fetched has no element 'json' - EXIT!")
+        return 4
+    elif "instances" not in fetched["json"]:
+        logger.warning("fetched[row] has no element 'instances' - EXIT!")
+        return 5
+
+    # Domains already handled in this run, used to skip duplicate rows
+    domains = set()
+    rows = fetched["json"]["instances"]
+
+    # Name of this function, handed to fetch_instances() with every domain
+    command = inspect.currentframe().f_code.co_name
+
+    logger.info("Checking %d row(s) ...", len(rows))
+    for row in rows:
+        logger.debug("row[]='%s'", type(row))
+        if "name" not in row:
+            logger.warning("row()=%d has no element 'name' - SKIPPED!", len(row))
+            continue
+
+        domain = tidyup.domain(row["name"])
+        logger.debug("domain='%s' - AFTER!", domain)
+
+        if domain == "":
+            logger.debug("domain is empty - SKIPPED!")
+            continue
+
+        logger.debug("domain='%s' - BEFORE!", domain)
+        domain = domain.encode("idna").decode("utf-8")
+        logger.debug("domain='%s' - AFTER!", domain)
+
+        if not domain_helper.is_wanted(domain):
+            logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
+            continue
+        elif domain in domains:
+            logger.debug("domain='%s' is already added - SKIPPED!", domain)
+            continue
+        elif instances.is_registered(domain):
+            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
+            continue
+        elif instances.is_recent(domain):
+            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+            continue
+
+        # Remember the domain so the duplicate check above can take effect
+        domains.add(domain)
+
+        logger.info("Fetching instances from domain='%s'", domain)
+        federation.fetch_instances(domain, None, None, command)
+
+    logger.debug("Success! - EXIT!")
+    return 0
+
def _register_relay_peer(domain: str, row, peers: list, domains: list) -> None:
    """Validate one domain found on a relay's page and record it.

    The tidied domain is appended to ``peers`` (the per-relay peer list) when
    not already present, and a ``{"domain", "origin"}`` dict is appended to
    ``domains`` (the run-wide crawl queue) unless an entry with the same
    domain is already queued. Both lists are modified in place.

    Args:
        domain: Raw domain string as scraped from the relay's page.
        row: Relay record providing the ``"domain"`` and ``"software"`` keys.
        peers: Peer domains collected so far for this relay.
        domains: Domains collected so far across all relays.
    """
    # NOTE(review): is_wanted() is applied to the raw value before
    # tidyup.domain(); this order is kept from the original implementation.
    if not domain_helper.is_wanted(domain):
        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
        return

    logger.debug("domain='%s' - BEFORE!", domain)
    domain = tidyup.domain(domain)
    logger.debug("domain='%s' - AFTER!", domain)

    if domain == "":
        logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
        return

    if domain not in peers:
        logger.debug("Appending domain='%s' to peers list for relay='%s' ...", domain, row["domain"])
        peers.append(domain)

    if dict_helper.has_key(domains, "domain", domain):
        logger.debug("domain='%s' already added", domain)
        return

    logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
    domains.append({
        "domain": domain,
        "origin": row["domain"],
    })

def fetch_relays(args: argparse.Namespace) -> int:
    """Scrape the front page of known relays and crawl the peers they list.

    Relays are the rows in ``instances`` whose software is 'activityrelay',
    'aoderelay' or 'selective-relay'. For each relay the front page is
    fetched and parsed, the listed peer domains are stored as that relay's
    peers, and every collected domain that is not yet registered is handed
    to ``federation.fetch_instances()``.

    Args:
        args: Parsed command-line arguments. ``args.domain`` (optional)
            limits the run to a single relay; ``args.force`` bypasses the
            ``instances.is_recent()`` check.

    Returns:
        Always 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    if args.domain is not None and args.domain != "":
        database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay') AND domain = ? LIMIT 1", [args.domain])
    else:
        database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay')")

    domains = []
    rows = database.cursor.fetchall()

    logger.info("Checking %d relays ...", len(rows))
    for row in rows:
        logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
        peers = []
        if not args.force and instances.is_recent(row["domain"]):
            logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
            continue

        try:
            logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
            raw = utils.fetch_url(
                f"https://{row['domain']}",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text
            logger.debug("raw[%s]()=%d", type(raw), len(raw))
        except network.exceptions as exception:
            logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
            instances.set_last_error(row["domain"], exception)
            instances.set_last_instance_fetch(row["domain"])
            instances.update_data(row["domain"])
            continue

        doc = bs4.BeautifulSoup(raw, features="html.parser")
        logger.debug("doc[]='%s'", type(doc))

        logger.debug("row[software]='%s'", row["software"])
        if row["software"] == "activityrelay":
            # Peers are plain text nodes inside the paragraph whose first
            # child mentions "registered instances".
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            tags = doc.find_all("p")

            logger.debug("Checking %d paragraphs ...", len(tags))
            for tag in tags:
                logger.debug("tag[]='%s'", type(tag))
                if len(tag.contents) == 0:
                    logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
                    continue
                elif "registered instances" not in tag.contents[0]:
                    logger.debug("Skipping paragraph, text not found.")
                    continue

                logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
                for domain in tag.contents:
                    logger.debug("domain[%s]='%s'", type(domain), domain)
                    # Skip child tags (e.g. line breaks) and the heading text itself
                    if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
                        continue

                    domain = str(domain)
                    logger.debug("domain='%s'", domain)
                    _register_relay_peer(domain, row, peers, domains)
        elif row["software"] in ["aoderelay", "selective-relay"]:
            # Peers are links, one per "instance" section (aoderelay) or per
            # list item below the "instances" container (selective-relay).
            logger.debug("Checking row[domain]='%s' ...", row["domain"])
            if row["software"] == "aoderelay":
                tags = doc.find_all("section", {"class": "instance"})
            else:
                # find() returns None when the container is missing; treat
                # that as "no peers" instead of crashing the whole run.
                container = doc.find("div", {"id": "instances"})
                if container is None:
                    logger.warning("row[domain]='%s' has no div with id 'instances' - no peers found", row["domain"])
                    tags = []
                else:
                    tags = container.find_all("li")

            logger.debug("Checking %d tags ...", len(tags))
            for tag in tags:
                logger.debug("tag[]='%s'", type(tag))

                link = tag.find("a")
                logger.debug("link[%s]='%s'", type(link), link)
                if link is None:
                    logger.warning("tag='%s' has no a-tag ...", tag)
                    continue

                # An anchor without href would raise KeyError on link["href"]
                href = link.get("href")
                if href is None:
                    logger.warning("link='%s' has no href attribute - SKIPPED!", link)
                    continue

                components = urlparse(href)
                domain = components.netloc.lower()

                _register_relay_peer(domain, row, peers, domains)
        else:
            logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])

        logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
        instances.set_last_instance_fetch(row["domain"])

        logger.info("Relay '%s' has %d peer(s) registered.", row["domain"], len(peers))
        instances.set_total_peers(row["domain"], peers)

        logger.debug("Flushing data for row[domain]='%s'", row["domain"])
        instances.update_data(row["domain"])

    logger.info("Checking %d domains ...", len(domains))
    for row in domains:
        logger.debug("row[domain]='%s',row[origin]='%s'", row["domain"], row["origin"])
        if instances.is_registered(row["domain"]):
            logger.debug("row[domain]='%s' is already registered - SKIPPED!", row["domain"])
            continue

        logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
        federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0
+
def convert_idna(args: argparse.Namespace) -> int:
    """Hand all stored domain columns lacking 'xn--' to translate_idnas().

    Processes, in this order, ``instances.domain``, ``instances.origin``,
    ``blocks.blocker`` and ``blocks.blocked``: each column's values that do
    not contain 'xn--' are selected and passed to the owning model's
    ``translate_idnas()`` (presumably converting them to their IDNA form;
    confirm in the model modules).

    Args:
        args: Parsed command-line arguments (only logged here).

    Returns:
        Always 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    # One entry per column: (query, model module, column name)
    conversions = (
        ("SELECT domain FROM instances WHERE domain NOT LIKE '%xn--%' ORDER BY domain ASC", instances, "domain"),
        ("SELECT origin FROM instances WHERE origin NOT LIKE '%xn--%' ORDER BY origin ASC", instances, "origin"),
        ("SELECT blocker FROM blocks WHERE blocker NOT LIKE '%xn--%' ORDER BY blocker ASC", blocks, "blocker"),
        ("SELECT blocked FROM blocks WHERE blocked NOT LIKE '%xn--%' ORDER BY blocked ASC", blocks, "blocked"),
    )

    for query, model, column in conversions:
        database.cursor.execute(query)
        found = database.cursor.fetchall()

        logger.debug("rows[]='%s'", type(found))
        model.translate_idnas(found, column)

    logger.debug("Success! - EXIT!")
    return 0
+
def remove_invalid(args: argparse.Namespace) -> int:
    """Delete instances with an invalid domain, together with their blocks.

    Every domain in ``instances`` is checked with ``validators.domain()``,
    using only the part before the first '/'. For a failing domain all
    ``blocks`` rows naming it as blocker or blocked are deleted, then its
    ``instances`` row. Afterwards the changes are committed and the
    database is vacuumed.

    Args:
        args: Parsed command-line arguments (only logged here).

    Returns:
        Always 0.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    database.cursor.execute("SELECT domain FROM instances ORDER BY domain ASC")
    records = database.cursor.fetchall()

    logger.info("Checking %d domains ...", len(records))
    for record in records:
        name = record["domain"]
        logger.debug("row[domain]='%s'", name)

        # Only the host part (before any path) is validated
        host = name.split("/")[0]
        if validators.domain(host):
            continue

        logger.info("Invalid row[domain]='%s' found, removing ...", name)
        database.cursor.execute("DELETE FROM blocks WHERE blocker = ? OR blocked = ?", [name, name])
        database.cursor.execute("DELETE FROM instances WHERE domain = ? LIMIT 1", [name])

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Runs after the commit above, so no transaction is open at this point
    logger.info("Vaccum cleaning database ...")
    database.cursor.execute("VACUUM")

    logger.debug("Success! - EXIT!")
    return 0