from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import cookies
+from fba.helpers import dicts as dict_helper
from fba.helpers import locking
from fba.helpers import processing
from fba.helpers import software as software_helper
        return list()
    try:
-        logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
+        logger.info("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
        fetched = network.get_json_api(
            source_domain,
            "/api/v1/servers/all.json?scope=All&country=all&language=all",
"reject": list(),
}
-    raw = utils.fetch_url(f"https://{source_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
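+    # The block list page is fetched as raw HTML and handed to BeautifulSoup below.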
+    logger.debug("Fetching domainblocks from source_domain='%s'", source_domain)
+    raw = utils.fetch_url(
+        f"https://{source_domain}/todon/domainblocks",
+        network.web_headers,
+        (config.get("connection_timeout"), config.get("read_timeout"))
+    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
    doc = bs4.BeautifulSoup(raw, "html.parser")
        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
        sources.update(source_domain)
-    raw = utils.fetch_url(f"https://{source_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
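+    # federation.md is raw markdown; it is rendered to HTML first so BeautifulSoup can parse it.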
+    logger.info("Fetching federation.md from source_domain='%s' ...", source_domain)
+    raw = utils.fetch_url(
+        f"https://{source_domain}/chaossocial/meta/master/federation.md",
+        network.web_headers,
+        (config.get("connection_timeout"), config.get("read_timeout"))
+    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
    doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
"Authorization": f"Bearer {config.get('instances_social_api_key')}",
}
+ logger.info("Fetching list from source_domain='%s' ...", source_domain)
fetched = network.get_json_api(
source_domain,
"/api/1.0/instances/list?count=0&sort_by=name",
def fetch_relays(args: argparse.Namespace) -> int:
    logger.debug("args[]='%s' - CALLED!", type(args))
-    database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay')")
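+    # When --domain is given, restrict the query to that single relay; otherwise check all known relays.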
+    if args.domain is not None and args.domain != "":
+        database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay') AND domain = ? LIMIT 1", [args.domain])
+    else:
+        database.cursor.execute("SELECT domain, software FROM instances WHERE software IN ('activityrelay', 'aoderelay', 'selective-relay')")
    domains = list()
-
    rows = database.cursor.fetchall()
+
    logger.info("Checking %d relays ...", len(rows))
    for row in rows:
-        logger.debug("Fetching peers from row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
+        logger.debug("row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
+        if not args.force and instances.is_recent(row["domain"]):
+            logger.debug("row[domain]='%s' has been recently fetched - SKIPPED!", row["domain"])
+            continue
+
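+        # Fetch the relay's start page; each supported relay software lists its peers there.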
+        try:
+            logger.info("Fetching / from relay row[domain]='%s',row[software]='%s' ...", row["domain"], row["software"])
+            raw = utils.fetch_url(
+                f"https://{row['domain']}",
+                network.web_headers,
+                (config.get("connection_timeout"), config.get("read_timeout"))
+            ).text
+            logger.debug("raw[%s]()=%d", type(raw), len(raw))
+        except network.exceptions as exception:
+            logger.warning("Exception '%s' during fetching from relay '%s': '%s'", type(exception), row["domain"], str(exception))
+            instances.set_last_error(row["domain"], exception)
+            instances.set_last_instance_fetch(row["domain"])
+            instances.update_data(row["domain"])
+            continue
+
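+        # Parse the returned HTML; each relay software needs its own scraping logic below.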
+        doc = bs4.BeautifulSoup(raw, features="html.parser")
+        logger.debug("doc[]='%s'", type(doc))
+
+        logger.debug("row[software]='%s'", row["software"])
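+        # activityrelay has no structured peer list: the domains are plain text nodes
+        # inside the <p> tag that also carries the "registered instances" heading.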
+        if row["software"] == "activityrelay":
+            logger.debug("Checking row[domain]='%s' ...", row["domain"])
+            tags = doc.findAll("p")
+
+            logger.debug("Checking %d paragraphs ...", len(tags))
+            for tag in tags:
+                logger.debug("tag[]='%s'", type(tag))
+                if len(tag.contents) == 0:
+                    logger.debug("tag='%s' is an empty tag - SKIPPED!", tag)
+                    continue
+                elif "registered instances" not in tag.contents[0]:
+                    logger.debug("Skipping paragraph, text not found.")
+                    continue
+
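+                # Keep only text nodes; markup children (e.g. <br/>) and the heading line itself are skipped.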
+                logger.debug("Found tag.contents[0][]='%s'", tag.contents[0])
+                for domain in tag.contents:
+                    logger.debug("domain[%s]='%s'", type(domain), domain)
+                    if not isinstance(domain, bs4.element.NavigableString) or "registered instances" in domain:
+                        continue
+
+                    domain = str(domain)
+                    logger.debug("domain='%s' - BEFORE!", domain)
+                    domain = tidyup.domain(domain)
+                    logger.debug("domain='%s' - AFTER!", domain)
+
+                    if domain == "":
+                        logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
+                        continue
+                    elif not utils.is_domain_wanted(domain):
+                        logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
+                        continue
+                    elif instances.is_registered(domain):
+                        logger.debug("domain='%s' is already registered - SKIPPED!", domain)
+                        continue
+                    elif dict_helper.has_key(domains, "domain", domain):
+                        logger.debug("domain='%s' already added - SKIPPED!", domain)
+                        continue
+
+                    logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
+                    domains.append({
+                        "domain": domain,
+                        "origin": row["domain"],
+                    })
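+        # aoderelay renders peers as <section class="instance"> blocks, selective-relay as
+        # <li> items below <div id="instances">; both link each peer, so the domain is
+        # taken from the first <a> tag's href.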
+        elif row["software"] in ["aoderelay", "selective-relay"]:
+            logger.debug("Checking row[domain]='%s' ...", row["domain"])
+            if row["software"] == "aoderelay":
+                tags = doc.findAll("section", {"class": "instance"})
+            else:
+                tags = doc.find("div", {"id": "instances"}).findAll("li")
+
+            logger.debug("Checking %d tags ...", len(tags))
+            for tag in tags:
+                logger.debug("tag[]='%s'", type(tag))
+
+                link = tag.find("a")
+                logger.debug("link[%s]='%s'", type(link), link)
+                if link is None:
+                    logger.warning("tag='%s' has no a-tag - SKIPPED!", tag)
+                    continue
+
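+                # The peer domain is the host part of the link target.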
+                components = urlparse(link["href"])
+                domain = components.netloc.lower()
+
+                logger.debug("domain='%s' - BEFORE!", domain)
+                domain = tidyup.domain(domain)
+                logger.debug("domain='%s' - AFTER!", domain)
+
+                if domain == "":
+                    logger.debug("Empty domain after tidyup.domain() from origin='%s' - SKIPPED!", row["domain"])
+                    continue
+                elif not utils.is_domain_wanted(domain):
+                    logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
+                    continue
+                elif instances.is_registered(domain):
+                    logger.debug("domain='%s' is already registered - SKIPPED!", domain)
+                    continue
+                elif dict_helper.has_key(domains, "domain", domain):
+                    logger.debug("domain='%s' already added - SKIPPED!", domain)
+                    continue
+
+                logger.debug("Appending domain='%s',origin='%s',software='%s' ...", domain, row["domain"], row["software"])
+                domains.append({
+                    "domain": domain,
+                    "origin": row["domain"],
+                })
+        else:
+            logger.warning("row[domain]='%s',row[software]='%s' is not supported", row["domain"], row["software"])
+
+        logger.debug("Updating last_instance_fetch for row[domain]='%s' ...", row["domain"])
+        instances.set_last_instance_fetch(row["domain"])
+        instances.update_data(row["domain"])
+
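+    # All relays are scraped; now run a full instance fetch on every newly discovered domain.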
+    logger.info("Found %d domains to add ...", len(domains))
+    for row in domains:
+        logger.info("Fetching row[domain]='%s',row[origin]='%s' ...", row["domain"], row["origin"])
+        federation.fetch_instances(row["domain"], row["origin"], None, inspect.currentframe().f_code.co_name)
    logger.debug("Success! - EXIT!")
    return 0