from fba.http import federation
from fba.http import network
+from fba.models import apis
from fba.models import blocks
from fba.models import instances
# No CSRF by default; you don't need to add network.api_headers yourself here
headers = tuple()
- domain = "pixelfed.org"
+ api_domain = "pixelfed.org"
+
+ if apis.is_recent(api_domain):
+ logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
+ return 0
+ else:
+ logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+ apis.update(api_domain)
try:
- logger.debug("Checking CSRF from domain='%s' ...", domain)
- headers = csrf.determine(domain, dict())
+ logger.debug("Checking CSRF from api_domain='%s' ...", api_domain)
+ headers = csrf.determine(api_domain, dict())
except network.exceptions as exception:
logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
return list()
try:
logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
fetched = network.get_json_api(
- domain,
+ api_domain,
"/api/v1/servers/all.json?scope=All&country=all&language=all",
headers,
(config.get("connection_timeout"), config.get("read_timeout"))
def fetch_bkali(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+
+ api_domain = "gql.apis.bka.li"
+ if apis.is_recent(api_domain):
+ logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
+ return 0
+ else:
+ logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+ apis.update(api_domain)
+
domains = list()
try:
- fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
- "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
- }))
+ logger.info("Fetching domainlist from api_domain='%s' ...", api_domain)
+ fetched = network.post_json_api(
+ api_domain,
+ "/v1/graphql",
+ json.dumps({
+ "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
+ })
+ )
logger.debug("fetched[]='%s'", type(fetched))
if "error_message" in fetched:
- logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s", fetched["error_message"])
+ logger.warning("post_json_api() for 'gql.apis.bka.li' returned error message='%s", fetched["error_message"])
return 100
elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
def fetch_observer(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+ api_domain = "fediverse.observer"
+ if apis.is_recent(api_domain):
+ logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
+ return 0
+ else:
+ logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+ apis.update(api_domain)
+
# Acquire lock
locking.acquire()
if args.software is None:
logger.info("Fetching software list ...")
raw = utils.fetch_url(
- "https://fediverse.observer",
+ f"https://{api_domain}",
network.web_headers,
(config.get("connection_timeout"), config.get("read_timeout"))
).text
try:
logger.debug("Fetching table data for software='%s' ...", software)
raw = utils.fetch_url(
- f"https://fediverse.observer/app/views/tabledata.php?software={software}",
+ f"https://{api_domain}/app/views/tabledata.php?software={software}",
network.web_headers,
(config.get("connection_timeout"), config.get("read_timeout"))
).text
doc = bs4.BeautifulSoup(raw, features="html.parser")
logger.debug("doc[]='%s'", type(doc))
except network.exceptions as exception:
- logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
+ logger.warning("Cannot fetch software='%s' from api_domain='%s': '%s'", software, api_domain, type(exception))
continue
items = doc.findAll("a", {"class": "url"})
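
# The loop over those anchors follows outside this hunk; a hedged sketch of
# pulling the instance domain out of each <a class="url"> element
# (decode_contents() yielding the bare domain is an assumption):
for item in items:
    logger.debug("item[]='%s'", type(item))
    domain = item.decode_contents()
    if not utils.is_domain_wanted(domain):
        logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
        continue
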
def fetch_todon_wiki(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+ api_domain = "wiki.todon.eu"
+ if apis.is_recent(api_domain):
+ logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
+ return 0
+ else:
+ logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+ apis.update(api_domain)
+
locking.acquire()
+
blocklist = {
"silenced": list(),
"reject": list(),
}
- raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
+ raw = utils.fetch_url(f"https://{api_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
doc = bs4.BeautifulSoup(raw, "html.parser")
"reject" : list(),
}
- raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
+ api_domain = "raw.githubusercontent.com"
+ if apis.is_recent(api_domain):
+ logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
+ return 0
+ else:
+ logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+ apis.update(api_domain)
+
+ raw = utils.fetch_url(f"https://{api_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
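
# "extensions" is defined outside this hunk; for the block table in
# federation.md to survive the markdown-to-HTML round trip it presumably
# includes the "tables" extension, e.g. (an assumption):
extensions = ["tables"]
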
def fetch_fbabot_atom(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
- feed = "https://ryona.agency/users/fba/feed.atom"
+
+ api_domain = "ryana.agency"
+ if apis.is_recent(api_domain):
+ logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
+ return 0
+ else:
+ logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+ apis.update(api_domain)
+
+ feed = f"https://{api_domain}/users/fba/feed.atom"
domains = list()
logger.debug("domain='%s'", domain)
try:
logger.info("Fetching instances from domain='%s' ...", domain)
- federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
+ federation.fetch_instances(domain, api_domain, None, inspect.currentframe().f_code.co_name)
except network.exceptions as exception:
logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
instances.set_last_error(domain, exception)
def fetch_instances(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+
locking.acquire()
# Initial fetch
def fetch_oliphant(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+
+ api_domain = "codeberg.org"
+ if apis.is_recent(api_domain):
+ logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
+ return 0
+ else:
+ logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+ apis.update(api_domain)
+
locking.acquire()
# Base URL
- base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"
+ base_url = f"https://{api_domain}/oliphant/blocklists/raw/branch/main/blocklists"
# URLs to fetch
blocklists = (
def fetch_txt(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+
locking.acquire()
# Static URLs
def fetch_fedipact(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+
+ api_domain = "fedipact.online"
+ if apis.is_recent(api_domain):
+ logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
+ return 0
+ else:
+ logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+ apis.update(api_domain)
+
locking.acquire()
- response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
+ response = utils.fetch_url(
+ f"https://{api_domain}",
+ network.web_headers,
+ (config.get("connection_timeout"), config.get("read_timeout"))
+ )
logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
if response.ok and response.status_code < 300 and response.text != "":
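
# The success branch is not shown; presumably the fedipact.online page is then
# parsed for participating instances, along the lines of (selector details are
# assumptions):
doc = bs4.BeautifulSoup(response.text, "html.parser")
# ... then collect the listed instance domains from the parsed document
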
def fetch_joinfediverse(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
+
+ api_domain = "joinfediverse.wiki"
+ if apis.is_recent(api_domain):
+ logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
+ return 0
+ else:
+ logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+ apis.update(api_domain)
+
locking.acquire()
- raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
+ raw = utils.fetch_url(
+ f"https://{api_domain}/FediBlock",
+ network.web_headers,
+ (config.get("connection_timeout"), config.get("read_timeout"))
+ ).text
logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
doc = bs4.BeautifulSoup(raw, "html.parser")
def fetch_fedilist(args: argparse.Namespace) -> int:
logger.debug("args[]='%s' - CALLED!", type(args))
- url = "http://demo.fedilist.com/instance/csv?onion=not"
+ api_domain = "demo.fedilist.com"
+ if apis.is_recent(api_domain):
+ logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
+ return 0
+ else:
+ logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+ apis.update(api_domain)
+
+ url = f"http://{api_domain}/instance/csv?onion=not"
if args.software is not None and args.software != "":
logger.debug("args.software='%s'", args.software)
- url = f"http://demo.fedilist.com/instance/csv?software={args.software}&onion=not"
+ url = f"http://{api_domain}/instance/csv?software={args.software}&onion=not"
locking.acquire()
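
# The download itself lies beyond this hunk; the endpoint serves CSV, so
# consuming it presumably looks like this (the "hostname" column and the
# dialect are assumptions):
import csv

response = utils.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
reader = csv.DictReader(response.text.splitlines(), dialect="unix")
for row in reader:
    domain = tidyup.domain(row["hostname"])
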
logger.debug("Success! - EXIT!")
return 0
+
+def fetch_instances_social(args: argparse.Namespace) -> int:
+ logger.debug("args[]='%s' - CALLED!", type(args))
+
+ api_domain = "instances.social"
+
+ if config.get("instances_social_api_key") == "":
+ logger.error("API key not set. Please set in your config.json file.")
+ return 1
+ elif apis.is_recent(api_domain):
+ logger.info("API from api_domain='%s' has recently being accessed - EXIT!", api_domain)
+ return 0
+ else:
+ logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+ apis.update(api_domain)
+
+ locking.acquire()
+ headers = {
+ "Authorization": f"Bearer {config.get('instances_social_api_key')}",
+ }
+
+ fetched = network.get_json_api(
+ api_domain,
+ "/api/1.0/instances/list?count=0&sort_by=name",
+ headers,
+ (config.get("connection_timeout"), config.get("read_timeout"))
+ )
+ logger.debug("fetched[]='%s'", type(fetched))
+
+ if "error_message" in fetched:
+ logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
+ return 2
+ elif "exception" in fetched:
+ logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
+ return 3
+ elif "json" not in fetched:
+ logger.warning("fetched has no element 'json' - EXIT!")
+ return 4
+ elif "instances" not in fetched["json"]:
+ logger.warning("fetched[row] has no element 'instances' - EXIT!")
+ return 5
+
+ domains = list()
+ rows = fetched["json"]["instances"]
+
+ logger.info("Checking %d row(s) ...", len(rows))
+ for row in rows:
+ logger.debug("row[]='%s'", type(row))
+ domain = tidyup.domain(row["name"])
+
+ logger.debug("domain='%s' - AFTER!", domain)
+ if domain == "":
+ logger.debug("domain is empty - SKIPPED!")
+ continue
+ elif not utils.is_domain_wanted(domain):
+ logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
+ continue
+ elif domain in domains:
+ logger.debug("domain='%s' is already added - SKIPPED!", domain)
+ continue
+ elif instances.is_registered(domain):
+ logger.debug("domain='%s' is already registered - SKIPPED!", domain)
+ continue
+ elif instances.is_recent(domain):
+ logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
+ continue
+
+ logger.info("Fetching instances from domain='%s'", domain)
+ federation.fetch_instances(domain, api_domain, None, inspect.currentframe().f_code.co_name)
+
+ logger.debug("Success! - EXIT!")
+ return 0
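
# For the new sub-command to be reachable it presumably also has to be wired
# into the CLI's argument parser; a hedged sketch ("subparser_command" and the
# help text are hypothetical):
parser = subparser_command.add_parser(
    "fetch_instances_social",
    help="Fetches instances from instances.social",
)
parser.set_defaults(command=fetch_instances_social)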