]> git.mxchange.org Git - fba.git/blobdiff - fba/commands.py
Continued:
[fba.git] / fba / commands.py
index 05970a11fc56e2c149e7fac6a1e9e5c1e75d60f3..cbfc97ad35904ba553007c96eb383a965b479b73 100644 (file)
@@ -41,6 +41,7 @@ from fba.helpers import tidyup
 from fba.http import federation
 from fba.http import network
 
+from fba.models import apis
 from fba.models import blocks
 from fba.models import instances
 
@@ -100,11 +101,18 @@ def fetch_pixelfed_api(args: argparse.Namespace) -> int:
 
     # No CSRF by default, you don't have to add network.api_headers by yourself here
     headers = tuple()
-    domain = "pixelfed.org"
+    api_domain = "pixelfed.org"
+
+    if apis.is_recent(api_domain):
+        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
+        return 0
+    else:
+        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+        apis.update(api_domain)
 
     try:
-        logger.debug("Checking CSRF from domain='%s' ...", domain)
-        headers = csrf.determine(domain, dict())
+        logger.debug("Checking CSRF from api_domain='%s' ...", api_domain)
+        headers = csrf.determine(api_domain, dict())
     except network.exceptions as exception:
         logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
         return list()
@@ -112,7 +120,7 @@ def fetch_pixelfed_api(args: argparse.Namespace) -> int:
     try:
         logger.debug("Fetching JSON from pixelfed.org API, headers()=%d ...", len(headers))
         fetched = network.get_json_api(
-            domain,
+            api_domain,
             "/api/v1/servers/all.json?scope=All&country=all&language=all",
             headers,
             (config.get("connection_timeout"), config.get("read_timeout"))
@@ -158,15 +166,29 @@ def fetch_pixelfed_api(args: argparse.Namespace) -> int:
 
 def fetch_bkali(args: argparse.Namespace) -> int:
     logger.debug("args[]='%s' - CALLED!", type(args))
+
+    api_domain = "gql.api.bka.li"
+    if apis.is_recent(api_domain):
+        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
+        return 0
+    else:
+        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+        apis.update(api_domain)
+
     domains = list()
     try:
-        fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
-            "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
-        }))
+        logger.info("Fetching domainlist from api_domain='%s' ...", api_domain)
+        fetched = network.post_json_api(
+            api_domain,
+            "/v1/graphql",
+            json.dumps({
+                "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
+            })
+        )
 
         logger.debug("fetched[]='%s'", type(fetched))
         if "error_message" in fetched:
-            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s", fetched["error_message"])
+            logger.warning("post_json_api() for 'gql.api.bka.li' returned error message='%s'", fetched["error_message"])
             return 100
         elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
             logger.warning("post_json_api() returned error: '%s", fetched["error"]["message"])
@@ -407,6 +429,14 @@ def fetch_blocks(args: argparse.Namespace) -> int:
 def fetch_observer(args: argparse.Namespace) -> int:
     logger.debug("args[]='%s' - CALLED!", type(args))
 
+    api_domain = "fediverse.observer"
+    if apis.is_recent(api_domain):
+        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
+        return 0
+    else:
+        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+        apis.update(api_domain)
+
     # Acquire lock
     locking.acquire()
 
@@ -414,7 +444,7 @@ def fetch_observer(args: argparse.Namespace) -> int:
     if args.software is None:
         logger.info("Fetching software list ...")
         raw = utils.fetch_url(
-            "https://fediverse.observer",
+            f"https://{api_domain}",
             network.web_headers,
             (config.get("connection_timeout"), config.get("read_timeout"))
         ).text
@@ -450,7 +480,7 @@ def fetch_observer(args: argparse.Namespace) -> int:
         try:
             logger.debug("Fetching table data for software='%s' ...", software)
             raw = utils.fetch_url(
-                f"https://fediverse.observer/app/views/tabledata.php?software={software}",
+                f"https://{api_domain}/app/views/tabledata.php?software={software}",
                 network.web_headers,
                 (config.get("connection_timeout"), config.get("read_timeout"))
             ).text
@@ -459,7 +489,7 @@ def fetch_observer(args: argparse.Namespace) -> int:
             doc = bs4.BeautifulSoup(raw, features="html.parser")
             logger.debug("doc[]='%s'", type(doc))
         except network.exceptions as exception:
-            logger.warning("Cannot fetch software='%s' from fediverse.observer: '%s'", software, type(exception))
+            logger.warning("Cannot fetch software='%s' from api_domain='%s': '%s'", software, api_domain, type(exception))
             continue
 
         items = doc.findAll("a", {"class": "url"})
@@ -492,13 +522,22 @@ def fetch_observer(args: argparse.Namespace) -> int:
 def fetch_todon_wiki(args: argparse.Namespace) -> int:
     logger.debug("args[]='%s' - CALLED!", type(args))
 
+    api_domain = "wiki.todon.eu"
+    if apis.is_recent(api_domain):
+        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
+        return 0
+    else:
+        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+        apis.update(api_domain)
+
     locking.acquire()
+
     blocklist = {
         "silenced": list(),
         "reject": list(),
     }
 
-    raw = utils.fetch_url("https://wiki.todon.eu/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
+    raw = utils.fetch_url(f"https://{api_domain}/todon/domainblocks", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
 
     doc = bs4.BeautifulSoup(raw, "html.parser")
@@ -589,7 +628,15 @@ def fetch_cs(args: argparse.Namespace):
         "reject"  : list(),
     }
 
-    raw = utils.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
+    api_domain = "raw.githubusercontent.com"
+    if apis.is_recent(api_domain):
+        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
+        return 0
+    else:
+        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+        apis.update(api_domain)
+
+    raw = utils.fetch_url(f"https://{api_domain}/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
 
     doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features="html.parser")
@@ -709,7 +756,16 @@ def fetch_fba_rss(args: argparse.Namespace) -> int:
 
 def fetch_fbabot_atom(args: argparse.Namespace) -> int:
     logger.debug("args[]='%s' - CALLED!", type(args))
-    feed = "https://ryona.agency/users/fba/feed.atom"
+
+    api_domain = "ryona.agency"
+    if apis.is_recent(api_domain):
+        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
+        return 0
+    else:
+        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+        apis.update(api_domain)
+
+    feed = f"https://{api_domain}/users/fba/feed.atom"
 
     domains = list()
 
@@ -761,7 +817,7 @@ def fetch_fbabot_atom(args: argparse.Namespace) -> int:
             logger.debug("domain='%s'", domain)
             try:
                 logger.info("Fetching instances from domain='%s' ...", domain)
-                federation.fetch_instances(domain, "ryona.agency", None, inspect.currentframe().f_code.co_name)
+                federation.fetch_instances(domain, api_domain, None, inspect.currentframe().f_code.co_name)
             except network.exceptions as exception:
                 logger.warning("Exception '%s' during fetching instances (fetch_fbabot_atom) from domain='%s'", type(exception), domain)
                 instances.set_last_error(domain, exception)
@@ -772,6 +828,7 @@ def fetch_fbabot_atom(args: argparse.Namespace) -> int:
 
 def fetch_instances(args: argparse.Namespace) -> int:
     logger.debug("args[]='%s' - CALLED!", type(args))
+
     locking.acquire()
 
     # Initial fetch
@@ -816,10 +873,19 @@ def fetch_instances(args: argparse.Namespace) -> int:
 
 def fetch_oliphant(args: argparse.Namespace) -> int:
     logger.debug("args[]='%s' - CALLED!", type(args))
+
+    api_domain = "codeberg.org"
+    if apis.is_recent(api_domain):
+        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
+        return 0
+    else:
+        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+        apis.update(api_domain)
+
     locking.acquire()
 
     # Base URL
-    base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists"
+    base_url = f"https://{api_domain}/oliphant/blocklists/raw/branch/main/blocklists"
 
     # URLs to fetch
     blocklists = (
@@ -973,6 +1039,7 @@ def fetch_oliphant(args: argparse.Namespace) -> int:
 
 def fetch_txt(args: argparse.Namespace) -> int:
     logger.debug("args[]='%s' - CALLED!", type(args))
+
     locking.acquire()
 
     # Static URLs
@@ -1020,9 +1087,22 @@ def fetch_txt(args: argparse.Namespace) -> int:
 
 def fetch_fedipact(args: argparse.Namespace) -> int:
     logger.debug("args[]='%s' - CALLED!", type(args))
+
+    api_domain = "fedipact.online"
+    if apis.is_recent(api_domain):
+        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
+        return 0
+    else:
+        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+        apis.update(api_domain)
+
     locking.acquire()
 
-    response = utils.fetch_url("https://fedipact.online", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
+    response = utils.fetch_url(
+        f"https://{api_domain}",
+        network.web_headers,
+        (config.get("connection_timeout"), config.get("read_timeout"))
+    )
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
     if response.ok and response.status_code < 300 and response.text != "":
@@ -1059,9 +1139,22 @@ def fetch_fedipact(args: argparse.Namespace) -> int:
 
 def fetch_joinfediverse(args: argparse.Namespace) -> int:
     logger.debug("args[]='%s' - CALLED!", type(args))
+
+    api_domain = "joinfediverse.wiki"
+    if apis.is_recent(api_domain):
+        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
+        return 0
+    else:
+        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+        apis.update(api_domain)
+
     locking.acquire()
 
-    raw = utils.fetch_url("https://joinfediverse.wiki/FediBlock", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
+    raw = utils.fetch_url(
+        f"https://{api_domain}/FediBlock",
+        network.web_headers,
+        (config.get("connection_timeout"), config.get("read_timeout"))
+    ).text
     logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
 
     doc = bs4.BeautifulSoup(raw, "html.parser")
@@ -1328,10 +1421,18 @@ def recheck_obfuscation(args: argparse.Namespace) -> int:
 def fetch_fedilist(args: argparse.Namespace) -> int:
     logger.debug("args[]='%s' - CALLED!", type(args))
 
-    url = "http://demo.fedilist.com/instance/csv?onion=not"
+    api_domain = "demo.fedilist.com"
+    if apis.is_recent(api_domain):
+        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
+        return 0
+    else:
+        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
+        apis.update(api_domain)
+
+    url = f"http://{api_domain}/instance/csv?onion=not"
     if args.software is not None and args.software != "":
         logger.debug("args.software='%s'", args.software)
-        url = f"http://demo.fedilist.com/instance/csv?software={args.software}&onion=not"
+        url = f"http://{api_domain}/instance/csv?software={args.software}&onion=not"
 
     locking.acquire()
 
@@ -1413,3 +1514,75 @@ def update_nodeinfo(args: argparse.Namespace) -> int:
 
     logger.debug("Success! - EXIT!")
     return 0
+
def fetch_instances_social(args: argparse.Namespace) -> int:
    """Fetch the full instance list from the instances.social API and crawl
    any new, wanted domains.

    Requires the 'instances_social_api_key' config value to be set; access is
    rate-limited via apis.is_recent()/apis.update().

    Returns 0 on success (or recent access), 1 on missing API key, and
    2..5 for the various API-response failure modes.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    api_domain = "instances.social"

    if config.get("instances_social_api_key") == "":
        logger.error("API key not set. Please set in your config.json file.")
        return 1
    elif apis.is_recent(api_domain):
        logger.info("API from api_domain='%s' has recently been accessed - EXIT!", api_domain)
        return 0
    else:
        logger.debug("api_domain='%s' has not been recently used, marking ...", api_domain)
        apis.update(api_domain)

    locking.acquire()

    # instances.social requires a bearer token for all API requests.
    headers = {
        "Authorization": f"Bearer {config.get('instances_social_api_key')}",
    }

    # count=0 means "no limit" here — fetch the complete list in one request.
    fetched = network.get_json_api(
        api_domain,
        "/api/1.0/instances/list?count=0&sort_by=name",
        headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )
    logger.debug("fetched[]='%s'", type(fetched))

    # Validate the API response shape before touching its payload.
    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
        return 2
    elif "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
        return 3
    elif "json" not in fetched:
        logger.warning("fetched has no element 'json' - EXIT!")
        return 4
    elif "instances" not in fetched["json"]:
        logger.warning("fetched['json'] has no element 'instances' - EXIT!")
        return 5

    # Domains already handled in this run, to skip duplicate rows.
    domains = list()
    rows = fetched["json"]["instances"]

    logger.info("Checking %d row(s) ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))
        domain = tidyup.domain(row["name"])

        logger.debug("domain='%s' - AFTER!", domain)
        if domain == "":
            logger.debug("domain is empty - SKIPPED!")
            continue
        elif not utils.is_domain_wanted(domain):
            logger.warning("domain='%s' is not wanted - SKIPPED!", domain)
            continue
        elif domain in domains:
            logger.debug("domain='%s' is already added - SKIPPED!", domain)
            continue
        elif instances.is_registered(domain):
            logger.debug("domain='%s' is already registered - SKIPPED!", domain)
            continue
        elif instances.is_recent(domain):
            logger.debug("domain='%s' has been recently crawled - SKIPPED!", domain)
            continue

        # Record the domain so later duplicate rows are skipped (the list was
        # previously never appended to, making the dedup check above dead code).
        domains.append(domain)

        logger.info("Fetching instances from domain='%s'", domain)
        federation.fetch_instances(domain, api_domain, None, inspect.currentframe().f_code.co_name)

    logger.debug("Success! - EXIT!")
    return 0