git.mxchange.org Git - fba.git/blobdiff - fba/networks/misskey.py
Notation applied:
[fba.git] / fba / networks / misskey.py
index 1ec9cb72b3e67b9508a82ad9ea6fa7ed11f1b724..e47c7dd9d04548c5d63171fec5dfb7665a83e69a 100644 (file)
 
 import json
 import logging
-import validators
 
 from fba import csrf
+from fba import utils
 
-from fba.helpers import blacklist
 from fba.helpers import config
-from fba.helpers import dicts
+from fba.helpers import domain as domain_helper
 from fba.helpers import tidyup
 
 from fba.http import network
@@ -33,41 +32,30 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 def fetch_peers(domain: str) -> list:
-    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
-    if not isinstance(domain, str):
-        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
-    elif domain == "":
-        raise ValueError("Parameter 'domain' is empty")
-    elif domain.lower() != domain:
-        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
-    elif not validators.domain(domain.split("/")[0]):
-        raise ValueError(f"domain='{domain}' is not a valid domain")
-    elif domain.endswith(".arpa"):
-        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
-    elif domain.endswith(".tld"):
-        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
-
-    logger.debug(f"domain='{domain}' is misskey, sending API POST request ...")
-    peers   = list()
-    offset  = 0
-    step    = config.get("misskey_limit")
+    logger.debug("domain='%s' - CALLED!", domain)
+    domain_helper.raise_on(domain)
+
+    logger.debug("domain='%s' is misskey, sending API POST request ...", domain)
+    peers  = list()
+    offset = 0
+    step   = config.get("misskey_limit")
 
     # No CSRF by default, you don't have to add network.api_headers by yourself here
     headers = tuple()
 
     try:
-        logger.debug(f"Checking CSRF for domain='{domain}'")
+        logger.debug("Checking CSRF for domain='%s'", domain)
         headers = csrf.determine(domain, dict())
     except network.exceptions as exception:
-        logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
+        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
         instances.set_last_error(domain, exception)
-        return peers
+        return list()
 
     # Iterate through all "suspended" (follow-only in Misskey's terminology)
     # instances page-by-page, since the Misskey API doesn't support
     # sending them all at once
     while True:
-        logger.debug(f"Fetching offset='{offset}' from '{domain}' ...")
+        logger.debug("Fetching offset=%d from domain='%s' ...", offset, domain)
         if offset == 0:
             fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
                 "sort" : "+pubAt",
@@ -85,87 +73,61 @@ def fetch_peers(domain: str) -> list:
         # Check records
         logger.debug("fetched[]='%s'", type(fetched))
         if "error_message" in fetched:
-            logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
+            logger.warning("post_json_api() for domain='%s' returned error message: '%s'", domain, fetched['error_message'])
             instances.set_last_error(domain, fetched)
             break
         elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
-            logger.warning(f"post_json_api() returned error: {fetched['error']['message']}")
+            logger.warning("post_json_api() returned error: '%s'", fetched['error']['message'])
             instances.set_last_error(domain, fetched["json"]["error"]["message"])
             break
 
         rows = fetched["json"]
 
-        logger.debug(f"rows()={len(rows)}")
+        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
         if len(rows) == 0:
-            logger.debug(f"Returned zero bytes, exiting loop, domain='{domain}'")
+            logger.debug("Returned zero bytes, domain='%s' - BREAK!", domain)
             break
         elif len(rows) != config.get("misskey_limit"):
-            logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
+            logger.debug("Fetched %d row(s) but expected: %d", len(rows), config.get('misskey_limit'))
             offset = offset + (config.get("misskey_limit") - len(rows))
         else:
-            logger.debug(f"Raising offset by step={step}")
+            logger.debug("Raising offset by step=%d", step)
             offset = offset + step
 
         already = 0
-        logger.debug(f"rows({len(rows)})[]='{type(rows)}'")
+        logger.debug("rows(%d))[]='%s'", len(rows), type(rows))
         for row in rows:
-            logger.debug(f"row()={len(row)}")
+            logger.debug("row()=%d", len(row))
             if "host" not in row:
-                logger.warning(f"row()={len(row)} does not contain key 'host': {row},domain='{domain}'")
+                logger.warning("row()=%d does not contain key 'host': row='%s',domain='%s' - SKIPPED!", len(row), row, domain)
                 continue
             elif not isinstance(row["host"], str):
-                logger.warning(f"row[host][]='{type(row['host'])}' is not 'str' - SKIPPED!")
-                continue
-            elif not validators.domain(row["host"].split("/")[0]):
-                logger.warning(f"row[host]='{row['host']}' is not a valid domain - SKIPPED!")
-                continue
-            elif row["host"].endswith(".arpa"):
-                logger.warning(f"row[host]='{row['host']}' is a domain for reversed IP addresses - SKIPPED!")
+                logger.warning("row[host][]='%s' is not 'str' - SKIPPED!", type(row['host']))
                 continue
-            elif row["host"].endswith(".tld"):
-                logger.warning(f"row[host]='{row['host']}' is a fake domain - SKIPPED!")
-                continue
-            elif blacklist.is_blacklisted(row["host"]):
-                logger.debug(f"row[host]='{row['host']}' is blacklisted. domain='{domain}' - SKIPPED!")
+            elif not utils.is_domain_wanted(row["host"]):
+                logger.debug("row[host]='%s' is not wanted, domain='%s' - SKIPPED!", row['host'], domain)
                 continue
             elif row["host"] in peers:
-                logger.debug(f"Not adding row[host]='{row['host']}', already found.")
+                logger.debug("Not adding row[host]='%s', already found - SKIPPED!", row['host'])
                 already = already + 1
                 continue
 
-            logger.debug(f"Adding peer: '{row['host']}'")
+            logger.debug("Adding peer: row[host]='%s'", row['host'])
             peers.append(row["host"])
 
         if already == len(rows):
-            logger.debug(f"Host returned same set of '{already}' instances, aborting loop!")
+            logger.debug("Host returned same set of %d instance(s) - BREAK!", already)
             break
 
-    logger.debug(f"Adding '{len(peers)}' for domain='{domain}'")
-    instances.set_total_peers(domain, peers)
-
-    logger.debug(f"Returning peers[]='{type(peers)}'")
+    logger.debug("peers()=%d - EXIT!", len(peers))
     return peers
 
-def fetch_blocks(domain: str) -> dict:
-    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
-    if not isinstance(domain, str):
-        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
-    elif domain == "":
-        raise ValueError("Parameter 'domain' is empty")
-    elif domain.lower() != domain:
-        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
-    elif not validators.domain(domain.split("/")[0]):
-        raise ValueError(f"domain='{domain}' is not a valid domain")
-    elif domain.endswith(".arpa"):
-        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
-    elif domain.endswith(".tld"):
-        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
-
-    logger.debug(f"Fetching misskey blocks from domain='{domain}'")
-    blocklist = {
-        "suspended": [],
-        "blocked"  : []
-    }
+def fetch_blocks(domain: str) -> list:
+    logger.debug("domain='%s' - CALLED!", domain)
+    domain_helper.raise_on(domain)
+
+    logger.debug("Fetching misskey blocks from domain='%s'", domain)
+    blocklist = list()
 
     offset  = 0
     step    = config.get("misskey_limit")
@@ -174,10 +136,10 @@ def fetch_blocks(domain: str) -> dict:
     headers = tuple()
 
     try:
-        logger.debug(f"Checking CSRF for domain='{domain}'")
+        logger.debug("Checking CSRF for domain='%s'", domain)
         headers = csrf.determine(domain, dict())
     except network.exceptions as exception:
-        logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
+        logger.warning("Exception '%s' during checking CSRF (fetch_blocks,%s) - EXIT!", type(exception), __name__)
         instances.set_last_error(domain, exception)
         return blocklist
 
@@ -185,9 +147,9 @@ def fetch_blocks(domain: str) -> dict:
     # instances page-by-page since it doesn't support sending them all at once
     while True:
         try:
-            logger.debug(f"Fetching offset='{offset}' from '{domain}' ...")
+            logger.debug("Fetching offset=%d from domain='%s' ...", offset, domain)
             if offset == 0:
-                logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
+                logger.debug("Sending JSON API request to domain='%s',step=%d,offset=%d", domain, step, offset)
                 fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
                     "sort"     : "+pubAt",
                     "host"     : None,
@@ -195,7 +157,7 @@ def fetch_blocks(domain: str) -> dict:
                     "limit"    : step
                 }), headers)
             else:
-                logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
+                logger.debug("Sending JSON API request to domain='%s',step=%d,offset=%d", domain, step, offset)
                 fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
                     "sort"     : "+pubAt",
                     "host"     : None,
@@ -206,46 +168,47 @@ def fetch_blocks(domain: str) -> dict:
 
             logger.debug("fetched[]='%s'", type(fetched))
             if "error_message" in fetched:
-                logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
+                logger.warning("post_json_api() for domain='%s' returned error message: '%s'", domain, fetched['error_message'])
                 instances.set_last_error(domain, fetched)
                 break
             elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
-                logger.warning(f"post_json_api() returned error: {fetched['error']['message']}")
+                logger.warning("post_json_api() returned error: '%s'", fetched['error']['message'])
                 instances.set_last_error(domain, fetched["json"]["error"]["message"])
                 break
 
             rows = fetched["json"]
 
-            logger.debug(f"rows({len(rows)})={rows} - suspend")
+            logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
             if len(rows) == 0:
-                logger.debug("Returned zero bytes, exiting loop:", domain)
+                logger.debug("Returned zero bytes, domain='%s' - BREAK!", domain)
                 break
             elif len(rows) != config.get("misskey_limit"):
-                logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
+                logger.debug("Fetched %d row(s) but expected: %d", len(rows), config.get('misskey_limit'))
                 offset = offset + (config.get("misskey_limit") - len(rows))
             else:
-                logger.debug("Raising offset by step:", step)
+                logger.debug("Raising offset by step=%d", step)
                 offset = offset + step
 
             count = 0
             for instance in rows:
                 # Is it there?
-                logger.debug(f"instance[{type(instance)}]='{instance}' - suspend")
-                if "isSuspended" in instance and instance["isSuspended"] and not dicts.has_key(blocklist["suspended"], "domain", instance["host"]):
+                logger.debug("instance[%s]='%s'", type(instance), instance)
+                if "isSuspended" in instance and instance["isSuspended"]:
                     count = count + 1
-                    blocklist["suspended"].append({
-                        "domain": tidyup.domain(instance["host"]),
-                        # no reason field, nothing
-                        "reason": None
+                    blocklist.append({
+                        "blocker"    : domain,
+                        "blocked"    : tidyup.domain(instance["host"]),
+                        "reason"     : None,
+                        "block_level": "suspended",
                     })
 
-            logger.debug(f"count={count}")
+            logger.debug("count=%d", count)
             if count == 0:
-                logger.debug("API is no more returning new instances, aborting loop!")
+                logger.debug("API is no more returning new instances, aborting loop! domain='%s'", domain)
                 break
 
         except network.exceptions as exception:
-            logger.warning(f"Caught error, exiting loop: domain='{domain}',exception[{type(exception)}]='{str(exception)}'")
+            logger.warning("Caught error, exiting loop: domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
             instances.set_last_error(domain, exception)
             offset = 0
             break
@@ -254,7 +217,7 @@ def fetch_blocks(domain: str) -> dict:
         # Fetch blocked (fully suspended) instances
         try:
             if offset == 0:
-                logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
+                logger.debug("Sending JSON API request to domain='%s',step=%d,offset=%d", domain, step, offset)
                 fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
                     "sort"   : "+pubAt",
                     "host"   : None,
@@ -262,7 +225,7 @@ def fetch_blocks(domain: str) -> dict:
                     "limit"  : step
                 }), headers)
             else:
-                logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
+                logger.debug("Sending JSON API request to domain='%s',step=%d,offset=%d", domain, step, offset)
                 fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
                     "sort"   : "+pubAt",
                     "host"   : None,
@@ -273,51 +236,52 @@ def fetch_blocks(domain: str) -> dict:
 
             logger.debug("fetched[]='%s'", type(fetched))
             if "error_message" in fetched:
-                logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
+                logger.warning("post_json_api() for domain='%s' returned error message: '%s'", domain, fetched['error_message'])
                 instances.set_last_error(domain, fetched)
                 break
             elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
-                logger.warning(f"post_json_api() returned error: {fetched['error']['message']}")
+                logger.warning("post_json_api() returned error: '%s'", fetched['error']['message'])
                 instances.set_last_error(domain, fetched["json"]["error"]["message"])
                 break
 
             rows = fetched["json"]
 
-            logger.debug(f"rows({len(rows)})={rows} - blocked")
+            logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
             if len(rows) == 0:
-                logger.debug("Returned zero bytes, exiting loop:", domain)
+                logger.debug("Returned zero bytes, domain='%s' - BREAK!", domain)
                 break
             elif len(rows) != config.get("misskey_limit"):
-                logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
+                logger.debug("Fetched %d row(s) but expected: %d'", len(rows), config.get('misskey_limit'))
                 offset = offset + (config.get("misskey_limit") - len(rows))
             else:
-                logger.debug("Raising offset by step:", step)
+                logger.debug("Raising offset by step=%d", step)
                 offset = offset + step
 
             count = 0
             for instance in rows:
                 # Is it there?
-                logger.debug(f"instance[{type(instance)}]='{instance}' - blocked")
-                if "isBlocked" in instance and instance["isBlocked"] and not dicts.has_key(blocklist["blocked"], "domain", instance["host"]):
+                logger.debug("instance[%s]='%s'", type(instance), instance)
+                if "isBlocked" in instance and instance["isBlocked"]:
                     count = count + 1
-                    blocklist["blocked"].append({
-                        "domain": tidyup.domain(instance["host"]),
-                        "reason": None
+                    blocked = tidyup.domain(instance["host"])
+                    logger.debug("Appending blocker='%s',blocked='%s',block_level='reject'", domain, blocked)
+                    blocklist.append({
+                        "blocker"    : domain,
+                        "blocked"    : blocked,
+                        "reason"     : None,
+                        "block_level": "reject",
                     })
 
-            logger.debug(f"count={count}")
+            logger.debug("count=%d", count)
             if count == 0:
                 logger.debug("API is no more returning new instances, aborting loop!")
                 break
 
         except network.exceptions as exception:
-            logger.warning(f"Caught error, exiting loop: domain='{domain}',exception[{type(exception)}]='{str(exception)}'")
+            logger.warning("Caught error, exiting loop: domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
             instances.set_last_error(domain, exception)
             offset = 0
             break
 
-    logger.debug(f"Returning for domain='{domain}',blocked()={len(blocklist['blocked'])},suspended()={len(blocklist['suspended'])}")
-    return {
-        "reject"        : blocklist["blocked"],
-        "followers_only": blocklist["suspended"]
-    }
+    logger.debug("blocklist()=%d - EXIT!", len(blocklist))
+    return blocklist