X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=fba%2Fnetworks%2Fmisskey.py;h=e47c7dd9d04548c5d63171fec5dfb7665a83e69a;hb=1d06145d9e92de8458e3fd6d0a7e75e629466f16;hp=1ec9cb72b3e67b9508a82ad9ea6fa7ed11f1b724;hpb=2c841ed5b537b238dcbee64e20152d719ce91686;p=fba.git

diff --git a/fba/networks/misskey.py b/fba/networks/misskey.py
index 1ec9cb7..e47c7dd 100644
--- a/fba/networks/misskey.py
+++ b/fba/networks/misskey.py
@@ -16,13 +16,12 @@
 import json
 import logging
 
-import validators
 
 from fba import csrf
+from fba import utils
 
-from fba.helpers import blacklist
 from fba.helpers import config
-from fba.helpers import dicts
+from fba.helpers import domain as domain_helper
 from fba.helpers import tidyup
 
 from fba.http import network
@@ -33,41 +32,30 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
 def fetch_peers(domain: str) -> list:
-    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
-    if not isinstance(domain, str):
-        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
-    elif domain == "":
-        raise ValueError("Parameter 'domain' is empty")
-    elif domain.lower() != domain:
-        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
-    elif not validators.domain(domain.split("/")[0]):
-        raise ValueError(f"domain='{domain}' is not a valid domain")
-    elif domain.endswith(".arpa"):
-        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
-    elif domain.endswith(".tld"):
-        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
-
-    logger.debug(f"domain='{domain}' is misskey, sending API POST request ...")
-    peers  = list()
-    offset = 0
-    step   = config.get("misskey_limit")
+    logger.debug("domain='%s' - CALLED!", domain)
+    domain_helper.raise_on(domain)
+
+    logger.debug("domain='%s' is misskey, sending API POST request ...", domain)
+    peers  = list()
+    offset = 0
+    step   = config.get("misskey_limit")
 
     # No CSRF by default, you don't have to add network.api_headers by yourself here
     headers = tuple()
 
     try:
-        logger.debug(f"Checking CSRF for domain='{domain}'")
+        logger.debug("Checking CSRF for domain='%s'", domain)
         headers = csrf.determine(domain, dict())
     except network.exceptions as exception:
-        logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
+        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
         instances.set_last_error(domain, exception)
-        return peers
+        return list()
 
     # iterating through all "suspended" (follow-only in its terminology)
     # instances page-by-page, since that software doesn't support
     # sending them all at once
     while True:
-        logger.debug(f"Fetching offset='{offset}' from '{domain}' ...")
+        logger.debug("Fetching offset=%d from domain='%s' ...", offset, domain)
         if offset == 0:
             fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
                 "sort" : "+pubAt",
@@ -85,87 +73,61 @@ def fetch_peers(domain: str) -> list:
         # Check records
         logger.debug("fetched[]='%s'", type(fetched))
         if "error_message" in fetched:
-            logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
+            logger.warning("post_json_api() for domain='%s' returned error message: '%s'", domain, fetched['error_message'])
             instances.set_last_error(domain, fetched)
             break
         elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
-            logger.warning(f"post_json_api() returned error: {fetched['error']['message']}")
+            logger.warning("post_json_api() returned error: '%s'", fetched['error']['message'])
             instances.set_last_error(domain, fetched["json"]["error"]["message"])
             break
 
         rows = fetched["json"]
 
-        logger.debug(f"rows()={len(rows)}")
+        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
         if len(rows) == 0:
-            logger.debug(f"Returned zero bytes, exiting loop, domain='{domain}'")
+            logger.debug("Returned zero bytes, domain='%s' - BREAK!", domain)
             break
         elif len(rows) != config.get("misskey_limit"):
-            logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
+            logger.debug("Fetched %d row(s) but expected: %d", len(rows), config.get('misskey_limit'))
             offset = offset + (config.get("misskey_limit") - len(rows))
         else:
-            logger.debug(f"Raising offset by step={step}")
+            logger.debug("Raising offset by step=%d", step)
             offset = offset + step
 
         already = 0
-        logger.debug(f"rows({len(rows)})[]='{type(rows)}'")
+        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
         for row in rows:
-            logger.debug(f"row()={len(row)}")
+            logger.debug("row()=%d", len(row))
             if "host" not in row:
-                logger.warning(f"row()={len(row)} does not contain key 'host': {row},domain='{domain}'")
+                logger.warning("row()=%d does not contain key 'host': row='%s',domain='%s' - SKIPPED!", len(row), row, domain)
                 continue
             elif not isinstance(row["host"], str):
-                logger.warning(f"row[host][]='{type(row['host'])}' is not 'str' - SKIPPED!")
-                continue
-            elif not validators.domain(row["host"].split("/")[0]):
-                logger.warning(f"row[host]='{row['host']}' is not a valid domain - SKIPPED!")
-                continue
-            elif row["host"].endswith(".arpa"):
-                logger.warning(f"row[host]='{row['host']}' is a domain for reversed IP addresses - SKIPPED!")
+                logger.warning("row[host][]='%s' is not 'str' - SKIPPED!", type(row['host']))
                 continue
-            elif row["host"].endswith(".tld"):
-                logger.warning(f"row[host]='{row['host']}' is a fake domain - SKIPPED!")
-                continue
-            elif blacklist.is_blacklisted(row["host"]):
-                logger.debug(f"row[host]='{row['host']}' is blacklisted. domain='{domain}' - SKIPPED!")
+            elif not utils.is_domain_wanted(row["host"]):
+                logger.debug("row[host]='%s' is not wanted, domain='%s' - SKIPPED!", row['host'], domain)
                 continue
             elif row["host"] in peers:
-                logger.debug(f"Not adding row[host]='{row['host']}', already found.")
+                logger.debug("Not adding row[host]='%s', already found - SKIPPED!", row['host'])
                 already = already + 1
                 continue
 
-            logger.debug(f"Adding peer: '{row['host']}'")
+            logger.debug("Adding peer: row[host]='%s'", row['host'])
             peers.append(row["host"])
 
         if already == len(rows):
-            logger.debug(f"Host returned same set of '{already}' instances, aborting loop!")
+            logger.debug("Host returned same set of %d instance(s) - BREAK!", already)
             break
 
-    logger.debug(f"Adding '{len(peers)}' for domain='{domain}'")
-    instances.set_total_peers(domain, peers)
-
-    logger.debug(f"Returning peers[]='{type(peers)}'")
+    logger.debug("peers()=%d - EXIT!", len(peers))
     return peers
 
-def fetch_blocks(domain: str) -> dict:
-    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
-    if not isinstance(domain, str):
-        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
-    elif domain == "":
-        raise ValueError("Parameter 'domain' is empty")
-    elif domain.lower() != domain:
-        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
-    elif not validators.domain(domain.split("/")[0]):
-        raise ValueError(f"domain='{domain}' is not a valid domain")
-    elif domain.endswith(".arpa"):
-        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
-    elif domain.endswith(".tld"):
-        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
-
-    logger.debug(f"Fetching misskey blocks from domain='{domain}'")
-    blocklist = {
-        "suspended": [],
-        "blocked"  : []
-    }
+def fetch_blocks(domain: str) -> list:
+    logger.debug("domain='%s' - CALLED!", domain)
+    domain_helper.raise_on(domain)
+
+    logger.debug("Fetching misskey blocks from domain='%s'", domain)
+    blocklist = list()
 
     offset = 0
     step   = config.get("misskey_limit")
@@ -174,10 +136,10 @@ def fetch_blocks(domain: str) -> dict:
     headers = tuple()
 
     try:
-        logger.debug(f"Checking CSRF for domain='{domain}'")
+        logger.debug("Checking CSRF for domain='%s'", domain)
         headers = csrf.determine(domain, dict())
     except network.exceptions as exception:
-        logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
+        logger.warning("Exception '%s' during checking CSRF (fetch_blocks,%s) - EXIT!", type(exception), __name__)
         instances.set_last_error(domain, exception)
         return blocklist
 
@@ -185,9 +147,9 @@ def fetch_blocks(domain: str) -> dict:
     # instances page-by-page since it doesn't support sending them all at once
     while True:
         try:
-            logger.debug(f"Fetching offset='{offset}' from '{domain}' ...")
+            logger.debug("Fetching offset=%d from domain='%s' ...", offset, domain)
             if offset == 0:
-                logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
+                logger.debug("Sending JSON API request to domain='%s',step=%d,offset=%d", domain, step, offset)
                 fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
                     "sort" : "+pubAt",
                     "host" : None,
@@ -195,7 +157,7 @@ def fetch_blocks(domain: str) -> dict:
                     "limit" : step
                 }), headers)
             else:
-                logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
+                logger.debug("Sending JSON API request to domain='%s',step=%d,offset=%d", domain, step, offset)
                 fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
                     "sort" : "+pubAt",
                     "host" : None,
@@ -206,46 +168,47 @@ def fetch_blocks(domain: str) -> dict:
 
             logger.debug("fetched[]='%s'", type(fetched))
             if "error_message" in fetched:
-                logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
+                logger.warning("post_json_api() for domain='%s' returned error message: '%s'", domain, fetched['error_message'])
                 instances.set_last_error(domain, fetched)
                 break
             elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
-                logger.warning(f"post_json_api() returned error: {fetched['error']['message']}")
+                logger.warning("post_json_api() returned error: '%s'", fetched['error']['message'])
                 instances.set_last_error(domain, fetched["json"]["error"]["message"])
                 break
 
             rows = fetched["json"]
 
-            logger.debug(f"rows({len(rows)})={rows} - suspend")
+            logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
             if len(rows) == 0:
-                logger.debug("Returned zero bytes, exiting loop:", domain)
+                logger.debug("Returned zero bytes, domain='%s' - BREAK!", domain)
                 break
             elif len(rows) != config.get("misskey_limit"):
-                logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
+                logger.debug("Fetched %d row(s) but expected: %d", len(rows), config.get('misskey_limit'))
                 offset = offset + (config.get("misskey_limit") - len(rows))
             else:
-                logger.debug("Raising offset by step:", step)
+                logger.debug("Raising offset by step=%d", step)
                 offset = offset + step
 
             count = 0
             for instance in rows:
                 # Is it there?
-                logger.debug(f"instance[{type(instance)}]='{instance}' - suspend")
-                if "isSuspended" in instance and instance["isSuspended"] and not dicts.has_key(blocklist["suspended"], "domain", instance["host"]):
+                logger.debug("instance[%s]='%s'", type(instance), instance)
+                if "isSuspended" in instance and instance["isSuspended"]:
                     count = count + 1
-                    blocklist["suspended"].append({
-                        "domain": tidyup.domain(instance["host"]),
-                        # no reason field, nothing
-                        "reason": None
+                    blocklist.append({
+                        "blocker"    : domain,
+                        "blocked"    : tidyup.domain(instance["host"]),
+                        "reason"     : None,
+                        "block_level": "suspended",
                     })
 
-            logger.debug(f"count={count}")
+            logger.debug("count=%d", count)
             if count == 0:
-                logger.debug("API is no more returning new instances, aborting loop!")
+                logger.debug("API is no more returning new instances, aborting loop! domain='%s'", domain)
                 break
 
         except network.exceptions as exception:
-            logger.warning(f"Caught error, exiting loop: domain='{domain}',exception[{type(exception)}]='{str(exception)}'")
+            logger.warning("Caught error, exiting loop: domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
             instances.set_last_error(domain, exception)
            offset = 0
            break
 
@@ -254,7 +217,7 @@ def fetch_blocks(domain: str) -> dict:
         # Fetch blocked (full suspended) instances
         try:
             if offset == 0:
-                logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
+                logger.debug("Sending JSON API request to domain='%s',step=%d,offset=%d", domain, step, offset)
                 fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
                     "sort" : "+pubAt",
                     "host" : None,
@@ -262,7 +225,7 @@ def fetch_blocks(domain: str) -> dict:
                     "limit" : step
                 }), headers)
             else:
-                logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
+                logger.debug("Sending JSON API request to domain='%s',step=%d,offset=%d", domain, step, offset)
                 fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
                     "sort" : "+pubAt",
                     "host" : None,
@@ -273,51 +236,52 @@ def fetch_blocks(domain: str) -> dict:
 
             logger.debug("fetched[]='%s'", type(fetched))
             if "error_message" in fetched:
-                logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
+                logger.warning("post_json_api() for domain='%s' returned error message: '%s'", domain, fetched['error_message'])
                 instances.set_last_error(domain, fetched)
                 break
             elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
-                logger.warning(f"post_json_api() returned error: {fetched['error']['message']}")
+                logger.warning("post_json_api() returned error: '%s'", fetched['error']['message'])
                 instances.set_last_error(domain, fetched["json"]["error"]["message"])
                 break
 
             rows = fetched["json"]
 
-            logger.debug(f"rows({len(rows)})={rows} - blocked")
+            logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
             if len(rows) == 0:
-                logger.debug("Returned zero bytes, exiting loop:", domain)
+                logger.debug("Returned zero bytes, domain='%s' - BREAK!", domain)
                 break
             elif len(rows) != config.get("misskey_limit"):
-                logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
+                logger.debug("Fetched %d row(s) but expected: %d", len(rows), config.get('misskey_limit'))
                 offset = offset + (config.get("misskey_limit") - len(rows))
             else:
-                logger.debug("Raising offset by step:", step)
+                logger.debug("Raising offset by step=%d", step)
                 offset = offset + step
 
             count = 0
             for instance in rows:
                 # Is it there?
-                logger.debug(f"instance[{type(instance)}]='{instance}' - blocked")
-                if "isBlocked" in instance and instance["isBlocked"] and not dicts.has_key(blocklist["blocked"], "domain", instance["host"]):
+                logger.debug("instance[%s]='%s'", type(instance), instance)
+                if "isBlocked" in instance and instance["isBlocked"]:
                     count = count + 1
-                    blocklist["blocked"].append({
-                        "domain": tidyup.domain(instance["host"]),
-                        "reason": None
+                    blocked = tidyup.domain(instance["host"])
+
+                    logger.debug("Appending blocker='%s',blocked='%s',block_level='reject'", domain, blocked)
+                    blocklist.append({
+                        "blocker"    : domain,
+                        "blocked"    : blocked,
+                        "reason"     : None,
+                        "block_level": "reject",
                     })
 
-            logger.debug(f"count={count}")
+            logger.debug("count=%d", count)
             if count == 0:
                 logger.debug("API is no more returning new instances, aborting loop!")
                 break
 
         except network.exceptions as exception:
-            logger.warning(f"Caught error, exiting loop: domain='{domain}',exception[{type(exception)}]='{str(exception)}'")
+            logger.warning("Caught error, exiting loop: domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
             instances.set_last_error(domain, exception)
             offset = 0
             break
 
-    logger.debug(f"Returning for domain='{domain}',blocked()={len(blocklist['blocked'])},suspended()={len(blocklist['suspended'])}")
-    return {
-        "reject"        : blocklist["blocked"],
-        "followers_only": blocklist["suspended"]
-    }
+    logger.debug("blocklist()=%d - EXIT!", len(blocklist))
+    return blocklist
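
Note on the refactoring: the inline domain validation deleted above is now expected to happen in the shared helpers fba.helpers.domain.raise_on() and fba.utils.is_domain_wanted(), and fetch_blocks() now returns one flat list of entries with "blocker", "blocked", "reason" and "block_level" keys instead of the old "reject"/"followers_only" mapping. As a rough sketch only (this is not the actual helper code, which lives elsewhere in the fba tree and may do more, e.g. the blacklist lookup that was also removed here), the deleted checks amount to:

import validators


def raise_on(domain: str) -> None:
    # Restates the validation block removed from fetch_peers()/fetch_blocks() above
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif domain.lower() != domain:
        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")
    elif domain.endswith(".arpa"):
        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")


def is_domain_wanted(domain: str) -> bool:
    # Boolean counterpart used for peer rows: skip instead of raising. The real helper
    # presumably also consults the blacklist (blacklist.is_blacklisted() was dropped above).
    try:
        raise_on(domain)
    except ValueError:
        return False
    return True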