import json
import logging
-import validators
-
-from fba import csrf
from fba.helpers import blacklist
from fba.helpers import config
-from fba.helpers import dicts
+from fba.helpers import dicts as dict_helper
+from fba.helpers import domain as domain_helper
from fba.helpers import tidyup
+from fba.http import csrf
from fba.http import network
from fba.models import instances
logger = logging.getLogger(__name__)
# NOTE(review): this span is a unified-diff view of fetch_peers(), not plain Python: lines starting
# with '-' are removed (old) code, lines starting with '+' are added (new) code, and unprefixed
# lines are unchanged context. Indentation was stripped and some lines between hunks are elided.
#
# Purpose: POST to "/api/federation/instances" on `domain`, page by page, and return a list of the
# "host" strings found in the returned rows.
# Returns: list of peer host names; in the new code an empty list is returned when
# csrf.determine() raises one of network.exceptions.
# Raises (new code): Exception when blacklist.is_blacklisted(domain) is true or when
# instances.is_registered(domain) is false, plus whatever domain_helper.raise_on(domain) raises.
def fetch_peers(domain: str) -> list:
# Removed: inline parameter validation; replaced below by a single domain_helper.raise_on() call
# (presumably performing equivalent checks - confirm in fba.helpers.domain).
- logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
- if not isinstance(domain, str):
- raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
- elif domain == "":
- raise ValueError("Parameter 'domain' is empty")
- elif domain.lower() != domain:
- raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
- elif not validators.domain(domain.split("/")[0]):
- raise ValueError(f"domain='{domain}' is not a valid domain")
- elif domain.endswith(".arpa"):
- raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
- elif domain.endswith(".tld"):
- raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
-
- logger.debug(f"domain='{domain}' is misskey, sending API POST request ...")
- peers = list()
- offset = 0
- step = config.get("misskey_limit")
+ logger.debug("domain='%s' - CALLED!", domain)
+ domain_helper.raise_on(domain)
+
# Added: refuse to run for blacklisted or unregistered domains; callers are expected to have
# filtered these out already, hence a hard exception rather than a silent return.
+ if blacklist.is_blacklisted(domain):
+ raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
+ elif not instances.is_registered(domain):
+ raise Exception(f"domain='{domain}' is not registered but function is invoked.")
+
+ logger.debug("domain='%s' is misskey, sending API POST request ...", domain)
+ peers = list()
+ offset = 0
+ step = config.get("misskey_limit")
# No CSRF by default, you don't have to add network.api_headers by yourself here
headers = tuple()
try:
- logger.debug(f"Checking CSRF for domain='{domain}'")
+ logger.debug("Checking CSRF for domain='%s'", domain)
headers = csrf.determine(domain, dict())
except network.exceptions as exception:
# CSRF lookup failed: the error is stored on the instance and the function gives up early.
- logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
+ logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
instances.set_last_error(domain, exception)
- return peers
+
+ logger.debug("Returning empty list ... - EXIT!")
+ return list()
# iterating through all "suspended" (follow-only in its terminology)
# instances page-by-page, since that software doesn't support
# sending them all at once
while True:
- logger.debug(f"Fetching offset='{offset}' from '{domain}' ...")
+ logger.debug("Fetching offset=%d from domain='%s' ...", offset, domain)
if offset == 0:
fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
"sort" : "+pubAt",
# NOTE(review): the rest of this request payload and the branch for offset != 0 are not visible
# here (elided between diff hunks); do not assume their contents from this view.
# Check records
logger.debug("fetched[]='%s'", type(fetched))
# Two error shapes end the paging loop: a top-level "error_message" key, or a JSON body that is
# a dict carrying error.message. Both are recorded via instances.set_last_error().
if "error_message" in fetched:
- logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
+ logger.warning("post_json_api() for domain='%s' returned error message: '%s'", domain, fetched['error_message'])
instances.set_last_error(domain, fetched)
break
elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
# The removed log line read fetched['error']['message'], which does not match the path tested in
# the condition above; the added line reads fetched["json"]["error"]["message"] instead.
- logger.warning(f"post_json_api() returned error: {fetched['error']['message']}")
+ logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
instances.set_last_error(domain, fetched["json"]["error"]["message"])
break
rows = fetched["json"]
- logger.debug(f"rows()={len(rows)}")
+ logger.debug("rows(%d)[]='%s',step=%d", len(rows), type(rows), step)
# Offset bookkeeping: an empty page stops the loop, a full page advances by `step`.
# NOTE(review): on a short page the offset grows by the shortfall (limit - len(rows)), not by the
# number of rows received - confirm this is intended.
if len(rows) == 0:
- logger.debug(f"Returned zero bytes, exiting loop, domain='{domain}'")
+ logger.debug("Returned zero bytes, domain='%s' - BREAK!", domain)
break
elif len(rows) != config.get("misskey_limit"):
- logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
+ logger.debug("Fetched %d row(s) but expected: %d", len(rows), config.get('misskey_limit'))
offset = offset + (config.get("misskey_limit") - len(rows))
else:
- logger.debug(f"Raising offset by step={step}")
+ logger.debug("Raising offset by step=%d", step)
offset = offset + step
# The old code counted rows that were already known (`already`); the new code counts rows that
# were newly appended (`added`) and uses that as the loop-termination signal further below.
- already = 0
- logger.debug(f"rows({len(rows)})[]='{type(rows)}'")
+ added = 0
# NOTE(review): the added format string below has an unbalanced ')' in "rows(%d))[]" (log text only).
+ logger.debug("rows(%d))[]='%s'", len(rows), type(rows))
for row in rows:
- logger.debug(f"row()={len(row)}")
+ logger.debug("row()=%d", len(row))
if "host" not in row:
- logger.warning(f"row()={len(row)} does not contain key 'host': {row},domain='{domain}'")
+ logger.warning("row()=%d does not contain key 'host': row='%s',domain='%s' - SKIPPED!", len(row), row, domain)
continue
elif not isinstance(row["host"], str):
# Removed: per-row validators/.arpa/.tld/blacklist checks; the added code relies on
# domain_helper.is_wanted() instead (presumably covering the same cases - confirm in the helper).
- logger.warning(f"row[host][]='{type(row['host'])}' is not 'str' - SKIPPED!")
- continue
- elif not validators.domain(row["host"].split("/")[0]):
- logger.warning(f"row[host]='{row['host']}' is not a valid domain - SKIPPED!")
- continue
- elif row["host"].endswith(".arpa"):
- logger.warning(f"row[host]='{row['host']}' is a domain for reversed IP addresses - SKIPPED!")
- continue
- elif row["host"].endswith(".tld"):
- logger.warning(f"row[host]='{row['host']}' is a fake domain - SKIPPED!")
- continue
- elif blacklist.is_blacklisted(row["host"]):
- logger.debug(f"row[host]='{row['host']}' is blacklisted. domain='{domain}' - SKIPPED!")
+ logger.warning("row[host][]='%s' is not of type 'str' - SKIPPED!", type(row['host']))
continue
elif row["host"] in peers:
- logger.debug(f"Not adding row[host]='{row['host']}', already found.")
- already = already + 1
+ logger.debug("Not adding row[host]='%s', already found - SKIPPED!", row['host'])
+ continue
+ elif not domain_helper.is_wanted(row["host"]):
+ logger.debug("row[host]='%s' is not wanted - SKIPPED!", row["host"])
continue
- logger.debug(f"Adding peer: '{row['host']}'")
+ logger.debug("Adding peer: row[host]='%s'", row['host'])
+ added = added + 1
peers.append(row["host"])
# Termination: old code stopped when every row of the page was already known; new code stops when
# the page contributed no new peer at all (which also covers pages of only skipped rows).
- if already == len(rows):
- logger.debug(f"Host returned same set of '{already}' instances, aborting loop!")
+ logger.debug("added=%d,rows()=%d", added, len(rows))
+ if added == 0:
+ logger.debug("Host returned already added (%d) peers - BREAK!", len(rows))
break
# Removed: the total peer count is no longer stored from inside this function.
- logger.debug(f"Adding '{len(peers)}' for domain='{domain}'")
- instances.set_total_peers(domain, peers)
-
- logger.debug(f"Returning peers[]='{type(peers)}'")
+ logger.debug("peers()=%d - EXIT!", len(peers))
return peers
# NOTE(review): this span is a unified-diff view of fetch_blocks(), not plain Python: lines
# starting with '-' are removed (old) code, lines starting with '+' are added (new) code, and
# unprefixed lines are unchanged context. Indentation was stripped and some lines between hunks
# are elided.
#
# Purpose: POST to "/api/federation/instances" on `domain` in two paging loops and collect the
# rows flagged "isSuspended" (first loop) and "isBlocked" (second loop).
# Returns (old code): dict with keys "reject" and "followers_only", each a list of
# {"domain", "reason"} dicts.
# Returns (new code): one flat list of dicts with keys "blocker", "blocked", "reason" and
# "block_level" ("suspended" or "reject"); an empty list when csrf.determine() raises one of
# network.exceptions. Callers consuming the old dict shape must be adapted.
# Raises (new code): Exception when blacklist.is_blacklisted(domain) is true or when
# instances.is_registered(domain) is false, plus whatever domain_helper.raise_on(domain) raises.
-def fetch_blocks(domain: str) -> dict:
- logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
- if not isinstance(domain, str):
- raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
- elif domain == "":
- raise ValueError("Parameter 'domain' is empty")
- elif domain.lower() != domain:
- raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
- elif not validators.domain(domain.split("/")[0]):
- raise ValueError(f"domain='{domain}' is not a valid domain")
- elif domain.endswith(".arpa"):
- raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
- elif domain.endswith(".tld"):
- raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
-
- logger.debug(f"Fetching misskey blocks from domain='{domain}'")
- blocklist = {
- "suspended": [],
- "blocked" : []
- }
-
- offset = 0
- step = config.get("misskey_limit")
+def fetch_blocks(domain: str) -> list:
+ logger.debug("domain='%s' - CALLED!", domain)
+ domain_helper.raise_on(domain)
+
# Added: refuse to run for blacklisted or unregistered domains.
+ if blacklist.is_blacklisted(domain):
+ raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
+ elif not instances.is_registered(domain):
+ raise Exception(f"domain='{domain}' is not registered but function is invoked.")
# No CSRF by default, you don't have to add network.api_headers by yourself here
headers = tuple()
try:
- logger.debug(f"Checking CSRF for domain='{domain}'")
+ logger.debug("Checking CSRF for domain='%s' ...", domain)
headers = csrf.determine(domain, dict())
except network.exceptions as exception:
# CSRF lookup failed: the error is stored on the instance and the function gives up early.
- logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
+ logger.warning("Exception '%s' during checking CSRF (fetch_blocks,%s)", type(exception), __name__)
instances.set_last_error(domain, exception)
- return blocklist
+
+ logger.debug("Returning empty list ... - EXIT!")
+ return list()
+
# Added: the result container is now a single list, initialised after the CSRF check.
+ blocklist = list()
+ offset = 0
+ step = config.get("misskey_limit")
# iterating through all "suspended" (follow-only in its terminology)
# instances page-by-page since it doesn't support sending them all at once
+ logger.debug("Fetching misskey blocks from domain='%s'", domain)
while True:
+ logger.debug("offset=%d", offset)
try:
- logger.debug(f"Fetching offset='{offset}' from '{domain}' ...")
+ logger.debug("Fetching offset=%d from domain='%s' ...", offset, domain)
if offset == 0:
# The removed debug calls passed extra arguments without any %-placeholders in the message; the
# added calls supply a matching placeholder for each argument.
- logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
+ logger.debug("Sending JSON API request to domain='%s',step=%d,offset=%d", domain, step, offset)
fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
"sort" : "+pubAt",
"host" : None,
"limit" : step
}), headers)
else:
- logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
+ logger.debug("Sending JSON API request to domain='%s',step=%d,offset=%d", domain, step, offset)
fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
"sort" : "+pubAt",
"host" : None,
# NOTE(review): the remaining payload keys of this request and the closing of the call are not
# visible here (elided between diff hunks).
logger.debug("fetched[]='%s'", type(fetched))
# Two error shapes end this paging loop; both are recorded via instances.set_last_error().
if "error_message" in fetched:
- logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
+ logger.warning("post_json_api() for domain='%s' returned error message: '%s'", domain, fetched['error_message'])
instances.set_last_error(domain, fetched)
break
elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
# The removed log line read fetched['error']['message'], which does not match the path tested in
# the condition above; the added line reads fetched["json"]["error"]["message"] instead.
- logger.warning(f"post_json_api() returned error: {fetched['error']['message']}")
+ logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
instances.set_last_error(domain, fetched["json"]["error"]["message"])
break
rows = fetched["json"]
# The added log line reports only the row count and type instead of dumping every row.
- logger.debug(f"rows({len(rows)})={rows} - suspend")
+ logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
# NOTE(review): on a short page the offset grows by the shortfall (limit - len(rows)), not by the
# number of rows received - confirm this is intended.
if len(rows) == 0:
- logger.debug("Returned zero bytes, exiting loop:", domain)
+ logger.debug("Returned zero bytes, domain='%s' - BREAK!", domain)
break
elif len(rows) != config.get("misskey_limit"):
- logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
+ logger.debug("Fetched %d row(s) but expected: %d", len(rows), config.get('misskey_limit'))
offset = offset + (config.get("misskey_limit") - len(rows))
else:
- logger.debug("Raising offset by step:", step)
+ logger.debug("Raising offset by step=%d", step)
offset = offset + step
# `count` tracks how many new entries this page contributed; zero ends the loop below.
count = 0
+ logger.debug("Checking %d row(s) of instances ...", len(rows))
for instance in rows:
# Is it there?
- logger.debug(f"instance[{type(instance)}]='{instance}' - suspend")
- if "isSuspended" in instance and instance["isSuspended"] and not dicts.has_key(blocklist["suspended"], "domain", instance["host"]):
+ logger.debug("instance[]='%s'", type(instance))
# Added: rows without a usable "host" are skipped before the host is passed to tidyup.domain().
+ if "host" not in instance:
+ logger.warning("instance(%d)='%s' has no key 'host' - SKIPPED!", len(instance), instance)
+ continue
+ elif instance["host"] in [None, ""]:
+ logger.debug("instance[host]='%s' is None or empty - SKIPPED!", instance["host"])
+ continue
+
+ logger.debug("instance[host]='%s' - BEFORE!", instance["host"])
+ blocked = tidyup.domain(instance["host"])
+ logger.debug("blocked[%s]='%s' - AFTER!", type(blocked), blocked)
+
# The duplicate check now compares the tidied host against the "blocked" key of the entries
# already collected, instead of the raw host against the old "domain" key.
+ if blocked in [None, ""]:
+ logger.warning("instance[host]='%s' is None or empty after tidyup.domain() - SKIPPED!", instance["host"])
+ continue
+ elif not domain_helper.is_wanted(blocked):
+ logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
+ continue
+ elif "isSuspended" in instance and instance["isSuspended"] and not dict_helper.has_key(blocklist, "blocked", blocked):
count = count + 1
- blocklist["suspended"].append({
- "domain": tidyup.domain(instance["host"]),
- # no reason field, nothing
- "reason": None
+ logger.debug("Appending blocker='%s',blocked='%s',block_level='suspended'", domain, blocked)
+ blocklist.append({
+ "blocker" : domain,
+ "blocked" : blocked,
+ "reason" : None,
+ "block_level": "suspended",
})
- logger.debug(f"count={count}")
+ logger.debug("count=%d", count)
if count == 0:
- logger.debug("API is no more returning new instances, aborting loop!")
+ logger.debug("API is no more returning new instances, aborting loop! domain='%s'", domain)
break
except network.exceptions as exception:
- logger.warning(f"Caught error, exiting loop: domain='{domain}',exception[{type(exception)}]='{str(exception)}'")
+ logger.warning("Caught error, exiting loop: domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
instances.set_last_error(domain, exception)
# NOTE(review): indentation is lost in this view. If `offset = 0` belongs to the except handler
# only, the second loop below starts from whatever offset the first loop ended at on the normal
# exit path - confirm against the real file.
offset = 0
break
while True:
# Fetch blocked (full suspended) instances
+ logger.debug("offset=%d", offset)
try:
if offset == 0:
- logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
+ logger.debug("Sending JSON API request to domain='%s',step=%d,offset=%d", domain, step, offset)
fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
"sort" : "+pubAt",
"host" : None,
"limit" : step
}), headers)
else:
- logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
+ logger.debug("Sending JSON API request to domain='%s',step=%d,offset=%d", domain, step, offset)
fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
"sort" : "+pubAt",
"host" : None,
# NOTE(review): the remaining payload keys of this request and the closing of the call are not
# visible here (elided between diff hunks).
logger.debug("fetched[]='%s'", type(fetched))
if "error_message" in fetched:
- logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
+ logger.warning("post_json_api() for domain='%s' returned error message: '%s'", domain, fetched['error_message'])
instances.set_last_error(domain, fetched)
break
elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
- logger.warning(f"post_json_api() returned error: {fetched['error']['message']}")
+ logger.warning("post_json_api() returned error: '%s'", fetched["json"]["error"]["message"])
instances.set_last_error(domain, fetched["json"]["error"]["message"])
break
rows = fetched["json"]
- logger.debug(f"rows({len(rows)})={rows} - blocked")
+ logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
if len(rows) == 0:
- logger.debug("Returned zero bytes, exiting loop:", domain)
+ logger.debug("Returned zero bytes, domain='%s' - BREAK!", domain)
break
elif len(rows) != config.get("misskey_limit"):
- logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
# NOTE(review): the added format string below ends with a stray "'" after the last %d (log text only).
+ logger.debug("Fetched %d row(s) but expected: %d'", len(rows), config.get('misskey_limit'))
offset = offset + (config.get("misskey_limit") - len(rows))
else:
- logger.debug("Raising offset by step:", step)
+ logger.debug("Raising offset by step=%d", step)
offset = offset + step
count = 0
+ logger.debug("Checking %d row(s) of instances ...", len(rows))
for instance in rows:
# Is it there?
- logger.debug(f"instance[{type(instance)}]='{instance}' - blocked")
- if "isBlocked" in instance and instance["isBlocked"] and not dicts.has_key(blocklist["blocked"], "domain", instance["host"]):
+ logger.debug("instance[]='%s'", type(instance))
# NOTE(review): unlike the first loop, this loop indexes instance["host"] without first checking
# that the key exists, and only excludes "" (not None) before calling tidyup.domain(). A row
# without "host" would raise KeyError here; confirm how tidyup.domain() treats None.
+ blocked = tidyup.domain(instance["host"]) if instance["host"] != "" else None
+ logger.debug("blocked='%s' - AFTER!", blocked)
+
+ if blocked in [None, ""]:
+ logger.warning("instance[host]='%s' is None or empty after tidyup.domain() - SKIPPED!", instance["host"])
+ continue
+ elif not domain_helper.is_wanted(blocked):
+ logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
+ continue
+ elif "isBlocked" in instance and instance["isBlocked"] and not dict_helper.has_key(blocklist, "blocked", blocked):
count = count + 1
- blocklist["blocked"].append({
- "domain": tidyup.domain(instance["host"]),
- "reason": None
+ logger.debug("Appending blocker='%s',blocked='%s',block_level='reject'", domain, blocked)
+ blocklist.append({
+ "blocker" : domain,
+ "blocked" : blocked,
+ "reason" : None,
+ "block_level": "reject",
})
- logger.debug(f"count={count}")
+ logger.debug("count=%d", count)
if count == 0:
# NOTE(review): this unchanged message omits the domain, unlike its counterpart in the first loop.
logger.debug("API is no more returning new instances, aborting loop!")
break
except network.exceptions as exception:
- logger.warning(f"Caught error, exiting loop: domain='{domain}',exception[{type(exception)}]='{str(exception)}'")
+ logger.warning("Caught error, exiting loop: domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
instances.set_last_error(domain, exception)
offset = 0
break
# Return shape change: the old code re-keyed the two lists into a dict; the new code returns the
# flat list, with the block level carried on each entry.
- logger.debug(f"Returning for domain='{domain}',blocked()={len(blocklist['blocked'])},suspended()={len(blocklist['suspended'])}")
- return {
- "reject" : blocklist["blocked"],
- "followers_only": blocklist["suspended"]
- }
+ logger.debug("blocklist()=%d - EXIT!", len(blocklist))
+ return blocklist