# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
+
import argparse
from fba import commands
from fba.helpers import locking
+logging.basicConfig(level=logging.WARNING)
+logger = logging.getLogger(__name__)
+
+# Argument parser
_PARSER = None
def init_parser():
- # DEBUG: print("DEBUG: init_parser(): CALLED!")
+ logger.debug("init_parser(): CALLED!")
global _PARSER
- # DEBUG: print("DEBUG: Initializing parser ...")
+ logger.debug("Initializing parser ...")
_PARSER = argparse.ArgumentParser(
description="Fetches block reasons from the fediverse",
epilog="Please note that some commands have optional arguments, you may want to try fba.py <command> --help to find them out.",
)
parser.set_defaults(command=commands.fetch_observer)
- # DEBUG: print("DEBUG: init_parser(): EXIT!")
+ logger.debug("init_parser(): EXIT!")
def run_command():
- # DEBUG: print("DEBUG: run_command(): CALLED!")
+ logger.debug("run_command(): CALLED!")
args = _PARSER.parse_args()
- # DEBUG: print(f"DEBUG: args[{type(args)}]={args}")
+ logger.debug(f"args[{type(args)}]={args}")
status = args.command(args)
- # DEBUG: print("DEBUG: status={status} - EXIT!")
+ logger.debug("status={status} - EXIT!")
return status if isinstance(status, int) else 0
def shutdown():
- # DEBUG: print("DEBUG: Closing database connection ...")
+ logger.debug("Closing database connection ...")
fba.connection.close()
locking.release()
- # DEBUG: print("DEBUG: Shutdown completed.")
+ logger.debug("Shutdown completed.")
import csv
import inspect
import json
+import logging
import time
import argparse
from fba.networks import misskey
from fba.networks import pleroma
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
def check_instance(args: argparse.Namespace) -> int:
- # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!")
+ logger.debug(f"args.domain='{args.domain}' - CALLED!")
status = 0
if not validators.domain(args.domain):
- print(f"WARNING: args.domain='{args.domain}' is not valid")
+ logger.warning(f"args.domain='{args.domain}' is not valid")
status = 100
elif blacklist.is_blacklisted(args.domain):
- print(f"WARNING: args.domain='{args.domain}' is blacklisted")
+ logger.warning(f"args.domain='{args.domain}' is blacklisted")
status = 101
elif instances.is_registered(args.domain):
- print(f"WARNING: args.domain='{args.domain}' is already registered")
+ logger.warning(f"args.domain='{args.domain}' is already registered")
status = 102
else:
- print(f"INFO: args.domain='{args.domain}' is not known")
+ logger.info(f"args.domain='{args.domain}' is not known")
- # DEBUG: print(f"DEBUG: status={status} - EXIT!")
+ logger.debug(f"status={status} - EXIT!")
return status
def fetch_bkali(args: argparse.Namespace) -> int:
- # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
+ logger.debug(f"args[]='{type(args)}' - CALLED!")
domains = list()
try:
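+ # Query the gql.api.bka.li GraphQL endpoint for all known domains, sorted ascending by domain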
fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({
"query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}"
}))
- # DEBUG: print(f"DEBUG: fetched[]='{type(fetched)}'")
+ logger.debug(f"fetched[]='{type(fetched)}'")
if "error_message" in fetched:
- print(f"WARNING: post_json_api() for 'gql.api.bka.li' returned error message: {fetched['error_message']}")
+ logger.warning(f"post_json_api() for 'gql.api.bka.li' returned error message: {fetched['error_message']}")
return 100
elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
- print(f"WARNING: post_json_api() returned error: {fetched['error']['message']}")
+ logger.warning(f"post_json_api() returned error: {fetched['error']['message']}")
return 101
rows = fetched["json"]
- # DEBUG: print(f"DEBUG: rows({len(rows)})[]='{type(rows)}'")
+ logger.debug(f"rows({len(rows)})[]='{type(rows)}'")
if len(rows) == 0:
raise Exception("WARNING: Returned no records")
elif "data" not in rows:
raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'")
for entry in rows["data"]["nodeinfo"]:
- # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'")
+ logger.debug(f"entry['{type(entry)}']='{entry}'")
if "domain" not in entry:
- print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!")
+ logger.warning(f"entry()={len(entry)} does not contain 'domain' - SKIPPED!")
continue
elif not validators.domain(entry["domain"]):
- print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!")
+ logger.warning(f"domain='{entry['domain']}' is not a valid domain - SKIPPED!")
continue
elif blacklist.is_blacklisted(entry["domain"]):
- # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!")
+ logger.debug(f"domain='{entry['domain']}' is blacklisted - SKIPPED!")
continue
elif instances.is_registered(entry["domain"]):
- # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!")
+ logger.debug(f"domain='{entry['domain']}' is already registered - SKIPPED!")
continue
elif instances.is_recent(entry["domain"]):
- # DEBUG: print(f"DEBUG: domain='{entry['domain']}' has been recently fetched - SKIPPED!")
+ logger.debug(f"domain='{entry['domain']}' has been recently fetched - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...")
+ logger.debug(f"Adding domain='{entry['domain']}' ...")
domains.append(entry["domain"])
except network.exceptions as exception:
- print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}' - EXIT!")
+ logger.error(f"Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}' - EXIT!")
return 102
- # DEBUG: print(f"DEBUG: domains()={len(domains)}")
+ logger.debug(f"domains()={len(domains)}")
if len(domains) > 0:
locking.acquire()
- print(f"INFO: Adding {len(domains)} new instances ...")
+ logger.info(f"Adding {len(domains)} new instances ...")
for domain in domains:
try:
- print(f"INFO: Fetching instances from domain='{domain}' ...")
+ logger.info(f"Fetching instances from domain='{domain}' ...")
federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
- # DEBUG: print(f"DEBUG: Invoking cookies.clear({domain}) ...")
+ logger.debug(f"Invoking cookies.clear({domain}) ...")
cookies.clear(domain)
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_bkali) from domain='{domain}'")
+ logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_bkali) from domain='{domain}'")
instances.set_last_error(domain, exception)
- # DEBUG: print("DEBUG: Success - EXIT!")
+ logger.debug("Success - EXIT!")
return 0
def fetch_blocks(args: argparse.Namespace):
- # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
+ logger.debug(f"args[]='{type(args)}' - CALLED!")
if args.domain is not None and args.domain != "":
- # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...")
+ logger.debug(f"args.domain='{args.domain}' - checking ...")
if not validators.domain(args.domain):
- print(f"WARNING: domain='{args.domain}' is not valid.")
+ logger.warning(f"domain='{args.domain}' is not valid.")
return
elif blacklist.is_blacklisted(args.domain):
- print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!")
+ logger.warning(f"domain='{args.domain}' is blacklisted, won't check it!")
return
elif not instances.is_registered(args.domain):
- print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
+ logger.warning(f"domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.")
return
locking.acquire()
if args.domain is not None and args.domain != "":
# Re-check single domain
- # DEBUG: print(f"DEBUG: Querying database for single args.domain='{args.domain}' ...")
+ logger.debug(f"Querying database for single args.domain='{args.domain}' ...")
fba.cursor.execute(
"SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
)
elif args.software is not None and args.software != "":
# Re-check single software
- # DEBUG: print(f"DEBUG: Querying database for args.software='{args.software}' ...")
+ logger.debug(f"Querying database for args.software='{args.software}' ...")
fba.cursor.execute(
"SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ?", [args.software]
)
)
rows = fba.cursor.fetchall()
- print(f"INFO: Checking {len(rows)} entries ...")
+ logger.info(f"Checking {len(rows)} entries ...")
for blocker, software, origin, nodeinfo_url in rows:
- # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
+ logger.debug("BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url)
blockdict = list()
blocker = tidyup.domain(blocker)
- # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software)
+ logger.debug("AFTER blocker,software:", blocker, software)
if blocker == "":
- print("WARNING: blocker is now empty!")
+ logger.warning("blocker is now empty!")
continue
elif nodeinfo_url is None or nodeinfo_url == "":
- # DEBUG: print(f"DEBUG: blocker='{blocker}',software='{software}' has empty nodeinfo_url")
+ logger.debug(f"blocker='{blocker}',software='{software}' has empty nodeinfo_url")
continue
elif blacklist.is_blacklisted(blocker):
- print(f"WARNING: blocker='{blocker}' is blacklisted now!")
+ logger.warning(f"blocker='{blocker}' is blacklisted now!")
continue
- # DEBUG: print(f"DEBUG: blocker='{blocker}'")
+ logger.debug(f"blocker='{blocker}'")
instances.set_last_blocked(blocker)
if software == "pleroma":
- print(f"INFO: blocker='{blocker}',software='{software}'")
+ logger.info(f"blocker='{blocker}',software='{software}'")
pleroma.fetch_blocks(blocker, origin, nodeinfo_url)
elif software == "mastodon":
- print(f"INFO: blocker='{blocker}',software='{software}'")
+ logger.info(f"blocker='{blocker}',software='{software}'")
mastodon.fetch_blocks(blocker, origin, nodeinfo_url)
elif software == "lemmy":
- print(f"INFO: blocker='{blocker}',software='{software}'")
+ logger.info(f"blocker='{blocker}',software='{software}'")
lemmy.fetch_blocks(blocker, origin, nodeinfo_url)
elif software == "friendica" or software == "misskey":
- print(f"INFO: blocker='{blocker}',software='{software}'")
+ logger.info(f"blocker='{blocker}',software='{software}'")
blocking = list()
if software == "friendica":
elif software == "misskey":
blocking = misskey.fetch_blocks(blocker)
- print(f"INFO: Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...")
+ logger.info(f"Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...")
for block_level, blocklist in blocking.items():
- # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
+ logger.debug("blocker,block_level,blocklist():", blocker, block_level, len(blocklist))
block_level = tidyup.domain(block_level)
- # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
+ logger.debug("AFTER-block_level:", block_level)
if block_level == "":
- print("WARNING: block_level is empty, blocker:", blocker)
+ logger.warning("block_level is empty, blocker:", blocker)
continue
- # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
+ logger.debug(f"Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...")
for block in blocklist:
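+ # Each block entry is expected to yield exactly (blocked, reason) when unpacking its values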
blocked, reason = block.values()
- # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
+ logger.debug(f"blocked='{blocked}',reason='{reason}' - BEFORE!")
blocked = tidyup.domain(blocked)
reason = tidyup.reason(reason) if reason is not None and reason != "" else None
- # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")
+ logger.debug(f"blocked='{blocked}',reason='{reason}' - AFTER!")
if blocked == "":
- print("WARNING: blocked is empty:", blocker)
+ logger.warning("blocked is empty:", blocker)
continue
elif blacklist.is_blacklisted(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
+ logger.debug(f"blocked='{blocked}' is blacklisted - skipping!")
continue
elif blocked.count("*") > 0:
# Some friendica servers also obscure domains without hash
row = instances.deobscure("*", blocked)
- # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
+ logger.debug(f"row[]='{type(row)}'")
if row is None:
- print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
+ logger.warning(f"Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
continue
blocked = row[0]
# Some obscure them with question marks, not sure if that's dependent on version or not
row = instances.deobscure("?", blocked)
- # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
+ logger.debug(f"row[]='{type(row)}'")
if row is None:
- print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
+ logger.warning(f"Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!")
continue
blocked = row[0]
origin = row[1]
nodeinfo_url = row[2]
- # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
+ logger.debug("Looking up instance by domain:", blocked)
if not validators.domain(blocked):
- print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - SKIPPED!")
+ logger.warning(f"blocked='{blocked}',software='{software}' is not a valid domain name - SKIPPED!")
continue
elif blocked.endswith(".arpa"):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is a reverse IP domain - SKIPPED!")
+ logger.debug(f"blocked='{blocked}' is a reverse IP domain - SKIPPED!")
continue
elif blocked.endswith(".tld"):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is a fake domain - SKIPPED!")
+ logger.debug(f"blocked='{blocked}' is a fake domain - SKIPPED!")
continue
elif not instances.is_registered(blocked):
- # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker)
+ logger.debug("Hash wasn't found, adding:", blocked, blocker)
try:
instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url)
except network.exceptions as exception:
"reason" : reason
})
else:
- # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
+ logger.debug(f"Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...")
blocks.update_last_seen(blocker, blocked, block_level)
blocks.update_reason(reason, blocker, blocked, block_level)
- # DEBUG: print(f"DEBUG: Invoking cookies.clear({blocked}) ...")
+ logger.debug(f"Invoking cookies.clear({blocked}) ...")
cookies.clear(blocked)
- # DEBUG: print("DEBUG: Committing changes ...")
+ logger.debug("Committing changes ...")
fba.connection.commit()
else:
- print("WARNING: Unknown software:", blocker, software)
+ logger.warning("Unknown software:", blocker, software)
if instances.has_pending(blocker):
- # DEBUG: print(f"DEBUG: Invoking instances.update_data({blocker}) ...")
+ logger.debug(f"Invoking instances.update_data({blocker}) ...")
instances.update_data(blocker)
if config.get("bot_enabled") and len(blockdict) > 0:
network.send_bot_post(blocker, blockdict)
- # DEBUG: print(f"DEBUG: Invoking cookies.clear({blocker}) ...")
+ logger.debug(f"Invoking cookies.clear({blocker}) ...")
cookies.clear(blocker)
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def fetch_observer(args: argparse.Namespace):
- # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
+ logger.debug(f"args[]='{type(args)}' - CALLED!")
types = [
"akoma",
"birdsitelive",
locking.acquire()
- print(f"INFO: Fetching {len(types)} different table data ...")
+ logger.info(f"Fetching {len(types)} different table data ...")
for software in types:
doc = None
try:
- # DEBUG: print(f"DEBUG: Fetching table data for software='{software}' ...")
+ logger.debug(f"Fetching table data for software='{software}' ...")
raw = fba.fetch_url(f"https://fediverse.observer/app/views/tabledata.php?software={software}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
- # DEBUG: print(f"DEBUG: raw[{type(raw)}]()={len(raw)}")
+ logger.debug(f"raw[{type(raw)}]()={len(raw)}")
doc = bs4.BeautifulSoup(raw, features='html.parser')
- # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
+ logger.debug(f"doc[]='{type(doc)}'")
except network.exceptions as exception:
- print(f"WARNING: Cannot fetch software='{software}' from fediverse.observer: '{type(exception)}'")
+ logger.warning(f"Cannot fetch software='{software}' from fediverse.observer: '{type(exception)}'")
continue
items = doc.findAll("a", {"class": "url"})
- print(f"INFO: Checking {len(items)} items,software='{software}' ...")
+ logger.info(f"Checking {len(items)} items,software='{software}' ...")
for item in items:
- # DEBUG: print(f"DEBUG: item[]='{type(item)}'")
+ logger.debug(f"item[]='{type(item)}'")
domain = item.decode_contents()
- # DEBUG: print(f"DEBUG: domain='{domain}'")
+ logger.debug(f"domain='{domain}'")
if not validators.domain(domain.split("/")[0]):
- print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!")
+ logger.warning(f"domain='{domain}' is not a valid domain - SKIPPED!")
continue
elif blacklist.is_blacklisted(domain):
- # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
+ logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!")
continue
elif instances.is_registered(domain):
- # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
+ logger.debug(f"domain='{domain}' is already registered - SKIPPED!")
continue
- print(f"INFO: Fetching instances for domain='{domain}',software='{software}'")
+ logger.info(f"Fetching instances for domain='{domain}',software='{software}'")
federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
- # DEBUG: print(f"DEBUG: Invoking cookies.clear({domain}) ...")
+ logger.debug(f"Invoking cookies.clear({domain}) ...")
cookies.clear(domain)
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def fetch_cs(args: argparse.Namespace):
- # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
+ logger.debug(f"args[]='{type(args)}' - CALLED!")
extensions = [
"extra",
"abbr",
}
raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text
- # DEBUG: print(f"DEBUG: raw()={len(raw)}[]='{type(raw)}'")
+ logger.debug(f"raw()={len(raw)}[]='{type(raw)}'")
doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser')
- # DEBUG: print(f"DEBUG: doc()={len(doc)}[]='{type(doc)}'")
+ logger.debug(f"doc()={len(doc)}[]='{type(doc)}'")
silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody")
- # DEBUG: print(f"DEBUG: silenced[]='{type(silenced)}'")
+ logger.debug(f"silenced[]='{type(silenced)}'")
domains["silenced"] = domains["silenced"] + federation.find_domains(silenced)
blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody")
- # DEBUG: print(f"DEBUG: blocked[]='{type(blocked)}'")
+ logger.debug(f"blocked[]='{type(blocked)}'")
domains["reject"] = domains["reject"] + federation.find_domains(blocked)
- # DEBUG: print(f"DEBUG: domains()={len(domains)}")
+ logger.debug(f"domains()={len(domains)}")
if len(domains) > 0:
locking.acquire()
- print(f"INFO: Adding {len(domains)} new instances ...")
+ logger.info(f"Adding {len(domains)} new instances ...")
for block_level in domains:
- # DEBUG: print(f"DEBUG: block_level='{block_level}'")
+ logger.debug(f"block_level='{block_level}'")
for row in domains[block_level]:
- # DEBUG: print(f"DEBUG: row='{row}'")
+ logger.debug(f"row='{row}'")
if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level):
- # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
+ logger.debug(f"domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...")
blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level)
if not instances.is_registered(row["domain"]):
try:
- print(f"INFO: Fetching instances from domain='{row['domain']}' ...")
+ logger.info(f"Fetching instances from domain='{row['domain']}' ...")
federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name)
- # DEBUG: print(f"DEBUG: Invoking cookies.clear({row['domain']}) ...")
+ logger.debug(f"Invoking cookies.clear({row['domain']}) ...")
cookies.clear(row["domain"])
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_cs) from domain='{row['domain']}'")
+ logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_cs) from domain='{row['domain']}'")
instances.set_last_error(row["domain"], exception)
- # DEBUG: print("DEBUG: Committing changes ...")
+ logger.debug("Committing changes ...")
fba.connection.commit()
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def fetch_fba_rss(args: argparse.Namespace):
- # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
+ logger.debug(f"args[]='{type(args)}' - CALLED!")
domains = list()
- print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...")
+ logger.info(f"Fetch FBA-specific RSS args.feed='{args.feed}' ...")
response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
- # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
+ logger.debug(f"response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
if response.ok and response.status_code < 300 and len(response.text) > 0:
- # DEBUG: print(f"DEBUG: Parsing RSS feed ({len(response.text)} Bytes) ...")
+ logger.debug(f"Parsing RSS feed ({len(response.text)} Bytes) ...")
rss = atoma.parse_rss_bytes(response.content)
- # DEBUG: print(f"DEBUG: rss[]='{type(rss)}'")
+ logger.debug(f"rss[]='{type(rss)}'")
for item in rss.items:
- # DEBUG: print(f"DEBUG: item={item}")
+ logger.debug(f"item={item}")
domain = item.link.split("=")[1]
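+ # The domain is taken from the part of the item link after the first '='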
if blacklist.is_blacklisted(domain):
- # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
+ logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!")
continue
elif domain in domains:
- # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
+ logger.debug(f"domain='{domain}' is already added - SKIPPED!")
continue
elif instances.is_registered(domain):
- # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
+ logger.debug(f"domain='{domain}' is already registered - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: Adding domain='{domain}'")
+ logger.debug(f"Adding domain='{domain}'")
domains.append(domain)
- # DEBUG: print(f"DEBUG: domains()={len(domains)}")
+ logger.debug(f"domains()={len(domains)}")
if len(domains) > 0:
locking.acquire()
- print(f"INFO: Adding {len(domains)} new instances ...")
+ logger.info(f"Adding {len(domains)} new instances ...")
for domain in domains:
try:
- print(f"INFO: Fetching instances from domain='{domain}' ...")
+ logger.info(f"Fetching instances from domain='{domain}' ...")
federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
- # DEBUG: print(f"DEBUG: Invoking cookies.clear({domain}) ...")
+ logger.debug(f"Invoking cookies.clear({domain}) ...")
cookies.clear(domain)
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fba_rss) from domain='{domain}'")
+ logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_fba_rss) from domain='{domain}'")
instances.set_last_error(domain, exception)
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def fetch_fbabot_atom(args: argparse.Namespace):
- # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
+ logger.debug(f"args[]='{type(args)}' - CALLED!")
feed = "https://ryona.agency/users/fba/feed.atom"
domains = list()
- print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...")
+ logger.info(f"Fetching ATOM feed='{feed}' from FBA bot account ...")
response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
- # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
+ logger.debug(f"response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}")
if response.ok and response.status_code < 300 and len(response.text) > 0:
- # DEBUG: print(f"DEBUG: Parsing ATOM feed ({len(response.text)} Bytes) ...")
+ logger.debug(f"Parsing ATOM feed ({len(response.text)} Bytes) ...")
atom = atoma.parse_atom_bytes(response.content)
- # DEBUG: print(f"DEBUG: atom[]='{type(atom)}'")
+ logger.debug(f"atom[]='{type(atom)}'")
for entry in atom.entries:
- # DEBUG: print(f"DEBUG: entry[]='{type(entry)}'")
+ logger.debug(f"entry[]='{type(entry)}'")
doc = bs4.BeautifulSoup(entry.content.value, "html.parser")
- # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
+ logger.debug(f"doc[]='{type(doc)}'")
for element in doc.findAll("a"):
for href in element["href"].split(","):
- # DEBUG: print(f"DEBUG: href[{type(href)}]={href}")
+ logger.debug(f"href[{type(href)}]={href}")
domain = tidyup.domain(href)
- # DEBUG: print(f"DEBUG: domain='{domain}'")
+ logger.debug(f"domain='{domain}'")
if blacklist.is_blacklisted(domain):
- # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
+ logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!")
continue
elif domain in domains:
- # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!")
+ logger.debug(f"domain='{domain}' is already added - SKIPPED!")
continue
elif instances.is_registered(domain):
- # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!")
+ logger.debug(f"domain='{domain}' is already registered - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}")
+ logger.debug(f"Adding domain='{domain}',domains()={len(domains)}")
domains.append(domain)
- # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}")
+ logger.debug(f"domains({len(domains)})={domains}")
if len(domains) > 0:
locking.acquire()
- print(f"INFO: Adding {len(domains)} new instances ...")
+ logger.info(f"Adding {len(domains)} new instances ...")
for domain in domains:
try:
- print(f"INFO: Fetching instances from domain='{domain}' ...")
+ logger.info(f"Fetching instances from domain='{domain}' ...")
federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name)
- # DEBUG: print(f"DEBUG: Invoking cookies.clear({domain}) ...")
+ logger.debug(f"Invoking cookies.clear({domain}) ...")
cookies.clear(domain)
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fbabot_atom) from domain='{domain}'")
+ logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_fbabot_atom) from domain='{domain}'")
instances.set_last_error(domain, exception)
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def fetch_instances(args: argparse.Namespace) -> int:
- # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
+ logger.debug(f"args[]='{type(args)}' - CALLED!")
locking.acquire()
# Initial fetch
try:
- print(f"INFO: Fetching instances from args.domain='{args.domain}' ...")
+ logger.info(f"Fetching instances from args.domain='{args.domain}' ...")
federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name)
- # DEBUG: print(f"DEBUG: Invoking cookies.clear({args.domain}) ...")
+ logger.debug(f"Invoking cookies.clear({args.domain}) ...")
cookies.clear(args.domain)
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from args.domain='{args.domain}'")
+ logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_instances) from args.domain='{args.domain}'")
instances.set_last_error(args.domain, exception)
return 100
if args.single:
- # DEBUG: print("DEBUG: Not fetching more instances - EXIT!")
+ logger.debug("Not fetching more instances - EXIT!")
return 0
# Loop through some instances
)
rows = fba.cursor.fetchall()
- print(f"INFO: Checking {len(rows)} entries ...")
+ logger.info(f"Checking {len(rows)} entries ...")
for row in rows:
- # DEBUG: print(f"DEBUG: domain='{row[0]}'")
+ logger.debug(f"domain='{row[0]}'")
if blacklist.is_blacklisted(row[0]):
- print("WARNING: domain is blacklisted:", row[0])
+ logger.warning("domain is blacklisted:", row[0])
continue
try:
- print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'")
+ logger.info(f"Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'")
federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3])
- # DEBUG: print(f"DEBUG: Invoking cookies.clear({row[0]}) ...")
+ logger.debug(f"Invoking cookies.clear({row[0]}) ...")
cookies.clear(row[0])
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from domain='{row[0]}'")
+ logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_instances) from domain='{row[0]}'")
instances.set_last_error(row[0], exception)
- # DEBUG: print("DEBUG: Success - EXIT!")
+ logger.debug("Success - EXIT!")
return 0
def fetch_oliphant(args: argparse.Namespace):
- # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
+ logger.debug(f"args[]='{type(args)}' - CALLED!")
locking.acquire()
# Base URL
for block in blocklists:
# Is domain given and not equal blocker?
if isinstance(args.domain, str) and args.domain != block["blocker"]:
- # DEBUG: print(f"DEBUG: Skipping blocker='{block['blocker']}', not matching args.domain='{args.domain}'")
+ logger.debug(f"Skipping blocker='{block['blocker']}', not matching args.domain='{args.domain}'")
continue
elif args.domain in domains:
- # DEBUG: print(f"DEBUG: args.domain='{args.domain}' already handled - SKIPPED!")
+ logger.debug(f"args.domain='{args.domain}' already handled - SKIPPED!")
continue
# Fetch this URL
- print(f"INFO: Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...")
+ logger.info(f"Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...")
response = fba.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
- # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
+ logger.debug(f"response[]='{type(response)}'")
if response.ok and response.content != "":
- # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
+ logger.debug(f"Fetched {len(response.content)} Bytes, parsing CSV ...")
reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix")
- # DEBUG: print(f"DEBUG: reader[]='{type(reader)}'")
+ logger.debug(f"reader[]='{type(reader)}'")
for row in reader:
domain = None
if "#domain" in row:
elif "domain" in row:
domain = row["domain"]
else:
- # DEBUG: print(f"DEBUG: row='{row}' does not contain domain column")
+ logger.debug(f"row='{row}' does not contain domain column")
continue
if not validators.domain(domain):
- print(f"WARNING: domain='{domain}' is not a valid domain name - SKIPPED!")
+ logger.warning(f"domain='{domain}' is not a valid domain name - SKIPPED!")
continue
elif domain.endswith(".arpa"):
- # DEBUG: print(f"DEBUG: domain='{domain}' is a reverse IP domain - SKIPPED!")
+ logger.debug(f"domain='{domain}' is a reverse IP domain - SKIPPED!")
continue
elif domain.endswith(".tld"):
- # DEBUG: print(f"DEBUG: domain='{domain}' is a fake domain - SKIPPED!")
+ logger.debug(f"domain='{domain}' is a fake domain - SKIPPED!")
continue
elif blacklist.is_blacklisted(domain):
- # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
+ logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: Marking domain='{domain}' as handled")
+ logger.debug(f"Marking domain='{domain}' as handled")
domains.append(domain)
- # DEBUG: print(f"DEBUG: Processing domain='{domain}' ...")
+ logger.debug(f"Processing domain='{domain}' ...")
processed = fba.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name)
- # DEBUG: print(f"DEBUG: processed='{processed}'")
+ logger.debug(f"processed='{processed}'")
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def fetch_txt(args: argparse.Namespace):
- # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!")
+ logger.debug(f"args[]='{type(args)}' - CALLED!")
locking.acquire()
# Static URLs
"https://seirdy.one/pb/bsl.txt",
)
- print(f"INFO: Checking {len(urls)} text file(s) ...")
+ logger.info(f"Checking {len(urls)} text file(s) ...")
for url in urls:
- # DEBUG: print(f"DEBUG: Fetching url='{url}' ...")
+ logger.debug(f"Fetching url='{url}' ...")
response = fba.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
- # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
+ logger.debug(f"response[]='{type(response)}'")
if response.ok and response.text != "":
- # DEBUG: print(f"DEBUG: Returned {len(response.text.strip())} Bytes for processing")
+ logger.debug(f"Returned {len(response.text.strip())} Bytes for processing")
domains = response.text.split("\n")
- print(f"INFO: Processing {len(domains)} domains ...")
+ logger.info(f"Processing {len(domains)} domains ...")
for domain in domains:
- # DEBUG: print(f"DEBUG: domain='{domain}'")
+ logger.debug(f"domain='{domain}'")
if domain == "":
- # DEBUG: print("DEBUG: domain is empty - SKIPPED!")
+ logger.debug("domain is empty - SKIPPED!")
continue
elif not validators.domain(domain):
- print(f"WARNING: domain='{domain}' is not a valid domain name - SKIPPED!")
+ logger.warning(f"domain='{domain}' is not a valid domain name - SKIPPED!")
continue
elif domain.endswith(".arpa"):
- # DEBUG: print(f"DEBUG: domain='{domain}' is a reverse IP domain - SKIPPED!")
+ logger.debug(f"domain='{domain}' is a reverse IP domain - SKIPPED!")
continue
elif domain.endswith(".tld"):
- # DEBUG: print(f"DEBUG: domain='{domain}' is a fake domain - SKIPPED!")
+ logger.debug(f"domain='{domain}' is a fake domain - SKIPPED!")
continue
elif blacklist.is_blacklisted(domain):
- # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
+ logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: domain='{domain}'")
+ logger.debug(f"domain='{domain}'")
processed = fba.process_domain(domain, 'seirdy.one', inspect.currentframe().f_code.co_name)
- # DEBUG: print(f"DEBUG: processed='{processed}'")
+ logger.debug(f"processed='{processed}'")
if not processed:
- # DEBUG: print(f"DEBUG: domain='{domain}' was not generically processed - SKIPPED!")
+ logger.debug(f"domain='{domain}' was not generically processed - SKIPPED!")
continue
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
+
import bs4
import reqto
import validators
-
from fba.helpers import config
from fba.helpers import cookies
from fba.http import network
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
def determine(domain: str, headers: dict) -> dict:
- # DEBUG: print(f"DEBUG: domain='{domain}',headers()={len(headers)} - CALLED!")
+ logger.debug(f"domain='{domain}',headers()={len(headers)} - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
reqheaders = headers
# Fetch / to check for meta tag indicating csrf
- # DEBUG: print(f"DEBUG: Fetching / from domain='{domain}' for CSRF check ...")
+ logger.debug(f"Fetching / from domain='{domain}' for CSRF check ...")
response = reqto.get(
f"https://{domain}/",
headers=network.web_headers,
timeout=(config.get("connection_timeout"), config.get("read_timeout"))
)
- # DEBUG: print(f"DEBUG: response.ok='{response.ok}',response.status_code={response.status_code},response.text()={len(response.text)}")
+ logger.debug(f"response.ok='{response.ok}',response.status_code={response.status_code},response.text()={len(response.text)}")
if response.ok and response.status_code < 300 and response.text.find("<html") > 0:
# Save cookies
- # DEBUG: print(f"DEBUG: Parsing response.text()={len(response.text)} Bytes ...")
+ logger.debug(f"Parsing response.text()={len(response.text)} Bytes ...")
cookies.store(domain, response.cookies.get_dict())
# Parse text
response.text,
"html.parser"
)
- # DEBUG: print(f"DEBUG: meta[]='{type(meta)}'")
+ logger.debug(f"meta[]='{type(meta)}'")
tag = meta.find("meta", attrs={"name": "csrf-token"})
- # DEBUG: print(f"DEBUG: tag={tag}")
+ logger.debug(f"tag={tag}")
if tag is not None:
- # DEBUG: print(f"DEBUG: Adding CSRF token='{tag['content']}' for domain='{domain}'")
+ logger.debug(f"Adding CSRF token='{tag['content']}' for domain='{domain}'")
reqheaders["X-CSRF-Token"] = tag["content"]
- # DEBUG: print(f"DEBUG: reqheaders()={len(reqheaders)} - EXIT!")
+ logger.debug(f"reqheaders()={len(reqheaders)} - EXIT!")
return reqheaders
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import hashlib
+import logging
import sqlite3
from urllib.parse import urlparse
from fba.models import instances
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
# Connect to database
connection = sqlite3.connect("blocks.db")
cursor = connection.cursor()
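+# A single module-level SQLite connection and cursor are shared across commands (fba.connection / fba.cursor)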
##### Other functions #####
def is_primitive(var: any) -> bool:
- # DEBUG: print(f"DEBUG: var[]='{type(var)}' - CALLED!")
+ logger.debug(f"var[]='{type(var)}' - CALLED!")
return type(var) in {int, str, float, bool} or var is None
def get_hash(domain: str) -> str:
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
return hashlib.sha256(domain.encode("utf-8")).hexdigest()
def fetch_url(url: str, headers: dict, timeout: tuple) -> requests.models.Response:
- # DEBUG: print(f"DEBUG: url='{url}',headers()={len(headers)},timeout={timeout} - CALLED!")
+ logger.debug(f"url='{url}',headers()={len(headers)},timeout={timeout} - CALLED!")
if not isinstance(url, str):
raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
elif url == "":
elif not isinstance(timeout, tuple):
raise ValueError(f"Parameter timeout[]='{type(timeout)}' is not 'tuple'")
- # DEBUG: print(f"DEBUG: Parsing url='{url}'")
+ logger.debug(f"Parsing url='{url}'")
components = urlparse(url)
# Invoke other function, avoid trailing ?
- # DEBUG: print(f"DEBUG: components[{type(components)}]={components}")
+ logger.debug(f"components[{type(components)}]={components}")
if components.query != "":
response = network.fetch_response(components.netloc, f"{components.path}?{components.query}", headers, timeout)
else:
response = network.fetch_response(components.netloc, components.path if isinstance(components.path, str) and components.path != '' else '/', headers, timeout)
- # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXXIT!")
+ logger.debug(f"response[]='{type(response)}' - EXXIT!")
return response
def process_domain(domain: str, blocker: str, command: str) -> bool:
- # DEBUG: print(f"DEBUG: domain='{domain}',blocker='{blocker}',command='{command}' - CALLED!")
+ logger.debug(f"domain='{domain}',blocker='{blocker}',command='{command}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
# Try to de-obscure it
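+ # deobscure() is expected to resolve the obscured pattern (e.g. 'ex*mple.tld') to a known instance row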
row = instances.deobscure("*", domain)
- # DEBUG: print(f"DEBUG: row[{type(row)}]='{row}'")
+ logger.debug(f"row[{type(row)}]='{row}'")
if row is None:
- print(f"WARNING: Cannot de-obfucate domain='{domain}' - SKIPPED!")
+ logger.warning(f"Cannot de-obfucate domain='{domain}' - SKIPPED!")
return False
- # DEBUG: print(f"DEBUG: domain='{domain}' de-obscured to '{row[0]}'")
+ logger.debug(f"domain='{domain}' de-obscured to '{row[0]}'")
domain = row[0]
elif domain.find("?") > 0:
# Try to de-obscure it
row = instances.deobscure("?", domain)
- # DEBUG: print(f"DEBUG: row[{type(row)}]='{row}'")
+ logger.debug(f"row[{type(row)}]='{row}'")
if row is None:
- print(f"WARNING: Cannot de-obfucate domain='{domain}' - SKIPPED!")
+ logger.warning(f"Cannot de-obfucate domain='{domain}' - SKIPPED!")
return False
- # DEBUG: print(f"DEBUG: domain='{domain}' de-obscured to '{row[0]}'")
+ logger.debug(f"domain='{domain}' de-obscured to '{row[0]}'")
domain = row[0]
if not validators.domain(domain.split("/")[0]):
- print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!")
+ logger.warning(f"domain='{domain}' is not a valid domain - SKIPPED!")
return False
elif domain.endswith(".arpa"):
- print(f"WARNING: domain='{domain}' is a reversed .arpa domain and should not be used generally.")
+ logger.warning(f"domain='{domain}' is a reversed .arpa domain and should not be used generally.")
return False
elif blacklist.is_blacklisted(domain):
- # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
+ logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!")
return False
elif instances.is_recent(domain):
- # DEBUG: print(f"DEBUG: domain='{domain}' has been recently checked - SKIPPED!")
+ logger.debug(f"domain='{domain}' has been recently checked - SKIPPED!")
return False
processed = False
try:
- print(f"INFO: Fetching instances for instane='{domain}',blocker='{blocker}',command='{command}' ...")
+ logger.info(f"Fetching instances for instane='{domain}',blocker='{blocker}',command='{command}' ...")
federation.fetch_instances(domain, blocker, None, command)
processed = True
- # DEBUG: print(f"DEBUG: Invoking cookies.clear({domain}) ...")
+ logger.debug(f"Invoking cookies.clear({domain}) ...")
cookies.clear(domain)
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_oliphant) from domain='{domain}'")
+ logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_oliphant) from domain='{domain}'")
instances.set_last_error(domain, exception)
- # DEBUG: print(f"DEBUG: processed='{processed}' - EXIT!")
+ logger.debug(f"processed='{processed}' - EXIT!")
return processed
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
+
import validators
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
# Don't check these, known trolls/flooders/testing/developing
blacklist = [
# Floods federation with fake nodes as "research" project
]
def is_blacklisted(domain: str) -> bool:
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
blacklisted = False
for peer in blacklist:
- # DEBUG: print(f"DEBUG: Checking peer='{peer}' ...")
+ logger.debug(f"Checking peer='{peer}' ...")
if peer in domain:
- # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted.")
+ logger.debug(f"domain='{domain}' is blacklisted.")
blacklisted = True
- # DEBUG: print(f"DEBUG: blacklisted='{blacklisted}' - EXIT!")
+ logger.debug(f"blacklisted='{blacklisted}' - EXIT!")
return blacklisted
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
# Cache for redundant SQL queries
_cache = {}
return key in _cache
def set_all(key: str, rows: list, value: any):
- # DEBUG: print(f"DEBUG: key='{key}',rows()={len(rows)},value[]='{type(value)}' - CALLED!")
+ logger.debug(f"key='{key}',rows()={len(rows)},value[]='{type(value)}' - CALLED!")
if not isinstance(key, str):
raise ValueError("Parameter key[]='{type(key)}' is not 'str'")
elif not key_exists(key):
- # DEBUG: print(f"DEBUG: Cache for key='{key}' not initialized.")
+ logger.debug(f"Cache for key='{key}' not initialized.")
_cache[key] = {}
for sub in rows:
- # DEBUG: print(f"DEBUG: Setting key='{key}',sub[{type(sub)}]='{sub}'")
+ logger.debug(f"Setting key='{key}',sub[{type(sub)}]='{sub}'")
if isinstance(sub, tuple):
- # DEBUG: print(f"DEBUG: Setting key='{key}',sub[{type(sub)}]='{sub}',value[]='{type(value)}'")
+ logger.debug(f"Setting key='{key}',sub[{type(sub)}]='{sub}',value[]='{type(value)}'")
_cache[key][sub[0]] = value
else:
- print(f"WARNING: Unsupported type sub[]='{type(sub)}'")
+ logger.warning(f"Unsupported type sub[]='{type(sub)}'")
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def set_sub_key(key: str, sub: str, value: any):
- # DEBUG: print(f"DEBUG: key='{key}',sub='{sub}',value[]='{type(value)}' - CALLED!")
+ logger.debug(f"key='{key}',sub='{sub}',value[]='{type(value)}' - CALLED!")
if not isinstance(key, str):
raise ValueError("Parameter key[]='{type(key)}' is not 'str'")
elif not isinstance(sub, str):
elif not key_exists(key):
raise Exception(f"Cache for key='{key}' is not initialized, but function invoked")
- # DEBUG: print(f"DEBUG: Setting key='{key}',sub='{sub}',value[]='{type(value)}' ...")
+ logger.debug(f"Setting key='{key}',sub='{sub}',value[]='{type(value)}' ...")
_cache[key][sub] = value
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def sub_key_exists(key: str, sub: str) -> bool:
- # DEBUG: print(f"DEBUG: key='{key}',sub='{sub}' - CALLED!")
+ logger.debug(f"key='{key}',sub='{sub}' - CALLED!")
if not isinstance(key, str):
raise ValueError("Parameter key[]='{type(key)}' is not 'str'")
elif not isinstance(sub, str):
exists = sub in _cache[key]
- # DEBUG: print(f"DEBUG: exists='{exists}' - EXIT!")
+ logger.debug(f"exists='{exists}' - EXIT!")
return exists
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
import json
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
with open("config.json") as f:
- # DEBUG: print("DEBUG: Loading configuration file ...")
+ logger.debug("Loading configuration file ...")
_config = json.loads(f.read())
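+# The configuration is loaded once at import time from config.json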
def get(key: str) -> any:
- # DEBUG: print(f"DEBUG: key[{type(key)}]={key} - CALLED!")
+ logger.debug(f"key[{type(key)}]={key} - CALLED!")
if not isinstance(key, str):
raise ValueError(f"Parameter key[]='{type(key)}' is not 'str'")
elif key == "":
elif not key in _config:
raise KeyError(f"key='{key}' does not exist in _config array")
- # DEBUG: print(f"DEBUG: _config[{key}]={_config[key]} - EXIT!")
+ logger.debug(f"_config[{key}]={_config[key]} - EXIT!")
return _config[key]
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
+
import validators
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
# Cookies store
_cookies = {}
def store (domain: str, cookies: dict):
- # DEBUG: print(f"DEBUG: domain='{domain}',cookies()={len(cookies)} - CALLED!")
+ logger.debug(f"domain='{domain}',cookies()={len(cookies)} - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
_cookies[domain] = cookies
- # DEBUG: print(f"DEBUG: EXIT!")
+ logger.debug(f"EXIT!")
def get_all(domain: str) -> dict:
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
elif not has(domain):
raise Exception(f"domain='{domain}' has no cookies stored, maybe invoke store() first?")
- # DEBUG: print(f"DEBUG: _cookies[{domain}]()={len(_cookies[domain])} - EXIT!")
+ logger.debug(f"_cookies[{domain}]()={len(_cookies[domain])} - EXIT!")
return _cookies[domain]
def has (domain: str) -> bool:
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
has_cookies = domain in _cookies
- # DEBUG: print(f"DEBUG: has_cookies='{has_cookies}' - EXIT!")
+ logger.debug(f"has_cookies='{has_cookies}' - EXIT!")
return has_cookies
def clear (domain: str):
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
if has(domain):
- # DEBUG: print(f"DEBUG: Removing cookies for domain='{domain}' ...")
+ logger.debug(f"Removing cookies for domain='{domain}' ...")
del _cookies[domain]
- # DEBUG: print(f"DEBUG: EXIT!")
+ logger.debug(f"EXIT!")
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
def has_key(lists: list, key: str, value: any) -> bool:
- # DEBUG: print(f"DEBUG: lists()={len(lists)},key='{key}',value[]='{type(value)}' - CALLED!")
+ logger.debug(f"lists()={len(lists)},key='{key}',value[]='{type(value)}' - CALLED!")
if not isinstance(lists, list):
raise ValueError(f"Parameter lists[]='{type(lists)}' is not 'list'")
elif not isinstance(key, str):
raise ValueError("Parameter 'key' is empty")
has = False
- # DEBUG: print(f"DEBUG: Checking lists()={len(lists)} ...")
+ logger.debug(f"Checking lists()={len(lists)} ...")
for row in lists:
- # DEBUG: print(f"DEBUG: row['{type(row)}']={row}")
+ logger.debug(f"row['{type(row)}']={row}")
if not isinstance(row, dict):
raise ValueError(f"row[]='{type(row)}' is not 'dict'")
elif not key in row:
has = True
break
- # DEBUG: print(f"DEBUG: has={has} - EXIT!")
+ logger.debug(f"has={has} - EXIT!")
return has
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
import os
import sys
import tempfile
import zc.lockfile
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
# Lock file
lockfile = tempfile.gettempdir() + '/fba.lock'
LOCK = None
def acquire():
global LOCK
- # DEBUG: print("DEBUG: CALLED!")
+ logger.debug("CALLED!")
try:
- # DEBUG: print(f"DEBUG: Acquiring lock: '{lockfile}'")
+ logger.debug(f"Acquiring lock: '{lockfile}'")
LOCK = zc.lockfile.LockFile(lockfile)
- # DEBUG: print("DEBUG: Lock obtained.")
+ logger.debug("Lock obtained.")
except zc.lockfile.LockError:
- print(f"ERROR: Cannot aquire lock: '{lockfile}'")
+ logger.error(f"Cannot aquire lock: '{lockfile}'")
sys.exit(100)
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def release():
- # DEBUG: print("DEBUG: CALLED!")
+ logger.debug("CALLED!")
if LOCK is not None:
- # DEBUG: print("DEBUG: Releasing lock ...")
+ logger.debug("Releasing lock ...")
LOCK.close()
- # DEBUG: print(f"DEBUG: Deleting lockfile='{lockfile}' ...")
+ logger.debug(f"Deleting lockfile='{lockfile}' ...")
os.remove(lockfile)
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
import re
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
def reason(string: str) -> str:
- # DEBUG: print(f"DEBUG: string='{string}' - CALLED!")
+ logger.debug(f"string='{string}' - CALLED!")
if not isinstance(string, str):
raise ValueError(f"Parameter string[]='{type(string)}' is not 'str'")
# Strip string
string = string.strip()
- # DEBUG: print(f"DEBUG: string='{string}' - EXIT!")
+ logger.debug(f"string='{string}' - EXIT!")
return string
def domain(string: str) -> str:
- # DEBUG: print(f"DEBUG: string='{string}' - CALLED!")
+ logger.debug(f"string='{string}' - CALLED!")
if not isinstance(string, str):
raise ValueError(f"Parameter string[]='{type(string)}' is not 'str'")
# All lower-case and strip spaces out + last dot
string = string.lower().strip().rstrip(".")
- # DEBUG: print(f"DEBUG: string='{string}' - #1")
+ logger.debug(f"string='{string}' - #1")
# No port number
string = re.sub("\:\d+$", "", string)
- # DEBUG: print(f"DEBUG: string='{string}' - #2")
+ logger.debug(f"string='{string}' - #2")
# No protocol, sometimes without the slashes
string = re.sub("^https?\:(\/*)", "", string)
- # DEBUG: print(f"DEBUG: string='{string}' - #3")
+ logger.debug(f"string='{string}' - #3")
# No trailing slash
string = re.sub("\/$", "", string)
- # DEBUG: print(f"DEBUG: string='{string}' - #4")
+ logger.debug(f"string='{string}' - #4")
# No @ or : sign
string = re.sub("^\@", "", string)
string = string.split(":")[0]
- # DEBUG: print(f"DEBUG: string='{string}' - #4")
+ logger.debug(f"string='{string}' - #4")
# No individual users in block lists
string = re.sub("(.+)\@", "", string)
- # DEBUG: print(f"DEBUG: string='{string}' - #5")
+ logger.debug(f"string='{string}' - #5")
if string.find("/profile/"):
string = string.split("/profile/")[0]
elif string.find("/users/"):
string = string.split("/users/")[0]
- # DEBUG: print(f"DEBUG: string='{string}' - EXIT!")
+ logger.debug(f"string='{string}' - EXIT!")
return string
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
import re
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
# Pattern instance for version numbers
patterns = [
# semantic version number (with v|V prefix)
]
def remove(software: str) -> str:
- # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
+ logger.debug(f"software='{software}' - CALLED!")
if "." not in software and " " not in software:
- print(f"WARNING: software='{software}' does not contain a version number.")
+ logger.warning(f"software='{software}' does not contain a version number.")
return software
temp = software
elif " - " in software:
temp = software.split(" - ")[0]
- # DEBUG: print(f"DEBUG: software='{software}'")
+ logger.debug(f"software='{software}'")
version = None
if " " in software:
version = temp.split(" ")[-1]
elif "-" in software:
version = temp.split("-")[-1]
else:
- # DEBUG: print(f"DEBUG: Was not able to find common seperator, returning untouched software='{software}'")
+ logger.debug(f"Was not able to find common seperator, returning untouched software='{software}'")
return software
match = None
- # DEBUG: print(f"DEBUG: Checking {len(patterns)} patterns ...")
+ logger.debug(f"Checking {len(patterns)} patterns ...")
for pattern in patterns:
# Run match()
match = pattern.match(version)
- # DEBUG: print(f"DEBUG: match[]='{type(match)}'")
+ logger.debug(f"match[]='{type(match)}'")
if isinstance(match, re.Match):
- # DEBUG: print(f"DEBUG: version='{version}' is matching pattern='{pattern}'")
+ logger.debug(f"version='{version}' is matching pattern='{pattern}'")
break
- # DEBUG: print(f"DEBUG: version[{type(version)}]='{version}',match='{match}'")
+ logger.debug(f"version[{type(version)}]='{version}',match='{match}'")
if not isinstance(match, re.Match):
- print(f"WARNING: version='{version}' does not match regex, leaving software='{software}' untouched.")
+ logger.warning(f"version='{version}' does not match regex, leaving software='{software}' untouched.")
return software
- # DEBUG: print(f"DEBUG: Found valid version number: '{version}', removing it ...")
+ logger.debug(f"Found valid version number: '{version}', removing it ...")
end = len(temp) - len(version) - 1
- # DEBUG: print(f"DEBUG: end[{type(end)}]={end}")
+ logger.debug(f"end[{type(end)}]={end}")
software = temp[0:end].strip()
if " version" in software:
- # DEBUG: print(f"DEBUG: software='{software}' contains word ' version'")
+ logger.debug(f"software='{software}' contains word ' version'")
software = strip_until(software, " version")
- # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
+ logger.debug(f"software='{software}' - EXIT!")
return software
def strip_powered_by(software: str) -> str:
- # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
+ logger.debug(f"software='{software}' - CALLED!")
if not isinstance(software, str):
raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
elif software == "":
raise ValueError("Parameter 'software' is empty")
elif "powered by" not in software:
- print(f"WARNING: Cannot find 'powered by' in software='{software}'!")
+ logger.warning(f"Cannot find 'powered by' in software='{software}'!")
return software
start = software.find("powered by ")
- # DEBUG: print(f"DEBUG: start[{type(start)}]='{start}'")
+ logger.debug(f"start[{type(start)}]='{start}'")
software = software[start + 11:].strip()
- # DEBUG: print(f"DEBUG: software='{software}'")
+ logger.debug(f"software='{software}'")
software = strip_until(software, " - ")
- # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
+ logger.debug(f"software='{software}' - EXIT!")
return software
def strip_hosted_on(software: str) -> str:
- # DEBUG: print(f"DEBUG: software='{software}' - CALLED!")
+ logger.debug(f"software='{software}' - CALLED!")
if not isinstance(software, str):
raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
elif software == "":
raise ValueError("Parameter 'software' is empty")
elif "hosted on" not in software:
- print(f"WARNING: Cannot find 'hosted on' in '{software}'!")
+ logger.warning(f"Cannot find 'hosted on' in '{software}'!")
return software
end = software.find("hosted on ")
- # DEBUG: print(f"DEBUG: end[{type(end)}]='{end}'")
+ logger.debug(f"end[{type(end)}]='{end}'")
software = software[0:end].strip()
- # DEBUG: print(f"DEBUG: software[{type(software)}]='{software}'")
+ logger.debug(f"software[{type(software)}]='{software}'")
software = strip_until(software, " - ")
- # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
+ logger.debug(f"software='{software}' - EXIT!")
return software
def strip_until(software: str, until: str) -> str:
- # DEBUG: print(f"DEBUG: software='{software}',until='{until}' - CALLED!")
+ logger.debug(f"software='{software}',until='{until}' - CALLED!")
if not isinstance(software, str):
raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
elif software == "":
elif until == "":
raise ValueError("Parameter 'until' is empty")
elif not until in software:
- print(f"WARNING: Cannot find '{until}' in '{software}'!")
+ logger.warning(f"Cannot find '{until}' in '{software}'!")
return software
# Next, strip until part
end = software.find(until)
- # DEBUG: print(f"DEBUG: end[{type(end)}]='{end}'")
+ logger.debug(f"end[{type(end)}]='{end}'")
if end > 0:
software = software[0:end].strip()
- # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
+ logger.debug(f"software='{software}' - EXIT!")
return software
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
+
from urllib.parse import urlparse
import bs4
from fba.networks import misskey
from fba.networks import peertube
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
# "rel" identifiers (no real URLs)
nodeinfo_identifier = [
"https://nodeinfo.diaspora.software/ns/schema/2.1",
]
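+# For orientation, a /.well-known/nodeinfo document typically has this shape
+# (illustrative values, not taken from this code base):
+#   {"links": [{"rel": "https://nodeinfo.diaspora.software/ns/schema/2.1",
+#               "href": "https://example.com/nodeinfo/2.1"}]}
+# fetch_wellknown_nodeinfo() below matches each link's "rel" against this list and then
+# fetches the document behind its "href".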
def fetch_instances(domain: str, origin: str, software: str, command: str, path: str = None):
- # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',software='{software}',path='{path}' - CALLED!")
+ logger.debug(f"domain='{domain}',origin='{origin}',software='{software}',path='{path}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
elif not isinstance(origin, str) and origin is not None:
raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
elif software is None:
- # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
+ logger.debug(f"Updating last_instance_fetch for domain='{domain}' ...")
instances.set_last_instance_fetch(domain)
- # DEBUG: print(f"DEBUG: software for domain='{domain}' is not set, determining ...")
+ logger.debug(f"software for domain='{domain}' is not set, determining ...")
software = None
try:
software = determine_software(domain, path)
except network.exceptions as exception:
- # DEBUG: print(f"DEBUG: Exception '{type(exception)}' during determining software type")
+ logger.debug(f"Exception '{type(exception)}' during determining software type")
pass
- # DEBUG: print(f"DEBUG: Determined software='{software}' for domain='{domain}'")
+ logger.debug(f"Determined software='{software}' for domain='{domain}'")
elif not isinstance(software, str):
raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
elif not isinstance(command, str):
raise ValueError(f"domain='{domain}' is a fake domain")
if not instances.is_registered(domain):
- # DEBUG: print(f"DEBUG: Adding new domain='{domain}',origin='{origin}',command='{command}',path='{path}',software='{software}'")
+ logger.debug(f"Adding new domain='{domain}',origin='{origin}',command='{command}',path='{path}',software='{software}'")
instances.add(domain, origin, command, path, software)
- # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
+ logger.debug(f"Updating last_instance_fetch for domain='{domain}' ...")
instances.set_last_instance_fetch(domain)
- # DEBUG: print("DEBUG: Fetching instances for domain:", domain, software)
+ logger.debug("Fetching instances for domain:", domain, software)
peerlist = fetch_peers(domain, software)
if peerlist is None:
- print("ERROR: Cannot fetch peers:", domain)
+ logger.warning("Cannot fetch peers:", domain)
return
elif instances.has_pending(domain):
- # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo data, flushing ...")
+ logger.debug(f"domain='{domain}' has pending nodeinfo data, flushing ...")
instances.update_data(domain)
- print(f"INFO: Checking {len(peerlist)} instances from domain='{domain}' ...")
+ logger.info(f"Checking {len(peerlist)} instances from domain='{domain}' ...")
for instance in peerlist:
- # DEBUG: print(f"DEBUG: instance='{instance}'")
+ logger.debug(f"instance='{instance}'")
if instance is None:
# Skip "None" types as tidup.domain() cannot parse them
continue
- # DEBUG: print(f"DEBUG: instance='{instance}' - BEFORE")
+ logger.debug(f"instance='{instance}' - BEFORE")
instance = tidyup.domain(instance)
- # DEBUG: print(f"DEBUG: instance='{instance}' - AFTER")
+ logger.debug(f"instance='{instance}' - AFTER")
if instance == "":
- print(f"WARNING: Empty instance after tidyup.domain(), domain='{domain}'")
+ logger.warning(f"Empty instance after tidyup.domain(), domain='{domain}'")
continue
elif not validators.domain(instance.split("/")[0]):
- print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}'")
+ logger.warning(f"Bad instance='{instance}' from domain='{domain}',origin='{origin}'")
continue
elif instance.endswith(".arpa"):
- print(f"WARNING: instance='{instance}' is a reversed .arpa domain and should not be used generally.")
+ logger.warning(f"instance='{instance}' is a reversed .arpa domain and should not be used generally.")
continue
elif blacklist.is_blacklisted(instance):
- # DEBUG: print("DEBUG: instance is blacklisted:", instance)
+ logger.debug("instance is blacklisted:", instance)
continue
elif instance.find("/profile/") > 0 or instance.find("/users/") > 0:
- # DEBUG: print(f"DEBUG: instance='{instance}' is a link to a single user profile - SKIPPED!")
+ logger.debug(f"instance='{instance}' is a link to a single user profile - SKIPPED!")
continue
elif instance.endswith(".tld"):
- # DEBUG: print(f"DEBUG: instance='{instance}' is a fake domain - SKIPPED!")
+ logger.debug(f"instance='{instance}' is a fake domain - SKIPPED!")
continue
elif not instances.is_registered(instance):
- # DEBUG: print("DEBUG: Adding new instance:", instance, domain)
+ logger.debug("Adding new instance:", instance, domain)
instances.add(instance, domain, command)
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def fetch_peers(domain: str, software: str) -> list:
- # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',software='{software}' - CALLED!")
+ logger.debug(f"domain({len(domain)})='{domain}',software='{software}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
raise ValueError(f"software[]='{type(software)}' is not 'str'")
if software == "misskey":
- # DEBUG: print(f"DEBUG: Invoking misskey.fetch_peers({domain}) ...")
+ logger.debug(f"Invoking misskey.fetch_peers({domain}) ...")
return misskey.fetch_peers(domain)
elif software == "lemmy":
- # DEBUG: print(f"DEBUG: Invoking lemmy.fetch_peers({domain}) ...")
+ logger.debug(f"Invoking lemmy.fetch_peers({domain}) ...")
return lemmy.fetch_peers(domain)
elif software == "peertube":
- # DEBUG: print(f"DEBUG: Invoking peertube.fetch_peers({domain}) ...")
+ logger.debug(f"Invoking peertube.fetch_peers({domain}) ...")
return peertube.fetch_peers(domain)
# Init peers variable
headers = tuple()
try:
- # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
+ logger.debug(f"Checking CSRF for domain='{domain}'")
headers = csrf.determine(domain, dict())
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
+ logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
instances.set_last_error(domain, exception)
return peers
- # DEBUG: print(f"DEBUG: Fetching peers from '{domain}',software='{software}' ...")
+ logger.debug(f"Fetching peers from '{domain}',software='{software}' ...")
data = network.get_json_api(
domain,
"/api/v1/instance/peers",
(config.get("connection_timeout"), config.get("read_timeout"))
)
- # DEBUG: print(f"DEBUG: data[]='{type(data)}'")
+ logger.debug(f"data[]='{type(data)}'")
if "error_message" in data:
- # DEBUG: print("DEBUG: Was not able to fetch peers, trying alternative ...")
+ logger.debug("Was not able to fetch peers, trying alternative ...")
data = network.get_json_api(
domain,
"/api/v3/site",
(config.get("connection_timeout"), config.get("read_timeout"))
)
- # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
+ logger.debug(f"response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
if "error_message" in data:
- print(f"WARNING: Could not reach any JSON API at domain='{domain}',status_code='{data['status_code']}',error_message='{data['error_message']}'")
+ logger.warning(f"Could not reach any JSON API at domain='{domain}',status_code='{data['status_code']}',error_message='{data['error_message']}'")
elif "federated_instances" in data["json"]:
- # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'")
+ logger.debug(f"Found federated_instances for domain='{domain}'")
peers = peers + add_peers(data["json"]["federated_instances"])
- # DEBUG: print("DEBUG: Added instance(s) to peers")
+ logger.debug("Added instance(s) to peers")
else:
message = "JSON response does not contain 'federated_instances' or 'error_message'"
- print(f"WARNING: {message},domain='{domain}'")
+ logger.warning(f"{message},domain='{domain}'")
instances.set_last_error(domain, message)
elif isinstance(data["json"], list):
# DEBUG print("DEBUG: Querying API was successful:", domain, len(data['json']))
peers = data["json"]
else:
- print(f"WARNING: Cannot parse data[json][]='{type(data['json'])}'")
+ logger.warning(f"Cannot parse data[json][]='{type(data['json'])}'")
- # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
+ logger.debug(f"Adding '{len(peers)}' for domain='{domain}'")
instances.set_total_peers(domain, peers)
- # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
+ logger.debug("Returning peers[]:", type(peers))
return peers
def fetch_nodeinfo(domain: str, path: str = None) -> dict:
- # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}' - CALLED!")
+ logger.debug(f"domain='{domain}',path='{path}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
elif not isinstance(path, str) and path is not None:
raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")
- # DEBUG: print(f"DEBUG: Fetching nodeinfo from domain='{domain}' ...")
+ logger.debug(f"Fetching nodeinfo from domain='{domain}' ...")
nodeinfo = fetch_wellknown_nodeinfo(domain)
- # DEBUG: print(f"DEBUG: nodeinfo[{type(nodeinfo)}]({len(nodeinfo)}='{nodeinfo}'")
+ logger.debug(f"nodeinfo[{type(nodeinfo)}]({len(nodeinfo)}='{nodeinfo}'")
if "error_message" not in nodeinfo and "json" in nodeinfo and len(nodeinfo["json"]) > 0:
- # DEBUG: print(f"DEBUG: Found nodeinfo[json]()={len(nodeinfo['json'])} - EXIT!")
+ logger.debug(f"Found nodeinfo[json]()={len(nodeinfo['json'])} - EXIT!")
return nodeinfo["json"]
# No CSRF by default, you don't have to add network.api_headers by yourself here
data = dict()
try:
- # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
+ logger.debug(f"Checking CSRF for domain='{domain}'")
headers = csrf.determine(domain, dict())
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during checking CSRF (nodeinfo,{__name__}) - EXIT!")
+ logger.warning(f"Exception '{type(exception)}' during checking CSRF (nodeinfo,{__name__}) - EXIT!")
instances.set_last_error(domain, exception)
return {
"status_code" : 500,
]
for request in request_paths:
- # DEBUG: print(f"DEBUG: path[{type(path)}]='{path}',request='{request}'")
+ logger.debug(f"path[{type(path)}]='{path}',request='{request}'")
if path is None or path == request or path == f"http://{domain}{path}" or path == f"https://{domain}{path}":
- # DEBUG: print(f"DEBUG: Fetching request='{request}' from domain='{domain}' ...")
+ logger.debug(f"Fetching request='{request}' from domain='{domain}' ...")
if path == f"http://{domain}{path}" or path == f"https://{domain}{path}":
- # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}' has protocol in path, splitting ...")
+ logger.debug(f"domain='{domain}',path='{path}' has protocol in path, splitting ...")
components = urlparse(path)
path = components.path
(config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
)
- # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
+ logger.debug(f"response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'")
if "error_message" not in data:
- # DEBUG: print("DEBUG: Success:", request)
+ logger.debug("Success:", request)
instances.set_detection_mode(domain, "STATIC_CHECK")
instances.set_nodeinfo_url(domain, request)
break
- print(f"WARNING: Failed fetching nodeinfo from domain='{domain}',status_code='{data['status_code']}',error_message='{data['error_message']}'")
+ logger.warning(f"Failed fetching nodeinfo from domain='{domain}',status_code='{data['status_code']}',error_message='{data['error_message']}'")
- # DEBUG: print(f"DEBUG: data()={len(data)} - EXIT!")
+ logger.debug(f"data()={len(data)} - EXIT!")
return data
def fetch_wellknown_nodeinfo(domain: str) -> dict:
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
headers = tuple()
try:
- # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
+ logger.debug(f"Checking CSRF for domain='{domain}'")
headers = csrf.determine(domain, dict())
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_wellknown_nodeinfo,{__name__}) - EXIT!")
+ logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_wellknown_nodeinfo,{__name__}) - EXIT!")
instances.set_last_error(domain, exception)
return {
"status_code" : 500,
"exception" : exception,
}
- # DEBUG: print("DEBUG: Fetching .well-known info for domain:", domain)
+ logger.debug("Fetching .well-known info for domain:", domain)
data = network.get_json_api(
domain,
"/.well-known/nodeinfo",
if "error_message" not in data:
nodeinfo = data["json"]
- # DEBUG: print("DEBUG: Found entries:", len(nodeinfo), domain)
+ logger.debug("Found entries:", len(nodeinfo), domain)
if "links" in nodeinfo:
- # DEBUG: print("DEBUG: Found links in nodeinfo():", len(nodeinfo["links"]))
+ logger.debug("Found links in nodeinfo():", len(nodeinfo["links"]))
for link in nodeinfo["links"]:
- # DEBUG: print(f"DEBUG: link[{type(link)}]='{link}'")
+ logger.debug(f"link[{type(link)}]='{link}'")
if not isinstance(link, dict) or not "rel" in link:
- print(f"WARNING: link[]='{type(link)}' is not 'dict' or no element 'rel' found")
+ logger.warning(f"link[]='{type(link)}' is not 'dict' or no element 'rel' found")
elif link["rel"] in nodeinfo_identifier:
# Default is that 'href' has a complete URL, but some hosts don't send that
url = link["href"]
components = urlparse(link["href"])
- # DEBUG: print(f"DEBUG: components[{type(components)}]='{components}'")
+ logger.debug(f"components[{type(components)}]='{components}'")
if components.scheme == "" and components.netloc == "":
- # DEBUG: print(f"DEBUG: link[href]='{link['href']}' has no scheme and host name in it, prepending from domain='{domain}'")
+ logger.debug(f"link[href]='{link['href']}' has no scheme and host name in it, prepending from domain='{domain}'")
url = f"https://{domain}{url}"
components = urlparse(url)
if not validators.domain(components.netloc):
- print(f"WARNING: components.netloc='{components.netloc}' is not a valid domain - SKIPPED!")
+ logger.warning(f"components.netloc='{components.netloc}' is not a valid domain - SKIPPED!")
continue
elif domain.endswith(".arpa"):
- print(f"WARNING: domain='{domain}' is a domain for reversed IP addresses - SKIPPED!")
+ logger.warning(f"domain='{domain}' is a domain for reversed IP addresses - SKIPPED!")
continue
elif domain.endswith(".tld"):
- print(f"WARNING: domain='{domain}' is a fake domain - SKIPPED!")
+ logger.warning(f"domain='{domain}' is a fake domain - SKIPPED!")
continue
elif blacklist.is_blacklisted(components.netloc):
- # DEBUG: print(f"DEBUG: components.netloc='{components.netloc}' is blacklisted - SKIPPED!")
+ logger.debug(f"components.netloc='{components.netloc}' is blacklisted - SKIPPED!")
continue
- # DEBUG: print("DEBUG: Fetching nodeinfo from:", url)
+ logger.debug("Fetching nodeinfo from:", url)
data = network.fetch_api_url(
url,
(config.get("connection_timeout"), config.get("read_timeout"))
)
- # DEBUG: print("DEBUG: href,data[]:", link["href"], type(data))
+ logger.debug("href,data[]:", link["href"], type(data))
if "error_message" not in data and "json" in data:
- # DEBUG: print("DEBUG: Found JSON nodeinfo():", len(data))
+ logger.debug("Found JSON nodeinfo():", len(data))
instances.set_detection_mode(domain, "AUTO_DISCOVERY")
instances.set_nodeinfo_url(domain, link["href"])
break
else:
instances.set_last_error(domain, data)
else:
- print("WARNING: Unknown 'rel' value:", domain, link["rel"])
+ logger.warning("Unknown 'rel' value:", domain, link["rel"])
else:
- print("WARNING: nodeinfo does not contain 'links':", domain)
+ logger.warning("nodeinfo does not contain 'links':", domain)
- # DEBUG: print("DEBUG: Returning data[]:", type(data))
+ logger.debug("Returning data[]:", type(data))
return data
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
- # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',path='{path}' - CALLED!")
+ logger.debug(f"domain({len(domain)})='{domain}',path='{path}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
elif path == "":
raise ValueError("Parameter 'path' is empty")
- # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}' - CALLED!")
+ logger.debug(f"domain='{domain}',path='{path}' - CALLED!")
software = None
- # DEBUG: print(f"DEBUG: Fetching path='{path}' from '{domain}' ...")
+ logger.debug(f"Fetching path='{path}' from '{domain}' ...")
response = network.fetch_response(domain, path, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
- # DEBUG: print("DEBUG: domain,response.ok,response.status_code,response.text[]:", domain, response.ok, response.status_code, type(response.text))
+ logger.debug("domain,response.ok,response.status_code,response.text[]:", domain, response.ok, response.status_code, type(response.text))
if response.ok and response.status_code < 300 and response.text.find("<html") > 0:
- # DEBUG: print(f"DEBUG: Parsing response.text()={len(response.text)} Bytes ...")
+ logger.debug(f"Parsing response.text()={len(response.text)} Bytes ...")
doc = bs4.BeautifulSoup(response.text, "html.parser")
- # DEBUG: print("DEBUG: doc[]:", type(doc))
+ logger.debug("doc[]:", type(doc))
generator = doc.find("meta", {"name" : "generator"})
site_name = doc.find("meta", {"property": "og:site_name"})
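+ # Typical markup these two lookups target (illustrative values):
+ #   <meta name="generator" content="Mastodon 4.2.1">
+ #   <meta property="og:site_name" content="Example Social">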
- # DEBUG: print(f"DEBUG: generator='{generator}',site_name='{site_name}'")
+ logger.debug(f"generator='{generator}',site_name='{site_name}'")
if isinstance(generator, bs4.element.Tag) and isinstance(generator.get("content"), str):
- # DEBUG: print("DEBUG: Found generator meta tag:", domain)
+ logger.debug("Found generator meta tag:", domain)
software = tidyup.domain(generator.get("content"))
- # DEBUG: print(f"DEBUG: software[{type(software)}]='{software}'")
+ logger.debug(f"software[{type(software)}]='{software}'")
if software is not None and software != "":
- print(f"INFO: domain='{domain}' is generated by '{software}'")
+ logger.info(f"domain='{domain}' is generated by '{software}'")
instances.set_detection_mode(domain, "GENERATOR")
elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str):
- # DEBUG: print("DEBUG: Found property=og:site_name:", domain)
+ logger.debug("Found property=og:site_name:", domain)
software = tidyup.domain(site_name.get("content"))
- # DEBUG: print(f"DEBUG: software[{type(software)}]='{software}'")
+ logger.debug(f"software[{type(software)}]='{software}'")
if software is not None and software != "":
- print(f"INFO: domain='{domain}' has og:site_name='{software}'")
+ logger.info(f"domain='{domain}' has og:site_name='{software}'")
instances.set_detection_mode(domain, "SITE_NAME")
- # DEBUG: print(f"DEBUG: software[]='{type(software)}'")
+ logger.debug(f"software[]='{type(software)}'")
if isinstance(software, str) and software == "":
- # DEBUG: print(f"DEBUG: Corrected empty string to None for software of domain='{domain}'")
+ logger.debug(f"Corrected empty string to None for software of domain='{domain}'")
software = None
elif isinstance(software, str) and ("." in software or " " in software):
- # DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...")
+ logger.debug(f"software='{software}' may contain a version number, domain='{domain}', removing it ...")
software = version.remove(software)
- # DEBUG: print(f"DEBUG: software[]='{type(software)}'")
+ logger.debug(f"software[]='{type(software)}'")
if isinstance(software, str) and "powered by " in software:
- # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
+ logger.debug(f"software='{software}' has 'powered by' in it")
software = version.remove(version.strip_powered_by(software))
elif isinstance(software, str) and " hosted on " in software:
- # DEBUG: print(f"DEBUG: software='{software}' has 'hosted on' in it")
+ logger.debug(f"software='{software}' has 'hosted on' in it")
software = version.remove(version.strip_hosted_on(software))
elif isinstance(software, str) and " by " in software:
- # DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it")
+ logger.debug(f"software='{software}' has ' by ' in it")
software = version.strip_until(software, " by ")
elif isinstance(software, str) and " see " in software:
- # DEBUG: print(f"DEBUG: software='{software}' has ' see ' in it")
+ logger.debug(f"software='{software}' has ' see ' in it")
software = version.strip_until(software, " see ")
- # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
+ logger.debug(f"software='{software}' - EXIT!")
return software
def determine_software(domain: str, path: str = None) -> str:
- # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',path='{path}' - CALLED!")
+ logger.debug(f"domain({len(domain)})='{domain}',path='{path}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
elif not isinstance(path, str) and path is not None:
raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")
- # DEBUG: print("DEBUG: Determining software for domain,path:", domain, path)
+ logger.debug("Determining software for domain,path:", domain, path)
software = None
- # DEBUG: print(f"DEBUG: Fetching nodeinfo from '{domain}' ...")
+ logger.debug(f"Fetching nodeinfo from '{domain}' ...")
data = fetch_nodeinfo(domain, path)
- # DEBUG: print(f"DEBUG: data[{type(data)}]='{data}'")
+ logger.debug(f"data[{type(data)}]='{data}'")
if "exception" in data:
# Continue raising it
raise data["exception"]
elif "error_message" in data:
- # DEBUG: print(f"DEBUG: Returned error_message during fetching nodeinfo: '{data['error_message']}',status_code='{data['status_code']}'")
+ logger.debug(f"Returned error_message during fetching nodeinfo: '{data['error_message']}',status_code='{data['status_code']}'")
return fetch_generator_from_path(domain)
elif "status" in data and data["status"] == "error" and "message" in data:
- print("WARNING: JSON response is an error:", data["message"])
+ logger.warning("JSON response is an error:", data["message"])
instances.set_last_error(domain, data["message"])
return fetch_generator_from_path(domain)
elif "message" in data:
- print("WARNING: JSON response contains only a message:", data["message"])
+ logger.warning("JSON response contains only a message:", data["message"])
instances.set_last_error(domain, data["message"])
return fetch_generator_from_path(domain)
elif "software" not in data or "name" not in data["software"]:
- # DEBUG: print(f"DEBUG: JSON response from domain='{domain}' does not include [software][name], fetching / ...")
+ logger.debug(f"JSON response from domain='{domain}' does not include [software][name], fetching / ...")
software = fetch_generator_from_path(domain)
- # DEBUG: print(f"DEBUG: Generator for domain='{domain}' is: '{software}'")
+ logger.debug(f"Generator for domain='{domain}' is: '{software}'")
elif "software" in data and "name" in data["software"]:
- # DEBUG: print("DEBUG: Found data[software][name] in JSON response")
+ logger.debug("Found data[software][name] in JSON response")
software = data["software"]["name"]
if software is None:
- # DEBUG: print("DEBUG: Returning None - EXIT!")
+ logger.debug("Returning None - EXIT!")
return None
software = tidyup.domain(software)
- # DEBUG: print("DEBUG: sofware after tidyup.domain():", software)
+ logger.debug("sofware after tidyup.domain():", software)
if software in ["akkoma", "rebased", "akkounfucked", "ched"]:
- # DEBUG: print("DEBUG: Setting pleroma:", domain, software)
+ logger.debug("Setting pleroma:", domain, software)
software = "pleroma"
elif software in ["hometown", "ecko"]:
- # DEBUG: print("DEBUG: Setting mastodon:", domain, software)
+ logger.debug("Setting mastodon:", domain, software)
software = "mastodon"
elif software in ["slipfox calckey", "calckey", "groundpolis", "foundkey", "cherrypick", "meisskey", "magnetar", "keybump"]:
- # DEBUG: print("DEBUG: Setting misskey:", domain, software)
+ logger.debug("Setting misskey:", domain, software)
software = "misskey"
elif software == "runtube.re":
- # DEBUG: print("DEBUG: Setting peertube:", domain, software)
+ logger.debug("Setting peertube:", domain, software)
software = "peertube"
elif software == "nextcloud social":
- # DEBUG: print("DEBUG: Setting nextcloud:", domain, software)
+ logger.debug("Setting nextcloud:", domain, software)
software = "nextcloud"
elif software.find("/") > 0:
- print("WARNING: Spliting of slash:", software)
+ logger.warning("Spliting of slash:", software)
software = tidyup.domain(software.split("/")[-1])
elif software.find("|") > 0:
- print("WARNING: Spliting of pipe:", software)
+ logger.warning("Spliting of pipe:", software)
software = tidyup.domain(software.split("|")[0])
elif "powered by" in software:
- # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
+ logger.debug(f"software='{software}' has 'powered by' in it")
software = version.strip_powered_by(software)
elif isinstance(software, str) and " by " in software:
- # DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it")
+ logger.debug(f"software='{software}' has ' by ' in it")
software = version.strip_until(software, " by ")
elif isinstance(software, str) and " see " in software:
- # DEBUG: print(f"DEBUG: software='{software}' has ' see ' in it")
+ logger.debug(f"software='{software}' has ' see ' in it")
software = version.strip_until(software, " see ")
- # DEBUG: print(f"DEBUG: software[]='{type(software)}'")
+ logger.debug(f"software[]='{type(software)}'")
if software == "":
- print("WARNING: tidyup.domain() left no software name behind:", domain)
+ logger.warning("tidyup.domain() left no software name behind:", domain)
software = None
- # DEBUG: print(f"DEBUG: software[]='{type(software)}'")
+ logger.debug(f"software[]='{type(software)}'")
if str(software) == "":
- # DEBUG: print(f"DEBUG: software for '{domain}' was not detected, trying generator ...")
+ logger.debug(f"software for '{domain}' was not detected, trying generator ...")
software = fetch_generator_from_path(domain)
elif len(str(software)) > 0 and ("." in software or " " in software):
- # DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...")
+ logger.debug(f"software='{software}' may contain a version number, domain='{domain}', removing it ...")
software = version.remove(software)
- # DEBUG: print(f"DEBUG: software[]='{type(software)}'")
+ logger.debug(f"software[]='{type(software)}'")
if isinstance(software, str) and "powered by" in software:
- # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
+ logger.debug(f"software='{software}' has 'powered by' in it")
software = version.remove(version.strip_powered_by(software))
- # DEBUG: print("DEBUG: Returning domain,software:", domain, software)
+ logger.debug("Returning domain,software:", domain, software)
return software
def find_domains(tag: bs4.element.Tag) -> list:
- # DEBUG: print(f"DEBUG: tag[]='{type(tag)}' - CALLED!")
+ logger.debug(f"tag[]='{type(tag)}' - CALLED!")
if not isinstance(tag, bs4.element.Tag):
raise ValueError(f"Parameter tag[]='{type(tag)}' is not type of bs4.element.Tag")
elif len(tag.select("tr")) == 0:
domains = list()
for element in tag.select("tr"):
- # DEBUG: print(f"DEBUG: element[]='{type(element)}'")
+ logger.debug(f"element[]='{type(element)}'")
if not element.find("td"):
- # DEBUG: print("DEBUG: Skipping element, no <td> found")
+ logger.debug("Skipping element, no <td> found")
continue
domain = tidyup.domain(element.find("td").text)
reason = tidyup.reason(element.findAll("td")[1].text)
- # DEBUG: print(f"DEBUG: domain='{domain}',reason='{reason}'")
+ logger.debug(f"domain='{domain}',reason='{reason}'")
if not validators.domain(domain.split("/")[0]):
- print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!")
+ logger.warning(f"domain='{domain}' is not a valid domain - SKIPPED!")
continue
elif domain.endswith(".arpa"):
- print(f"WARNING: domain='{domain}' is a domain for reversed IP addresses - SKIPPED!")
+ logger.warning(f"domain='{domain}' is a domain for reversed IP addresses - SKIPPED!")
continue
elif domain.endswith(".tld"):
- print(f"WARNING: domain='{domain}' is a fake domain - SKIPPED!")
+ logger.warning(f"domain='{domain}' is a fake domain - SKIPPED!")
continue
elif blacklist.is_blacklisted(domain):
- # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!")
+ logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!")
continue
elif domain == "gab.com/.ai, develop.gab.com":
- # DEBUG: print("DEBUG: Multiple domains detected in one row")
+ logger.debug("Multiple domains detected in one row")
domains.append({
"domain": "gab.com",
"reason": reason,
})
continue
elif not validators.domain(domain.split("/")[0]):
- print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!")
+ logger.warning(f"domain='{domain}' is not a valid domain - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: Adding domain='{domain}',reason='{reason}' ...")
+ logger.debug(f"Adding domain='{domain}',reason='{reason}' ...")
domains.append({
"domain": domain,
"reason": reason,
})
- # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!")
+ logger.debug(f"domains()={len(domains)} - EXIT!")
return domains
def add_peers(rows: dict) -> list:
- # DEBUG: print(f"DEBUG: rows()={len(rows)} - CALLED!")
+ logger.debug(f"rows()={len(rows)} - CALLED!")
peers = list()
for key in ["linked", "allowed", "blocked"]:
- # DEBUG: print(f"DEBUG: Checking key='{key}'")
+ logger.debug(f"Checking key='{key}'")
if key not in rows or rows[key] is None:
- print(f"WARNING: Cannot find key='{key}' or it is NoneType - SKIPPED!")
+ logger.warning(f"Cannot find key='{key}' or it is NoneType - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: Adding {len(rows[key])} peer(s) to peers list ...")
+ logger.debug(f"Adding {len(rows[key])} peer(s) to peers list ...")
for peer in rows[key]:
- # DEBUG: print(f"DEBUG: peer='{peer}' - BEFORE!")
+ logger.debug(f"peer='{peer}' - BEFORE!")
if isinstance(peer, dict) and "domain" in peer:
- # DEBUG: print(f"DEBUG: peer[domain]='{peer['domain']}'")
+ logger.debug(f"peer[domain]='{peer['domain']}'")
peer = tidyup.domain(peer["domain"])
elif isinstance(peer, str):
- # DEBUG: print(f"DEBUG: peer='{peer}'")
+ logger.debug(f"peer='{peer}'")
peer = tidyup.domain(peer)
else:
raise ValueError(f"peer[]='{type(peer)}' is not supported,key='{key}'")
- # DEBUG: print(f"DEBUG: peer='{peer}' - AFTER!")
+ logger.debug(f"peer='{peer}' - AFTER!")
if not validators.domain(peer):
- print(f"WARNING: peer='{peer}' is not a valid domain - SKIPPED!")
+ logger.warning(f"peer='{peer}' is not a valid domain - SKIPPED!")
continue
elif peer.endswith(".arpa"):
- print(f"WARNING: peer='{peer}' is a domain for reversed IP addresses -SKIPPED!")
+ logger.warning(f"peer='{peer}' is a domain for reversed IP addresses -SKIPPED!")
continue
elif peer.endswith(".tld"):
- print(f"WARNING: peer='{peer}' is a fake domain - SKIPPED!")
+ logger.warning(f"peer='{peer}' is a fake domain - SKIPPED!")
continue
elif blacklist.is_blacklisted(peer):
- # DEBUG: print(f"DEBUG: peer='{peer}' is blacklisted - SKIPPED!")
+ logger.debug(f"peer='{peer}' is blacklisted - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: Adding peer='{peer}' ...")
+ logger.debug(f"Adding peer='{peer}' ...")
peers.append(peer)
- # DEBUG: print(f"DEBUG: peers()={len(peers)} - EXIT!")
+ logger.debug(f"peers()={len(peers)} - EXIT!")
return peers
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
+
import json
import reqto
import requests
from fba.models import instances
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
# HTTP headers for non-API requests
web_headers = {
"User-Agent": config.get("useragent"),
)
def post_json_api(domain: str, path: str, data: str = "", headers: dict = {}) -> dict:
- # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',data='{data}',headers()={len(headers)} - CALLED!")
+ logger.debug(f"domain='{domain}',path='{path}',data='{data}',headers()={len(headers)} - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
}
try:
- # DEBUG: print(f"DEBUG: Sending POST to domain='{domain}',path='{path}',data='{data}',headers({len(headers)})={headers}")
+ logger.debug(f"Sending POST to domain='{domain}',path='{path}',data='{data}',headers({len(headers)})={headers}")
response = reqto.post(
f"https://{domain}{path}",
data=data,
json_reply["json"] = json_from_response(response)
- # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'")
+ logger.debug(f"response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'")
if not response.ok or response.status_code >= 400:
- print(f"WARNING: Cannot query JSON API: domain='{domain}',path='{path}',data()={len(data)},response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'")
+ logger.warning(f"Cannot query JSON API: domain='{domain}',path='{path}',data()={len(data)},response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'")
json_reply["status_code"] = response.status_code
json_reply["error_message"] = response.reason
del json_reply["json"]
instances.set_last_error(domain, response)
except exceptions as exception:
- # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'")
+ logger.debug(f"Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'")
json_reply["status_code"] = 999
json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'"
json_reply["exception"] = exception
instances.set_last_error(domain, exception)
raise exception
- # DEBUG: print(f"DEBUG: Returning json_reply({len(json_reply)})=[]:{type(json_reply)}")
+ logger.debug(f"Returning json_reply({len(json_reply)})=[]:{type(json_reply)}")
return json_reply
def fetch_api_url(url: str, timeout: tuple) -> dict:
- # DEBUG: print(f"DEBUG: url='{url}',timeout()={len(timeout)} - CALLED!")
+ logger.debug(f"url='{url}',timeout()={len(timeout)} - CALLED!")
if not isinstance(url, str):
raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
elif not isinstance(timeout, tuple):
}
try:
- # DEBUG: print(f"DEBUG: Fetching url='{url}' ...")
+ logger.debug(f"Fetching url='{url}' ...")
response = fba.fetch_url(url, api_headers, timeout)
json_reply["json"] = json_from_response(response)
- # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'")
+ logger.debug(f"response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'")
if not response.ok or response.status_code >= 400:
- print(f"WARNING: Cannot query JSON API: url='{url}',response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'")
+ logger.warning(f"Cannot query JSON API: url='{url}',response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'")
json_reply["status_code"] = response.status_code
json_reply["error_message"] = response.reason
del json_reply["json"]
except exceptions as exception:
- # DEBUG: print(f"DEBUG: Fetching '{url}' failed. exception[{type(exception)}]='{str(exception)}'")
+ logger.debug(f"Fetching '{url}' failed. exception[{type(exception)}]='{str(exception)}'")
json_reply["status_code"] = 999
json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'"
json_reply["exception"] = exception
raise exception
- # DEBUG: print(f"DEBUG: Returning json_reply({len(json_reply)})=[]:{type(json_reply)}")
+ logger.debug(f"Returning json_reply({len(json_reply)})=[]:{type(json_reply)}")
return json_reply
def get_json_api(domain: str, path: str, headers: dict, timeout: tuple) -> dict:
- # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',timeout()={len(timeout)} - CALLED!")
+ logger.debug(f"domain='{domain}',path='{path}',timeout()={len(timeout)} - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
}
try:
- # DEBUG: print(f"DEBUG: Sending GET to domain='{domain}',path='{path}',timeout({len(timeout)})={timeout}")
+ logger.debug(f"Sending GET to domain='{domain}',path='{path}',timeout({len(timeout)})={timeout}")
response = reqto.get(
f"https://{domain}{path}",
headers={**api_headers, **headers},
)
except exceptions as exception:
- # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'")
+ logger.debug(f"Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'")
json_reply["status_code"] = 999
json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'"
json_reply["exception"] = exception
json_reply["json"] = json_from_response(response)
- # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'")
+ logger.debug(f"response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'")
if not response.ok or response.status_code >= 400:
- print(f"WARNING: Cannot query JSON API: domain='{domain}',path='{path}',response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'")
+ logger.warning(f"Cannot query JSON API: domain='{domain}',path='{path}',response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'")
json_reply["status_code"] = response.status_code
json_reply["error_message"] = response.reason
del json_reply["json"]
instances.set_last_error(domain, response)
- # DEBUG: print(f"DEBUG: Returning json_reply({len(json_reply)})=[]:{type(json_reply)}")
+ logger.debug(f"Returning json_reply({len(json_reply)})=[]:{type(json_reply)}")
return json_reply
def send_bot_post(domain: str, blocklist: dict):
- # DEBUG: print(f"DEBUG: domain='{domain}',blocklist()={len(blocklist)} - CALLED!")
+ logger.debug(f"domain='{domain}',blocklist()={len(blocklist)} - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
truncated = True
blocklist = blocklist[0 : 19]
- # DEBUG: print(f"DEBUG: blocklist()={len(blocklist)}")
+ logger.debug(f"blocklist()={len(blocklist)}")
for block in blocklist:
- # DEBUG: print(f"DEBUG: block['{type(block)}']={block}")
+ logger.debug(f"block['{type(block)}']={block}")
if block["reason"] is None or block["reason"] == '':
message = message + block["blocked"] + " with unspecified reason\n"
else:
return True
def fetch_response(domain: str, path: str, headers: dict, timeout: tuple) -> requests.models.Response:
- # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',headers()={len(headers)},timeout={timeout} - CALLED!")
+ logger.debug(f"domain='{domain}',path='{path}',headers()={len(headers)},timeout={timeout} - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
raise ValueError(f"timeout[]='{type(timeout)}' is not 'tuple'")
try:
- # DEBUG: print(f"DEBUG: Sending GET request to '{domain}{path}' ...")
+ logger.debug(f"Sending GET request to '{domain}{path}' ...")
response = reqto.get(
f"https://{domain}{path}",
headers=headers,
)
except exceptions as exception:
- # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'")
+ logger.debug(f"Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'")
instances.set_last_error(domain, exception)
raise exception
- # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXXIT!")
+ logger.debug(f"response[]='{type(response)}' - EXXIT!")
return response
def json_from_response(response: requests.models.Response) -> list:
- # DEBUG: print(f"DEBUG: response[]='{type(response)}' - CALLED!")
+ logger.debug(f"response[]='{type(response)}' - CALLED!")
if not isinstance(response, requests.models.Response):
raise ValueError(f"Parameter response[]='{type(response)}' is not type of 'Response'")
data = list()
if response.text.strip() != "":
- # DEBUG: print(f"DEBUG: response.text()={len(response.text)} is not empty, invoking response.json() ...")
+ logger.debug(f"response.text()={len(response.text)} is not empty, invoking response.json() ...")
try:
data = response.json()
except json.decoder.JSONDecodeError:
pass
- # DEBUG: print(f"DEBUG: data[]='{type(data)}' - EXIT!")
+ logger.debug(f"data[]='{type(data)}' - EXIT!")
return data
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
+
import time
import validators
from fba.helpers import blacklist
from fba.helpers import tidyup
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
def update_reason(reason: str, blocker: str, blocked: str, block_level: str):
- # DEBUG: print(f"DEBUG: reason='{reason}',blocker={blocker},blocked={blocked},block_level={block_level} - CALLED!")
+ logger.debug(f"reason='{reason}',blocker={blocker},blocked={blocked},block_level={block_level} - CALLED!")
if not isinstance(reason, str) and reason is not None:
raise ValueError(f"Parameter reason[]='{type(reason)}' is not 'str'")
elif not isinstance(blocker, str):
elif block_level == "accept":
raise ValueError("Accepted domains are not wanted here")
- # DEBUG: print("DEBUG: Updating block reason:", reason, blocker, blocked, block_level)
+ logger.debug("Updating block reason:", reason, blocker, blocked, block_level)
fba.cursor.execute(
"UPDATE blocks SET reason = ?, last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ? AND (reason IS NULL OR reason = '') LIMIT 1",
[
block_level
])
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def update_last_seen(blocker: str, blocked: str, block_level: str):
- # DEBUG: print("DEBUG: Updating last_seen for:", blocker, blocked, block_level)
+ logger.debug("Updating last_seen for:", blocker, blocked, block_level)
if not isinstance(blocker, str):
raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not 'str'")
elif blocker == "":
block_level
])
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def is_instance_blocked(blocker: str, blocked: str, block_level: str) -> bool:
- # DEBUG: print(f"DEBUG: blocker={blocker},blocked={blocked},block_level={block_level} - CALLED!")
+ logger.debug(f"blocker={blocker},blocked={blocked},block_level={block_level} - CALLED!")
if not isinstance(blocker, str):
raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not of type 'str'")
elif blocker == "":
is_blocked = fba.cursor.fetchone() is not None
- # DEBUG: print(f"DEBUG: is_blocked='{is_blocked}' - EXIT!")
+ logger.debug(f"is_blocked='{is_blocked}' - EXIT!")
return is_blocked
def add_instance(blocker: str, blocked: str, reason: str, block_level: str):
- # DEBUG: print("DEBUG: blocker,blocked,reason,block_level:", blocker, blocked, reason, block_level)
+ logger.debug("blocker,blocked,reason,block_level:", blocker, blocked, reason, block_level)
if not isinstance(blocker, str):
raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not 'str'")
elif blocker == "":
# Maybe needs cleaning
reason = tidyup.reason(reason)
- print(f"INFO: New block: blocker='{blocker}',blocked='{blocked}',reason='{reason}',block_level='{block_level}'")
+ logger.info(f"New block: blocker='{blocker}',blocked='{blocked}',reason='{reason}',block_level='{block_level}'")
fba.cursor.execute(
"INSERT INTO blocks (blocker, blocked, reason, block_level, first_seen, last_seen) VALUES (?, ?, ?, ?, ?, ?)",
time.time()
])
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
import json
import time
from fba.helpers import config
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
def add(domain: str, error: dict):
- # DEBUG: print("DEBUG: domain,error[]:", domain, type(error))
+ logger.debug("domain,error[]:", domain, type(error))
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
elif domain.endswith(".tld"):
raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
elif config.get("write_error_log").lower() != "true":
- # DEBUG: print(f"DEBUG: Writing to error_log is disabled in configuruation file - EXIT!")
+ logger.debug(f"Writing to error_log is disabled in configuruation file - EXIT!")
return
- # DEBUG: print("DEBUG: BEFORE error[]:", type(error))
+ logger.debug("BEFORE error[]:", type(error))
if isinstance(error, (BaseException, json.decoder.JSONDecodeError)):
error = f"error[{type(error)}]='{str(error)}'"
- # DEBUG: print("DEBUG: AFTER error[]:", type(error))
+ logger.debug("AFTER error[]:", type(error))
if isinstance(error, str):
fba.cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, 999, ?, ?)",[
domain,
])
# Cleanup old entries
- # DEBUG: print(f"DEBUG: Purging old records (distance: {config.get('error_log_cleanup')})")
+ logger.debug(f"Purging old records (distance: {config.get('error_log_cleanup')})")
fba.cursor.execute("DELETE FROM error_log WHERE created < ?", [time.time() - config.get("error_log_cleanup")])
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
import json
import time
from fba.models import error_log
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
# Found info from node, such as nodeinfo URL, detection mode that needs to be
# written to database. Both arrays must be filled at the same time or else
# update_data() will fail
}
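+# Illustrative shape only (example values): each key mirrors a column of the instances
+# table and maps a domain to its pending value, e.g.
+#   _pending = {"last_nodeinfo": {"example.com": 1690000000.0}}
+# _set_data() fills these buckets and update_data() flushes them in a single UPDATE.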
def _set_data(key: str, domain: str, value: any):
- # DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',value[]='{type(value)}' - CALLED!")
+ logger.debug(f"key='{key}',domain='{domain}',value[]='{type(value)}' - CALLED!")
if not isinstance(key, str):
raise ValueError("Parameter key[]='{type(key)}' is not 'str'")
elif key == "":
# Set it
_pending[key][domain] = value
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def has_pending(domain: str) -> bool:
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
has = False
for key in _pending:
- # DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',_pending[key]()='{len(_pending[key])}'")
+ logger.debug(f"key='{key}',domain='{domain}',_pending[key]()='{len(_pending[key])}'")
if domain in _pending[key]:
has = True
break
- # DEBUG: print(f"DEBUG: has='{has}' - EXIT!")
+ logger.debug(f"has='{has}' - EXIT!")
return has
def update_data(domain: str):
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
elif not is_registered(domain):
raise Exception(f"domain='{domain}' cannot be updated while not being registered")
- # DEBUG: print(f"DEBUG: Updating instance data for domain='{domain}' ...")
+ logger.debug(f"Updating instance data for domain='{domain}' ...")
sql_string = ""
fields = list()
for key in _pending:
- # DEBUG: print("DEBUG: key:", key)
+ logger.debug("key:", key)
if domain in _pending[key]:
- # DEBUG: print(f"DEBUG: Adding '{_pending[key][domain]}' for key='{key}' ...")
+ logger.debug(f"Adding '{_pending[key][domain]}' for key='{key}' ...")
fields.append(_pending[key][domain])
sql_string += f" {key} = ?,"
- # DEBUG: print(f"DEBUG: sql_string()={len(sql_string)}")
+ logger.debug(f"sql_string()={len(sql_string)}")
if sql_string == "":
raise ValueError(f"No fields have been set, but method invoked, domain='{domain}'")
# For WHERE statement
fields.append(domain)
- # DEBUG: print(f"DEBUG: sql_string='{sql_string}',fields()={len(fields)}")
+ logger.debug(f"sql_string='{sql_string}',fields()={len(fields)}")
sql_string = "UPDATE instances SET" + sql_string + " last_updated = ? WHERE domain = ? LIMIT 1"
- # DEBUG: print("DEBUG: sql_string:", sql_string)
+ logger.debug("sql_string:", sql_string)
- # DEBUG: print("DEBUG: Executing SQL:", sql_string)
+ logger.debug("Executing SQL:", sql_string)
fba.cursor.execute(sql_string, fields)
- # DEBUG: print(f"DEBUG: Success! (rowcount={fba.cursor.rowcount })")
+ logger.debug(f"Success! (rowcount={fba.cursor.rowcount })")
if fba.cursor.rowcount == 0:
raise Exception(f"Did not update any rows: domain='{domain}',fields()={len(fields)}")
- # DEBUG: print("DEBUG: Committing changes ...")
+ logger.debug("Committing changes ...")
fba.connection.commit()
- # DEBUG: print(f"DEBUG: Deleting _pending for domain='{domain}'")
+ logger.debug(f"Deleting _pending for domain='{domain}'")
for key in _pending:
- # DEBUG: print(f"DEBUG: domain='{domain}',key='{key}'")
+ logger.debug(f"domain='{domain}',key='{key}'")
if domain in _pending[key]:
del _pending[key][domain]
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def add(domain: str, origin: str, command: str, path: str = None, software: str = None):
- # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',command='{command}',path='{path}',software='{software}' - CALLED!")
+ logger.debug(f"domain='{domain}',origin='{origin}',command='{command}',path='{path}',software='{software}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
if software is None:
try:
- # DEBUG: print("DEBUG: domain,origin,command,path:", domain, origin, command, path)
+ logger.debug("domain,origin,command,path:", domain, origin, command, path)
software = federation.determine_software(domain, path)
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during determining software type")
+ logger.warning(f"Exception '{type(exception)}' during determining software type")
set_last_error(domain, exception)
- # DEBUG: print("DEBUG: Determined software:", software)
+ logger.debug("Determined software:", software)
if software == "lemmy" and domain.find("/c/") > 0:
domain = domain.split("/c/")[0]
if is_registered(domain):
- print(f"WARNING: domain='{domain}' already registered after cutting off user part. - EXIT!")
+ logger.warning(f"domain='{domain}' already registered after cutting off user part. - EXIT!")
return
- print(f"INFO: Adding instance domain='{domain}' (origin='{origin}',software='{software}')")
+ logger.info(f"Adding instance domain='{domain}' (origin='{origin}',software='{software}')")
fba.cursor.execute(
"INSERT INTO instances (domain, origin, command, hash, software, first_seen) VALUES (?, ?, ?, ?, ?, ?)",
(
),
)
- # DEBUG: print(f"DEBUG: Marking domain='{domain}' as registered.")
+ logger.debug(f"Marking domain='{domain}' as registered.")
cache.set_sub_key("is_registered", domain, True)
if has_pending(domain):
- # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo being updated ...")
+ logger.debug(f"domain='{domain}' has pending nodeinfo being updated ...")
update_data(domain)
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def set_last_nodeinfo(domain: str):
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
elif domain.endswith(".tld"):
raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
- # DEBUG: print("DEBUG: Updating last_nodeinfo for domain:", domain)
+ logger.debug("Updating last_nodeinfo for domain:", domain)
_set_data("last_nodeinfo", domain, time.time())
# Running pending updates
- # DEBUG: print(f"DEBUG: Invoking update_data({domain}) ...")
+ logger.debug(f"Invoking update_data({domain}) ...")
update_data(domain)
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def set_last_error(domain: str, error: dict):
- # DEBUG: print("DEBUG: domain,error[]:", domain, type(error))
+ logger.debug("domain,error[]:", domain, type(error))
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
elif domain.endswith(".tld"):
raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
- # DEBUG: print("DEBUG: BEFORE error[]:", type(error))
+ logger.debug("BEFORE error[]:", type(error))
if isinstance(error, (BaseException, json.decoder.JSONDecodeError)):
error = f"error[{type(error)}]='{str(error)}'"
- # DEBUG: print("DEBUG: AFTER error[]:", type(error))
+ logger.debug("AFTER error[]:", type(error))
if isinstance(error, str):
- # DEBUG: print(f"DEBUG: Setting last_error_details='{error}'")
+ logger.debug(f"Setting last_error_details='{error}'")
_set_data("last_status_code" , domain, 999)
_set_data("last_error_details", domain, error if error != "" else None)
elif isinstance(error, requests.models.Response):
- # DEBUG: print(f"DEBUG: Setting last_error_details='{error.reason}'")
+ logger.debug(f"Setting last_error_details='{error.reason}'")
_set_data("last_status_code" , domain, error.status_code)
_set_data("last_error_details", domain, error.reason if error.reason != "" else None)
elif not isinstance(error, dict):
raise KeyError(f"Cannot handle keys in error[{type(error)}]='{error}'")
elif "status_code" in error and "error_message" in error:
- # DEBUG: print(f"DEBUG: Setting last_error_details='{error['error_message']}'")
+ logger.debug(f"Setting last_error_details='{error['error_message']}'")
_set_data("last_status_code" , domain, error["status_code"])
_set_data("last_error_details", domain, error["error_message"] if error["error_message"] != "" else None)
elif "json" in error and "error" in error["json"]:
_set_data("last_status_code" , domain, error["status_code"])
_set_data("last_error_details", domain, error["json"]["error"] if error["json"]["error"] != "" else None)
- # DEBUG: print(f"DEBUG: Invoking error_log.add(domain='{domain}',error[]='{type(error)}'")
+ logger.debug(f"Invoking error_log.add(domain='{domain}',error[]='{type(error)}'")
error_log.add(domain, error)
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def is_registered(domain: str) -> bool:
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
elif domain.endswith(".tld"):
raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not cache.key_exists("is_registered"):
- # DEBUG: print("DEBUG: Cache for 'is_registered' not initialized, fetching all rows ...")
+ logger.debug("Cache for 'is_registered' not initialized, fetching all rows ...")
fba.cursor.execute("SELECT domain FROM instances")
# Check Set all
# Is cache found?
registered = cache.sub_key_exists("is_registered", domain)
- # DEBUG: print(f"DEBUG: registered='{registered}' - EXIT!")
+ logger.debug(f"registered='{registered}' - EXIT!")
return registered
def is_recent(domain: str) -> bool:
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
elif domain.endswith(".tld"):
raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
elif not is_registered(domain):
- # DEBUG: print(f"DEBUG: domain='{domain}' is not registered, returning False - EXIT!")
+ logger.debug(f"domain='{domain}' is not registered, returning False - EXIT!")
return False
# Query database
# Fetch row
fetched = fba.cursor.fetchone()[0]
- # DEBUG: print(f"DEBUG: fetched[{type(fetched)}]='{fetched}'")
+ logger.debug(f"fetched[{type(fetched)}]='{fetched}'")
recently = isinstance(fetched, float) and time.time() - fetched <= config.get("recheck_instance")
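+ # An instance counts as "recent" when the fetched timestamp is no older than the
+ # configured 'recheck_instance' interval, compared in seconds via time.time().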
- # DEBUG: print(f"DEBUG: recently='{recently}' - EXIT!")
+ logger.debug(f"recently='{recently}' - EXIT!")
return recently
def deobscure(char: str, domain: str, blocked_hash: str = None) -> tuple:
- # DEBUG: print(f"DEBUG: char='{char}',domain='{domain}',blocked_hash='{blocked_hash}' - CALLED!")
+ logger.debug(f"char='{char}',domain='{domain}',blocked_hash='{blocked_hash}' - CALLED!")
if not isinstance(char, str):
raise ValueError(f"Parameter char[]='{type(char)}' is not 'str'")
elif char == "":
raise ValueError(f"Parameter blocked_hash[]='{type(blocked_hash)}' is not 'str'")
if isinstance(blocked_hash, str):
- # DEBUG: print(f"DEBUG: Looking up blocked_hash='{blocked_hash}' ...")
+ logger.debug(f"Looking up blocked_hash='{blocked_hash}' ...")
fba.cursor.execute(
"SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
)
row = fba.cursor.fetchone()
- # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
+ logger.debug(f"row[]='{type(row)}'")
if row is None:
- # DEBUG: print(f"DEBUG: blocked_hash='{blocked_hash}' not found, trying domain='{domain}' ...")
+ logger.debug(f"blocked_hash='{blocked_hash}' not found, trying domain='{domain}' ...")
return deobscure(char, domain)
else:
- # DEBUG: print(f"DEBUG: Looking up domain='{domain}' ...")
+ logger.debug(f"Looking up domain='{domain}' ...")
fba.cursor.execute(
"SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [domain.replace(char, "_")]
)
row = fba.cursor.fetchone()
- # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
+ logger.debug(f"row[]='{type(row)}'")
- # DEBUG: print(f"DEBUG: row[]='{type(row)}' - EXIT!")
+ logger.debug(f"row[]='{type(row)}' - EXIT!")
return row
def set_last_blocked(domain: str):
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
# Set timestamp
_set_data("last_blocked", domain, time.time())
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def set_last_instance_fetch(domain: str):
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
# Set timestamp
_set_data("last_instance_fetch", domain, time.time())
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def set_total_peers(domain: str, peers: list):
- # DEBUG: print(f"DEBUG: domain='{domain}',peers()={len(peers)} - CALLED!")
+ logger.debug(f"domain='{domain}',peers()={len(peers)} - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
# Set timestamp
_set_data("total_peers", domain, len(peers))
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def set_nodeinfo_url(domain: str, url: str):
- # DEBUG: print(f"DEBUG: domain='{domain}',url='{url}' - CALLED!")
+ logger.debug(f"domain='{domain}',url='{url}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
# Set timestamp
_set_data("nodeinfo_url", domain, url)
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def set_detection_mode(domain: str, mode: str):
- # DEBUG: print(f"DEBUG: domain='{domain}',mode='{mode}' - CALLED!")
+ logger.debug(f"domain='{domain}',mode='{mode}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
# Set timestamp
_set_data("detection_mode", domain, mode)
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
+
import bs4
import validators
from fba.models import instances
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
def fetch_blocks(domain: str) -> dict:
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
block_tag = None
try:
- # DEBUG: print("DEBUG: Fetching friendica blocks from domain:", domain)
+ logger.debug("Fetching friendica blocks from domain:", domain)
doc = bs4.BeautifulSoup(
network.fetch_response(
domain,
).text,
"html.parser",
)
- # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
+ logger.debug(f"doc[]='{type(doc)}'")
block_tag = doc.find(id="about_blocklist")
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during fetching instances (friendica) from domain='{domain}'")
+ logger.warning(f"Exception '{type(exception)}' during fetching instances (friendica) from domain='{domain}'")
instances.set_last_error(domain, exception)
return dict()
# Prevents exceptions:
if block_tag is None:
- # DEBUG: print("DEBUG: Instance has no block list:", domain)
+ logger.debug("Instance has no block list:", domain)
return dict()
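+ # Friendica renders its blocklist as an HTML table (domain, reason) inside the
+ # element with id="about_blocklist"; parse it row by row below.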
table = block_tag.find("table")
- # DEBUG: print(f"DEBUG: table[]='{type(table)}'")
+ logger.debug(f"table[]='{type(table)}'")
if table.find("tbody"):
rows = table.find("tbody").find_all("tr")
else:
rows = table.find_all("tr")
- # DEBUG: print(f"DEBUG: Found rows()={len(rows)}")
+ logger.debug(f"Found rows()={len(rows)}")
for line in rows:
- # DEBUG: print(f"DEBUG: line='{line}'")
+ logger.debug(f"line='{line}'")
blocked = tidyup.domain(line.find_all("td")[0].text)
reason = tidyup.reason(line.find_all("td")[1].text)
- # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}'")
+ logger.debug(f"blocked='{blocked}',reason='{reason}'")
if not validators.domain(blocked):
- print(f"WARNING: blocked='{blocked}' is not a valid domain - SKIPPED!")
+ logger.warning(f"blocked='{blocked}' is not a valid domain - SKIPPED!")
continue
elif blocked.endswith(".arpa"):
- print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
+ logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
continue
elif blocked.endswith(".tld"):
- print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!")
continue
elif blacklist.is_blacklisted(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
+ logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: Appending blocked='{blocked}',reason='{reason}'")
+ logger.debug(f"Appending blocked='{blocked}',reason='{reason}'")
blocklist.append({
"domain": tidyup.domain(blocked),
"reason": tidyup.reason(reason)
})
- # DEBUG: print("DEBUG: Next!")
+ logger.debug("Next!")
- # DEBUG: print("DEBUG: Returning blocklist() for domain:", domain, len(blocklist))
+ logger.debug("Returning blocklist() for domain:", domain, len(blocklist))
return {
"reject": blocklist
}
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import inspect
+import logging
import bs4
import validators
from fba.models import blocks
from fba.models import instances
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
def fetch_peers(domain: str) -> list:
- # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',software='lemmy' - CALLED!")
+ logger.debug(f"domain({len(domain)})='{domain}',software='lemmy' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
headers = tuple()
try:
- # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
+ logger.debug(f"Checking CSRF for domain='{domain}'")
headers = csrf.determine(domain, dict())
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
+ logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
instances.set_last_error(domain, exception)
return peers
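+ # Lemmy lists its linked instances in the JSON returned by /api/v3/site under
+ # the 'federated_instances' key.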
try:
- # DEBUG: print(f"DEBUG: domain='{domain}' is Lemmy, fetching JSON ...")
+ logger.debug(f"domain='{domain}' is Lemmy, fetching JSON ...")
data = network.get_json_api(
domain,
"/api/v3/site",
(config.get("connection_timeout"), config.get("read_timeout"))
)
- # DEBUG: print(f"DEBUG: data[]='{type(data)}'")
+ logger.debug(f"data[]='{type(data)}'")
if "error_message" in data:
- print("WARNING: Could not reach any JSON API:", domain)
+ logger.warning("Could not reach any JSON API:", domain)
instances.set_last_error(domain, data)
elif "federated_instances" in data["json"]:
- # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'")
+ logger.debug(f"Found federated_instances for domain='{domain}'")
peers = peers + federation.add_peers(data["json"]["federated_instances"])
- # DEBUG: print("DEBUG: Added instance(s) to peers")
+ logger.debug("Added instance(s) to peers")
else:
- print("WARNING: JSON response does not contain 'federated_instances':", domain)
+ logger.warning("JSON response does not contain 'federated_instances':", domain)
instances.set_last_error(domain, data)
except network.exceptions as exception:
- print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(exception)}]:'{str(exception)}'")
+ logger.warning(f"Exception during fetching JSON: domain='{domain}',exception[{type(exception)}]:'{str(exception)}'")
instances.set_last_error(domain, exception)
- # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
+ logger.debug(f"Adding '{len(peers)}' for domain='{domain}'")
instances.set_total_peers(domain, peers)
- # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
+ logger.debug("Returning peers[]:", type(peers))
return peers
def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
- # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
+ logger.debug(f"domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
"report_removal": [],
}
- # DEBUG: print(f"DEBUG: Fetching /instances from domain='{domain}'")
+ logger.debug(f"Fetching /instances from domain='{domain}'")
response = network.fetch_response(
domain,
"/instances",
(config.get("connection_timeout"), config.get("read_timeout"))
)
- # DEBUG: print(f"DEBUG: response.ok='{response.ok}',response.status_code={response.status_code},response.text()={len(response.text)}")
+ logger.debug(f"response.ok='{response.ok}',response.status_code={response.status_code},response.text()={len(response.text)}")
if response.ok and response.status_code < 300 and response.text != "":
- # DEBUG: print(f"DEBUG: Parsing {len(response.text)} Bytes ...")
+ logger.debug(f"Parsing {len(response.text)} Bytes ...")
doc = bs4.BeautifulSoup(response.text, "html.parser")
- # DEBUG: print(f"DEBUG: doc[]={type(doc)}")
+ logger.debug(f"doc[]={type(doc)}")
headers = doc.findAll("h5")
found = None
- # DEBUG: print(f"DEBUG: Search in {len(headers)} header(s) ...")
+ logger.debug(f"Search in {len(headers)} header(s) ...")
for header in headers:
- # DEBUG: print(f"DEBUG: header[]={type(header)}")
+ logger.debug(f"header[]={type(header)}")
content = header.contents[0]
- # DEBUG: print(f"DEBUG: content='{content}'")
+ logger.debug(f"content='{content}'")
if content in translations:
- # DEBUG: print("DEBUG: Found header with blocked instances - BREAK!")
+ logger.debug("Found header with blocked instances - BREAK!")
found = header
break
- # DEBUG: print(f"DEBUG: found[]='{type(found)}'")
+ logger.debug(f"found[]='{type(found)}'")
if found is None:
- # DEBUG: print(f"DEBUG: domain='{domain}' is not blocking any instances - EXIT!")
+ logger.debug(f"domain='{domain}' is not blocking any instances - EXIT!")
return
blocking = found.find_next("ul").findAll("a")
- # DEBUG: print(f"DEBUG: Found {len(blocking)} blocked instance(s) ...")
+ logger.debug(f"Found {len(blocking)} blocked instance(s) ...")
for tag in blocking:
- # DEBUG: print(f"DEBUG: tag[]='{type(tag)}'")
+ logger.debug(f"tag[]='{type(tag)}'")
blocked = tidyup.domain(tag.contents[0])
- # DEBUG: print(f"DEBUG: blocked='{blocked}'")
+ logger.debug(f"blocked='{blocked}'")
if not validators.domain(blocked):
- print(f"WARNING: blocked='{blocked}' is not a valid domain - SKIPPED!")
+ logger.warning(f"blocked='{blocked}' is not a valid domain - SKIPPED!")
continue
elif blocked.endswith(".arpa"):
- print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
+ logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
continue
elif blocked.endswith(".tld"):
- print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!")
continue
elif blacklist.is_blacklisted(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
+ logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!")
continue
elif not instances.is_registered(blocked):
- # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain)
+ logger.debug("Hash wasn't found, adding:", blocked, domain)
instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
if not blocks.is_instance_blocked(domain, blocked, "reject"):
- # DEBUG: print("DEBUG: Blocking:", domain, blocked)
+ logger.debug("Blocking:", domain, blocked)
blocks.add_instance(domain, blocked, None, "reject")
found_blocks.append({
"reason" : None
})
else:
- # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
+ logger.debug(f"Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
blocks.update_last_seen(domain, blocked, "reject")
- # DEBUG: print("DEBUG: Committing changes ...")
+ logger.debug("Committing changes ...")
fba.connection.commit()
except network.exceptions as exception:
- print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")
+ logger.warning(f"domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")
instances.set_last_error(domain, exception)
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import inspect
+import logging
import bs4
import validators
from fba.models import blocks
from fba.models import instances
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Language mapping X -> English
language_mapping = {
# English -> English
"Silenced instances" : "Silenced servers",
}
def fetch_blocks_from_about(domain: str) -> dict:
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
elif domain.endswith(".tld"):
raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
- # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain)
+ logger.debug("Fetching mastodon blocks from domain:", domain)
doc = None
for path in ["/about/more", "/about"]:
try:
- # DEBUG: print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...")
+ logger.debug(f"Fetching path='{path}' from domain='{domain}' ...")
doc = bs4.BeautifulSoup(
network.fetch_response(
domain,
)
if len(doc.find_all("h3")) > 0:
- # DEBUG: print(f"DEBUG: path='{path}' had some headlines - BREAK!")
+ logger.debug(f"path='{path}' had some headlines - BREAK!")
break
except network.exceptions as exception:
- print(f"ERROR: Cannot fetch from domain='{domain}',exception='{type(exception)}'")
+ logger.warning(f"Cannot fetch from domain='{domain}',exception='{type(exception)}'")
instances.set_last_error(domain, exception)
break
"Silenced servers" : [],
}
- # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
+ logger.debug(f"doc[]='{type(doc)}'")
if doc is None:
- print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!")
+ logger.warning(f"Cannot fetch any /about pages for domain='{domain}' - EXIT!")
return blocklist
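+ # Walk all <h3> headlines, translate them to English via language_mapping and
+ # collect the table rows that follow each matching headline.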
for header in doc.find_all("h3"):
header_text = tidyup.reason(header.text)
- # DEBUG: print(f"DEBUG: header_text='{header_text}'")
+ logger.debug(f"header_text='{header_text}'")
if header_text in language_mapping:
- # DEBUG: print(f"DEBUG: header_text='{header_text}'")
+ logger.debug(f"header_text='{header_text}'")
header_text = language_mapping[header_text]
else:
- print(f"WARNING: header_text='{header_text}' not found in language mapping table")
+ logger.warning(f"header_text='{header_text}' not found in language mapping table")
if header_text in blocklist or header_text.lower() in blocklist:
# replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
"reason": tidyup.reason(line.find_all("td")[1].text),
})
else:
- print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
+ logger.warning(f"header_text='{header_text}' not found in blocklist()={len(blocklist)}")
- # DEBUG: print("DEBUG: Returning blocklist for domain:", domain)
+ logger.debug("Returning blocklist for domain:", domain)
return {
"reject" : blocklist["Suspended servers"],
"media_removal" : blocklist["Filtered media"],
}
def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
- # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
+ logger.debug(f"domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
headers = tuple()
try:
- # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
+ logger.debug(f"Checking CSRF for domain='{domain}'")
headers = csrf.determine(domain, dict())
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
+ logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
instances.set_last_error(domain, exception)
return
"report_removal": [],
}
- # DEBUG: print("DEBUG: Querying API domain_blocks:", domain)
+ logger.debug("Querying API domain_blocks:", domain)
data = network.get_json_api(
domain,
"/api/v1/instance/domain_blocks",
(config.get("connection_timeout"), config.get("read_timeout"))
)
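+ # The domain_blocks API returns a list of objects with 'domain', 'digest',
+ # 'severity' and an optional 'comment'.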
- # DEBUG: print(f"DEBUG: data[]='{type(data)}'")
+ logger.debug(f"data[]='{type(data)}'")
if "error_message" in data:
- # DEBUG: print(f"DEBUG: Was not able to fetch domain_blocks from domain='{domain}': status_code='{data['status_code']}',error_message='{data['error_message']}'")
+ logger.debug(f"Was not able to fetch domain_blocks from domain='{domain}': status_code='{data['status_code']}',error_message='{data['error_message']}'")
instances.set_last_error(domain, data)
return
elif "json" in data and "error" in data["json"]:
- print(f"WARNING: JSON API returned error message: '{data['json']['error']}'")
+ logger.warning(f"JSON API returned error message: '{data['json']['error']}'")
instances.set_last_error(domain, data)
return
else:
blocklist = data["json"]
if len(blocklist) > 0:
- print(f"INFO: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon' ...")
+ logger.info(f"Checking {len(blocklist)} entries from domain='{domain}',software='mastodon' ...")
for block in blocklist:
# Check type
- # DEBUG: print(f"DEBUG: block[]='{type(block)}'")
+ logger.debug(f"block[]='{type(block)}'")
if not isinstance(block, dict):
- # DEBUG: print(f"DEBUG: block[]='{type(block)}' is of type 'dict' - SKIPPED!")
+ logger.debug(f"block[]='{type(block)}' is of type 'dict' - SKIPPED!")
continue
# Map block -> entry
- # DEBUG: print(f"DEBUG: block[{type(block)}]='{block}'")
+ logger.debug(f"block[{type(block)}]='{block}'")
entry = {
"domain": block["domain"],
"hash" : block["digest"],
"reason": block["comment"] if "comment" in block else None
}
- # DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
+ logger.debug("severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
if block['severity'] == 'suspend':
- # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
+ logger.debug(f"Adding entry='{entry}' with severity='{block['severity']}' ...")
rows['reject'].append(entry)
elif block['severity'] == 'silence':
- # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
+ logger.debug(f"Adding entry='{entry}' with severity='{block['severity']}' ...")
rows['followers_only'].append(entry)
elif block['severity'] == 'reject_media':
- # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
+ logger.debug(f"Adding entry='{entry}' with severity='{block['severity']}' ...")
rows['media_removal'].append(entry)
elif block['severity'] == 'reject_reports':
- # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
+ logger.debug(f"Adding entry='{entry}' with severity='{block['severity']}' ...")
rows['report_removal'].append(entry)
else:
- print(f"WARNING: Unknown severity='{block['severity']}', domain='{block['domain']}'")
+ logger.warning(f"Unknown severity='{block['severity']}', domain='{block['domain']}'")
else:
- # DEBUG: print(f"DEBUG: domain='{domain}' has returned zero rows, trying /about/more page ...")
+ logger.debug(f"domain='{domain}' has returned zero rows, trying /about/more page ...")
rows = fetch_blocks_from_about(domain)
- print(f"INFO: Checking {len(rows.items())} entries from domain='{domain}',software='mastodon' ...")
+ logger.info(f"Checking {len(rows.items())} entries from domain='{domain}',software='mastodon' ...")
for block_level, blocklist in rows.items():
- # DEBUG: print("DEBUG: domain,block_level,blocklist():", domain, block_level, len(blocklist))
+ logger.debug("domain,block_level,blocklist():", domain, block_level, len(blocklist))
block_level = tidyup.domain(block_level)
- # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
+ logger.debug("AFTER-block_level:", block_level)
if block_level == "":
- print("WARNING: block_level is empty, domain:", domain)
+ logger.warning("block_level is empty, domain:", domain)
continue
elif block_level == "accept":
- # DEBUG: print(f"DEBUG: domain='{domain}' skipping block_level='accept'")
+ logger.debug(f"domain='{domain}' skipping block_level='accept'")
continue
- # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon',block_level='{block_level}' ...")
+ logger.debug(f"Checking {len(blocklist)} entries from domain='{domain}',software='mastodon',block_level='{block_level}' ...")
for block in blocklist:
- # DEBUG: print(f"DEBUG: block[]='{type(block)}'")
+ logger.debug(f"block[]='{type(block)}'")
blocked, blocked_hash, reason = block.values()
- # DEBUG: print(f"DEBUG: blocked='{blocked}',blocked_hash='{blocked_hash}',reason='{reason}':")
+ logger.debug(f"blocked='{blocked}',blocked_hash='{blocked_hash}',reason='{reason}':")
blocked = tidyup.domain(blocked)
reason = tidyup.reason(reason) if reason is not None and reason != "" else None
- # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")
+ logger.debug(f"blocked='{blocked}',reason='{reason}' - AFTER!")
if blocked == "":
- print("WARNING: blocked is empty:", domain)
+ logger.warning("blocked is empty:", domain)
continue
elif blacklist.is_blacklisted(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
+ logger.debug(f"blocked='{blocked}' is blacklisted - skipping!")
continue
elif blocked.count("*") > 0:
# Doing the hash search for instance names as well to tidy up DB
row = instances.deobscure("*", blocked, blocked_hash)
- # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
+ logger.debug(f"row[]='{type(row)}'")
if row is None:
- print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
+ logger.warning(f"Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
continue
- # DEBUG: print("DEBUG: Updating domain: ", row[0])
+ logger.debug("Updating domain: ", row[0])
blocked = row[0]
origin = row[1]
nodeinfo_url = row[2]
# Doing the hash search for instance names as well to tidy up DB
row = instances.deobscure("?", blocked, blocked_hash)
- # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
+ logger.debug(f"row[]='{type(row)}'")
if row is None:
- print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
+ logger.warning(f"Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
continue
- # DEBUG: print("DEBUG: Updating domain: ", row[0])
+ logger.debug("Updating domain: ", row[0])
blocked = row[0]
origin = row[1]
nodeinfo_url = row[2]
- # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
+ logger.debug("Looking up instance by domain:", blocked)
if not validators.domain(blocked):
- print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!")
+ logger.warning(f"blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!")
continue
elif blocked.endswith(".arpa"):
- print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
+ logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
continue
elif blocked.endswith(".tld"):
- print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!")
continue
elif blacklist.is_blacklisted(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
+ logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!")
continue
elif not instances.is_registered(blocked):
- # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
+ logger.debug(f"Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
- # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
+ logger.debug("Looking up instance by domain:", blocked)
if not validators.domain(blocked):
- print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!")
+ logger.warning(f"blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!")
continue
elif blocked.endswith(".arpa"):
- print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
+ logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
continue
elif blocked.endswith(".tld"):
- print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!")
continue
elif blacklist.is_blacklisted(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
+ logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!")
continue
elif not instances.is_registered(blocked):
- # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain)
+ logger.debug("Hash wasn't found, adding:", blocked, domain)
instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
if not blocks.is_instance_blocked(domain, blocked, block_level):
- # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
+ logger.debug("Blocking:", domain, blocked, block_level)
blocks.add_instance(domain, blocked, reason, block_level)
if block_level == "reject":
"reason" : reason
})
else:
- # DEBUG: print(f"DEBUG: Updating block last seen and reason for domain='{domain}',blocked='{blocked}' ...")
+ logger.debug(f"Updating block last seen and reason for domain='{domain}',blocked='{blocked}' ...")
blocks.update_last_seen(domain, blocked, block_level)
blocks.update_reason(reason, domain, blocked, block_level)
- # DEBUG: print("DEBUG: Committing changes ...")
+ logger.debug("Committing changes ...")
fba.connection.commit()
except network.exceptions as exception:
- print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")
+ logger.warning(f"domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")
instances.set_last_error(domain, exception)
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import json
+import logging
import validators
from fba import csrf
from fba.models import instances
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
def fetch_peers(domain: str) -> list:
- # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}' - CALLED!")
+ logger.debug(f"domain({len(domain)})='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
elif domain.endswith(".tld"):
raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
- # DEBUG: print(f"DEBUG: domain='{domain}' is misskey, sending API POST request ...")
+ logger.debug(f"domain='{domain}' is misskey, sending API POST request ...")
peers = list()
offset = 0
step = config.get("misskey_limit")
headers = tuple()
try:
- # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
+ logger.debug(f"Checking CSRF for domain='{domain}'")
headers = csrf.determine(domain, dict())
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
+ logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
instances.set_last_error(domain, exception)
return peers
# instances page-by-page, since the API doesn't support
# sending them all at once
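+ # Pagination: request 'misskey_limit' rows per POST to /api/federation/instances and
+ # advance 'offset' until the response contains no new hosts.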
while True:
- # DEBUG: print(f"DEBUG: Fetching offset='{offset}' from '{domain}' ...")
+ logger.debug(f"Fetching offset='{offset}' from '{domain}' ...")
if offset == 0:
fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
"sort" : "+pubAt",
}), headers)
# Check records
- # DEBUG: print(f"DEBUG: fetched[]='{type(fetched)}'")
+ logger.debug(f"fetched[]='{type(fetched)}'")
if "error_message" in fetched:
- print(f"WARNING: post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
+ logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
instances.set_last_error(domain, fetched)
break
elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
- print(f"WARNING: post_json_api() returned error: {fetched['error']['message']}")
+ logger.warning(f"post_json_api() returned error: {fetched['error']['message']}")
instances.set_last_error(domain, fetched["json"]["error"]["message"])
break
rows = fetched["json"]
- # DEBUG: print(f"DEBUG: rows()={len(rows)}")
+ logger.debug(f"rows()={len(rows)}")
if len(rows) == 0:
- # DEBUG: print(f"DEBUG: Returned zero bytes, exiting loop, domain='{domain}'")
+ logger.debug(f"Returned zero bytes, exiting loop, domain='{domain}'")
break
elif len(rows) != config.get("misskey_limit"):
- # DEBUG: print(f"DEBUG: Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
+ logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
offset = offset + (config.get("misskey_limit") - len(rows))
else:
- # DEBUG: print(f"DEBUG: Raising offset by step={step}")
+ logger.debug(f"Raising offset by step={step}")
offset = offset + step
already = 0
- # DEBUG: print(f"DEBUG: rows({len(rows)})[]='{type(rows)}'")
+ logger.debug(f"rows({len(rows)})[]='{type(rows)}'")
for row in rows:
- # DEBUG: print(f"DEBUG: row()={len(row)}")
+ logger.debug(f"row()={len(row)}")
if "host" not in row:
- print(f"WARNING: row()={len(row)} does not contain key 'host': {row},domain='{domain}'")
+ logger.warning(f"row()={len(row)} does not contain key 'host': {row},domain='{domain}'")
continue
elif not isinstance(row["host"], str):
- print(f"WARNING: row[host][]='{type(row['host'])}' is not 'str' - SKIPPED!")
+ logger.warning(f"row[host][]='{type(row['host'])}' is not 'str' - SKIPPED!")
continue
elif not validators.domain(row["host"].split("/")[0]):
- print(f"WARNING: row[host]='{row['host']}' is not a valid domain - SKIPPED!")
+ logger.warning(f"row[host]='{row['host']}' is not a valid domain - SKIPPED!")
continue
elif row["host"].endswith(".arpa"):
- print(f"WARNING: row[host]='{row['host']}' is a domain for reversed IP addresses - SKIPPED!")
+ logger.warning(f"row[host]='{row['host']}' is a domain for reversed IP addresses - SKIPPED!")
continue
elif row["host"].endswith(".tld"):
- print(f"WARNING: row[host]='{row['host']}' is a fake domain - SKIPPED!")
+ logger.warning(f"row[host]='{row['host']}' is a fake domain - SKIPPED!")
continue
elif blacklist.is_blacklisted(row["host"]):
- # DEBUG: print(f"DEBUG: row[host]='{row['host']}' is blacklisted. domain='{domain}' - SKIPPED!")
+ logger.debug(f"row[host]='{row['host']}' is blacklisted. domain='{domain}' - SKIPPED!")
continue
elif row["host"] in peers:
- # DEBUG: print(f"DEBUG: Not adding row[host]='{row['host']}', already found.")
+ logger.debug(f"Not adding row[host]='{row['host']}', already found.")
already = already + 1
continue
- # DEBUG: print(f"DEBUG: Adding peer: '{row['host']}'")
+ logger.debug(f"Adding peer: '{row['host']}'")
peers.append(row["host"])
if already == len(rows):
- # DEBUG: print(f"DEBUG: Host returned same set of '{already}' instances, aborting loop!")
+ logger.debug(f"Host returned same set of '{already}' instances, aborting loop!")
break
- # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
+ logger.debug(f"Adding '{len(peers)}' for domain='{domain}'")
instances.set_total_peers(domain, peers)
- # DEBUG: print(f"DEBUG: Returning peers[]='{type(peers)}'")
+ logger.debug(f"Returning peers[]='{type(peers)}'")
return peers
def fetch_blocks(domain: str) -> dict:
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
elif domain.endswith(".tld"):
raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
- # DEBUG: print(f"DEBUG: Fetching misskey blocks from domain='{domain}'")
+ logger.debug(f"Fetching misskey blocks from domain='{domain}'")
blocklist = {
"suspended": [],
"blocked" : []
headers = tuple()
try:
- # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
+ logger.debug(f"Checking CSRF for domain='{domain}'")
headers = csrf.determine(domain, dict())
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
+ logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
instances.set_last_error(domain, exception)
return blocklist
# instances page-by-page since it doesn't support sending them all at once
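+ # Two passes over /api/federation/instances: the first collects rows flagged 'isSuspended'
+ # into blocklist['suspended'], the second rows flagged 'isBlocked' into blocklist['blocked'].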
while True:
try:
- # DEBUG: print(f"DEBUG: Fetching offset='{offset}' from '{domain}' ...")
+ logger.debug(f"Fetching offset='{offset}' from '{domain}' ...")
if offset == 0:
- # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset)
+ logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
"sort" : "+pubAt",
"host" : None,
"limit" : step
}), headers)
else:
- # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset)
+ logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
"sort" : "+pubAt",
"host" : None,
"offset" : offset - 1
}), headers)
- # DEBUG: print(f"DEBUG: fetched[]='{type(fetched)}'")
+ logger.debug(f"fetched[]='{type(fetched)}'")
if "error_message" in fetched:
- print(f"WARNING: post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
+ logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
instances.set_last_error(domain, fetched)
break
elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
- print(f"WARNING: post_json_api() returned error: {fetched['error']['message']}")
+ logger.warning(f"post_json_api() returned error: {fetched['error']['message']}")
instances.set_last_error(domain, fetched["json"]["error"]["message"])
break
rows = fetched["json"]
- # DEBUG: print(f"DEBUG: rows({len(rows)})={rows} - suspend")
+ logger.debug(f"rows({len(rows)})={rows} - suspend")
if len(rows) == 0:
- # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
+ logger.debug("Returned zero bytes, exiting loop:", domain)
break
elif len(rows) != config.get("misskey_limit"):
- # DEBUG: print(f"DEBUG: Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
+ logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
offset = offset + (config.get("misskey_limit") - len(rows))
else:
- # DEBUG: print("DEBUG: Raising offset by step:", step)
+ logger.debug("Raising offset by step:", step)
offset = offset + step
count = 0
for instance in rows:
# Is it there?
- # DEBUG: print(f"DEBUG: instance[{type(instance)}]='{instance}' - suspend")
+ logger.debug(f"instance[{type(instance)}]='{instance}' - suspend")
if "isSuspended" in instance and instance["isSuspended"] and not dicts.has_key(blocklist["suspended"], "domain", instance["host"]):
count = count + 1
blocklist["suspended"].append({
"reason": None
})
- # DEBUG: print(f"DEBUG: count={count}")
+ logger.debug(f"count={count}")
if count == 0:
- # DEBUG: print("DEBUG: API is no more returning new instances, aborting loop!")
+ logger.debug("API is no more returning new instances, aborting loop!")
break
except network.exceptions as exception:
- print(f"WARNING: Caught error, exiting loop: domain='{domain}',exception[{type(exception)}]='{str(exception)}'")
+ logger.warning(f"Caught error, exiting loop: domain='{domain}',exception[{type(exception)}]='{str(exception)}'")
instances.set_last_error(domain, exception)
offset = 0
break
# Fetch blocked (full suspended) instances
try:
if offset == 0:
- # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset)
+ logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
"sort" : "+pubAt",
"host" : None,
"limit" : step
}), headers)
else:
- # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset)
+ logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
"sort" : "+pubAt",
"host" : None,
"offset" : offset - 1
}), headers)
- # DEBUG: print(f"DEBUG: fetched[]='{type(fetched)}'")
+ logger.debug(f"fetched[]='{type(fetched)}'")
if "error_message" in fetched:
- print(f"WARNING: post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
+ logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
instances.set_last_error(domain, fetched)
break
elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
- print(f"WARNING: post_json_api() returned error: {fetched['error']['message']}")
+ logger.warning(f"post_json_api() returned error: {fetched['error']['message']}")
instances.set_last_error(domain, fetched["json"]["error"]["message"])
break
rows = fetched["json"]
- # DEBUG: print(f"DEBUG: rows({len(rows)})={rows} - blocked")
+ logger.debug(f"rows({len(rows)})={rows} - blocked")
if len(rows) == 0:
- # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
+ logger.debug("Returned zero bytes, exiting loop:", domain)
break
elif len(rows) != config.get("misskey_limit"):
- # DEBUG: print(f"DEBUG: Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
+ logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
offset = offset + (config.get("misskey_limit") - len(rows))
else:
- # DEBUG: print("DEBUG: Raising offset by step:", step)
+ logger.debug("Raising offset by step:", step)
offset = offset + step
count = 0
for instance in rows:
# Is it there?
- # DEBUG: print(f"DEBUG: instance[{type(instance)}]='{instance}' - blocked")
+ logger.debug(f"instance[{type(instance)}]='{instance}' - blocked")
if "isBlocked" in instance and instance["isBlocked"] and not dicts.has_key(blocklist["blocked"], "domain", instance["host"]):
count = count + 1
blocklist["blocked"].append({
"reason": None
})
- # DEBUG: print(f"DEBUG: count={count}")
+ logger.debug(f"count={count}")
if count == 0:
- # DEBUG: print("DEBUG: API is no more returning new instances, aborting loop!")
+ logger.debug("API is no more returning new instances, aborting loop!")
break
except network.exceptions as exception:
- print(f"WARNING: Caught error, exiting loop: domain='{domain}',exception[{type(exception)}]='{str(exception)}'")
+ logger.warning(f"Caught error, exiting loop: domain='{domain}',exception[{type(exception)}]='{str(exception)}'")
instances.set_last_error(domain, exception)
offset = 0
break
- # DEBUG: print(f"DEBUG: Returning for domain='{domain}',blocked()={len(blocklist['blocked'])},suspended()={len(blocklist['suspended'])}")
+ logger.debug(f"Returning for domain='{domain}',blocked()={len(blocklist['blocked'])},suspended()={len(blocklist['suspended'])}")
return {
"reject" : blocklist["blocked"],
"followers_only": blocklist["suspended"]
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
+import logging
+
import validators
from fba import csrf
from fba.models import instances
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
def fetch_peers(domain: str) -> list:
- # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',software='peertube' - CALLED!")
+ logger.debug(f"domain({len(domain)})='{domain}',software='peertube' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
elif domain.endswith(".tld"):
raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
- # DEBUG: print(f"DEBUG: domain='{domain}' is a PeerTube, fetching JSON ...")
+ logger.debug(f"domain='{domain}' is a PeerTube, fetching JSON ...")
peers = list()
start = 0
headers = tuple()
try:
- # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
+ logger.debug(f"Checking CSRF for domain='{domain}'")
headers = csrf.determine(domain, dict())
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
+ logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
instances.set_last_error(domain, exception)
return peers
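+ # PeerTube exposes peers through its followers/following API, paginated 100 records
+ # at a time; collect the 'host' field from both directions.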
for mode in ["followers", "following"]:
- # DEBUG: print(f"DEBUG: domain='{domain}',mode='{mode}'")
+ logger.debug(f"domain='{domain}',mode='{mode}'")
while True:
data = network.get_json_api(
domain,
(config.get("connection_timeout"), config.get("read_timeout"))
)
- # DEBUG: print(f"DEBUG: data[]='{type(data)}'")
+ logger.debug(f"data[]='{type(data)}'")
if "error_message" not in data:
- # DEBUG: print(f"DEBUG: Success, data[json]()={len(data['json'])}")
+ logger.debug(f"Success, data[json]()={len(data['json'])}")
if "data" in data["json"]:
rows = data["json"]["data"]
- # DEBUG: print(f"DEBUG: Found {len(rows)} record(s).")
+ logger.debug(f"Found {len(rows)} record(s).")
for record in rows:
- # DEBUG: print(f"DEBUG: record()={len(record)}")
+ logger.debug(f"record()={len(record)}")
for mode2 in ["follower", "following" ]:
- # DEBUG: print(f"DEBUG: mode2='{mode2}'")
+ logger.debug(f"mode2='{mode2}'")
if mode2 in record and "host" in record[mode2]:
- # DEBUG: print(f"DEBUG: Found host='{record[mode2]['host']}', adding ...")
+ logger.debug(f"Found host='{record[mode2]['host']}', adding ...")
peers.append(record[mode2]["host"])
else:
- print(f"WARNING: record from '{domain}' has no '{mode2}' or 'host' record: {record}")
+ logger.warning(f"record from '{domain}' has no '{mode2}' or 'host' record: {record}")
if len(rows) < 100:
- # DEBUG: print(f"DEBUG: Reached end of JSON response, domain='{domain}'")
+ logger.debug(f"Reached end of JSON response, domain='{domain}'")
break
# Continue with next row
start = start + 100
else:
- print(f"WARNING: domain='{domain}' causes error during API query: '{data['error_message']}' - SKIPPED!")
+ logger.warning(f"domain='{domain}' causes error during API query: '{data['error_message']}' - SKIPPED!")
break
- # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
+ logger.debug(f"Adding '{len(peers)}' for domain='{domain}'")
instances.set_total_peers(domain, peers)
- # DEBUG: print(f"DEBUG: Returning peers[]='{type(peers)}'")
+ logger.debug(f"Returning peers[]='{type(peers)}'")
return peers
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import inspect
+import logging
import bs4
import validators
from fba.models import blocks
from fba.models import instances
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Language mapping X -> English
language_mapping = {
# English -> English
"Reject": "Suspended servers",
}
def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
- # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
+ logger.debug(f"domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
blockdict = list()
rows = None
try:
- # DEBUG: print(f"DEBUG: Fetching nodeinfo: domain='{domain}',nodeinfo_url='{nodeinfo_url}'")
+ logger.debug(f"Fetching nodeinfo: domain='{domain}',nodeinfo_url='{nodeinfo_url}'")
rows = federation.fetch_nodeinfo(domain, nodeinfo_url)
except network.exceptions as exception:
- print(f"WARNING: Exception '{type(exception)}' during fetching nodeinfo")
+ logger.warning(f"Exception '{type(exception)}' during fetching nodeinfo")
instances.set_last_error(domain, exception)
if rows is None:
- print("WARNING: Could not fetch nodeinfo from domain:", domain)
+ logger.warning("Could not fetch nodeinfo from domain:", domain)
return
elif "metadata" not in rows:
- print(f"WARNING: rows()={len(rows)} does not have key 'metadata', domain='{domain}'")
+ logger.warning(f"rows()={len(rows)} does not have key 'metadata', domain='{domain}'")
return
elif "federation" not in rows["metadata"]:
- print(f"WARNING: rows()={len(rows['metadata'])} does not have key 'federation', domain='{domain}'")
+ logger.warning(f"rows()={len(rows['metadata'])} does not have key 'federation', domain='{domain}'")
return
data = rows["metadata"]["federation"]
found = False
- # DEBUG: print(f"DEBUG: data[]='{type(data)}'")
+ logger.debug(f"data[]='{type(data)}'")
if "mrf_simple" in data:
- # DEBUG: print("DEBUG: Found mrf_simple:", domain)
+ logger.debug("Found mrf_simple:", domain)
found = True
for block_level, blocklist in (
{
}
}
).items():
- # DEBUG: print("DEBUG: block_level, blocklist():", block_level, len(blocklist))
+ logger.debug("block_level, blocklist():", block_level, len(blocklist))
block_level = tidyup.domain(block_level)
- # DEBUG: print("DEBUG: BEFORE block_level:", block_level)
+ logger.debug("BEFORE block_level:", block_level)
if block_level == "":
- print("WARNING: block_level is now empty!")
+ logger.warning("block_level is now empty!")
continue
elif block_level == "accept":
- # DEBUG: print(f"DEBUG: domain='{domain}' skipping block_level='accept'")
+ logger.debug(f"domain='{domain}' skipping block_level='accept'")
continue
- # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from domain='{domain}',block_level='{block_level}' ...")
+ logger.debug(f"Checking {len(blocklist)} entries from domain='{domain}',block_level='{block_level}' ...")
if len(blocklist) > 0:
for blocked in blocklist:
- # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
+ logger.debug("BEFORE blocked:", blocked)
blocked = tidyup.domain(blocked)
- # DEBUG: print("DEBUG: AFTER blocked:", blocked)
+ logger.debug("AFTER blocked:", blocked)
if blocked == "":
- print("WARNING: blocked is empty after tidyup.domain():", domain, block_level)
+ logger.warning("blocked is empty after tidyup.domain():", domain, block_level)
continue
elif blacklist.is_blacklisted(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
+ logger.debug(f"blocked='{blocked}' is blacklisted - skipping!")
continue
elif blocked.count("*") > 0:
# Obscured domain name with no hash
row = instances.deobscure("*", blocked)
- # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
+ logger.debug(f"row[]='{type(row)}'")
if row is None:
- print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
+ logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
+ logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'")
blocked = row[0]
origin = row[1]
nodeinfo_url = row[2]
# Obscured domain name with no hash
row = instances.deobscure("?", blocked)
- # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
+ logger.debug(f"row[]='{type(row)}'")
if row is None:
- print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
+ logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
+ logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'")
blocked = row[0]
origin = row[1]
nodeinfo_url = row[2]
- # DEBUG: print(f"DEBUG: blocked='{blocked}'")
+ logger.debug(f"blocked='{blocked}'")
if not validators.domain(blocked):
- print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!")
+ logger.warning(f"blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!")
continue
elif blocked.endswith(".arpa"):
- print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
+ logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
continue
elif blocked.endswith(".tld"):
- print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!")
continue
elif blacklist.is_blacklisted(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
+ logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!")
continue
elif not instances.is_registered(blocked):
# Commit changes
fba.connection.commit()
- # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
+ logger.debug(f"Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
if not blocks.is_instance_blocked(domain, blocked, block_level):
- # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
+ logger.debug("Blocking:", domain, blocked, block_level)
blocks.add_instance(domain, blocked, None, block_level)
if block_level == "reject":
- # DEBUG: print("DEBUG: Adding to blockdict:", blocked)
+ logger.debug("Adding to blockdict:", blocked)
blockdict.append({
"blocked": blocked,
"reason" : None
})
else:
- # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
+ logger.debug(f"Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
blocks.update_last_seen(domain, blocked, block_level)
elif "quarantined_instances" in data:
- # DEBUG: print(f"DEBUG: Found 'quarantined_instances' in JSON response: domain='{domain}'")
+ logger.debug(f"Found 'quarantined_instances' in JSON response: domain='{domain}'")
found = True
block_level = "quarantined"
for blocked in data["quarantined_instances"]:
- # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
+ logger.debug("BEFORE blocked:", blocked)
blocked = tidyup.domain(blocked)
- # DEBUG: print("DEBUG: AFTER blocked:", blocked)
+ logger.debug("AFTER blocked:", blocked)
if blocked == "":
- print("WARNING: blocked is empty after tidyup.domain():", domain, block_level)
+ logger.warning("blocked is empty after tidyup.domain():", domain, block_level)
continue
elif blacklist.is_blacklisted(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
+ logger.debug(f"blocked='{blocked}' is blacklisted - skipping!")
continue
elif blocked.count("*") > 0:
# Obscured domain name with no hash
row = instances.deobscure("*", blocked)
- # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
+ logger.debug(f"row[]='{type(row)}'")
if row is None:
- print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
+ logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
+ logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'")
blocked = row[0]
origin = row[1]
nodeinfo_url = row[2]
# Obscured domain name with no hash
row = instances.deobscure("?", blocked)
- # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
+ logger.debug(f"row[]='{type(row)}'")
if row is None:
- print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
+ logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
+ logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'")
blocked = row[0]
origin = row[1]
nodeinfo_url = row[2]
- # DEBUG: print(f"DEBUG: blocked='{blocked}'")
+ logger.debug(f"blocked='{blocked}'")
if not validators.domain(blocked):
- print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!")
+ logger.warning(f"blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!")
continue
elif blocked.endswith(".arpa"):
- print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
+ logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
continue
elif blocked.endswith(".tld"):
- print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!")
continue
elif blacklist.is_blacklisted(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
+ logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!")
continue
elif not instances.is_registered(blocked):
# Commit changes
fba.connection.commit()
- # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
+ logger.debug(f"Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
if not blocks.is_instance_blocked(domain, blocked, block_level):
- # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
+ logger.debug("Blocking:", domain, blocked, block_level)
blocks.add_instance(domain, blocked, None, block_level)
if block_level == "reject":
- # DEBUG: print("DEBUG: Adding to blockdict:", blocked)
+ logger.debug("Adding to blockdict:", blocked)
blockdict.append({
"blocked": blocked,
"reason" : None
})
else:
- # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
+ logger.debug(f"Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
blocks.update_last_seen(domain, blocked, block_level)
else:
- print(f"WARNING: Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='{domain}'")
+ logger.warning(f"Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='{domain}'")
- # DEBUG: print("DEBUG: Committing changes ...")
+ logger.debug("Committing changes ...")
fba.connection.commit()
# Reasons
if "mrf_simple_info" in data:
- # DEBUG: print("DEBUG: Found mrf_simple_info:", domain)
+ logger.debug("Found mrf_simple_info:", domain)
found = True
for block_level, info in (
{
**(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
}
).items():
- # DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items()))
+ logger.debug("block_level, info.items():", block_level, len(info.items()))
block_level = tidyup.domain(block_level)
- # DEBUG: print("DEBUG: BEFORE block_level:", block_level)
+ logger.debug("BEFORE block_level:", block_level)
if block_level == "":
- print("WARNING: block_level is now empty!")
+ logger.warning("block_level is now empty!")
continue
elif block_level == "accept":
- # DEBUG: print(f"DEBUG: domain='{domain}' skipping block_level='accept'")
+ logger.debug(f"domain='{domain}' skipping block_level='accept'")
continue
- # DEBUG: print(f"DEBUG: Checking {len(info.items())} entries from domain='{domain}',software='pleroma',block_level='{block_level}' ...")
+ logger.debug(f"Checking {len(info.items())} entries from domain='{domain}',software='pleroma',block_level='{block_level}' ...")
for blocked, reason in info.items():
- # DEBUG: print(f"DEBUG: blocked='{blocked}',reason[{type(reason)}]='{reason}' - BEFORE!")
+ logger.debug(f"blocked='{blocked}',reason[{type(reason)}]='{reason}' - BEFORE!")
blocked = tidyup.domain(blocked)
if isinstance(reason, str):
- # DEBUG: print("DEBUG: reason[] is a string")
+ logger.debug("reason[] is a string")
reason = tidyup.reason(reason)
elif isinstance(reason, dict) and "reason" in reason:
- # DEBUG: print("DEBUG: reason[] is a dict")
+ logger.debug("reason[] is a dict")
reason = tidyup.reason(reason["reason"])
elif reason is not None:
raise ValueError(f"Cannot handle reason[]='{type(reason)}'")
- # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")
+ logger.debug(f"blocked='{blocked}',reason='{reason}' - AFTER!")
if blocked == "":
- print("WARNING: blocked is empty after tidyup.domain():", domain, block_level)
+ logger.warning("blocked is empty after tidyup.domain():", domain, block_level)
continue
elif blacklist.is_blacklisted(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
+ logger.debug(f"blocked='{blocked}' is blacklisted - skipping!")
continue
elif blocked.count("*") > 0:
# Obscured domain name with no hash
row = instances.deobscure("*", blocked)
- # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
+ logger.debug(f"row[]='{type(row)}'")
if row is None:
- print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
+ logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
+ logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'")
blocked = row[0]
origin = row[1]
nodeinfo_url = row[2]
# Obscured domain name with no hash
row = instances.deobscure("?", blocked)
- # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
+ logger.debug(f"row[]='{type(row)}'")
if row is None:
- print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
+ logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
+ logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'")
blocked = row[0]
origin = row[1]
nodeinfo_url = row[2]
- # DEBUG: print(f"DEBUG: blocked='{blocked}'")
+ logger.debug(f"blocked='{blocked}'")
if not validators.domain(blocked):
- print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!")
+ logger.warning(f"blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!")
continue
elif blocked.endswith(".arpa"):
- print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
+ logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
continue
elif blocked.endswith(".tld"):
- print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!")
continue
elif blacklist.is_blacklisted(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
+ logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!")
continue
elif not instances.is_registered(blocked):
- # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
+ logger.debug(f"Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
- # DEBUG: print(f"DEBUG: Updating block reason: reason='{reason}',domain='{domain}',blocked='{blocked}',block_level='{block_level}'")
+ logger.debug(f"Updating block reason: reason='{reason}',domain='{domain}',blocked='{blocked}',block_level='{block_level}'")
blocks.update_reason(reason, domain, blocked, block_level)
- # DEBUG: print(f"DEBUG: blockdict()={len(blockdict)}")
+ logger.debug(f"blockdict()={len(blockdict)}")
for entry in blockdict:
if entry["blocked"] == blocked:
- # DEBUG: print(f"DEBUG: Updating entry reason: blocked='{blocked}',reason='{reason}'")
+ logger.debug(f"Updating entry reason: blocked='{blocked}',reason='{reason}'")
entry["reason"] = reason
elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
- # DEBUG: print(f"DEBUG: Found 'quarantined_instances_info' in JSON response: domain='{domain}'")
+ logger.debug(f"Found 'quarantined_instances_info' in JSON response: domain='{domain}'")
found = True
block_level = "quarantined"
#print(data["quarantined_instances_info"])
rows = data["quarantined_instances_info"]["quarantined_instances"]
for blocked in rows:
- # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
+ logger.debug("BEFORE blocked:", blocked)
blocked = tidyup.domain(blocked)
- # DEBUG: print("DEBUG: AFTER blocked:", blocked)
+ logger.debug("AFTER blocked:", blocked)
if blocked not in rows or "reason" not in rows[blocked]:
- print(f"WARNING: Cannot find blocked='{blocked}' in rows()={len(rows)},domain='{domain}'")
+ logger.warning(f"Cannot find blocked='{blocked}' in rows()={len(rows)},domain='{domain}'")
break
reason = rows[blocked]["reason"]
- # DEBUG: print(f"DEBUG: reason='{reason}'")
+ logger.debug(f"reason='{reason}'")
if blocked == "":
- print("WARNING: blocked is empty after tidyup.domain():", domain, block_level)
+ logger.warning("blocked is empty after tidyup.domain():", domain, block_level)
continue
elif blacklist.is_blacklisted(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
+ logger.debug(f"blocked='{blocked}' is blacklisted - skipping!")
continue
elif blocked.count("*") > 0:
# Obscured domain name with no hash
row = instances.deobscure("*", blocked)
- # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
+ logger.debug(f"row[]='{type(row)}'")
if row is None:
- print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
+ logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
+ logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'")
blocked = row[0]
origin = row[1]
nodeinfo_url = row[2]
# Obscured domain name with no hash
row = instances.deobscure("?", blocked)
- # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
+ logger.debug(f"row[]='{type(row)}'")
if row is None:
- print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
+ logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
+ logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'")
blocked = row[0]
origin = row[1]
nodeinfo_url = row[2]
- # DEBUG: print(f"DEBUG: blocked='{blocked}'")
+ logger.debug(f"blocked='{blocked}'")
if not validators.domain(blocked):
- print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!")
+ logger.warning(f"blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!")
continue
elif blocked.endswith(".arpa"):
- print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
+ logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
continue
elif blocked.endswith(".tld"):
- print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!")
continue
elif blacklist.is_blacklisted(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
+ logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!")
continue
elif not instances.is_registered(blocked):
- # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
+ logger.debug(f"Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
- # DEBUG: print(f"DEBUG: Updating block reason: reason='{reason}',domain='{domain}',blocked='{blocked}',block_level='{block_level}'")
+ logger.debug(f"Updating block reason: reason='{reason}',domain='{domain}',blocked='{blocked}',block_level='{block_level}'")
blocks.update_reason(reason, domain, blocked, block_level)
- # DEBUG: print(f"DEBUG: blockdict()={len(blockdict)}")
+ logger.debug(f"blockdict()={len(blockdict)}")
for entry in blockdict:
if entry["blocked"] == blocked:
- # DEBUG: print(f"DEBUG: Updating entry reason: blocked='{blocked}',reason='{reason}'")
+ logger.debug(f"Updating entry reason: blocked='{blocked}',reason='{reason}'")
entry["reason"] = reason
else:
- print(f"WARNING: Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='{domain}'")
+ logger.warning(f"Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='{domain}'")
if not found:
- # DEBUG: print(f"DEBUG: Did not find any useable JSON elements, domain='{domain}', continuing with /about page ...")
+ logger.debug(f"Did not find any useable JSON elements, domain='{domain}', continuing with /about page ...")
blocklist = fetch_blocks_from_about(domain)
- # DEBUG: print(f"DEBUG: blocklist()={len(blocklist)}")
+ logger.debug(f"blocklist()={len(blocklist)}")
if len(blocklist) > 0:
- print(f"INFO: Checking {len(blocklist)} record(s) ...")
+ logger.info(f"Checking {len(blocklist)} record(s) ...")
for block_level in blocklist:
- # DEBUG: print(f"DEBUG: block_level='{block_level}'")
+ logger.debug(f"block_level='{block_level}'")
rows = blocklist[block_level]
- # DEBUG: print(f"DEBUG: rows['{type(rows)}]()={len(rows)}'")
+ logger.debug(f"rows['{type(rows)}]()={len(rows)}'")
for record in rows:
- # DEBUG: print(f"DEBUG: record[]='{type(record)}'")
+ logger.debug(f"record[]='{type(record)}'")
blocked = tidyup.domain(record["blocked"])
reason = tidyup.reason(record["reason"])
- # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")
+ logger.debug(f"blocked='{blocked}',reason='{reason}' - AFTER!")
if blocked == "":
- print("WARNING: blocked is empty after tidyup.domain():", domain, block_level)
+ logger.warning("blocked is empty after tidyup.domain():", domain, block_level)
continue
elif blacklist.is_blacklisted(blocked):
- # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
+ logger.debug(f"blocked='{blocked}' is blacklisted - skipping!")
continue
elif blocked.count("*") > 0:
# Obscured domain name with no hash
row = instances.deobscure("*", blocked)
- # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
+ logger.debug(f"row[]='{type(row)}'")
if row is None:
- print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
+ logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
+ logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'")
blocked = row[0]
origin = row[1]
nodeinfo_url = row[2]
# Obscured domain name with no hash
row = instances.deobscure("?", blocked)
- # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
+ logger.debug(f"row[]='{type(row)}'")
if row is None:
- print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
+ logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
+ logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'")
blocked = row[0]
origin = row[1]
nodeinfo_url = row[2]
- # DEBUG: print(f"DEBUG: blocked='{blocked}'")
+ logger.debug(f"blocked='{blocked}'")
if not validators.domain(blocked):
- print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!")
+ logger.warning(f"blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!")
continue
elif blocked.endswith(".arpa"):
- print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
+ logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
continue
elif blocked.endswith(".tld"):
- print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
+ logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!")
continue
elif not instances.is_registered(blocked):
- # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
+ logger.debug(f"Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
if not blocks.is_instance_blocked(domain, blocked, block_level):
- # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
+ logger.debug("Blocking:", domain, blocked, block_level)
blocks.add_instance(domain, blocked, reason, block_level)
if block_level == "reject":
- # DEBUG: print("DEBUG: Adding to blockdict:", blocked)
+ logger.debug("Adding to blockdict:", blocked)
blockdict.append({
"blocked": blocked,
"reason" : reason
})
else:
- # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
+ logger.debug(f"Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
blocks.update_reason(reason, domain, blocked, block_level)
fba.connection.commit()
- # DEBUG: print("DEBUG: EXIT!")
+ logger.debug("EXIT!")
def fetch_blocks_from_about(domain: str) -> dict:
- # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
+ logger.debug(f"domain='{domain}' - CALLED!")
if not isinstance(domain, str):
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
elif domain == "":
elif domain.endswith(".tld"):
raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
- # DEBUG: print(f"DEBUG: Fetching mastodon blocks from domain='{domain}'")
+ logger.debug(f"Fetching mastodon blocks from domain='{domain}'")
doc = None
for path in ["/instance/about/index.html"]:
try:
# Resetting doc type
doc = None
- # DEBUG: print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...")
+ logger.debug(f"Fetching path='{path}' from domain='{domain}' ...")
response = network.fetch_response(
domain,
path,
(config.get("connection_timeout"), config.get("read_timeout"))
)
- # DEBUG: print(f"DEBUG: response.ok='{response.ok}',response.status_code='{response.status_code}',response.text()={len(response.text)}")
+ logger.debug(f"response.ok='{response.ok}',response.status_code='{response.status_code}',response.text()={len(response.text)}")
if not response.ok or response.text.strip() == "":
- print(f"WARNING: path='{path}' does not exist on domain='{domain}' - SKIPPED!")
+ logger.warning(f"path='{path}' does not exist on domain='{domain}' - SKIPPED!")
continue
- # DEBUG: print(f"DEBUG: Parsing response.text()={len(response.text)} Bytes ...")
+ logger.debug(f"Parsing response.text()={len(response.text)} Bytes ...")
doc = bs4.BeautifulSoup(
response.text,
"html.parser",
)
- # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
+ logger.debug(f"doc[]='{type(doc)}'")
if doc.find("h2") is not None:
- # DEBUG: print(f"DEBUG: Found 'h2' header in path='{path}' - BREAK!")
+ logger.debug(f"Found 'h2' header in path='{path}' - BREAK!")
break
except network.exceptions as exception:
- print("ERROR: Cannot fetch from domain:", domain, exception)
+ logger.warning("Cannot fetch from domain:", domain, exception)
instances.set_last_error(domain, exception)
break
"Silenced servers" : [],
}
- # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
+ logger.debug(f"doc[]='{type(doc)}'")
if doc is None:
- print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!")
+ logger.warning(f"Cannot fetch any /about pages for domain='{domain}' - EXIT!")
return blocklist
for header in doc.find_all("h2"):
header_text = tidyup.reason(header.text)
- # DEBUG: print(f"DEBUG: header_text='{header_text}' - BEFORE!")
+ logger.debug(f"header_text='{header_text}' - BEFORE!")
if header_text in language_mapping:
- # DEBUG: print(f"DEBUG: header_text='{header_text}' - FOUND!")
+ logger.debug(f"header_text='{header_text}' - FOUND!")
header_text = language_mapping[header_text]
else:
- print(f"WARNING: header_text='{header_text}' not found in language mapping table")
+ logger.warning(f"header_text='{header_text}' not found in language mapping table")
- # DEBUG: print(f"DEBUG: header_text='{header_text} - AFTER!'")
+ logger.debug(f"header_text='{header_text} - AFTER!'")
if header_text in blocklist or header_text.lower() in blocklist:
# replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
- # DEBUG: print(f"DEBUG: Found header_text='{header_text}', importing domain blocks ...")
+ logger.debug(f"Found header_text='{header_text}', importing domain blocks ...")
for line in header.find_next("table").find_all("tr")[1:]:
- # DEBUG: print(f"DEBUG: line[]='{type(line)}'")
+ logger.debug(f"line[]='{type(line)}'")
blocklist[header_text].append({
"blocked": tidyup.domain(line.find_all("td")[0].text),
"reason" : tidyup.reason(line.find_all("td")[1].text),
})
else:
- print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
+ logger.warning(f"header_text='{header_text}' not found in blocklist()={len(blocklist)}")
- # DEBUG: print(f"DEBUG: Returning blocklist for domain='{domain}'")
+ logger.debug(f"Returning blocklist for domain='{domain}'")
return {
"reject" : blocklist["Suspended servers"],
"media_removal" : blocklist["Filtered media"],