From 874b573858431f6c56e0432cdb311282cbe0fadf Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Wed, 21 Jun 2023 08:09:23 +0200 Subject: [PATCH] Continued: - introduced real logger, all commented-out debug lines are now active --- fba/boot.py | 22 ++- fba/commands.py | 358 +++++++++++++++++++------------------- fba/csrf.py | 22 ++- fba/fba.py | 46 ++--- fba/helpers/blacklist.py | 13 +- fba/helpers/cache.py | 27 +-- fba/helpers/config.py | 10 +- fba/helpers/cookies.py | 23 ++- fba/helpers/dicts.py | 13 +- fba/helpers/locking.py | 22 ++- fba/helpers/tidyup.py | 24 +-- fba/helpers/version.py | 58 +++--- fba/http/federation.py | 305 ++++++++++++++++---------------- fba/http/network.py | 61 ++++--- fba/models/blocks.py | 25 +-- fba/models/error_log.py | 16 +- fba/models/instances.py | 132 +++++++------- fba/networks/friendica.py | 37 ++-- fba/networks/lemmy.py | 76 ++++---- fba/networks/mastodon.py | 133 +++++++------- fba/networks/misskey.py | 118 +++++++------ fba/networks/peertube.py | 37 ++-- fba/networks/pleroma.py | 299 +++++++++++++++---------------- 23 files changed, 991 insertions(+), 886 deletions(-) diff --git a/fba/boot.py b/fba/boot.py index ed80085..0e083b8 100644 --- a/fba/boot.py +++ b/fba/boot.py @@ -14,6 +14,8 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
+import logging + import argparse from fba import commands @@ -21,13 +23,17 @@ from fba import fba from fba.helpers import locking +logging.basicConfig(level=logging.WARNING) +logger = logging.getLogger(__name__) + +# Argument parser _PARSER = None def init_parser(): - # DEBUG: print("DEBUG: init_parser(): CALLED!") + logger.debug("init_parser(): CALLED!") global _PARSER - # DEBUG: print("DEBUG: Initializing parser ...") + logger.debug("Initializing parser ...") _PARSER = argparse.ArgumentParser( description="Fetches block reasons from the fediverse", epilog="Please note that some commands have optional arguments, you may want to try fba.py --help to find them out.", @@ -116,18 +122,18 @@ def init_parser(): ) parser.set_defaults(command=commands.fetch_observer) - # DEBUG: print("DEBUG: init_parser(): EXIT!") + logger.debug("init_parser(): EXIT!") def run_command(): - # DEBUG: print("DEBUG: run_command(): CALLED!") + logger.debug("run_command(): CALLED!") args = _PARSER.parse_args() - # DEBUG: print(f"DEBUG: args[{type(args)}]={args}") + logger.debug(f"args[{type(args)}]={args}") status = args.command(args) - # DEBUG: print("DEBUG: status={status} - EXIT!") + logger.debug("status={status} - EXIT!") return status if isinstance(status, int) else 0 def shutdown(): - # DEBUG: print("DEBUG: Closing database connection ...") + logger.debug("Closing database connection ...") fba.connection.close() locking.release() - # DEBUG: print("DEBUG: Shutdown completed.") + logger.debug("Shutdown completed.") diff --git a/fba/commands.py b/fba/commands.py index ae79e81..4bb5291 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -17,6 +17,7 @@ import csv import inspect import json +import logging import time import argparse @@ -46,43 +47,46 @@ from fba.networks import mastodon from fba.networks import misskey from fba.networks import pleroma +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + def check_instance(args: argparse.Namespace) -> int: - # 
DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!") + logger.debug(f"args.domain='{args.domain}' - CALLED!") status = 0 if not validators.domain(args.domain): - print(f"WARNING: args.domain='{args.domain}' is not valid") + logger.warning(f"args.domain='{args.domain}' is not valid") status = 100 elif blacklist.is_blacklisted(args.domain): - print(f"WARNING: args.domain='{args.domain}' is blacklisted") + logger.warning(f"args.domain='{args.domain}' is blacklisted") status = 101 elif instances.is_registered(args.domain): - print(f"WARNING: args.domain='{args.domain}' is already registered") + logger.warning(f"args.domain='{args.domain}' is already registered") status = 102 else: - print(f"INFO: args.domain='{args.domain}' is not known") + logger.info(f"args.domain='{args.domain}' is not known") - # DEBUG: print(f"DEBUG: status={status} - EXIT!") + logger.debug(f"status={status} - EXIT!") return status def fetch_bkali(args: argparse.Namespace) -> int: - # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") + logger.debug(f"args[]='{type(args)}' - CALLED!") domains = list() try: fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({ "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}" })) - # DEBUG: print(f"DEBUG: fetched[]='{type(fetched)}'") + logger.debug(f"fetched[]='{type(fetched)}'") if "error_message" in fetched: - print(f"WARNING: post_json_api() for 'gql.api.bka.li' returned error message: {fetched['error_message']}") + logger.warning(f"post_json_api() for 'gql.api.bka.li' returned error message: {fetched['error_message']}") return 100 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]: - print(f"WARNING: post_json_api() returned error: {fetched['error']['message']}") + logger.warning(f"post_json_api() returned error: {fetched['error']['message']}") return 101 rows = fetched["json"] - # DEBUG: print(f"DEBUG: rows({len(rows)})[]='{type(rows)}'") 
+ logger.debug(f"rows({len(rows)})[]='{type(rows)}'") if len(rows) == 0: raise Exception("WARNING: Returned no records") elif "data" not in rows: @@ -91,74 +95,74 @@ def fetch_bkali(args: argparse.Namespace) -> int: raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'") for entry in rows["data"]["nodeinfo"]: - # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'") + logger.debug(f"entry['{type(entry)}']='{entry}'") if "domain" not in entry: - print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!") + logger.warning(f"entry()={len(entry)} does not contain 'domain' - SKIPPED!") continue elif not validators.domain(entry["domain"]): - print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!") + logger.warning(f"domain='{entry['domain']}' is not a valid domain - SKIPPED!") continue elif blacklist.is_blacklisted(entry["domain"]): - # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!") + logger.debug(f"domain='{entry['domain']}' is blacklisted - SKIPPED!") continue elif instances.is_registered(entry["domain"]): - # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!") + logger.debug(f"domain='{entry['domain']}' is already registered - SKIPPED!") continue elif instances.is_recent(entry["domain"]): - # DEBUG: print(f"DEBUG: domain='{entry['domain']}' has been recently fetched - SKIPPED!") + logger.debug(f"domain='{entry['domain']}' has been recently fetched - SKIPPED!") continue - # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...") + logger.debug(f"Adding domain='{entry['domain']}' ...") domains.append(entry["domain"]) except network.exceptions as exception: - print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}' - EXIT!") + logger.error(f"Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}' - EXIT!") return 102 - # DEBUG: print(f"DEBUG: domains()={len(domains)}") + 
logger.debug(f"domains()={len(domains)}") if len(domains) > 0: locking.acquire() - print(f"INFO: Adding {len(domains)} new instances ...") + logger.info(f"Adding {len(domains)} new instances ...") for domain in domains: try: - print(f"INFO: Fetching instances from domain='{domain}' ...") + logger.info(f"Fetching instances from domain='{domain}' ...") federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name) - # DEBUG: print(f"DEBUG: Invoking cookies.clear({domain}) ...") + logger.debug(f"Invoking cookies.clear({domain}) ...") cookies.clear(domain) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_bkali) from domain='{domain}'") + logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_bkali) from domain='{domain}'") instances.set_last_error(domain, exception) - # DEBUG: print("DEBUG: Success - EXIT!") + logger.debug("Success - EXIT!") return 0 def fetch_blocks(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") + logger.debug(f"args[]='{type(args)}' - CALLED!") if args.domain is not None and args.domain != "": - # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...") + logger.debug(f"args.domain='{args.domain}' - checking ...") if not validators.domain(args.domain): - print(f"WARNING: domain='{args.domain}' is not valid.") + logger.warning(f"domain='{args.domain}' is not valid.") return elif blacklist.is_blacklisted(args.domain): - print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!") + logger.warning(f"domain='{args.domain}' is blacklisted, won't check it!") return elif not instances.is_registered(args.domain): - print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.") + logger.warning(f"domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.") return locking.acquire() if 
args.domain is not None and args.domain != "": # Re-check single domain - # DEBUG: print(f"DEBUG: Querying database for single args.domain='{args.domain}' ...") + logger.debug(f"Querying database for single args.domain='{args.domain}' ...") fba.cursor.execute( "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain] ) elif args.software is not None and args.software != "": # Re-check single software - # DEBUG: print(f"DEBUG: Querying database for args.software='{args.software}' ...") + logger.debug(f"Querying database for args.software='{args.software}' ...") fba.cursor.execute( "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software = ?", [args.software] ) @@ -169,37 +173,37 @@ def fetch_blocks(args: argparse.Namespace): ) rows = fba.cursor.fetchall() - print(f"INFO: Checking {len(rows)} entries ...") + logger.info(f"Checking {len(rows)} entries ...") for blocker, software, origin, nodeinfo_url in rows: - # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url) + logger.debug("BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url) blockdict = list() blocker = tidyup.domain(blocker) - # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software) + logger.debug("AFTER blocker,software:", blocker, software) if blocker == "": - print("WARNING: blocker is now empty!") + logger.warning("blocker is now empty!") continue elif nodeinfo_url is None or nodeinfo_url == "": - # DEBUG: print(f"DEBUG: blocker='{blocker}',software='{software}' has empty nodeinfo_url") + logger.debug(f"blocker='{blocker}',software='{software}' has empty nodeinfo_url") continue elif blacklist.is_blacklisted(blocker): - print(f"WARNING: blocker='{blocker}' is blacklisted now!") + logger.warning(f"blocker='{blocker}' is blacklisted now!") continue - # DEBUG: print(f"DEBUG: blocker='{blocker}'") + logger.debug(f"blocker='{blocker}'") 
instances.set_last_blocked(blocker) if software == "pleroma": - print(f"INFO: blocker='{blocker}',software='{software}'") + logger.info(f"blocker='{blocker}',software='{software}'") pleroma.fetch_blocks(blocker, origin, nodeinfo_url) elif software == "mastodon": - print(f"INFO: blocker='{blocker}',software='{software}'") + logger.info(f"blocker='{blocker}',software='{software}'") mastodon.fetch_blocks(blocker, origin, nodeinfo_url) elif software == "lemmy": - print(f"INFO: blocker='{blocker}',software='{software}'") + logger.info(f"blocker='{blocker}',software='{software}'") lemmy.fetch_blocks(blocker, origin, nodeinfo_url) elif software == "friendica" or software == "misskey": - print(f"INFO: blocker='{blocker}',software='{software}'") + logger.info(f"blocker='{blocker}',software='{software}'") blocking = list() if software == "friendica": @@ -207,36 +211,36 @@ def fetch_blocks(args: argparse.Namespace): elif software == "misskey": blocking = misskey.fetch_blocks(blocker) - print(f"INFO: Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...") + logger.info(f"Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...") for block_level, blocklist in blocking.items(): - # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist)) + logger.debug("blocker,block_level,blocklist():", blocker, block_level, len(blocklist)) block_level = tidyup.domain(block_level) - # DEBUG: print("DEBUG: AFTER-block_level:", block_level) + logger.debug("AFTER-block_level:", block_level) if block_level == "": - print("WARNING: block_level is empty, blocker:", blocker) + logger.warning("block_level is empty, blocker:", blocker) continue - # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") + logger.debug(f"Checking {len(blocklist)} entries from 
blocker='{blocker}',software='{software}',block_level='{block_level}' ...") for block in blocklist: blocked, reason = block.values() - # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!") + logger.debug(f"blocked='{blocked}',reason='{reason}' - BEFORE!") blocked = tidyup.domain(blocked) reason = tidyup.reason(reason) if reason is not None and reason != "" else None - # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!") + logger.debug(f"blocked='{blocked}',reason='{reason}' - AFTER!") if blocked == "": - print("WARNING: blocked is empty:", blocker) + logger.warning("blocked is empty:", blocker) continue elif blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + logger.debug(f"blocked='{blocked}' is blacklisted - skipping!") continue elif blocked.count("*") > 0: # Some friendica servers also obscure domains without hash row = instances.deobscure("*", blocked) - # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + logger.debug(f"row[]='{type(row)}'") if row is None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!") + logger.warning(f"Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!") continue blocked = row[0] @@ -246,27 +250,27 @@ def fetch_blocks(args: argparse.Namespace): # Some obscure them with question marks, not sure if that's dependent on version or not row = instances.deobscure("?", blocked) - # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + logger.debug(f"row[]='{type(row)}'") if row is None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!") + logger.warning(f"Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!") continue blocked = row[0] origin = row[1] nodeinfo_url = row[2] - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + 
logger.debug("Looking up instance by domain:", blocked) if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - SKIPPED!") + logger.warning(f"blocked='{blocked}',software='{software}' is not a valid domain name - SKIPPED!") continue elif blocked.endswith(".arpa"): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is a reverse IP domain - SKIPPED!") + logger.debug(f"blocked='{blocked}' is a reverse IP domain - SKIPPED!") continue elif blocked.endswith(".tld"): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is a fake domain - SKIPPED!") + logger.debug(f"blocked='{blocked}' is a fake domain - SKIPPED!") continue elif not instances.is_registered(blocked): - # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) + logger.debug("Hash wasn't found, adding:", blocked, blocker) try: instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url) except network.exceptions as exception: @@ -282,32 +286,32 @@ def fetch_blocks(args: argparse.Namespace): "reason" : reason }) else: - # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...") + logger.debug(f"Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...") blocks.update_last_seen(blocker, blocked, block_level) blocks.update_reason(reason, blocker, blocked, block_level) - # DEBUG: print(f"DEBUG: Invoking cookies.clear({blocked}) ...") + logger.debug(f"Invoking cookies.clear({blocked}) ...") cookies.clear(blocked) - # DEBUG: print("DEBUG: Committing changes ...") + logger.debug("Committing changes ...") fba.connection.commit() else: - print("WARNING: Unknown software:", blocker, software) + logger.warning("Unknown software:", blocker, software) if instances.has_pending(blocker): - # DEBUG: print(f"DEBUG: Invoking instances.update_data({blocker}) ...") + logger.debug(f"Invoking instances.update_data({blocker}) ...") instances.update_data(blocker) 
if config.get("bot_enabled") and len(blockdict) > 0: network.send_bot_post(blocker, blockdict) - # DEBUG: print(f"DEBUG: Invoking cookies.clear({blocker}) ...") + logger.debug(f"Invoking cookies.clear({blocker}) ...") cookies.clear(blocker) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def fetch_observer(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") + logger.debug(f"args[]='{type(args)}' - CALLED!") types = [ "akoma", "birdsitelive", @@ -343,48 +347,48 @@ def fetch_observer(args: argparse.Namespace): locking.acquire() - print(f"INFO: Fetching {len(types)} different table data ...") + logger.info(f"Fetching {len(types)} different table data ...") for software in types: doc = None try: - # DEBUG: print(f"DEBUG: Fetching table data for software='{software}' ...") + logger.debug(f"Fetching table data for software='{software}' ...") raw = fba.fetch_url(f"https://fediverse.observer/app/views/tabledata.php?software={software}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text - # DEBUG: print(f"DEBUG: raw[{type(raw)}]()={len(raw)}") + logger.debug(f"raw[{type(raw)}]()={len(raw)}") doc = bs4.BeautifulSoup(raw, features='html.parser') - # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'") + logger.debug(f"doc[]='{type(doc)}'") except network.exceptions as exception: - print(f"WARNING: Cannot fetch software='{software}' from fediverse.observer: '{type(exception)}'") + logger.warning(f"Cannot fetch software='{software}' from fediverse.observer: '{type(exception)}'") continue items = doc.findAll("a", {"class": "url"}) - print(f"INFO: Checking {len(items)} items,software='{software}' ...") + logger.info(f"Checking {len(items)} items,software='{software}' ...") for item in items: - # DEBUG: print(f"DEBUG: item[]='{type(item)}'") + logger.debug(f"item[]='{type(item)}'") domain = item.decode_contents() - # DEBUG: print(f"DEBUG: domain='{domain}'") + logger.debug(f"domain='{domain}'") if not 
validators.domain(domain.split("/")[0]): - print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!") + logger.warning(f"domain='{domain}' is not a valid domain - SKIPPED!") continue elif blacklist.is_blacklisted(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") + logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!") continue elif instances.is_registered(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!") + logger.debug(f"domain='{domain}' is already registered - SKIPPED!") continue - print(f"INFO: Fetching instances for domain='{domain}',software='{software}'") + logger.info(f"Fetching instances for domain='{domain}',software='{software}'") federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name) - # DEBUG: print(f"DEBUG: Invoking cookies.clear({domain}) ...") + logger.debug(f"Invoking cookies.clear({domain}) ...") cookies.clear(domain) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def fetch_cs(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") + logger.debug(f"args[]='{type(args)}' - CALLED!") extensions = [ "extra", "abbr", @@ -411,172 +415,172 @@ def fetch_cs(args: argparse.Namespace): } raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text - # DEBUG: print(f"DEBUG: raw()={len(raw)}[]='{type(raw)}'") + logger.debug(f"raw()={len(raw)}[]='{type(raw)}'") doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser') - # DEBUG: print(f"DEBUG: doc()={len(doc)}[]='{type(doc)}'") + logger.debug(f"doc()={len(doc)}[]='{type(doc)}'") silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody") - # DEBUG: print(f"DEBUG: silenced[]='{type(silenced)}'") + logger.debug(f"silenced[]='{type(silenced)}'") domains["silenced"] = 
domains["silenced"] + federation.find_domains(silenced) blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody") - # DEBUG: print(f"DEBUG: blocked[]='{type(blocked)}'") + logger.debug(f"blocked[]='{type(blocked)}'") domains["reject"] = domains["reject"] + federation.find_domains(blocked) - # DEBUG: print(f"DEBUG: domains()={len(domains)}") + logger.debug(f"domains()={len(domains)}") if len(domains) > 0: locking.acquire() - print(f"INFO: Adding {len(domains)} new instances ...") + logger.info(f"Adding {len(domains)} new instances ...") for block_level in domains: - # DEBUG: print(f"DEBUG: block_level='{block_level}'") + logger.debug(f"block_level='{block_level}'") for row in domains[block_level]: - # DEBUG: print(f"DEBUG: row='{row}'") + logger.debug(f"row='{row}'") if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level): - # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...") + logger.debug(f"domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...") blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level) if not instances.is_registered(row["domain"]): try: - print(f"INFO: Fetching instances from domain='{row['domain']}' ...") + logger.info(f"Fetching instances from domain='{row['domain']}' ...") federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name) - # DEBUG: print(f"DEBUG: Invoking cookies.clear({row['domain']}) ...") + logger.debug(f"Invoking cookies.clear({row['domain']}) ...") cookies.clear(row["domain"]) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_cs) from domain='{row['domain']}'") + logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_cs) from domain='{row['domain']}'") instances.set_last_error(row["domain"], exception) - # DEBUG: 
print("DEBUG: Committing changes ...") + logger.debug("Committing changes ...") fba.connection.commit() - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def fetch_fba_rss(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") + logger.debug(f"args[]='{type(args)}' - CALLED!") domains = list() - print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...") + logger.info(f"Fetch FBA-specific RSS args.feed='{args.feed}' ...") response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))) - # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}") + logger.debug(f"response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}") if response.ok and response.status_code < 300 and len(response.text) > 0: - # DEBUG: print(f"DEBUG: Parsing RSS feed ({len(response.text)} Bytes) ...") + logger.debug(f"Parsing RSS feed ({len(response.text)} Bytes) ...") rss = atoma.parse_rss_bytes(response.content) - # DEBUG: print(f"DEBUG: rss[]='{type(rss)}'") + logger.debug(f"rss[]='{type(rss)}'") for item in rss.items: - # DEBUG: print(f"DEBUG: item={item}") + logger.debug(f"item={item}") domain = item.link.split("=")[1] if blacklist.is_blacklisted(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") + logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!") continue elif domain in domains: - # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!") + logger.debug(f"domain='{domain}' is already added - SKIPPED!") continue elif instances.is_registered(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!") + logger.debug(f"domain='{domain}' is already registered - SKIPPED!") continue - # DEBUG: print(f"DEBUG: Adding domain='{domain}'") + logger.debug(f"Adding domain='{domain}'") 
domains.append(domain) - # DEBUG: print(f"DEBUG: domains()={len(domains)}") + logger.debug(f"domains()={len(domains)}") if len(domains) > 0: locking.acquire() - print(f"INFO: Adding {len(domains)} new instances ...") + logger.info(f"Adding {len(domains)} new instances ...") for domain in domains: try: - print(f"INFO: Fetching instances from domain='{domain}' ...") + logger.info(f"Fetching instances from domain='{domain}' ...") federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name) - # DEBUG: print(f"DEBUG: Invoking cookies.clear({domain}) ...") + logger.debug(f"Invoking cookies.clear({domain}) ...") cookies.clear(domain) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fba_rss) from domain='{domain}'") + logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_fba_rss) from domain='{domain}'") instances.set_last_error(domain, exception) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def fetch_fbabot_atom(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") + logger.debug(f"args[]='{type(args)}' - CALLED!") feed = "https://ryona.agency/users/fba/feed.atom" domains = list() - print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...") + logger.info(f"Fetching ATOM feed='{feed}' from FBA bot account ...") response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))) - # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}") + logger.debug(f"response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}") if response.ok and response.status_code < 300 and len(response.text) > 0: - # DEBUG: print(f"DEBUG: Parsing ATOM feed ({len(response.text)} Bytes) ...") + logger.debug(f"Parsing ATOM feed ({len(response.text)} Bytes) 
...") atom = atoma.parse_atom_bytes(response.content) - # DEBUG: print(f"DEBUG: atom[]='{type(atom)}'") + logger.debug(f"atom[]='{type(atom)}'") for entry in atom.entries: - # DEBUG: print(f"DEBUG: entry[]='{type(entry)}'") + logger.debug(f"entry[]='{type(entry)}'") doc = bs4.BeautifulSoup(entry.content.value, "html.parser") - # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'") + logger.debug(f"doc[]='{type(doc)}'") for element in doc.findAll("a"): for href in element["href"].split(","): - # DEBUG: print(f"DEBUG: href[{type(href)}]={href}") + logger.debug(f"href[{type(href)}]={href}") domain = tidyup.domain(href) - # DEBUG: print(f"DEBUG: domain='{domain}'") + logger.debug(f"domain='{domain}'") if blacklist.is_blacklisted(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") + logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!") continue elif domain in domains: - # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!") + logger.debug(f"domain='{domain}' is already added - SKIPPED!") continue elif instances.is_registered(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!") + logger.debug(f"domain='{domain}' is already registered - SKIPPED!") continue - # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}") + logger.debug(f"Adding domain='{domain}',domains()={len(domains)}") domains.append(domain) - # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}") + logger.debug(f"domains({len(domains)})={domains}") if len(domains) > 0: locking.acquire() - print(f"INFO: Adding {len(domains)} new instances ...") + logger.info(f"Adding {len(domains)} new instances ...") for domain in domains: try: - print(f"INFO: Fetching instances from domain='{domain}' ...") + logger.info(f"Fetching instances from domain='{domain}' ...") federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name) - # DEBUG: print(f"DEBUG: Invoking cookies.clear({domain}) ...") + 
logger.debug(f"Invoking cookies.clear({domain}) ...") cookies.clear(domain) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fbabot_atom) from domain='{domain}'") + logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_fbabot_atom) from domain='{domain}'") instances.set_last_error(domain, exception) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def fetch_instances(args: argparse.Namespace) -> int: - # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") + logger.debug(f"args[]='{type(args)}' - CALLED!") locking.acquire() # Initial fetch try: - print(f"INFO: Fetching instances from args.domain='{args.domain}' ...") + logger.info(f"Fetching instances from args.domain='{args.domain}' ...") federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name) - # DEBUG: print(f"DEBUG: Invoking cookies.clear({args.domain}) ...") + logger.debug(f"Invoking cookies.clear({args.domain}) ...") cookies.clear(args.domain) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from args.domain='{args.domain}'") + logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_instances) from args.domain='{args.domain}'") instances.set_last_error(args.domain, exception) return 100 if args.single: - # DEBUG: print("DEBUG: Not fetching more instances - EXIT!") + logger.debug("Not fetching more instances - EXIT!") return 0 # Loop through some instances @@ -585,28 +589,28 @@ def fetch_instances(args: argparse.Namespace) -> int: ) rows = fba.cursor.fetchall() - print(f"INFO: Checking {len(rows)} entries ...") + logger.info(f"Checking {len(rows)} entries ...") for row in rows: - # DEBUG: print(f"DEBUG: domain='{row[0]}'") + logger.debug(f"domain='{row[0]}'") if blacklist.is_blacklisted(row[0]): - print("WARNING: domain is blacklisted:", row[0]) + 
logger.warning("domain is blacklisted:", row[0]) continue try: - print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'") + logger.info(f"Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'") federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3]) - # DEBUG: print(f"DEBUG: Invoking cookies.clear({row[0]}) ...") + logger.debug(f"Invoking cookies.clear({row[0]}) ...") cookies.clear(row[0]) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from domain='{row[0]}'") + logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_instances) from domain='{row[0]}'") instances.set_last_error(row[0], exception) - # DEBUG: print("DEBUG: Success - EXIT!") + logger.debug("Success - EXIT!") return 0 def fetch_oliphant(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") + logger.debug(f"args[]='{type(args)}' - CALLED!") locking.acquire() # Base URL @@ -657,22 +661,22 @@ def fetch_oliphant(args: argparse.Namespace): for block in blocklists: # Is domain given and not equal blocker? 
if isinstance(args.domain, str) and args.domain != block["blocker"]: - # DEBUG: print(f"DEBUG: Skipping blocker='{block['blocker']}', not matching args.domain='{args.domain}'") + logger.debug(f"Skipping blocker='{block['blocker']}', not matching args.domain='{args.domain}'") continue elif args.domain in domains: - # DEBUG: print(f"DEBUG: args.domain='{args.domain}' already handled - SKIPPED!") + logger.debug(f"args.domain='{args.domain}' already handled - SKIPPED!") continue # Fetch this URL - print(f"INFO: Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...") + logger.info(f"Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...") response = fba.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))) - # DEBUG: print(f"DEBUG: response[]='{type(response)}'") + logger.debug(f"response[]='{type(response)}'") if response.ok and response.content != "": - # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...") + logger.debug(f"Fetched {len(response.content)} Bytes, parsing CSV ...") reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix") - # DEBUG: print(f"DEBUG: reader[]='{type(reader)}'") + logger.debug(f"reader[]='{type(reader)}'") for row in reader: domain = None if "#domain" in row: @@ -680,34 +684,34 @@ def fetch_oliphant(args: argparse.Namespace): elif "domain" in row: domain = row["domain"] else: - # DEBUG: print(f"DEBUG: row='{row}' does not contain domain column") + logger.debug(f"row='{row}' does not contain domain column") continue if not validators.domain(domain): - print(f"WARNING: domain='{domain}' is not a valid domain name - SKIPPED!") + logger.warning(f"domain='{domain}' is not a valid domain name - SKIPPED!") continue elif domain.endswith(".arpa"): - # DEBUG: print(f"DEBUG: domain='{domain}' is a reverse IP domain - SKIPPED!") + logger.debug(f"domain='{domain}' is a reverse IP 
domain - SKIPPED!") continue elif domain.endswith(".tld"): - # DEBUG: print(f"DEBUG: domain='{domain}' is a fake domain - SKIPPED!") + logger.debug(f"domain='{domain}' is a fake domain - SKIPPED!") continue elif blacklist.is_blacklisted(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") + logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!") continue - # DEBUG: print(f"DEBUG: Marking domain='{domain}' as handled") + logger.debug(f"Marking domain='{domain}' as handled") domains.append(domain) - # DEBUG: print(f"DEBUG: Processing domain='{domain}' ...") + logger.debug(f"Processing domain='{domain}' ...") processed = fba.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name) - # DEBUG: print(f"DEBUG: processed='{processed}'") + logger.debug(f"processed='{processed}'") - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def fetch_txt(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") + logger.debug(f"args[]='{type(args)}' - CALLED!") locking.acquire() # Static URLs @@ -715,41 +719,41 @@ def fetch_txt(args: argparse.Namespace): "https://seirdy.one/pb/bsl.txt", ) - print(f"INFO: Checking {len(urls)} text file(s) ...") + logger.info(f"Checking {len(urls)} text file(s) ...") for url in urls: - # DEBUG: print(f"DEBUG: Fetching url='{url}' ...") + logger.debug(f"Fetching url='{url}' ...") response = fba.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))) - # DEBUG: print(f"DEBUG: response[]='{type(response)}'") + logger.debug(f"response[]='{type(response)}'") if response.ok and response.text != "": - # DEBUG: print(f"DEBUG: Returned {len(response.text.strip())} Bytes for processing") + logger.debug(f"Returned {len(response.text.strip())} Bytes for processing") domains = response.text.split("\n") - print(f"INFO: Processing {len(domains)} domains ...") + logger.info(f"Processing {len(domains)} domains ...") for domain in 
domains: - # DEBUG: print(f"DEBUG: domain='{domain}'") + logger.debug(f"domain='{domain}'") if domain == "": - # DEBUG: print("DEBUG: domain is empty - SKIPPED!") + logger.debug("domain is empty - SKIPPED!") continue elif not validators.domain(domain): - print(f"WARNING: domain='{domain}' is not a valid domain name - SKIPPED!") + logger.warning(f"domain='{domain}' is not a valid domain name - SKIPPED!") continue elif domain.endswith(".arpa"): - # DEBUG: print(f"DEBUG: domain='{domain}' is a reverse IP domain - SKIPPED!") + logger.debug(f"domain='{domain}' is a reverse IP domain - SKIPPED!") continue elif domain.endswith(".tld"): - # DEBUG: print(f"DEBUG: domain='{domain}' is a fake domain - SKIPPED!") + logger.debug(f"domain='{domain}' is a fake domain - SKIPPED!") continue elif blacklist.is_blacklisted(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") + logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!") continue - # DEBUG: print(f"DEBUG: domain='{domain}'") + logger.debug(f"domain='{domain}'") processed = fba.process_domain(domain, 'seirdy.one', inspect.currentframe().f_code.co_name) - # DEBUG: print(f"DEBUG: processed='{processed}'") + logger.debug(f"processed='{processed}'") if not processed: - # DEBUG: print(f"DEBUG: domain='{domain}' was not generically processed - SKIPPED!") + logger.debug(f"domain='{domain}' was not generically processed - SKIPPED!") continue - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") diff --git a/fba/csrf.py b/fba/csrf.py index b0cd258..69510b5 100644 --- a/fba/csrf.py +++ b/fba/csrf.py @@ -14,18 +14,22 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
+import logging + import bs4 import reqto import validators - from fba.helpers import config from fba.helpers import cookies from fba.http import network +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + def determine(domain: str, headers: dict) -> dict: - # DEBUG: print(f"DEBUG: domain='{domain}',headers()={len(headers)} - CALLED!") + logger.debug(f"domain='{domain}',headers()={len(headers)} - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -45,17 +49,17 @@ def determine(domain: str, headers: dict) -> dict: reqheaders = headers # Fetch / to check for meta tag indicating csrf - # DEBUG: print(f"DEBUG: Fetching / from domain='{domain}' for CSRF check ...") + logger.debug(f"Fetching / from domain='{domain}' for CSRF check ...") response = reqto.get( f"https://{domain}/", headers=network.web_headers, timeout=(config.get("connection_timeout"), config.get("read_timeout")) ) - # DEBUG: print(f"DEBUG: response.ok='{response.ok}',response.status_code={response.status_code},response.text()={len(response.text)}") + logger.debug(f"response.ok='{response.ok}',response.status_code={response.status_code},response.text()={len(response.text)}") if response.ok and response.status_code < 300 and response.text.find(" 0: # Save cookies - # DEBUG: print(f"DEBUG: Parsing response.text()={len(response.text)} Bytes ...") + logger.debug(f"Parsing response.text()={len(response.text)} Bytes ...") cookies.store(domain, response.cookies.get_dict()) # Parse text @@ -63,13 +67,13 @@ def determine(domain: str, headers: dict) -> dict: response.text, "html.parser" ) - # DEBUG: print(f"DEBUG: meta[]='{type(meta)}'") + logger.debug(f"meta[]='{type(meta)}'") tag = meta.find("meta", attrs={"name": "csrf-token"}) - # DEBUG: print(f"DEBUG: tag={tag}") + logger.debug(f"tag={tag}") if tag is not None: - # DEBUG: print(f"DEBUG: Adding CSRF token='{tag['content']}' for domain='{domain}'") + 
logger.debug(f"Adding CSRF token='{tag['content']}' for domain='{domain}'") reqheaders["X-CSRF-Token"] = tag["content"] - # DEBUG: print(f"DEBUG: reqheaders()={len(reqheaders)} - EXIT!") + logger.debug(f"reqheaders()={len(reqheaders)} - EXIT!") return reqheaders diff --git a/fba/fba.py b/fba/fba.py index b31a99d..a4bd61d 100644 --- a/fba/fba.py +++ b/fba/fba.py @@ -14,6 +14,7 @@ # along with this program. If not, see . import hashlib +import logging import sqlite3 from urllib.parse import urlparse @@ -29,6 +30,9 @@ from fba.http import network from fba.models import instances +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + # Connect to database connection = sqlite3.connect("blocks.db") cursor = connection.cursor() @@ -36,11 +40,11 @@ cursor = connection.cursor() ##### Other functions ##### def is_primitive(var: any) -> bool: - # DEBUG: print(f"DEBUG: var[]='{type(var)}' - CALLED!") + logger.debug(f"var[]='{type(var)}' - CALLED!") return type(var) in {int, str, float, bool} or var is None def get_hash(domain: str) -> str: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -57,7 +61,7 @@ def get_hash(domain: str) -> str: return hashlib.sha256(domain.encode("utf-8")).hexdigest() def fetch_url(url: str, headers: dict, timeout: tuple) -> requests.models.Response: - # DEBUG: print(f"DEBUG: url='{url}',headers()={len(headers)},timeout={timeout} - CALLED!") + logger.debug(f"url='{url}',headers()={len(headers)},timeout={timeout} - CALLED!") if not isinstance(url, str): raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'") elif url == "": @@ -67,21 +71,21 @@ def fetch_url(url: str, headers: dict, timeout: tuple) -> requests.models.Respon elif not isinstance(timeout, tuple): raise ValueError(f"Parameter timeout[]='{type(timeout)}' is not 'tuple'") - # DEBUG: 
print(f"DEBUG: Parsing url='{url}'") + logger.debug(f"Parsing url='{url}'") components = urlparse(url) # Invoke other function, avoid trailing ? - # DEBUG: print(f"DEBUG: components[{type(components)}]={components}") + logger.debug(f"components[{type(components)}]={components}") if components.query != "": response = network.fetch_response(components.netloc, f"{components.path}?{components.query}", headers, timeout) else: response = network.fetch_response(components.netloc, components.path if isinstance(components.path, str) and components.path != '' else '/', headers, timeout) - # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXXIT!") + logger.debug(f"response[]='{type(response)}' - EXXIT!") return response def process_domain(domain: str, blocker: str, command: str) -> bool: - # DEBUG: print(f"DEBUG: domain='{domain}',blocker='{blocker}',command='{command}' - CALLED!") + logger.debug(f"domain='{domain}',blocker='{blocker}',command='{command}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -107,49 +111,49 @@ def process_domain(domain: str, blocker: str, command: str) -> bool: # Try to de-obscure it row = instances.deobscure("*", domain) - # DEBUG: print(f"DEBUG: row[{type(row)}]='{row}'") + logger.debug(f"row[{type(row)}]='{row}'") if row is None: - print(f"WARNING: Cannot de-obfucate domain='{domain}' - SKIPPED!") + logger.warning(f"Cannot de-obfucate domain='{domain}' - SKIPPED!") return False - # DEBUG: print(f"DEBUG: domain='{domain}' de-obscured to '{row[0]}'") + logger.debug(f"domain='{domain}' de-obscured to '{row[0]}'") domain = row[0] elif domain.find("?") > 0: # Try to de-obscure it row = instances.deobscure("?", domain) - # DEBUG: print(f"DEBUG: row[{type(row)}]='{row}'") + logger.debug(f"row[{type(row)}]='{row}'") if row is None: - print(f"WARNING: Cannot de-obfucate domain='{domain}' - SKIPPED!") + logger.warning(f"Cannot de-obfucate domain='{domain}' - 
SKIPPED!") return False - # DEBUG: print(f"DEBUG: domain='{domain}' de-obscured to '{row[0]}'") + logger.debug(f"domain='{domain}' de-obscured to '{row[0]}'") domain = row[0] if not validators.domain(domain.split("/")[0]): - print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!") + logger.warning(f"domain='{domain}' is not a valid domain - SKIPPED!") return False elif domain.endswith(".arpa"): - print(f"WARNING: domain='{domain}' is a reversed .arpa domain and should not be used generally.") + logger.warning(f"domain='{domain}' is a reversed .arpa domain and should not be used generally.") return False elif blacklist.is_blacklisted(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") + logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!") return False elif instances.is_recent(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' has been recently checked - SKIPPED!") + logger.debug(f"domain='{domain}' has been recently checked - SKIPPED!") return False processed = False try: - print(f"INFO: Fetching instances for instane='{domain}',blocker='{blocker}',command='{command}' ...") + logger.info(f"Fetching instances for instane='{domain}',blocker='{blocker}',command='{command}' ...") federation.fetch_instances(domain, blocker, None, command) processed = True - # DEBUG: print(f"DEBUG: Invoking cookies.clear({domain}) ...") + logger.debug(f"Invoking cookies.clear({domain}) ...") cookies.clear(domain) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_oliphant) from domain='{domain}'") + logger.warning(f"Exception '{type(exception)}' during fetching instances (fetch_oliphant) from domain='{domain}'") instances.set_last_error(domain, exception) - # DEBUG: print(f"DEBUG: processed='{processed}' - EXIT!") + logger.debug(f"processed='{processed}' - EXIT!") return processed diff --git a/fba/helpers/blacklist.py b/fba/helpers/blacklist.py index 3961d7d..6d6a6a1 
100644 --- a/fba/helpers/blacklist.py +++ b/fba/helpers/blacklist.py @@ -14,8 +14,13 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import logging + import validators +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + # Don't check these, known trolls/flooders/testing/developing blacklist = [ # Floods federation with fake nodes as "research" project @@ -37,7 +42,7 @@ blacklist = [ ] def is_blacklisted(domain: str) -> bool: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -53,10 +58,10 @@ def is_blacklisted(domain: str) -> bool: blacklisted = False for peer in blacklist: - # DEBUG: print(f"DEBUG: Checking peer='{peer}' ...") + logger.debug(f"Checking peer='{peer}' ...") if peer in domain: - # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted.") + logger.debug(f"domain='{domain}' is blacklisted.") blacklisted = True - # DEBUG: print(f"DEBUG: blacklisted='{blacklisted}' - EXIT!") + logger.debug(f"blacklisted='{blacklisted}' - EXIT!") return blacklisted diff --git a/fba/helpers/cache.py b/fba/helpers/cache.py index cfc5278..97d5a7d 100644 --- a/fba/helpers/cache.py +++ b/fba/helpers/cache.py @@ -14,6 +14,11 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
+import logging + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + # Cache for redundant SQL queries _cache = {} @@ -23,25 +28,25 @@ def key_exists(key: str) -> bool: return key in _cache def set_all(key: str, rows: list, value: any): - # DEBUG: print(f"DEBUG: key='{key}',rows()={len(rows)},value[]='{type(value)}' - CALLED!") + logger.debug(f"key='{key}',rows()={len(rows)},value[]='{type(value)}' - CALLED!") if not isinstance(key, str): raise ValueError("Parameter key[]='{type(key)}' is not 'str'") elif not key_exists(key): - # DEBUG: print(f"DEBUG: Cache for key='{key}' not initialized.") + logger.debug(f"Cache for key='{key}' not initialized.") _cache[key] = {} for sub in rows: - # DEBUG: print(f"DEBUG: Setting key='{key}',sub[{type(sub)}]='{sub}'") + logger.debug(f"Setting key='{key}',sub[{type(sub)}]='{sub}'") if isinstance(sub, tuple): - # DEBUG: print(f"DEBUG: Setting key='{key}',sub[{type(sub)}]='{sub}',value[]='{type(value)}'") + logger.debug(f"Setting key='{key}',sub[{type(sub)}]='{sub}',value[]='{type(value)}'") _cache[key][sub[0]] = value else: - print(f"WARNING: Unsupported type sub[]='{type(sub)}'") + logger.warning(f"Unsupported type sub[]='{type(sub)}'") - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def set_sub_key(key: str, sub: str, value: any): - # DEBUG: print(f"DEBUG: key='{key}',sub='{sub}',value[]='{type(value)}' - CALLED!") + logger.debug(f"key='{key}',sub='{sub}',value[]='{type(value)}' - CALLED!") if not isinstance(key, str): raise ValueError("Parameter key[]='{type(key)}' is not 'str'") elif not isinstance(sub, str): @@ -49,13 +54,13 @@ def set_sub_key(key: str, sub: str, value: any): elif not key_exists(key): raise Exception(f"Cache for key='{key}' is not initialized, but function invoked") - # DEBUG: print(f"DEBUG: Setting key='{key}',sub='{sub}',value[]='{type(value)}' ...") + logger.debug(f"Setting key='{key}',sub='{sub}',value[]='{type(value)}' ...") _cache[key][sub] = value - # DEBUG: 
print("DEBUG: EXIT!") + logger.debug("EXIT!") def sub_key_exists(key: str, sub: str) -> bool: - # DEBUG: print(f"DEBUG: key='{key}',sub='{sub}' - CALLED!") + logger.debug(f"key='{key}',sub='{sub}' - CALLED!") if not isinstance(key, str): raise ValueError("Parameter key[]='{type(key)}' is not 'str'") elif not isinstance(sub, str): @@ -65,5 +70,5 @@ def sub_key_exists(key: str, sub: str) -> bool: exists = sub in _cache[key] - # DEBUG: print(f"DEBUG: exists='{exists}' - EXIT!") + logger.debug(f"exists='{exists}' - EXIT!") return exists diff --git a/fba/helpers/config.py b/fba/helpers/config.py index 1ae1a25..3f199ba 100644 --- a/fba/helpers/config.py +++ b/fba/helpers/config.py @@ -14,14 +14,18 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import logging import json +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + with open("config.json") as f: - # DEBUG: print("DEBUG: Loading configuration file ...") + logger.debug("Loading configuration file ...") _config = json.loads(f.read()) def get(key: str) -> any: - # DEBUG: print(f"DEBUG: key[{type(key)}]={key} - CALLED!") + logger.debug(f"key[{type(key)}]={key} - CALLED!") if not isinstance(key, str): raise ValueError(f"Parameter key[]='{type(key)}' is not 'str'") elif key == "": @@ -29,5 +33,5 @@ def get(key: str) -> any: elif not key in _config: raise KeyError(f"key='{key}' does not exist in _config array") - # DEBUG: print(f"DEBUG: _config[{key}]={_config[key]} - EXIT!") + logger.debug(f"_config[{key}]={_config[key]} - EXIT!") return _config[key] diff --git a/fba/helpers/cookies.py b/fba/helpers/cookies.py index 09d8034..a42ce2f 100644 --- a/fba/helpers/cookies.py +++ b/fba/helpers/cookies.py @@ -14,13 +14,18 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
+import logging + import validators +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + # Cookies stor _cookies = {} def store (domain: str, cookies: dict): - # DEBUG: print(f"DEBUG: domain='{domain}',cookies()={len(cookies)} - CALLED!") + logger.debug(f"domain='{domain}',cookies()={len(cookies)} - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -38,10 +43,10 @@ def store (domain: str, cookies: dict): _cookies[domain] = cookies - # DEBUG: print(f"DEBUG: EXIT!") + logger.debug(f"EXIT!") def get_all(domain: str) -> dict: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -57,11 +62,11 @@ def get_all(domain: str) -> dict: elif not has(domain): raise Exception(f"domain='{domain}' has no cookies stored, maybe invoke store() first?") - # DEBUG: print(f"DEBUG: _cookies[{domain}]()={len(_cookies[domain])} - EXIT!") + logger.debug(f"_cookies[{domain}]()={len(_cookies[domain])} - EXIT!") return _cookies[domain] def has (domain: str) -> bool: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -71,11 +76,11 @@ def has (domain: str) -> bool: has_cookies = domain in _cookies - # DEBUG: print(f"DEBUG: has_cookies='{has_cookies}' - EXIT!") + logger.debug(f"has_cookies='{has_cookies}' - EXIT!") return has_cookies def clear (domain: str): - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -90,7 +95,7 @@ def clear (domain: str): raise 
ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!") if has(domain): - # DEBUG: print(f"DEBUG: Removing cookies for domain='{domain}' ...") + logger.debug(f"Removing cookies for domain='{domain}' ...") del _cookies[domain] - # DEBUG: print(f"DEBUG: EXIT!") + logger.debug(f"EXIT!") diff --git a/fba/helpers/dicts.py b/fba/helpers/dicts.py index 6820913..3ab7440 100644 --- a/fba/helpers/dicts.py +++ b/fba/helpers/dicts.py @@ -13,8 +13,13 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import logging + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + def has_key(lists: list, key: str, value: any) -> bool: - # DEBUG: print(f"DEBUG: lists()={len(lists)},key='{key}',value[]='{type(value)}' - CALLED!") + logger.debug(f"lists()={len(lists)},key='{key}',value[]='{type(value)}' - CALLED!") if not isinstance(lists, list): raise ValueError(f"Parameter lists[]='{type(lists)}' is not 'list'") elif not isinstance(key, str): @@ -23,9 +28,9 @@ def has_key(lists: list, key: str, value: any) -> bool: raise ValueError("Parameter 'key' is empty") has = False - # DEBUG: print(f"DEBUG: Checking lists()={len(lists)} ...") + logger.debug(f"Checking lists()={len(lists)} ...") for row in lists: - # DEBUG: print(f"DEBUG: row['{type(row)}']={row}") + logger.debug(f"row['{type(row)}']={row}") if not isinstance(row, dict): raise ValueError(f"row[]='{type(row)}' is not 'dict'") elif not key in row: @@ -34,5 +39,5 @@ def has_key(lists: list, key: str, value: any) -> bool: has = True break - # DEBUG: print(f"DEBUG: has={has} - EXIT!") + logger.debug(f"has={has} - EXIT!") return has diff --git a/fba/helpers/locking.py b/fba/helpers/locking.py index 27f5eac..269b1d4 100644 --- a/fba/helpers/locking.py +++ b/fba/helpers/locking.py @@ -14,36 +14,40 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
+import logging import os import sys import tempfile import zc.lockfile +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + # Lock file lockfile = tempfile.gettempdir() + '/fba.lock' LOCK = None def acquire(): global LOCK - # DEBUG: print("DEBUG: CALLED!") + logger.debug("CALLED!") try: - # DEBUG: print(f"DEBUG: Acquiring lock: '{lockfile}'") + logger.debug(f"Acquiring lock: '{lockfile}'") LOCK = zc.lockfile.LockFile(lockfile) - # DEBUG: print("DEBUG: Lock obtained.") + logger.debug("Lock obtained.") except zc.lockfile.LockError: - print(f"ERROR: Cannot aquire lock: '{lockfile}'") + logger.error(f"Cannot aquire lock: '{lockfile}'") sys.exit(100) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def release(): - # DEBUG: print("DEBUG: CALLED!") + logger.debug("CALLED!") if LOCK is not None: - # DEBUG: print("DEBUG: Releasing lock ...") + logger.debug("Releasing lock ...") LOCK.close() - # DEBUG: print(f"DEBUG: Deleting lockfile='{lockfile}' ...") + logger.debug(f"Deleting lockfile='{lockfile}' ...") os.remove(lockfile) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") diff --git a/fba/helpers/tidyup.py b/fba/helpers/tidyup.py index cdbfa40..f09c282 100644 --- a/fba/helpers/tidyup.py +++ b/fba/helpers/tidyup.py @@ -13,52 +13,56 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
+import logging import re +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + def reason(string: str) -> str: - # DEBUG: print(f"DEBUG: string='{string}' - CALLED!") + logger.debug(f"string='{string}' - CALLED!") if not isinstance(string, str): raise ValueError(f"Parameter string[]='{type(string)}' is not 'str'") # Strip string string = string.strip() - # DEBUG: print(f"DEBUG: string='{string}' - EXIT!") + logger.debug(f"string='{string}' - EXIT!") return string def domain(string: str) -> str: - # DEBUG: print(f"DEBUG: string='{string}' - CALLED!") + logger.debug(f"string='{string}' - CALLED!") if not isinstance(string, str): raise ValueError(f"Parameter string[]='{type(string)}' is not 'str'") # All lower-case and strip spaces out + last dot string = string.lower().strip().rstrip(".") - # DEBUG: print(f"DEBUG: string='{string}' - #1") + logger.debug(f"string='{string}' - #1") # No port number string = re.sub("\:\d+$", "", string) - # DEBUG: print(f"DEBUG: string='{string}' - #2") + logger.debug(f"string='{string}' - #2") # No protocol, sometimes without the slashes string = re.sub("^https?\:(\/*)", "", string) - # DEBUG: print(f"DEBUG: string='{string}' - #3") + logger.debug(f"string='{string}' - #3") # No trailing slash string = re.sub("\/$", "", string) - # DEBUG: print(f"DEBUG: string='{string}' - #4") + logger.debug(f"string='{string}' - #4") # No @ or : sign string = re.sub("^\@", "", string) string = string.split(":")[0] - # DEBUG: print(f"DEBUG: string='{string}' - #4") + logger.debug(f"string='{string}' - #4") # No individual users in block lists string = re.sub("(.+)\@", "", string) - # DEBUG: print(f"DEBUG: string='{string}' - #5") + logger.debug(f"string='{string}' - #5") if string.find("/profile/"): string = string.split("/profile/")[0] elif string.find("/users/"): string = string.split("/users/")[0] - # DEBUG: print(f"DEBUG: string='{string}' - EXIT!") + logger.debug(f"string='{string}' - EXIT!") return string diff --git 
a/fba/helpers/version.py b/fba/helpers/version.py index 8cdb362..ab46d80 100644 --- a/fba/helpers/version.py +++ b/fba/helpers/version.py @@ -13,8 +13,12 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import logging import re +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + # Pattern instance for version numbers patterns = [ # semantic version number (with v|V) prefix) @@ -28,9 +32,9 @@ patterns = [ ] def remove(software: str) -> str: - # DEBUG: print(f"DEBUG: software='{software}' - CALLED!") + logger.debug(f"software='{software}' - CALLED!") if "." not in software and " " not in software: - print(f"WARNING: software='{software}' does not contain a version number.") + logger.warning(f"software='{software}' does not contain a version number.") return software temp = software @@ -41,7 +45,7 @@ def remove(software: str) -> str: elif " - " in software: temp = software.split(" - ")[0] - # DEBUG: print(f"DEBUG: software='{software}'") + logger.debug(f"software='{software}'") version = None if " " in software: version = temp.split(" ")[-1] @@ -50,81 +54,81 @@ def remove(software: str) -> str: elif "-" in software: version = temp.split("-")[-1] else: - # DEBUG: print(f"DEBUG: Was not able to find common seperator, returning untouched software='{software}'") + logger.debug(f"Was not able to find common seperator, returning untouched software='{software}'") return software match = None - # DEBUG: print(f"DEBUG: Checking {len(patterns)} patterns ...") + logger.debug(f"Checking {len(patterns)} patterns ...") for pattern in patterns: # Run match() match = pattern.match(version) - # DEBUG: print(f"DEBUG: match[]='{type(match)}'") + logger.debug(f"match[]='{type(match)}'") if isinstance(match, re.Match): - # DEBUG: print(f"DEBUG: version='{version}' is matching pattern='{pattern}'") + logger.debug(f"version='{version}' is matching pattern='{pattern}'") break - # DEBUG: 
print(f"DEBUG: version[{type(version)}]='{version}',match='{match}'") + logger.debug(f"version[{type(version)}]='{version}',match='{match}'") if not isinstance(match, re.Match): - print(f"WARNING: version='{version}' does not match regex, leaving software='{software}' untouched.") + logger.warning(f"version='{version}' does not match regex, leaving software='{software}' untouched.") return software - # DEBUG: print(f"DEBUG: Found valid version number: '{version}', removing it ...") + logger.debug(f"Found valid version number: '{version}', removing it ...") end = len(temp) - len(version) - 1 - # DEBUG: print(f"DEBUG: end[{type(end)}]={end}") + logger.debug(f"end[{type(end)}]={end}") software = temp[0:end].strip() if " version" in software: - # DEBUG: print(f"DEBUG: software='{software}' contains word ' version'") + logger.debug(f"software='{software}' contains word ' version'") software = strip_until(software, " version") - # DEBUG: print(f"DEBUG: software='{software}' - EXIT!") + logger.debug(f"software='{software}' - EXIT!") return software def strip_powered_by(software: str) -> str: - # DEBUG: print(f"DEBUG: software='{software}' - CALLED!") + logger.debug(f"software='{software}' - CALLED!") if not isinstance(software, str): raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'") elif software == "": raise ValueError("Parameter 'software' is empty") elif "powered by" not in software: - print(f"WARNING: Cannot find 'powered by' in software='{software}'!") + logger.warning(f"Cannot find 'powered by' in software='{software}'!") return software start = software.find("powered by ") - # DEBUG: print(f"DEBUG: start[{type(start)}]='{start}'") + logger.debug(f"start[{type(start)}]='{start}'") software = software[start + 11:].strip() - # DEBUG: print(f"DEBUG: software='{software}'") + logger.debug(f"software='{software}'") software = strip_until(software, " - ") - # DEBUG: print(f"DEBUG: software='{software}' - EXIT!") + 
logger.debug(f"software='{software}' - EXIT!") return software def strip_hosted_on(software: str) -> str: - # DEBUG: print(f"DEBUG: software='{software}' - CALLED!") + logger.debug(f"software='{software}' - CALLED!") if not isinstance(software, str): raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'") elif software == "": raise ValueError("Parameter 'software' is empty") elif "hosted on" not in software: - print(f"WARNING: Cannot find 'hosted on' in '{software}'!") + logger.warning(f"Cannot find 'hosted on' in '{software}'!") return software end = software.find("hosted on ") - # DEBUG: print(f"DEBUG: end[{type(end)}]='{end}'") + logger.debug(f"end[{type(end)}]='{end}'") software = software[0:end].strip() - # DEBUG: print(f"DEBUG: software[{type(software)}]='{software}'") + logger.debug(f"software[{type(software)}]='{software}'") software = strip_until(software, " - ") - # DEBUG: print(f"DEBUG: software='{software}' - EXIT!") + logger.debug(f"software='{software}' - EXIT!") return software def strip_until(software: str, until: str) -> str: - # DEBUG: print(f"DEBUG: software='{software}',until='{until}' - CALLED!") + logger.debug(f"software='{software}',until='{until}' - CALLED!") if not isinstance(software, str): raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'") elif software == "": @@ -134,15 +138,15 @@ def strip_until(software: str, until: str) -> str: elif until == "": raise ValueError("Parameter 'until' is empty") elif not until in software: - print(f"WARNING: Cannot find '{until}' in '{software}'!") + logger.warning(f"Cannot find '{until}' in '{software}'!") return software # Next, strip until part end = software.find(until) - # DEBUG: print(f"DEBUG: end[{type(end)}]='{end}'") + logger.debug(f"end[{type(end)}]='{end}'") if end > 0: software = software[0:end].strip() - # DEBUG: print(f"DEBUG: software='{software}' - EXIT!") + logger.debug(f"software='{software}' - EXIT!") return software diff --git 
a/fba/http/federation.py b/fba/http/federation.py index 13fc60a..1d00794 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -13,6 +13,8 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import logging + from urllib.parse import urlparse import bs4 @@ -33,6 +35,9 @@ from fba.networks import lemmy from fba.networks import misskey from fba.networks import peertube +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + # "rel" identifiers (no real URLs) nodeinfo_identifier = [ "https://nodeinfo.diaspora.software/ns/schema/2.1", @@ -46,7 +51,7 @@ nodeinfo_identifier = [ ] def fetch_instances(domain: str, origin: str, software: str, command: str, path: str = None): - # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',software='{software}',path='{path}' - CALLED!") + logger.debug(f"domain='{domain}',origin='{origin}',software='{software}',path='{path}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -62,18 +67,18 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: elif not isinstance(origin, str) and origin is not None: raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'") elif software is None: - # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") + logger.debug(f"Updating last_instance_fetch for domain='{domain}' ...") instances.set_last_instance_fetch(domain) - # DEBUG: print(f"DEBUG: software for domain='{domain}' is not set, determining ...") + logger.debug(f"software for domain='{domain}' is not set, determining ...") software = None try: software = determine_software(domain, path) except network.exceptions as exception: - # DEBUG: print(f"DEBUG: Exception '{type(exception)}' during determining software type") + logger.debug(f"Exception '{type(exception)}' during determining software 
type") pass - # DEBUG: print(f"DEBUG: Determined software='{software}' for domain='{domain}'") + logger.debug(f"Determined software='{software}' for domain='{domain}'") elif not isinstance(software, str): raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'") elif not isinstance(command, str): @@ -88,59 +93,59 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: raise ValueError(f"domain='{domain}' is a fake domain") if not instances.is_registered(domain): - # DEBUG: print(f"DEBUG: Adding new domain='{domain}',origin='{origin}',command='{command}',path='{path}',software='{software}'") + logger.debug(f"Adding new domain='{domain}',origin='{origin}',command='{command}',path='{path}',software='{software}'") instances.add(domain, origin, command, path, software) - # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...") + logger.debug(f"Updating last_instance_fetch for domain='{domain}' ...") instances.set_last_instance_fetch(domain) - # DEBUG: print("DEBUG: Fetching instances for domain:", domain, software) + logger.debug("Fetching instances for domain:", domain, software) peerlist = fetch_peers(domain, software) if peerlist is None: - print("ERROR: Cannot fetch peers:", domain) + logger.warning("Cannot fetch peers:", domain) return elif instances.has_pending(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo data, flushing ...") + logger.debug(f"domain='{domain}' has pending nodeinfo data, flushing ...") instances.update_data(domain) - print(f"INFO: Checking {len(peerlist)} instances from domain='{domain}' ...") + logger.info(f"Checking {len(peerlist)} instances from domain='{domain}' ...") for instance in peerlist: - # DEBUG: print(f"DEBUG: instance='{instance}'") + logger.debug(f"instance='{instance}'") if instance is None: # Skip "None" types as tidup.domain() cannot parse them continue - # DEBUG: print(f"DEBUG: instance='{instance}' - BEFORE") + 
logger.debug(f"instance='{instance}' - BEFORE") instance = tidyup.domain(instance) - # DEBUG: print(f"DEBUG: instance='{instance}' - AFTER") + logger.debug(f"instance='{instance}' - AFTER") if instance == "": - print(f"WARNING: Empty instance after tidyup.domain(), domain='{domain}'") + logger.warning(f"Empty instance after tidyup.domain(), domain='{domain}'") continue elif not validators.domain(instance.split("/")[0]): - print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}'") + logger.warning(f"Bad instance='{instance}' from domain='{domain}',origin='{origin}'") continue elif instance.endswith(".arpa"): - print(f"WARNING: instance='{instance}' is a reversed .arpa domain and should not be used generally.") + logger.warning(f"instance='{instance}' is a reversed .arpa domain and should not be used generally.") continue elif blacklist.is_blacklisted(instance): - # DEBUG: print("DEBUG: instance is blacklisted:", instance) + logger.debug(f"instance='{instance}' is blacklisted - SKIPPED!") continue elif instance.find("/profile/") > 0 or instance.find("/users/") > 0: - # DEBUG: print(f"DEBUG: instance='{instance}' is a link to a single user profile - SKIPPED!") + logger.debug(f"instance='{instance}' is a link to a single user profile - SKIPPED!") continue elif instance.endswith(".tld"): - # DEBUG: print(f"DEBUG: instance='{instance}' is a fake domain - SKIPPED!") + logger.debug(f"instance='{instance}' is a fake domain - SKIPPED!") continue elif not instances.is_registered(instance): - # DEBUG: print("DEBUG: Adding new instance:", instance, domain) + logger.debug(f"Adding new instance='{instance}' from domain='{domain}'") instances.add(instance, domain, command) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def fetch_peers(domain: str, software: str) -> list: - # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',software='{software}' - CALLED!") + logger.debug(f"domain({len(domain)})='{domain}',software='{software}' - CALLED!") if not isinstance(domain, str): 
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -157,13 +162,13 @@ def fetch_peers(domain: str, software: str) -> list: raise ValueError(f"software[]='{type(software)}' is not 'str'") if software == "misskey": - # DEBUG: print(f"DEBUG: Invoking misskey.fetch_peers({domain}) ...") + logger.debug(f"Invoking misskey.fetch_peers({domain}) ...") return misskey.fetch_peers(domain) elif software == "lemmy": - # DEBUG: print(f"DEBUG: Invoking lemmy.fetch_peers({domain}) ...") + logger.debug(f"Invoking lemmy.fetch_peers({domain}) ...") return lemmy.fetch_peers(domain) elif software == "peertube": - # DEBUG: print(f"DEBUG: Invoking peertube.fetch_peers({domain}) ...") + logger.debug(f"Invoking peertube.fetch_peers({domain}) ...") return peertube.fetch_peers(domain) # Init peers variable @@ -173,14 +178,14 @@ def fetch_peers(domain: str, software: str) -> list: headers = tuple() try: - # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'") + logger.debug(f"Checking CSRF for domain='{domain}'") headers = csrf.determine(domain, dict()) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!") + logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!") instances.set_last_error(domain, exception) return peers - # DEBUG: print(f"DEBUG: Fetching peers from '{domain}',software='{software}' ...") + logger.debug(f"Fetching peers from '{domain}',software='{software}' ...") data = network.get_json_api( domain, "/api/v1/instance/peers", @@ -188,9 +193,9 @@ def fetch_peers(domain: str, software: str) -> list: (config.get("connection_timeout"), config.get("read_timeout")) ) - # DEBUG: print(f"DEBUG: data[]='{type(data)}'") + logger.debug(f"data[]='{type(data)}'") if "error_message" in data: - # DEBUG: print("DEBUG: Was not able to fetch peers, trying alternative ...") + logger.debug("Was not able to fetch 
peers, trying alternative ...") data = network.get_json_api( domain, "/api/v3/site", @@ -198,31 +203,31 @@ def fetch_peers(domain: str, software: str) -> list: (config.get("connection_timeout"), config.get("read_timeout")) ) - # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'") + logger.debug(f"response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'") if "error_message" in data: - print(f"WARNING: Could not reach any JSON API at domain='{domain}',status_code='{data['status_code']}',error_message='{data['error_message']}'") + logger.warning(f"Could not reach any JSON API at domain='{domain}',status_code='{data['status_code']}',error_message='{data['error_message']}'") elif "federated_instances" in data["json"]: - # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'") + logger.debug(f"Found federated_instances for domain='{domain}'") peers = peers + add_peers(data["json"]["federated_instances"]) - # DEBUG: print("DEBUG: Added instance(s) to peers") + logger.debug("Added instance(s) to peers") else: message = "JSON response does not contain 'federated_instances' or 'error_message'" - print(f"WARNING: {message},domain='{domain}'") + logger.warning(f"{message},domain='{domain}'") instances.set_last_error(domain, message) elif isinstance(data["json"], list): # DEBUG print("DEBUG: Querying API was successful:", domain, len(data['json'])) peers = data["json"] else: - print(f"WARNING: Cannot parse data[json][]='{type(data['json'])}'") + logger.warning(f"Cannot parse data[json][]='{type(data['json'])}'") - # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") + logger.debug(f"Adding '{len(peers)}' for domain='{domain}'") instances.set_total_peers(domain, peers) - # DEBUG: print("DEBUG: Returning peers[]:", type(peers)) + logger.debug("Returning peers[]:", type(peers)) return peers def fetch_nodeinfo(domain: str, path: str = None) -> dict: - 
# DEBUG: print(f"DEBUG: domain='{domain}',path='{path}' - CALLED!") + logger.debug(f"domain='{domain}',path='{path}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -238,12 +243,12 @@ def fetch_nodeinfo(domain: str, path: str = None) -> dict: elif not isinstance(path, str) and path is not None: raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'") - # DEBUG: print(f"DEBUG: Fetching nodeinfo from domain='{domain}' ...") + logger.debug(f"Fetching nodeinfo from domain='{domain}' ...") nodeinfo = fetch_wellknown_nodeinfo(domain) - # DEBUG: print(f"DEBUG: nodeinfo[{type(nodeinfo)}]({len(nodeinfo)}='{nodeinfo}'") + logger.debug(f"nodeinfo[{type(nodeinfo)}]({len(nodeinfo)}='{nodeinfo}'") if "error_message" not in nodeinfo and "json" in nodeinfo and len(nodeinfo["json"]) > 0: - # DEBUG: print(f"DEBUG: Found nodeinfo[json]()={len(nodeinfo['json'])} - EXIT!") + logger.debug(f"Found nodeinfo[json]()={len(nodeinfo['json'])} - EXIT!") return nodeinfo["json"] # No CSRF by default, you don't have to add network.api_headers by yourself here @@ -251,10 +256,10 @@ def fetch_nodeinfo(domain: str, path: str = None) -> dict: data = dict() try: - # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'") + logger.debug(f"Checking CSRF for domain='{domain}'") headers = csrf.determine(domain, dict()) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during checking CSRF (nodeinfo,{__name__}) - EXIT!") + logger.warning(f"Exception '{type(exception)}' during checking CSRF (nodeinfo,{__name__}) - EXIT!") instances.set_last_error(domain, exception) return { "status_code" : 500, @@ -272,11 +277,11 @@ def fetch_nodeinfo(domain: str, path: str = None) -> dict: ] for request in request_paths: - # DEBUG: print(f"DEBUG: path[{type(path)}]='{path}',request='{request}'") + logger.debug(f"path[{type(path)}]='{path}',request='{request}'") if path is None or path 
== request or path == f"http://{domain}{path}" or path == f"https://{domain}{path}": - # DEBUG: print(f"DEBUG: Fetching request='{request}' from domain='{domain}' ...") + logger.debug(f"Fetching request='{request}' from domain='{domain}' ...") if path == f"http://{domain}{path}" or path == f"https://{domain}{path}": - # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}' has protocol in path, splitting ...") + logger.debug(f"domain='{domain}',path='{path}' has protocol in path, splitting ...") components = urlparse(path) path = components.path @@ -287,20 +292,20 @@ def fetch_nodeinfo(domain: str, path: str = None) -> dict: (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout")) ) - # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'") + logger.debug(f"response.ok={response.ok},response.status_code={response.status_code},data[]='{type(data)}'") if "error_message" not in data: - # DEBUG: print("DEBUG: Success:", request) + logger.debug("Success:", request) instances.set_detection_mode(domain, "STATIC_CHECK") instances.set_nodeinfo_url(domain, request) break - print(f"WARNING: Failed fetching nodeinfo from domain='{domain}',status_code='{data['status_code']}',error_message='{data['error_message']}'") + logger.warning(f"Failed fetching nodeinfo from domain='{domain}',status_code='{data['status_code']}',error_message='{data['error_message']}'") - # DEBUG: print(f"DEBUG: data()={len(data)} - EXIT!") + logger.debug(f"data()={len(data)} - EXIT!") return data def fetch_wellknown_nodeinfo(domain: str) -> dict: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -318,10 +323,10 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict: headers = tuple() try: - # DEBUG: print(f"DEBUG: Checking CSRF for 
domain='{domain}'") + logger.debug(f"Checking CSRF for domain='{domain}'") headers = csrf.determine(domain, dict()) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_wellknown_nodeinfo,{__name__}) - EXIT!") + logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_wellknown_nodeinfo,{__name__}) - EXIT!") instances.set_last_error(domain, exception) return { "status_code" : 500, @@ -329,7 +334,7 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict: "exception" : exception, } - # DEBUG: print("DEBUG: Fetching .well-known info for domain:", domain) + logger.debug("Fetching .well-known info for domain:", domain) data = network.get_json_api( domain, "/.well-known/nodeinfo", @@ -339,61 +344,61 @@ def fetch_wellknown_nodeinfo(domain: str) -> dict: if "error_message" not in data: nodeinfo = data["json"] - # DEBUG: print("DEBUG: Found entries:", len(nodeinfo), domain) + logger.debug("Found entries:", len(nodeinfo), domain) if "links" in nodeinfo: - # DEBUG: print("DEBUG: Found links in nodeinfo():", len(nodeinfo["links"])) + logger.debug("Found links in nodeinfo():", len(nodeinfo["links"])) for link in nodeinfo["links"]: - # DEBUG: print(f"DEBUG: link[{type(link)}]='{link}'") + logger.debug(f"link[{type(link)}]='{link}'") if not isinstance(link, dict) or not "rel" in link: - print(f"WARNING: link[]='{type(link)}' is not 'dict' or no element 'rel' found") + logger.warning(f"link[]='{type(link)}' is not 'dict' or no element 'rel' found") elif link["rel"] in nodeinfo_identifier: # Default is that 'href' has a complete URL, but some hosts don't send that url = link["href"] components = urlparse(link["href"]) - # DEBUG: print(f"DEBUG: components[{type(components)}]='{components}'") + logger.debug(f"components[{type(components)}]='{components}'") if components.scheme == "" and components.netloc == "": - # DEBUG: print(f"DEBUG: link[href]='{link['href']}' has no scheme and host name in it, 
prepending from domain='{domain}'") + logger.debug(f"link[href]='{link['href']}' has no scheme and host name in it, prepending from domain='{domain}'") url = f"https://{domain}{url}" components = urlparse(url) if not validators.domain(components.netloc): - print(f"WARNING: components.netloc='{components.netloc}' is not a valid domain - SKIPPED!") + logger.warning(f"components.netloc='{components.netloc}' is not a valid domain - SKIPPED!") continue elif domain.endswith(".arpa"): - print(f"WARNING: domain='{domain}' is a domain for reversed IP addresses - SKIPPED!") + logger.warning(f"domain='{domain}' is a domain for reversed IP addresses - SKIPPED!") continue elif domain.endswith(".tld"): - print(f"WARNING: domain='{domain}' is a fake domain - SKIPPED!") + logger.warning(f"domain='{domain}' is a fake domain - SKIPPED!") continue elif blacklist.is_blacklisted(components.netloc): - # DEBUG: print(f"DEBUG: components.netloc='{components.netloc}' is blacklisted - SKIPPED!") + logger.debug(f"components.netloc='{components.netloc}' is blacklisted - SKIPPED!") continue - # DEBUG: print("DEBUG: Fetching nodeinfo from:", url) + logger.debug("Fetching nodeinfo from:", url) data = network.fetch_api_url( url, (config.get("connection_timeout"), config.get("read_timeout")) ) - # DEBUG: print("DEBUG: href,data[]:", link["href"], type(data)) + logger.debug("href,data[]:", link["href"], type(data)) if "error_message" not in data and "json" in data: - # DEBUG: print("DEBUG: Found JSON nodeinfo():", len(data)) + logger.debug("Found JSON nodeinfo():", len(data)) instances.set_detection_mode(domain, "AUTO_DISCOVERY") instances.set_nodeinfo_url(domain, link["href"]) break else: instances.set_last_error(domain, data) else: - print("WARNING: Unknown 'rel' value:", domain, link["rel"]) + logger.warning("Unknown 'rel' value:", domain, link["rel"]) else: - print("WARNING: nodeinfo does not contain 'links':", domain) + logger.warning("nodeinfo does not contain 'links':", domain) - # DEBUG: 
print("DEBUG: Returning data[]:", type(data)) + logger.debug("Returning data[]:", type(data)) return data def fetch_generator_from_path(domain: str, path: str = "/") -> str: - # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',path='{path}' - CALLED!") + logger.debug(f"domain({len(domain)})='{domain}',path='{path}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -411,64 +416,64 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: elif path == "": raise ValueError("Parameter 'path' is empty") - # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}' - CALLED!") + logger.debug(f"domain='{domain}',path='{path}' - CALLED!") software = None - # DEBUG: print(f"DEBUG: Fetching path='{path}' from '{domain}' ...") + logger.debug(f"Fetching path='{path}' from '{domain}' ...") response = network.fetch_response(domain, path, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))) - # DEBUG: print("DEBUG: domain,response.ok,response.status_code,response.text[]:", domain, response.ok, response.status_code, type(response.text)) + logger.debug("domain,response.ok,response.status_code,response.text[]:", domain, response.ok, response.status_code, type(response.text)) if response.ok and response.status_code < 300 and response.text.find(" 0: - # DEBUG: print(f"DEBUG: Parsing response.text()={len(response.text)} Bytes ...") + logger.debug(f"Parsing response.text()={len(response.text)} Bytes ...") doc = bs4.BeautifulSoup(response.text, "html.parser") - # DEBUG: print("DEBUG: doc[]:", type(doc)) + logger.debug("doc[]:", type(doc)) generator = doc.find("meta", {"name" : "generator"}) site_name = doc.find("meta", {"property": "og:site_name"}) - # DEBUG: print(f"DEBUG: generator='{generator}',site_name='{site_name}'") + logger.debug(f"generator='{generator}',site_name='{site_name}'") if isinstance(generator, bs4.element.Tag) and 
isinstance(generator.get("content"), str): - # DEBUG: print("DEBUG: Found generator meta tag:", domain) + logger.debug("Found generator meta tag:", domain) software = tidyup.domain(generator.get("content")) - # DEBUG: print(f"DEBUG: software[{type(software)}]='{software}'") + logger.debug(f"software[{type(software)}]='{software}'") if software is not None and software != "": - print(f"INFO: domain='{domain}' is generated by '{software}'") + logger.info(f"domain='{domain}' is generated by '{software}'") instances.set_detection_mode(domain, "GENERATOR") elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str): - # DEBUG: print("DEBUG: Found property=og:site_name:", domain) + logger.debug("Found property=og:site_name:", domain) software = tidyup.domain(site_name.get("content")) - # DEBUG: print(f"DEBUG: software[{type(software)}]='{software}'") + logger.debug(f"software[{type(software)}]='{software}'") if software is not None and software != "": - print(f"INFO: domain='{domain}' has og:site_name='{software}'") + logger.info(f"domain='{domain}' has og:site_name='{software}'") instances.set_detection_mode(domain, "SITE_NAME") - # DEBUG: print(f"DEBUG: software[]='{type(software)}'") + logger.debug(f"software[]='{type(software)}'") if isinstance(software, str) and software == "": - # DEBUG: print(f"DEBUG: Corrected empty string to None for software of domain='{domain}'") + logger.debug(f"Corrected empty string to None for software of domain='{domain}'") software = None elif isinstance(software, str) and ("." 
in software or " " in software): - # DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...") + logger.debug(f"software='{software}' may contain a version number, domain='{domain}', removing it ...") software = version.remove(software) - # DEBUG: print(f"DEBUG: software[]='{type(software)}'") + logger.debug(f"software[]='{type(software)}'") if isinstance(software, str) and "powered by " in software: - # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it") + logger.debug(f"software='{software}' has 'powered by' in it") software = version.remove(version.strip_powered_by(software)) elif isinstance(software, str) and " hosted on " in software: - # DEBUG: print(f"DEBUG: software='{software}' has 'hosted on' in it") + logger.debug(f"software='{software}' has 'hosted on' in it") software = version.remove(version.strip_hosted_on(software)) elif isinstance(software, str) and " by " in software: - # DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it") + logger.debug(f"software='{software}' has ' by ' in it") software = version.strip_until(software, " by ") elif isinstance(software, str) and " see " in software: - # DEBUG: print(f"DEBUG: software='{software}' has ' see ' in it") + logger.debug(f"software='{software}' has ' see ' in it") software = version.strip_until(software, " see ") - # DEBUG: print(f"DEBUG: software='{software}' - EXIT!") + logger.debug(f"software='{software}' - EXIT!") return software def determine_software(domain: str, path: str = None) -> str: - # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',path='{path}' - CALLED!") + logger.debug(f"domain({len(domain)})='{domain}',path='{path}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -484,96 +489,96 @@ def determine_software(domain: str, path: str = None) -> str: elif not isinstance(path, str) and path is not None: raise 
ValueError(f"Parameter path[]='{type(path)}' is not 'str'") - # DEBUG: print("DEBUG: Determining software for domain,path:", domain, path) + logger.debug("Determining software for domain,path:", domain, path) software = None - # DEBUG: print(f"DEBUG: Fetching nodeinfo from '{domain}' ...") + logger.debug(f"Fetching nodeinfo from '{domain}' ...") data = fetch_nodeinfo(domain, path) - # DEBUG: print(f"DEBUG: data[{type(data)}]='{data}'") + logger.debug(f"data[{type(data)}]='{data}'") if "exception" in data: # Continue raising it raise data["exception"] elif "error_message" in data: - # DEBUG: print(f"DEBUG: Returned error_message during fetching nodeinfo: '{data['error_message']}',status_code='{data['status_code']}'") + logger.debug(f"Returned error_message during fetching nodeinfo: '{data['error_message']}',status_code='{data['status_code']}'") return fetch_generator_from_path(domain) elif "status" in data and data["status"] == "error" and "message" in data: - print("WARNING: JSON response is an error:", data["message"]) + logger.warning("JSON response is an error:", data["message"]) instances.set_last_error(domain, data["message"]) return fetch_generator_from_path(domain) elif "message" in data: - print("WARNING: JSON response contains only a message:", data["message"]) + logger.warning("JSON response contains only a message:", data["message"]) instances.set_last_error(domain, data["message"]) return fetch_generator_from_path(domain) elif "software" not in data or "name" not in data["software"]: - # DEBUG: print(f"DEBUG: JSON response from domain='{domain}' does not include [software][name], fetching / ...") + logger.debug(f"JSON response from domain='{domain}' does not include [software][name], fetching / ...") software = fetch_generator_from_path(domain) - # DEBUG: print(f"DEBUG: Generator for domain='{domain}' is: '{software}'") + logger.debug(f"Generator for domain='{domain}' is: '{software}'") elif "software" in data and "name" in data["software"]: - # DEBUG: 
print("DEBUG: Found data[software][name] in JSON response") + logger.debug("Found data[software][name] in JSON response") software = data["software"]["name"] if software is None: - # DEBUG: print("DEBUG: Returning None - EXIT!") + logger.debug("Returning None - EXIT!") return None sofware = tidyup.domain(software) - # DEBUG: print("DEBUG: sofware after tidyup.domain():", software) + logger.debug("sofware after tidyup.domain():", software) if software in ["akkoma", "rebased", "akkounfucked", "ched"]: - # DEBUG: print("DEBUG: Setting pleroma:", domain, software) + logger.debug("Setting pleroma:", domain, software) software = "pleroma" elif software in ["hometown", "ecko"]: - # DEBUG: print("DEBUG: Setting mastodon:", domain, software) + logger.debug("Setting mastodon:", domain, software) software = "mastodon" elif software in ["slipfox calckey", "calckey", "groundpolis", "foundkey", "cherrypick", "meisskey", "magnetar", "keybump"]: - # DEBUG: print("DEBUG: Setting misskey:", domain, software) + logger.debug("Setting misskey:", domain, software) software = "misskey" elif software == "runtube.re": - # DEBUG: print("DEBUG: Setting peertube:", domain, software) + logger.debug("Setting peertube:", domain, software) software = "peertube" elif software == "nextcloud social": - # DEBUG: print("DEBUG: Setting nextcloud:", domain, software) + logger.debug("Setting nextcloud:", domain, software) software = "nextcloud" elif software.find("/") > 0: - print("WARNING: Spliting of slash:", software) + logger.warning("Spliting of slash:", software) software = tidyup.domain(software.split("/")[-1]) elif software.find("|") > 0: - print("WARNING: Spliting of pipe:", software) + logger.warning("Spliting of pipe:", software) software = tidyup.domain(software.split("|")[0]) elif "powered by" in software: - # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it") + logger.debug(f"software='{software}' has 'powered by' in it") software = version.strip_powered_by(software) elif 
isinstance(software, str) and " by " in software: - # DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it") + logger.debug(f"software='{software}' has ' by ' in it") software = version.strip_until(software, " by ") elif isinstance(software, str) and " see " in software: - # DEBUG: print(f"DEBUG: software='{software}' has ' see ' in it") + logger.debug(f"software='{software}' has ' see ' in it") software = version.strip_until(software, " see ") - # DEBUG: print(f"DEBUG: software[]='{type(software)}'") + logger.debug(f"software[]='{type(software)}'") if software == "": - print("WARNING: tidyup.domain() left no software name behind:", domain) + logger.warning("tidyup.domain() left no software name behind:", domain) software = None - # DEBUG: print(f"DEBUG: software[]='{type(software)}'") + logger.debug(f"software[]='{type(software)}'") if str(software) == "": - # DEBUG: print(f"DEBUG: software for '{domain}' was not detected, trying generator ...") + logger.debug(f"software for '{domain}' was not detected, trying generator ...") software = fetch_generator_from_path(domain) elif len(str(software)) > 0 and ("." 
in software or " " in software): - # DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...") + logger.debug(f"software='{software}' may contain a version number, domain='{domain}', removing it ...") software = version.remove(software) - # DEBUG: print(f"DEBUG: software[]='{type(software)}'") + logger.debug(f"software[]='{type(software)}'") if isinstance(software, str) and "powered by" in software: - # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it") + logger.debug(f"software='{software}' has 'powered by' in it") software = version.remove(version.strip_powered_by(software)) - # DEBUG: print("DEBUG: Returning domain,software:", domain, software) + logger.debug("Returning domain,software:", domain, software) return software def find_domains(tag: bs4.element.Tag) -> list: - # DEBUG: print(f"DEBUG: tag[]='{type(tag)}' - CALLED!") + logger.debug(f"tag[]='{type(tag)}' - CALLED!") if not isinstance(tag, bs4.element.Tag): raise ValueError(f"Parameter tag[]='{type(tag)}' is not type of bs4.element.Tag") elif len(tag.select("tr")) == 0: @@ -581,30 +586,30 @@ def find_domains(tag: bs4.element.Tag) -> list: domains = list() for element in tag.select("tr"): - # DEBUG: print(f"DEBUG: element[]='{type(element)}'") + logger.debug(f"element[]='{type(element)}'") if not element.find("td"): - # DEBUG: print("DEBUG: Skipping element, no found") + logger.debug("Skipping element, no found") continue domain = tidyup.domain(element.find("td").text) reason = tidyup.reason(element.findAll("td")[1].text) - # DEBUG: print(f"DEBUG: domain='{domain}',reason='{reason}'") + logger.debug(f"domain='{domain}',reason='{reason}'") if not validators.domain(domain.split("/")[0]): - print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!") + logger.warning(f"domain='{domain}' is not a valid domain - SKIPPED!") continue elif domain.endswith(".arpa"): - print(f"WARNING: domain='{domain}' is a domain for reversed IP 
addresses - SKIPPED!") + logger.warning(f"domain='{domain}' is a domain for reversed IP addresses - SKIPPED!") continue elif domain.endswith(".tld"): - print(f"WARNING: domain='{domain}' is a fake domain - SKIPPED!") + logger.warning(f"domain='{domain}' is a fake domain - SKIPPED!") continue elif blacklist.is_blacklisted(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") + logger.debug(f"domain='{domain}' is blacklisted - SKIPPED!") continue elif domain == "gab.com/.ai, develop.gab.com": - # DEBUG: print("DEBUG: Multiple domains detected in one row") + logger.debug("Multiple domains detected in one row") domains.append({ "domain": "gab.com", "reason": reason, @@ -619,55 +624,55 @@ def find_domains(tag: bs4.element.Tag) -> list: }) continue elif not validators.domain(domain.split("/")[0]): - print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!") + logger.warning(f"domain='{domain}' is not a valid domain - SKIPPED!") continue - # DEBUG: print(f"DEBUG: Adding domain='{domain}',reason='{reason}' ...") + logger.debug(f"Adding domain='{domain}',reason='{reason}' ...") domains.append({ "domain": domain, "reason": reason, }) - # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!") + logger.debug(f"domains()={len(domains)} - EXIT!") return domains def add_peers(rows: dict) -> list: - # DEBUG: print(f"DEBUG: rows()={len(rows)} - CALLED!") + logger.debug(f"rows()={len(rows)} - CALLED!") peers = list() for key in ["linked", "allowed", "blocked"]: - # DEBUG: print(f"DEBUG: Checking key='{key}'") + logger.debug(f"Checking key='{key}'") if key not in rows or rows[key] is None: - print(f"WARNING: Cannot find key='{key}' or it is NoneType - SKIPPED!") + logger.warning(f"Cannot find key='{key}' or it is NoneType - SKIPPED!") continue - # DEBUG: print(f"DEBUG: Adding {len(rows[key])} peer(s) to peers list ...") + logger.debug(f"Adding {len(rows[key])} peer(s) to peers list ...") for peer in rows[key]: - # DEBUG: print(f"DEBUG: 
peer='{peer}' - BEFORE!") + logger.debug(f"peer='{peer}' - BEFORE!") if isinstance(peer, dict) and "domain" in peer: - # DEBUG: print(f"DEBUG: peer[domain]='{peer['domain']}'") + logger.debug(f"peer[domain]='{peer['domain']}'") peer = tidyup.domain(peer["domain"]) elif isinstance(peer, str): - # DEBUG: print(f"DEBUG: peer='{peer}'") + logger.debug(f"peer='{peer}'") peer = tidyup.domain(peer) else: raise ValueError(f"peer[]='{type(peer)}' is not supported,key='{key}'") - # DEBUG: print(f"DEBUG: peer='{peer}' - AFTER!") + logger.debug(f"peer='{peer}' - AFTER!") if not validators.domain(peer): - print(f"WARNING: peer='{peer}' is not a valid domain - SKIPPED!") + logger.warning(f"peer='{peer}' is not a valid domain - SKIPPED!") continue elif peer.endswith(".arpa"): - print(f"WARNING: peer='{peer}' is a domain for reversed IP addresses -SKIPPED!") + logger.warning(f"peer='{peer}' is a domain for reversed IP addresses -SKIPPED!") continue elif peer.endswith(".tld"): - print(f"WARNING: peer='{peer}' is a fake domain - SKIPPED!") + logger.warning(f"peer='{peer}' is a fake domain - SKIPPED!") continue elif blacklist.is_blacklisted(peer): - # DEBUG: print(f"DEBUG: peer='{peer}' is blacklisted - SKIPPED!") + logger.debug(f"peer='{peer}' is blacklisted - SKIPPED!") continue - # DEBUG: print(f"DEBUG: Adding peer='{peer}' ...") + logger.debug(f"Adding peer='{peer}' ...") peers.append(peer) - # DEBUG: print(f"DEBUG: peers()={len(peers)} - EXIT!") + logger.debug(f"peers()={len(peers)} - EXIT!") return peers diff --git a/fba/http/network.py b/fba/http/network.py index a8c62f0..ad6c9a0 100644 --- a/fba/http/network.py +++ b/fba/http/network.py @@ -14,6 +14,8 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
+import logging + import json import reqto import requests @@ -27,6 +29,9 @@ from fba.helpers import cookies from fba.models import instances +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + # HTTP headers for non-API requests web_headers = { "User-Agent": config.get("useragent"), @@ -51,7 +56,7 @@ exceptions = ( ) def post_json_api(domain: str, path: str, data: str = "", headers: dict = {}) -> dict: - # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',data='{data}',headers()={len(headers)} - CALLED!") + logger.debug(f"domain='{domain}',path='{path}',data='{data}',headers()={len(headers)} - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -78,7 +83,7 @@ def post_json_api(domain: str, path: str, data: str = "", headers: dict = {}) -> } try: - # DEBUG: print(f"DEBUG: Sending POST to domain='{domain}',path='{path}',data='{data}',headers({len(headers)})={headers}") + logger.debug(f"Sending POST to domain='{domain}',path='{path}',data='{data}',headers({len(headers)})={headers}") response = reqto.post( f"https://{domain}{path}", data=data, @@ -89,27 +94,27 @@ def post_json_api(domain: str, path: str, data: str = "", headers: dict = {}) -> json_reply["json"] = json_from_response(response) - # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'") + logger.debug(f"response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'") if not response.ok or response.status_code >= 400: - print(f"WARNING: Cannot query JSON API: domain='{domain}',path='{path}',data()={len(data)},response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'") + logger.warning(f"Cannot query JSON API: domain='{domain}',path='{path}',data()={len(data)},response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'") 
json_reply["status_code"] = response.status_code json_reply["error_message"] = response.reason del json_reply["json"] instances.set_last_error(domain, response) except exceptions as exception: - # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'") + logger.debug(f"Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'") json_reply["status_code"] = 999 json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'" json_reply["exception"] = exception instances.set_last_error(domain, exception) raise exception - # DEBUG: print(f"DEBUG: Returning json_reply({len(json_reply)})=[]:{type(json_reply)}") + logger.debug(f"Returning json_reply({len(json_reply)})=[]:{type(json_reply)}") return json_reply def fetch_api_url(url: str, timeout: tuple) -> dict: - # DEBUG: print(f"DEBUG: url='{url}',timeout()={len(timeout)} - CALLED!") + logger.debug(f"url='{url}',timeout()={len(timeout)} - CALLED!") if not isinstance(url, str): raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'") elif not isinstance(timeout, tuple): @@ -120,30 +125,30 @@ def fetch_api_url(url: str, timeout: tuple) -> dict: } try: - # DEBUG: print(f"DEBUG: Fetching url='{url}' ...") + logger.debug(f"Fetching url='{url}' ...") response = fba.fetch_url(url, api_headers, timeout) json_reply["json"] = json_from_response(response) - # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'") + logger.debug(f"response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'") if not response.ok or response.status_code >= 400: - print(f"WARNING: Cannot query JSON API: url='{url}',response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'") + logger.warning(f"Cannot query JSON API: 
url='{url}',response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'") json_reply["status_code"] = response.status_code json_reply["error_message"] = response.reason del json_reply["json"] except exceptions as exception: - # DEBUG: print(f"DEBUG: Fetching '{url}' failed. exception[{type(exception)}]='{str(exception)}'") + logger.debug(f"Fetching '{url}' failed. exception[{type(exception)}]='{str(exception)}'") json_reply["status_code"] = 999 json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'" json_reply["exception"] = exception raise exception - # DEBUG: print(f"DEBUG: Returning json_reply({len(json_reply)})=[]:{type(json_reply)}") + logger.debug(f"Returning json_reply({len(json_reply)})=[]:{type(json_reply)}") return json_reply def get_json_api(domain: str, path: str, headers: dict, timeout: tuple) -> dict: - # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',timeout()={len(timeout)} - CALLED!") + logger.debug(f"domain='{domain}',path='{path}',timeout()={len(timeout)} - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -170,7 +175,7 @@ def get_json_api(domain: str, path: str, headers: dict, timeout: tuple) -> dict: } try: - # DEBUG: print(f"DEBUG: Sending GET to domain='{domain}',path='{path}',timeout({len(timeout)})={timeout}") + logger.debug(f"Sending GET to domain='{domain}',path='{path}',timeout({len(timeout)})={timeout}") response = reqto.get( f"https://{domain}{path}", headers={**api_headers, **headers}, @@ -179,7 +184,7 @@ def get_json_api(domain: str, path: str, headers: dict, timeout: tuple) -> dict: ) except exceptions as exception: - # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'") + logger.debug(f"Fetching '{path}' from '{domain}' failed. 
exception[{type(exception)}]='{str(exception)}'") json_reply["status_code"] = 999 json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'" json_reply["exception"] = exception @@ -188,19 +193,19 @@ def get_json_api(domain: str, path: str, headers: dict, timeout: tuple) -> dict: json_reply["json"] = json_from_response(response) - # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'") + logger.debug(f"response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'") if not response.ok or response.status_code >= 400: - print(f"WARNING: Cannot query JSON API: domain='{domain}',path='{path}',response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'") + logger.warning(f"Cannot query JSON API: domain='{domain}',path='{path}',response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'") json_reply["status_code"] = response.status_code json_reply["error_message"] = response.reason del json_reply["json"] instances.set_last_error(domain, response) - # DEBUG: print(f"DEBUG: Returning json_reply({len(json_reply)})=[]:{type(json_reply)}") + logger.debug(f"Returning json_reply({len(json_reply)})=[]:{type(json_reply)}") return json_reply def send_bot_post(domain: str, blocklist: dict): - # DEBUG: print(f"DEBUG: domain='{domain}',blocklist()={len(blocklist)} - CALLED!") + logger.debug(f"domain='{domain}',blocklist()={len(blocklist)} - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -223,9 +228,9 @@ def send_bot_post(domain: str, blocklist: dict): truncated = True blocklist = blocklist[0 : 19] - # DEBUG: print(f"DEBUG: blocklist()={len(blocklist)}") + logger.debug(f"blocklist()={len(blocklist)}") for block in blocklist: - # DEBUG: print(f"DEBUG: block['{type(block)}']={block}") + 
logger.debug(f"block['{type(block)}']={block}") if block["reason"] is None or block["reason"] == '': message = message + block["blocked"] + " with unspecified reason\n" else: @@ -253,7 +258,7 @@ def send_bot_post(domain: str, blocklist: dict): return True def fetch_response(domain: str, path: str, headers: dict, timeout: tuple) -> requests.models.Response: - # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',headers()={len(headers)},timeout={timeout} - CALLED!") + logger.debug(f"domain='{domain}',path='{path}',headers()={len(headers)},timeout={timeout} - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -276,7 +281,7 @@ def fetch_response(domain: str, path: str, headers: dict, timeout: tuple) -> req raise ValueError(f"timeout[]='{type(timeout)}' is not 'tuple'") try: - # DEBUG: print(f"DEBUG: Sending GET request to '{domain}{path}' ...") + logger.debug(f"Sending GET request to '{domain}{path}' ...") response = reqto.get( f"https://{domain}{path}", headers=headers, @@ -285,25 +290,25 @@ def fetch_response(domain: str, path: str, headers: dict, timeout: tuple) -> req ) except exceptions as exception: - # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'") + logger.debug(f"Fetching '{path}' from '{domain}' failed. 
exception[{type(exception)}]='{str(exception)}'") instances.set_last_error(domain, exception) raise exception - # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXXIT!") + logger.debug(f"response[]='{type(response)}' - EXXIT!") return response def json_from_response(response: requests.models.Response) -> list: - # DEBUG: print(f"DEBUG: response[]='{type(response)}' - CALLED!") + logger.debug(f"response[]='{type(response)}' - CALLED!") if not isinstance(response, requests.models.Response): raise ValueError(f"Parameter response[]='{type(response)}' is not type of 'Response'") data = list() if response.text.strip() != "": - # DEBUG: print(f"DEBUG: response.text()={len(response.text)} is not empty, invoking response.json() ...") + logger.debug(f"response.text()={len(response.text)} is not empty, invoking response.json() ...") try: data = response.json() except json.decoder.JSONDecodeError: pass - # DEBUG: print(f"DEBUG: data[]='{type(data)}' - EXIT!") + logger.debug(f"data[]='{type(data)}' - EXIT!") return data diff --git a/fba/models/blocks.py b/fba/models/blocks.py index 7e67979..7a09742 100644 --- a/fba/models/blocks.py +++ b/fba/models/blocks.py @@ -14,6 +14,8 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
+import logging + import time import validators @@ -22,8 +24,11 @@ from fba import fba from fba.helpers import blacklist from fba.helpers import tidyup +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + def update_reason(reason: str, blocker: str, blocked: str, block_level: str): - # DEBUG: print(f"DEBUG: reason='{reason}',blocker={blocker},blocked={blocked},block_level={block_level} - CALLED!") + logger.debug(f"reason='{reason}',blocker={blocker},blocked={blocked},block_level={block_level} - CALLED!") if not isinstance(reason, str) and reason is not None: raise ValueError(f"Parameter reason[]='{type(reason)}' is not 'str'") elif not isinstance(blocker, str): @@ -45,7 +50,7 @@ def update_reason(reason: str, blocker: str, blocked: str, block_level: str): elif block_level == "accept": raise ValueError("Accepted domains are not wanted here") - # DEBUG: print("DEBUG: Updating block reason:", reason, blocker, blocked, block_level) + logger.debug("Updating block reason:", reason, blocker, blocked, block_level) fba.cursor.execute( "UPDATE blocks SET reason = ?, last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ? 
AND (reason IS NULL OR reason = '') LIMIT 1", [ @@ -56,10 +61,10 @@ def update_reason(reason: str, blocker: str, blocked: str, block_level: str): block_level ]) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def update_last_seen(blocker: str, blocked: str, block_level: str): - # DEBUG: print("DEBUG: Updating last_seen for:", blocker, blocked, block_level) + logger.debug("Updating last_seen for:", blocker, blocked, block_level) if not isinstance(blocker, str): raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not 'str'") elif blocker == "": @@ -88,10 +93,10 @@ def update_last_seen(blocker: str, blocked: str, block_level: str): block_level ]) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def is_instance_blocked(blocker: str, blocked: str, block_level: str) -> bool: - # DEBUG: print(f"DEBUG: blocker={blocker},blocked={blocked},block_level={block_level} - CALLED!") + logger.debug(f"blocker={blocker},blocked={blocked},block_level={block_level} - CALLED!") if not isinstance(blocker, str): raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not of type 'str'") elif blocker == "": @@ -122,11 +127,11 @@ def is_instance_blocked(blocker: str, blocked: str, block_level: str) -> bool: is_blocked = fba.cursor.fetchone() is not None - # DEBUG: print(f"DEBUG: is_blocked='{is_blocked}' - EXIT!") + logger.debug(f"is_blocked='{is_blocked}' - EXIT!") return is_blocked def add_instance(blocker: str, blocked: str, reason: str, block_level: str): - # DEBUG: print("DEBUG: blocker,blocked,reason,block_level:", blocker, blocked, reason, block_level) + logger.debug("blocker,blocked,reason,block_level:", blocker, blocked, reason, block_level) if not isinstance(blocker, str): raise ValueError(f"Parameter blocker[]='{type(blocker)}' is not 'str'") elif blocker == "": @@ -158,7 +163,7 @@ def add_instance(blocker: str, blocked: str, reason: str, block_level: str): # Maybe needs cleaning reason = tidyup.reason(reason) - print(f"INFO: New block: 
blocker='{blocker}',blocked='{blocked}',reason='{reason}',block_level='{block_level}'") + logger.info(f"New block: blocker='{blocker}',blocked='{blocked}',reason='{reason}',block_level='{block_level}'") fba.cursor.execute( "INSERT INTO blocks (blocker, blocked, reason, block_level, first_seen, last_seen) VALUES (?, ?, ?, ?, ?, ?)", @@ -171,4 +176,4 @@ def add_instance(blocker: str, blocked: str, reason: str, block_level: str): time.time() ]) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") diff --git a/fba/models/error_log.py b/fba/models/error_log.py index b684408..4a71c06 100644 --- a/fba/models/error_log.py +++ b/fba/models/error_log.py @@ -13,6 +13,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import logging import json import time @@ -22,8 +23,11 @@ from fba import fba from fba.helpers import config +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + def add(domain: str, error: dict): - # DEBUG: print("DEBUG: domain,error[]:", domain, type(error)) + logger.debug("domain,error[]:", domain, type(error)) if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -37,14 +41,14 @@ def add(domain: str, error: dict): elif domain.endswith(".tld"): raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!") elif config.get("write_error_log").lower() != "true": - # DEBUG: print(f"DEBUG: Writing to error_log is disabled in configuruation file - EXIT!") + logger.debug(f"Writing to error_log is disabled in configuruation file - EXIT!") return - # DEBUG: print("DEBUG: BEFORE error[]:", type(error)) + logger.debug("BEFORE error[]:", type(error)) if isinstance(error, BaseException, error, json.decoder.JSONDecodeError): error = f"error[{type(error)}]='{str(error)}'" - # DEBUG: print("DEBUG: AFTER error[]:", type(error)) + logger.debug("AFTER error[]:", type(error)) if 
isinstance(error, str): fba.cursor.execute("INSERT INTO error_log (domain, error_code, error_message, created) VALUES (?, 999, ?, ?)",[ domain, @@ -60,7 +64,7 @@ def add(domain: str, error: dict): ]) # Cleanup old entries - # DEBUG: print(f"DEBUG: Purging old records (distance: {config.get('error_log_cleanup')})") + logger.debug(f"Purging old records (distance: {config.get('error_log_cleanup')})") fba.cursor.execute("DELETE FROM error_log WHERE created < ?", [time.time() - config.get("error_log_cleanup")]) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") diff --git a/fba/models/instances.py b/fba/models/instances.py index 0be35dd..efb8d9f 100644 --- a/fba/models/instances.py +++ b/fba/models/instances.py @@ -14,6 +14,7 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import logging import json import time @@ -31,6 +32,9 @@ from fba.http import network from fba.models import error_log +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + # Found info from node, such as nodeinfo URL, detection mode that needs to be # written to database. 
Both arrays must be filled at the same time or else # update_data() will fail @@ -57,7 +61,7 @@ _pending = { } def _set_data(key: str, domain: str, value: any): - # DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',value[]='{type(value)}' - CALLED!") + logger.debug(f"key='{key}',domain='{domain}',value[]='{type(value)}' - CALLED!") if not isinstance(key, str): raise ValueError("Parameter key[]='{type(key)}' is not 'str'") elif key == "": @@ -82,10 +86,10 @@ def _set_data(key: str, domain: str, value: any): # Set it _pending[key][domain] = value - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def has_pending(domain: str) -> bool: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -101,16 +105,16 @@ def has_pending(domain: str) -> bool: has = False for key in _pending: - # DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',_pending[key]()='{len(_pending[key])}'") + logger.debug(f"key='{key}',domain='{domain}',_pending[key]()='{len(_pending[key])}'") if domain in _pending[key]: has = True break - # DEBUG: print(f"DEBUG: has='{has}' - EXIT!") + logger.debug(f"has='{has}' - EXIT!") return has def update_data(domain: str): - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -128,17 +132,17 @@ def update_data(domain: str): elif not is_registered(domain): raise Exception(f"domain='{domain}' cannot be updated while not being registered") - # DEBUG: print(f"DEBUG: Updating instance data for domain='{domain}' ...") + logger.debug(f"Updating instance data for domain='{domain}' ...") sql_string = "" fields = list() for key in _pending: - # DEBUG: print("DEBUG: key:", key) + logger.debug("key:", key) if domain in 
_pending[key]: - # DEBUG: print(f"DEBUG: Adding '{_pending[key][domain]}' for key='{key}' ...") + logger.debug(f"Adding '{_pending[key][domain]}' for key='{key}' ...") fields.append(_pending[key][domain]) sql_string += f" {key} = ?," - # DEBUG: print(f"DEBUG: sql_string()={len(sql_string)}") + logger.debug(f"sql_string()={len(sql_string)}") if sql_string == "": raise ValueError(f"No fields have been set, but method invoked, domain='{domain}'") @@ -148,30 +152,30 @@ def update_data(domain: str): # For WHERE statement fields.append(domain) - # DEBUG: print(f"DEBUG: sql_string='{sql_string}',fields()={len(fields)}") + logger.debug(f"sql_string='{sql_string}',fields()={len(fields)}") sql_string = "UPDATE instances SET" + sql_string + " last_updated = ? WHERE domain = ? LIMIT 1" - # DEBUG: print("DEBUG: sql_string:", sql_string) + logger.debug("sql_string:", sql_string) - # DEBUG: print("DEBUG: Executing SQL:", sql_string) + logger.debug("Executing SQL:", sql_string) fba.cursor.execute(sql_string, fields) - # DEBUG: print(f"DEBUG: Success! (rowcount={fba.cursor.rowcount })") + logger.debug(f"Success! 
(rowcount={fba.cursor.rowcount })") if fba.cursor.rowcount == 0: raise Exception(f"Did not update any rows: domain='{domain}',fields()={len(fields)}") - # DEBUG: print("DEBUG: Committing changes ...") + logger.debug("Committing changes ...") fba.connection.commit() - # DEBUG: print(f"DEBUG: Deleting _pending for domain='{domain}'") + logger.debug(f"Deleting _pending for domain='{domain}'") for key in _pending: - # DEBUG: print(f"DEBUG: domain='{domain}',key='{key}'") + logger.debug(f"domain='{domain}',key='{key}'") if domain in _pending[key]: del _pending[key][domain] - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def add(domain: str, origin: str, command: str, path: str = None, software: str = None): - # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',command='{command}',path='{path}',software='{software}' - CALLED!") + logger.debug(f"domain='{domain}',origin='{origin}',command='{command}',path='{path}',software='{software}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -215,20 +219,20 @@ def add(domain: str, origin: str, command: str, path: str = None, software: str if software is None: try: - # DEBUG: print("DEBUG: domain,origin,command,path:", domain, origin, command, path) + logger.debug("domain,origin,command,path:", domain, origin, command, path) software = federation.determine_software(domain, path) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during determining software type") + logger.warning(f"Exception '{type(exception)}' during determining software type") set_last_error(domain, exception) - # DEBUG: print("DEBUG: Determined software:", software) + logger.debug("Determined software:", software) if software == "lemmy" and domain.find("/c/") > 0: domain = domain.split("/c/")[0] if is_registered(domain): - print(f"WARNING: domain='{domain}' already registered after cutting off user part. 
- EXIT!") + logger.warning(f"domain='{domain}' already registered after cutting off user part. - EXIT!") return - print(f"INFO: Adding instance domain='{domain}' (origin='{origin}',software='{software}')") + logger.info(f"Adding instance domain='{domain}' (origin='{origin}',software='{software}')") fba.cursor.execute( "INSERT INTO instances (domain, origin, command, hash, software, first_seen) VALUES (?, ?, ?, ?, ?, ?)", ( @@ -241,17 +245,17 @@ def add(domain: str, origin: str, command: str, path: str = None, software: str ), ) - # DEBUG: print(f"DEBUG: Marking domain='{domain}' as registered.") + logger.debug(f"Marking domain='{domain}' as registered.") cache.set_sub_key("is_registered", domain, True) if has_pending(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo being updated ...") + logger.debug(f"domain='{domain}' has pending nodeinfo being updated ...") update_data(domain) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def set_last_nodeinfo(domain: str): - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -265,17 +269,17 @@ def set_last_nodeinfo(domain: str): elif domain.endswith(".tld"): raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!") - # DEBUG: print("DEBUG: Updating last_nodeinfo for domain:", domain) + logger.debug("Updating last_nodeinfo for domain:", domain) _set_data("last_nodeinfo", domain, time.time()) # Running pending updated - # DEBUG: print(f"DEBUG: Invoking update_data({domain}) ...") + logger.debug(f"Invoking update_data({domain}) ...") update_data(domain) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def set_last_error(domain: str, error: dict): - # DEBUG: print("DEBUG: domain,error[]:", domain, type(error)) + logger.debug("domain,error[]:", domain, type(error)) if not isinstance(domain, str): 
raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -289,36 +293,36 @@ def set_last_error(domain: str, error: dict): elif domain.endswith(".tld"): raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!") - # DEBUG: print("DEBUG: BEFORE error[]:", type(error)) + logger.debug("BEFORE error[]:", type(error)) if isinstance(error, (BaseException, json.decoder.JSONDecodeError)): error = f"error[{type(error)}]='{str(error)}'" - # DEBUG: print("DEBUG: AFTER error[]:", type(error)) + logger.debug("AFTER error[]:", type(error)) if isinstance(error, str): - # DEBUG: print(f"DEBUG: Setting last_error_details='{error}'") + logger.debug(f"Setting last_error_details='{error}'") _set_data("last_status_code" , domain, 999) _set_data("last_error_details", domain, error if error != "" else None) elif isinstance(error, requests.models.Response): - # DEBUG: print(f"DEBUG: Setting last_error_details='{error.reason}'") + logger.debug(f"Setting last_error_details='{error.reason}'") _set_data("last_status_code" , domain, error.status_code) _set_data("last_error_details", domain, error.reason if error.reason != "" else None) elif not isinstance(error, dict): raise KeyError(f"Cannot handle keys in error[{type(error)}]='{error}'") elif "status_code" in error and "error_message" in error: - # DEBUG: print(f"DEBUG: Setting last_error_details='{error['error_message']}'") + logger.debug(f"Setting last_error_details='{error['error_message']}'") _set_data("last_status_code" , domain, error["status_code"]) _set_data("last_error_details", domain, error["error_message"] if error["error_message"] != "" else None) elif "json" in error and "error" in error["json"]: _set_data("last_status_code" , domain, error["status_code"]) _set_data("last_error_details", domain, error["json"]["error"] if error["json"]["error"] != "" else None) - # DEBUG: print(f"DEBUG: Invoking error_log.add(domain='{domain}',error[]='{type(error)}'") + 
logger.debug(f"Invoking error_log.add(domain='{domain}',error[]='{type(error)}'") error_log.add(domain, error) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def is_registered(domain: str) -> bool: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -332,9 +336,9 @@ def is_registered(domain: str) -> bool: elif domain.endswith(".tld"): raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!") - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not cache.key_exists("is_registered"): - # DEBUG: print("DEBUG: Cache for 'is_registered' not initialized, fetching all rows ...") + logger.debug("Cache for 'is_registered' not initialized, fetching all rows ...") fba.cursor.execute("SELECT domain FROM instances") # Check Set all @@ -343,11 +347,11 @@ def is_registered(domain: str) -> bool: # Is cache found? 
registered = cache.sub_key_exists("is_registered", domain) - # DEBUG: print(f"DEBUG: registered='{registered}' - EXIT!") + logger.debug(f"registered='{registered}' - EXIT!") return registered def is_recent(domain: str) -> bool: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -361,7 +365,7 @@ def is_recent(domain: str) -> bool: elif domain.endswith(".tld"): raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!") elif not is_registered(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is not registered, returning False - EXIT!") + logger.debug(f"domain='{domain}' is not registered, returning False - EXIT!") return False # Query database @@ -370,14 +374,14 @@ def is_recent(domain: str) -> bool: # Fetch row fetched = fba.cursor.fetchone()[0] - # DEBUG: print(f"DEBUG: fetched[{type(fetched)}]='{fetched}'") + logger.debug(f"fetched[{type(fetched)}]='{fetched}'") recently = isinstance(fetched, float) and time.time() - fetched <= config.get("recheck_instance") - # DEBUG: print(f"DEBUG: recently='{recently}' - EXIT!") + logger.debug(f"recently='{recently}' - EXIT!") return recently def deobscure(char: str, domain: str, blocked_hash: str = None) -> tuple: - # DEBUG: print(f"DEBUG: char='{char}',domain='{domain}',blocked_hash='{blocked_hash}' - CALLED!") + logger.debug(f"char='{char}',domain='{domain}',blocked_hash='{blocked_hash}' - CALLED!") if not isinstance(char, str): raise ValueError(f"Parameter char[]='{type(char)}' is not 'str'") elif char == "": @@ -398,31 +402,31 @@ def deobscure(char: str, domain: str, blocked_hash: str = None) -> tuple: raise ValueError(f"Parameter blocked_hash[]='{type(blocked_hash)}' is not 'str'") if isinstance(blocked_hash, str): - # DEBUG: print(f"DEBUG: Looking up blocked_hash='{blocked_hash}' ...") + logger.debug(f"Looking up 
blocked_hash='{blocked_hash}' ...") fba.cursor.execute( "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash] ) row = fba.cursor.fetchone() - # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + logger.debug(f"row[]='{type(row)}'") if row is None: - # DEBUG: print(f"DEBUG: blocked_hash='{blocked_hash}' not found, trying domain='{domain}' ...") + logger.debug(f"blocked_hash='{blocked_hash}' not found, trying domain='{domain}' ...") return deobscure(char, domain) else: - # DEBUG: print(f"DEBUG: Looking up domain='{domain}' ...") + logger.debug(f"Looking up domain='{domain}' ...") fba.cursor.execute( "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [domain.replace(char, "_")] ) row = fba.cursor.fetchone() - # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + logger.debug(f"row[]='{type(row)}'") - # DEBUG: print(f"DEBUG: row[]='{type(row)}' - EXIT!") + logger.debug(f"row[]='{type(row)}' - EXIT!") return row def set_last_blocked(domain: str): - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -438,10 +442,10 @@ def set_last_blocked(domain: str): # Set timestamp _set_data("last_blocked", domain, time.time()) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def set_last_instance_fetch(domain: str): - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -457,10 +461,10 @@ def set_last_instance_fetch(domain: str): # Set timestamp _set_data("last_instance_fetch", domain, time.time()) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def set_total_peers(domain: str, peers: list): - # DEBUG: print(f"DEBUG: domain='{domain}',peers()={len(peers)} 
- CALLED!") + logger.debug(f"domain='{domain}',peers()={len(peers)} - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -478,10 +482,10 @@ def set_total_peers(domain: str, peers: list): # Set timestamp _set_data("total_peers", domain, len(peers)) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def set_nodeinfo_url(domain: str, url: str): - # DEBUG: print(f"DEBUG: domain='{domain}',url='{url}' - CALLED!") + logger.debug(f"domain='{domain}',url='{url}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -501,10 +505,10 @@ def set_nodeinfo_url(domain: str, url: str): # Set timestamp _set_data("nodeinfo_url", domain, url) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def set_detection_mode(domain: str, mode: str): - # DEBUG: print(f"DEBUG: domain='{domain}',mode='{mode}' - CALLED!") + logger.debug(f"domain='{domain}',mode='{mode}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -524,4 +528,4 @@ def set_detection_mode(domain: str, mode: str): # Set timestamp _set_data("detection_mode", domain, mode) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") diff --git a/fba/networks/friendica.py b/fba/networks/friendica.py index 13f3583..65e859e 100644 --- a/fba/networks/friendica.py +++ b/fba/networks/friendica.py @@ -14,6 +14,8 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
+import logging + import bs4 import validators @@ -25,8 +27,11 @@ from fba.http import network from fba.models import instances +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + def fetch_blocks(domain: str) -> dict: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -44,7 +49,7 @@ def fetch_blocks(domain: str) -> dict: block_tag = None try: - # DEBUG: print("DEBUG: Fetching friendica blocks from domain:", domain) + logger.debug(f"Fetching friendica blocks from domain: {domain}") doc = bs4.BeautifulSoup( network.fetch_response( domain, @@ -54,55 +59,55 @@ def fetch_blocks(domain: str) -> dict: ).text, "html.parser", ) - # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'") + logger.debug(f"doc[]='{type(doc)}'") block_tag = doc.find(id="about_blocklist") except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during fetching instances (friendica) from domain='{domain}'") + logger.warning(f"Exception '{type(exception)}' during fetching instances (friendica) from domain='{domain}'") instances.set_last_error(domain, exception) return dict() # Prevents exceptions: if block_tag is None: - # DEBUG: print("DEBUG: Instance has no block list:", domain) + logger.debug(f"Instance has no block list: {domain}") return dict() table = block_tag.find("table") - # DEBUG: print(f"DEBUG: table[]='{type(table)}'") + logger.debug(f"table[]='{type(table)}'") if table.find("tbody"): rows = table.find("tbody").find_all("tr") else: rows = table.find_all("tr") - # DEBUG: print(f"DEBUG: Found rows()={len(rows)}") + logger.debug(f"Found rows()={len(rows)}") for line in rows: - # DEBUG: print(f"DEBUG: line='{line}'") + logger.debug(f"line='{line}'") blocked = tidyup.domain(line.find_all("td")[0].text) reason = tidyup.reason(line.find_all("td")[1].text) - # DEBUG:
print(f"DEBUG: blocked='{blocked}',reason='{reason}'") + logger.debug(f"blocked='{blocked}',reason='{reason}'") if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}' is not a valid domain - SKIPPED!") + logger.warning(f"blocked='{blocked}' is not a valid domain - SKIPPED!") continue elif blocked.endswith(".arpa"): - print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") + logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") continue elif blocked.endswith(".tld"): - print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!") + logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!") continue elif blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!") + logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!") continue - # DEBUG: print(f"DEBUG: Appending blocked='{blocked}',reason='{reason}'") + logger.debug(f"Appending blocked='{blocked}',reason='{reason}'") blocklist.append({ "domain": tidyup.domain(blocked), "reason": tidyup.reason(reason) }) - # DEBUG: print("DEBUG: Next!") + logger.debug("Next!") - # DEBUG: print("DEBUG: Returning blocklist() for domain:", domain, len(blocklist)) + logger.debug("Returning blocklist() for domain:", domain, len(blocklist)) return { "reject": blocklist } diff --git a/fba/networks/lemmy.py b/fba/networks/lemmy.py index 3cd96d1..15817b6 100644 --- a/fba/networks/lemmy.py +++ b/fba/networks/lemmy.py @@ -15,6 +15,7 @@ # along with this program. If not, see . 
import inspect +import logging import bs4 import validators @@ -32,8 +33,11 @@ from fba.http import network from fba.models import blocks from fba.models import instances +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + def fetch_peers(domain: str) -> list: - # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',software='lemmy' - CALLED!") + logger.debug(f"domain({len(domain)})='{domain}',software='lemmy' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -53,15 +57,15 @@ def fetch_peers(domain: str) -> list: headers = tuple() try: - # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'") + logger.debug(f"Checking CSRF for domain='{domain}'") headers = csrf.determine(domain, dict()) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!") + logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!") instances.set_last_error(domain, exception) return peers try: - # DEBUG: print(f"DEBUG: domain='{domain}' is Lemmy, fetching JSON ...") + logger.debug(f"domain='{domain}' is Lemmy, fetching JSON ...") data = network.get_json_api( domain, "/api/v3/site", @@ -69,30 +73,30 @@ def fetch_peers(domain: str) -> list: (config.get("connection_timeout"), config.get("read_timeout")) ) - # DEBUG: print(f"DEBUG: data[]='{type(data)}'") + logger.debug(f"data[]='{type(data)}'") if "error_message" in data: - print("WARNING: Could not reach any JSON API:", domain) + logger.warning("Could not reach any JSON API:", domain) instances.set_last_error(domain, data) elif "federated_instances" in data["json"]: - # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'") + logger.debug(f"Found federated_instances for domain='{domain}'") peers = peers + federation.add_peers(data["json"]["federated_instances"]) - # DEBUG: 
print("DEBUG: Added instance(s) to peers") + logger.debug("Added instance(s) to peers") else: - print("WARNING: JSON response does not contain 'federated_instances':", domain) + logger.warning("JSON response does not contain 'federated_instances':", domain) instances.set_last_error(domain, data) except network.exceptions as exception: - print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(exception)}]:'{str(exception)}'") + logger.warning(f"Exception during fetching JSON: domain='{domain}',exception[{type(exception)}]:'{str(exception)}'") instances.set_last_error(domain, exception) - # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") + logger.debug(f"Adding '{len(peers)}' for domain='{domain}'") instances.set_total_peers(domain, peers) - # DEBUG: print("DEBUG: Returning peers[]:", type(peers)) + logger.debug("Returning peers[]:", type(peers)) return peers def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): - # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!") + logger.debug(f"domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -159,7 +163,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): "report_removal": [], } - # DEBUG: print(f"DEBUG: Fetching /instances from domain='{domain}'") + logger.debug(f"Fetching /instances from domain='{domain}'") response = network.fetch_response( domain, "/instances", @@ -167,56 +171,56 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): (config.get("connection_timeout"), config.get("read_timeout")) ) - # DEBUG: print(f"DEBUG: response.ok='{response.ok}',response.status_code={response.status_code},response.text()={len(response.text)}") + 
logger.debug(f"response.ok='{response.ok}',response.status_code={response.status_code},response.text()={len(response.text)}") if response.ok and response.status_code < 300 and response.text != "": - # DEBUG: print(f"DEBUG: Parsing {len(response.text)} Bytes ...") + logger.debug(f"Parsing {len(response.text)} Bytes ...") doc = bs4.BeautifulSoup(response.text, "html.parser") - # DEBUG: print(f"DEBUG: doc[]={type(doc)}") + logger.debug(f"doc[]={type(doc)}") headers = doc.findAll("h5") found = None - # DEBUG: print(f"DEBUG: Search in {len(headers)} header(s) ...") + logger.debug(f"Search in {len(headers)} header(s) ...") for header in headers: - # DEBUG: print(f"DEBUG: header[]={type(header)}") + logger.debug(f"header[]={type(header)}") content = header.contents[0] - # DEBUG: print(f"DEBUG: content='{content}'") + logger.debug(f"content='{content}'") if content in translations: - # DEBUG: print("DEBUG: Found header with blocked instances - BREAK!") + logger.debug("Found header with blocked instances - BREAK!") found = header break - # DEBUG: print(f"DEBUG: found[]='{type(found)}'") + logger.debug(f"found[]='{type(found)}'") if found is None: - # DEBUG: print(f"DEBUG: domain='{domain}' is not blocking any instances - EXIT!") + logger.debug(f"domain='{domain}' is not blocking any instances - EXIT!") return blocking = found.find_next("ul").findAll("a") - # DEBUG: print(f"DEBUG: Found {len(blocking)} blocked instance(s) ...") + logger.debug(f"Found {len(blocking)} blocked instance(s) ...") for tag in blocking: - # DEBUG: print(f"DEBUG: tag[]='{type(tag)}'") + logger.debug(f"tag[]='{type(tag)}'") blocked = tidyup.domain(tag.contents[0]) - # DEBUG: print(f"DEBUG: blocked='{blocked}'") + logger.debug(f"blocked='{blocked}'") if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}' is not a valid domain - SKIPPED!") + logger.warning(f"blocked='{blocked}' is not a valid domain - SKIPPED!") continue elif blocked.endswith(".arpa"): - print(f"WARNING: 
blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") + logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") continue elif blocked.endswith(".tld"): - print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!") + logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!") continue elif blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!") + logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!") continue elif not instances.is_registered(blocked): - # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain) + logger.debug("Hash wasn't found, adding:", blocked, domain) instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) if not blocks.is_instance_blocked(domain, blocked, "reject"): - # DEBUG: print("DEBUG: Blocking:", domain, blocked) + logger.debug("Blocking:", domain, blocked) blocks.add_instance(domain, blocked, None, "reject") found_blocks.append({ @@ -224,13 +228,13 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): "reason" : None }) else: - # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...") + logger.debug(f"Updating block last seen for domain='{domain}',blocked='{blocked}' ...") blocks.update_last_seen(domain, blocked, "reject") - # DEBUG: print("DEBUG: Committing changes ...") + logger.debug("Committing changes ...") fba.connection.commit() except network.exceptions as exception: - print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'") + logger.warning(f"domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'") instances.set_last_error(domain, exception) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") diff --git a/fba/networks/mastodon.py b/fba/networks/mastodon.py index a6fccd0..2287ea7 100644 
--- a/fba/networks/mastodon.py +++ b/fba/networks/mastodon.py @@ -15,6 +15,7 @@ # along with this program. If not, see . import inspect +import logging import bs4 import validators @@ -31,6 +32,10 @@ from fba.http import network from fba.models import blocks from fba.models import instances +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Language mapping X -> English language_mapping = { # English -> English "Silenced instances" : "Silenced servers", @@ -58,7 +63,7 @@ language_mapping = { } def fetch_blocks_from_about(domain: str) -> dict: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -72,11 +77,11 @@ def fetch_blocks_from_about(domain: str) -> dict: elif domain.endswith(".tld"): raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!") - # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain) + logger.debug("Fetching mastodon blocks from domain:", domain) doc = None for path in ["/about/more", "/about"]: try: - # DEBUG: print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...") + logger.debug(f"Fetching path='{path}' from domain='{domain}' ...") doc = bs4.BeautifulSoup( network.fetch_response( domain, @@ -88,11 +93,11 @@ def fetch_blocks_from_about(domain: str) -> dict: ) if len(doc.find_all("h3")) > 0: - # DEBUG: print(f"DEBUG: path='{path}' had some headlines - BREAK!") + logger.debug(f"path='{path}' had some headlines - BREAK!") break except network.exceptions as exception: - print(f"ERROR: Cannot fetch from domain='{domain}',exception='{type(exception)}'") + logger.warning(f"Cannot fetch from domain='{domain}',exception='{type(exception)}'") instances.set_last_error(domain, exception) break @@ -103,20 +108,20 @@ def fetch_blocks_from_about(domain: str) -> dict: "Silenced servers" : [], } - # DEBUG: 
print(f"DEBUG: doc[]='{type(doc)}'") + logger.debug(f"doc[]='{type(doc)}'") if doc is None: - print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!") + logger.warning(f"Cannot fetch any /about pages for domain='{domain}' - EXIT!") return blocklist for header in doc.find_all("h3"): header_text = tidyup.reason(header.text) - # DEBUG: print(f"DEBUG: header_text='{header_text}'") + logger.debug(f"header_text='{header_text}'") if header_text in language_mapping: - # DEBUG: print(f"DEBUG: header_text='{header_text}'") + logger.debug(f"header_text='{header_text}'") header_text = language_mapping[header_text] else: - print(f"WARNING: header_text='{header_text}' not found in language mapping table") + logger.warning(f"header_text='{header_text}' not found in language mapping table") if header_text in blocklist or header_text.lower() in blocklist: # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu @@ -127,9 +132,9 @@ def fetch_blocks_from_about(domain: str) -> dict: "reason": tidyup.reason(line.find_all("td")[1].text), }) else: - print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}") + logger.warning(f"header_text='{header_text}' not found in blocklist()={len(blocklist)}") - # DEBUG: print("DEBUG: Returning blocklist for domain:", domain) + logger.debug("Returning blocklist for domain:", domain) return { "reject" : blocklist["Suspended servers"], "media_removal" : blocklist["Filtered media"], @@ -137,7 +142,7 @@ def fetch_blocks_from_about(domain: str) -> dict: } def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): - # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!") + logger.debug(f"domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -163,10 +168,10 
@@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): headers = tuple() try: - # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'") + logger.debug(f"Checking CSRF for domain='{domain}'") headers = csrf.determine(domain, dict()) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!") + logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!") instances.set_last_error(domain, exception) return @@ -182,7 +187,7 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): "report_removal": [], } - # DEBUG: print("DEBUG: Querying API domain_blocks:", domain) + logger.debug("Querying API domain_blocks:", domain) data = network.get_json_api( domain, "/api/v1/instance/domain_blocks", @@ -190,13 +195,13 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): (config.get("connection_timeout"), config.get("read_timeout")) ) - # DEBUG: print(f"DEBUG: data[]='{type(data)}'") + logger.debug(f"data[]='{type(data)}'") if "error_message" in data: - # DEBUG: print(f"DEBUG: Was not able to fetch domain_blocks from domain='{domain}': status_code='{data['status_code']}',error_message='{data['error_message']}'") + logger.debug(f"Was not able to fetch domain_blocks from domain='{domain}': status_code='{data['status_code']}',error_message='{data['error_message']}'") instances.set_last_error(domain, data) return elif "json" in data and "error" in data["json"]: - print(f"WARNING: JSON API returned error message: '{data['json']['error']}'") + logger.warning(f"JSON API returned error message: '{data['json']['error']}'") instances.set_last_error(domain, data) return else: @@ -204,79 +209,79 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): blocklist = data["json"] if len(blocklist) > 0: - print(f"INFO: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon' ...") + logger.info(f"Checking 
{len(blocklist)} entries from domain='{domain}',software='mastodon' ...") for block in blocklist: # Check type - # DEBUG: print(f"DEBUG: block[]='{type(block)}'") + logger.debug(f"block[]='{type(block)}'") if not isinstance(block, dict): - # DEBUG: print(f"DEBUG: block[]='{type(block)}' is of type 'dict' - SKIPPED!") + logger.debug(f"block[]='{type(block)}' is not of type 'dict' - SKIPPED!") continue # Map block -> entry - # DEBUG: print(f"DEBUG: block[{type(block)}]='{block}'") + logger.debug(f"block[{type(block)}]='{block}'") entry = { "domain": block["domain"], "hash" : block["digest"], "reason": block["comment"] if "comment" in block else None } - # DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment']) + logger.debug(f"severity='{block['severity']}',domain='{block['domain']}',hash='{block['digest']}',comment='{block['comment']}'") if block['severity'] == 'suspend': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") + logger.debug(f"Adding entry='{entry}' with severity='{block['severity']}' ...") rows['reject'].append(entry) elif block['severity'] == 'silence': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") + logger.debug(f"Adding entry='{entry}' with severity='{block['severity']}' ...") rows['followers_only'].append(entry) elif block['severity'] == 'reject_media': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") + logger.debug(f"Adding entry='{entry}' with severity='{block['severity']}' ...") rows['media_removal'].append(entry) elif block['severity'] == 'reject_reports': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") + logger.debug(f"Adding entry='{entry}' with severity='{block['severity']}' ...") rows['report_removal'].append(entry) else: - print(f"WARNING: Unknown severity='{block['severity']}', domain='{block['domain']}'") +
logger.warning(f"Unknown severity='{block['severity']}', domain='{block['domain']}'") else: - # DEBUG: print(f"DEBUG: domain='{domain}' has returned zero rows, trying /about/more page ...") + logger.debug(f"domain='{domain}' has returned zero rows, trying /about/more page ...") rows = fetch_blocks_from_about(domain) - print(f"INFO: Checking {len(rows.items())} entries from domain='{domain}',software='mastodon' ...") + logger.info(f"Checking {len(rows.items())} entries from domain='{domain}',software='mastodon' ...") for block_level, blocklist in rows.items(): - # DEBUG: print("DEBUG: domain,block_level,blocklist():", domain, block_level, len(blocklist)) + logger.debug("domain,block_level,blocklist():", domain, block_level, len(blocklist)) block_level = tidyup.domain(block_level) - # DEBUG: print("DEBUG: AFTER-block_level:", block_level) + logger.debug("AFTER-block_level:", block_level) if block_level == "": - print("WARNING: block_level is empty, domain:", domain) + logger.warning("block_level is empty, domain:", domain) continue elif block_level == "accept": - # DEBUG: print(f"DEBUG: domain='{domain}' skipping block_level='accept'") + logger.debug(f"domain='{domain}' skipping block_level='accept'") continue - # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon',block_level='{block_level}' ...") + logger.debug(f"Checking {len(blocklist)} entries from domain='{domain}',software='mastodon',block_level='{block_level}' ...") for block in blocklist: - # DEBUG: print(f"DEBUG: block[]='{type(block)}'") + logger.debug(f"block[]='{type(block)}'") blocked, blocked_hash, reason = block.values() - # DEBUG: print(f"DEBUG: blocked='{blocked}',blocked_hash='{blocked_hash}',reason='{reason}':") + logger.debug(f"blocked='{blocked}',blocked_hash='{blocked_hash}',reason='{reason}':") blocked = tidyup.domain(blocked) reason = tidyup.reason(reason) if reason is not None and reason != "" else None - # DEBUG: print(f"DEBUG: 
blocked='{blocked}',reason='{reason}' - AFTER!") + logger.debug(f"blocked='{blocked}',reason='{reason}' - AFTER!") if blocked == "": - print("WARNING: blocked is empty:", domain) + logger.warning("blocked is empty:", domain) continue elif blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + logger.debug(f"blocked='{blocked}' is blacklisted - skipping!") continue elif blocked.count("*") > 0: # Doing the hash search for instance names as well to tidy up DB row = instances.deobscure("*", blocked, blocked_hash) - # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + logger.debug(f"row[]='{type(row)}'") if row is None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!") + logger.warning(f"Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!") continue - # DEBUG: print("DEBUG: Updating domain: ", row[0]) + logger.debug("Updating domain: ", row[0]) blocked = row[0] origin = row[1] nodeinfo_url = row[2] @@ -284,52 +289,52 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # Doing the hash search for instance names as well to tidy up DB row = instances.deobscure("?", blocked, blocked_hash) - # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + logger.debug(f"row[]='{type(row)}'") if row is None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!") + logger.warning(f"Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!") continue - # DEBUG: print("DEBUG: Updating domain: ", row[0]) + logger.debug("Updating domain: ", row[0]) blocked = row[0] origin = row[1] nodeinfo_url = row[2] - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + logger.debug("Looking up instance by domain:", blocked) if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!") + 
logger.warning(f"blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!") continue elif blocked.endswith(".arpa"): - print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") + logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") continue elif blocked.endswith(".tld"): - print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!") + logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!") continue elif blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!") + logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!") continue elif not instances.is_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + logger.debug(f"Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + logger.debug("Looking up instance by domain:", blocked) if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!") + logger.warning(f"blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!") continue elif blocked.endswith(".arpa"): - print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") + logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") continue elif blocked.endswith(".tld"): - print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!") + logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!") continue elif 
blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!") + logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!") continue elif not instances.is_registered(blocked): - # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain) + logger.debug("Hash wasn't found, adding:", blocked, domain) instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) if not blocks.is_instance_blocked(domain, blocked, block_level): - # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level) + logger.debug("Blocking:", domain, blocked, block_level) blocks.add_instance(domain, blocked, reason, block_level) if block_level == "reject": @@ -338,14 +343,14 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): "reason" : reason }) else: - # DEBUG: print(f"DEBUG: Updating block last seen and reason for domain='{domain}',blocked='{blocked}' ...") + logger.debug(f"Updating block last seen and reason for domain='{domain}',blocked='{blocked}' ...") blocks.update_last_seen(domain, blocked, block_level) blocks.update_reason(reason, domain, blocked, block_level) - # DEBUG: print("DEBUG: Committing changes ...") + logger.debug("Committing changes ...") fba.connection.commit() except network.exceptions as exception: - print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'") + logger.warning(f"domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'") instances.set_last_error(domain, exception) - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") diff --git a/fba/networks/misskey.py b/fba/networks/misskey.py index f39c649..10c53e5 100644 --- a/fba/networks/misskey.py +++ b/fba/networks/misskey.py @@ -15,6 +15,7 @@ # along with this program. If not, see . 
import json +import logging import validators from fba import csrf @@ -28,8 +29,11 @@ from fba.http import network from fba.models import instances +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + def fetch_peers(domain: str) -> list: - # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}' - CALLED!") + logger.debug(f"domain({len(domain)})='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -43,7 +47,7 @@ def fetch_peers(domain: str) -> list: elif domain.endswith(".tld"): raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!") - # DEBUG: print(f"DEBUG: domain='{domain}' is misskey, sending API POST request ...") + logger.debug(f"domain='{domain}' is misskey, sending API POST request ...") peers = list() offset = 0 step = config.get("misskey_limit") @@ -52,10 +56,10 @@ def fetch_peers(domain: str) -> list: headers = tuple() try: - # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'") + logger.debug(f"Checking CSRF for domain='{domain}'") headers = csrf.determine(domain, dict()) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!") + logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!") instances.set_last_error(domain, exception) return peers @@ -63,7 +67,7 @@ def fetch_peers(domain: str) -> list: # instances page-by-page, since that troonware doesn't support # sending them all at once while True: - # DEBUG: print(f"DEBUG: Fetching offset='{offset}' from '{domain}' ...") + logger.debug(f"Fetching offset='{offset}' from '{domain}' ...") if offset == 0: fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({ "sort" : "+pubAt", @@ -79,71 +83,71 @@ def fetch_peers(domain: str) -> list: }), headers) # Check records - # DEBUG: print(f"DEBUG: 
fetched[]='{type(fetched)}'") + logger.debug(f"fetched[]='{type(fetched)}'") if "error_message" in fetched: - print(f"WARNING: post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}") + logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}") instances.set_last_error(domain, fetched) break elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]: - print(f"WARNING: post_json_api() returned error: {fetched['error']['message']}") + logger.warning(f"post_json_api() returned error: {fetched['error']['message']}") instances.set_last_error(domain, fetched["json"]["error"]["message"]) break rows = fetched["json"] - # DEBUG: print(f"DEBUG: rows()={len(rows)}") + logger.debug(f"rows()={len(rows)}") if len(rows) == 0: - # DEBUG: print(f"DEBUG: Returned zero bytes, exiting loop, domain='{domain}'") + logger.debug(f"Returned zero bytes, exiting loop, domain='{domain}'") break elif len(rows) != config.get("misskey_limit"): - # DEBUG: print(f"DEBUG: Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'") + logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'") offset = offset + (config.get("misskey_limit") - len(rows)) else: - # DEBUG: print(f"DEBUG: Raising offset by step={step}") + logger.debug(f"Raising offset by step={step}") offset = offset + step already = 0 - # DEBUG: print(f"DEBUG: rows({len(rows)})[]='{type(rows)}'") + logger.debug(f"rows({len(rows)})[]='{type(rows)}'") for row in rows: - # DEBUG: print(f"DEBUG: row()={len(row)}") + logger.debug(f"row()={len(row)}") if "host" not in row: - print(f"WARNING: row()={len(row)} does not contain key 'host': {row},domain='{domain}'") + logger.warning(f"row()={len(row)} does not contain key 'host': {row},domain='{domain}'") continue elif not isinstance(row["host"], str): - print(f"WARNING: row[host][]='{type(row['host'])}' is not 
'str' - SKIPPED!") + logger.warning(f"row[host][]='{type(row['host'])}' is not 'str' - SKIPPED!") continue elif not validators.domain(row["host"].split("/")[0]): - print(f"WARNING: row[host]='{row['host']}' is not a valid domain - SKIPPED!") + logger.warning(f"row[host]='{row['host']}' is not a valid domain - SKIPPED!") continue elif row["host"].endswith(".arpa"): - print(f"WARNING: row[host]='{row['host']}' is a domain for reversed IP addresses - SKIPPED!") + logger.warning(f"row[host]='{row['host']}' is a domain for reversed IP addresses - SKIPPED!") continue elif row["host"].endswith(".tld"): - print(f"WARNING: row[host]='{row['host']}' is a fake domain - SKIPPED!") + logger.warning(f"row[host]='{row['host']}' is a fake domain - SKIPPED!") continue elif blacklist.is_blacklisted(row["host"]): - # DEBUG: print(f"DEBUG: row[host]='{row['host']}' is blacklisted. domain='{domain}' - SKIPPED!") + logger.debug(f"row[host]='{row['host']}' is blacklisted. domain='{domain}' - SKIPPED!") continue elif row["host"] in peers: - # DEBUG: print(f"DEBUG: Not adding row[host]='{row['host']}', already found.") + logger.debug(f"Not adding row[host]='{row['host']}', already found.") already = already + 1 continue - # DEBUG: print(f"DEBUG: Adding peer: '{row['host']}'") + logger.debug(f"Adding peer: '{row['host']}'") peers.append(row["host"]) if already == len(rows): - # DEBUG: print(f"DEBUG: Host returned same set of '{already}' instances, aborting loop!") + logger.debug(f"Host returned same set of '{already}' instances, aborting loop!") break - # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") + logger.debug(f"Adding '{len(peers)}' for domain='{domain}'") instances.set_total_peers(domain, peers) - # DEBUG: print(f"DEBUG: Returning peers[]='{type(peers)}'") + logger.debug(f"Returning peers[]='{type(peers)}'") return peers def fetch_blocks(domain: str) -> dict: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") 
if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -157,7 +161,7 @@ def fetch_blocks(domain: str) -> dict: elif domain.endswith(".tld"): raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!") - # DEBUG: print(f"DEBUG: Fetching misskey blocks from domain='{domain}'") + logger.debug(f"Fetching misskey blocks from domain='{domain}'") blocklist = { "suspended": [], "blocked" : [] @@ -170,10 +174,10 @@ def fetch_blocks(domain: str) -> dict: headers = tuple() try: - # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'") + logger.debug(f"Checking CSRF for domain='{domain}'") headers = csrf.determine(domain, dict()) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!") + logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!") instances.set_last_error(domain, exception) return blocklist @@ -181,9 +185,9 @@ def fetch_blocks(domain: str) -> dict: # instances page-by-page since it doesn't support sending them all at once while True: try: - # DEBUG: print(f"DEBUG: Fetching offset='{offset}' from '{domain}' ...") + logger.debug(f"Fetching offset='{offset}' from '{domain}' ...") if offset == 0: - # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset) + logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset) fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({ "sort" : "+pubAt", "host" : None, @@ -191,7 +195,7 @@ def fetch_blocks(domain: str) -> dict: "limit" : step }), headers) else: - # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset) + logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset) fetched = network.post_json_api(domain, "/api/federation/instances", 
json.dumps({ "sort" : "+pubAt", "host" : None, @@ -200,33 +204,33 @@ def fetch_blocks(domain: str) -> dict: "offset" : offset - 1 }), headers) - # DEBUG: print(f"DEBUG: fetched[]='{type(fetched)}'") + logger.debug(f"fetched[]='{type(fetched)}'") if "error_message" in fetched: - print(f"WARNING: post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}") + logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}") instances.set_last_error(domain, fetched) break elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]: - print(f"WARNING: post_json_api() returned error: {fetched['error']['message']}") + logger.warning(f"post_json_api() returned error: {fetched['error']['message']}") instances.set_last_error(domain, fetched["json"]["error"]["message"]) break rows = fetched["json"] - # DEBUG: print(f"DEBUG: rows({len(rows)})={rows} - suspend") + logger.debug(f"rows({len(rows)})={rows} - suspend") if len(rows) == 0: - # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain) + logger.debug("Returned zero bytes, exiting loop:", domain) break elif len(rows) != config.get("misskey_limit"): - # DEBUG: print(f"DEBUG: Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'") + logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'") offset = offset + (config.get("misskey_limit") - len(rows)) else: - # DEBUG: print("DEBUG: Raising offset by step:", step) + logger.debug("Raising offset by step:", step) offset = offset + step count = 0 for instance in rows: # Is it there? 
- # DEBUG: print(f"DEBUG: instance[{type(instance)}]='{instance}' - suspend") + logger.debug(f"instance[{type(instance)}]='{instance}' - suspend") if "isSuspended" in instance and instance["isSuspended"] and not dicts.has_key(blocklist["suspended"], "domain", instance["host"]): count = count + 1 blocklist["suspended"].append({ @@ -235,13 +239,13 @@ def fetch_blocks(domain: str) -> dict: "reason": None }) - # DEBUG: print(f"DEBUG: count={count}") + logger.debug(f"count={count}") if count == 0: - # DEBUG: print("DEBUG: API is no more returning new instances, aborting loop!") + logger.debug("API is no more returning new instances, aborting loop!") break except network.exceptions as exception: - print(f"WARNING: Caught error, exiting loop: domain='{domain}',exception[{type(exception)}]='{str(exception)}'") + logger.warning(f"Caught error, exiting loop: domain='{domain}',exception[{type(exception)}]='{str(exception)}'") instances.set_last_error(domain, exception) offset = 0 break @@ -250,7 +254,7 @@ def fetch_blocks(domain: str) -> dict: # Fetch blocked (full suspended) instances try: if offset == 0: - # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset) + logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset) fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({ "sort" : "+pubAt", "host" : None, @@ -258,7 +262,7 @@ def fetch_blocks(domain: str) -> dict: "limit" : step }), headers) else: - # DEBUG: print("DEBUG: Sending JSON API request to domain,step,offset:", domain, step, offset) + logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset) fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({ "sort" : "+pubAt", "host" : None, @@ -267,33 +271,33 @@ def fetch_blocks(domain: str) -> dict: "offset" : offset - 1 }), headers) - # DEBUG: print(f"DEBUG: fetched[]='{type(fetched)}'") + 
logger.debug(f"fetched[]='{type(fetched)}'") if "error_message" in fetched: - print(f"WARNING: post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}") + logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}") instances.set_last_error(domain, fetched) break elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]: - print(f"WARNING: post_json_api() returned error: {fetched['error']['message']}") + logger.warning(f"post_json_api() returned error: {fetched['error']['message']}") instances.set_last_error(domain, fetched["json"]["error"]["message"]) break rows = fetched["json"] - # DEBUG: print(f"DEBUG: rows({len(rows)})={rows} - blocked") + logger.debug(f"rows({len(rows)})={rows} - blocked") if len(rows) == 0: - # DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain) + logger.debug("Returned zero bytes, exiting loop:", domain) break elif len(rows) != config.get("misskey_limit"): - # DEBUG: print(f"DEBUG: Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'") + logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'") offset = offset + (config.get("misskey_limit") - len(rows)) else: - # DEBUG: print("DEBUG: Raising offset by step:", step) + logger.debug("Raising offset by step:", step) offset = offset + step count = 0 for instance in rows: # Is it there? 
- # DEBUG: print(f"DEBUG: instance[{type(instance)}]='{instance}' - blocked") + logger.debug(f"instance[{type(instance)}]='{instance}' - blocked") if "isBlocked" in instance and instance["isBlocked"] and not dicts.has_key(blocklist["blocked"], "domain", instance["host"]): count = count + 1 blocklist["blocked"].append({ @@ -301,18 +305,18 @@ def fetch_blocks(domain: str) -> dict: "reason": None }) - # DEBUG: print(f"DEBUG: count={count}") + logger.debug(f"count={count}") if count == 0: - # DEBUG: print("DEBUG: API is no more returning new instances, aborting loop!") + logger.debug("API is no more returning new instances, aborting loop!") break except network.exceptions as exception: - print(f"WARNING: Caught error, exiting loop: domain='{domain}',exception[{type(exception)}]='{str(exception)}'") + logger.warning(f"Caught error, exiting loop: domain='{domain}',exception[{type(exception)}]='{str(exception)}'") instances.set_last_error(domain, exception) offset = 0 break - # DEBUG: print(f"DEBUG: Returning for domain='{domain}',blocked()={len(blocklist['blocked'])},suspended()={len(blocklist['suspended'])}") + logger.debug(f"Returning for domain='{domain}',blocked()={len(blocklist['blocked'])},suspended()={len(blocklist['suspended'])}") return { "reject" : blocklist["blocked"], "followers_only": blocklist["suspended"] diff --git a/fba/networks/peertube.py b/fba/networks/peertube.py index a43315f..4ee7015 100644 --- a/fba/networks/peertube.py +++ b/fba/networks/peertube.py @@ -14,6 +14,8 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . 
+import logging + import validators from fba import csrf @@ -24,8 +26,11 @@ from fba.http import network from fba.models import instances +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + def fetch_peers(domain: str) -> list: - # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',software='peertube' - CALLED!") + logger.debug(f"domain({len(domain)})='{domain}',software='peertube' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -39,7 +44,7 @@ def fetch_peers(domain: str) -> list: elif domain.endswith(".tld"): raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!") - # DEBUG: print(f"DEBUG: domain='{domain}' is a PeerTube, fetching JSON ...") + logger.debug(f"domain='{domain}' is a PeerTube, fetching JSON ...") peers = list() start = 0 @@ -47,15 +52,15 @@ def fetch_peers(domain: str) -> list: headers = tuple() try: - # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'") + logger.debug(f"Checking CSRF for domain='{domain}'") headers = csrf.determine(domain, dict()) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!") + logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!") instances.set_last_error(domain, exception) return peers for mode in ["followers", "following"]: - # DEBUG: print(f"DEBUG: domain='{domain}',mode='{mode}'") + logger.debug(f"domain='{domain}',mode='{mode}'") while True: data = network.get_json_api( domain, @@ -64,35 +69,35 @@ def fetch_peers(domain: str) -> list: (config.get("connection_timeout"), config.get("read_timeout")) ) - # DEBUG: print(f"DEBUG: data[]='{type(data)}'") + logger.debug(f"data[]='{type(data)}'") if "error_message" not in data: - # DEBUG: print(f"DEBUG: Success, data[json]()={len(data['json'])}") + logger.debug(f"Success, 
data[json]()={len(data['json'])}") if "data" in data["json"]: rows = data["json"]["data"] - # DEBUG: print(f"DEBUG: Found {len(rows)} record(s).") + logger.debug(f"Found {len(rows)} record(s).") for record in rows: - # DEBUG: print(f"DEBUG: record()={len(record)}") + logger.debug(f"record()={len(record)}") for mode2 in ["follower", "following" ]: - # DEBUG: print(f"DEBUG: mode2='{mode2}'") + logger.debug(f"mode2='{mode2}'") if mode2 in record and "host" in record[mode2]: - # DEBUG: print(f"DEBUG: Found host='{record[mode2]['host']}', adding ...") + logger.debug(f"Found host='{record[mode2]['host']}', adding ...") peers.append(record[mode2]["host"]) else: - print(f"WARNING: record from '{domain}' has no '{mode2}' or 'host' record: {record}") + logger.warning(f"record from '{domain}' has no '{mode2}' or 'host' record: {record}") if len(rows) < 100: - # DEBUG: print(f"DEBUG: Reached end of JSON response, domain='{domain}'") + logger.debug(f"Reached end of JSON response, domain='{domain}'") break # Continue with next row start = start + 100 else: - print(f"WARNING: domain='{domain}' causes error during API query: '{data['error_message']}' - SKIPPED!") + logger.warning(f"domain='{domain}' causes error during API query: '{data['error_message']}' - SKIPPED!") break - # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'") + logger.debug(f"Adding '{len(peers)}' for domain='{domain}'") instances.set_total_peers(domain, peers) - # DEBUG: print(f"DEBUG: Returning peers[]='{type(peers)}'") + logger.debug(f"Returning peers[]='{type(peers)}'") return peers diff --git a/fba/networks/pleroma.py b/fba/networks/pleroma.py index 8ba2f65..b87c8db 100644 --- a/fba/networks/pleroma.py +++ b/fba/networks/pleroma.py @@ -15,6 +15,7 @@ # along with this program. If not, see . 
import inspect +import logging import bs4 import validators @@ -31,13 +32,17 @@ from fba.http import network from fba.models import blocks from fba.models import instances +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Language mapping X -> English language_mapping = { # English -> English "Reject": "Suspended servers", } def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): - # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!") + logger.debug(f"domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": @@ -63,28 +68,28 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): blockdict = list() rows = None try: - # DEBUG: print(f"DEBUG: Fetching nodeinfo: domain='{domain}',nodeinfo_url='{nodeinfo_url}'") + logger.debug(f"Fetching nodeinfo: domain='{domain}',nodeinfo_url='{nodeinfo_url}'") rows = federation.fetch_nodeinfo(domain, nodeinfo_url) except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during fetching nodeinfo") + logger.warning(f"Exception '{type(exception)}' during fetching nodeinfo") instances.set_last_error(domain, exception) if rows is None: - print("WARNING: Could not fetch nodeinfo from domain:", domain) + logger.warning("Could not fetch nodeinfo from domain:", domain) return elif "metadata" not in rows: - print(f"WARNING: rows()={len(rows)} does not have key 'metadata', domain='{domain}'") + logger.warning(f"rows()={len(rows)} does not have key 'metadata', domain='{domain}'") return elif "federation" not in rows["metadata"]: - print(f"WARNING: rows()={len(rows['metadata'])} does not have key 'federation', domain='{domain}'") + logger.warning(f"rows()={len(rows['metadata'])} does not have key 'federation', domain='{domain}'") return data = rows["metadata"]["federation"] 
found = False - # DEBUG: print(f"DEBUG: data[]='{type(data)}'") + logger.debug(f"data[]='{type(data)}'") if "mrf_simple" in data: - # DEBUG: print("DEBUG: Found mrf_simple:", domain) + logger.debug("Found mrf_simple:", domain) found = True for block_level, blocklist in ( { @@ -94,40 +99,40 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): } } ).items(): - # DEBUG: print("DEBUG: block_level, blocklist():", block_level, len(blocklist)) + logger.debug("block_level, blocklist():", block_level, len(blocklist)) block_level = tidyup.domain(block_level) - # DEBUG: print("DEBUG: BEFORE block_level:", block_level) + logger.debug("BEFORE block_level:", block_level) if block_level == "": - print("WARNING: block_level is now empty!") + logger.warning("block_level is now empty!") continue elif block_level == "accept": - # DEBUG: print(f"DEBUG: domain='{domain}' skipping block_level='accept'") + logger.debug(f"domain='{domain}' skipping block_level='accept'") continue - # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from domain='{domain}',block_level='{block_level}' ...") + logger.debug(f"Checking {len(blocklist)} entries from domain='{domain}',block_level='{block_level}' ...") if len(blocklist) > 0: for blocked in blocklist: - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) + logger.debug("BEFORE blocked:", blocked) blocked = tidyup.domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) + logger.debug("AFTER blocked:", blocked) if blocked == "": - print("WARNING: blocked is empty after tidyup.domain():", domain, block_level) + logger.warning("blocked is empty after tidyup.domain():", domain, block_level) continue elif blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + logger.debug(f"blocked='{blocked}' is blacklisted - skipping!") continue elif blocked.count("*") > 0: # Obscured domain name with no hash row = instances.deobscure("*", blocked) - # DEBUG: print(f"DEBUG: 
row[]='{type(row)}'") + logger.debug(f"row[]='{type(row)}'") if row is None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") continue - # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'") blocked = row[0] origin = row[1] nodeinfo_url = row[2] @@ -135,75 +140,75 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # Obscured domain name with no hash row = instances.deobscure("?", blocked) - # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + logger.debug(f"row[]='{type(row)}'") if row is None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") continue - # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'") blocked = row[0] origin = row[1] nodeinfo_url = row[2] - # DEBUG: print(f"DEBUG: blocked='{blocked}'") + logger.debug(f"blocked='{blocked}'") if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!") + logger.warning(f"blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!") continue elif blocked.endswith(".arpa"): - print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") + logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") continue elif blocked.endswith(".tld"): - print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!") + logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!") continue elif blacklist.is_blacklisted(blocked): - # 
DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!") + logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!") continue elif not instances.is_registered(blocked): # Commit changes fba.connection.commit() - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + logger.debug(f"Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) if not blocks.is_instance_blocked(domain, blocked, block_level): - # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level) + logger.debug("Blocking:", domain, blocked, block_level) blocks.add_instance(domain, blocked, None, block_level) if block_level == "reject": - # DEBUG: print("DEBUG: Adding to blockdict:", blocked) + logger.debug("Adding to blockdict:", blocked) blockdict.append({ "blocked": blocked, "reason" : None }) else: - # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...") + logger.debug(f"Updating block last seen for domain='{domain}',blocked='{blocked}' ...") blocks.update_last_seen(domain, blocked, block_level) elif "quarantined_instances" in data: - # DEBUG: print(f"DEBUG: Found 'quarantined_instances' in JSON response: domain='{domain}'") + logger.debug(f"Found 'quarantined_instances' in JSON response: domain='{domain}'") found = True block_level = "quarantined" for blocked in data["quarantined_instances"]: - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) + logger.debug("BEFORE blocked:", blocked) blocked = tidyup.domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) + logger.debug("AFTER blocked:", blocked) if blocked == "": - print("WARNING: blocked is empty after tidyup.domain():", domain, block_level) + logger.warning("blocked is empty after tidyup.domain():", domain, block_level) continue 
elif blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + logger.debug(f"blocked='{blocked}' is blacklisted - skipping!") continue elif blocked.count("*") > 0: # Obscured domain name with no hash row = instances.deobscure("*", blocked) - # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + logger.debug(f"row[]='{type(row)}'") if row is None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") continue - # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'") blocked = row[0] origin = row[1] nodeinfo_url = row[2] @@ -211,58 +216,58 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # Obscured domain name with no hash row = instances.deobscure("?", blocked) - # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + logger.debug(f"row[]='{type(row)}'") if row is None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") continue - # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'") blocked = row[0] origin = row[1] nodeinfo_url = row[2] - # DEBUG: print(f"DEBUG: blocked='{blocked}'") + logger.debug(f"blocked='{blocked}'") if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!") + logger.warning(f"blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!") continue elif blocked.endswith(".arpa"): - print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") + logger.warning(f"blocked='{blocked}' is 
a reversed .arpa domain and should not be used generally.") continue elif blocked.endswith(".tld"): - print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!") + logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!") continue elif blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!") + logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!") continue elif not instances.is_registered(blocked): # Commit changes fba.connection.commit() - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + logger.debug(f"Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) if not blocks.is_instance_blocked(domain, blocked, block_level): - # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level) + logger.debug("Blocking:", domain, blocked, block_level) blocks.add_instance(domain, blocked, None, block_level) if block_level == "reject": - # DEBUG: print("DEBUG: Adding to blockdict:", blocked) + logger.debug("Adding to blockdict:", blocked) blockdict.append({ "blocked": blocked, "reason" : None }) else: - # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...") + logger.debug(f"Updating block last seen for domain='{domain}',blocked='{blocked}' ...") blocks.update_last_seen(domain, blocked, block_level) else: - print(f"WARNING: Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='{domain}'") + logger.warning(f"Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='{domain}'") - # DEBUG: print("DEBUG: Committing changes ...") + logger.debug("Committing changes ...") fba.connection.commit() # Reasons if "mrf_simple_info" in data: - # DEBUG: 
print("DEBUG: Found mrf_simple_info:", domain) + logger.debug("Found mrf_simple_info:", domain) found = True for block_level, info in ( { @@ -270,49 +275,49 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {}) } ).items(): - # DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items())) + logger.debug("block_level, info.items():", block_level, len(info.items())) block_level = tidyup.domain(block_level) - # DEBUG: print("DEBUG: BEFORE block_level:", block_level) + logger.debug("BEFORE block_level:", block_level) if block_level == "": - print("WARNING: block_level is now empty!") + logger.warning("block_level is now empty!") continue elif block_level == "accept": - # DEBUG: print(f"DEBUG: domain='{domain}' skipping block_level='accept'") + logger.debug(f"domain='{domain}' skipping block_level='accept'") continue - # DEBUG: print(f"DEBUG: Checking {len(info.items())} entries from domain='{domain}',software='pleroma',block_level='{block_level}' ...") + logger.debug(f"Checking {len(info.items())} entries from domain='{domain}',software='pleroma',block_level='{block_level}' ...") for blocked, reason in info.items(): - # DEBUG: print(f"DEBUG: blocked='{blocked}',reason[{type(reason)}]='{reason}' - BEFORE!") + logger.debug(f"blocked='{blocked}',reason[{type(reason)}]='{reason}' - BEFORE!") blocked = tidyup.domain(blocked) if isinstance(reason, str): - # DEBUG: print("DEBUG: reason[] is a string") + logger.debug("reason[] is a string") reason = tidyup.reason(reason) elif isinstance(reason, dict) and "reason" in reason: - # DEBUG: print("DEBUG: reason[] is a dict") + logger.debug("reason[] is a dict") reason = tidyup.reason(reason["reason"]) elif reason is not None: raise ValueError(f"Cannot handle reason[]='{type(reason)}'") - # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!") + logger.debug(f"blocked='{blocked}',reason='{reason}' - 
AFTER!") if blocked == "": - print("WARNING: blocked is empty after tidyup.domain():", domain, block_level) + logger.warning("blocked is empty after tidyup.domain():", domain, block_level) continue elif blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + logger.debug(f"blocked='{blocked}' is blacklisted - skipping!") continue elif blocked.count("*") > 0: # Obscured domain name with no hash row = instances.deobscure("*", blocked) - # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + logger.debug(f"row[]='{type(row)}'") if row is None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") continue - # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'") blocked = row[0] origin = row[1] nodeinfo_url = row[2] @@ -320,77 +325,77 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # Obscured domain name with no hash row = instances.deobscure("?", blocked) - # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + logger.debug(f"row[]='{type(row)}'") if row is None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") continue - # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'") blocked = row[0] origin = row[1] nodeinfo_url = row[2] - # DEBUG: print(f"DEBUG: blocked='{blocked}'") + logger.debug(f"blocked='{blocked}'") if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!") + logger.warning(f"blocked='{blocked}',software='pleroma' is not a valid domain name 
- SKIPPED!") continue elif blocked.endswith(".arpa"): - print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") + logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") continue elif blocked.endswith(".tld"): - print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!") + logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!") continue elif blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!") + logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!") continue elif not instances.is_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + logger.debug(f"Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) - # DEBUG: print(f"DEBUG: Updating block reason: reason='{reason}',domain='{domain}',blocked='{blocked}',block_level='{block_level}'") + logger.debug(f"Updating block reason: reason='{reason}',domain='{domain}',blocked='{blocked}',block_level='{block_level}'") blocks.update_reason(reason, domain, blocked, block_level) - # DEBUG: print(f"DEBUG: blockdict()={len(blockdict)}") + logger.debug(f"blockdict()={len(blockdict)}") for entry in blockdict: if entry["blocked"] == blocked: - # DEBUG: print(f"DEBUG: Updating entry reason: blocked='{blocked}',reason='{reason}'") + logger.debug(f"Updating entry reason: blocked='{blocked}',reason='{reason}'") entry["reason"] = reason elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]: - # DEBUG: print(f"DEBUG: Found 'quarantined_instances_info' in JSON response: domain='{domain}'") + logger.debug(f"Found 
'quarantined_instances_info' in JSON response: domain='{domain}'") found = True block_level = "quarantined" #print(data["quarantined_instances_info"]) rows = data["quarantined_instances_info"]["quarantined_instances"] for blocked in rows: - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) + logger.debug("BEFORE blocked:", blocked) blocked = tidyup.domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) + logger.debug("AFTER blocked:", blocked) if blocked not in rows or "reason" not in rows[blocked]: - print(f"WARNING: Cannot find blocked='{blocked}' in rows()={len(rows)},domain='{domain}'") + logger.warning(f"Cannot find blocked='{blocked}' in rows()={len(rows)},domain='{domain}'") break reason = rows[blocked]["reason"] - # DEBUG: print(f"DEBUG: reason='{reason}'") + logger.debug(f"reason='{reason}'") if blocked == "": - print("WARNING: blocked is empty after tidyup.domain():", domain, block_level) + logger.warning("blocked is empty after tidyup.domain():", domain, block_level) continue elif blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + logger.debug(f"blocked='{blocked}' is blacklisted - skipping!") continue elif blocked.count("*") > 0: # Obscured domain name with no hash row = instances.deobscure("*", blocked) - # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + logger.debug(f"row[]='{type(row)}'") if row is None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") continue - # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'") blocked = row[0] origin = row[1] nodeinfo_url = row[2] @@ -398,77 +403,77 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # Obscured domain name with no hash row = instances.deobscure("?", blocked) - # 
DEBUG: print(f"DEBUG: row[]='{type(row)}'") + logger.debug(f"row[]='{type(row)}'") if row is None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") continue - # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'") blocked = row[0] origin = row[1] nodeinfo_url = row[2] - # DEBUG: print(f"DEBUG: blocked='{blocked}'") + logger.debug(f"blocked='{blocked}'") if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!") + logger.warning(f"blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!") continue elif blocked.endswith(".arpa"): - print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") + logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") continue elif blocked.endswith(".tld"): - print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!") + logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!") continue elif blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!") + logger.debug(f"blocked='{blocked}' is blacklisted - SKIPPED!") continue elif not instances.is_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + logger.debug(f"Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) - # DEBUG: print(f"DEBUG: Updating block reason: 
reason='{reason}',domain='{domain}',blocked='{blocked}',block_level='{block_level}'") + logger.debug(f"Updating block reason: reason='{reason}',domain='{domain}',blocked='{blocked}',block_level='{block_level}'") blocks.update_reason(reason, domain, blocked, block_level) - # DEBUG: print(f"DEBUG: blockdict()={len(blockdict)}") + logger.debug(f"blockdict()={len(blockdict)}") for entry in blockdict: if entry["blocked"] == blocked: - # DEBUG: print(f"DEBUG: Updating entry reason: blocked='{blocked}',reason='{reason}'") + logger.debug(f"Updating entry reason: blocked='{blocked}',reason='{reason}'") entry["reason"] = reason else: - print(f"WARNING: Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='{domain}'") + logger.warning(f"Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='{domain}'") if not found: - # DEBUG: print(f"DEBUG: Did not find any useable JSON elements, domain='{domain}', continuing with /about page ...") + logger.debug(f"Did not find any useable JSON elements, domain='{domain}', continuing with /about page ...") blocklist = fetch_blocks_from_about(domain) - # DEBUG: print(f"DEBUG: blocklist()={len(blocklist)}") + logger.debug(f"blocklist()={len(blocklist)}") if len(blocklist) > 0: - print(f"INFO: Checking {len(blocklist)} record(s) ...") + logger.info(f"Checking {len(blocklist)} record(s) ...") for block_level in blocklist: - # DEBUG: print(f"DEBUG: block_level='{block_level}'") + logger.debug(f"block_level='{block_level}'") rows = blocklist[block_level] - # DEBUG: print(f"DEBUG: rows['{type(rows)}]()={len(rows)}'") + logger.debug(f"rows['{type(rows)}]()={len(rows)}'") for record in rows: - # DEBUG: print(f"DEBUG: record[]='{type(record)}'") + logger.debug(f"record[]='{type(record)}'") blocked = tidyup.domain(record["blocked"]) reason = tidyup.reason(record["reason"]) - # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!") + 
logger.debug(f"blocked='{blocked}',reason='{reason}' - AFTER!") if blocked == "": - print("WARNING: blocked is empty after tidyup.domain():", domain, block_level) + logger.warning("blocked is empty after tidyup.domain():", domain, block_level) continue elif blacklist.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") + logger.debug(f"blocked='{blocked}' is blacklisted - skipping!") continue elif blocked.count("*") > 0: # Obscured domain name with no hash row = instances.deobscure("*", blocked) - # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + logger.debug(f"row[]='{type(row)}'") if row is None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") continue - # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'") blocked = row[0] origin = row[1] nodeinfo_url = row[2] @@ -476,49 +481,49 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str): # Obscured domain name with no hash row = instances.deobscure("?", blocked) - # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + logger.debug(f"row[]='{type(row)}'") if row is None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") + logger.warning(f"Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!") continue - # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'") + logger.debug(f"blocked='{blocked}' de-obscured to '{row[0]}'") blocked = row[0] origin = row[1] nodeinfo_url = row[2] - # DEBUG: print(f"DEBUG: blocked='{blocked}'") + logger.debug(f"blocked='{blocked}'") if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!") + 
logger.warning(f"blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!") continue elif blocked.endswith(".arpa"): - print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") + logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.") continue elif blocked.endswith(".tld"): - print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!") + logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!") continue elif not instances.is_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") + logger.debug(f"Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url) if not blocks.is_instance_blocked(domain, blocked, block_level): - # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level) + logger.debug("Blocking:", domain, blocked, block_level) blocks.add_instance(domain, blocked, reason, block_level) if block_level == "reject": - # DEBUG: print("DEBUG: Adding to blockdict:", blocked) + logger.debug("Adding to blockdict:", blocked) blockdict.append({ "blocked": blocked, "reason" : reason }) else: - # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...") + logger.debug(f"Updating block last seen for domain='{domain}',blocked='{blocked}' ...") blocks.update_reason(reason, domain, blocked, block_level) fba.connection.commit() - # DEBUG: print("DEBUG: EXIT!") + logger.debug("EXIT!") def fetch_blocks_from_about(domain: str) -> dict: - # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + logger.debug(f"domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 
'str'") elif domain == "": @@ -532,14 +537,14 @@ def fetch_blocks_from_about(domain: str) -> dict: elif domain.endswith(".tld"): raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!") - # DEBUG: print(f"DEBUG: Fetching mastodon blocks from domain='{domain}'") + logger.debug(f"Fetching mastodon blocks from domain='{domain}'") doc = None for path in ["/instance/about/index.html"]: try: # Resetting doc type doc = None - # DEBUG: print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...") + logger.debug(f"Fetching path='{path}' from domain='{domain}' ...") response = network.fetch_response( domain, path, @@ -547,24 +552,24 @@ def fetch_blocks_from_about(domain: str) -> dict: (config.get("connection_timeout"), config.get("read_timeout")) ) - # DEBUG: print(f"DEBUG: response.ok='{response.ok}',response.status_code='{response.status_code}',response.text()={len(response.text)}") + logger.debug(f"response.ok='{response.ok}',response.status_code='{response.status_code}',response.text()={len(response.text)}") if not response.ok or response.text.strip() == "": - print(f"WARNING: path='{path}' does not exist on domain='{domain}' - SKIPPED!") + logger.warning(f"path='{path}' does not exist on domain='{domain}' - SKIPPED!") continue - # DEBUG: print(f"DEBUG: Parsing response.text()={len(response.text)} Bytes ...") + logger.debug(f"Parsing response.text()={len(response.text)} Bytes ...") doc = bs4.BeautifulSoup( response.text, "html.parser", ) - # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'") + logger.debug(f"doc[]='{type(doc)}'") if doc.find("h2") is not None: - # DEBUG: print(f"DEBUG: Found 'h2' header in path='{path}' - BREAK!") + logger.debug(f"Found 'h2' header in path='{path}' - BREAK!") break except network.exceptions as exception: - print("ERROR: Cannot fetch from domain:", domain, exception) + logger.warning("Cannot fetch from domain:", domain, exception) instances.set_last_error(domain, exception) break @@ -575,35 +580,35 @@ def 
fetch_blocks_from_about(domain: str) -> dict: "Silenced servers" : [], } - # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'") + logger.debug(f"doc[]='{type(doc)}'") if doc is None: - print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!") + logger.warning(f"Cannot fetch any /about pages for domain='{domain}' - EXIT!") return blocklist for header in doc.find_all("h2"): header_text = tidyup.reason(header.text) - # DEBUG: print(f"DEBUG: header_text='{header_text}' - BEFORE!") + logger.debug(f"header_text='{header_text}' - BEFORE!") if header_text in language_mapping: - # DEBUG: print(f"DEBUG: header_text='{header_text}' - FOUND!") + logger.debug(f"header_text='{header_text}' - FOUND!") header_text = language_mapping[header_text] else: - print(f"WARNING: header_text='{header_text}' not found in language mapping table") + logger.warning(f"header_text='{header_text}' not found in language mapping table") - # DEBUG: print(f"DEBUG: header_text='{header_text} - AFTER!'") + logger.debug(f"header_text='{header_text} - AFTER!'") if header_text in blocklist or header_text.lower() in blocklist: # replaced find_next_siblings with find_all_next to account for instances that e.g. 
hide lists in dropdown menu - # DEBUG: print(f"DEBUG: Found header_text='{header_text}', importing domain blocks ...") + logger.debug(f"Found header_text='{header_text}', importing domain blocks ...") for line in header.find_next("table").find_all("tr")[1:]: - # DEBUG: print(f"DEBUG: line[]='{type(line)}'") + logger.debug(f"line[]='{type(line)}'") blocklist[header_text].append({ "blocked": tidyup.domain(line.find_all("td")[0].text), "reason" : tidyup.reason(line.find_all("td")[1].text), }) else: - print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}") + logger.warning(f"header_text='{header_text}' not found in blocklist()={len(blocklist)}") - # DEBUG: print(f"DEBUG: Returning blocklist for domain='{domain}'") + logger.debug(f"Returning blocklist for domain='{domain}'") return { "reject" : blocklist["Suspended servers"], "media_removal" : blocklist["Filtered media"], -- 2.39.5