From ee59eee77c9d2ec3367601ec19e8d2a6c9f2466b Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Sat, 30 Sep 2023 13:11:41 +0200 Subject: [PATCH] Continued: - Paula has finally seen the wrong outcome of publishing a #FediBlock list publicly: "Too many people use blocklists as-is and don't use their own brain. Blindly blocking instances because someone else says so is not good." --- fba/boot.py | 10 +-- fba/commands.py | 189 +------------------------------------------- fba/deprecated.py | 194 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 203 insertions(+), 190 deletions(-) create mode 100644 fba/deprecated.py diff --git a/fba/boot.py b/fba/boot.py index d7dbb2f..67974d9 100644 --- a/fba/boot.py +++ b/fba/boot.py @@ -148,11 +148,11 @@ def init_parser(): parser.set_defaults(command=commands.fetch_txt) ### Fetch blocks from joinfediverse.wiki ### - parser = subparser_command.add_parser( - "fetch_joinfediverse", - help="Fetches FediBlock page from joinfediverse.wiki", - ) - parser.set_defaults(command=commands.fetch_joinfediverse) + #parser = subparser_command.add_parser( + # "fetch_joinfediverse", + # help="Fetches FediBlock page from joinfediverse.wiki", + #) + #parser.set_defaults(command=commands.fetch_joinfediverse) ### Fetch instances JSON from instances.joinmobilizon.org parser = subparser_command.add_parser( diff --git a/fba/commands.py b/fba/commands.py index 64125d9..fa8a8a9 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -1250,185 +1250,6 @@ def fetch_joinmisskey(args: argparse.Namespace) -> int: logger.debug("Success! - EXIT!") return 0 -def fetch_joinfediverse(args: argparse.Namespace) -> int: - logger.debug("args[]='%s' - CALLED!", type(args)) - - logger.debug("Invoking locking.acquire() ...") - locking.acquire() - - source_domain = "joinfediverse.wiki" - if sources.is_recent(source_domain): - logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain) - return 1 - else: - logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain) - sources.update(source_domain) - - logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain) - raw = utils.fetch_url( - f"https://{source_domain}/FediBlock", - network.web_headers, - (config.get("connection_timeout"), config.get("read_timeout")) - ).text - logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw)) - - doc = bs4.BeautifulSoup(raw, "html.parser") - logger.debug("doc[]='%s'", type(doc)) - - tables = doc.findAll("table", {"class": "wikitable"}) - - logger.info("Analyzing %d table(s) ...", len(tables)) - blocklist = list() - for table in tables: - logger.debug("table[]='%s'", type(table)) - - rows = table.findAll("tr") - logger.info("Checking %d row(s) ...", len(rows)) - block_headers = dict() - for row in rows: - logger.debug("row[%s]='%s'", type(row), row) - - headers = row.findAll("th") - logger.debug("Found headers()=%d header(s)", len(headers)) - if len(headers) > 1: - block_headers = dict() - cnt = 0 - for header in headers: - cnt = cnt + 1 - logger.debug("header[]='%s',cnt=%d", type(header), cnt) - text = header.contents[0] - - logger.debug("text[]='%s'", type(text)) - if not isinstance(text, str): - logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text)) - continue - elif validators.domain(text.strip()): - logger.debug("text='%s' is a domain - SKIPPED!", text.strip()) - continue - - text = tidyup.domain(text.strip()) - logger.debug("text='%s' - AFTER!", text) - if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]: - logger.debug("Found header: '%s'=%d", text, cnt) - block_headers[cnt] = text - - elif len(block_headers) == 0: - logger.debug("row is not scrapable - SKIPPED!") - continue - elif len(block_headers) > 0: - logger.debug("Found a row with %d scrapable headers ...", len(block_headers)) - cnt = 0 - block = dict() - - for element in row.find_all(["th", "td"]): - cnt = cnt + 1 - logger.debug("element[]='%s',cnt=%d", type(element), cnt) - if cnt in block_headers: - logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt]) - - text = element.text.strip() - key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked" - - logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text) - if key in ["domain", "instance"]: - block[key] = text - elif key == "reason": - block[key] = tidyup.reason(text) - elif key == "subdomain(s)": - block[key] = list() - if text != "": - block[key] = text.split("/") - else: - logger.debug("key='%s'", key) - block[key] = text - - logger.debug("block()=%d ...", len(block)) - if len(block) > 0: - logger.debug("Appending block()=%d ...", len(block)) - blocklist.append(block) - - logger.debug("blocklist()=%d", len(blocklist)) - - database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'") - domains = database.cursor.fetchall() - - logger.debug("domains(%d)[]='%s'", len(domains), type(domains)) - blocking = list() - for block in blocklist: - logger.debug("block='%s'", block) - if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0: - origin = block["blocked"] - logger.debug("origin='%s'", origin) - for subdomain in block["subdomain(s)"]: - block["blocked"] = subdomain + "." + origin - logger.debug("block[blocked]='%s'", block["blocked"]) - blocking.append(block) - else: - blocking.append(block) - - logger.debug("blocking()=%d", blocking) - for block in blocking: - logger.debug("block[]='%s'", type(block)) - if "blocked" not in block: - raise KeyError(f"block()={len(block)} does not have element 'blocked'") - - block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8") - logger.debug("block[blocked]='%s' - AFTER!", block["blocked"]) - - if block["blocked"] == "": - logger.debug("block[blocked] is empty - SKIPPED!") - continue - elif not domain_helper.is_wanted(block["blocked"]): - logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"]) - continue - elif instances.is_recent(block["blocked"]): - logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"]) - continue - - logger.debug("Proccessing blocked='%s' ...", block["blocked"]) - processing.instance(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name) - - blockdict = list() - for blocker in domains: - blocker = blocker[0] - logger.debug("blocker[%s]='%s'", type(blocker), blocker) - instances.set_last_blocked(blocker) - - for block in blocking: - logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None) - block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None - - logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"]) - if block["blocked"] == "": - logger.debug("block[blocked] is empty - SKIPPED!") - continue - elif not domain_helper.is_wanted(block["blocked"]): - logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"]) - continue - - logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"]) - if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"): - logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker) - blockdict.append({ - "blocked": block["blocked"], - "reason" : block["reason"], - }) - - if instances.has_pending(blocker): - logger.debug("Flushing updates for blocker='%s' ...", blocker) - instances.update(blocker) - - logger.debug("Invoking commit() ...") - database.connection.commit() - - logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict)) - if config.get("bot_enabled") and len(blockdict) > 0: - logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict)) - network.send_bot_post(blocker, blockdict) - - logger.debug("Success! - EXIT!") - return 0 - def recheck_obfuscation(args: argparse.Namespace) -> int: logger.debug("args[]='%s' - CALLED!", type(args)) @@ -1901,14 +1722,12 @@ def fetch_relays(args: argparse.Namespace) -> int: link = tag.find("a") logger.debug("link[%s]='%s'", type(link), link) - if link is None: - logger.warning("tag='%s' has no a-tag - SKIPPED!", tag) - continue - elif "href" not in link: - logger.warning("link()=%d has no key 'href' - SKIPPED!", len(link)) + if not isinstance(link, bs4.element.Tag): + logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag) continue - components = urlparse(link["href"]) + components = urlparse(link.get("href")) + logger.debug("components(%d)='%s'", len(components), components) domain = components.netloc.lower().split(":")[0] logger.debug("domain='%s' - BEFORE!", domain) diff --git a/fba/deprecated.py b/fba/deprecated.py new file mode 100644 index 0000000..d79d36f --- /dev/null +++ b/fba/deprecated.py @@ -0,0 +1,194 @@ +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +def fetch_joinfediverse(args: argparse.Namespace) -> int: + logger.debug("args[]='%s' - CALLED!", type(args)) + + logger.debug("Invoking locking.acquire() ...") + locking.acquire() + + source_domain = "joinfediverse.wiki" + if sources.is_recent(source_domain): + logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain) + return 1 + else: + logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain) + sources.update(source_domain) + + logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain) + raw = utils.fetch_url( + f"https://{source_domain}/FediBlock", + network.web_headers, + (config.get("connection_timeout"), config.get("read_timeout")) + ).text + logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw)) + + doc = bs4.BeautifulSoup(raw, "html.parser") + logger.debug("doc[]='%s'", type(doc)) + + tables = doc.findAll("table", {"class": "wikitable"}) + + logger.info("Analyzing %d table(s) ...", len(tables)) + blocklist = list() + for table in tables: + logger.debug("table[]='%s'", type(table)) + + rows = table.findAll("tr") + logger.info("Checking %d row(s) ...", len(rows)) + block_headers = dict() + for row in rows: + logger.debug("row[%s]='%s'", type(row), row) + + headers = row.findAll("th") + logger.debug("Found headers()=%d header(s)", len(headers)) + if len(headers) > 1: + block_headers = dict() + cnt = 0 + for header in headers: + cnt = cnt + 1 + logger.debug("header[]='%s',cnt=%d", type(header), cnt) + text = header.contents[0] + + logger.debug("text[]='%s'", type(text)) + if not isinstance(text, str): + logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text)) + continue + elif validators.domain(text.strip()): + logger.debug("text='%s' is a domain - SKIPPED!", text.strip()) + continue + + text = tidyup.domain(text.strip()) + logger.debug("text='%s' - AFTER!", text) + if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]: + logger.debug("Found header: '%s'=%d", text, cnt) + block_headers[cnt] = text + + elif len(block_headers) == 0: + logger.debug("row is not scrapable - SKIPPED!") + continue + elif len(block_headers) > 0: + logger.debug("Found a row with %d scrapable headers ...", len(block_headers)) + cnt = 0 + block = dict() + + for element in row.find_all(["th", "td"]): + cnt = cnt + 1 + logger.debug("element[]='%s',cnt=%d", type(element), cnt) + if cnt in block_headers: + logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt]) + + text = element.text.strip() + key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked" + + logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text) + if key in ["domain", "instance"]: + block[key] = text + elif key == "reason": + block[key] = tidyup.reason(text) + elif key == "subdomain(s)": + block[key] = list() + if text != "": + block[key] = text.split("/") + else: + logger.debug("key='%s'", key) + block[key] = text + + logger.debug("block()=%d ...", len(block)) + if len(block) > 0: + logger.debug("Appending block()=%d ...", len(block)) + blocklist.append(block) + + logger.debug("blocklist()=%d", len(blocklist)) + + database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'") + domains = database.cursor.fetchall() + + logger.debug("domains(%d)[]='%s'", len(domains), type(domains)) + blocking = list() + for block in blocklist: + logger.debug("block='%s'", block) + if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0: + origin = block["blocked"] + logger.debug("origin='%s'", origin) + for subdomain in block["subdomain(s)"]: + block["blocked"] = subdomain + "." + origin + logger.debug("block[blocked]='%s'", block["blocked"]) + blocking.append(block) + else: + blocking.append(block) + + logger.debug("blocking()=%d", blocking) + for block in blocking: + logger.debug("block[]='%s'", type(block)) + if "blocked" not in block: + raise KeyError(f"block()={len(block)} does not have element 'blocked'") + + block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8") + logger.debug("block[blocked]='%s' - AFTER!", block["blocked"]) + + if block["blocked"] == "": + logger.debug("block[blocked] is empty - SKIPPED!") + continue + elif not domain_helper.is_wanted(block["blocked"]): + logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"]) + continue + elif instances.is_recent(block["blocked"]): + logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"]) + continue + + logger.debug("Proccessing blocked='%s' ...", block["blocked"]) + processing.instance(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name) + + blockdict = list() + for blocker in domains: + blocker = blocker[0] + logger.debug("blocker[%s]='%s'", type(blocker), blocker) + instances.set_last_blocked(blocker) + + for block in blocking: + logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None) + block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None + + logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"]) + if block["blocked"] == "": + logger.debug("block[blocked] is empty - SKIPPED!") + continue + elif not domain_helper.is_wanted(block["blocked"]): + logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"]) + continue + + logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"]) + if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"): + logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker) + blockdict.append({ + "blocked": block["blocked"], + "reason" : block["reason"], + }) + + if instances.has_pending(blocker): + logger.debug("Flushing updates for blocker='%s' ...", blocker) + instances.update(blocker) + + logger.debug("Invoking commit() ...") + database.connection.commit() + + logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict)) + if config.get("bot_enabled") and len(blockdict) > 0: + logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict)) + network.send_bot_post(blocker, blockdict) + + logger.debug("Success! - EXIT!") + return 0 -- 2.39.5