From: Roland Häder Date: Sun, 17 Mar 2024 00:07:51 +0000 (+0100) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=680ce23ab7603ff5fee5fcf984940ef45c60e525;p=fba.git Continued: - that Wiki page was listing "evil instances" and now explains the view of the owner of what FediBlock is --- diff --git a/deprecated/.gitkeep b/deprecated/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/deprecated/commands.py b/deprecated/commands.py deleted file mode 100644 index d79d36f..0000000 --- a/deprecated/commands.py +++ /dev/null @@ -1,194 +0,0 @@ -# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes -# Copyright (C) 2023 Free Software Foundation -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>.
# Column titles of the wiki tables that carry data worth scraping.
_JOINFEDIVERSE_WANTED_HEADERS = ("domain", "instance", "subdomain(s)", "block reason(s)")


def _joinfediverse_parse_headers(headers: list) -> dict:
    """Map 1-based column positions of a header row to their wanted titles.

    Columns whose first child is not a plain string, whose title is itself a
    domain name, or whose title is not one of the wanted headers are left out.
    The position counter still advances for them so indexes match the cells.
    """
    block_headers = {}
    for cnt, header in enumerate(headers, start=1):
        logger.debug("header[]='%s',cnt=%d", type(header), cnt)
        if not header.contents:
            # An empty <th> has no first child; indexing it would raise IndexError.
            logger.debug("header at cnt=%d has no content - SKIPPED!", cnt)
            continue

        text = header.contents[0]
        logger.debug("text[]='%s'", type(text))
        if not isinstance(text, str):
            logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
            continue
        elif validators.domain(text.strip()):
            logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
            continue

        text = tidyup.domain(text.strip())
        logger.debug("text='%s' - AFTER!", text)
        if text in _JOINFEDIVERSE_WANTED_HEADERS:
            logger.debug("Found header: '%s'=%d", text, cnt)
            block_headers[cnt] = text

    return block_headers


def _joinfediverse_parse_row(row, block_headers: dict) -> dict:
    """Extract the wanted cells of one table row into a block dict.

    "domain"/"instance" columns are stored under the key "blocked",
    "subdomain(s)" becomes a list split on "/", everything else is stored
    as stripped text under its header title.
    """
    block = {}
    for cnt, element in enumerate(row.find_all(["th", "td"]), start=1):
        logger.debug("element[]='%s',cnt=%d", type(element), cnt)
        if cnt not in block_headers:
            continue

        logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
        text = element.text.strip()
        key = "blocked" if block_headers[cnt] in ("domain", "instance") else block_headers[cnt]

        logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
        if key == "subdomain(s)":
            # Drop blank parts so "a / b" or a trailing "/" cannot produce
            # a malformed ".origin" or " b.origin" domain later on.
            block[key] = [part.strip() for part in text.split("/") if part.strip()]
        else:
            logger.debug("key='%s'", key)
            block[key] = text

    return block


def _joinfediverse_expand_subdomains(blocklist: list) -> list:
    """Return the block list with one separate entry per listed subdomain.

    Raises:
        KeyError: if a block lists subdomains but has no 'blocked' element.
    """
    blocking = []
    for block in blocklist:
        logger.debug("block='%s'", block)
        subdomains = block.get("subdomain(s)")
        if not subdomains:
            blocking.append(block)
            continue

        if "blocked" not in block:
            raise KeyError(f"block()={len(block)} does not have element 'blocked'")

        origin = block["blocked"]
        logger.debug("origin='%s'", origin)
        for subdomain in subdomains:
            # Append a copy: re-using the same dict would leave every entry
            # pointing at the last subdomain once the loop has finished.
            expanded = dict(block)
            expanded["blocked"] = subdomain + "." + origin
            logger.debug("block[blocked]='%s'", expanded["blocked"])
            blocking.append(expanded)

    return blocking


def fetch_joinfediverse(args: argparse.Namespace) -> int:
    """Scrape the FediBlock page of joinfediverse.wiki and record its blocks.

    The wiki tables with class "wikitable" are parsed into block entries,
    entries with subdomains are expanded, each blocked domain is handed to
    processing.instance() and then recorded via processing.block() with
    level "reject" for every instance whose domain matches
    'climatejustice.%' in the database.

    Args:
        args: Parsed command-line arguments (only logged here).

    Returns:
        1 if the source was accessed recently and nothing was fetched,
        0 after a completed run.

    Raises:
        KeyError: if a scraped block has no 'blocked' element.
    """
    logger.debug("args[]='%s' - CALLED!", type(args))

    logger.debug("Invoking locking.acquire() ...")
    locking.acquire()

    source_domain = "joinfediverse.wiki"
    if sources.is_recent(source_domain):
        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
        return 1

    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
    sources.update(source_domain)

    logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
    raw = utils.fetch_url(
        f"https://{source_domain}/FediBlock",
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    ).text
    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))

    doc = bs4.BeautifulSoup(raw, "html.parser")
    logger.debug("doc[]='%s'", type(doc))

    tables = doc.find_all("table", {"class": "wikitable"})

    logger.info("Analyzing %d table(s) ...", len(tables))
    blocklist = []
    for table in tables:
        logger.debug("table[]='%s'", type(table))

        rows = table.find_all("tr")
        logger.info("Checking %d row(s) ...", len(rows))
        block_headers = {}
        for row in rows:
            logger.debug("row[%s]='%s'", type(row), row)

            headers = row.find_all("th")
            logger.debug("Found headers()=%d header(s)", len(headers))
            if len(headers) > 1:
                # A new header row replaces the column mapping for the rows below it.
                block_headers = _joinfediverse_parse_headers(headers)
                continue

            if not block_headers:
                logger.debug("row is not scrapable - SKIPPED!")
                continue

            logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
            block = _joinfediverse_parse_row(row, block_headers)

            logger.debug("block()=%d ...", len(block))
            if block:
                logger.debug("Appending block()=%d ...", len(block))
                blocklist.append(block)

    logger.debug("blocklist()=%d", len(blocklist))

    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
    domains = database.cursor.fetchall()

    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
    blocking = _joinfediverse_expand_subdomains(blocklist)

    logger.debug("blocking()=%d", len(blocking))
    for block in blocking:
        logger.debug("block[]='%s'", type(block))
        if "blocked" not in block:
            raise KeyError(f"block()={len(block)} does not have element 'blocked'")

        block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
        logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])

        if block["blocked"] == "":
            logger.debug("block[blocked] is empty - SKIPPED!")
            continue
        elif not domain_helper.is_wanted(block["blocked"]):
            logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
            continue
        elif instances.is_recent(block["blocked"]):
            logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
            continue

        logger.debug("Proccessing blocked='%s' ...", block["blocked"])
        processing.instance(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)

    # NOTE(review): blockdict is shared by all blockers, so a later blocker's
    # bot post also contains entries appended for earlier ones - confirm intent.
    blockdict = []
    for blocker in domains:
        # fetchall() yields row tuples; the domain is the only selected column.
        blocker = blocker[0]
        logger.debug("blocker[%s]='%s'", type(blocker), blocker)
        instances.set_last_blocked(blocker)

        for block in blocking:
            logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None

            logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
            if block["blocked"] == "":
                logger.debug("block[blocked] is empty - SKIPPED!")
                continue
            elif not domain_helper.is_wanted(block["blocked"]):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
                continue

            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
            if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
                # Log the reason: blocks scraped here never carry a 'block_level' key.
                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
                blockdict.append({
                    "blocked": block["blocked"],
                    "reason" : block["reason"],
                })

        if instances.has_pending(blocker):
            logger.debug("Flushing updates for blocker='%s' ...", blocker)
            instances.update(blocker)

        logger.debug("Invoking commit() ...")
        database.connection.commit()

        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
        if config.get("bot_enabled") and len(blockdict) > 0:
            logger.info("Sending bot POST for blocker='%s',blockdict()=%d ...", blocker, len(blockdict))
            network.send_bot_post(blocker, blockdict)

    logger.debug("Success! - EXIT!")
    return 0