git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Sat, 30 Sep 2023 11:11:41 +0000 (13:11 +0200)
committer Roland Häder <roland@mxchange.org>
Sat, 30 Sep 2023 11:12:54 +0000 (13:12 +0200)
- Paula has finally seen the wrong outcome of publishing a #FediBlock list
  publicly:
  "Too many people use blocklists as-is and don't use their own brain. Blindly
   blocking instances because someone else says so is not good."

fba/boot.py
fba/commands.py
fba/deprecated.py [new file with mode: 0644]

index d7dbb2f7eafadbc81b6bef246b96a625de5be544..67974d9f93a747927135b988f1ee02b8a68786b3 100644 (file)
@@ -148,11 +148,11 @@ def init_parser():
     parser.set_defaults(command=commands.fetch_txt)
 
     ### Fetch blocks from joinfediverse.wiki ###
-    parser = subparser_command.add_parser(
-        "fetch_joinfediverse",
-        help="Fetches FediBlock page from joinfediverse.wiki",
-    )
-    parser.set_defaults(command=commands.fetch_joinfediverse)
+    #parser = subparser_command.add_parser(
+    #    "fetch_joinfediverse",
+    #    help="Fetches FediBlock page from joinfediverse.wiki",
+    #)
+    #parser.set_defaults(command=commands.fetch_joinfediverse)
 
     ### Fetch instances JSON from instances.joinmobilizon.org
     parser = subparser_command.add_parser(
index 64125d9dbf8b294d58bc8f439cf428a5e23e3ca9..fa8a8a91710b62513f0abda64b50feb4f0be151c 100644 (file)
@@ -1250,185 +1250,6 @@ def fetch_joinmisskey(args: argparse.Namespace) -> int:
     logger.debug("Success! - EXIT!")
     return 0
 
-def fetch_joinfediverse(args: argparse.Namespace) -> int:
-    logger.debug("args[]='%s' - CALLED!", type(args))
-
-    logger.debug("Invoking locking.acquire() ...")
-    locking.acquire()
-
-    source_domain = "joinfediverse.wiki"
-    if sources.is_recent(source_domain):
-        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
-        return 1
-    else:
-        logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
-        sources.update(source_domain)
-
-    logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
-    raw = utils.fetch_url(
-        f"https://{source_domain}/FediBlock",
-        network.web_headers,
-        (config.get("connection_timeout"), config.get("read_timeout"))
-    ).text
-    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
-
-    doc = bs4.BeautifulSoup(raw, "html.parser")
-    logger.debug("doc[]='%s'", type(doc))
-
-    tables = doc.findAll("table", {"class": "wikitable"})
-
-    logger.info("Analyzing %d table(s) ...", len(tables))
-    blocklist = list()
-    for table in tables:
-        logger.debug("table[]='%s'", type(table))
-
-        rows = table.findAll("tr")
-        logger.info("Checking %d row(s) ...", len(rows))
-        block_headers = dict()
-        for row in rows:
-            logger.debug("row[%s]='%s'", type(row), row)
-
-            headers = row.findAll("th")
-            logger.debug("Found headers()=%d header(s)", len(headers))
-            if len(headers) > 1:
-                block_headers = dict()
-                cnt = 0
-                for header in headers:
-                    cnt = cnt + 1
-                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
-                    text = header.contents[0]
-
-                    logger.debug("text[]='%s'", type(text))
-                    if not isinstance(text, str):
-                        logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
-                        continue
-                    elif validators.domain(text.strip()):
-                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
-                        continue
-
-                    text = tidyup.domain(text.strip())
-                    logger.debug("text='%s' - AFTER!", text)
-                    if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
-                        logger.debug("Found header: '%s'=%d", text, cnt)
-                        block_headers[cnt] = text
-
-            elif len(block_headers) == 0:
-                logger.debug("row is not scrapable - SKIPPED!")
-                continue
-            elif len(block_headers) > 0:
-                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
-                cnt = 0
-                block = dict()
-
-                for element in row.find_all(["th", "td"]):
-                    cnt = cnt + 1
-                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
-                    if cnt in block_headers:
-                        logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
-
-                        text = element.text.strip()
-                        key = block_headers[cnt] if block_headers[cnt] not in ["domain", "instance"] else "blocked"
-
-                        logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
-                        if key in ["domain", "instance"]:
-                            block[key] = text
-                        elif key == "reason":
-                            block[key] = tidyup.reason(text)
-                        elif key == "subdomain(s)":
-                            block[key] = list()
-                            if text != "":
-                                block[key] = text.split("/")
-                        else:
-                            logger.debug("key='%s'", key)
-                            block[key] = text
-
-                logger.debug("block()=%d ...", len(block))
-                if len(block) > 0:
-                    logger.debug("Appending block()=%d ...", len(block))
-                    blocklist.append(block)
-
-    logger.debug("blocklist()=%d", len(blocklist))
-
-    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
-    domains = database.cursor.fetchall()
-
-    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
-    blocking = list()
-    for block in blocklist:
-        logger.debug("block='%s'", block)
-        if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
-            origin = block["blocked"]
-            logger.debug("origin='%s'", origin)
-            for subdomain in block["subdomain(s)"]:
-                block["blocked"] = subdomain + "." + origin
-                logger.debug("block[blocked]='%s'", block["blocked"])
-                blocking.append(block)
-        else:
-            blocking.append(block)
-
-    logger.debug("blocking()=%d", blocking)
-    for block in blocking:
-        logger.debug("block[]='%s'", type(block))
-        if "blocked" not in block:
-            raise KeyError(f"block()={len(block)} does not have element 'blocked'")
-
-        block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
-        logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
-
-        if block["blocked"] == "":
-            logger.debug("block[blocked] is empty - SKIPPED!")
-            continue
-        elif not domain_helper.is_wanted(block["blocked"]):
-            logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
-            continue
-        elif instances.is_recent(block["blocked"]):
-            logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
-            continue
-
-        logger.debug("Proccessing blocked='%s' ...", block["blocked"])
-        processing.instance(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
-
-    blockdict = list()
-    for blocker in domains:
-        blocker = blocker[0]
-        logger.debug("blocker[%s]='%s'", type(blocker), blocker)
-        instances.set_last_blocked(blocker)
-
-        for block in blocking:
-            logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
-            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
-
-            logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
-            if block["blocked"] == "":
-                logger.debug("block[blocked] is empty - SKIPPED!")
-                continue
-            elif not domain_helper.is_wanted(block["blocked"]):
-                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
-                continue
-
-            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
-            if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
-                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["block_level"], blocker)
-                blockdict.append({
-                    "blocked": block["blocked"],
-                    "reason" : block["reason"],
-                })
-
-        if instances.has_pending(blocker):
-            logger.debug("Flushing updates for blocker='%s' ...", blocker)
-            instances.update(blocker)
-
-        logger.debug("Invoking commit() ...")
-        database.connection.commit()
-
-        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
-        if config.get("bot_enabled") and len(blockdict) > 0:
-            logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
-            network.send_bot_post(blocker, blockdict)
-
-    logger.debug("Success! - EXIT!")
-    return 0
-
 def recheck_obfuscation(args: argparse.Namespace) -> int:
     logger.debug("args[]='%s' - CALLED!", type(args))
 
@@ -1901,14 +1722,12 @@ def fetch_relays(args: argparse.Namespace) -> int:
 
                 link = tag.find("a")
                 logger.debug("link[%s]='%s'", type(link), link)
-                if link is None:
-                    logger.warning("tag='%s' has no a-tag - SKIPPED!", tag)
-                    continue
-                elif "href" not in link:
-                    logger.warning("link()=%d has no key 'href' - SKIPPED!", len(link))
+                if not isinstance(link, bs4.element.Tag):
+                    logger.warning("tag[%s]='%s' is not type of 'bs4.element.Tag' - SKIPPED!", type(tag), tag)
                     continue
 
-                components = urlparse(link["href"])
+                components = urlparse(link.get("href"))
+                logger.debug("components(%d)='%s'", len(components), components)
                 domain = components.netloc.lower().split(":")[0]
 
                 logger.debug("domain='%s' - BEFORE!", domain)
diff --git a/fba/deprecated.py b/fba/deprecated.py
new file mode 100644 (file)
index 0000000..d79d36f
--- /dev/null
@@ -0,0 +1,194 @@
+# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
+# Copyright (C) 2023 Free Software Foundation
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Affero General Public License as published
+# by the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU Affero General Public License for more details.
+#
+# You should have received a copy of the GNU Affero General Public License
+# along with this program.  If not, see <https://www.gnu.org/licenses/>.
+
+def fetch_joinfediverse(args: argparse.Namespace) -> int:
+    """Scrape the FediBlock wiki page of joinfediverse.wiki and record its blocks.
+
+    Deprecated: kept for reference only. This module carries no imports, so
+    every name used below (argparse, bs4, inspect, validators, logger, locking,
+    sources, utils, network, config, tidyup, database, domain_helper,
+    instances, processing) has to be brought into scope before the function
+    can be executed again.
+
+    Steps:
+      1. Acquire the lock and skip the run when the source was used recently.
+      2. Fetch /FediBlock and collect rows from every table.wikitable. A row
+         with more than one <th> defines the wanted columns by position; the
+         following rows are read by those positions.
+      3. Expand each "subdomain(s)" entry into one block per subdomain.
+      4. Register every wanted, not recently checked domain through
+         processing.instance().
+      5. For every instance whose domain matches 'climatejustice.%', store the
+         blocks with level "reject" and, when the bot is enabled, post the
+         blocks recorded for that blocker.
+
+    Args:
+        args: Parsed command-line arguments; only their type is logged here.
+
+    Returns:
+        0 on success, 1 when the source domain has been accessed recently.
+
+    Raises:
+        KeyError: When a scraped row has no domain/instance column.
+    """
+    logger.debug("args[]='%s' - CALLED!", type(args))
+
+    logger.debug("Invoking locking.acquire() ...")
+    locking.acquire()
+
+    source_domain = "joinfediverse.wiki"
+    if sources.is_recent(source_domain):
+        logger.info("API from source_domain='%s' has recently being accessed - EXIT!", source_domain)
+        return 1
+
+    logger.debug("source_domain='%s' has not been recently used, marking ...", source_domain)
+    sources.update(source_domain)
+
+    logger.info("Fetching /FediBlock wiki page from source_domain='%s' ...", source_domain)
+    raw = utils.fetch_url(
+        f"https://{source_domain}/FediBlock",
+        network.web_headers,
+        (config.get("connection_timeout"), config.get("read_timeout"))
+    ).text
+    logger.debug("raw()=%d,raw[]='%s'", len(raw), type(raw))
+
+    doc = bs4.BeautifulSoup(raw, "html.parser")
+    logger.debug("doc[]='%s'", type(doc))
+
+    tables = doc.find_all("table", {"class": "wikitable"})
+
+    logger.info("Analyzing %d table(s) ...", len(tables))
+    blocklist = []
+    for table in tables:
+        logger.debug("table[]='%s'", type(table))
+
+        rows = table.find_all("tr")
+        logger.info("Checking %d row(s) ...", len(rows))
+
+        # Maps the 1-based column position to the recognised header text
+        block_headers = {}
+        for row in rows:
+            logger.debug("row[%s]='%s'", type(row), row)
+
+            headers = row.find_all("th")
+            logger.debug("Found headers()=%d header(s)", len(headers))
+            if len(headers) > 1:
+                # A new header row restarts the column mapping
+                block_headers = {}
+                for cnt, header in enumerate(headers, start=1):
+                    logger.debug("header[]='%s',cnt=%d", type(header), cnt)
+                    if not header.contents:
+                        # An empty <th> has no first child to inspect
+                        logger.debug("header at cnt=%d has no content - SKIPPED!", cnt)
+                        continue
+
+                    text = header.contents[0]
+
+                    logger.debug("text[]='%s'", type(text))
+                    if not isinstance(text, str):
+                        logger.debug("text[]='%s' is not of type 'str' - SKIPPED!", type(text))
+                        continue
+                    elif validators.domain(text.strip()):
+                        logger.debug("text='%s' is a domain - SKIPPED!", text.strip())
+                        continue
+
+                    text = tidyup.domain(text.strip())
+                    logger.debug("text='%s' - AFTER!", text)
+                    if text in ["domain", "instance", "subdomain(s)", "block reason(s)"]:
+                        logger.debug("Found header: '%s'=%d", text, cnt)
+                        block_headers[cnt] = text
+
+            elif not block_headers:
+                logger.debug("row is not scrapable - SKIPPED!")
+                continue
+            else:
+                logger.debug("Found a row with %d scrapable headers ...", len(block_headers))
+                block = {}
+
+                for cnt, element in enumerate(row.find_all(["th", "td"]), start=1):
+                    logger.debug("element[]='%s',cnt=%d", type(element), cnt)
+                    if cnt not in block_headers:
+                        continue
+
+                    logger.debug("block_headers[%d]='%s'", cnt, block_headers[cnt])
+
+                    text = element.text.strip()
+                    # "domain" and "instance" columns both hold the blocked domain
+                    key = "blocked" if block_headers[cnt] in ["domain", "instance"] else block_headers[cnt]
+
+                    logger.debug("cnt=%d is wanted: key='%s',text[%s]='%s'", cnt, key, type(text), text)
+                    if key == "subdomain(s)":
+                        # Subdomains are listed in one cell, separated by slashes
+                        block[key] = text.split("/") if text != "" else []
+                    else:
+                        logger.debug("key='%s'", key)
+                        block[key] = text
+
+                logger.debug("block()=%d ...", len(block))
+                if len(block) > 0:
+                    logger.debug("Appending block()=%d ...", len(block))
+                    blocklist.append(block)
+
+    logger.debug("blocklist()=%d", len(blocklist))
+
+    database.cursor.execute("SELECT domain FROM instances WHERE domain LIKE 'climatejustice.%'")
+    domains = database.cursor.fetchall()
+
+    logger.debug("domains(%d)[]='%s'", len(domains), type(domains))
+    blocking = []
+    for block in blocklist:
+        logger.debug("block='%s'", block)
+        if "subdomain(s)" in block and len(block["subdomain(s)"]) > 0:
+            origin = block["blocked"]
+            logger.debug("origin='%s'", origin)
+            for subdomain in block["subdomain(s)"]:
+                # Copy per subdomain: appending the same dict repeatedly would
+                # leave every entry pointing at the last subdomain only
+                entry = dict(block)
+                entry["blocked"] = subdomain + "." + origin
+                logger.debug("block[blocked]='%s'", entry["blocked"])
+                blocking.append(entry)
+        else:
+            blocking.append(block)
+
+    logger.debug("blocking()=%d", len(blocking))
+    for block in blocking:
+        logger.debug("block[]='%s'", type(block))
+        if "blocked" not in block:
+            raise KeyError(f"block()={len(block)} does not have element 'blocked'")
+
+        # Normalize to the ASCII (IDNA) form of the domain
+        block["blocked"] = tidyup.domain(block["blocked"]).encode("idna").decode("utf-8")
+        logger.debug("block[blocked]='%s' - AFTER!", block["blocked"])
+
+        if block["blocked"] == "":
+            logger.debug("block[blocked] is empty - SKIPPED!")
+            continue
+        elif not domain_helper.is_wanted(block["blocked"]):
+            logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
+            continue
+        elif instances.is_recent(block["blocked"]):
+            logger.debug("block[blocked]='%s' has been recently checked - SKIPPED!", block["blocked"])
+            continue
+
+        logger.debug("Proccessing blocked='%s' ...", block["blocked"])
+        processing.instance(block["blocked"], "climatejustice.social", inspect.currentframe().f_code.co_name)
+
+    for record in domains:
+        blocker = record[0]
+        logger.debug("blocker[%s]='%s'", type(blocker), blocker)
+        instances.set_last_blocked(blocker)
+
+        # Collected per blocker so a bot post never repeats another blocker's entries
+        blockdict = []
+        for block in blocking:
+            logger.debug("block[blocked]='%s',block[block reason(s)]='%s' - BEFORE!", block["blocked"], block["block reason(s)"] if "block reason(s)" in block else None)
+            block["reason"] = tidyup.reason(block["block reason(s)"]) if "block reason(s)" in block else None
+
+            logger.debug("block[blocked]='%s',block[reason]='%s' - AFTER!", block["blocked"], block["reason"])
+            if block["blocked"] == "":
+                logger.debug("block[blocked] is empty - SKIPPED!")
+                continue
+            elif not domain_helper.is_wanted(block["blocked"]):
+                logger.debug("blocked='%s' is not wanted - SKIPPED!", block["blocked"])
+                continue
+
+            logger.debug("blocked='%s',reason='%s'", block["blocked"], block["reason"])
+            if processing.block(blocker, block["blocked"], block["reason"], "reject") and config.get("bot_enabled"):
+                # Log the reason; the scraped rows never carry a "block_level" key
+                logger.debug("Appending blocked='%s',reason='%s' for blocker='%s' ...", block["blocked"], block["reason"], blocker)
+                blockdict.append({
+                    "blocked": block["blocked"],
+                    "reason" : block["reason"],
+                })
+
+        if instances.has_pending(blocker):
+            logger.debug("Flushing updates for blocker='%s' ...", blocker)
+            instances.update(blocker)
+
+        logger.debug("Invoking commit() ...")
+        database.connection.commit()
+
+        logger.debug("config.get(bot_enabled)='%s',blockdict()=%d", config.get("bot_enabled"), len(blockdict))
+        if config.get("bot_enabled") and len(blockdict) > 0:
+            logger.info("Sending bot POST for blocker='%s,blockdict()=%d ...", blocker, len(blockdict))
+            network.send_bot_post(blocker, blockdict)
+
+    logger.debug("Success! - EXIT!")
+    return 0