Continued:

author Roland Häder <roland@mxchange.org>

Sun, 11 Jun 2023 17:09:53 +0000 (19:09 +0200)

committer Roland Häder <roland@mxchange.org>

Sun, 11 Jun 2023 17:09:53 +0000 (19:09 +0200)
author Roland Häder <roland@mxchange.org>
Sun, 11 Jun 2023 17:09:53 +0000 (19:09 +0200)
committer Roland Häder <roland@mxchange.org>
Sun, 11 Jun 2023 17:09:53 +0000 (19:09 +0200)
diff --git a/fba/blocks.py b/fba/blocks.py

index 28ae2bca632ee0efe2e1cd45e55616a297afa5f3..528940b4d3a104412811f0e3f01b1b78b2a9462b 100644 (file)
--- a/fba/blocks.py
+++ b/fba/blocks.py
@@ -42,7 +42,7 @@ def update_reason(reason: str, blocker: str, blocked: str, block_level: str):
      # DEBUG: print("DEBUG: Updating block reason:", reason, blocker, blocked, block_level)
      try:
          fba.cursor.execute(
-            "UPDATE blocks SET reason = ?, last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ? AND reason IN ('','unknown') LIMIT 1",
+            "UPDATE blocks SET reason = ?, last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ? AND (reason IS NULL OR reason IN ('','unknown')) LIMIT 1",
              (
                  reason,
                  time.time(),
diff --git a/fba/commands.py b/fba/commands.py

index 669bee9b09fe066b4512fd0807e9341238fe51f3..d2ce3cad08a7617de9fe6fb2f7df980c71d3e05a 100644 (file)
--- a/fba/commands.py
+++ b/fba/commands.py
@@ -142,8 +142,9 @@ def fetch_blocks(args: argparse.Namespace):
  
      if args.domain is not None and args.domain != "":
          # Re-check single domain
+        # DEBUG: print(f"DEBUG: Querying database for single args.domain='{args.domain}' ...")
          fba.cursor.execute(
-            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND domain = ?", [args.domain]
+            "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain]
          )
      else:
          # Re-check after "timeout" (aka. minimum interval)
@@ -496,7 +497,7 @@ def fetch_federater(args: argparse.Namespace):
      # DEBUG: print(f"DEBUG: response[]='{type(response)}'")
      if response.ok and response.content != "":
          # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...")
-        ## DEBUG: print(f"DEBUG: response.content={response.content}")
+        #print(f"DEBUG: response.content={response.content}")
          reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix')
          #, fieldnames='domain,severity,reject_media,reject_reports,public_comment,obfuscate'
          # DEBUG: print(f"DEBUG: reader[]='{type(reader)}'")
diff --git a/fba/federation.py b/fba/federation.py

index 06d07ab2fa5aa2a38d65efe4a705e85428b6c759..da7277cbce40668754ef15c8d802595a7ad23ff9 100644 (file)
--- a/fba/federation.py
+++ b/fba/federation.py
@@ -77,7 +77,7 @@ def fetch_instances(domain: str, origin: str, software: str, script: str, path:
          # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo data, flushing ...")
          instances.update_data(domain)
  
-    print(f"INFO: Checking {len(peerlist)} instances from {domain} ...")
+    print(f"INFO: Checking {len(peerlist)} instances from domain='{domain}' ...")
      for instance in peerlist:
          # DEBUG: print(f"DEBUG: instance='{instance}'")
          if instance is None:
@@ -195,7 +195,11 @@ def fetch_nodeinfo(domain: str, path: str = None) -> dict:
  
      # DEBUG: print(f"DEBUG: Fetching nodeinfo from domain='{domain}' ...")
      nodeinfo = fetch_wellknown_nodeinfo(domain)
-    # DEBUG: print(f"DEBUG: nodeinfo[{type(nodeinfo)}]='{nodeinfo}'")
+
+    # DEBUG: print(f"DEBUG: nodeinfo[{type(nodeinfo)}]()='{len(nodeinfo)}'")
+    if "error_message" not in nodeinfo and "json" in nodeinfo and len(nodeinfo["json"]) > 0:
+        # DEBUG: print(f"DEBUG: Found nodeinfo[json]()={len(nodeinfo['json'])} - EXIT!")
+        return nodeinfo["json"]
  
      # No CSRF by default, you don't have to add network.api_headers by yourself here
      headers = tuple()
@@ -222,7 +226,7 @@ def fetch_nodeinfo(domain: str, path: str = None) -> dict:
      ]
  
      for request in request_paths:
-        # DEBUG: print(f"DEBUG: path[{type(path)}]='{path}',request='{request'}")
+        # DEBUG: print(f"DEBUG: path[{type(path)}]='{path}',request='{request}'")
          if path is not None and path != "" and path != request:
              # DEBUG: print(f"DEBUG: path='{path}' does not match request='{request}' - SKIPPED!")
              continue
@@ -389,24 +393,24 @@ def determine_software(domain: str, path: str = None) -> str:
          # Continue raising it
          raise data["exception"]
      elif "error_message" in data:
-        print(f"DEBUG: Returned error_message during fetching nodeinfo: '{data['error_message']}',status_code='{data['status_code']}'")
+        # DEBUG: print(f"DEBUG: Returned error_message during fetching nodeinfo: '{data['error_message']}',status_code='{data['status_code']}'")
          return fetch_generator_from_path(domain)
-    elif "status" in data["json"] and data["json"]["status"] == "error" and "message" in data["json"]:
-        print("WARNING: JSON response is an error:", data["json"]["message"])
-        instances.update_last_error(domain, data["json"]["message"])
+    elif "status" in data and data["status"] == "error" and "message" in data:
+        print("WARNING: JSON response is an error:", data["message"])
+        instances.update_last_error(domain, data["message"])
          return fetch_generator_from_path(domain)
-    elif "message" in data["json"]:
+    elif "message" in data:
          print("WARNING: JSON response contains only a message:", data["message"])
-        instances.update_last_error(domain, data["json"]["message"])
+        instances.update_last_error(domain, data["message"])
          return fetch_generator_from_path(domain)
-    elif "software" not in data["json"] or "name" not in data["json"]["software"]:
+    elif "software" not in data or "name" not in data["software"]:
          # DEBUG: print(f"DEBUG: JSON response from domain='{domain}' does not include [software][name], fetching / ...")
          software = fetch_generator_from_path(domain)
  
          # DEBUG: print(f"DEBUG: Generator for domain='{domain}' is: {software}, EXIT!")
          return software
  
-    software = tidyup.domain(data["json"]["software"]["name"])
+    software = tidyup.domain(data["software"]["name"])
  
      # DEBUG: print("DEBUG: sofware after tidyup.domain():", software)
      if software in ["akkoma", "rebased"]:
diff --git a/fba/networks/mastodon.py b/fba/networks/mastodon.py

index f26d7c95359a8ec2006b62c05af1f5286ff9397f..2b57f81013af8045daed4417f4a820fd8fe2aad2 100644 (file)
--- a/fba/networks/mastodon.py
+++ b/fba/networks/mastodon.py
@@ -73,7 +73,7 @@ def fetch_blocks_from_about(domain: str) -> dict:
      doc = None
      for path in ("/about/more", "/about"):
          try:
-            print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...")
+            # DEBUG: print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...")
              doc = bs4.BeautifulSoup(
                  network.fetch_response(
                      domain,
@@ -85,7 +85,7 @@ def fetch_blocks_from_about(domain: str) -> dict:
              )
  
              if len(doc.find_all("h3")) > 0:
-                print(f"DEBUG: path='{path}' had some headlines - BREAK!")
+                # DEBUG: print(f"DEBUG: path='{path}' had some headlines - BREAK!")
                  break
  
          except BaseException as exception:
@@ -93,7 +93,7 @@ def fetch_blocks_from_about(domain: str) -> dict:
              instances.update_last_error(domain, exception)
              break
  
-    print(f"DEBUG: doc[]='{type(doc)}'")
+    # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
      if doc is None:
          print(f"WARNING: Cannot find any 'h3' tags for domain='{domain}' - EXIT!")
          return blocklist
diff --git a/fba/networks/pleroma.py b/fba/networks/pleroma.py

index 5a6f7ec3709578b687ae36353b9bd708f168e029..d024f1d4e7416a4f0d7d3ed7303aed3662c462ba 100644 (file)
--- a/fba/networks/pleroma.py
+++ b/fba/networks/pleroma.py
@@ -17,11 +17,16 @@
  import inspect
  import validators
  
+import bs4
+
  from fba import blacklist
  from fba import blocks
+from fba import config
  from fba import fba
  from fba import federation
  from fba import instances
+from fba import network
+
  from fba.helpers import tidyup
  
  def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
@@ -58,14 +63,15 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
  
      data = rows["metadata"]["federation"]
  
-    if "enabled" in data:
-        # DEBUG: print("DEBUG: Instance has no block list to analyze:", domain)
-        return
-
      if "mrf_simple" in data:
+        # DEBUG: print("DEBUG: Found mrf_simple:", domain)
          for block_level, blocklist in (
-            {**data["mrf_simple"],
-            **{"quarantined_instances": data["quarantined_instances"]}}
+            {
+                **data["mrf_simple"],
+                **{
+                    "quarantined_instances": data["quarantined_instances"]
+                }
+            }
          ).items():
              # DEBUG: print("DEBUG: block_level, blocklist():", block_level, len(blocklist))
              block_level = tidyup.domain(block_level)
@@ -95,26 +101,23 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
                          )
                          searchres = fba.cursor.fetchone()
  
-                        print(f"DEBUG: searchres[]='{type(searchres)}'")
+                        # DEBUG: print(f"DEBUG: searchres[]='{type(searchres)}'")
                          if searchres is None:
                              print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                              continue
  
-                        blocked = searchres[0]
+                        blocked      = searchres[0]
                          nodeinfo_url = searchres[1]
                          # DEBUG: print("DEBUG: Looked up domain:", blocked)
                      elif not validators.domain(blocked):
                          print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - skipped!")
                          continue
-
-                    # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
-                    if not validators.domain(blocked):
-                        print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - skipped!")
-                        continue
                      elif blocked.split(".")[-1] == "arpa":
                          print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
                          continue
-                    elif not instances.is_registered(blocked):
+
+                    # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
+                    if not instances.is_registered(blocked):
                          # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
                          instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
  
@@ -131,9 +134,6 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
                          else:
                              # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
                              blocks.update_last_seen(domain, blocked, block_level)
-            else:
-                # DEBUG: print(f"DEBUG: domain='{domain}' has returned zero rows, trying /about/more page ...")
-                rows = fetch_blocks_from_about(domain)
  
      # DEBUG: print("DEBUG: Committing changes ...")
      fba.connection.commit()
@@ -142,10 +142,10 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
      if "mrf_simple_info" in data:
          # DEBUG: print("DEBUG: Found mrf_simple_info:", domain)
          for block_level, info in (
-            {**data["mrf_simple_info"],
-            **(data["quarantined_instances_info"]
-            if "quarantined_instances_info" in data
-            else {})}
+            {
+                **data["mrf_simple_info"],
+                **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
+            }
          ).items():
              # DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items()))
              block_level = tidyup.domain(block_level)
@@ -157,9 +157,18 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
  
              # DEBUG: print(f"DEBUG: Checking {len(info.items())} entries from domain='{domain}',software='pleroma',block_level='{block_level}' ...")
              for blocked, reason in info.items():
-                # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
+                # DEBUG: print(f"DEBUG: blocked='{blocked}',reason[{type(reason)}]='{reason}' - BEFORE!")
                  blocked = tidyup.domain(blocked)
-                reason  = tidyup.reason(reason) if reason is not None and reason != "" else None
+
+                if isinstance(reason, str):
+                    # DEBUG: print("DEBUG: reason[] is a string")
+                    reason = tidyup.reason(reason)
+                elif isinstance(reason, dict) and "reason" in reason:
+                    # DEBUG: print("DEBUG: reason[] is a dict")
+                    reason = tidyup.reason(reason["reason"])
+                elif reason is not None:
+                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")
+
                  # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")
  
                  if blocked == "":
@@ -195,88 +204,14 @@ def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
                      # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
                      instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
  
-                # DEBUG: print("DEBUG: Updating block reason:", domain, blocked, reason["reason"])
-                blocks.update_reason(reason["reason"], domain, blocked, block_level)
+                # DEBUG: print(f"DEBUG: Updating block reason: reason='{reason}',domain='{domain}',blocked='{blocked}',block_level='{block_level}'")
+                blocks.update_reason(reason, domain, blocked, block_level)
  
                  # DEBUG: print(f"DEBUG: blockdict()={len(blockdict)}")
                  for entry in blockdict:
                      if entry["blocked"] == blocked:
-                        # DEBUG: print("DEBUG: Updating entry reason:", blocked)
-                        entry["reason"] = reason["reason"]
+                        # DEBUG: print(f"DEBUG: Updating entry reason: blocked='{blocked}',reason='{reason}'")
+                        entry["reason"] = reason
  
      fba.connection.commit()
-
      # DEBUG: print("DEBUG: EXIT!")
-
-def fetch_blocks_from_about(domain: str) -> dict:
-    print(f"DEBUG: domain='{domain}' - CALLED!")
-    if not isinstance(domain, str):
-        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
-    elif domain == "":
-        raise ValueError("Parameter 'domain' is empty")
-
-    print("DEBUG: Fetching mastodon blocks from domain:", domain)
-    blocklist = {
-        "Suspended servers": [],
-        "Filtered media"   : [],
-        "Limited servers"  : [],
-        "Silenced servers" : [],
-    }
-
-    doc = None
-    for path in ("/about/more", "/about"):
-        try:
-            print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...")
-            doc = bs4.BeautifulSoup(
-                network.fetch_response(
-                    domain,
-                    path,
-                    network.web_headers,
-                    (config.get("connection_timeout"), config.get("read_timeout"))
-                ).text,
-                "html.parser",
-            )
-
-            if len(doc.find_all("h3")) > 0:
-                print(f"DEBUG: path='{path}' had some headlines - BREAK!")
-                break
-
-        except BaseException as exception:
-            print("ERROR: Cannot fetch from domain:", domain, exception)
-            instances.update_last_error(domain, exception)
-            break
-
-    print(f"DEBUG: doc[]='{type(doc)}'")
-    if doc is None:
-        print(f"WARNING: Cannot find any 'h3' tags for domain='{domain}' - EXIT!")
-        return blocklist
-
-    for header in doc.find_all("h3"):
-        header_text = tidyup.reason(header.text)
-
-        print(f"DEBUG: header_text='{header_text}'")
-        if header_text in language_mapping:
-            print(f"DEBUG: header_text='{header_text}'")
-            header_text = language_mapping[header_text]
-        else:
-            print(f"WARNING: header_text='{header_text}' not found in language mapping table")
-
-        if header_text in blocklist or header_text.lower() in blocklist:
-            # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
-            for line in header.find_all_next("table")[0].find_all("tr")[1:]:
-                blocklist[header_text].append(
-                    {
-                        "domain": tidyup.domain(line.find("span").text),
-                        "hash"  : tidyup.domain(line.find("span")["title"][9:]),
-                        "reason": tidyup.reason(line.find_all("td")[1].text),
-                    }
-                )
-        else:
-            print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
-
-    print("DEBUG: Returning blocklist for domain:", domain)
-    return {
-        "reject"        : blocklist["Suspended servers"],
-        "media_removal" : blocklist["Filtered media"],
-        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
-    }
author	Roland Häder <roland@mxchange.org>
	Sun, 11 Jun 2023 17:09:53 +0000 (19:09 +0200)
committer	Roland Häder <roland@mxchange.org>
	Sun, 11 Jun 2023 17:09:53 +0000 (19:09 +0200)
fba/blocks.py		patch | blob | history
fba/commands.py		patch | blob | history
fba/federation.py		patch | blob | history
fba/networks/mastodon.py		patch | blob | history
fba/networks/pleroma.py		patch | blob | history