git.mxchange.org Git - fba.git/commitdiff
Continued:
author Roland Häder <roland@mxchange.org>
Mon, 14 Aug 2023 04:03:15 +0000 (06:03 +0200)
committer Roland Häder <roland@mxchange.org>
Mon, 14 Aug 2023 04:03:15 +0000 (06:03 +0200)
- added detection mode 'APP_NAME', which is set when the software is
  detected from the HTML meta tag name="application-name"
- allow checking the generator type even when status code 410 (Gone) is
  returned, e.g. wordpress.com still returns full HTML code that can be checked

fba/helpers/tidyup.py
fba/http/federation.py
fba/networks/pleroma.py
recheck.sh
templates/views/scoreboard.html

index d342a81ceef1f2abe2c9e548bcb885d9719e6ec0..4b14ae715fdf41239c5a66359210cf55895bbd58 100644 (file)
@@ -56,7 +56,9 @@ def domain(string: str) -> str:
     string = string.split(":")[0]
     logger.debug("string='%s' - #5", string)
 
-    # No individual users in block lists
+    # Try to "detect" user profiles, not wanted here. Don't block single users
+    # in an instance block list! Everything personal can be solved in a
+    # personal block.
     string = re.sub(r"(.+)\@", "", string)
     logger.debug("string='%s' - #6", string)
 
@@ -67,8 +69,10 @@ def domain(string: str) -> str:
     elif string.find("/tag/"):
         string = string.split("/tag/")[0]
 
+    # Some people have TLDs with this word on the end
+    logger.debug("string='%s' - #7", string)
     if string.endswith("silence"):
-       string = string.split("silence")[0]
+        string = string.split("silence")[0]
 
     logger.debug("string='%s' - EXIT!", string)
     return string
index 40564e338130d6c51b4664e53c64fad92247e283..071c702379bf615b9a09673bc5a1911d3719dcbd 100644 (file)
@@ -57,7 +57,7 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path:
     elif command == "":
         raise ValueError("Parameter 'command' is empty")
     elif command in ["fetch_blocks", "fetch_cs", "fetch_bkali", "fetch_relays", "fetch_fedipact", "fetch_joinmobilizon", "fetch_joinmisskey", "fetch_joinfediverse"] and origin is None:
-        raise ValueError("Parameter command='%s' but origin is None, please fix invoking this function.", command)
+        raise ValueError(f"Parameter command='{command}' but origin is None, please fix invoking this function.")
     elif software is None:
         try:
             logger.debug("Software for domain='%s' is not set, determining ...", domain)
@@ -258,16 +258,17 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str:
     )
 
     logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
-    if response.ok and response.status_code < 300 and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url):
+    if ((response.ok and response.status_code < 300) or response.status_code == 410) and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url):
         logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
         doc = bs4.BeautifulSoup(response.text, "html.parser")
 
         logger.debug("doc[]='%s'", type(doc))
+        platform  = doc.find("meta", {"property": "og:platform"})
         generator = doc.find("meta", {"name"    : "generator"})
         site_name = doc.find("meta", {"property": "og:site_name"})
-        platform  = doc.find("meta", {"property": "og:platform"})
+        app_name  = doc.find("meta", {"name"    : "application-name"})
 
-        logger.debug("generator[]='%s',site_name[]='%s',platform[]='%s'", type(generator), type(site_name), type(platform))
+        logger.debug("generator[]='%s',site_name[]='%s',platform[]='%s',app_name[]='%s'", type(generator), type(site_name), type(platform), type(app_name))
         if isinstance(platform, bs4.element.Tag) and isinstance(platform.get("content"), str):
             logger.debug("Found property=og:platform, domain='%s'", domain)
             software = tidyup.domain(platform.get("content"))
@@ -284,6 +285,14 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str:
             if software is not None and software != "":
                 logger.info("domain='%s' is generated by software='%s' - Setting detection_mode=GENERATOR ...", domain, software)
                 instances.set_detection_mode(domain, "GENERATOR")
+        elif isinstance(app_name, bs4.element.Tag) and isinstance(app_name.get("content"), str):
+            logger.debug("Found property=og:app_name, domain='%s'", domain)
+            software = tidyup.domain(app_name.get("content"))
+
+            logger.debug("software[%s]='%s'", type(software), software)
+            if software is not None and software != "":
+                logger.debug("domain='%s' has application-name='%s' - Setting detection_mode=app_name ...", domain, software)
+                instances.set_detection_mode(domain, "APP_NAME")
         elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str):
             logger.debug("Found property=og:site_name, domain='%s'", domain)
             software = tidyup.domain(site_name.get("content"))
index fceebf53e9380088c4e68275a2e0fa88ebecc7af..8352915e9a7e03b2ba9cdb0f54745d35e7043aa9 100644 (file)
@@ -360,7 +360,7 @@ def fetch_blocks_from_about(domain: str) -> dict:
                 reason = tidyup.reason(line.find_all("td")[1].text)
 
                 if blocked is None or blocked == "":
-                    logger.debug("blocker='%s',block_level='%s': blocked is empty - SKIPPED!", blocker, block_level)
+                    logger.debug("domain='%s',block_level='%s': blocked is empty - SKIPPED!", domain, block_level)
                     continue
 
                 logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason)
index 143d3b2e2a4c4de15b60130ed3d1b0bdc522bd66..1c7fdaad8c73c843cbc0b1dab3315bfe25f1bf07 100755 (executable)
@@ -3,7 +3,7 @@
 MODE=""
 if [ "$1" = "--help" ]
 then
-       echo "Usage: $ [file|--software|--software2|--nodeinfo|--generator|--detection|--no-auto|--timeout]"
+       echo "Usage: $ [file|--software|--software2|--nodeinfo|--generator|--detection|--no-auto|--no-auto2|--timeout]"
        exit 255
 elif [ -n "$1" -a -f "$1" ]
 then
@@ -25,6 +25,10 @@ elif [ "$1" = "--no-auto" ]
 then
        DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC;")
        MODE="noauto"
+elif [ "$1" = "--no-auto2" ]
+then
+       DOMAINS=`sqlite3 blocks.db "SELECT domain FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus') AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC;"`
+       MODE="noauto2"
 elif [ "$1" = "--timeout" ]
 then
        DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE last_error_details LIKE '%Timeout%' ORDER BY last_updated ASC;")
@@ -36,7 +40,7 @@ then
 elif [ "$1" = "--generator" ]
 then
        DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE detection_mode='GENERATOR' ORDER BY last_updated ASC;")
-       MODE="software2"
+       MODE="generator"
 else
        DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE software IS NULL AND nodeinfo_url IS NOT NULL ORDER BY last_updated ASC;")
 fi
index 7f92de0f250191c000eebe75f14d17a6816c57aa..4e40f8d101c541c2d6b57754bc3c7646d63ce6e5 100644 (file)
@@ -87,6 +87,7 @@
                 <li><b>STATIC_CHECK</b>: Node information was found by probing for well-known URLs</li>
                 <li><b>PLATFORM</b>: Meta data <code>og:platform</code> was found in HTML code</li>
                 <li><b>GENERATOR</b>: Meta data <code>generator</code> was found in HTML code</li>
+                <li><b>APP_NAME</b>: Meta data <code>application-name</code> was found in HTML code</li>
                 <li><b>SITE_NAME</b>: Meta data <code>og:site_name</code> was found in HTML code</li>
                 <li><b>None</b>: the instance was not reachable or the used software was not stated</li>
             </ol>