From 126b8dd0b7a763a72b406cd439862d4962cab574 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Mon, 14 Aug 2023 06:03:15 +0200 Subject: [PATCH] =?utf8?q?Continued:=20-=20added=20detection-mode=20'APP?= =?utf8?q?=5FNAME'=20which=20reflects=20meta=20information=20=20=20name=3D?= =?utf8?q?"application-name"=20-=20allow=20checking=20generator=20type=20i?= =?utf8?q?f=20status=20code=20410=20(Gone)=20is=20given,=20e.g.=20=20=20wo?= =?utf8?q?rdpress.com=20still=20returns=20a=20full=20HTML=20code=C2=A0to?= =?utf8?q?=20check?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- fba/helpers/tidyup.py | 8 ++++++-- fba/http/federation.py | 17 +++++++++++++---- fba/networks/pleroma.py | 2 +- recheck.sh | 8 ++++++-- templates/views/scoreboard.html | 1 + 5 files changed, 27 insertions(+), 9 deletions(-) diff --git a/fba/helpers/tidyup.py b/fba/helpers/tidyup.py index d342a81..4b14ae7 100644 --- a/fba/helpers/tidyup.py +++ b/fba/helpers/tidyup.py @@ -56,7 +56,9 @@ def domain(string: str) -> str: string = string.split(":")[0] logger.debug("string='%s' - #5", string) - # No individual users in block lists + # Try to "detect" user profiles, not wanted here. Don't block single users + # in an instance block list! Everything personal can be solved in a + # personal block. string = re.sub(r"(.+)\@", "", string) logger.debug("string='%s' - #6", string) @@ -67,8 +69,10 @@ def domain(string: str) -> str: elif string.find("/tag/"): string = string.split("/tag/")[0] + # Some people have TLDs with this word on the end + logger.debug("string='%s' - #7", string) if string.endswith("silence"): - string = string.split("silence")[0] + string = string.split("silence")[0] logger.debug("string='%s' - EXIT!", string) return string diff --git a/fba/http/federation.py b/fba/http/federation.py index 40564e3..071c702 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -57,7 +57,7 @@ def fetch_instances(domain: str, origin: str, software: str, command: str, path: elif command == "": raise ValueError("Parameter 'command' is empty") elif command in ["fetch_blocks", "fetch_cs", "fetch_bkali", "fetch_relays", "fetch_fedipact", "fetch_joinmobilizon", "fetch_joinmisskey", "fetch_joinfediverse"] and origin is None: - raise ValueError("Parameter command='%s' but origin is None, please fix invoking this function.", command) + raise ValueError(f"Parameter command='{command}' but origin is None, please fix invoking this function.") elif software is None: try: logger.debug("Software for domain='%s' is not set, determining ...", domain) @@ -258,16 +258,17 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: ) logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text)) - if response.ok and response.status_code < 300 and response.text.find(" 0 and domain_helper.is_in_url(domain, response.url): + if ((response.ok and response.status_code < 300) or response.status_code == 410) and response.text.find(" 0 and domain_helper.is_in_url(domain, response.url): logger.debug("Parsing response.text()=%d Bytes ...", len(response.text)) doc = bs4.BeautifulSoup(response.text, "html.parser") logger.debug("doc[]='%s'", type(doc)) + platform = doc.find("meta", {"property": "og:platform"}) generator = doc.find("meta", {"name" : "generator"}) site_name = doc.find("meta", {"property": "og:site_name"}) - platform = doc.find("meta", {"property": "og:platform"}) + app_name = doc.find("meta", {"name" : "application-name"}) - logger.debug("generator[]='%s',site_name[]='%s',platform[]='%s'", type(generator), type(site_name), type(platform)) + logger.debug("generator[]='%s',site_name[]='%s',platform[]='%s',app_name[]='%s'", type(generator), type(site_name), type(platform), type(app_name)) if isinstance(platform, bs4.element.Tag) and isinstance(platform.get("content"), str): logger.debug("Found property=og:platform, domain='%s'", domain) software = tidyup.domain(platform.get("content")) @@ -284,6 +285,14 @@ def fetch_generator_from_path(domain: str, path: str = "/") -> str: if software is not None and software != "": logger.info("domain='%s' is generated by software='%s' - Setting detection_mode=GENERATOR ...", domain, software) instances.set_detection_mode(domain, "GENERATOR") + elif isinstance(app_name, bs4.element.Tag) and isinstance(app_name.get("content"), str): + logger.debug("Found property=og:app_name, domain='%s'", domain) + software = tidyup.domain(app_name.get("content")) + + logger.debug("software[%s]='%s'", type(software), software) + if software is not None and software != "": + logger.debug("domain='%s' has application-name='%s' - Setting detection_mode=app_name ...", domain, software) + instances.set_detection_mode(domain, "APP_NAME") elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str): logger.debug("Found property=og:site_name, domain='%s'", domain) software = tidyup.domain(site_name.get("content")) diff --git a/fba/networks/pleroma.py b/fba/networks/pleroma.py index fceebf5..8352915 100644 --- a/fba/networks/pleroma.py +++ b/fba/networks/pleroma.py @@ -360,7 +360,7 @@ def fetch_blocks_from_about(domain: str) -> dict: reason = tidyup.reason(line.find_all("td")[1].text) if blocked is None or blocked == "": - logger.debug("blocker='%s',block_level='%s': blocked is empty - SKIPPED!", blocker, block_level) + logger.debug("domain='%s',block_level='%s': blocked is empty - SKIPPED!", domain, block_level) continue logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason) diff --git a/recheck.sh b/recheck.sh index 143d3b2..1c7fdaa 100755 --- a/recheck.sh +++ b/recheck.sh @@ -3,7 +3,7 @@ MODE="" if [ "$1" = "--help" ] then - echo "Usage: $ [file|--software|--software2|--nodeinfo|--generator|--detection|--no-auto|--timeout]" + echo "Usage: $ [file|--software|--software2|--nodeinfo|--generator|--detection|--no-auto|--no-auto2|--timeout]" exit 255 elif [ -n "$1" -a -f "$1" ] then @@ -25,6 +25,10 @@ elif [ "$1" = "--no-auto" ] then DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC;") MODE="noauto" +elif [ "$1" = "--no-auto2" ] +then + DOMAINS=`sqlite3 blocks.db "SELECT domain FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus') AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC;"` + MODE="noauto2" elif [ "$1" = "--timeout" ] then DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE last_error_details LIKE '%Timeout%' ORDER BY last_updated ASC;") @@ -36,7 +40,7 @@ then elif [ "$1" = "--generator" ] then DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE detection_mode='GENERATOR' ORDER BY last_updated ASC;") - MODE="software2" + MODE="generator" else DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE software IS NULL AND nodeinfo_url IS NOT NULL ORDER BY last_updated ASC;") fi diff --git a/templates/views/scoreboard.html b/templates/views/scoreboard.html index 7f92de0..4e40f8d 100644 --- a/templates/views/scoreboard.html +++ b/templates/views/scoreboard.html @@ -87,6 +87,7 @@
  • STATIC_CHECK: Node information was found by probing for well-known URLs
  • PLATFORM: Meta data og:platform was found in HTML code
  • GENERATOR: Meta data generator was found in HTML code
  • +
  • APP_NAME: Meta data application-name was found in HTML code
  • SITE_NAME: Meta data og:site_name was found in HTML code
  • None: the instance was not reachable or the used software was not stated
  • -- 2.39.5