string = string.split(":")[0]
logger.debug("string='%s' - #5", string)
- # No individual users in block lists
+ # Try to "detect" user profiles, which are not wanted here. Don't block
+ # single users in an instance block list! Anything personal can be handled
+ # with a personal block.
string = re.sub(r"(.+)\@", "", string)
logger.debug("string='%s' - #6", string)
elif string.find("/tag/"):
string = string.split("/tag/")[0]
+ # Some people have the word "silence" appended after the TLD, strip it below
+ logger.debug("string='%s' - #7", string)
if string.endswith("silence"):
- string = string.split("silence")[0]
+ string = string.split("silence")[0]
logger.debug("string='%s' - EXIT!", string)
return string
elif command == "":
raise ValueError("Parameter 'command' is empty")
elif command in ["fetch_blocks", "fetch_cs", "fetch_bkali", "fetch_relays", "fetch_fedipact", "fetch_joinmobilizon", "fetch_joinmisskey", "fetch_joinfediverse"] and origin is None:
- raise ValueError("Parameter command='%s' but origin is None, please fix invoking this function.", command)
+ raise ValueError(f"Parameter command='{command}' but origin is None, please fix invoking this function.")
elif software is None:
try:
logger.debug("Software for domain='%s' is not set, determining ...", domain)
)
logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
- if response.ok and response.status_code < 300 and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url):
+ # Parse successful responses and HTTP 410 pages; accept "<html" at any offset, including 0 (str.find() returns 0 there, which a "> 0" test rejects)
+ if ((response.ok and response.status_code < 300) or response.status_code == 410) and "<html" in response.text and domain_helper.is_in_url(domain, response.url):
logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
doc = bs4.BeautifulSoup(response.text, "html.parser")
logger.debug("doc[]='%s'", type(doc))
+ platform = doc.find("meta", {"property": "og:platform"})
generator = doc.find("meta", {"name" : "generator"})
site_name = doc.find("meta", {"property": "og:site_name"})
- platform = doc.find("meta", {"property": "og:platform"})
+ app_name = doc.find("meta", {"name" : "application-name"})
- logger.debug("generator[]='%s',site_name[]='%s',platform[]='%s'", type(generator), type(site_name), type(platform))
+ logger.debug("generator[]='%s',site_name[]='%s',platform[]='%s',app_name[]='%s'", type(generator), type(site_name), type(platform), type(app_name))
if isinstance(platform, bs4.element.Tag) and isinstance(platform.get("content"), str):
logger.debug("Found property=og:platform, domain='%s'", domain)
software = tidyup.domain(platform.get("content"))
if software is not None and software != "":
logger.info("domain='%s' is generated by software='%s' - Setting detection_mode=GENERATOR ...", domain, software)
instances.set_detection_mode(domain, "GENERATOR")
+ elif isinstance(app_name, bs4.element.Tag) and isinstance(app_name.get("content"), str):
+ # Use the content of <meta name="application-name"> as the software name
+ logger.debug("Found name=application-name, domain='%s'", domain)
+ software = tidyup.domain(app_name.get("content"))
+
+ logger.debug("software[%s]='%s'", type(software), software)
+ if software is not None and software != "":
+ # Record the detection mode only when tidying left a non-empty name
+ logger.debug("domain='%s' has application-name='%s' - Setting detection_mode=APP_NAME ...", domain, software)
+ instances.set_detection_mode(domain, "APP_NAME")
elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str):
logger.debug("Found property=og:site_name, domain='%s'", domain)
software = tidyup.domain(site_name.get("content"))
MODE=""
if [ "$1" = "--help" ]
then
- echo "Usage: $ [file|--software|--software2|--nodeinfo|--generator|--detection|--no-auto|--timeout]"
+ # $0 expands to the name this script was invoked with
+ echo "Usage: $0 [file|--software|--software2|--nodeinfo|--generator|--detection|--no-auto|--no-auto2|--timeout]"
exit 255
elif [ -n "$1" -a -f "$1" ]
then
then
DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC;")
MODE="noauto"
+elif [ "$1" = "--no-auto2" ]
+then
+ # Select instances running one of the listed software types whose detection mode is not AUTO_DISCOVERY, least recently updated first
+ DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE software IN ('pleroma', 'mastodon', 'lemmy', 'friendica', 'misskey', 'peertube', 'takahe', 'gotosocial', 'brighteon', 'wildebeest', 'bookwyrm', 'mitra', 'areionskey', 'mammuthus') AND detection_mode != 'AUTO_DISCOVERY' ORDER BY last_updated ASC;")
+ MODE="noauto2"
elif [ "$1" = "--timeout" ]
then
DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE last_error_details LIKE '%Timeout%' ORDER BY last_updated ASC;")
elif [ "$1" = "--generator" ]
then
DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE detection_mode='GENERATOR' ORDER BY last_updated ASC;")
- MODE="software2"
+ MODE="generator"
else
DOMAINS=$(sqlite3 blocks.db "SELECT domain FROM instances WHERE software IS NULL AND nodeinfo_url IS NOT NULL ORDER BY last_updated ASC;")
fi