# fba/helpers/tidyup.py -- helpers that normalise free-form block-list fields.
#
# The module's original import block is not visible here; the two modules the
# code below relies on are imported so that this section stays self-contained.
import logging
import re

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


def reason(string: str) -> str:
    """Return the block reason with surrounding whitespace removed.

    Args:
        string: Raw reason text.

    Returns:
        ``string`` without leading or trailing whitespace.

    Raises:
        ValueError: If ``string`` is not a ``str``.
    """
    logger.debug("string='%s' - CALLED!", string)

    if not isinstance(string, str):
        raise ValueError(f"Parameter string[]='{type(string)}' is not of type 'str'")

    # Strip string
    string = string.strip()

    logger.debug("string='%s' - EXIT!", string)
    return string


def domain(string: str) -> str:
    """Reduce a block-list entry to a bare, lower-case domain name.

    The entry is lower-cased and trimmed, then stripped of: a trailing dot,
    doubled dots, a trailing port number, an ``http:``/``https:`` prefix, a
    trailing slash, a leading ``@``, anything after the first ``:``, a
    ``user@`` prefix, any ``/profile/``, ``/users/`` or ``/tag/`` path, and a
    trailing ``silence`` suffix.

    Args:
        string: Raw domain, URL or user handle.

    Returns:
        The tidied domain. It may be empty if nothing remains after tidying.

    Raises:
        ValueError: If ``string`` is not a ``str`` or is the empty string.
    """
    logger.debug("string='%s' - CALLED!", string)

    if not isinstance(string, str):
        raise ValueError(f"Parameter string[]='{type(string)}' is not of type 'str'")
    if string == "":
        raise ValueError("Parameter string is empty")

    # All lower-case and strip spaces out + last dot
    string = string.lower().strip().rstrip(".").replace("..", ".")
    logger.debug("string='%s' - #1", string)

    # No port number
    string = re.sub(r":\d+$", "", string)
    logger.debug("string='%s' - #2", string)

    # No protocol, sometimes without the slashes
    string = re.sub(r"^https?:/*", "", string)
    logger.debug("string='%s' - #3", string)

    # No trailing slash
    string = re.sub(r"/$", "", string)
    logger.debug("string='%s' - #4", string)

    # No @ or : sign
    string = re.sub(r"^@", "", string)
    string = string.split(":")[0]
    logger.debug("string='%s' - #5", string)

    # Try to "detect" user profiles, not wanted here. Don't block single users
    # in an instance block list! Everything personal can be solved in a
    # personal block.
    string = re.sub(r".+@", "", string)
    logger.debug("string='%s' - #6", string)

    # Cut off profile/user/tag paths. str.partition() returns the whole string
    # as its first element when the marker is absent, so no membership test is
    # needed. (Testing str.find() for truthiness is wrong here: it returns -1,
    # which is truthy, when the marker is missing.)
    for marker in ("/profile/", "/users/", "/tag/"):
        string = string.partition(marker)[0]

    # Some people have TLDs with this word on the end
    logger.debug("string='%s' - #7", string)
    suffix = "silence"
    if string.endswith(suffix):
        # Remove only the trailing occurrence; splitting on the word would cut
        # at the first occurrence anywhere in the domain.
        string = string[:-len(suffix)]

    logger.debug("string='%s' - EXIT!", string)
    return string