From 475368e3790098d3f804a201f74a65824c5352c2 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Sat, 9 Dec 2023 05:10:38 +0100 Subject: [PATCH] Continued: - moved original_software next to 'software' column - replaced own _cache dict with @lru_cache --- blocks_empty.db | Bin 40960 -> 53248 bytes fba/helpers/domain.py | 31 +++---------------------------- fba/helpers/software.py | 15 +++------------ fba/http/network.py | 1 + fba/http/nodeinfo.py | 2 +- fba/models/instances.py | 2 +- 6 files changed, 9 insertions(+), 42 deletions(-) diff --git a/blocks_empty.db b/blocks_empty.db index dec6ada3e62839974aaa9f105364a7ed5bf80a1e..a3233785f9affff2dcf5ad32a4ce18dd5746d1b0 100644 GIT binary patch delta 324 zcmZoTz|^pSd4jYcI|Bm)FA#%hW*`oksAH_k&Yp*a0BO?$IDJMTUyI6_S$js+@ i1Rn5%iHQ^3IhlaoVgq8<&4L2&`8WTUXH-~Z-~a%=p;*iS delta 286 zcmZozz}#?vX@ayM8v_Fa7ZAfh;6xo`c{T<;?}@zJMGTy*?-}?bINkX*S>N;J^Vx6} zaNBd<-Yh6k$ja3g%EB%#FVEOhIr%@UI3w$1akk>kyVzuy>J{>n(n^by6H8K4;*)an zle3EzoIKq<{X#Sh4K)?~LVbJ`TwL88Lw!QP936$6#Nv|pqSWGo{Ji4S_>#=rR0TI5 ze@BQkn%exL%=FB>#GLrz{Irtt#G+J%FvlQg562)4BU4kb>5M*`6*z;KSojt(o!UH+ w=@=7}A=_j}Zkfqxyduo(hOA)rOkNRIkl))UPHf(+$$#Bok%Gb^0|5vL05z>m>;M1& diff --git a/fba/helpers/domain.py b/fba/helpers/domain.py index 2df92e8..39fbb77 100644 --- a/fba/helpers/domain.py +++ b/fba/helpers/domain.py @@ -16,6 +16,7 @@ import logging +from functools import lru_cache from urllib.parse import urlparse import validators @@ -28,18 +29,6 @@ from fba.models import instances logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) -# In-function cache -_cache = { - # Cache for function is_in_url() - "is_in_url": {}, - - # Cache for function is_wanted() - "is_wanted": {}, - - # Cache for function raise_on() - "raise_on": {}, -} - def raise_on(domain: str): logger.debug("domain='%s' - CALLED!", domain) @@ -47,9 +36,6 @@ def raise_on(domain: str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not of type 'str'") elif domain == "": raise ValueError("Parameter 'domain' is empty") - elif domain in _cache["raise_on"]: - logger.debug("Returning cached raised_on='%s' - EXIT!", _cache["raise_on"][domain]) - return _cache["raise_on"][domain] elif domain.lower() != domain: raise ValueError(f"Parameter domain='{domain}' must be all lower-case") elif not validators.domain(domain.split("/")[0]): @@ -63,9 +49,9 @@ def raise_on(domain: str): elif domain.endswith(".tld"): raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!") - _cache["raise_on"][domain] = True logger.debug("EXIT!") +@lru_cache def is_in_url(domain: str, url: str) -> bool: logger.debug("domain='%s',url='%s' - CALLED!", domain, url) raise_on(domain) @@ -78,9 +64,6 @@ def is_in_url(domain: str, url: str) -> bool: raise ValueError("Parameter 'url' is empty") elif not validators.url(url): raise ValueError(f"Parameter url='{url}' is not a valid URL") - elif domain + url in _cache["is_in_url"]: - logger.debug("Returning cached is_in_url='%s' - EXIT!", _cache["is_in_url"][domain + url]) - return _cache["is_in_url"][domain + url] punycode = domain.encode("idna").decode("utf-8") @@ -89,12 +72,10 @@ def is_in_url(domain: str, url: str) -> bool: is_found = (punycode in [components.netloc, components.hostname]) - # Set cache - _cache["is_in_url"][domain + url] = is_found - logger.debug("is_found='%s' - EXIT!", is_found) return is_found +@lru_cache def is_wanted(domain: str) -> bool: logger.debug("domain='%s' - CALLED!", domain) @@ -102,9 +83,6 @@ def is_wanted(domain: str) -> bool: raise ValueError(f"Parameter domain[]='{type(domain)}' is not of type 'str'") elif domain == "": raise ValueError("Parameter 'domain' is empty") - elif domain in _cache["is_wanted"]: - logger.debug("Returning cached is_found='%s' - EXIT!", _cache["is_wanted"][domain]) - return _cache["is_wanted"][domain] wanted = True if domain.lower() != domain: @@ -135,8 +113,5 @@ def is_wanted(domain: str) -> bool: logger.debug("domain='%s' is a tag", domain) wanted = False - # Set cache - _cache["is_wanted"][domain] = wanted - logger.debug("wanted='%s' - EXIT!", wanted) return wanted diff --git a/fba/helpers/software.py b/fba/helpers/software.py index eff6eb5..821660d 100644 --- a/fba/helpers/software.py +++ b/fba/helpers/software.py @@ -16,6 +16,8 @@ import logging +from functools import lru_cache + from fba.helpers import tidyup logging.basicConfig(level=logging.INFO) @@ -29,12 +31,7 @@ relays = [ "pub-relay" ] -# In-function cache -_cache = { - # Cache for function alias() - "alias" : {}, -} - +@lru_cache def alias(software: str) -> str: logger.debug("software='%s'- CALLED!", software) @@ -42,9 +39,6 @@ def alias(software: str) -> str: raise ValueError(f"software[]='{type(software)}' is not type 'str'") elif software == "": raise ValueError("Parameter 'software' is empty") - elif software in _cache["alias"]: - logger.debug("Returning cached alias='%s' - EXIT!", _cache["alias"][software]) - return _cache["alias"][software] key = software @@ -137,9 +131,6 @@ def alias(software: str) -> str: logger.debug("software='%s' is being cleaned up further ...") software = software.rstrip("!").strip() - # Set cache - _cache["alias"][key] = software - logger.debug("software[%s]='%s' - EXIT!", type(software), software) return software diff --git a/fba/http/network.py b/fba/http/network.py index ec548d8..5158c94 100644 --- a/fba/http/network.py +++ b/fba/http/network.py @@ -20,6 +20,7 @@ import time import reqto import requests import urllib3 +import validators from fba import utils diff --git a/fba/http/nodeinfo.py b/fba/http/nodeinfo.py index f66afd2..9c944cd 100644 --- a/fba/http/nodeinfo.py +++ b/fba/http/nodeinfo.py @@ -124,7 +124,7 @@ def fetch(domain: str, path: str = None, update_mode: bool = True) -> dict: instances.set_detection_mode(domain, "STATIC_CHECK") logger.debug("domain='%s',request='%s'", domain, request) - instances.set_nodeinfo_url(domain, "https://{domain}{request}") + instances.set_nodeinfo_url(domain, f"https://{domain}{request}") logger.debug("BREAK!") break diff --git a/fba/models/instances.py b/fba/models/instances.py index f01dd36..77b198e 100644 --- a/fba/models/instances.py +++ b/fba/models/instances.py @@ -483,7 +483,7 @@ def set_nodeinfo_url(domain: str, url: str): raise ValueError(f"Parameter url[]='{type(url)}' is not of type 'str'") elif url == "": raise ValueError("Parameter 'url' is empty") - elif not validators.url(url): + elif url is not None and not validators.url(url): raise ValueError(f"Parameter url='{url}' is not a valid URL") # Set timestamp -- 2.39.5