From: Roland Häder Date: Mon, 12 Jun 2023 03:51:41 +0000 (+0200) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=92a6263c5be5992e094bcab62821a1eb437a5767;p=fba.git Continued: - moved fba.cache to fba.helpers - introduced instances.is_recent() so importing long blocklists won't take so long --- diff --git a/fba/__init__.py b/fba/__init__.py index 40310f9..0230fa3 100644 --- a/fba/__init__.py +++ b/fba/__init__.py @@ -17,7 +17,6 @@ __all__ = [ 'blacklist', 'blocks', 'boot', - 'cache', 'commands', 'config', 'csrf', diff --git a/fba/cache.py b/fba/cache.py deleted file mode 100644 index cfc5278..0000000 --- a/fba/cache.py +++ /dev/null @@ -1,69 +0,0 @@ -# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes -# Copyright (C) 2023 Free Software Foundation -# -# This program is free software: you can redistribute it and/or modify -# it under the terms of the GNU Affero General Public License as published -# by the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU Affero General Public License for more details. -# -# You should have received a copy of the GNU Affero General Public License -# along with this program. If not, see <https://www.gnu.org/licenses/>. 
- -# Cache for redundant SQL queries -_cache = {} - -##### Cache ##### - -def key_exists(key: str) -> bool: - return key in _cache - -def set_all(key: str, rows: list, value: any): - # DEBUG: print(f"DEBUG: key='{key}',rows()={len(rows)},value[]='{type(value)}' - CALLED!") - if not isinstance(key, str): - raise ValueError("Parameter key[]='{type(key)}' is not 'str'") - elif not key_exists(key): - # DEBUG: print(f"DEBUG: Cache for key='{key}' not initialized.") - _cache[key] = {} - - for sub in rows: - # DEBUG: print(f"DEBUG: Setting key='{key}',sub[{type(sub)}]='{sub}'") - if isinstance(sub, tuple): - # DEBUG: print(f"DEBUG: Setting key='{key}',sub[{type(sub)}]='{sub}',value[]='{type(value)}'") - _cache[key][sub[0]] = value - else: - print(f"WARNING: Unsupported type sub[]='{type(sub)}'") - - # DEBUG: print("DEBUG: EXIT!") - -def set_sub_key(key: str, sub: str, value: any): - # DEBUG: print(f"DEBUG: key='{key}',sub='{sub}',value[]='{type(value)}' - CALLED!") - if not isinstance(key, str): - raise ValueError("Parameter key[]='{type(key)}' is not 'str'") - elif not isinstance(sub, str): - raise ValueError("Parameter sub[]='{type(sub)}' is not 'str'") - elif not key_exists(key): - raise Exception(f"Cache for key='{key}' is not initialized, but function invoked") - - # DEBUG: print(f"DEBUG: Setting key='{key}',sub='{sub}',value[]='{type(value)}' ...") - _cache[key][sub] = value - - # DEBUG: print("DEBUG: EXIT!") - -def sub_key_exists(key: str, sub: str) -> bool: - # DEBUG: print(f"DEBUG: key='{key}',sub='{sub}' - CALLED!") - if not isinstance(key, str): - raise ValueError("Parameter key[]='{type(key)}' is not 'str'") - elif not isinstance(sub, str): - raise ValueError("Parameter sub[]='{type(sub)}' is not 'str'") - elif not key_exists(key): - raise Exception(f"Cache for key='{key}' is not initialized, but function invoked") - - exists = sub in _cache[key] - - # DEBUG: print(f"DEBUG: exists='{exists}' - EXIT!") - return exists diff --git a/fba/commands.py 
b/fba/commands.py index 165b67c..1c0f74e 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -560,6 +560,9 @@ def fetch_oliphant(args: argparse.Namespace): elif blacklist.is_blacklisted(row["#domain"]): print(f"WARNING: domain='{row['#domain']}' is blacklisted - skipped!") continue + elif instances.is_recent(row["#domain"]): + # DEBUG: print(f"DEBUG: domain='{row['#domain']}' has been recently checked - skipped!") + continue try: print(f"INFO: Fetching instances for instane='{row['#domain']}' ...") diff --git a/fba/federation.py b/fba/federation.py index 985d71a..5e63317 100644 --- a/fba/federation.py +++ b/fba/federation.py @@ -73,7 +73,7 @@ def fetch_instances(domain: str, origin: str, software: str, script: str, path: if peerlist is None: print("ERROR: Cannot fetch peers:", domain) return - elif instances.has_pending_instance_data(domain): + elif instances.has_pending(domain): # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo data, flushing ...") instances.update_data(domain) @@ -132,6 +132,7 @@ def fetch_peers(domain: str, software: str) -> list: # Init peers variable peers = list() + # No CSRF by default, you don't have to add network.api_headers by yourself here headers = tuple() diff --git a/fba/helpers/__init__.py b/fba/helpers/__init__.py index e9ddf42..3e811e9 100644 --- a/fba/helpers/__init__.py +++ b/fba/helpers/__init__.py @@ -14,6 +14,7 @@ # along with this program. If not, see <https://www.gnu.org/licenses/>. 
__all__ = [ + 'cache', 'dicts', 'tidyup', 'version', diff --git a/fba/helpers/cache.py b/fba/helpers/cache.py new file mode 100644 index 0000000..cfc5278 --- /dev/null +++ b/fba/helpers/cache.py @@ -0,0 +1,69 @@ +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +# Cache for redundant SQL queries +_cache = {} + +##### Cache ##### + +def key_exists(key: str) -> bool: + return key in _cache + +def set_all(key: str, rows: list, value: any): + # DEBUG: print(f"DEBUG: key='{key}',rows()={len(rows)},value[]='{type(value)}' - CALLED!") + if not isinstance(key, str): + raise ValueError("Parameter key[]='{type(key)}' is not 'str'") + elif not key_exists(key): + # DEBUG: print(f"DEBUG: Cache for key='{key}' not initialized.") + _cache[key] = {} + + for sub in rows: + # DEBUG: print(f"DEBUG: Setting key='{key}',sub[{type(sub)}]='{sub}'") + if isinstance(sub, tuple): + # DEBUG: print(f"DEBUG: Setting key='{key}',sub[{type(sub)}]='{sub}',value[]='{type(value)}'") + _cache[key][sub[0]] = value + else: + print(f"WARNING: Unsupported type sub[]='{type(sub)}'") + + # DEBUG: print("DEBUG: EXIT!") + +def set_sub_key(key: str, sub: str, value: any): + # DEBUG: print(f"DEBUG: key='{key}',sub='{sub}',value[]='{type(value)}' - CALLED!") + if not isinstance(key, str): + raise 
ValueError("Parameter key[]='{type(key)}' is not 'str'") + elif not isinstance(sub, str): + raise ValueError("Parameter sub[]='{type(sub)}' is not 'str'") + elif not key_exists(key): + raise Exception(f"Cache for key='{key}' is not initialized, but function invoked") + + # DEBUG: print(f"DEBUG: Setting key='{key}',sub='{sub}',value[]='{type(value)}' ...") + _cache[key][sub] = value + + # DEBUG: print("DEBUG: EXIT!") + +def sub_key_exists(key: str, sub: str) -> bool: + # DEBUG: print(f"DEBUG: key='{key}',sub='{sub}' - CALLED!") + if not isinstance(key, str): + raise ValueError("Parameter key[]='{type(key)}' is not 'str'") + elif not isinstance(sub, str): + raise ValueError("Parameter sub[]='{type(sub)}' is not 'str'") + elif not key_exists(key): + raise Exception(f"Cache for key='{key}' is not initialized, but function invoked") + + exists = sub in _cache[key] + + # DEBUG: print(f"DEBUG: exists='{exists}' - EXIT!") + return exists diff --git a/fba/instances.py b/fba/instances.py index e7a5bd2..96a4cd6 100644 --- a/fba/instances.py +++ b/fba/instances.py @@ -22,11 +22,13 @@ import requests import validators from fba import blacklist -from fba import cache +from fba import config from fba import fba from fba import federation from fba import network +from fba.helpers import cache + # Found info from node, such as nodeinfo URL, detection mode that needs to be # written to database. 
Both arrays must be filled at the same time or else # update_data() will fail @@ -72,7 +74,7 @@ def set_data(key: str, domain: str, value: any): # DEBUG: print("DEBUG: EXIT!") -def has_pending_instance_data(domain: str) -> bool: +def has_pending(domain: str) -> bool: # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") if not isinstance(domain, str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") @@ -95,7 +97,7 @@ def update_data(domain: str): raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") elif domain == "": raise ValueError("Parameter 'domain' is empty") - elif not has_pending_instance_data(domain): + elif not has_pending(domain): raise Exception(f"Domain '{domain}' has no pending instance data, but function invoked") # DEBUG: print(f"DEBUG: Updating instance data for domain='{domain}' ...") @@ -225,7 +227,7 @@ def add(domain: str, origin: str, command: str, path: str = None): cache.set_sub_key("is_registered", domain, True) - if has_pending_instance_data(domain): + if has_pending(domain): # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo being updated ...") set_data("last_status_code" , domain, None) set_data("last_error_details", domain, None) @@ -311,3 +313,25 @@ def is_registered(domain: str) -> bool: # DEBUG: print(f"DEBUG: registered='{registered}' - EXIT!") return registered + +def is_recent(domain: str) -> bool: + # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") + if not isinstance(domain, str): + raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'") + elif domain == "": + raise ValueError("Parameter 'domain' is empty") + elif not is_registered(domain): + # DEBUG: print(f"DEBUG: domain='{domain}' is not registered, returning False - EXIT!") + return False + + # Query database + fba.cursor.execute("SELECT last_instance_fetch FROM instances WHERE domain = ? 
LIMIT 1", [domain]) + + # Fetch row + fetched = fba.cursor.fetchone()[0] + + # DEBUG: print(f"DEBUG: fetched[{type(fetched)}]='{fetched}'") + recently = isinstance(fetched, float) and time.time() - fetched <= config.get("recheck_instance") + + # DEBUG: print(f"DEBUG: recently='{recently}' - EXIT!") + return recently