From ad0e8ee96f0dd23c04e491b01a23000b63cab854 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Sat, 3 Jun 2023 08:15:19 +0200 Subject: [PATCH] Continued: - moved 'fba' to own folder - splitted up file a bit: boot, cache --- api.py | 2 +- fba/__init__.py | 1 + fba/boot.py | 45 ++++++++++++++++++++++++ fba/cache.py | 64 ++++++++++++++++++++++++++++++++++ fba.py => fba/fba.py | 82 +++----------------------------------------- fetch_bkali.py | 6 ++-- fetch_blocks.py | 6 ++-- fetch_fba_rss.py | 6 ++-- fetch_instances.py | 6 ++-- 9 files changed, 127 insertions(+), 91 deletions(-) create mode 100644 fba/__init__.py create mode 100644 fba/boot.py create mode 100644 fba/cache.py rename fba.py => fba/fba.py (95%) diff --git a/api.py b/api.py index b37e555..871398e 100644 --- a/api.py +++ b/api.py @@ -25,7 +25,7 @@ import fastapi import uvicorn import requests import re -import fba +from fba import * router = fastapi.FastAPI(docs_url=fba.config["base_url"] + "/docs", redoc_url=fba.config["base_url"] + "/redoc") templates = Jinja2Templates(directory="templates") diff --git a/fba/__init__.py b/fba/__init__.py new file mode 100644 index 0000000..803e5ea --- /dev/null +++ b/fba/__init__.py @@ -0,0 +1 @@ +__all__ = ['boot', 'cache', 'fba'] diff --git a/fba/boot.py b/fba/boot.py new file mode 100644 index 0000000..8f962fc --- /dev/null +++ b/fba/boot.py @@ -0,0 +1,45 @@ +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + +import os +import sys +import tempfile +import zc.lockfile +from fba import fba + +# Lock file +lockfile = tempfile.gettempdir() + '/fba.lock' +LOCK = None + +def lock_process(): + global LOCK + try: + print(f"DEBUG: Acquiring lock: '{lockfile}'") + LOCK = zc.lockfile.LockFile(lockfile) + print("DEBUG: Lock obtained.") + + except zc.lockfile.LockError: + print(f"ERROR: Cannot aquire lock: '{lockfile}'") + sys.exit(100) + +def shutdown(): + print("DEBUG: Closing database connection ...") + fba.connection.close() + print("DEBUG: Releasing lock ...") + LOCK.close() + print(f"DEBUG: Deleting lockfile='{lockfile}' ...") + os.remove(lockfile) + print("DEBUG: Shutdown completed.") diff --git a/fba/cache.py b/fba/cache.py new file mode 100644 index 0000000..068fd35 --- /dev/null +++ b/fba/cache.py @@ -0,0 +1,64 @@ +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + + +# Cache for redundant SQL queries +cache = {} + +##### Cache ##### + +def is_cache_initialized(key: str) -> bool: + return key in cache + +def set_all_cache_key(key: str, rows: list, value: any): + # NOISY-DEBUG: print(f"DEBUG: key='{key}',rows()={len(rows)},value[]={type(value)} - CALLED!") + if type(key) != str: + raise ValueError("Parameter key[]='{type(key)}' is not 'str'") + elif not is_cache_initialized(key): + # NOISY-DEBUG: print(f"DEBUG: Cache for key='{key}' not initialized.") + cache[key] = {} + + for sub in rows: + # NOISY-DEBUG: print(f"DEBUG: Setting key='{key}',sub[{type(sub)}]='{sub}'") + + if isinstance(sub, tuple): + cache[key][sub[0]] = value + else: + print(f"WARNING: Unsupported type row[]='{type(row)}'") + + # NOISY-DEBUG: print("DEBUG: EXIT!") + +def set_cache_key(key: str, sub: str, value: any): + if type(key) != str: + raise ValueError("Parameter key[]='{type(key)}' is not 'str'") + elif type(sub) != str: + raise ValueError("Parameter sub[]='{type(sub)}' is not 'str'") + elif not is_cache_initialized(key): + print(f"WARNING: Bad method call, key='{key}' is not initialized yet.") + raise Exception(f"Cache for key='{key}' is not initialized, but function called") + + cache[key][sub] = value + +def is_cache_key_set(key: str, sub: str) -> bool: + if type(key) != str: + raise ValueError("Parameter key[]='{type(key)}' is not 'str'") + elif type(sub) != str: + raise ValueError("Parameter sub[]='{type(sub)}' is not 'str'") + elif not is_cache_initialized(key): + print(f"WARNING: Bad method call, key='{key}' is not initialized yet.") + raise Exception(f"Cache for key='{key}' is not initialized, but function called") + + return sub in cache[key] diff --git a/fba.py b/fba/fba.py similarity index 95% rename from fba.py rename to fba/fba.py index c8a088c..4e97f23 100644 --- a/fba.py +++ b/fba/fba.py @@ -15,17 +15,15 @@ # along with this program. If not, see . import bs4 +from fba import cache import hashlib import re import reqto import json -import os import sqlite3 import sys -import tempfile import time import validators -import zc.lockfile with open("config.json") as f: config = json.loads(f.read()) @@ -127,9 +125,6 @@ language_mapping = { # URL for fetching peers get_peers_url = "/api/v1/instance/peers" -# Cache for redundant SQL queries -cache = {} - # Connect to database connection = sqlite3.connect("blocks.db") cursor = connection.cursor() @@ -146,55 +141,6 @@ patterns = [ re.compile("^[a-f0-9]{7}$"), ] -# Lock file -lockfile = tempfile.gettempdir() + '/.' + __name__ + '.lock' -LOCK = None - -##### Cache ##### - -def is_cache_initialized(key: str) -> bool: - return key in cache - -def set_all_cache_key(key: str, rows: list, value: any): - # NOISY-DEBUG: print(f"DEBUG: key='{key}',rows()={len(rows)},value[]={type(value)} - CALLED!") - if type(key) != str: - raise ValueError("Parameter key[]='{type(key)}' is not 'str'") - elif not is_cache_initialized(key): - # NOISY-DEBUG: print(f"DEBUG: Cache for key='{key}' not initialized.") - cache[key] = {} - - for sub in rows: - # NOISY-DEBUG: print(f"DEBUG: Setting key='{key}',sub[{type(sub)}]='{sub}'") - - if isinstance(sub, tuple): - cache[key][sub[0]] = value - else: - print(f"WARNING: Unsupported type row[]='{type(row)}'") - - # NOISY-DEBUG: print("DEBUG: EXIT!") - -def set_cache_key(key: str, sub: str, value: any): - if type(key) != str: - raise ValueError("Parameter key[]='{type(key)}' is not 'str'") - elif type(sub) != str: - raise ValueError("Parameter sub[]='{type(sub)}' is not 'str'") - elif not is_cache_initialized(key): - print(f"WARNING: Bad method call, key='{key}' is not initialized yet.") - raise Exception(f"Cache for key='{key}' is not initialized, but function called") - - cache[key][sub] = value - -def is_cache_key_set(key: str, sub: str) -> bool: - if type(key) != str: - raise ValueError("Parameter key[]='{type(key)}' is not 'str'") - elif type(sub) != str: - raise ValueError("Parameter sub[]='{type(sub)}' is not 'str'") - elif not is_cache_initialized(key): - print(f"WARNING: Bad method call, key='{key}' is not initialized yet.") - raise Exception(f"Cache for key='{key}' is not initialized, but function called") - - return sub in cache[key] - ##### Other functions ##### def is_primitive(var: any) -> bool: @@ -1210,19 +1156,19 @@ def is_instance_registered(domain: str) -> bool: raise ValueError(f"Parameter 'domain' cannot be empty") # NOISY-DEBUG: # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!") - if not is_cache_initialized("is_registered"): + if not cache.is_cache_initialized("is_registered"): # NOISY-DEBUG: # DEBUG: print(f"DEBUG: Cache for 'is_registered' not initialized, fetching all rows ...") try: cursor.execute("SELECT domain FROM instances") # Check Set all - set_all_cache_key("is_registered", cursor.fetchall(), True) + cache.set_all_cache_key("is_registered", cursor.fetchall(), True) except BaseException as e: print(f"ERROR: failed SQL query: domain='{domain}',exception[{type(e)}]:'{str(e)}'") sys.exit(255) # Is cache found? - registered = is_cache_key_set("is_registered", domain) + registered = cache.is_cache_key_set("is_registered", domain) # NOISY-DEBUG: # DEBUG: print(f"DEBUG: registered='{registered}' - EXIT!") return registered @@ -1551,23 +1497,3 @@ def tidyup(string: str) -> str: string = re.sub("(.+)\@", "", string) return string - -def lock_process(): - global LOCK - try: - print(f"DEBUG: Acquiring lock: '{lockfile}'") - LOCK = zc.lockfile.LockFile(lockfile) - print("DEBUG: Lock obtained.") - - except zc.lockfile.LockError: - print(f"ERROR: Cannot aquire lock: '{lockfile}'") - sys.exit(100) - -def shutdown(): - print("DEBUG: Closing database connection ...") - connection.close() - print("DEBUG: Releasing lock ...") - LOCK.close() - print(f"DEBUG: Deleting lockfile='{lockfile}' ...") - os.remove(lockfile) - print("DEBUG: Shutdown completed.") diff --git a/fetch_bkali.py b/fetch_bkali.py index 01aea5a..f6b6fdb 100755 --- a/fetch_bkali.py +++ b/fetch_bkali.py @@ -20,9 +20,9 @@ import json import sys import validators -import fba +from fba import * -fba.lock_process() +boot.lock_process() domains = list() try: @@ -68,4 +68,4 @@ if len(domains) > 0: print(f"INFO: Fetching instances from domain='{domain}' ...") fba.fetch_instances(domain, None, None, sys.argv[0]) -fba.shutdown() +boot.shutdown() diff --git a/fetch_blocks.py b/fetch_blocks.py index b5a49c4..c7f25f5 100755 --- a/fetch_blocks.py +++ b/fetch_blocks.py @@ -23,9 +23,9 @@ import bs4 import itertools import re import validators -import fba +from fba import * -fba.lock_process() +boot.lock_process() fba.cursor.execute( "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_block"]] @@ -548,4 +548,4 @@ for blocker, software, origin, nodeinfo_url in rows: blockdict = [] -fba.shutdown() +boot.shutdown() diff --git a/fetch_fba_rss.py b/fetch_fba_rss.py index 7e888e2..022f46e 100755 --- a/fetch_fba_rss.py +++ b/fetch_fba_rss.py @@ -20,9 +20,9 @@ import reqto import rss_parser import sys -import fba +from fba import * -fba.lock_process() +boot.lock_process() feed = sys.argv[1] @@ -63,4 +63,4 @@ if len(domains) > 0: print(f"INFO: Fetching instances from domain='{domain}' ...") fba.fetch_instances(domain, None, None, sys.argv[0]) -fba.shutdown() +boot.shutdown() diff --git a/fetch_instances.py b/fetch_instances.py index e20bb75..926ccf6 100755 --- a/fetch_instances.py +++ b/fetch_instances.py @@ -22,9 +22,9 @@ import sys import json import time import validators -import fba +from fba import * -fba.lock_process() +boot.lock_process() instance = sys.argv[1] @@ -47,4 +47,4 @@ for row in rows: print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'") fba.fetch_instances(row[0], row[1], row[2], sys.argv[0], row[3]) -fba.shutdown() +boot.shutdown() -- 2.39.5