From aeed9d25209f9efafa858841934f127ecdbb14f3 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Sun, 25 Jun 2023 05:53:04 +0200 Subject: [PATCH] Continued: - introduced software helper with function alias() --- fba/boot.py | 1 + fba/commands.py | 14 ++++++--- fba/helpers/__init__.py | 1 + fba/helpers/software.py | 70 +++++++++++++++++++++++++++++++++++++++++ fba/http/federation.py | 44 +++----------------------- 5 files changed, 86 insertions(+), 44 deletions(-) create mode 100644 fba/helpers/software.py diff --git a/fba/boot.py b/fba/boot.py index 5521867..d9f64d5 100644 --- a/fba/boot.py +++ b/fba/boot.py @@ -133,6 +133,7 @@ def init_parser(): help="Fetches blocks from fediverse.observer.", ) parser.set_defaults(command=commands.fetch_observer) + parser.add_argument("--software", help="Name of software, e.g. 'lemmy'") ### Fetch instances from fedipact.online ### parser = subparser_command.add_parser( diff --git a/fba/commands.py b/fba/commands.py index be890fe..c2695cf 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -35,6 +35,7 @@ from fba.helpers import blacklist from fba.helpers import config from fba.helpers import cookies from fba.helpers import locking +from fba.helpers import software as software_helper from fba.helpers import tidyup from fba.http import federation @@ -404,8 +405,12 @@ def fetch_observer(args: argparse.Namespace) -> int: logger.info("Fetching %d different table data ...", len(types)) for software in types: - doc = None + logger.debug("software='%s' - BEFORE!", software) + if args.software is not None and args.software != software: + logger.debug("args.software='%s' does not match software='%s' - SKIPPED!") + continue + doc = None try: logger.debug("Fetching table data for software='%s' ...", software) raw = utils.fetch_url( @@ -434,12 +439,13 @@ def fetch_observer(args: argparse.Namespace) -> int: elif instances.is_registered(domain): logger.debug("domain='%s' is already registered - SKIPPED!", domain) continue - 
elif instance.is_recent(domain): + elif instances.is_recent(domain): logger.debug("domain='%s' is recently being handled - SKIPPED!", domain) continue + software = software_helper.alias(software) logger.info("Fetching instances for domain='%s',software='%s'", domain, software) - federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name) + federation.fetch_instances(domain, None, software, inspect.currentframe().f_code.co_name) logger.debug("Success! - EXIT!") return 0 @@ -694,7 +700,7 @@ def fetch_instances(args: argparse.Namespace) -> int: continue try: - logger.info("Fetching instances for domain='%s',software='%s',origin='%s',nodeinfo_url='%s'", row[0], row[2], row[1], row[3]) + logger.info("Fetching instances for domain='%s',origin='%s',software='%s',nodeinfo_url='%s'", row[0], row[1], row[2], row[3]) federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3]) except network.exceptions as exception: logger.warning("Exception '%s' during fetching instances (fetch_instances) from row[0]='%s'", type(exception), row[0]) diff --git a/fba/helpers/__init__.py b/fba/helpers/__init__.py index bccc33f..832e28f 100644 --- a/fba/helpers/__init__.py +++ b/fba/helpers/__init__.py @@ -21,6 +21,7 @@ __all__ = [ 'dicts', 'domain', 'locking', + 'software', 'tidyup', 'version', ] diff --git a/fba/helpers/software.py b/fba/helpers/software.py new file mode 100644 index 0000000..1987871 --- /dev/null +++ b/fba/helpers/software.py @@ -0,0 +1,70 @@ +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. 
+# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. + +import logging + +from fba.helpers import tidyup + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +def alias(software: str) -> str: + logger.debug("software='%s'- CALLED!", software) + if not isinstance(software, str) and software is not None: + raise ValueError(f"software[]='%s' is not type 'str'") + + logger.debug("software='%s'- BEFORE!", software) + software = tidyup.domain(software) + logger.debug("software='%s'- AFTER!", software) + + if software in ["akkoma", "rebased", "akkounfucked", "ched"]: + logger.debug("Setting pleroma: software='%s'", software) + software = "pleroma" + elif software in ["hometown", "ecko"]: + logger.debug("Setting mastodon: software='%s'", software) + software = "mastodon" + elif software in ["slipfox calckey", "calckey", "groundpolis", "foundkey", "cherrypick", "meisskey", "magnetar", "keybump"]: + logger.debug("Setting misskey: software='%s'", software) + software = "misskey" + elif software == "runtube.re": + logger.debug("Setting peertube: software='%s'", software) + software = "peertube" + elif software == "nextcloud social": + logger.debug("Setting nextcloud: software='%s'", software) + software = "nextcloud" + elif software.find("/") > 0: + logger.warning("Spliting of slash: software='%s'", software) + software = tidyup.domain(software.split("/")[-1]) + elif software.find("|") > 0: + logger.warning("Spliting of pipe: software='%s'", software) + software = tidyup.domain(software.split("|")[0]) + elif "powered by" in software: + logger.debug("software='%s' has 'powered by' in it", software) + software = 
version.strip_powered_by(software) + elif isinstance(software, str) and " by " in software: + logger.debug("software='%s' has ' by ' in it", software) + software = version.strip_until(software, " by ") + elif isinstance(software, str) and " see " in software: + logger.debug("software='%s' has ' see ' in it", software) + software = version.strip_until(software, " see ") + + logger.debug("software['%s']='%s'", type(software), software) + if software == "": + logger.warning("tidyup.domain() left no software name behind: software='%s'", software) + software = None + + logger.debug("software[%s]='%s' - EXIT!", type(software), software) + return software diff --git a/fba/http/federation.py b/fba/http/federation.py index c3ad8fb..ff7cf5e 100644 --- a/fba/http/federation.py +++ b/fba/http/federation.py @@ -26,6 +26,7 @@ from fba import utils from fba.helpers import config from fba.helpers import cookies from fba.helpers import domain as domain_helper +from fba.helpers import software as software_helper from fba.helpers import tidyup from fba.helpers import version @@ -458,46 +459,9 @@ def determine_software(domain: str, path: str = None) -> str: return None logger.debug("software='%s'- BEFORE!", software) - software = tidyup.domain(software) - logger.debug("software='%s'- AFTER!", software) - - if software in ["akkoma", "rebased", "akkounfucked", "ched"]: - logger.debug("Setting pleroma: domain='%s',software='%s'", domain, software) - software = "pleroma" - elif software in ["hometown", "ecko"]: - logger.debug("Setting mastodon: domain='%s',software='%s'", domain, software) - software = "mastodon" - elif software in ["slipfox calckey", "calckey", "groundpolis", "foundkey", "cherrypick", "meisskey", "magnetar", "keybump"]: - logger.debug("Setting misskey: domain='%s',software='%s'", domain, software) - software = "misskey" - elif software == "runtube.re": - logger.debug("Setting peertube: domain='%s',software='%s'", domain, software) - software = "peertube" - elif software 
== "nextcloud social": - logger.debug("Setting nextcloud: domain='%s',software='%s'", domain, software) - software = "nextcloud" - elif software.find("/") > 0: - logger.warning("Spliting of slash: domain='%s',software='%s'", domain, software) - software = tidyup.domain(software.split("/")[-1]) - elif software.find("|") > 0: - logger.warning("Spliting of pipe: domain='%s',software='%s'", domain, software) - software = tidyup.domain(software.split("|")[0]) - elif "powered by" in software: - logger.debug("software='%s' has 'powered by' in it", software) - software = version.strip_powered_by(software) - elif isinstance(software, str) and " by " in software: - logger.debug("software='%s' has ' by ' in it", software) - software = version.strip_until(software, " by ") - elif isinstance(software, str) and " see " in software: - logger.debug("software='%s' has ' see ' in it", software) - software = version.strip_until(software, " see ") - - logger.debug("software['%s']='%s'", type(software), software) - if software == "": - logger.warning("tidyup.domain() left no software name behind: domain='%s'", domain) - software = None + software = software_helper.alias(software) + logger.debug("software['%s']='%s' - AFTER!", type(software), software) - logger.debug("software[]='%s'", type(software)) if str(software) == "": logger.debug("software for domain='%s' was not detected, trying generator ...", domain) software = fetch_generator_from_path(domain) @@ -510,7 +474,7 @@ def determine_software(domain: str, path: str = None) -> str: logger.debug("software='%s' has 'powered by' in it", software) software = version.remove(version.strip_powered_by(software)) - logger.debug("software='%s' - EXIT!", domain, software) + logger.debug("software='%s' - EXIT!", software) return software def find_domains(tag: bs4.element.Tag) -> list: -- 2.39.5