From 2b86869b1269b6bda8c6f3b13344740f5aeb6d07 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Tue, 20 Jun 2023 03:20:17 +0200 Subject: [PATCH] Continued: - added command fetch_observer() to fetch instances from fediverse.observer - WIP: added blocks for lemmy --- fba/boot.py | 7 ++++ fba/commands.py | 103 ++++++++++++++++++++++++++++++++++++++++-------- fba/fba.py | 2 +- 3 files changed, 94 insertions(+), 18 deletions(-) diff --git a/fba/boot.py b/fba/boot.py index 1ed06ef..67d02a1 100644 --- a/fba/boot.py +++ b/fba/boot.py @@ -108,6 +108,13 @@ def init_parser(): ) parser.set_defaults(command=commands.fetch_txt) + ### Fetch blocks from fediverse.observer ### + parser = subparser_command.add_parser( + "fetch_observer", + help="Fetches blocks from fediverse.observer.", + ) + parser.set_defaults(command=commands.fetch_observer) + # DEBUG: print("DEBUG: init_parser(): EXIT!") def run_command(): diff --git a/fba/commands.py b/fba/commands.py index 89563a8..bc656dd 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -180,6 +180,9 @@ def fetch_blocks(args: argparse.Namespace): elif software == "mastodon": print(f"INFO: blocker='{blocker}',software='{software}'") mastodon.fetch_blocks(blocker, origin, nodeinfo_url) + elif software == "lemmy": + print(f"INFO: blocker='{blocker}',software='{software}'") + lemmy.fetch_blocks(blocker, origin, nodeinfo_url) elif software == "friendica" or software == "misskey": print(f"INFO: blocker='{blocker}',software='{software}'") @@ -279,26 +282,92 @@ def fetch_blocks(args: argparse.Namespace): # DEBUG: print("DEBUG: EXIT!") +def fetch_observer(args: argparse.Namespace): + # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") + types = [ + "akoma", + "birdsitelive", + "bookwyrm", + "calckey", + "diaspora", + "foundkey", + "friendica", + "funkwhale", + "gancio", + "gnusocial", + "gotosocial", + "hometown", + "hubzilla", + "kbin", + "ktistec", + "lemmy", + "mastodon", + "microblogpub", + "misskey", + "mitra", + "mobilizon", + "owncast", + "peertube", + "pixelfed", + "pleroma", + "plume", + "snac", + "takahe", + "wildebeest", + "writefreely" + ] + + print(f"INFO: Fetching {len(types)} different table data ...") + for software in types: + # DEBUG: print(f"DEBUG: Fetching table data for software='{software}' ...") + raw = fba.fetch_url(f"https://fediverse.observer/app/views/tabledata.php?software={software}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text + # DEBUG: print(f"DEBUG: raw[{type(raw)}]()={len(raw)}") + + doc = bs4.BeautifulSoup(raw, features='html.parser') + # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'") + + items = doc.findAll("a", {"class": "url"}) + print(f"INFO: Checking {len(items)} items,software='{software}' ...") + for item in items: + # DEBUG: print(f"DEBUG: item[]='{type(item)}'") + domain = item.decode_contents() + + # DEBUG: print(f"DEBUG: domain='{domain}'") + if not validators.domain(domain): + print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!") + continue + elif blacklist.is_blacklisted(domain): + # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") + continue + elif instances.is_registered(domain): + # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!") + continue + + print(f"INFO: Fetching instances for domain='{domain}',software='{software}'") + federation.fetch_instances(domain, None, software, inspect.currentframe().f_code.co_name) + + # DEBUG: print("DEBUG: EXIT!") + def fetch_cs(args: argparse.Namespace): # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") extensions = [ - 'extra', - 'abbr', - 'attr_list', - 'def_list', - 'fenced_code', - 'footnotes', - 'md_in_html', - 'admonition', - 'codehilite', - 'legacy_attrs', - 'legacy_em', - 'meta', - 'nl2br', - 'sane_lists', - 'smarty', - 'toc', - 'wikilinks' + "extra", + "abbr", + "attr_list", + "def_list", + "fenced_code", + "footnotes", + "md_in_html", + "admonition", + "codehilite", + "legacy_attrs", + "legacy_em", + "meta", + "nl2br", + "sane_lists", + "smarty", + "toc", + "wikilinks" ] domains = { diff --git a/fba/fba.py b/fba/fba.py index aa1f375..23e2de3 100644 --- a/fba/fba.py +++ b/fba/fba.py @@ -65,7 +65,7 @@ def fetch_url(url: str, headers: dict, timeout: tuple) -> requests.models.Respon if components.query != "": response = network.fetch_response(components.netloc, f"{components.path}?{components.query}", headers, timeout) else: - response = network.fetch_response(components.netloc, f"{components.path}", headers, timeout) + response = network.fetch_response(components.netloc, components.path if isinstance(components.path, str) and components.path != '' else '/', headers, timeout) # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXXIT!") return response -- 2.39.5