From: Roland Häder Date: Fri, 2 Jun 2023 18:22:41 +0000 (+0200) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=1e72b2222dea7b7b57eff6396a5459c097205bcf;p=fba.git Continued: - added wrapper script to fetch RSS feeds from FBA installations (e.g. https://fba.ryona.agency/rss ) --- diff --git a/fba.py b/fba.py index bec2f99..612bc03 100644 --- a/fba.py +++ b/fba.py @@ -194,18 +194,22 @@ def is_primitive(var: any) -> bool: # NOISY-DEBUG: print(f"DEBUG: var[]='{type(var)}' - CALLED!") return type(var) in {int, str, float, bool} or var == None -def fetch_instances(domain: str, origin: str, software: str, path: str = None): +def fetch_instances(domain: str, origin: str, software: str, script: str, path: str = None): if type(domain) != str: raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'") elif domain == "": raise ValueError(f"Parameter 'domain' cannot be empty") elif type(origin) != str and origin != None: raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'") + elif type(script) != str: + raise ValueError(f"Parameter script[]={type(script)} is not 'str'") + elif domain == "": + raise ValueError(f"Parameter 'domain' cannot be empty") # DEBUG: print("DEBUG: domain,origin,software,path:", domain, origin, software, path) if not is_instance_registered(domain): # DEBUG: print("DEBUG: Adding new domain:", domain, origin) - add_instance(domain, origin, sys.argv[0], path) + add_instance(domain, origin, script, path) # DEBUG: print("DEBUG: Fetching instances for domain:", domain, software) peerlist = get_peers(domain, software) @@ -736,7 +740,7 @@ def get_peers(domain: str, software: str) -> list: # DEBUG: print(f"DEBUG: domain='{domain}',mode='{mode}'") while True: try: - res = reqto.get(f"https://{domain}/api/v1/server/{mode}?start={start}&count=100", headers=api_headers, timeout=(config["connection_timeout"], config["read_timeout"])) + res = reqto.get(f"https://{domain}/api/v1/server/{mode}?start={start}&count=100", 
headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])) data = res.json() # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code='{res.status_code}',data[]='{type(data)}'") diff --git a/fetch_fba_rss.py b/fetch_fba_rss.py new file mode 100755 index 0000000..6745531 --- /dev/null +++ b/fetch_fba_rss.py @@ -0,0 +1,64 @@ +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>.
+ +import reqto +import rss_parser +import sys +import fba + +feed = sys.argv[1] + +domains = list() +try: + print(f"INFO: Fetch FBA-specific RSS feed='{feed}' ...") + res = reqto.get(feed, headers=fba.headers, timeout=(fba.config["connection_timeout"], fba.config["read_timeout"])) + + # DEBUG: print(f"DEBUG: res.ok={res.ok},res.status_code='{res.status_code}',res.text()={len(res.text)}") + if res.ok and res.status_code < 300 and len(res.text) > 0: + # DEBUG: print(f"DEBUG: Parsing RSS feed ...") + rss = rss_parser.Parser.parse(res.text) + for item in rss.channel.items: + # DEBUG: print(f"DEBUG: item.link={item.link}") + domain = item.link.split("=")[1] + + if fba.is_blacklisted(domain): + # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") + continue + elif domain in domains: + # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!") + continue + elif fba.is_instance_registered(domain): + # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!") + continue + + # DEBUG: print(f"DEBUG: domain='{domain}'") + domains.append(domain) + +except BaseException as e: + print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'") + +# Show domains +# DEBUG: print(f"DEBUG: domains()={len(domains)}") +if len(domains) > 0: + print(f"INFO: Adding {len(domains)} new instances ...") + for domain in domains: + print(f"INFO: Fetching instances from domain='{domain}' ...") + fba.fetch_instances(domain, None, None, sys.argv[0]) + +fba.connection.close() diff --git a/fetch_instances.py b/fetch_instances.py index 438142a..5dabad0 100755 --- a/fetch_instances.py +++ b/fetch_instances.py @@ -27,7 +27,7 @@ import fba instance = sys.argv[1] # Initial fetch -fba.fetch_instances(instance, None, None) +fba.fetch_instances(instance, None, None, sys.argv[0]) # Loop through some instances fba.cursor.execute( @@ -42,7 +42,7 @@ for row in rows: print("WARNING: domain is blacklisted:", row[0]) continue - print(f"INFO: Fetching 
instances for instance '{row[0]}' ('{row[2]}') of origin '{row[1]}',nodeinfo_url='{row[3]}'") - fba.fetch_instances(row[0], row[1], row[2], row[3]) + print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'") + fba.fetch_instances(row[0], row[1], row[2], sys.argv[0], row[3]) fba.connection.close() diff --git a/requirements.txt b/requirements.txt index 36b0f93..abf9e15 100644 --- a/requirements.txt +++ b/requirements.txt @@ -5,4 +5,5 @@ requests jinja2 eventlet reqto +rss-parser validators