From: Roland Häder Date: Mon, 12 Jun 2023 02:48:04 +0000 (+0200) Subject: WIP: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=54d631d3ce5dbd4c95a0374ea63cef47de181ad5;p=fba.git WIP: - rewrote from out-dated federater blocklist to multiple up-to-date oliphant blocklists - also prepared for local file support (documentation prepared) --- diff --git a/.gitignore b/.gitignore index 0e3266b..722848e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ blocks.db* +blocklists/*/ __pycache__/ venv/ config.json diff --git a/README.md b/README.md index 70ee9ef..f0fb65c 100644 --- a/README.md +++ b/README.md @@ -32,6 +32,15 @@ You maybe wish to change the configuration file, e.g. log_level is set to "info" "trace" ``` +### Fetch blocklists locally (WIP) +To save bandwidth, both your own and Codeberg's, you may want to clone the blocklists that other people publish into a local directory: + +``` +sudo -Hu fba git clone https://codeberg.org/oliphant/blocklists.git blocklists/oliphant/ +``` + +WIP notice: This feature is not implemented yet. 
+ ### Install the services ```bash diff --git a/blocklists/.gitkeep b/blocklists/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/fba/boot.py b/fba/boot.py index cb3ccc9..525ef6b 100644 --- a/fba/boot.py +++ b/fba/boot.py @@ -85,10 +85,10 @@ def init_parser(): ### Fetch blocks from federater ### parser = subparser_command.add_parser( - "fetch_federater", - help="Fetches CSV file (block recommendations) for more possible instances to disover", + "fetch_oliphant", + help="Fetches CSV files (block recommendations) for more possible instances to disover", ) - parser.set_defaults(command=commands.fetch_federater) + parser.set_defaults(command=commands.fetch_oliphant) ### Fetch instances from given initial instance ### parser = subparser_command.add_parser( diff --git a/fba/commands.py b/fba/commands.py index d2ce3ca..8d5401d 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -488,35 +488,79 @@ def fetch_instances(args: argparse.Namespace) -> int: # DEBUG: print("DEBUG: EXIT!") return 0 -def fetch_federater(args: argparse.Namespace): +def fetch_oliphant(args: argparse.Namespace): # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") locking.acquire() - # Fetch this URL - response = fba.fetch_url("https://github.com/federater/blocks_recommended/raw/main/federater.csv", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))) - # DEBUG: print(f"DEBUG: response[]='{type(response)}'") - if response.ok and response.content != "": - # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...") - #print(f"DEBUG: response.content={response.content}") - reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect='unix') - #, fieldnames='domain,severity,reject_media,reject_reports,public_comment,obfuscate' - # DEBUG: print(f"DEBUG: reader[]='{type(reader)}'") - for row in reader: - if not validators.domain(row["#domain"]): - print(f"WARNING: domain='{row['#domain']}' is not a valid domain - 
skipped!") - continue - elif blacklist.is_blacklisted(row["#domain"]): - print(f"WARNING: domain='{row['#domain']}' is blacklisted - skipped!") - continue - elif instances.is_registered(row["#domain"]): - # DEBUG: print(f"DEBUG: domain='{row['#domain']}' is already registered - skipped!") - continue + # Base URL + base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists" + + # URLs to fetch + blocklists = ( + { + "blocker": "artisan.chat", + "csv_url": "mastodon/artisan.chat.csv", + },{ + "blocker": "mastodon.art", + "csv_url": "mastodon/mastodon.art.csv", + },{ + "blocker": "pleroma.envs.net", + "csv_url": "mastodon/pleroma.envs.net.csv", + },{ + "blocker": "oliphant.social", + "csv_url": "mastodon/_unified_tier3_blocklist.csv", + },{ + "blocker": "mastodon.online", + "csv_url": "mastodon/mastodon.online.csv", + },{ + "blocker": "mastodon.social", + "csv_url": "mastodon/mastodon.social.csv", + },{ + "blocker": "mastodon.social", + "csv_url": "other/missing-tier0-mastodon.social.csv", + },{ + "blocker": "rage.love", + "csv_url": "mastodon/rage.love.csv", + },{ + "blocker": "sunny.garden", + "csv_url": "mastodon/sunny.garden.csv", + },{ + "blocker": "solarpunk.moe", + "csv_url": "mastodon/solarpunk.moe.csv", + },{ + "blocker": "toot.wales", + "csv_url": "mastodon/toot.wales.csv", + },{ + "blocker": "union.place", + "csv_url": "mastodon/union.place.csv", + } + ) - try: - print(f"INFO: Fetching instances for instane='{row['#domain']}' ...") - federation.fetch_instances(row["#domain"], None, None, inspect.currentframe().f_code.co_name) - except network.exceptions as exception: - print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{row['#domain']}'") - instances.update_last_error(row["#domain"], exception) + for block in blocklists: + # Fetch this URL + print(f"DEBUG: Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...") + response = fba.fetch_url(f"{base_url}/{block['csv_url']}", 
network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))) + + # DEBUG: print(f"DEBUG: response[]='{type(response)}'") + if response.ok and response.content != "": + # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...") + #print(f"DEBUG: response.content={response.content}") + reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix") + #, fieldnames='domain,severity,reject_media,reject_reports,public_comment,obfuscate' + # DEBUG: print(f"DEBUG: reader[]='{type(reader)}'") + for row in reader: + if not validators.domain(row["#domain"]): + print(f"WARNING: domain='{row['#domain']}' is not a valid domain - skipped!") + continue + elif blacklist.is_blacklisted(row["#domain"]): + print(f"WARNING: domain='{row['#domain']}' is blacklisted - skipped!") + continue + + try: + print(f"INFO: Fetching instances for instane='{row['#domain']}' ...") + federation.fetch_instances(row["#domain"], block["blocker"], None, inspect.currentframe().f_code.co_name) + except network.exceptions as exception: + print(f"WARNING: Exception '{type(exception)}' during fetching instances from domain='{row['#domain']}'") + instances.update_last_error(row["#domain"], exception) # DEBUG: print("DEBUG: EXIT!")