X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=fba%2Fcommands.py;h=dc98d4d6c3d8b063b9383fd300f1fd778f7678ae;hb=bb3f9e35709b79b790b949ea65ac0a323957d9b5;hp=e22646a2cf3fc3d51ffd088a9a9dd5ec5186b69f;hpb=9cb874e872dc415cb31736da77894d293bf60912;p=fba.git diff --git a/fba/commands.py b/fba/commands.py index e22646a..dc98d4d 100644 --- a/fba/commands.py +++ b/fba/commands.py @@ -14,20 +14,34 @@ # You should have received a copy of the GNU Affero General Public License # along with this program. If not, see . +import csv +import inspect +import json +import time + import argparse import atoma import bs4 -import itertools -import json -import re +import markdown import reqto -import sys -import time import validators -from fba import boot +from fba import blacklist from fba import config +from fba import federation from fba import fba +from fba import network + +from fba.helpers import locking +from fba.helpers import tidyup + +from fba.models import blocks +from fba.models import instances + +from fba.networks import friendica +from fba.networks import mastodon +from fba.networks import misskey +from fba.networks import pleroma def check_instance(args: argparse.Namespace) -> int: # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - CALLED!") @@ -35,614 +49,280 @@ def check_instance(args: argparse.Namespace) -> int: if not validators.domain(args.domain): print(f"WARNING: args.domain='{args.domain}' is not valid") status = 100 - elif fba.is_blacklisted(args.domain): + elif blacklist.is_blacklisted(args.domain): print(f"WARNING: args.domain='{args.domain}' is blacklisted") status = 101 - elif fba.is_instance_registered(args.domain): + elif instances.is_registered(args.domain): print(f"WARNING: args.domain='{args.domain}' is already registered") - staus = 102 + status = 102 else: print(f"INFO: args.domain='{args.domain}' is not known") # DEBUG: print(f"DEBUG: status={status} - EXIT!") return status -def fetch_bkali(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!") +def fetch_bkali(args: argparse.Namespace) -> int: + # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") domains = list() try: - fetched = fba.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({ + fetched = network.post_json_api("gql.api.bka.li", "/v1/graphql", json.dumps({ "query": "query domainlist {nodeinfo(order_by: {domain: asc}) {domain}}" })) - # DEBUG: print(f"DEBUG: fetched({len(fetched)})[]='{type(fetched)}'") - if len(fetched) == 0: + # DEBUG: print(f"DEBUG: fetched[]='{type(fetched)}'") + if "error_message" in fetched: + print(f"WARNING: post_json_api() for 'gql.api.bka.li' returned error message: {fetched['error_message']}") + return 100 + elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]: + print(f"WARNING: post_json_api() returned error: {fetched['error']['message']}") + return 101 + + rows = fetched["json"] + + # DEBUG: print(f"DEBUG: rows({len(rows)})[]='{type(rows)}'") + if len(rows) == 0: raise Exception("WARNING: Returned no records") - elif not "data" in fetched: - raise Exception(f"WARNING: fetched()={len(fetched)} does not contain element 'data'") - elif not "nodeinfo" in fetched["data"]: - raise Exception(f"WARNING: fetched()={len(fetched['data'])} does not contain element 'nodeinfo'") + elif "data" not in rows: + raise Exception(f"WARNING: rows()={len(rows)} does not contain key 'data'") + elif "nodeinfo" not in rows["data"]: + raise Exception(f"WARNING: rows()={len(rows['data'])} does not contain key 'nodeinfo'") - for entry in fetched["data"]["nodeinfo"]: + for entry in rows["data"]["nodeinfo"]: # DEBUG: print(f"DEBUG: entry['{type(entry)}']='{entry}'") if not "domain" in entry: - print(f"WARNING: entry does not contain 'domain' - SKIPPED!") + print(f"WARNING: entry()={len(entry)} does not contain 'domain' - SKIPPED!") continue elif not validators.domain(entry["domain"]): print(f"WARNING: domain='{entry['domain']}' is not a valid domain - SKIPPED!") continue - elif fba.is_blacklisted(entry["domain"]): + elif blacklist.is_blacklisted(entry["domain"]): # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is blacklisted - SKIPPED!") continue - elif fba.is_instance_registered(entry["domain"]): + elif instances.is_registered(entry["domain"]): # DEBUG: print(f"DEBUG: domain='{entry['domain']}' is already registered - SKIPPED!") continue + elif instances.is_recent(entry["domain"]): + # DEBUG: print(f"DEBUG: domain='{entry['domain']}' has been recently fetched - SKIPPED!") + continue # DEBUG: print(f"DEBUG: Adding domain='{entry['domain']}' ...") domains.append(entry["domain"]) - except BaseException as e: - print(f"ERROR: Cannot fetch graphql,exception[{type(e)}]:'{str(e)}'") - sys.exit(255) + except network.exceptions as exception: + print(f"ERROR: Cannot fetch graphql,exception[{type(exception)}]:'{str(exception)}' - EXIT!") + return 102 # DEBUG: print(f"DEBUG: domains()={len(domains)}") if len(domains) > 0: - boot.acquire_lock() + locking.acquire() print(f"INFO: Adding {len(domains)} new instances ...") for domain in domains: - print(f"INFO: Fetching instances from domain='{domain}' ...") - fba.fetch_instances(domain, None, None, sys.argv[0]) + try: + print(f"INFO: Fetching instances from domain='{domain}' ...") + federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name) + except network.exceptions as exception: + print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_bkali) from domain='{domain}'") + instances.set_last_error(domain, exception) - # DEBUG: print("DEBUG: EXIT!") + # DEBUG: print("DEBUG: Success - EXIT!") + return 0 def fetch_blocks(args: argparse.Namespace): - print(f"DEBUG: args[]={type(args)} - CALLED!") - if args.domain != None and args.domain != "": + # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") + if args.domain is not None and args.domain != "": + # DEBUG: print(f"DEBUG: args.domain='{args.domain}' - checking ...") if not validators.domain(args.domain): print(f"WARNING: domain='{args.domain}' is not valid.") return - elif fba.is_blacklisted(args.domain): + elif blacklist.is_blacklisted(args.domain): print(f"WARNING: domain='{args.domain}' is blacklisted, won't check it!") return - elif not fba.is_instance_registered(args.domain): + elif not instances.is_registered(args.domain): print(f"WARNING: domain='{args.domain}' is not registered, please run ./fba.py fetch_instances {args.domain} first.") return - boot.acquire_lock() + locking.acquire() - if args.domain != None and args.domain != "": + if args.domain is not None and args.domain != "": + # Re-check single domain + # DEBUG: print(f"DEBUG: Querying database for single args.domain='{args.domain}' ...") fba.cursor.execute( - "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND domain = ?", [args.domain] + "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE domain = ?", [args.domain] ) else: + # Re-check after "timeout" (aka. minimum interval) fba.cursor.execute( - "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")] + "SELECT domain, software, origin, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe') AND (last_blocked IS NULL OR last_blocked < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_block")] ) rows = fba.cursor.fetchall() print(f"INFO: Checking {len(rows)} entries ...") for blocker, software, origin, nodeinfo_url in rows: # DEBUG: print("DEBUG: BEFORE blocker,software,origin,nodeinfo_url:", blocker, software, origin, nodeinfo_url) - blockdict = [] - blocker = fba.tidyup_domain(blocker) + blockdict = list() + blocker = tidyup.domain(blocker) # DEBUG: print("DEBUG: AFTER blocker,software:", blocker, software) if blocker == "": print("WARNING: blocker is now empty!") continue - elif fba.is_blacklisted(blocker): + elif blacklist.is_blacklisted(blocker): print(f"WARNING: blocker='{blocker}' is blacklisted now!") continue # DEBUG: print(f"DEBUG: blocker='{blocker}'") - fba.update_last_blocked(blocker) + instances.set_last_blocked(blocker) if software == "pleroma": - print("INFO: blocker:", blocker) - try: - # Blocks - json = fba.fetch_nodeinfo(blocker, nodeinfo_url) - if json is None: - print("WARNING: Could not fetch nodeinfo from blocker:", blocker) - continue - elif not "metadata" in json: - print(f"WARNING: json()={len(json)} does not have key 'metadata', blocker='{blocker}'") - continue - elif not "federation" in json["metadata"]: - print(f"WARNING: json()={len(json['metadata'])} does not have key 'federation', blocker='{blocker}'") - continue - - # DEBUG: print("DEBUG: Updating nodeinfo:", blocker) - fba.update_last_nodeinfo(blocker) - - federation = json["metadata"]["federation"] - - if "enabled" in federation: - # DEBUG: print("DEBUG: Instance has no block list to analyze:", blocker) + print(f"INFO: blocker='{blocker}',software='{software}'") + pleroma.fetch_blocks(blocker, origin, nodeinfo_url) + elif software == "mastodon": + print(f"INFO: blocker='{blocker}',software='{software}'") + mastodon.fetch_blocks(blocker, origin, nodeinfo_url) + elif software == "friendica" or software == "misskey": + print(f"INFO: blocker='{blocker}',software='{software}'") + + blocking = list() + if software == "friendica": + blocking = friendica.fetch_blocks(blocker) + elif software == "misskey": + blocking = misskey.fetch_blocks(blocker) + + print(f"INFO: Checking {len(blocking.items())} entries from blocker='{blocker}',software='{software}' ...") + for block_level, blocklist in blocking.items(): + # DEBUG: print("DEBUG: blocker,block_level,blocklist():", blocker, block_level, len(blocklist)) + block_level = tidyup.domain(block_level) + # DEBUG: print("DEBUG: AFTER-block_level:", block_level) + if block_level == "": + print("WARNING: block_level is empty, blocker:", blocker) continue - if "mrf_simple" in federation: - for block_level, blocks in ( - {**federation["mrf_simple"], - **{"quarantined_instances": federation["quarantined_instances"]}} - ).items(): - # DEBUG: print("DEBUG: block_level, blocks():", block_level, len(blocks)) - block_level = fba.tidyup_domain(block_level) - # DEBUG: print("DEBUG: BEFORE block_level:", block_level) - - if block_level == "": - print("WARNING: block_level is now empty!") - continue - - # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") - for blocked in blocks: - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty after fba.tidyup_domain():", blocker, block_level) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 1: - # -ACK!-oma also started obscuring domains without hash - fba.cursor.execute( - "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] - ) - searchres = fba.cursor.fetchone() - # DEBUG: print("DEBUG: searchres[]:", type(searchres)) - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - nodeinfo_url = searchres[1] - # DEBUG: print("DEBUG: Looked up domain:", blocked) - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - - if not fba.is_instance_blocked(blocker, blocked, block_level): - # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level) - fba.block_instance(blocker, blocked, "unknown", block_level) - - if block_level == "reject": - # DEBUG: print("DEBUG: Adding to blockdict:", blocked) - blockdict.append( - { - "blocked": blocked, - "reason" : None - }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen for blocker='{blocker}',blocked='{blocked}' ...") - fba.update_last_seen(blocker, blocked, block_level) - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - - # Reasons - if "mrf_simple_info" in federation: - # DEBUG: print("DEBUG: Found mrf_simple_info:", blocker) - for block_level, info in ( - {**federation["mrf_simple_info"], - **(federation["quarantined_instances_info"] - if "quarantined_instances_info" in federation - else {})} - ).items(): - # DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items())) - block_level = fba.tidyup_domain(block_level) - # DEBUG: print("DEBUG: BEFORE block_level:", block_level) - - if block_level == "": - print("WARNING: block_level is now empty!") - continue + # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") + for block in blocklist: + blocked, reason = block.values() + # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!") + blocked = tidyup.domain(blocked) + reason = tidyup.reason(reason) if reason is not None and reason != "" else None + # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!") - # DEBUG: print(f"DEBUG: Checking {len(info.items())} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") - for blocked, reason in info.items(): - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty after fba.tidyup_domain():", blocker, block_level) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 1: - # same domain guess as above, but for reasons field - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] - ) - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - - # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"]) - fba.update_block_reason(reason["reason"], blocker, blocked, block_level) - - for entry in blockdict: - if entry["blocked"] == blocked: - # DEBUG: print("DEBUG: Updating entry reason:", blocked) - entry["reason"] = reason["reason"] - - fba.connection.commit() - except Exception as e: - print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") - elif software == "mastodon": - print("INFO: blocker:", blocker) - try: - # json endpoint for newer mastodongs - try: - json = { - "reject" : [], - "media_removal" : [], - "followers_only": [], - "report_removal": [] - } - - # handling CSRF, I've saw at least one server requiring it to access the endpoint - # DEBUG: print("DEBUG: Fetching meta:", blocker) - meta = bs4.BeautifulSoup( - fba.get_response(blocker, "/", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text, - "html.parser", - ) - try: - csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"] - # DEBUG: print("DEBUG: Adding CSRF token:", blocker, csrf) - reqheaders = {**fba.api_headers, **{"X-CSRF-Token": csrf}} - except BaseException as e: - # DEBUG: print("DEBUG: No CSRF token found, using normal headers:", blocker, e) - reqheaders = fba.api_headers - - # DEBUG: print("DEBUG: Querying API domain_blocks:", blocker) - blocks = fba.get_response(blocker, "/api/v1/instance/domain_blocks", reqheaders, (config.get("connection_timeout"), config.get("read_timeout"))).json() - - print(f"INFO: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}' ...") - for block in blocks: - entry = { - 'domain': block['domain'], - 'hash' : block['digest'], - 'reason': block['comment'] - } - - # DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment']) - if block['severity'] == 'suspend': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") - json['reject'].append(entry) - elif block['severity'] == 'silence': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") - json['followers_only'].append(entry) - elif block['severity'] == 'reject_media': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") - json['media_removal'].append(entry) - elif block['severity'] == 'reject_reports': - # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...") - json['report_removal'].append(entry) - else: - print("WARNING: Unknown severity:", block['severity'], block['domain']) - except BaseException as e: - # DEBUG: print(f"DEBUG: Failed, trying mastodon-specific fetches: blocker='{blocker}',exception[{type(e)}]={str(e)}") - json = fba.get_mastodon_blocks(blocker) - - print(f"INFO: Checking {len(json.items())} entries from blocker='{blocker}',software='{software}' ...") - for block_level, blocks in json.items(): - # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks)) - block_level = fba.tidyup_domain(block_level) - # DEBUG: print("DEBUG: AFTER-block_level:", block_level) - if block_level == "": - print("WARNING: block_level is empty, blocker:", blocker) + if blocked == "": + print("WARNING: blocked is empty:", blocker) continue - - # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") - for block in blocks: - blocked, blocked_hash, reason = block.values() - # DEBUG: print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER-blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty:", blocker) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 0: - # Doing the hash search for instance names as well to tidy up DB - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash] - ) - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!") - continue - - # DEBUG: print("DEBUG: Updating domain: ", searchres[0]) - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - - blocking = blocked if blocked.count("*") <= 1 else blocked_hash - # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'") - - if not fba.is_instance_blocked(blocker, blocked, block_level): - # DEBUG: print("DEBUG: Blocking:", blocker, blocked, block_level) - fba.block_instance(blocker, blocking, reason, block_level) - - if block_level == "reject": - blockdict.append({ - "blocked": blocked, - "reason" : reason - }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocking='{blocking}' ...") - fba.update_last_seen(blocker, blocking, block_level) - fba.update_block_reason(reason, blocker, blocking, block_level) - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - except Exception as e: - print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") - elif software == "friendica" or software == "misskey" or software == "bookwyrm" or software == "takahe": - print("INFO: blocker:", blocker) - try: - if software == "friendica": - json = fba.get_friendica_blocks(blocker) - elif software == "misskey": - json = fba.get_misskey_blocks(blocker) - elif software == "bookwyrm": - print("WARNING: bookwyrm is not fully supported for fetching blacklist!", blocker) - #json = fba.get_bookwyrm_blocks(blocker) - continue - elif software == "takahe": - print("WARNING: takahe is not fully supported for fetching blacklist!", blocker) - #json = fba.get_takahe_blocks(blocker) - continue - - print(f"INFO: Checking {len(json.items())} entries from blocker='{blocker}',software='{software}' ...") - for block_level, blocks in json.items(): - # DEBUG: print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks)) - block_level = fba.tidyup_domain(block_level) - # DEBUG: print("DEBUG: AFTER-block_level:", block_level) - if block_level == "": - print("WARNING: block_level is empty, blocker:", blocker) + elif blacklist.is_blacklisted(blocked): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") continue + elif blocked.count("*") > 0: + # Some friendica servers also obscure domains without hash + row = instances.deobscure("*", blocked) - # DEBUG: print(f"DEBUG: Checking {len(blocks)} entries from blocker='{blocker}',software='{software}',block_level='{block_level}' ...") - for block in blocks: - blocked, reason = block.values() - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty:", blocker) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 0: - # Some friendica servers also obscure domains without hash - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] - ) - - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - elif blocked.count("?") > 0: - # Some obscure them with question marks, not sure if that's dependent on version or not - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("?", "_")] - ) - - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + if row is None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!") continue - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") - continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - - if not fba.is_instance_blocked(blocker, blocked, block_level): - fba.block_instance(blocker, blocked, reason, block_level) - - if block_level == "reject": - blockdict.append({ - "blocked": blocked, - "reason" : reason - }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...") - fba.update_last_seen(blocker, blocked, block_level) - fba.update_block_reason(reason, blocker, blocked, block_level) - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - except Exception as e: - print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") - elif software == "gotosocial": - print("INFO: blocker:", blocker) - try: - # Blocks - federation = fba.get_response(blocker, "{fba.get_peers_url}?filter=suspended", fba.api_headers, (config.get("connection_timeout"), config.get("read_timeout"))).json() + blocked = row[0] + origin = row[1] + nodeinfo_url = row[2] + elif blocked.count("?") > 0: + # Some obscure them with question marks, not sure if that's dependent on version or not + row = instances.deobscure("?", blocked) - if (federation == None): - print("WARNING: No valid response:", blocker); - elif "error" in federation: - print("WARNING: API returned error:", federation["error"]) - else: - print(f"INFO: Checking {len(federation)} entries from blocker='{blocker}',software='{software}' ...") - for peer in federation: - blocked = peer["domain"].lower() - # DEBUG: print("DEBUG: BEFORE blocked:", blocked) - blocked = fba.tidyup_domain(blocked) - # DEBUG: print("DEBUG: AFTER blocked:", blocked) - - if blocked == "": - print("WARNING: blocked is empty:", blocker) - continue - elif fba.is_blacklisted(blocked): - # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!") - continue - elif blocked.count("*") > 0: - # GTS does not have hashes for obscured domains, so we have to guess it - fba.cursor.execute( - "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [blocked.replace("*", "_")] - ) - searchres = fba.cursor.fetchone() - - if searchres == None: - print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!") - continue - - blocked = searchres[0] - origin = searchres[1] - nodeinfo_url = searchres[2] - elif not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + # DEBUG: print(f"DEBUG: row[]='{type(row)}'") + if row is None: + print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocker='{blocker}',software='{software}' - SKIPPED!") continue - # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) - if not validators.domain(blocked): - print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - skipped!") + blocked = row[0] + origin = row[1] + nodeinfo_url = row[2] + + # DEBUG: print("DEBUG: Looking up instance by domain:", blocked) + if not validators.domain(blocked): + print(f"WARNING: blocked='{blocked}',software='{software}' is not a valid domain name - SKIPPED!") + continue + elif blocked.endswith(".arpa"): + # DEBUG: print(f"DEBUG: blocked='{blocked}' is ending with '.arpa' - SKIPPED!") + continue + elif not instances.is_registered(blocked): + # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) + try: + instances.add(blocked, blocker, inspect.currentframe().f_code.co_name, nodeinfo_url) + except network.exceptions as exception: + print(f"Exception during adding blocked='{blocked}',blocker='{blocker}': '{type(exception)}'") continue - elif not fba.is_instance_registered(blocked): - # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., blocker='{blocker}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'") - fba.add_instance(blocked, blocker, sys.argv[0], nodeinfo_url) - if not fba.is_instance_blocked(blocker, blocked, "reject"): - # DEBUG: print(f"DEBUG: blocker='{blocker}' is blocking '{blocked}' for unknown reason at this point") - fba.block_instance(blocker, blocked, "unknown", "reject") + if not blocks.is_instance_blocked(blocker, blocked, block_level): + blocks.add_instance(blocker, blocked, reason, block_level) + if block_level == "reject": blockdict.append({ "blocked": blocked, - "reason" : None + "reason" : reason }) - else: - # DEBUG: print(f"DEBUG: Updating block last seen for blocker='{blocker}',blocked='{blocked}' ...") - fba.update_last_seen(blocker, blocked, "reject") - - if "public_comment" in peer: - # DEBUG: print("DEBUG: Updating block reason:", blocker, blocked, peer["public_comment"]) - fba.update_block_reason(peer["public_comment"], blocker, blocked, "reject") - - for entry in blockdict: - if entry["blocked"] == blocked: - # DEBUG: print(f"DEBUG: Setting block reason for blocked='{blocked}':'{peer['public_comment']}'") - entry["reason"] = peer["public_comment"] - - # DEBUG: print("DEBUG: Committing changes ...") - fba.connection.commit() - except Exception as e: - print(f"ERROR: blocker='{blocker}',software='{software}',exception[{type(e)}]:'{str(e)}'") + else: + # DEBUG: print(f"DEBUG: Updating block last seen and reason for blocker='{blocker}',blocked='{blocked}' ...") + blocks.update_last_seen(blocker, blocked, block_level) + blocks.update_reason(reason, blocker, blocked, block_level) + + # DEBUG: print("DEBUG: Committing changes ...") + fba.connection.commit() else: print("WARNING: Unknown software:", blocker, software) - if config.get("bot_enabled") and len(blockdict) > 0: - send_bot_post(blocker, blockdict) + if instances.has_pending(blocker): + # DEBUG: print(f"DEBUG: Invoking instances.update_data({blocker}) ...") + instances.update_data(blocker) - blockdict = [] + if config.get("bot_enabled") and len(blockdict) > 0: + network.send_bot_post(blocker, blockdict) # DEBUG: print("DEBUG: EXIT!") def fetch_cs(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!") + # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") + extensions = [ + 'extra', + 'abbr', + 'attr_list', + 'def_list', + 'fenced_code', + 'footnotes', + 'md_in_html', + 'admonition', + 'codehilite', + 'legacy_attrs', + 'legacy_em', + 'meta', + 'nl2br', + 'sane_lists', + 'smarty', + 'toc', + 'wikilinks' + ] + domains = { "silenced": list(), - "blocked": list(), + "reject" : list(), } - try: - doc = bs4.BeautifulSoup( - reqto.get("https://meta.chaos.social/federation", headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))).text, - "html.parser", - ) - # DEBUG: print(f"DEBUG: doc()={len(doc)}[]={type(doc)}") - silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table") + raw = fba.fetch_url("https://raw.githubusercontent.com/chaossocial/meta/master/federation.md", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))).text + # DEBUG: print(f"DEBUG: raw()={len(raw)}[]='{type(raw)}'") - # DEBUG: print(f"DEBUG: silenced[]={type(silenced)}") - domains["silenced"] = domains["silenced"] + find_domains(silenced) - blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table") + doc = bs4.BeautifulSoup(markdown.markdown(raw, extensions=extensions), features='html.parser') - # DEBUG: print(f"DEBUG: blocked[]={type(blocked)}") - domains["blocked"] = domains["blocked"] + find_domains(blocked) + # DEBUG: print(f"DEBUG: doc()={len(doc)}[]='{type(doc)}'") + silenced = doc.find("h2", {"id": "silenced-instances"}).findNext("table").find("tbody") + # DEBUG: print(f"DEBUG: silenced[]='{type(silenced)}'") + domains["silenced"] = domains["silenced"] + federation.find_domains(silenced) - except BaseException as e: - print(f"ERROR: Cannot fetch from meta.chaos.social,exception[{type(e)}]:'{str(e)}'") - sys.exit(255) + blocked = doc.find("h2", {"id": "blocked-instances"}).findNext("table").find("tbody") + # DEBUG: print(f"DEBUG: blocked[]='{type(blocked)}'") + domains["reject"] = domains["reject"] + federation.find_domains(blocked) # DEBUG: print(f"DEBUG: domains()={len(domains)}") if len(domains) > 0: - boot.acquire_lock() + locking.acquire() print(f"INFO: Adding {len(domains)} new instances ...") for block_level in domains: @@ -650,13 +330,17 @@ def fetch_cs(args: argparse.Namespace): for row in domains[block_level]: # DEBUG: print(f"DEBUG: row='{row}'") - if not fba.is_instance_registered(row["domain"]): - print(f"INFO: Fetching instances from domain='{row['domain']}' ...") - fba.fetch_instances(row["domain"], None, None, sys.argv[0]) - - if not fba.is_instance_blocked('chaos.social', row["domain"], block_level): + if not blocks.is_instance_blocked('chaos.social', row["domain"], block_level): # DEBUG: print(f"DEBUG: domain='{row['domain']}',block_level='{block_level}' blocked by chaos.social, adding ...") - fba.block_instance('chaos.social', row["domain"], row["reason"], block_level) + blocks.add_instance('chaos.social', row["domain"], row["reason"], block_level) + + if not instances.is_registered(row["domain"]): + try: + print(f"INFO: Fetching instances from domain='{row['domain']}' ...") + federation.fetch_instances(row["domain"], 'chaos.social', None, inspect.currentframe().f_code.co_name) + except network.exceptions as exception: + print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_cs) from domain='{row['domain']}'") + instances.set_last_error(row["domain"], exception) # DEBUG: print("DEBUG: Committing changes ...") fba.connection.commit() @@ -664,128 +348,262 @@ def fetch_cs(args: argparse.Namespace): # DEBUG: print("DEBUG: EXIT!") def fetch_fba_rss(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!") + # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") domains = list() - try: - print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...") - response = reqto.get(args.feed, headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))) + print(f"INFO: Fetch FBA-specific RSS args.feed='{args.feed}' ...") + response = fba.fetch_url(args.feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))) - # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}") - if response.ok and response.status_code < 300 and len(response.text) > 0: - # DEBUG: print(f"DEBUG: Parsing RSS feed ...") - rss = atoma.parse_rss_bytes(response.content) + # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}") + if response.ok and response.status_code < 300 and len(response.text) > 0: + # DEBUG: print(f"DEBUG: Parsing RSS feed ({len(response.text)} Bytes) ...") + rss = atoma.parse_rss_bytes(response.content) - # DEBUG: print(f"DEBUG: rss[]={type(rss)}") - for item in rss.items: - # DEBUG: print(f"DEBUG: item={item}") - domain = item.link.split("=")[1] + # DEBUG: print(f"DEBUG: rss[]='{type(rss)}'") + for item in rss.items: + # DEBUG: print(f"DEBUG: item={item}") + domain = item.link.split("=")[1] - if fba.is_blacklisted(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") - continue - elif domain in domains: - # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!") - continue - elif fba.is_instance_registered(domain): - # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!") - continue - - # DEBUG: print(f"DEBUG: domain='{domain}'") - domains.append(domain) + if blacklist.is_blacklisted(domain): + # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") + continue + elif domain in domains: + # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!") + continue + elif instances.is_registered(domain): + # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!") + continue - except BaseException as e: - print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'") - sys.exit(255) + # DEBUG: print(f"DEBUG: Adding domain='{domain}'") + domains.append(domain) # DEBUG: print(f"DEBUG: domains()={len(domains)}") if len(domains) > 0: - boot.acquire_lock() + locking.acquire() print(f"INFO: Adding {len(domains)} new instances ...") for domain in domains: - print(f"INFO: Fetching instances from domain='{domain}' ...") - fba.fetch_instances(domain, None, None, sys.argv[0]) + try: + print(f"INFO: Fetching instances from domain='{domain}' ...") + federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name) + except network.exceptions as exception: + print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fba_rss) from domain='{domain}'") + instances.set_last_error(domain, exception) # DEBUG: print("DEBUG: EXIT!") def fetch_fbabot_atom(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!") + # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") feed = "https://ryona.agency/users/fba/feed.atom" domains = list() - try: - print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...") - response = reqto.get(feed, headers=fba.headers, timeout=(config.get("connection_timeout"), config.get("read_timeout"))) - - # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}") - if response.ok and response.status_code < 300 and len(response.text) > 0: - # DEBUG: print(f"DEBUG: Parsing ATOM feed ...") - atom = atoma.parse_atom_bytes(response.content) - - # DEBUG: print(f"DEBUG: atom[]={type(atom)}") - for entry in atom.entries: - # DEBUG: print(f"DEBUG: entry[]={type(entry)}") - doc = bs4.BeautifulSoup(entry.content.value, "html.parser") - # DEBUG: print(f"DEBUG: doc[]={type(doc)}") - for element in doc.findAll("a"): - # DEBUG: print(f"DEBUG: element[{type(element)}]={element}") - domain = fba.tidyup_domain(element["href"]) + + print(f"INFO: Fetching ATOM feed='{feed}' from FBA bot account ...") + response = fba.fetch_url(feed, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))) + + # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code='{response.status_code}',response.text()={len(response.text)}") + if response.ok and response.status_code < 300 and len(response.text) > 0: + # DEBUG: print(f"DEBUG: Parsing ATOM feed ({len(response.text)} Bytes) ...") + atom = atoma.parse_atom_bytes(response.content) + + # DEBUG: print(f"DEBUG: atom[]='{type(atom)}'") + for entry in atom.entries: + # DEBUG: print(f"DEBUG: entry[]='{type(entry)}'") + doc = bs4.BeautifulSoup(entry.content.value, "html.parser") + # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'") + for element in doc.findAll("a"): + for href in element["href"].split(","): + # DEBUG: print(f"DEBUG: href[{type(href)}]={href}") + domain = tidyup.domain(href) # DEBUG: print(f"DEBUG: domain='{domain}'") - if fba.is_blacklisted(domain): + if blacklist.is_blacklisted(domain): # DEBUG: print(f"DEBUG: domain='{domain}' is blacklisted - SKIPPED!") continue elif domain in domains: # DEBUG: print(f"DEBUG: domain='{domain}' is already added - SKIPPED!") continue - elif fba.is_instance_registered(domain): + elif instances.is_registered(domain): # DEBUG: print(f"DEBUG: domain='{domain}' is already registered - SKIPPED!") continue - # DEBUG: print(f"DEBUG: domain='{domain}'") + # DEBUG: print(f"DEBUG: Adding domain='{domain}',domains()={len(domains)}") domains.append(domain) - except BaseException as e: - print(f"ERROR: Cannot fetch feed='{feed}',exception[{type(e)}]:'{str(e)}'") - sys.exit(255) - - # DEBUG: print(f"DEBUG: domains()={len(domains)}") + # DEBUG: print(f"DEBUG: domains({len(domains)})={domains}") if len(domains) > 0: - boot.acquire_lock() + locking.acquire() print(f"INFO: Adding {len(domains)} new instances ...") for domain in domains: - print(f"INFO: Fetching instances from domain='{domain}' ...") - fba.fetch_instances(domain, None, None, sys.argv[0]) + try: + print(f"INFO: Fetching instances from domain='{domain}' ...") + federation.fetch_instances(domain, None, None, inspect.currentframe().f_code.co_name) + except network.exceptions as exception: + print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_fbabot_atom) from domain='{domain}'") + instances.set_last_error(domain, exception) # DEBUG: print("DEBUG: EXIT!") -def fetch_instances(args: argparse.Namespace): - # DEBUG: print(f"DEBUG: args[]={type(args)} - CALLED!") - boot.acquire_lock() +def fetch_instances(args: argparse.Namespace) -> int: + # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") + locking.acquire() # Initial fetch - fba.fetch_instances(args.domain, None, None, sys.argv[0]) + try: + print(f"INFO: Fetching instances from args.domain='{args.domain}' ...") + federation.fetch_instances(args.domain, None, None, inspect.currentframe().f_code.co_name) + except network.exceptions as exception: + print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from args.domain='{args.domain}'") + instances.set_last_error(args.domain, exception) + + return 100 if args.single: - print(f"DEBUG: Not fetching more instances - EXIT!") - return + # DEBUG: print("DEBUG: Not fetching more instances - EXIT!") + return 0 # Loop through some instances fba.cursor.execute( - "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")] + "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - config.get("recheck_instance")] ) rows = fba.cursor.fetchall() print(f"INFO: Checking {len(rows)} entries ...") for row in rows: - # DEBUG: print("DEBUG: domain:", row[0]) - if fba.is_blacklisted(row[0]): + # DEBUG: print(f"DEBUG: domain='{row[0]}'") + if blacklist.is_blacklisted(row[0]): print("WARNING: domain is blacklisted:", row[0]) continue - print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'") - fba.fetch_instances(row[0], row[1], row[2], sys.argv[0], row[3]) + try: + print(f"INFO: Fetching instances for instance '{row[0]}' ('{row[2]}') of origin='{row[1]}',nodeinfo_url='{row[3]}'") + federation.fetch_instances(row[0], row[1], row[2], inspect.currentframe().f_code.co_name, row[3]) + except network.exceptions as exception: + print(f"WARNING: Exception '{type(exception)}' during fetching instances (fetch_instances) from domain='{row[0]}'") + instances.set_last_error(row[0], exception) + + # DEBUG: print("DEBUG: Success - EXIT!") + return 0 + +def fetch_oliphant(args: argparse.Namespace): + # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") + locking.acquire() + + # Base URL + base_url = "https://codeberg.org/oliphant/blocklists/raw/branch/main/blocklists" + + # URLs to fetch + blocklists = ( + { + "blocker": "artisan.chat", + "csv_url": "mastodon/artisan.chat.csv", + },{ + "blocker": "mastodon.art", + "csv_url": "mastodon/mastodon.art.csv", + },{ + "blocker": "pleroma.envs.net", + "csv_url": "mastodon/pleroma.envs.net.csv", + },{ + "blocker": "oliphant.social", + "csv_url": "mastodon/_unified_tier3_blocklist.csv", + },{ + "blocker": "mastodon.online", + "csv_url": "mastodon/mastodon.online.csv", + },{ + "blocker": "mastodon.social", + "csv_url": "mastodon/mastodon.social.csv", + },{ + "blocker": "mastodon.social", + "csv_url": "other/missing-tier0-mastodon.social.csv", + },{ + "blocker": "rage.love", + "csv_url": "mastodon/rage.love.csv", + },{ + "blocker": "sunny.garden", + "csv_url": "mastodon/sunny.garden.csv", + },{ + "blocker": "solarpunk.moe", + "csv_url": "mastodon/solarpunk.moe.csv", + },{ + "blocker": "toot.wales", + "csv_url": "mastodon/toot.wales.csv", + },{ + "blocker": "union.place", + "csv_url": "mastodon/union.place.csv", + } + ) + + domains = list() + for block in blocklists: + # Is domain given and not equal blocker? + if isinstance(args.domain, str) and args.domain != block["blocker"]: + # DEBUG: print(f"DEBUG: Skipping blocker='{block['blocker']}', not matching args.domain='{args.domain}'") + continue + elif args.domain in domains: + # DEBUG: print(f"DEBUG: args.domain='{args.domain}' already handled - SKIPPED!") + continue + + # Fetch this URL + print(f"INFO: Fetching csv_url='{block['csv_url']}' for blocker='{block['blocker']}' ...") + response = fba.fetch_url(f"{base_url}/{block['csv_url']}", network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))) + + # DEBUG: print(f"DEBUG: response[]='{type(response)}'") + if response.ok and response.content != "": + # DEBUG: print(f"DEBUG: Fetched {len(response.content)} Bytes, parsing CSV ...") + reader = csv.DictReader(response.content.decode('utf-8').splitlines(), dialect="unix") + + # DEBUG: print(f"DEBUG: reader[]='{type(reader)}'") + for row in reader: + domain = None + if "#domain" in row: + domain = row["#domain"] + elif "domain" in row: + domain = row["domain"] + else: + # DEBUG: print(f"DEBUG: row='{row}' does not contain domain column") + continue + + # DEBUG: print(f"DEBUG: Marking domain='{domain}' as handled") + domains.append(domain) + + # DEBUG: print(f"DEBUG: Processing domain='{domain}' ...") + processed = fba.process_domain(domain, block["blocker"], inspect.currentframe().f_code.co_name) + + # DEBUG: print(f"DEBUG: processed='{processed}'") + + # DEBUG: print("DEBUG: EXIT!") + +def fetch_txt(args: argparse.Namespace): + # DEBUG: print(f"DEBUG: args[]='{type(args)}' - CALLED!") + locking.acquire() + + # Static URLs + urls = ( + "https://seirdy.one/pb/bsl.txt", + ) + + print(f"INFO: Checking {len(urls)} text file(s) ...") + for url in urls: + # DEBUG: print(f"DEBUG: Fetching url='{url}' ...") + response = fba.fetch_url(url, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout"))) + + # DEBUG: print(f"DEBUG: response[]='{type(response)}'") + if response.ok and response.text != "": + # DEBUG: print(f"DEBUG: Returned {len(response.text.strip())} Bytes for processing") + domains = response.text.split("\n") + + print(f"INFO: Processing {len(domains)} domains ...") + for domain in domains: + if domain == "": + continue + + # DEBUG: print(f"DEBUG: domain='{domain}'") + processed = fba.process_domain(domain, 'seirdy.one', inspect.currentframe().f_code.co_name) + + # DEBUG: print(f"DEBUG: processed='{processed}'") + if not processed: + # DEBUG: print(f"DEBUG: domain='{domain}' was not generically processed - SKIPPED!") + continue # DEBUG: print("DEBUG: EXIT!")