From 989bedfa7e3a1c3a536615f11570bd9768c9433e Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Sun, 21 May 2023 07:16:23 +0200 Subject: [PATCH] Continued: - reformatted config.defaults.json - also store script name --- blocks_empty.db | Bin 28672 -> 28672 bytes config.defaults.json | 21 +++++++++++---------- fba.py | 25 +++++++++++++------------ fetch_blocks.py | 10 +++++----- fetch_instances.py | 10 +++++----- 5 files changed, 34 insertions(+), 32 deletions(-) diff --git a/blocks_empty.db b/blocks_empty.db index 457b338488c069c6b7d032bd44fbf7f8a64f55ff..93efe69100d170780db531eec9a331f466abdfae 100644 GIT binary patch delta 111 zcmZp8z}WDBae}nqD+UGzb|_{9(h3uGj3r+&=y^}%<=0_g<*{VoYvz5&W69giYs^!= zvGE``S7SLBySThOW3%n#M4s(p3L&0;ks+=TAqtLeA+A9RDfzjHnR$~1cx71fi!#$U J8}MF80syIj9K8Sl delta 96 zcmZp8z}WDBae}nq3kC)Tb|_{9(sC1Zj3r+%=y^}%<=0_g list: peers = None try: - res = reqto.get(f"https://{domain}/api/v1/instance/peers", headers=headers, timeout=5) + res = reqto.get(f"https://{domain}/api/v1/instance/peers", headers=headers, timeout=config["timeout"]) if not res.ok or res.status_code >= 400: print("WARNING: Cannot fetch peers:", domain) @@ -95,7 +95,7 @@ def get_peers(domain: str) -> list: def post_json_api(domain: str, path: str, data: str) -> list: try: # NOISY-DEBUG: print("DEBUG: Sending POST to domain,path,data:", domain, path, data) - res = reqto.post(f"https://{domain}{path}", data=data, headers=headers, timeout=5) + res = reqto.post(f"https://{domain}{path}", data=data, headers=headers, timeout=config["timeout"]) if not res.ok or res.status_code >= 400: print("WARNING: Cannot query JSON API:", domain, path, data, res.status_code) @@ -124,7 +124,7 @@ def fetch_nodeinfo(domain: str) -> list: json = None for request in requests: # NOISY-DEBUG: print("DEBUG: Fetching request:", request) - res = reqto.get(request, headers=headers, timeout=5) + res = reqto.get(request, headers=headers, timeout=config["timeout"]) # NOISY-DEBUG: print("DEBUG: res.ok,res.json[]:", res.ok, type(res.json())) if res.ok and res.json() is not None: @@ -218,7 +218,7 @@ def block_instance(blocker: str, blocked: str, reason: str, block_level: str, fi print("--- New block:", blocker, blocked, reason, block_level, first_added, last_seen) try: c.execute( - "INSERT INTO blocks SELECT ?, ?, ?, ?, ?, ?", + "INSERT INTO blocks (blocker, blocked, reason, block_level, first_added, last_seen) VALUES(?, ?, ?, ?, ?, ?)", ( blocker, blocked, @@ -233,21 +233,22 @@ def block_instance(blocker: str, blocked: str, reason: str, block_level: str, fi print("ERROR: failed SQL query:", blocker, blocked, reason, block_level, first_added, last_seen) sys.exit(255) -def add_instance(domain: str, originator: str): - # NOISY-DEBUG: print("DEBUG: domain,originator:", domain, originator) +def add_instance(domain: str, origin: str, originator: str): + # NOISY-DEBUG: print("DEBUG: domain,origin:", domain, origin, originator) if domain.find("@") > 0: print("WARNING: Bad domain name:", domain) raise - elif originator is not None and originator.find("@") > 0: - print("WARNING: Bad originator name:", originator) + elif origin is not None and origin.find("@") > 0: + print("WARNING: Bad origin name:", origin) raise - print("--- Adding new instance:", domain, originator) + print(f"--- Adding new instance {domain} (origin: {origin})") try: c.execute( - "INSERT INTO instances (domain,originator,hash,software) VALUES (?, ?, ?, ?)", + "INSERT INTO instances (domain,origin,originator,hash,software) VALUES (?, ?, ?, ?, ?)", ( domain, + origin, originator, get_hash(domain), determine_software(domain) @@ -314,7 +315,7 @@ def get_mastodon_blocks(domain: str) -> dict: try: doc = BeautifulSoup( - reqto.get(f"https://{domain}/about/more", headers=headers, timeout=5).text, + reqto.get(f"https://{domain}/about/more", headers=headers, timeout=config["timeout"]).text, "html.parser", ) except: @@ -351,7 +352,7 @@ def get_friendica_blocks(domain: str) -> dict: try: doc = BeautifulSoup( - reqto.get(f"https://{domain}/friendica", headers=headers, timeout=5).text, + reqto.get(f"https://{domain}/friendica", headers=headers, timeout=config["timeout"]).text, "html.parser", ) except: diff --git a/fetch_blocks.py b/fetch_blocks.py index cc2b0ec..a4248ac 100644 --- a/fetch_blocks.py +++ b/fetch_blocks.py @@ -76,7 +76,7 @@ for blocker, software in fba.c.fetchall(): if fba.c.fetchone() == None: # NOISY-DEBUG: print("DEBUG: Domain wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker) + fba.add_instance(blocked, blocker, argv[0]) timestamp = int(time.time()) fba.c.execute( @@ -166,7 +166,7 @@ for blocker, software in fba.c.fetchall(): # handling CSRF, I've saw at least one server requiring it to access the endpoint # NOISY-DEBUG: print("DEBUG: Fetching meta:", blocker) meta = bs4.BeautifulSoup( - reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=5).text, + reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=fba.config["timeout"]).text, "html.parser", ) try: @@ -178,7 +178,7 @@ for blocker, software in fba.c.fetchall(): reqheaders = fba.headers # NOISY-DEBUG: print("DEBUG: Quering API domain_blocks:", blocker) - blocks = reqto.get(f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5).json() + blocks = reqto.get(f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=fba.config["timeout"]).json() # NOISY-DEBUG: print("DEBUG: blocks():", len(blocks)) for block in blocks: @@ -229,7 +229,7 @@ for blocker, software in fba.c.fetchall(): if fba.c.fetchone() == None: # NOISY-DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, blocker) - fba.add_instance(blocked, blocker) + fba.add_instance(blocked, blocker, argv[0]) else: # Doing the hash search for instance names as well to tidy up DB fba.c.execute( @@ -350,7 +350,7 @@ for blocker, software in fba.c.fetchall(): print("INFO: blocker:", blocker) try: # Blocks - federation = reqto.get(f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=fba.headers, timeout=5).json() + federation = reqto.get(f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=fba.headers, timeout=fba.config["timeout"]).json() if (federation == None): print("WARNING: No valid response:", blocker); diff --git a/fetch_instances.py b/fetch_instances.py index e4a9f5a..8a4a45c 100644 --- a/fetch_instances.py +++ b/fetch_instances.py @@ -4,8 +4,8 @@ import json import time import fba -def fetch_instances(domain: str, originator: str): - # NOISY-DEBUG: print("DEBUG: Fetching instances for domain:", domain, originator) +def fetch_instances(domain: str, origin: str): + # NOISY-DEBUG: print("DEBUG: Fetching instances for domain:", domain, origin) peerlist = fba.get_peers(domain) if (peerlist is None): @@ -16,8 +16,8 @@ def fetch_instances(domain: str, originator: str): ) if fba.c.fetchone() == None: - # NOISY-DEBUG: print("DEBUG: Adding new domain:", domain, originator) - fba.add_instance(domain, originator) + # NOISY-DEBUG: print("DEBUG: Adding new domain:", domain, origin) + fba.add_instance(domain, origin, sys.argv[0]) fba.conn.commit() return @@ -45,7 +45,7 @@ def fetch_instances(domain: str, originator: str): if fba.c.fetchone() == None: # NOISY-DEBUG: print("DEBUG: Adding new instance:", instance, domain) - fba.add_instance(instance, domain) + fba.add_instance(instance, domain, sys.argv[0]) fba.conn.commit() -- 2.39.5