From 465961a02e1115a051bcf79e1c290d27c1d0fac8 Mon Sep 17 00:00:00 2001 From: =?utf8?q?Roland=20H=C3=A4der?= Date: Sat, 20 May 2023 12:32:57 +0200 Subject: [PATCH] Continued: - some improvements --- blocks_empty.db | Bin 24576 -> 28672 bytes fba.py | 67 ++++++++++++++++++++++++++++++--------------- fetch_blocks.py | 2 +- fetch_instances.py | 14 ++++++++-- 4 files changed, 58 insertions(+), 25 deletions(-) diff --git a/blocks_empty.db b/blocks_empty.db index f6a323c20f6559f7c8030cc64ae9b091767bdbd9..af1c0a79e8d3719772c01d6a9f66b355ca7a0634 100644 GIT binary patch delta 310 zcmZoTz}WDBae}nqQw9bGb|_{9($W)kj3u8k=y^}%<=0_g=SW delta 257 zcmZp8z}Rqrae}nqBL)TrHXw!p{)sxq;*S{gyeIPV>oBnLN-^+C@ip_lyeRRJ!npuxqNlAoKH zna3I88WEzy#hH; str: # NOISY-DEBUG: print("DEBUG: Calculating hash for domain:", domain) return sha256(domain.encode("utf-8")).hexdigest() +def update_last_error(domain: str, res: any): + # NOISY-DEBUG: print("DEBUG: domain,res.status_code", domain, res.status_code) + + try: + c.execute("UPDATE instances SET last_status_code = ? WHERE domain = ?", [ + res.status_code, + domain + ]) + + except: + print("ERROR: failed SQL query:", domain) + sys.exit(255) + def update_last_nodeinfo(domain: str): # NOISY-DEBUG: print("DEBUG: Updating last_nodeinfo for domain:", domain) @@ -44,35 +57,45 @@ def update_last_nodeinfo(domain: str): print("ERROR: failed SQL query:", domain) sys.exit(255) -def get_peers(domain: str) -> str: +def get_peers(domain: str) -> list: # NOISY-DEBUG: print("DEBUG: Getting peers for domain:", domain) peers = None try: res = reqto.get(f"https://{domain}/api/v1/instance/peers", headers=headers, timeout=5) - peers = res.json() + + if not res.ok or res.status_code >= 400: + print("WARNING: Cannot fetch peers:", domain) + update_last_error(domain, res) + else: + # NOISY-DEBUG: print("DEBUG: Querying API was successful:", domain, len(res.json())) + peers = res.json() + except: - print("WARNING: Cannot fetch peers:", domain) + print("WARNING: Some error during get():", domain) - if peers is not None: - update_last_nodeinfo(domain) + update_last_nodeinfo(domain) # NOISY-DEBUG: print("DEBUG: Returning peers[]:", type(peers)) return peers def post_json_api(domain: str, path: str, data: str) -> list: - # NOISY-DEBUG: print("DEBUG: Sending POST to domain,path,data:", domain, path, data) - res = reqto.post(f"https://{domain}{path}", data=data, headers=headers, timeout=5) + try: + # NOISY-DEBUG: print("DEBUG: Sending POST to domain,path,data:", domain, path, data) + res = reqto.post(f"https://{domain}{path}", data=data, headers=headers, timeout=5) + + if not res.ok or res.status_code >= 400: + print("WARNING: Cannot query JSON API:", domain, path, data, res.status_code) + update_last_error(domain, res) + raise - if not res.ok: - print("WARNING: Cannot query JSON API:", domain, path, data, res.status_code) - raise - else: update_last_nodeinfo(domain) + json = res.json() + except: + print("WARNING: Some error during post():", domain, path, data) - doc = res.json() - # NOISY-DEBUG: print("DEBUG: Returning doc():", len(doc)) - return doc + # NOISY-DEBUG: print("DEBUG: Returning json():", len(json)) + return json def fetch_nodeinfo(domain: str) -> list: # NOISY-DEBUG: print("DEBUG: Fetching nodeinfo from domain:", domain) @@ -87,22 +110,22 @@ def fetch_nodeinfo(domain: str) -> list: json = None for request in requests: - # NOISY-DEBUG: print("DEBUG: Fetching request:", request) - try: + # NOISY-DEBUG: print("DEBUG: Fetching request:", request) res = reqto.get(request, headers=headers, timeout=5) # NOISY-DEBUG: print("DEBUG: res.ok,res.json[]:", res.ok, type(res.json())) if res.ok and res.json() is not None: # NOISY-DEBUG: print("DEBUG: Success:", request) json = res.json() + break + elif not res.ok or res.status_code >= 400: + # NOISY-DEBUG: print("DEBUG: Failed fetching nodeinfo from domain:", domain) + update_last_error(domain, res) + continue except: - # NOISY-DEBUG: print("DEBUG: Failed fetching nodeinfo from domain:", domain) - continue - - if json is not None: - break + print("WARNING: Some error during get():", request) if json is None: print("WARNING: Failed fetching nodeinfo from domain:", domain) @@ -204,7 +227,7 @@ def add_instance(domain: str): print("--- Adding new instance:", domain) try: c.execute( - "INSERT INTO instances SELECT ?, ?, ?, NULL", + "INSERT INTO instances (domain,hash,software) VALUES (?, ?, ?)", ( domain, get_hash(domain), diff --git a/fetch_blocks.py b/fetch_blocks.py index 2cc6180..6984348 100644 --- a/fetch_blocks.py +++ b/fetch_blocks.py @@ -1,9 +1,9 @@ import reqto import time import bs4 -import fba import itertools import re +import fba fba.c.execute( "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial') ORDER BY rowid DESC" diff --git a/fetch_instances.py b/fetch_instances.py index 0f6103e..e1882cc 100644 --- a/fetch_instances.py +++ b/fetch_instances.py @@ -10,6 +10,16 @@ def fetch_instances(domain: str): if (peerlist is None): print("ERROR: Cannot fetch peers:", domain) + + fba.c.execute( + "SELECT domain FROM instances WHERE domain = ? LIMIT 1", (domain,) + ) + + if fba.c.fetchone() == None: + # NOISY-DEBUG: print("DEBUG: Adding new domain:", domain) + fba.add_instance(domain) + + fba.conn.commit() return for instance in peerlist: @@ -19,8 +29,8 @@ def fetch_instances(domain: str): continue blacklisted = False - for domain in fba.blacklist: - if domain in instance: + for peer in fba.blacklist: + if peer in instance: blacklisted = True if blacklisted: -- 2.39.5