From: Roland Häder Date: Tue, 23 May 2023 06:18:51 +0000 (+0200) Subject: Continued: X-Git-Url: https://git.mxchange.org/?a=commitdiff_plain;h=e7dbafd550c9011fde24e3f390447266e80dd0c6;p=fba.git Continued: - added detection_mode and nodeinfo_url to table 'instances' - described arrays --- diff --git a/blocks_empty.db b/blocks_empty.db index 0832008..dea0936 100644 Binary files a/blocks_empty.db and b/blocks_empty.db differ diff --git a/fba.py b/fba.py index dc29ffd..32a0d23 100644 --- a/fba.py +++ b/fba.py @@ -11,27 +11,47 @@ import time with open("config.json") as f: config = json.loads(f.read()) +# Don't check these, known trolls/flooders/testing/developing blacklist = [ + # Floods network with fake nodes as "research" project "activitypub-troll.cf", + # Similar troll "gab.best", + # Similar troll "4chan.icu", + # Flooder (?) "social.shrimpcam.pw", + # Flooder (?) "mastotroll.netz.org", + # Testing/developing installations "ngrok.io", ] +# Array with pending errors needed to be written to database pending_errors = { } -nodeinfos = [ +# "rel" identifiers (no real URLs) +nodeinfo_identifier = [ "http://nodeinfo.diaspora.software/ns/schema/2.1", "http://nodeinfo.diaspora.software/ns/schema/2.0", "http://nodeinfo.diaspora.software/ns/schema/1.1", "http://nodeinfo.diaspora.software/ns/schema/1.0", ] +# HTTP headers for requests headers = { - "user-agent": config["useragent"] + "user-agent": config["useragent"], +} + +# Found info from node, such as nodeinfo URL, detection mode that needs to be +# written to database. Both arrays must be filled at the same time or else +# update_nodeinfo() will fail +nodeinfos = { + # Detection mode: AUTO_DISCOVERY or STATIC_CHECKS + "detection_mode": {}, + # Found nodeinfo URL + "nodeinfo_url": {}, } connection = sqlite3.connect("blocks.db") @@ -64,6 +84,30 @@ def update_last_blocked(domain: str): print("ERROR: failed SQL query:", domain, e) sys.exit(255) +def update_nodeinfos(domain: str): + #print("DEBUG: Updating nodeinfo for domain:", domain) + if domain not in nodeinfos["detection_mode"] or domain not in nodeinfos["nodeinfo_url"]: + print(f"WARNING: domain {domain} has no pending nodeinfo!") + raise + + try: + cursor.execute("UPDATE instances SET detection_mode = ?, nodeinfo_url = ? WHERE domain = ? LIMIT 1", [ + nodeinfos["detection_mode"][domain], + nodeinfos["nodeinfo_url"][domain], + domain + ]) + + if cursor.rowcount == 0: + print("WARNING: Did not update any rows:", domain) + + except BaseException as e: + print("ERROR: failed SQL query:", domain, e) + sys.exit(255) + + # NOISY-DEBUG: print("DEBUG: Deleting nodeinfos for domain:", domain) + del nodeinfos["detection_mode"][domain] + del nodeinfos["nodeinfo_url"][domain] + def update_last_error(domain: str, res: any): # NOISY-DEBUG: print("DEBUG: domain,res.status_code:", domain, res.status_code, res.reason) try: @@ -93,6 +137,9 @@ def update_last_error(domain: str, res: any): print("ERROR: failed SQL query:", domain, e) sys.exit(255) + # NOISY-DEBUG: print("DEBUG: Deleting pending_errors for domain:", domain) + del pending_errors[domain] + def update_last_nodeinfo(domain: str): # NOISY-DEBUG: print("DEBUG: Updating last_nodeinfo for domain:", domain) try: @@ -181,6 +228,8 @@ def fetch_nodeinfo(domain: str) -> list: if res.ok and res.json() is not None: # NOISY-DEBUG: print("DEBUG: Success:", request) json = res.json() + nodeinfos["detection_mode"][domain] = "STATIC_CHECK" + nodeinfos["nodeinfo_url"][domain] = request break elif not res.ok or res.status_code >= 400: print("WARNING: Failed fetching nodeinfo from domain:", domain) @@ -213,13 +262,15 @@ def fetch_wellknown_nodeinfo(domain: str) -> list: # NOISY-DEBUG: print("DEBUG: Found links in nodeinfo():", len(nodeinfo["links"])) for link in nodeinfo["links"]: # NOISY-DEBUG: print("DEBUG: rel,href:", link["rel"], link["href"]) - if link["rel"] in nodeinfos: + if link["rel"] in nodeinfo_identifier: # NOISY-DEBUG: print("DEBUG: Fetching nodeinfo from:", link["href"]) res = reqto.get(link["href"]) # NOISY-DEBUG: print("DEBUG: href,res.ok,res.status_code:", link["href"], res.ok, res.status_code) if res.ok and res.json() is not None: # NOISY-DEBUG: print("DEBUG: Found JSON nodeinfo():", len(res.json())) json = res.json() + nodeinfos["detection_mode"][domain] = "AUTO_DISCOVERY" + nodeinfos["nodeinfo_url"][domain] = link["href"] break else: print("WARNING: Unknown 'rel' value:", domain, link["rel"]) @@ -381,8 +432,12 @@ def add_instance(domain: str, origin: str, originator: str): ), ) + if domain in nodeinfos["nodeinfo_url"]: + # NOISY-DEBUG print("DEBUG: domain has pending nodeinfo being updated:", domain) + update_nodeinfos(domain) + if domain in pending_errors: - # NOISY-DEBUG: print("DEBUG: domain has pending error be updated:", domain) + # NOISY-DEBUG: print("DEBUG: domain has pending error being updated:", domain) update_last_error(domain, pending_errors[domain]) del pending_errors[domain]