X-Git-Url: https://git.mxchange.org/?a=blobdiff_plain;f=fetch_instances.py;h=63a8fa793fac486f3933c2e142fc113334907fad;hb=081d47c1784379b4e7d7104c239c40f106f7bcf1;hp=c59d7fa33927a887bcf928aadabb561410901bf5;hpb=d8fc0154b73c75fc57c733cde920a4bcd086640f;p=fba.git diff --git a/fetch_instances.py b/fetch_instances.py old mode 100644 new mode 100755 index c59d7fa..63a8fa7 --- a/fetch_instances.py +++ b/fetch_instances.py @@ -1,66 +1,89 @@ -from requests import get -from hashlib import sha256 +#!/usr/bin/python3 +# -*- coding: utf-8 -*- + +# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes +# Copyright (C) 2023 Free Software Foundation +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as published +# by the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . + import sqlite3 import sys import json +import time +import validators +import fba -domain = sys.argv[1] +def fetch_instances(domain: str, origin: str, software: str, path: str = None): + # NOISY-DEBUG: print("DEBUG: domain,origin,software,path:", domain, origin, software, path) + if not fba.is_instance_registered(domain): + # NOISY-DEBUG: print("DEBUG: Adding new domain:", domain, origin) + fba.add_instance(domain, origin, sys.argv[0], path) -headers = { - "user-agent": "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0" -} + # NOISY-DEBUG: print("DEBUG: Fetching instances for domain:", domain, software) + peerlist = fba.get_peers(domain, software) + if (peerlist is None): + print("ERROR: Cannot fetch peers:", domain) + return + elif fba.has_pending_nodeinfos(domain): + # NOISY-DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo data, flushing ...") + fba.update_nodeinfos(domain) -def get_hash(domain: str) -> str: - return sha256(domain.encode("utf-8")).hexdigest() + print(f"INFO: Checking {len(peerlist)} instances from {domain} ...") + for instance in peerlist: + # NOISY-DEBUG: print("DEBUG: BEFORE instance:", instance) + instance = fba.tidyup(instance) + # NOISY-DEBUG: print("DEBUG: AFTER instance:", instance) + if instance == "": + print("WARNING: Empty instance after tidyup(), domain:", domain) + continue + elif not validators.domain(instance.split("/")[0]): + print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}',software='{software}'") + continue + elif fba.is_blacklisted(instance): + # NOISY-DEBUG: print("DEBUG: instance is blacklisted:", instance) + continue -def get_peers(domain: str) -> str: - try: - res = get(f"https://{domain}/api/v1/instance/peers", headers=headers, timeout=5) - return res.json() - except: - return None + # NOISY-DEBUG: print("DEBUG: Handling instance:", instance) + try: + if not fba.is_instance_registered(instance): + # NOISY-DEBUG: print("DEBUG: Adding new instance:", instance, domain) + fba.add_instance(instance, domain, sys.argv[0]) + except BaseException as e: + print(f"ERROR: instance='{instance}',exception:'{str(e)}'") + continue -peerlist = get_peers(domain) +instance = sys.argv[1] -def get_type(instdomain: str) -> str: - try: - res = get(f"https://{instdomain}/nodeinfo/2.1.json", headers=headers, timeout=5) - if res.status_code == 404: - res = get(f"https://{instdomain}/nodeinfo/2.0.json", headers=headers, timeout=5) - if res.ok and "text/html" in res.headers["content-type"]: - res = get(f"https://{instdomain}/nodeinfo/2.1", headers=headers, timeout=5) - if res.ok: - if res.json()["software"]["name"] == "akkoma": - return "pleroma" - else: - return res.json()["software"]["name"] - elif res.status_code == 404: - res = get(f"https://{instdomain}/api/v1/instance", headers=headers, timeout=5) - if res.ok: - return "mastodon" - except: - return None +# Initial fetch +fetch_instances(instance, None, None) +# Loop through some instances +fba.cursor.execute( + "SELECT domain, origin, software, nodeinfo_url FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial', 'bookwyrm', 'takahe', 'lemmy') AND (last_instance_fetch IS NULL OR last_instance_fetch < ?) ORDER BY rowid DESC", [time.time() - fba.config["recheck_instance"]] +) -conn = sqlite3.connect("blocks.db") -c = conn.cursor() +rows = fba.cursor.fetchall() +print(f"INFO: Checking {len(rows)} entries ...") +for row in rows: + # NOISY-DEBUG: print("DEBUG: domain:", row[0]) + if fba.is_blacklisted(row[0]): + print("WARNING: domain is blacklisted:", row[0]) + continue -c.execute( - "select domain from instances where 1" -) + print(f"INFO: Fetching instances for instance '{row[0]}'('{row[2]}') of origin '{row[1]}',nodeinfo_url='{row[3]}'") + fetch_instances(row[0], row[1], row[2], row[3]) -for instance in peerlist: - instance = instance.lower() - print(instance) - try: - if c.fetchone() == None: - c.execute( - "insert into instances select ?, ?, ?", - (instance, get_hash(instance), get_type(instance)), - ) - conn.commit() - except Exception as e: - print("error:", e, instance) -conn.close() +fba.connection.close()