git.mxchange.org Git - fba.git/blobdiff - fetch_blocks.py
Continued:
[fba.git] / fetch_blocks.py
index 65b8f6366d683e3716d97b11500997a2d2952d9b..193969a67766c1e1af79dbd30803a8f3244c2462 100644 (file)
-from requests import get
-from hashlib import sha256
-import sqlite3
-from bs4 import BeautifulSoup
-
-headers = {
-    "user-agent": "Mozilla/5.0 (Windows NT 10.0; rv:91.0) Gecko/20100101 Firefox/91.0"
-}
-
-
-def get_mastodon_blocks(domain: str) -> dict:
-    blocks = {
-        "Suspended servers": [],
-        "Filtered media": [],
-        "Limited servers": [],
-        "Silenced servers": [],
-    }
-
-    translations = {
-        "Gesperrte Server": "Suspended servers",
-        "Gefilterte Medien": "Filtered media",
-        "Stummgeschaltete Server": "Silenced servers",
-        "停止済みのサーバー": "Suspended servers",
-        "メディアを拒否しているサーバー": "Filtered media",
-        "サイレンス済みのサーバー": "Silenced servers",
-        "Serveurs suspendus": "Suspended servers",
-        "Médias filtrés": "Filtered media",
-        "Serveurs limités": "Silenced servers",
-    }
-
-    try:
-        doc = BeautifulSoup(
-            get(f"https://{domain}/about/more", headers=headers, timeout=5).text,
-            "html.parser",
-        )
-    except:
-        return {}
-
-    for header in doc.find_all("h3"):
-        for line in header.find_next_siblings("table")[0].find_all("tr")[1:]:
-            header_text = header.text
-            if header_text in translations:
-                    header_text = translations[header_text]
-            if header_text in blocks:
-                blocks[header_text].append(
-                    {
-                        "domain": line.find("span").text,
-                        "hash": line.find("span")["title"][9:],
-                        "reason": line.find_all("td")[1].text.strip(),
-                    }
-                )
-    return {
-        "reject": blocks["Suspended servers"],
-        "media_removal": blocks["Filtered media"],
-        "federated_timeline_removal": blocks["Limited servers"]
-        + blocks["Silenced servers"],
-    }
-
-def get_friendica_blocks(domain: str) -> dict:
-    blocks = []
-
-    try:
-        doc = BeautifulSoup(
-            get(f"https://{domain}/friendica", headers=headers, timeout=5).text,
-            "html.parser",
-        )
-    except:
-        return {}
-
-    blocklist = doc.find(id="about_blocklist")
-    for line in blocklist.find("table").find_all("tr")[1:]:
-            blocks.append(
-                {
-                    "domain": line.find_all("td")[0].text.strip(),
-                    "reason": line.find_all("td")[1].text.strip()
-                }
-            )
-
-    return {
-        "reject": blocks
-    }
-
-def get_hash(domain: str) -> str:
-    return sha256(domain.encode("utf-8")).hexdigest()
-
-
-def get_type(domain: str) -> str:
-    try:
-        res = get(f"https://{domain}/nodeinfo/2.1.json", headers=headers, timeout=5)
-        if res.status_code == 404:
-            res = get(f"https://{domain}/nodeinfo/2.0", headers=headers, timeout=5)
-        if res.status_code == 404:
-            res = get(f"https://{domain}/nodeinfo/2.0.json", headers=headers, timeout=5)
-        if res.ok and "text/html" in res.headers["content-type"]:
-            res = get(f"https://{domain}/nodeinfo/2.1", headers=headers, timeout=5)
-        if res.ok:
-            if res.json()["software"]["name"] == "akkoma":
-                return "pleroma"
-            elif res.json()["software"]["name"] == "hometown":
-                return "mastodon"
-            elif res.json()["software"]["name"] == "ecko":
-                return "mastodon"
-            else:
-                return res.json()["software"]["name"]
-        elif res.status_code == 404:
-            res = get(f"https://{domain}/api/v1/instance", headers=headers, timeout=5)
-        if res.ok:
-            return "mastodon"
-    except:
-        return None
+import reqto
+import time
+import bs4
+import fba
+import itertools
+import re
 
-
-conn = sqlite3.connect("blocks.db")
-c = conn.cursor()
-
-c.execute(
-    "select domain, software from instances where software in ('pleroma', 'mastodon', 'friendica')"
+fba.c.execute(
+    "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
 )
 
-for blocker, software in c.fetchall():
+for blocker, software in fba.c.fetchall():
+    print("DEBUG: blocker,software:", blocker, software)
+    blockdict = []
+    blocker = fba.tidyup(blocker)
     if software == "pleroma":
-        print(blocker)
+        print("INFO: blocker:", blocker)
         try:
             # Blocks
-            federation = get(
-                f"https://{blocker}/nodeinfo/2.1.json", headers=headers, timeout=5
+            federation = reqto.get(
+                f"https://{blocker}/nodeinfo/2.1.json", headers=fba.headers, timeout=5
             ).json()["metadata"]["federation"]
             if "mrf_simple" in federation:
                 for block_level, blocks in (
@@ -131,113 +26,347 @@ for blocker, software in c.fetchall():
                     **{"quarantined_instances": federation["quarantined_instances"]}}
                 ).items():
                     for blocked in blocks:
+                        print("DEBUG: BEFORE blocked:", blocked)
+                        blocked = fba.tidyup(blocked)
+                        print("DEBUG: AFTER blocked:", blocked)
+
                         if blocked == "":
+                            print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                             continue
-                        blocked == blocked.lower()
-                        blocker == blocker.lower()
-                        c.execute(
-                            "select domain from instances where domain = ?", (blocked,)
-                        )
-                        if c.fetchone() == None:
-                            c.execute(
-                                "insert into instances select ?, ?, ?",
-                                (blocked, get_hash(blocked), get_type(blocked)),
+
+                        if blocked.count("*") > 1:
+                            # -ACK!-oma also started obscuring domains without hash
+                            fba.c.execute(
+                                "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
                             )
-                        c.execute(
-                            "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
-                            (blocker, blocked, block_level),
+                            searchres = fba.c.fetchone()
+                            print("DEBUG: searchres[]:", type(searchres))
+                            if searchres != None:
+                                blocked = searchres[0]
+
+                        print("DEBUG: Looking up instance by domain:", blocked)
+                        fba.c.execute(
+                            "SELECT domain FROM instances WHERE domain = ?", (blocked,)
                         )
-                        if c.fetchone() == None:
-                            c.execute(
-                                "insert into blocks select ?, ?, '', ?",
-                                (blocker, blocked, block_level),
-                            )
-            conn.commit()
+
+                        if fba.c.fetchone() == None:
+                            print("DEBUG: Domain wasn't found, adding:", blocked)
+                            fba.add_instance(blocked)
+
+                        timestamp = int(time.time())
+                        fba.c.execute(
+                            "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
+                            (
+                               blocker,
+                               blocked,
+                               block_level
+                           ),
+                        )
+
+                        if fba.c.fetchone() == None:
+                            fba.block_instance(blocker, blocked, "unknown", block_level, timestamp, timestamp)
+
+                            if block_level == "reject":
+                                blockdict.append(
+                                    {
+                                        "blocked": blocked,
+                                        "reason": None
+                                    })
+                        else:
+                            fba.update_last_seen(timestamp, blocker, blocked, block_level)
+
+            fba.conn.commit()
+
             # Reasons
             if "mrf_simple_info" in federation:
+                print("DEBUG: Found mrf_simple_info:", blocker)
                 for block_level, info in (
                     {**federation["mrf_simple_info"],
                     **(federation["quarantined_instances_info"]
                     if "quarantined_instances_info" in federation
                     else {})}
                 ).items():
+                    print("DEBUG: block_level, info.items():", block_level, len(info.items()))
                     for blocked, reason in info.items():
-                        blocker == blocker.lower()
-                        blocked == blocked.lower()
-                        c.execute(
-                            "update blocks set reason = ? where blocker = ? and blocked = ? and block_level = ?",
-                            (reason["reason"], blocker, blocked, block_level),
-                        )
-            conn.commit()
+                        print("DEBUG: BEFORE blocked:", blocked)
+                        blocked = fba.tidyup(blocked)
+                        print("DEBUG: AFTER blocked:", blocked)
+
+                        if blocked == "":
+                            print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
+                            continue
+
+                        if blocked.count("*") > 1:
+                            # same domain guess as above, but for reasons field
+                            fba.c.execute(
+                                "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
+                            )
+                            searchres = fba.c.fetchone()
+
+                            if searchres != None:
+                                blocked = searchres[0]
+
+                        print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"])
+                        fba.update_block_reason(reason["reason"], blocker, blocked, block_level)
+
+                        for entry in blockdict:
+                            if entry["blocked"] == blocked:
+                                print("DEBUG: Updating entry reason:", blocked)
+                                entry["reason"] = reason["reason"]
+
+            fba.conn.commit()
         except Exception as e:
-            print("error:", e, blocker)
+            print("error:", e, blocker, software)
     elif software == "mastodon":
-        print(blocker)
+        print("INFO: blocker:", blocker)
         try:
-            json = get_mastodon_blocks(blocker)
+            # JSON endpoint for newer Mastodon versions
+            try:
+                json = {
+                    "reject": [],
+                    "media_removal": [],
+                    "followers_only": [],
+                    "report_removal": []
+                }
+
+                # Handle CSRF: I've seen at least one server requiring a token to access the endpoint
+                print("DEBUG: Fetching meta:", blocker)
+                meta = bs4.BeautifulSoup(
+                    reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=5).text,
+                    "html.parser",
+                )
+                try:
+                    csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
+                    print("DEBUG: Adding CSRF token:", blocker, csrf)
+                    reqheaders = {**fba.headers, **{"x-csrf-token": csrf}}
+                except:
+                    print("DEBUG: No CSRF token found, using normal headers:", blocker)
+                    reqheaders = fba.headers
+
+                print("DEBUG: Quering API domain_blocks:", blocker)
+                blocks = reqto.get(
+                    f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5
+                ).json()
+
+                print("DEBUG: blocks():", len(blocks))
+                for block in blocks:
+                    entry = {'domain': block['domain'], 'hash': block['digest'], 'reason': block['comment']}
+
+                    print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
+                    if block['severity'] == 'suspend':
+                        json['reject'].append(entry)
+                    elif block['severity'] == 'silence':
+                        json['followers_only'].append(entry)
+                    elif block['severity'] == 'reject_media':
+                        json['media_removal'].append(entry)
+                    elif block['severity'] == 'reject_reports':
+                        json['report_removal'].append(entry)
+                    else:
+                        print("WARNING: Unknown severity:", block['severity'], block['domain'])
+            except:
+                print("DEBUG: Failed, Trying mastodon-specific fetches:", blocker)
+                json = fba.get_mastodon_blocks(blocker)
+
+            print("DEBUG: json.items():", blocker, len(json.items()))
             for block_level, blocks in json.items():
+                print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
                 for instance in blocks:
                     blocked, blocked_hash, reason = instance.values()
-                    blocked == blocked.lower()
-                    blocker == blocker.lower()
-                    if blocked.count("*") <= 1:
-                        c.execute(
-                            "select hash from instances where hash = ?", (blocked_hash,)
+                    print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason)
+
+                    blocked = fba.tidyup(blocked)
+                    print("DEBUG: blocked:", blocked)
+
+                    if blocked.count("*") < 1:
+                        # No obfuscation for this instance
+                        fba.c.execute(
+                            "SELECT hash FROM instances WHERE domain = ? LIMIT 1", (blocked,)
                         )
-                        if c.fetchone() == None:
-                            c.execute(
-                                "insert into instances select ?, ?, ?",
-                                (blocked, get_hash(blocked), get_type(blocked)),
-                            )
-                    c.execute(
-                        "select * from blocks where blocker = ? and blocked = ? and block_level = ?",
-                        (blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level),
-                    )
-                    if c.fetchone() == None:
-                        c.execute(
-                            "insert into blocks select ?, ?, ?, ?",
-                            (
-                                blocker,
-                                blocked if blocked.count("*") <= 1 else blocked_hash,
-                                reason,
-                                block_level,
-                            ),
+
+                        if fba.c.fetchone() == None:
+                            print("DEBUG: Hash wasn't found, adding:", blocked)
+                            fba.add_instance(blocked)
+                    else:
+                        # Doing the hash search for instance names as well to tidy up DB
+                        fba.c.execute(
+                            "SELECT domain FROM instances WHERE hash = ? LIMIT 1", (blocked_hash,)
                         )
-            conn.commit()
+                        searchres = fba.c.fetchone()
+
+                        if searchres != None:
+                            print("DEBUG: Updating domain: ", searchres[0])
+                            blocked = searchres[0]
+
+                    timestamp = int(time.time())
+                    fba.c.execute(
+                        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
+                        (
+                            blocker,
+                            blocked if blocked.count("*") <= 1 else blocked_hash,
+                            block_level
+                        ),
+                    )
+
+                    if fba.c.fetchone() == None:
+                        fba.block_instance(blocker, blocked if blocked.count("*") <= 1 else blocked_hash, reason, block_level, timestamp, timestamp)
+
+                        if block_level == "reject":
+                            blockdict.append(
+                                {
+                                    "blocked": blocked,
+                                    "reason": reason
+                                })
+                    else:
+                        fba.update_last_seen(timestamp, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level)
+
+                    if reason != '':
+                        print("DEBUG: Updating block reason:", blocker, blocked, reason)
+                        fba.update_block_reason(reason, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level)
+
+            fba.conn.commit()
         except Exception as e:
-            print("error:", e, blocker)
-    elif software == "friendica":
-        print(blocker)
+            print("error:", e, blocker, software)
+    elif software == "friendica" or software == "misskey":
+        print("INFO: blocker:", blocker)
         try:
-            json = get_friendica_blocks(blocker)
+            if software == "friendica":
+                json = fba.get_friendica_blocks(blocker)
+            elif software == "misskey":
+                json = fba.get_misskey_blocks(blocker)
             for block_level, blocks in json.items():
                 for instance in blocks:
                     blocked, reason = instance.values()
-                    blocked == blocked.lower()
-                    blocker == blocker.lower()
-                    c.execute(
-                        "select domain from instances where domain = ?", (blocked,)
-                    )
-                    if c.fetchone() == None:
-                        c.execute(
-                            "insert into instances select ?, ?, ?",
-                            (blocked, get_hash(blocked), get_type(blocked)),
+                    blocked = fba.tidyup(blocked)
+
+                    print("BEFORE-blocked:", blocked)
+                    if blocked.count("*") > 0:
+                        # Some friendica servers also obscure domains without hash
+                        fba.c.execute(
+                            "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
+                        )
+                        searchres = fba.c.fetchone()
+                        if searchres != None:
+                            blocked = searchres[0]
+
+                    if blocked.count("?") > 0:
+                        # Some obscure them with question marks, not sure if that's dependent on version or not
+                        fba.c.execute(
+                            "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("?", "_"),)
                         )
-                    c.execute(
-                        "select * from blocks where blocker = ? and blocked = ?",
+                        searchres = fba.c.fetchone()
+                        if searchres != None:
+                            blocked = searchres[0]
+
+                    print("AFTER-blocked:", blocked)
+                    fba.c.execute(
+                        "SELECT domain FROM instances WHERE domain = ?", (blocked,)
+                    )
+
+                    if fba.c.fetchone() == None:
+                        print("DEBUG: Hash wasn't found, adding:", blocked)
+                        fba.add_instance(blocked)
+
+                    timestamp = int(time.time())
+                    fba.c.execute(
+                        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ?",
                         (blocker, blocked),
                     )
-                    if c.fetchone() == None:
-                        c.execute(
-                            "insert into blocks select ?, ?, ?, ?",
-                            (
-                                blocker,
-                                blocked,
-                                reason,
-                                block_level,
-                            ),
+                    if fba.c.fetchone() == None:
+                        fba.block_instance(blocker, blocked, reason, block_level, timestamp, timestamp)
+
+                        if block_level == "reject":
+                            blockdict.append(
+                                {
+                                    "blocked": blocked,
+                                    "reason": reason
+                                })
+                    else:
+                        fba.update_last_seen(timestamp, blocker, blocked, block_level)
+
+                    if reason != '':
+                        print("DEBUG: Updating block reason:", blocker, blocked, reason)
+                        fba.update_block_reason(reason, blocker, blocked, block_level)
+
+            fba.conn.commit()
+        except Exception as e:
+            print("error:", e, blocker, software)
+    elif software == "gotosocial":
+        print("INFO: blocker:", blocker)
+        try:
+            # Blocks
+            federation = reqto.get(
+                f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=fba.headers, timeout=5
+            ).json()
+
+            if (federation == None):
+                print("WARNING: No valid response:", blocker);
+            else:
+                for peer in federation:
+                    print("DEBUG: peer(),[]:", len(peer), type(peer))
+                    if (isinstance(peer, str) and peer == "error"):
+                        print("WARNING: Cannot continue, maybe authentication required?", blocker)
+                        break
+
+                    blocked = peer["domain"].lower()
+                    print("DEBUG: blocked:", blocked)
+
+                    if blocked.count("*") > 0:
+                        # GTS does not have hashes for obscured domains, so we have to guess it
+                        fba.c.execute(
+                            "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
                         )
-            conn.commit()
+                        searchres = fba.c.fetchone()
+
+                        if searchres != None:
+                            blocked = searchres[0]
+
+                    fba.c.execute(
+                        "SELECT domain FROM instances WHERE domain = ?", (blocked,)
+                    )
+
+                    if fba.c.fetchone() == None:
+                        print("DEBUG: Hash wasn't found, adding:", blocked)
+                        fba.add_instance(blocked)
+
+                    fba.c.execute(
+                        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
+                        (
+                            blocker,
+                            blocked,
+                            "reject"
+                        ),
+                    )
+                    timestamp = int(time.time())
+
+                    if fba.c.fetchone() == None:
+                        fba.block_instance(blocker, blocked, "", "reject", timestamp, timestamp)
+
+                        blockdict.append(
+                            {
+                                "blocked": blocked,
+                                "reason": None
+                            })
+                    else:
+                        fba.update_last_seen(timestamp, blocker, blocked, "reject")
+
+                    if "public_comment" in peer:
+                        reason = peer["public_comment"]
+                        print("DEBUG: Updating block reason:", blocker, blocked, reason)
+                        fba.update_block_reason(reason, blocker, blocked, "reject")
+
+                        for entry in blockdict:
+                            if entry["blocked"] == blocked:
+                                entry["reason"] = reason
+
+                fba.conn.commit()
         except Exception as e:
-            print("error:", e, blocker)
-conn.close()
+            print("error:", e, blocker, software)
+    else:
+        print("WARNING: Unknown software:", software)
+
+    if fba.config["bot_enabled"] and len(blockdict) > 0:
+        send_bot_post(blocker, blockdict)
+
+    blockdict = []
+
+fba.conn.close()