]> git.mxchange.org Git - fba.git/commitdiff
Continued:
authorRoland Häder <roland@mxchange.org>
Wed, 17 May 2023 03:09:59 +0000 (05:09 +0200)
committerRoland Häder <roland@mxchange.org>
Wed, 17 May 2023 03:09:59 +0000 (05:09 +0200)
- encapsulation, still tons of duplicate records in `instances` table

api.py
fetch_blocks.py

diff --git a/api.py b/api.py
index 8e95e58d7e6e3c58964993d175e47ab61235a734..5df098fdecbbc4d12ed1908fc227b880dbb4b963 100644 (file)
--- a/api.py
+++ b/api.py
@@ -24,7 +24,7 @@ def get_hash(domain: str) -> str:
 def info():
     conn = sqlite3.connect("blocks.db")
     c = conn.cursor()
-    c.execute("SELECT (SELECT count(domain) FROM  instances), (SELECT count(domain) FROM  instances WHERE software in ('pleroma', 'mastodon', 'misskey', 'gotosocial', 'friendica')), (SELECT count(blocker) FROM  blocks)")
+    c.execute("SELECT (SELECT count(domain) FROM instances), (SELECT count(domain) FROM instances WHERE software IN ('pleroma', 'mastodon', 'misskey', 'gotosocial', 'friendica')), (SELECT count(blocker) FROM blocks)")
     known, indexed, blocks = c.fetchone()
     c.close()
     return {
@@ -42,11 +42,11 @@ def top(blocked: int = None, blockers: int = None):
     elif blocked != None:
         if blocked > 500:
             raise HTTPException(status_code=400, detail="Too many results")
-        c.execute("SELECT blocked, count(blocked) FROM  blocks WHERE block_level = 'reject' group by blocked order by count(blocked) desc limit ?", (blocked,))
+        c.execute("SELECT blocked, count(blocked) FROM blocks WHERE block_level = 'reject' group by blocked ORDER BY count(blocked) DESC LIMIT ?", (blocked,))
     elif blockers != None:
         if blockers > 500:
             raise HTTPException(status_code=400, detail="Too many results")
-        c.execute("SELECT blocker, count(blocker) FROM  blocks WHERE block_level = 'reject' group by blocker order by count(blocker) desc limit ?", (blockers,))
+        c.execute("SELECT blocker, count(blocker) FROM blocks WHERE block_level = 'reject' group by blocker ORDER BY count(blocker) DESC LIMIT ?", (blockers,))
     scores = c.fetchall()
     c.close()
 
@@ -70,12 +70,12 @@ def blocked(domain: str = None, reason: str = None, reverse: str = None):
     if domain != None:
         wildchar = "*." + ".".join(domain.split(".")[-domain.count("."):])
         punycode = domain.encode('idna').decode('utf-8')
-        c.execute("SELECT blocker, blocked, block_level, reason, first_added, last_seen FROM  blocks WHERE blocked = ? OR blocked = ? OR blocked = ? OR blocked = ? OR blocked = ? OR blocked = ? order by first_added asc",
+        c.execute("SELECT blocker, blocked, block_level, reason, first_added, last_seen FROM blocks WHERE blocked = ? OR blocked = ? OR blocked = ? OR blocked = ? OR blocked = ? OR blocked = ? ORDER BY first_added asc",
                   (domain, "*." + domain, wildchar, get_hash(domain), punycode, "*." + punycode))
     elif reverse != None:
-        c.execute("SELECT blocker, blocked, block_level, reason, first_added, last_seen FROM  blocks WHERE blocker = ? order by first_added asc", (reverse,))
+        c.execute("SELECT blocker, blocked, block_level, reason, first_added, last_seen FROM blocks WHERE blocker = ? ORDER BY first_added asc", (reverse,))
     else:
-        c.execute("SELECT blocker, blocked, block_level, reason, first_added, last_seen FROM  blocks WHERE reason like ? AND reason != '' order by first_added asc", ("%"+reason+"%",))
+        c.execute("SELECT blocker, blocked, block_level, reason, first_added, last_seen FROM blocks WHERE reason like ? AND reason != '' ORDER BY first_added asc", ("%"+reason+"%",))
     blocks = c.fetchall()
     c.close()
 
@@ -163,10 +163,10 @@ def rss(request: Request, domain: str = None):
     if domain != None:
         wildchar = "*." + ".".join(domain.split(".")[-domain.count("."):])
         punycode = domain.encode('idna').decode('utf-8')
-        c.execute("SELECT blocker, blocked, block_level, reason, first_added, last_seen FROM  blocks WHERE blocked = ? OR blocked = ? OR blocked = ? OR blocked = ? OR blocked = ? OR blocked = ? order by first_added desc limit 50",
+        c.execute("SELECT blocker, blocked, block_level, reason, first_added, last_seen FROM blocks WHERE blocked = ? OR blocked = ? OR blocked = ? OR blocked = ? OR blocked = ? OR blocked = ? ORDER BY first_added DESC LIMIT 50",
                   (domain, "*." + domain, wildchar, get_hash(domain), punycode, "*." + punycode))
     else:
-        c.execute("SELECT blocker, blocked, block_level, reason, first_added, last_seen FROM  blocks order by first_added desc limit 50")
+        c.execute("SELECT blocker, blocked, block_level, reason, first_added, last_seen FROM blocks ORDER BY first_added DESC LIMIT 50")
     blocks = c.fetchall()
     c.close()
 
index 4bbe4eafed130dd4141b94ebc5d96b87bcbe7860..58f86599f694b050381666b8032797c7a778da37 100644 (file)
@@ -16,6 +16,55 @@ headers = {
     "user-agent": config["useragent"]
 }
 
+def update_block_reason(reason: str, blocker: str, blocked: str, block_level: str):
+    # NOISY: print("--- Updating block reason:", reason, blocker, blocked, block_level)
+    c.execute(
+        "UPDATE blocks SET reason = ? WHERE blocker = ? AND blocked = ? AND block_level = ? AND reason = ''",
+        (
+            reason,
+            blocker,
+            blocked,
+            block_level
+        ),
+    )
+
+def update_last_seen(last_seen: int, blocker: str, blocked: str, block_level: str):
+    # NOISY: print("--- Updating last_seen:", last_seen, blocker, blocked, block_level)
+    c.execute(
+        "UPDATE blocks SET last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ?",
+        (
+            last_seen,
+            blocker,
+            blocked,
+            block_level
+        )
+    )
+
+def block_instance(blocker: str, blocked: str, reason: str, block_level: str, first_added: int, last_seen: int):
+    print("--- New block:", blocker, blocked, reason, block_level, first_added, last_seen)
+    c.execute(
+        "INSERT INTO blocks SELECT ?, ?, ?, ?, ?, ?",
+         (
+             blocker,
+             blocked,
+             reason,
+             block_level,
+             first_added,
+             last_seen
+         ),
+    )
+
+def add_instance (domain: str):
+    print("--- Adding new instance:", domain)
+    c.execute(
+        "INSERT INTO instances SELECT ?, ?, ?",
+        (
+           blocked,
+           get_hash(blocked),
+           get_type(blocked)
+        ),
+    )
+
 def send_bot_post(instance: str, blocks: dict):
     message = instance + " has blocked the following instances:\n\n"
     truncated = False
@@ -104,13 +153,19 @@ def get_friendica_blocks(domain: str) -> dict:
         return {}
 
     blocklist = doc.find(id="about_blocklist")
+
+    # Prevents exceptions:
+    if blocklist is None:
+        print("Instance has no block list:", domain)
+        return {}
+
     for line in blocklist.find("table").find_all("tr")[1:]:
-            blocks.append(
-                {
-                    "domain": line.find_all("td")[0].text.strip(),
-                    "reason": line.find_all("td")[1].text.strip()
-                }
-            )
+        blocks.append(
+            {
+                "domain": line.find_all("td")[0].text.strip(),
+                "reason": line.find_all("td")[1].text.strip()
+            }
+        )
 
     return {
         "reject": blocks
@@ -227,7 +282,8 @@ conn = sqlite3.connect("blocks.db")
 c = conn.cursor()
 
 c.execute(
-    "SELECT domain, software FROM  instances WHERE software in ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
+    "SELECT domain, software FROM instances WHERE domain='tooting.intensifi.es'"
+    #"SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
 )
 
 for blocker, software in c.fetchall():
@@ -247,35 +303,39 @@ for blocker, software in c.fetchall():
                 ).items():
                     for blocked in blocks:
                         blocked = tidyup(blocked)
+
                         if blocked == "":
+                            print("WARNING: blocked is empty after tidyup():", blocker, block_level)
                             continue
+
                         if blocked.count("*") > 1:
                             # -ACK!-oma also started obscuring domains without hash
                             c.execute(
-                                "SELECT domain FROM  instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
+                                "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
                             )
                             searchres = c.fetchone()
                             if searchres != None:
                                 blocked = searchres[0]
 
                         c.execute(
-                            "SELECT domain FROM  instances WHERE domain = ?", (blocked,)
+                            "SELECT domain FROM instances WHERE domain = ?", (blocked)
                         )
+
                         if c.fetchone() == None:
-                            c.execute(
-                                "INSERT INTO instances SELECT ?, ?, ?",
-                                (blocked, get_hash(blocked), get_type(blocked)),
-                            )
+                            add_instance(blocked)
+
                         timestamp = int(time())
                         c.execute(
-                            "SELECT * FROM  blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
-                            (blocker, blocked, block_level),
+                            "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
+                            (
+                               blocker,
+                               blocked,
+                               block_level
+                           ),
                         )
                         if c.fetchone() == None:
-                            c.execute(
-                                "INSERT INTO blocks SELECT ?, ?, '', ?, ?, ?",
-                                (blocker, blocked, block_level, timestamp, timestamp),
-                            )
+                            block_instance(blocker, blocked, reason, block_level, timestamp, timestamp)
+
                             if block_level == "reject":
                                 blockdict.append(
                                     {
@@ -283,10 +343,7 @@ for blocker, software in c.fetchall():
                                         "reason": None
                                     })
                         else:
-                            c.execute(
-                                "UPDATE blocks SET last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ?",
-                                (timestamp, blocker, blocked, block_level)
-                            )
+                            update_last_seen(timestamp, blocker, blocked, block_level)
             conn.commit()
             # Reasons
             if "mrf_simple_info" in federation:
@@ -298,20 +355,23 @@ for blocker, software in c.fetchall():
                 ).items():
                     for blocked, reason in info.items():
                         blocked = tidyup(blocked)
+
                         if blocked == "":
+                            print("WARNING: blocked is empty after tidyup():", blocker, block_level)
                             continue
+
                         if blocked.count("*") > 1:
                             # same domain guess as above, but for reasons field
                             c.execute(
-                                "SELECT domain FROM  instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
+                                "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
                             )
                             searchres = c.fetchone()
+
                             if searchres != None:
                                 blocked = searchres[0]
-                        c.execute(
-                            "UPDATE blocks SET reason = ? WHERE blocker = ? AND blocked = ? AND block_level = ? AND reason = ''",
-                            (reason["reason"], blocker, blocked, block_level),
-                        )
+
+                        update_block_reason(reason["reason"], blocker, blocked, block_level)
+
                         for entry in blockdict:
                             if entry["blocked"] == blocked:
                                 entry["reason"] = reason["reason"]
@@ -345,6 +405,7 @@ for blocker, software in c.fetchall():
                 blocks = get(
                     f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5
                 ).json()
+
                 for block in blocks:
                     entry = {'domain': block['domain'], 'hash': block['digest'], 'reason': block['comment']}
                     if block['severity'] == 'suspend':
@@ -355,6 +416,8 @@ for blocker, software in c.fetchall():
                         json['media_removal'].append(entry)
                     elif block['severity'] == 'reject_reports':
                         json['report_removal'].append(entry)
+                    else:
+                        print("WARNING: Unknown severity:", block['severity'], block['domain'])
             except:
                 json = get_mastodon_blocks(blocker)
 
@@ -362,19 +425,18 @@ for blocker, software in c.fetchall():
                 for instance in blocks:
                     blocked, blocked_hash, reason = instance.values()
                     blocked = tidyup(blocked)
+
                     if blocked.count("*") <= 1:
                         c.execute(
-                            "SELECT hash FROM  instances WHERE hash = ?", (blocked_hash,)
+                            "SELECT hash FROM instances WHERE hash = ?", (blocked_hash,)
                         )
+
                         if c.fetchone() == None:
-                            c.execute(
-                                "INSERT INTO instances SELECT ?, ?, ?",
-                                (blocked, get_hash(blocked), get_type(blocked)),
-                            )
+                            add_instance(blocked)
                     else:
                         # Doing the hash search for instance names as well to tidy up DB
                         c.execute(
-                            "SELECT domain FROM  instances WHERE hash = ?", (blocked_hash,)
+                            "SELECT domain FROM instances WHERE hash = ?", (blocked_hash,)
                         )
                         searchres = c.fetchone()
                         if searchres != None:
@@ -382,21 +444,16 @@ for blocker, software in c.fetchall():
 
                     timestamp = int(time())
                     c.execute(
-                        "SELECT * FROM  blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
-                        (blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level),
+                        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
+                        (
+                            blocker,
+                            blocked if blocked.count("*") <= 1 else blocked_hash,
+                            block_level
+                        ),
                     )
                     if c.fetchone() == None:
-                        c.execute(
-                            "INSERT INTO blocks SELECT ?, ?, ?, ?, ?, ?",
-                            (
-                                blocker,
-                                blocked if blocked.count("*") <= 1 else blocked_hash,
-                                reason,
-                                block_level,
-                                timestamp,
-                                timestamp,
-                            ),
-                        )
+                        block_instance(blocker, blocked if blocked.count("*") <= 1 else blocked_hash, reason, block_level, timestamp, timestamp)
+
                         if block_level == "reject":
                             blockdict.append(
                                 {
@@ -404,15 +461,11 @@ for blocker, software in c.fetchall():
                                     "reason": reason
                                 })
                     else:
-                        c.execute(
-                            "UPDATE blocks SET last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ?",
-                            (timestamp, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level),
-                        )
+                        update_last_seen(timestamp, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level)
+
                     if reason != '':
-                        c.execute(
-                            "UPDATE blocks SET reason = ? WHERE blocker = ? AND blocked = ? AND block_level = ? AND reason = ''",
-                            (reason, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level),
-                        )
+                        update_block_reason(reason, blocker, blocked if blocked.count("*") <= 1 else blocked_hash, block_level)
+
             conn.commit()
         except Exception as e:
             print("error:", e, blocker)
@@ -428,10 +481,11 @@ for blocker, software in c.fetchall():
                     blocked, reason = instance.values()
                     blocked = tidyup(blocked)
 
+                    print("BEFORE-blocked:", blocked)
                     if blocked.count("*") > 0:
                         # Some friendica servers also obscure domains without hash
                         c.execute(
-                            "SELECT domain FROM  instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
+                            "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
                         )
                         searchres = c.fetchone()
                         if searchres != None:
@@ -440,38 +494,28 @@ for blocker, software in c.fetchall():
                     if blocked.count("?") > 0:
                         # Some obscure them with question marks, not sure if that's dependent on version or not
                         c.execute(
-                            "SELECT domain FROM  instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("?", "_"),)
+                            "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("?", "_"),)
                         )
                         searchres = c.fetchone()
                         if searchres != None:
                             blocked = searchres[0]
 
+                    print("AFTER-blocked:", blocked)
                     c.execute(
-                        "SELECT domain FROM  instances WHERE domain = ?", (blocked,)
+                        "SELECT domain FROM instances WHERE domain = ?", (blocked,)
                     )
+
                     if c.fetchone() == None:
-                        c.execute(
-                            "INSERT INTO instances SELECT ?, ?, ?",
-                            (blocked, get_hash(blocked), get_type(blocked)),
-                        )
+                        add_instance(blocked)
 
                     timestamp = int(time())
                     c.execute(
-                        "SELECT * FROM  blocks WHERE blocker = ? AND blocked = ?",
+                        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ?",
                         (blocker, blocked),
                     )
                     if c.fetchone() == None:
-                        c.execute(
-                            "INSERT INTO blocks SELECT ?, ?, ?, ?, ?, ?",
-                            (
-                                blocker,
-                                blocked,
-                                reason,
-                                block_level,
-                                timestamp,
-                                timestamp
-                            ),
-                        )
+                        block_instance(blocker, blocked, reason, block_level, timestamp, timestamp)
+
                         if block_level == "reject":
                             blockdict.append(
                                 {
@@ -479,15 +523,11 @@ for blocker, software in c.fetchall():
                                     "reason": reason
                                 })
                     else:
-                        c.execute(
-                            "UPDATE blocks SET last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ?",
-                            (timestamp, blocker, blocked, block_level),
-                        )
+                        update_last_seen(timestamp, blocker, blocked, block_level)
+
                     if reason != '':
-                        c.execute(
-                            "UPDATE blocks SET reason = ? WHERE blocker = ? AND blocked = ? AND block_level = ? AND reason = ''",
-                            (reason, blocker, blocked, block_level),
-                        )
+                        update_block_reason(reason, blocker, blocked, block_level)
+
             conn.commit()
         except Exception as e:
             print("error:", e, blocker)
@@ -498,56 +538,55 @@ for blocker, software in c.fetchall():
             federation = get(
                 f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=headers, timeout=5
             ).json()
-            for peer in federation:
-                blocked = peer["domain"].lower()
 
-                if blocked.count("*") > 0:
-                    # GTS does not have hashes for obscured domains, so we have to guess it
-                    c.execute(
-                        "SELECT domain FROM  instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
-                    )
-                    searchres = c.fetchone()
-                    if searchres != None:
-                        blocked = searchres[0]
+            if (federation == None):
+                print("WARNING: No valid response:", blocker);
+            else:
+                for peer in federation:
+                    blocked = peer["domain"].lower()
+
+                    if blocked.count("*") > 0:
+                        # GTS does not have hashes for obscured domains, so we have to guess it
+                        c.execute(
+                            "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace("*", "_"),)
+                        )
+                        searchres = c.fetchone()
+
+                        if searchres != None:
+                            blocked = searchres[0]
 
-                c.execute(
-                    "SELECT domain FROM  instances WHERE domain = ?", (blocked,)
-                )
-                if c.fetchone() == None:
-                    c.execute(
-                        "INSERT INTO instances SELECT ?, ?, ?",
-                        (blocked, get_hash(blocked), get_type(blocked)),
-                    )
-                c.execute(
-                    "SELECT * FROM  blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
-                    (blocker, blocked, "reject"),
-                )
-                timestamp = int(time())
-                if c.fetchone() == None:
-                    c.execute(
-                        "INSERT INTO blocks SELECT ?, ?, ?, ?, ?, ?",
-                           (blocker, blocked, "", "reject", timestamp, timestamp),
-                    )
-                    blockdict.append(
-                        {
-                            "blocked": blocked,
-                            "reason": None
-                        })
-                else:
                     c.execute(
-                        "UPDATE blocks SET last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ?",
-                        (timestamp, blocker, blocked, "reject"),
+                        "SELECT domain FROM instances WHERE domain = ?", (blocked,)
                     )
-                if "public_comment" in peer:
-                    reason = peer["public_comment"]
+
+                    if c.fetchone() == None:
+                        add_instance(blocked)
+
                     c.execute(
-                        "UPDATE blocks SET reason = ? WHERE blocker = ? AND blocked = ? AND block_level = ? AND reason = ''",
-                        (reason, blocker, blocked, "reject"),
+                        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
+                        (blocker, blocked, "reject"),
                     )
-                    for entry in blockdict:
-                        if entry["blocked"] == blocked:
-                            entry["reason"] = reason
-            conn.commit()
+                    timestamp = int(time())
+
+                    if c.fetchone() == None:
+                        block_instance(blocker, blocked, "", "reject", timestamp, timestamp)
+
+                        blockdict.append(
+                            {
+                                "blocked": blocked,
+                                "reason": None
+                            })
+                    else:
+                        update_last_seen(timestamp, blocker, blocked, "reject")
+
+                    if "public_comment" in peer:
+                        reason = peer["public_comment"]
+                        update_block_reason(reason, blocker, blocked, "reject")
+
+                        for entry in blockdict:
+                            if entry["blocked"] == blocked:
+                                entry["reason"] = reason
+                conn.commit()
         except Exception as e:
             print("error:", e, blocker)