]> git.mxchange.org Git - fba.git/blob - fba.py
Continued:
[fba.git] / fba.py
1 from bs4 import BeautifulSoup
2 from hashlib import sha256
3
4 import reqto
5 import re
6 import sqlite3
7 import json
8 import sys
9 import time
10
# Runtime settings (user agent, timeouts, bot credentials) are read from
# config.json in the current working directory.
with open("config.json") as f:
    config = json.load(f)

# Entries that is_blacklisted() looks for as substrings of a domain name.
blacklist = [
    "activitypub-troll.cf",
    "gab.best",
    "4chan.icu",
    "social.shrimpcam.pw",
    "mastotroll.netz.org",
    "ngrok.io",
]

# Errors reported through update_last_error() for domains that matched no row
# in `instances`; add_instance() replays them once the row has been inserted.
pending_errors = {}

# NodeInfo schema identifiers accepted as "rel" values by
# fetch_wellknown_nodeinfo(), newest schema first.
nodeinfos = [
    f"http://nodeinfo.diaspora.software/ns/schema/{version}"
    for version in ("2.1", "2.0", "1.1", "1.0")
]

# HTTP headers sent along with the outgoing requests.
headers = {"user-agent": config["useragent"]}

# Shared database handle and cursor used by every helper in this module.
connection = sqlite3.connect("blocks.db")
cursor = connection.cursor()
39
def is_blacklisted(domain: str) -> bool:
    """Tell whether any entry of the module-level blacklist occurs in *domain*.

    The comparison is a plain substring test, so an entry also matches any
    longer name that merely contains it (for example its sub-domains).
    """
    return any(entry in domain for entry in blacklist)
47
def get_hash(domain: str) -> str:
    """Return the hex-encoded SHA-256 digest of the UTF-8 encoded *domain*."""
    digest = sha256()
    digest.update(domain.encode("utf-8"))
    return digest.hexdigest()
50
def update_last_blocked(domain: str):
    """Set last_blocked and last_updated of *domain*'s `instances` row to now.

    Prints a warning when no row matched. Any exception raised while running
    the statement is reported and ends the process with exit status 255.
    This function does not commit the change itself.
    """
    # NOISY-DEBUG: print("DEBUG: Updating last_blocked for domain", domain)
    statement = "UPDATE instances SET last_blocked = ?, last_updated = ? WHERE domain = ? LIMIT 1"

    try:
        cursor.execute(statement, [time.time(), time.time(), domain])

        if cursor.rowcount == 0:
            print("WARNING: Did not update any rows:", domain)

    except BaseException as error:
        print("ERROR: failed SQL query:", domain, error)
        sys.exit(255)
66
def update_last_error(domain: str, res: any):
    """Store the most recent error of *domain* in its `instances` row.

    *res* may be an exception (stored as its string form), a plain string, or
    a response-like object offering `status_code` and `reason`. Exceptions and
    strings are stored with the status code 999. When no row was updated the
    error is kept in `pending_errors` under *domain*. Any exception raised
    along the way is reported and ends the process with exit status 255.
    """
    # NOISY-DEBUG: print("DEBUG: domain,res.status_code:", domain, res.status_code, res.reason)
    try:
        # NOISY-DEBUG: print("DEBUG: res[]:", type(res))
        if isinstance(res, BaseException):
            res = str(res)

        # Pick the statement and its parameters first, then run it once.
        if type(res) is str:
            statement = "UPDATE instances SET last_status_code = 999, last_error_details = ?, last_updated = ? WHERE domain = ? LIMIT 1"
            parameters = [res, time.time(), domain]
        else:
            statement = "UPDATE instances SET last_status_code = ?, last_error_details = ?, last_updated = ? WHERE domain = ? LIMIT 1"
            parameters = [res.status_code, res.reason, time.time(), domain]

        cursor.execute(statement, parameters)

        if cursor.rowcount == 0:
            # NOISY-DEBUG: print("DEBUG: Did not update any rows:", domain)
            pending_errors[domain] = res

    except BaseException as error:
        print("ERROR: failed SQL query:", domain, error)
        sys.exit(255)
95
def update_last_nodeinfo(domain: str):
    """Set last_nodeinfo and last_updated of *domain*'s `instances` row to now.

    Prints a warning when no row matched, and commits the connection once the
    statement has run. Any exception raised while running the statement is
    reported and ends the process with exit status 255.
    """
    # NOISY-DEBUG: print("DEBUG: Updating last_nodeinfo for domain:", domain)
    statement = "UPDATE instances SET last_nodeinfo = ?, last_updated = ? WHERE domain = ? LIMIT 1"

    try:
        cursor.execute(statement, [time.time(), time.time(), domain])

        if cursor.rowcount == 0:
            print("WARNING: Did not update any rows:", domain)

    except BaseException as error:
        print("ERROR: failed SQL query:", domain, error)
        sys.exit(255)

    connection.commit()
113
def get_peers(domain: str) -> list:
    """Fetch the peer list of *domain* from /api/v1/instance/peers.

    Returns the decoded JSON body on success and None when the request failed
    or its body could not be decoded. HTTP error responses and exceptions are
    both recorded through update_last_error(); update_last_nodeinfo() is
    called in every case.
    """
    # NOISY-DEBUG: print("DEBUG: Getting peers for domain:", domain)
    peers = None

    try:
        res = reqto.get(f"https://{domain}/api/v1/instance/peers", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))

        if not res.ok or res.status_code >= 400:
            print("WARNING: Cannot fetch peers:", domain)
            update_last_error(domain, res)
        else:
            # NOISY-DEBUG: print("DEBUG: Querying API was successful:", domain, len(res.json()))
            peers = res.json()

    except Exception as e:
        # Only ordinary errors are handled here, so that Ctrl-C and sys.exit()
        # are no longer swallowed. The failure is recorded the same way the
        # nodeinfo fetchers record theirs.
        print("WARNING: Some error during get():", domain, e)
        update_last_error(domain, e)

    update_last_nodeinfo(domain)

    # NOISY-DEBUG: print("DEBUG: Returning peers[]:", type(peers))
    return peers
135
def post_json_api(domain: str, path: str, data: str) -> list:
    """POST *data* to https://{domain}{path} and return the decoded JSON reply.

    On an HTTP error response the error is recorded via update_last_error();
    on success update_last_nodeinfo() is called. An empty dict is returned
    whenever the request failed or the reply could not be decoded.
    """
    # NOISY-DEBUG: print("DEBUG: Sending POST to domain,path,data:", domain, path, data)
    # Named `reply` so the module-level `json` import is not shadowed.
    reply = {}

    try:
        res = reqto.post(f"https://{domain}{path}", data=data, headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))

        if not res.ok or res.status_code >= 400:
            print("WARNING: Cannot query JSON API:", domain, path, data, res.status_code)
            update_last_error(domain, res)
        else:
            update_last_nodeinfo(domain)
            reply = res.json()

    except Exception:
        # Network and decoding problems are expected here and only logged.
        # sys.exit() raised by the update_* helpers and Ctrl-C pass through.
        print("WARNING: Some error during post():", domain, path, data)

    # NOISY-DEBUG: print("DEBUG: Returning json():", len(reply))
    return reply
154
def fetch_nodeinfo(domain: str) -> list:
    """Return the nodeinfo document of *domain*, or an empty dict.

    The document advertised through /.well-known/nodeinfo is preferred. If
    that lookup yields nothing, a fixed list of well-known paths is tried in
    order and the first successful JSON reply wins. Failed attempts are
    recorded via update_last_error().
    """
    # NOISY-DEBUG: print("DEBUG: Fetching nodeinfo from domain:", domain)
    discovered = fetch_wellknown_nodeinfo(domain)
    # NOISY-DEBUG: print("DEBUG: nodeinfo:", len(discovered))

    if len(discovered) > 0:
        # NOISY-DEBUG: print("DEBUG: Returning auto-discovered nodeinfo:", len(discovered))
        return discovered

    paths = (
        "/nodeinfo/2.1.json",
        "/nodeinfo/2.1",
        "/nodeinfo/2.0.json",
        "/nodeinfo/2.0",
        "/nodeinfo/1.0",
        "/api/v1/instance",
    )

    found = {}
    for url in [f"https://{domain}{path}" for path in paths]:
        try:
            # NOISY-DEBUG: print("DEBUG: Fetching request:", url)
            res = reqto.get(url, headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))

            # The body is only decoded for successful responses.
            body = res.json() if res.ok else None
            if body is not None:
                # NOISY-DEBUG: print("DEBUG: Success:", url)
                found = body
                break

            if not res.ok or res.status_code >= 400:
                print("WARNING: Failed fetching nodeinfo from domain:", domain)
                update_last_error(domain, res)

        except BaseException as error:
            # NOISY-DEBUG: print("DEBUG: Cannot fetch API request:", url)
            update_last_error(domain, error)

    # NOISY-DEBUG: print("DEBUG: json[]:", type(found))
    if found is None or len(found) == 0:
        print("WARNING: Failed fetching nodeinfo from domain:", domain)

    # NOISY-DEBUG: print("DEBUG: Returning json[]:", type(found))
    return found
201
def fetch_wellknown_nodeinfo(domain: str) -> list:
    """Discover and fetch the nodeinfo document of *domain*.

    Reads https://{domain}/.well-known/nodeinfo, walks its "links" and
    fetches the first one whose "rel" is listed in `nodeinfos` and whose
    "href" answers with JSON. Returns that document, or an empty dict when
    nothing usable was found. Exceptions are recorded via update_last_error().
    """
    # NOISY-DEBUG: print("DEBUG: Fetching .well-known info for domain:", domain)
    document = {}

    try:
        # The same timeout and headers are used for the index and for the
        # linked document. Without a timeout the second request could block
        # forever on an unresponsive host.
        timeout = (config["connection_timeout"], config["read_timeout"])

        res = reqto.get(f"https://{domain}/.well-known/nodeinfo", headers=headers, timeout=timeout)
        # NOISY-DEBUG: print("DEBUG: domain,res.ok:", domain, res.ok)
        index = res.json() if res.ok else None

        if index is not None:
            # NOISY-DEBUG: print("DEBUG: Found entries:", len(index), domain)
            if "links" in index:
                # NOISY-DEBUG: print("DEBUG: Found links in nodeinfo():", len(index["links"]))
                for link in index["links"]:
                    # NOISY-DEBUG: print("DEBUG: rel,href:", link["rel"], link["href"])
                    if link["rel"] in nodeinfos:
                        # NOISY-DEBUG: print("DEBUG: Fetching nodeinfo from:", link["href"])
                        res = reqto.get(link["href"], headers=headers, timeout=timeout)
                        # NOISY-DEBUG: print("DEBUG: href,res.ok,res.status_code:", link["href"], res.ok, res.status_code)
                        linked = res.json() if res.ok else None
                        if linked is not None:
                            # NOISY-DEBUG: print("DEBUG: Found JSON nodeinfo():", len(linked))
                            document = linked
                            break
                    else:
                        print("WARNING: Unknown 'rel' value:", domain, link["rel"])
            else:
                print("WARNING: nodeinfo does not contain 'links':", domain)

    except BaseException as e:
        print("WARNING: Failed fetching .well-known info:", domain)
        update_last_error(domain, e)

    # NOISY-DEBUG: print("DEBUG: Returning json[]:", type(document))
    return document
236
def determine_software(domain: str) -> str:
    """Work out which server software *domain* runs, based on its nodeinfo.

    Returns the normalised software name, with known forks folded into
    "pleroma", "mastodon" or "misskey". Returns None when no nodeinfo could
    be fetched, when the type cannot be guessed, or when the cleaned-up name
    is empty. A reply without [software][name] is taken to be Mastodon if all
    of its keys belong to a fixed set of instance-API field names.
    """
    # NOISY-DEBUG: print("DEBUG: Determining software for domain:", domain)
    nodeinfo = fetch_nodeinfo(domain)
    # NOISY-DEBUG: print("DEBUG: json[]:", type(nodeinfo))

    if nodeinfo is None or len(nodeinfo) == 0:
        # NOISY-DEBUG: print("DEBUG: Could not determine software type:", domain)
        return None

    # NOISY-DEBUG: print("DEBUG: json():", len(nodeinfo), nodeinfo)
    names_software = "software" in nodeinfo and "name" in nodeinfo["software"]
    if not names_software:
        print("WARNING: JSON response does not include [software][name], guessing ...")
        instance_fields = {"uri", "title", "description", "email", "version", "urls", "stats", "thumbnail", "languages", "contact_account"}
        found = sum(1 for field in instance_fields if field in nodeinfo)

        # NOISY-DEBUG: print("DEBUG: Found elements:", found)
        if found == len(nodeinfo):
            # NOISY-DEBUG: print("DEBUG: Maybe is Mastodon:", domain)
            return "mastodon"

        print("WARNING: Cannot guess software type:", domain, found, len(nodeinfo))
        return None

    software = tidyup(nodeinfo["software"]["name"])

    # Forks are reported under the name of the project they derive from.
    forks = {
        "akkoma": "pleroma",
        "rebased": "pleroma",
        "hometown": "mastodon",
        "ecko": "mastodon",
        "calckey": "misskey",
        "groundpolis": "misskey",
        "foundkey": "misskey",
        "cherrypick": "misskey",
        "meisskey": "misskey",
    }

    # NOISY-DEBUG: print("DEBUG: tidyup software:", software)
    if software in forks:
        # NOISY-DEBUG: print("DEBUG: Setting base software:", domain, software)
        software = forks[software]
    elif software.find("/") > 0:
        print("WARNING: Spliting of path:", software)
        software = software.split("/")[-1]
    elif software.find("|") > 0:
        print("WARNING: Spliting of path:", software)
        software = software.split("|")[0].strip()

    if software == "":
        print("WARNING: tidyup() left no software name behind:", domain)
        software = None

    # NOISY-DEBUG: print("DEBUG: Returning domain,software:", domain, software)
    return software
289
def update_block_reason(reason: str, blocker: str, blocked: str, block_level: str):
    """Fill in the reason of an existing block whose stored reason is empty.

    Updates `reason` and `last_seen` of the `blocks` row identified by
    *blocker*, *blocked* and *block_level*, but only while the stored reason
    is still the empty string. Prints a warning when no row was changed. Any
    exception raised while running the statement is reported and ends the
    process with exit status 255.
    """
    # NOISY-DEBUG: print("DEBUG: Updating block reason:", reason, blocker, blocked, block_level)
    try:
        cursor.execute(
            "UPDATE blocks SET reason = ?, last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ? AND reason = ''",
            (
                reason,
                time.time(),
                blocker,
                blocked,
                block_level
            ),
        )

        if cursor.rowcount == 0:
            # The old message referenced an undefined `domain`; the resulting
            # NameError was caught below and ended the process on every
            # update that matched no row.
            print("WARNING: Did not update any rows:", blocker, blocked, block_level)

    except BaseException as e:
        print("ERROR: failed SQL query:", reason, blocker, blocked, block_level, e)
        sys.exit(255)
310
def update_last_seen(blocker: str, blocked: str, block_level: str):
    """Set `last_seen` of an existing block to the current time.

    The `blocks` row is identified by *blocker*, *blocked* and *block_level*.
    Prints a warning when no row was changed. Any exception raised while
    running the statement is reported and ends the process with exit status
    255.
    """
    # NOISY-DEBUG: print("DEBUG: Updating last_seen for:", blocker, blocked, block_level)
    try:
        cursor.execute(
            "UPDATE blocks SET last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ?",
            (
                time.time(),
                blocker,
                blocked,
                block_level
            )
        )

        if cursor.rowcount == 0:
            # The old message referenced an undefined `domain`, which raised
            # NameError instead of printing the warning.
            print("WARNING: Did not update any rows:", blocker, blocked, block_level)

    except BaseException as e:
        # The old handler printed an undefined `last_seen` and so failed
        # with NameError itself.
        print("ERROR: failed SQL query:", blocker, blocked, block_level, e)
        sys.exit(255)
330
def block_instance(blocker: str, blocked: str, reason: str, block_level: str):
    """Insert a new row into `blocks` for *blocker* blocking *blocked*.

    `first_added` and `last_seen` are both set to the current time. Any
    exception raised while running the statement is reported and ends the
    process with exit status 255.

    Raises:
        RuntimeError: if *blocker* or *blocked* contains an "@" anywhere
            after its first character. This is the exception type the
            previous bare `raise` produced, now with a descriptive message.
    """
    # NOISY-DEBUG: print("DEBUG: blocker,blocked,reason,block_level:", blocker, blocked, reason, block_level)
    if blocker.find("@") > 0:
        print("WARNING: Bad blocker:", blocker)
        raise RuntimeError(f"Bad blocker: {blocker}")
    elif blocked.find("@") > 0:
        print("WARNING: Bad blocked:", blocked)
        raise RuntimeError(f"Bad blocked: {blocked}")

    # One timestamp serves both columns. The log lines below used to refer
    # to these two names without ever defining them.
    first_added = last_seen = time.time()

    print("INFO: New block:", blocker, blocked, reason, block_level, first_added, last_seen)
    try:
        cursor.execute(
            "INSERT INTO blocks (blocker, blocked, reason, block_level, first_added, last_seen) VALUES(?, ?, ?, ?, ?, ?)",
            (
                blocker,
                blocked,
                reason,
                block_level,
                first_added,
                last_seen
            ),
        )

    except BaseException as e:
        print("ERROR: failed SQL query:", blocker, blocked, reason, block_level, first_added, last_seen, e)
        sys.exit(255)
357
def add_instance(domain: str, origin: str, originator: str):
    """Insert *domain* into `instances` together with its detected software.

    The software name comes from determine_software(). After the insert, an
    error parked in `pending_errors` for this domain is written to the new
    row and removed from the dict, and update_last_nodeinfo() is called. Any
    exception raised during the insert is reported and ends the process with
    exit status 255.

    Raises:
        RuntimeError: if *domain* or a non-None *origin* contains an "@"
            anywhere after its first character. This is the exception type
            the previous bare `raise` produced, now with a descriptive
            message.
    """
    # NOISY-DEBUG: print("DEBUG: domain,origin:", domain, origin, originator)
    if domain.find("@") > 0:
        print("WARNING: Bad domain name:", domain)
        raise RuntimeError(f"Bad domain name: {domain}")
    elif origin is not None and origin.find("@") > 0:
        print("WARNING: Bad origin name:", origin)
        raise RuntimeError(f"Bad origin name: {origin}")

    software = determine_software(domain)
    # NOISY-DEBUG: print("DEBUG: Determined software:", software)

    print(f"INFO: Adding new instance {domain} (origin: {origin})")
    try:
        cursor.execute(
            "INSERT INTO instances (domain, origin, originator, hash, software, first_seen) VALUES (?, ?, ?, ?, ?, ?)",
            (
                domain,
                origin,
                originator,
                get_hash(domain),
                software,
                time.time()
            ),
        )

        if domain in pending_errors:
            # The row exists now, so the error recorded earlier can be stored.
            # NOISY-DEBUG: print("DEBUG: domain has pending error be updated:", domain)
            update_last_error(domain, pending_errors[domain])
            del pending_errors[domain]

    except BaseException as e:
        print("ERROR: failed SQL query:", domain, e)
        sys.exit(255)
    else:
        # NOISY-DEBUG: print("DEBUG: Updating nodeinfo for domain:", domain)
        update_last_nodeinfo(domain)
395
def send_bot_post(instance: str, blocks: list):
    """Post a status that lists the instances *instance* has blocked.

    *blocks* is a sequence of dicts with the keys "blocked" and "reason". At
    most the first 20 entries are listed and reasons longer than 420
    characters are cut off; the entries passed in are not modified. Every "@"
    in a reason is followed by a zero-width space. The status is sent to
    config['bot_instance'] using config['bot_token'] and
    config['bot_visibility'].

    Returns True once the reply has been decoded as JSON.
    """
    max_entries = 20
    max_reason_length = 420

    # Slicing to max_entries keeps exactly as many entries as the truncation
    # notice below announces.
    truncated = len(blocks) > max_entries
    listed = blocks[:max_entries] if truncated else blocks

    parts = [instance + " has blocked the following instances:\n\n"]
    for block in listed:
        reason = block["reason"]

        if reason is None or reason == "":
            parts.append(block["blocked"] + " with unspecified reason\n")
            continue

        if len(reason) > max_reason_length:
            reason = reason[:max_reason_length] + "[…]"

        parts.append(block["blocked"] + ' for "' + reason.replace("@", "@\u200b") + '"\n')

    if truncated:
        parts.append("(the list has been truncated to the first 20 entries)")

    message = "".join(parts)

    botheaders = {**headers, **{"Authorization": "Bearer " + config["bot_token"]}}

    # The decoded reply itself is not used. Decoding is kept so that a reply
    # which is not JSON still raises, as it did before.
    reqto.post(f"{config['bot_instance']}/api/v1/statuses",
        data={"status":message, "visibility":config['bot_visibility'], "content_type":"text/plain"},
        headers=botheaders, timeout=10).json()

    return True
423
def get_mastodon_blocks(domain: str) -> dict:
    """Scrape the block lists shown on *domain*'s /about/more page.

    Each <h3> heading is mapped to one of the known English section names,
    via a translation table where needed, and the rows of the next table
    following it are collected as dicts with "domain", "hash" and "reason".

    Returns a dict with the keys "reject", "media_removal" and
    "followers_only", or an empty dict when the page could not be fetched.
    """
    # NOISY-DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain)
    sections = {
        "Suspended servers": [],
        "Filtered media": [],
        "Limited servers": [],
        "Silenced servers": [],
    }

    # Localised or older headings, mapped to the section names used above.
    localized = {
        "Silenced instances": "Silenced servers",
        "Suspended instances": "Suspended servers",
        "Gesperrte Server": "Suspended servers",
        "Gefilterte Medien": "Filtered media",
        "Stummgeschaltete Server": "Silenced servers",
        "停止済みのサーバー": "Suspended servers",
        "制限中のサーバー": "Limited servers",
        "メディアを拒否しているサーバー": "Filtered media",
        "サイレンス済みのサーバー": "Silenced servers",
        "שרתים מושעים": "Suspended servers",
        "מדיה מסוננת": "Filtered media",
        "שרתים מוגבלים": "Silenced servers",
        "Serveurs suspendus": "Suspended servers",
        "Médias filtrés": "Filtered media",
        "Serveurs limités": "Silenced servers",
    }

    try:
        response = reqto.get(f"https://{domain}/about/more", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
        doc = BeautifulSoup(response.text, "html.parser")
    except:
        print("ERROR: Cannot fetch from domain:", domain)
        return {}

    for header in doc.find_all("h3"):
        title = header.text
        title = localized.get(title, title)

        if not (title in sections or title.lower() in sections):
            continue

        # find_all_next() instead of find_next_siblings(), to account for
        # instances that e.g. hide their lists in a dropdown menu.
        table = header.find_all_next("table")[0]
        for row in table.find_all("tr")[1:]:
            span = row.find("span")
            sections[title].append(
                {
                    "domain": tidyup(span.text),
                    "hash": tidyup(span["title"][9:]),
                    "reason": tidyup(row.find_all("td")[1].text),
                }
            )

    # NOISY-DEBUG: print("DEBUG: Returning blocks for domain:", domain)
    return {
        "reject": sections["Suspended servers"],
        "media_removal": sections["Filtered media"],
        "followers_only": sections["Limited servers"] + sections["Silenced servers"],
    }
483
def get_friendica_blocks(domain: str) -> dict:
    """Scrape the block list shown on *domain*'s /friendica page.

    Returns {"reject": [...]} with one {"domain", "reason"} dict for each
    table row after the first inside the element with id "about_blocklist".
    Returns an empty dict when the page could not be fetched or contains no
    such element.
    """
    # NOISY-DEBUG: print("DEBUG: Fetching friendica blocks from domain:", domain)
    try:
        response = reqto.get(f"https://{domain}/friendica", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))
        doc = BeautifulSoup(response.text, "html.parser")
    except:
        print("WARNING: Failed to fetch /friendica from domain:", domain)
        return {}

    blocklist = doc.find(id="about_blocklist")

    # Without the element there is nothing to parse.
    if blocklist is None:
        # NOISY-DEBUG: print("DEBUG: Instance has no block list:", domain)
        return {}

    entries = []
    for row in blocklist.find("table").find_all("tr")[1:]:
        cells = row.find_all("td")
        entries.append({
            "domain": tidyup(cells[0].text),
            "reason": tidyup(cells[1].text)
        })

    # NOISY-DEBUG: print("DEBUG: Returning blocks() for domain:", domain, len(entries))
    return {"reject": entries}
514
def _collect_misskey_instances(domain: str, path: str, filter_key: str, flag_key: str) -> list:
    """Page through one Misskey federation listing and collect flagged hosts.

    Sends POST requests with `filter_key: True` to *path* on *domain*, 99
    entries per page, and returns a {"domain", "reason"} dict for every
    returned instance whose *flag_key* is truthy. Paging stops on an empty
    page, on a page shorter than the limit, or on the first error; entries
    collected up to that point are kept.
    """
    step = 99
    collected = []
    counter = 0

    while True:
        try:
            query = {
                "sort": "+caughtAt",
                "host": None,
                filter_key: True,
                "limit": step,
            }
            if counter > 0:
                # NOTE(review): this offset re-requests the last entry of the
                # previous page. It is kept as it was; confirm whether that
                # overlap is intended.
                query["offset"] = counter - 1

            # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
            doc = post_json_api(domain, path, json.dumps(query))

            # NOISY-DEBUG: print("DEBUG: doc():", len(doc))
            if len(doc) == 0:
                # NOISY-DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
                break

            for instance in doc:
                # The filter was requested already; check the flag anyway.
                if instance[flag_key]:
                    collected.append({
                        "domain": tidyup(instance["host"]),
                        # The API reply carries no reason.
                        "reason": ""
                    })

            if len(doc) < step:
                # NOISY-DEBUG: print("DEBUG: End of request:", len(doc), step)
                break

            # NOISY-DEBUG: print("DEBUG: Raising counter by step:", step)
            counter = counter + step

        except Exception as e:
            print("WARNING: Caught error, exiting loop:", domain, e)
            break

    return collected


def get_misskey_blocks(domain: str) -> dict:
    """Fetch the block lists of a Misskey instance through its JSON API.

    Returns a dict with "reject" (instances flagged as blocked) and
    "followers_only" (instances flagged as suspended). Each list is paged
    independently starting at offset zero; previously the page counter of
    the first listing leaked into the second one.
    """
    # NOISY-DEBUG: print("DEBUG: Fetching misskey blocks from domain:", domain)
    suspended = _collect_misskey_instances(domain, "/api/federation/instances/", "suspended", "isSuspended")
    blocked = _collect_misskey_instances(domain, "/api/federation/instances", "blocked", "isBlocked")

    # NOISY-DEBUG: print("DEBUG: Returning for domain,blocked(),suspended():", domain, len(blocked), len(suspended))
    return {
        "reject": blocked,
        "followers_only": suspended
    }
629
def tidyup(string: str) -> str:
    """Normalise a domain name as found in a block list.

    Lower-cases and strips the value, then removes in this order: a leading
    "http:"/"https:" scheme with any slashes after it, one trailing slash, a
    trailing ":port", a leading "@", and everything up to and including the
    last "@" (a user part).
    """
    # Block lists come in arbitrary case and with stray whitespace.
    string = string.lower().strip()

    # Some lists include the scheme, sometimes even without the slashes.
    string = re.sub(r"^https?:/*", "", string)

    # ... and a trailing slash.
    string = re.sub(r"/$", "", string)

    # The port is removed only after scheme and trailing slash are gone, so
    # that "host:8080/" loses its port as well.
    string = re.sub(r":\d+$", "", string)

    # A leading "@" ...
    string = re.sub(r"^@", "", string)

    # ... and the user part of entries that name an individual account.
    string = re.sub(r".+@", "", string)

    return string