]> git.mxchange.org Git - fba.git/blob - fba.py
6d20d6e775aedf2b7c482da78f3ced1c1c9c70cf
[fba.git] / fba.py
1 from bs4 import BeautifulSoup
2 from hashlib import sha256
3
4 import reqto
5 import re
6 import sqlite3
7 import json
8 import sys
9 import time
10
# Runtime settings (user agent, timeouts, bot credentials) come from config.json.
with open("config.json") as f:
    config = json.load(f)

# Domain fragments that are never to be processed; is_blacklisted() matches
# them as substrings of a domain name.
blacklist = [
    "activitypub-troll.cf",
    "gab.best",
    "4chan.icu",
    "social.shrimpcam.pw",
    "mastotroll.netz.org",
    "ngrok.io",
]

# HTTP headers sent along with the outgoing requests made in this file.
headers = {"user-agent": config["useragent"]}

# Shared SQLite connection and cursor used by the database helpers below.
connection = sqlite3.connect("blocks.db")
cursor = connection.cursor()
29
def is_blacklisted(domain: str) -> bool:
    """Tell whether *domain* contains any entry of the module-level blacklist.

    Matching is by substring, so a subdomain of a blacklisted peer (or any
    other name that merely contains the entry) is reported as blacklisted.
    """
    return any(entry in domain for entry in blacklist)
37
def get_hash(domain: str) -> str:
    """Return the hex-encoded SHA-256 digest of *domain* (encoded as UTF-8)."""
    hasher = sha256()
    hasher.update(domain.encode("utf-8"))
    return hasher.hexdigest()
40
def update_last_blocked(domain: str):
    """Stamp the ``instances`` row of *domain* with the current time.

    ``last_blocked`` and ``last_updated`` receive the same timestamp. The
    change is not committed here.

    A failing query is treated as fatal: the error is printed and the
    process exits with status 255.
    """
    # NOISY-DEBUG: print("DEBUG: Updating last_blocked for domain", domain)
    # One reading of the clock so both columns hold the identical value.
    now = time.time()

    try:
        cursor.execute(
            "UPDATE instances SET last_blocked = ?, last_updated = ? WHERE domain = ?",
            [now, now, domain],
        )
    except sqlite3.Error as error:
        print("ERROR: failed SQL query:", domain, error)
        sys.exit(255)
53
def update_last_error(domain: str, res: any):
    """Record the status of a failed HTTP response for *domain*.

    Writes ``res.status_code`` and ``res.reason`` to ``last_status_code`` and
    ``last_error_details`` of the matching ``instances`` row and refreshes
    ``last_updated``. The change is not committed here.

    A failing query is treated as fatal: the error is printed and the
    process exits with status 255.
    """
    # NOISY-DEBUG: print("DEBUG: domain,res.status_code:", domain, res.status_code, res.reason)
    try:
        cursor.execute(
            "UPDATE instances SET last_status_code = ?, last_error_details = ?, last_updated = ? WHERE domain = ?",
            [res.status_code, res.reason, time.time(), domain],
        )
    except sqlite3.Error as error:
        print("ERROR: failed SQL query:", domain, error)
        sys.exit(255)
67
def update_last_nodeinfo(domain: str):
    """Stamp ``last_nodeinfo`` and ``last_updated`` of *domain* and commit.

    Both columns receive the same timestamp. Unlike the other update helpers
    in this file, this one commits the open transaction on the shared
    connection.

    A failing query is treated as fatal: the error is printed and the
    process exits with status 255.
    """
    # NOISY-DEBUG: print("DEBUG: Updating last_nodeinfo for domain:", domain)
    # One reading of the clock so both columns hold the identical value.
    now = time.time()

    try:
        cursor.execute(
            "UPDATE instances SET last_nodeinfo = ?, last_updated = ? WHERE domain = ?",
            [now, now, domain],
        )
    except sqlite3.Error as error:
        print("ERROR: failed SQL query:", domain, error)
        sys.exit(255)

    connection.commit()
82
def get_peers(domain: str) -> list:
    """Fetch the peer list of *domain* from ``/api/v1/instance/peers``.

    Returns the decoded JSON body on success and ``None`` when the request
    fails, is answered with an error status, or the body cannot be decoded.
    An error status is recorded through update_last_error(). In every case
    update_last_nodeinfo() is called for the domain afterwards.
    """
    # NOISY-DEBUG: print("DEBUG: Getting peers for domain:", domain)
    peers = None

    try:
        res = reqto.get(
            f"https://{domain}/api/v1/instance/peers",
            headers=headers,
            timeout=(config["connection_timeout"], config["read_timeout"]),
        )

        if not res.ok or res.status_code >= 400:
            print("WARNING: Cannot fetch peers:", domain)
            update_last_error(domain, res)
        else:
            # NOISY-DEBUG: print("DEBUG: Querying API was successful:", domain, len(res.json()))
            peers = res.json()

    except Exception as error:
        # Exception (not a bare except) so that the SystemExit raised by the
        # database helpers and Ctrl-C are not swallowed here.
        print("WARNING: Some error during get():", domain, error)

    update_last_nodeinfo(domain)

    # NOISY-DEBUG: print("DEBUG: Returning peers[]:", type(peers))
    return peers
104
def post_json_api(domain: str, path: str, data: str) -> list:
    """POST *data* to ``https://{domain}{path}`` and return the decoded JSON.

    Returns an empty dict when the request fails, is answered with an error
    status (which is recorded through update_last_error()), or the body
    cannot be decoded. On a successful response update_last_nodeinfo() is
    called before the body is decoded.
    """
    # NOISY-DEBUG: print("DEBUG: Sending POST to domain,path,data:", domain, path, data)
    # Named "result" so the module-level json import is not shadowed.
    result = {}

    try:
        res = reqto.post(
            f"https://{domain}{path}",
            data=data,
            headers=headers,
            timeout=(config["connection_timeout"], config["read_timeout"]),
        )

        if not res.ok or res.status_code >= 400:
            print("WARNING: Cannot query JSON API:", domain, path, data, res.status_code)
            update_last_error(domain, res)
            return result

        update_last_nodeinfo(domain)
        result = res.json()
    except Exception as error:
        # Exception (not a bare except) so that the SystemExit raised by the
        # database helpers and Ctrl-C are not swallowed here.
        print("WARNING: Some error during post():", domain, path, data, error)

    # NOISY-DEBUG: print("DEBUG: Returning json():", len(result))
    return result
123
def fetch_nodeinfo(domain: str) -> list:
    """Probe the known nodeinfo endpoints of *domain* and return the first hit.

    The URLs are tried in order; the first response that is OK and decodes to
    something other than ``None`` is returned. Error responses are recorded
    through update_last_error(). When no endpoint answers usefully a warning
    is printed and an empty dict is returned.
    """
    # NOISY-DEBUG: print("DEBUG: Fetching nodeinfo from domain:", domain)

    urls = [
        f"https://{domain}/nodeinfo/2.1.json",
        f"https://{domain}/nodeinfo/2.1",
        f"https://{domain}/nodeinfo/2.0.json",
        f"https://{domain}/nodeinfo/2.0",
        f"https://{domain}/nodeinfo/1.0",
        f"https://{domain}/api/v1/instance",
    ]

    nodeinfo = {}
    for url in urls:
        try:
            # NOISY-DEBUG: print("DEBUG: Fetching request:", url)
            res = reqto.get(url, headers=headers, timeout=(config["connection_timeout"], config["read_timeout"]))

            if res.ok:
                # Decode the body once instead of once per check.
                data = res.json()
                if data is not None:
                    # NOISY-DEBUG: print("DEBUG: Success:", url)
                    nodeinfo = data
                    break

            if not res.ok or res.status_code >= 400:
                # NOISY-DEBUG: print("DEBUG: Failed fetching nodeinfo from domain:", domain)
                update_last_error(domain, res)

        except Exception:
            # Best effort probing: an unreachable or non-JSON endpoint just
            # means the next candidate URL is tried.
            # NOISY-DEBUG: print("DEBUG: Cannot fetch API request:", url)
            continue

    # The result starts out as an empty dict (never None), so test for emptiness.
    if not nodeinfo:
        print("WARNING: Failed fetching nodeinfo from domain:", domain)

    # NOISY-DEBUG: print("DEBUG: Returning json[]:", type(nodeinfo))
    return nodeinfo
161
def determine_software(domain: str) -> str:
    """Return the normalised software name of *domain*, or ``None``.

    The name is read from ``software.name`` of the document returned by
    fetch_nodeinfo() and cleaned with tidyup(). Known forks are mapped to
    their parent project (pleroma, mastodon, misskey); a name containing a
    slash is reduced to its last path component. ``None`` is returned when no
    document could be fetched, when it carries no software name, or when
    nothing is left after cleaning.
    """
    # NOISY-DEBUG: print("DEBUG: Determining software for domain:", domain)
    forks = {
        "akkoma": "pleroma",
        "rebased": "pleroma",
        "hometown": "mastodon",
        "ecko": "mastodon",
        "calckey": "misskey",
        "groundpolis": "misskey",
        "foundkey": "misskey",
        "cherrypick": "misskey",
    }

    nodeinfo = fetch_nodeinfo(domain)
    if not nodeinfo:
        print("DEBUG: Could not determine software type:", domain)
        return None

    # The last URL probed by fetch_nodeinfo() is /api/v1/instance, which is
    # not a nodeinfo document, so the "software" section cannot be relied on.
    try:
        name = nodeinfo["software"]["name"]
    except (KeyError, TypeError):
        name = None

    if not isinstance(name, str):
        print("WARNING: No software name in nodeinfo:", domain)
        return None

    # NOISY-DEBUG: print("DEBUG: json():", len(nodeinfo))
    software = tidyup(name)

    # NOISY-DEBUG: print("DEBUG: tidyup software:", software)
    if software in forks:
        # NOISY-DEBUG: print("DEBUG: Mapping fork:", domain, software)
        software = forks[software]
    elif software.find("/") > 0:
        print("WARNING: Spliting of path:", software)
        software = software.split("/")[-1]

    if software == "":
        print("WARNING: tidyup() left no software name behind:", domain)
        software = None

    # NOISY-DEBUG: print("DEBUG: Returning domain,software:", domain, software)
    return software
194
def update_block_reason(reason: str, blocker: str, blocked: str, block_level: str):
    """Fill in the reason of an existing block that has none yet.

    Only rows matching *blocker*, *blocked* and *block_level* whose stored
    reason is the empty string are touched; they receive *reason* and a fresh
    ``last_seen``. The change is not committed here.

    A failing query is treated as fatal: the error is printed and the
    process exits with status 255.
    """
    # NOISY: print("--- Updating block reason:", reason, blocker, blocked, block_level)
    try:
        cursor.execute(
            "UPDATE blocks SET reason = ?, last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ? AND reason = ''",
            (reason, time.time(), blocker, blocked, block_level),
        )
    except sqlite3.Error as error:
        print("ERROR: failed SQL query:", reason, blocker, blocked, block_level, error)
        sys.exit(255)
212
def update_last_seen(blocker: str, blocked: str, block_level: str):
    """Refresh ``last_seen`` of the block matching the three given keys.

    The change is not committed here.

    A failing query is treated as fatal: the error is printed and the
    process exits with status 255.
    """
    # NOISY: print("--- Updating last_seen for:", blocker, blocked, block_level)
    try:
        cursor.execute(
            "UPDATE blocks SET last_seen = ? WHERE blocker = ? AND blocked = ? AND block_level = ?",
            (time.time(), blocker, blocked, block_level),
        )
    except sqlite3.Error as error:
        # Only names that exist in this scope are printed; the handler used
        # to reference an undefined variable and failed with NameError.
        print("ERROR: failed SQL query:", blocker, blocked, block_level, error)
        sys.exit(255)
229
def block_instance(blocker: str, blocked: str, reason: str, block_level: str):
    """Insert a new row into ``blocks``.

    ``first_added`` and ``last_seen`` are both set to the current time. The
    change is not committed here.

    Raises:
        ValueError: if *blocker* or *blocked* contains an "@" after its first
            character, i.e. looks like an account handle rather than a domain.

    A failing query is treated as fatal: the error is printed and the
    process exits with status 255.
    """
    # NOISY-DEBUG: print("DEBUG: blocker,blocked,reason,block_level:", blocker, blocked, reason, block_level)
    if blocker.find("@") > 0:
        print("WARNING: Bad blocker:", blocker)
        raise ValueError(f"Bad blocker: {blocker}")
    elif blocked.find("@") > 0:
        print("WARNING: Bad blocked:", blocked)
        raise ValueError(f"Bad blocked: {blocked}")

    # One reading of the clock, shared by both columns and the log lines.
    first_added = last_seen = time.time()

    print("--- New block:", blocker, blocked, reason, block_level, first_added, last_seen)
    try:
        cursor.execute(
            "INSERT INTO blocks (blocker, blocked, reason, block_level, first_added, last_seen) VALUES(?, ?, ?, ?, ?, ?)",
            (blocker, blocked, reason, block_level, first_added, last_seen),
        )
    except sqlite3.Error as error:
        print("ERROR: failed SQL query:", blocker, blocked, reason, block_level, first_added, last_seen, error)
        sys.exit(255)
256
def add_instance(domain: str, origin: str, originator: str):
    """Insert *domain* into ``instances`` together with its detected software.

    The software name comes from determine_software(); the stored hash from
    get_hash(). After a successful insert update_last_nodeinfo() is called
    for the domain.

    Raises:
        ValueError: if *domain* or a non-None *origin* contains an "@" after
            its first character, i.e. looks like an account handle rather
            than a domain.

    A failing query is treated as fatal: the error is printed and the
    process exits with status 255.
    """
    # NOISY-DEBUG: print("DEBUG: domain,origin:", domain, origin, originator)
    if domain.find("@") > 0:
        print("WARNING: Bad domain name:", domain)
        raise ValueError(f"Bad domain name: {domain}")
    elif origin is not None and origin.find("@") > 0:
        print("WARNING: Bad origin name:", origin)
        raise ValueError(f"Bad origin name: {origin}")

    software = determine_software(domain)
    # NOISY-DEBUG: print("DEBUG: Determined software:", software)

    print(f"--- Adding new instance {domain} (origin: {origin})")
    try:
        cursor.execute(
            "INSERT INTO instances (domain, origin, originator, hash, software, first_seen) VALUES (?, ?, ?, ?, ?, ?)",
            (domain, origin, originator, get_hash(domain), software, time.time()),
        )
    except sqlite3.Error as error:
        print("ERROR: failed SQL query:", domain, error)
        sys.exit(255)
    else:
        # NOISY-DEBUG: print("DEBUG: Updating nodeinfo for domain:", domain)
        update_last_nodeinfo(domain)
289
def _format_bot_message(instance: str, blocks: list) -> str:
    """Build the status text announcing the blocks issued by *instance*."""
    truncated = len(blocks) > 20
    if truncated:
        # Keep exactly the 20 entries the closing note promises.
        blocks = blocks[:20]

    parts = [instance + " has blocked the following instances:\n\n"]
    for block in blocks:
        reason = block["reason"]
        if reason is None or reason == "":
            parts.append(block["blocked"] + " with unspecified reason\n")
            continue

        # Shorten over-long reasons on a local copy; the caller's dict is
        # left untouched.
        if len(reason) > 420:
            reason = reason[0:419] + "[…]"

        # A zero-width space after "@" keeps the text from forming mentions.
        parts.append(block["blocked"] + ' for "' + reason.replace("@", "@\u200b") + '"\n')

    if truncated:
        parts.append("(the list has been truncated to the first 20 entries)")

    return "".join(parts)


def send_bot_post(instance: str, blocks: list):
    """Publish a status listing the blocks of *instance* through the bot account.

    *blocks* is a sequence of dicts with the keys ``blocked`` and ``reason``.
    At most 20 entries are listed. The status is posted to
    ``/api/v1/statuses`` of ``config["bot_instance"]`` using
    ``config["bot_token"]`` as bearer token.

    Always returns ``True``; errors raised by the request or by decoding the
    response propagate to the caller.
    """
    message = _format_bot_message(instance, blocks)

    botheaders = {**headers, "Authorization": "Bearer " + config["bot_token"]}

    # The response is still decoded, so a non-JSON reply raises as it did
    # before; the decoded value itself is not needed.
    reqto.post(
        f"{config['bot_instance']}/api/v1/statuses",
        data={"status": message, "visibility": config['bot_visibility'], "content_type": "text/plain"},
        headers=botheaders,
        timeout=10,
    ).json()

    return True
317
def get_mastodon_blocks(domain: str) -> dict:
    """Scrape the block lists from the ``/about/more`` page of *domain*.

    Every ``<h3>`` whose (translated) text names a known block category is
    paired with the first table that follows it; each table row after the
    header row yields a dict with ``domain``, ``hash`` and ``reason``.

    Returns a dict with the keys ``reject``, ``media_removal`` and
    ``followers_only``, or an empty dict when the page cannot be fetched.
    Headings without a following table and rows lacking the expected cells
    are skipped.
    """
    # NOISY-DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain)
    blocks = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    # Localised / older heading texts mapped onto the keys used above.
    translations = {
        "Silenced instances": "Silenced servers",
        "Suspended instances": "Suspended servers",
        "Gesperrte Server": "Suspended servers",
        "Gefilterte Medien": "Filtered media",
        "Stummgeschaltete Server": "Silenced servers",
        "停止済みのサーバー": "Suspended servers",
        "メディアを拒否しているサーバー": "Filtered media",
        "サイレンス済みのサーバー": "Silenced servers",
        "שרתים מושעים": "Suspended servers",
        "מדיה מסוננת": "Filtered media",
        "שרתים מוגבלים": "Silenced servers",
        "Serveurs suspendus": "Suspended servers",
        "Médias filtrés": "Filtered media",
        "Serveurs limités": "Silenced servers",
    }

    try:
        doc = BeautifulSoup(
            reqto.get(f"https://{domain}/about/more", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])).text,
            "html.parser",
        )
    except Exception as error:
        print("ERROR: Cannot fetch from domain:", domain, error)
        return {}

    for header in doc.find_all("h3"):
        header_text = translations.get(header.text, header.text)
        if header_text not in blocks:
            continue

        # Search everything after the heading (not just its siblings) to
        # account for instances that e.g. hide lists in a dropdown menu.
        table = header.find_next("table")
        if table is None:
            continue

        # The first row is the table header.
        for row in table.find_all("tr")[1:]:
            span = row.find("span")
            cells = row.find_all("td")
            if span is None or len(cells) < 2:
                # Not a block entry in the expected layout.
                continue

            blocks[header_text].append(
                {
                    "domain": tidyup(span.text),
                    # The title attribute carries a 9-character prefix in
                    # front of the hash value.
                    "hash"  : tidyup(span.get("title", "")[9:]),
                    "reason": tidyup(cells[1].text),
                }
            )

    # NOISY-DEBUG: print("DEBUG: Returning blocks for domain:", domain)
    return {
        "reject"        : blocks["Suspended servers"],
        "media_removal" : blocks["Filtered media"],
        "followers_only": blocks["Limited servers"] + blocks["Silenced servers"],
    }
376
def get_friendica_blocks(domain: str) -> dict:
    """Scrape the block list from the ``/friendica`` page of *domain*.

    Rows of the table inside the element with id ``about_blocklist`` (after
    the header row) are turned into dicts with ``domain`` and ``reason``.

    Returns ``{"reject": [...]}``, or an empty dict when the page cannot be
    fetched or shows no block list.
    """
    # NOISY-DEBUG: print("DEBUG: Fetching friendica blocks from domain:", domain)
    blocks = []

    try:
        doc = BeautifulSoup(
            reqto.get(f"https://{domain}/friendica", headers=headers, timeout=(config["connection_timeout"], config["read_timeout"])).text,
            "html.parser",
        )
    except Exception as error:
        print("WARNING: Failed to fetch /friendica from domain:", domain, error)
        return {}

    blocklist = doc.find(id="about_blocklist")

    # Prevents exceptions: no block list section, or a section without a table.
    table = None if blocklist is None else blocklist.find("table")
    if table is None:
        # NOISY-DEBUG: print("DEBUG: Instance has no block list:", domain)
        return {}

    # The first row is the table header.
    for row in table.find_all("tr")[1:]:
        cells = row.find_all("td")
        if len(cells) < 2:
            # Not a block entry in the expected layout.
            continue

        blocks.append({
            "domain": tidyup(cells[0].text),
            "reason": tidyup(cells[1].text)
        })

    # NOISY-DEBUG: print("DEBUG: Returning blocks() for domain:", domain, len(blocks))
    return {
        "reject": blocks
    }
407
def _fetch_misskey_instances(domain: str, path: str, filter_key: str, flag_key: str) -> list:
    """Page through a Misskey federation listing and collect the flagged hosts.

    *filter_key* is the request field set to ``True`` to select the listing;
    *flag_key* is the per-instance field that must be truthy for an entry to
    be kept. Paging stops at an empty or short page, or at the first error;
    entries collected up to that point are returned.
    """
    found = []
    step = 99
    # Every listing is paged from its own start.
    counter = 0

    while True:
        # The API hands out the list page by page only.
        query = {
            "sort"    : "+caughtAt",
            "host"    : None,
            filter_key: True,
            "limit"   : step
        }
        if counter > 0:
            # NOTE(review): the offset is one less than the number of entries
            # already requested, so consecutive pages appear to overlap by
            # one entry - kept as found; confirm against the API.
            query["offset"] = counter - 1

        try:
            # NOISY-DEBUG: print("DEBUG: Sending JSON API request to domain,step,counter:", domain, step, counter)
            doc = post_json_api(domain, path, json.dumps(query))

            # NOISY-DEBUG: print("DEBUG: doc():", len(doc))
            if len(doc) == 0:
                # NOISY-DEBUG: print("DEBUG: Returned zero bytes, exiting loop:", domain)
                break

            for instance in doc:
                # just in case the server returned unfiltered entries
                if instance[flag_key]:
                    found.append({
                        "domain": tidyup(instance["host"]),
                        # the listing carries no reason field
                        "reason": ""
                    })

            if len(doc) < step:
                # NOISY-DEBUG: print("DEBUG: End of request:", len(doc), step)
                break

            # NOISY-DEBUG: print("DEBUG: Raising counter by step:", step)
            counter = counter + step

        except Exception as error:
            print("WARNING: Caught error, exiting loop:", domain, error)
            break

    return found


def get_misskey_blocks(domain: str) -> dict:
    """Collect the blocked and suspended instances of a Misskey *domain*.

    Two paged listings are requested through post_json_api(): instances
    flagged ``isSuspended`` (reported as ``followers_only``) and instances
    flagged ``isBlocked`` (reported as ``reject``). Each listing is paged
    independently from its first page.

    Returns a dict with the keys ``reject`` and ``followers_only``; a listing
    that fails part-way contributes the entries gathered so far.
    """
    # NOISY-DEBUG: print("DEBUG: Fetching misskey blocks from domain:", domain)
    # "suspended" is follow-only in Misskey's terminology
    suspended = _fetch_misskey_instances(domain, "/api/federation/instances/", "suspended", "isSuspended")

    # "blocked" is the full suspend
    blocked = _fetch_misskey_instances(domain, "/api/federation/instances", "blocked", "isBlocked")

    # NOISY-DEBUG: print("DEBUG: Returning for domain,blocked(),suspended():", domain, len(blocked), len(suspended))
    return {
        "reject"        : blocked,
        "followers_only": suspended
    }
522
def tidyup(string: str) -> str:
    """Normalise a block list entry to a bare, lower-case host name.

    Surrounding whitespace, a trailing port (optionally followed by a slash),
    a leading ``http:``/``https:`` scheme with any number of slashes, a
    trailing slash, a leading "@" and everything up to and including the last
    remaining "@" are removed, in that order.
    """
    # entries come in arbitrary case and with stray whitespace
    string = string.lower().strip()

    # some entries carry a port, possibly followed by a trailing slash
    string = re.sub(r":\d+/?$", "", string)

    # some entries carry the scheme, sometimes even without slashes
    string = re.sub(r"^https?:(/*)", "", string)

    # and a trailing slash
    string = re.sub(r"/$", "", string)

    # and a leading @
    string = re.sub(r"^@", "", string)

    # some entries name an individual account; keep only the host part
    string = re.sub(r"(.+)@", "", string)

    return string