# git.mxchange.org Git - fba.git/blob - fetch_blocks.py
# Continued:
# [fba.git] / fetch_blocks.py
"""Fetch the block lists published by known instances and record them.

Every row of the ``instances`` table whose software is Pleroma, Mastodon,
Friendica, Misskey or GoToSocial is visited. The blocks that instance
publishes are fetched and recorded through the ``fba`` helper module
(``fba.add_instance``, ``fba.block_instance``, ``fba.update_last_seen``,
``fba.update_block_reason``). Newly recorded "reject" blocks are collected per
blocker and handed to the bot when ``fba.config["bot_enabled"]`` is truthy.

The script does its work at module level and closes ``fba.conn`` when done.
"""

import itertools
import re
import time

import bs4
import reqto

import fba

# Maps the severity names of Mastodon's domain_blocks API onto the block
# levels used in the ``blocks`` table.
_MASTODON_SEVERITIES = {
    "suspend": "reject",
    "silence": "followers_only",
    "reject_media": "media_removal",
    "reject_reports": "report_removal",
}


def _find_obscured(blocked, wildcard="*"):
    """Look up a known instance matching an obscured domain.

    Every ``wildcard`` character in ``blocked`` is replaced by ``_``, the SQL
    ``LIKE`` placeholder for exactly one character, and the first match by
    ``rowid`` is fetched.

    Returns the row from ``fetchone()``: ``None`` when nothing matches,
    otherwise a row whose first column is the matching domain.
    """
    fba.c.execute(
        "SELECT domain FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", (blocked.replace(wildcard, "_"),)
    )
    return fba.c.fetchone()


def _fetch_pleroma(blocker, software):
    """Record the blocks a Pleroma instance lists in its nodeinfo document.

    Block levels come from ``metadata.federation.mrf_simple`` plus
    ``quarantined_instances``; reasons come from ``mrf_simple_info`` plus
    ``quarantined_instances_info``.

    Returns the newly recorded "reject" blocks as a list of
    ``{"blocked": ..., "reason": ...}`` dicts. Any error is printed and stops
    processing of this blocker; entries collected so far are still returned.
    """
    blockdict = []
    try:
        # Blocks
        federation = reqto.get(
            f"https://{blocker}/nodeinfo/2.1.json", headers=fba.headers, timeout=5
        ).json()["metadata"]["federation"]

        if "mrf_simple" in federation:
            levels = {
                **federation["mrf_simple"],
                # A missing "quarantined_instances" key must not abort the
                # whole blocker, so it is treated as an empty list (the
                # "_info" counterpart below is handled the same way).
                "quarantined_instances": federation.get("quarantined_instances", []),
            }
            for block_level, blocks in levels.items():
                for blocked in blocks:
                    print("DEBUG: BEFORE blocked:", blocked)
                    blocked = fba.tidyup(blocked)
                    print("DEBUG: AFTER blocked:", blocked)

                    if blocked == "":
                        print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                        continue

                    if blocked.count("*") > 1:
                        # -ACK!-oma also started obscuring domains without hash
                        searchres = _find_obscured(blocked)
                        print("DEBUG: searchres[]:", type(searchres))
                        if searchres is not None:
                            blocked = searchres[0]

                    print("DEBUG: Looking up instance by domain:", blocked)
                    fba.c.execute(
                        "SELECT domain FROM instances WHERE domain = ?", (blocked,)
                    )

                    if fba.c.fetchone() is None:
                        print("DEBUG: Domain wasn't found, adding:", blocked)
                        fba.add_instance(blocked)

                    timestamp = int(time.time())
                    fba.c.execute(
                        "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
                        (blocker, blocked, block_level),
                    )

                    if fba.c.fetchone() is None:
                        # The reason is not known yet; it is filled in by the
                        # "Reasons" pass below when the instance publishes one.
                        fba.block_instance(blocker, blocked, "unknown", block_level, timestamp, timestamp)

                        if block_level == "reject":
                            blockdict.append({"blocked": blocked, "reason": None})
                    else:
                        fba.update_last_seen(timestamp, blocker, blocked, block_level)

        fba.conn.commit()

        # Reasons
        if "mrf_simple_info" in federation:
            print("DEBUG: Found mrf_simple_info:", blocker)
            infos = {
                **federation["mrf_simple_info"],
                **federation.get("quarantined_instances_info", {}),
            }
            for block_level, info in infos.items():
                print("DEBUG: block_level, info.items():", block_level, len(info.items()))
                for blocked, reason in info.items():
                    print("DEBUG: BEFORE blocked:", blocked)
                    blocked = fba.tidyup(blocked)
                    print("DEBUG: AFTER blocked:", blocked)

                    if blocked == "":
                        print("WARNING: blocked is empty after fba.tidyup():", blocker, block_level)
                        continue

                    if blocked.count("*") > 1:
                        # same domain guess as above, but for reasons field
                        searchres = _find_obscured(blocked)
                        if searchres is not None:
                            blocked = searchres[0]

                    print("DEBUG: Updating block reason:", blocker, blocked, reason["reason"])
                    fba.update_block_reason(reason["reason"], blocker, blocked, block_level)

                    # Keep the pending bot post in sync with the stored reason.
                    for entry in blockdict:
                        if entry["blocked"] == blocked:
                            print("DEBUG: Updating entry reason:", blocked)
                            entry["reason"] = reason["reason"]

        fba.conn.commit()
    except Exception as e:
        print("error:", e, blocker, software)

    return blockdict


def _fetch_mastodon(blocker, software):
    """Record the domain blocks a Mastodon instance publishes.

    The ``/api/v1/instance/domain_blocks`` endpoint is tried first; when that
    fails for any reason, ``fba.get_mastodon_blocks`` is used instead.

    Returns the newly recorded "reject" blocks as a list of
    ``{"blocked": ..., "reason": ...}`` dicts. Any error is printed and stops
    processing of this blocker; entries collected so far are still returned.
    """
    blockdict = []
    try:
        # json endpoint for newer mastodongs
        try:
            levels = {
                "reject": [],
                "media_removal": [],
                "followers_only": [],
                "report_removal": [],
            }

            # handling CSRF, I've saw at least one server requiring it to access the endpoint
            print("DEBUG: Fetching meta:", blocker)
            meta = bs4.BeautifulSoup(
                reqto.get(f"https://{blocker}/about", headers=fba.headers, timeout=5).text,
                "html.parser",
            )
            try:
                csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
                print("DEBUG: Adding CSRF token:", blocker, csrf)
                reqheaders = {**fba.headers, "x-csrf-token": csrf}
            except Exception:
                # Best effort: without a token the request is sent unchanged.
                print("DEBUG: No CSRF token found, using normal headers:", blocker)
                reqheaders = fba.headers

            print("DEBUG: Quering API domain_blocks:", blocker)
            blocks = reqto.get(
                f"https://{blocker}/api/v1/instance/domain_blocks", headers=reqheaders, timeout=5
            ).json()

            print("DEBUG: blocks():", len(blocks))
            for block in blocks:
                # Key order matters: the entries are unpacked positionally below.
                entry = {'domain': block['domain'], 'hash': block['digest'], 'reason': block['comment']}

                print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
                level = _MASTODON_SEVERITIES.get(block['severity'])
                if level is None:
                    print("WARNING: Unknown severity:", block['severity'], block['domain'])
                else:
                    levels[level].append(entry)
        except Exception:
            # Exception rather than a bare except, so that KeyboardInterrupt
            # and SystemExit are not mistaken for a failed API request.
            print("DEBUG: Failed, Trying mastodon-specific fetches:", blocker)
            levels = fba.get_mastodon_blocks(blocker)

        print("DEBUG: json.items():", blocker, len(levels.items()))
        for block_level, blocks in levels.items():
            print("DEBUG: blocker,block_level,blocks():", blocker, block_level, len(blocks))
            for instance in blocks:
                # Relies on each entry holding exactly domain, hash and reason
                # in that order (presumably also true for the entries from
                # fba.get_mastodon_blocks() - TODO confirm).
                blocked, blocked_hash, reason = instance.values()
                print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason)

                blocked = fba.tidyup(blocked)
                print("DEBUG: blocked:", blocked)

                if blocked.count("*") < 1:
                    # No obsfucation for this instance
                    fba.c.execute(
                        "SELECT hash FROM instances WHERE domain = ? LIMIT 1", (blocked,)
                    )

                    if fba.c.fetchone() is None:
                        print("DEBUG: Hash wasn't found, adding:", blocked)
                        fba.add_instance(blocked)
                else:
                    # Doing the hash search for instance names as well to tidy up DB
                    fba.c.execute(
                        "SELECT domain FROM instances WHERE hash = ? LIMIT 1", (blocked_hash,)
                    )
                    searchres = fba.c.fetchone()

                    if searchres is not None:
                        print("DEBUG: Updating domain: ", searchres[0])
                        blocked = searchres[0]

                # Domains that are still obscured by more than one "*" are
                # stored under their hash instead of the unusable name.
                block_key = blocked if blocked.count("*") <= 1 else blocked_hash

                timestamp = int(time.time())
                fba.c.execute(
                    "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
                    (blocker, block_key, block_level),
                )

                if fba.c.fetchone() is None:
                    fba.block_instance(blocker, block_key, reason, block_level, timestamp, timestamp)

                    if block_level == "reject":
                        blockdict.append({"blocked": blocked, "reason": reason})
                else:
                    fba.update_last_seen(timestamp, blocker, block_key, block_level)

                if reason != '':
                    print("DEBUG: Updating block reason:", blocker, blocked, reason)
                    fba.update_block_reason(reason, blocker, block_key, block_level)

        fba.conn.commit()
    except Exception as e:
        print("error:", e, blocker, software)

    return blockdict


def _fetch_friendica_or_misskey(blocker, software):
    """Record the blocks of a Friendica or Misskey instance.

    The block list itself is obtained from ``fba.get_friendica_blocks`` or
    ``fba.get_misskey_blocks``, selected by ``software``.

    Returns the newly recorded "reject" blocks as a list of
    ``{"blocked": ..., "reason": ...}`` dicts. Any error is printed and stops
    processing of this blocker; entries collected so far are still returned.
    """
    blockdict = []
    try:
        if software == "friendica":
            levels = fba.get_friendica_blocks(blocker)
        else:
            levels = fba.get_misskey_blocks(blocker)

        for block_level, blocks in levels.items():
            for instance in blocks:
                # Relies on each entry holding exactly domain and reason, in
                # that order.
                blocked, reason = instance.values()
                blocked = fba.tidyup(blocked)

                print("BEFORE-blocked:", blocked)
                # Some friendica servers also obscure domains without hash;
                # some use question marks instead of asterisks, not sure if
                # that's dependent on version or not.
                for wildcard in ("*", "?"):
                    if wildcard in blocked:
                        searchres = _find_obscured(blocked, wildcard)
                        if searchres is not None:
                            blocked = searchres[0]

                print("AFTER-blocked:", blocked)
                fba.c.execute(
                    "SELECT domain FROM instances WHERE domain = ?", (blocked,)
                )

                if fba.c.fetchone() is None:
                    print("DEBUG: Hash wasn't found, adding:", blocked)
                    fba.add_instance(blocked)

                timestamp = int(time.time())
                # NOTE(review): unlike the other handlers this existence check
                # does not filter on block_level - confirm whether intended.
                fba.c.execute(
                    "SELECT * FROM blocks WHERE blocker = ? AND blocked = ?",
                    (blocker, blocked),
                )
                if fba.c.fetchone() is None:
                    fba.block_instance(blocker, blocked, reason, block_level, timestamp, timestamp)

                    if block_level == "reject":
                        blockdict.append({"blocked": blocked, "reason": reason})
                else:
                    fba.update_last_seen(timestamp, blocker, blocked, block_level)

                if reason != '':
                    print("DEBUG: Updating block reason:", blocker, blocked, reason)
                    fba.update_block_reason(reason, blocker, blocked, block_level)

        fba.conn.commit()
    except Exception as e:
        print("error:", e, blocker, software)

    return blockdict


def _fetch_gotosocial(blocker, software):
    """Record the suspended peers a GoToSocial instance publishes.

    Every peer returned by ``/api/v1/instance/peers?filter=suspended`` is
    stored with block level "reject".

    Returns the newly recorded blocks as a list of
    ``{"blocked": ..., "reason": ...}`` dicts. Any error is printed and stops
    processing of this blocker; entries collected so far are still returned.
    """
    blockdict = []
    try:
        # Blocks
        federation = reqto.get(
            f"https://{blocker}/api/v1/instance/peers?filter=suspended", headers=fba.headers, timeout=5
        ).json()

        if federation is None:
            print("WARNING: No valid response:", blocker)
        else:
            for peer in federation:
                print("DEBUG: peer(),[]:", len(peer), type(peer))
                # Iterating a JSON object yields its keys, so a string "error"
                # here means the response was an error object, not a peer list.
                if isinstance(peer, str) and peer == "error":
                    print("WARNING: Cannot continue, maybe authentication required?", blocker)
                    break

                blocked = peer["domain"].lower()
                print("DEBUG: blocked:", blocked)

                if blocked.count("*") > 0:
                    # GTS does not have hashes for obscured domains, so we have to guess it
                    searchres = _find_obscured(blocked)
                    if searchres is not None:
                        blocked = searchres[0]

                fba.c.execute(
                    "SELECT domain FROM instances WHERE domain = ?", (blocked,)
                )

                if fba.c.fetchone() is None:
                    print("DEBUG: Hash wasn't found, adding:", blocked)
                    fba.add_instance(blocked)

                fba.c.execute(
                    "SELECT * FROM blocks WHERE blocker = ? AND blocked = ? AND block_level = ?",
                    (blocker, blocked, "reject"),
                )
                timestamp = int(time.time())

                if fba.c.fetchone() is None:
                    fba.block_instance(blocker, blocked, "", "reject", timestamp, timestamp)
                    blockdict.append({"blocked": blocked, "reason": None})
                else:
                    fba.update_last_seen(timestamp, blocker, blocked, "reject")

                if "public_comment" in peer:
                    reason = peer["public_comment"]
                    print("DEBUG: Updating block reason:", blocker, blocked, reason)
                    fba.update_block_reason(reason, blocker, blocked, "reject")

                    # Keep the pending bot post in sync with the stored reason.
                    for entry in blockdict:
                        if entry["blocked"] == blocked:
                            entry["reason"] = reason

            fba.conn.commit()
    except Exception as e:
        print("error:", e, blocker, software)

    return blockdict


# One handler per supported software; each takes (blocker, software) and
# returns the list of newly recorded "reject" blocks.
_HANDLERS = {
    "pleroma": _fetch_pleroma,
    "mastodon": _fetch_mastodon,
    "friendica": _fetch_friendica_or_misskey,
    "misskey": _fetch_friendica_or_misskey,
    "gotosocial": _fetch_gotosocial,
}

fba.c.execute(
    "SELECT domain, software FROM instances WHERE software IN ('pleroma', 'mastodon', 'friendica', 'misskey', 'gotosocial')"
)

for blocker, software in fba.c.fetchall():
    print("DEBUG: blocker,software:", blocker, software)
    blocker = fba.tidyup(blocker)

    handler = _HANDLERS.get(software)
    if handler is None:
        print("WARNING: Unknown software:", software)
        blockdict = []
    else:
        print("INFO: blocker:", blocker)
        blockdict = handler(blocker, software)

    if fba.config["bot_enabled"] and len(blockdict) > 0:
        # send_bot_post is neither defined nor imported in this script, so the
        # bare name raised NameError; it is presumably provided by the fba
        # module like the other helpers - TODO confirm.
        fba.send_bot_post(blocker, blockdict)

fba.conn.close()