]> git.mxchange.org Git - fba.git/blob - fba/networks/pleroma.py
cd3c2fc7c3c03827bf450a22aecc3a8c57927520
[fba.git] / fba / networks / pleroma.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import inspect
18 import logging
19
20 import bs4
21
22 from fba import database
23 from fba import utils
24
25 from fba.helpers import blacklist
26 from fba.helpers import config
27 from fba.helpers import domain as domain_helper
28 from fba.helpers import tidyup
29
30 from fba.http import federation
31 from fba.http import network
32
33 from fba.models import blocks
34 from fba.models import instances
35
# Module-level logging setup.
# NOTE(review): basicConfig() runs at import time and configures the root
# logger - confirm this is intended for a library module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Language mapping X -> English
# Used by fetch_blocks_from_about() to translate an <h2> header text into one
# of the English section names that serve as keys of its block list.
language_mapping = {
    # English -> English
    "Reject": "Suspended servers",
}
44
def _normalize_block_level(domain: str, block_level: str):
    """Tidy a block level name and map it to the name used in the block list.

    Returns the block level to store, or None when entries of this level must
    be skipped (the name is empty after tidying, or it is 'accept').
    """
    block_level = tidyup.domain(block_level)
    logger.debug("block_level='%s' - AFTER!", block_level)

    if block_level == "":
        logger.warning("block_level is now empty!")
        return None
    elif block_level == "accept":
        logger.debug("domain='%s' skipping block_level='accept'", domain)
        return None
    elif block_level == "suspended":
        logger.debug("domain='%s', mapping 'suspended' to 'suspend'", domain)
        return "suspend"
    elif block_level == "silenced":
        logger.debug("domain='%s', mapping 'silenced' to 'silence'", domain)
        return "silence"

    return block_level

def _deobfuscate_blocked(domain: str, blocked: str):
    """Resolve a blocked domain that contains '*' or '?' wildcard characters.

    A name without wildcards is returned unchanged. Otherwise the blocker
    'domain' is flagged through instances.set_has_obfuscation() and the name
    is looked up with instances.deobfuscate(); '*' is checked before '?'.
    Returns the first column of the found row, or None when the lookup
    yielded no row and the entry has to be skipped.
    """
    for wildcard in ("*", "?"):
        if blocked.count(wildcard) == 0:
            continue

        logger.debug("domain='%s' uses obfuscated domains, marking ...", domain)
        instances.set_has_obfuscation(domain, True)

        # Obscured domain name with no hash
        row = instances.deobfuscate(wildcard, blocked)

        logger.debug("row[]='%s'", type(row))
        if row is None:
            logger.warning("Cannot deobfuscate blocked='%s',domain='%s' - SKIPPED!", blocked, domain)
            return None

        logger.debug("blocked='%s' de-obscured to '%s'", blocked, row[0])
        return row[0]

    return blocked

def _update_reason(blockdict: list, blocked: str, reason) -> None:
    """Set 'reason' on every already collected record whose 'blocked' matches."""
    logger.debug("Checking %d blockdict records ...", len(blockdict))
    for block in blockdict:
        logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
        if block["blocked"] == blocked:
            logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"])
            block["reason"] = reason

def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
    """Collect the domain blocks published by the instance 'domain'.

    The nodeinfo document is fetched through federation.fetch_nodeinfo() and
    its 'metadata' -> 'federation' element is evaluated:

    - 'mrf_simple' (merged with 'quarantined_instances' when present), or
      otherwise 'quarantined_instances' alone, provide the blocked domains
    - 'mrf_simple_info' (merged with 'quarantined_instances_info' when
      present), or otherwise 'quarantined_instances_info' alone, provide the
      reasons which are attached to the already collected records

    When none of these elements exists, fetch_blocks_from_about() is used as
    a fallback.

    Parameters:
        domain:       Blocker's domain, validated by domain_helper.raise_on()
        nodeinfo_url: Non-empty URL handed to federation.fetch_nodeinfo()

    Returns:
        A list of dicts with the keys 'blocker', 'blocked', 'reason' and
        'block_level'. It is empty when nodeinfo could not be fetched or has
        no 'metadata'/'federation' element.

    Raises:
        ValueError: 'nodeinfo_url' is not a string or is empty, or a reason
                    in 'mrf_simple_info' has an unsupported type
    """
    logger.debug("domain='%s',nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
    domain_helper.raise_on(domain)

    if not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    blockdict = []
    rows = None
    try:
        logger.debug("Fetching nodeinfo: domain='%s',nodeinfo_url='%s'", domain, nodeinfo_url)
        rows = federation.fetch_nodeinfo(domain, nodeinfo_url)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s'", domain)
        return list()
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s'", len(rows), domain)
        return list()
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s'", len(rows['metadata']), domain)
        return list()

    data = rows["metadata"]["federation"]
    found = False

    logger.debug("data[]='%s'", type(data))
    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True

        # 'quarantined_instances' is handled like one more block level, but
        # it may be missing next to 'mrf_simple', so fall back to no entries
        # instead of failing with a KeyError.
        levels = {
            **data["mrf_simple"],
            "quarantined_instances": data.get("quarantined_instances", []),
        }

        for block_level, blocklist in levels.items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = _normalize_block_level(domain, block_level)
            if block_level is None:
                continue

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            for blocked in blocklist:
                logger.debug("blocked='%s' - BEFORE!", blocked)
                blocked = tidyup.domain(blocked)
                logger.debug("blocked='%s' - AFTER!", blocked)

                if blocked == "":
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue
                elif blocked.endswith(".arpa"):
                    logger.debug("blocked='%s' is a reverse IP address - SKIPPED!", blocked)
                    continue
                elif blocked.endswith(".tld"):
                    logger.debug("blocked='%s' is a fake domain - SKIPPED!", blocked)
                    continue

                blocked = _deobfuscate_blocked(domain, blocked)
                if blocked is None:
                    continue

                logger.debug("blocked='%s'", blocked)
                if not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                blockdict.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": block_level,
                })

    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked)
            logger.debug("blocked='%s' - AFTER!", blocked)

            if blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue
            elif blocked.endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED!", blocked)
                continue
            elif blocked.endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED!", blocked)
                continue

            blocked = _deobfuscate_blocked(domain, blocked)
            if blocked is None:
                continue

            logger.debug("blocked='%s' - DEobfuscatED!", blocked)
            if not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Reasons
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True

        infos = {
            **data["mrf_simple_info"],
            **data.get("quarantined_instances_info", {}),
        }

        for block_level, info in infos.items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info))
            block_level = _normalize_block_level(domain, block_level)
            if block_level is None:
                continue

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked)
                logger.debug("blocked='%s' - AFTER!", blocked)

                # A reason is either a plain string or wrapped in a dict
                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"])
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked == "":
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue

                blocked = _deobfuscate_blocked(domain, blocked)
                if blocked is None:
                    continue

                logger.debug("blocked='%s' - DEobfuscatED!", blocked)
                if not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                _update_reason(blockdict, blocked, reason)

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        quarantined = data["quarantined_instances_info"]["quarantined_instances"]

        # Iterate key and value together: the tidied name may differ from the
        # key in the JSON, so it cannot be used to look the entry up again.
        for original, info in quarantined.items():
            logger.debug("blocked='%s' - BEFORE!", original)
            blocked = tidyup.domain(original)
            logger.debug("blocked='%s' - AFTER!", blocked)

            # Skip only this entry; later entries may still carry a reason
            if not isinstance(info, dict) or "reason" not in info:
                logger.warning("Cannot find reason for blocked='%s' in rows()=%d,domain='%s' - SKIPPED!", blocked, len(quarantined), domain)
                continue

            reason = info["reason"]
            logger.debug("reason='%s'", reason)

            if blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue

            blocked = _deobfuscate_blocked(domain, blocked)
            if blocked is None:
                continue

            logger.debug("blocked='%s'", blocked)
            if not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            _update_reason(blockdict, blocked, reason)
    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    if not found:
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d record(s) ...", len(blocklist))
            for block_level, records in blocklist.items():
                logger.debug("block_level='%s'", block_level)

                logger.debug("rows[%s]()=%d'", type(records), len(records))
                for record in records:
                    logger.debug("record[]='%s'", type(record))
                    blocked = tidyup.domain(record["blocked"])
                    reason  = tidyup.reason(record["reason"])
                    logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                    if blocked == "":
                        logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                        continue

                    blocked = _deobfuscate_blocked(domain, blocked)
                    if blocked is None:
                        continue

                    logger.debug("blocked='%s' - DEobfuscatED!", blocked)
                    if not utils.is_domain_wanted(blocked):
                        logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                        continue

                    logger.debug("Appending blocker='%s',blocked='%s',reason='%s',block_level='%s' ...",domain, blocked, reason, block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : blocked,
                        "reason"     : reason,
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict
448
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the domain blocks from the instance's /about page.

    The page is fetched with network.fetch_response() and parsed with
    BeautifulSoup. Each <h2> header is translated through 'language_mapping';
    when it names a known section, the rows of the next <table> (without its
    first row) are read as 'blocked domain' / 'reason' pairs.

    Parameters:
        domain: Domain to fetch from, validated by domain_helper.raise_on()

    Returns:
        A dict with the keys 'reject', 'media_removal' and 'followers_only',
        each holding a list of {'blocked': ..., 'reason': ...} dicts. An
        empty dict is returned when no page could be fetched.
    """
    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
    domain_helper.raise_on(domain)

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    doc = None
    for path in ["/instance/about/index.html"]:
        try:
            # Resetting doc type
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code='%d',response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        # Empty dict keeps the declared return type; callers that only check
        # the length or iterate see no difference to an empty list.
        return {}

    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    for header in doc.find_all("h2"):
        header_text = tidyup.reason(header.text)

        logger.debug("header_text='%s' - BEFORE!", header_text)
        if header_text in language_mapping:
            logger.debug("header_text='%s' - FOUND!", header_text)
            header_text = language_mapping[header_text]
        else:
            logger.warning("header_text='%s' not found in language mapping table", header_text)

        logger.debug("header_text='%s - AFTER!'", header_text)
        # Only an exact match is useful: all keys are mixed-case, so a
        # lower-cased header can never be one of them.
        if header_text not in blocklist:
            logger.warning("header_text='%s' not found in blocklist()=%d", header_text, len(blocklist))
            continue

        # find_next() searches everything following the header in document
        # order, not only its siblings, so a table nested in another element
        # (e.g. a dropdown menu) is still found.
        logger.debug("Found header_text='%s', importing domain blocks ...", header_text)
        table = header.find_next("table")
        if table is None:
            logger.warning("header_text='%s' is not followed by a table, domain='%s' - SKIPPED!", header_text, domain)
            continue

        # The first row is skipped (presumably the table's heading row)
        for line in table.find_all("tr")[1:]:
            logger.debug("line[]='%s'", type(line))
            cells = line.find_all("td")
            if len(cells) < 2:
                logger.warning("line has only %d cell(s), expected at least 2: domain='%s' - SKIPPED!", len(cells), domain)
                continue

            blocklist[header_text].append({
                "blocked": tidyup.domain(cells[0].text),
                "reason" : tidyup.reason(cells[1].text),
            })

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }