]> git.mxchange.org Git - fba.git/blob - fba/networks/pleroma.py
f132ba348c98842f2a97d0d6221646168a01c390
[fba.git] / fba / networks / pleroma.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import logging
18
19 import bs4
20
21 from fba import database
22 from fba import utils
23
24 from fba.helpers import config
25 from fba.helpers import domain as domain_helper
26 from fba.helpers import tidyup
27
28 from fba.http import federation
29 from fba.http import network
30
31 from fba.models import instances
32
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Translation table for section headers found on an instance's about page:
# the key is the header text as it appears on the page, the value is the
# English header name it is rewritten to before the block list lookup.
language_mapping = {
    # English -> English
    "Reject": "Suspended servers",
}
41
def _normalize_block_level(domain: str, block_level: str):
    """Tidy a raw block level and map it to the name used in the block list.

    Returns the normalized block level, or None when the level has to be
    skipped (it became empty after tidying, or it is 'accept').
    """
    block_level = tidyup.domain(block_level)
    logger.debug("block_level='%s' - AFTER!", block_level)

    if block_level == "":
        logger.warning("block_level is now empty!")
        return None
    elif block_level == "accept":
        logger.debug("domain='%s' skipping block_level='accept'", domain)
        return None
    elif block_level == "suspended":
        logger.debug("domain='%s', mapping 'suspended' to 'suspend'", domain)
        return "suspend"
    elif block_level == "silenced":
        logger.debug("domain='%s', mapping 'silenced' to 'silence'", domain)
        return "silence"

    return block_level

def _resolve_blocked(domain: str, blocked: str, block_level: str, skip_special: bool = False):
    """Validate an already tidied blocked domain and de-obfuscate wildcards.

    With skip_special=True, names ending in '.arpa' or '.tld' are rejected
    before any wildcard handling takes place. When the name contains '*' (or
    otherwise '?'), the blocker is flagged through
    instances.set_has_obfuscation() and instances.deobfuscate() is asked for
    the real name.

    Returns the usable domain name, or None when the entry has to be skipped.
    """
    if blocked == "":
        logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
        return None

    if skip_special:
        if blocked.endswith(".arpa"):
            logger.debug("blocked='%s' is a reverse IP address - SKIPPED!", blocked)
            return None
        elif blocked.endswith(".tld"):
            logger.debug("blocked='%s' is a fake domain - SKIPPED!", blocked)
            return None

    # '*' takes precedence over '?'; only the first wildcard kind found is used
    for char in ("*", "?"):
        if blocked.count(char) == 0:
            continue

        logger.debug("domain='%s' uses obfuscated domains, marking ...", domain)
        instances.set_has_obfuscation(domain, True)

        # Obscured domain name with no hash
        row = instances.deobfuscate(char, blocked)

        logger.debug("row[]='%s'", type(row))
        if row is None:
            logger.warning("Cannot deobfuscate blocked='%s',domain='%s' - SKIPPED!", blocked, domain)
            return None

        logger.debug("blocked='%s' de-obscured to '%s'", blocked, row[0])
        return row[0]

    return blocked

def _set_reason(blockdict: list, blocked: str, reason) -> None:
    """Store reason on every collected record whose 'blocked' equals blocked."""
    logger.debug("Checking %d blockdict records ...", len(blockdict))
    for block in blockdict:
        logger.debug("block[blocked]='%s',blocked='%s'", block['blocked'], blocked)
        if block['blocked'] == blocked:
            logger.debug("Updating reason='%s' for blocker='%s'", reason, block['blocked'])
            block['reason'] = reason

def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
    """Collect the domain blocks an instance publishes through its nodeinfo.

    The block lists are read from rows["metadata"]["federation"] of the
    fetched nodeinfo: 'mrf_simple' (merged with 'quarantined_instances' when
    present) or, failing that, 'quarantined_instances' alone. Reasons are then
    filled in from 'mrf_simple_info' / 'quarantined_instances_info'. When none
    of these keys exist, the instance's about page is parsed instead via
    fetch_blocks_from_about().

    Side effects visible here: network errors are recorded with
    instances.set_last_error(), obfuscating blockers are flagged with
    instances.set_has_obfuscation() and database.connection.commit() is
    invoked once the block lists have been processed.

    Args:
        domain: The blocker, i.e. the instance whose block list is fetched.
            Validated by domain_helper.raise_on(), which presumably raises on
            invalid values.
        nodeinfo_url: Non-empty URL handed to federation.fetch_nodeinfo().

    Returns:
        A list of dicts with the keys 'blocker', 'blocked', 'reason' and
        'block_level'. Empty when nodeinfo cannot be fetched or lacks the
        'metadata' / 'federation' keys.

    Raises:
        ValueError: nodeinfo_url is not a non-empty string, or a reason in
            'mrf_simple_info' is neither a string, a dict with a 'reason'
            key, nor None.
    """
    logger.debug("domain='%s',nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
    domain_helper.raise_on(domain)

    if not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    blockdict = list()
    rows = None
    try:
        logger.debug("Fetching nodeinfo: domain='%s',nodeinfo_url='%s'", domain, nodeinfo_url)
        rows = federation.fetch_nodeinfo(domain, nodeinfo_url)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s'", domain)
        return list()
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s'", len(rows), domain)
        return list()
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s'", len(rows['metadata']), domain)
        return list()

    data = rows["metadata"]["federation"]
    found = False

    logger.debug("data[]='%s'", type(data))
    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True

        # Work on a copy so the nodeinfo payload is left untouched, and only
        # merge 'quarantined_instances' when the instance exposes it; reading
        # the key unconditionally raised KeyError for instances without it.
        levels = dict(data["mrf_simple"])
        if "quarantined_instances" in data:
            levels["quarantined_instances"] = data["quarantined_instances"]

        for block_level, blocklist in levels.items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = _normalize_block_level(domain, block_level)
            if block_level is None:
                continue

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            for blocked in blocklist:
                logger.debug("blocked='%s' - BEFORE!", blocked)
                blocked = tidyup.domain(blocked)
                logger.debug("blocked='%s' - AFTER!", blocked)

                blocked = _resolve_blocked(domain, blocked, block_level, skip_special=True)
                if blocked is None:
                    continue

                logger.debug("blocked='%s'", blocked)
                if not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                blockdict.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": block_level,
                })

    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked)
            logger.debug("blocked='%s' - AFTER!", blocked)

            blocked = _resolve_blocked(domain, blocked, block_level, skip_special=True)
            if blocked is None:
                continue

            logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
            if not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Reasons
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True
        for block_level, info in (
            {
                **data["mrf_simple_info"],
                **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
            }
        ).items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
            block_level = _normalize_block_level(domain, block_level)
            if block_level is None:
                continue

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked)
                logger.debug("blocked='%s' - AFTER!", blocked)

                # The reason is either a plain string or wrapped in a dict
                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"])
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                blocked = _resolve_blocked(domain, blocked, block_level)
                if blocked is None:
                    continue

                logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
                if not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                _set_reason(blockdict, blocked, reason)

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        rows = data["quarantined_instances_info"]["quarantined_instances"]
        for raw_blocked in rows:
            logger.debug("blocked='%s' - BEFORE!", raw_blocked)
            blocked = tidyup.domain(raw_blocked)
            logger.debug("blocked='%s' - AFTER!", blocked)

            # The entry must be looked up under the key exactly as the API
            # returned it; using the tidied name missed every key that
            # tidyup.domain() had changed and aborted the whole loop.
            if "reason" not in rows[raw_blocked]:
                logger.warning("Cannot find blocked='%s' in rows()=%d,domain='%s' - BREAK!", blocked, len(rows), domain)
                break

            reason = rows[raw_blocked]["reason"]
            logger.debug("reason='%s'", reason)

            blocked = _resolve_blocked(domain, blocked, block_level)
            if blocked is None:
                continue

            logger.debug("blocked='%s'", blocked)
            if not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            _set_reason(blockdict, blocked, reason)
    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    if not found:
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d record(s) ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("block_level='%s'", block_level)

                rows = blocklist[block_level]
                logger.debug("rows[%s]()=%d'", type(rows), len(rows))
                for record in rows:
                    logger.debug("record[]='%s'", type(record))
                    blocked = tidyup.domain(record["blocked"])
                    reason  = tidyup.reason(record["reason"])
                    logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                    blocked = _resolve_blocked(domain, blocked, block_level)
                    if blocked is None:
                        continue

                    logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
                    if not utils.is_domain_wanted(blocked):
                        logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                        continue

                    logger.debug("Appending blocker='%s',blocked='%s',reason='%s',block_level='%s' ...",domain, blocked, reason, block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : blocked,
                        "reason"     : reason,
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict
445
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the block lists from an instance's about page.

    Fetches '/instance/about/index.html' from the given domain, parses it
    with BeautifulSoup and, for every 'h2' header naming a known section,
    reads the rows of the next table that follows it (the first row is
    treated as the table head and skipped).

    Args:
        domain: Instance to fetch the page from. Validated by
            domain_helper.raise_on(), which presumably raises on invalid
            values.

    Returns:
        A dict with the keys 'reject', 'media_removal' and 'followers_only',
        each holding a list of {'blocked': ..., 'reason': ...} dicts. An
        empty dict is returned when no page could be fetched.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    doc = None
    for path in ["/instance/about/index.html"]:
        try:
            # Resetting doc type
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code='%d',response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        # Empty dict (not a list) so the result matches the annotated type
        return dict()

    # Lower-cased section name -> canonical key, for case-insensitive matching
    sections = {name.lower(): name for name in blocklist}

    for header in doc.find_all("h2"):
        header_text = tidyup.reason(header.text)

        logger.debug("header_text='%s' - BEFORE!", header_text)
        if header_text in language_mapping:
            logger.debug("header_text='%s' - FOUND!", header_text)
            header_text = language_mapping[header_text]
        else:
            logger.warning("header_text='%s' not found in language mapping table", header_text)

        logger.debug("header_text='%s' - AFTER!", header_text)
        if header_text in blocklist:
            section = header_text
        else:
            section = sections.get(header_text.lower())

        if section is None:
            logger.warning("header_text='%s' not found in blocklist()=%d", header_text, len(blocklist))
            continue

        # find_next() searches everything after the header in document order,
        # not only its siblings, so nested tables are found as well
        logger.debug("Found header_text='%s', importing domain blocks ...", header_text)
        table = header.find_next("table")
        if table is None:
            logger.warning("No table found after header_text='%s',domain='%s' - SKIPPED!", header_text, domain)
            continue

        for line in table.find_all("tr")[1:]:
            logger.debug("line[]='%s'", type(line))
            cells = line.find_all("td")
            if len(cells) < 2:
                # A row needs a domain cell and a reason cell
                logger.debug("line has only %d cell(s) - SKIPPED!", len(cells))
                continue

            blocklist[section].append({
                "blocked": tidyup.domain(cells[0].text),
                "reason" : tidyup.reason(cells[1].text),
            })

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }