]> git.mxchange.org Git - fba.git/blob - fba/networks/pleroma.py
f82575e20903741507c11530ddde4b2455da57a7
[fba.git] / fba / networks / pleroma.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import inspect
18
19 import bs4
20 import validators
21
22 from fba import fba
23 from fba import federation
24 from fba import network
25
26 from fba.helpers import blacklist
27 from fba.helpers import config
28 from fba.helpers import tidyup
29
30 from fba.models import blocks
31 from fba.models import instances
32
# Translation table for <h2> header texts found on an instance's about page:
# each key is a header as it appears on the page, each value is the
# canonical section name that fetch_blocks_from_about() collects rows under.
language_mapping = {"Reject": "Suspended servers"}
37
def _resolve_blocked(blocked: str, domain: str, origin: str, nodeinfo_url: str, block_level: str, command: str, commit_first: bool = False):
    """Validate one (already tidied) blocked entry and ensure it is registered.

    The entry is skipped when it is empty, blacklisted, cannot be
    de-obscured, is not a valid domain name, or ends with '.arpa'/'.tld'.
    Entries containing '*' or '?' are looked up through
    instances.deobscure() and replaced by the stored domain name.

    Parameters:
        blocked      -- blocked domain as returned by tidyup.domain()
        domain       -- the blocking instance
        origin       -- only used in the warning message
        nodeinfo_url -- passed on to instances.add() for new instances
        block_level  -- only used in the warning message
        command      -- name recorded by instances.add() as the adding command
        commit_first -- commit the connection before adding a new instance

    Returns the usable domain name, or None when the entry must be skipped.

    All rebinding of 'blocked' and 'nodeinfo_url' is local to this call, so a
    de-obscured row never influences the entries processed afterwards.
    """
    if blocked == "":
        print("WARNING: blocked is empty after tidyup.domain():", domain, block_level)
        return None
    elif blacklist.is_blacklisted(blocked):
        return None

    # Obscured domain name with no hash: '*' takes precedence over '?'
    for wildcard in ("*", "?"):
        if wildcard in blocked:
            row = instances.deobscure(wildcard, blocked)

            if row is None:
                print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
                return None

            blocked      = row[0]
            nodeinfo_url = row[2]
            break

    if not validators.domain(blocked):
        print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!")
        return None
    elif blocked.endswith(".arpa"):
        print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
        return None
    elif blocked.endswith(".tld"):
        print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
        return None
    elif blacklist.is_blacklisted(blocked):
        # The de-obscured name may be blacklisted even if the obscured one was not
        return None
    elif not instances.is_registered(blocked):
        if commit_first:
            # Commit changes
            fba.connection.commit()

        instances.add(blocked, domain, command, nodeinfo_url)

    return blocked

def _store_blocks(domain: str, origin: str, nodeinfo_url: str, block_level: str, blocklist, command: str):
    """Record every usable entry of 'blocklist' as blocked by 'domain'.

    New blocks are added without a reason, already known blocks only get
    their last-seen timestamp refreshed.
    """
    for blocked in blocklist:
        blocked = _resolve_blocked(
            tidyup.domain(blocked),
            domain,
            origin,
            nodeinfo_url,
            block_level,
            command,
            commit_first=True
        )

        if blocked is None:
            continue
        elif not blocks.is_instance_blocked(domain, blocked, block_level):
            blocks.add_instance(domain, blocked, None, block_level)
        else:
            blocks.update_last_seen(domain, blocked, block_level)

def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Fetch the block list of a Pleroma instance and store it.

    The nodeinfo document is fetched through federation.fetch_nodeinfo() and
    its 'metadata' -> 'federation' section is evaluated in two passes:

    1. 'mrf_simple' (plus 'quarantined_instances' when present), or only
       'quarantined_instances', provide the blocked domains per block level.
    2. 'mrf_simple_info' (plus 'quarantined_instances_info' when present), or
       only 'quarantined_instances_info', provide the block reasons.

    When none of these elements exists, the instance's about page is parsed
    through fetch_blocks_from_about() instead.

    Parameters:
        domain       -- the instance to fetch blocks from (non-empty str)
        origin       -- non-empty str or None
        nodeinfo_url -- nodeinfo URL of 'domain' (non-empty str)

    Raises ValueError on invalid parameters or when a block reason is neither
    a str, a dict with key 'reason', nor None. Returns nothing.

    The list of 'reject' blocks that used to be collected here was never read
    or returned (see the former '@TODO Unused blockdict') and is gone.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # Resolved here because the helpers would otherwise report their own name
    command = inspect.currentframe().f_code.co_name

    rows = None
    try:
        rows = federation.fetch_nodeinfo(domain, nodeinfo_url)
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during fetching nodeinfo")
        instances.set_last_error(domain, exception)

    if rows is None:
        print("WARNING: Could not fetch nodeinfo from domain:", domain)
        return
    elif "metadata" not in rows:
        print(f"WARNING: rows()={len(rows)} does not have key 'metadata', domain='{domain}'")
        return
    elif "federation" not in rows["metadata"]:
        print(f"WARNING: rows()={len(rows['metadata'])} does not have key 'federation', domain='{domain}'")
        return

    data = rows["metadata"]["federation"]
    found = False

    # Pass 1: blocked domains
    if "mrf_simple" in data:
        found = True
        levels = {
            **data["mrf_simple"],
            # Not every instance publishes this key next to 'mrf_simple'
            "quarantined_instances": data.get("quarantined_instances", []),
        }

        for block_level, blocklist in levels.items():
            block_level = tidyup.domain(block_level)

            if block_level == "":
                print("WARNING: block_level is now empty!")
                continue
            elif block_level == "accept":
                continue

            _store_blocks(domain, origin, nodeinfo_url, block_level, blocklist, command)
    elif "quarantined_instances" in data:
        found = True
        _store_blocks(domain, origin, nodeinfo_url, "quarantined", data["quarantined_instances"], command)
    else:
        print(f"WARNING: Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='{domain}'")

    fba.connection.commit()

    # Pass 2: reasons
    if "mrf_simple_info" in data:
        found = True
        infos = {
            **data["mrf_simple_info"],
            **data.get("quarantined_instances_info", {}),
        }

        for block_level, info in infos.items():
            block_level = tidyup.domain(block_level)

            if block_level == "":
                print("WARNING: block_level is now empty!")
                continue
            elif block_level == "accept":
                continue

            for blocked, reason in info.items():
                blocked = tidyup.domain(blocked)

                if isinstance(reason, str):
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    reason = tidyup.reason(reason["reason"])
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                blocked = _resolve_blocked(blocked, domain, origin, nodeinfo_url, block_level, command)
                if blocked is None:
                    continue

                blocks.update_reason(reason, domain, blocked, block_level)
    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        found = True
        block_level = "quarantined"
        rows = data["quarantined_instances_info"]["quarantined_instances"]

        # The reason is taken from the raw key's value, a lookup with the
        # tidied name would miss every key that tidyup.domain() changes.
        for raw_blocked, info in rows.items():
            blocked = tidyup.domain(raw_blocked)

            if "reason" not in info:
                print(f"WARNING: Cannot find blocked='{blocked}' in rows()={len(rows)},domain='{domain}'")
                break

            reason = info["reason"]

            blocked = _resolve_blocked(blocked, domain, origin, nodeinfo_url, block_level, command)
            if blocked is None:
                continue

            blocks.update_reason(reason, domain, blocked, block_level)
    else:
        print(f"WARNING: Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='{domain}'")

    if not found:
        # No usable JSON elements, continue with the about page
        blocklist = fetch_blocks_from_about(domain)

        if blocklist:
            print(f"INFO: Checking {len(blocklist)} record(s) ...")
            for block_level, records in blocklist.items():
                for record in records:
                    blocked = tidyup.domain(record["blocked"])
                    reason  = tidyup.reason(record["reason"])

                    blocked = _resolve_blocked(blocked, domain, origin, nodeinfo_url, block_level, command)
                    if blocked is None:
                        continue
                    elif not blocks.is_instance_blocked(domain, blocked, block_level):
                        blocks.add_instance(domain, blocked, reason, block_level)
                    else:
                        # Known block: only the reason is refreshed here
                        blocks.update_reason(reason, domain, blocked, block_level)

    fba.connection.commit()
510
def fetch_blocks_from_about(domain: str) -> dict:
    """Parse the block tables from the about page of an instance.

    The page '/instance/about/index.html' is fetched from 'domain'. For every
    <h2> header whose text (translated through language_mapping when listed
    there) names a known section, the rows of the next <table> after the
    header are collected, skipping its first (header) row.

    Parameters:
        domain -- the instance to fetch the page from (non-empty str)

    Returns a dict with the keys 'reject', 'media_removal' and
    'followers_only', each holding a list of {'blocked': ..., 'reason': ...}
    records. The same shape (with empty lists) is returned when the page
    could not be fetched.

    Raises ValueError when 'domain' is not a str or is empty.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    doc = None
    for path in ["/instance/about/index.html"]:
        try:
            # Resetting doc type
            doc = None

            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            if not response.ok or response.text.strip() == "":
                print(f"WARNING: path='{path}' does not exist on domain='{domain}' - SKIPPED!")
                continue

            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            if doc.find("h2") is not None:
                # Found a page with section headers, no need to try other paths
                break

        except network.exceptions as exception:
            print("ERROR: Cannot fetch from domain:", domain, exception)
            instances.set_last_error(domain, exception)
            break

    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    if doc is None:
        print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!")
    else:
        for header in doc.find_all("h2"):
            header_text = tidyup.reason(header.text)

            if header_text in language_mapping:
                header_text = language_mapping[header_text]
            else:
                print(f"WARNING: header_text='{header_text}' not found in language mapping table")

            if header_text not in blocklist:
                print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
                continue

            # find_next() also reaches tables that are not direct siblings,
            # e.g. on instances that hide the lists in a dropdown menu
            table = header.find_next("table")
            if table is None:
                print(f"WARNING: header_text='{header_text}' is not followed by a table, domain='{domain}' - SKIPPED!")
                continue

            for line in table.find_all("tr")[1:]:
                cells = line.find_all("td")
                if len(cells) < 2:
                    # Not a 'domain | reason' row
                    continue

                blocklist[header_text].append({
                    "blocked": tidyup.domain(cells[0].text),
                    "reason" : tidyup.reason(cells[1].text),
                })

    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }