1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
22 from fba import blacklist
23 from fba import config
25 from fba import federation
26 from fba import network
28 from fba.helpers import tidyup
30 from fba.models import blocks
31 from fba.models import instances
# Entry of the header translation table: fetch_blocks_from_about() looks up the
# text of each <h2> header in language_mapping and replaces it with the mapped
# value, so "Reject" is handled as the "Suspended servers" section.
35 "Reject": "Suspended servers",
# Fetches blocking data for 'domain' from its nodeinfo document and stores it
# through the 'blocks' and 'instances' models.
#
# The data is read from rows["metadata"]["federation"] of the nodeinfo reply:
#  - "mrf_simple" / "quarantined_instances" supply the blocked domains per
#    block level,
#  - "mrf_simple_info" / "quarantined_instances_info" supply the block reasons.
# fetch_blocks_from_about() is used as a further source (see the DEBUG note
# near the end: "continuing with /about page").
#
# Parameters:
#  domain       - instance to fetch blocks from, must be a 'str'; an empty
#                 value is rejected (see error message)
#  origin       - 'str' or None; an empty value is rejected (see error message)
#  nodeinfo_url - passed to federation.fetch_nodeinfo(), must be a non-empty 'str'
#
# Raises:
#  ValueError - when a parameter has the wrong type or is empty, or when a
#               block reason is neither a 'str', a dict with key "reason", nor None
38 def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
39 # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
# Parameter validation: wrong types and empty values raise ValueError
40 if not isinstance(domain, str):
41 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
43 raise ValueError("Parameter 'domain' is empty")
# origin is the only parameter that may be None
44 elif not isinstance(origin, str) and origin is not None:
45 raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
47 raise ValueError("Parameter 'origin' is empty")
48 elif not isinstance(nodeinfo_url, str):
49 raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
50 elif nodeinfo_url == "":
51 raise ValueError("Parameter 'nodeinfo_url' is empty")
53 # @TODO Unused blockdict
57 # DEBUG: print(f"DEBUG: Fetching nodeinfo: domain='{domain}',nodeinfo_url='{nodeinfo_url}'")
58 rows = federation.fetch_nodeinfo(domain, nodeinfo_url)
# Network failures are logged and stored as the last error of 'domain'
59 except network.exceptions as exception:
60 print(f"WARNING: Exception '{type(exception)}' during fetching nodeinfo")
61 instances.set_last_error(domain, exception)
# The nodeinfo reply must provide rows["metadata"]["federation"]; each missing
# level is reported with its own warning
64 print("WARNING: Could not fetch nodeinfo from domain:", domain)
66 elif "metadata" not in rows:
67 print(f"WARNING: rows()={len(rows)} does not have key 'metadata', domain='{domain}'")
69 elif "federation" not in rows["metadata"]:
70 print(f"WARNING: rows()={len(rows['metadata'])} does not have key 'federation', domain='{domain}'")
# 'data' is the federation section that all following lookups work on
73 data = rows["metadata"]["federation"]
76 # DEBUG: print(f"DEBUG: data[]='{type(data)}'")
# --- Pass 1a: blocked domains per block level from "mrf_simple" ---
77 if "mrf_simple" in data:
78 # DEBUG: print("DEBUG: Found mrf_simple:", domain)
# Iterates (block_level, blocklist) pairs; the iterable also carries a
# "quarantined_instances" entry taken from data
80 for block_level, blocklist in (
84 "quarantined_instances": data["quarantined_instances"]
88 # DEBUG: print("DEBUG: block_level, blocklist():", block_level, len(blocklist))
# The block level name is cleaned with the same helper as domain names
89 block_level = tidyup.domain(block_level)
90 # DEBUG: print("DEBUG: BEFORE block_level:", block_level)
93 print("WARNING: block_level is now empty!")
# Block level "accept" is skipped (see DEBUG message below)
95 elif block_level == "accept":
96 # DEBUG: print(f"DEBUG: domain='{domain}' skipping block_level='accept'")
99 # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from domain='{domain}',block_level='{block_level}' ...")
100 if len(blocklist) > 0:
101 for blocked in blocklist:
102 # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
103 blocked = tidyup.domain(blocked)
104 # DEBUG: print("DEBUG: AFTER blocked:", blocked)
107 print("WARNING: blocked is empty after tidyup.domain():", domain, block_level)
109 elif blacklist.is_blacklisted(blocked):
110 # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
# Domain names obscured with "*" or "?" are resolved through
# instances.deobscure(); row[0] is the de-obscured name (see DEBUG message)
112 elif blocked.count("*") > 0:
113 # Obscured domain name with no hash
114 row = instances.deobscure("*", blocked)
116 # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
118 print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
121 # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
# NOTE(review): this rebinds the nodeinfo_url parameter; the new value is what
# instances.add() below receives, presumably also in later iterations unless
# it is reset elsewhere - confirm this is intended
124 nodeinfo_url = row[2]
125 elif blocked.count("?") > 0:
126 # Obscured domain name with no hash
127 row = instances.deobscure("?", blocked)
129 # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
131 print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
134 # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
137 nodeinfo_url = row[2]
139 # DEBUG: print(f"DEBUG: blocked='{blocked}'")
# Sanity checks on the (possibly de-obscured) name: it must be a valid domain
# and must not end with ".arpa" or ".tld"
140 if not validators.domain(blocked):
141 print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!")
143 elif blocked.endswith(".arpa"):
144 print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
146 elif blocked.endswith(".tld"):
147 print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
# Unknown blocked domains are registered; pending changes are committed first
149 elif not instances.is_registered(blocked):
151 fba.connection.commit()
153 # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
# The third argument is the name of this function, obtained from the current frame
154 instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
# A block that is not stored yet is added without a reason (None); reasons are
# filled in by the "mrf_simple_info" pass further down
156 if not blocks.is_instance_blocked(domain, blocked, block_level):
157 # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
158 blocks.add_instance(domain, blocked, None, block_level)
160 if block_level == "reject":
161 # DEBUG: print("DEBUG: Adding to blockdict:", blocked)
167 # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
168 blocks.update_last_seen(domain, blocked, block_level)
# --- Pass 1b: only "quarantined_instances" present; every entry gets the
# fixed block level "quarantined". Entries are handled like in pass 1a ---
169 elif "quarantined_instances" in data:
170 # DEBUG: print(f"DEBUG: Found 'quarantined_instances' in JSON response: domain='{domain}'")
172 block_level = "quarantined"
174 for blocked in data["quarantined_instances"]:
175 # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
176 blocked = tidyup.domain(blocked)
177 # DEBUG: print("DEBUG: AFTER blocked:", blocked)
180 print("WARNING: blocked is empty after tidyup.domain():", domain, block_level)
182 elif blacklist.is_blacklisted(blocked):
183 # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
185 elif blocked.count("*") > 0:
186 # Obscured domain name with no hash
187 row = instances.deobscure("*", blocked)
189 # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
191 print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
194 # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
197 nodeinfo_url = row[2]
198 elif blocked.count("?") > 0:
199 # Obscured domain name with no hash
200 row = instances.deobscure("?", blocked)
202 # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
204 print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
207 # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
210 nodeinfo_url = row[2]
212 # DEBUG: print(f"DEBUG: blocked='{blocked}'")
213 if not validators.domain(blocked):
214 print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!")
216 elif blocked.endswith(".arpa"):
217 print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
219 elif blocked.endswith(".tld"):
220 print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
222 elif not instances.is_registered(blocked):
224 fba.connection.commit()
226 # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
227 instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
229 if not blocks.is_instance_blocked(domain, blocked, block_level):
230 # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
231 blocks.add_instance(domain, blocked, None, block_level)
# block_level is always "quarantined" in this branch, so this comparison
# cannot be true here
233 if block_level == "reject":
234 # DEBUG: print("DEBUG: Adding to blockdict:", blocked)
240 # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
241 blocks.update_last_seen(domain, blocked, block_level)
243 print(f"WARNING: Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='{domain}'")
245 # DEBUG: print("DEBUG: Committing changes ...")
246 fba.connection.commit()
# --- Pass 2a: block reasons from "mrf_simple_info", merged with
# "quarantined_instances_info" when that key exists ---
249 if "mrf_simple_info" in data:
250 # DEBUG: print("DEBUG: Found mrf_simple_info:", domain)
252 for block_level, info in (
254 **data["mrf_simple_info"],
255 **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
258 # DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items()))
259 block_level = tidyup.domain(block_level)
260 # DEBUG: print("DEBUG: BEFORE block_level:", block_level)
262 if block_level == "":
263 print("WARNING: block_level is now empty!")
265 elif block_level == "accept":
266 # DEBUG: print(f"DEBUG: domain='{domain}' skipping block_level='accept'")
269 # DEBUG: print(f"DEBUG: Checking {len(info.items())} entries from domain='{domain}',software='pleroma',block_level='{block_level}' ...")
# 'info' maps a blocked domain to its reason
270 for blocked, reason in info.items():
271 # DEBUG: print(f"DEBUG: blocked='{blocked}',reason[{type(reason)}]='{reason}' - BEFORE!")
272 blocked = tidyup.domain(blocked)
# The reason is either a plain string or a dict carrying it under "reason";
# None is passed through unchanged, any other type raises ValueError.
# NOTE(review): the ValueError is raised in the middle of processing - confirm
# that aborting is preferred over skipping the entry
274 if isinstance(reason, str):
275 # DEBUG: print("DEBUG: reason[] is a string")
276 reason = tidyup.reason(reason)
277 elif isinstance(reason, dict) and "reason" in reason:
278 # DEBUG: print("DEBUG: reason[] is a dict")
279 reason = tidyup.reason(reason["reason"])
280 elif reason is not None:
281 raise ValueError(f"Cannot handle reason[]='{type(reason)}'")
283 # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")
286 print("WARNING: blocked is empty after tidyup.domain():", domain, block_level)
288 elif blacklist.is_blacklisted(blocked):
289 # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
291 elif blocked.count("*") > 0:
292 # Obscured domain name with no hash
293 row = instances.deobscure("*", blocked)
295 # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
297 print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
300 # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
303 nodeinfo_url = row[2]
304 elif blocked.count("?") > 0:
305 # Obscured domain name with no hash
306 row = instances.deobscure("?", blocked)
308 # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
310 print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
313 # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
316 nodeinfo_url = row[2]
318 # DEBUG: print(f"DEBUG: blocked='{blocked}'")
319 if not validators.domain(blocked):
320 print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!")
322 elif blocked.endswith(".arpa"):
323 print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
325 elif blocked.endswith(".tld"):
326 print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
328 elif not instances.is_registered(blocked):
329 # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
330 instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
# Store the reason for the block and mirror it into matching blockdict entries
332 # DEBUG: print(f"DEBUG: Updating block reason: reason='{reason}',domain='{domain}',blocked='{blocked}',block_level='{block_level}'")
333 blocks.update_reason(reason, domain, blocked, block_level)
335 # DEBUG: print(f"DEBUG: blockdict()={len(blockdict)}")
336 for entry in blockdict:
337 if entry["blocked"] == blocked:
338 # DEBUG: print(f"DEBUG: Updating entry reason: blocked='{blocked}',reason='{reason}'")
339 entry["reason"] = reason
# --- Pass 2b: reasons for quarantined instances only; they are stored under
# data["quarantined_instances_info"]["quarantined_instances"] and keyed by the
# blocked domain ---
341 elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
342 # DEBUG: print(f"DEBUG: Found 'quarantined_instances_info' in JSON response: domain='{domain}'")
344 block_level = "quarantined"
346 #print(data["quarantined_instances_info"])
347 rows = data["quarantined_instances_info"]["quarantined_instances"]
349 # DEBUG: print("DEBUG: BEFORE blocked:", blocked)
350 blocked = tidyup.domain(blocked)
351 # DEBUG: print("DEBUG: AFTER blocked:", blocked)
# The lookup uses the tidied name, so an entry whose key was changed by
# tidyup.domain() is reported as not found
353 if blocked not in rows or "reason" not in rows[blocked]:
354 print(f"WARNING: Cannot find blocked='{blocked}' in rows()={len(rows)},domain='{domain}'")
357 reason = rows[blocked]["reason"]
358 # DEBUG: print(f"DEBUG: reason='{reason}'")
361 print("WARNING: blocked is empty after tidyup.domain():", domain, block_level)
363 elif blacklist.is_blacklisted(blocked):
364 # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
366 elif blocked.count("*") > 0:
367 # Obscured domain name with no hash
368 row = instances.deobscure("*", blocked)
370 # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
372 print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
375 # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
378 nodeinfo_url = row[2]
379 elif blocked.count("?") > 0:
380 # Obscured domain name with no hash
381 row = instances.deobscure("?", blocked)
383 # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
385 print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
388 # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
391 nodeinfo_url = row[2]
393 # DEBUG: print(f"DEBUG: blocked='{blocked}'")
394 if not validators.domain(blocked):
395 print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!")
397 elif blocked.endswith(".arpa"):
398 print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
400 elif blocked.endswith(".tld"):
401 print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
403 elif not instances.is_registered(blocked):
404 # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
405 instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
407 # DEBUG: print(f"DEBUG: Updating block reason: reason='{reason}',domain='{domain}',blocked='{blocked}',block_level='{block_level}'")
408 blocks.update_reason(reason, domain, blocked, block_level)
410 # DEBUG: print(f"DEBUG: blockdict()={len(blockdict)}")
411 for entry in blockdict:
412 if entry["blocked"] == blocked:
413 # DEBUG: print(f"DEBUG: Updating entry reason: blocked='{blocked}',reason='{reason}'")
414 entry["reason"] = reason
416 print(f"WARNING: Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='{domain}'")
# --- Pass 3: block list scraped from the /about page; here each record
# already carries both the blocked domain and the reason ---
419 # DEBUG: print(f"DEBUG: Did not find any useable JSON elements, domain='{domain}', continuing with /about page ...")
420 blocklist = fetch_blocks_from_about(domain)
422 # DEBUG: print(f"DEBUG: blocklist()={len(blocklist)}")
423 if len(blocklist) > 0:
424 print(f"INFO: Checking {len(blocklist)} record(s) ...")
# blocklist maps a block level to a list of {"blocked", "reason"} records
425 for block_level in blocklist:
426 # DEBUG: print(f"DEBUG: block_level='{block_level}'")
427 rows = blocklist[block_level]
428 # DEBUG: print(f"DEBUG: rows['{type(rows)}]()={len(rows)}'")
430 # DEBUG: print(f"DEBUG: record[]='{type(record)}'")
431 blocked = tidyup.domain(record["blocked"])
432 reason = tidyup.reason(record["reason"])
433 # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")
436 print("WARNING: blocked is empty after tidyup.domain():", domain, block_level)
438 elif blacklist.is_blacklisted(blocked):
439 # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
441 elif blocked.count("*") > 0:
442 # Obscured domain name with no hash
443 row = instances.deobscure("*", blocked)
445 # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
447 print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
450 # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
453 nodeinfo_url = row[2]
454 elif blocked.count("?") > 0:
455 # Obscured domain name with no hash
456 row = instances.deobscure("?", blocked)
458 # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
460 print(f"WARNING: Cannot deobsfucate blocked='{blocked}',domain='{domain}',origin='{origin}' - SKIPPED!")
463 # DEBUG: print(f"DEBUG: blocked='{blocked}' de-obscured to '{row[0]}'")
466 nodeinfo_url = row[2]
468 # DEBUG: print(f"DEBUG: blocked='{blocked}'")
469 if not validators.domain(blocked):
470 print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - SKIPPED!")
472 elif blocked.endswith(".arpa"):
473 print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
475 elif blocked.endswith(".tld"):
476 print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
478 elif not instances.is_registered(blocked):
479 # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
480 instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
# Unlike the JSON passes above, the reason is known here and stored right away
482 if not blocks.is_instance_blocked(domain, blocked, block_level):
483 # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
484 blocks.add_instance(domain, blocked, reason, block_level)
486 if block_level == "reject":
487 # DEBUG: print("DEBUG: Adding to blockdict:", blocked)
# NOTE(review): the DEBUG message talks about "last seen" and the parallel
# branches above call blocks.update_last_seen() at this point, but this one
# calls blocks.update_reason() - confirm which one is intended
493 # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
494 blocks.update_reason(reason, domain, blocked, block_level)
# Final commit of everything recorded above
496 fba.connection.commit()
497 # DEBUG: print("DEBUG: EXIT!")
# Scrapes the block list of 'domain' from its about page and returns it
# grouped by block level.
#
# The page is fetched from the paths listed below (currently only
# "/instance/about/index.html") and parsed with BeautifulSoup. For every <h2>
# header whose (translated) text names a known section, the rows of the next
# table are collected as {"blocked": ..., "reason": ...} records.
#
# Parameters:
#  domain - instance to fetch the about page from, must be a 'str'; an empty
#           value is rejected (see error message)
#
# Returns (per the annotation and the mapping at the end) a dict with the keys
# "reject", "media_removal" and "followers_only", each holding a list of records.
#
# Raises:
#  ValueError - when 'domain' is not a 'str' or is empty
499 def fetch_blocks_from_about(domain: str) -> dict:
500 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
501 if not isinstance(domain, str):
502 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
504 raise ValueError("Parameter 'domain' is empty")
506 # DEBUG: print(f"DEBUG: Fetching mastodon blocks from domain='{domain}'")
# Candidate paths of the about page; only one is listed, the loop form allows
# adding more
508 for path in ["/instance/about/index.html"]:
513 # DEBUG: print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...")
514 response = network.fetch_response(
# Timeout pair (connection, read) taken from the configuration
518 (config.get("connection_timeout"), config.get("read_timeout"))
521 # DEBUG: print(f"DEBUG: response.ok='{response.ok}',response.status_code='{response.status_code}',response.text()={len(response.text)}")
# Failed requests and empty bodies are treated as "path does not exist"
522 if not response.ok or response.text.strip() == "":
523 print(f"WARNING: path='{path}' does not exist on domain='{domain}' - SKIPPED!")
526 # DEBUG: print(f"DEBUG: Parsing response.text()={len(response.text)} Bytes ...")
527 doc = bs4.BeautifulSoup(
532 # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
# The first document containing an <h2> header is used (see DEBUG message)
533 if doc.find("h2") is not None:
534 # DEBUG: print(f"DEBUG: Found 'h2' header in path='{path}' - BREAK!")
# Network failures are logged and stored as the last error of 'domain'
537 except network.exceptions as exception:
538 print("ERROR: Cannot fetch from domain:", domain, exception)
539 instances.set_last_error(domain, exception)
# One bucket per section header that is recognized on the about page
543 "Suspended servers": [],
544 "Filtered media" : [],
545 "Limited servers" : [],
546 "Silenced servers" : [],
549 # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
551 print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!")
554 for header in doc.find_all("h2"):
# The header text is cleaned with the same helper as block reasons
555 header_text = tidyup.reason(header.text)
557 # DEBUG: print(f"DEBUG: header_text='{header_text}' - BEFORE!")
# Translate the header through language_mapping into the section name used as
# bucket key
558 if header_text in language_mapping:
559 # DEBUG: print(f"DEBUG: header_text='{header_text}' - FOUND!")
560 header_text = language_mapping[header_text]
562 print(f"WARNING: header_text='{header_text}' not found in language mapping table")
564 # DEBUG: print(f"DEBUG: header_text='{header_text} - AFTER!'")
# NOTE(review): all bucket keys contain capital letters, so
# header_text.lower() can never match one of them; if it did, the
# blocklist[header_text] access below would raise KeyError - confirm intent
565 if header_text in blocklist or header_text.lower() in blocklist:
# NOTE(review): the comment below mentions find_all_next, but the code calls
# find_next("table") - the comment looks outdated
566 # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
567 # DEBUG: print(f"DEBUG: Found header_text='{header_text}', importing domain blocks ...")
# Rows of the next table after the header; the first row is skipped
# (presumably the column headings)
568 for line in header.find_next("table").find_all("tr")[1:]:
569 # DEBUG: print(f"DEBUG: line[]='{type(line)}'")
# First cell is the blocked domain, second cell is the reason
570 blocklist[header_text].append({
571 "blocked": tidyup.domain(line.find_all("td")[0].text),
572 "reason" : tidyup.reason(line.find_all("td")[1].text),
575 print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
577 # DEBUG: print(f"DEBUG: Returning blocklist for domain='{domain}'")
# Map the section buckets to block levels; "Limited servers" and
# "Silenced servers" are combined into "followers_only"
579 "reject" : blocklist["Suspended servers"],
580 "media_removal" : blocklist["Filtered media"],
581 "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],