1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
22 from fba import database
25 from fba.helpers import blacklist
26 from fba.helpers import config
27 from fba.helpers import domain as domain_helper
28 from fba.helpers import tidyup
30 from fba.http import federation
31 from fba.http import network
33 from fba.models import blocks
34 from fba.models import instances
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Language mapping X -> English: translates section headers found on an
# instance's /about page to the English header names used as block list keys.
# NOTE(review): the assignment line and closing brace were missing from the
# reviewed text and have been restored; confirm no further entries were lost.
language_mapping = {
    "Reject": "Suspended servers",
}
def _resolve_blocked(blocked: str, domain: str, origin: str, block_level: str, skip_fake_domains: bool):
    """Return the usable form of an already tidied-up blocked domain, or None to skip it.

    An entry is skipped when it is empty, blacklisted, cannot be de-obscured
    or is rejected by utils.is_domain_wanted(). With skip_fake_domains set,
    names ending in ".arpa" or ".tld" are skipped as well.

    Parameters domain, origin and block_level are only used for log messages.
    """
    if blocked == "":
        logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
        return None
    elif skip_fake_domains and blocked.endswith(".arpa"):
        logger.debug("blocked='%s' is a reverse IP address - SKIPPED!", blocked)
        return None
    elif skip_fake_domains and blocked.endswith(".tld"):
        logger.debug("blocked='%s' is a fake domain - SKIPPED!", blocked)
        return None
    elif blacklist.is_blacklisted(blocked):
        logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
        return None

    # Obscured domain name with no hash: "*" is tried first, "?" only when no
    # "*" is present (same precedence as the former if/elif chain).
    for char in ("*", "?"):
        if char in blocked:
            row = instances.deobscure(char, blocked)

            logger.debug("row[]='%s'", type(row))
            if row is None:
                logger.warning("Cannot deobsfucate blocked='%s',domain='%s',origin='%s' - SKIPPED!", blocked, domain, origin)
                return None

            logger.debug("blocked='%s' de-obscured to '%s'", blocked, row[0])
            # Only the domain name is taken from the row. Its other columns
            # must not replace the caller's origin/nodeinfo_url, otherwise
            # they leak into entries processed afterwards.
            blocked = row[0]
            break

    logger.debug("blocked='%s' - DEOBSFUCATED!", blocked)
    if not utils.is_domain_wanted(blocked):
        logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
        return None

    return blocked


def _ensure_registered(blocked: str, domain: str, origin: str, nodeinfo_url: str, command: str, commit_first: bool):
    """Register blocked via instances.add() unless instances.is_registered() already knows it.

    With commit_first set, pending database changes are committed before the
    new instance is added (the block list pass did this, the reason passes did not).
    """
    if instances.is_registered(blocked):
        return

    if commit_first:
        logger.debug("Invoking commit() ...")
        database.connection.commit()

    logger.debug("Domain blocked='%s' wasn't found, adding ..., domain='%s',origin='%s',nodeinfo_url='%s'", blocked, domain, origin, nodeinfo_url)
    instances.add(blocked, domain, command, nodeinfo_url)


def _import_blocked_domains(entries, domain: str, origin: str, nodeinfo_url: str, block_level: str, command: str):
    """Record every usable domain from entries as blocked by domain with block_level."""
    for blocked in entries:
        logger.debug("blocked='%s' - BEFORE!", blocked)
        blocked = tidyup.domain(blocked)
        logger.debug("blocked='%s' - AFTER!", blocked)

        blocked = _resolve_blocked(blocked, domain, origin, block_level, True)
        if blocked is None:
            continue

        _ensure_registered(blocked, domain, origin, nodeinfo_url, command, True)

        if not blocks.is_instance_blocked(domain, blocked, block_level):
            logger.debug("Blocking domain='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blocks.add_instance(domain, blocked, None, block_level)
        else:
            logger.debug("Updating block last seen for domain='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blocks.update_last_seen(domain, blocked, block_level)


def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Fetch the block list of a Pleroma-style instance and store it.

    The nodeinfo document of domain is fetched and its
    ["metadata"]["federation"] element is evaluated in two passes:

    1. Blocks from "mrf_simple" (plus "quarantined_instances" when present),
       or from "quarantined_instances" alone.
    2. Block reasons from "mrf_simple_info" (plus "quarantined_instances_info"
       when present), or from "quarantined_instances_info" alone.

    When none of these elements exist, the instance's /about page is parsed
    through fetch_blocks_from_about() instead.

    Parameters:
        domain:       Instance whose blocks are fetched; checked by domain_helper.raise_on()
        origin:       Non-empty string or None; only used for logging here
        nodeinfo_url: Non-empty URL handed to federation.fetch_nodeinfo() and
                      to instances.add() for newly found instances

    Raises:
        ValueError: origin or nodeinfo_url has the wrong type or is empty, or
                    a block reason is neither None, a string nor a dict with key "reason"
    """
    logger.debug("domain='%s',origin='%s',nodeinfo_url='%s' - CALLED!", domain, origin, nodeinfo_url)
    domain_helper.raise_on(domain)

    if not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # Name recorded with newly added instances. It is taken here because the
    # helpers would otherwise report their own function name.
    command = inspect.currentframe().f_code.co_name

    rows = None
    try:
        logger.debug("Fetching nodeinfo: domain='%s',nodeinfo_url='%s'", domain, nodeinfo_url)
        rows = federation.fetch_nodeinfo(domain, nodeinfo_url)
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s'", domain)
        return
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s'", len(rows), domain)
        return
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s'", len(rows['metadata']), domain)
        return

    data = rows["metadata"]["federation"]
    found = False

    # Pass 1: blocks
    logger.debug("data[]='%s'", type(data))
    if "mrf_simple" in data:
        logger.debug("Found mrf_simple: domain='%s'", domain)
        found = True

        # "quarantined_instances" is optional and only merged in when the
        # instance publishes it.
        block_levels = dict(data["mrf_simple"])
        if "quarantined_instances" in data:
            block_levels["quarantined_instances"] = data["quarantined_instances"]

        for block_level, blocklist in block_levels.items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = tidyup.domain(block_level)
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            _import_blocked_domains(blocklist, domain, origin, nodeinfo_url, block_level, command)
    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True

        _import_blocked_domains(data["quarantined_instances"], domain, origin, nodeinfo_url, "quarantined", command)
    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Pass 2: reasons
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True

        reason_levels = dict(data["mrf_simple_info"])
        reason_levels.update(data.get("quarantined_instances_info", {}))

        for block_level, info in reason_levels.items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info))
            block_level = tidyup.domain(block_level)
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked)
                logger.debug("blocked='%s' - AFTER!", blocked)

                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"])
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)
                blocked = _resolve_blocked(blocked, domain, origin, block_level, False)
                if blocked is None:
                    continue

                _ensure_registered(blocked, domain, origin, nodeinfo_url, command, False)

                logger.debug("Updating block reason: reason='%s',domain='%s',blocked='%s',block_level='%s'", reason, domain, blocked, block_level)
                blocks.update_reason(reason, domain, blocked, block_level)
    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        quarantined = data["quarantined_instances_info"]["quarantined_instances"]
        # The reason is read from the entry itself; looking it up again under
        # the tidied-up name fails whenever tidyup.domain() alters the key.
        for raw_blocked, entry in quarantined.items():
            logger.debug("blocked='%s' - BEFORE!", raw_blocked)
            blocked = tidyup.domain(raw_blocked)
            logger.debug("blocked='%s' - AFTER!", blocked)

            if "reason" not in entry:
                logger.warning("Cannot find blocked='%s' in rows()=%d,domain='%s' - BREAK!", blocked, len(quarantined), domain)
                break

            reason = entry["reason"]
            logger.debug("reason='%s'", reason)

            blocked = _resolve_blocked(blocked, domain, origin, block_level, False)
            if blocked is None:
                continue

            _ensure_registered(blocked, domain, origin, nodeinfo_url, command, False)

            logger.debug("Updating block reason: reason='%s',domain='%s',blocked='%s',block_level='%s'", reason, domain, blocked, block_level)
            blocks.update_reason(reason, domain, blocked, block_level)
    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    # Fallback: nothing usable in nodeinfo, try the /about page
    if not found:
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d record(s) ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("block_level='%s'", block_level)

                rows = blocklist[block_level]
                logger.debug("rows[%s]()=%d'", type(rows), len(rows))
                for record in rows:
                    logger.debug("record[]='%s'", type(record))
                    blocked = tidyup.domain(record["blocked"])
                    reason = tidyup.reason(record["reason"])
                    logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                    blocked = _resolve_blocked(blocked, domain, origin, block_level, False)
                    if blocked is None:
                        continue

                    _ensure_registered(blocked, domain, origin, nodeinfo_url, command, False)

                    if not blocks.is_instance_blocked(domain, blocked, block_level):
                        logger.debug("Blocking domain='%s',blocked='%s', block_level='%s' ...", domain, blocked, block_level)
                        blocks.add_instance(domain, blocked, reason, block_level)
                    else:
                        # NOTE(review): the message says "last seen" but the
                        # reason is updated here - confirm which is intended.
                        logger.debug("Updating block last seen for domain='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                        blocks.update_reason(reason, domain, blocked, block_level)

    # NOTE(review): the nesting of this final commit could not be determined
    # from the reviewed text; committing unconditionally also persists the
    # reason updates from pass 2 and is harmless when nothing is pending.
    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("EXIT!")
def fetch_blocks_from_about(domain: str) -> dict:
    """Parse the block tables from an instance's /about page.

    The page is fetched from domain, every "h2" header is translated through
    language_mapping and, when it names a known section, the rows of the next
    table (without its first row) are collected as records with keys
    "blocked" and "reason".

    Parameters:
        domain: Instance to fetch the page from; checked by domain_helper.raise_on()

    Returns:
        A dict with keys "reject", "media_removal" and "followers_only", each
        holding a list of records, or an empty dict when no page could be
        fetched.
    """
    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
    domain_helper.raise_on(domain)

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    doc = None
    for path in ["/instance/about/index.html"]:
        # Forget a document from a previous path that had no usable header
        doc = None

        try:
            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            # NOTE(review): the first three arguments were missing from the
            # reviewed text and follow the usual fetch_response() call shape
            # (domain, path, headers) - confirm against fba.http.network.
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code='%d',response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception='%s'", domain, exception)
            instances.set_last_error(domain, exception)
            break

    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    # Headers are matched case-insensitively and always stored under the
    # canonical key, so a lower-case header cannot cause a KeyError.
    canonical_headers = {key.lower(): key for key in blocklist}

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        return {}

    for header in doc.find_all("h2"):
        header_text = tidyup.reason(header.text)

        logger.debug("header_text='%s' - BEFORE!", header_text)
        if header_text in language_mapping:
            logger.debug("header_text='%s' - FOUND!", header_text)
            header_text = language_mapping[header_text]
        else:
            logger.warning("header_text='%s' not found in language mapping table", header_text)

        logger.debug("header_text='%s' - AFTER!", header_text)
        section = canonical_headers.get(header_text.lower())
        if section is None:
            logger.warning("header_text='%s' not found in blocklist()=%d", header_text, len(blocklist))
            continue

        # find_next() also reaches tables that are not direct siblings of the
        # header, e.g. lists hidden inside a dropdown menu.
        table = header.find_next("table")
        if table is None:
            logger.warning("header_text='%s' is not followed by a table on domain='%s' - SKIPPED!", header_text, domain)
            continue

        logger.debug("Found header_text='%s', importing domain blocks ...", header_text)
        for line in table.find_all("tr")[1:]:
            logger.debug("line[]='%s'", type(line))
            cells = line.find_all("td")
            if len(cells) < 2:
                logger.debug("line has only %d cell(s) - SKIPPED!", len(cells))
                continue

            blocklist[section].append({
                "blocked": tidyup.domain(cells[0].text),
                "reason" : tidyup.reason(cells[1].text),
            })

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }