1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
22 from fba import database
25 from fba.helpers import blacklist
26 from fba.helpers import config
27 from fba.helpers import domain as domain_helper
28 from fba.helpers import tidyup
30 from fba.http import federation
31 from fba.http import network
33 from fba.models import blocks
34 from fba.models import instances
# Module-wide logging setup; every function in this file logs through `logger`.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Language mapping X -> English
# Maps a section header found on an instance's /about page to the canonical
# English header that fetch_blocks_from_about() uses as a blocklist key.
# NOTE(review): only the entry visible in the reviewed text is reproduced here;
# confirm against version control that no further entries exist.
language_mapping = {
    "Reject": "Suspended servers",
}
def _deobfuscate_blocked(domain: str, blocked: str):
    """Resolve a wildcard-obscured domain name, passing plain names through.

    A name containing "*" is handled first; otherwise a name containing "?"
    is handled. In both cases the blocker is flagged as using obfuscation and
    instances.deobfucate() is asked for the matching row.

    Args:
        domain: The blocking instance (used for flagging and log output).
        blocked: The already tidied-up, possibly obscured domain name.

    Returns:
        `blocked` unchanged when it contains no wildcard, the first column of
        the row returned by instances.deobfucate() when it does, or None when
        no row could be found (the caller should skip the entry).
    """
    for wildcard in ("*", "?"):
        if wildcard not in blocked:
            continue

        logger.debug("domain='%s' uses obfucated domains, marking ...", domain)
        instances.set_has_obfucation(domain, True)

        # Obscured domain name with no hash
        row = instances.deobfucate(wildcard, blocked)

        logger.debug("row[]='%s'", type(row))
        if row is None:
            logger.warning("Cannot deobfucate blocked='%s',domain='%s' - SKIPPED!", blocked, domain)
            return None

        logger.debug("blocked='%s' de-obscured to '%s'", blocked, row[0])
        return row[0]

    return blocked


def _update_reason(blockdict: list, domain: str, blocked: str, reason) -> None:
    """Set `reason` on every collected record whose 'blocked' equals `blocked`.

    Args:
        blockdict: Records collected so far; modified in place.
        domain: The blocking instance (log output only).
        blocked: Resolved domain name to look for.
        reason: Reason to store on each matching record.
    """
    logger.debug("Checking %d blockdict records ...", len(blockdict))
    for block in blockdict:
        # The blocker is the instance being queried, i.e. `domain`.
        logger.debug("block[blocker]='%s',blocker='%s'", block['blocker'], domain)
        if block['blocked'] == blocked:
            logger.debug("Updating reason='%s' for blocker='%s'", reason, block['blocked'])
            block['reason'] = reason


def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
    """Collect the block list a Pleroma-style instance publishes.

    The nodeinfo document is fetched and its metadata.federation section is
    read in two passes: first the blocked domains ("mrf_simple" and/or
    "quarantined_instances"), then the reasons ("mrf_simple_info" and/or
    "quarantined_instances_info"), which are merged into the records from the
    first pass. When none of these keys is present the /about page is scraped
    through fetch_blocks_from_about() instead.

    Args:
        domain: The instance to query; checked by domain_helper.raise_on().
        nodeinfo_url: URL of the instance's nodeinfo document.

    Returns:
        A list of dicts with the keys 'blocker', 'blocked', 'reason' and
        'block_level'. The list is empty when nodeinfo could not be fetched
        or lacks the 'metadata'/'federation' keys.

    Raises:
        ValueError: If nodeinfo_url is not a non-empty string, or if a reason
            in "mrf_simple_info" is neither a string, a dict carrying a
            'reason' key, nor None.
    """
    logger.debug("domain='%s',nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
    domain_helper.raise_on(domain)

    if not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    blockdict = list()
    rows = None
    try:
        logger.debug("Fetching nodeinfo: domain='%s',nodeinfo_url='%s'", domain, nodeinfo_url)
        rows = federation.fetch_nodeinfo(domain, nodeinfo_url)
    except network.exceptions as exception:
        # Best effort: remember the error and fall through to the None check below
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s'", domain)
        return list()
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s'", len(rows), domain)
        return list()
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s'", len(rows['metadata']), domain)
        return list()

    data = rows["metadata"]["federation"]
    found = False

    # First pass: blocked domains
    logger.debug("data[]='%s'", type(data))
    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True

        # "quarantined_instances" is optional, only merge it in when present
        levels = dict(data["mrf_simple"])
        if "quarantined_instances" in data:
            levels["quarantined_instances"] = data["quarantined_instances"]

        for block_level, blocklist in levels.items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = tidyup.domain(block_level)
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            for blocked in blocklist:
                logger.debug("blocked='%s' - BEFORE!", blocked)
                blocked = tidyup.domain(blocked)
                logger.debug("blocked='%s' - AFTER!", blocked)

                if blocked == "":
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue
                elif blocked.endswith(".arpa"):
                    logger.debug("blocked='%s' is a reverse IP address - SKIPPED!", blocked)
                    continue
                elif blocked.endswith(".tld"):
                    logger.debug("blocked='%s' is a fake domain - SKIPPED!", blocked)
                    continue

                blocked = _deobfuscate_blocked(domain, blocked)
                if blocked is None:
                    continue

                logger.debug("blocked='%s'", blocked)
                if not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                blockdict.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": block_level,
                })

    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked)
            logger.debug("blocked='%s' - AFTER!", blocked)

            if blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue
            elif blocked.endswith(".arpa"):
                logger.debug("blocked='%s' is a reverse IP address - SKIPPED!", blocked)
                continue
            elif blocked.endswith(".tld"):
                logger.debug("blocked='%s' is a fake domain - SKIPPED!", blocked)
                continue

            blocked = _deobfuscate_blocked(domain, blocked)
            if blocked is None:
                continue

            logger.debug("blocked='%s' - DEOBFUCATED!", blocked)
            if not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Second pass: reasons for the records collected above
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True

        for block_level, info in (
            {
                **data["mrf_simple_info"],
                **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
            }
        ).items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
            block_level = tidyup.domain(block_level)
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked)
                logger.debug("blocked='%s' - AFTER!", blocked)

                # A reason is either plain text or wrapped in a dict
                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"])
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked == "":
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue

                blocked = _deobfuscate_blocked(domain, blocked)
                if blocked is None:
                    continue

                logger.debug("blocked='%s' - DEOBFUCATED!", blocked)
                if not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                _update_reason(blockdict, domain, blocked, reason)

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        rows = data["quarantined_instances_info"]["quarantined_instances"]

        # Iterate key and value together: looking the entry up again with the
        # tidied-up name would miss every key that tidyup.domain() changes.
        for raw_blocked, details in rows.items():
            logger.debug("blocked='%s' - BEFORE!", raw_blocked)
            blocked = tidyup.domain(raw_blocked)
            logger.debug("blocked='%s' - AFTER!", blocked)

            if "reason" not in details:
                logger.warning("Cannot find blocked='%s' in rows()=%d,domain='%s' - BREAK!", blocked, len(rows), domain)
                break

            reason = details["reason"]
            logger.debug("reason='%s'", reason)

            if blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue

            blocked = _deobfuscate_blocked(domain, blocked)
            if blocked is None:
                continue

            logger.debug("blocked='%s'", blocked)
            if not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            _update_reason(blockdict, domain, blocked, reason)

    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    if not found:
        # Nothing usable in nodeinfo, scrape the HTML page instead
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d record(s) ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("block_level='%s'", block_level)

                rows = blocklist[block_level]
                logger.debug("rows[%s]()=%d'", type(rows), len(rows))
                for record in rows:
                    logger.debug("record[]='%s'", type(record))
                    blocked = tidyup.domain(record["blocked"])
                    reason = tidyup.reason(record["reason"])
                    logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                    if blocked == "":
                        logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                        continue

                    blocked = _deobfuscate_blocked(domain, blocked)
                    if blocked is None:
                        continue

                    logger.debug("blocked='%s' - DEOBFUCATED!", blocked)
                    if not utils.is_domain_wanted(blocked):
                        logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                        continue

                    logger.debug("Appending blocker='%s',blocked='%s',reason='%s',block_level='%s' ...",domain, blocked, reason, block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : blocked,
                        "reason"     : reason,
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the block tables from an instance's /about page.

    The page is fetched and parsed as HTML. Every <h2> header is translated
    through language_mapping and, when it names a known section, the rows of
    the table following it (minus the first row) are read as
    "blocked domain / reason" pairs.

    Args:
        domain: The instance to query; checked by domain_helper.raise_on().

    Returns:
        A dict with the keys 'reject', 'media_removal' and 'followers_only',
        each holding a list of {'blocked': ..., 'reason': ...} dicts. An empty
        dict is returned when no page could be fetched and parsed.
    """
    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
    domain_helper.raise_on(domain)

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    doc = None
    for path in ["/instance/about/index.html"]:
        try:
            # Reset, so a failed attempt never leaves a stale document behind
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            # NOTE(review): only the timeout tuple is visible in the reviewed
            # text; the three leading arguments are reconstructed - confirm
            # against network.fetch_response()'s signature.
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code='%d',response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            # NOTE(review): parser name reconstructed - confirm.
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        # NOTE(review): the original return statement is not visible; an empty
        # mapping matches the declared return type and has length zero.
        return {}

    # Lower-cased section title -> canonical blocklist key, so that a header
    # differing only in case still ends up in the right list
    known_headers = {title.lower(): title for title in blocklist}

    for header in doc.find_all("h2"):
        header_text = tidyup.reason(header.text)

        logger.debug("header_text='%s' - BEFORE!", header_text)
        if header_text in language_mapping:
            logger.debug("header_text='%s' - FOUND!", header_text)
            header_text = language_mapping[header_text]
        else:
            logger.warning("header_text='%s' not found in language mapping table", header_text)

        logger.debug("header_text='%s' - AFTER!", header_text)
        section = known_headers.get(header_text.lower())
        if section is None:
            logger.warning("header_text='%s' not found in blocklist()=%d", header_text, len(blocklist))
            continue

        # find_next() instead of a sibling search, to account for instances
        # that e.g. hide lists in a dropdown menu
        logger.debug("Found header_text='%s', importing domain blocks ...", header_text)
        table = header.find_next("table")
        if table is None:
            logger.warning("No table follows header_text='%s' on domain='%s' - SKIPPED!", header_text, domain)
            continue

        # The first row is skipped (presumably the table's heading row)
        for line in table.find_all("tr")[1:]:
            logger.debug("line[]='%s'", type(line))
            cells = line.find_all("td")
            if len(cells) < 2:
                logger.debug("line has only %d cell(s) - SKIPPED!", len(cells))
                continue

            blocklist[section].append({
                "blocked": tidyup.domain(cells[0].text),
                "reason" : tidyup.reason(cells[1].text),
            })

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }