1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 from fba import database
24 from fba.helpers import config
25 from fba.helpers import domain as domain_helper
26 from fba.helpers import tidyup
28 from fba.http import federation
29 from fba.http import network
31 from fba.models import instances
# Module-level logger setup.
# NOTE(review): "import logging" is not visible in this excerpt (the embedded
# original line numbers show gaps) — confirm it exists in the full file.
33 logging.basicConfig(level=logging.INFO)
34 logger = logging.getLogger(__name__)
36 # Language mapping X -> English
# Maps human-readable /about-page section headings (lower-cased by the caller)
# to the canonical block-level identifiers used elsewhere in this module
# (compared against in fetch_blocks_from_about()).
# NOTE(review): the dict's opening assignment line (presumably
# "language_mapping = {") falls inside a gap of this excerpt — confirm in the
# full file; entries below are only the visible fragment.
39 "filtered media" : "filtered_media",
40 "limited servers" : "followers_only",
41 "followers-only" : "followers_only",
42 "media removal" : "media_removal",
43 "media_removal" : "media_removal",
44 "media force-set as sensitive": "nsfw",
47 "suspended servers": "reject",
48 "silenced servers" : "silenced",
# NOTE(review): the mapped value below carries a stray leading space
# (" federated_timeline_removal"); any comparison against the expected
# "federated_timeline_removal" level will never match — likely a bug, confirm
# against consumers of this mapping and remove the space.
49 "removal from \"the whole known network\" timeline": " federated_timeline_removal",
# Fetch the domain-block lists a fediverse instance publishes in its nodeinfo
# metadata (Pleroma/Akkoma-style "metadata.federation" with "mrf_simple" /
# "quarantined_instances" keys), then merge per-domain block reasons from
# "mrf_simple_info" / "quarantined_instances_info"; if no usable JSON data is
# found, fall back to scraping the instance's /about page via
# fetch_blocks_from_about(). Returns a list of block records (dicts with at
# least "blocked", "block_level" and, where available, "reason" keys).
#
# NOTE(review): the embedded original line numbers show gaps throughout this
# function — several statements (the "blockdict" list initialisation, the
# "try:" matching the "except" below, various "continue"/"break" statements,
# dict-literal openings for the appended records, and the final "return") are
# not visible in this excerpt. Comments below are hedged accordingly; confirm
# control flow against the full file. Also: "utils" is referenced but not in
# the visible import block — presumably imported in an elided line.
52 def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
53 logger.debug("domain='%s',nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
# Validates the domain parameter (raises on malformed domains).
54 domain_helper.raise_on(domain)
# Explicit parameter validation for nodeinfo_url: must be a non-empty str.
56 if not isinstance(nodeinfo_url, str):
57 raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
58 elif nodeinfo_url == "":
59 raise ValueError("Parameter 'nodeinfo_url' is empty")
# Fetch the instance's nodeinfo document; network failures are recorded per
# instance and do not propagate.
# NOTE(review): the "try:" this "except" belongs to is in an elided line.
64 logger.debug("Fetching nodeinfo: domain='%s',nodeinfo_url='%s'", domain, nodeinfo_url)
65 rows = federation.fetch_nodeinfo(domain, nodeinfo_url)
66 except network.exceptions as exception:
67 logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
68 instances.set_last_error(domain, exception)
# Guard chain: bail out (elided early-return/continue, presumably) when the
# nodeinfo reply is missing or lacks the metadata.federation sub-tree.
71 logger.warning("Could not fetch nodeinfo from domain='%s'", domain)
73 elif "metadata" not in rows:
74 logger.warning("rows()=%d does not have key 'metadata', domain='%s'", len(rows), domain)
76 elif "federation" not in rows["metadata"]:
77 logger.warning("rows()=%d does not have key 'federation', domain='%s'", len(rows["metadata"]), domain)
# All block data of interest lives under metadata.federation.
80 data = rows["metadata"]["federation"]
83 logger.debug("data[]='%s'", type(data))
# --- Pass 1: collect blocked domains per block level -----------------------
# Preferred source: Pleroma's MRF "simple" policy listing, optionally merged
# with "quarantined_instances" (merge expression partially elided).
84 if "mrf_simple" in data:
85 logger.debug("Found mrf_simple in API response from domain='%s'", domain)
87 for block_level, blocklist in (
91 "quarantined_instances": data["quarantined_instances"]
95 logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
# Normalise the block level string; empty or "accept" levels are skipped
# (skip statements elided).
96 block_level = tidyup.domain(block_level)
97 logger.debug("block_level='%s' - AFTER!", block_level)
100 logger.warning("block_level is now empty!")
102 elif block_level == "accept":
103 logger.debug("domain='%s' skipping block_level='accept'", domain)
# Map instance-specific level names onto canonical ones.
106 block_level = utils.alias_block_level(block_level)
108 logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
109 if len(blocklist) > 0:
# For every listed domain: tidy it, drop unwanted ones, de-obfuscate
# (e.g. masked domains) and re-check, then append a block record
# (record-append dict literal elided).
110 for blocked in blocklist:
111 logger.debug("blocked='%s' - BEFORE!", blocked)
112 blocked = tidyup.domain(blocked)
113 logger.debug("blocked='%s' - AFTER!", blocked)
116 logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", domain, block_level)
118 elif not utils.is_domain_wanted(blocked):
119 logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
122 logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
123 blocked = utils.deobfuscate_domain(blocked, domain)
125 logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
126 if not utils.is_domain_wanted(blocked):
127 logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
130 logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
135 "block_level": block_level,
# Fallback source: a bare "quarantined_instances" list, recorded under the
# fixed level "quarantined". Same tidy/filter/deobfuscate pipeline as above.
138 elif "quarantined_instances" in data:
139 logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
141 block_level = "quarantined"
143 for blocked in data["quarantined_instances"]:
144 logger.debug("blocked='%s' - BEFORE!", blocked)
145 blocked = tidyup.domain(blocked)
146 logger.debug("blocked='%s' - AFTER!", blocked)
149 logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
151 elif not utils.is_domain_wanted(blocked):
152 logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
155 logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
156 blocked = utils.deobfuscate_domain(blocked, domain)
158 logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
159 if not utils.is_domain_wanted(blocked):
160 logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
163 logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
168 "block_level": block_level,
172 logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)
# Persist whatever was written to the database during pass 1.
174 logger.debug("Invoking commit() ...")
175 database.connection.commit()
# --- Pass 2: attach human-readable reasons to collected records ------------
# "mrf_simple_info" (merged with "quarantined_instances_info" when present)
# maps block levels to {blocked_domain: reason} info dicts.
178 if "mrf_simple_info" in data:
179 logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
181 for block_level, info in (
183 **data["mrf_simple_info"],
184 **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
187 logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
188 block_level = tidyup.domain(block_level)
189 logger.debug("block_level='%s' - AFTER!", block_level)
191 if block_level == "":
192 logger.warning("block_level is now empty!")
194 elif block_level == "accept":
195 logger.debug("domain='%s': Skipping block_level='%s' ...", domain, block_level)
198 block_level = utils.alias_block_level(block_level)
200 logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
201 for blocked, reason in info.items():
202 logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
203 blocked = tidyup.domain(blocked)
204 logger.debug("blocked='%s' - AFTER!", blocked)
# The reason may be a plain string or a {"reason": ...} dict; anything
# else (other than None) is treated as a hard error.
206 if isinstance(reason, str):
207 logger.debug("reason[] is a string")
208 reason = tidyup.reason(reason)
209 elif isinstance(reason, dict) and "reason" in reason:
210 logger.debug("reason[] is a dict")
211 reason = tidyup.reason(reason["reason"]) if isinstance(reason["reason"], str) else None
212 elif reason is not None:
213 raise ValueError(f"Cannot handle reason[]='{type(reason)}'")
215 logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)
218 logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
220 elif not utils.is_domain_wanted(blocked):
221 logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
224 logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
225 blocked = utils.deobfuscate_domain(blocked, domain)
226 logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
# Back-fill the reason into the matching pass-1 record(s).
228 logger.debug("Checking %d blockdict records ...", len(blockdict))
229 for block in blockdict:
230 logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
231 if block["blocked"] == blocked:
232 logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"])
233 block["reason"] = reason
# Alternate reasons source: "quarantined_instances_info" with a nested
# "quarantined_instances" mapping of {blocked_domain: {"reason": ...}}.
235 elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
236 logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
238 block_level = "quarantined"
240 #print(data["quarantined_instances_info"])
241 rows = data["quarantined_instances_info"]["quarantined_instances"]
# NOTE(review): rows[blocked]["reason"] is read here BEFORE the
# "blocked not in rows" guard below, and the guard then tests the
# tidied-up value of "blocked" rather than the original key — a missing
# or renamed key would raise KeyError before the guard runs. Looks like
# an ordering bug; confirm against the full file (the enclosing loop
# header is also elided from this excerpt).
243 logger.debug("blocked='%s' - BEFORE!", blocked)
244 reason = tidyup.reason(rows[blocked]["reason"])
245 blocked = tidyup.domain(blocked)
246 logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)
248 if blocked not in rows or "reason" not in rows[blocked]:
249 logger.warning("Cannot find blocked='%s' in rows()=%d,domain='%s' - BREAK!", blocked, len(rows), domain)
252 logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
254 elif not utils.is_domain_wanted(blocked):
255 logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
258 logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
259 blocked = utils.deobfuscate_domain(blocked, domain)
261 logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
262 if not utils.is_domain_wanted(blocked):
263 logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
266 logger.debug("Checking %d blockdict records ...", len(blockdict))
267 for block in blockdict:
268 logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
269 if block["blocked"] == blocked:
270 logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"])
271 block["reason"] = reason
273 logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)
# --- Fallback: scrape the instance's /about page ---------------------------
# Reached (presumably via an elided branch) when no usable JSON was found.
276 logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
277 blocklist = fetch_blocks_from_about(domain)
279 logger.debug("blocklist()=%d", len(blocklist))
280 if len(blocklist) > 0:
281 logger.info("Checking %d different blocklists ...", len(blocklist))
282 for block_level in blocklist:
283 logger.debug("block_level='%s'", block_level)
284 rows = blocklist[block_level]
# NOTE(review): stray trailing single quote inside the format string
# below ("rows[%s]()=%d'") — cosmetic log defect; also the inner
# per-"block" loop header is elided from this excerpt.
286 logger.debug("rows[%s]()=%d'", type(rows), len(rows))
288 logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", block["blocked"], domain)
289 block["blocked"] = utils.deobfuscate_domain(block["blocked"], domain)
291 logger.debug("block[blocked]='%s' - DEOBFUSCATED!", block["blocked"])
292 if not utils.is_domain_wanted(block["blocked"]):
293 logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
296 logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...",domain, block["blocked"], block["reason"], block_level)
299 "blocked" : block["blocked"],
300 "reason" : block["reason"],
301 "block_level": block_level,
# Exit log; the "return blockdict" presumably following it is elided.
304 logger.debug("blockdict()=%d - EXIT!", len(blockdict))
# Scrape an instance's /about page(s) for published domain-block tables.
# Walks candidate paths, parses the first page whose HTML contains an <h2>
# section header, then extracts (blocked domain, reason) rows from the table
# following each recognised header. Returns a dict keyed by canonical block
# level, each value a list of block records.
#
# NOTE(review): the embedded original line numbers show gaps — the "try:" for
# the "except" below, parts of the network.fetch_response() call, the
# BeautifulSoup() arguments, the blocklist dict's opening/closing, several
# guard "continue"/"break" statements, the appended record's fields and the
# trailing "return blocklist" all fall outside this excerpt. "bs4" is also
# not in the visible import block — presumably imported in an elided line.
307 def fetch_blocks_from_about(domain: str) -> dict:
308 logger.debug("domain='%s' - CALLED!", domain)
# Validates the domain parameter (raises on malformed domains).
309 domain_helper.raise_on(domain)
311 logger.debug("Fetching mastodon blocks from domain='%s'", domain)
# Candidate about-page paths (only one visible here; more may be elided).
313 for path in ["/instance/about/index.html"]:
318 logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
# Fetch with configured connect/read timeouts; call arguments partially
# elided.
319 response = network.fetch_response(
323 (config.get("connection_timeout"), config.get("read_timeout"))
326 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# Skip paths that 404/error out or return an empty body.
327 if not response.ok or response.text.strip() == "":
328 logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
331 logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
332 doc = bs4.BeautifulSoup(
337 logger.debug("doc[]='%s'", type(doc))
# A parsed page with at least one <h2> is considered usable; stop probing
# further paths (break statement elided).
338 if doc.find("h2") is not None:
339 logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
342 except network.exceptions as exception:
343 logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
344 instances.set_last_error(domain, exception)
# Result skeleton: one empty list per supported block level (dict opening
# and remaining keys elided).
349 "filtered_media": [],
350 "followers_only": [],
353 "media_removal" : [],
# Bail out (elided) when no page could be fetched/parsed or no <h2>
# headers exist.
356 logger.debug("doc[]='%s'", type(doc))
358 logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
361 headers = doc.find_all("h2")
363 logger.debug("headers[]='%s'", type(headers))
365 logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
368 logger.info("Checking %d headers ...", len(headers))
# Each <h2> heading names a block level; translate it through
# language_mapping into the canonical identifier.
369 for header in headers:
370 logger.debug("header[%s]='%s'", type(header), header)
371 block_level = tidyup.reason(header.text).lower()
373 logger.debug("block_level='%s' - BEFORE!", block_level)
374 if block_level in language_mapping:
375 logger.debug("block_level='%s' - FOUND!", block_level)
376 block_level = language_mapping[block_level].lower()
378 logger.warning("block_level='%s' not found in language mapping table", block_level)
# NOTE(review): misplaced quote in the format string below —
# "block_level='%s - AFTER!'" should read "block_level='%s' - AFTER!".
# Cosmetic log defect only.
380 logger.debug("block_level='%s - AFTER!'", block_level)
381 if block_level in blocklist:
# NOTE(review): the comment below mentions find_all_next, but the
# visible code uses find_next("table") — comment and code may have
# drifted apart; confirm against the full file.
382 # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
383 logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
# Skip the table's header row ([1:]) and read domain + reason from the
# first two <td> cells of each remaining row.
384 for line in header.find_next("table").find_all("tr")[1:]:
385 logger.debug("line[]='%s'", type(line))
386 blocked = tidyup.domain(line.find_all("td")[0].text)
387 reason = tidyup.reason(line.find_all("td")[1].text)
389 logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason)
390 blocklist[block_level].append({
395 logger.warning("block_level='%s' not found in blocklist()=%d", block_level, len(blocklist))
# Exit log; the "return blocklist" presumably following it is beyond this
# excerpt's end.
397 logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)