1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
22 from fba import database
25 from fba.helpers import blacklist
26 from fba.helpers import config
27 from fba.helpers import domain as domain_helper
28 from fba.helpers import tidyup
30 from fba.http import network
31 from fba.http import nodeinfo
33 from fba.models import blocks
34 from fba.models import instances
# Module-wide logger. NOTE(review): basicConfig() at import time configures the
# root logger for the whole process — an import-time side effect; confirm this
# is intended rather than leaving configuration to the application entry point.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Language mapping X -> English
# Maps block-level names as displayed on instance /about pages (possibly
# localized or free-form) to the canonical English block-level keys used
# internally. NOTE(review): the enclosing `language_mapping = {` assignment
# and the closing brace are not visible in this chunk of the file.
    "filtered media" : "filtered_media",
    "limited servers" : "followers_only",
    "followers-only" : "followers_only",
    "media removal" : "media_removal",
    "media_removal" : "media_removal",
    "media force-set as sensitive": "media_nsfw",
    "nsfw" : "media_nsfw",
    "suspended servers": "reject",
    "silenced servers" : "silenced",
    "removal from \"the whole known network\" timeline": "federated_timeline_removal",
def fetch_blocks(domain: str) -> list:
    """Fetch block lists published by a Pleroma/Akkoma instance.

    Reads the instance's nodeinfo "metadata.federation" object (mrf_simple,
    quarantined_instances and their *_info counterparts) and, when no usable
    JSON is found, falls back to scraping the instance's /about page via
    fetch_blocks_from_about().

    Parameters:
        domain: bare domain name of the instance to query; must be
                registered and not blacklisted.

    Returns:
        list of dicts carrying at least "blocked" and "block_level" keys
        ("reason" is back-filled where the instance publishes one).

    Raises:
        Exception: when invoked for a blacklisted or unregistered domain.
        ValueError: when a block reason has an unsupported type.

    NOTE(review): several statements of this function (try/except framing,
    `return` statements, `blockdict`/`found` initialisation, some branch and
    loop headers) are not visible in this chunk of the file; the comments
    below describe only the code that is shown.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    # Guard clauses: the caller must pre-filter blacklisted and
    # unregistered domains.
    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    logger.debug("Fetching nodeinfo: domain='%s'", domain)
    rows = nodeinfo.fetch(domain, update_mode=False)

    if "error_message" in rows:
        # Remote fetch failed: persist the error against the instance record
        # and give up on this domain.
        logger.warning("Error message '%s' during fetching nodeinfo for domain='%s'", rows["error_message"], domain)
        instances.set_last_error(domain, rows)
        instances.update(domain)

        logger.debug("Returning empty list ... - EXIT!")
    elif "exception" in rows:
        logger.warning("Exception '%s' during fetching nodeinfo for domain='%s' - EXIT!", type(rows["exception"]), domain)
        # presumably reached when rows carries a "json" payload — the branch
        # header is not visible in this chunk; confirm against the full file.
        logger.debug("rows[json] found for domain='%s'", domain)
    except network.exceptions as exception:
        # Network-level failure while talking to the instance; remember it.
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    logger.debug("rows[]='%s'", type(rows))
        logger.warning("Could not fetch nodeinfo from domain='%s' - EXIT!", domain)
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s' - EXIT!", len(rows), domain)
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s' - EXIT!", len(rows["metadata"]), domain)

    # All block information lives under metadata.federation in nodeinfo.
    data = rows["metadata"]["federation"]
    logger.debug("data[]='%s'", type(data))

    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)

        # Iterate (block_level, list-of-blocked-domains) pairs from
        # mrf_simple merged with the quarantine list.
        for block_level, blocklist in (
                **data["mrf_simple"],
                    "quarantined_instances": data["quarantined_instances"]

            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level == "":
                logger.warning("block_level is now empty!")
            elif block_level == "accept":
                # "accept" is an allow-list, not a block list — skip it.
                logger.debug("domain='%s' skipping block_level='accept'", domain)

            # Normalize aliases to canonical block-level names.
            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            for blocked in blocklist:
                logger.debug("blocked='%s' - BEFORE!", blocked)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if blocked in [None, ""]:
                    logger.warning("blocked='%s' is empty after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", blocked, domain, block_level)
                elif validators.domain(blocked) and blacklist.is_blacklisted(blocked):
                    # NOTE(review): the '%s' placeholder has no matching
                    # argument — `blocked` is missing from this call.
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!")
                    # Obfuscated entries (e.g. "examp*e.com") are resolved
                    # against known data; the branch header introducing this
                    # path is not visible in this chunk.
                    logger.debug("Invoking utils.deobfuscate(%s, %s) ...", blocked, domain)
                    blocked = utils.deobfuscate(blocked, domain)
                    logger.debug("blocked[%s]='%s' - DEOBFUSCATED!", type(blocked), blocked)

                    if blocked in [None, ""]:
                        # NOTE(review): `instance` is not defined in this
                        # function — logging args are evaluated eagerly, so
                        # this line raises NameError when reached; it likely
                        # should log `blocked` instead.
                        logger.warning("instance[host]='%s' is None or empty after tidyup.domain() - SKIPPED!", instance["host"])
                    elif not domain_helper.is_wanted(blocked):
                        logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)

                logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                    "block_level": block_level,

    elif "quarantined_instances" in data:
        # No mrf_simple published: only the quarantine list is available.
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        block_level = "quarantined"

        logger.debug("Checking %d quarantined instance(s) ...", len(data["quarantined_instances"]))
        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked) if blocked != "" else None
            logger.debug("blocked='%s' - AFTER!", blocked)

            if blocked in [None, ""]:
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                "block_level": block_level,
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    # Persist everything collected so far before the reasons pass.
    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Second pass: attach human-readable reasons from the *_info objects.
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)

        for block_level, info in (
                **data["mrf_simple_info"],
                **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})

            logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level in [None, ""]:
                logger.warning("block_level='%s' is now empty!", block_level)
            elif block_level == "accept":
                logger.debug("domain='%s': Skipping block_level='%s' ...", domain, block_level)

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                # Reasons arrive either as plain strings or wrapped in a
                # {"reason": ...} dict, depending on the remote software.
                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"]) if isinstance(reason["reason"], str) else None
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)

                # Back-fill the reason into already-collected entries.
                logger.debug("Checking %d blockdict records ...", len(blockdict))
                for block in blockdict:
                    logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
                    if block["blocked"] == blocked:
                        logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"])
                        block["reason"] = reason

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        block_level = "quarantined"

        #print(data["quarantined_instances_info"])
        rows = data["quarantined_instances_info"]["quarantined_instances"]
            logger.debug("blocked='%s' - BEFORE!", blocked)
            reason = tidyup.reason(rows[blocked]["reason"]) if rows[blocked]["reason"] != "" else None
            blocked = tidyup.domain(blocked) if blocked != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            # NOTE(review): this membership check runs after rows[blocked]
            # was already subscripted two statements above — if the key can
            # be absent, the KeyError fires before this guard; confirm the
            # ordering against the full file.
            if blocked not in rows or "reason" not in rows[blocked]:
                logger.warning("Cannot find blocked='%s' in rows()=%d,domain='%s' - BREAK!", blocked, len(rows), domain)
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)

            logger.debug("Checking %d blockdict record(s) ...", len(blockdict))
            for block in blockdict:
                logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
                if block["blocked"] == blocked:
                    logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"])
                    block["reason"] = reason
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    logger.debug("found='%s'", found)
        # Fallback: scrape the /about page when nodeinfo had nothing usable.
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d different blocklist(s) ...", len(blocklist))
            for block_level in blocklist:
                # NOTE(review): the '%d' placeholder is given the list
                # itself, not its length — len(blocklist[block_level]) was
                # probably intended (raises TypeError when DEBUG enabled).
                logger.debug("Checking blocklist[%s]()=%d entries ...", block_level, blocklist[block_level])
                for block in blocklist[block_level]:
                    logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...",domain, block["blocked"], block["reason"], block_level)
                        "blocked" : block["blocked"],
                        "reason" : block["reason"],
                        "block_level": block_level,

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape block lists from an instance's /about page.

    Probes known about-page paths, parses the returned HTML with
    BeautifulSoup, translates each <h2> section header through
    language_mapping to a canonical block level, and collects every table
    row beneath it as a {"blocked", "reason"} entry.

    Parameters:
        domain: bare domain name of the instance; must be registered and
                not blacklisted.

    Returns:
        dict mapping block level -> list of {"blocked": ..., "reason": ...}.

    Raises:
        Exception: when invoked for a blacklisted or unregistered domain.

    NOTE(review): some statements (try: framing, `continue`/`break`/`return`
    statements, the `blocklist = {` initialiser and parts of the
    network.fetch_response()/BeautifulSoup() calls) are not visible in this
    chunk; the comments below describe only the code that is shown.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    # Guard clauses: the caller must pre-filter blacklisted and
    # unregistered domains.
    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    # Probe candidate paths until one yields a page containing an <h2>.
    for path in ["/instance/about/index.html"]:
            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                (config.get("connection_timeout"), config.get("read_timeout"))

            logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                # A block-list section header exists — stop probing paths.
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)

    # Result skeleton, keyed by canonical block level. NOTE(review): the
    # enclosing `blocklist = {` initialiser is not visible in this chunk.
        "filtered_media": [],
        "followers_only": [],
        "media_removal" : [],
        "federated_timeline_removal": [],

    logger.debug("doc[]='%s'", type(doc))
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)

    # Each block-list section on the /about page is introduced by an <h2>.
    headers = doc.find_all("h2")

    logger.debug("headers[]='%s'", type(headers))
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)

    logger.info("Checking %d headers ...", len(headers))
    for header in headers:
        logger.debug("header[%s]='%s'", type(header), header)
        block_level = tidyup.reason(header.text).lower()

        # Translate the (possibly localized) header text to a canonical key.
        logger.debug("block_level='%s' - BEFORE!", block_level)
        if block_level in language_mapping:
            logger.debug("block_level='%s' - FOUND!", block_level)
            block_level = language_mapping[block_level].lower()
            logger.warning("block_level='%s' not found in language mapping table", block_level)

        logger.debug("block_level='%s - AFTER!'", block_level)
        if block_level in blocklist:
            # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
            logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
            # Skip the first <tr> — it is the table's header row.
            for line in header.find_next("table").find_all("tr")[1:]:
                logger.debug("line[]='%s'", type(line))
                blocked = line.find_all("td")[0].text
                reason = line.find_all("td")[1].text

                logger.debug("blocked='%s',reason='%s' - BEFORE!", blocked, reason)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                reason = tidyup.reason(reason) if reason != "" else None
                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked in [None, ""]:
                    logger.debug("domain='%s',block_level='%s': blocked='%s' is empty - SKIPPED!", domain, block_level, blocked)
                elif not domain_helper.is_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)

                logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason)
                blocklist[block_level].append({
            logger.warning("block_level='%s' not found in blocklist()=%d", block_level, len(blocklist))

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)