1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 from fba import database
24 from fba.helpers import config
25 from fba.helpers import domain as domain_helper
26 from fba.helpers import tidyup
28 from fba.http import federation
29 from fba.http import network
31 from fba.models import instances
# NOTE(review): the reviewed listing had lost the opening and closing lines of
# this dict literal; they are restored here - confirm against version control.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Language mapping X -> English
# Maps a section heading as it appears on an instance's /about page to the
# English heading that fetch_blocks_from_about() uses as its bucket name.
language_mapping = {
    # English -> English
    "Reject": "Suspended servers",
}
def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
    """Collect the domain blocks an instance publishes in its nodeinfo.

    The function reads ``rows["metadata"]["federation"]`` from the nodeinfo
    document and works in three stages:

    1. Block lists: ``mrf_simple`` (merged with ``quarantined_instances``) or,
       failing that, ``quarantined_instances`` alone.
    2. Reasons: ``mrf_simple_info`` (merged with ``quarantined_instances_info``)
       or, failing that, ``quarantined_instances_info`` alone. Reasons are
       written onto the records gathered in stage 1.
    3. Fallback: when none of the keys above was present, the instance's
       /about page is scraped through fetch_blocks_from_about().

    Args:
        domain: The instance being queried; validated by domain_helper.raise_on().
        nodeinfo_url: URL of the instance's nodeinfo document (non-empty str).

    Returns:
        A list of dicts with the keys "blocker", "blocked", "reason" and
        "block_level". An empty list is returned when nodeinfo could not be
        fetched or lacks the 'metadata'/'federation' keys.

    Raises:
        ValueError: If nodeinfo_url is not a str or is empty, or if a reason
            entry is neither a str, a dict with a "reason" key, nor None.

    Side effects:
        Calls instances.set_last_error() on network errors and
        database.connection.commit() after the block lists were processed.
    """
    # NOTE(review): the reviewed listing had lost its indentation and several
    # short lines (try:/continue/return/else:/dict delimiters, the 'found'
    # flag). They were restored from the surrounding log messages - confirm
    # against version control before merging.
    logger.debug("domain='%s',nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
    domain_helper.raise_on(domain)

    if not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    blockdict = []
    rows = None
    try:
        logger.debug("Fetching nodeinfo: domain='%s',nodeinfo_url='%s'", domain, nodeinfo_url)
        rows = federation.fetch_nodeinfo(domain, nodeinfo_url)
    except network.exceptions as exception:
        # Best effort: remember the error and fall through to the 'rows is None' exit
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s'", domain)
        return []
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s'", len(rows), domain)
        return []
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s'", len(rows['metadata']), domain)
        return []

    data = rows["metadata"]["federation"]

    # Becomes True as soon as any known key is present; decides on the /about fallback
    found = False

    # Stage 1: block lists
    logger.debug("data[]='%s'", type(data))
    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True

        # 'quarantined_instances' is optional: an instance may publish
        # 'mrf_simple' without it, so fall back to an empty list instead of
        # failing with KeyError.
        block_lists = {
            **data["mrf_simple"],
            "quarantined_instances": data.get("quarantined_instances") or [],
        }

        for block_level, blocklist in block_lists.items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = tidyup.domain(block_level)
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                # An allow-list is not a block list
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue
            elif block_level == "suspend":
                logger.debug("domain='%s', mapping 'suspend' to 'suspended'", domain)
                block_level = "suspended"
            elif block_level == "silence":
                logger.debug("domain='%s', mapping 'silence' to 'silenced'", domain)
                block_level = "silenced"

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            for blocked in blocklist:
                logger.debug("blocked='%s' - BEFORE!", blocked)
                blocked = tidyup.domain(blocked)
                logger.debug("blocked='%s' - AFTER!", blocked)

                if blocked == "":
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue
                elif not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
                blocked = utils.deobfuscate_domain(blocked, domain)

                # The deobfuscated name may differ, so it is checked again
                logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
                if not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                blockdict.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": block_level,
                })

    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked)
            logger.debug("blocked='%s' - AFTER!", blocked)

            if blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue
            elif not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
            blocked = utils.deobfuscate_domain(blocked, domain)

            logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
            if not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Stage 2: reasons
    # NOTE(review): records are matched on 'blocked' only, so a domain listed
    # under several block levels receives the reason processed last for all of
    # them - confirm whether matching on block_level as well is wanted.
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True

        reason_lists = {
            **data["mrf_simple_info"],
            **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
        }

        for block_level, info in reason_lists.items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
            block_level = tidyup.domain(block_level)
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue
            elif block_level == "suspend":
                logger.debug("domain='%s', mapping 'suspend' to 'suspended'", domain)
                block_level = "suspended"
            elif block_level == "silence":
                logger.debug("domain='%s', mapping 'silence' to 'silenced'", domain)
                block_level = "silenced"

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked)
                logger.debug("blocked='%s' - AFTER!", blocked)

                # A reason is either a plain string, a dict wrapping it, or None
                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"])
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked == "":
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue
                elif not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
                blocked = utils.deobfuscate_domain(blocked, domain)
                logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)

                logger.debug("Checking %d blockdict records ...", len(blockdict))
                for block in blockdict:
                    logger.debug("block[blocked]='%s',blocked='%s'", block['blocked'], blocked)
                    if block['blocked'] == blocked:
                        logger.debug("Updating reason='%s' for blocker='%s'", reason, block['blocked'])
                        block['reason'] = reason

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        rows = data["quarantined_instances_info"]["quarantined_instances"]
        for blocked in rows:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked)
            logger.debug("blocked='%s' - AFTER!", blocked)

            # NOTE(review): the lookup uses the tidied name; if tidyup.domain()
            # alters a key, this aborts the whole loop - confirm this is intended.
            if blocked not in rows or "reason" not in rows[blocked]:
                logger.warning("Cannot find blocked='%s' in rows()=%d,domain='%s' - BREAK!", blocked, len(rows), domain)
                break

            reason = rows[blocked]["reason"]
            logger.debug("reason='%s'", reason)

            if blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue

            logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
            blocked = utils.deobfuscate_domain(blocked, domain)

            logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
            if not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Checking %d blockdict records ...", len(blockdict))
            for block in blockdict:
                logger.debug("block[blocked]='%s',blocked='%s'", block['blocked'], blocked)
                if block['blocked'] == blocked:
                    logger.debug("Updating reason='%s' for blocker='%s'", reason, block['blocked'])
                    block['reason'] = reason

    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    # Stage 3: fallback to the /about page when nodeinfo had nothing usable
    if not found:
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d record(s) ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("block_level='%s'", block_level)
                rows = blocklist[block_level]

                logger.debug("rows[%s]()=%d'", type(rows), len(rows))
                for record in rows:
                    logger.debug("record[]='%s'", type(record))
                    blocked = tidyup.domain(record["blocked"])
                    reason = tidyup.reason(record["reason"])
                    logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                    if blocked == "":
                        logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                        continue
                    elif not utils.is_domain_wanted(blocked):
                        logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                        continue

                    logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
                    blocked = utils.deobfuscate_domain(blocked, domain)
                    logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)

                    logger.debug("Appending blocker='%s',blocked='%s',reason='%s',block_level='%s' ...",domain, blocked, reason, block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : blocked,
                        "reason"     : reason,
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape an instance's /about page for its published domain blocks.

    The page is fetched, parsed with BeautifulSoup, and every ``<h2>`` heading
    is translated through ``language_mapping``. When the resulting heading
    names one of the known sections, the rows (header row excluded) of the
    next ``<table>`` after the heading are imported.

    Args:
        domain: The instance to query; validated by domain_helper.raise_on().

    Returns:
        A dict with the keys "reject", "media_removal" and "followers_only",
        each holding a list of ``{"blocked": ..., "reason": ...}`` dicts.
        An empty dict is returned when no page could be fetched and parsed.

    Side effects:
        Calls instances.set_last_error() when fetching raises a network error.
    """
    # NOTE(review): the reviewed listing had lost its indentation and several
    # short lines (try:/continue/break/else:, the leading arguments of
    # network.fetch_response() and bs4.BeautifulSoup()). They were restored
    # from context - confirm against version control before merging.
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    doc = None
    for path in ["/instance/about/index.html"]:
        try:
            # Forget any document parsed for a previous path
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code='%d',response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

    # Buckets keyed by the (English) section heading found on the page
    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    # Lets a heading that differs only in letter case resolve to its bucket
    section_by_lower = {name.lower(): name for name in blocklist}

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        # Empty dict honours the annotated return type; callers see "no records"
        return {}

    for header in doc.find_all("h2"):
        header_text = tidyup.reason(header.text)

        logger.debug("header_text='%s' - BEFORE!", header_text)
        if header_text in language_mapping:
            logger.debug("header_text='%s' - FOUND!", header_text)
            header_text = language_mapping[header_text]
        else:
            logger.warning("header_text='%s' not found in language mapping table", header_text)

        logger.debug("header_text='%s - AFTER!'", header_text)
        if header_text in blocklist:
            section = header_text
        else:
            section = section_by_lower.get(header_text.lower())

        if section is None:
            logger.warning("header_text='%s' not found in blocklist()=%d", header_text, len(blocklist))
            continue

        # find_next() searches everything after the heading, not only its
        # siblings, so tables nested in e.g. a dropdown are still reached
        logger.debug("Found header_text='%s', importing domain blocks ...", header_text)
        table = header.find_next("table")
        if table is None:
            logger.warning("header_text='%s' is not followed by a table on domain='%s' - SKIPPED!", header_text, domain)
            continue

        # The first row is the table header
        for line in table.find_all("tr")[1:]:
            logger.debug("line[]='%s'", type(line))
            cells = line.find_all("td")
            if len(cells) < 2:
                logger.debug("line has only %d cell(s), domain='%s' - SKIPPED!", len(cells), domain)
                continue

            blocklist[section].append({
                "blocked": tidyup.domain(cells[0].text),
                "reason" : tidyup.reason(cells[1].text),
            })

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }