1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
24 from fba.helpers import config
25 from fba.helpers import domain as domain_helper
26 from fba.helpers import tidyup
28 from fba.http import federation
29 from fba.http import network
31 from fba.models import instances
# Module-wide logger setup: INFO by default; re-enable the commented-out
# setLevel() call below to get DEBUG output from this module only.
33 logging.basicConfig(level=logging.INFO)
34 logger = logging.getLogger(__name__)
35 #logger.setLevel(logging.DEBUG)
# Fetch the peer list of a Lemmy instance via its '/api/v3/site' JSON API,
# falling back to scraping the HTML '/instances' page (fetch_instances())
# when the JSON response carries no usable 'federated_instances' dict.
#
# Parameters:
#   domain - remote instance's domain name (validated by raise_on())
#   origin - domain this instance was discovered from
# Returns: list of peer domain names.
#
# NOTE(review): the embedded original line numbers jump (39->43, 48->49,
# 58->62, ...), so interior lines are missing from this view — e.g. the
# 'try:' statements, the 'peers = ...' initializer, early 'return's and the
# remaining get_json_api() argument lines. Confirm against the full file
# before changing any logic here.
37 def fetch_peers(domain: str, origin: str) -> list:
38 logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
# Raises on malformed/unwanted domains — validation happens before any I/O.
39 domain_helper.raise_on(domain)
43 # No CSRF by default, you don't have to add network.api_headers by yourself here
47 logger.debug("Checking CSRF for domain='%s'", domain)
# Determine CSRF headers for this domain; on network failure the error is
# recorded for the instance and (presumably) an empty list is returned early.
48 headers = csrf.determine(domain, dict())
49 except network.exceptions as exception:
50 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
51 instances.set_last_error(domain, exception)
53 logger.debug("Returning empty list ... - EXIT!")
# Query Lemmy's site API; timeouts come from configuration as a
# (connect, read) tuple.
57 logger.debug("Fetching '/api/v3/site' from domain='%s' ...", domain)
58 data = network.get_json_api(
62 (config.get("connection_timeout"), config.get("read_timeout"))
65 logger.debug("data[]='%s'", type(data))
66 if "error_message" in data:
# API unreachable or returned an error — remember it for this instance.
67 logger.warning("Could not reach any JSON API: domain='%s'", domain)
68 instances.set_last_error(domain, data)
69 elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict):
# Happy path: merge the advertised federation data into the peer list.
70 logger.debug("Found federated_instances for domain='%s'", domain)
71 peers = peers + federation.add_peers(data["json"]["federated_instances"])
73 logger.debug("Marking domain='%s' as successfully handled ...", domain)
74 instances.set_success(domain)
# Fallback path: scrape the HTML /instances page instead.
77 logger.warning("Fetching instances for domain='%s' from /instances ...", domain)
78 peers = fetch_instances(domain, origin)
80 except network.exceptions as exception:
81 logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
82 instances.set_last_error(domain, exception)
84 logger.debug("peers()=%d - EXIT!", len(peers))
# Scrape a Lemmy instance's '/instances' HTML page for its federation
# blocklist: find a heading whose (lowercased) text matches one of the
# localized "Blocked Instances" translations below, then collect every <a>
# tag from the following <ul>/<table>. Falls back to parse_script(doc,
# "blocked") when no matching heading exists. Each result is appended as a
# dict with "block_level": "reject".
#
# Parameters:
#   domain       - remote instance's domain name (validated by raise_on())
#   nodeinfo_url - nodeinfo URL; validated here but not visibly used below
# Returns: list of block dicts.
#
# NOTE(review): interior lines are missing from this view (the
# 'translations = [' opener, 'blocklist'/'found' initializers, 'break'
# statements, 'for tag in blocking:' and the append bodies) — the embedded
# original line numbers jump. Confirm against the full file before editing.
87 def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
# NOTE(review): this format string is missing a closing quote after the
# first %s ("domain='%s,") — cosmetic log defect, fix upstream.
88 logger.debug("domain='%s,nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
89 domain_helper.raise_on(domain)
91 if not isinstance(nodeinfo_url, str):
92 raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not of type 'str'")
93 elif nodeinfo_url == "":
94 raise ValueError("Parameter 'nodeinfo_url' is empty")
# Localized (lowercased) headings that mark a "blocked instances" section.
97 "Blocked Instances".lower(),
98 "Instàncies bloquejades".lower(),
99 "Blocáilte Ásc".lower(),
101 "Blokované instance".lower(),
102 "Geblokkeerde instanties".lower(),
103 "Blockerade instanser".lower(),
104 "Instàncias blocadas".lower(),
105 "Istanze bloccate".lower(),
106 "Instances bloquées".lower(),
107 "Letiltott példányok".lower(),
108 "Instancias bloqueadas".lower(),
109 "Blokeatuta dauden instantziak".lower(),
111 "Peladen Yang Diblokir".lower(),
112 "Blokerede servere".lower(),
113 "Blokitaj nodoj".lower(),
114 "Блокирани Инстанции".lower(),
115 "Blockierte Instanzen".lower(),
116 "Estetyt instanssit".lower(),
117 "Instâncias bloqueadas".lower(),
118 "Zablokowane instancje".lower(),
119 "Blokované inštancie".lower(),
120 "المثلاء المحجوبون".lower(),
121 "Užblokuoti serveriai".lower(),
122 "ブロックしたインスタンス".lower(),
123 "Блокированные Инстансы".lower(),
124 "Αποκλεισμένοι διακομιστές".lower(),
# NOTE(review): duplicate of the entry at original line 117
# ("Instâncias bloqueadas") — harmless but redundant; confirm upstream.
126 "Instâncias bloqueadas".lower(),
132 # json endpoint for newer mastodongs
133 logger.debug("Fetching /instances from domain='%s'", domain)
134 response = network.fetch_response(
138 (config.get("connection_timeout"), config.get("read_timeout"))
141 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
142 if response.ok and response.status_code < 300 and response.text != "":
143 logger.debug("Parsing %s Bytes ...", len(response.text))
145 doc = bs4.BeautifulSoup(response.text, "html.parser")
146 logger.debug("doc[]='%s'", type(doc))
# Look for the blocklist inside either of two known container <div> classes.
149 for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
150 logger.debug("criteria='%s'", criteria)
151 containers = doc.findAll("div", criteria)
153 logger.debug("Checking %d containers ...", len(containers))
154 for container in containers:
155 logger.debug("container[]='%s'", type(container))
# Scan all heading levels that may title the blocklist section.
156 for header in container.find_all(["h2", "h3", "h4", "h5"]):
158 logger.debug("header[%s]='%s' - BEFORE!", type(header), header)
159 if header is not None:
160 content = str(header.contents[0])
161 logger.debug("content[%s]='%s' - AFTER!", type(content), content)
164 logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
166 elif not isinstance(content, str):
167 logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
# Case-insensitive match against the translation table above; the
# BREAK(3/2/1) logs mirror breaking out of the three nested loops.
169 elif content.lower() in translations:
170 logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header)
174 logger.debug("found[]='%s'", type(found))
175 if found is not None:
176 logger.debug("Found header with blocked instances - BREAK(2) !")
179 logger.debug("found[]='%s'", type(found))
180 if found is not None:
181 logger.debug("Found header with blocked instances - BREAK(1) !")
184 logger.debug("found[]='%s'", type(found))
# No matching heading found: fall back to parsing embedded script data.
186 logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
187 peers = parse_script(doc, "blocked")
189 logger.debug("domain='%s' has %d peer(s).", domain, len(peers))
190 for blocked in peers:
191 logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
# Lemmy blocklists carry no per-entry severity; everything is "reject".
196 "block_level": "reject",
199 logger.debug("blocklist()=%d - EXIT!", len(blocklist))
# Heading path: the blocked domains are the <a> tags in the list/table
# immediately following the matched heading.
202 blocking = found.find_next(["ul", "table"]).findAll("a")
203 logger.debug("Found %d blocked instance(s) ...", len(blocking))
205 logger.debug("tag[]='%s'", type(tag))
206 blocked = tidyup.domain(tag.contents[0])
207 logger.debug("blocked='%s'", blocked)
210 logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
212 elif not domain_helper.is_wanted(blocked):
213 logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
216 logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
221 "block_level": "reject",
224 except network.exceptions as exception:
225 logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
226 instances.set_last_error(domain, exception)
228 logger.debug("blocklist()=%d - EXIT!", len(blocklist))
# Scrape a Lemmy instance's '/instances' HTML page for its list of known
# peers: for each known container <div>, take the <a> tags from the
# <ul>/<table> following it, tidy each domain and keep the wanted,
# non-duplicate ones. Falls back to parse_script(doc) when the HTML yields
# no peers, then marks the domain as successfully handled.
#
# Parameters:
#   domain - remote instance's domain name (validated by raise_on())
#   origin - domain this instance was discovered from (not visibly used below)
# Returns: list of peer domain names.
#
# NOTE(review): interior lines are missing from this view ('try:', the
# 'peers = ...' initializer, 'for tag in rows:', the append/continue lines
# and 'return peers') — the embedded original line numbers jump. Confirm
# against the full file before editing.
231 def fetch_instances(domain: str, origin: str) -> list:
232 logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
233 domain_helper.raise_on(domain)
238 # json endpoint for newer mastodongs
239 logger.debug("Fetching /instances from domain='%s'", domain)
240 response = network.fetch_response(
# (connect, read) timeout tuple from configuration.
244 (config.get("connection_timeout"), config.get("read_timeout"))
247 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
248 if response.ok and response.status_code < 300 and response.text != "":
249 logger.debug("Parsing %s Bytes ...", len(response.text))
251 doc = bs4.BeautifulSoup(response.text, "html.parser")
252 logger.debug("doc[]='%s'", type(doc))
# Same two container classes as fetch_blocks() searches.
254 for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
255 logger.debug("criteria='%s'", criteria)
256 containers = doc.findAll("div", criteria)
258 logger.debug("Checking %d containers ...", len(containers))
259 for header in containers:
260 logger.debug("header[%s]='%s'", type(header), header)
# Peer links live in the list/table right after the container.
262 rows = header.find_next(["ul","table"]).findAll("a")
263 logger.debug("Found %d instance(s) ...", len(rows))
265 logger.debug("tag[]='%s'", type(tag))
# An <a> may contain either a bare string or a nested tag; take its text.
266 text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
267 peer = tidyup.domain(text)
268 logger.debug("peer='%s'", peer)
271 logger.debug("peer is empty - SKIPPED!")
273 elif not domain_helper.is_wanted(peer):
274 logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
277 logger.debug("peer='%s' already added - SKIPPED!", peer)
280 logger.debug("Appending peer='%s' ...", peer)
283 logger.debug("peers()=%d", len(peers))
# HTML scraping found nothing: try the embedded isoData script instead.
285 logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
286 peers = parse_script(doc)
288 logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
289 instances.set_success(domain)
291 except network.exceptions as exception:
292 logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
293 instances.set_last_error(domain, exception)
295 logger.debug("peers()=%d - EXIT!", len(peers))
# Extract peer domains from Lemmy's embedded "window.isoData" <script> tag:
# split the script on '=', JSON-decode the right-hand side (after patching
# the non-JSON token ':undefined' into a quoted string), then walk
# parsed["routeData"]["federatedInstancesResponse"]["data"]
# ["federated_instances"] and collect tidied, wanted, non-duplicate domains.
#
# Parameters:
#   doc  - parsed HTML document (bs4.BeautifulSoup)
#   only - optional element name (e.g. "blocked"); when set, all other
#          elements in the federated_instances data are skipped
# Returns: list of peer domain names.
#
# NOTE(review): interior lines are missing from this view (the 'peers'
# initializer, 'continue' statements, 'for element in data:' and 'return'
# lines) — the embedded original line numbers jump. Confirm against the
# full file before editing.
298 def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
# NOTE(review): this debug call has two %s placeholders but passes no
# arguments — it raises a formatting error once DEBUG logging is enabled.
299 logger.debug("doc[]='%s',only='%s' - CALLED!")
300 if not isinstance(doc, bs4.BeautifulSoup):
# NOTE(review): message reports type(only) although 'doc' is what failed
# validation here — likely a copy/paste slip; fix upstream.
301 raise ValueError(f"Parameter doc[]='{type(only)}' is not of type 'bs4.BeautifulSoup'")
302 elif not isinstance(only, str) and only is not None:
303 raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'")
304 elif isinstance(only, str) and only == "":
305 raise ValueError("Parameter 'only' is empty")
307 scripts = doc.find_all("script")
310 logger.debug("scripts()=%d", len(scripts))
311 for script in scripts:
312 logger.debug("script[%s].contents()=%d", type(script), len(script.contents))
313 if len(script.contents) == 0:
314 logger.debug("script has no contents - SKIPPED!")
# Only the script assigning window.isoData carries the federation data.
316 elif not script.contents[0].startswith("window.isoData"):
317 logger.debug("script.contents[0]='%s' does not start with window.isoData - SKIPPED!", script.contents[0])
320 logger.debug("script.contents[0][]='%s'", type(script.contents[0]))
# Take the RHS of the assignment; ':undefined' is not valid JSON, so it
# is rewritten to the string "undefined" before decoding.
322 iso_data = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"")
323 logger.debug("iso_data[%s]='%s'", type(iso_data), iso_data)
327 parsed = json.loads(iso_data)
328 except json.decoder.JSONDecodeError as exception:
329 logger.warning("Exception '%s' during parsing %d Bytes: '%s' - EXIT!", type(exception), len(iso_data), str(exception))
332 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
# Validate the expected nesting level by level before descending.
334 if "routeData" not in parsed:
335 logger.warning("parsed[%s]()=%d does not contain element 'routeData'", type(parsed), len(parsed))
337 elif "federatedInstancesResponse" not in parsed["routeData"]:
338 logger.warning("parsed[routeData][%s]()=%d does not contain element 'federatedInstancesResponse'", type(parsed["routeData"]), len(parsed["routeData"]))
340 elif "data" not in parsed["routeData"]["federatedInstancesResponse"]:
341 logger.warning("parsed[routeData][federatedInstancesResponse][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]), len(parsed["routeData"]["federatedInstancesResponse"]))
343 elif "federated_instances" not in parsed["routeData"]["federatedInstancesResponse"]["data"]:
344 logger.warning("parsed[routeData][federatedInstancesResponse][data][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]["data"]), len(parsed["routeData"]["federatedInstancesResponse"]["data"]))
347 data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
348 logger.debug("Checking %d data elements ...", len(data))
350 logger.debug("element='%s'", element)
# When 'only' is given, restrict processing to that element key.
351 if isinstance(only, str) and only != element:
352 logger.debug("Skipping unwanted element='%s',only='%s'", element, only)
355 logger.debug("Checking data[%s]()=%d row(s) ...", element, len(data[element]))
356 for row in data[element]:
357 logger.debug("row[]='%s'", type(row))
358 if "domain" not in row:
359 logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row))
362 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
363 peer = tidyup.domain(row["domain"])
364 logger.debug("peer='%s' - AFTER!", peer)
367 logger.debug("peer is empty - SKIPPED!")
369 elif not domain_helper.is_wanted(peer):
370 logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
373 logger.debug("peer='%s' already added - SKIPPED!", peer)
376 logger.debug("Appending peer='%s' ...", peer)
379 logger.debug("peers()=%d - EXIT!", len(peers))