# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import json
import logging

import bs4

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import domain as domain_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import instances

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
39 "Blocked Instances".lower(),
40 "Instàncies bloquejades".lower(),
41 "Blocáilte Ásc".lower(),
43 "Blokované instance".lower(),
44 "Geblokkeerde instanties".lower(),
45 "Blockerade instanser".lower(),
46 "Instàncias blocadas".lower(),
47 "Istanze bloccate".lower(),
48 "Instances bloquées".lower(),
49 "Letiltott példányok".lower(),
50 "Instancias bloqueadas".lower(),
51 "Blokeatuta dauden instantziak".lower(),
53 "Peladen Yang Diblokir".lower(),
54 "Blokerede servere".lower(),
55 "Blokitaj nodoj".lower(),
56 "Блокирани Инстанции".lower(),
57 "Blockierte Instanzen".lower(),
58 "Estetyt instanssit".lower(),
59 "Instâncias bloqueadas".lower(),
60 "Zablokowane instancje".lower(),
61 "Blokované inštancie".lower(),
62 "المثلاء المحجوبون".lower(),
63 "Užblokuoti serveriai".lower(),
64 "ブロックしたインスタンス".lower(),
65 "Блокированные Инстансы".lower(),
66 "Αποκλεισμένοι διακομιστές".lower(),
68 "Instâncias bloqueadas".lower(),
def fetch_peers(domain: str, origin: str) -> list:
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")

    peers = list()

    # No CSRF by default, you don't need to add network.api_headers yourself here
    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
        instances.set_last_error(domain, exception)

        logger.debug("Returning empty list ... - EXIT!")
        return list()

    try:
        logger.debug("Fetching '/api/v3/site' from domain='%s' ...", domain)
        data = network.get_json_api(
            domain,
            "/api/v3/site",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("data[]='%s'", type(data))
        if "error_message" in data:
            logger.warning("Could not reach any JSON API: domain='%s'", domain)
            instances.set_last_error(domain, data)
        elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict):
            logger.debug("Found federated_instances for domain='%s'", domain)
            peers = peers + federation.add_peers(data["json"]["federated_instances"])

            logger.debug("Marking domain='%s' as successfully handled ...", domain)
            instances.set_success(domain)
        else:
            logger.warning("Fetching instances for domain='%s' from /instances ...", domain)
            peers = fetch_instances(domain, origin)

    except network.exceptions as exception:
        logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers
def fetch_blocks(domain: str) -> list:
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    blocklist = list()

    try:
        # Lemmy serves its federation overview as an HTML page under /instances
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            found = None
            for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("criteria='%s'", criteria)
                containers = doc.findAll("div", criteria)

                logger.debug("Checking %d containers ...", len(containers))
                for container in containers:
                    logger.debug("container[]='%s'", type(container))
                    for header in container.find_all(["h2", "h3", "h4", "h5"]):
                        content = None
                        logger.debug("header[%s]='%s' - BEFORE!", type(header), header)
                        if header is not None:
                            content = str(header.contents[0])
                        logger.debug("content[%s]='%s' - AFTER!", type(content), content)

                        if content is None or content == "":
                            logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
                            continue
                        elif not isinstance(content, str):
                            logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
                            continue
                        elif content.lower() in translations:
                            logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header)
                            found = header
                            break

                    logger.debug("found[]='%s'", type(found))
                    if found is not None:
                        logger.debug("Found header with blocked instances - BREAK(2) !")
                        break

                logger.debug("found[]='%s'", type(found))
                if found is not None:
                    logger.debug("Found header with blocked instances - BREAK(1) !")
                    break

            logger.debug("found[]='%s'", type(found))

            # No matching heading: some Lemmy versions expose the block list only
            # through the embedded window.isoData JSON, so fall back to
            # parse_script() and record one reject-level entry per blocked domain.
            if found is None:
                logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
                peers = parse_script(doc, "blocked")

                logger.debug("domain='%s' has %d peer(s).", domain, len(peers))
                for blocked in peers:
                    logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                    blocklist.append({
                        "blocker": domain,
                        "blocked": blocked,
                        "block_level": "reject",
                    })

                logger.debug("blocklist()=%d - EXIT!", len(blocklist))
                return blocklist

            # A heading matched: the blocked domains are the links in the <ul> or
            # <table> that immediately follows it.
            blocking = found.find_next(["ul", "table"]).findAll("a")
            logger.debug("Found %d blocked instance(s) ...", len(blocking))
            for tag in blocking:
                logger.debug("tag[]='%s'", type(tag))
                blocked = tidyup.domain(tag.contents[0]) if tag.contents[0] != "" else None
                logger.debug("blocked='%s'", blocked)

                if blocked is None or blocked == "":
                    logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
                    continue
                elif not domain_helper.is_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                blocklist.append({
                    "blocker": domain,
                    "blocked": blocked,
                    "block_level": "reject",
                })
        else:
            logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.reason='%s'", response.ok, response.status_code, response.reason)
            instances.set_last_error(domain, response)

    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("blocklist()=%d - EXIT!", len(blocklist))
    return blocklist
def fetch_instances(domain: str, origin: str) -> list:
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")

    peers = list()

    try:
        # Lemmy serves its federation overview as an HTML page under /instances
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("criteria='%s'", criteria)
                containers = doc.findAll("div", criteria)

                logger.debug("Checking %d containers ...", len(containers))
                for header in containers:
                    logger.debug("header[%s]='%s'", type(header), header)

                    rows = header.find_next(["ul", "table"]).findAll("a")
                    logger.debug("Found %d instance(s) ...", len(rows))
                    for tag in rows:
                        logger.debug("tag[]='%s'", type(tag))
                        text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
                        logger.debug("text='%s' - BEFORE!", text)

                        peer = tidyup.domain(text) if text != "" else None
                        logger.debug("peer='%s' - AFTER!", peer)

                        if peer is None or peer == "":
                            logger.warning("peer='%s' is empty, text='%s' - SKIPPED!", peer, text)
                            continue
                        elif not domain_helper.is_wanted(peer):
                            logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                            continue
                        elif peer in peers:
                            logger.debug("peer='%s' already added - SKIPPED!", peer)
                            continue

                        logger.debug("Appending peer='%s' ...", peer)
                        peers.append(peer)

            logger.debug("peers()=%d", len(peers))
297 logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
298 peers = parse_script(doc)
300 logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
301 instances.set_last_error(domain, response)
303 logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
304 instances.set_success(domain)
306 except network.exceptions as exception:
307 logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
308 instances.set_last_error(domain, exception)
310 logger.debug("peers()=%d - EXIT!", len(peers))
def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
    logger.debug("doc[]='%s',only='%s' - CALLED!", type(doc), only)

    if not isinstance(doc, bs4.BeautifulSoup):
        raise ValueError(f"Parameter doc[]='{type(doc)}' is not of type 'bs4.BeautifulSoup'")
    elif not isinstance(only, str) and only is not None:
        raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'")
    elif isinstance(only, str) and only == "":
        raise ValueError("Parameter 'only' is empty")

    scripts = doc.find_all("script")
    peers = list()

    logger.debug("scripts()=%d", len(scripts))
    for script in scripts:
        logger.debug("script[%s].contents()=%d", type(script), len(script.contents))
        if len(script.contents) == 0:
            logger.debug("script has no contents - SKIPPED!")
            continue
        elif not script.contents[0].startswith("window.isoData"):
            logger.debug("script.contents[0]='%s' does not start with window.isoData - SKIPPED!", script.contents[0])
            continue

        logger.debug("script.contents[0][]='%s'", type(script.contents[0]))

        # Split only on the first '=' so any '=' inside the JSON payload is preserved
        iso_data = script.contents[0].split("=", 1)[1].strip().replace(":undefined", ":\"undefined\"")
        logger.debug("iso_data[%s]='%s'", type(iso_data), iso_data)

        try:
            parsed = json.loads(iso_data)
        except json.decoder.JSONDecodeError as exception:
            logger.warning("Exception '%s' during parsing %d Bytes: '%s' - EXIT!", type(exception), len(iso_data), str(exception))
            return list()
348 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
350 if "routeData" not in parsed:
351 logger.warning("parsed[%s]()=%d does not contain element 'routeData'", type(parsed), len(parsed))
353 elif "federatedInstancesResponse" not in parsed["routeData"]:
354 logger.warning("parsed[routeData][%s]()=%d does not contain element 'federatedInstancesResponse'", type(parsed["routeData"]), len(parsed["routeData"]))
356 elif "data" not in parsed["routeData"]["federatedInstancesResponse"]:
357 logger.warning("parsed[routeData][federatedInstancesResponse][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]), len(parsed["routeData"]["federatedInstancesResponse"]))
359 elif "federated_instances" not in parsed["routeData"]["federatedInstancesResponse"]["data"]:
360 logger.warning("parsed[routeData][federatedInstancesResponse][data][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]["data"]), len(parsed["routeData"]["federatedInstancesResponse"]["data"]))
363 data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
364 logger.debug("Checking %d data elements ...", len(data))
366 logger.debug("element='%s'", element)
367 if isinstance(only, str) and only != element:
368 logger.debug("Skipping unwanted element='%s',only='%s'", element, only)
371 logger.debug("Checking data[%s]()=%d row(s) ...", element, len(data[element]))
372 for row in data[element]:
373 logger.debug("row[]='%s'", type(row))
374 if "domain" not in row:
375 logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row))
378 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
379 peer = tidyup.domain(row["domain"])
380 logger.debug("peer='%s' - AFTER!", peer)
382 if peer is None or peer == "":
383 logger.warning("peer='%s' is empty, row[domain]='%s' - SKIPPED!", peer, row["domain"])
385 elif not domain_helper.is_wanted(peer):
386 logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
388 logger.debug("peer='%s' already added - SKIPPED!", peer)
391 logger.debug("Appending peer='%s' ...", peer)
394 logger.debug("peers()=%d - EXIT!", len(peers))