1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
22 from fba.helpers import blacklist
23 from fba.helpers import config
24 from fba.helpers import domain as domain_helper
25 from fba.helpers import tidyup
27 from fba.http import csrf
28 from fba.http import federation
29 from fba.http import network
31 from fba.models import instances
# Module-wide logging setup: INFO by default; uncomment the last line to get
# verbose DEBUG output from this module only.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
# Lowercased, localized variants of the "Blocked Instances" heading as rendered
# by Lemmy's UI in different languages. fetch_blocks() compares <h2>..<h5> text
# (lowercased) against these entries to locate the blocklist section of the
# /instances page. (The opening of the list literal is outside this excerpt.)
    "Blocked Instances".lower(),
    "Instàncies bloquejades".lower(),
    "Blocáilte Ásc".lower(),
    "Blokované instance".lower(),
    "Geblokkeerde instanties".lower(),
    "Blockerade instanser".lower(),
    "Instàncias blocadas".lower(),
    "Istanze bloccate".lower(),
    "Instances bloquées".lower(),
    "Letiltott példányok".lower(),
    "Instancias bloqueadas".lower(),
    "Blokeatuta dauden instantziak".lower(),
    "Peladen Yang Diblokir".lower(),
    "Blokerede servere".lower(),
    "Blokitaj nodoj".lower(),
    "Блокирани Инстанции".lower(),
    "Blockierte Instanzen".lower(),
    "Estetyt instanssit".lower(),
    "Instâncias bloqueadas".lower(),
    "Zablokowane instancje".lower(),
    "Blokované inštancie".lower(),
    "المثلاء المحجوبون".lower(),
    "Užblokuoti serveriai".lower(),
    "ブロックしたインスタンス".lower(),
    "Блокированные Инстансы".lower(),
    "Αποκλεισμένοι διακομιστές".lower(),
    # NOTE(review): appears to duplicate the Portuguese entry above — harmless
    # for membership tests, but worth deduplicating upstream.
    "Instâncias bloqueadas".lower(),
def fetch_peers(domain: str, origin: str) -> list:
    """Fetch the list of federated peer domains from a Lemmy instance.

    Queries the ``/api/v3/site`` JSON endpoint and feeds its
    ``federated_instances`` element to federation.add_peers(); when that
    element is missing, falls back to scraping the HTML ``/instances``
    page via fetch_instances().

    Parameters:
        domain -- Lemmy instance to query; must be registered, not blacklisted
        origin -- origin domain, forwarded to the fetch_instances() fallback

    Returns: list of peer domain names.
    Raises: Exception if `domain` is blacklisted or unregistered.

    NOTE(review): this excerpt elides several source lines (list
    initialisation, ``try:`` openers, ``return`` statements); the code below
    is annotated as-is, with the gaps marked inline.
    """
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    # Presumably validates the 'domain' syntax, raising on malformed input —
    # see fba.helpers.domain; confirm against that module.
    domain_helper.raise_on(domain)

    # Guard clauses: invoking this on an unusable domain is a caller bug,
    # not a runtime condition, hence the hard exceptions.
    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    # (elided in this excerpt: headers default and try: opener)
        logger.debug("Checking CSRF for domain='%s'", domain)
        # Determine request headers, probing the instance for a CSRF token.
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        # CSRF probing failed: record the error per instance and bail out.
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
        instances.set_last_error(domain, exception)

        logger.debug("Returning empty list ... - EXIT!")
        # (elided: early return)

    # (elided: try: opener around the API call below)
        logger.debug("Fetching '/api/v3/site' from domain='%s' ...", domain)
        data = network.get_json_api(
            # (elided: domain, path and headers arguments)
            # The tuple is the (connect, read) timeout pair in seconds.
            (config.get("connection_timeout"), config.get("read_timeout"))
        # (elided: closing parenthesis of the call)

        logger.debug("data[]='%s'", type(data))
        if "error_message" in data:
            # The HTTP/JSON layer reported a failure; remember it per instance.
            logger.warning("Could not reach any JSON API: domain='%s',error_message='%s'", domain, data["error_message"])
            instances.set_last_error(domain, data)
        elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict):
            logger.debug("Found federated_instances for domain='%s'", domain)
            # Collect the peers reported by the JSON API.
            peers = peers + federation.add_peers(data["json"]["federated_instances"])
            logger.debug("peers()=%d after adding", len(peers))

            logger.debug("Marking domain='%s' as successfully handled ...", domain)
            instances.set_success(domain)
        # (elided: else: opener — HTML /instances fallback below)
            logger.debug("Fetching instances for domain='%s' from /instances ...", domain)
            peers = fetch_instances(domain, origin)
            logger.debug("peers()=%d after fetch_instances(%s, %s)", len(peers), domain, origin)

    except network.exceptions as exception:
        logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
def fetch_blocks(domain: str) -> list:
    """Scrape a Lemmy instance's /instances page for its block list.

    Looks for a heading matching one of the localized "Blocked Instances"
    translations, then collects the linked domains in the following
    list/table; when no such HTML section exists, falls back to
    parse_script(doc, "blocked").

    Parameters:
        domain -- Lemmy instance to scrape; must be registered, not blacklisted

    Returns: the blocklist — entries appear to be dicts including
    "block_level": "reject" (append calls are elided here; confirm shape).
    Raises: Exception if `domain` is blacklisted or unregistered.

    NOTE(review): several source lines are elided in this excerpt (result-list
    initialisation, try:/for/continue/break/return lines and the append dict
    literals); gaps are marked inline.
    """
    # NOTE(review): format string looks like it is missing a closing apostrophe
    # after %s (should read "domain='%s' - CALLED!"); cosmetic, but worth fixing.
    logger.debug("domain='%s - CALLED!", domain)
    domain_helper.raise_on(domain)

    # Guard clauses: refuse to operate on unusable domains.
    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    # (elided: result-list initialisation and try: opener)
        # json endpoint for newer mastodongs
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            # (elided: domain, path and header arguments)
            (config.get("connection_timeout"), config.get("read_timeout"))
        # (elided: closing parenthesis of the call)

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            # Try both known container layouts used by Lemmy themes.
            for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("Trying to find criteria='%s' ...", criteria)
                containers = doc.findAll("div", criteria)

                logger.debug("Checking %d containers ...", len(containers))
                for container in containers:
                    logger.debug("container[]='%s'", type(container))
                    # Scan headings for a localized "Blocked Instances" title.
                    for header in container.find_all(["h2", "h3", "h4", "h5"]):
                        logger.debug("header[%s]='%s' - BEFORE!", type(header), header)
                        if header is not None:
                            content = str(header.contents[0])
                        logger.debug("content[%s]='%s' - AFTER!", type(content), content)

                        if content in [None, ""]:
                            logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
                            # (elided: continue)
                        elif not isinstance(content, str):
                            logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
                            # (elided: continue)
                        elif content.lower() in translations:
                            # Heading matches a known translation — remember it.
                            logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header)
                            # (elided: assignment of 'found' and break)

                    logger.debug("found[]='%s'", type(found))
                    if found is not None:
                        logger.debug("Found header with blocked instances - BREAK(2) !")
                        # (elided: break)

                logger.debug("found[]='%s'", type(found))
                if found is not None:
                    logger.debug("Found header with blocked instances - BREAK(1) !")
                    # (elided: break)

            logger.debug("found[]='%s'", type(found))
            # (elided: branch opener for the "no HTML blocklist" case)
                logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
                # Fallback: mine the embedded window.isoData JSON for blocks.
                peers = parse_script(doc, "blocked")

                logger.debug("domain='%s' has %d peer(s).", domain, len(peers))
                for blocked in peers:
                    logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                    # (elided: append of the block record; the line below is one
                    # of its dict items)
                    "block_level": "reject",
                    # (elided: closing of the dict/append call)

                logger.debug("blocklist()=%d - EXIT!", len(blocklist))
                # (elided: early return of blocklist)

            # Heading found: blocked domains are the links in the next list/table.
            blocking = found.find_next(["ul", "table"]).findAll("a")
            logger.debug("Found %d blocked instance(s) ...", len(blocking))
            # (elided: loop opener over the anchor tags, binding 'tag')
                logger.debug("tag[]='%s'", type(tag))
                # Normalise the anchor text into a bare domain (None if empty).
                blocked = tidyup.domain(tag.contents[0]) if tag.contents[0] != "" else None
                logger.debug("blocked='%s'", blocked)

                if blocked in [None, ""]:
                    logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
                    # (elided: continue)
                elif not domain_helper.is_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    # (elided: continue)

                logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                # (elided: append of the block record; the line below is one of
                # its dict items)
                "block_level": "reject",
                # (elided: closing of the dict/append call and else: opener for
                # the failed-response path below)
            logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
            instances.set_last_error(domain, response)

    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("blocklist()=%d - EXIT!", len(blocklist))
def fetch_instances(domain: str, origin: str) -> list:
    """Scrape a Lemmy instance's /instances HTML page for peer domains.

    Walks the known container layouts, pulling linked domains out of the
    first list/table after each header; when no peers are found in the
    HTML, falls back to parse_script().

    Parameters:
        domain -- Lemmy instance to scrape; must not be blacklisted
        origin -- origin domain (only logged in the code visible here —
                  confirm whether it is used further in the elided lines)

    Returns: list of peer domain names.
    Raises: Exception if `domain` is blacklisted.

    NOTE(review): this excerpt elides some source lines (peers-list
    initialisation, try:, for/continue statements and the final return);
    gaps are marked inline.
    """
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")

    # (elided: peers-list initialisation and try: opener)
        # json endpoint for newer mastodongs
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            # (elided: domain, path and header arguments)
            (config.get("connection_timeout"), config.get("read_timeout"))
        # (elided: closing parenthesis of the call)

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            # Try both known container layouts used by Lemmy themes.
            for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("criteria='%s'", criteria)
                containers = doc.findAll("div", criteria)

                logger.debug("Checking %d containers ...", len(containers))
                for header in containers:
                    logger.debug("header[%s]='%s'", type(header), header)
                    # Peer domains are the links in the first list/table
                    # following the container.
                    rows = header.find_next(["ul","table"]).findAll("a")
                    logger.debug("Found %d instance(s) ...", len(rows))
                    # (elided: loop opener over the anchor tags, binding 'tag')
                        logger.debug("tag[]='%s'", type(tag))
                        # Anchor content may be a plain string or a nested tag.
                        text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text

                        logger.debug("text='%s' - BEFORE!", text)
                        # Normalise into a bare domain (None if empty).
                        peer = tidyup.domain(text) if text != "" else None
                        logger.debug("peer='%s' - AFTER", peer)

                        if peer in [None, ""]:
                            logger.warning("peer='%s' is empty, text='%s' - SKIPPED!", peer, text)
                            # (elided: continue)
                        elif not domain_helper.is_wanted(peer):
                            logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                            # (elided: continue; duplicate-check branch opener)
                            logger.debug("peer='%s' already added - SKIPPED!", peer)
                            # (elided: continue)

                        logger.debug("Appending peer='%s' ...", peer)
                        # (elided: peers.append(peer))

            logger.debug("peers()=%d", len(peers))
            # (elided: empty-result check opener)
                logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
                # Fallback: mine the embedded window.isoData JSON instead.
                peers = parse_script(doc)
        # (elided: else: opener for the failed-response path below)
            logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
            instances.set_last_error(domain, response)

        logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
        instances.set_success(domain)

    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
    """Extract peer domains from Lemmy's embedded ``window.isoData`` JSON.

    Newer Lemmy front-ends inline their state as a JSON assignment inside a
    <script> tag; this walks
    parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
    and collects the "domain" value of each row.

    Parameters:
        doc  -- parsed BeautifulSoup document of the /instances page
        only -- optional element name (e.g. "blocked") restricting which
                sub-lists are scanned; None scans all of them

    Returns: list of peer domain names.
    Raises: ValueError on wrong parameter types or an empty ``only``.

    NOTE(review): two apparent bugs are flagged inline below. Also, several
    source lines are elided in this excerpt (peers-list initialisation,
    try:, continue/return statements); gaps are marked where they fall.
    """
    # NOTE(review): format string has two %s placeholders but no arguments —
    # this logs incorrectly; should pass type(doc) and only.
    logger.debug("doc[]='%s',only='%s' - CALLED!")

    if not isinstance(doc, bs4.BeautifulSoup):
        # NOTE(review): message reports type(only) but should report type(doc).
        raise ValueError(f"Parameter doc[]='{type(only)}' is not of type 'bs4.BeautifulSoup'")
    elif not isinstance(only, str) and only is not None:
        raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'")
    elif isinstance(only, str) and only == "":
        raise ValueError("Parameter 'only' is empty")

    scripts = doc.find_all("script")
    # (elided: peers-list initialisation)

    logger.debug("scripts()=%d", len(scripts))
    for script in scripts:
        logger.debug("script[%s].contents()=%d", type(script), len(script.contents))
        if len(script.contents) == 0:
            logger.debug("script has no contents - SKIPPED!")
            # (elided: continue)
        elif not script.contents[0].startswith("window.isoData"):
            logger.debug("script.contents[0]='%s' does not start with window.isoData - SKIPPED!", script.contents[0])
            # (elided: continue)

        logger.debug("script.contents[0][]='%s'", type(script.contents[0]))
        # Take the right-hand side of the assignment; ":undefined" is not valid
        # JSON, so it is quoted before parsing.
        iso_data = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"")
        logger.debug("iso_data[%s]='%s'", type(iso_data), iso_data)

        # (elided: try: opener)
            parsed = json.loads(iso_data)
        except json.decoder.JSONDecodeError as exception:
            logger.warning("Exception '%s' during parsing %d Bytes: '%s' - EXIT!", type(exception), len(iso_data), str(exception))
            # (elided: early return)

        logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

        # Defensive checks down the expected JSON path, one level at a time.
        if "routeData" not in parsed:
            logger.warning("parsed[%s]()=%d does not contain element 'routeData'", type(parsed), len(parsed))
            # (elided: continue)
        elif "federatedInstancesResponse" not in parsed["routeData"]:
            logger.warning("parsed[routeData][%s]()=%d does not contain element 'federatedInstancesResponse'", type(parsed["routeData"]), len(parsed["routeData"]))
            # (elided: continue)
        elif "data" not in parsed["routeData"]["federatedInstancesResponse"]:
            logger.warning("parsed[routeData][federatedInstancesResponse][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]), len(parsed["routeData"]["federatedInstancesResponse"]))
            # (elided: continue)
        elif "federated_instances" not in parsed["routeData"]["federatedInstancesResponse"]["data"]:
            logger.warning("parsed[routeData][federatedInstancesResponse][data][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]["data"]), len(parsed["routeData"]["federatedInstancesResponse"]["data"]))
            # (elided: continue)

        data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
        logger.debug("Checking %d data elements ...", len(data))
        # (elided: loop opener binding 'element' over data's keys)
            logger.debug("element='%s'", element)
            if isinstance(only, str) and only != element:
                # Caller asked for a single element (e.g. "blocked") only.
                logger.debug("Skipping unwanted element='%s',only='%s'", element, only)
                # (elided: continue)

            logger.debug("Checking data[%s]()=%d row(s) ...", element, len(data[element]))
            for row in data[element]:
                logger.debug("row[]='%s'", type(row))
                if "domain" not in row:
                    logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row))
                    # (elided: continue)

                logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
                # Normalise into a bare domain name.
                peer = tidyup.domain(row["domain"])
                logger.debug("peer='%s' - AFTER!", peer)

                if peer in [None, ""]:
                    logger.warning("peer='%s' is empty, row[domain]='%s' - SKIPPED!", peer, row["domain"])
                    # (elided: continue)
                elif not domain_helper.is_wanted(peer):
                    logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                    # (elided: continue; duplicate-check branch opener)
                    logger.debug("peer='%s' already added - SKIPPED!", peer)
                    # (elided: continue)

                logger.debug("Appending peer='%s' ...", peer)
                # (elided: peers.append(peer))

    logger.debug("peers()=%d - EXIT!", len(peers))