1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
22 from fba.helpers import blacklist
23 from fba.helpers import config
24 from fba.helpers import domain as domain_helper
25 from fba.helpers import tidyup
27 from fba.http import csrf
28 from fba.http import federation
29 from fba.http import network
31 from fba.models import instances
# Module-wide logger setup; INFO by default, DEBUG can be re-enabled
# by uncommenting the setLevel line below.
# NOTE(review): this listing is a partial line-numbered dump — the
# `import logging` (and `import bs4` / `import json` used later) fall in
# numbering gaps and are not visible here; confirm they exist upstream.
33 logging.basicConfig(level=logging.INFO)
34 logger = logging.getLogger(__name__)
35 #logger.setLevel(logging.DEBUG)
# Localized, lower-cased headings meaning "Blocked Instances".
# fetch_blocks() compares section-header text (lower-cased) against this
# collection to locate a node's HTML blocklist section.
# NOTE(review): the opening `translations = [` line falls in a gap of this
# partial dump (internal numbering jumps 35 -> 39) — confirm the variable
# name upstream.
# NOTE(review): "Instâncias bloqueadas" appears twice (internal lines 59
# and 68); harmless for membership tests, but one entry is redundant.
39 "Blocked Instances".lower(),
40 "Instàncies bloquejades".lower(),
41 "Blocáilte Ásc".lower(),
43 "Blokované instance".lower(),
44 "Geblokkeerde instanties".lower(),
45 "Blockerade instanser".lower(),
46 "Instàncias blocadas".lower(),
47 "Istanze bloccate".lower(),
48 "Instances bloquées".lower(),
49 "Letiltott példányok".lower(),
50 "Instancias bloqueadas".lower(),
51 "Blokeatuta dauden instantziak".lower(),
53 "Peladen Yang Diblokir".lower(),
54 "Blokerede servere".lower(),
55 "Blokitaj nodoj".lower(),
56 "Блокирани Инстанции".lower(),
57 "Blockierte Instanzen".lower(),
58 "Estetyt instanssit".lower(),
59 "Instâncias bloqueadas".lower(),
60 "Zablokowane instancje".lower(),
61 "Blokované inštancie".lower(),
62 "المثلاء المحجوبون".lower(),
63 "Užblokuoti serveriai".lower(),
64 "ブロックしたインスタンス".lower(),
65 "Блокированные Инстансы".lower(),
66 "Αποκλεισμένοι διακομιστές".lower(),
68 "Instâncias bloqueadas".lower(),
# Fetch the list of federation peers from a Lemmy node.
#
# Parameters:
#   domain - remote instance's domain name (must pass raise_on() and not
#            be blacklisted)
#   origin - originating domain, forwarded to fetch_instances() as a
#            fallback when the JSON API yields nothing
# Returns: list of peer domain names (presumably strings; the `peers`
#          initialisation line is not visible — TODO confirm).
# Raises: Exception when `domain` is blacklisted; whatever
#         domain_helper.raise_on() raises on invalid input.
#
# NOTE(review): this is a partial line-numbered dump — the `peers = list()`
# initialisation, the `try:` headers matching the two visible `except`
# clauses, `return` statements and the positional-argument lines of
# network.get_json_api() fall in numbering gaps and are not shown.
71 def fetch_peers(domain: str, origin: str) -> list:
72 logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
73 domain_helper.raise_on(domain)
75 if blacklist.is_blacklisted(domain):
76 raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
80 # No CSRF by default, you don't have to add network.api_headers by yourself here
84 logger.debug("Checking CSRF for domain='%s'", domain)
85 headers = csrf.determine(domain, dict())
# On CSRF-probe failure, record the error on the instance and (per the
# debug message below) bail out with an empty result.
86 except network.exceptions as exception:
87 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
88 instances.set_last_error(domain, exception)
90 logger.debug("Returning empty list ... - EXIT!")
# Query Lemmy's site API; timeouts are read from config.
94 logger.debug("Fetching '/api/v3/site' from domain='%s' ...", domain)
95 data = network.get_json_api(
99 (config.get("connection_timeout"), config.get("read_timeout"))
102 logger.debug("data[]='%s'", type(data))
103 if "error_message" in data:
104 logger.warning("Could not reach any JSON API: domain='%s',error_message='%s'", domain, data["error_message"])
105 instances.set_last_error(domain, data)
# Preferred path: the site API exposes "federated_instances" directly.
106 elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict):
107 logger.debug("Found federated_instances for domain='%s'", domain)
108 peers = peers + federation.add_peers(data["json"]["federated_instances"])
109 logger.debug("peers()=%d after adding", len(peers))
111 logger.debug("Marking domain='%s' as successfully handled ...", domain)
112 instances.set_success(domain)
# Fallback path (the branch keyword is in a numbering gap): scrape the
# /instances page instead.
115 logger.debug("Fetching instances for domain='%s' from /instances ...", domain)
116 peers = fetch_instances(domain, origin)
117 logger.debug("peers()=%d after fetch_instances(%s, %s)", len(peers), domain, origin)
119 except network.exceptions as exception:
120 logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
121 instances.set_last_error(domain, exception)
123 logger.debug("peers()=%d - EXIT!", len(peers))
# Scrape a Lemmy node's /instances page for its blocklist.
#
# Looks for a localized "Blocked Instances" section header (matched
# against `translations`), then collects the <a> entries of the following
# <ul>/<table>; falls back to parse_script(doc, "blocked") when no HTML
# section is found. Each hit is appended (append lines are in numbering
# gaps) as a dict containing at least "block_level": "reject".
#
# Parameters:
#   domain - remote instance's domain (must be registered, not blacklisted)
# Returns: list of block dicts (exact keys besides "block_level" are in
#          gaps of this dump — TODO confirm).
# Raises: Exception when `domain` is blacklisted or not registered.
#
# NOTE(review): partial line-numbered dump — `try:` headers, the
# `found`/`blocklist` initialisations, `break`/`continue` lines, the
# fetch_response() argument lines and the dict-append bodies fall in
# numbering gaps and are not shown.
# NOTE(review): the CALLED! debug format string below (internal line 127)
# is missing a closing quote after %s: "domain='%s - CALLED!".
126 def fetch_blocks(domain: str) -> list:
127 logger.debug("domain='%s - CALLED!", domain)
128 domain_helper.raise_on(domain)
130 if blacklist.is_blacklisted(domain):
131 raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
132 elif not instances.is_registered(domain):
133 raise Exception(f"domain='{domain}' is not registered but function is invoked.")
# NOTE(review): comment says "mastodongs" (sic) — likely "Mastodon forks";
# left verbatim since only a full edit may touch it.
138 # json endpoint for newer mastodongs
139 logger.debug("Fetching /instances from domain='%s'", domain)
140 response = network.fetch_response(
144 (config.get("connection_timeout"), config.get("read_timeout"))
147 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
148 if response.ok and response.status_code == 200 and response.text != "":
149 logger.debug("Parsing %s Bytes ...", len(response.text))
150 doc = bs4.BeautifulSoup(response.text, "html.parser")
151 logger.debug("doc[]='%s'", type(doc))
# Two known page layouts: older "home-instances container-lg" and a
# generic "container" div.
154 for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
155 logger.debug("Trying to find criteria='%s' ...", criteria)
156 containers = doc.findAll("div", criteria)
158 logger.debug("Checking %d containers ...", len(containers))
159 for container in containers:
160 logger.debug("container[]='%s'", type(container))
161 for header in container.find_all(["h2", "h3", "h4", "h5"]):
163 logger.debug("header[%s]='%s' - BEFORE!", type(header), header)
164 if header is not None:
165 content = str(header.contents[0])
166 logger.debug("content[%s]='%s' - AFTER!", type(content), content)
168 if content in [None, ""]:
169 logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
171 elif not isinstance(content, str):
172 logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
# Header text matches a localized "Blocked Instances" heading: the
# (gap-hidden) assignment to `found` and a triple break follow.
174 elif content.lower() in translations:
175 logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header)
179 logger.debug("found[]='%s'", type(found))
180 if found is not None:
181 logger.debug("Found header with blocked instances - BREAK(2) !")
184 logger.debug("found[]='%s'", type(found))
185 if found is not None:
186 logger.debug("Found header with blocked instances - BREAK(1) !")
189 logger.debug("found[]='%s'", type(found))
# No HTML blocklist section found: fall back to the embedded
# window.isoData script, restricted to the "blocked" element.
191 logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
192 peers = parse_script(doc, "blocked")
194 logger.debug("domain='%s' has %d peer(s).", domain, len(peers))
195 for blocked in peers:
196 logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
201 "block_level": "reject",
204 logger.debug("blocklist()=%d - EXIT!", len(blocklist))
# HTML path: the list/table right after the matched header holds the
# blocked domains as anchors.
207 blocking = found.find_next(["ul", "table"]).findAll("a")
208 logger.debug("Found %d blocked instance(s) ...", len(blocking))
210 logger.debug("tag[]='%s'", type(tag))
211 blocked = tidyup.domain(tag.contents[0]) if tag.contents[0] != "" else None
212 logger.debug("blocked='%s'", blocked)
214 if blocked in [None, ""]:
215 logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
217 elif not domain_helper.is_wanted(blocked):
218 logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
221 logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
226 "block_level": "reject",
# Non-2xx / empty response: record the failure on the instance.
229 logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
230 instances.set_last_error(domain, response)
232 except network.exceptions as exception:
233 logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
234 instances.set_last_error(domain, exception)
236 logger.debug("blocklist()=%d - EXIT!", len(blocklist))
# Scrape a Lemmy node's /instances HTML page for peer domains.
#
# Parses the page with BeautifulSoup, walks the known container layouts,
# and collects anchor texts from the <ul>/<table> following each header;
# falls back to parse_script(doc) when no peers were found in HTML.
#
# Parameters:
#   domain - remote instance's domain (validated, not blacklisted)
#   origin - originating domain; logged at CALLED! but not otherwise used
#            in the visible lines — TODO confirm against the full source
# Returns: list of peer domain names.
# Raises: Exception when `domain` is blacklisted.
#
# NOTE(review): partial line-numbered dump — the `peers = list()`
# initialisation, `try:` header, `for tag in rows:` line, `continue`
# statements, the append line, the fetch_response() argument lines and
# the `return peers` fall in numbering gaps and are not shown.
239 def fetch_instances(domain: str, origin: str) -> list:
240 logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
241 domain_helper.raise_on(domain)
243 if blacklist.is_blacklisted(domain):
244 raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
249 # json endpoint for newer mastodongs
250 logger.debug("Fetching /instances from domain='%s'", domain)
251 response = network.fetch_response(
255 (config.get("connection_timeout"), config.get("read_timeout"))
258 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
259 if response.ok and response.status_code == 200 and response.text != "":
260 logger.debug("Parsing %s Bytes ...", len(response.text))
262 doc = bs4.BeautifulSoup(response.text, "html.parser")
263 logger.debug("doc[]='%s'", type(doc))
# Same two page layouts as fetch_blocks().
265 for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
266 logger.debug("criteria='%s'", criteria)
267 containers = doc.findAll("div", criteria)
269 logger.debug("Checking %d containers ...", len(containers))
270 for header in containers:
271 logger.debug("header[%s]='%s'", type(header), header)
273 rows = header.find_next(["ul","table"]).findAll("a")
274 logger.debug("Found %d instance(s) ...", len(rows))
276 logger.debug("tag[]='%s'", type(tag))
# Anchor content may be a plain string or a nested tag; take .text
# in the latter case.
277 text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
279 logger.debug("text='%s' - BEFORE!", text)
280 peer = tidyup.domain(text) if text != "" else None
281 logger.debug("peer='%s' - AFTER", peer)
283 if peer in [None, ""]:
284 logger.warning("peer='%s' is empty, text='%s' - SKIPPED!", peer, text)
286 elif not domain_helper.is_wanted(peer):
287 logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
290 logger.debug("peer='%s' already added - SKIPPED!", peer)
293 logger.debug("Appending peer='%s' ...", peer)
296 logger.debug("peers()=%d", len(peers))
# Nothing found via HTML: fall back to the window.isoData script
# (no `only` filter — both linked and blocked elements count here,
# per parse_script's default).
298 logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
299 peers = parse_script(doc)
301 logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
302 instances.set_last_error(domain, response)
304 logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
305 instances.set_success(domain)
307 except network.exceptions as exception:
308 logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
309 instances.set_last_error(domain, exception)
311 logger.debug("peers()=%d - EXIT!", len(peers))
# Extract peer domains from a Lemmy page's embedded window.isoData script.
#
# Finds the <script> whose content starts with "window.isoData", takes the
# text after the first "=", patches JS `undefined` into valid JSON, parses
# it, and walks parsed["routeData"]["federatedInstancesResponse"]["data"]
# ["federated_instances"], collecting tidied "domain" values per element
# (e.g. linked/allowed/blocked — element names come from the payload).
#
# Parameters:
#   doc  - parsed BeautifulSoup document (validated below)
#   only - optional element name; when set, all other elements are skipped
# Returns: list of peer domain names.
# Raises: ValueError on invalid `doc`/`only` parameters.
#
# NOTE(review): partial line-numbered dump — the `peers = list()`
# initialisation, `try:` header, `continue` lines, `for element in data:`
# line, the append line and the final `return peers` fall in numbering
# gaps and are not shown.
314 def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
# NOTE(review): this debug call has %s placeholders but no arguments
# (internal line 315) — logging will emit the raw format string;
# `doc, only` were presumably intended.
315 logger.debug("doc[]='%s',only='%s' - CALLED!")
317 if not isinstance(doc, bs4.BeautifulSoup):
# NOTE(review): message interpolates type(only) but this branch
# validates `doc` — type(doc) appears intended (internal line 318).
318 raise ValueError(f"Parameter doc[]='{type(only)}' is not of type 'bs4.BeautifulSoup'")
319 elif not isinstance(only, str) and only is not None:
320 raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'")
321 elif isinstance(only, str) and only == "":
322 raise ValueError("Parameter 'only' is empty")
324 scripts = doc.find_all("script")
327 logger.debug("scripts()=%d", len(scripts))
328 for script in scripts:
329 logger.debug("script[%s].contents()=%d", type(script), len(script.contents))
330 if len(script.contents) == 0:
331 logger.debug("script has no contents - SKIPPED!")
333 elif not script.contents[0].startswith("window.isoData"):
334 logger.debug("script.contents[0]='%s' does not start with window.isoData - SKIPPED!", script.contents[0])
337 logger.debug("script.contents[0][]='%s'", type(script.contents[0]))
# Strip "window.isoData =" and make the JS object JSON-parseable by
# quoting bare `undefined` values.
339 iso_data = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"")
340 logger.debug("iso_data[%s]='%s'", type(iso_data), iso_data)
344 parsed = json.loads(iso_data)
345 except json.decoder.JSONDecodeError as exception:
346 logger.warning("Exception '%s' during parsing %d Bytes: '%s' - EXIT!", type(exception), len(iso_data), str(exception))
349 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
# Validate the expected nesting step by step, warning on each miss.
351 if "routeData" not in parsed:
352 logger.warning("parsed[%s]()=%d does not contain element 'routeData'", type(parsed), len(parsed))
354 elif "federatedInstancesResponse" not in parsed["routeData"]:
355 logger.warning("parsed[routeData][%s]()=%d does not contain element 'federatedInstancesResponse'", type(parsed["routeData"]), len(parsed["routeData"]))
357 elif "data" not in parsed["routeData"]["federatedInstancesResponse"]:
358 logger.warning("parsed[routeData][federatedInstancesResponse][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]), len(parsed["routeData"]["federatedInstancesResponse"]))
360 elif "federated_instances" not in parsed["routeData"]["federatedInstancesResponse"]["data"]:
# NOTE(review): this message says "does not contain element 'data'"
# but the check is for 'federated_instances' (internal line 361).
361 logger.warning("parsed[routeData][federatedInstancesResponse][data][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]["data"]), len(parsed["routeData"]["federatedInstancesResponse"]["data"]))
364 data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
365 logger.debug("Checking %d data elements ...", len(data))
367 logger.debug("element='%s'", element)
368 if isinstance(only, str) and only != element:
369 logger.debug("Skipping unwanted element='%s',only='%s'", element, only)
372 logger.debug("Checking data[%s]()=%d row(s) ...", element, len(data[element]))
373 for row in data[element]:
374 logger.debug("row[]='%s'", type(row))
375 if "domain" not in row:
376 logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row))
379 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
380 peer = tidyup.domain(row["domain"])
381 logger.debug("peer='%s' - AFTER!", peer)
383 if peer in [None, ""]:
384 logger.warning("peer='%s' is empty, row[domain]='%s' - SKIPPED!", peer, row["domain"])
386 elif not domain_helper.is_wanted(peer):
387 logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
389 logger.debug("peer='%s' already added - SKIPPED!", peer)
392 logger.debug("Appending peer='%s' ...", peer)
395 logger.debug("peers()=%d - EXIT!", len(peers))