1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
import json
import logging

import bs4

from fba.helpers import config
from fba.helpers import domain as domain_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import instances
32 logging.basicConfig(level=logging.INFO)
33 logger = logging.getLogger(__name__)
34 #logger.setLevel(logging.DEBUG)
36 def fetch_peers(domain: str, origin: str) -> list:
37 logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
38 domain_helper.raise_on(domain)
42 # No CSRF by default, you don't have to add network.api_headers by yourself here
46 logger.debug("Checking CSRF for domain='%s'", domain)
47 headers = csrf.determine(domain, dict())
48 except network.exceptions as exception:
49 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
50 instances.set_last_error(domain, exception)
52 logger.debug("Returning empty list ... - EXIT!")
56 logger.debug("Fetching '/api/v3/site' from domain='%s' ...", domain)
57 data = network.get_json_api(
61 (config.get("connection_timeout"), config.get("read_timeout"))
64 logger.debug("data[]='%s'", type(data))
65 if "error_message" in data:
66 logger.warning("Could not reach any JSON API: domain='%s'", domain)
67 instances.set_last_error(domain, data)
68 elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict):
69 logger.debug("Found federated_instances for domain='%s'", domain)
70 peers = peers + federation.add_peers(data["json"]["federated_instances"])
72 logger.debug("Marking domain='%s' as successfully handled ...", domain)
73 instances.set_success(domain)
76 logger.warning("Fetching instances for domain='%s' from /instances ...", domain)
77 peers = fetch_instances(domain, origin)
79 except network.exceptions as exception:
80 logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
81 instances.set_last_error(domain, exception)
83 logger.debug("peers()=%d - EXIT!", len(peers))
86 def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
87 logger.debug("domain='%s,nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
88 domain_helper.raise_on(domain)
90 if not isinstance(nodeinfo_url, str):
91 raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not of type 'str'")
92 elif nodeinfo_url == "":
93 raise ValueError("Parameter 'nodeinfo_url' is empty")
96 "Blocked Instances".lower(),
97 "Instàncies bloquejades".lower(),
98 "Blocáilte Ásc".lower(),
100 "Blokované instance".lower(),
101 "Geblokkeerde instanties".lower(),
102 "Blockerade instanser".lower(),
103 "Instàncias blocadas".lower(),
104 "Istanze bloccate".lower(),
105 "Instances bloquées".lower(),
106 "Letiltott példányok".lower(),
107 "Instancias bloqueadas".lower(),
108 "Blokeatuta dauden instantziak".lower(),
110 "Peladen Yang Diblokir".lower(),
111 "Blokerede servere".lower(),
112 "Blokitaj nodoj".lower(),
113 "Блокирани Инстанции".lower(),
114 "Blockierte Instanzen".lower(),
115 "Estetyt instanssit".lower(),
116 "Instâncias bloqueadas".lower(),
117 "Zablokowane instancje".lower(),
118 "Blokované inštancie".lower(),
119 "المثلاء المحجوبون".lower(),
120 "Užblokuoti serveriai".lower(),
121 "ブロックしたインスタンス".lower(),
122 "Блокированные Инстансы".lower(),
123 "Αποκλεισμένοι διακομιστές".lower(),
125 "Instâncias bloqueadas".lower(),
131 # json endpoint for newer mastodongs
132 logger.debug("Fetching /instances from domain='%s'", domain)
133 response = network.fetch_response(
137 (config.get("connection_timeout"), config.get("read_timeout"))
140 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
141 if response.ok and response.status_code == 200 and response.text != "":
142 logger.debug("Parsing %s Bytes ...", len(response.text))
144 doc = bs4.BeautifulSoup(response.text, "html.parser")
145 logger.debug("doc[]='%s'", type(doc))
148 for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
149 logger.debug("criteria='%s'", criteria)
150 containers = doc.findAll("div", criteria)
152 logger.debug("Checking %d containers ...", len(containers))
153 for container in containers:
154 logger.debug("container[]='%s'", type(container))
155 for header in container.find_all(["h2", "h3", "h4", "h5"]):
157 logger.debug("header[%s]='%s' - BEFORE!", type(header), header)
158 if header is not None:
159 content = str(header.contents[0])
160 logger.debug("content[%s]='%s' - AFTER!", type(content), content)
163 logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
165 elif not isinstance(content, str):
166 logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
168 elif content.lower() in translations:
169 logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header)
173 logger.debug("found[]='%s'", type(found))
174 if found is not None:
175 logger.debug("Found header with blocked instances - BREAK(2) !")
178 logger.debug("found[]='%s'", type(found))
179 if found is not None:
180 logger.debug("Found header with blocked instances - BREAK(1) !")
183 logger.debug("found[]='%s'", type(found))
185 logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
186 peers = parse_script(doc, "blocked")
188 logger.debug("domain='%s' has %d peer(s).", domain, len(peers))
189 for blocked in peers:
190 logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
195 "block_level": "reject",
198 logger.debug("blocklist()=%d - EXIT!", len(blocklist))
201 blocking = found.find_next(["ul", "table"]).findAll("a")
202 logger.debug("Found %d blocked instance(s) ...", len(blocking))
204 logger.debug("tag[]='%s'", type(tag))
205 blocked = tidyup.domain(tag.contents[0])
206 logger.debug("blocked='%s'", blocked)
209 logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
212 logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
217 "block_level": "reject",
220 logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
221 instances.set_last_error(domain, response)
223 except network.exceptions as exception:
224 logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
225 instances.set_last_error(domain, exception)
227 logger.debug("blocklist()=%d - EXIT!", len(blocklist))
230 def fetch_instances(domain: str, origin: str) -> list:
231 logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
232 domain_helper.raise_on(domain)
237 # json endpoint for newer mastodongs
238 logger.debug("Fetching /instances from domain='%s'", domain)
239 response = network.fetch_response(
243 (config.get("connection_timeout"), config.get("read_timeout"))
246 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
247 if response.ok and response.status_code == 200 and response.text != "":
248 logger.debug("Parsing %s Bytes ...", len(response.text))
250 doc = bs4.BeautifulSoup(response.text, "html.parser")
251 logger.debug("doc[]='%s'", type(doc))
253 for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
254 logger.debug("criteria='%s'", criteria)
255 containers = doc.findAll("div", criteria)
257 logger.debug("Checking %d containers ...", len(containers))
258 for header in containers:
259 logger.debug("header[%s]='%s'", type(header), header)
261 rows = header.find_next(["ul","table"]).findAll("a")
262 logger.debug("Found %d instance(s) ...", len(rows))
264 logger.debug("tag[]='%s'", type(tag))
265 text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
266 peer = tidyup.domain(text)
267 logger.debug("peer='%s'", peer)
270 logger.debug("peer is empty - SKIPPED!")
273 logger.debug("peer='%s' already added - SKIPPED!", peer)
276 logger.debug("Appending peer='%s' ...", peer)
279 logger.debug("peers()=%d", len(peers))
281 logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
282 peers = parse_script(doc)
284 logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
285 instances.set_last_error(domain, response)
287 logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
288 instances.set_success(domain)
290 except network.exceptions as exception:
291 logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
292 instances.set_last_error(domain, exception)
294 logger.debug("peers()=%d - EXIT!", len(peers))
297 def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
298 logger.debug("doc[]='%s',only='%s' - CALLED!")
299 if not isinstance(doc, bs4.BeautifulSoup):
300 raise ValueError(f"Parameter doc[]='{type(only)}' is not of type 'bs4.BeautifulSoup'")
301 elif not isinstance(only, str) and only is not None:
302 raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'")
303 elif isinstance(only, str) and only == "":
304 raise ValueError("Parameter 'only' is empty")
306 scripts = doc.find_all("script")
309 logger.debug("scripts()=%d", len(scripts))
310 for script in scripts:
311 logger.debug("script[%s].contents()=%d", type(script), len(script.contents))
312 if len(script.contents) == 0:
313 logger.debug("script has no contents - SKIPPED!")
315 elif not script.contents[0].startswith("window.isoData"):
316 logger.debug("script.contents[0]='%s' does not start with window.isoData - SKIPPED!", script.contents[0])
319 logger.debug("script.contents[0][]='%s'", type(script.contents[0]))
321 iso_data = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"")
322 logger.debug("iso_data[%s]='%s'", type(iso_data), iso_data)
326 parsed = json.loads(iso_data)
327 except json.decoder.JSONDecodeError as exception:
328 logger.warning("Exception '%s' during parsing %d Bytes: '%s' - EXIT!", type(exception), len(iso_data), str(exception))
331 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
333 if "routeData" not in parsed:
334 logger.warning("parsed[%s]()=%d does not contain element 'routeData'", type(parsed), len(parsed))
336 elif "federatedInstancesResponse" not in parsed["routeData"]:
337 logger.warning("parsed[routeData][%s]()=%d does not contain element 'federatedInstancesResponse'", type(parsed["routeData"]), len(parsed["routeData"]))
339 elif "data" not in parsed["routeData"]["federatedInstancesResponse"]:
340 logger.warning("parsed[routeData][federatedInstancesResponse][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]), len(parsed["routeData"]["federatedInstancesResponse"]))
342 elif "federated_instances" not in parsed["routeData"]["federatedInstancesResponse"]["data"]:
343 logger.warning("parsed[routeData][federatedInstancesResponse][data][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]["data"]), len(parsed["routeData"]["federatedInstancesResponse"]["data"]))
346 data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
347 logger.debug("Checking %d data elements ...", len(data))
349 logger.debug("element='%s'", element)
350 if isinstance(only, str) and only != element:
351 logger.debug("Skipping unwanted element='%s',only='%s'", element, only)
354 logger.debug("Checking data[%s]()=%d row(s) ...", element, len(data[element]))
355 for row in data[element]:
356 logger.debug("row[]='%s'", type(row))
357 if "domain" not in row:
358 logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row))
361 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
362 peer = tidyup.domain(row["domain"])
363 logger.debug("peer='%s' - AFTER!", peer)
366 logger.debug("peer is empty - SKIPPED!")
369 logger.debug("peer='%s' already added - SKIPPED!", peer)
372 logger.debug("Appending peer='%s' ...", peer)
375 logger.debug("peers()=%d - EXIT!", len(peers))