# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
import json
import logging

import bs4

from fba import csrf
from fba import utils

from fba.helpers import config
from fba.helpers import domain as domain_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import instances
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
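
# This module appears to target Lemmy-style instances (an assumption based on
# the "/api/v3/site" endpoint and the "window.isoData" payload parsed below):
# - fetch_peers() asks the JSON API for federated instances,
# - fetch_instances() and fetch_blocks() scrape the HTML /instances overview,
# - parse_script() falls back to the JSON blob embedded in a <script> tag.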
def fetch_peers(domain: str, origin: str) -> list:
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    peers = list()

    # No CSRF by default; no need to add network.api_headers yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        return list()

    try:
        logger.debug("Fetching '/api/v3/site' from domain='%s' ...", domain)
        data = network.get_json_api(
            domain,
            "/api/v3/site",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("data[]='%s'", type(data))
        if "error_message" in data:
            logger.warning("Could not reach any JSON API: domain='%s'", domain)
            instances.set_last_error(domain, data)
        elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict):
            logger.debug("Found federated_instances for domain='%s'", domain)
            peers = peers + federation.add_peers(data["json"]["federated_instances"])

            logger.debug("Marking domain='%s' as successfully handled ...", domain)
            instances.set_success(domain)

        if len(peers) == 0:
            logger.warning("Fetching instances for domain='%s' from /instances ...", domain)
            peers = fetch_instances(domain, origin)

    except network.exceptions as exception:
        logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers
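
# Example usage (sketch; "lemmy.example.org" is a placeholder domain). Only
# `domain` is validated above, so `origin` is passed through as-is:
#
#   peers = fetch_peers("lemmy.example.org", None)
#   print(f"{len(peers)} peer(s) found")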
def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
    logger.debug("domain='%s',nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
    domain_helper.raise_on(domain)

    if not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not of type 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")
96 "Blocked Instances".lower(),
97 "Instàncies bloquejades".lower(),
98 "Blocáilte Ásc".lower(),
100 "Blokované instance".lower(),
101 "Geblokkeerde instanties".lower(),
102 "Blockerade instanser".lower(),
103 "Instàncias blocadas".lower(),
104 "Istanze bloccate".lower(),
105 "Instances bloquées".lower(),
106 "Letiltott példányok".lower(),
107 "Instancias bloqueadas".lower(),
108 "Blokeatuta dauden instantziak".lower(),
110 "Peladen Yang Diblokir".lower(),
111 "Blokerede servere".lower(),
112 "Blokitaj nodoj".lower(),
113 "Блокирани Инстанции".lower(),
114 "Blockierte Instanzen".lower(),
115 "Estetyt instanssit".lower(),
116 "Instâncias bloqueadas".lower(),
117 "Zablokowane instancje".lower(),
118 "Blokované inštancie".lower(),
119 "المثلاء المحجوبون".lower(),
120 "Užblokuoti serveriai".lower(),
121 "ブロックしたインスタンス".lower(),
122 "Блокированные Инстансы".lower(),
123 "Αποκλεισμένοι διακομιστές".lower(),
125 "Instâncias bloqueadas".lower(),
    blocklist = list()

    try:
        # /instances is an HTML overview page, so it has to be scraped
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            found = None
            for container in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("container='%s'", container)
                headers = doc.findAll("div", container)

                logger.debug("Checking %d header(s) ...", len(headers))
                for header in headers:
                    logger.debug("header[]='%s'", type(header))
                    content = header.find(["h2", "h3", "h4", "h5"])

                    logger.debug("content[%s]='%s' - BEFORE!", type(content), content)
                    if content is not None:
                        content = content.contents[0]
                    logger.debug("content[%s]='%s' - AFTER!", type(content), content)

                    if content is None:
                        logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
                        continue
                    elif not isinstance(content, str):
                        logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
                        continue
                    elif content.lower() in translations:
                        logger.debug("Found header with blocked instances - BREAK!")
                        found = header
                        break

            logger.debug("found[]='%s'", type(found))
            if found is None:
                logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
                peers = parse_script(doc, "blocked")

                logger.debug("domain='%s' has %d peer(s).", domain, len(peers))
                for blocked in peers:
                    logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                    blocklist.append({
                        "blocker"    : domain,
                        "blocked"    : blocked,
                        "reason"     : None,
                        "block_level": "reject",
                    })

                logger.debug("blocklist()=%d - EXIT!", len(blocklist))
                return blocklist
            blocking = found.find_next(["ul", "table"]).findAll("a")
            logger.debug("Found %d blocked instance(s) ...", len(blocking))
            for tag in blocking:
                logger.debug("tag[]='%s'", type(tag))
                blocked = tidyup.domain(tag.contents[0])
                logger.debug("blocked='%s'", blocked)

                if blocked == "":
                    logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
                    continue
                elif not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                blocklist.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": "reject",
                })

    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("blocklist()=%d - EXIT!", len(blocklist))
    return blocklist
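
# Each blocklist entry produced above is a plain dict; its shape (a sketch,
# inferred from the append calls; HTML blocklists carry no reason text) is:
#
#   {"blocker": "lemmy.example.org", "blocked": "bad.example",
#    "reason": None, "block_level": "reject"}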
def fetch_instances(domain: str, origin: str) -> list:
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    peers = list()

    try:
        # /instances is an HTML overview page, so it has to be scraped
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            for container in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("container='%s'", container)
                headers = doc.findAll("div", container)

                logger.debug("Checking %d headers ...", len(headers))
                for header in headers:
                    logger.debug("header[%s]='%s'", type(header), header)

                    rows = header.find_next(["ul", "table"]).findAll("a")
                    logger.debug("Found %d blocked instance(s) ...", len(rows))
                    for tag in rows:
                        logger.debug("tag[]='%s'", type(tag))
                        text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
                        peer = tidyup.domain(text)
                        logger.debug("peer='%s'", peer)

                        if peer == "":
                            logger.debug("peer is empty - SKIPPED!")
                            continue
                        elif not utils.is_domain_wanted(peer):
                            logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                            continue
                        elif peer in peers:
                            logger.debug("peer='%s' already added - SKIPPED!", peer)
                            continue

                        logger.debug("Appending peer='%s' ...", peer)
                        peers.append(peer)

            logger.debug("peers()=%d", len(peers))
            if len(peers) == 0:
                logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
                peers = parse_script(doc)

        logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
        instances.set_success(domain)

    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers
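
# Both scrapers above expect markup roughly like this (a sketch, not verbatim
# output from any particular instance), matching the div classes and the
# <ul>/<table> of <a> tags they search for:
#
#   <div class="home-instances container-lg">
#     <h2>Blocked Instances</h2>
#     <ul><li><a href="https://bad.example">bad.example</a></li></ul>
#   </div>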
def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
    logger.debug("doc[]='%s',only='%s' - CALLED!", type(doc), only)
    if not isinstance(doc, bs4.BeautifulSoup):
        raise ValueError(f"Parameter doc[]='{type(doc)}' is not of type 'bs4.BeautifulSoup'")
    elif not isinstance(only, str) and only is not None:
        raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'")
    elif isinstance(only, str) and only == "":
        raise ValueError("Parameter 'only' is empty")

    scripts = doc.find_all("script")
    peers = list()

    logger.debug("scripts()=%d", len(scripts))
    for script in scripts:
        logger.debug("script[%s].contents()=%d", type(script), len(script.contents))
        if len(script.contents) == 0:
            logger.debug("script has no contents - SKIPPED!")
            continue
        elif not script.contents[0].startswith("window.isoData"):
            logger.debug("script.contents[0]='%s' does not start with window.isoData - SKIPPED!", script.contents[0])
            continue

        logger.debug("script.contents[0][]='%s'", type(script.contents[0]))

        iso_data = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"")
        logger.debug("iso_data[%s]='%s'", type(iso_data), iso_data)
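
        # The payload dissected here looks roughly like this (a sketch, assuming
        # Lemmy's server-side-rendered bootstrap data):
        #   window.isoData = {"routeData":{"federatedInstancesResponse":
        #     {"data":{"federated_instances":{"blocked":[{"domain":"..."},...],...}}}}}
        # Splitting on "=" grabs the JSON after the assignment; ":undefined" is
        # patched into a string so json.loads() accepts it.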
        parsed = None
        try:
            parsed = json.loads(iso_data)
        except json.decoder.JSONDecodeError as exception:
            logger.warning("Exception '%s' during parsing %d Bytes: '%s'", type(exception), len(iso_data), str(exception))
            return list()

        logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

        if "routeData" not in parsed:
            logger.warning("parsed[%s]()=%d does not contain element 'routeData'", type(parsed), len(parsed))
            continue
        elif "federatedInstancesResponse" not in parsed["routeData"]:
            logger.warning("parsed[routeData][%s]()=%d does not contain element 'federatedInstancesResponse'", type(parsed["routeData"]), len(parsed["routeData"]))
            continue
        elif "data" not in parsed["routeData"]["federatedInstancesResponse"]:
            logger.warning("parsed[routeData][federatedInstancesResponse][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]), len(parsed["routeData"]["federatedInstancesResponse"]))
            continue
        elif "federated_instances" not in parsed["routeData"]["federatedInstancesResponse"]["data"]:
            logger.warning("parsed[routeData][federatedInstancesResponse][data][%s]()=%d does not contain element 'federated_instances'", type(parsed["routeData"]["federatedInstancesResponse"]["data"]), len(parsed["routeData"]["federatedInstancesResponse"]["data"]))
            continue

        data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
        logger.debug("Checking %d data elements ...", len(data))
        for element in data:
            logger.debug("element='%s'", element)
            if isinstance(only, str) and only != element:
                logger.debug("Skipping unwanted element='%s',only='%s'", element, only)
                continue

            logger.debug("Checking data[%s]()=%d row(s) ...", element, len(data[element]))
            for row in data[element]:
                logger.debug("row[]='%s'", type(row))
                if "domain" not in row:
                    logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row))
                    continue

                logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
                peer = tidyup.domain(row["domain"])
                logger.debug("peer='%s' - AFTER!", peer)

                if peer == "":
                    logger.debug("peer is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(peer):
                    logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                    continue
                elif peer in peers:
                    logger.debug("peer='%s' already added - SKIPPED!", peer)
                    continue

                logger.debug("Appending peer='%s' ...", peer)
                peers.append(peer)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers
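
# Example usage (sketch; `html` is a placeholder for a fetched /instances page):
#
#   doc = bs4.BeautifulSoup(html, "html.parser")
#   blocked_peers = parse_script(doc, "blocked")  # only the "blocked" element
#   all_peers = parse_script(doc)                 # every element in the payload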