# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
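
# Lemmy-specific fetchers: peers and blocklists are collected from the
# /api/v3/site JSON API, the /instances HTML overview page and, as a
# fallback, the inline "window.isoData" script blob.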
import json
import logging

import bs4

from fba import csrf
from fba import utils

from fba.helpers import config
from fba.helpers import domain as domain_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import instances

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def fetch_peers(domain: str, origin: str) -> list:
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    peers = list()

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        return list()

    try:
        logger.debug("Fetching '/api/v3/site' from domain='%s' ...", domain)
        data = network.get_json_api(
            domain,
            "/api/v3/site",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("data[]='%s'", type(data))
        if "error_message" in data:
            logger.warning("Could not reach any JSON API: domain='%s'", domain)
            instances.set_last_error(domain, data)
        elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict):
            logger.debug("Found federated_instances for domain='%s'", domain)
            peers = peers + federation.add_peers(data["json"]["federated_instances"])

            logger.debug("Marking domain='%s' as successfully handled ...", domain)
            instances.set_success(domain)
        else:
            logger.warning("Fetching instances for domain='%s' from /instances ...", domain)
            peers = fetch_instances(domain, origin)

    except network.exceptions as exception:
        logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers

def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
    logger.debug("domain='%s',nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
    domain_helper.raise_on(domain)

    if not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not of type 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")
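
    # Lower-cased headings that mark the "blocked instances" section on the
    # /instances page; the lookup below compares header texts case-insensitively.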
    translations = [
        "Blocked Instances".lower(),
        "Instàncies bloquejades".lower(),
        "Blocáilte Ásc".lower(),
        "Blokované instance".lower(),
        "Geblokkeerde instanties".lower(),
        "Blockerade instanser".lower(),
        "Instàncias blocadas".lower(),
        "Istanze bloccate".lower(),
        "Instances bloquées".lower(),
        "Letiltott példányok".lower(),
        "Instancias bloqueadas".lower(),
        "Blokeatuta dauden instantziak".lower(),
        "Peladen Yang Diblokir".lower(),
        "Blokerede servere".lower(),
        "Blokitaj nodoj".lower(),
        "Блокирани Инстанции".lower(),
        "Blockierte Instanzen".lower(),
        "Estetyt instanssit".lower(),
        "Instâncias bloqueadas".lower(),
        "Zablokowane instancje".lower(),
        "Blokované inštancie".lower(),
        "المثلاء المحجوبون".lower(),
        "Užblokuoti serveriai".lower(),
        "ブロックしたインスタンス".lower(),
        "Блокированные Инстансы".lower(),
        "Αποκλεισμένοι διακομιστές".lower(),
        "Instâncias bloqueadas".lower(),
    ]
    blocklist = list()

    try:
        # /instances is an HTML overview page listing federated and blocked instances
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,  # assumed: the module's plain browser-style request headers
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            found = None
            for container in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("container='%s'", container)
                headers = doc.findAll("div", container)

                logger.debug("Checking %d header(s) ...", len(headers))
                for header in headers:
                    logger.debug("header[]='%s'", type(header))
                    for content in header.find_all(["h2", "h3", "h4", "h5"]):
                        logger.debug("content[%s]='%s' - BEFORE!", type(content), content)
                        if content is not None:
                            content = str(content.contents[0])
                        logger.debug("content[%s]='%s' - AFTER!", type(content), content)

                        if content is None or content == "":
                            logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
                            continue
                        elif not isinstance(content, str):
                            logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
                            continue
                        elif content.lower() in translations:
                            logger.debug("Found header with blocked instances - BREAK!")
                            found = header
                            break

            logger.debug("found[]='%s'", type(found))
            if found is None:
                logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
                peers = parse_script(doc, "blocked")

                logger.debug("domain='%s' has %d peer(s).", domain, len(peers))
                for blocked in peers:
                    logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                    blocklist.append({
                        "blocker"    : domain,
                        "blocked"    : blocked,
                        "reason"     : None,
                        "block_level": "reject",
                    })

                logger.debug("blocklist()=%d - EXIT!", len(blocklist))
                return blocklist

            blocking = found.find_next(["ul", "table"]).findAll("a")
            logger.debug("Found %d blocked instance(s) ...", len(blocking))
            for tag in blocking:
                logger.debug("tag[]='%s'", type(tag))
                blocked = tidyup.domain(tag.contents[0])
                logger.debug("blocked='%s'", blocked)

                if blocked == "":
                    logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
                    continue
                elif not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                blocklist.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": "reject",
                })
    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("blocklist()=%d - EXIT!", len(blocklist))
    return blocklist

def fetch_instances(domain: str, origin: str) -> list:
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    peers = list()

    try:
        # /instances is an HTML overview page listing federated instances
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,  # assumed: the module's plain browser-style request headers
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            for container in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("container='%s'", container)
                headers = doc.findAll("div", container)

                logger.debug("Checking %d headers ...", len(headers))
                for header in headers:
                    logger.debug("header[%s]='%s'", type(header), header)

                    rows = header.find_next(["ul", "table"]).findAll("a")
                    logger.debug("Found %d blocked instance(s) ...", len(rows))
                    for tag in rows:
                        logger.debug("tag[]='%s'", type(tag))
                        text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
                        peer = tidyup.domain(text)
                        logger.debug("peer='%s'", peer)

                        if peer == "":
                            logger.debug("peer is empty - SKIPPED!")
                            continue
                        elif not utils.is_domain_wanted(peer):
                            logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                            continue
                        elif peer in peers:
                            logger.debug("peer='%s' already added - SKIPPED!", peer)
                            continue

                        logger.debug("Appending peer='%s' ...", peer)
                        peers.append(peer)

            logger.debug("peers()=%d", len(peers))
            if len(peers) == 0:
                logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
                peers = parse_script(doc)

            logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
            instances.set_success(domain)
    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers

def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
    logger.debug("doc[]='%s',only='%s' - CALLED!", type(doc), only)
    if not isinstance(doc, bs4.BeautifulSoup):
        raise ValueError(f"Parameter doc[]='{type(doc)}' is not of type 'bs4.BeautifulSoup'")
    elif not isinstance(only, str) and only is not None:
        raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'")
    elif isinstance(only, str) and only == "":
        raise ValueError("Parameter 'only' is empty")
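
    # Illustrative sketch of the inline blob this parser expects; the keys
    # "linked"/"allowed" and the example domain are assumptions, only
    # "blocked" and "domain" are actually referenced below:
    #
    #   window.isoData = {
    #       "routeData": {
    #           "federatedInstancesResponse": {
    #               "data": {
    #                   "federated_instances": {
    #                       "linked" : [{"domain": "example.org", ...}, ...],
    #                       "allowed": [...],
    #                       "blocked": [...]
    #                   }
    #               }
    #           }
    #       }
    #   }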
    scripts = doc.find_all("script")
    peers = list()

    logger.debug("scripts()=%d", len(scripts))
    for script in scripts:
        logger.debug("script[%s].contents()=%d", type(script), len(script.contents))
        if len(script.contents) == 0:
            logger.debug("script has no contents - SKIPPED!")
            continue
        elif not script.contents[0].startswith("window.isoData"):
            logger.debug("script.contents[0]='%s' does not start with window.isoData - SKIPPED!", script.contents[0])
            continue

        logger.debug("script.contents[0][]='%s'", type(script.contents[0]))

        iso_data = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"")
        logger.debug("iso_data[%s]='%s'", type(iso_data), iso_data)

        try:
            parsed = json.loads(iso_data)
        except json.decoder.JSONDecodeError as exception:
            logger.warning("Exception '%s' during parsing %d Bytes: '%s'", type(exception), len(iso_data), str(exception))
            return list()

        logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

        if "routeData" not in parsed:
            logger.warning("parsed[%s]()=%d does not contain element 'routeData'", type(parsed), len(parsed))
            continue
        elif "federatedInstancesResponse" not in parsed["routeData"]:
            logger.warning("parsed[routeData][%s]()=%d does not contain element 'federatedInstancesResponse'", type(parsed["routeData"]), len(parsed["routeData"]))
            continue
        elif "data" not in parsed["routeData"]["federatedInstancesResponse"]:
            logger.warning("parsed[routeData][federatedInstancesResponse][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]), len(parsed["routeData"]["federatedInstancesResponse"]))
            continue
        elif "federated_instances" not in parsed["routeData"]["federatedInstancesResponse"]["data"]:
            logger.warning("parsed[routeData][federatedInstancesResponse][data][%s]()=%d does not contain element 'federated_instances'", type(parsed["routeData"]["federatedInstancesResponse"]["data"]), len(parsed["routeData"]["federatedInstancesResponse"]["data"]))
            continue

        data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
        logger.debug("Checking %d data elements ...", len(data))
        for element in data:
            logger.debug("element='%s'", element)
            if isinstance(only, str) and only != element:
                logger.debug("Skipping unwanted element='%s',only='%s'", element, only)
                continue

            logger.debug("Checking data[%s]()=%d row(s) ...", element, len(data[element]))
            for row in data[element]:
                logger.debug("row[]='%s'", type(row))
                if "domain" not in row:
                    logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row))
                    continue

                logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
                peer = tidyup.domain(row["domain"])
                logger.debug("peer='%s' - AFTER!", peer)

                if peer == "":
                    logger.debug("peer is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(peer):
                    logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                    continue
                elif peer in peers:
                    logger.debug("peer='%s' already added - SKIPPED!", peer)
                    continue

                logger.debug("Appending peer='%s' ...", peer)
                peers.append(peer)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers