1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
22 from fba.helpers import config
23 from fba.helpers import domain as domain_helper
24 from fba.helpers import tidyup
26 from fba.http import csrf
27 from fba.http import federation
28 from fba.http import network
30 from fba.models import instances
# Module-wide logger: root logging is configured at INFO; enable the
# commented setLevel() line below for per-module DEBUG tracing.
# NOTE(review): logging.basicConfig() at import time configures the whole
# process - confirm this is intended rather than leaving it to the caller.
32 logging.basicConfig(level=logging.INFO)
33 logger = logging.getLogger(__name__)
34 #logger.setLevel(logging.DEBUG)
# Fetch the peers (federated instances) of a Lemmy-style node.
#
# Queries the JSON endpoint /api/v3/site and, when the response carries a
# "federated_instances" dict, feeds it to federation.add_peers(); otherwise
# falls back to scraping the HTML /instances page via fetch_instances().
# Parameters: domain - node to query; origin - originating domain, passed
# through to the fallback. Returns a list of peer domain names.
36 def fetch_peers(domain: str, origin: str) -> list:
37 logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
# Validate the domain early; raises for malformed/unsupported domains.
38 domain_helper.raise_on(domain)
42 # No CSRF by default, you don't have to add network.api_headers by yourself here
46 logger.debug("Checking CSRF for domain='%s'", domain)
# Determine request headers (CSRF token) needed to talk to this node.
47 headers = csrf.determine(domain, dict())
# On CSRF-check failure the error is recorded on the instance record and
# an empty result is returned (see debug message below).
48 except network.exceptions as exception:
49 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
50 instances.set_last_error(domain, exception)
52 logger.debug("Returning empty list ... - EXIT!")
56 logger.debug("Fetching '/api/v3/site' from domain='%s' ...", domain)
# Timeouts come from configuration: (connect, read) tuple.
57 data = network.get_json_api(
61 (config.get("connection_timeout"), config.get("read_timeout"))
64 logger.debug("data[]='%s'", type(data))
# get_json_api() signals failure in-band via an "error_message" key.
65 if "error_message" in data:
66 logger.warning("Could not reach any JSON API: domain='%s'", domain)
67 instances.set_last_error(domain, data)
68 elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict):
69 logger.debug("Found federated_instances for domain='%s'", domain)
# Merge the peers extracted from the API payload into the result list.
70 peers = peers + federation.add_peers(data["json"]["federated_instances"])
72 logger.debug("Marking domain='%s' as successfully handled ...", domain)
73 instances.set_success(domain)
# Fallback path: the API gave no usable data, scrape /instances instead.
76 logger.warning("Fetching instances for domain='%s' from /instances ...", domain)
77 peers = fetch_instances(domain, origin)
79 except network.exceptions as exception:
80 logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
81 instances.set_last_error(domain, exception)
83 logger.debug("peers()=%d - EXIT!", len(peers))
# Scrape the block list of a Lemmy-style node from its /instances HTML page.
#
# The page is parsed with BeautifulSoup; section headers (h2-h5) whose text
# matches one of the lower-cased "Blocked instances" translations below mark
# the blocklist section. Each anchor in the following <ul>/<table> is tidied
# via tidyup.domain(), filtered with domain_helper.is_wanted() and appended
# with block_level "reject". When no HTML blocklist is found, the inline
# window.isoData script is consulted via parse_script(doc, "blocked").
# Raises when the domain is not registered. Returns the blocklist.
86 def fetch_blocks(domain: str) -> list:
# NOTE(review): the message below is missing the closing quote after %s.
87 logger.debug("domain='%s - CALLED!", domain)
88 domain_helper.raise_on(domain)
# Guard: only instances already registered in the database may be fetched.
90 if not instances.is_registered(domain):
91 raise Exception(f"domain='{domain}' is not registered but function is invoked.")
# Lower-cased headings meaning "Blocked instances" in many languages; used
# to recognise the blocklist section regardless of the node's UI language.
94 "Blocked Instances".lower(),
95 "Instàncies bloquejades".lower(),
96 "Blocáilte Ásc".lower(),
98 "Blokované instance".lower(),
99 "Geblokkeerde instanties".lower(),
100 "Blockerade instanser".lower(),
101 "Instàncias blocadas".lower(),
102 "Istanze bloccate".lower(),
103 "Instances bloquées".lower(),
104 "Letiltott példányok".lower(),
105 "Instancias bloqueadas".lower(),
106 "Blokeatuta dauden instantziak".lower(),
108 "Peladen Yang Diblokir".lower(),
109 "Blokerede servere".lower(),
110 "Blokitaj nodoj".lower(),
111 "Блокирани Инстанции".lower(),
112 "Blockierte Instanzen".lower(),
113 "Estetyt instanssit".lower(),
114 "Instâncias bloqueadas".lower(),
115 "Zablokowane instancje".lower(),
116 "Blokované inštancie".lower(),
117 "المثلاء المحجوبون".lower(),
118 "Užblokuoti serveriai".lower(),
119 "ブロックしたインスタンス".lower(),
120 "Блокированные Инстансы".lower(),
121 "Αποκλεισμένοι διακομιστές".lower(),
# NOTE(review): duplicate of entry 114 above ("Instâncias bloqueadas").
123 "Instâncias bloqueadas".lower(),
129 # json endpoint for newer mastodongs
130 logger.debug("Fetching /instances from domain='%s'", domain)
131 response = network.fetch_response(
135 (config.get("connection_timeout"), config.get("read_timeout"))
138 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# Only parse a successful, non-empty HTML response.
139 if response.ok and response.status_code == 200 and response.text != "":
140 logger.debug("Parsing %s Bytes ...", len(response.text))
142 doc = bs4.BeautifulSoup(response.text, "html.parser")
143 logger.debug("doc[]='%s'", type(doc))
# Two known container layouts are probed, newest first.
146 for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
147 logger.debug("criteria='%s'", criteria)
148 containers = doc.findAll("div", criteria)
150 logger.debug("Checking %d containers ...", len(containers))
151 for container in containers:
152 logger.debug("container[]='%s'", type(container))
# Scan the container's section headers for a blocklist heading.
153 for header in container.find_all(["h2", "h3", "h4", "h5"]):
155 logger.debug("header[%s]='%s' - BEFORE!", type(header), header)
156 if header is not None:
# First child node of the header is the visible heading text.
157 content = str(header.contents[0])
158 logger.debug("content[%s]='%s' - AFTER!", type(content), content)
161 logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
163 elif not isinstance(content, str):
164 logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
# A heading matching one of the translations marks the blocklist section.
166 elif content.lower() in translations:
167 logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header)
171 logger.debug("found[]='%s'", type(found))
172 if found is not None:
173 logger.debug("Found header with blocked instances - BREAK(2) !")
176 logger.debug("found[]='%s'", type(found))
177 if found is not None:
178 logger.debug("Found header with blocked instances - BREAK(1) !")
181 logger.debug("found[]='%s'", type(found))
# No HTML blocklist header found: fall back to the inline isoData script.
183 logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
184 peers = parse_script(doc, "blocked")
186 logger.debug("domain='%s' has %d peer(s).", domain, len(peers))
# Script-derived peers are all recorded with a hard "reject" level.
187 for blocked in peers:
188 logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
193 "block_level": "reject",
196 logger.debug("blocklist()=%d - EXIT!", len(blocklist))
# HTML path: the list/table following the matched header holds the links.
199 blocking = found.find_next(["ul", "table"]).findAll("a")
200 logger.debug("Found %d blocked instance(s) ...", len(blocking))
202 logger.debug("tag[]='%s'", type(tag))
# Normalise the anchor text to a bare domain; empty text yields None.
203 blocked = tidyup.domain(tag.contents[0]) if tag.contents[0] != "" else None
204 logger.debug("blocked='%s'", blocked)
207 logger.warning("blocked is empty - SKIPPED!")
210 logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
212 elif not domain_helper.is_wanted(blocked):
213 logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
216 logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
221 "block_level": "reject",
# HTTP failure: remember the response as the instance's last error.
224 logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
225 instances.set_last_error(domain, response)
227 except network.exceptions as exception:
228 logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
229 instances.set_last_error(domain, exception)
231 logger.debug("blocklist()=%d - EXIT!", len(blocklist))
# Scrape the peer list of a Lemmy-style node from its /instances HTML page.
#
# Anchors inside the <ul>/<table> following each matched container are
# tidied via tidyup.domain(), filtered with domain_helper.is_wanted() and
# de-duplicated. When the HTML yields nothing, parse_script(doc) is used as
# a fallback. On success the instance is marked handled; HTTP and network
# errors are recorded via instances.set_last_error(). Returns peer domains.
234 def fetch_instances(domain: str, origin: str) -> list:
235 logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
# Validate the domain early; raises for malformed/unsupported domains.
236 domain_helper.raise_on(domain)
241 # json endpoint for newer mastodongs
242 logger.debug("Fetching /instances from domain='%s'", domain)
243 response = network.fetch_response(
247 (config.get("connection_timeout"), config.get("read_timeout"))
250 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# Only parse a successful, non-empty HTML response.
251 if response.ok and response.status_code == 200 and response.text != "":
252 logger.debug("Parsing %s Bytes ...", len(response.text))
254 doc = bs4.BeautifulSoup(response.text, "html.parser")
255 logger.debug("doc[]='%s'", type(doc))
# Two known container layouts are probed, newest first.
257 for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
258 logger.debug("criteria='%s'", criteria)
259 containers = doc.findAll("div", criteria)
261 logger.debug("Checking %d containers ...", len(containers))
262 for header in containers:
263 logger.debug("header[%s]='%s'", type(header), header)
# All anchors in the list/table that follows this container.
265 rows = header.find_next(["ul","table"]).findAll("a")
266 logger.debug("Found %d instance(s) ...", len(rows))
268 logger.debug("tag[]='%s'", type(tag))
# Anchor content may be a plain string or a nested tag; take its text.
269 text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
270 logger.debug("text='%s' - BEFORE!", text)
# Normalise to a bare domain; empty text yields None.
272 peer = tidyup.domain(text) if text != "" else None
273 logger.debug("peer='%s' - AFTER", peer)
276 logger.warning("peer is empty - SKIPPED!")
279 logger.warning("peer is an empty string, text='%s' - SKIPPED!", text)
281 elif not domain_helper.is_wanted(peer):
282 logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
# Skip duplicates already collected from a previous container/row.
285 logger.debug("peer='%s' already added - SKIPPED!", peer)
288 logger.debug("Appending peer='%s' ...", peer)
291 logger.debug("peers()=%d", len(peers))
# HTML scraping found nothing: fall back to the inline isoData script.
293 logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
294 peers = parse_script(doc)
296 logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
297 instances.set_last_error(domain, response)
299 logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
300 instances.set_success(domain)
302 except network.exceptions as exception:
303 logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
304 instances.set_last_error(domain, exception)
306 logger.debug("peers()=%d - EXIT!", len(peers))
# Extract peer domains from Lemmy's inline "window.isoData" script tag.
#
# Finds the <script> whose contents start with "window.isoData", parses the
# JSON assigned to it (":undefined" is patched to a quoted string first),
# then walks parsed["routeData"]["federatedInstancesResponse"]["data"]
# ["federated_instances"]. Parameters: doc - parsed BeautifulSoup document;
# only - optional element key (e.g. "blocked") to restrict extraction to.
# Raises ValueError on bad parameters. Returns the collected peer domains.
309 def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
# NOTE(review): this debug call has two %s placeholders but no arguments -
# the logging module will report a string-formatting error at runtime.
310 logger.debug("doc[]='%s',only='%s' - CALLED!")
312 if not isinstance(doc, bs4.BeautifulSoup):
# NOTE(review): message interpolates type(only) but this branch validates
# 'doc' - likely should be type(doc).
313 raise ValueError(f"Parameter doc[]='{type(only)}' is not of type 'bs4.BeautifulSoup'")
314 elif not isinstance(only, str) and only is not None:
315 raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'")
316 elif isinstance(only, str) and only == "":
317 raise ValueError("Parameter 'only' is empty")
319 scripts = doc.find_all("script")
322 logger.debug("scripts()=%d", len(scripts))
# Locate the one script tag carrying the server-rendered isoData payload.
323 for script in scripts:
324 logger.debug("script[%s].contents()=%d", type(script), len(script.contents))
325 if len(script.contents) == 0:
326 logger.debug("script has no contents - SKIPPED!")
328 elif not script.contents[0].startswith("window.isoData"):
329 logger.debug("script.contents[0]='%s' does not start with window.isoData - SKIPPED!", script.contents[0])
332 logger.debug("script.contents[0][]='%s'", type(script.contents[0]))
# Take the right-hand side of the assignment and make it valid JSON:
# bare "undefined" values are replaced with the string "undefined".
334 iso_data = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"")
335 logger.debug("iso_data[%s]='%s'", type(iso_data), iso_data)
339 parsed = json.loads(iso_data)
340 except json.decoder.JSONDecodeError as exception:
341 logger.warning("Exception '%s' during parsing %d Bytes: '%s' - EXIT!", type(exception), len(iso_data), str(exception))
344 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
# Defensive walk through the expected nesting; each missing level is
# logged and aborts extraction for this payload.
346 if "routeData" not in parsed:
347 logger.warning("parsed[%s]()=%d does not contain element 'routeData'", type(parsed), len(parsed))
349 elif "federatedInstancesResponse" not in parsed["routeData"]:
350 logger.warning("parsed[routeData][%s]()=%d does not contain element 'federatedInstancesResponse'", type(parsed["routeData"]), len(parsed["routeData"]))
352 elif "data" not in parsed["routeData"]["federatedInstancesResponse"]:
353 logger.warning("parsed[routeData][federatedInstancesResponse][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]), len(parsed["routeData"]["federatedInstancesResponse"]))
355 elif "federated_instances" not in parsed["routeData"]["federatedInstancesResponse"]["data"]:
# NOTE(review): message says 'data' but the key checked above is
# 'federated_instances' - copy/paste slip in the log text.
356 logger.warning("parsed[routeData][federatedInstancesResponse][data][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]["data"]), len(parsed["routeData"]["federatedInstancesResponse"]["data"]))
359 data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
360 logger.debug("Checking %d data elements ...", len(data))
# data maps element names (e.g. "linked", "allowed", "blocked") to rows;
# 'only', when given, restricts processing to that single element.
362 logger.debug("element='%s'", element)
363 if isinstance(only, str) and only != element:
364 logger.debug("Skipping unwanted element='%s',only='%s'", element, only)
367 logger.debug("Checking data[%s]()=%d row(s) ...", element, len(data[element]))
368 for row in data[element]:
369 logger.debug("row[]='%s'", type(row))
370 if "domain" not in row:
371 logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row))
374 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
# Normalise the raw domain and filter/de-duplicate before collecting.
375 peer = tidyup.domain(row["domain"])
376 logger.debug("peer='%s' - AFTER!", peer)
379 logger.warning("peer is empty - SKIPPED!")
382 logger.warning("peer is an empty string, row[domain]='%s' - SKIPPED!", row["domain"])
384 elif not domain_helper.is_wanted(peer):
385 logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
387 logger.debug("peer='%s' already added - SKIPPED!", peer)
390 logger.debug("Appending peer='%s' ...", peer)
393 logger.debug("peers()=%d - EXIT!", len(peers))