1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
import json
import logging

import bs4

from fba.helpers import config
from fba.helpers import domain as domain_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import instances
# Module-wide logging setup: INFO by default. Uncomment the override below
# to get verbose DEBUG output from this module only.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def fetch_peers(domain: str, origin: str) -> list:
    """Fetch the list of federated peers from a Lemmy node.

    Queries the Lemmy "/api/v3/site" JSON endpoint and adds the domains
    found in its "federated_instances" element. When the JSON API does not
    expose usable federation data, falls back to scraping the "/instances"
    HTML page via fetch_instances().

    @param domain Domain name of the Lemmy instance to query
    @param origin Origin domain this fetch was triggered from
    @return List of peer domain names (possibly empty)
    """
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    peers = list()

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
        instances.set_last_error(domain, exception)

        logger.debug("Returning empty list ... - EXIT!")
        return list()

    try:
        logger.debug("Fetching '/api/v3/site' from domain='%s' ...", domain)
        data = network.get_json_api(
            domain,
            "/api/v3/site",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("data[]='%s'", type(data))
        if "error_message" in data:
            logger.warning("Could not reach any JSON API: domain='%s'", domain)
            instances.set_last_error(domain, data)
        elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict):
            logger.debug("Found federated_instances for domain='%s'", domain)
            peers = peers + federation.add_peers(data["json"]["federated_instances"])

            logger.debug("Marking domain='%s' as successfully handled ...", domain)
            instances.set_success(domain)
        else:
            # JSON API gave no usable federation data - fall back to HTML scraping.
            logger.warning("Fetching instances for domain='%s' from /instances ...", domain)
            peers = fetch_instances(domain, origin)
    except network.exceptions as exception:
        logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers
def fetch_blocks(domain: str) -> list:
    """Fetch the block list (block_level "reject") from a Lemmy node.

    Scrapes the instance's "/instances" HTML page looking for a heading that
    matches one of the known translations of "Blocked Instances" and collects
    the linked domains listed below it. When no matching heading is found,
    falls back to parsing the embedded "window.isoData" script via
    parse_script().

    @param domain Domain name of the Lemmy instance to query
    @return List of dicts with keys blocker/blocked/reason/block_level
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    # Known localized headings for the "Blocked Instances" section,
    # lower-cased for case-insensitive comparison.
    translations = [
        "Blocked Instances".lower(),
        "Instàncies bloquejades".lower(),
        "Blocáilte Ásc".lower(),
        "Blokované instance".lower(),
        "Geblokkeerde instanties".lower(),
        "Blockerade instanser".lower(),
        "Instàncias blocadas".lower(),
        "Istanze bloccate".lower(),
        "Instances bloquées".lower(),
        "Letiltott példányok".lower(),
        "Instancias bloqueadas".lower(),
        "Blokeatuta dauden instantziak".lower(),
        "Peladen Yang Diblokir".lower(),
        "Blokerede servere".lower(),
        "Blokitaj nodoj".lower(),
        "Блокирани Инстанции".lower(),
        "Blockierte Instanzen".lower(),
        "Estetyt instanssit".lower(),
        "Instâncias bloqueadas".lower(),
        "Zablokowane instancje".lower(),
        "Blokované inštancie".lower(),
        "المثلاء المحجوبون".lower(),
        "Užblokuoti serveriai".lower(),
        "ブロックしたインスタンス".lower(),
        "Блокированные Инстансы".lower(),
        "Αποκλεισμένοι διακομιστές".lower(),
        "Instâncias bloqueadas".lower(),
    ]

    blocklist = list()

    try:
        # json endpoint for newer mastodongs
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            # Search the known container classes for a heading whose text
            # matches one of the translated "Blocked Instances" strings.
            found = None
            for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("criteria='%s'", criteria)
                containers = doc.findAll("div", criteria)

                logger.debug("Checking %d containers ...", len(containers))
                for container in containers:
                    logger.debug("container[]='%s'", type(container))
                    for header in container.find_all(["h2", "h3", "h4", "h5"]):
                        content = None
                        logger.debug("header[%s]='%s' - BEFORE!", type(header), header)
                        if header is not None:
                            content = str(header.contents[0])
                        logger.debug("content[%s]='%s' - AFTER!", type(content), content)

                        if content is None or content == "":
                            logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
                            continue
                        elif not isinstance(content, str):
                            logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
                            continue
                        elif content.lower() in translations:
                            logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header)
                            found = header
                            break

                    logger.debug("found[]='%s'", type(found))
                    if found is not None:
                        logger.debug("Found header with blocked instances - BREAK(2) !")
                        break

                logger.debug("found[]='%s'", type(found))
                if found is not None:
                    logger.debug("Found header with blocked instances - BREAK(1) !")
                    break

            logger.debug("found[]='%s'", type(found))
            if found is None:
                # No matching HTML heading - try the embedded isoData script.
                logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
                peers = parse_script(doc, "blocked")

                logger.debug("domain='%s' has %d peer(s).", domain, len(peers))
                for blocked in peers:
                    logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                    blocklist.append({
                        "blocker"    : domain,
                        "blocked"    : blocked,
                        "reason"     : None,
                        "block_level": "reject",
                    })

                logger.debug("blocklist()=%d - EXIT!", len(blocklist))
                return blocklist

            # Collect all links in the list/table that follows the heading.
            blocking = found.find_next(["ul", "table"]).findAll("a")
            logger.debug("Found %d blocked instance(s) ...", len(blocking))
            for tag in blocking:
                logger.debug("tag[]='%s'", type(tag))
                blocked = tidyup.domain(tag.contents[0])
                logger.debug("blocked='%s'", blocked)

                if blocked == "":
                    logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                blocklist.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": "reject",
                })
        else:
            logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
            instances.set_last_error(domain, response)
    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("blocklist()=%d - EXIT!", len(blocklist))
    return blocklist
def fetch_instances(domain: str, origin: str) -> list:
    """Scrape the list of federated peers from a Lemmy node's HTML page.

    Fetches the "/instances" page, walks the known container <div>s and
    collects the linked domains. When no peers are found in the HTML,
    falls back to parsing the embedded "window.isoData" script via
    parse_script().

    @param domain Domain name of the Lemmy instance to query
    @param origin Origin domain this fetch was triggered from
    @return List of peer domain names (possibly empty)
    """
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    peers = list()

    try:
        # json endpoint for newer mastodongs
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("criteria='%s'", criteria)
                containers = doc.findAll("div", criteria)

                logger.debug("Checking %d containers ...", len(containers))
                for header in containers:
                    logger.debug("header[%s]='%s'", type(header), header)

                    # All links in the list/table following this container.
                    rows = header.find_next(["ul", "table"]).findAll("a")
                    logger.debug("Found %d instance(s) ...", len(rows))
                    for tag in rows:
                        logger.debug("tag[]='%s'", type(tag))
                        text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
                        peer = tidyup.domain(text)
                        logger.debug("peer='%s'", peer)

                        if peer == "":
                            logger.debug("peer is empty - SKIPPED!")
                            continue
                        elif peer in peers:
                            logger.debug("peer='%s' already added - SKIPPED!", peer)
                            continue

                        logger.debug("Appending peer='%s' ...", peer)
                        peers.append(peer)

            logger.debug("peers()=%d", len(peers))
            if len(peers) == 0:
                # Nothing in the HTML - try the embedded isoData script.
                logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
                peers = parse_script(doc)
        else:
            logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
            instances.set_last_error(domain, response)

        logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
        instances.set_success(domain)
    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers
def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
    """Extract peer domains from Lemmy's embedded "window.isoData" script.

    Searches all <script> tags for one starting with "window.isoData",
    parses its JSON payload and walks
    parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"].

    @param doc  Parsed HTML document of the instance page
    @param only When given, restrict extraction to this element
                (e.g. "blocked"); None means all elements
    @return List of unique, tidied peer domain names (possibly empty)
    @raises ValueError When parameters have the wrong type or 'only' is empty
    """
    # Fixed: the original debug call had two %s placeholders but no arguments.
    logger.debug("doc[]='%s',only='%s' - CALLED!", type(doc), only)
    if not isinstance(doc, bs4.BeautifulSoup):
        # Fixed: error message previously reported type(only) instead of type(doc).
        raise ValueError(f"Parameter doc[]='{type(doc)}' is not of type 'bs4.BeautifulSoup'")
    elif not isinstance(only, str) and only is not None:
        raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'")
    elif isinstance(only, str) and only == "":
        raise ValueError("Parameter 'only' is empty")

    scripts = doc.find_all("script")
    peers = list()

    logger.debug("scripts()=%d", len(scripts))
    for script in scripts:
        logger.debug("script[%s].contents()=%d", type(script), len(script.contents))
        if len(script.contents) == 0:
            logger.debug("script has no contents - SKIPPED!")
            continue
        elif not script.contents[0].startswith("window.isoData"):
            logger.debug("script.contents[0]='%s' does not start with window.isoData - SKIPPED!", script.contents[0])
            continue

        logger.debug("script.contents[0][]='%s'", type(script.contents[0]))

        # Strip the "window.isoData =" prefix and quote bare 'undefined'
        # values so the remainder becomes valid JSON.
        iso_data = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"")
        logger.debug("iso_data[%s]='%s'", type(iso_data), iso_data)

        parsed = None
        try:
            parsed = json.loads(iso_data)
        except json.decoder.JSONDecodeError as exception:
            logger.warning("Exception '%s' during parsing %d Bytes: '%s' - EXIT!", type(exception), len(iso_data), str(exception))
            return list()

        logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

        if "routeData" not in parsed:
            logger.warning("parsed[%s]()=%d does not contain element 'routeData'", type(parsed), len(parsed))
            continue
        elif "federatedInstancesResponse" not in parsed["routeData"]:
            logger.warning("parsed[routeData][%s]()=%d does not contain element 'federatedInstancesResponse'", type(parsed["routeData"]), len(parsed["routeData"]))
            continue
        elif "data" not in parsed["routeData"]["federatedInstancesResponse"]:
            logger.warning("parsed[routeData][federatedInstancesResponse][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]), len(parsed["routeData"]["federatedInstancesResponse"]))
            continue
        elif "federated_instances" not in parsed["routeData"]["federatedInstancesResponse"]["data"]:
            logger.warning("parsed[routeData][federatedInstancesResponse][data][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]["data"]), len(parsed["routeData"]["federatedInstancesResponse"]["data"]))
            continue

        data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
        logger.debug("Checking %d data elements ...", len(data))
        for element in data:
            logger.debug("element='%s'", element)
            if isinstance(only, str) and only != element:
                logger.debug("Skipping unwanted element='%s',only='%s'", element, only)
                continue

            logger.debug("Checking data[%s]()=%d row(s) ...", element, len(data[element]))
            for row in data[element]:
                logger.debug("row[]='%s'", type(row))
                if "domain" not in row:
                    logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row))
                    continue

                logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
                peer = tidyup.domain(row["domain"])
                logger.debug("peer='%s' - AFTER!", peer)

                if peer == "":
                    logger.debug("peer is empty - SKIPPED!")
                    continue
                elif peer in peers:
                    logger.debug("peer='%s' already added - SKIPPED!", peer)
                    continue

                logger.debug("Appending peer='%s' ...", peer)
                peers.append(peer)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers