# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
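
# Parser for Lemmy instances: fetch_peers() queries the /api/v3/site JSON
# API, fetch_blocks() and fetch_instances() scrape the HTML /instances
# page, and parse_script() recovers peers from the window.isoData payload
# embedded by JavaScript-rendered front-ends.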

import json
import logging

import bs4

from fba import csrf
from fba import utils

from fba.helpers import config
from fba.helpers import domain as domain_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import instances

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

def fetch_peers(domain: str, origin: str) -> list:
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    peers = list()

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
        instances.set_last_error(domain, exception)

        logger.debug("Returning empty list ... - EXIT!")
        return list()
58 logger.debug("Fetching '/api/v3/site' from domain='%s' ...", domain)
59 data = network.get_json_api(
63 (config.get("connection_timeout"), config.get("read_timeout"))
66 logger.debug("data[]='%s'", type(data))
67 if "error_message" in data:
68 logger.warning("Could not reach any JSON API: domain='%s'", domain)
69 instances.set_last_error(domain, data)
70 elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict):
71 logger.debug("Found federated_instances for domain='%s'", domain)
72 peers = peers + federation.add_peers(data["json"]["federated_instances"])
74 logger.debug("Marking domain='%s' as successfully handled ...", domain)
75 instances.set_success(domain)
78 logger.warning("Fetching instances for domain='%s' from /instances ...", domain)
79 peers = fetch_instances(domain, origin)

    except network.exceptions as exception:
        logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers

def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
    logger.debug("domain='%s',nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
    domain_helper.raise_on(domain)

    if not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not of type 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")
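
    # Lowercased headings that mark a "blocked instances" section on the
    # HTML /instances page, in various translations.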
98 "Blocked Instances".lower(),
99 "Instàncies bloquejades".lower(),
100 "Blocáilte Ásc".lower(),
102 "Blokované instance".lower(),
103 "Geblokkeerde instanties".lower(),
104 "Blockerade instanser".lower(),
105 "Instàncias blocadas".lower(),
106 "Istanze bloccate".lower(),
107 "Instances bloquées".lower(),
108 "Letiltott példányok".lower(),
109 "Instancias bloqueadas".lower(),
110 "Blokeatuta dauden instantziak".lower(),
112 "Peladen Yang Diblokir".lower(),
113 "Blokerede servere".lower(),
114 "Blokitaj nodoj".lower(),
115 "Блокирани Инстанции".lower(),
116 "Blockierte Instanzen".lower(),
117 "Estetyt instanssit".lower(),
118 "Instâncias bloqueadas".lower(),
119 "Zablokowane instancje".lower(),
120 "Blokované inštancie".lower(),
121 "المثلاء المحجوبون".lower(),
122 "Užblokuoti serveriai".lower(),
123 "ブロックしたインスタンス".lower(),
124 "Блокированные Инстансы".lower(),
125 "Αποκλεισμένοι διακομιστές".lower(),
127 "Instâncias bloqueadas".lower(),

    try:
        # HTML overview page listing federated and blocked instances
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            found = None
            for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("criteria='%s'", criteria)
                containers = doc.findAll("div", criteria)

                logger.debug("Checking %d containers ...", len(containers))
                for container in containers:
                    logger.debug("container[]='%s'", type(container))
                    for header in container.find_all(["h2", "h3", "h4", "h5"]):
                        logger.debug("header[%s]='%s' - BEFORE!", type(header), header)
                        content = str(header.contents[0]) if len(header.contents) > 0 else None
                        logger.debug("content[%s]='%s' - AFTER!", type(content), content)

                        if content is None or content == "":
                            logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
                            continue
                        elif not isinstance(content, str):
                            logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
                            continue
                        elif content.lower() in translations:
                            logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header)
                            found = header
                            break
175 logger.debug("found[]='%s'", type(found))
176 if found is not None:
177 logger.debug("Found header with blocked instances - BREAK(2) !")
180 logger.debug("found[]='%s'", type(found))
181 if found is not None:
182 logger.debug("Found header with blocked instances - BREAK(1) !")
185 logger.debug("found[]='%s'", type(found))
            if found is None:
                logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
                peers = parse_script(doc, "blocked")

                logger.debug("domain='%s' has %d peer(s).", domain, len(peers))
                for blocked in peers:
                    logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                    blocklist.append({
                        "blocker"    : domain,
                        "blocked"    : blocked,
                        "reason"     : None,
                        "block_level": "reject",
                    })

                logger.debug("blocklist()=%d - EXIT!", len(blocklist))
                return blocklist
            blocking = found.find_next(["ul", "table"]).findAll("a")
            logger.debug("Found %d blocked instance(s) ...", len(blocking))
            for tag in blocking:
                logger.debug("tag[]='%s'", type(tag))
                blocked = tidyup.domain(tag.contents[0])
                logger.debug("blocked='%s'", blocked)

                if blocked == "":
                    logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
                    continue
                elif not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                blocklist.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": "reject",
                })

    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("blocklist()=%d - EXIT!", len(blocklist))
    return blocklist

def fetch_instances(domain: str, origin: str) -> list:
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)
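
    # Scrape the HTML /instances overview; older Lemmy versions render the
    # peer list server-side, newer ones only via JavaScript.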
    peers = list()

    try:
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )
248 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
249 if response.ok and response.status_code < 300 and response.text != "":
250 logger.debug("Parsing %s Bytes ...", len(response.text))
252 doc = bs4.BeautifulSoup(response.text, "html.parser")
253 logger.debug("doc[]='%s'", type(doc))

            for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("criteria='%s'", criteria)
                containers = doc.findAll("div", criteria)

                logger.debug("Checking %d containers ...", len(containers))
                for header in containers:
                    logger.debug("header[%s]='%s'", type(header), header)

                    rows = header.find_next(["ul", "table"]).findAll("a")
                    logger.debug("Found %d instance(s) ...", len(rows))
                    for tag in rows:
                        logger.debug("tag[]='%s'", type(tag))
                        text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
                        peer = tidyup.domain(text)
                        logger.debug("peer='%s'", peer)

                        if peer == "":
                            logger.debug("peer is empty - SKIPPED!")
                            continue
                        elif not utils.is_domain_wanted(peer):
                            logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                            continue
                        elif peer in peers:
                            logger.debug("peer='%s' already added - SKIPPED!", peer)
                            continue

                        logger.debug("Appending peer='%s' ...", peer)
                        peers.append(peer)
284 logger.debug("peers()=%d", len(peers))
286 logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
287 peers = parse_script(doc)
289 logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
290 instances.set_success(domain)

    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers

def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
    logger.debug("doc[]='%s',only='%s' - CALLED!", type(doc), only)
    if not isinstance(doc, bs4.BeautifulSoup):
        raise ValueError(f"Parameter doc[]='{type(doc)}' is not of type 'bs4.BeautifulSoup'")
    elif not isinstance(only, str) and only is not None:
        raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'")
    elif isinstance(only, str) and only == "":
        raise ValueError("Parameter 'only' is empty")
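
    # Newer Lemmy front-ends ship their initial state to the browser as a
    # JavaScript assignment of the form "window.isoData = {...}" inside a
    # <script> tag; the federated instances list can be recovered from it.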
    scripts = doc.find_all("script")
    peers = list()

    logger.debug("scripts()=%d", len(scripts))
    for script in scripts:
        logger.debug("script[%s].contents()=%d", type(script), len(script.contents))
        if len(script.contents) == 0:
            logger.debug("script has no contents - SKIPPED!")
            continue
        elif not script.contents[0].startswith("window.isoData"):
            logger.debug("script.contents[0]='%s' does not start with window.isoData - SKIPPED!", script.contents[0])
            continue

        logger.debug("script.contents[0][]='%s'", type(script.contents[0]))
        iso_data = script.contents[0].split("=", 1)[1].strip().replace(":undefined", ":\"undefined\"")
        logger.debug("iso_data[%s]='%s'", type(iso_data), iso_data)

        try:
            parsed = json.loads(iso_data)
        except json.decoder.JSONDecodeError as exception:
            logger.warning("Exception '%s' during parsing %d Bytes: '%s' - EXIT!", type(exception), len(iso_data), str(exception))
            return list()

        logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
335 if "routeData" not in parsed:
336 logger.warning("parsed[%s]()=%d does not contain element 'routeData'", type(parsed), len(parsed))
338 elif "federatedInstancesResponse" not in parsed["routeData"]:
339 logger.warning("parsed[routeData][%s]()=%d does not contain element 'federatedInstancesResponse'", type(parsed["routeData"]), len(parsed["routeData"]))
341 elif "data" not in parsed["routeData"]["federatedInstancesResponse"]:
342 logger.warning("parsed[routeData][federatedInstancesResponse][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]), len(parsed["routeData"]["federatedInstancesResponse"]))
344 elif "federated_instances" not in parsed["routeData"]["federatedInstancesResponse"]["data"]:
345 logger.warning("parsed[routeData][federatedInstancesResponse][data][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]["data"]), len(parsed["routeData"]["federatedInstancesResponse"]["data"]))
        data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
        logger.debug("Checking %d data elements ...", len(data))
        for element in data:
            logger.debug("element='%s'", element)
            if isinstance(only, str) and only != element:
                logger.debug("Skipping unwanted element='%s',only='%s'", element, only)
                continue

            logger.debug("Checking data[%s]()=%d row(s) ...", element, len(data[element]))
            for row in data[element]:
                logger.debug("row[]='%s'", type(row))
                if "domain" not in row:
                    logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row))
                    continue

                logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
                peer = tidyup.domain(row["domain"])
                logger.debug("peer='%s' - AFTER!", peer)

                if peer == "":
                    logger.debug("peer is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(peer):
                    logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                    continue
                elif peer in peers:
                    logger.debug("peer='%s' already added - SKIPPED!", peer)
                    continue

                logger.debug("Appending peer='%s' ...", peer)
                peers.append(peer)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers