1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
25 from fba.helpers import config
26 from fba.helpers import domain as domain_helper
27 from fba.helpers import tidyup
29 from fba.http import federation
30 from fba.http import network
32 from fba.models import instances
# Module-wide logger. NOTE(review): calling basicConfig() at import time
# configures the process-wide root logger as a side effect of importing this
# module - confirm this is intentional for a library module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
def fetch_peers(domain: str, origin: str) -> list:
    """Fetch the list of federated peers from a Lemmy node via /api/v3/site.

    Falls back to scraping the HTML /instances overview (fetch_instances)
    when the JSON API replies but exposes no usable 'federated_instances'
    dictionary.

    :param domain: Domain of the instance to query
    :param origin: Origin domain that led here (forwarded to the fallback)
    :return: List of peer domain names; empty list on CSRF/network errors
    """
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    peers = list()

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        return list()

    try:
        logger.debug("Fetching '/api/v3/site' from domain='%s' ...", domain)
        data = network.get_json_api(
            domain,
            "/api/v3/site",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("data[]='%s'", type(data))
        if "error_message" in data:
            logger.warning("Could not reach any JSON API: domain='%s'", domain)
            instances.set_last_error(domain, data)
        elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict):
            logger.debug("Found federated_instances for domain='%s'", domain)
            peers = peers + federation.add_peers(data["json"]["federated_instances"])

            logger.debug("Marking domain='%s' as successfully handled ...", domain)
            instances.set_success(domain)
        else:
            # API reachable but no peer data - scrape the HTML overview instead
            logger.warning("Fetching instances for domain='%s' from /instances ...", domain)
            peers = fetch_instances(domain, origin)
    except network.exceptions as exception:
        logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers
def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
    """Fetch the blocklist of a Lemmy instance by scraping its /instances page.

    Looks for the "Blocked Instances" heading (in any language Lemmy's UI is
    translated to) and collects the linked domains from the following list or
    table. Falls back to parse_script(doc, "blocked") when no matching HTML
    section exists.

    :param domain: Domain of the instance to query
    :param nodeinfo_url: Nodeinfo URL of the instance (validated only)
    :return: List of dicts with keys blocker, blocked, reason, block_level
    :raises ValueError: If nodeinfo_url is not a non-empty string
    """
    # Fixed format string: closing quote after the first %s was missing.
    logger.debug("domain='%s',nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
    domain_helper.raise_on(domain)

    if not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not of type 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # Lower-cased "Blocked Instances" headings for every UI language.
    # NOTE(review): three entries of the original list could not be recovered
    # from the garbled extraction - verify against upstream before release.
    translations = [
        "Blocked Instances".lower(),
        "Instàncies bloquejades".lower(),
        "Blocáilte Ásc".lower(),
        "Blokované instance".lower(),
        "Geblokkeerde instanties".lower(),
        "Blockerade instanser".lower(),
        "Instàncias blocadas".lower(),
        "Istanze bloccate".lower(),
        "Instances bloquées".lower(),
        "Letiltott példányok".lower(),
        "Instancias bloqueadas".lower(),
        "Blokeatuta dauden instantziak".lower(),
        "Peladen Yang Diblokir".lower(),
        "Blokerede servere".lower(),
        "Blokitaj nodoj".lower(),
        "Блокирани Инстанции".lower(),
        "Blockierte Instanzen".lower(),
        "Estetyt instanssit".lower(),
        "Instâncias bloqueadas".lower(),
        "Zablokowane instancje".lower(),
        "Blokované inštancie".lower(),
        "المثلاء المحجوبون".lower(),
        "Užblokuoti serveriai".lower(),
        "ブロックしたインスタンス".lower(),
        "Блокированные Инстансы".lower(),
        "Αποκλεισμένοι διακομιστές".lower(),
        "Instâncias bloqueadas".lower(),
    ]

    blocklist = list()

    try:
        # json endpoint for newer mastodongs
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            found = None
            headers = doc.findAll("div", {"class": "home-instances container-lg"})

            logger.debug("Checking %d header(s) ...", len(headers))
            for header in headers:
                logger.debug("header[]='%s'", type(header))
                content = header.contents[0]

                logger.debug("content[%s]='%s'", type(content), content)
                if content is None:
                    logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
                    continue
                elif not isinstance(content, str):
                    logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
                    continue
                elif content.lower() in translations:
                    logger.debug("Found header with blocked instances - BREAK!")
                    found = header
                    break

            logger.debug("found[]='%s'", type(found))
            if found is None:
                # No translated "Blocked Instances" heading - try the
                # embedded window.isoData script payload instead.
                logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
                peers = parse_script(doc, "blocked")

                logger.debug("domain='%s' has %d peer(s).", domain, len(peers))
                for blocked in peers:
                    logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                    blocklist.append({
                        "blocker"    : domain,
                        "blocked"    : blocked,
                        "reason"     : None,
                        "block_level": "reject",
                    })

                logger.debug("blocklist()=%d - EXIT!", len(blocklist))
                return blocklist

            # Domains are the anchor texts of the list/table right after the heading
            blocking = found.find_next(["ul", "table"]).findAll("a")
            logger.debug("Found %d blocked instance(s) ...", len(blocking))
            for tag in blocking:
                logger.debug("tag[]='%s'", type(tag))
                blocked = tidyup.domain(tag.contents[0])
                logger.debug("blocked='%s'", blocked)

                if blocked == "":
                    logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
                    continue
                elif not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                blocklist.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": "reject",
                })
    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("blocklist()=%d - EXIT!", len(blocklist))
    return blocklist
def fetch_instances(domain: str, origin: str) -> list:
    """Scrape the peer list from a Lemmy instance's /instances HTML page.

    Collects the anchor texts of the lists/tables following each
    "home-instances" section. When no peers were scraped from the HTML, falls
    back to parse_script(doc) on the embedded window.isoData payload.

    :param domain: Domain of the instance to query
    :param origin: Origin domain that led here (currently unused internally)
    :return: List of unique, wanted peer domain names; empty list on errors
    """
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    peers = list()

    try:
        # json endpoint for newer mastodongs
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            headers = doc.findAll("div", {"class": "home-instances container-lg"})
            logger.debug("Checking %d headers ...", len(headers))
            for header in headers:
                logger.debug("header[%s]='%s'", type(header), header)

                rows = header.find_next(["ul", "table"]).findAll("a")
                logger.debug("Found %d blocked instance(s) ...", len(rows))
                for tag in rows:
                    logger.debug("tag[]='%s'", type(tag))
                    # Anchor content may be a plain string or a nested tag
                    text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
                    peer = tidyup.domain(text)
                    logger.debug("peer='%s'", peer)

                    if peer == "":
                        logger.debug("peer is empty - SKIPPED!")
                        continue
                    elif not utils.is_domain_wanted(peer):
                        logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                        continue
                    elif peer in peers:
                        logger.debug("peer='%s' already added - SKIPPED!", peer)
                        continue

                    logger.debug("Appending peer='%s' ...", peer)
                    peers.append(peer)

        logger.debug("peers()=%d", len(peers))
        if len(peers) == 0:
            logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
            peers = parse_script(doc)

        logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
        instances.set_success(domain)
    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers
def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
    """Extract peer domains from Lemmy's embedded ``window.isoData`` script.

    Parses the JSON payload assigned to ``window.isoData`` and walks
    routeData -> federatedInstancesResponse -> data -> federated_instances.

    :param doc: Parsed HTML document of the instance's /instances page
    :param only: Optional element key to restrict extraction to (e.g.
                 "linked", "allowed" or "blocked"); None means all elements
    :return: List of unique, wanted peer domain names
    :raises ValueError: If doc/only have wrong types or only is empty
    """
    # Fixed: the original debug call supplied no arguments for its two
    # %s placeholders, which logging reports as a formatting error.
    logger.debug("doc[]='%s',only='%s' - CALLED!", type(doc), only)
    if not isinstance(doc, bs4.BeautifulSoup):
        # Fixed: message interpolated type(only) instead of type(doc)
        raise ValueError(f"Parameter doc[]='{type(doc)}' is not of type 'bs4.BeautifulSoup'")
    elif not isinstance(only, str) and only is not None:
        raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'")
    elif isinstance(only, str) and only == "":
        raise ValueError("Parameter 'only' is empty")

    scripts = doc.find_all("script")
    peers = list()

    logger.debug("scripts()=%d", len(scripts))
    for script in scripts:
        logger.debug("script[%s].contents()=%d", type(script), len(script.contents))
        if len(script.contents) == 0:
            logger.debug("script has no contents - SKIPPED!")
            continue
        elif not script.contents[0].startswith("window.isoData"):
            logger.debug("script.contents[0]='%s' does not start with window.isoData - SKIPPED!", script.contents[0])
            continue

        logger.debug("script.contents[0][]='%s'", type(script.contents[0]))

        # Right-hand side of the assignment; ':undefined' is not valid JSON,
        # so quote it before parsing
        isoData = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"")
        logger.debug("isoData[%s]='%s'", type(isoData), isoData)

        parsed = None
        try:
            parsed = json.loads(isoData)
        except json.decoder.JSONDecodeError as exception:
            logger.warning("Exception '%s' during parsing %d Bytes: '%s'", type(exception), len(isoData), str(exception))
            return list()

        logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

        if "routeData" not in parsed:
            logger.warning("parsed[%s]()=%d does not contain element 'routeData'", type(parsed), len(parsed))
            continue
        elif "federatedInstancesResponse" not in parsed["routeData"]:
            logger.warning("parsed[routeData][%s]()=%d does not contain element 'federatedInstancesResponse'", type(parsed["routeData"]), len(parsed["routeData"]))
            continue
        elif "data" not in parsed["routeData"]["federatedInstancesResponse"]:
            logger.warning("parsed[routeData][federatedInstancesResponse][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]), len(parsed["routeData"]["federatedInstancesResponse"]))
            continue
        elif "federated_instances" not in parsed["routeData"]["federatedInstancesResponse"]["data"]:
            logger.warning("parsed[routeData][federatedInstancesResponse][data][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]["data"]), len(parsed["routeData"]["federatedInstancesResponse"]["data"]))
            continue

        data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
        logger.debug("Checking %d data elements ...", len(data))
        for element in data:
            logger.debug("element='%s'", element)
            if isinstance(only, str) and only != element:
                logger.debug("Skipping unwanted element='%s',only='%s'", element, only)
                continue

            logger.debug("Checking data[%s]()=%d row(s) ...", element, len(data[element]))
            for row in data[element]:
                logger.debug("row[]='%s'", type(row))
                if "domain" not in row:
                    logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row))
                    continue

                logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
                peer = tidyup.domain(row["domain"])
                logger.debug("peer='%s' - AFTER!", peer)

                if peer == "":
                    logger.debug("peer is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(peer):
                    logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                    continue
                elif peer in peers:
                    logger.debug("peer='%s' already added - SKIPPED!", peer)
                    continue

                logger.debug("Appending peer='%s' ...", peer)
                peers.append(peer)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers