# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import json
import logging

import bs4

from fba import csrf
from fba import utils
from fba.helpers import config
from fba.helpers import domain as domain_helper
from fba.helpers import tidyup
from fba.http import federation
from fba.http import network
from fba.models import instances

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
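
# Fetches the list of federated peers for the given domain. The peer list is
# requested from the JSON API endpoint '/api/v3/site' (after checking whether
# CSRF headers are required); if that response carries no usable
# 'federated_instances' data, fetch_instances() is used as a fallback.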
def fetch_peers(domain: str, origin: str) -> list:
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    peers = list()

    # No CSRF token by default; network.api_headers does not need to be added here
    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        return peers

    try:
        logger.debug("Fetching '/api/v3/site' from domain='%s' ...", domain)
        data = network.get_json_api(
            domain,
            "/api/v3/site",
            headers,  # CSRF headers determined above; argument order assumed
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("data[]='%s'", type(data))
        if "error_message" in data:
            logger.warning("Could not reach any JSON API: domain='%s'", domain)
            instances.set_last_error(domain, data)
        elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict):
            logger.debug("Found federated_instances for domain='%s'", domain)
            peers = peers + federation.add_peers(data["json"]["federated_instances"])

            logger.debug("Marking domain='%s' as successfully handled ...", domain)
            instances.set_success(domain)
        else:
            logger.warning("Fetching instances for domain='%s' from /instances ...", domain)
            peers = fetch_instances(domain, origin)
    except network.exceptions as exception:
        logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers
def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
    logger.debug("domain='%s',nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
    domain_helper.raise_on(domain)

    if not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not of type 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")
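
    # Localized variants of the "Blocked Instances" heading, lower-cased so the
    # scraped header text can be compared case-insensitively.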
96 "Blocked Instances".lower(),
97 "Instàncies bloquejades".lower(),
98 "Blocáilte Ásc".lower(),
100 "Blokované instance".lower(),
101 "Geblokkeerde instanties".lower(),
102 "Blockerade instanser".lower(),
103 "Instàncias blocadas".lower(),
104 "Istanze bloccate".lower(),
105 "Instances bloquées".lower(),
106 "Letiltott példányok".lower(),
107 "Instancias bloqueadas".lower(),
108 "Blokeatuta dauden instantziak".lower(),
110 "Peladen Yang Diblokir".lower(),
111 "Blokerede servere".lower(),
112 "Blokitaj nodoj".lower(),
113 "Блокирани Инстанции".lower(),
114 "Blockierte Instanzen".lower(),
115 "Estetyt instanssit".lower(),
116 "Instâncias bloqueadas".lower(),
117 "Zablokowane instancje".lower(),
118 "Blokované inštancie".lower(),
119 "المثلاء المحجوبون".lower(),
120 "Užblokuoti serveriai".lower(),
121 "ブロックしたインスタンス".lower(),
122 "Блокированные Инстансы".lower(),
123 "Αποκλεισμένοι διακομιστές".lower(),
125 "Instâncias bloqueadas".lower(),
    blocklist = list()

    try:
        # Fetch and scrape the HTML /instances overview page
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,  # assumed default web headers; argument not shown in the excerpt
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            found = None
            for container in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("container='%s'", container)
                headers = doc.findAll("div", container)

                logger.debug("Checking %d header(s) ...", len(headers))
                for header in headers:
                    logger.debug("header[]='%s'", type(header))
                    content = header.find(["h2", "h3", "h4", "h5"]).contents[0]

                    logger.debug("content[%s]='%s'", type(content), content)
                    if content is None:
                        logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
                        continue
                    elif not isinstance(content, str):
                        logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
                        continue
                    elif content.lower() in translations:
                        logger.debug("Found header with blocked instances - BREAK!")
                        found = header
                        break

            logger.debug("found[]='%s'", type(found))
            if found is None:
                logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
                peers = parse_script(doc, "blocked")

                logger.debug("domain='%s' has %d peer(s).", domain, len(peers))
                for blocked in peers:
                    logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                    blocklist.append({
                        "blocker"    : domain,
                        "blocked"    : blocked,
                        "block_level": "reject",
                    })

                logger.debug("blocklist()=%d - EXIT!", len(blocklist))
                return blocklist
            blocking = found.find_next(["ul","table"]).findAll("a")
            logger.debug("Found %d blocked instance(s) ...", len(blocking))
            for tag in blocking:
                logger.debug("tag[]='%s'", type(tag))
                blocked = tidyup.domain(tag.contents[0])
                logger.debug("blocked='%s'", blocked)

                if blocked == "":
                    logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
                    continue
                elif not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                blocklist.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "block_level": "reject",
                })
    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("blocklist()=%d - EXIT!", len(blocklist))
    return blocklist
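
# Scrapes the '/instances' page of the given domain and collects all linked
# peer domains. If the page yields no peers, parse_script() is used as a
# fallback to read the peer list from embedded script data.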
def fetch_instances(domain: str, origin: str) -> list:
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    peers = list()

    try:
        # Fetch and scrape the HTML /instances overview page
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,  # assumed default web headers; argument not shown in the excerpt
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code < 300 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            for container in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("container='%s'", container)
                headers = doc.findAll("div", container)

                logger.debug("Checking %d headers ...", len(headers))
                for header in headers:
                    logger.debug("header[%s]='%s'", type(header), header)

                    rows = header.find_next(["ul","table"]).findAll("a")
                    logger.debug("Found %d blocked instance(s) ...", len(rows))
                    for tag in rows:
                        logger.debug("tag[]='%s'", type(tag))
                        text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
                        peer = tidyup.domain(text)
                        logger.debug("peer='%s'", peer)

                        if peer == "":
                            logger.debug("peer is empty - SKIPPED!")
                            continue
                        elif not utils.is_domain_wanted(peer):
                            logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                            continue
                        elif peer in peers:
                            logger.debug("peer='%s' already added - SKIPPED!", peer)
                            continue

                        logger.debug("Appending peer='%s' ...", peer)
                        peers.append(peer)

            logger.debug("peers()=%d", len(peers))
            if len(peers) == 0:
                logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
                peers = parse_script(doc)

        logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
        instances.set_success(domain)
    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers
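
# Extracts peer domains from the 'window.isoData' JSON blob embedded in a
# <script> tag (apparently the hydration data of Lemmy's web frontend). If
# 'only' is given (e.g. "blocked"), only that element of 'federated_instances'
# is evaluated.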
def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
    logger.debug("doc[]='%s',only='%s' - CALLED!", type(doc), only)
    if not isinstance(doc, bs4.BeautifulSoup):
        raise ValueError(f"Parameter doc[]='{type(doc)}' is not of type 'bs4.BeautifulSoup'")
    elif not isinstance(only, str) and only is not None:
        raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'")
    elif isinstance(only, str) and only == "":
        raise ValueError("Parameter 'only' is empty")

    scripts = doc.find_all("script")
    peers = list()

    logger.debug("scripts()=%d", len(scripts))
    for script in scripts:
        logger.debug("script[%s].contents()=%d", type(script), len(script.contents))
        if len(script.contents) == 0:
            logger.debug("script has no contents - SKIPPED!")
            continue
        elif not script.contents[0].startswith("window.isoData"):
            logger.debug("script.contents[0]='%s' does not start with window.isoData - SKIPPED!", script.contents[0])
            continue

        logger.debug("script.contents[0][]='%s'", type(script.contents[0]))

        isoData = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"")
        logger.debug("isoData[%s]='%s'", type(isoData), isoData)

        try:
            parsed = json.loads(isoData)
        except json.decoder.JSONDecodeError as exception:
            logger.warning("Exception '%s' during parsing %d Bytes: '%s'", type(exception), len(isoData), str(exception))
            # Assumption: skip this script block when its JSON cannot be parsed
            continue

        logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

        if "routeData" not in parsed:
            logger.warning("parsed[%s]()=%d does not contain element 'routeData'", type(parsed), len(parsed))
            continue
        elif "federatedInstancesResponse" not in parsed["routeData"]:
            logger.warning("parsed[routeData][%s]()=%d does not contain element 'federatedInstancesResponse'", type(parsed["routeData"]), len(parsed["routeData"]))
            continue
        elif "data" not in parsed["routeData"]["federatedInstancesResponse"]:
            logger.warning("parsed[routeData][federatedInstancesResponse][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]), len(parsed["routeData"]["federatedInstancesResponse"]))
            continue
        elif "federated_instances" not in parsed["routeData"]["federatedInstancesResponse"]["data"]:
            logger.warning("parsed[routeData][federatedInstancesResponse][data][%s]()=%d does not contain element 'federated_instances'", type(parsed["routeData"]["federatedInstancesResponse"]["data"]), len(parsed["routeData"]["federatedInstancesResponse"]["data"]))
            continue

        data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
        logger.debug("Checking %d data elements ...", len(data))
        for element in data:
            logger.debug("element='%s'", element)
            if isinstance(only, str) and only != element:
                logger.debug("Skipping unwanted element='%s',only='%s'", element, only)
                continue

            logger.debug("Checking data[%s]()=%d row(s) ...", element, len(data[element]))
            for row in data[element]:
                logger.debug("row[]='%s'", type(row))
                if "domain" not in row:
                    logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row))
                    continue

                logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
                peer = tidyup.domain(row["domain"])
                logger.debug("peer='%s' - AFTER!", peer)

                if peer == "":
                    logger.debug("peer is empty - SKIPPED!")
                    continue
                elif not utils.is_domain_wanted(peer):
                    logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                    continue
                elif peer in peers:
                    logger.debug("peer='%s' already added - SKIPPED!", peer)
                    continue

                logger.debug("Appending peer='%s' ...", peer)
                peers.append(peer)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers