1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
25 from fba.helpers import config
26 from fba.helpers import domain as domain_helper
27 from fba.helpers import tidyup
29 from fba.http import federation
30 from fba.http import network
32 from fba.models import instances
# Module-level logger setup: INFO by default; the commented-out line below is
# kept as a convenient toggle for DEBUG-level tracing during development.
# NOTE(review): this whole chunk is a numbered paste — each physical line is
# prefixed with its original line number ("34 ", "35 ", ...) and the numbering
# jumps reveal missing lines. Restore the pristine file from version control
# before editing; as-is the module will not parse.
# NOTE(review): calling logging.basicConfig() at import time of a library
# module configures the root logger as a side effect — confirm this is
# intentional project convention.
34 logging.basicConfig(level=logging.INFO)
35 logger = logging.getLogger(__name__)
36 #logger.setLevel(logging.DEBUG)
# Fetch the list of federated peer domains from a Lemmy node.
#
# Parameters:
#   domain - hostname of the node to query (validated via domain_helper.raise_on)
#   origin - referring domain, forwarded to the /instances fallback fetch
# Returns:
#   list of peer domain names, gathered from the '/api/v3/site' JSON API
#   ("federated_instances" element) or, as a fallback, scraped from the
#   HTML /instances page via fetch_instances()
#
# NOTE(review): incomplete paste — the embedded numbering jumps (40->44,
# 52->56, 57->61, ...) show that the 'peers' initialisation, both 'try:'
# statements, the get_json_api() argument lines (path, headers) and the
# final 'return peers' are missing here. Do not edit this copy.
38 def fetch_peers(domain: str, origin: str) -> list:
39 logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
40 domain_helper.raise_on(domain)
44 # No CSRF by default, you don't have to add network.api_headers by yourself here
48 logger.debug("Checking CSRF for domain='%s'", domain)
49 headers = csrf.determine(domain, dict())
# On CSRF-check failure: record the error against the instance and bail out
# early (the elided lines presumably return an empty list - TODO confirm).
50 except network.exceptions as exception:
51 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
52 instances.set_last_error(domain, exception)
# Query the Lemmy site API with the configured connection/read timeouts.
56 logger.debug("Fetching '/api/v3/site' from domain='%s' ...", domain)
57 data = network.get_json_api(
61 (config.get("connection_timeout"), config.get("read_timeout"))
64 logger.debug("data[]='%s'", type(data))
65 if "error_message" in data:
66 logger.warning("Could not reach any JSON API: domain='%s'", domain)
67 instances.set_last_error(domain, data)
68 elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict):
69 logger.debug("Found federated_instances for domain='%s'", domain)
# Merge peers advertised by the API into the accumulated list.
70 peers = peers + federation.add_peers(data["json"]["federated_instances"])
72 logger.debug("Marking domain='%s' as successfully handled ...", domain)
73 instances.set_success(domain)
# Fallback branch (the 'else:' line is elided): scrape the HTML page instead.
76 logger.warning("Fetching instances for domain='%s' from /instances ...", domain)
77 peers = fetch_instances(domain, origin)
79 except network.exceptions as exception:
80 logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
81 instances.set_last_error(domain, exception)
83 logger.debug("peers()=%d - EXIT!", len(peers))
# Fetch the list of blocked ("defederated") instances from a Lemmy node.
#
# Parameters:
#   domain       - hostname of the blocking node (validated via raise_on)
#   nodeinfo_url - nodeinfo URL for this node; must be a non-empty str
# Raises:
#   ValueError - when nodeinfo_url is not a str or is empty
# Returns:
#   list of block dicts (blocker/blocked/block_level entries), scraped from
#   the HTML /instances page or, failing that, from the window.isoData
#   script blob via parse_script(doc, "blocked")
#
# NOTE(review): incomplete paste — the 'translations = [' opener, several
# list entries, the 'found'/'blocklist' initialisations, the fetch_response()
# argument lines, 'continue' statements, the dict-append lines and the final
# 'return blocklist' are all elided (numbering jumps 93->96, 125->131, ...).
# NOTE(review): the log format on content-line 87 has a typo — "domain='%s,"
# is missing the closing quote before the comma; fix in the pristine file.
86 def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
87 logger.debug("domain='%s,nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
88 domain_helper.raise_on(domain)
90 if not isinstance(nodeinfo_url, str):
91 raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not of type 'str'")
92 elif nodeinfo_url == "":
93 raise ValueError("Parameter 'nodeinfo_url' is empty")
# Localised headings for the "Blocked Instances" section, lower-cased so the
# scraped <div> heading can be matched case-insensitively in any UI language.
96 "Blocked Instances".lower(),
97 "Instàncies bloquejades".lower(),
98 "Blocáilte Ásc".lower(),
100 "Blokované instance".lower(),
101 "Geblokkeerde instanties".lower(),
102 "Blockerade instanser".lower(),
103 "Instàncias blocadas".lower(),
104 "Istanze bloccate".lower(),
105 "Instances bloquées".lower(),
106 "Letiltott példányok".lower(),
107 "Instancias bloqueadas".lower(),
108 "Blokeatuta dauden instantziak".lower(),
110 "Peladen Yang Diblokir".lower(),
111 "Blokerede servere".lower(),
112 "Blokitaj nodoj".lower(),
113 "Блокирани Инстанции".lower(),
114 "Blockierte Instanzen".lower(),
115 "Estetyt instanssit".lower(),
116 "Instâncias bloqueadas".lower(),
117 "Zablokowane instancje".lower(),
118 "Blokované inštancie".lower(),
119 "المثلاء المحجوبون".lower(),
120 "Užblokuoti serveriai".lower(),
121 "ブロックしたインスタンス".lower(),
122 "Блокированные Инстансы".lower(),
123 "Αποκλεισμένοι διακομιστές".lower(),
125 "Instâncias bloqueadas".lower(),
# Newer Lemmy versions render /instances as HTML; fetch and parse it.
131 # json endpoint for newer mastodongs
132 logger.debug("Fetching /instances from domain='%s'", domain)
133 response = network.fetch_response(
137 (config.get("connection_timeout"), config.get("read_timeout"))
140 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
141 if response.ok and response.status_code < 300 and response.text != "":
142 logger.debug("Parsing %s Bytes ...", len(response.text))
144 doc = bs4.BeautifulSoup(response.text, "html.parser")
145 logger.debug("doc[]='%s'", type(doc))
# Locate the section containers; the blocked-instances heading is matched
# against the translations list above.
147 headers = doc.findAll("div", {"class": "home-instances container-lg"})
149 logger.debug("Checking %d header(s) ...", len(headers))
150 for header in headers:
151 logger.debug("header[]='%s'", type(header))
152 content = header.contents[0]
154 logger.debug("content[%s]='%s'", type(content), content)
156 logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
158 elif not isinstance(content, str):
159 logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
161 elif content.lower() in translations:
162 logger.debug("Found header with blocked instances - BREAK!")
166 logger.debug("found[]='%s'", type(found))
# No matching HTML heading found: fall back to the embedded script data.
168 logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
169 blocking = parse_script(doc, "blocked")
171 logger.debug("blocking()=%d - EXIT!", len(blocking))
# HTML path: the sibling <ul>/<table> after the matched heading lists the
# blocked instances as anchor tags.
174 blocking = found.find_next(["ul","table"]).findAll("a")
175 logger.debug("Found %d blocked instance(s) ...", len(blocking))
177 logger.debug("tag[]='%s'", type(tag))
178 blocked = tidyup.domain(tag.contents[0])
179 logger.debug("blocked='%s'", blocked)
182 logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
184 elif not utils.is_domain_wanted(blocked):
185 logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
# Every surviving entry is recorded as a full "reject" level block.
188 logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
193 "block_level": "reject",
196 except network.exceptions as exception:
197 logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
198 instances.set_last_error(domain, exception)
200 logger.debug("blocklist()=%d - EXIT!", len(blocklist))
# Scrape the HTML /instances page of a Lemmy node for its peer list.
#
# Parameters:
#   domain - hostname of the node to scrape (validated via raise_on)
#   origin - referring domain (unused in the visible lines - TODO confirm
#            it is only logged / recorded in the elided lines)
# Returns:
#   list of tidied peer domain names; falls back to parse_script(doc) when
#   the HTML tables yield no peers
#
# NOTE(review): incomplete paste — 'peers' initialisation, 'try:', the
# fetch_response() argument lines, the 'for tag in rows:' loop header,
# 'continue' statements, the append line and the final 'return peers' are
# elided (numbering jumps 205->210, 232->234, ...). Do not edit this copy.
203 def fetch_instances(domain: str, origin: str) -> list:
204 logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
205 domain_helper.raise_on(domain)
210 # json endpoint for newer mastodongs
211 logger.debug("Fetching /instances from domain='%s'", domain)
212 response = network.fetch_response(
216 (config.get("connection_timeout"), config.get("read_timeout"))
219 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
220 if response.ok and response.status_code < 300 and response.text != "":
221 logger.debug("Parsing %s Bytes ...", len(response.text))
223 doc = bs4.BeautifulSoup(response.text, "html.parser")
224 logger.debug("doc[]='%s'", type(doc))
# Each "home-instances" container holds a <ul>/<table> of anchor tags whose
# first content node (string or nested element) is the peer's domain.
226 headers = doc.findAll("div", {"class": "home-instances container-lg"})
227 logger.debug("Checking %d headers ...", len(headers))
228 for header in headers:
229 logger.debug("header[%s]='%s'", type(header), header)
231 rows = header.find_next(["ul","table"]).findAll("a")
232 logger.debug("Found %d blocked instance(s) ...", len(rows))
234 logger.debug("tag[]='%s'", type(tag))
# Anchor content may be a bare string or a wrapped element; normalise first.
235 text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text
236 peer = tidyup.domain(text)
237 logger.debug("peer='%s'", peer)
240 logger.debug("peer is empty - SKIPPED!")
242 elif not utils.is_domain_wanted(peer):
243 logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
246 logger.debug("peer='%s' already added - SKIPPED!", peer)
249 logger.debug("Appending peer='%s' ...", peer)
252 logger.debug("peers()=%d", len(peers))
# Empty HTML result: fall back to the window.isoData script blob.
254 logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
255 peers = parse_script(doc)
257 logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
258 instances.set_success(domain)
260 except network.exceptions as exception:
261 logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
262 instances.set_last_error(domain, exception)
264 logger.debug("peers()=%d - EXIT!", len(peers))
# Extract peer domains from Lemmy's embedded "window.isoData" script blob.
#
# Parameters:
#   doc  - parsed BeautifulSoup document of a Lemmy HTML page
#   only - optional key ("linked", "allowed", "blocked", ...) restricting
#          which federated_instances element is harvested; None means all
# Raises:
#   ValueError - when doc is not a BeautifulSoup or only is a non-None
#                non-str, or an empty str
# Returns:
#   list of tidied peer domain names
#
# NOTE(review): incomplete paste — 'peers' initialisation, 'continue'
# statements, the 'for element in data:' loop header, the append line and
# the final 'return peers' are elided (numbering jumps 274->276, ...).
# NOTE(review): content-line 268's logger.debug() format string has two %s
# placeholders but passes no arguments — logging will swallow the formatting
# error but the message is broken; should pass type(doc) and only.
# NOTE(review): content-line 270's error message interpolates type(only)
# where type(doc) is clearly intended — misleading diagnostics.
267 def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
268 logger.debug("doc[]='%s',only='%s' - CALLED!")
269 if not isinstance(doc, bs4.BeautifulSoup):
270 raise ValueError(f"Parameter doc[]='{type(only)}' is not of type 'bs4.BeautifulSoup'")
271 elif not isinstance(only, str) and only != None:
272 raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'")
273 elif isinstance(only, str) and only == "":
274 raise ValueError("Parameter 'only' is empty")
276 scripts = doc.find_all("script")
279 logger.debug("scripts()=%d", len(scripts))
280 for script in scripts:
281 logger.debug("script[%s].contents()=%d", type(script), len(script.contents))
282 if len(script.contents) == 0:
283 logger.debug("script has no contents - SKIPPED!")
285 elif not script.contents[0].startswith("window.isoData"):
286 logger.debug("script.contents[0]='%s' does not start with window.isoData - SKIPPED!", script.contents[0])
289 logger.debug("script.contents[0][]='%s'", type(script.contents[0]))
# Strip the "window.isoData = " assignment and patch the JS-only literal
# ':undefined' into a JSON-parsable string so json.loads() accepts it.
291 isoData = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"")
292 logger.debug("isoData[%s]='%s'", type(isoData), isoData)
296 parsed = json.loads(isoData)
297 except json.decoder.JSONDecodeError as exception:
298 logger.warning("Exception '%s' during parsing %d Bytes: '%s'", type(exception), len(isoData), str(exception))
301 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
# Walk down the expected routeData -> federatedInstancesResponse -> data
# -> federated_instances nesting, warning (and presumably skipping via the
# elided 'continue's) on any missing level.
303 if "routeData" not in parsed:
304 logger.warning("parsed[%s]()=%d does not contain element 'routeData'", type(parsed), len(parsed))
306 elif "federatedInstancesResponse" not in parsed["routeData"]:
307 logger.warning("parsed[routeData][%s]()=%d does not contain element 'federatedInstancesResponse'", type(parsed["routeData"]), len(parsed["routeData"]))
309 elif "data" not in parsed["routeData"]["federatedInstancesResponse"]:
310 logger.warning("parsed[routeData][federatedInstancesResponse][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]), len(parsed["routeData"]["federatedInstancesResponse"]))
312 elif "federated_instances" not in parsed["routeData"]["federatedInstancesResponse"]["data"]:
313 logger.warning("parsed[routeData][federatedInstancesResponse][data][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]["data"]), len(parsed["routeData"]["federatedInstancesResponse"]["data"]))
316 data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
317 logger.debug("Checking %d data elements ...", len(data))
319 logger.debug("element='%s'", element)
# When 'only' is given, harvest just that element (e.g. only="blocked").
320 if isinstance(only, str) and only != element:
321 logger.debug("Skipping unwanted element='%s',only='%s'", element, only)
324 logger.debug("Checking data[%s]()=%d row(s) ...", element, len(data[element]))
325 for row in data[element]:
326 logger.debug("row[]='%s'", type(row))
327 if "domain" not in row:
328 logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row))
331 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
332 peer = tidyup.domain(row["domain"])
333 logger.debug("peer='%s' - AFTER!", peer)
336 logger.debug("peer is empty - SKIPPED!")
338 elif not utils.is_domain_wanted(peer):
339 logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
342 logger.debug("peer='%s' already added - SKIPPED!", peer)
345 logger.debug("Appending peer='%s' ...", peer)
348 logger.debug("peers()=%d - EXIT!", len(peers))