1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
22 from fba.helpers import blacklist
23 from fba.helpers import config
24 from fba.helpers import domain as domain_helper
25 from fba.helpers import tidyup
27 from fba.http import csrf
28 from fba.http import federation
29 from fba.http import network
31 from fba.models import instances
# Module-wide logging setup: configure the root logger at INFO level and
# obtain a module-local logger (standard per-module logging pattern).
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
39 "Blocked Instances".lower(),
40 "Instàncies bloquejades".lower(),
41 "Blocáilte Ásc".lower(),
43 "Blokované instance".lower(),
44 "Geblokkeerde instanties".lower(),
45 "Blockerade instanser".lower(),
46 "Instàncias blocadas".lower(),
47 "Istanze bloccate".lower(),
48 "Instances bloquées".lower(),
49 "Letiltott példányok".lower(),
50 "Instancias bloqueadas".lower(),
51 "Blokeatuta dauden instantziak".lower(),
53 "Peladen Yang Diblokir".lower(),
54 "Blokerede servere".lower(),
55 "Blokitaj nodoj".lower(),
56 "Блокирани Инстанции".lower(),
57 "Blockierte Instanzen".lower(),
58 "Estetyt instanssit".lower(),
59 "Instâncias bloqueadas".lower(),
60 "Zablokowane instancje".lower(),
61 "Blokované inštancie".lower(),
62 "المثلاء المحجوبون".lower(),
63 "Užblokuoti serveriai".lower(),
64 "ブロックしたインスタンス".lower(),
65 "Блокированные Инстансы".lower(),
66 "Αποκλεισμένοι διακομιστές".lower(),
68 "Instâncias bloqueadas".lower(),
def fetch_peers(domain: str, origin: str) -> list:
    """
    Fetches a list of federated peer domains from a Lemmy node.

    Queries the node's "/api/v3/site" JSON endpoint for its
    "federated_instances" element and falls back to scraping the HTML
    "/instances" page through fetch_instances() (visible below).

    NOTE(review): several original lines are not visible in this view
    (e.g. the `peers` initialisation, the opening `try:` statements and
    the final `return`); gap markers below flag where code is missing.

    :param domain: Domain name of the instance to query
    :param origin: Domain that referred us to this instance
    :return: List of peer domain names (presumably - confirm in caller)
    """
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    # Validate the domain early; raises for malformed input.
    domain_helper.raise_on(domain)

    # Guard clauses: refuse to operate on blacklisted or unregistered hosts.
    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    # NOTE(review): the opening `try:` of this CSRF probe is missing from this view.
        logger.debug("Checking CSRF for domain='%s' ...", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        # CSRF detection failed at the network level: record it and bail out.
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
        instances.set_last_error(domain, exception)

        logger.debug("Returning empty list ... - EXIT!")
        # NOTE(review): the `return` statement itself is missing from this view.

    # NOTE(review): the opening `try:` of the API request is missing from this view.
        logger.debug("Fetching '/api/v3/site' from domain='%s' ...", domain)
        data = network.get_json_api(
            # NOTE(review): positional arguments before the timeout tuple are missing here.
            (config.get("connection_timeout"), config.get("read_timeout"))

        logger.debug("data[]='%s'", type(data))
        if "error_message" in data:
            # network.get_json_api() reports failures via an "error_message" key.
            logger.warning("Could not reach any JSON API: domain='%s',error_message='%s'", domain, data["error_message"])
            instances.set_last_error(domain, data)
        elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict):
            logger.debug("Found federated_instances for domain='%s'", domain)
            # Merge API-provided peers into the accumulated list.
            peers = peers + federation.add_peers(data["json"]["federated_instances"])
            logger.debug("peers()=%d after adding", len(peers))

            logger.debug("Marking domain='%s' as successfully handled ...", domain)
            instances.set_success(domain)

        logger.debug("peers()=%d", len(peers))
        # Fallback: scrape the HTML /instances page (presumably only when the
        # API yielded no peers - the guarding condition is missing from this view).
            logger.debug("Fetching instances for domain='%s' from /instances ...", domain)
            peers = fetch_instances(domain, origin)
            logger.debug("peers()=%d after fetch_instances(%s, %s)", len(peers), domain, origin)

    except network.exceptions as exception:
        logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    # NOTE(review): the final `return peers` is missing from this view.
129 def fetch_blocks(domain: str) -> list:
130 logger.debug("domain='%s - CALLED!", domain)
131 domain_helper.raise_on(domain)
133 if blacklist.is_blacklisted(domain):
134 raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
135 elif not instances.is_registered(domain):
136 raise Exception(f"domain='{domain}' is not registered but function is invoked.")
141 # json endpoint for newer mastodongs
142 logger.debug("Fetching /instances from domain='%s'", domain)
143 response = network.fetch_response(
147 (config.get("connection_timeout"), config.get("read_timeout"))
150 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
151 if response.ok and response.status_code == 200 and response.text != "":
152 logger.debug("Parsing %s Bytes ...", len(response.text))
153 doc = bs4.BeautifulSoup(response.text, "html.parser")
154 logger.debug("doc[]='%s'", type(doc))
157 for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
158 logger.debug("Trying to find criteria='%s' ...", criteria)
159 containers = doc.findAll("div", criteria)
161 logger.debug("Checking %d containers ...", len(containers))
162 for container in containers:
163 logger.debug("container[]='%s'", type(container))
164 for header in container.find_all(["h2", "h3", "h4", "h5"]):
166 logger.debug("header[%s]='%s' - BEFORE!", type(header), header)
167 if header is not None:
168 content = str(header.contents[0])
169 logger.debug("content[%s]='%s' - AFTER!", type(content), content)
171 if content in [None, ""]:
172 logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
174 elif not isinstance(content, str):
175 logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
177 elif content.lower() in _translations:
178 logger.debug("Found header='%s' with blocked instances - BREAK(3) !", header)
182 logger.debug("found[]='%s'", type(found))
183 if found is not None:
184 logger.debug("Found header with blocked instances - BREAK(2) !")
187 logger.debug("found[]='%s'", type(found))
188 if found is not None:
189 logger.debug("Found header with blocked instances - BREAK(1) !")
192 logger.debug("found[]='%s'", type(found))
194 logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
195 peers = parse_script(doc, "blocked")
197 logger.debug("domain='%s' has %d peer(s).", domain, len(peers))
198 for blocked in peers:
199 logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
204 "block_level": "reject",
207 logger.debug("blocklist()=%d - EXIT!", len(blocklist))
210 blocking = found.find_next(["ul", "table"]).findAll("a")
211 logger.debug("Found %d blocked instance(s) ...", len(blocking))
213 logger.debug("tag[]='%s'", type(tag))
214 blocked = tidyup.domain(tag.contents[0]) if tag.contents[0] != "" else None
215 logger.debug("blocked='%s'", blocked)
217 if blocked in [None, ""]:
218 logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
220 elif not domain_helper.is_wanted(blocked):
221 logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
224 logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
229 "block_level": "reject",
232 logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
233 instances.set_last_error(domain, response)
235 except network.exceptions as exception:
236 logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
237 instances.set_last_error(domain, exception)
239 logger.debug("blocklist()=%d - EXIT!", len(blocklist))
def fetch_instances(domain: str, origin: str) -> list:
    """
    Scrapes the HTML "/instances" page of a Lemmy node for peer domains.

    Searches known container layouts, collects linked domains from the
    first list/table after each container, and falls back to
    parse_script() when the HTML yielded no peers.

    NOTE(review): several original lines are not visible in this view
    (list initialisations, `try:`, loop headers, `continue`/`return`
    statements); gap markers below flag where code is missing.

    :param domain: Domain name of the instance to scrape
    :param origin: Domain that referred us to this instance
    :return: list of peer domain names (presumably - confirm in caller)
    """
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    # Validate the domain early; raises for malformed input.
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")

    # NOTE(review): the opening `try:` of the fetch is missing from this view.
        # json endpoint for newer mastodongs
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            # NOTE(review): positional arguments before the timeout tuple are missing here.
            (config.get("connection_timeout"), config.get("read_timeout"))

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Parsing %s Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))

            # Try the known container layouts in order.
            for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("criteria='%s'", criteria)
                containers = doc.findAll("div", criteria)

                logger.debug("Checking %d containers ...", len(containers))
                for header in containers:
                    logger.debug("header[%s]='%s'", type(header), header)
                    # Peer links live in the first list or table after the container.
                    table = header.find_next(["ul","table"])

                    logger.debug("table[]='%s'", type(table))
                    # NOTE(review): the condition guarding this warning is missing from this view.
                        logger.warning("No unsorted list or table found,domain='%s' - EXIT!", domain)

                    rows = table.findAll("a")
                    logger.debug("Found %d instance(s) ...", len(rows))
                    # NOTE(review): the `for tag in rows:` line is missing from this view.
                        logger.debug("tag[]='%s'", type(tag))
                        # An anchor's content may be plain text or a nested tag.
                        text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text

                        logger.debug("text[%s]='%s' - BEFORE!", type(text), text)
                        peer = tidyup.domain(text) if text != "" else None
                        logger.debug("peer='%s' - AFTER", peer)

                        if peer in [None, ""]:
                            logger.warning("peer='%s' is empty, text='%s' - SKIPPED!", peer, text)
                        elif not domain_helper.is_wanted(peer):
                            logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                        # NOTE(review): the duplicate-check condition is missing from this view.
                            logger.debug("peer='%s' already added - SKIPPED!", peer)

                        logger.debug("Appending peer='%s' ...", peer)
                        # NOTE(review): the peers.append(peer) line is missing from this view.

            logger.debug("peers()=%d", len(peers))
            # NOTE(review): the condition guarding the script fallback is missing here.
                logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
                peers = parse_script(doc)
                logger.debug("Parsing doc()=%d returned %d peer(s).", len(doc), len(peers))
        # NOTE(review): an `else:` branch line is missing from this view.
            logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
            instances.set_last_error(domain, response)

        logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
        instances.set_success(domain)
    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    # NOTE(review): the final `return peers` is missing from this view.
324 def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
325 logger.debug("doc[]='%s',only='%s' - CALLED!")
327 if not isinstance(doc, bs4.BeautifulSoup):
328 raise ValueError(f"Parameter doc[]='{type(only)}' is not of type 'bs4.BeautifulSoup'")
329 elif not isinstance(only, str) and only is not None:
330 raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'")
331 elif isinstance(only, str) and only == "":
332 raise ValueError("Parameter 'only' is empty")
334 scripts = doc.find_all("script")
337 logger.debug("scripts()=%d", len(scripts))
338 for script in scripts:
339 logger.debug("script[%s].contents()=%d", type(script), len(script.contents))
340 if len(script.contents) == 0:
341 logger.debug("script has no contents - SKIPPED!")
343 elif not script.contents[0].startswith("window.isoData"):
344 logger.debug("script.contents[0]='%s' does not start with window.isoData - SKIPPED!", script.contents[0])
347 logger.debug("script.contents[0][]='%s'", type(script.contents[0]))
349 iso_data = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"")
350 logger.debug("iso_data[%s]='%s'", type(iso_data), iso_data)
354 parsed = json.loads(iso_data)
355 except json.decoder.JSONDecodeError as exception:
356 logger.warning("Exception '%s' during parsing %d Bytes: '%s' - EXIT!", type(exception), len(iso_data), str(exception))
359 logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))
361 if "routeData" not in parsed:
362 logger.warning("parsed[%s]()=%d does not contain element 'routeData'", type(parsed), len(parsed))
364 elif "federatedInstancesResponse" not in parsed["routeData"]:
365 logger.warning("parsed[routeData][%s]()=%d does not contain element 'federatedInstancesResponse'", type(parsed["routeData"]), len(parsed["routeData"]))
367 elif "data" not in parsed["routeData"]["federatedInstancesResponse"]:
368 logger.warning("parsed[routeData][federatedInstancesResponse][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]), len(parsed["routeData"]["federatedInstancesResponse"]))
370 elif "federated_instances" not in parsed["routeData"]["federatedInstancesResponse"]["data"]:
371 logger.warning("parsed[routeData][federatedInstancesResponse][data][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]["data"]), len(parsed["routeData"]["federatedInstancesResponse"]["data"]))
374 data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
375 logger.debug("Checking %d data elements ...", len(data))
377 logger.debug("element[%s]='%s'", type(element), element)
378 if isinstance(only, str) and only != element:
379 logger.debug("Skipping unwanted element='%s',only='%s'", element, only)
382 logger.debug("Checking data[%s]()=%d row(s) ...", element, len(data[element]))
383 for row in data[element]:
384 logger.debug("row[]='%s'", type(row))
385 if "domain" not in row:
386 logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row))
388 elif row["domain"] in [None, ""]:
389 logger.debug("row[domain]='%s' is empty - SKIPPED!", row["domain"])
392 logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
393 peer = tidyup.domain(row["domain"]) if row["domain"] != "" else None
394 logger.debug("peer='%s' - AFTER!", peer)
396 if peer in [None, ""]:
397 logger.warning("peer='%s' is empty, row[domain]='%s' - SKIPPED!", peer, row["domain"])
399 elif not domain_helper.is_wanted(peer):
400 logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
403 logger.debug("peer='%s' already added - SKIPPED!", peer)
406 logger.debug("Appending peer='%s' ...", peer)
409 logger.debug("peers()=%d - EXIT!", len(peers))