# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
import json
import logging

import bs4

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import domain as domain_helper
from fba.helpers import tidyup

from fba.http import csrf
from fba.http import federation
from fba.http import network

from fba.models import instances
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)
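
# Lower-cased variants of the "Blocked Instances" heading as rendered by
# Lemmy's web UI in various languages. fetch_blocks() lower-cases each
# candidate header and looks it up here to locate the blocklist section.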
39 "Blocked Instances".lower(),
40 "Instàncies bloquejades".lower(),
41 "Blocáilte Ásc".lower(),
43 "Blokované instance".lower(),
44 "Geblokkeerde instanties".lower(),
45 "Blockerade instanser".lower(),
46 "Instàncias blocadas".lower(),
47 "Istanze bloccate".lower(),
48 "Instances bloquées".lower(),
49 "Letiltott példányok".lower(),
50 "Instancias bloqueadas".lower(),
51 "Blokeatuta dauden instantziak".lower(),
53 "Peladen Yang Diblokir".lower(),
54 "Blokerede servere".lower(),
55 "Blokitaj nodoj".lower(),
56 "Блокирани Инстанции".lower(),
57 "Blockierte Instanzen".lower(),
58 "Estetyt instanssit".lower(),
59 "Instâncias bloqueadas".lower(),
60 "Zablokowane instancje".lower(),
61 "Blokované inštancie".lower(),
62 "المثلاء المحجوبون".lower(),
63 "Užblokuoti serveriai".lower(),
64 "ブロックしたインスタンス".lower(),
65 "Блокированные Инстансы".lower(),
66 "Αποκλεισμένοι διακομιστές".lower(),
68 "Instâncias bloqueadas".lower(),

def fetch_peers(domain: str, origin: str) -> list:
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    peers = list()

    # No CSRF by default, you don't need to add network.api_headers yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
        instances.set_last_error(domain, exception)

        logger.debug("Returning empty list ... - EXIT!")
        return list()
96 logger.debug("Fetching '/api/v3/site' from domain='%s' ...", domain)
97 data = network.get_json_api(
101 (config.get("connection_timeout"), config.get("read_timeout"))
104 logger.debug("data[]='%s'", type(data))
105 if "error_message" in data:
106 logger.warning("Could not reach any JSON API: domain='%s',error_message='%s'", domain, data["error_message"])
107 instances.set_last_error(domain, data)
108 elif "federated_instances" in data["json"] and isinstance(data["json"]["federated_instances"], dict):
109 logger.debug("Found federated_instances for domain='%s'", domain)
110 peers = peers + federation.add_peers(data["json"]["federated_instances"])
111 logger.debug("peers()=%d after adding", len(peers))
113 logger.debug("Marking domain='%s' as successfully handled ...", domain)
114 instances.set_success(domain)
116 logger.debug("peers()=%d", len(peers))
118 logger.debug("Fetching instances for domain='%s' from /instances ...", domain)
119 peers = fetch_instances(domain, origin)
120 logger.debug("peers()=%d after fetch_instances(%s, %s)", len(peers), domain, origin)
122 except network.exceptions as exception:
123 logger.warning("Exception during fetching JSON: domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
124 instances.set_last_error(domain, exception)
126 logger.debug("peers()=%d - EXIT!", len(peers))
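
# Scrapes a Lemmy instance's /instances page for its blocklist and returns a
# list of dicts shaped like:
#   {"blocker": domain, "blocked": <peer>, "reason": None, "block_level": "reject"}
# The page carries no per-domain reason, hence reason is always None and the
# block level is a fixed "reject".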
def fetch_blocks(domain: str) -> list:
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    blocklist = list()

    try:
        # json endpoint for newer mastodons
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,  # assumed default HTML headers from fba.http.network
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Parsing %d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
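
            # Lemmy's /instances page groups its lists in <div> containers;
            # the two class criteria below presumably cover different UI
            # versions of that page.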
            found = None
            for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("Trying to find criteria='%s' ...", criteria)
                containers = doc.findAll("div", criteria)

                logger.debug("Checking %d containers ...", len(containers))
                for container in containers:
                    logger.debug("container[]='%s'", type(container))
                    for header in container.find_all(["h2", "h3", "h4", "h5"]):
                        logger.debug("header[%s]='%s' - BEFORE!", type(header), header)
                        content = str(header.contents[0]) if len(header.contents) > 0 else None
                        logger.debug("content[%s]='%s' - AFTER!", type(content), content)

                        if content in [None, ""]:
                            logger.debug("domain='%s' has returned empty header='%s' - SKIPPED!", domain, header)
                            continue
                        elif not isinstance(content, str):
                            logger.debug("content[]='%s' is not supported/wanted type 'str' - SKIPPED!", type(content))
                            continue
                        elif content.lower() in _translations:
                            logger.debug("Found header='%s' with blocked instances - BREAK(3)!", header)
                            found = header
                            break

                    logger.debug("found[]='%s'", type(found))
                    if found is not None:
                        logger.debug("Found header with blocked instances - BREAK(2)!")
                        break

                logger.debug("found[]='%s'", type(found))
                if found is not None:
                    logger.debug("Found header with blocked instances - BREAK(1)!")
                    break

            logger.debug("found[]='%s'", type(found))
            if found is None:
                logger.info("domain='%s' has no HTML blocklist, checking scripts ...", domain)
                peers = parse_script(doc, "blocked")

                logger.debug("domain='%s' has %d peer(s).", domain, len(peers))
                for blocked in peers:
                    logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                    blocklist.append({
                        "blocker"    : domain,
                        "blocked"    : blocked,
                        "reason"     : None,
                        "block_level": "reject",
                    })

                logger.debug("blocklist()=%d - EXIT!", len(blocklist))
                return blocklist
            blocking = found.find_next(["ul", "table"]).findAll("a")
            logger.debug("Found %d blocked instance(s) ...", len(blocking))
            for tag in blocking:
                logger.debug("tag[]='%s'", type(tag))
                blocked = tidyup.domain(tag.contents[0]) if tag.contents[0] != "" else None
                logger.debug("blocked='%s'", blocked)

                if blocked in [None, ""]:
                    logger.warning("blocked='%s' is empty after tidyup.domain() - SKIPPED!", tag.contents[0])
                    continue
                elif not domain_helper.is_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='reject' ...", domain, blocked)
                blocklist.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": "reject",
                })
        else:
            logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
            instances.set_last_error(domain, response)

    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("blocklist()=%d - EXIT!", len(blocklist))
    return blocklist
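
# Scrapes a Lemmy instance's /instances page for linked peers, falling back
# to the JSON embedded in the page's <script> tag (see parse_script() below)
# when the HTML lists yield nothing.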
def fetch_instances(domain: str, origin: str) -> list:
    logger.debug("domain='%s',origin='%s' - CALLED!", domain, origin)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")

    peers = list()

    try:
        # json endpoint for newer mastodons
        logger.debug("Fetching /instances from domain='%s'", domain)
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,  # same assumed helper as in fetch_blocks()
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if response.ok and response.status_code == 200 and response.text != "":
            logger.debug("Parsing %d Bytes ...", len(response.text))

            doc = bs4.BeautifulSoup(response.text, "html.parser")
            logger.debug("doc[]='%s'", type(doc))
            for criteria in [{"class": "home-instances container-lg"}, {"class": "container"}]:
                logger.debug("criteria='%s'", criteria)
                containers = doc.findAll("div", criteria)

                logger.debug("Checking %d containers ...", len(containers))
                for header in containers:
                    logger.debug("header[%s]='%s'", type(header), header)

                    rows = header.find_next(["ul", "table"]).findAll("a")
                    logger.debug("Found %d instance(s) ...", len(rows))
                    for tag in rows:
                        logger.debug("tag[]='%s'", type(tag))
                        text = tag.contents[0] if isinstance(tag.contents[0], str) else tag.contents[0].text

                        logger.debug("text[%s]='%s' - BEFORE!", type(text), text)
                        peer = tidyup.domain(text) if text != "" else None
                        logger.debug("peer='%s' - AFTER!", peer)

                        if peer in [None, ""]:
                            logger.warning("peer='%s' is empty, text='%s' - SKIPPED!", peer, text)
                            continue
                        elif not domain_helper.is_wanted(peer):
                            logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                            continue
                        elif peer in peers:
                            logger.debug("peer='%s' already added - SKIPPED!", peer)
                            continue

                        logger.debug("Appending peer='%s' ...", peer)
                        peers.append(peer)

            logger.debug("peers()=%d", len(peers))
            if len(peers) == 0:
                logger.debug("Found no peers for domain='%s', trying script tag ...", domain)
                peers = parse_script(doc)
                logger.debug("Parsing doc()=%d returned %d peer(s).", len(doc), len(peers))
        else:
            logger.warning("Cannot fetch /instances due to error: response.ok='%s',response.status_code=%d,response.details='%s'", response.ok, response.status_code, response.reason)
            instances.set_last_error(domain, response)

        logger.debug("Marking domain='%s' as successfully handled, peers()=%d ...", domain, len(peers))
        instances.set_success(domain)

    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]:'%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers
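
# Extracts peer domains from the `window.isoData` JSON blob that Lemmy's
# web UI embeds in a <script> tag. 'only' restricts parsing to a single
# element of the federated_instances data, e.g. "blocked".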
def parse_script(doc: bs4.BeautifulSoup, only: str = None) -> list:
    logger.debug("doc[]='%s',only='%s' - CALLED!", type(doc), only)

    if not isinstance(doc, bs4.BeautifulSoup):
        raise ValueError(f"Parameter doc[]='{type(doc)}' is not of type 'bs4.BeautifulSoup'")
    elif not isinstance(only, str) and only is not None:
        raise ValueError(f"Parameter only[]='{type(only)}' is not of type 'str'")
    elif isinstance(only, str) and only == "":
        raise ValueError("Parameter 'only' is empty")
    scripts = doc.find_all("script")
    peers = list()

    logger.debug("scripts()=%d", len(scripts))
    for script in scripts:
        logger.debug("script[%s].contents()=%d", type(script), len(script.contents))
        if len(script.contents) == 0:
            logger.debug("script has no contents - SKIPPED!")
            continue
        elif not script.contents[0].startswith("window.isoData"):
            logger.debug("script.contents[0]='%s' does not start with 'window.isoData' - SKIPPED!", script.contents[0])
            continue

        logger.debug("script.contents[0][]='%s'", type(script.contents[0]))
        iso_data = script.contents[0].split("=")[1].strip().replace(":undefined", ":\"undefined\"")
        logger.debug("iso_data[%s]='%s'", type(iso_data), iso_data)
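
        # The payload is expected to look roughly like (hypothetical sample):
        #   window.isoData = {"routeData": {"federatedInstancesResponse":
        #     {"data": {"federated_instances": {"blocked": [{"domain": ...}, ...]}}}}}
        # json.loads() chokes on bare `undefined` values, hence the
        # ':undefined' -> ':"undefined"' replacement above.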
        try:
            parsed = json.loads(iso_data)
        except json.decoder.JSONDecodeError as exception:
            logger.warning("Exception '%s' during parsing %d Bytes: '%s' - EXIT!", type(exception), len(iso_data), str(exception))
            return list()

        logger.debug("parsed[%s]()=%d", type(parsed), len(parsed))

        if "routeData" not in parsed:
            logger.warning("parsed[%s]()=%d does not contain element 'routeData'", type(parsed), len(parsed))
            continue
        elif "federatedInstancesResponse" not in parsed["routeData"]:
            logger.warning("parsed[routeData][%s]()=%d does not contain element 'federatedInstancesResponse'", type(parsed["routeData"]), len(parsed["routeData"]))
            continue
        elif "data" not in parsed["routeData"]["federatedInstancesResponse"]:
            logger.warning("parsed[routeData][federatedInstancesResponse][%s]()=%d does not contain element 'data'", type(parsed["routeData"]["federatedInstancesResponse"]), len(parsed["routeData"]["federatedInstancesResponse"]))
            continue
        elif "federated_instances" not in parsed["routeData"]["federatedInstancesResponse"]["data"]:
            logger.warning("parsed[routeData][federatedInstancesResponse][data][%s]()=%d does not contain element 'federated_instances'", type(parsed["routeData"]["federatedInstancesResponse"]["data"]), len(parsed["routeData"]["federatedInstancesResponse"]["data"]))
            continue

        data = parsed["routeData"]["federatedInstancesResponse"]["data"]["federated_instances"]
        logger.debug("Checking %d data elements ...", len(data))
        for element in data:
            logger.debug("element[%s]='%s'", type(element), element)
            if isinstance(only, str) and only != element:
                logger.debug("Skipping unwanted element='%s',only='%s'", element, only)
                continue

            logger.debug("Checking data[%s]()=%d row(s) ...", element, len(data[element]))
            for row in data[element]:
                logger.debug("row[]='%s'", type(row))
                if "domain" not in row:
                    logger.warning("row()=%d has no element 'domain' - SKIPPED!", len(row))
                    continue
                elif row["domain"] in [None, ""]:
                    logger.debug("row[domain]='%s' is empty - SKIPPED!", row["domain"])
                    continue

                logger.debug("row[domain]='%s' - BEFORE!", row["domain"])
                peer = tidyup.domain(row["domain"])
                logger.debug("peer='%s' - AFTER!", peer)

                if peer in [None, ""]:
                    logger.warning("peer='%s' is empty, row[domain]='%s' - SKIPPED!", peer, row["domain"])
                    continue
                elif not domain_helper.is_wanted(peer):
                    logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                    continue
                elif peer in peers:
                    logger.debug("peer='%s' already added - SKIPPED!", peer)
                    continue

                logger.debug("Appending peer='%s' ...", peer)
                peers.append(peer)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers