# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
import inspect

import bs4
import validators

from fba import csrf
from fba import fba
from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import tidyup
from fba.http import federation
from fba.http import network
from fba.models import blocks
from fba.models import instances
def fetch_peers(domain: str) -> list:
    """Fetch the list of federated peer domains from a Lemmy node.

    Queries the node's Lemmy JSON API, extracts the "federated_instances"
    element from the response and registers all peers through
    federation.add_peers(). Any network error is recorded against the
    instance and an empty list is returned in that case.

    :param domain: Fully qualified, all lower-case domain name of the node
    :return: List of peer domain names (may be empty)
    :raises ValueError: If `domain` fails any sanity check
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif domain.lower() != domain:
        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")
    elif domain.endswith(".arpa"):
        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")

    peers = list()

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
        # Without usable headers the API cannot be queried - abort early
        return peers

    try:
        # NOTE(review): endpoint path reconstructed from Lemmy's HTTP API
        # ("/api/v3/site" carries "federated_instances") - the original call
        # arguments were not fully visible, confirm against upstream.
        data = network.get_json_api(
            domain,
            "/api/v3/site",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        if "error_message" in data:
            print("WARNING: Could not reach any JSON API:", domain)
            instances.set_last_error(domain, data)
        elif "federated_instances" in data["json"]:
            peers = peers + federation.add_peers(data["json"]["federated_instances"])
        else:
            print("WARNING: JSON response does not contain 'federated_instances':", domain)
            instances.set_last_error(domain, data)

    except network.exceptions as exception:
        print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(exception)}]:'{str(exception)}'")
        instances.set_last_error(domain, exception)

    # Persist how many peers this instance federates with
    instances.set_total_peers(domain, peers)

    return peers
def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Scrape a Lemmy node's /instances page for blocked instances.

    Lemmy exposes no JSON blocklist API, so the HTML page is parsed instead:
    an <h5> heading matching one of the known translations of "Blocked
    instances" is located and the <ul> of links following it is treated as
    the blocklist. Newly seen domains are registered and "reject" block
    entries are stored or refreshed.

    :param domain: Fully qualified, all lower-case domain name of the node
    :param origin: Origin domain that led the crawler here, or None
    :param nodeinfo_url: URL of the node's nodeinfo document
    :raises ValueError: If any parameter fails its sanity check
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif domain.lower() != domain:
        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")
    elif domain.endswith(".arpa"):
        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # Known localizations of the "Blocked instances" heading on /instances.
    # NOTE(review): several entries of the upstream list fell into gaps of
    # this view and could not be recovered - extend as needed.
    translations = [
        "Blocked instances",
        "Instàncies bloquejades",
        "Blokované instance",
        "Geblokkeerde instanties",
        "Blockerade instanser",
        "Instàncias blocadas",
        "Instances bloquées",
        "Letiltott példányok",
        "Instancias bloqueadas",
        "Blokeatuta dauden instantziak",
        "Peladen Yang Diblokir",
        "Блокирани Инстанции",
        "Blockierte Instanzen",
        "Estetyt instanssit",
        "Instâncias bloqueadas",
        "Zablokowane instancje",
        "Blokované inštancie",
        "Užblokuoti serveriai",
        "Блокированные Инстансы",
        "Αποκλεισμένοι διακομιστές",
    ]

    # NOTE(review): the original also initialized a dict of block levels
    # ("reject", "media_removal", "followers_only", "report_removal") behind a
    # "json endpoint for newer mastodons" comment; it is not used anywhere in
    # the visible flow (likely a leftover from mastodon.py) and was dropped
    # here - confirm against upstream before relying on this.
    found_blocks = list()

    try:
        response = network.fetch_response(
            domain,
            "/instances",
            network.web_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        if response.ok and response.status_code < 300 and response.text != "":
            doc = bs4.BeautifulSoup(response.text, "html.parser")

            # Find the heading announcing the blocklist
            found = None
            for header in doc.findAll("h5"):
                content = header.contents[0]
                if content in translations:
                    found = header
                    break

            if found is None:
                # This instance is not blocking anyone (or uses an unknown
                # translation of the heading)
                return

            # The <ul> right after the heading holds one <a> per blocked peer
            blocking = found.find_next("ul").findAll("a")
            for tag in blocking:
                blocked = tidyup.domain(tag.contents[0])

                if not validators.domain(blocked):
                    print(f"WARNING: blocked='{blocked}' is not a valid domain - SKIPPED!")
                    continue
                elif blocked.endswith(".arpa"):
                    print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
                    continue
                elif blocked.endswith(".tld"):
                    print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
                    continue
                elif blacklist.is_blacklisted(blocked):
                    continue
                elif not instances.is_registered(blocked):
                    # First sighting: register where we learned about it
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)

                if not blocks.is_instance_blocked(domain, blocked, "reject"):
                    # Lemmy only has a single (full) block level -> "reject"
                    blocks.add_instance(domain, blocked, None, "reject")

                    found_blocks.append({
                        "blocked": blocked,
                        "reason" : None
                    })
                else:
                    blocks.update_last_seen(domain, blocked, "reject")

        fba.connection.commit()
    except network.exceptions as exception:
        # BUGFIX: error message previously claimed software='mastodon'
        # (copy-paste from mastodon.py) although this is the Lemmy handler
        print(f"ERROR: domain='{domain}',software='lemmy',exception[{type(exception)}]:'{str(exception)}'")
        instances.set_last_error(domain, exception)