1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
25 from fba.helpers import blacklist
26 from fba.helpers import cache
27 from fba.helpers import config
29 from fba.http import federation
30 from fba.http import network
32 from fba.models import error_log
# Pending per-domain data found for a node (such as its nodeinfo URL or
# detection mode) that still needs to be written to the database. Both arrays
# must be filled at the same time or else update_data() will fail.
38 # Detection mode: 'AUTO_DISCOVERY', 'STATIC_CHECKS' or 'GENERATOR'
39 # NULL means all detection methods have failed (maybe still reachable instance)
40 "detection_mode" : {},
45 # Last fetched instances
46 "last_instance_fetch": {},
51 # Last nodeinfo (fetched)
54 "last_status_code" : {},
56 "last_error_details" : {},
59 def _set_data(key: str, domain: str, value: any):
60 # DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',value[]='{type(value)}' - CALLED!")
61 if not isinstance(key, str):
62 raise ValueError("Parameter key[]='{type(key)}' is not 'str'")
64 raise ValueError("Parameter 'key' is empty")
65 elif not isinstance(domain, str):
66 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
68 raise ValueError("Parameter 'domain' is empty")
69 elif not key in _pending:
70 raise ValueError(f"key='{key}' not found in _pending")
71 elif not fba.is_primitive(value):
72 raise ValueError(f"value[]='{type(value)}' is not a primitive type")
75 _pending[key][domain] = value
77 # DEBUG: print("DEBUG: EXIT!")
def has_pending(domain: str) -> bool:
    """Tell whether any key of ``_pending`` holds a value for the domain.

    Args:
        domain: Domain to look up.

    Returns:
        True if at least one pending key contains ``domain``, else False.

    Raises:
        ValueError: If ``domain`` is not a non-empty string.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # Stops at the first key that has queued data for this domain
    return any(domain in queued for queued in _pending.values())
def update_data(domain: str):
    """Write all pending values of a domain to its ``instances`` row.

    Every key of ``_pending`` that holds a value for ``domain`` becomes one
    ``<key> = ?`` assignment of a single UPDATE statement, ``last_updated`` is
    set to the current time, the change is committed and the pending values
    for the domain are removed afterwards.

    Args:
        domain: Domain whose pending data is flushed.

    Raises:
        ValueError: If ``domain`` is not a non-empty string or no field was
            collected.
        Exception: If the domain has no pending data, is not registered, or
            the UPDATE statement did not affect any row.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")
    if not has_pending(domain):
        raise Exception(f"domain='{domain}' has no pending instance data, but function invoked")
    if not is_registered(domain):
        raise Exception(f"domain='{domain}' cannot be updated while not being registered")

    # Columns that have a queued value for this domain, in registry order
    columns = [key for key in _pending if domain in _pending[key]]
    if not columns:
        raise ValueError(f"No fields have been set, but method invoked, domain='{domain}'")

    assignments = "".join(f" {column} = ?," for column in columns)
    statement = "UPDATE instances SET" + assignments + " last_updated = ? WHERE domain = ? LIMIT 1"

    # Values in the same order as the placeholders above
    parameters = [_pending[column][domain] for column in columns]

    # Set last_updated to current timestamp
    parameters.append(time.time())

    # For WHERE statement
    parameters.append(domain)

    fba.cursor.execute(statement, parameters)
    if fba.cursor.rowcount == 0:
        raise Exception(f"Did not update any rows: domain='{domain}',fields()={len(parameters)}")

    fba.connection.commit()

    # The data has been written, so it is no longer pending
    for queued in _pending.values():
        queued.pop(domain, None)
def add(domain: str, origin: str, command: str, path: str = None, software: str = None):
    """Insert a new row for a domain into the ``instances`` table.

    When no software is given it is determined through
    ``federation.determine_software()``; a network failure there is recorded
    with ``set_last_error()`` and the row is still inserted. After the insert
    the domain is marked as registered in the cache and any pending data for
    it is flushed with ``update_data()``.

    Args:
        domain: Domain to register.
        origin: Domain this one was found on, or None.
        command: Non-empty string stored in the ``command`` column.
        path: Optional path handed to the software detection.
        software: Optional software name; detected when None.

    Raises:
        ValueError: If a parameter has the wrong type or is empty, or the
            domain/origin is invalid, ends with ".arpa" or ends with ".tld".
        Exception: If the domain is blacklisted or points to a single user.
    """
    # Each guard raises, so they are evaluated strictly in this order
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")
    if not isinstance(origin, str) and origin is not None:
        raise ValueError(f"origin[]='{type(origin)}' is not 'str'")
    if origin == "":
        raise ValueError("Parameter 'origin' is empty")
    if not isinstance(command, str):
        raise ValueError(f"command[]='{type(command)}' is not 'str'")
    if command == "":
        raise ValueError("Parameter 'command' is empty")
    if not validators.domain(domain.split("/")[0]):
        raise ValueError(f"Bad domain name='{domain}'")
    if not isinstance(path, str) and path is not None:
        raise ValueError(f"path[]='{type(path)}' is not 'str'")
    if path == "":
        raise ValueError("Parameter 'path' is empty")
    if not isinstance(software, str) and software is not None:
        raise ValueError(f"software[]='{type(software)}' is not 'str'")
    if software == "":
        raise ValueError("Parameter 'software' is empty")
    if domain.endswith(".arpa"):
        raise ValueError(f"Please don't crawl .arpa domains: domain='{domain}'")
    if origin is not None and not validators.domain(origin.split("/")[0]):
        raise ValueError(f"Bad origin name='{origin}'")
    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted, but method invoked")
    if domain.find("/profile/") > 0 or domain.find("/users/") > 0 or (software == "lemmy" and domain.find("/c/") > 0):
        raise Exception(f"domain='{domain}' is a single user")
    if domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")

    if software is None:
        try:
            software = federation.determine_software(domain, path)
        except network.exceptions as error:
            # Detection failure is not fatal, the instance is added without software
            print(f"WARNING: Exception '{type(error)}' during determining software type")
            set_last_error(domain, error)

    if software == "lemmy" and domain.find("/c/") > 0:
        # Keep only the part in front of the "/c/" segment
        domain = domain.split("/c/")[0]
        if is_registered(domain):
            print(f"WARNING: domain='{domain}' already registered after cutting off user part. - EXIT!")
            return

    print(f"INFO: Adding instance domain='{domain}' (origin='{origin}',software='{software}')")
    row = (domain, origin, command, fba.get_hash(domain), software, time.time())
    fba.cursor.execute(
        "INSERT INTO instances (domain, origin, command, hash, software, first_seen) VALUES (?, ?, ?, ?, ?, ?)",
        row,
    )

    cache.set_sub_key("is_registered", domain, True)

    # Data may have been queued before the row existed
    if has_pending(domain):
        update_data(domain)
def set_last_nodeinfo(domain: str):
    """Queue the current time as ``last_nodeinfo`` and flush the domain's data.

    Args:
        domain: Domain whose nodeinfo timestamp is updated.

    Raises:
        ValueError: If ``domain`` is not a non-empty string.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")

    now = time.time()
    _set_data("last_nodeinfo", domain, now)

    # Write everything queued for this domain to the database
    update_data(domain)
def set_last_error(domain: str, error: dict):
    """Queue status code and details of the last error and log the error.

    Despite the annotation, ``error`` is handled as one of:
      * an exception, which is first turned into a descriptive string,
      * a string, queued with status code 999,
      * a ``requests.models.Response``, using its status code and reason,
      * a dict with "status_code" and "error_message", or with "json"
        containing "error" (the status code is read from "status_code").

    A dict matching neither shape queues nothing. In every non-raising case
    the (possibly stringified) error is passed on to ``error_log.add()``.

    Args:
        domain: Domain the error occurred for.
        error: The error, see above.

    Raises:
        ValueError: If ``domain`` is not a non-empty string.
        KeyError: If ``error`` is of an unsupported type.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # JSONDecodeError derives from BaseException as well, one check covers both
    if isinstance(error, BaseException):
        error = f"error[{type(error)}]='{str(error)}'"

    # (status code, details) to queue, or None when nothing can be extracted
    recorded = None
    if isinstance(error, str):
        recorded = (999, error)
    elif isinstance(error, requests.models.Response):
        recorded = (error.status_code, error.reason)
    elif not isinstance(error, dict):
        raise KeyError(f"Cannot handle keys in error[{type(error)}]='{error}'")
    elif "status_code" in error and "error_message" in error:
        recorded = (error["status_code"], error["error_message"])
    elif "json" in error and "error" in error["json"]:
        recorded = (error["status_code"], error["json"]["error"])

    if recorded is not None:
        status_code, details = recorded
        _set_data("last_status_code", domain, status_code)
        # Empty details are queued as None
        _set_data("last_error_details", domain, details if details != "" else None)

    error_log.add(domain, error)
def is_registered(domain: str) -> bool:
    """Tell whether a domain is known in the "is_registered" cache.

    On first use the cache is populated with the ``domain`` column of every
    row of the ``instances`` table.

    Args:
        domain: Domain to look up.

    Returns:
        Result of ``cache.sub_key_exists("is_registered", domain)``.

    Raises:
        ValueError: If ``domain`` is not a non-empty string.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")

    if not cache.key_exists("is_registered"):
        # Cache not initialized yet, load all registered domains once
        fba.cursor.execute("SELECT domain FROM instances")
        rows = fba.cursor.fetchall()
        cache.set_all("is_registered", rows, True)

    return cache.sub_key_exists("is_registered", domain)
def is_recent(domain: str) -> bool:
    """Tell whether the instances of a domain were fetched recently.

    Args:
        domain: Domain to check.

    Returns:
        False for an unregistered domain or when ``last_instance_fetch`` is
        not a float; otherwise whether the time passed since that value is at
        most ``config.get("recheck_instance")``.

    Raises:
        ValueError: If ``domain`` is not a non-empty string.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")
    if not is_registered(domain):
        return False

    fba.cursor.execute("SELECT last_instance_fetch FROM instances WHERE domain = ? LIMIT 1", [domain])
    last_fetch = fba.cursor.fetchone()[0]

    # A value that is not a float (e.g. NULL) counts as never fetched
    if not isinstance(last_fetch, float):
        return False

    return time.time() - last_fetch <= config.get("recheck_instance")
def deobscure(char: str, domain: str, blocked_hash: str = None) -> tuple:
    """Look up the instance row matching an obscured domain.

    If a hash is given, the row is first searched by ``hash``. When no hash
    is given or the hash is unknown, every occurrence of ``char`` in the
    domain is replaced by "_" and the result is used as a LIKE pattern on the
    ``domain`` column; the match with the lowest rowid wins.

    Args:
        char: Character that was used to obscure the domain.
        domain: The obscured domain.
        blocked_hash: Optional hash of the real domain.

    Returns:
        The fetched row (domain, origin, nodeinfo_url), or None if nothing
        matched.

    Raises:
        ValueError: If ``char`` or ``domain`` is not a non-empty string, or
            ``blocked_hash`` is neither a string nor None.
    """
    if not isinstance(char, str):
        raise ValueError(f"Parameter char[]='{type(char)}' is not 'str'")
    if char == "":
        raise ValueError("Parameter 'char' is empty")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")
    if not isinstance(blocked_hash, str) and blocked_hash is not None:
        raise ValueError(f"Parameter blocked_hash[]='{type(blocked_hash)}' is not 'str'")

    match = None
    if isinstance(blocked_hash, str):
        fba.cursor.execute(
            "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
        )
        match = fba.cursor.fetchone()

    if match is None:
        # No hash given or hash not found, fall back to a pattern search
        pattern = domain.replace(char, "_")
        fba.cursor.execute(
            "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [pattern]
        )
        match = fba.cursor.fetchone()

    return match
def set_last_blocked(domain: str):
    """Queue the current time as ``last_blocked`` for a domain.

    Args:
        domain: Domain the timestamp is queued for.

    Raises:
        ValueError: If ``domain`` is not a non-empty string.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")

    now = time.time()
    _set_data("last_blocked", domain, now)
def set_last_instance_fetch(domain: str):
    """Queue the current time as ``last_instance_fetch`` for a domain.

    Args:
        domain: Domain the timestamp is queued for.

    Raises:
        ValueError: If ``domain`` is not a non-empty string.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")

    now = time.time()
    _set_data("last_instance_fetch", domain, now)
def set_total_peers(domain: str, peers: list):
    """Queue the number of peers of a domain as ``total_peers``.

    Args:
        domain: Domain the peers belong to.
        peers: List of peers; only its length is stored.

    Raises:
        ValueError: If ``domain`` is not a non-empty string or ``peers`` is
            not a list.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")
    if not isinstance(peers, list):
        raise ValueError(f"Parameter peers[]='{type(peers)}' is not 'list'")

    peer_count = len(peers)
    _set_data("total_peers", domain, peer_count)
def set_nodeinfo_url(domain: str, url: str):
    """Queue the nodeinfo URL found for a domain as ``nodeinfo_url``.

    Args:
        domain: Domain the URL belongs to.
        url: The nodeinfo URL.

    Raises:
        ValueError: If ``domain`` or ``url`` is not a non-empty string.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(url, str):
        # f-string so the actual type is shown; the expected type is 'str'
        raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
    elif url == "":
        raise ValueError("Parameter 'url' is empty")

    # Set data
    _set_data("nodeinfo_url", domain, url)
def set_detection_mode(domain: str, mode: str):
    """Queue the detection mode of a domain as ``detection_mode``.

    Args:
        domain: Domain the mode belongs to.
        mode: The detection mode; only checked to be a non-empty string here.

    Raises:
        ValueError: If ``domain`` or ``mode`` is not a non-empty string.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(mode, str):
        # f-string so the actual type is shown; the expected type is 'str'
        raise ValueError(f"Parameter mode[]='{type(mode)}' is not 'str'")
    elif mode == "":
        raise ValueError("Parameter 'mode' is empty")

    # Set data
    _set_data("detection_mode", domain, mode)