1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
24 from fba import blacklist
25 from fba import config
27 from fba import federation
28 from fba import network
30 from fba.helpers import cache
32 from fba.models import error_log
# Information discovered about a node (such as its nodeinfo URL or detection
# mode) that still needs to be written to the database. Both arrays must be
# filled at the same time, or else update_data() will fail.
38 # Detection mode: 'AUTO_DISCOVERY', 'STATIC_CHECKS' or 'GENERATOR'
39 # NULL means all detection methods have failed (maybe still reachable instance)
40 "detection_mode" : {},
45 # Last fetched instances
46 "last_instance_fetch": {},
51 # Last nodeinfo (fetched)
54 "last_status_code" : {},
56 "last_error_details" : {},
59 def _set_data(key: str, domain: str, value: any):
60 # DEBUG: print(f"DEBUG: key='{key}',domain='{domain}',value[]='{type(value)}' - CALLED!")
61 if not isinstance(key, str):
62 raise ValueError("Parameter key[]='{type(key)}' is not 'str'")
64 raise ValueError("Parameter 'key' is empty")
65 elif not isinstance(domain, str):
66 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
68 raise ValueError("Parameter 'domain' is empty")
69 elif not key in _pending:
70 raise ValueError(f"key='{key}' not found in _pending")
71 elif not fba.is_primitive(value):
72 raise ValueError(f"value[]='{type(value)}' is not a primitive type")
75 _pending[key][domain] = value
77 # DEBUG: print("DEBUG: EXIT!")
def has_pending(domain: str) -> bool:
    """Tell whether any entry of ``_pending`` holds a value for ``domain``.

    Raises:
        ValueError: if ``domain`` is not a non-empty ``str``.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # Stops at the first per-key mapping that contains the domain.
    return any(domain in entries for entries in _pending.values())
def update_data(domain: str):
    """Write all pending values for ``domain`` to the ``instances`` table.

    Every key of ``_pending`` that holds a value for ``domain`` becomes a
    ``<key> = ?`` assignment of a single UPDATE statement, ``last_updated``
    is set to the current time, the change is committed and the pending
    values for the domain are removed afterwards.

    Raises:
        ValueError: if ``domain`` is not a non-empty ``str`` or no field
            could be collected.
        Exception: if the domain has no pending data, is not registered, or
            the UPDATE did not affect any row.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")
    if not has_pending(domain):
        raise Exception(f"domain='{domain}' has no pending instance data, but function invoked")
    if not is_registered(domain):
        raise Exception(f"domain='{domain}' cannot be updated while not being registered")

    # Keys that currently carry a value for this domain, in _pending order.
    pending_keys = [key for key in _pending if domain in _pending[key]]
    if not pending_keys:
        raise ValueError(f"No fields have been set, but method invoked, domain='{domain}'")

    assignments = "".join(f" {key} = ?," for key in pending_keys)
    fields = [_pending[key][domain] for key in pending_keys]

    # last_updated timestamp first, then the domain for the WHERE clause.
    fields.append(time.time())
    fields.append(domain)

    sql_string = "UPDATE instances SET" + assignments + " last_updated = ? WHERE domain = ? LIMIT 1"
    fba.cursor.execute(sql_string, fields)

    if fba.cursor.rowcount == 0:
        raise Exception(f"Did not update any rows: domain='{domain}',fields()={len(fields)}")

    fba.connection.commit()

    # Drop the now-persisted values.
    for entries in _pending.values():
        entries.pop(domain, None)
def add(domain: str, origin: str, command: str, path: str = None, software: str = None):
    """Insert a new row for ``domain`` into the ``instances`` table.

    Parameters:
        domain: domain to register; the part before the first "/" must pass
            ``validators.domain()``.
        origin: domain this one was found on, or ``None``.
        command: non-empty string stored in the ``command`` column.
        path: optional path handed to ``federation.determine_software()``.
        software: optional software name; determined through
            ``federation.determine_software()`` when ``None``.

    Raises:
        ValueError: on wrongly typed, empty or invalid parameters and on
            ``.arpa`` domains.
        Exception: if the domain is blacklisted or refers to a single user.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")
    if origin is not None and not isinstance(origin, str):
        raise ValueError(f"origin[]='{type(origin)}' is not 'str'")
    if origin == "":
        raise ValueError("Parameter 'origin' is empty")
    if not isinstance(command, str):
        raise ValueError(f"command[]='{type(command)}' is not 'str'")
    if command == "":
        raise ValueError("Parameter 'command' is empty")
    if not validators.domain(domain.split("/")[0]):
        raise ValueError(f"Bad domain name='{domain}'")
    if path is not None and not isinstance(path, str):
        raise ValueError(f"path[]='{type(path)}' is not 'str'")
    if path == "":
        raise ValueError("Parameter 'path' is empty")
    if software is not None and not isinstance(software, str):
        raise ValueError(f"software[]='{type(software)}' is not 'str'")
    if software == "":
        raise ValueError("Parameter 'software' is empty")
    if domain.endswith(".arpa"):
        raise ValueError(f"Please don't crawl .arpa domains: domain='{domain}'")
    if origin is not None and not validators.domain(origin.split("/")[0]):
        raise ValueError(f"Bad origin name='{origin}'")
    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted, but method invoked")

    is_user_path = domain.find("/profile/") > 0 or domain.find("/users/") > 0
    is_lemmy_community = software == "lemmy" and domain.find("/c/") > 0
    if is_user_path or is_lemmy_community:
        raise Exception(f"domain='{domain}' is a single user")

    if software is None:
        try:
            software = federation.determine_software(domain, path)
        except network.exceptions as exception:
            # Detection failure is recorded; the insert continues with
            # software still being None.
            print(f"WARNING: Exception '{type(exception)}' during determining software type")
            set_last_error(domain, exception)

    if software == "lemmy" and domain.find("/c/") > 0:
        # Keep only the part before the community path.
        domain = domain.split("/c/")[0]
        if is_registered(domain):
            print(f"WARNING: domain='{domain}' already registered after cutting off user part. - EXIT!")
            return

    print(f"INFO: Adding instance domain='{domain}' (origin='{origin}',software='{software}')")
    row_values = (
        domain,
        origin,
        command,
        fba.get_hash(domain),
        software,
        time.time(),
    )
    fba.cursor.execute(
        "INSERT INTO instances (domain, origin, command, hash, software, first_seen) VALUES (?, ?, ?, ?, ?, ?)",
        row_values,
    )

    cache.set_sub_key("is_registered", domain, True)

    if has_pending(domain):
        # NOTE(review): pending data is flushed through update_data() here;
        # confirm this is the intended call for this branch.
        update_data(domain)
def set_last_nodeinfo(domain: str):
    """Queue the current time as ``last_nodeinfo`` and flush pending data.

    Raises:
        ValueError: if ``domain`` is not a non-empty ``str``.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")

    now = time.time()
    _set_data("last_nodeinfo", domain, now)

    # Run pending updates right away.
    update_data(domain)
def set_last_error(domain: str, error: dict):
    """Queue status code and error details for ``domain`` and log the error.

    ``error`` may be an exception (converted to a string first), a string
    (queued with status code 999), a ``requests`` response (its
    ``status_code`` and ``reason``), or a dict carrying either
    ``status_code`` plus ``error_message`` or ``json`` containing ``error``.
    A dict matching neither shape queues nothing. In every accepted case the
    error is handed to ``error_log.add()``.

    Raises:
        ValueError: if ``domain`` is not a non-empty ``str``.
        KeyError: if ``error`` is none of the supported types.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # JSONDecodeError derives from BaseException, so one check covers both.
    if isinstance(error, BaseException):
        error = f"error[{type(error)}]='{str(error)}'"

    outcome = None
    if isinstance(error, str):
        outcome = (999, error)
    elif isinstance(error, requests.models.Response):
        outcome = (error.status_code, error.reason)
    elif not isinstance(error, dict):
        raise KeyError(f"Cannot handle keys in error[{type(error)}]='{error}'")
    elif "status_code" in error and "error_message" in error:
        outcome = (error["status_code"], error["error_message"])
    elif "json" in error and "error" in error["json"]:
        outcome = (error["status_code"], error["json"]["error"])

    if outcome is not None:
        status_code, details = outcome
        _set_data("last_status_code" , domain, status_code)
        # Empty details are queued as None.
        _set_data("last_error_details", domain, details if details != "" else None)

    error_log.add(domain, error)
def is_registered(domain: str) -> bool:
    """Tell whether ``domain`` is present in the ``is_registered`` cache.

    On first use the cache is filled from all ``domain`` values of the
    ``instances`` table.

    Raises:
        ValueError: if ``domain`` is not a non-empty ``str``.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")

    cache_ready = cache.key_exists("is_registered")
    if not cache_ready:
        # Cache not initialized yet: load every known domain once.
        fba.cursor.execute("SELECT domain FROM instances")
        cache.set_all("is_registered", fba.cursor.fetchall(), True)

    return cache.sub_key_exists("is_registered", domain)
def is_recent(domain: str) -> bool:
    """Tell whether ``domain`` was fetched recently.

    Returns ``False`` for unregistered domains and when the stored
    ``last_instance_fetch`` is not a ``float``. Otherwise returns whether
    the time elapsed since that value is at most
    ``config.get("recheck_instance")``.

    Raises:
        ValueError: if ``domain`` is not a non-empty ``str``.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")
    if not is_registered(domain):
        return False

    fba.cursor.execute("SELECT last_instance_fetch FROM instances WHERE domain = ? LIMIT 1", [domain])
    fetched = fba.cursor.fetchone()[0]

    if not isinstance(fetched, float):
        return False

    elapsed = time.time() - fetched
    return elapsed <= config.get("recheck_instance")
def deobscure(char: str, domain: str, blocked_hash: str = None) -> tuple:
    """Look up the instance row behind an obscured domain.

    When ``blocked_hash`` is given, the row is first searched by ``hash``.
    If no hash was given or the hash was not found, the row is searched with
    ``LIKE`` after replacing every ``char`` in ``domain`` with ``_``.

    Returns:
        The fetched row (domain, origin, nodeinfo_url) or ``None`` when
        nothing matched.

    Raises:
        ValueError: if ``char`` or ``domain`` is not a non-empty ``str`` or
            ``blocked_hash`` is neither ``str`` nor ``None``.
    """
    if not isinstance(char, str):
        raise ValueError(f"Parameter char[]='{type(char)}' is not 'str'")
    if char == "":
        raise ValueError("Parameter 'char' is empty")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")
    if blocked_hash is not None and not isinstance(blocked_hash, str):
        raise ValueError(f"Parameter blocked_hash[]='{type(blocked_hash)}' is not 'str'")

    if blocked_hash is not None:
        fba.cursor.execute(
            "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
        )
        by_hash = fba.cursor.fetchone()
        if by_hash is not None:
            return by_hash
        # Hash unknown: fall through to the domain pattern lookup.

    like_pattern = domain.replace(char, "_")
    fba.cursor.execute(
        "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [like_pattern]
    )
    return fba.cursor.fetchone()
def set_last_blocked(domain: str):
    """Queue the current time as ``last_blocked`` for ``domain``.

    Raises:
        ValueError: if ``domain`` is not a non-empty ``str``.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")

    timestamp = time.time()
    _set_data("last_blocked", domain, timestamp)
def set_last_instance_fetch(domain: str):
    """Queue the current time as ``last_instance_fetch`` for ``domain``.

    Raises:
        ValueError: if ``domain`` is not a non-empty ``str``.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")

    timestamp = time.time()
    _set_data("last_instance_fetch", domain, timestamp)
def set_total_peers(domain: str, peers: list):
    """Queue the number of entries in ``peers`` as ``total_peers``.

    Parameters:
        domain: instance domain the count belongs to.
        peers: list of peers; only its length is stored.

    Raises:
        ValueError: if ``domain`` is not a non-empty ``str`` or ``peers`` is
            not a ``list``.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',peers()={len(peers)} - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(peers, list):
        # f-prefix added so the actual type shows up in the message.
        raise ValueError(f"Parameter peers[]='{type(peers)}' is not 'list'")

    # Set the count
    _set_data("total_peers", domain, len(peers))
    # DEBUG: print("DEBUG: EXIT!")
def set_nodeinfo_url(domain: str, url: str):
    """Queue ``url`` as ``nodeinfo_url`` for ``domain``.

    Parameters:
        domain: instance domain the URL belongs to.
        url: non-empty nodeinfo URL string.

    Raises:
        ValueError: if ``domain`` or ``url`` is not a non-empty ``str``.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',url='{url}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(url, str):
        # The check is for str, so the message names 'str' and interpolates
        # the offending type.
        raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
    elif url == "":
        raise ValueError("Parameter 'url' is empty")

    # Set the URL
    _set_data("nodeinfo_url", domain, url)
    # DEBUG: print("DEBUG: EXIT!")
def set_detection_mode(domain: str, mode: str):
    """Queue ``mode`` as ``detection_mode`` for ``domain``.

    This single definition replaces three consecutive definitions of the
    same name; only the last one (taking ``mode``) was ever effective.

    Parameters:
        domain: instance domain the mode belongs to.
        mode: non-empty detection mode string.

    Raises:
        ValueError: if ``domain`` or ``mode`` is not a non-empty ``str``.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',mode='{mode}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(mode, str):
        # The check is for str, so the message names 'str' and interpolates
        # the offending type.
        raise ValueError(f"Parameter mode[]='{type(mode)}' is not 'str'")
    elif mode == "":
        raise ValueError("Parameter 'mode' is empty")

    # Set the mode
    _set_data("detection_mode", domain, mode)
    # DEBUG: print("DEBUG: EXIT!")