# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import json
import logging
import time

import requests
import validators

from fba import database
from fba import utils

from fba.helpers import blacklist
from fba.helpers import cache
from fba.helpers import config
from fba.helpers import domain as domain_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import error_log

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Found info from node, such as nodeinfo URL, detection mode that needs to be
# written to database. Each slot maps a domain to the value queued for the
# instances column of the same name; update_data() flushes and clears them.
# _set_data() rejects any key that is not listed here.
_pending = {
    # Detection mode
    # NULL means all detection methods have failed (maybe still reachable instance)
    "detection_mode": {},
    # Found nodeinfo URL
    "nodeinfo_url": {},
    # Found total peers
    "total_peers": {},
    # Found total blocks
    "total_blocks": {},
    # Last fetched instances
    "last_instance_fetch": {},
    # Last blocked
    "last_blocked": {},
    # Last nodeinfo (fetched)
    "last_nodeinfo": {},
    # Last status code
    "last_status_code": {},
    # Last error details
    "last_error_details": {},
    # Whether obfuscation has been used
    "has_obfuscation": {},
    # Determined software
    "software": {},
}

def _set_data(key: str, domain: str, value: object):
    """Queue one value for `domain` under the pending slot `key`.

    The value stays in the module-level _pending dict until update_data()
    writes it to the database.

    Args:
        key: Name of a slot in _pending.
        domain: Domain the value belongs to; checked by domain_helper.raise_on().
        value: Value to queue; must be accepted by utils.is_primitive().

    Raises:
        ValueError: If key is not a non-empty string naming a _pending slot,
            or value is rejected by utils.is_primitive().
    """
    logger.debug("key='%s',domain='%s',value[]='%s' - CALLED!", key, domain, type(value))
    domain_helper.raise_on(domain)

    if not isinstance(key, str):
        raise ValueError(f"Parameter key[]='{type(key)}' is not of type 'str'")
    elif key == "":
        raise ValueError("Parameter 'key' is empty")
    elif key not in _pending:
        raise ValueError(f"key='{key}' not found in _pending")
    elif not utils.is_primitive(value):
        raise ValueError(f"value[]='{type(value)}' is not a primitive type")

    # Set it
    _pending[key][domain] = value

    logger.debug("EXIT!")

def has_pending(domain: str) -> bool:
    """Return True when any _pending slot holds a queued value for `domain`.

    Args:
        domain: Domain to look for; checked by domain_helper.raise_on().
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    has = False
    for key in _pending:
        logger.debug("key='%s',domain='%s',_pending[key]()=%d", key, domain, len(_pending[key]))
        if domain in _pending[key]:
            # One hit is enough, no need to scan the remaining slots
            has = True
            break

    logger.debug("has='%s' - EXIT!", has)
    return has

def update_data(domain: str):
    """Write all queued values for `domain` to the instances table.

    Builds one UPDATE statement from every _pending slot that holds a value
    for the domain, also sets last_updated to the current time, commits, and
    then removes the flushed entries from _pending.

    Args:
        domain: Domain to flush; checked by domain_helper.raise_on().

    Raises:
        Exception: If nothing is pending for the domain, the domain is not
            registered, or the UPDATE affected no rows.
        ValueError: If no column assignment could be built.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if not has_pending(domain):
        raise Exception(f"domain='{domain}' has no pending instance data, but function invoked")
    elif not is_registered(domain):
        raise Exception(f"domain='{domain}' cannot be updated while not being registered")

    logger.debug("Updating instance data for domain='%s' ...", domain)
    sql_string = ""
    fields = list()

    # Column names come from the fixed _pending keys, values are bound as parameters
    for key in _pending:
        logger.debug("Checking key='%s',domain='%s'", key, domain)
        if domain in _pending[key]:
            logger.debug("Adding '%s' for key='%s' ...", _pending[key][domain], key)
            fields.append(_pending[key][domain])
            sql_string += f" {key} = ?,"

    logger.debug("sql_string(%d)='%s'", len(sql_string), sql_string)
    if sql_string == "":
        raise ValueError(f"No fields have been set, but method invoked, domain='{domain}'")

    # Set last_updated to current timestamp
    fields.append(time.time())

    # For WHERE statement
    logger.debug("Setting domain='%s' for WHERE statement ...", domain)
    fields.append(domain)

    logger.debug("sql_string='%s',fields()=%d", sql_string, len(fields))
    # NOTE(review): "UPDATE ... LIMIT" is only accepted by SQLite builds with
    # SQLITE_ENABLE_UPDATE_DELETE_LIMIT - confirm for the deployed library.
    sql_string = "UPDATE instances SET" + sql_string + " last_updated = ? WHERE domain = ? LIMIT 1"

    logger.debug("Executing SQL: sql_string='%s',fields()=%d", sql_string, len(fields))
    database.cursor.execute(sql_string, fields)

    logger.debug("rowcount=%d", database.cursor.rowcount)
    if database.cursor.rowcount == 0:
        raise Exception(f"Did not update any rows: domain='{domain}',fields()={len(fields)}")

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("Deleting _pending for domain='%s'", domain)
    for key in _pending:
        logger.debug("domain='%s',key='%s'", domain, key)
        if domain in _pending[key]:
            logger.debug("Deleting key='%s',domain='%s' ...", key, domain)
            del _pending[key][domain]

    logger.debug("EXIT!")

def add(domain: str, origin: str, command: str, path: str = None, software: str = None):
    """Register a new instance row and flush any values already queued for it.

    Args:
        domain: Domain to register; checked by domain_helper.raise_on().
        origin: Domain this instance was discovered from, or None.
        command: Name of the command that found the instance.
        path: Optional path handed to federation.determine_software().
        software: Software name; determined via
            federation.determine_software() when None.

    Raises:
        ValueError: If a parameter has the wrong type, is an empty string,
            or origin is not a valid domain name.
        Exception: If the domain is blacklisted or points at a single user
            or a tag instead of an instance.
    """
    logger.debug("domain='%s',origin='%s',command='%s',path='%s',software='%s' - CALLED!", domain, origin, command, path, software)
    domain_helper.raise_on(domain)

    if not isinstance(origin, str) and origin is not None:
        raise ValueError(f"origin[]='{type(origin)}' is not of type 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(command, str):
        raise ValueError(f"command[]='{type(command)}' is not of type 'str'")
    elif command == "":
        raise ValueError("Parameter 'command' is empty")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"path[]='{type(path)}' is not of type 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' is empty")
    elif not isinstance(software, str) and software is not None:
        raise ValueError(f"software[]='{type(software)}' is not of type 'str'")
    elif software == "":
        raise ValueError("Parameter 'software' is empty")
    elif origin is not None and not validators.domain(origin.split("/")[0]):
        raise ValueError(f"Bad origin name='{origin}'")
    elif blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted, but method invoked")
    elif domain.find("/profile/") > 0 or domain.find("/users/") > 0 or (is_registered(domain.split("/")[0]) and domain.find("/c/") > 0):
        raise Exception(f"domain='{domain}' is a single user")
    elif domain.find("/tag/") > 0:
        raise Exception(f"domain='{domain}' is a tag")

    if software is None:
        try:
            logger.debug("domain='%s',origin='%s',command='%s',path='%s'", domain, origin, command, path)
            software = federation.determine_software(domain, path)
        except network.exceptions as exception:
            # Registration continues with software=None, only the error is recorded
            logger.warning("Exception '%s' during determining software type, domain='%s'", type(exception), domain)
            set_last_error(domain, exception)

    logger.debug("Determined software='%s'", software)
    if software == "lemmy" and domain.find("/c/") > 0:
        # Keep only the host part in front of the "/c/" path
        domain = domain.split("/c/")[0]
        if is_registered(domain):
            logger.warning("domain='%s' already registered after cutting off user part. - EXIT!", domain)
            return

    logger.info("Adding instance domain='%s',origin='%s',software='%s',command='%s'", domain, origin, software, command)
    database.cursor.execute(
        "INSERT INTO instances (domain, origin, command, hash, software, first_seen) VALUES (?, ?, ?, ?, ?, ?)",
        (
            domain,
            origin,
            command,
            utils.get_hash(domain),
            software,
            time.time(),
        ),
    )

    logger.debug("Marking domain='%s' as registered.", domain)
    cache.set_sub_key("is_registered", domain, True)

    logger.debug("Checking if domain='%s' has pending updates ...", domain)
    if has_pending(domain):
        logger.debug("Flushing updates for domain='%s' ...", domain)
        update_data(domain)

    logger.debug("EXIT!")

def set_last_nodeinfo(domain: str):
    """Queue the current time as last_nodeinfo for `domain`.

    Args:
        domain: Domain to update; checked by domain_helper.raise_on().
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    logger.debug("Updating last_nodeinfo for domain='%s'", domain)
    _set_data("last_nodeinfo", domain, time.time())

    logger.debug("EXIT!")

def set_last_error(domain: str, error: dict):
    """Queue status code and error details for `domain` and log the error.

    Despite the annotation, `error` may be an exception, a string, a
    requests Response, or a dict:
      * exceptions are turned into a string first,
      * strings are stored with status code 999,
      * a Response contributes its status_code and reason,
      * a dict contributes "status_code" plus either "error_message" or
        the "error" entry of its "json" value.
    Empty detail strings are stored as None. Finally the (possibly
    stringified) error is handed to error_log.add().

    Args:
        domain: Domain the error belongs to; checked by domain_helper.raise_on().
        error: The error in one of the forms listed above.

    Raises:
        KeyError: If error is none of the supported types.
    """
    logger.debug("domain='%s',error[]='%s' - CALLED!", domain, type(error))
    domain_helper.raise_on(domain)

    logger.debug("error[]='%s' - BEFORE!", type(error))
    if isinstance(error, (BaseException, json.decoder.JSONDecodeError)):
        error = f"error[{type(error)}]='{str(error)}'"
    logger.debug("error[]='%s' - AFTER!", type(error))

    if isinstance(error, str):
        logger.debug("Setting last_error_details='%s' (str)", error)
        _set_data("last_status_code", domain, 999)
        _set_data("last_error_details", domain, error if error != "" else None)
    elif isinstance(error, requests.models.Response):
        logger.debug("Setting last_error_details='%s' (Response)", error.reason)
        _set_data("last_status_code", domain, error.status_code)
        _set_data("last_error_details", domain, error.reason if error.reason != "" else None)
    elif not isinstance(error, dict):
        raise KeyError(f"Cannot handle keys in error[{type(error)}]='{error}'")
    elif "status_code" in error and "error_message" in error:
        logger.debug("Setting last_error_details='%s' (error_message)", error['error_message'])
        _set_data("last_status_code", domain, error["status_code"])
        _set_data("last_error_details", domain, error["error_message"] if error["error_message"] != "" else None)
    elif "json" in error and "error" in error["json"]:
        # NOTE(review): "status_code" is read without a presence check in
        # this branch - confirm callers always provide it.
        logger.debug("Setting last_error_details='%s' (json,error)", error["json"]["error"])
        _set_data("last_status_code", domain, error["status_code"])
        _set_data("last_error_details", domain, error["json"]["error"] if error["json"]["error"] != "" else None)

    logger.debug("Invoking error_log.add(domain='%s',error[]='%s'", domain, type(error))
    error_log.add(domain, error)

    logger.debug("EXIT!")

def set_success(domain: str):
    """Queue a success state (status 200, no error details) for `domain`.

    Args:
        domain: Domain to update; checked by domain_helper.raise_on().
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    # Set both to success
    _set_data("last_status_code", domain, 200)
    _set_data("last_error_details", domain, None)

    logger.debug("EXIT!")

def is_registered(domain: str) -> bool:
    """Return whether `domain` is present in the "is_registered" cache.

    On first use the cache is filled from all domains found in the
    instances table.

    Args:
        domain: Domain to check; checked by domain_helper.raise_on().
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if not cache.key_exists("is_registered"):
        logger.debug("Cache for 'is_registered' not initialized, fetching all rows ...")
        database.cursor.execute("SELECT domain FROM instances")

        # Mark every fetched domain as registered
        cache.set_all("is_registered", database.cursor.fetchall(), True)

    # Is cache found?
    registered = cache.sub_key_exists("is_registered", domain)

    logger.debug("registered='%s' - EXIT!", registered)
    return registered

def is_recent(domain: str, column: str = "last_instance_fetch") -> bool:
    """Return whether the timestamp in `column` is within the recheck interval.

    The stored value counts as recent when it is a float and lies at most
    config.get("recheck_instance") seconds in the past. Unregistered
    domains are never recent.

    Args:
        domain: Domain to check; checked by domain_helper.raise_on().
        column: One of "last_instance_fetch", "last_blocked" or
            "last_nodeinfo".

    Raises:
        ValueError: If column is not a string or not one of the allowed names.
    """
    logger.debug("domain='%s',column='%s' - CALLED!", domain, column)
    domain_helper.raise_on(domain)

    if not isinstance(column, str):
        raise ValueError(f"Parameter column[]='{type(column)}' is not of type 'str'")
    elif column not in ["last_instance_fetch", "last_blocked", "last_nodeinfo"]:
        raise ValueError(f"Parameter column='{column}' is not expected")
    elif not is_registered(domain):
        logger.debug("domain='%s' is not registered, returning False - EXIT!", domain)
        return False

    # Query database; column is safe to interpolate, it was checked against the list above
    database.cursor.execute(f"SELECT {column} FROM instances WHERE domain = ? LIMIT 1", [domain])

    # Fetch row; presumably rows support access by column name - verify the row factory
    fetched = database.cursor.fetchone()[column]

    logger.debug("fetched[%s]='%s'", type(fetched), fetched)
    recently = isinstance(fetched, float) and (time.time() - fetched) <= config.get("recheck_instance")

    logger.debug("recently='%s' - EXIT!", recently)
    return recently

def deobfuscate(char: str, domain: str, blocked_hash: str = None) -> tuple:
    """Find the registered instance behind an obfuscated domain.

    With a hash given, the instances table is searched by hash or by the
    domain pattern; on a miss the lookup is retried without the hash.
    Without a hash, the domain is cleaned with tidyup.domain() and searched
    by pattern only. In the pattern every `char` is replaced by "_", the
    SQL LIKE wildcard for a single character.

    Args:
        char: Obfuscation character that occurs in domain (e.g. "*").
        domain: Obfuscated domain name.
        blocked_hash: Optional hash of the blocked domain.

    Returns:
        The first matching row (domain, origin, nodeinfo_url), or None when
        nothing matched, the domain starts with "*." or the domain is empty
        after tidyup.

    Raises:
        ValueError: If a parameter has the wrong type, is empty, or char
            does not occur in domain.
    """
    logger.debug("char='%s',domain='%s',blocked_hash='%s' - CALLED!", char, domain, blocked_hash)

    # Type checks come first so that "char in domain" never sees a non-string
    if not isinstance(char, str):
        raise ValueError(f"Parameter char[]='{type(char)}' is not of type 'str'")
    elif char == "":
        raise ValueError("Parameter 'char' is empty")
    elif not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not of type 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif char not in domain:
        raise ValueError(f"char='{char}' not found in domain='{domain}' but function invoked")
    elif not isinstance(blocked_hash, str) and blocked_hash is not None:
        raise ValueError(f"Parameter blocked_hash[]='{type(blocked_hash)}' is not of type 'str'")

    # Init row
    row = None

    logger.debug("blocked_hash[]='%s'", type(blocked_hash))
    if isinstance(blocked_hash, str):
        logger.debug("Looking up blocked_hash='%s',domain='%s' ...", blocked_hash, domain)
        database.cursor.execute(
            "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? OR domain LIKE ? LIMIT 1", [blocked_hash, domain.replace(char, "_")]
        )

        row = database.cursor.fetchone()
        logger.debug("row[]='%s'", type(row))

        if row is None:
            # Retry without the hash, which takes the pattern-only branch below
            logger.debug("blocked_hash='%s' not found, trying domain='%s' ...", blocked_hash, domain)
            return deobfuscate(char, domain)
    elif not domain.startswith("*."):
        logger.debug("domain='%s' - BEFORE!", domain)
        domain = tidyup.domain(domain)
        logger.debug("domain='%s' - AFTER!", domain)

        if domain == "":
            logger.warning("domain is empty after tidyup - EXIT!")
            return None

        search = domain.replace(char, "_")

        logger.debug("Looking up domain='%s',search='%s' ...", domain, search)
        database.cursor.execute(
            "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? OR 'https://' || domain LIKE ? ORDER BY rowid LIMIT 1", [search, search]
        )

        row = database.cursor.fetchone()
        logger.debug("row[]='%s'", type(row))

    logger.debug("row[]='%s' - EXIT!", type(row))
    return row

def set_last_blocked(domain: str):
    """Queue the current time as last_blocked for `domain`.

    Args:
        domain: Domain to update; checked by domain_helper.raise_on().
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    # Set timestamp
    _set_data("last_blocked", domain, time.time())
    logger.debug("EXIT!")

def set_last_instance_fetch(domain: str):
    """Queue the current time as last_instance_fetch for `domain`.

    Args:
        domain: Domain to update; checked by domain_helper.raise_on().
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    # Set timestamp
    _set_data("last_instance_fetch", domain, time.time())
    logger.debug("EXIT!")

def set_total_peers(domain: str, peers: list):
    """Queue the number of entries in `peers` as total_peers for `domain`.

    Args:
        domain: Domain to update; checked by domain_helper.raise_on().
        peers: List of peers; only its length is stored.

    Raises:
        ValueError: If peers is not a list.
    """
    # Log the type, not the length: len() would fail on non-sized input
    # before the type check below can report it properly
    logger.debug("domain='%s',peers[]='%s' - CALLED!", domain, type(peers))
    domain_helper.raise_on(domain)

    if not isinstance(peers, list):
        raise ValueError(f"Parameter peers[]='{type(peers)}' is not of type 'list'")

    # Set total peers
    logger.debug("domain='%s',peers()=%d", domain, len(peers))
    _set_data("total_peers", domain, len(peers))
    logger.debug("EXIT!")

def set_total_blocks(domain: str, blocks: list):
    """Queue the number of entries in `blocks` as total_blocks for `domain`.

    Args:
        domain: Domain to update; checked by domain_helper.raise_on().
        blocks: List of blocks; only its length is stored.

    Raises:
        ValueError: If blocks is not a list.
    """
    # Log the type, not the length: len() would fail on non-sized input
    # before the type check below can report it properly
    logger.debug("domain='%s',blocks[]='%s' - CALLED!", domain, type(blocks))
    domain_helper.raise_on(domain)

    if not isinstance(blocks, list):
        raise ValueError(f"Parameter blocks[]='{type(blocks)}' is not of type 'list'")

    # Set total blocks
    logger.debug("domain='%s',blocks()=%d", domain, len(blocks))
    _set_data("total_blocks", domain, len(blocks))
    logger.debug("EXIT!")

def set_nodeinfo_url(domain: str, url: str):
    """Queue `url` as nodeinfo_url for `domain`.

    Args:
        domain: Domain to update; checked by domain_helper.raise_on().
        url: Nodeinfo URL, or None.

    Raises:
        ValueError: If url is neither a string nor None, or is empty.
    """
    logger.debug("domain='%s',url='%s' - CALLED!", domain, url)
    domain_helper.raise_on(domain)

    if not isinstance(url, str) and url is not None:
        raise ValueError(f"Parameter url[]='{type(url)}' is not of type 'str'")
    elif url == "":
        raise ValueError("Parameter 'url' is empty")

    # Set nodeinfo URL
    _set_data("nodeinfo_url", domain, url)
    logger.debug("EXIT!")

def set_detection_mode(domain: str, mode: str):
    """Queue `mode` as detection_mode for `domain`.

    Args:
        domain: Domain to update; checked by domain_helper.raise_on().
        mode: Detection mode, or None.

    Raises:
        ValueError: If mode is neither a string nor None, or is empty.
    """
    logger.debug("domain='%s',mode='%s' - CALLED!", domain, mode)
    domain_helper.raise_on(domain)

    if not isinstance(mode, str) and mode is not None:
        raise ValueError(f"Parameter mode[]='{type(mode)}' is not of type 'str'")
    elif mode == "":
        raise ValueError("Parameter 'mode' is empty")

    # Set detection mode
    _set_data("detection_mode", domain, mode)
    logger.debug("EXIT!")

def set_has_obfuscation(domain: str, status: bool):
    """Queue `status` as has_obfuscation for `domain`.

    Args:
        domain: Domain to update; checked by domain_helper.raise_on().
        status: Whether obfuscation has been used.

    Raises:
        ValueError: If status is not a bool.
    """
    logger.debug("domain='%s',status='%s' - CALLED!", domain, status)
    domain_helper.raise_on(domain)

    if not isinstance(status, bool):
        raise ValueError(f"Parameter status[]='{type(status)}' is not of type 'bool'")

    # Set obfuscation flag
    _set_data("has_obfuscation", domain, status)
    logger.debug("EXIT!")

def set_software(domain: str, software: str):
    """Queue `software` as software for `domain`.

    Args:
        domain: Domain to update; checked by domain_helper.raise_on().
        software: Software name, or None.

    Raises:
        ValueError: If software is neither a string nor None, or is empty.
    """
    logger.debug("domain='%s',software='%s' - CALLED!", domain, software)
    domain_helper.raise_on(domain)

    if not isinstance(software, str) and software is not None:
        raise ValueError(f"Parameter software[]='{type(software)}' is not of type 'str'")
    elif software == "":
        raise ValueError("Parameter 'software' is empty")

    # Set software
    _set_data("software", domain, software)
    logger.debug("EXIT!")

def valid(value: str, column: str) -> bool:
    """Return whether any instances row has `value` in `column`.

    Args:
        value: Value to look for.
        column: Name of the instances column to search.

    Raises:
        ValueError: If value or column is not a non-empty string.
    """
    logger.debug("value='%s' - CALLED!", value)
    if not isinstance(value, str):
        raise ValueError(f"Parameter value[]='{type(value)}' is not of type 'str'")
    elif value == "":
        raise ValueError("Parameter 'value' is empty")
    elif not isinstance(column, str):
        raise ValueError(f"Parameter column[]='{type(column)}' is not of type 'str'")
    elif column == "":
        raise ValueError("Parameter 'column' is empty")

    # Query database
    # NOTE(review): column is interpolated into the statement (identifiers
    # cannot be bound as parameters) - callers must only pass trusted names.
    database.cursor.execute(
        f"SELECT {column} FROM instances WHERE {column} = ? LIMIT 1", [value]
    )

    found = database.cursor.fetchone() is not None

    logger.debug("valid='%s' - EXIT!", found)
    return found