1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
24 from fba import database
27 from fba.helpers import blacklist
28 from fba.helpers import cache
29 from fba.helpers import config
30 from fba.helpers import domain as domain_helper
31 from fba.helpers import tidyup
33 from fba.http import federation
34 from fba.http import network
36 from fba.models import error_log
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
#logger.setLevel(logging.DEBUG)

# Staging area for data found while talking to a node (nodeinfo URL, detection
# mode, counters, timestamps, ...) that still has to be written to the
# database. Every top-level key is a column of the `instances` table and maps
# domain -> staged value: _set_data() fills it, update() turns all staged
# values of one domain into a single UPDATE statement and then clears them.
# A key must exist here before any setter may use it, _set_data() rejects
# unknown keys.
_pending = {
    # Detection mode
    # NULL means all detection methods have failed (maybe still reachable instance)
    "detection_mode"     : {},
    # Found nodeinfo URL
    "nodeinfo_url"       : {},
    # Found total peers
    "total_peers"        : {},
    # Found total blocks
    "total_blocks"       : {},
    # Obfuscated blocks
    "obfuscated_blocks"  : {},
    # Last fetched instances
    "last_instance_fetch": {},
    # Last blocked
    "last_blocked"       : {},
    # Last nodeinfo (fetched)
    "last_nodeinfo"      : {},
    # Last response time
    "last_response_time" : {},
    # Last status code
    "last_status_code"   : {},
    # Last error details
    "last_error_details" : {},
    # Whether obfuscation has been used
    "has_obfuscation"    : {},
    # Original software
    "original_software"  : {},
    # Software (maybe aliased to generic name)
    "software"           : {},
}
def _set_data(key: str, domain: str, value: any):
    """Stage ``value`` for column ``key`` of ``domain`` in ``_pending``.

    Nothing is written to the database here; the staged value only lands in
    the module-level ``_pending`` mapping.

    Args:
        key: Name of a top-level key in ``_pending``.
        domain: Domain the value belongs to; checked by ``domain_helper.raise_on()``.
        value: Value to stage; must pass ``utils.is_primitive()``.

    Raises:
        ValueError: ``key`` is not a ``str``, is empty, is unknown to
            ``_pending`` or ``value`` is not a primitive type.
        Exception: ``domain`` is blacklisted.
    """
    logger.debug("key='%s',domain='%s',value[]='%s' - CALLED!", key, domain, type(value))
    domain_helper.raise_on(domain)

    if not isinstance(key, str):
        raise ValueError(f"Parameter key[]='{type(key)}' is not of type 'str'")
    elif key == "":
        raise ValueError("Parameter 'key' is empty")
    elif not key in _pending:
        raise ValueError(f"key='{key}' not found in _pending")
    elif blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function has been invoked")
    elif not utils.is_primitive(value):
        raise ValueError(f"value[]='{type(value)}' is not a primitive type")

    # Set it
    _pending[key][domain] = value

    logger.debug("EXIT!")
def has_pending(domain: str) -> bool:
    """Return whether any key of ``_pending`` holds a staged value for ``domain``.

    Raises:
        ValueError: ``domain`` is not registered.
        Exception: ``domain`` is blacklisted.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if not is_registered(domain):
        raise ValueError(f"domain='{domain}' is not registered but function was invoked.")
    elif blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function has been invoked")

    has = False
    logger.debug("Checking %d _pending array elements ...", len(_pending))
    for key in _pending:
        logger.debug("domain='%s',_pending[%s]()=%d", domain, key, len(_pending[key]))
        if domain in _pending[key]:
            logger.debug("domain='%s' at key='%s' has pending data ...", domain, key)
            # One hit is enough, no need to look at the remaining keys
            has = True
            break

    logger.debug("has='%s' - EXIT!", has)
    return has
def update(domain: str):
    """Flush all staged ``_pending`` values of ``domain`` into the database.

    Builds one ``UPDATE instances SET ... WHERE domain = ?`` statement from
    every ``_pending`` key that holds a value for ``domain``, additionally
    sets ``last_updated`` to the current time, commits and finally removes the
    flushed entries from ``_pending``.

    Raises:
        Exception: ``domain`` is not registered, has nothing pending, is
            blacklisted, or the UPDATE statement did not touch any row.
        ValueError: No field could be collected for the statement.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if not is_registered(domain):
        raise Exception(f"domain='{domain}' cannot be updated while not being registered")
    elif not has_pending(domain):
        raise Exception(f"domain='{domain}' has no pending instance data, but function invoked")
    elif blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function has been invoked")

    # Column assignments and their bound values, kept in the same order
    sql_string = ""
    fields = []

    logger.debug("Checking %d _pending array elements ...", len(_pending))
    for key in _pending:
        logger.debug("Checking key='%s',domain='%s'", key, domain)
        if domain in _pending[key]:
            logger.debug("Adding '%s' for key='%s' ...", _pending[key][domain], key)
            fields.append(_pending[key][domain])
            # key comes from _pending (a fixed set of column names), the value is bound
            sql_string += f" {key} = ?,"

    logger.debug("sql_string(%d)='%s'", len(sql_string), sql_string)
    if sql_string == "":
        raise ValueError(f"No fields have been set, but function invoked, domain='{domain}'")

    # Set last_updated to current timestamp
    fields.append(time.time())

    # For WHERE statement
    logger.debug("Setting domain='%s' for WHERE statement ...", domain)
    fields.append(domain)

    logger.debug("sql_string='%s',fields()=%d", sql_string, len(fields))
    sql_string = "UPDATE instances SET" + sql_string + " last_updated = ? WHERE domain = ? LIMIT 1"

    logger.debug("Executing SQL: sql_string='%s',fields()=%d", sql_string, len(fields))
    database.cursor.execute(sql_string, fields)

    logger.debug("rowcount=%d", database.cursor.rowcount)
    if database.cursor.rowcount == 0:
        raise Exception(f"Did not update any rows: domain='{domain}',fields()={len(fields)}")

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Staged values are only dropped after the commit went through
    logger.debug("Deleting _pending for domain='%s'", domain)
    for key in _pending:
        logger.debug("domain='%s',key='%s'", domain, key)
        if domain in _pending[key]:
            logger.debug("Deleting key='%s',domain='%s' ...", key, domain)
            del _pending[key][domain]

    logger.debug("EXIT!")
def add(domain: str, origin: str, command: str, path: str = None, software: str = None):
    """Insert a new row for ``domain`` into the ``instances`` table.

    When ``software`` is not given it is determined through
    ``federation.determine_software()``; network errors during that step are
    recorded with ``set_last_error()`` and the row is still inserted. After
    the INSERT the domain is marked as registered in the cache and any staged
    ``_pending`` data is flushed with ``update()``.

    Args:
        domain: Domain to add.
        origin: Domain this one was found at, or ``None``.
        command: Name of the command that found the domain.
        path: Optional path starting with ``/`` handed to software detection.
        software: Optional, already known software type.

    Raises:
        ValueError: A parameter has the wrong type, is empty, ``path`` does
            not start with ``/`` or ``origin`` is not a valid domain name.
        Exception: ``domain`` is blacklisted or points to a single user or tag.
    """
    logger.debug("domain='%s',origin='%s',command='%s',path='%s',software='%s' - CALLED!", domain, origin, command, path, software)
    domain_helper.raise_on(domain)

    if not isinstance(origin, str) and origin is not None:
        raise ValueError(f"origin[]='{type(origin)}' is not of type 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(command, str):
        raise ValueError(f"command[]='{type(command)}' is not of type 'str'")
    elif command == "":
        raise ValueError("Parameter 'command' is empty")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"path[]='{type(path)}' is not of type 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' is empty")
    elif path is not None and not path.startswith("/"):
        raise ValueError(f"path='{path}' does not start with / but should")
    elif not isinstance(software, str) and software is not None:
        raise ValueError(f"software[]='{type(software)}' is not of type 'str'")
    elif software == "":
        raise ValueError("Parameter 'software' is empty")
    elif origin is not None and not validators.domain(origin.split("/")[0]):
        raise ValueError(f"Bad origin name='{origin}'")
    elif blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted, but function invoked")
    elif domain.find("/profile/") > 0 or domain.find("/users/") > 0 or (is_registered(domain.split("/")[0]) and domain.find("/c/") > 0):
        raise Exception(f"domain='{domain}' is a single user")
    elif domain.find("/tag/") > 0:
        raise Exception(f"domain='{domain}' is a tag")

    if software is None:
        try:
            logger.debug("domain='%s',origin='%s',command='%s',path='%s'", domain, origin, command, path)
            software = federation.determine_software(domain, path)
        except network.exceptions as exception:
            # Detection failure is not fatal, the instance is added with unknown software
            logger.warning("Exception '%s' during determining software type, domain='%s'", type(exception), domain)
            set_last_error(domain, exception)

    logger.debug("Determined software='%s'", software)
    if software == "lemmy" and domain.find("/c/") > 0:
        # Cut off the community part, only the instance itself is stored
        domain = domain.split("/c/")[0]

        logger.debug("domain='%s' - LEMMY /c/ !", domain)
        if is_registered(domain):
            logger.warning("domain='%s' already registered after cutting off user part. - EXIT!", domain)
            return

    logger.info("Adding instance domain='%s',origin='%s',software='%s',command='%s' ...", domain, origin, software, command)
    database.cursor.execute(
        "INSERT INTO instances (domain, origin, command, hash, software, original_software, first_seen) VALUES (?, ?, ?, ?, ?, ?, ?)",
        (
            domain,
            origin,
            command,
            utils.get_hash(domain),
            # NOTE(review): software and original_software are presumed to start out identical - confirm
            software,
            software,
            time.time(),
        ),
    )

    logger.debug("Marking domain='%s' as registered.", domain)
    cache.set_sub_key("is_registered", domain, True)

    logger.debug("Checking if domain='%s' has pending updates ...", domain)
    if has_pending(domain):
        logger.debug("Flushing updates for domain='%s' ...", domain)
        update(domain)

    logger.debug("EXIT!")
def set_last_nodeinfo(domain: str):
    """Stage the current time as ``last_nodeinfo`` for ``domain``."""
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    logger.debug("Updating last_nodeinfo for domain='%s'", domain)
    # Only staged via _set_data(), nothing is written to the database here
    now = time.time()
    _set_data("last_nodeinfo", domain, now)

    logger.debug("EXIT!")
def set_last_error(domain: str, error: dict):
    """Stage status code and error details of a failed request for ``domain``.

    Despite the annotation, ``error`` may be an exception (converted to a
    string first), a ``str`` (status code 999), a ``requests`` response
    (``status_code``/``reason``) or a ``dict``. For dicts the details are
    taken from ``error_message`` or from one of several shapes below
    ``error["json"]``; a dict matching none of them stages nothing. The error
    is always handed to ``error_log.add()`` at the end.

    Raises:
        KeyError: ``error`` is none of the supported types, or a matching
            ``json`` shape lacks ``status_code``.
    """
    logger.debug("domain='%s',error[]='%s' - CALLED!", domain, type(error))
    domain_helper.raise_on(domain)

    logger.debug("error[]='%s' - BEFORE!", type(error))
    if isinstance(error, (BaseException, json.decoder.JSONDecodeError)):
        logger.debug("error[]='%s' is an exception, converting to string ...", type(error))
        error = f"error[{type(error)}]='{str(error)}'"

    logger.debug("error[]='%s' - AFTER!", type(error))

    # Work out what to stage first, the actual _set_data() calls follow below
    recognised = True
    status_code = None
    details = None

    if isinstance(error, str):
        logger.debug("Setting last_error_details='%s' (str)", error)
        status_code = 999
        details = error
    elif isinstance(error, requests.models.Response):
        details = error.reason
        logger.debug("Setting last_error_details='%s' (Response)", details)
        status_code = error.status_code
    elif not isinstance(error, dict):
        raise KeyError(f"Cannot handle keys in error[{type(error)}]='{error}'")
    elif "status_code" in error and "error_message" in error:
        details = error["error_message"]
        logger.debug("Setting last_error_details='%s' (error_message)", details)
        status_code = error["status_code"]
    elif "json" in error and "error" in error["json"]:
        body = error["json"]

        # First matching shape wins: msg, error_message, error.message, plain error
        if "msg" in body:
            details = body["msg"]
        elif "error_message" in body:
            details = body["error_message"]
        elif isinstance(body["error"], dict) and "message" in body["error"]:
            details = body["error"]["message"]
        else:
            details = body["error"]

        logger.debug("Setting last_error_details='%s' (json,error)", details)
        status_code = error["status_code"]
    else:
        # Unknown dict layout, nothing gets staged
        recognised = False

    if recognised:
        _set_data("last_status_code" , domain, status_code)
        # Empty details are stored as NULL
        _set_data("last_error_details", domain, details if details != "" else None)

    logger.debug("Invoking error_log.add(domain='%s',error[]='%s'", domain, type(error))
    error_log.add(domain, error)

    logger.debug("EXIT!")
def set_success(domain: str):
    """Stage a successful state for ``domain``: status code 200, no error details."""
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    # Set both to success
    success_state = (
        ("last_status_code"  , 200),
        ("last_error_details", None),
    )
    for key, value in success_state:
        _set_data(key, domain, value)

    logger.debug("EXIT!")
def is_registered(domain: str, skip_raise = False) -> bool:
    """Return whether ``domain`` is known to the ``is_registered`` cache.

    On first use the cache is filled with all domains of the ``instances``
    table; afterwards only the cache is consulted.

    Args:
        domain: Domain to look up.
        skip_raise: Must be a ``bool``. It only guards the second
            ``domain_helper.raise_on()`` call; the first one always runs.

    Raises:
        Exception: ``domain`` is blacklisted.
        ValueError: ``skip_raise`` is not a ``bool``.
    """
    logger.debug("domain='%s',skip_raise='%s' - CALLED!", domain, skip_raise)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function has been invoked")
    elif not isinstance(skip_raise, bool):
        raise ValueError(f"skip_raise[]='{type(skip_raise)}' is not type of 'bool'")

    # NOTE(review): the unconditional raise_on() above already validated domain,
    # so skip_raise currently cannot skip anything - confirm intended behaviour
    if not skip_raise:
        domain_helper.raise_on(domain)

    logger.debug("domain='%s' - CALLED!", domain)
    if not cache.key_exists("is_registered"):
        logger.debug("Cache for 'is_registered' not initialized, fetching all rows ...")
        database.cursor.execute("SELECT domain FROM instances")

        # Check Set all
        cache.set_all("is_registered", database.cursor.fetchall(), True)

    # Is cache found?
    registered = cache.sub_key_exists("is_registered", domain)

    logger.debug("registered='%s' - EXIT!", registered)
    return registered
def is_recent(domain: str, column: str = "last_instance_fetch") -> bool:
    """Return whether ``column`` of ``domain`` was set within the configured interval.

    The interval is read from config key ``recheck_block`` when ``column`` is
    ``last_blocked`` and from ``recheck_instance`` otherwise. A ``NULL``
    column counts as timestamp 0; an unregistered domain is never recent.

    Args:
        domain: Domain to check.
        column: Column of ``instances`` to read; must start with ``last_``.
            It is interpolated into the SQL statement, so only trusted,
            hard-coded column names may be passed.

    Raises:
        ValueError: ``column`` is not a ``str``, does not start with
            ``last_`` or ``domain`` is blacklisted.
    """
    logger.debug("domain='%s',column='%s' - CALLED!", domain, column)
    domain_helper.raise_on(domain)

    if not isinstance(column, str):
        raise ValueError(f"Parameter column[]='{type(column)}' is not of type 'str'")
    elif not column.startswith("last_"):
        raise ValueError(f"Parameter column='{column}' is not expected")
    elif blacklist.is_blacklisted(domain):
        raise ValueError(f"domain='{domain}' is blacklisted but function was invoked")
    elif not is_registered(domain):
        logger.debug("domain='%s' is not registered, returning False - EXIT!", domain)
        return False

    key = "recheck_instance"
    if column == "last_blocked":
        key = "recheck_block"

    # Query database
    database.cursor.execute(f"SELECT {column} FROM instances WHERE domain = ? LIMIT 1", [domain])

    # Fetch row
    # assumes the cursor returns rows addressable by column name - TODO confirm
    row = database.cursor.fetchone()

    fetched = float(row[column]) if row[column] is not None else 0.0

    diff = (time.time() - fetched)

    logger.debug("fetched[%s]='%s',key='%s',diff=%f", type(fetched), fetched, key, diff)
    recently = bool(diff < config.get(key))

    logger.debug("recently='%s' - EXIT!", recently)
    return recently
def deobfuscate(char: str, domain: str, blocked_hash: str = None) -> tuple:
    """Look up the instance row hidden behind an obfuscated ``domain``.

    Every occurrence of ``char`` in ``domain`` is replaced by ``_`` (the SQL
    ``LIKE`` single-character wildcard) and matched against ``instances``.
    With ``blocked_hash`` the row is searched by hash or pattern first; when
    that finds nothing the lookup is retried without the hash. Without a hash,
    domains starting with ``*.`` are not looked up at all.

    Args:
        char: Obfuscation character, must occur in ``domain``.
        domain: Obfuscated domain name.
        blocked_hash: Optional hash of the blocked domain.

    Returns:
        The fetched row (``domain``, ``origin``, ``nodeinfo_url``) or ``None``
        when nothing matched or ``domain`` is empty after tidy-up.

    Raises:
        ValueError: A parameter has the wrong type, ``char`` is empty or does
            not occur in ``domain``.
    """
    logger.debug("char='%s',domain='%s',blocked_hash='%s' - CALLED!", char, domain, blocked_hash)

    # domain's type is checked before "char in domain", otherwise a non-str
    # domain fails with TypeError instead of the intended ValueError
    if not isinstance(char, str):
        raise ValueError(f"Parameter char[]='{type(char)}' is not of type 'str'")
    elif char == "":
        raise ValueError("Parameter 'char' is empty")
    elif not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}'")
    elif not char in domain:
        raise ValueError(f"char='{char}' not found in domain='{domain}' but function invoked")
    elif not isinstance(blocked_hash, str) and blocked_hash is not None:
        raise ValueError(f"Parameter blocked_hash[]='{type(blocked_hash)}' is not of type 'str'")

    # Init row
    row = None

    logger.debug("blocked_hash[]='%s'", type(blocked_hash))
    if isinstance(blocked_hash, str):
        logger.debug("Looking up blocked_hash='%s',domain='%s' ...", blocked_hash, domain)
        database.cursor.execute(
            "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? OR domain LIKE ? LIMIT 1", [blocked_hash, domain.replace(char, "_")]
        )

        row = database.cursor.fetchone()
        logger.debug("row[]='%s'", type(row))

        if row is None:
            # Retry without hash; that call takes the pattern-only branch below
            logger.debug("blocked_hash='%s' not found, trying domain='%s' ...", blocked_hash, domain)
            return deobfuscate(char, domain)
    elif not domain.startswith("*."):
        logger.debug("domain='%s' - BEFORE!", domain)
        domain = tidyup.domain(domain)
        logger.debug("domain='%s' - AFTER!", domain)

        if domain == "":
            logger.warning("domain is empty after tidyup - EXIT!")
            return None

        search = domain.replace(char, "_")

        logger.debug("Looking up domain='%s',search='%s' ...", domain, search)
        database.cursor.execute(
            "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? OR 'https://' || domain LIKE ? ORDER BY rowid LIMIT 1", [search, search]
        )

        row = database.cursor.fetchone()
        logger.debug("row[]='%s'", type(row))

    logger.debug("row[]='%s' - EXIT!", type(row))
    return row
def set_last_blocked(domain: str):
    """Stage the current time as ``last_blocked`` for ``domain``."""
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    # Staged only, update() writes it to the database later
    now = time.time()
    _set_data("last_blocked", domain, now)
    logger.debug("EXIT!")
def set_last_instance_fetch(domain: str):
    """Stage the current time as ``last_instance_fetch`` for ``domain``."""
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    # Staged only, update() writes it to the database later
    now = time.time()
    _set_data("last_instance_fetch", domain, now)
    logger.debug("EXIT!")
def set_last_response_time(domain: str, response_time: float):
    """Stage ``response_time`` as ``last_response_time`` for ``domain``.

    Raises:
        ValueError: ``response_time`` is not a ``float`` or is below zero.
    """
    logger.debug("domain='%s',response_time=%d - CALLED!", domain, response_time)
    domain_helper.raise_on(domain)

    # Guard clauses: strictly a float (ints are rejected) and never negative
    if not isinstance(response_time, float):
        raise ValueError(f"response_time[]='{type(response_time)}' is not of type 'float'")
    if response_time < 0:
        raise ValueError(f"response_time={response_time} is below zero")

    # Staged only, update() writes it to the database later
    _set_data("last_response_time", domain, response_time)
    logger.debug("EXIT!")
def set_total_peers(domain: str, peers: list):
    """Stage the number of elements in ``peers`` as ``total_peers`` for ``domain``.

    Raises:
        ValueError: ``peers`` is not a ``list``.
    """
    # Log the type, not len(): peers is not validated yet and len() on e.g.
    # None would raise TypeError before the ValueError below could be reached
    logger.debug("domain='%s',peers[]='%s' - CALLED!", domain, type(peers))
    domain_helper.raise_on(domain)

    if not isinstance(peers, list):
        raise ValueError(f"Parameter peers[]='{type(peers)}' is not of type 'list'")

    # Set timestamp
    total = len(peers)
    logger.debug("domain='%s',peers()=%d", domain, total)
    _set_data("total_peers", domain, total)
    logger.debug("EXIT!")
def set_total_blocks(domain: str, blocks: list):
    """Stage the number of elements in ``blocks`` as ``total_blocks`` for ``domain``.

    Raises:
        ValueError: ``blocks`` is not a ``list``.
    """
    # Log the type, not len(): blocks is not validated yet and len() on e.g.
    # None would raise TypeError before the ValueError below could be reached
    logger.debug("domain='%s',blocks[]='%s' - CALLED!", domain, type(blocks))
    domain_helper.raise_on(domain)

    if not isinstance(blocks, list):
        raise ValueError(f"Parameter blocks[]='{type(blocks)}' is not of type 'list'")

    # Set timestamp
    total = len(blocks)
    logger.debug("domain='%s',blocks()=%d", domain, total)
    _set_data("total_blocks", domain, total)
    logger.debug("EXIT!")
def set_obfuscated_blocks(domain: str, obfuscated: int):
    """Stage ``obfuscated`` as ``obfuscated_blocks`` for ``domain``.

    Raises:
        ValueError: ``obfuscated`` is not an ``int`` or is negative.
    """
    logger.debug("domain='%s',obfuscated=%d - CALLED!", domain, obfuscated)
    domain_helper.raise_on(domain)

    if not isinstance(obfuscated, int):
        raise ValueError(f"Parameter obfuscated[]='{type(obfuscated)}' is not of type 'int'")
    elif obfuscated < 0:
        # NOTE(review): restored guard, a count below zero is presumed to be the invalid case - confirm
        raise ValueError(f"Parameter obfuscated={obfuscated} is not valid")

    # Set timestamp
    _set_data("obfuscated_blocks", domain, obfuscated)
    logger.debug("EXIT!")
def set_nodeinfo_url(domain: str, url: str):
    """Stage ``url`` as ``nodeinfo_url`` for ``domain``; ``None`` is allowed.

    Raises:
        ValueError: ``url`` is neither ``str`` nor ``None``, is empty or
            fails ``validators.url()``.
    """
    logger.debug("domain='%s',url='%s' - CALLED!", domain, url)
    domain_helper.raise_on(domain)

    if not isinstance(url, str) and url is not None:
        raise ValueError(f"Parameter url[]='{type(url)}' is not of type 'str'")
    elif url == "":
        raise ValueError("Parameter 'url' is empty")
    elif url is not None and not validators.url(url):
        raise ValueError(f"Parameter url='{url}' is not a valid URL")

    # Set timestamp
    _set_data("nodeinfo_url", domain, url)
    logger.debug("EXIT!")
def set_detection_mode(domain: str, mode: str):
    """Stage ``mode`` as ``detection_mode`` for ``domain``; ``None`` is allowed.

    Raises:
        ValueError: ``mode`` is neither ``str`` nor ``None`` or is empty.
    """
    logger.debug("domain='%s',mode='%s' - CALLED!", domain, mode)
    domain_helper.raise_on(domain)

    if not isinstance(mode, str) and mode is not None:
        raise ValueError(f"Parameter mode[]='{type(mode)}' is not of type 'str'")
    elif mode == "":
        raise ValueError("Parameter 'mode' is empty")

    # Set timestamp
    _set_data("detection_mode", domain, mode)
    logger.debug("EXIT!")
def set_has_obfuscation(domain: str, status: bool):
    """Stage ``status`` as ``has_obfuscation`` for ``domain``.

    Raises:
        ValueError: ``status`` is not a ``bool``.
    """
    logger.debug("domain='%s',status='%s' - CALLED!", domain, status)
    domain_helper.raise_on(domain)

    is_flag = isinstance(status, bool)
    if not is_flag:
        raise ValueError(f"Parameter status[]='{type(status)}' is not of type 'bool'")

    # Staged only, update() writes it to the database later
    _set_data("has_obfuscation", domain, status)
    logger.debug("EXIT!")
def set_original_software(domain: str, software: str):
    """Stage ``software`` as ``original_software`` for ``domain``; ``None`` is allowed.

    Raises:
        ValueError: ``software`` is neither ``str`` nor ``None`` or is empty.
    """
    logger.debug("domain='%s',software='%s' - CALLED!", domain, software)
    domain_helper.raise_on(domain)

    if not isinstance(software, str) and software is not None:
        raise ValueError(f"Parameter software[]='{type(software)}' is not of type 'str'")
    elif software == "":
        raise ValueError("Parameter 'software' is empty")

    # Set original software
    _set_data("original_software", domain, software)
    logger.debug("EXIT!")
def set_software(domain: str, software: str):
    """Stage ``software`` as ``software`` for ``domain``; ``None`` is allowed.

    Raises:
        ValueError: ``software`` is neither ``str`` nor ``None`` or is empty.
    """
    logger.debug("domain='%s',software='%s' - CALLED!", domain, software)
    domain_helper.raise_on(domain)

    if not isinstance(software, str) and software is not None:
        raise ValueError(f"Parameter software[]='{type(software)}' is not of type 'str'")
    elif software == "":
        raise ValueError("Parameter 'software' is empty")

    # Set software (maybe aliased to generic name)
    _set_data("software", domain, software)
    logger.debug("EXIT!")
def valid(value: str, column: str) -> bool:
    """Return whether ``instances`` has at least one row with ``column`` equal to ``value``.

    Args:
        value: Value to search for; bound as a statement parameter.
        column: Column name. It is interpolated into the SQL statement, so
            only trusted, hard-coded column names may be passed.

    Raises:
        ValueError: ``value`` or ``column`` is not a ``str`` or is empty.
    """
    logger.debug("value='%s' - CALLED!", value)
    if not isinstance(value, str):
        raise ValueError(f"Parameter value[]='{type(value)}' is not of type 'str'")
    elif value == "":
        raise ValueError("Parameter 'value' is empty")
    elif not isinstance(column, str):
        raise ValueError(f"Parameter column[]='{type(column)}' is not of type 'str'")
    elif column == "":
        raise ValueError("Parameter 'column' is empty")

    # Query database
    database.cursor.execute(
        f"SELECT {column} FROM instances WHERE {column} = ? LIMIT 1", [value]
    )

    is_valid = database.cursor.fetchone() is not None

    logger.debug("is_valid='%s' - EXIT!", is_valid)
    return is_valid
def delete(domain: str):
    """Delete the row of ``domain`` from the ``instances`` table and commit."""
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    # domain is bound as a parameter, the statement itself is constant
    statement = "DELETE FROM instances WHERE domain = ? LIMIT 1"
    database.cursor.execute(statement, [domain])

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    logger.debug("EXIT!")
def translate_idnas(rows: list, column: str):
    """Convert the ``column`` value of each row to its IDNA (ASCII) form in the database.

    A row whose value changes under IDNA encoding is deleted when the
    translated name is already registered, otherwise it is updated to the
    translated name. Each change is committed on its own.

    Args:
        rows: Non-empty list of rows, each addressable by ``column``.
        column: Either ``domain`` or ``origin``.

    Raises:
        ValueError: ``rows`` is not a non-empty ``list`` or ``column`` is not
            a supported, non-empty ``str``.
    """
    logger.debug("rows[]='%s' - CALLED!", type(rows))

    if not isinstance(rows, list):
        # f-prefix was missing, the message showed the literal "{type(rows)}"
        raise ValueError(f"rows[]='{type(rows)}' is not of type 'list'")
    elif len(rows) == 0:
        raise ValueError("Parameter 'rows' is an empty list")
    elif not isinstance(column, str):
        raise ValueError(f"column='{type(column)}' is not of type 'str'")
    elif column == "":
        raise ValueError("Parameter 'column' is empty")
    elif column not in ["domain", "origin"]:
        raise ValueError(f"column='{column}' is not supported")

    logger.info("Checking/converting %d domain names ...", len(rows))
    for row in rows:
        logger.debug("row[]='%s'", type(row))

        translated = row[column].encode("idna").decode("utf-8")
        logger.debug("translated='%s',row[%s]='%s'", translated, column, row[column])

        if translated != row[column]:
            logger.info("Translated row[%s]='%s' to '%s'", column, row[column], translated)
            # NOTE(review): is_registered() checks registered domains even when
            # column is 'origin' - confirm that deleting is intended in that case
            if is_registered(translated, True):
                logger.warning("Deleting row[%s]='%s' as translated='%s' already exist", column, row[column], translated)
                database.cursor.execute(f"DELETE FROM instances WHERE {column} = ? LIMIT 1", [row[column]])
            else:
                logger.debug("Updating row[%s]='%s' to translated='%s' ...", column, row[column], translated)
                database.cursor.execute(f"UPDATE instances SET {column} = ? WHERE {column} = ? LIMIT 1", [translated, row[column]])

            logger.debug("Invoking commit() ...")
            database.connection.commit()

    logger.debug("EXIT!")