1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
24 from fba import database
27 from fba.helpers import blacklist
28 from fba.helpers import cache
29 from fba.helpers import config
30 from fba.helpers import domain as domain_helper
32 from fba.http import federation
33 from fba.http import network
35 from fba.models import error_log
# Configure the root logger at import time and create this module's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
40 # Found info from node, such as nodeinfo URL, detection mode that needs to be
41 # written to database. Both arrays must be filled at the same time or else
42 # update_data() will fail
44 # Detection mode: 'AUTO_DISCOVERY', 'STATIC_CHECKS' or 'GENERATOR'
45 # NULL means all detection methods have failed (maybe still reachable instance)
46 "detection_mode" : {},
51 # Last fetched instances
52 "last_instance_fetch": {},
57 # Last nodeinfo (fetched)
60 "last_status_code" : {},
62 "last_error_details" : {},
    # Whether obfuscation has been used
64 "has_obfuscation" : {},
def _set_data(key: str, domain: str, value: object):
    """Queue `value` under `key` for `domain` in the module-level `_pending` store.

    Nothing is written to the database here; the value only lands in
    `_pending[key][domain]`.

    Raises:
        ValueError: if `key` is not a non-empty string, is not a key of
            `_pending`, or `value` is rejected by utils.is_primitive().
    """
    logger.debug("key='%s',domain='%s',value[]='%s' - CALLED!", key, domain, type(value))

    # Domain validation is delegated to the shared helper
    domain_helper.raise_on(domain)

    if not isinstance(key, str):
        # f-prefix is required, otherwise the placeholder is printed literally
        raise ValueError(f"Parameter key[]='{type(key)}' is not 'str'")
    elif key == "":
        raise ValueError("Parameter 'key' is empty")
    elif key not in _pending:
        raise ValueError(f"key='{key}' not found in _pending")
    elif not utils.is_primitive(value):
        raise ValueError(f"value[]='{type(value)}' is not a primitive type")

    # Set it
    _pending[key][domain] = value

    logger.debug("EXIT!")
def has_pending(domain: str) -> bool:
    """Return True when at least one key of `_pending` holds a value for `domain`."""
    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
    domain_helper.raise_on(domain)

    has = False
    for key in _pending:
        logger.debug("key='%s',domain='%s',_pending[key]()=%d", key, domain, len(_pending[key]))
        if domain in _pending[key]:
            # One hit is enough, no need to scan the remaining keys
            has = True
            break

    logger.debug("has='%s' - EXIT!", has)
    return has
def update_data(domain: str):
    """Write every value queued in `_pending` for `domain` to the `instances` table.

    Builds a single UPDATE statement from the queued keys, also sets
    `last_updated` to the current time, commits, and then removes the
    domain's entries from `_pending`.

    Raises:
        Exception: if nothing is pending for `domain`, if `domain` is not
            registered, or if the UPDATE did not touch any row.
        ValueError: if no column assignment could be built.
    """
    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
    domain_helper.raise_on(domain)
    if not has_pending(domain):
        raise Exception(f"domain='{domain}' has no pending instance data, but function invoked")
    elif not is_registered(domain):
        raise Exception(f"domain='{domain}' cannot be updated while not being registered")

    logger.debug("Updating instance data for domain='%s' ...", domain)

    # Column names come from the keys of _pending (module-level, not caller
    # input); the values are always passed as bound parameters.
    assignments = []
    fields = []
    for key in _pending:
        logger.debug("Checking key='%s',domain='%s'", key, domain)
        if domain in _pending[key]:
            logger.debug("Adding '%s' for key='%s' ...", _pending[key][domain], key)
            fields.append(_pending[key][domain])
            assignments.append(f" {key} = ?,")

    sql_string = "".join(assignments)
    logger.debug("sql_string()=%d", len(sql_string))
    if sql_string == "":
        raise ValueError(f"No fields have been set, but method invoked, domain='{domain}'")

    # Set last_updated to current timestamp
    fields.append(time.time())

    # For WHERE statement
    fields.append(domain)

    logger.debug("sql_string='%s',fields()=%d", sql_string, len(fields))
    sql_string = "UPDATE instances SET" + sql_string + " last_updated = ? WHERE domain = ? LIMIT 1"

    logger.debug("Executing SQL: '%s'", sql_string)
    database.cursor.execute(sql_string, fields)

    logger.debug("rowcount=%d", database.cursor.rowcount)
    if database.cursor.rowcount == 0:
        raise Exception(f"Did not update any rows: domain='{domain}',fields()={len(fields)}")

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Only drop the queued values once the row has been written and committed
    logger.debug("Deleting _pending for domain='%s'", domain)
    for key in _pending:
        logger.debug("domain='%s',key='%s'", domain, key)
        if domain in _pending[key]:
            logger.debug("Deleting key='%s',domain='%s' ...", key, domain)
            del _pending[key][domain]

    logger.debug("EXIT!")
def add(domain: str, origin: str, command: str, path: str = None, software: str = None):
    """Insert `domain` as a new row into the `instances` table.

    When `software` is not given it is looked up through
    federation.determine_software(); a network error during that lookup is
    recorded with set_last_error() and the row is inserted without a
    software type. After the insert the domain is marked in the
    "is_registered" cache and any values already queued for it are flushed.

    Raises:
        ValueError: for wrongly typed or empty parameters, or when the part
            of `origin` before the first "/" fails validators.domain().
        Exception: if `domain` is blacklisted or looks like a single-user URL.
    """
    logger.debug("domain='%s',origin='%s',command='%s',path='%s',software='%s' - CALLED!", domain, origin, command, path, software)
    domain_helper.raise_on(domain)

    if not isinstance(origin, str) and origin is not None:
        raise ValueError(f"origin[]='{type(origin)}' is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(command, str):
        raise ValueError(f"command[]='{type(command)}' is not 'str'")
    elif command == "":
        raise ValueError("Parameter 'command' is empty")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"path[]='{type(path)}' is not 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' is empty")
    elif not isinstance(software, str) and software is not None:
        raise ValueError(f"software[]='{type(software)}' is not 'str'")
    elif software == "":
        raise ValueError("Parameter 'software' is empty")
    elif origin is not None and not validators.domain(origin.split("/")[0]):
        raise ValueError(f"Bad origin name='{origin}'")
    elif blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted, but method invoked")
    elif domain.find("/profile/") > 0 or domain.find("/users/") > 0 or (software == "lemmy" and domain.find("/c/") > 0):
        raise Exception(f"domain='{domain}' is a single user")

    if software is None:
        try:
            logger.debug("domain='%s',origin='%s',command='%s',path='%s'", domain, origin, command, path)
            software = federation.determine_software(domain, path)
        except network.exceptions as exception:
            # Best effort: remember the failure, continue with software=None
            logger.warning("Exception '%s' during determining software type, domain='%s'", type(exception), domain)
            set_last_error(domain, exception)

    logger.debug("Determined software='%s'", software)
    if software == "lemmy" and domain.find("/c/") > 0:
        # Cut off the community part and keep only the leading instance part
        domain = domain.split("/c/")[0]
        if is_registered(domain):
            logger.warning("domain='%s' already registered after cutting off user part. - EXIT!", domain)
            return

    logger.info("Adding instance domain='%s',origin='%s',software='%s',command='%s'", domain, origin, software, command)
    database.cursor.execute(
        "INSERT INTO instances (domain, origin, command, hash, software, first_seen) VALUES (?, ?, ?, ?, ?, ?)",
        (
            domain,
            origin,
            command,
            utils.get_hash(domain),
            software,
            time.time(),
        ),
    )

    logger.debug("Marking domain='%s' as registered.", domain)
    cache.set_sub_key("is_registered", domain, True)

    if has_pending(domain):
        # Values may have been queued (e.g. by set_last_error() above) before
        # the row existed; write them now that it does.
        logger.debug("domain='%s' has pending nodeinfo being updated ...", domain)
        update_data(domain)

    logger.debug("EXIT!")
def set_last_nodeinfo(domain: str):
    """Queue the current time as the `last_nodeinfo` value of `domain`."""
    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
    domain_helper.raise_on(domain)

    logger.debug("Updating last_nodeinfo for domain='%s'", domain)
    timestamp = time.time()
    _set_data("last_nodeinfo", domain, timestamp)

    logger.debug("EXIT!")
def set_last_error(domain: str, error: object):
    """Queue status code and error details for `domain` and add an error-log entry.

    Handled shapes of `error`:
      * an exception - converted to a descriptive string first
      * a str - stored with the placeholder status code 999
      * a requests.models.Response - its status_code and reason are stored
      * a dict with "status_code" and "error_message"
      * a dict with "json" containing "error"

    An empty message is stored as None. A dict matching neither dict shape
    queues nothing. In every non-raising case error_log.add() is invoked.

    Raises:
        KeyError: if `error` is none of the types listed above.
    """
    logger.debug("domain='%s',error[]='%s' - CALLED!", domain, type(error))
    domain_helper.raise_on(domain)

    logger.debug("error[]='%s' - BEFORE!", type(error))
    # BaseException already covers json.decoder.JSONDecodeError
    if isinstance(error, BaseException):
        error = f"error[{type(error)}]='{str(error)}'"
    logger.debug("error[]='%s' - AFTER!", type(error))

    if isinstance(error, str):
        logger.debug("Setting last_error_details='%s' (str)", error)
        _set_data("last_status_code" , domain, 999)
        _set_data("last_error_details", domain, error if error != "" else None)
    elif isinstance(error, requests.models.Response):
        logger.debug("Setting last_error_details='%s' (Response)", error.reason)
        _set_data("last_status_code" , domain, error.status_code)
        _set_data("last_error_details", domain, error.reason if error.reason != "" else None)
    elif not isinstance(error, dict):
        raise KeyError(f"Cannot handle keys in error[{type(error)}]='{error}'")
    elif "status_code" in error and "error_message" in error:
        logger.debug("Setting last_error_details='%s' (error_message)", error['error_message'])
        _set_data("last_status_code" , domain, error["status_code"])
        _set_data("last_error_details", domain, error["error_message"] if error["error_message"] != "" else None)
    elif "json" in error and "error" in error["json"]:
        # NOTE(review): "status_code" is not checked in this branch, so a dict
        # without it raises KeyError here - confirm callers always provide it.
        logger.debug("Setting last_error_details='%s' (json,error)", error["json"]["error"])
        _set_data("last_status_code" , domain, error["status_code"])
        _set_data("last_error_details", domain, error["json"]["error"] if error["json"]["error"] != "" else None)

    logger.debug("Invoking error_log.add(domain='%s',error[]='%s'", domain, type(error))
    error_log.add(domain, error)

    logger.debug("EXIT!")
def is_registered(domain: str) -> bool:
    """Return whether `domain` is present in the "is_registered" cache.

    On first use the cache is filled with every `domain` value of the
    `instances` table; later calls are answered from the cache only.
    """
    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
    domain_helper.raise_on(domain)

    if not cache.key_exists("is_registered"):
        logger.debug("Cache for 'is_registered' not initialized, fetching all rows ...")
        database.cursor.execute("SELECT domain FROM instances")

        # Mark every fetched domain as registered
        cache.set_all("is_registered", database.cursor.fetchall(), True)

    # Is cache found?
    registered = cache.sub_key_exists("is_registered", domain)

    logger.debug("registered='%s' - EXIT!", registered)
    return registered
def is_recent(domain: str) -> bool:
    """Return True when `domain` was fetched within the "recheck_instance" window.

    Unregistered domains, a missing row, and a `last_instance_fetch` value
    that is not a float all yield False.
    """
    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
    domain_helper.raise_on(domain)
    if not is_registered(domain):
        logger.debug("domain='%s' is not registered, returning False - EXIT!", domain)
        return False

    # Query database
    database.cursor.execute("SELECT last_instance_fetch FROM instances WHERE domain = ? LIMIT 1", [domain])

    # Fetch row; is_registered() answers from a cache, so the row may be
    # missing and must not be subscripted blindly
    row = database.cursor.fetchone()
    fetched = row[0] if row is not None else None

    logger.debug("fetched[%s]='%s'", type(fetched), fetched)
    recently = isinstance(fetched, float) and time.time() - fetched <= config.get("recheck_instance")

    logger.debug("recently='%s' - EXIT!", recently)
    return recently
def deobfuscate(char: str, domain: str, blocked_hash: str = None) -> tuple:
    """Look up the instance row matching an obfuscated `domain`.

    Every occurrence of `char` in `domain` is replaced by "_", the SQL LIKE
    wildcard for exactly one character. With `blocked_hash` given, rows are
    matched by hash or by that pattern; otherwise by the pattern only.

    Returns:
        The fetched row (domain, origin, nodeinfo_url), or None when nothing
        matched.

    Raises:
        ValueError: for wrongly typed or empty parameters, a `domain` that is
            not lower-case or ends in ".arpa"/".tld", or a `char` that does
            not occur in `domain`.
    """
    logger.debug("char='%s',domain='%s',blocked_hash='%s' - CALLED!", char, domain, blocked_hash)

    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif domain.lower() != domain:
        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
    elif domain.endswith(".arpa"):
        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    elif not isinstance(char, str):
        raise ValueError(f"Parameter char[]='{type(char)}' is not 'str'")
    elif char == "":
        raise ValueError("Parameter 'char' is empty")
    elif char not in domain:
        raise ValueError(f"char='{char}' not found in domain='{domain}' but function invoked")
    elif not isinstance(blocked_hash, str) and blocked_hash is not None:
        raise ValueError(f"Parameter blocked_hash[]='{type(blocked_hash)}' is not 'str'")

    logger.debug("blocked_hash[]='%s'", type(blocked_hash))
    if isinstance(blocked_hash, str):
        logger.debug("Looking up blocked_hash='%s',domain='%s' ...", blocked_hash, domain)
        database.cursor.execute(
            "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? OR domain LIKE ? LIMIT 1", [blocked_hash, domain.replace(char, "_")]
        )

        row = database.cursor.fetchone()
        logger.debug("row[]='%s'", type(row))

        if row is None:
            # NOTE(review): the query above already matches on the same LIKE
            # pattern, so this retry looks redundant - confirm before removing.
            logger.debug("blocked_hash='%s' not found, trying domain='%s' ...", blocked_hash, domain)
            return deobfuscate(char, domain)
    else:
        logger.debug("Looking up domain='%s' ...", domain)
        database.cursor.execute(
            "SELECT domain, origin, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [domain.replace(char, "_")]
        )

        row = database.cursor.fetchone()
        logger.debug("row[]='%s'", type(row))

    logger.debug("row[]='%s' - EXIT!", type(row))
    return row
def set_last_blocked(domain: str):
    """Queue the current time as the `last_blocked` value of `domain`."""
    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
    domain_helper.raise_on(domain)

    # Take the timestamp first, then hand it to the pending store
    timestamp = time.time()
    _set_data("last_blocked", domain, timestamp)

    logger.debug("EXIT!")
def set_last_instance_fetch(domain: str):
    """Queue the current time as the `last_instance_fetch` value of `domain`."""
    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
    domain_helper.raise_on(domain)

    # Take the timestamp first, then hand it to the pending store
    timestamp = time.time()
    _set_data("last_instance_fetch", domain, timestamp)

    logger.debug("EXIT!")
def set_total_peers(domain: str, peers: list):
    """Queue the number of entries in `peers` as the `total_peers` value of `domain`.

    Raises:
        ValueError: if `peers` is not a list.
    """
    # Log only the type here: calling len() before the isinstance() check
    # would raise TypeError for e.g. None and hide the intended ValueError
    logger.debug("domain='%s',peers[]='%s' - CALLED!", domain, type(peers))
    domain_helper.raise_on(domain)

    if not isinstance(peers, list):
        raise ValueError(f"Parameter peers[]='{type(peers)}' is not 'list'")

    # Only the count is stored, not the peers themselves
    _set_data("total_peers", domain, len(peers))

    logger.debug("EXIT!")
def set_nodeinfo_url(domain: str, url: str):
    """Queue `url` as the `nodeinfo_url` value of `domain`.

    Raises:
        ValueError: if `url` is not a non-empty string.
    """
    logger.debug("domain='%s',url='%s' - CALLED!", domain, url)
    domain_helper.raise_on(domain)

    if not isinstance(url, str):
        # f-prefix added and expected type corrected ('str', not 'list')
        raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
    elif url == "":
        raise ValueError("Parameter 'url' is empty")

    # Set it
    _set_data("nodeinfo_url", domain, url)

    logger.debug("EXIT!")
def set_detection_mode(domain: str, mode: str):
    """Queue `mode` as the `detection_mode` value of `domain`.

    The value itself is not checked against a list of known modes here.

    Raises:
        ValueError: if `mode` is not a non-empty string.
    """
    logger.debug("domain='%s',mode='%s' - CALLED!", domain, mode)
    domain_helper.raise_on(domain)

    if not isinstance(mode, str):
        # f-prefix added and expected type corrected ('str', not 'list')
        raise ValueError(f"Parameter mode[]='{type(mode)}' is not 'str'")
    elif mode == "":
        raise ValueError("Parameter 'mode' is empty")

    # Set it
    _set_data("detection_mode", domain, mode)

    logger.debug("EXIT!")
def set_has_obfuscation(domain: str, status: bool):
    """Queue `status` as the `has_obfuscation` value of `domain`.

    Raises:
        ValueError: if `status` is not a bool.
    """
    logger.debug("domain(%d)='%s',status='%s' - CALLED!", len(domain), domain, status)
    domain_helper.raise_on(domain)

    status_is_bool = isinstance(status, bool)
    if not status_is_bool:
        raise ValueError(f"Parameter status[]='{type(status)}' is not 'bool'")

    # Hand the flag to the pending store
    _set_data("has_obfuscation", domain, status)

    logger.debug("EXIT!")