1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
28 from fba.helpers import blacklist
29 from fba.helpers import config
30 from fba.helpers import cookies
31 from fba.helpers import domain as domain_helper
32 from fba.helpers import json as json_helper
34 from fba.models import instances
36 # HTTP headers for non-API requests
38 "User-Agent": config.get("useragent"),
# The API header set below is the base for JSON requests in this module:
# callers merge their own headers on top of it ({**_api_headers, **headers}),
# so a per-call header with the same name wins.
41 # HTTP headers for API requests
43 "User-Agent" : config.get("useragent"),
44 "Content-Type" : "application/json",
45 "Sec-Fetch-Dest": "empty",
46 "Sec-Fetch-Mode": "cors",
47 "Sec-Fetch-Site": "same-origin",
# The request helpers below catch exactly this group in their
# "except exceptions as exception" handlers; anything not listed propagates
# to the caller without being recorded.
50 # Exceptions to always catch
52 requests.exceptions.ChunkedEncodingError,
53 requests.exceptions.ConnectionError,
54 requests.exceptions.ContentDecodingError,
55 requests.exceptions.InvalidSchema,
56 requests.exceptions.InvalidURL,
57 requests.exceptions.SSLError,
58 requests.exceptions.Timeout,
59 eventlet.timeout.Timeout,
60 requests.exceptions.TooManyRedirects,
63 urllib3.exceptions.LocationParseError,
64 urllib3.util.ssl_match_hostname.CertificateError,
# NOTE: basicConfig() runs at import time and configures the root logger at
# INFO level; the commented-out setLevel() line is the switch for verbose
# output of this module only.
67 logging.basicConfig(level=logging.INFO)
68 logger = logging.getLogger(__name__)
69 #logger.setLevel(logging.DEBUG)
def post_json_api(domain: str, path: str, data: str = "", headers: dict = None) -> dict:
    """Send a POST request to ``https://{domain}{path}`` and wrap the outcome in a dict.

    Args:
        domain: Host to contact. Checked with ``domain_helper.raise_on()`` and
            against the blacklist before anything is sent.
        path: Request path, must be a non-empty string starting with "/".
        data: Request body, handed to ``reqto.post()`` unchanged.
        headers: Optional extra headers, merged over ``_api_headers``. The
            passed dict is copied and never modified; ``None`` means "none".

    Returns:
        A dict that always has "status_code". "json" is added for a 200
        response; "error_message" is added when the response is not OK,
        has a status above 200 or an empty body.

    Raises:
        RuntimeError: ``domain`` is blacklisted.
        TypeError: ``path`` or ``data`` is not a string.
        ValueError: ``path`` is empty or lacks the leading slash, or
            ``headers`` is neither ``None`` nor a dict.
        Exception: any member of the module-level ``exceptions`` tuple is
            recorded via ``instances.set_last_error()`` and raised again.
    """
    # len() is only safe on a dict here; validation of 'headers' happens below.
    logger.debug("domain='%s',path='%s',data='%s',headers()=%d - CALLED!", domain, path, data, len(headers) if isinstance(headers, dict) else 0)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise RuntimeError(f"domain='{domain}' is blacklisted but function was invoked")
    if not isinstance(path, str):
        raise TypeError(f"path[]='{type(path)}' has not expected type 'str'")
    if path == "":
        raise ValueError("Parameter 'path' is empty")
    if not path.startswith("/"):
        raise ValueError(f"path='{path}' does not start with / but should")
    if not isinstance(data, str):
        raise TypeError(f"data[]='{type(data)}' has not expected type 'str'")
    if headers is not None and not isinstance(headers, dict):
        raise ValueError(f"headers[]='{type(headers)}' has not expected type 'dict'")

    json_reply = {
        "status_code": 200,
    }

    # Work on a private copy: writing Referer/Origin into the caller's dict (or
    # into a shared default argument) would leak one domain's headers into the
    # next request.
    request_headers = dict(headers) if headers else {}

    # Add domain as referer and origin
    request_headers["Referer"] = f"https://{domain}/"
    request_headers["Origin"] = f"https://{domain}/"

    try:
        logger.debug("Sending POST to domain='%s',path='%s',data='%s',headers(%d)='%s'", domain, path, data, len(request_headers), request_headers)
        start = time.perf_counter()
        response = reqto.post(
            f"https://{domain}{path}",
            data=data,
            headers={**_api_headers, **request_headers},
            timeout=config.timeout,
            cookies=cookies.get_all(domain),
            allow_redirects=False
        )
        response_time = time.perf_counter() - start
        logger.debug("response_time=%s", response_time)

        instances.set_last_requested_path(domain, path)
        instances.set_last_response_time(domain, response_time)

        logger.debug("response.ok='%s',response.status_code=%d,response.reason='%s',response_time=%s", response.ok, response.status_code, response.reason, response_time)
        if response.ok and response.status_code == 200:
            logger.debug("Parsing JSON response from domain='%s',path='%s' ...", domain, path)
            json_reply["json"] = json_helper.from_response(response)

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        if not response.ok or response.status_code > 200 or len(response.text.strip()) == 0:
            logger.debug("Cannot query JSON API: domain='%s',path='%s',data()=%d,response.status_code=%d,response.text()=%d", domain, path, len(data), response.status_code, len(response.text))
            json_reply["status_code"] = response.status_code
            json_reply["error_message"] = response.reason
            instances.set_last_error(domain, response)

    except exceptions as exception:
        logger.debug("Fetching path='%s' from domain='%s' failed. exception[%s]='%s'", path, domain, type(exception), str(exception))
        json_reply["status_code"] = 999
        json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'"
        json_reply["exception"] = exception

        logger.debug("Invoking instances.set_last_error(%s,%s) ...", domain, exception)
        instances.set_last_error(domain, exception)
        raise exception

    logger.debug("Returning json_reply(%d)[]='%s' - EXIT!", len(json_reply), type(json_reply))
    return json_reply
def fetch_api_url(url: str, timeout: tuple) -> dict:
    """Fetch ``url`` with the API header set and wrap the outcome in a dict.

    Args:
        url: Full URL to request; must pass ``validators.url()``.
        timeout: Timeout tuple, forwarded to ``fetch_url()``.

    Returns:
        A dict with "status_code" and, for a 200 response, "json".
        "error_message" is added when the response is not OK, has a status
        above 200 or an empty body.

    Raises:
        TypeError: ``url`` is not a string or ``timeout`` is not a tuple.
        ValueError: ``url`` is empty or not a valid URL.
        Exception: any member of the module-level ``exceptions`` tuple is
            raised again after being logged.
    """
    logger.debug("url='%s',timeout()=%d - CALLED!", url, len(timeout))

    if not isinstance(url, str):
        raise TypeError(f"Parameter url[]='{type(url)}' has not expected type 'str'")
    if url == "":
        raise ValueError("Parameter 'url' is an empty string")
    if not validators.url(url):
        raise ValueError(f"Parameter url='{url}' is not a valid URL")
    if not isinstance(timeout, tuple):
        raise TypeError(f"timeout[]='{type(timeout)}' has not expected type 'tuple'")

    json_reply = {"status_code": 200}

    try:
        logger.debug("Fetching url='%s' ...", url)
        response = fetch_url(url, _api_headers, timeout)

        logger.debug("response.ok='%s',response.status_code=%d,response.reason='%s'", response.ok, response.status_code, response.reason)
        if response.ok and response.status_code == 200:
            logger.debug("Parsing JSON response from url='%s' ...", url)
            json_reply["json"] = json_helper.from_response(response)

        logger.debug("response.ok='%s',response.status_code='%s',response.text()=%d", response.ok, response.status_code, len(response.text))
        body_is_empty = len(response.text) == 0
        if not response.ok or response.status_code > 200 or body_is_empty:
            logger.warning("Cannot query JSON API: url='%s',response.status_code=%d,response.text()=%d", url, response.status_code, len(response.text))
            json_reply.update({
                "status_code": response.status_code,
                "error_message": response.reason,
            })

    except exceptions as exception:
        logger.debug("Fetching url='%s' failed. exception[%s]='%s'", url, type(exception), str(exception))
        json_reply.update({
            "status_code": 999,
            "error_message": f"exception['{type(exception)}']='{str(exception)}'",
            "exception": exception,
        })
        raise exception

    logger.debug("Returning json_reply(%d)[]='%s' - EXIT!", len(json_reply), type(json_reply))
    return json_reply
def get_json_api(domain: str, path: str, headers: dict, timeout: tuple) -> dict:
    """Send a GET request to ``https://{domain}{path}`` and wrap the JSON outcome.

    Unlike ``post_json_api()`` this function does not re-raise network
    errors: they are reported through the returned dict.

    Args:
        domain: Host to contact. Checked with ``domain_helper.raise_on()`` and
            against the blacklist.
        path: Request path, must be a non-empty string starting with "/".
        headers: Extra headers, merged over ``_api_headers``.
        timeout: Timeout tuple, forwarded to ``_fetch_response()``.

    Returns:
        A dict that always has "status_code". On success it carries "json".
        On failure it carries "error_message" (and "exception" when one of
        the module-level ``exceptions`` was caught); status code 999 marks
        an exception or a non-JSON content type.

    Raises:
        RuntimeError: ``domain`` is blacklisted.
        TypeError: ``path`` is not a string, ``headers`` is not a dict or
            ``timeout`` is not a tuple.
        ValueError: ``path`` is empty or lacks the leading slash.
    """
    logger.debug("domain='%s',path='%s',timeout()=%d - CALLED!", domain, path, len(timeout))
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise RuntimeError(f"domain='{domain}' is blacklisted but function was invoked")
    if not isinstance(path, str):
        raise TypeError(f"path[]='{type(path)}' has not expected type 'str'")
    if path == "":
        raise ValueError("Parameter 'path' is empty")
    if not path.startswith("/"):
        raise ValueError(f"path='{path}' does not start with / but should")
    if not isinstance(headers, dict):
        # The check is for a dict, so the message must say so.
        raise TypeError(f"headers[]='{type(headers)}' has not expected type 'dict'")
    if not isinstance(timeout, tuple):
        raise TypeError(f"timeout[]='{type(timeout)}' has not expected type 'tuple'")

    json_reply = {
        "status_code": 200,
    }

    try:
        logger.debug("Sending GET to domain='%s',path='%s',timeout(%d)='%s'", domain, path, len(timeout), timeout)
        response = _fetch_response(domain, path, {**_api_headers, **headers}, timeout)
    except exceptions as exception:
        logger.debug("Fetching path='%s' from domain='%s' failed. exception[%s]='%s'", path, domain, type(exception), str(exception))
        json_reply["status_code"] = 999
        json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'"
        json_reply["exception"] = exception

        logger.debug("Invoking instances.set_last_error(%s,%s) ...", domain, exception)
        instances.set_last_error(domain, exception)
        logger.debug("Returning json_reply(%d)[]='%s' during an exception: '%s' - EXIT!", len(json_reply), type(json_reply), exception)
        return json_reply

    logger.debug("response.ok='%s',response.status_code=%d,response.reason='%s'", response.ok, response.status_code, response.reason)
    if not response.ok or response.status_code > 200:
        json_reply["status_code"] = response.status_code
        json_reply["error_message"] = f"response.ok='{response.ok}' or response.status_code={response.status_code} was not expected!"
        logger.debug(json_reply["error_message"])
    elif not json_helper.is_json_response(response):
        logger.debug("content-type='%s' is not a valid JSON response!", response.headers.get("content-type"))
        json_reply["status_code"] = 999
        json_reply["error_message"] = f"content-type='{response.headers.get('content-type')}' is not a JSON response!"
    elif response.ok and response.status_code == 200:
        logger.debug("Parsing JSON response from domain='%s',path='%s' ...", domain, path)
        json_reply["json"] = json_helper.from_response(response)
        logger.debug("json_reply[json][]='%s'", type(json_reply["json"]))

    # This second check also covers an empty body and records the failure on
    # the instance; it deliberately overwrites the error message set above.
    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code > 200 or len(response.text) == 0:
        logger.debug("Cannot query JSON API: domain='%s',path='%s',response.status_code=%d,response.text()=%d", domain, path, response.status_code, len(response.text))
        json_reply["status_code"] = response.status_code
        json_reply["error_message"] = response.reason
        instances.set_last_error(domain, response)

    logger.debug("Returning json_reply(%d)[]='%s' - EXIT!", len(json_reply), type(json_reply))
    return json_reply
def send_bot_post(domain: str, blocklist: list) -> bool:
    """Post a summary of ``domain``'s block list through the configured bot account.

    At most 20 entries are posted; a longer list is cut to its first 20
    entries and the post says so. Reasons longer than 420 characters are
    shortened, and "@" gets a zero-width space appended so that the post
    does not mention anyone.

    Args:
        domain: Instance whose blocks are announced. Checked with
            ``domain_helper.raise_on()`` and against the blacklist.
        blocklist: Non-empty list of dicts with the keys "blocked" and
            "reason". The list and its entries are not modified.

    Returns:
        ``True`` when the bot instance answered 200 with a non-empty body.

    Raises:
        RuntimeError: ``domain`` is blacklisted.
        TypeError: ``blocklist`` is not a list.
        ValueError: ``blocklist`` is empty or no bot token is configured.
    """
    # len() is only safe on a list here; the type check follows below.
    logger.debug("domain='%s',blocklist()=%d - CALLED!", domain, len(blocklist) if isinstance(blocklist, list) else 0)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise RuntimeError(f"domain='{domain}' is blacklisted but function was invoked")
    if not isinstance(blocklist, list):
        raise TypeError(f"Parameter blocklist[]='{type(blocklist)}' has not expected type 'list'")
    if len(blocklist) == 0:
        raise ValueError("Parameter 'blocklist' is an empty list")
    if config.get("bot_token") == "":
        raise ValueError("config[bot_token] is not set")

    truncated = len(blocklist) > 20
    if truncated:
        logger.warning("blocklist()=%d for domain='%s' has more than 20 records, truncating to 20 ...", len(blocklist), domain)
        # Keep exactly the 20 entries the log line and the post announce.
        blocklist = blocklist[:20]

    parts = [f"{domain} has blocked the following instances:\n\n"]

    logger.debug("blocklist()=%d", len(blocklist))
    for block in blocklist:
        logger.debug("block[%s]='%s'", type(block), block)

        # Local copy: shortening must not alter the caller's record.
        reason = block["reason"]
        if reason in [None, ""]:
            logger.debug("block[blocked]='%s' is being blocked with no reason specified", block["blocked"])
            parts.append(block["blocked"] + " with unspecified reason\n")
            continue

        logger.debug("block[reason]()=%d", len(reason))
        if len(reason) > 420:
            reason = reason[0:419] + "[…]"

        parts.append(block["blocked"] + ' for "' + reason.replace("@", "@\u200b") + '"\n')

    if truncated:
        parts.append("(the list has been truncated to the first 20 entries)")

    message = "".join(parts)

    response = reqto.post(
        f"{config.get('bot_instance')}/api/v1/statuses",
        data={
            "status": message,
            "visibility": config.get("bot_visibility"),
            "content_type": "text/plain"
        },
        headers={**_api_headers, **{"Authorization": "Bearer " + config.get("bot_token")}},
        timeout=config.timeout,
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    return response.ok and response.status_code == 200 and response.text.strip() != ""
def _fetch_response(domain: str, path: str, headers: dict, timeout: tuple, allow_redirects: bool = False) -> requests.models.Response:
    """Send a GET request to ``https://{domain}{path}`` and return the raw response.

    The requested path and the elapsed time are stored on the instance
    record whether the request succeeds or fails.

    Args:
        domain: Host to contact. Checked with ``domain_helper.raise_on()`` and
            against the blacklist.
        path: Request path, must be a non-empty string starting with "/".
        headers: Complete header set to send.
        timeout: Timeout tuple handed to ``reqto.get()``.
        allow_redirects: Whether redirects are followed.

    Returns:
        The response object returned by ``reqto.get()``.

    Raises:
        RuntimeError: ``domain`` is blacklisted.
        TypeError: an argument has the wrong type.
        ValueError: ``path`` is empty or lacks the leading slash.
        Exception: any member of the module-level ``exceptions`` tuple is
            recorded via ``instances.set_last_error()`` and raised again.
    """
    logger.debug("domain='%s',path='%s',headers()=%d,timeout='%s',allow_redirects='%s' - CALLED!", domain, path, len(headers), timeout, allow_redirects)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise RuntimeError(f"domain='{domain}' is blacklisted but function was invoked")
    if not isinstance(path, str):
        raise TypeError(f"Parameter path[]='{type(path)}' has not expected type 'str'")
    if path == "":
        raise ValueError("Parameter 'path' is an empty string")
    if not path.startswith("/"):
        raise ValueError(f"path='{path}' does not start with / but should")
    if not isinstance(headers, dict):
        raise TypeError(f"headers[]='{type(headers)}' has not expected type 'dict'")
    if not isinstance(timeout, tuple):
        raise TypeError(f"timeout[]='{type(timeout)}' has not expected type 'tuple'")
    if not isinstance(allow_redirects, bool):
        raise TypeError(f"allow_redirects[]='{type(allow_redirects)}' has not expected type 'bool'")

    def _store_timing(elapsed: float) -> None:
        # Shared by the success and the failure path.
        logger.debug("Setting response_time=%s,path='%s' for domain='%s' ...", elapsed, path, domain)
        instances.set_last_requested_path(domain, path)
        instances.set_last_response_time(domain, elapsed)

    try:
        logger.debug("Sending GET request to 'https://%s%s' ...", domain, path)
        start = time.perf_counter()
        response = reqto.get(
            f"https://{domain}{path}",
            headers=headers,
            timeout=timeout,
            cookies=cookies.get_all(domain),
            allow_redirects=allow_redirects
        )
        response_time = time.perf_counter() - start
        _store_timing(response_time)

        logger.debug("response.ok='%s',response.status_code=%d,response.reason='%s',response_time=%s", response.ok, response.status_code, response.reason, response_time)
    except exceptions as exception:
        logger.debug("Fetching path='%s' from domain='%s' failed. exception[%s]='%s'", path, domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

        # The time until the failure is recorded as well.
        _store_timing(time.perf_counter() - start)

        raise exception

    logger.debug("response[]='%s' - EXIT!", type(response))
    return response
def fetch_url(url: str, headers: dict, timeout: tuple, allow_redirects: bool = True) -> requests.models.Response:
    """Fetch a full URL by splitting it into host and path for ``_fetch_response()``.

    Only the host part of the network location is used: an explicit port
    in ``url`` is dropped, and the request always goes to ``https://``.

    Args:
        url: URL to fetch; must pass ``validators.url()``.
        headers: Complete header set to send.
        timeout: Timeout tuple, forwarded unchanged.
        allow_redirects: Whether redirects are followed.

    Returns:
        The response object returned by ``_fetch_response()``.

    Raises:
        TypeError: an argument has the wrong type.
        ValueError: ``url`` is empty or not a valid URL.
    """
    logger.debug("url='%s',headers()=%d,timeout(%d)='%s',allow_redirects='%s' - CALLED!", url, len(headers), len(timeout), timeout, allow_redirects)

    if not isinstance(url, str):
        raise TypeError(f"Parameter url[]='{type(url)}' has not expected type 'str'")
    if url == "":
        raise ValueError("Parameter 'url' is an empty string")
    if not validators.url(url):
        raise ValueError(f"Parameter url='{url}' is not a valid URL")
    if not isinstance(headers, dict):
        raise TypeError(f"Parameter headers[]='{type(headers)}' has not expected type 'dict'")
    if not isinstance(timeout, tuple):
        raise TypeError(f"Parameter timeout[]='{type(timeout)}' has not expected type 'tuple'")
    if not isinstance(allow_redirects, bool):
        raise TypeError(f"Parameter allow_redirects[]='{type(allow_redirects)}' has not expected type 'bool'")

    logger.debug("Parsing url='%s' ...", url)
    components = urllib.parse.urlparse(url)
    logger.debug("components[%s]='%s'", type(components), components)

    # An empty path becomes "/" whether or not a query follows; otherwise a URL
    # such as "https://host?x=1" would produce the path "?x=1", which
    # _fetch_response() rejects for its missing leading slash.
    request_path = components.path if components.path != "" else "/"

    # Append the query only when there is one, to avoid a trailing "?"
    if components.query != "":
        request_path = f"{request_path}?{components.query}"

    logger.debug("Fetching path='%s' from netloc='%s' ...", request_path, components.netloc)
    response = _fetch_response(
        components.netloc.split(":")[0],
        request_path,
        headers=headers,
        timeout=timeout,
        allow_redirects=allow_redirects
    )

    logger.debug("response[]='%s' - EXIT!", type(response))
    return response
def fetch_json_rows(hostname: str, path: str, headers: dict = None, rows_key: str = None):
    """Fetch a JSON document from ``hostname`` and return its rows.

    Args:
        hostname: Host to query; must pass ``validators.hostname()``.
        path: Request path, must be a non-empty string starting with "/".
        headers: Optional extra headers for ``get_json_api()``; ``None``
            means "none". The passed dict is not modified.
        rows_key: Key of the element inside the JSON document that holds the
            rows. When ``None`` the whole document is returned.

    Returns:
        The rows taken from the JSON reply, or an empty list when the fetch
        reported an error or an exception.

    Raises:
        TypeError: ``hostname``, ``path`` or ``rows_key`` has the wrong type.
        ValueError: ``hostname`` or ``path`` is empty or malformed,
            ``rows_key`` is an empty string, or ``headers`` is neither
            ``None`` nor a dict.
        KeyError: the reply has no "json" element, or the document has no
            element named ``rows_key``.
    """
    # len() is only safe on a dict here; validation of 'headers' happens below.
    logger.debug("hostname='%s',path='%s',headers()=%d,rows_key='%s' - CALLED!", hostname, path, len(headers) if isinstance(headers, dict) else 0, rows_key)

    if not isinstance(hostname, str):
        raise TypeError(f"hostname[]='{type(hostname)}' has not expected type 'str'")
    if hostname == "":
        raise ValueError("Parameter 'hostname' is an empty string")
    if not validators.hostname(hostname):
        raise ValueError(f"hostname='{hostname}' is not a valid hostname")
    if not isinstance(path, str):
        raise TypeError(f"path[]='{type(path)}' has not expected type 'str'")
    if path == "":
        raise ValueError("Parameter 'path' is an empty string")
    if not path.startswith("/"):
        raise ValueError(f"path='{path}' does not start with a slash")
    if headers is not None and not isinstance(headers, dict):
        raise ValueError(f"headers[]='{type(headers)}' has not expected type 'dict'")
    if not isinstance(rows_key, str) and rows_key is not None:
        raise TypeError(f"rows_key[]='{type(rows_key)}' has not expected type 'str'")
    if rows_key is not None and rows_key == "":
        raise ValueError("Parameter 'rows_key' is an empty string")

    # get_json_api() insists on a dict, so an omitted/None value becomes an
    # empty one instead of relying on a shared mutable default.
    request_headers = {} if headers is None else headers

    logger.debug("Invoking get_json_api(%s,%s,headers()=%d) ...", hostname, path, len(request_headers))
    fetched = get_json_api(
        hostname,
        path,
        headers=request_headers,
        timeout=config.timeout
    )
    logger.debug("fetched(%d)[]='%s'", len(fetched), type(fetched))

    if "error_message" in fetched:
        logger.warning("Error during fetching API result: '%s' - EXIT!", fetched["error_message"])
        return list()
    if "exception" in fetched:
        logger.warning("Exception '%s' during fetching API result - EXIT!", type(fetched["exception"]))
        return list()
    if "json" not in fetched:
        raise KeyError("fetched has no element 'json'")
    if rows_key not in [None, ""] and rows_key not in fetched["json"]:
        raise KeyError(f"fetched[row] has no element '{rows_key}'")

    if rows_key is None:
        logger.debug("Parameter 'rows_key' is not set, using whole fetched['json'] as rows ...")
        rows = fetched["json"]
    else:
        logger.debug("Setting rows to fetched[json][%s]()=%d ...", rows_key, len(fetched["json"][rows_key]))
        rows = fetched["json"][rows_key]

    logger.debug("rows()=%d - EXIT!", len(rows))
    return rows
def fetch_csv_rows(url: str) -> list:
    """Download a CSV document and return its records as a list of dicts.

    The body is decoded as UTF-8 (a leading byte-order mark is dropped) and
    parsed with ``csv.DictReader`` in the "unix" dialect, so the first line
    provides the column names.

    Args:
        url: URL of the CSV document; must pass ``validators.url()``.

    Returns:
        One dict per data line, keyed by the column names.

    Raises:
        TypeError: ``url`` is not a string.
        ValueError: ``url`` is empty or not a valid URL.
        RuntimeError: the response is not OK, has a status above 200 or an
            empty body.
    """
    logger.debug("url='%s' - CALLED!", url)

    if not isinstance(url, str):
        raise TypeError(f"url[]='{type(url)}' has not expected type 'str'")
    if url == "":
        raise ValueError("Parameter 'url' is an empty string")
    if not validators.url(url):
        raise ValueError(f"Parameter url='{url}' is not a valid URL")

    logger.debug("Fetching url='%s' ...", url)
    response = fetch_url(
        url,
        _generic_headers,
        timeout=config.timeout,
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    if not response.ok or response.status_code > 200 or len(response.content) == 0:
        # Report the size of the same attribute the check and the label refer to.
        logger.warning("Failed fetching url='%s': response.ok='%s',response.status_code=%d,response.content()=%d - EXIT!", url, response.ok, response.status_code, len(response.content))
        raise RuntimeError(f"response.ok='{response.ok}',response.status_code={response.status_code},response.content()={len(response.content)} is unexpected")

    # "utf-8-sig" equals "utf-8" except that a leading BOM is removed, which
    # would otherwise end up inside the first column name.
    lines = response.content.decode("utf-8-sig").splitlines()
    logger.debug("Reading %d lines, dialect=unix ...", len(lines))

    # DictReader() always returns a reader object, so no None check is needed.
    reader = csv.DictReader(lines, dialect="unix")
    logger.debug("reader[]='%s'", type(reader))

    # Init rows from CSV reader
    rows = list(reader)

    logger.debug("rows()=%d - EXIT!", len(rows))
    return rows
def get_generic(domain: str, path: str, allow_redirects: bool = False) -> requests.models.Response:
    """Fetch ``https://{domain}{path}`` as a plain (non-API) request.

    Args:
        domain: Host to contact. Checked with ``domain_helper.raise_on()`` and
            against the blacklist.
        path: Request path, must be a non-empty string starting with "/".
        allow_redirects: Whether redirects are followed.

    Returns:
        The response object returned by ``_fetch_response()``.

    Raises:
        RuntimeError: ``domain`` is blacklisted.
        TypeError: ``path`` is not a string or ``allow_redirects`` is not a bool.
        ValueError: ``path`` is empty or lacks the leading slash.
    """
    logger.debug("domain='%s',path='%s',allow_redirects='%s' - CALLED!", domain, path, allow_redirects)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise RuntimeError(f"domain='{domain}' is blacklisted but function was invoked")
    if not isinstance(path, str):
        raise TypeError(f"Parameter path[]='{type(path)}' has not expected type 'str'")
    if path == "":
        raise ValueError("Parameter 'path' is an empty string")
    if not path.startswith("/"):
        raise ValueError(f"path='{path}' does not start with / but should")
    if not isinstance(allow_redirects, bool):
        raise TypeError(f"allow_redirects[]='{type(allow_redirects)}' has not expected type 'bool'")

    logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
    request_options = {
        "timeout": config.timeout,
        "allow_redirects": allow_redirects,
    }
    response = _fetch_response(domain, path, _generic_headers, **request_options)

    logger.debug("response[]='%s' - EXIT!", type(response))
    return response