1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
20 from urllib.parse import urlparse
30 from fba.helpers import blacklist
31 from fba.helpers import config
32 from fba.helpers import cookies
33 from fba.helpers import domain as domain_helper
34 from fba.helpers import json as json_helper
36 from fba.models import instances
# Module-wide logging: configure the root logger at INFO level and create
# the logger used by every function in this module.
38 logging.basicConfig(level=logging.INFO)
39 logger = logging.getLogger(__name__)
# NOTE(review): the opening/closing lines of the three literals below are not
# visible in the reviewed copy, so their names are inferred from usage.
41 # HTTP headers for non-API requests
# Only the configured user agent is sent.
43 "User-Agent": config.get("useragent"),
46 # HTTP headers for API requests
# Presumably the `api_headers` mapping that the request functions below merge
# with caller-supplied headers - confirm.
48 "User-Agent" : config.get("useragent"),
49 "Content-Type": "application/json",
52 # Exceptions to always catch
# Presumably the `exceptions` collection named in the `except exceptions`
# clauses below - confirm. It mixes requests, eventlet and urllib3 errors.
54 requests.exceptions.ChunkedEncodingError,
55 requests.exceptions.ConnectionError,
56 requests.exceptions.ContentDecodingError,
57 requests.exceptions.InvalidSchema,
58 requests.exceptions.InvalidURL,
59 requests.exceptions.Timeout,
60 eventlet.timeout.Timeout,
61 requests.exceptions.TooManyRedirects,
64 urllib3.exceptions.LocationParseError
def post_json_api(domain: str, path: str, data: str = "", headers: dict = dict()) -> dict:
    """Send a POST request to a domain's JSON API and collect the outcome.

    The request goes to https://<domain><path> with the module-level
    api_headers merged with `headers` (caller entries win), the configured
    connection/read timeouts and the cookies returned by
    cookies.get_all(domain). The measured response time is stored through
    instances.set_last_response_time().

    Args:
        domain: Host to contact; passed to domain_helper.raise_on() and
            rejected when blacklisted.
        path: Request path; must be a string starting with "/".
        data: Payload to send; must be a string.
        headers: Additional HTTP headers. The shared default dict is only
            read here, never mutated.

    Returns:
        A dict with "status_code"; "json" is added when the response is OK
        with status 200, and "error_message" (plus the HTTP status) when the
        response is not OK, has a status above 200 or a blank body.

    Raises:
        ValueError: If the domain is blacklisted or a parameter is invalid.
    """
    # NOTE(review): the reviewed copy of this function had short lines
    # elided. The `path == ""` test, the initial json_reply, the `data=`
    # argument, the `try:` opener, the re-raise in the handler and the final
    # return are reconstructed from context - confirm against the repository.
    logger.debug("domain='%s',path='%s',data='%s',headers()=%d - CALLED!", domain, path, data, len(headers))
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise ValueError(f"domain='{domain}' is blacklisted but function was invoked")
    elif not isinstance(path, str):
        raise ValueError(f"path[]='{type(path)}' is not of type 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' is empty")
    elif not path.startswith("/"):
        raise ValueError(f"path='{path}' does not start with / but should")
    elif not isinstance(data, str):
        raise ValueError(f"data[]='{type(data)}' is not of type 'str'")
    elif not isinstance(headers, dict):
        # The check is for dict, so the message names dict (it used to say 'list')
        raise ValueError(f"headers[]='{type(headers)}' is not of type 'dict'")

    json_reply = {
        "status_code": 200,
    }

    try:
        logger.debug("Sending POST to domain='%s',path='%s',data='%s',headers(%d)='%s'", domain, path, data, len(headers), headers)
        # Time the request so the duration can be stored for the domain
        start = time.perf_counter()
        response = reqto.post(
            f"https://{domain}{path}",
            data=data,
            headers={**api_headers, **headers},
            timeout=(config.get("connection_timeout"), config.get("read_timeout")),
            cookies=cookies.get_all(domain),
        )
        response_time = time.perf_counter() - start
        logger.debug("response_time=%s", response_time)

        instances.set_last_response_time(domain, response_time)

        logger.debug("response.ok='%s',response.status_code=%d,response.reason='%s',response_time=%s", response.ok, response.status_code, response.reason, response_time)
        if response.ok and response.status_code == 200:
            logger.debug("Parsing JSON response from domain='%s',path='%s' ...", domain, path)
            json_reply["json"] = json_helper.from_response(response)

        logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
        # A 200 response with a blank body is reported as an error as well
        if not response.ok or response.status_code > 200 or len(response.text.strip()) == 0:
            logger.debug("Cannot query JSON API: domain='%s',path='%s',data()=%d,response.status_code=%d,response.text()=%d", domain, path, len(data), response.status_code, len(response.text))
            json_reply["status_code"] = response.status_code
            json_reply["error_message"] = response.reason
            instances.set_last_error(domain, response)

    except exceptions as exception:
        logger.debug("Fetching path='%s' from domain='%s' failed. exception[%s]='%s'", path, domain, type(exception), str(exception))
        # 999 marks a failure that produced no HTTP status at all
        json_reply["status_code"] = 999
        json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'"
        json_reply["exception"] = exception
        instances.set_last_error(domain, exception)
        raise exception

    logger.debug("Returning json_reply(%d)[]='%s' - EXIT!", len(json_reply), type(json_reply))
    return json_reply
def fetch_api_url(url: str, timeout: tuple) -> dict:
    """Fetch a JSON document from an absolute URL and summarise the outcome.

    Args:
        url: Non-empty URL string accepted by validators.url().
        timeout: Timeout tuple handed on to fetch_url().

    Returns:
        A dict with "status_code"; "json" holds the decoded body for an OK
        response with status 200, "error_message" the HTTP reason for a
        response that is not OK, has a status above 200 or an empty body.

    Raises:
        ValueError: If a parameter fails validation.
    """
    # NOTE(review): the reviewed copy of this function had short lines
    # elided. The `url == ""` test, the initial result dict, the `try:`
    # opener, the re-raise in the handler and the final return are
    # reconstructed from context - confirm against the repository.
    logger.debug("url='%s',timeout()=%d - CALLED!", url, len(timeout))

    # Guard clauses: every failed check aborts with ValueError
    if not isinstance(url, str):
        raise ValueError(f"Parameter url[]='{type(url)}' is not of type 'str'")
    if url == "":
        raise ValueError("Parameter 'url' is empty")
    if not validators.url(url):
        raise ValueError(f"Parameter url='{url}' is not a valid URL")
    if not isinstance(timeout, tuple):
        raise ValueError(f"timeout[]='{type(timeout)}' is not of type 'tuple'")

    result = {"status_code": 200}

    try:
        logger.debug("Fetching url='%s' ...", url)
        reply = fetch_url(url, api_headers, timeout)

        logger.debug("response.ok='%s',response.status_code=%d,response.reason='%s'", reply.ok, reply.status_code, reply.reason)
        if reply.ok and reply.status_code == 200:
            logger.debug("Parsing JSON response from url='%s' ...", url)
            result["json"] = json_helper.from_response(reply)

        logger.debug("response.ok='%s',response.status_code='%s',response.text()=%d", reply.ok, reply.status_code, len(reply.text))
        # Usable means: OK, status not above 200 and a non-empty body
        usable = reply.ok and reply.status_code <= 200 and len(reply.text) != 0
        if not usable:
            logger.warning("Cannot query JSON API: url='%s',response.status_code=%d,response.text()=%d", url, reply.status_code, len(reply.text))
            result["status_code"] = reply.status_code
            result["error_message"] = reply.reason

    except exceptions as error:
        logger.debug("Fetching url='%s' failed. exception[%s]='%s'", url, type(error), str(error))
        # 999 marks a failure that produced no HTTP status at all
        result.update({
            "status_code": 999,
            "error_message": f"exception['{type(error)}']='{str(error)}'",
            "exception": error,
        })
        raise error

    logger.debug("Returning json_reply(%d)[]='%s' - EXIT!", len(result), type(result))
    return result
def get_json_api(domain: str, path: str, headers: dict, timeout: tuple) -> dict:
    """Fetch https://<domain><path> via GET and collect the JSON outcome.

    The request itself is delegated to fetch_response() with the
    module-level api_headers merged with `headers` (caller entries win).

    Args:
        domain: Host to contact; passed to domain_helper.raise_on() and
            rejected when blacklisted.
        path: Request path; must be a string starting with "/".
        headers: Additional HTTP headers as a dict.
        timeout: Timeout tuple handed on to fetch_response().

    Returns:
        A dict with "status_code"; "json" is added when the response is OK
        with status 200, and "error_message" (plus the HTTP status) when the
        response is not OK, has a status above 200 or an empty body.

    Raises:
        ValueError: If the domain is blacklisted or a parameter is invalid.
    """
    # NOTE(review): the reviewed copy of this function had short lines
    # elided. The `path == ""` test, the initial json_reply, the `try:`
    # opener, the re-raise in the handler and the final return are
    # reconstructed from context - confirm against the repository.
    logger.debug("domain='%s',path='%s',timeout()=%d - CALLED!", domain, path, len(timeout))
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise ValueError(f"domain='{domain}' is blacklisted but function was invoked")
    elif not isinstance(path, str):
        raise ValueError(f"path[]='{type(path)}' is not of type 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' is empty")
    elif not path.startswith("/"):
        raise ValueError(f"path='{path}' does not start with / but should")
    elif not isinstance(headers, dict):
        # The check is for dict, so the message names dict (it used to say 'list')
        raise ValueError(f"headers[]='{type(headers)}' is not of type 'dict'")
    elif not isinstance(timeout, tuple):
        raise ValueError(f"timeout[]='{type(timeout)}' is not of type 'tuple'")

    json_reply = {
        "status_code": 200,
    }

    try:
        logger.debug("Sending GET to domain='%s',path='%s',timeout(%d)='%s'", domain, path, len(timeout), timeout)
        response = fetch_response(domain, path, {**api_headers, **headers}, timeout)
    except exceptions as exception:
        logger.debug("Fetching path='%s' from domain='%s' failed. exception[%s]='%s'", path, domain, type(exception), str(exception))
        # 999 marks a failure that produced no HTTP status at all
        json_reply["status_code"] = 999
        json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'"
        json_reply["exception"] = exception
        instances.set_last_error(domain, exception)
        raise exception

    logger.debug("response.ok='%s',response.status_code=%d,response.reason='%s'", response.ok, response.status_code, response.reason)
    if response.ok and response.status_code == 200:
        logger.debug("Parsing JSON response from domain='%s',path='%s' ...", domain, path)
        json_reply["json"] = json_helper.from_response(response)
        logger.debug("json_reply[json][]='%s'", type(json_reply["json"]))

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    # A 200 response with an empty body is reported as an error as well
    if not response.ok or response.status_code > 200 or len(response.text) == 0:
        logger.debug("Cannot query JSON API: domain='%s',path='%s',response.status_code=%d,response.text()=%d", domain, path, response.status_code, len(response.text))
        json_reply["status_code"] = response.status_code
        json_reply["error_message"] = response.reason
        instances.set_last_error(domain, response)

    logger.debug("Returning json_reply(%d)[]='%s' - EXIT!", len(json_reply), type(json_reply))
    return json_reply
def send_bot_post(domain: str, blocklist: list) -> bool:
    """Post a summary of a domain's block list through the configured bot.

    At most 20 entries are listed. A reason longer than 420 characters is
    cut to its first 419 characters followed by "[…]". The status is sent
    to <bot_instance>/api/v1/statuses with the configured bot token.

    Args:
        domain: Blocking domain; passed to domain_helper.raise_on() and
            rejected when blacklisted.
        blocklist: Non-empty list of dicts with the keys "blocked" and
            "reason" ("reason" may be None or an empty string). The dicts
            are not modified.

    Returns:
        True when the reply is OK with status 200 and a non-blank body,
        otherwise False.

    Raises:
        ValueError: If the domain is blacklisted, `blocklist` is not a
            non-empty list, or the "bot_token" setting is empty.
    """
    # NOTE(review): the reviewed copy of this function had short lines
    # elided. The `truncated` flag, the `else` branch of the reason check
    # and the opening of the request payload (`data=` with its "status"
    # entry) are reconstructed from context - confirm against the repository.
    logger.debug("domain='%s',blocklist()=%d - CALLED!", domain, len(blocklist))
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise ValueError(f"domain='{domain}' is blacklisted but function was invoked")
    elif not isinstance(blocklist, list):
        raise ValueError(f"Parameter blocklist[]='{type(blocklist)}' is not of type 'list'")
    elif len(blocklist) == 0:
        raise ValueError("Parameter 'blocklist' is empty")
    elif config.get("bot_token") == "":
        raise ValueError("config[bot_token] is not set")

    truncated = False

    if len(blocklist) > 20:
        logger.warning("blocklist()=%d for domain='%s' has more than 20 records, truncating to 20 ...", len(blocklist), domain)
        truncated = True
        # Keep exactly 20 entries, matching the warning above and the notice
        # appended to the message (the old slice kept only 19)
        blocklist = blocklist[:20]

    logger.debug("blocklist()=%d", len(blocklist))
    parts = [f"{domain} has blocked the following instances:\n\n"]
    for block in blocklist:
        logger.debug("block[%s]='%s'", type(block), block)
        reason = block["reason"]
        if reason in [None, ""]:
            logger.debug("block[blocked]='%s' is being blocked with no reason specified", block["blocked"])
            parts.append(block["blocked"] + " with unspecified reason\n")
            continue

        logger.debug("block[reason]()=%d", len(reason))
        if len(reason) > 420:
            # Shorten a local copy only, the caller's dict stays untouched
            reason = reason[0:419] + "[…]"

        # A zero-width space follows every "@" (presumably so that handles
        # are not turned into mentions - confirm)
        parts.append(block["blocked"] + ' for "' + reason.replace("@", "@\u200b") + '"\n')

    if truncated:
        parts.append("(the list has been truncated to the first 20 entries)")

    message = "".join(parts)

    response = reqto.post(
        f"{config.get('bot_instance')}/api/v1/statuses",
        data={
            "status": message,
            "visibility": config.get("bot_visibility"),
            "content_type": "text/plain"
        },
        headers={**api_headers, **{"Authorization": "Bearer " + config.get("bot_token")}},
        timeout=(config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=False
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    return response.ok and response.status_code == 200 and response.text.strip() != ""
# Perform a GET request against https://<domain><path> and record timing and
# errors for the domain through the instances model.
#
# Parameters: domain and path form the URL; headers must be a dict and timeout
# a tuple; allow_redirects is handed to reqto.get() unchanged.
# Every failed parameter check below raises ValueError.
#
# NOTE(review): several short lines of this function are not visible in the
# reviewed copy (among them the `try:` opener, two arguments of the request
# call and the closing statements); the comments describe only what is visible.
268 def fetch_response(domain: str, path: str, headers: dict, timeout: tuple, allow_redirects: bool = False) -> requests.models.Response:
269 logger.debug("domain='%s',path='%s',headers()=%d,timeout='%s',allow_redirects='%s' - CALLED!", domain, path, len(headers), timeout, allow_redirects)
# Domain validation is delegated to the domain helper
270 domain_helper.raise_on(domain)
# Blacklisted domains must never be contacted
272 if blacklist.is_blacklisted(domain):
273 raise ValueError(f"domain='{domain}' is blacklisted but function was invoked")
274 elif not isinstance(path, str):
275 raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")
# (the condition guarding the next raise is not visible here; presumably an
# empty-string test on path)
277 raise ValueError("Parameter 'path' is empty")
278 elif not path.startswith("/"):
279 raise ValueError(f"path='{path}' does not start with / but should")
280 elif not isinstance(headers, dict):
281 raise ValueError(f"headers[]='{type(headers)}' is not of type 'dict'")
282 elif not isinstance(timeout, tuple):
283 raise ValueError(f"timeout[]='{type(timeout)}' is not of type 'tuple'")
287 logger.debug("Sending GET request to 'https://%s%s' ...", domain, path)
# Time the request so its duration can be stored for the domain
288 start = time.perf_counter()
289 response = reqto.get(
290 f"https://{domain}{path}",
# Send the cookies returned by cookies.get_all() for this domain
293 cookies=cookies.get_all(domain),
294 allow_redirects=allow_redirects
296 response_time = time.perf_counter() - start
297 logger.debug("Setting response_time=%s for domain='%s' ...", response_time, domain)
298 instances.set_last_response_time(domain, response_time)
300 logger.debug("response.ok='%s',response.status_code=%d,response.reason='%s',response_time=%s", response.ok, response.status_code, response.reason, response_time)
# Failure path for the exception types collected in `exceptions`: the error is
# recorded for the domain
301 except exceptions as exception:
302 logger.debug("Fetching path='%s' from domain='%s' failed. exception[%s]='%s'", path, domain, type(exception), str(exception))
303 instances.set_last_error(domain, exception)
# The elapsed time is stored for failed requests as well
305 response_time = time.perf_counter() - start
306 logger.debug("Setting response_time=%s for domain='%s' ...", response_time, domain)
307 instances.set_last_response_time(domain, response_time)
311 logger.debug("response[]='%s' - EXIT!", type(response))
def fetch_url(url: str, headers: dict, timeout: tuple) -> requests.models.Response:
    """Fetch an absolute URL by splitting it and delegating to fetch_response().

    Only the host part of the netloc (everything before the first ":"), the
    path and the query string are forwarded. fetch_response() always builds
    an https:// URL from them. An empty path is replaced by "/" whether or
    not a query string is present.

    Args:
        url: Non-empty URL string accepted by validators.url().
        headers: HTTP headers as a dict, handed on unchanged.
        timeout: Timeout tuple, handed on unchanged.

    Returns:
        The response object returned by fetch_response().

    Raises:
        ValueError: If a parameter fails validation.
    """
    # NOTE(review): the reviewed copy of this function had short lines
    # elided. The `url == ""` test, the trailing arguments of the
    # fetch_response() calls, the `else` branch and the final return are
    # reconstructed from context - confirm against the repository.
    logger.debug("url='%s',headers()=%d,timeout(%d)='%s' - CALLED!", url, len(headers), len(timeout), timeout)

    if not isinstance(url, str):
        raise ValueError(f"Parameter url[]='{type(url)}' is not of type 'str'")
    elif url == "":
        raise ValueError("Parameter 'url' is empty")
    elif not validators.url(url):
        raise ValueError(f"Parameter url='{url}' is not a valid URL")
    elif not isinstance(headers, dict):
        raise ValueError(f"Parameter headers[]='{type(headers)}' is not of type 'dict'")
    elif not isinstance(timeout, tuple):
        raise ValueError(f"Parameter timeout[]='{type(timeout)}' is not of type 'tuple'")

    logger.debug("Parsing url='%s' ...", url)
    components = urlparse(url)

    # fetch_response() rejects paths that do not start with "/", so an empty
    # path is defaulted here for both branches. Previously a URL such as
    # https://host?x=1 produced the path "?x=1" and failed with ValueError.
    path = components.path if isinstance(components.path, str) and components.path != '' else '/'

    # Invoke other function, avoid trailing ?
    logger.debug("components[%s]='%s'", type(components), components)
    if components.query != "":
        logger.debug("Fetching path='%s?%s' from netloc='%s' ...", components.path, components.query, components.netloc)
        path = f"{path}?{components.query}"
    else:
        logger.debug("Fetching path='%s' from netloc='%s' ...", components.path, components.netloc)

    response = fetch_response(
        # Drop a port suffix, only the host name is passed on
        components.netloc.split(":")[0],
        path,
        headers,
        timeout
    )

    logger.debug("response[]='%s' - EXIT!", type(response))
    return response