1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 from fba import config
24 from fba.models import instances
26 # HTTP headers for non-API requests
# Both header sets take their User-Agent from the "useragent" configuration entry.
28 "User-Agent": config.get("useragent"),
31 # HTTP headers for API requests
# NOTE(review): presumably the `api_headers` mapping that post_json_api(),
# fetch_api_url(), get_json_api() and send_bot_post() reference - confirm.
33 "User-Agent" : config.get("useragent"),
34 "Content-Type": "application/json",
37 # Exceptions to always catch
# NOTE(review): presumably the `exceptions` tuple named by the
# `except exceptions as exception` handlers of the request helpers - confirm.
39 requests.exceptions.ChunkedEncodingError,
40 requests.exceptions.ConnectionError,
41 requests.exceptions.InvalidSchema,
42 requests.exceptions.InvalidURL,
43 requests.exceptions.Timeout,
44 requests.exceptions.TooManyRedirects,
# Sends a POST request to https://{domain}{path} through reqto.post() and
# records the outcome in the json_reply dict.
#
# Parameters:
#   domain  - host name; must be a str, not empty, not ending in ".tld"
#   path    - appended directly to the domain; must be a non-empty str
#   data    - must be a str (defaults to "")
#   headers - must be a dict; merged over api_headers for the request
#
# Raises:
#   ValueError - when one of the parameter checks below fails
#
# Keys written to json_reply by the visible code:
#   "json"          - result of json_from_response(response)
#   "status_code"   - HTTP status on an error response, 999 on a caught exception
#   "error_message" - response.reason, or a description of the caught exception
#   "exception"     - the caught exception object
#
# NOTE(review): `headers: dict = {}` is a mutable default argument. It is only
# read in the visible code, but a `None` default is the safer idiom.
48 def post_json_api(domain: str, path: str, data: str = "", headers: dict = {}) -> dict:
49 # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',data='{data}',headers()={len(headers)} - CALLED!")
50 if not isinstance(domain, str):
51 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
53 raise ValueError("Parameter 'domain' is empty")
# ".tld" domains are treated as fake and are never contacted.
54 elif domain.endswith(".tld"):
55 raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
56 elif not isinstance(path, str):
57 raise ValueError(f"path[]='{type(path)}' is not 'str'")
59 raise ValueError("Parameter 'path' cannot be empty")
60 elif not isinstance(data, str):
61 raise ValueError(f"data[]='{type(data)}' is not 'str'")
62 elif not isinstance(headers, dict):
# NOTE(review): the check is for dict, but the message reports 'list'.
63 raise ValueError(f"headers[]='{type(headers)}' is not 'list'")
70 # DEBUG: print(f"DEBUG: Sending POST to domain='{domain}',path='{path}',data='{data}',headers({len(headers)})={headers}")
71 response = reqto.post(
72 f"https://{domain}{path}",
# Caller-supplied headers win over api_headers when keys collide.
74 headers={**api_headers, **headers},
# Timeout tuple is built from the configured connection and read timeouts.
75 timeout=(config.get("connection_timeout"), config.get("read_timeout"))
78 json_reply["json"] = json_from_response(response)
80 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'")
# NOTE(review): with a requests-style Response, `ok` is already False for
# status codes >= 400, so the second test looks redundant - confirm.
81 if not response.ok or response.status_code >= 400:
82 print(f"WARNING: Cannot query JSON API: domain='{domain}',path='{path}',data()={len(data)},response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'")
# On an error response the parsed body is dropped; status and reason are kept instead.
83 json_reply["status_code"] = response.status_code
84 json_reply["error_message"] = response.reason
85 del json_reply["json"]
# Record the failing response for this domain.
86 instances.set_last_error(domain, response)
88 except exceptions as exception:
89 # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'")
# 999 marks a failure that came from a caught exception, not from an HTTP status.
90 json_reply["status_code"] = 999
91 json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'"
92 json_reply["exception"] = exception
93 instances.set_last_error(domain, exception)
96 # DEBUG: print(f"DEBUG: Returning json_reply({len(json_reply)})=[]:{type(json_reply)}")
# Fetches a complete URL through fba.fetch_url() using api_headers and records
# the outcome in the json_reply dict.
#
# Parameters:
#   url     - must be a str
#   timeout - must be a tuple; handed unchanged to fba.fetch_url()
#
# Raises:
#   ValueError - when url is not a str or timeout is not a tuple
#
# Keys written to json_reply by the visible code:
#   "json"          - result of json_from_response(response)
#   "status_code"   - HTTP status on an error response, 999 on a caught exception
#   "error_message" - response.reason, or a description of the caught exception
#   "exception"     - the caught exception object
99 def fetch_api_url(url: str, timeout: tuple) -> dict:
100 # DEBUG: print(f"DEBUG: url='{url}',timeout()={len(timeout)} - CALLED!")
101 if not isinstance(url, str):
102 raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
103 elif not isinstance(timeout, tuple):
104 raise ValueError(f"timeout[]='{type(timeout)}' is not 'tuple'")
111 # DEBUG: print(f"DEBUG: Fetching url='{url}' ...")
112 response = fba.fetch_url(url, api_headers, timeout)
114 json_reply["json"] = json_from_response(response)
116 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'")
# NOTE(review): with a requests-style Response, `ok` is already False for
# status codes >= 400, so the second test looks redundant - confirm.
117 if not response.ok or response.status_code >= 400:
118 print(f"WARNING: Cannot query JSON API: url='{url}',response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'")
# On an error response the parsed body is dropped; status and reason are kept instead.
119 json_reply["status_code"] = response.status_code
120 json_reply["error_message"] = response.reason
121 del json_reply["json"]
123 except exceptions as exception:
124 # DEBUG: print(f"DEBUG: Fetching '{url}' failed. exception[{type(exception)}]='{str(exception)}'")
# 999 marks a failure that came from a caught exception, not from an HTTP status.
125 json_reply["status_code"] = 999
126 json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'"
127 json_reply["exception"] = exception
130 # DEBUG: print(f"DEBUG: Returning json_reply({len(json_reply)})=[]:{type(json_reply)}")
# Sends a GET request to https://{domain}{path} through reqto.get() and records
# the outcome in the json_reply dict.
#
# Parameters:
#   domain  - host name; must be a str, not empty, not ending in ".tld"
#   path    - appended directly to the domain; must be a non-empty str
#   headers - must be a dict; merged over api_headers for the request
#   timeout - must be a tuple
#
# Raises:
#   ValueError - when one of the parameter checks below fails
#
# Keys written to json_reply by the visible code:
#   "json"          - result of json_from_response(response)
#   "status_code"   - HTTP status on an error response, 999 on a caught exception
#   "error_message" - response.reason, or a description of the caught exception
#   "exception"     - the caught exception object
133 def get_json_api(domain: str, path: str, headers: dict, timeout: tuple) -> dict:
134 # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',timeout()={len(timeout)} - CALLED!")
135 if not isinstance(domain, str):
136 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
138 raise ValueError("Parameter 'domain' is empty")
# ".tld" domains are treated as fake and are never contacted.
139 elif domain.endswith(".tld"):
140 raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
141 elif not isinstance(path, str):
142 raise ValueError(f"path[]='{type(path)}' is not 'str'")
144 raise ValueError("Parameter 'path' cannot be empty")
145 elif not isinstance(headers, dict):
# NOTE(review): the check is for dict, but the message reports 'list'.
146 raise ValueError(f"headers[]='{type(headers)}' is not 'list'")
147 elif not isinstance(timeout, tuple):
148 raise ValueError(f"timeout[]='{type(timeout)}' is not 'tuple'")
155 # DEBUG: print(f"DEBUG: Sending GET to domain='{domain}',path='{path}',timeout({len(timeout)})={timeout}")
156 response = reqto.get(
157 f"https://{domain}{path}",
# Caller-supplied headers win over api_headers when keys collide.
158 headers={**api_headers, **headers},
162 except exceptions as exception:
163 # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'")
# 999 marks a failure that came from a caught exception, not from an HTTP status.
164 json_reply["status_code"] = 999
165 json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'"
166 json_reply["exception"] = exception
167 instances.set_last_error(domain, exception)
# The response body is parsed only after the exception handler above.
170 json_reply["json"] = json_from_response(response)
172 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'")
# NOTE(review): with a requests-style Response, `ok` is already False for
# status codes >= 400, so the second test looks redundant - confirm.
173 if not response.ok or response.status_code >= 400:
174 print(f"WARNING: Cannot query JSON API: domain='{domain}',path='{path}',response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'")
# On an error response the parsed body is dropped; status and reason are kept instead.
175 json_reply["status_code"] = response.status_code
176 json_reply["error_message"] = response.reason
177 del json_reply["json"]
# Record the failing response for this domain.
178 instances.set_last_error(domain, response)
180 # DEBUG: print(f"DEBUG: Returning json_reply({len(json_reply)})=[]:{type(json_reply)}")
# Builds a plain-text status listing the instances blocked by `domain` and
# prepares it for the statuses endpoint of the configured bot instance.
#
# Parameters:
#   domain    - host name; must be a str, not empty, not ending in ".tld"
#   blocklist - entries are read through block["blocked"] and block["reason"]
#
# Raises:
#   ValueError - when one of the parameter checks below fails
#
# NOTE(review): blocklist is validated as a dict, yet it is sliced with
# [0 : 19] and each iterated element is indexed with "reason"/"blocked".
# A dict does not support slicing, and iterating one yields its keys. The body
# reads as if a list of dicts were expected - confirm and align the check.
183 def send_bot_post(domain: str, blocklist: dict):
184 # DEBUG: print(f"DEBUG: domain='{domain}',blocklist()={len(blocklist)} - CALLED!")
185 if not isinstance(domain, str):
186 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
188 raise ValueError("Parameter 'domain' is empty")
# ".tld" domains are treated as fake and are rejected.
189 elif domain.endswith(".tld"):
190 raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
191 elif not isinstance(blocklist, dict):
192 raise ValueError(f"Parameter blocklist[]='{type(blocklist)}' is not 'dict'")
194 message = f"{domain} has blocked the following instances:\n\n"
# Lists longer than 20 entries are cut down before the message is built.
197 if len(blocklist) > 20:
# NOTE(review): [0 : 19] keeps 19 entries, while the threshold and the
# truncation notice below both say 20 - possible off-by-one.
199 blocklist = blocklist[0 : 19]
201 # DEBUG: print(f"DEBUG: blocklist()={len(blocklist)}")
202 for block in blocklist:
203 # DEBUG: print(f"DEBUG: block['{type(block)}']={block}")
# A missing or empty reason gets a fixed placeholder text.
204 if block["reason"] is None or block["reason"] == '':
205 message = message + block["blocked"] + " with unspecified reason\n"
# Long reasons are shortened and marked with "[…]".
# NOTE(review): this keeps 419 characters against a threshold of 420, and it
# overwrites block["reason"] in the caller's object.
207 if len(block["reason"]) > 420:
208 block["reason"] = block["reason"][0:419] + "[…]"
# "@" is followed by U+200B (zero-width space); presumably this keeps the
# posted text from creating mentions - confirm.
210 message = message + block["blocked"] + ' for "' + block["reason"].replace("@", "@\u200b") + '"\n'
213 message = message + "(the list has been truncated to the first 20 entries)"
# api_headers plus a bearer token taken from the "bot_token" configuration entry.
215 botheaders = {**api_headers, **{"Authorization": "Bearer " + config.get("bot_token")}}
# Target URL: /api/v1/statuses on the configured "bot_instance".
218 f"{config.get('bot_instance')}/api/v1/statuses",
221 "visibility" : config.get('bot_visibility'),
222 "content_type": "text/plain"
# Sends a GET request to https://{domain}{path} through reqto.get(); the
# annotated return type is requests.models.Response.
#
# Parameters:
#   domain  - host name; must be a str, not empty, not ending in ".tld"
#   path    - appended directly to the domain; must be a non-empty str
#   headers - must be a dict
#   timeout - must be a tuple
#
# Raises:
#   ValueError - when one of the parameter checks below fails
230 def fetch_response(domain: str, path: str, headers: dict, timeout: tuple) -> requests.models.Response:
231 # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',headers()={len(headers)},timeout={timeout} - CALLED!")
232 if not isinstance(domain, str):
233 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
235 raise ValueError("Parameter 'domain' is empty")
# ".tld" domains are treated as fake and are never contacted.
236 elif domain.endswith(".tld"):
237 raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
238 elif not isinstance(path, str):
239 raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")
241 raise ValueError("Parameter 'path' is empty")
242 elif not isinstance(headers, dict):
243 raise ValueError(f"headers[]='{type(headers)}' is not 'dict'")
244 elif not isinstance(timeout, tuple):
245 raise ValueError(f"timeout[]='{type(timeout)}' is not 'tuple'")
248 # DEBUG: print(f"DEBUG: Sending GET request to '{domain}{path}' ...")
249 response = reqto.get(
250 f"https://{domain}{path}",
255 except exceptions as exception:
256 # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'")
# Record the caught exception as the last error for this domain.
257 instances.set_last_error(domain, exception)
260 # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXIT!")
# Extracts the JSON payload from a response object.
#
# Parameters:
#   response - must be a requests.models.Response
#
# Raises:
#   ValueError - when response is not a requests.models.Response
#
# NOTE(review): the return annotation is `list`, but response.json() yields
# whatever the JSON document holds (for example a dict) - confirm the
# annotation against the callers, which store the result under "json".
263 def json_from_response(response: requests.models.Response) -> list:
264 # DEBUG: print(f"DEBUG: response[]='{type(response)}' - CALLED!")
265 if not isinstance(response, requests.models.Response):
266 raise ValueError(f"Parameter response[]='{type(response)}' is not type of 'Response'")
# Decoding is attempted only when the body holds more than whitespace.
269 if response.text.strip() != "":
270 # DEBUG: print(f"DEBUG: response.text()={len(response.text)} is not empty, invoking response.json() ...")
272 data = response.json()
# A body that is not valid JSON is caught here.
273 except json.decoder.JSONDecodeError:
276 # DEBUG: print(f"DEBUG: data[]='{type(data)}' - EXIT!")