1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 from fba import config
24 from fba.models import instances
26 # HTTP headers for non-API requests
28 "User-Agent": config.get("useragent"),
31 # HTTP headers for API requests
33 "User-Agent" : config.get("useragent"),
34 "Content-Type": "application/json",
37 # Exceptions to always catch
39 requests.exceptions.ChunkedEncodingError,
40 requests.exceptions.ConnectionError,
41 requests.exceptions.InvalidSchema,
42 requests.exceptions.Timeout,
43 requests.exceptions.TooManyRedirects,
def post_json_api(domain: str, path: str, data: str = "", headers: dict = None) -> dict:
    """Send a POST request to a JSON API endpoint and wrap the outcome in a dict.

    Args:
        domain: Host name to contact. Must be a non-empty string that does
            not end with ".tld".
        path: Request path appended to "https://{domain}". Must be a
            non-empty string.
        data: Request body, passed on to reqto.post().
        headers: Extra HTTP headers, merged over the module-level
            api_headers. None (the default) means no extra headers.

    Returns:
        The reply dict. It holds "json" (the decoded body) for a successful
        response; for a failed one "json" is removed and "status_code" plus
        "error_message" describe the HTTP error.

    Raises:
        ValueError: If a parameter has the wrong type, is empty, or the
            domain ends with ".tld".
    """
    # A None default replaces the former shared mutable `{}` default.
    if headers is None:
        headers = {}

    # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',data='{data}',headers()={len(headers)} - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    elif not isinstance(path, str):
        raise ValueError(f"path[]='{type(path)}' is not 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' cannot be empty")
    elif not isinstance(data, str):
        raise ValueError(f"data[]='{type(data)}' is not 'str'")
    elif not isinstance(headers, dict):
        # The message used to claim 'list' although a dict is required.
        raise ValueError(f"headers[]='{type(headers)}' is not 'dict'")

    # NOTE(review): the initial reply, the `data=` argument and the re-raise
    # below were restored from an incomplete listing - confirm against the
    # repository copy.
    json_reply = {
        "status_code": 200,
    }

    try:
        # DEBUG: print(f"DEBUG: Sending POST to domain='{domain}',path='{path}',data='{data}',headers({len(headers)})={headers}")
        response = reqto.post(
            f"https://{domain}{path}",
            data=data,
            headers={**api_headers, **headers},
            timeout=(config.get("connection_timeout"), config.get("read_timeout"))
        )

        json_reply["json"] = json_from_response(response)

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'")
        if not response.ok or response.status_code >= 400:
            print(f"WARNING: Cannot query JSON API: domain='{domain}',path='{path}',data()={len(data)},response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'")
            json_reply["status_code"] = response.status_code
            json_reply["error_message"] = response.reason
            # A failed response carries no usable payload.
            del json_reply["json"]
            instances.set_last_error(domain, response)

    except exceptions as exception:
        # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'")
        json_reply["status_code"] = 999
        json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'"
        json_reply["exception"] = exception
        instances.set_last_error(domain, exception)
        raise exception

    # DEBUG: print(f"DEBUG: Returning json_reply({len(json_reply)})=[]:{type(json_reply)}")
    return json_reply
def fetch_api_url(url: str, timeout: tuple) -> dict:
    """Fetch a full URL with the API headers and wrap the outcome in a dict.

    Args:
        url: Complete URL to fetch. Must be a non-empty string.
        timeout: Timeout tuple, passed on unchanged to fba.fetch_url().

    Returns:
        The reply dict. It holds "json" (the decoded body) for a successful
        response; for a failed one "json" is removed and "status_code" plus
        "error_message" describe the HTTP error.

    Raises:
        ValueError: If url is not a non-empty string or timeout is not a
            tuple.
    """
    # DEBUG: print(f"DEBUG: url='{url}',timeout()={len(timeout)} - CALLED!")
    if not isinstance(url, str):
        raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
    elif url == "":
        # Reject an empty URL up front, as the sibling helpers do for empty
        # domain/path values.
        raise ValueError("Parameter 'url' is empty")
    elif not isinstance(timeout, tuple):
        raise ValueError(f"timeout[]='{type(timeout)}' is not 'tuple'")

    # NOTE(review): the initial reply and the re-raise below were restored
    # from an incomplete listing - confirm against the repository copy.
    json_reply = {
        "status_code": 200,
    }

    try:
        # DEBUG: print(f"DEBUG: Fetching url='{url}' ...")
        response = fba.fetch_url(url, api_headers, timeout)

        json_reply["json"] = json_from_response(response)

        # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'")
        if not response.ok or response.status_code >= 400:
            print(f"WARNING: Cannot query JSON API: url='{url}',response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'")
            json_reply["status_code"] = response.status_code
            json_reply["error_message"] = response.reason
            # A failed response carries no usable payload.
            del json_reply["json"]

    except exceptions as exception:
        # DEBUG: print(f"DEBUG: Fetching '{url}' failed. exception[{type(exception)}]='{str(exception)}'")
        json_reply["status_code"] = 999
        json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'"
        json_reply["exception"] = exception
        raise exception

    # DEBUG: print(f"DEBUG: Returning json_reply({len(json_reply)})=[]:{type(json_reply)}")
    return json_reply
def get_json_api(domain: str, path: str, headers: dict, timeout: tuple) -> dict:
    """Send a GET request to a JSON API endpoint and wrap the outcome in a dict.

    Args:
        domain: Host name to contact. Must be a non-empty string that does
            not end with ".tld".
        path: Request path appended to "https://{domain}". Must be a
            non-empty string.
        headers: Extra HTTP headers, merged over the module-level
            api_headers.
        timeout: Timeout tuple, passed on to reqto.get().

    Returns:
        The reply dict. It holds "json" (the decoded body) for a successful
        response; for a failed one "json" is removed and "status_code" plus
        "error_message" describe the HTTP error.

    Raises:
        ValueError: If a parameter has the wrong type, is empty, or the
            domain ends with ".tld".
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',timeout()={len(timeout)} - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    elif not isinstance(path, str):
        raise ValueError(f"path[]='{type(path)}' is not 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' cannot be empty")
    elif not isinstance(headers, dict):
        # The message used to claim 'list' although a dict is required.
        raise ValueError(f"headers[]='{type(headers)}' is not 'dict'")
    elif not isinstance(timeout, tuple):
        raise ValueError(f"timeout[]='{type(timeout)}' is not 'tuple'")

    # NOTE(review): the initial reply, the `timeout=` argument and the
    # re-raise below were restored from an incomplete listing - confirm
    # against the repository copy.
    json_reply = {
        "status_code": 200,
    }

    try:
        # DEBUG: print(f"DEBUG: Sending GET to domain='{domain}',path='{path}',timeout({len(timeout)})={timeout}")
        response = reqto.get(
            f"https://{domain}{path}",
            headers={**api_headers, **headers},
            timeout=timeout
        )

    except exceptions as exception:
        # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'")
        json_reply["status_code"] = 999
        json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'"
        json_reply["exception"] = exception
        instances.set_last_error(domain, exception)
        # Control must leave here: `response` is unbound after a failed request.
        raise exception

    json_reply["json"] = json_from_response(response)

    # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'")
    if not response.ok or response.status_code >= 400:
        print(f"WARNING: Cannot query JSON API: domain='{domain}',path='{path}',response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'")
        json_reply["status_code"] = response.status_code
        json_reply["error_message"] = response.reason
        # A failed response carries no usable payload.
        del json_reply["json"]
        instances.set_last_error(domain, response)

    # DEBUG: print(f"DEBUG: Returning json_reply({len(json_reply)})=[]:{type(json_reply)}")
    return json_reply
def send_bot_post(domain: str, blocklist: dict):
    """Post a summary of the instances blocked by `domain` via the bot account.

    Args:
        domain: Host name of the blocking instance. Must be a non-empty
            string that does not end with ".tld".
        blocklist: Block records, each offering the keys "blocked" and
            "reason". A list is expected because the body slices and iterates
            it. A dict is still accepted for backward compatibility;
            presumably its values are the records - verify against callers.

    Returns:
        True after the request has been issued.
        NOTE(review): the end of this function was not visible when this
        was written; confirm the return value against the repository copy.

    Raises:
        ValueError: If a parameter has the wrong type, the domain is empty,
            or the domain ends with ".tld".
    """
    # DEBUG: print(f"DEBUG: domain={domain},blocklist()={len(blocklist)} - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    elif not isinstance(blocklist, (list, dict)):
        # The former check demanded a dict, which the slicing and per-record
        # lookups below cannot work with.
        raise ValueError(f"Parameter blocklist[]='{type(blocklist)}' is not 'list'")

    entries = list(blocklist.values()) if isinstance(blocklist, dict) else blocklist

    # Keep 20 entries, matching the notice appended below; the former
    # slice [0 : 19] kept only 19.
    truncated = len(entries) > 20
    if truncated:
        entries = entries[:20]

    # DEBUG: print(f"DEBUG: blocklist()={len(entries)}")
    parts = [f"{domain} has blocked the following instances:\n\n"]
    for block in entries:
        # DEBUG: print(f"DEBUG: block['{type(block)}']={block}")
        # Work on a local copy so the caller's record is not modified.
        reason = block["reason"]
        if reason is None or reason == '':
            parts.append(block["blocked"] + " with unspecified reason\n")
            continue

        if len(reason) > 420:
            reason = reason[0:419] + "[…]"

        # A zero-width space after "@" keeps handles from becoming mentions.
        parts.append(block["blocked"] + ' for "' + reason.replace("@", "@\u200b") + '"\n')

    if truncated:
        parts.append("(the list has been truncated to the first 20 entries)")

    message = "".join(parts)

    botheaders = {**api_headers, **{"Authorization": "Bearer " + config.get("bot_token")}}

    # NOTE(review): only the URL, "visibility" and "content_type" lines of
    # this call were visible. The "status" field, the `data=` keyword and
    # the timeout were filled in - confirm against the repository copy.
    reqto.post(
        f"{config.get('bot_instance')}/api/v1/statuses",
        data={
            "status"      : message,
            "visibility"  : config.get('bot_visibility'),
            "content_type": "text/plain"
        },
        headers=botheaders,
        timeout=(config.get("connection_timeout"), config.get("read_timeout"))
    )

    return True
def fetch_response(domain: str, path: str, headers: dict, timeout: tuple) -> requests.models.Response:
    """Send a GET request to "https://{domain}{path}" and return the raw response.

    Args:
        domain: Host name to contact. Must be a non-empty string that does
            not end with ".tld".
        path: Request path appended to the domain. Must be a non-empty
            string.
        headers: HTTP headers, passed on to reqto.get().
        timeout: Timeout tuple, passed on to reqto.get().

    Returns:
        The response object produced by reqto.get().

    Raises:
        ValueError: If a parameter has the wrong type, is empty, or the
            domain ends with ".tld".
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',headers()={len(headers)},timeout={timeout} - CALLED!")
    # Each guard raises, so independent checks in this order match an elif chain.
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")
    if domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    if not isinstance(path, str):
        raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")
    if path == "":
        raise ValueError("Parameter 'path' is empty")
    if not isinstance(headers, dict):
        raise ValueError(f"headers[]='{type(headers)}' is not 'dict'")
    if not isinstance(timeout, tuple):
        raise ValueError(f"timeout[]='{type(timeout)}' is not 'tuple'")

    target_url = f"https://{domain}{path}"

    # NOTE(review): the request keyword arguments and the re-raise were
    # restored from an incomplete listing - confirm against the repository
    # copy.
    try:
        # DEBUG: print(f"DEBUG: Sending GET request to '{domain}{path}' ...")
        response = reqto.get(target_url, headers=headers, timeout=timeout)
    except exceptions as exception:
        # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'")
        instances.set_last_error(domain, exception)
        # Propagate the failure; there is no response to hand back.
        raise

    # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXXIT!")
    return response
262 def json_from_response(response: requests.models.Response) -> list:
263 # DEBUG: print(f"DEBUG: response[]='{type(response)}' - CALLED!")
264 if not isinstance(response, requests.models.Response):
265 raise ValueError(f"Parameter response[]='{type(response)}' is not type of 'Response'")
268 if response.text.strip() != "":
269 # DEBUG: print(f"DEBUG: response.text()={len(response.text)} is not empty, invoking response.json() ...")
271 data = response.json()
272 except json.decoder.JSONDecodeError:
275 # DEBUG: print(f"DEBUG: data[]='{type(data)}' - EXIT!")