1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 from fba import config
24 from fba.models import instances
# Module-level request settings shared by the helpers in this file.
# NOTE(review): the assignment lines and closing brackets these entries belong
# to are not visible in this extract; confirm the container names against the
# full file.
26 # HTTP headers for non-API requests
# The User-Agent value is read from the configuration key "useragent".
28 "User-Agent": config.get("useragent"),
31 # HTTP headers for API requests
# Same configured User-Agent plus a JSON Content-Type. Presumably these are the
# entries of `api_headers`, which the request helpers merge with caller headers.
33 "User-Agent" : config.get("useragent"),
34 "Content-Type": "application/json",
37 # Exceptions to always catch
# Exception classes from `requests`. Presumably the members of the `exceptions`
# tuple named in the `except exceptions as exception:` handlers of this file.
39 requests.exceptions.ChunkedEncodingError,
40 requests.exceptions.ConnectionError,
41 requests.exceptions.Timeout,
42 requests.exceptions.TooManyRedirects,
# Send a POST request to https://<domain><path> and collect the outcome in the
# dict `json_reply`.
#
# Parameters:
#   domain  - host to contact; must be a 'str' and must not end in ".tld"
#   path    - appended directly after the domain in the URL; must be a 'str'
#   data    - must be a 'str' (default ""); presumably sent as the request body,
#             but the argument line is not visible in this extract
#   headers - extra HTTP headers; must be a 'dict'; merged over `api_headers`
#
# Keys written to `json_reply` by the visible code:
#   "json"          - result of json_from_response(response); deleted again
#                     when the response is an error response
#   "status_code"   - response.status_code for an error response, 999 when one
#                     of the listed exceptions was caught
#   "error_message" - response.reason, or a description of the caught exception
#   "exception"     - the caught exception object
#
# Raises:
#   ValueError - when a parameter fails the checks below
#
# NOTE(review): lines appear to be missing from this extract (the conditions
# guarding the two "is empty" raises, the initialisation of `json_reply`, the
# `try:`, the end of the reqto.post() call, anything following the handler's
# last visible line, and the return). Confirm against the full file.
# NOTE(review): `headers: dict = {}` is a mutable default argument. The visible
# code only unpacks it and never mutates it, but a `None` sentinel would be safer.
46 def post_json_api(domain: str, path: str, data: str = "", headers: dict = {}) -> dict:
47 # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',data='{data}',headers()={len(headers)} - CALLED!")
48 if not isinstance(domain, str):
49 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
51 raise ValueError("Parameter 'domain' is empty")
52 elif domain.endswith(".tld"):
53 raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
54 elif not isinstance(path, str):
55 raise ValueError(f"path[]='{type(path)}' is not 'str'")
57 raise ValueError("Parameter 'path' cannot be empty")
58 elif not isinstance(data, str):
59 raise ValueError(f"data[]='{type(data)}' is not 'str'")
60 elif not isinstance(headers, dict):
# NOTE(review): the message says 'list' although the check above is for 'dict'.
61 raise ValueError(f"headers[]='{type(headers)}' is not 'list'")
68 # DEBUG: print(f"DEBUG: Sending POST to domain='{domain}',path='{path}',data='{data}',headers({len(headers)})={headers}")
69 response = reqto.post(
70 f"https://{domain}{path}",
# Caller-supplied headers come last, so they override `api_headers` on conflicts.
72 headers={**api_headers, **headers},
# Timeout is a (connect, read) pair taken from the configuration.
73 timeout=(config.get("connection_timeout"), config.get("read_timeout"))
76 json_reply["json"] = json_from_response(response)
78 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'")
79 if not response.ok or response.status_code >= 400:
80 print(f"WARNING: Cannot query JSON API: domain='{domain}',path='{path}',data()={len(data)},response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'")
81 json_reply["status_code"] = response.status_code
82 json_reply["error_message"] = response.reason
# An error reply carries no "json" key; the failure is also recorded for the domain.
83 del json_reply["json"]
84 instances.set_last_error(domain, response)
86 except exceptions as exception:
87 # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'")
# 999 marks a request that failed with one of the caught exceptions.
88 json_reply["status_code"] = 999
89 json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'"
90 json_reply["exception"] = exception
91 instances.set_last_error(domain, exception)
94 # DEBUG: print(f"DEBUG: Returning json_reply({len(json_reply)})=[]:{type(json_reply)}")
# Fetch `url` through fba.fetch_url() using `api_headers` and collect the
# outcome in the dict `json_reply`.
#
# Parameters:
#   url     - URL handed to fba.fetch_url(); must be a 'str'
#   timeout - handed to fba.fetch_url() unchanged; must be a 'tuple'
#
# Keys written to `json_reply` by the visible code:
#   "json"          - result of json_from_response(response); deleted again
#                     when the response is an error response
#   "status_code"   - response.status_code for an error response, 999 when one
#                     of the listed exceptions was caught
#   "error_message" - response.reason, or a description of the caught exception
#   "exception"     - the caught exception object
#
# Raises:
#   ValueError - when a parameter fails the checks below
#
# NOTE(review): lines appear to be missing from this extract (initialisation of
# `json_reply`, the `try:`, anything following the handler's last visible line,
# and the return). Confirm against the full file.
# NOTE(review): unlike post_json_api() and get_json_api(), no call to
# instances.set_last_error() is visible here. Only a URL is available, not a
# domain, which may be the reason; verify this is intended.
97 def fetch_api_url(url: str, timeout: tuple) -> dict:
98 # DEBUG: print(f"DEBUG: url='{url}',timeout()={len(timeout)} - CALLED!")
99 if not isinstance(url, str):
100 raise ValueError(f"Parameter url[]='{type(url)}' is not 'str'")
101 elif not isinstance(timeout, tuple):
102 raise ValueError(f"timeout[]='{type(timeout)}' is not 'tuple'")
109 # DEBUG: print(f"DEBUG: Fetching url='{url}' ...")
110 response = fba.fetch_url(url, api_headers, timeout)
112 json_reply["json"] = json_from_response(response)
114 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'")
115 if not response.ok or response.status_code >= 400:
116 print(f"WARNING: Cannot query JSON API: url='{url}',response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'")
117 json_reply["status_code"] = response.status_code
118 json_reply["error_message"] = response.reason
# An error reply carries no "json" key.
119 del json_reply["json"]
121 except exceptions as exception:
122 # DEBUG: print(f"DEBUG: Fetching '{url}' failed. exception[{type(exception)}]='{str(exception)}'")
# 999 marks a request that failed with one of the caught exceptions.
123 json_reply["status_code"] = 999
124 json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'"
125 json_reply["exception"] = exception
128 # DEBUG: print(f"DEBUG: Returning json_reply({len(json_reply)})=[]:{type(json_reply)}")
# Send a GET request to https://<domain><path> and collect the outcome in the
# dict `json_reply`.
#
# Parameters:
#   domain  - host to contact; must be a 'str' and must not end in ".tld"
#   path    - appended directly after the domain in the URL; must be a 'str'
#   headers - extra HTTP headers; must be a 'dict'; merged over `api_headers`
#   timeout - must be a 'tuple'; presumably passed to reqto.get(), but the
#             argument line is not visible in this extract
#
# Keys written to `json_reply` by the visible code:
#   "json"          - result of json_from_response(response); deleted again
#                     when the response is an error response
#   "status_code"   - response.status_code for an error response, 999 when one
#                     of the listed exceptions was caught
#   "error_message" - response.reason, or a description of the caught exception
#   "exception"     - the caught exception object
#
# Raises:
#   ValueError - when a parameter fails the checks below
#
# NOTE(review): lines appear to be missing from this extract (the conditions
# guarding the two "is empty" raises, initialisation of `json_reply`, the
# `try:`, the end of the reqto.get() call, anything following the handler's
# last visible line, and the return). Confirm against the full file.
131 def get_json_api(domain: str, path: str, headers: dict, timeout: tuple) -> dict:
132 # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',timeout()={len(timeout)} - CALLED!")
133 if not isinstance(domain, str):
134 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
136 raise ValueError("Parameter 'domain' is empty")
137 elif domain.endswith(".tld"):
138 raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
139 elif not isinstance(path, str):
140 raise ValueError(f"path[]='{type(path)}' is not 'str'")
142 raise ValueError("Parameter 'path' cannot be empty")
143 elif not isinstance(headers, dict):
# NOTE(review): the message says 'list' although the check above is for 'dict'.
144 raise ValueError(f"headers[]='{type(headers)}' is not 'list'")
145 elif not isinstance(timeout, tuple):
146 raise ValueError(f"timeout[]='{type(timeout)}' is not 'tuple'")
153 # DEBUG: print(f"DEBUG: Sending GET to domain='{domain}',path='{path}',timeout({len(timeout)})={timeout}")
154 response = reqto.get(
155 f"https://{domain}{path}",
# Caller-supplied headers come last, so they override `api_headers` on conflicts.
156 headers={**api_headers, **headers},
160 except exceptions as exception:
161 # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'")
# 999 marks a request that failed with one of the caught exceptions.
162 json_reply["status_code"] = 999
163 json_reply["error_message"] = f"exception['{type(exception)}']='{str(exception)}'"
164 json_reply["exception"] = exception
165 instances.set_last_error(domain, exception)
# Here the response body is parsed after the exception handler, not before it
# as in post_json_api().
168 json_reply["json"] = json_from_response(response)
170 # DEBUG: print(f"DEBUG: response.ok={response.ok},response.status_code={response.status_code},json_reply[]='{type(json_reply)}'")
171 if not response.ok or response.status_code >= 400:
172 print(f"WARNING: Cannot query JSON API: domain='{domain}',path='{path}',response.status_code='{response.status_code}',json_reply[]='{type(json_reply)}'")
173 json_reply["status_code"] = response.status_code
174 json_reply["error_message"] = response.reason
# An error reply carries no "json" key; the failure is also recorded for the domain.
175 del json_reply["json"]
176 instances.set_last_error(domain, response)
178 # DEBUG: print(f"DEBUG: Returning json_reply({len(json_reply)})=[]:{type(json_reply)}")
# Build a plain-text summary of the instances blocked by `domain` and prepare
# an authenticated request for the configured bot instance.
#
# Parameters:
#   domain    - blocking instance named in the message; must be a 'str' and
#               must not end in ".tld"
#   blocklist - block entries; each entry is read via the keys "blocked" and
#               "reason"
#
# Raises:
#   ValueError - when a parameter fails the checks below
#
# NOTE(review): lines appear to be missing from this extract (the condition
# guarding the "is empty" raise, the `else:` of the reason check, the flag that
# guards the truncation notice, the request call itself and the return).
# Confirm against the full file.
# NOTE(review): `blocklist` is validated as a 'dict', yet it is sliced and
# iterated as a sequence of mappings below. Slicing a dict raises TypeError and
# iterating a dict yields its keys, so the check and annotation look
# inconsistent with the use (a list of dicts seems intended); verify with callers.
181 def send_bot_post(domain: str, blocklist: dict):
182 # DEBUG: print(f"DEBUG: domain={domain},blocklist()={len(blocklist)} - CALLED!")
183 if not isinstance(domain, str):
184 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
186 raise ValueError("Parameter 'domain' is empty")
187 elif domain.endswith(".tld"):
188 raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
189 elif not isinstance(blocklist, dict):
190 raise ValueError(f"Parameter blocklist[]='{type(blocklist)}' is not 'dict'")
192 message = f"{domain} has blocked the following instances:\n\n"
195 if len(blocklist) > 20:
# NOTE(review): the slice 0:19 keeps 19 entries, while the notice appended
# further down says "first 20 entries".
197 blocklist = blocklist[0 : 19]
199 # DEBUG: print(f"DEBUG: blocklist()={len(blocklist)}")
200 for block in blocklist:
201 # DEBUG: print(f"DEBUG: block['{type(block)}']={block}")
202 if block["reason"] is None or block["reason"] == '':
203 message = message + block["blocked"] + " with unspecified reason\n"
# Reasons longer than 420 characters are cut to their first 419 characters plus
# "[…]". The assignment writes back into the caller's entry.
205 if len(block["reason"]) > 420:
206 block["reason"] = block["reason"][0:419] + "[…]"
# U+200B (zero-width space) is inserted after every "@"; presumably to keep the
# receiving server from turning handles into mentions (verify).
208 message = message + block["blocked"] + ' for "' + block["reason"].replace("@", "@\u200b") + '"\n'
211 message = message + "(the list has been truncated to the first 20 entries)"
# API headers plus a bearer token read from the configuration key "bot_token".
213 botheaders = {**api_headers, **{"Authorization": "Bearer " + config.get("bot_token")}}
# Target is the statuses endpoint of the instance configured as "bot_instance".
216 f"{config.get('bot_instance')}/api/v1/statuses",
219 "visibility" : config.get('bot_visibility'),
220 "content_type": "text/plain"
# Send a GET request to https://<domain><path>; the return annotation declares
# a requests.models.Response.
#
# Parameters:
#   domain  - host to contact; must be a 'str' and must not end in ".tld"
#   path    - appended directly after the domain in the URL; must be a 'str'
#   headers - must be a 'dict'; presumably passed to reqto.get(), but the
#             argument line is not visible in this extract
#   timeout - must be a 'tuple'; presumably passed to reqto.get() as well
#
# Raises:
#   ValueError - when a parameter fails the checks below
#
# NOTE(review): lines appear to be missing from this extract (the conditions
# guarding the two "is empty" raises, the `try:`, the remaining reqto.get()
# arguments, anything following the handler's last visible line, and the
# return). Whether a caught exception is re-raised cannot be told from here.
228 def fetch_response(domain: str, path: str, headers: dict, timeout: tuple) -> requests.models.Response:
229 # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}',headers()={len(headers)},timeout={timeout} - CALLED!")
230 if not isinstance(domain, str):
231 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
233 raise ValueError("Parameter 'domain' is empty")
234 elif domain.endswith(".tld"):
235 raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
236 elif not isinstance(path, str):
237 raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")
239 raise ValueError("Parameter 'path' is empty")
240 elif not isinstance(headers, dict):
241 raise ValueError(f"headers[]='{type(headers)}' is not 'dict'")
242 elif not isinstance(timeout, tuple):
243 raise ValueError(f"timeout[]='{type(timeout)}' is not 'tuple'")
246 # DEBUG: print(f"DEBUG: Sending GET request to '{domain}{path}' ...")
247 response = reqto.get(
248 f"https://{domain}{path}",
253 except exceptions as exception:
254 # DEBUG: print(f"DEBUG: Fetching '{path}' from '{domain}' failed. exception[{type(exception)}]='{str(exception)}'")
# Record the failure for this domain.
255 instances.set_last_error(domain, exception)
258 # DEBUG: print(f"DEBUG: response[]='{type(response)}' - EXXIT!")
# Decode the JSON body of an HTTP response.
#
# Parameters:
#   response - must be a requests.models.Response
#
# Raises:
#   ValueError - when `response` is not a requests.models.Response
#
# NOTE(review): lines appear to be missing from this extract (the initial value
# of `data`, the `try:`, the body of the JSONDecodeError handler and the
# return). What is returned for an empty or undecodable body cannot be told
# from here.
# NOTE(review): the return annotation is 'list', but response.json() returns
# whatever type the body decodes to (e.g. a dict for a JSON object).
261 def json_from_response(response: requests.models.Response) -> list:
262 # DEBUG: print(f"DEBUG: response[]='{type(response)}' - CALLED!")
263 if not isinstance(response, requests.models.Response):
264 raise ValueError(f"Parameter response[]='{type(response)}' is not type of 'Response'")
# Decoding is only attempted when the body holds more than whitespace.
267 if response.text.strip() != "":
268 # DEBUG: print(f"DEBUG: response.text()={len(response.text)} is not empty, invoking response.json() ...")
270 data = response.json()
271 except json.decoder.JSONDecodeError:
274 # DEBUG: print(f"DEBUG: data[]='{type(data)}' - EXIT!")