1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
23 from fba.helpers import blacklist
24 from fba.helpers import config
25 from fba.helpers import dicts
26 from fba.helpers import tidyup
28 from fba.http import network
30 from fba.models import instances
# Module-level logging setup: configure the root logger at INFO level and
# create the module logger that the functions in this file write to.
32 logging.basicConfig(level=logging.INFO)
33 logger = logging.getLogger(__name__)
# Fetch the peer list of a Misskey instance.
#
# Validates `domain`, determines request headers through csrf.determine(),
# then POSTs to "/api/federation/instances" and appends every acceptable
# row["host"] to `peers`. The collected list is handed to
# instances.set_total_peers() near the end of the function.
#
# Parameters:
#   domain - host name of the instance to query; must be a lower-case str
#            that passes validators.domain().
#
# Raises:
#   ValueError - `domain` is not a str, is empty, is not all lower-case,
#                fails validators.domain(), or ends with ".arpa" / ".tld".
#
# NOTE(review): several statements this function relies on (initialisation
# of `peers`, `offset`, `headers` and `already`, the `try:` and loop headers,
# the request payloads and the final return) are not visible in this view of
# the file - confirm against the complete source before changing control flow.
35 def fetch_peers(domain: str) -> list:
# NOTE(review): len(domain) is evaluated for this debug message before the
# isinstance() check below, so an argument without __len__ (e.g. an int)
# raises TypeError here instead of the intended ValueError.
36 logger.debug(f"domain({len(domain)})='{domain}' - CALLED!")
37 if not isinstance(domain, str):
38 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
# NOTE(review): the condition guarding this raise is not visible here.
40 raise ValueError("Parameter 'domain' is empty")
41 elif domain.lower() != domain:
42 raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
# Only the part before the first "/" is checked by validators.domain().
43 elif not validators.domain(domain.split("/")[0]):
44 raise ValueError(f"domain='{domain}' is not a valid domain")
45 elif domain.endswith(".arpa"):
46 raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
47 elif domain.endswith(".tld"):
48 raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
50 logger.debug(f"domain='{domain}' is misskey, sending API POST request ...")
# Number of rows expected per request; a full page also raises `offset` by
# this amount (see the offset handling further down).
53 step = config.get("misskey_limit")
55 # No CSRF by default, you don't have to add network.api_headers by yourself here
# Ask the csrf helper for the headers to send to this domain. A network
# error is logged and recorded as the domain's last error; the log message
# says EXIT, so presumably the function returns right after - TODO confirm.
59 logger.debug(f"Checking CSRF for domain='{domain}'")
60 headers = csrf.determine(domain, dict())
61 except network.exceptions as exception:
62 logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
63 instances.set_last_error(domain, exception)
66 # iterating through all "suspended" (follow-only in its terminology)
67 # instances page-by-page, since that software doesn't support
68 # sending them all at once
70 logger.debug(f"Fetching offset='{offset}' from '{domain}' ...")
# Two request variants are visible; presumably one for the first page and one
# that carries an offset - TODO confirm, the payloads are not visible here.
72 fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
78 fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
86 logger.debug(f"fetched[]='{type(fetched)}'")
# post_json_api() reported an error itself: record the whole result as the
# domain's last error.
87 if "error_message" in fetched:
88 logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
89 instances.set_last_error(domain, fetched)
# The decoded JSON is an error object carrying a message.
# NOTE(review): the warning below reads fetched['error']['message'] although
# the condition only establishes fetched['json']['error']['message'] - this
# looks like it raises KeyError when taken; verify.
91 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
92 logger.warning(f"post_json_api() returned error: {fetched['error']['message']}")
93 instances.set_last_error(domain, fetched["json"]["error"]["message"])
96 rows = fetched["json"]
98 logger.debug(f"rows()={len(rows)}")
# Per the log message an empty page ends the loop (condition not visible).
100 logger.debug(f"Returned zero bytes, exiting loop, domain='{domain}'")
# A short page raises the offset by the shortfall (limit - len(rows));
# otherwise the offset is raised by `step` (branch header not visible).
102 elif len(rows) != config.get("misskey_limit"):
103 logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
104 offset = offset + (config.get("misskey_limit") - len(rows))
106 logger.debug(f"Raising offset by step={step}")
107 offset = offset + step
110 logger.debug(f"rows({len(rows)})[]='{type(rows)}'")
# Per-row filtering: rows without a str "host", with a host failing
# validators.domain(), ending in ".arpa"/".tld" or reported by
# blacklist.is_blacklisted() are logged as SKIPPED; hosts already in `peers`
# are counted in `already` instead of being added again.
112 logger.debug(f"row()={len(row)}")
113 if "host" not in row:
114 logger.warning(f"row()={len(row)} does not contain key 'host': {row},domain='{domain}'")
116 elif not isinstance(row["host"], str):
117 logger.warning(f"row[host][]='{type(row['host'])}' is not 'str' - SKIPPED!")
119 elif not validators.domain(row["host"].split("/")[0]):
120 logger.warning(f"row[host]='{row['host']}' is not a valid domain - SKIPPED!")
122 elif row["host"].endswith(".arpa"):
123 logger.warning(f"row[host]='{row['host']}' is a domain for reversed IP addresses - SKIPPED!")
125 elif row["host"].endswith(".tld"):
126 logger.warning(f"row[host]='{row['host']}' is a fake domain - SKIPPED!")
128 elif blacklist.is_blacklisted(row["host"]):
129 logger.debug(f"row[host]='{row['host']}' is blacklisted. domain='{domain}' - SKIPPED!")
131 elif row["host"] in peers:
132 logger.debug(f"Not adding row[host]='{row['host']}', already found.")
133 already = already + 1
136 logger.debug(f"Adding peer: '{row['host']}'")
137 peers.append(row["host"])
# When every row of the page was already known, the host is repeating itself;
# per the log message the loop is aborted here.
139 if already == len(rows):
140 logger.debug(f"Host returned same set of '{already}' instances, aborting loop!")
# Hand the collected peers for this domain to the instances model.
143 logger.debug(f"Adding '{len(peers)}' for domain='{domain}'")
144 instances.set_total_peers(domain, peers)
146 logger.debug(f"Returning peers[]='{type(peers)}'")
# Fetch the suspended and blocked instances of a Misskey instance.
#
# Validates `domain`, determines request headers through csrf.determine(),
# then queries "/api/federation/instances" in two passes: the first collects
# rows flagged "isSuspended" into blocklist["suspended"], the second collects
# rows flagged "isBlocked" into blocklist["blocked"]. The visible tail builds
# a mapping with the keys "reject" (blocked entries) and "followers_only"
# (suspended entries).
#
# Parameters:
#   domain - host name of the instance to query; must be a lower-case str
#            that passes validators.domain().
#
# Raises:
#   ValueError - `domain` is not a str, is empty, is not all lower-case,
#                fails validators.domain(), or ends with ".arpa" / ".tld".
#
# NOTE(review): several statements this function relies on (initialisation
# of `blocklist`, `offset`, `count` and `headers`, the `try:` and loop
# headers, most of the request payloads and the return statement) are not
# visible in this view of the file - confirm against the complete source
# before changing control flow.
#
# NOTE(review): several logger.debug() calls below pass extra positional
# arguments although the message contains no %-placeholders; logging applies
# `msg % args` when the record is emitted, so these records fail to format
# once DEBUG output is enabled. The neighbouring calls use f-strings instead.
149 def fetch_blocks(domain: str) -> dict:
150 logger.debug(f"domain='{domain}' - CALLED!")
151 if not isinstance(domain, str):
152 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
# NOTE(review): the condition guarding this raise is not visible here.
154 raise ValueError("Parameter 'domain' is empty")
155 elif domain.lower() != domain:
156 raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
# Only the part before the first "/" is checked by validators.domain().
157 elif not validators.domain(domain.split("/")[0]):
158 raise ValueError(f"domain='{domain}' is not a valid domain")
159 elif domain.endswith(".arpa"):
160 raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
161 elif domain.endswith(".tld"):
162 raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
164 logger.debug(f"Fetching misskey blocks from domain='{domain}'")
# Number of rows expected per request; a full page also raises `offset` by
# this amount (see the offset handling further down).
171 step = config.get("misskey_limit")
173 # No CSRF by default, you don't have to add network.api_headers by yourself here
# Ask the csrf helper for the headers to send to this domain. A network
# error is logged and recorded as the domain's last error; the log message
# says EXIT, so presumably the function returns right after - TODO confirm.
177 logger.debug(f"Checking CSRF for domain='{domain}'")
178 headers = csrf.determine(domain, dict())
179 except network.exceptions as exception:
180 logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
181 instances.set_last_error(domain, exception)
184 # iterating through all "suspended" (follow-only in its terminology)
185 # instances page-by-page since it doesn't support sending them all at once
188 logger.debug(f"Fetching offset='{offset}' from '{domain}' ...")
# Two request variants are visible; the second one sends `offset - 1` as its
# "offset" field. Presumably the first is used for the initial page - TODO
# confirm, the remaining payload fields are not visible here.
190 logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
191 fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
198 logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
199 fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
204 "offset" : offset - 1
207 logger.debug(f"fetched[]='{type(fetched)}'")
# post_json_api() reported an error itself: record the whole result as the
# domain's last error.
208 if "error_message" in fetched:
209 logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
210 instances.set_last_error(domain, fetched)
# The decoded JSON is an error object carrying a message.
# NOTE(review): the warning below reads fetched['error']['message'] although
# the condition only establishes fetched['json']['error']['message'] - this
# looks like it raises KeyError when taken; verify.
212 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
213 logger.warning(f"post_json_api() returned error: {fetched['error']['message']}")
214 instances.set_last_error(domain, fetched["json"]["error"]["message"])
217 rows = fetched["json"]
219 logger.debug(f"rows({len(rows)})={rows} - suspend")
# Per the log message an empty page ends the loop (condition not visible).
221 logger.debug("Returned zero bytes, exiting loop:", domain)
# A short page raises the offset by the shortfall (limit - len(rows));
# otherwise the offset is raised by `step` (branch header not visible).
223 elif len(rows) != config.get("misskey_limit"):
224 logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
225 offset = offset + (config.get("misskey_limit") - len(rows))
227 logger.debug("Raising offset by step:", step)
228 offset = offset + step
231 for instance in rows:
233 logger.debug(f"instance[{type(instance)}]='{instance}' - suspend")
# Keep rows with a truthy "isSuspended" whose host is not yet listed.
# NOTE(review): the duplicate check passes the raw instance["host"] while the
# stored value is tidyup.domain(instance["host"]); if tidyup.domain() alters
# the host, the check compares against a different string - verify.
234 if "isSuspended" in instance and instance["isSuspended"] and not dicts.has_key(blocklist["suspended"], "domain", instance["host"]):
236 blocklist["suspended"].append({
237 "domain": tidyup.domain(instance["host"]),
238 # no reason field, nothing
# `count` presumably tracks how many entries this page added; per the log
# message the loop is aborted when the API returns nothing new - TODO confirm.
242 logger.debug(f"count={count}")
244 logger.debug("API is no more returning new instances, aborting loop!")
# A network error ends the pass: it is logged and recorded as the domain's
# last error.
247 except network.exceptions as exception:
248 logger.warning(f"Caught error, exiting loop: domain='{domain}',exception[{type(exception)}]='{str(exception)}'")
249 instances.set_last_error(domain, exception)
254 # Fetch blocked (full suspended) instances
# Second pass: same request/paging scheme as above, this time collecting rows
# flagged "isBlocked".
257 logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
258 fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
265 logger.debug("Sending JSON API request to domain,step,offset:", domain, step, offset)
266 fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps({
271 "offset" : offset - 1
274 logger.debug(f"fetched[]='{type(fetched)}'")
275 if "error_message" in fetched:
276 logger.warning(f"post_json_api() for domain='{domain}' returned error message: {fetched['error_message']}")
277 instances.set_last_error(domain, fetched)
# NOTE(review): same fetched['error'] vs fetched['json']['error'] mismatch in
# the warning below as in the first pass - verify.
279 elif isinstance(fetched["json"], dict) and "error" in fetched["json"] and "message" in fetched["json"]["error"]:
280 logger.warning(f"post_json_api() returned error: {fetched['error']['message']}")
281 instances.set_last_error(domain, fetched["json"]["error"]["message"])
284 rows = fetched["json"]
286 logger.debug(f"rows({len(rows)})={rows} - blocked")
288 logger.debug("Returned zero bytes, exiting loop:", domain)
290 elif len(rows) != config.get("misskey_limit"):
291 logger.debug(f"Fetched '{len(rows)}' row(s) but expected: '{config.get('misskey_limit')}'")
292 offset = offset + (config.get("misskey_limit") - len(rows))
294 logger.debug("Raising offset by step:", step)
295 offset = offset + step
298 for instance in rows:
300 logger.debug(f"instance[{type(instance)}]='{instance}' - blocked")
# Keep rows with a truthy "isBlocked" whose host is not yet listed (same
# raw-host vs tidied-host caveat as in the first pass).
301 if "isBlocked" in instance and instance["isBlocked"] and not dicts.has_key(blocklist["blocked"], "domain", instance["host"]):
303 blocklist["blocked"].append({
304 "domain": tidyup.domain(instance["host"]),
308 logger.debug(f"count={count}")
310 logger.debug("API is no more returning new instances, aborting loop!")
313 except network.exceptions as exception:
314 logger.warning(f"Caught error, exiting loop: domain='{domain}',exception[{type(exception)}]='{str(exception)}'")
315 instances.set_last_error(domain, exception)
# Result mapping: blocked entries are exposed as "reject", suspended entries
# as "followers_only".
319 logger.debug(f"Returning for domain='{domain}',blocked()={len(blocklist['blocked'])},suspended()={len(blocklist['suspended'])}")
321 "reject" : blocklist["blocked"],
322 "followers_only": blocklist["suspended"]