1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
24 from fba.helpers import config
25 from fba.helpers import dicts
26 from fba.helpers import tidyup
28 from fba.http import network
30 from fba.models import instances
# Configure the root logger at import time with INFO as the minimum level,
# then create this module's own logger, named after the module itself.
# NOTE(review): basicConfig() at import time is a module-level side effect;
# confirm the application entry point does not expect to configure logging first.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def fetch_peers(domain: str) -> list:
    """Collect the peer host names a Misskey instance reports.

    The instance's "/api/federation/instances" endpoint is queried page by
    page until it returns no rows, reports an error, or only repeats hosts
    that were already collected. The collected peers are stored through
    instances.set_total_peers() before they are returned.

    Args:
        domain: Lower-case domain name of the instance to query.

    Returns:
        List of unique peer host names; empty when the CSRF check or the
        first API request fails.

    Raises:
        ValueError: If domain is not a str, is empty, is not lower-case, is
            not a valid domain, or ends with ".arpa" or ".tld".
    """
    # Validate before the first log call: len() on a non-str argument would
    # raise TypeError instead of the ValueError this function promises.
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif domain.lower() != domain:
        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")
    elif domain.endswith(".arpa"):
        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")

    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
    logger.debug("domain='%s' is misskey, sending API POST request ...", domain)

    peers = []
    # Mirrors 'peers' so the duplicate check is O(1) instead of a list scan.
    seen = set()
    offset = 0
    step = config.get("misskey_limit")

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        return peers

    # Iterate through all known instances page-by-page, since the Misskey
    # API does not support sending them all at once.
    while True:
        logger.debug("Fetching offset='%s' from '%s' ...", offset, domain)

        # NOTE(review): request fields follow the Misskey
        # federation/instances endpoint - confirm against the API reference.
        payload = {
            "sort" : "+pubAt",
            "host" : None,
            "limit": step,
        }
        if offset != 0:
            # Follow-up pages start one row before the computed offset, so
            # consecutive pages overlap by one row; duplicates are filtered below.
            payload["offset"] = offset - 1

        fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps(payload), headers)

        # Check records
        logger.debug("fetched[]='%s'", type(fetched))
        if "error_message" in fetched:
            logger.warning("post_json_api() for domain='%s' returned error message: %s", domain, fetched["error_message"])
            instances.set_last_error(domain, fetched)
            break

        rows = fetched["json"]
        if isinstance(rows, dict) and "error" in rows and "message" in rows["error"]:
            # The API-level error lives below the "json" key, not at top level.
            logger.warning("post_json_api() returned error: %s", rows["error"]["message"])
            instances.set_last_error(domain, rows["error"]["message"])
            break
        elif not isinstance(rows, list):
            # Anything but a list of rows cannot be paged through.
            logger.warning("rows[]='%s' is not 'list', domain='%s' - EXIT!", type(rows), domain)
            break

        logger.debug("rows()=%d", len(rows))
        if len(rows) == 0:
            logger.debug("Returned zero bytes, exiting loop, domain='%s'", domain)
            break
        elif len(rows) != step:
            logger.debug("Fetched '%d' row(s) but expected: '%s'", len(rows), step)
            offset = offset + (step - len(rows))
        else:
            logger.debug("Raising offset by step=%s", step)
            offset = offset + step

        already = 0
        logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
        for row in rows:
            logger.debug("row()=%d", len(row))
            if "host" not in row:
                logger.warning("row()=%d does not contain key 'host': %s,domain='%s'", len(row), row, domain)
                continue
            elif not isinstance(row["host"], str):
                logger.warning("row[host][]='%s' is not 'str' - SKIPPED!", type(row["host"]))
                continue
            elif not utils.is_domain_wanted(row["host"]):
                logger.debug("row[host]='%s' is not wanted, domain='%s' - SKIPPED!", row["host"], domain)
                continue
            elif row["host"] in seen:
                logger.debug("Not adding row[host]='%s', already found.", row["host"])
                already = already + 1
                continue

            logger.debug("Adding peer: '%s'", row["host"])
            seen.add(row["host"])
            peers.append(row["host"])

        # A page consisting solely of known hosts means the API is repeating itself.
        if already == len(rows):
            logger.debug("Host returned same set of '%d' instances, aborting loop!", already)
            break

    logger.debug("Adding '%d' for domain='%s'", len(peers), domain)
    instances.set_total_peers(domain, peers)

    logger.debug("Returning peers[]='%s'", type(peers))
    return peers
def _fetch_flagged_instances(domain: str, headers, step: int, filter_name: str, flag_name: str, label: str) -> list:
    """Page through one filtered instance listing of a Misskey instance.

    Args:
        domain: Domain of the instance to query.
        headers: Request headers as returned by csrf.determine().
        step: Page size sent as "limit".
        filter_name: Request field set to True to select the listing
            ("suspended" or "blocked").
        flag_name: Row key that must be truthy for the row to be recorded
            ("isSuspended" or "isBlocked").
        label: Short tag appended to debug messages to tell the passes apart.

    Returns:
        List of {"domain": ..., "reason": None} dicts, one per unique host.
    """
    found = []
    # Each pass starts at the first page.
    offset = 0

    try:
        while True:
            logger.debug("Fetching offset='%s' from '%s' ...", offset, domain)

            # NOTE(review): request fields follow the Misskey
            # federation/instances endpoint - confirm against the API reference.
            payload = {
                "sort"     : "+pubAt",
                "host"     : None,
                filter_name: True,
                "limit"    : step,
            }
            if offset != 0:
                # Follow-up pages overlap the previous page by one row;
                # duplicates are filtered below.
                payload["offset"] = offset - 1

            logger.debug("Sending JSON API request to domain='%s',step=%s,offset=%s", domain, step, offset)
            fetched = network.post_json_api(domain, "/api/federation/instances", json.dumps(payload), headers)

            logger.debug("fetched[]='%s'", type(fetched))
            if "error_message" in fetched:
                logger.warning("post_json_api() for domain='%s' returned error message: %s", domain, fetched["error_message"])
                instances.set_last_error(domain, fetched)
                break

            rows = fetched["json"]
            if isinstance(rows, dict) and "error" in rows and "message" in rows["error"]:
                # The API-level error lives below the "json" key, not at top level.
                logger.warning("post_json_api() returned error: %s", rows["error"]["message"])
                instances.set_last_error(domain, rows["error"]["message"])
                break
            elif not isinstance(rows, list):
                # Anything but a list of rows cannot be paged through.
                logger.warning("rows[]='%s' is not 'list', domain='%s' - EXIT!", type(rows), domain)
                break

            logger.debug("rows(%d)=%s - %s", len(rows), rows, label)
            if len(rows) == 0:
                logger.debug("Returned zero bytes, exiting loop: domain='%s'", domain)
                break
            elif len(rows) != step:
                logger.debug("Fetched '%d' row(s) but expected: '%s'", len(rows), step)
                offset = offset + (step - len(rows))
            else:
                logger.debug("Raising offset by step=%s", step)
                offset = offset + step

            count = 0
            for instance in rows:
                logger.debug("instance[%s]='%s' - %s", type(instance), instance, label)
                if flag_name not in instance or not instance[flag_name]:
                    continue
                elif "host" not in instance or not isinstance(instance["host"], str):
                    logger.warning("instance='%s' has no usable key 'host', domain='%s' - SKIPPED!", instance, domain)
                    continue

                # Compare the tidied value, because that is what gets stored.
                host = tidyup.domain(instance["host"])
                if dicts.has_key(found, "domain", host):
                    continue

                count = count + 1
                found.append({
                    "domain": host,
                    # no reason field, nothing
                    "reason": None,
                })

            logger.debug("count=%d", count)
            if count == 0:
                logger.debug("API is no more returning new instances, aborting loop!")
                break

    except network.exceptions as exception:
        logger.warning("Caught error, exiting loop: domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    return found

def fetch_blocks(domain: str) -> dict:
    """Fetch the suspended and blocked instances of a Misskey instance.

    Two passes are made over "/api/federation/instances": one for suspended
    (follow-only in Misskey's terminology) and one for blocked (fully
    suspended) instances.

    Args:
        domain: Lower-case domain name of the instance to query.

    Returns:
        Dict with the keys "reject" (blocked instances) and "followers_only"
        (suspended instances); each value is a list of
        {"domain": ..., "reason": None} dicts. Both lists are empty when the
        CSRF check fails.

    Raises:
        ValueError: If domain is not a str, is empty, is not lower-case, is
            not a valid domain, or ends with ".arpa" or ".tld".
    """
    # Validate before the first log call: len() on a non-str argument would
    # raise TypeError instead of the ValueError this function promises.
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif domain.lower() != domain:
        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")
    elif domain.endswith(".arpa"):
        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")

    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
    logger.debug("Fetching misskey blocks from domain='%s'", domain)

    suspended = []
    blocked = []
    step = config.get("misskey_limit")

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_blocks,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
    else:
        # Iterate through all "suspended" (follow-only in its terminology)
        # instances page-by-page since the API doesn't support sending them
        # all at once.
        suspended = _fetch_flagged_instances(domain, headers, step, "suspended", "isSuspended", "suspend")

        # Fetch blocked (full suspended) instances
        blocked = _fetch_flagged_instances(domain, headers, step, "blocked", "isBlocked", "blocked")

    logger.debug("Returning for domain='%s',blocked()=%d,suspended()=%d", domain, len(blocked), len(suspended))
    return {
        "reject"        : blocked,
        "followers_only": suspended,
    }