1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
26 from fba.helpers import blacklist
27 from fba.helpers import config
28 from fba.helpers import tidyup
30 from fba.http import federation
31 from fba.http import network
33 from fba.models import blocks
34 from fba.models import instances
36 logging.basicConfig(level=logging.INFO)
37 logger = logging.getLogger(__name__)
39 def fetch_peers(domain: str) -> list:
# Fetch the list of federated peer domains from one instance's JSON API and
# record the total via instances.set_total_peers().  Returns the peers list.
# NOTE(review): several interior lines of this function are elided in this
# view (gaps in the embedded numbering: 43, 53-55, 57-59, 65-67, 70-72,
# 74-75, 84, 87, 91, 94, 96-97) — among them the empty-domain guard's
# condition, the 'peers' initialization, both 'try:' openers, the remaining
# arguments to network.get_json_api(), and the final 'return peers'.
# Confirm against the full file before editing.
40 logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
# --- parameter validation guards: reject non-str, empty, mixed-case,
# --- syntactically invalid, reverse-DNS (.arpa) and fake (.tld) domains.
41 if not isinstance(domain, str):
42 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
# (elided line 43: presumably the `elif domain == "":` condition guarding
# the next raise — TODO confirm)
44 raise ValueError("Parameter 'domain' is empty")
45 elif domain.lower() != domain:
46 raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
# Only the host part (before any "/") is validated as a domain name.
47 elif not validators.domain(domain.split("/")[0]):
48 raise ValueError(f"domain='{domain}' is not a valid domain")
49 elif domain.endswith(".arpa"):
50 raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
51 elif domain.endswith(".tld"):
52 raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
# (elided: `peers` initialization and a `try:` opener before the CSRF probe)
56 # No CSRF by default, you don't have to add network.api_headers by yourself here
60 logger.debug(f"Checking CSRF for domain='{domain}'")
# csrf.determine() returns the headers (possibly with a CSRF token) to use
# for subsequent requests to this domain.
61 headers = csrf.determine(domain, dict())
62 except network.exceptions as exception:
# CSRF probing failed — record the error on the instance and bail out
# ("EXIT!"); the elided following line presumably returns early.
63 logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
64 instances.set_last_error(domain, exception)
# (elided: a second `try:` opener wrapping the JSON fetch below)
68 logger.debug(f"domain='{domain}' is Lemmy, fetching JSON ...")
# Elided lines 70-72 carried the first arguments to get_json_api()
# (presumably the domain, API path and `headers`) — TODO confirm.
69 data = network.get_json_api(
73 (config.get("connection_timeout"), config.get("read_timeout"))
76 logger.debug("data[]='%s'", type(data))
77 if "error_message" in data:
# NOTE(review): positional arg without a '%s' placeholder — logging's lazy
# formatting will fail when emitting this record; likely meant
# "...: domain='%s'".
78 logger.warning("Could not reach any JSON API:", domain)
79 instances.set_last_error(domain, data)
80 elif "federated_instances" in data["json"]:
81 logger.debug(f"Found federated_instances for domain='{domain}'")
# Merge the parsed peer list into the accumulator (initialized in an
# elided line above).
82 peers = peers + federation.add_peers(data["json"]["federated_instances"])
83 logger.debug("Added instance(s) to peers")
# (elided line 84: presumably the `else:` branch header for the warning below)
# NOTE(review): same placeholder-less logger call as above.
85 logger.warning("JSON response does not contain 'federated_instances':", domain)
86 instances.set_last_error(domain, data)
88 except network.exceptions as exception:
89 logger.warning(f"Exception during fetching JSON: domain='{domain}',exception[{type(exception)}]:'{str(exception)}'")
90 instances.set_last_error(domain, exception)
# Persist how many peers were found for this domain.
92 logger.debug(f"Adding '{len(peers)}' for domain='{domain}'")
93 instances.set_total_peers(domain, peers)
# NOTE(review): placeholder-less logger call again; elided line(s) after
# this presumably `return peers`.
95 logger.debug("Returning peers[]:", type(peers))
98 def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
# Scrape an instance's "blocked instances" page, register newly seen blocked
# domains, and store/refresh "reject" block records, committing at the end.
# NOTE(review): many interior lines are elided in this view (gaps in the
# embedded numbering: 102, 113-115 partially, 120-122, 124-125, 130, 135,
# 137-138, 145, 147, 150, 152-154, 157-160, 164-165, 168-170, 172-173, 177,
# 180, 182, 187, 191-193, 195, 197-198, 201, 204, 208, 211, 214, 217, 221,
# 225, 227-230, 233, 239) — among them the `translations = [` list opener,
# the `try:` openers, loop headers (`for tag in blocking:`), `continue`/
# `break` statements and early returns.  Confirm against the full file.
99 logger.debug(f"domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
# --- parameter validation guards (same pattern as fetch_peers) ---
100 if not isinstance(domain, str):
101 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
# (elided line 102: presumably `elif domain == "":` — TODO confirm)
103 raise ValueError("Parameter 'domain' is empty")
104 elif domain.lower() != domain:
105 raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
106 elif not validators.domain(domain.split("/")[0]):
107 raise ValueError(f"domain='{domain}' is not a valid domain")
108 elif domain.endswith(".arpa"):
109 raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
110 elif domain.endswith(".tld"):
111 raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
# `origin` may be None; if it is a str it must be non-empty (the empty check's
# condition line is elided).
112 elif not isinstance(origin, str) and origin is not None:
113 raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
115 raise ValueError("Parameter 'origin' is empty")
116 elif not isinstance(nodeinfo_url, str):
117 raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
118 elif nodeinfo_url == "":
119 raise ValueError("Parameter 'nodeinfo_url' is empty")
# Localized "Blocked instances" headings used to find the right <h5> on the
# scraped page.  The list opener (`translations = [` or similar) and some
# entries are elided — TODO confirm the variable name against the full file.
123 "Instàncies bloquejades",
126 "Blokované instance",
127 "Geblokkeerde instanties",
128 "Blockerade instanser",
129 "Instàncias blocadas",
131 "Instances bloquées",
132 "Letiltott példányok",
133 "Instancias bloqueadas",
134 "Blokeatuta dauden instantziak",
136 "Peladen Yang Diblokir",
139 "Блокирани Инстанции",
140 "Blockierte Instanzen",
141 "Estetyt instanssit",
142 "Instâncias bloqueadas",
143 "Zablokowane instancje",
144 "Blokované inštancie",
146 "Užblokuoti serveriai",
148 "Блокированные Инстансы",
149 "Αποκλεισμένοι διακομιστές",
151 "Instâncias bloqueadas",
155 # json endpoint for newer mastodongs
156 found_blocks = list()
# Per-severity accumulator dict; the opener and the first key(s) (e.g.
# "reject") are on elided lines.
161 "media_removal" : [],
162 "followers_only": [],
163 "report_removal": [],
# (elided: a `try:` opener before the HTTP fetch)
166 logger.debug(f"Fetching /instances from domain='{domain}'")
# Elided lines 168-170 carried the first arguments to fetch_response()
# (presumably domain, path and headers) — TODO confirm.
167 response = network.fetch_response(
171 (config.get("connection_timeout"), config.get("read_timeout"))
174 logger.debug(f"response.ok='{response.ok}',response.status_code={response.status_code},response.text()={len(response.text)}")
# Only parse successful, non-empty responses.
175 if response.ok and response.status_code < 300 and response.text != "":
176 logger.debug(f"Parsing {len(response.text)} Bytes ...")
178 doc = bs4.BeautifulSoup(response.text, "html.parser")
179 logger.debug(f"doc[]={type(doc)}")
# Scan every <h5> for one of the translated "Blocked instances" headings.
181 headers = doc.findAll("h5")
183 logger.debug(f"Search in {len(headers)} header(s) ...")
184 for header in headers:
185 logger.debug(f"header[]={type(header)}")
186 content = header.contents[0]
188 logger.debug(f"content='{content}'")
189 if content in translations:
# Matching heading found; the elided following lines presumably assign
# `found` and `break` out of the loop — TODO confirm.
190 logger.debug("Found header with blocked instances - BREAK!")
194 logger.debug(f"found[]='{type(found)}'")
# (elided: guard returning early when no heading matched)
196 logger.debug(f"domain='{domain}' is not blocking any instances - EXIT!")
# The blocked domains are the <a> entries of the <ul> following the heading.
199 blocking = found.find_next("ul").findAll("a")
200 logger.debug(f"Found {len(blocking)} blocked instance(s) ...")
# (elided: `for tag in blocking:` loop header)
202 logger.debug(f"tag[]='{type(tag)}'")
# Normalize the scraped text into a bare domain name.
203 blocked = tidyup.domain(tag.contents[0])
205 logger.debug(f"blocked='{blocked}'")
# --- per-entry filters; each branch presumably ends in an elided
# --- `continue` (except registration, which falls through) ---
206 if not validators.domain(blocked):
207 logger.warning(f"blocked='{blocked}' is not a valid domain - SKIPPED!")
209 elif blocked.endswith(".arpa"):
210 logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
212 elif blocked.endswith(".tld"):
213 logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!")
215 elif blacklist.is_blacklisted(blocked):
216 logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
218 elif not instances.is_registered(blocked):
# NOTE(review): two positional args without '%s' placeholders — logging's
# lazy formatting will fail when emitting this record.
219 logger.debug("Hash wasn't found, adding:", blocked, domain)
# Register the newly discovered instance, tagging it with this
# function's name as the discovery source.
220 instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
# Record a new "reject" block, or refresh last-seen on an existing one.
222 if not blocks.is_instance_blocked(domain, blocked, "reject"):
# NOTE(review): placeholder-less logger call, same issue as above.
223 logger.debug("Blocking:", domain, blocked)
224 blocks.add_instance(domain, blocked, None, "reject")
# The dict literal's keys/values (presumably blocker/blocked/reason)
# are on elided lines 227-230.
226 found_blocks.append({
231 logger.debug(f"Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
232 blocks.update_last_seen(domain, blocked, "reject")
# Persist all block/instance changes in one commit.
234 logger.debug("Committing changes ...")
235 fba.connection.commit()
236 except network.exceptions as exception:
237 logger.warning(f"domain='{domain}',exception[{type(exception)}]:'{str(exception)}'")
238 instances.set_last_error(domain, exception)
240 logger.debug("EXIT!")