1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
22 from fba import blacklist
23 from fba import config
26 from fba import federation
27 from fba import network
29 from fba.models import blocks
30 from fba.models import instances
32 def fetch_peers(domain: str) -> list:
# Purpose: query the JSON API of 'domain' and collect its federated peers,
# then hand the collected peers to instances.set_total_peers().
#
# Parameters:
#   domain - must be a 'str'; a ValueError is raised otherwise.
# Returns:
#   annotated as 'list' (the 'return' statement itself is not visible in this extract).
# Raises:
#   ValueError - when 'domain' is not a 'str' (and, per the message below, when it is empty).
#
# NOTE(review): the embedded line numbers in this extract have gaps, so some
# statements (guard conditions, 'try:'/'else:' lines, call arguments, the
# 'return') are not visible here - confirm against the complete file.
33 # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',software='lemmy' - CALLED!")
# Parameter validation: reject anything that is not a string.
34 if not isinstance(domain, str):
35 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
# NOTE(review): the condition guarding this raise is not visible; presumably an empty-string check - confirm.
37 raise ValueError("Parameter 'domain' is empty")
41 # No CSRF by default, you don't have to add network.api_headers by yourself here
45 # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
# Request headers are obtained from csrf.determine(), starting from an empty dict.
46 headers = csrf.determine(domain, dict())
47 except network.exceptions as exception:
# A network failure during the CSRF check is reported and recorded as the instance's last error.
48 print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
49 instances.set_last_error(domain, exception)
53 # DEBUG: print(f"DEBUG: domain='{domain}' is Lemmy, fetching JSON ...")
# NOTE(review): the leading arguments of this call are not visible; only the
# (connection_timeout, read_timeout) tuple read from config is shown.
54 data = network.get_json_api(
58 (config.get("connection_timeout"), config.get("read_timeout"))
61 # DEBUG: print(f"DEBUG: data[]='{type(data)}'")
# 'data' is checked for an "error_message" key first; only otherwise is data["json"] inspected.
62 if "error_message" in data:
63 print("WARNING: Could not reach any JSON API:", domain)
64 instances.set_last_error(domain, data)
65 elif "federated_instances" in data["json"]:
66 # DEBUG: print(f"DEBUG: Found federated_instances for domain='{domain}'")
# Peers returned by federation.add_peers() are appended to the running 'peers' value.
67 peers = peers + federation.add_peers(data["json"]["federated_instances"])
68 # DEBUG: print("DEBUG: Added instance(s) to peers")
# NOTE(review): presumably the 'else' branch of the check above (its 'else:' line is not visible) - confirm.
70 print("WARNING: JSON response does not contain 'federated_instances':", domain)
71 instances.set_last_error(domain, data)
73 except network.exceptions as exception:
# Network failures while fetching the JSON are only printed here; no set_last_error() call is visible in this handler.
74 print(f"WARNING: Exception during fetching JSON: domain='{domain}',exception[{type(exception)}]:'{str(exception)}'")
76 # DEBUG: print(f"DEBUG: Adding '{len(peers)}' for domain='{domain}'")
# The collected peers are passed to the instances model for this domain.
77 instances.set_total_peers(domain, peers)
79 # DEBUG: print("DEBUG: Returning peers[]:", type(peers))
82 def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
# Purpose: fetch a page from 'domain', parse it as HTML, look for an <h5>
# header whose text is listed in 'translations', and record every linked
# domain from the following <ul> as a "reject" block of 'domain'.
#
# Parameters:
#   domain       - must be a 'str'; ValueError otherwise.
#   origin       - must be a 'str' or None; ValueError otherwise.
#   nodeinfo_url - must be a non-empty 'str'; ValueError otherwise. It is
#                  passed on to instances.add() for newly seen domains.
# Raises:
#   ValueError - on any of the parameter checks below.
#
# NOTE(review): the embedded line numbers in this extract have gaps, so some
# statements (guard conditions, 'try:'/'else:' lines, loop headers,
# 'continue'/'break', the definitions of 'translations' and 'found', call
# arguments) are not visible here - confirm against the complete file.
83 # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
# Parameter validation.
84 if not isinstance(domain, str):
85 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
# NOTE(review): the condition guarding this raise is not visible; presumably an empty-string check - confirm.
87 raise ValueError("Parameter 'domain' is empty")
# 'origin' is optional: None is accepted, any other non-string is rejected.
88 elif not isinstance(origin, str) and origin is not None:
89 raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
# NOTE(review): the condition guarding this raise is not visible; presumably an empty-string check - confirm.
91 raise ValueError("Parameter 'origin' is empty")
92 elif not isinstance(nodeinfo_url, str):
93 raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
94 elif nodeinfo_url == "":
95 raise ValueError("Parameter 'nodeinfo_url' is empty")
102 # json endpoint for newer mastodongs
# Accumulates one entry per newly added block (see found_blocks.append() below).
103 found_blocks = list()
# NOTE(review): these keys belong to a dict literal whose opening line (and name) is not visible here.
108 "media_removal" : [],
109 "followers_only": [],
110 "report_removal": [],
113 # DEBUG: print(f"DEBUG: Fetching /instances from domain='{domain}'")
# NOTE(review): the leading arguments of this call are not visible; only the
# (connection_timeout, read_timeout) tuple read from config is shown.
114 response = network.fetch_response(
118 (config.get("connection_timeout"), config.get("read_timeout"))
121 # DEBUG: print(f"DEBUG: response.ok='{response.ok}',response.status_code={response.status_code},response.text()={len(response.text)}")
# Only a successful, non-empty response body is parsed.
122 if response.ok and response.status_code < 300 and response.text != "":
123 # DEBUG: print(f"DEBUG: Parsing {len(response.text)} Bytes ...")
125 doc = bs4.BeautifulSoup(response.text, "html.parser")
126 # DEBUG: print(f"DEBUG: doc[]={type(doc)}")
# All <h5> elements are candidates for the "blocked instances" heading.
128 headers = doc.findAll("h5")
130 # DEBUG: print(f"DEBUG: Search in {len(headers)} header(s) ...")
131 for header in headers:
132 # DEBUG: print(f"DEBUG: header[]={type(header)}")
# Uses the first child node of the header as its text.
133 content = header.contents[0]
135 # DEBUG: print(f"DEBUG: content='{content}'")
# Case-insensitive match against 'translations' (defined outside the visible lines).
136 if content.lower() in translations:
137 # DEBUG: print("DEBUG: Found header with blocked instances - BREAK!")
141 # DEBUG: print(f"DEBUG: found[]='{type(found)}'")
143 # DEBUG: print(f"DEBUG: domain='{domain}' is not blocking any instances - EXIT!")
# The links inside the first <ul> following the matched header are the blocked instances.
# NOTE(review): the assignment of 'found' is not visible; presumably the matching header from the loop above - confirm.
146 blocking = found.find_next("ul").findAll("a")
147 # DEBUG: print(f"DEBUG: Found {len(blocking)} blocked instance(s) ...")
# NOTE(review): the loop header that binds 'tag' is not visible; presumably iterates over 'blocking' - confirm.
149 # DEBUG: print(f"DEBUG: tag[]='{type(tag)}'")
# Uses the first child node of the link as the blocked domain name.
150 blocked = tag.contents[0]
152 # DEBUG: print(f"DEBUG: blocked='{blocked}'")
# Filter chain: invalid, blacklisted, '.arpa' and '.tld' names are singled out
# before any registration. NOTE(review): the statements inside these branches
# (presumably 'continue') are not visible - confirm.
153 if not validators.domain(blocked):
154 # DEBUG: print(f"DEBUG: blocked='{blocked}' is not a valid domain - SKIPPED!")
156 elif blacklist.is_blacklisted(blocked):
157 # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
159 elif blocked.endswith(".arpa"):
160 print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
162 elif blocked.endswith(".tld"):
163 print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
# Unknown domains are registered first; the current function name is passed as the third argument to instances.add().
165 elif not instances.is_registered(blocked):
166 # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain)
167 instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
# A block not yet known is stored with block level "reject" and None as the third argument.
169 if not blocks.is_instance_blocked(domain, blocked, "reject"):
170 # DEBUG: print("DEBUG: Blocking:", domain, blocked)
171 blocks.add_instance(domain, blocked, None, "reject")
# NOTE(review): the keys of the appended dict are not visible in this extract.
173 found_blocks.append({
# NOTE(review): presumably the 'else' branch for an already known block (its 'else:' line is not visible) - confirm.
178 # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
179 blocks.update_last_seen(domain, blocked, "reject")
181 # DEBUG: print("DEBUG: Committing changes ...")
# Commits via the shared fba.connection object.
182 fba.connection.commit()
183 except network.exceptions as exception:
# Network failures are only printed here; no set_last_error() call is visible in this handler.
# NOTE(review): the message says software='mastodon' while the sibling function's debug output says 'lemmy' - confirm which is intended.
184 print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")
186 # DEBUG: print("DEBUG: EXIT!")