# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
# NOTE(review): the paste dropped several import lines; bs4, inspect,
# validators, csrf and fba are all referenced by the code below, so their
# imports are restored here — confirm against the repository.
import inspect

import bs4
import validators

from fba import csrf
from fba import fba

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import tidyup

from fba.http import network

from fba.models import blocks
from fba.models import instances
# Maps localized block-list headlines (as scraped from /about pages) to
# their canonical English form used as keys in fetch_blocks_from_about().
language_mapping = {
    # English -> English
    "Silenced instances"            : "Silenced servers",
    "Suspended instances"           : "Suspended servers",
    "Limited instances"             : "Limited servers",
    "Filtered media"                : "Filtered media",
    # Mapping German -> English
    "Gesperrte Server"              : "Suspended servers",
    "Gefilterte Medien"             : "Filtered media",
    "Stummgeschaltete Server"       : "Silenced servers",
    # Mapping Japanese -> English
    "停止済みのサーバー"            : "Suspended servers",
    "制限中のサーバー"              : "Limited servers",
    "メディアを拒否しているサーバー": "Filtered media",
    "サイレンス済みのサーバー"      : "Silenced servers",
    # Mapping Hebrew -> English
    "שרתים מושעים"                  : "Suspended servers",
    "מדיה מסוננת"                   : "Filtered media",
    # NOTE(review): literally "limited servers" but mapped to silenced — confirm intended
    "שרתים מוגבלים"                 : "Silenced servers",
    # Mapping French -> English
    "Serveurs suspendus"            : "Suspended servers",
    "Médias filtrés"                : "Filtered media",
    "Serveurs limités"              : "Limited servers",
    "Serveurs modérés"              : "Limited servers",
}
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape a Mastodon instance's /about/more (or /about) page for its
    moderation tables and return the entries grouped by block level.

    Parameters:
        domain -- instance host name to scrape (non-empty str)

    Returns a dict with keys "reject", "media_removal" and "followers_only",
    each a list of {"domain", "hash", "reason"} dicts.

    Raises ValueError on invalid/empty 'domain'.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # Try /about/more first (full lists), fall back to /about.
    doc = None
    for path in ["/about/more", "/about"]:
        try:
            # NOTE(review): fetch_response argument list restored from the
            # fba.http.network API — confirm against the repository.
            doc = bs4.BeautifulSoup(
                network.fetch_response(
                    domain,
                    path,
                    network.web_headers,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                ).text,
                "html.parser",
            )

            # The moderation tables are introduced by <h3> headlines; a page
            # with at least one headline is good enough to stop probing.
            if len(doc.find_all("h3")) > 0:
                break
        except network.exceptions as exception:
            print(f"ERROR: Cannot fetch from domain='{domain}',exception='{type(exception)}'")
            instances.set_last_error(domain, exception)

    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    if doc is None:
        # No /about page could be fetched at all - give up with empty lists.
        # NOTE(review): this early return uses the internal headline keys, not
        # the reject/media_removal/followers_only shape below — TODO confirm.
        print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!")
        return blocklist

    for header in doc.find_all("h3"):
        header_text = tidyup.reason(header.text)

        if header_text in language_mapping:
            # Translate a localized headline to its English equivalent.
            header_text = language_mapping[header_text]
        else:
            print(f"WARNING: header_text='{header_text}' not found in language mapping table")

        if header_text in blocklist or header_text.lower() in blocklist:
            # find_all_next("table") instead of find_next_siblings() to also
            # catch instances that e.g. hide the lists in a dropdown menu.
            for line in header.find_all_next("table")[0].find_all("tr")[1:]:
                blocklist[header_text].append({
                    "domain": tidyup.domain(line.find("span").text),
                    # <span title="SHA-256: …"> - strip the 9-char prefix.
                    "hash"  : tidyup.domain(line.find("span")["title"][9:]),
                    "reason": tidyup.reason(line.find_all("td")[1].text),
                })
        else:
            print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")

    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        # Limited and silenced servers are merged into one block level.
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }
def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Fetch domain blocks from a Mastodon instance and persist them.

    Queries the JSON API (/api/v1/instance/domain_blocks) first; when the API
    returns zero rows, falls back to scraping the /about(/more) pages via
    fetch_blocks_from_about(). Newly seen blocked domains are registered,
    blocks are inserted or their last-seen/reason refreshed, then committed.

    Parameters:
        domain       -- instance to query (non-empty str)
        origin       -- originating instance or None
        nodeinfo_url -- nodeinfo URL of the instance (non-empty str)

    Raises ValueError on invalid parameters.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
        return

    try:
        # JSON endpoint for newer Mastodon versions
        found_blocks = list()

        rows = {
            "reject"        : [],
            "media_removal" : [],
            "followers_only": [],
            "report_removal": [],
        }

        data = network.get_json_api(
            domain,
            "/api/v1/instance/domain_blocks",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        if "error_message" in data:
            # Transport-level failure - record and bail out.
            instances.set_last_error(domain, data)
            return
        elif "json" in data and "error" in data["json"]:
            print(f"WARNING: JSON API returned error message: '{data['json']['error']}'")
            instances.set_last_error(domain, data)
            return
        else:
            blocklist = data["json"]

        if len(blocklist) > 0:
            print(f"INFO: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon' ...")
            for block in blocklist:
                if not isinstance(block, dict):
                    # Skip malformed API entries.
                    continue

                entry = {
                    "domain": block["domain"],
                    "hash"  : block["digest"],
                    "reason": block["comment"] if "comment" in block else None
                }

                # Map Mastodon severities onto internal block levels.
                if block['severity'] == 'suspend':
                    rows['reject'].append(entry)
                elif block['severity'] == 'silence':
                    rows['followers_only'].append(entry)
                elif block['severity'] == 'reject_media':
                    rows['media_removal'].append(entry)
                elif block['severity'] == 'reject_reports':
                    rows['report_removal'].append(entry)
                else:
                    print(f"WARNING: Unknown severity='{block['severity']}', domain='{block['domain']}'")
        else:
            # API returned zero rows - try scraping the /about/more page instead.
            rows = fetch_blocks_from_about(domain)

        print(f"INFO: Checking {len(rows.items())} entries from domain='{domain}',software='mastodon' ...")
        for block_level, blocklist in rows.items():
            block_level = tidyup.domain(block_level)

            if block_level == "":
                print("WARNING: block_level is empty, domain:", domain)
                continue
            elif block_level == "accept":
                # "accept" is not a block - skip it.
                continue

            for block in blocklist:
                blocked, blocked_hash, reason = block.values()
                blocked = tidyup.domain(blocked)
                reason  = tidyup.reason(reason) if reason is not None and reason != "" else None

                if blocked == "":
                    print("WARNING: blocked is empty:", domain)
                    continue
                elif blacklist.is_blacklisted(blocked):
                    continue
                elif blocked.count("*") > 0:
                    # Obscured entry - doing the hash search for instance names
                    # as well to tidy up DB.
                    row = instances.deobscure("*", blocked, blocked_hash)

                    if row is None:
                        print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
                        continue

                    blocked      = row[0]
                    origin       = row[1]
                    nodeinfo_url = row[2]
                elif blocked.count("?") > 0:
                    # Same deobscuring for "?"-masked entries.
                    row = instances.deobscure("?", blocked, blocked_hash)

                    if row is None:
                        print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
                        continue

                    blocked      = row[0]
                    origin       = row[1]
                    nodeinfo_url = row[2]

                # Validate (possibly deobscured) domain before registering it.
                # NOTE(review): the original contained this stanza twice
                # back-to-back; the second pass was dead after the first
                # registered the domain, so it has been deduplicated.
                if not validators.domain(blocked):
                    print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!")
                    continue
                elif blocked.endswith(".arpa"):
                    print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
                    continue
                elif blocked.endswith(".tld"):
                    print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
                    continue
                elif blacklist.is_blacklisted(blocked):
                    continue
                elif not instances.is_registered(blocked):
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)

                if not blocks.is_instance_blocked(domain, blocked, block_level):
                    blocks.add_instance(domain, blocked, reason, block_level)

                    if block_level == "reject":
                        found_blocks.append({
                            "blocked": blocked,
                            "reason" : reason
                        })
                else:
                    # Block already known - refresh last-seen and reason.
                    blocks.update_last_seen(domain, blocked, block_level)
                    blocks.update_reason(reason, domain, blocked, block_level)

        fba.connection.commit()
    except network.exceptions as exception:
        print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")
        instances.set_last_error(domain, exception)