1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
22 from fba import blacklist
23 from fba import blocks
24 from fba import config
27 from fba import instances
28 from fba import network
30 from fba.helpers import tidyup
34 "Silenced instances" : "Silenced servers",
35 "Suspended instances" : "Suspended servers",
36 "Limited instances" : "Limited servers",
37 "Filtered media" : "Filtered media",
38 # Mapping German -> English
39 "Gesperrte Server" : "Suspended servers",
40 "Gefilterte Medien" : "Filtered media",
41 "Stummgeschaltete Server" : "Silenced servers",
43 "停止済みのサーバー" : "Suspended servers",
44 "制限中のサーバー" : "Limited servers",
45 "メディアを拒否しているサーバー": "Filtered media",
46 "サイレンス済みのサーバー" : "Silenced servers",
48 "שרתים מושעים" : "Suspended servers",
49 "מדיה מסוננת" : "Filtered media",
50 "שרתים מוגבלים" : "Silenced servers",
52 "Serveurs suspendus" : "Suspended servers",
53 "Médias filtrés" : "Filtered media",
54 "Serveurs limités" : "Limited servers",
55 "Serveurs modérés" : "Limited servers",
# fetch_blocks_from_about(): scrape the block lists an instance publishes on its
# HTML about page and return them grouped by block level.
#
# Parameters:
#   domain - instance to query; must be a 'str'
# Returns:
#   On the normal path a dict with the keys "reject", "media_removal" and
#   "followers_only"; each value is a list of {"domain", "hash", "reason"}
#   entries built from the table rows found on the page.
# Raises:
#   ValueError - when 'domain' is not a 'str', or with the message
#                "Parameter 'domain' is empty" (the guarding condition is not
#                visible in this view, presumably an empty-string check -
#                TODO confirm)
#
# NOTE(review): this view of the function lacks some structural lines (block
# openers, bracket lines, early exits). The notes below describe only what the
# visible lines establish.
58 def fetch_blocks_from_about(domain: str) -> dict:
59 # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
60 if not isinstance(domain, str):
61 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
63 raise ValueError("Parameter 'domain' is empty")
65 # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain)
# One result list per headline (in its English wording) that is recognised below.
67 "Suspended servers": [],
68 "Filtered media" : [],
69 "Limited servers" : [],
70 "Silenced servers" : [],
# The candidate pages are tried in exactly this order.
74 for path in ("/about/more", "/about"):
76 # DEBUG: print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...")
# The response is parsed with BeautifulSoup; both timeouts come from the configuration.
77 doc = bs4.BeautifulSoup(
78 network.fetch_response(
82 (config.get("connection_timeout"), config.get("read_timeout"))
# A page containing at least one <h3> headline is taken as the result (see the DEBUG note).
87 if len(doc.find_all("h3")) > 0:
88 # DEBUG: print(f"DEBUG: path='{path}' had some headlines - BREAK!")
# NOTE(review): BaseException also covers KeyboardInterrupt and SystemExit. The
# error is printed and recorded through instances.update_last_error().
91 except BaseException as exception:
92 print("ERROR: Cannot fetch from domain:", domain, exception)
93 instances.update_last_error(domain, exception)
96 # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
98 print(f"WARNING: Cannot find any 'h3' tags for domain='{domain}' - EXIT!")
# Each <h3> headline is passed through tidyup.reason() and, when known, translated
# to its English wording through language_mapping.
101 for header in doc.find_all("h3"):
102 header_text = tidyup.reason(header.text)
104 # DEBUG: print(f"DEBUG: header_text='{header_text}'")
105 if header_text in language_mapping:
106 # DEBUG: print(f"DEBUG: header_text='{header_text}'")
107 header_text = language_mapping[header_text]
109 print(f"WARNING: header_text='{header_text}' not found in language mapping table")
# NOTE(review): the result keys listed above all contain upper-case letters, so the
# lower() test cannot match any of them, and the lookup below indexes with the
# un-lowered header_text anyway - TODO confirm the lower() test is still needed.
111 if header_text in blocklist or header_text.lower() in blocklist:
112 # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
# Rows are read from the first <table> that follows the headline. [1:] skips its
# first row (presumably the table header - TODO confirm).
113 for line in header.find_all_next("table")[0].find_all("tr")[1:]:
114 blocklist[header_text].append(
116 "domain": tidyup.domain(line.find("span").text),
# [9:] drops the first nine characters of the span's title attribute (presumably a
# fixed prefix in front of the hash - TODO confirm).
117 "hash" : tidyup.domain(line.find("span")["title"][9:]),
118 "reason": tidyup.reason(line.find_all("td")[1].text),
122 print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
124 # DEBUG: print("DEBUG: Returning blocklist for domain:", domain)
# Translate the headline-based keys into block levels; limited and silenced servers
# are merged into "followers_only".
126 "reject" : blocklist["Suspended servers"],
127 "media_removal" : blocklist["Filtered media"],
128 "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
# fetch_blocks(): fetch the domain blocks of a Mastodon instance and record them
# through the 'blocks' and 'instances' helpers.
#
# The JSON API (/api/v1/instance/domain_blocks) is queried first. When it returns
# zero rows, fetch_blocks_from_about() is used to scrape the about page instead.
#
# Parameters:
#   domain       - instance whose block list is fetched; must be a 'str'
#   origin       - must be a 'str' or None
#   nodeinfo_url - must be a non-empty 'str'
# Raises:
#   ValueError - when a parameter fails the checks at the top of the function
#
# NOTE(review): this view of the function lacks some structural lines (block
# openers, bracket lines, early exits). The notes below describe only what the
# visible lines establish.
131 def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
132 # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
133 if not isinstance(domain, str):
134 raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
136 raise ValueError("Parameter 'domain' is empty")
137 elif not isinstance(origin, str) and origin is not None:
138 raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
140 raise ValueError("Parameter 'origin' is empty")
141 elif not isinstance(nodeinfo_url, str):
142 raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
143 elif nodeinfo_url == "":
144 raise ValueError("Parameter 'nodeinfo_url' is empty")
146 # No CSRF by default, you don't have to add network.api_headers by yourself here
150 # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
# The request headers are obtained from csrf.determine(); a network error at this
# point is reported with the WARNING below.
151 headers = csrf.determine(domain, dict())
152 except network.exceptions as exception:
153 print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
157 # json endpoint for newer mastodongs
158 found_blocks = list()
163 "media_removal" : [],
164 "followers_only": [],
165 "report_removal": [],
168 # DEBUG: print("DEBUG: Querying API domain_blocks:", domain)
169 data = network.get_json_api(
171 "/api/v1/instance/domain_blocks",
173 (config.get("connection_timeout"), config.get("read_timeout"))
# Both error shapes (an "error_message" key, or an "error" key inside the JSON
# payload) are recorded through instances.update_last_error().
176 if "error_message" in data:
177 # DEBUG: print(f"DEBUG: Was not able to fetch domain_blocks from domain='{domain}': status_code='{data['status_code']}',error_message='{data['error_message']}'")
178 instances.update_last_error(domain, data)
180 elif "json" in data and "error" in data["json"]:
181 print(f"WARNING: JSON API returned error message: '{data['json']['error']}'")
182 instances.update_last_error(domain, data)
186 blocklist = data["json"]
188 if len(blocklist) > 0:
189 print(f"INFO: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon' ...")
190 for block in blocklist:
192 # DEBUG: print(f"DEBUG: block[{type(block)}]='{block}'")
# Key order matters here: entries are unpacked positionally further down through
# block.values() as (blocked, blocked_hash, reason).
194 "domain": block["domain"],
195 "hash" : block["digest"],
196 "reason": block["comment"] if "comment" in block else None
199 # DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
# Severity to block level: suspend -> reject, silence -> followers_only,
# reject_media -> media_removal, reject_reports -> report_removal.
200 if block['severity'] == 'suspend':
201 # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
202 rows['reject'].append(entry)
203 elif block['severity'] == 'silence':
204 # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
205 rows['followers_only'].append(entry)
206 elif block['severity'] == 'reject_media':
207 # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
208 rows['media_removal'].append(entry)
209 elif block['severity'] == 'reject_reports':
210 # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
211 rows['report_removal'].append(entry)
213 print("WARNING: Unknown severity:", block['severity'], block['domain'])
215 # DEBUG: print(f"DEBUG: domain='{domain}' has returned zero rows, trying /about/more page ...")
216 rows = fetch_blocks_from_about(domain)
# NOTE(review): len(rows.items()) is the number of block levels held in 'rows',
# not the number of blocked domains the message suggests.
218 print(f"INFO: Checking {len(rows.items())} entries from domain='{domain}',software='mastodon' ...")
219 for block_level, blocklist in rows.items():
220 # DEBUG: print("DEBUG: domain,block_level,blocklist():", domain, block_level, len(blocklist))
221 block_level = tidyup.domain(block_level)
223 # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
224 if block_level == "":
225 print("WARNING: block_level is empty, domain:", domain)
228 # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon',block_level='{block_level}' ...")
229 for block in blocklist:
230 # DEBUG: print(f"DEBUG: block[]='{type(block)}'")
# Relies on dict insertion order of each entry: "domain", "hash", "reason".
231 blocked, blocked_hash, reason = block.values()
232 # DEBUG: print(f"DEBUG: blocked='{blocked}',blocked_hash='{blocked_hash}',reason='{reason}':")
233 blocked = tidyup.domain(blocked)
234 reason = tidyup.reason(reason) if reason is not None and reason != "" else None
235 # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")
238 print("WARNING: blocked is empty:", domain)
240 elif blacklist.is_blacklisted(blocked):
241 # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
# A '*' in the name marks an obfuscated domain; the instance is looked up by its
# hash instead. The query passes the hash as a bound parameter.
243 elif blocked.count("*") > 0:
244 # Doing the hash search for instance names as well to tidy up DB
246 "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
248 searchres = fba.cursor.fetchone()
250 # DEBUG: print(f"DEBUG: searchres[]='{type(searchres)}'")
251 if searchres is None:
252 print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
255 # DEBUG: print("DEBUG: Updating domain: ", searchres[0])
# NOTE(review): origin and nodeinfo_url are function parameters; reassigning them
# here also changes the values seen by later loop iterations, including the
# nodeinfo_url passed to the instances.add() calls below.
256 blocked = searchres[0]
257 origin = searchres[1]
258 nodeinfo_url = searchres[2]
260 # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
261 if not validators.domain(blocked):
262 print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - skipped!")
264 elif blocked.split(".")[-1] == "arpa":
265 print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
267 elif not instances.is_registered(blocked):
268 # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
# The name of the currently executing function is passed to instances.add().
269 instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
# NOTE(review): validators.domain(blocked) was already tested a few lines above; if
# this 'elif' belongs to the same chain it cannot be reached - TODO confirm.
270 elif not validators.domain(blocked):
271 print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - skipped!")
274 # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
275 if not validators.domain(blocked):
276 print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - skipped!")
278 elif blocked.split(".")[-1] == "arpa":
279 print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
281 elif not instances.is_registered(blocked):
282 # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain)
283 instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
# The hash is used as the identifier when the name holds more than one '*'.
285 blocking = blocked if blocked.count("*") <= 1 else blocked_hash
286 # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'")
# NOTE(review): the existence check uses 'blocked' while the add/update calls use
# 'blocking'; the two differ whenever 'blocked' holds more than one '*'.
288 if not blocks.is_instance_blocked(domain, blocked, block_level):
289 # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
290 blocks.add_instance(domain, blocking, reason, block_level)
# Only blocks of level "reject" are collected in found_blocks.
292 if block_level == "reject":
293 found_blocks.append({
298 # DEBUG: print(f"DEBUG: Updating block last seen and reason for domain='{domain}',blocking='{blocking}' ...")
299 blocks.update_last_seen(domain, blocking, block_level)
300 blocks.update_reason(reason, domain, blocking, block_level)
302 # DEBUG: print("DEBUG: Committing changes ...")
303 fba.connection.commit()
# Network errors raised while fetching or processing are reported on stdout.
304 except network.exceptions as exception:
305 print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")
307 # DEBUG: print("DEBUG: EXIT!")