1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
import inspect

import bs4
import validators

from fba import csrf
from fba import fba
from fba import network

from fba.helpers import blacklist
from fba.helpers import config
from fba.helpers import tidyup

from fba.models import blocks
from fba.models import instances
# Maps localized block-list headlines, as rendered on a Mastodon instance's
# /about page, onto the canonical English headline names that are used as
# keys in fetch_blocks_from_about()'s local blocklist dict.
language_mapping = {
    # English variants -> canonical English
    "Silenced instances"            : "Silenced servers",
    "Suspended instances"           : "Suspended servers",
    "Limited instances"             : "Limited servers",
    "Filtered media"                : "Filtered media",
    # Mapping German -> English
    "Gesperrte Server"              : "Suspended servers",
    "Gefilterte Medien"             : "Filtered media",
    "Stummgeschaltete Server"       : "Silenced servers",
    # Mapping Japanese -> English
    "停止済みのサーバー"            : "Suspended servers",
    "制限中のサーバー"              : "Limited servers",
    "メディアを拒否しているサーバー": "Filtered media",
    "サイレンス済みのサーバー"      : "Silenced servers",
    # Mapping Hebrew -> English
    "שרתים מושעים"                  : "Suspended servers",
    "מדיה מסוננת"                   : "Filtered media",
    "שרתים מוגבלים"                 : "Silenced servers",
    # Mapping French -> English
    "Serveurs suspendus"            : "Suspended servers",
    "Médias filtrés"                : "Filtered media",
    "Serveurs limités"              : "Limited servers",
    "Serveurs modérés"              : "Limited servers",
}
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape a Mastodon instance's /about/more (or /about) page for its
    published block lists.

    Parameters:
        domain: Hostname of the instance to scrape (must be a non-empty str).

    Returns:
        A dict with the keys "reject", "media_removal" and "followers_only",
        each a list of entries of the shape
        {"domain": ..., "hash": ..., "reason": ...}.

    Raises:
        ValueError: If domain is not a str or is empty.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    doc = None
    for path in ["/about/more", "/about"]:
        try:
            doc = bs4.BeautifulSoup(
                network.fetch_response(
                    domain,
                    path,
                    network.web_headers,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                ).text,
                "html.parser",
            )

            # A page that actually lists blocks has at least one <h3> headline.
            if len(doc.find_all("h3")) > 0:
                break
        except network.exceptions as exception:
            print(f"ERROR: Cannot fetch from domain='{domain}',exception='{type(exception)}'")
            instances.set_last_error(domain, exception)

    # Collects scraped entries per canonical (English) headline.
    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    if doc is None:
        # Neither path yielded a usable page; return empty lists.
        print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!")
        return {
            "reject"        : [],
            "media_removal" : [],
            "followers_only": [],
        }

    for header in doc.find_all("h3"):
        header_text = tidyup.reason(header.text)

        # Translate localized headline into the canonical English key.
        if header_text in language_mapping:
            header_text = language_mapping[header_text]
        else:
            print(f"WARNING: header_text='{header_text}' not found in language mapping table")

        if header_text in blocklist or header_text.lower() in blocklist:
            # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
            for line in header.find_all_next("table")[0].find_all("tr")[1:]:
                blocklist[header_text].append({
                    "domain": tidyup.domain(line.find("span").text),
                    # The span's title attribute carries the digest after a
                    # fixed 9-character prefix ("SHA-256: " — TODO confirm).
                    "hash"  : tidyup.domain(line.find("span")["title"][9:]),
                    "reason": tidyup.reason(line.find_all("td")[1].text),
                })
        else:
            print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")

    # Map the scraped headline buckets onto the API-style block levels.
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }
def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Fetch a Mastodon instance's block lists and persist them.

    First tries the JSON endpoint /api/v1/instance/domain_blocks (newer
    Mastodon versions); if that returns no rows, falls back to scraping the
    /about pages via fetch_blocks_from_about(). Discovered blocks are written
    through the blocks/instances models and committed on fba.connection.

    Parameters:
        domain:       Hostname of the instance being queried (non-empty str).
        origin:       Originating instance, or None.
        nodeinfo_url: URL of the instance's nodeinfo document (non-empty str).

    Raises:
        ValueError: If any parameter fails validation.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = dict()

    try:
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
        return

    try:
        # json endpoint for newer mastodongs
        found_blocks = list()
        blocklist    = list()

        # Accumulates API entries per block level.
        rows = {
            "reject"        : [],
            "media_removal" : [],
            "followers_only": [],
            "report_removal": [],
        }

        data = network.get_json_api(
            domain,
            "/api/v1/instance/domain_blocks",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        if "error_message" in data:
            # API not reachable/usable; record the error and bail out.
            instances.set_last_error(domain, data)
            return
        elif "json" in data and "error" in data["json"]:
            print(f"WARNING: JSON API returned error message: '{data['json']['error']}'")
            instances.set_last_error(domain, data)
            return
        else:
            blocklist = data["json"]

        if len(blocklist) > 0:
            print(f"INFO: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon' ...")
            for block in blocklist:
                # Defensive: skip malformed (non-dict) entries.
                if not isinstance(block, dict):
                    continue

                entry = {
                    "domain": block["domain"],
                    "hash"  : block["digest"],
                    "reason": block["comment"] if "comment" in block else None
                }

                # Map Mastodon severities onto our block levels.
                if block['severity'] == 'suspend':
                    rows['reject'].append(entry)
                elif block['severity'] == 'silence':
                    rows['followers_only'].append(entry)
                elif block['severity'] == 'reject_media':
                    rows['media_removal'].append(entry)
                elif block['severity'] == 'reject_reports':
                    rows['report_removal'].append(entry)
                else:
                    print(f"WARNING: Unknown severity='{block['severity']}', domain='{block['domain']}'")
        else:
            # API returned zero rows, fall back to scraping /about/more.
            rows = fetch_blocks_from_about(domain)

        print(f"INFO: Checking {len(rows.items())} entries from domain='{domain}',software='mastodon' ...")
        for block_level, blocklist in rows.items():
            block_level = tidyup.domain(block_level)

            if block_level == "":
                print("WARNING: block_level is empty, domain:", domain)
                continue
            elif block_level == "accept":
                # "accept" entries are not blocks; skip them.
                continue

            for block in blocklist:
                # Entries are built with exactly these three keys, in order.
                blocked, blocked_hash, reason = block.values()
                blocked = tidyup.domain(blocked)
                reason  = tidyup.reason(reason) if reason is not None and reason != "" else None

                if blocked == "":
                    print("WARNING: blocked is empty:", domain)
                    continue
                elif blacklist.is_blacklisted(blocked):
                    continue
                elif blocked.count("*") > 0:
                    # Doing the hash search for instance names as well to tidy up DB
                    row = instances.deobscure("*", blocked, blocked_hash)

                    if row is None:
                        print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
                        continue

                    blocked      = row[0]
                    origin       = row[1]
                    nodeinfo_url = row[2]
                elif blocked.count("?") > 0:
                    # Doing the hash search for instance names as well to tidy up DB
                    row = instances.deobscure("?", blocked, blocked_hash)

                    if row is None:
                        print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
                        continue

                    blocked      = row[0]
                    origin       = row[1]
                    nodeinfo_url = row[2]

                # Validate the (possibly deobfuscated) domain and register it
                # if unknown. NOTE(review): consolidated from two previously
                # duplicated validate-and-register blocks.
                if not validators.domain(blocked):
                    print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!")
                    continue
                elif blocked.endswith(".arpa"):
                    print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
                    continue
                elif blocked.endswith(".tld"):
                    print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
                    continue
                elif blacklist.is_blacklisted(blocked):
                    continue
                elif not instances.is_registered(blocked):
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)

                if not blocks.is_instance_blocked(domain, blocked, block_level):
                    blocks.add_instance(domain, blocked, reason, block_level)

                    if block_level == "reject":
                        found_blocks.append({
                            "blocked": blocked,
                            "reason" : reason
                        })
                else:
                    # Block already known; refresh its metadata instead.
                    blocks.update_last_seen(domain, blocked, block_level)
                    blocks.update_reason(reason, domain, blocked, block_level)

        fba.connection.commit()
    except network.exceptions as exception:
        print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")
        instances.set_last_error(domain, exception)