1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
25 from fba.helpers import blacklist
26 from fba.helpers import config
27 from fba.helpers import tidyup
29 from fba.http import network
31 from fba.models import blocks
32 from fba.models import instances
36 "Silenced instances" : "Silenced servers",
37 "Suspended instances" : "Suspended servers",
38 "Limited instances" : "Limited servers",
39 "Filtered media" : "Filtered media",
40 # Mapping German -> English
41 "Gesperrte Server" : "Suspended servers",
42 "Gefilterte Medien" : "Filtered media",
43 "Stummgeschaltete Server" : "Silenced servers",
45 "停止済みのサーバー" : "Suspended servers",
46 "制限中のサーバー" : "Limited servers",
47 "メディアを拒否しているサーバー": "Filtered media",
48 "サイレンス済みのサーバー" : "Silenced servers",
50 "שרתים מושעים" : "Suspended servers",
51 "מדיה מסוננת" : "Filtered media",
52 "שרתים מוגבלים" : "Silenced servers",
54 "Serveurs suspendus" : "Suspended servers",
55 "Médias filtrés" : "Filtered media",
56 "Serveurs limités" : "Limited servers",
57 "Serveurs modérés" : "Limited servers",
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the block lists published on a Mastodon instance's about page.

    Tries "/about/more" first and "/about" second, stopping at the first page
    that contains at least one <h3> headline or at the first network error.
    Each headline is translated through ``language_mapping``; for every
    recognised headline the rows of the first table that follows it are
    collected as block entries.

    Args:
        domain: Lower-case domain name of the instance to scrape.

    Returns:
        A dict with the keys "reject", "media_removal" and "followers_only".
        Each value is a list of dicts with the keys "domain", "hash" and
        "reason". All lists are empty when no page could be fetched.

    Raises:
        ValueError: If ``domain`` is not a string, is empty, is not all
            lower-case, is not a valid domain name or ends with ".arpa" or
            ".tld".
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif domain.lower() != domain:
        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")
    elif domain.endswith(".arpa"):
        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")

    # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain)
    doc = None
    for path in ["/about/more", "/about"]:
        try:
            # DEBUG: print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...")
            # NOTE(review): the leading arguments of fetch_response() and the
            # parser name were not visible in the reviewed excerpt; they are
            # assumed to be (domain, path, network.web_headers) and
            # "html.parser" - confirm against fba.http.network.
            doc = bs4.BeautifulSoup(
                network.fetch_response(
                    domain,
                    path,
                    network.web_headers,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                ).text,
                "html.parser",
            )

            if len(doc.find_all("h3")) > 0:
                # DEBUG: print(f"DEBUG: path='{path}' had some headlines - BREAK!")
                break

        except network.exceptions as exception:
            print(f"ERROR: Cannot fetch from domain='{domain}',exception='{type(exception)}'")
            instances.set_last_error(domain, exception)
            break

    # Buckets keyed by the (English) headline shown on the about page
    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
    if doc is None:
        print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!")
    else:
        for header in doc.find_all("h3"):
            header_text = tidyup.reason(header.text)

            # DEBUG: print(f"DEBUG: header_text='{header_text}'")
            if header_text in language_mapping:
                # DEBUG: print(f"DEBUG: header_text='{header_text}'")
                header_text = language_mapping[header_text]
            else:
                print(f"WARNING: header_text='{header_text}' not found in language mapping table")

            if header_text not in blocklist:
                print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
                continue

            # find_all_next() instead of find_next_siblings() to account for
            # instances that e.g. hide lists in dropdown menu
            tables = header.find_all_next("table")
            if len(tables) == 0:
                print(f"WARNING: header_text='{header_text}' is not followed by any table - SKIPPED!")
                continue

            # First <tr> is skipped, the remaining rows are treated as data
            for line in tables[0].find_all("tr")[1:]:
                span = line.find("span")
                cells = line.find_all("td")
                title = span.get("title") if span is not None else None

                if not isinstance(title, str) or len(cells) < 2:
                    # Row does not have the expected span/title/reason layout
                    print(f"WARNING: Malformed table row below header_text='{header_text}' - SKIPPED!")
                    continue

                blocklist[header_text].append({
                    "domain": tidyup.domain(span.text),
                    # presumably drops a 9-character "SHA-256: " prefix - TODO confirm
                    "hash"  : tidyup.domain(title[9:]),
                    "reason": tidyup.reason(cells[1].text),
                })

    # DEBUG: print("DEBUG: Returning blocklist for domain:", domain)
    # Same key shape on every path, also when no page could be fetched
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }
def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Fetch the domain blocks of a Mastodon instance and record them.

    The blocks are requested from "/api/v1/instance/domain_blocks". When that
    endpoint answers with an empty list, the about page is scraped through
    fetch_blocks_from_about() instead. Every blocked domain is tidied up,
    de-obscured when it contains "*" or "?", registered as an instance when
    it is not yet known, and then added (or refreshed) as a block of
    ``domain``. The changes are committed at the end.

    Network errors are printed, stored through instances.set_last_error()
    and not re-raised.

    Args:
        domain: Lower-case domain name of the instance whose blocks are fetched.
        origin: Origin of ``domain``; may be None but not an empty string.
            Only validated here.
        nodeinfo_url: Non-empty string handed to instances.add() when a
            blocked domain has to be registered and no de-obscured row
            supplies its own value.

    Raises:
        ValueError: If one of the parameters fails the checks above.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif domain.lower() != domain:
        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")
    elif domain.endswith(".arpa"):
        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
        return

    # API severity -> block level used for storing the block
    severity_levels = {
        "suspend"       : "reject",
        "silence"       : "followers_only",
        "reject_media"  : "media_removal",
        "reject_reports": "report_removal",
    }

    try:
        # JSON endpoint for newer Mastodon versions
        blocklist = list()

        rows = {
            "reject"        : [],
            "media_removal" : [],
            "followers_only": [],
            "report_removal": [],
        }

        # DEBUG: print("DEBUG: Querying API domain_blocks:", domain)
        data = network.get_json_api(
            domain,
            "/api/v1/instance/domain_blocks",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        # DEBUG: print(f"DEBUG: data[]='{type(data)}'")
        # NOTE(review): the statements that ended the two error branches were
        # not visible in the reviewed excerpt; an early return is assumed -
        # confirm against the original file.
        if "error_message" in data:
            # DEBUG: print(f"DEBUG: Was not able to fetch domain_blocks from domain='{domain}': status_code='{data['status_code']}',error_message='{data['error_message']}'")
            instances.set_last_error(domain, data)
            return
        elif "json" in data and "error" in data["json"]:
            print(f"WARNING: JSON API returned error message: '{data['json']['error']}'")
            instances.set_last_error(domain, data)
            return
        else:
            # Getting blocklist
            blocklist = data["json"]

        if len(blocklist) > 0:
            print(f"INFO: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon' ...")
            for block in blocklist:
                # DEBUG: print(f"DEBUG: block[]='{type(block)}'")
                if not isinstance(block, dict):
                    # DEBUG: print(f"DEBUG: block[]='{type(block)}' is not of type 'dict' - SKIPPED!")
                    continue
                elif not all(key in block for key in ("domain", "digest", "severity")):
                    # Incomplete records would otherwise abort the whole run with a KeyError
                    print(f"WARNING: block='{block}' lacks 'domain', 'digest' or 'severity' - SKIPPED!")
                    continue

                # Map block -> entry
                entry = {
                    "domain": block["domain"],
                    "hash"  : block["digest"],
                    "reason": block["comment"] if "comment" in block else None
                }

                level = severity_levels.get(block["severity"])
                if level is None:
                    print(f"WARNING: Unknown severity='{block['severity']}', domain='{block['domain']}'")
                else:
                    # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
                    rows[level].append(entry)
        else:
            # DEBUG: print(f"DEBUG: domain='{domain}' has returned zero rows, trying /about/more page ...")
            rows = fetch_blocks_from_about(domain)

        # Count the entries of all block levels, not the number of block levels
        print(f"INFO: Checking {sum(len(entries) for entries in rows.values())} entries from domain='{domain}',software='mastodon' ...")
        for block_level, blocklist in rows.items():
            # DEBUG: print("DEBUG: domain,block_level,blocklist():", domain, block_level, len(blocklist))
            block_level = tidyup.domain(block_level)

            # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
            if block_level == "":
                print("WARNING: block_level is empty, domain:", domain)
                continue
            elif block_level == "accept":
                # DEBUG: print(f"DEBUG: domain='{domain}' skipping block_level='accept'")
                continue

            # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon',block_level='{block_level}' ...")
            for block in blocklist:
                # Read by key so the result does not depend on dict ordering
                blocked = tidyup.domain(block["domain"])
                blocked_hash = block["hash"]
                reason = block["reason"]
                reason = tidyup.reason(reason) if reason is not None and reason != "" else None
                # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                # Per-entry value: a de-obscured row must not change what is
                # passed to instances.add() for the entries that follow it
                blocked_nodeinfo_url = nodeinfo_url

                if blocked == "":
                    print("WARNING: blocked is empty:", domain)
                    continue
                elif blacklist.is_blacklisted(blocked):
                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                    continue

                # "*" is checked before "?", same order as the former elif chain
                obscure_char = next((char for char in ("*", "?") if char in blocked), None)
                if obscure_char is not None:
                    # Doing the hash search for instance names as well to tidy up DB
                    row = instances.deobscure(obscure_char, blocked, blocked_hash)

                    # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
                    if row is None:
                        print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
                        continue

                    # DEBUG: print("DEBUG: Updating domain: ", row[0])
                    blocked = row[0]
                    blocked_nodeinfo_url = row[2]

                # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                if not validators.domain(blocked):
                    print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!")
                    continue
                elif blocked.endswith(".arpa"):
                    print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
                    continue
                elif blocked.endswith(".tld"):
                    print(f"WARNING: blocked='{blocked}' is a fake domain, please don't crawl them!")
                    continue
                elif blacklist.is_blacklisted(blocked):
                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - SKIPPED!")
                    continue
                elif not instances.is_registered(blocked):
                    # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{blocked_nodeinfo_url}'")
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, blocked_nodeinfo_url)

                if not blocks.is_instance_blocked(domain, blocked, block_level):
                    # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
                    blocks.add_instance(domain, blocked, reason, block_level)
                else:
                    # DEBUG: print(f"DEBUG: Updating block last seen and reason for domain='{domain}',blocked='{blocked}' ...")
                    blocks.update_last_seen(domain, blocked, block_level)
                    blocks.update_reason(reason, domain, blocked, block_level)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()
    except network.exceptions as exception:
        print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")
        instances.set_last_error(domain, exception)

    # DEBUG: print("DEBUG: EXIT!")