1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
26 from fba.helpers import blacklist
27 from fba.helpers import config
28 from fba.helpers import tidyup
30 from fba.http import network
32 from fba.models import blocks
33 from fba.models import instances
# Module-wide logging setup: one logger per module, named after __name__
# (standard PEP 282 convention).
# NOTE(review): logging.basicConfig() at import time configures the root
# logger as a side effect; fine for an application, undesirable for a
# library module -- confirm which this is meant to be.
35 logging.basicConfig(level=logging.INFO)
36 logger = logging.getLogger(__name__)
38 # Language mapping X -> English
# Maps the raw block-list headlines scraped from a remote instance's /about
# page (in that instance's UI language) onto the canonical English headline
# names used as bucket keys in fetch_blocks_from_about().
# NOTE(review): the `language_mapping = {` opener and the closing `}` are
# not visible in this chunk (the fused numbers are the original file's line
# numbers and show gaps).
41     "Silenced instances" : "Silenced servers",
42     "Suspended instances" : "Suspended servers",
43     "Limited instances" : "Limited servers",
44     "Filtered media" : "Filtered media",
45     # Mapping German -> English
46     "Gesperrte Server" : "Suspended servers",
47     "Gefilterte Medien" : "Filtered media",
48     "Stummgeschaltete Server" : "Silenced servers",
    # Mapping Japanese -> English (original section comment missing here)
50     "停止済みのサーバー" : "Suspended servers",
51     "制限中のサーバー" : "Limited servers",
52     "メディアを拒否しているサーバー": "Filtered media",
53     "サイレンス済みのサーバー" : "Silenced servers",
    # Mapping Hebrew -> English (original section comment missing here)
55     "שרתים מושעים" : "Suspended servers",
56     "מדיה מסוננת" : "Filtered media",
    # NOTE(review): this key literally reads "limited servers" but is mapped
    # to "Silenced servers" -- confirm this is intentional.
57     "שרתים מוגבלים" : "Silenced servers",
    # Mapping French -> English (original section comment missing here)
59     "Serveurs suspendus" : "Suspended servers",
60     "Médias filtrés" : "Filtered media",
61     "Serveurs limités" : "Limited servers",
62     "Serveurs modérés" : "Limited servers",
65 def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape a Mastodon instance's /about/more (or /about) page for its
    published block lists.

    Parameters:
        domain -- lower-cased, validated fediverse domain to crawl

    Returns:
        dict with keys "reject", "media_removal" and "followers_only", each
        a list of {"domain": ..., "hash": ..., "reason": ...} entries.

    Raises:
        ValueError -- when 'domain' fails one of the sanity checks below.

    NOTE(review): this chunk is truncated -- the numbers fused into each
    line are the original file's line numbers and several lines are missing
    (guard conditions, a try/except opener, break/continue, dict openers).
    Comments below flag the gaps; the code itself is untouched.
    """
66     logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
    # --- parameter sanity checks ------------------------------------------
67     if not isinstance(domain, str):
68         raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    # NOTE(review): the guard for the next raise (presumably
    # `elif domain == "":`) is missing from this view.
70         raise ValueError("Parameter 'domain' is empty")
71     elif domain.lower() != domain:
72         raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
73     elif not validators.domain(domain.split("/")[0]):
74         raise ValueError(f"domain='{domain}' is not a valid domain")
75     elif domain.endswith(".arpa"):
76         raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
77     elif domain.endswith(".tld"):
78         raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    # NOTE(review): print-style logger call -- extra positional arg without a
    # %s placeholder makes logging report a formatting error instead of the
    # message.  The same pattern recurs in several debug lines below.
80     logger.debug("Fetching mastodon blocks from domain:", domain)
    # Try the verbose page first, fall back to the plain about page.
82     for path in ["/about/more", "/about"]:
84         logger.debug(f"Fetching path='{path}' from domain='{domain}' ...")
        # NOTE(review): lines are missing inside this call chain (the URL,
        # headers=, and the closing `.text, "html.parser")`), and the `try:`
        # that the `except` below belongs to is not visible either.
85         doc = bs4.BeautifulSoup(
86             network.fetch_response(
90                 (config.get("connection_timeout"), config.get("read_timeout"))
        # Stop at the first page that actually contains <h3> headlines; the
        # `break` statement after the debug line is not visible.
95         if len(doc.find_all("h3")) > 0:
96             logger.debug(f"path='{path}' had some headlines - BREAK!")
99         except network.exceptions as exception:
100             logger.warning(f"Cannot fetch from domain='{domain}',exception='{type(exception)}'")
101             instances.set_last_error(domain, exception)
    # Accumulator keyed by canonical English headline names; the
    # `blocklist = {` opener is not visible in this chunk.
105         "Suspended servers": [],
106         "Filtered media"   : [],
107         "Limited servers"  : [],
108         "Silenced servers" : [],
111     logger.debug("doc[]='%s'", type(doc))
    # NOTE(review): the condition guarding this early exit (presumably
    # `if doc is None:`) is not visible.
113         logger.warning(f"Cannot fetch any /about pages for domain='{domain}' - EXIT!")
    # Walk every <h3> headline and translate it to English via the
    # module-level language_mapping table.
116     for header in doc.find_all("h3"):
117         header_text = tidyup.reason(header.text)
119         logger.debug("header_text='%s'", header_text)
120         if header_text in language_mapping:
121             logger.debug("header_text='%s'", header_text)
122             header_text = language_mapping[header_text]
        # NOTE(review): the `else:` for this warning is not visible.
124             logger.warning(f"header_text='{header_text}' not found in language mapping table")
126         if header_text in blocklist or header_text.lower() in blocklist:
127             # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
            # [1:] skips the table's header row.
128             for line in header.find_all_next("table")[0].find_all("tr")[1:]:
129                 blocklist[header_text].append({
130                     "domain": tidyup.domain(line.find("span").text),
                    # [9:] drops a fixed 9-character prefix from the span's
                    # title attribute -- presumably a hash label like
                    # "SHA-256: "; TODO confirm against a live page.
131                     "hash"  : tidyup.domain(line.find("span")["title"][9:]),
132                     "reason": tidyup.reason(line.find_all("td")[1].text),
        # NOTE(review): the `else:` for this warning is not visible.
135             logger.warning(f"header_text='{header_text}' not found in blocklist()={len(blocklist)}")
137     logger.debug("Returning blocklist for domain:", domain)
    # Map scraped categories onto API-style block levels; the `return {`
    # opener and closing `}` are not visible in this chunk.
139         "reject"        : blocklist["Suspended servers"],
140         "media_removal" : blocklist["Filtered media"],
141         "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
144 def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Fetch and persist the block list of one Mastodon instance.

    First queries the JSON endpoint /api/v1/instance/domain_blocks; when
    that yields nothing, falls back to scraping via
    fetch_blocks_from_about().  Each blocked domain is registered (if not
    already known) and recorded through the blocks model; changes are
    committed at the end.

    Parameters:
        domain       -- lower-cased, validated domain to query
        origin       -- originating domain, or None
        nodeinfo_url -- nodeinfo URL of `domain`

    Raises:
        ValueError -- when any parameter fails the sanity checks below.

    NOTE(review): this chunk is truncated -- the numbers fused into each
    line are the original file's line numbers and show gaps (missing guard
    conditions, `try:` openers, `continue` statements, dict openers).
    Comments below flag the gaps; the code itself is untouched.
    """
145     logger.debug(f"domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
    # --- parameter sanity checks ------------------------------------------
146     if not isinstance(domain, str):
147         raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    # NOTE(review): missing guard line (presumably `elif domain == "":`).
149         raise ValueError("Parameter 'domain' is empty")
150     elif domain.lower() != domain:
151         raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
152     elif not validators.domain(domain.split("/")[0]):
153         raise ValueError(f"domain='{domain}' is not a valid domain")
154     elif domain.endswith(".arpa"):
155         raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
156     elif domain.endswith(".tld"):
157         raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
158     elif not isinstance(origin, str) and origin is not None:
159         raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    # NOTE(review): missing guard line (presumably `elif origin == "":`).
161         raise ValueError("Parameter 'origin' is empty")
162     elif not isinstance(nodeinfo_url, str):
163         raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
164     elif nodeinfo_url == "":
165         raise ValueError("Parameter 'nodeinfo_url' is empty")
167     # No CSRF by default, you don't have to add network.api_headers by yourself here
    # NOTE(review): the `try:` that the `except` below belongs to is not
    # visible in this chunk.
171         logger.debug(f"Checking CSRF for domain='{domain}'")
172         headers = csrf.determine(domain, dict())
173     except network.exceptions as exception:
174         logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
175         instances.set_last_error(domain, exception)
179     # json endpoint for newer mastodons
180     found_blocks = list()
    # Buckets per block level; the `rows = {` opener (and presumably a
    # "reject" entry) are not visible in this chunk.
185         "media_removal" : [],
186         "followers_only": [],
187         "report_removal": [],
    # NOTE(review): print-style logger call -- extra positional arg without
    # a %s placeholder; the same pattern recurs in several lines below.
190         logger.debug("Querying API domain_blocks:", domain)
    # NOTE(review): the missing lines in this call presumably passed the
    # base URL and `headers` to get_json_api -- confirm against full file.
191         data = network.get_json_api(
193             "/api/v1/instance/domain_blocks",
195             (config.get("connection_timeout"), config.get("read_timeout"))
198         logger.debug("data[]='%s'", type(data))
199         if "error_message" in data:
200             logger.debug(f"Was not able to fetch domain_blocks from domain='{domain}': status_code='{data['status_code']}',error_message='{data['error_message']}'")
201             instances.set_last_error(domain, data)
203         elif "json" in data and "error" in data["json"]:
204             logger.warning(f"JSON API returned error message: '{data['json']['error']}'")
205             instances.set_last_error(domain, data)
    # NOTE(review): the `else:` opening this success branch is not visible.
209             blocklist = data["json"]
211         if len(blocklist) > 0:
212             logger.info("Checking %d entries from domain='%s' ...", len(blocklist), domain)
213             for block in blocklist:
215                 logger.debug(f"block[]='{type(block)}'")
216                 if not isinstance(block, dict):
                    # NOTE(review): message claims "is of type 'dict'" while
                    # the condition is the opposite; the `continue` after it
                    # is also missing from this view.
217                     logger.debug(f"block[]='{type(block)}' is of type 'dict' - SKIPPED!")
221                 logger.debug(f"block[{type(block)}]='{block}'")
                # The `entry = {` opener is not visible in this chunk.
223                     "domain": block["domain"],
224                     "hash"  : block["digest"],
225                     "reason": block["comment"] if "comment" in block else None
228                 logger.debug("severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
                # Route the entry into a bucket by Mastodon's severity value.
229                 if block['severity'] == 'suspend':
230                     logger.debug(f"Adding entry='{entry}' with severity='{block['severity']}' ...")
231                     rows['reject'].append(entry)
232                 elif block['severity'] == 'silence':
233                     logger.debug(f"Adding entry='{entry}' with severity='{block['severity']}' ...")
234                     rows['followers_only'].append(entry)
235                 elif block['severity'] == 'reject_media':
236                     logger.debug(f"Adding entry='{entry}' with severity='{block['severity']}' ...")
237                     rows['media_removal'].append(entry)
238                 elif block['severity'] == 'reject_reports':
239                     logger.debug(f"Adding entry='{entry}' with severity='{block['severity']}' ...")
240                     rows['report_removal'].append(entry)
    # NOTE(review): the `else:` for this warning is not visible.
242                     logger.warning(f"Unknown severity='{block['severity']}', domain='{block['domain']}'")
    # Fallback: scrape the /about pages when the API returned nothing.
    # NOTE(review): the `else:` opening this branch is not visible.
244             logger.debug(f"domain='{domain}' has returned zero rows, trying /about/more page ...")
245             rows = fetch_blocks_from_about(domain)
247         logger.info("Checking %d entries from domain='%s' ...", len(rows.items()), domain)
248         for block_level, blocklist in rows.items():
249             logger.debug("domain,block_level,blocklist():", domain, block_level, len(blocklist))
250             block_level = tidyup.domain(block_level)
252             logger.debug("AFTER-block_level:", block_level)
            # Skip empty or "accept" levels; the `continue` statements after
            # these two branches are not visible in this chunk.
253             if block_level == "":
254                 logger.warning("block_level is empty, domain:", domain)
256             elif block_level == "accept":
257                 logger.debug(f"domain='{domain}' skipping block_level='accept'")
260             logger.debug(f"Checking {len(blocklist)} entries from domain='{domain}',block_level='{block_level}' ...")
261             for block in blocklist:
262                 logger.debug(f"block[]='{type(block)}'")
                # NOTE(review): relies on each entry dict having exactly the
                # keys domain/hash/reason in insertion order -- fragile.
263                 blocked, blocked_hash, reason = block.values()
264                 logger.debug(f"blocked='{blocked}',blocked_hash='{blocked_hash}',reason='{reason}':")
265                 blocked = tidyup.domain(blocked)
266                 reason  = tidyup.reason(reason) if reason is not None and reason != "" else None
267                 logger.debug(f"blocked='{blocked}',reason='{reason}' - AFTER!")
                # NOTE(review): the guard for this warning (presumably
                # `if blocked == "":`) and its `continue` are not visible.
270                     logger.warning("blocked is empty, domain='%s'", domain)
272                 elif blacklist.is_blacklisted(blocked):
273                     logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                # "*" means the remote instance obscured the domain; try to
                # recover it from the hash.
275                 elif blocked.count("*") > 0:
276                     # Doing the hash search for instance names as well to tidy up DB
277                     row = instances.deobscure("*", blocked, blocked_hash)
279                     logger.debug("row[]='%s'", type(row))
                    # NOTE(review): the `if row is None:` guard, its
                    # `continue`, and the `blocked = row[0]` / `origin =
                    # row[1]` assignments around here are not visible.
                    # ("deobsfucate" in the message is a typo for
                    # "deobfuscate" -- runtime string, left untouched.)
281                         logger.warning(f"Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
284                     logger.debug("Updating domain: ", row[0])
287                     nodeinfo_url = row[2]
                # Same recovery path for "?"-obscured domains.
288                 elif blocked.count("?") > 0:
289                     # Doing the hash search for instance names as well to tidy up DB
290                     row = instances.deobscure("?", blocked, blocked_hash)
292                     logger.debug("row[]='%s'", type(row))
294                         logger.warning(f"Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
297                     logger.debug("Updating domain: ", row[0])
300                     nodeinfo_url = row[2]
                # NOTE(review): the `else:` opening this lookup branch is not
                # visible in this chunk.
302                     logger.debug("Looking up instance by domain:", blocked)
303                     if not validators.domain(blocked):
304                         logger.warning(f"blocked='{blocked}' is not a valid domain name - SKIPPED!")
306                     elif blocked.endswith(".arpa"):
307                         logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
309                     elif blocked.endswith(".tld"):
310                         logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!")
312                     elif blacklist.is_blacklisted(blocked):
313                         logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
315                     elif not instances.is_registered(blocked):
316                         logger.debug(f"Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
                        # Registers the blocked domain; records this
                        # function's own name as the discovery command.
317                         instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
                # Second validation/registration pass over the (possibly
                # deobscured) domain -- largely duplicates the block above.
319                 logger.debug("Looking up instance by domain:", blocked)
320                 if not validators.domain(blocked):
321                     logger.warning(f"blocked='{blocked}' is not a valid domain name - SKIPPED!")
323                 elif blocked.endswith(".arpa"):
324                     logger.warning(f"blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
326                 elif blocked.endswith(".tld"):
327                     logger.warning(f"blocked='{blocked}' is a fake domain, please don't crawl them!")
329                 elif blacklist.is_blacklisted(blocked):
330                     logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
332                 elif not instances.is_registered(blocked):
333                     logger.debug("Hash wasn't found, adding:", blocked, domain)
334                     instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)
                # Record the block relation once, then keep last_seen/reason
                # fresh on subsequent runs.
336                 if not blocks.is_instance_blocked(domain, blocked, block_level):
337                     logger.debug("Blocking:", domain, blocked, block_level)
338                     blocks.add_instance(domain, blocked, reason, block_level)
340                     if block_level == "reject":
                        # The dict-literal body of this append (and its
                        # closing) is not visible in this chunk.
341                         found_blocks.append({
    # NOTE(review): the `else:` for the update branch below is not visible.
346                     logger.debug(f"Updating block last seen and reason for domain='{domain}',blocked='{blocked}' ...")
347                     blocks.update_last_seen(domain, blocked, block_level)
348                     blocks.update_reason(reason, domain, blocked, block_level)
350         logger.debug("Committing changes ...")
351         fba.connection.commit()
    # Outer network-failure handler; its matching `try:` is not visible.
352     except network.exceptions as exception:
353         logger.warning(f"domain='{domain}',exception[{type(exception)}]:'{str(exception)}'")
354         instances.set_last_error(domain, exception)
356     logger.debug("EXIT!")