]> git.mxchange.org Git - fba.git/blob - fba/networks/mastodon.py
Fixed:
[fba.git] / fba / networks / mastodon.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import inspect
18
19 import bs4
20 import validators
21
22 from fba import blacklist
23 from fba import config
24 from fba import csrf
25 from fba import fba
26 from fba import network
27
28 from fba.helpers import tidyup
29
30 from fba.models import blocks
31 from fba.models import instances
32
# Translates the localized <h3> headlines found on an instance's about page
# into the English headlines used as block list keys when scraping that page.
# Headlines that are not listed here are reported with a warning by the scraper.
language_mapping = {
    # English (older wording) -> English
    "Silenced instances": "Silenced servers",
    "Suspended instances": "Suspended servers",
    "Limited instances": "Limited servers",
    "Filtered media": "Filtered media",

    # German -> English
    "Gesperrte Server": "Suspended servers",
    "Gefilterte Medien": "Filtered media",
    "Stummgeschaltete Server": "Silenced servers",

    # Japanese -> English
    "停止済みのサーバー": "Suspended servers",
    "制限中のサーバー": "Limited servers",
    "メディアを拒否しているサーバー": "Filtered media",
    "サイレンス済みのサーバー": "Silenced servers",

    # Hebrew -> English
    "שרתים מושעים": "Suspended servers",
    "מדיה מסוננת": "Filtered media",
    "שרתים מוגבלים": "Silenced servers",

    # French -> English
    "Serveurs suspendus": "Suspended servers",
    "Médias filtrés": "Filtered media",
    "Serveurs limités": "Limited servers",
    "Serveurs modérés": "Limited servers",
}
58
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the block lists an instance publishes on its about page.

    The paths "/about/more" and "/about" are requested in that order; the
    loop stops at the first page containing at least one <h3> headline, or
    at the first request that raises. Each <h3> headline is translated via
    `language_mapping` and, when it names a known block list, the data rows
    of the next <table> in the document are collected.

    Args:
        domain: Host name of the instance whose about page is scraped.

    Returns:
        A dict with the keys "reject", "media_removal" and "followers_only".
        Each value is a list of dicts with the keys "domain", "hash" and
        "reason". All lists are empty when no page could be fetched.

    Raises:
        ValueError: If `domain` is not a `str` or is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # Collected rows, keyed by the (English) headline of their section.
    # Created before any early exit so every return path has data to map.
    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain)
    doc = None
    for path in ("/about/more", "/about"):
        try:
            # DEBUG: print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...")
            doc = bs4.BeautifulSoup(
                network.fetch_response(
                    domain,
                    path,
                    network.web_headers,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                ).text,
                "html.parser",
            )

            if len(doc.find_all("h3")) > 0:
                # DEBUG: print(f"DEBUG: path='{path}' had some headlines - BREAK!")
                break

        except Exception as exception:
            # Exception instead of BaseException: KeyboardInterrupt and
            # SystemExit must propagate and not be stored as a domain error.
            print("ERROR: Cannot fetch from domain:", domain, exception)
            instances.update_last_error(domain, exception)
            break

    # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
    if doc is None:
        print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!")
        # Same shape as the regular result so callers can iterate it safely.
        return {
            "reject"        : [],
            "media_removal" : [],
            "followers_only": [],
        }

    for header in doc.find_all("h3"):
        header_text = tidyup.reason(header.text)

        # DEBUG: print(f"DEBUG: header_text='{header_text}'")
        if header_text in language_mapping:
            header_text = language_mapping[header_text]
        else:
            print(f"WARNING: header_text='{header_text}' not found in language mapping table")

        if header_text not in blocklist:
            print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
            continue

        # find_all_next() instead of find_next_siblings() to account for
        # instances that e.g. hide the lists in a dropdown menu.
        tables = header.find_all_next("table")
        if len(tables) == 0:
            print(f"WARNING: header_text='{header_text}' is not followed by any table - SKIPPED!")
            continue

        # The first <tr> is skipped - presumably the table's heading row.
        for line in tables[0].find_all("tr")[1:]:
            span = line.find("span")
            cells = line.find_all("td")

            if span is None or span.get("title") is None or len(cells) < 2:
                print(f"WARNING: Malformed table row below header_text='{header_text}' - SKIPPED!")
                continue

            blocklist[header_text].append(
                {
                    "domain": tidyup.domain(span.text),
                    # NOTE(review): [9:] presumably strips a 9 character
                    # prefix such as "SHA-256: " from the title - confirm.
                    "hash"  : tidyup.domain(span["title"][9:]),
                    "reason": tidyup.reason(cells[1].text),
                }
            )

    # DEBUG: print("DEBUG: Returning blocklist for domain:", domain)
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }
131
def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Fetch the domain blocks of a Mastodon instance and record them.

    The JSON endpoint "/api/v1/instance/domain_blocks" is queried first.
    When it returns an empty list, `fetch_blocks_from_about()` is used as a
    fallback. Every block found is cleaned up, obscured names (containing
    "*") are resolved through `instances.deobscure()`, unknown blocked
    instances are registered and the block itself is either added or has
    its last-seen time and reason refreshed. Changes are committed at the
    end. Exceptions listed in `network.exceptions` are caught and printed.

    Args:
        domain: Host name of the instance whose blocks are fetched.
        origin: Must be a non-empty `str` or `None`; only validated here.
        nodeinfo_url: Non-empty `str`, passed on to `instances.add()` for
            blocked instances that are not registered yet.

    Returns:
        None in every case.

    Raises:
        ValueError: If a parameter has the wrong type or is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
        return

    # Severity reported by the API -> block level the entry is stored under
    severity_levels = {
        "suspend"       : "reject",
        "silence"       : "followers_only",
        "reject_media"  : "media_removal",
        "reject_reports": "report_removal",
    }

    try:
        rows = {
            "reject"        : [],
            "media_removal" : [],
            "followers_only": [],
            "report_removal": [],
        }

        # JSON endpoint of newer Mastodon versions
        # DEBUG: print("DEBUG: Querying API domain_blocks:", domain)
        data = network.get_json_api(
            domain,
            "/api/v1/instance/domain_blocks",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        if "error_message" in data:
            # DEBUG: print(f"DEBUG: Was not able to fetch domain_blocks from domain='{domain}': status_code='{data['status_code']}',error_message='{data['error_message']}'")
            instances.update_last_error(domain, data)
            return
        elif "json" in data and "error" in data["json"]:
            print(f"WARNING: JSON API returned error message: '{data['json']['error']}'")
            instances.update_last_error(domain, data)
            return

        api_blocks = data["json"]

        if len(api_blocks) > 0:
            print(f"INFO: Checking {len(api_blocks)} entries from domain='{domain}',software='mastodon' ...")
            for block in api_blocks:
                # DEBUG: print(f"DEBUG: block[]='{type(block)}'")
                if not isinstance(block, dict):
                    print(f"DEBUG: block[]='{type(block)}' is not of type 'dict' - SKIPPED!")
                    continue

                # Map block -> entry
                # DEBUG: print(f"DEBUG: block[{type(block)}]='{block}'")
                entry = {
                    "domain": block["domain"],
                    "hash"  : block["digest"],
                    "reason": block.get("comment"),
                }

                severity = block["severity"]
                # Only strings can name a known severity; anything else
                # (including unhashable values) is reported as unknown.
                block_level = severity_levels.get(severity) if isinstance(severity, str) else None

                if block_level is None:
                    print("WARNING: Unknown severity:", block['severity'], block['domain'])
                    continue

                # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
                rows[block_level].append(entry)
        else:
            # DEBUG: print(f"DEBUG: domain='{domain}' has returned zero rows, trying /about/more page ...")
            rows = fetch_blocks_from_about(domain)

        print(f"INFO: Checking {len(rows.items())} entries from domain='{domain}',software='mastodon' ...")
        for block_level, entries in rows.items():
            # DEBUG: print("DEBUG: domain,block_level,entries():", domain, block_level, len(entries))
            block_level = tidyup.domain(block_level)

            # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
            if block_level == "":
                print("WARNING: block_level is empty, domain:", domain)
                continue

            # DEBUG: print(f"DEBUG: Checking {len(entries)} entries from domain='{domain}',software='mastodon',block_level='{block_level}' ...")
            for block in entries:
                # Read by key so the result does not depend on the order in
                # which the entry dict happened to be built.
                blocked      = tidyup.domain(block["domain"])
                blocked_hash = block["hash"]
                reason       = block["reason"]
                reason       = tidyup.reason(reason) if reason is not None and reason != "" else None
                # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                # Kept per entry: a deobscured row must not replace the
                # function's nodeinfo_url for the entries that follow.
                blocked_nodeinfo_url = nodeinfo_url

                if blocked == "":
                    print("WARNING: blocked is empty:", domain)
                    continue
                elif blacklist.is_blacklisted(blocked):
                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                    continue
                elif blocked.count("*") > 0:
                    # Doing the hash search for instance names as well to tidy up DB
                    row = instances.deobscure("*", blocked, blocked_hash)

                    # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
                    if row is None:
                        print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
                        continue

                    # DEBUG: print("DEBUG: Updating domain: ", row[0])
                    blocked              = row[0]
                    blocked_nodeinfo_url = row[2]

                # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                if not validators.domain(blocked):
                    print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!")
                    continue
                elif blocked.endswith(".arpa"):
                    print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
                    continue
                elif not instances.is_registered(blocked):
                    # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}'")
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, blocked_nodeinfo_url)

                blocking = blocked if blocked.count("*") <= 1 else blocked_hash
                # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'")

                if not blocks.is_instance_blocked(domain, blocked, block_level):
                    # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
                    blocks.add_instance(domain, blocking, reason, block_level)
                else:
                    # DEBUG: print(f"DEBUG: Updating block last seen and reason for domain='{domain}',blocking='{blocking}' ...")
                    blocks.update_last_seen(domain, blocking, block_level)
                    blocks.update_reason(reason, domain, blocking, block_level)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()
    except network.exceptions as exception:
        print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")

    # DEBUG: print("DEBUG: EXIT!")