]> git.mxchange.org Git - fba.git/blob - fba/networks/mastodon.py
Continued:
[fba.git] / fba / networks / mastodon.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import inspect
18
19 import bs4
20 import validators
21
22 from fba import blacklist
23 from fba import config
24 from fba import csrf
25 from fba import fba
26 from fba import network
27
28 from fba.helpers import tidyup
29
30 from fba.models import blocks
31 from fba.models import instances
32
# Maps the localized <h3> headlines found on an instance's about page to the
# canonical English headline. fetch_blocks_from_about() looks each headline up
# here and then uses the mapped value as key into its own blocklist dict, so
# every value below must be one of that dict's keys.
language_mapping = {
    # English -> English
    "Silenced instances"            : "Silenced servers",
    "Suspended instances"           : "Suspended servers",
    "Limited instances"             : "Limited servers",
    "Filtered media"                : "Filtered media",
    # Mapping German -> English
    "Gesperrte Server"              : "Suspended servers",
    "Gefilterte Medien"             : "Filtered media",
    "Stummgeschaltete Server"       : "Silenced servers",
    # Japanese -> English
    "停止済みのサーバー"            : "Suspended servers",
    "制限中のサーバー"              : "Limited servers",
    "メディアを拒否しているサーバー": "Filtered media",
    "サイレンス済みのサーバー"      : "Silenced servers",
    # Hebrew -> English
    # NOTE(review): the last Hebrew headline appears to translate to
    # "Limited servers" rather than "Silenced servers". Both lists are merged
    # into "followers_only" by fetch_blocks_from_about(), so the result is the
    # same either way - confirm the intended mapping.
    "שרתים מושעים"                  : "Suspended servers",
    "מדיה מסוננת"                   : "Filtered media",
    "שרתים מוגבלים"                 : "Silenced servers",
    # French -> English
    "Serveurs suspendus"            : "Suspended servers",
    "Médias filtrés"                : "Filtered media",
    "Serveurs limités"              : "Limited servers",
    "Serveurs modérés"              : "Limited servers",
}
58
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the block lists from an instance's HTML about page.

    Tries "/about/more" first and "/about" second and stops at the first page
    that contains at least one <h3> headline. Each headline is translated
    through language_mapping; for every recognized headline the rows of the
    next <table> in the document are collected.

    Args:
        domain: Domain name of the instance to scrape, must be a non-empty str.

    Returns:
        A dict with the keys "reject", "media_removal" and "followers_only",
        each holding a list of {"domain", "hash", "reason"} dicts. All lists
        are empty when no page could be fetched.

    Raises:
        ValueError: If 'domain' is not a str or is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain)
    doc = None
    for path in ["/about/more", "/about"]:
        try:
            # DEBUG: print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...")
            doc = bs4.BeautifulSoup(
                network.fetch_response(
                    domain,
                    path,
                    network.web_headers,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                ).text,
                "html.parser",
            )

            if len(doc.find_all("h3")) > 0:
                # DEBUG: print(f"DEBUG: path='{path}' had some headlines - BREAK!")
                break

        except Exception as exception:
            # Exception instead of BaseException: a KeyboardInterrupt or
            # SystemExit must not be recorded as a fetch error and swallowed.
            print("ERROR: Cannot fetch from domain:", domain, exception)
            instances.set_last_error(domain, exception)
            break

    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
    if doc is None:
        print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!")
    else:
        for header in doc.find_all("h3"):
            header_text = tidyup.reason(header.text)

            # DEBUG: print(f"DEBUG: header_text='{header_text}'")
            if header_text in language_mapping:
                # DEBUG: print(f"DEBUG: header_text='{header_text}'")
                header_text = language_mapping[header_text]
            else:
                print(f"WARNING: header_text='{header_text}' not found in language mapping table")

            if header_text not in blocklist:
                print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
                continue

            # The table is searched in the whole remaining document (not only
            # among siblings) to account for instances that e.g. hide lists in
            # a dropdown menu.
            table = header.find_next("table")
            if table is None:
                print(f"WARNING: No table found after header_text='{header_text}' for domain='{domain}' - SKIPPED!")
                continue

            # The first row is skipped, presumably it is the table's headline row
            for line in table.find_all("tr")[1:]:
                span = line.find("span")
                title = span.get("title") if span is not None else None
                cells = line.find_all("td")

                if title is None or len(cells) < 2:
                    # Row does not have the expected <span title=...>/<td> layout
                    print(f"WARNING: Malformed block row under header_text='{header_text}' for domain='{domain}' - SKIPPED!")
                    continue

                blocklist[header_text].append(
                    {
                        "domain": tidyup.domain(span.text),
                        # The first 9 characters of the title are cut off,
                        # presumably a "SHA-256: " prefix - TODO confirm
                        "hash"  : tidyup.domain(title[9:]),
                        "reason": tidyup.reason(cells[1].text),
                    }
                )

    # DEBUG: print("DEBUG: Returning blocklist for domain:", domain)
    # Always return the same shape, also when nothing could be fetched
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }
131
# Maps the 'severity' value of an API domain block to the block level it is stored under
_SEVERITY_TO_BLOCK_LEVEL = {
    "suspend"       : "reject",
    "silence"       : "followers_only",
    "reject_media"  : "media_removal",
    "reject_reports": "report_removal",
}

def _map_api_blocks(blocklist) -> dict:
    """Sort the entries of a domain_blocks API response by block level.

    Entries that are not a dict, lack one of the keys "domain", "digest" or
    "severity", or carry an unknown severity are reported and skipped.

    Returns:
        A dict with the keys "reject", "media_removal", "followers_only" and
        "report_removal", each holding a list of {"domain", "hash", "reason"}
        dicts.
    """
    rows = {
        "reject"        : [],
        "media_removal" : [],
        "followers_only": [],
        "report_removal": [],
    }

    for block in blocklist:
        # DEBUG: print(f"DEBUG: block[]='{type(block)}'")
        if not isinstance(block, dict):
            print(f"DEBUG: block[]='{type(block)}' is not of type 'dict' - SKIPPED!")
            continue

        missing = [key for key in ("domain", "digest", "severity") if key not in block]
        if len(missing) > 0:
            print(f"WARNING: block='{block}' lacks keys {missing} - SKIPPED!")
            continue

        severity = block["severity"]

        # Only look up strings, an unhashable JSON value must not raise here
        block_level = _SEVERITY_TO_BLOCK_LEVEL.get(severity) if isinstance(severity, str) else None
        if block_level is None:
            print("WARNING: Unknown severity:", block['severity'], block['domain'])
            continue

        # DEBUG: print(f"DEBUG: Adding block with severity='{severity}' ...")
        rows[block_level].append({
            "domain": block["domain"],
            "hash"  : block["digest"],
            "reason": block["comment"] if "comment" in block else None
        })

    return rows

def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Fetch the domain blocks of a Mastodon instance and store them.

    Queries "/api/v1/instance/domain_blocks" first; when the API returns an
    empty list, fetch_blocks_from_about() is used instead. Every blocked
    domain is registered through instances.add() when unknown, and the block
    itself is added or refreshed through the blocks model. Changes are
    committed at the end. Network exceptions are reported, not raised.

    Args:
        domain: Domain name of the blocking instance, must be a non-empty str.
        origin: None or a non-empty str. It is only validated here.
        nodeinfo_url: Non-empty str, handed to instances.add() for newly
            found domains unless a deobscured record supplies its own value.

    Raises:
        ValueError: If one of the parameters has the wrong type or is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
        return

    try:
        # JSON endpoint for newer Mastodon versions
        # DEBUG: print("DEBUG: Querying API domain_blocks:", domain)
        data = network.get_json_api(
            domain,
            "/api/v1/instance/domain_blocks",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        # DEBUG: print(f"DEBUG: data[]='{type(data)}'")
        if "error_message" in data:
            # DEBUG: print(f"DEBUG: Was not able to fetch domain_blocks from domain='{domain}': status_code='{data['status_code']}',error_message='{data['error_message']}'")
            instances.set_last_error(domain, data)
            return
        elif "json" in data and "error" in data["json"]:
            print(f"WARNING: JSON API returned error message: '{data['json']['error']}'")
            instances.set_last_error(domain, data)
            return

        # Getting blocklist
        api_blocks = data["json"]

        if len(api_blocks) > 0:
            print(f"INFO: Checking {len(api_blocks)} entries from domain='{domain}',software='mastodon' ...")
            rows = _map_api_blocks(api_blocks)
        else:
            # DEBUG: print(f"DEBUG: domain='{domain}' has returned zero rows, trying /about/more page ...")
            rows = fetch_blocks_from_about(domain)

        print(f"INFO: Checking {len(rows)} entries from domain='{domain}',software='mastodon' ...")
        for block_level, entries in rows.items():
            # DEBUG: print("DEBUG: domain,block_level,entries():", domain, block_level, len(entries))
            block_level = tidyup.domain(block_level)

            # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
            if block_level == "":
                print("WARNING: block_level is empty, domain:", domain)
                continue

            # DEBUG: print(f"DEBUG: Checking {len(entries)} entries from domain='{domain}',software='mastodon',block_level='{block_level}' ...")
            for block in entries:
                # Read by key, not by position, so the result does not depend
                # on the order in which the entry dict was built
                blocked      = tidyup.domain(block["domain"])
                blocked_hash = block["hash"]
                reason       = block["reason"]
                reason       = tidyup.reason(reason) if reason is not None and reason != "" else None
                # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                # Per-entry value: a deobscured record below may replace it,
                # but that must not leak into the following entries
                blocked_nodeinfo_url = nodeinfo_url

                if blocked == "":
                    print("WARNING: blocked is empty:", domain)
                    continue
                elif blacklist.is_blacklisted(blocked):
                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                    continue
                elif blocked.count("*") > 0:
                    # Doing the hash search for instance names as well to tidy up DB
                    row = instances.deobscure("*", blocked, blocked_hash)

                    # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
                    if row is None:
                        print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
                        continue

                    # DEBUG: print("DEBUG: Updating domain: ", row[0])
                    blocked              = row[0]
                    blocked_nodeinfo_url = row[2]

                # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                if not validators.domain(blocked):
                    print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!")
                    continue
                elif blocked.endswith(".arpa"):
                    print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
                    continue
                elif not instances.is_registered(blocked):
                    # DEBUG: print("DEBUG: Domain wasn't found, adding:", blocked, domain)
                    # The frame lookup yields this function's own name, so it has to stay in here
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, blocked_nodeinfo_url)

                blocking = blocked if blocked.count("*") <= 1 else blocked_hash
                # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'")

                if not blocks.is_instance_blocked(domain, blocked, block_level):
                    # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
                    blocks.add_instance(domain, blocking, reason, block_level)
                else:
                    # DEBUG: print(f"DEBUG: Updating block last seen and reason for domain='{domain}',blocking='{blocking}' ...")
                    blocks.update_last_seen(domain, blocking, block_level)
                    blocks.update_reason(reason, domain, blocking, block_level)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()
    except network.exceptions as exception:
        print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")

    # DEBUG: print("DEBUG: EXIT!")