git.mxchange.org Git - fba.git/blob - fba/networks/mastodon.py
Continued:
[fba.git] / fba / networks / mastodon.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import inspect
18
19 import bs4
20 import validators
21
22 from fba import blacklist
23 from fba import config
24 from fba import csrf
25 from fba import fba
26 from fba import network
27
28 from fba.helpers import tidyup
29
30 from fba.models import blocks
31 from fba.models import instances
32
# Translation table for the <h3> headlines found on Mastodon "about" pages.
# Keys are headlines as they appear on the page (older English wording or a
# localized variant); values are the canonical English section names that
# fetch_blocks_from_about() uses as keys of its block list.
language_mapping = {
    # Older English wording -> current English wording
    "Silenced instances": "Silenced servers",
    "Suspended instances": "Suspended servers",
    "Limited instances": "Limited servers",
    "Filtered media": "Filtered media",

    # German -> English
    "Gesperrte Server": "Suspended servers",
    "Gefilterte Medien": "Filtered media",
    "Stummgeschaltete Server": "Silenced servers",

    # Japanese -> English
    "停止済みのサーバー": "Suspended servers",
    "制限中のサーバー": "Limited servers",
    "メディアを拒否しているサーバー": "Filtered media",
    "サイレンス済みのサーバー": "Silenced servers",

    # Hebrew -> English
    "שרתים מושעים": "Suspended servers",
    "מדיה מסוננת": "Filtered media",
    "שרתים מוגבלים": "Silenced servers",

    # French -> English
    "Serveurs suspendus": "Suspended servers",
    "Médias filtrés": "Filtered media",
    "Serveurs limités": "Limited servers",
    "Serveurs modérés": "Limited servers",
}
58
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the published block lists from an instance's "about" page.

    Tries "/about/more" first and then "/about", stopping at the first page
    that contains at least one <h3> headline. Each headline is translated
    through language_mapping and, when it names a known section, the rows of
    the next <table> after it are collected.

    Args:
        domain: Host name of the instance to scrape; must be a non-empty str.

    Returns:
        A dict with the keys "reject", "media_removal" and "followers_only",
        each holding a list of {"domain", "hash", "reason"} dicts. All three
        lists are empty when no page could be fetched.

    Raises:
        ValueError: If domain is not a str or is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # Created before any early exit so that every return path sees it
    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain)
    doc = None
    for path in ["/about/more", "/about"]:
        try:
            # DEBUG: print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...")
            doc = bs4.BeautifulSoup(
                network.fetch_response(
                    domain,
                    path,
                    network.web_headers,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                ).text,
                "html.parser",
            )

            if len(doc.find_all("h3")) > 0:
                # DEBUG: print(f"DEBUG: path='{path}' had some headlines - BREAK!")
                break

        except Exception as exception:
            # Still best-effort, but Exception (not BaseException) lets
            # KeyboardInterrupt and SystemExit reach the caller.
            print("ERROR: Cannot fetch from domain:", domain, exception)
            instances.update_last_error(domain, exception)
            break

    # DEBUG: print(f"DEBUG: doc[]='{type(doc)}'")
    if doc is None:
        print(f"WARNING: Cannot fetch any /about pages for domain='{domain}' - EXIT!")
        headlines = []
    else:
        headlines = doc.find_all("h3")

    for header in headlines:
        header_text = tidyup.reason(header.text)

        # DEBUG: print(f"DEBUG: header_text='{header_text}'")
        if header_text in language_mapping:
            # DEBUG: print(f"DEBUG: header_text='{header_text}'")
            header_text = language_mapping[header_text]
        else:
            print(f"WARNING: header_text='{header_text}' not found in language mapping table")

        # All keys of blocklist are capitalised, so a lower-cased headline can
        # never match one; only the exact lookup is meaningful here.
        if header_text not in blocklist:
            print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
            continue

        # find_next() searches everything after the headline (not only its
        # siblings) to account for instances that e.g. hide lists in a
        # dropdown menu, and returns None instead of raising when nothing is found.
        table = header.find_next("table")
        if table is None:
            print(f"WARNING: header_text='{header_text}' is not followed by any table, domain='{domain}' - SKIPPED!")
            continue

        # First row is skipped (presumably the table's header row - TODO confirm)
        for line in table.find_all("tr")[1:]:
            span  = line.find("span")
            cells = line.find_all("td")

            if span is None or not span.has_attr("title") or len(cells) < 2:
                print(f"WARNING: Malformed row below header_text='{header_text}', domain='{domain}' - SKIPPED!")
                continue

            blocklist[header_text].append(
                {
                    "domain": tidyup.domain(span.text),
                    # Drops the first 9 characters of the title attribute
                    # (presumably a "SHA-256: " style prefix - TODO confirm)
                    "hash"  : tidyup.domain(span["title"][9:]),
                    "reason": tidyup.reason(cells[1].text),
                }
            )

    # DEBUG: print("DEBUG: Returning blocklist for domain:", domain)
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }
131
def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Fetch the block list of a Mastodon instance and record it.

    Queries "/api/v1/instance/domain_blocks" first. When that returns an empty
    list, the instance's "about" page is scraped via fetch_blocks_from_about()
    instead. Every usable entry is registered through the instances model (if
    not yet known) and stored or refreshed through the blocks model; the
    database connection is committed at the end.

    Args:
        domain: Host name of the blocking instance; non-empty str.
        origin: Non-empty str or None. Only validated here.
        nodeinfo_url: Non-empty str; handed to instances.add() for newly
            discovered blocked domains unless a de-obscured row supplies its own.

    Returns:
        None in all cases.

    Raises:
        ValueError: If a parameter has the wrong type or is an empty string.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
        return

    # Mastodon API severity -> block level used by this project
    severity_levels = {
        "suspend"       : "reject",
        "silence"       : "followers_only",
        "reject_media"  : "media_removal",
        "reject_reports": "report_removal",
    }

    try:
        # JSON endpoint for newer Mastodon versions
        rows = {
            "reject"        : [],
            "media_removal" : [],
            "followers_only": [],
            "report_removal": [],
        }

        # DEBUG: print("DEBUG: Querying API domain_blocks:", domain)
        data = network.get_json_api(
            domain,
            "/api/v1/instance/domain_blocks",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        if "error_message" in data:
            # DEBUG: print(f"DEBUG: Was not able to fetch domain_blocks from domain='{domain}': status_code='{data['status_code']}',error_message='{data['error_message']}'")
            instances.update_last_error(domain, data)
            return
        elif "json" in data and "error" in data["json"]:
            print(f"WARNING: JSON API returned error message: '{data['json']['error']}'")
            instances.update_last_error(domain, data)
            return

        # Getting blocklist
        api_blocks = data["json"]

        if len(api_blocks) > 0:
            print(f"INFO: Checking {len(api_blocks)} entries from domain='{domain}',software='mastodon' ...")
            for block in api_blocks:
                # Map block -> entry
                # DEBUG: print(f"DEBUG: block[{type(block)}]='{block}'")
                entry = {
                    "domain": block["domain"],
                    "hash"  : block["digest"],
                    "reason": block.get("comment"),
                }

                block_level = severity_levels.get(block["severity"])
                if block_level is None:
                    print("WARNING: Unknown severity:", block['severity'], block['domain'])
                    continue

                # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
                rows[block_level].append(entry)
        else:
            # DEBUG: print(f"DEBUG: domain='{domain}' has returned zero rows, trying /about/more page ...")
            rows = fetch_blocks_from_about(domain)

        # len(rows) is the number of block levels, not of individual entries
        print(f"INFO: Checking {len(rows)} block levels from domain='{domain}',software='mastodon' ...")
        for block_level, entries in rows.items():
            # DEBUG: print("DEBUG: domain,block_level,entries():", domain, block_level, len(entries))
            block_level = tidyup.domain(block_level)

            # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
            if block_level == "":
                print("WARNING: block_level is empty, domain:", domain)
                continue

            # DEBUG: print(f"DEBUG: Checking {len(entries)} entries from domain='{domain}',software='mastodon',block_level='{block_level}' ...")
            for block in entries:
                # DEBUG: print(f"DEBUG: block[]='{type(block)}'")
                # Read by key so the result does not depend on dict ordering
                blocked      = tidyup.domain(block["domain"])
                blocked_hash = block["hash"]
                reason       = block["reason"]
                reason       = tidyup.reason(reason) if reason is not None and reason != "" else None
                # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                # Reset for every entry: a de-obscured row may carry its own
                # nodeinfo URL, which must not leak into later entries.
                blocked_nodeinfo_url = nodeinfo_url

                if blocked == "":
                    print("WARNING: blocked is empty:", domain)
                    continue
                elif blacklist.is_blacklisted(blocked):
                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                    continue
                elif blocked.count("*") > 0:
                    # Doing the hash search for instance names as well to tidy up DB
                    row = instances.deobscure("*", blocked, blocked_hash)

                    # DEBUG: print(f"DEBUG: row[]='{type(row)}'")
                    if row is None:
                        print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
                        continue

                    # DEBUG: print("DEBUG: Updating domain: ", row[0])
                    blocked              = row[0]
                    blocked_nodeinfo_url = row[2]

                # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                if not validators.domain(blocked):
                    print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - SKIPPED!")
                    continue
                elif blocked.endswith(".arpa"):
                    print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
                    continue
                elif not instances.is_registered(blocked):
                    # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain)
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, blocked_nodeinfo_url)

                blocking = blocked if blocked.count("*") <= 1 else blocked_hash
                # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'")

                if not blocks.is_instance_blocked(domain, blocked, block_level):
                    # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
                    blocks.add_instance(domain, blocking, reason, block_level)
                else:
                    # DEBUG: print(f"DEBUG: Updating block last seen and reason for domain='{domain}',blocking='{blocking}' ...")
                    blocks.update_last_seen(domain, blocking, block_level)
                    blocks.update_reason(reason, domain, blocking, block_level)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()
    except network.exceptions as exception:
        print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")

    # DEBUG: print("DEBUG: EXIT!")
305     # DEBUG: print("DEBUG: EXIT!")