]> git.mxchange.org Git - fba.git/blob - fba/networks/mastodon.py
Continued:
[fba.git] / fba / networks / mastodon.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import inspect
18
19 import bs4
20 import validators
21
22 from fba import blacklist
23 from fba import blocks
24 from fba import config
25 from fba import csrf
26 from fba import fba
27 from fba import instances
28 from fba import network
29
30 from fba.helpers import tidyup
31
# Translation table for the section headings (<h3> elements) found on a
# Mastodon "/about/more" page. Keys are the headings as they appear on the
# page (older English wording or a localized variant), values are the English
# headings that fetch_blocks_from_about() uses as keys of its block list.
language_mapping = {
    # English -> English
    "Silenced instances"            : "Silenced servers",
    "Suspended instances"           : "Suspended servers",
    "Limited instances"             : "Limited servers",
    "Filtered media"                : "Filtered media",
    # German -> English
    "Gesperrte Server"              : "Suspended servers",
    "Gefilterte Medien"             : "Filtered media",
    "Stummgeschaltete Server"       : "Silenced servers",
    # Japanese -> English
    "停止済みのサーバー"            : "Suspended servers",
    "制限中のサーバー"              : "Limited servers",
    "メディアを拒否しているサーバー": "Filtered media",
    "サイレンス済みのサーバー"      : "Silenced servers",
    # Hebrew -> English
    "שרתים מושעים"                  : "Suspended servers",
    "מדיה מסוננת"                   : "Filtered media",
    "שרתים מוגבלים"                 : "Silenced servers",
    # French -> English
    "Serveurs suspendus"            : "Suspended servers",
    "Médias filtrés"                : "Filtered media",
    "Serveurs limités"              : "Limited servers",
    "Serveurs modérés"              : "Limited servers",
}
57
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the published block list from a Mastodon "/about/more" page.

    Every <h3> heading of the page is normalized through 'language_mapping';
    for each heading that names a known block category, the rows of the first
    table following it are collected.

    Args:
        domain: Domain name of the instance whose page is fetched.

    Returns:
        A dict with the keys "reject", "media_removal" and "followers_only",
        each holding a list of {"domain", "hash", "reason"} dicts. An empty
        dict is returned when the page could not be fetched or parsed.

    Raises:
        ValueError: If 'domain' is not a string or is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain)
    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    try:
        doc = bs4.BeautifulSoup(
            network.fetch_response(
                domain,
                "/about/more",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text,
            "html.parser",
        )
    except Exception as exception:
        # 'Exception' instead of 'BaseException': KeyboardInterrupt and
        # SystemExit must still be able to stop the program.
        print("ERROR: Cannot fetch from domain:", domain, exception)
        instances.update_last_error(domain, exception)
        return {}

    for header in doc.find_all("h3"):
        header_text = tidyup.reason(header.text)

        # DEBUG: print(f"DEBUG: header_text='{header_text}'")
        if header_text in language_mapping:
            header_text = language_mapping[header_text]
        elif header_text not in blocklist:
            # Headings that already are canonical need no translation, so
            # only warn about headings that are unknown in both tables.
            print(f"WARNING: header_text='{header_text}' not found in language mapping table")

        if header_text not in blocklist:
            print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
            continue

        # find_all_next() instead of find_next_siblings() to account for
        # instances that e.g. hide lists in a dropdown menu
        tables = header.find_all_next("table")
        if len(tables) == 0:
            print(f"WARNING: No table found after header_text='{header_text}',domain='{domain}' - SKIPPED!")
            continue

        # First row is the table header, the remaining rows are entries
        for line in tables[0].find_all("tr")[1:]:
            span  = line.find("span")
            cells = line.find_all("td")

            if span is None or span.get("title") is None or len(cells) < 2:
                print(f"WARNING: Malformed row below header_text='{header_text}',domain='{domain}' - SKIPPED!")
                continue

            # Key order (domain, hash, reason) is kept on purpose, callers
            # may rely on it when unpacking the values.
            blocklist[header_text].append(
                {
                    "domain": tidyup.domain(span.text),
                    # The first 9 characters of the title attribute are cut
                    # off, the remainder is taken as the hash
                    "hash"  : tidyup.domain(span["title"][9:]),
                    "reason": tidyup.reason(cells[1].text),
                }
            )

    # DEBUG: print("DEBUG: Returning blocklist for domain:", domain)
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }
117
def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Fetch the domain blocks of a Mastodon instance and store them.

    The JSON endpoint "/api/v1/instance/domain_blocks" is queried first; when
    it returns an empty list, the "/about/more" page is scraped through
    fetch_blocks_from_about() instead. Every usable entry is recorded through
    the 'blocks' module, previously unknown blocked domains are registered
    through the 'instances' module, and the database changes are committed.

    Args:
        domain: Domain name of the instance whose block list is fetched.
        origin: Origin of 'domain'; may be None, otherwise a non-empty string.
        nodeinfo_url: Non-empty nodeinfo URL; it is handed to instances.add()
            when a blocked domain gets registered.

    Returns:
        None. The function returns early when the CSRF check fails or the
        API reports an error.

    Raises:
        ValueError: If one of the parameters has a wrong type or is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_blocks,{__name__}) - EXIT!")
        return

    try:
        # Entries grouped by block level
        rows = {
            "reject"        : [],
            "media_removal" : [],
            "followers_only": [],
            "report_removal": [],
        }

        # Maps the 'severity' field of the JSON API to a block level
        severity_mapping = {
            "suspend"       : "reject",
            "silence"       : "followers_only",
            "reject_media"  : "media_removal",
            "reject_reports": "report_removal",
        }

        # JSON endpoint of newer Mastodon versions
        # DEBUG: print("DEBUG: Querying API domain_blocks:", domain)
        data = network.get_json_api(
            domain,
            "/api/v1/instance/domain_blocks",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        if "error_message" in data:
            print(f"WARNING: Was not able to fetch domain_blocks from domain='{domain}': status_code='{data['status_code']}',error_message='{data['error_message']}'")
            instances.update_last_error(domain, data)
            return
        elif "json" in data and "error" in data["json"]:
            print(f"WARNING: JSON API returned error message: '{data['json']['error']}'")
            instances.update_last_error(domain, data)
            return

        # Getting blocklist
        blocklist = data["json"]

        if len(blocklist) > 0:
            print(f"INFO: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon' ...")
            for block in blocklist:
                # Map block -> entry
                # DEBUG: print(f"DEBUG: block[{type(block)}]='{block}'")
                entry = {
                    "domain": block["domain"],
                    "hash"  : block["digest"],
                    "reason": block["comment"]
                }

                if block['severity'] in severity_mapping:
                    # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
                    rows[severity_mapping[block['severity']]].append(entry)
                else:
                    print("WARNING: Unknown severity:", block['severity'], block['domain'])
        else:
            # DEBUG: print(f"DEBUG: domain='{domain}' has returned zero rows, trying /about/more page ...")
            rows = fetch_blocks_from_about(domain)

        # Report the number of entries, not the number of block levels
        print(f"INFO: Checking {sum(len(entries) for entries in rows.values())} entries from domain='{domain}',software='mastodon' ...")
        for block_level, entries in rows.items():
            # DEBUG: print("DEBUG: domain,block_level,entries():", domain, block_level, len(entries))
            block_level = tidyup.domain(block_level)

            if block_level == "":
                print("WARNING: block_level is empty, domain:", domain)
                continue

            for block in entries:
                # Access by key instead of relying on the insertion order of
                # the entry dict
                blocked      = tidyup.domain(block["domain"])
                blocked_hash = block["hash"]
                reason       = block["reason"]
                reason       = tidyup.reason(reason) if reason is not None and reason != "" else None
                # DEBUG: print(f"DEBUG: blocked='{blocked}',blocked_hash='{blocked_hash}',reason='{reason}'")

                # Per-entry copy: a value found while de-obfuscating one
                # entry must not leak into the handling of later entries.
                blocked_nodeinfo_url = nodeinfo_url

                if blocked == "":
                    print("WARNING: blocked is empty:", domain)
                    continue
                elif blacklist.is_blacklisted(blocked):
                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                    continue
                elif blocked.count("*") > 0:
                    # Doing the hash search for instance names as well to tidy up DB
                    fba.cursor.execute(
                        "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
                    )
                    searchres = fba.cursor.fetchone()

                    # DEBUG: print(f"DEBUG: searchres[]='{type(searchres)}'")
                    if searchres is None:
                        print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
                        continue

                    # DEBUG: print("DEBUG: Updating domain: ", searchres[0])
                    blocked              = searchres[0]
                    blocked_nodeinfo_url = searchres[2]

                # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                if not validators.domain(blocked):
                    print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - skipped!")
                    continue
                elif not instances.is_registered(blocked):
                    # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}'")
                    # currentframe() must stay in this function so that the
                    # recorded name remains 'fetch_blocks'
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, blocked_nodeinfo_url)

                blocking = blocked if blocked.count("*") <= 1 else blocked_hash
                # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'")

                if not blocks.is_instance_blocked(domain, blocked, block_level):
                    # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
                    blocks.add_instance(domain, blocking, reason, block_level)
                else:
                    # DEBUG: print(f"DEBUG: Updating block last seen and reason for domain='{domain}',blocking='{blocking}' ...")
                    blocks.update_last_seen(domain, blocking, block_level)
                    blocks.update_reason(reason, domain, blocking, block_level)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()
    except network.exceptions as exception:
        print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")

    # DEBUG: print("DEBUG: EXIT!")