]> git.mxchange.org Git - fba.git/blob - fba/networks/mastodon.py
WIP(?):
[fba.git] / fba / networks / mastodon.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import inspect
18
19 import bs4
20 import requests
21 import validators
22
23 from fba import blacklist
24 from fba import blocks
25 from fba import config
26 from fba import csrf
27 from fba import fba
28 from fba import instances
29 from fba import network
30 from fba.helpers import tidyup
31
# Translation table for the section headings (<h3>) looked up by
# fetch_blocks_from_about(): each localized or alternative heading is mapped
# to the English heading name that function uses as a key.
language_mapping = {
    # English -> English
    "Silenced instances": "Silenced servers",
    "Suspended instances": "Suspended servers",
    "Limited instances": "Limited servers",
    "Filtered media": "Filtered media",
    # German -> English
    "Gesperrte Server": "Suspended servers",
    "Gefilterte Medien": "Filtered media",
    "Stummgeschaltete Server": "Silenced servers",
    # Japanese -> English
    "停止済みのサーバー": "Suspended servers",
    "制限中のサーバー": "Limited servers",
    "メディアを拒否しているサーバー": "Filtered media",
    "サイレンス済みのサーバー": "Silenced servers",
    # Hebrew -> English
    "שרתים מושעים": "Suspended servers",
    "מדיה מסוננת": "Filtered media",
    "שרתים מוגבלים": "Silenced servers",
    # French -> English
    "Serveurs suspendus": "Suspended servers",
    "Médias filtrés": "Filtered media",
    "Serveurs limités": "Limited servers",
    "Serveurs modérés": "Limited servers",
}
57
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the block lists shown on an instance's "/about/more" page.

    Every ``<h3>`` heading of the page is translated through
    ``language_mapping``; when the result names one of the known sections,
    the rows of the first ``<table>`` following that heading are collected.

    Args:
        domain: Host name of the instance to scrape; must be a non-empty
            string.

    Returns:
        A dict with the keys ``"reject"``, ``"media_removal"`` and
        ``"followers_only"``, each holding a list of dicts with the keys
        ``"domain"``, ``"hash"`` and ``"reason"``. An empty dict is returned
        when the page could not be fetched or parsed.

    Raises:
        ValueError: If ``domain`` is not a string or is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain)
    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    try:
        doc = bs4.BeautifulSoup(
            network.fetch_response(
                domain,
                "/about/more",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text,
            "html.parser",
        )
    except Exception as exception:
        # Exception (not BaseException) so that KeyboardInterrupt and
        # SystemExit still propagate to the caller.
        print("ERROR: Cannot fetch from domain:", domain, exception)
        instances.update_last_error(domain, exception)
        return {}

    for header in doc.find_all("h3"):
        header_text = tidyup.reason(header.text)

        # DEBUG: print(f"DEBUG: header_text='{header_text}'")
        if header_text in language_mapping:
            header_text = language_mapping[header_text]
        else:
            print(f"WARNING: header_text='{header_text}' not found in language mapping table")

        if header_text not in blocklist:
            print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
            continue

        # Search everything after the heading (not only its siblings) to
        # account for instances that e.g. hide lists in a dropdown menu.
        table = header.find_next("table")
        if table is None:
            print(f"WARNING: header_text='{header_text}' is not followed by a table, domain='{domain}'")
            continue

        # The first row is skipped; presumably it is the table header.
        for line in table.find_all("tr")[1:]:
            span = line.find("span")
            title = span.get("title") if span is not None else None
            cells = line.find_all("td")

            if title is None or len(cells) < 2:
                print(f"WARNING: Skipping malformed row below header_text='{header_text}', domain='{domain}'")
                continue

            blocklist[header_text].append(
                {
                    "domain": tidyup.domain(span.text),
                    # The first 9 characters of the title are dropped;
                    # presumably a "SHA-256: " prefix - TODO confirm.
                    "hash"  : tidyup.domain(title[9:]),
                    "reason": tidyup.reason(cells[1].text),
                }
            )

    # DEBUG: print("DEBUG: Returning blocklist for domain:", domain)
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }
117
def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Fetch the domain blocks of a Mastodon instance and record them.

    The JSON endpoint ``/api/v1/instance/domain_blocks`` is queried first.
    When it reports an error or yields no entries, the "/about/more" page is
    scraped through ``fetch_blocks_from_about()`` instead. Each blocked
    domain is tidied up, de-obfuscated through its hash when it contains
    ``*``, registered via ``instances.add()`` if unknown, and then added to
    or refreshed in the ``blocks`` module. The changes are committed at the
    end.

    Args:
        domain: Host name of the blocking instance; non-empty string.
        origin: Non-empty string or ``None``; only validated here.
        nodeinfo_url: Non-empty string, handed to ``instances.add()`` for
            blocked domains that are not registered yet.

    Raises:
        ValueError: If a parameter has the wrong type or is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]={type(nodeinfo_url)} is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    print(f"DEBUG: Checking CSRF for domain='{domain}'")
    headers = csrf.determine(domain, dict())

    # API severity -> block level used as key in rows
    severity_levels = {
        "suspend"       : "reject",
        "silence"       : "followers_only",
        "reject_media"  : "media_removal",
        "reject_reports": "report_removal",
    }

    try:
        rows = {
            "reject"        : [],
            "media_removal" : [],
            "followers_only": [],
            "report_removal": [],
        }

        # Stays empty when the API call fails, so the "/about/more" fallback
        # below is taken instead of failing on an unbound name.
        blocklist = []

        # JSON endpoint for newer Mastodon versions
        # DEBUG: print("DEBUG: Querying API domain_blocks:", domain)
        data = network.get_json_api(
            domain,
            "/api/v1/instance/domain_blocks",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        if "error_message" in data:
            print(f"WARNING: Was not able to fetch domain_blocks from domain='{domain}': status_code='{data['status_code']}',error_message='{data['error_message']}'")
            instances.update_last_error(domain, data)
        else:
            # Getting blocklist
            blocklist = data["json"]

        if blocklist:
            print(f"INFO: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon' ...")
            for block in blocklist:
                # Map block -> entry
                entry = {
                    "domain": block["domain"],
                    "hash"  : block["digest"],
                    "reason": block["comment"]
                }

                # DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
                block_level = severity_levels.get(block["severity"])
                if block_level is None:
                    print("WARNING: Unknown severity:", block['severity'], block['domain'])
                    continue

                # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
                rows[block_level].append(entry)
        else:
            # DEBUG: print(f"DEBUG: domain='{domain}' has returned zero rows, trying /about/more page ...")
            rows = fetch_blocks_from_about(domain)

        print(f"INFO: Checking {len(rows)} entries from domain='{domain}',software='mastodon' ...")
        for block_level, entries in rows.items():
            # DEBUG: print("DEBUG: domain,block_level,entries():", domain, block_level, len(entries))
            block_level = tidyup.domain(block_level)

            # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
            if block_level == "":
                print("WARNING: block_level is empty, domain:", domain)
                continue

            # DEBUG: print(f"DEBUG: Checking {len(entries)} entries from domain='{domain}',software='mastodon',block_level='{block_level}' ...")
            for block in entries:
                # Explicit keys instead of relying on the dict's value order
                blocked      = tidyup.domain(block["domain"])
                blocked_hash = block["hash"]
                reason       = block["reason"]
                reason       = tidyup.reason(reason) if reason is not None and reason != "" else None
                # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                # Per-entry value, so a hash lookup for one entry cannot
                # change what is passed to instances.add() for later ones.
                blocked_nodeinfo_url = nodeinfo_url

                if blocked == "":
                    print("WARNING: blocked is empty:", domain)
                    continue
                elif blacklist.is_blacklisted(blocked):
                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                    continue
                elif "*" in blocked:
                    # Doing the hash search for instance names as well to tidy up DB
                    fba.cursor.execute(
                        "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
                    )
                    searchres = fba.cursor.fetchone()

                    if searchres is None:
                        print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
                        continue

                    # DEBUG: print("DEBUG: Updating domain: ", searchres[0])
                    blocked = searchres[0]
                    blocked_nodeinfo_url = searchres[2]

                # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                if not validators.domain(blocked):
                    print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - skipped!")
                    continue
                elif not instances.is_registered(blocked):
                    # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}'")
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, blocked_nodeinfo_url)

                blocking = blocked if blocked.count("*") <= 1 else blocked_hash
                # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'")

                if not blocks.is_instance_blocked(domain, blocked, block_level):
                    # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
                    blocks.add_instance(domain, blocking, reason, block_level)
                else:
                    # DEBUG: print(f"DEBUG: Updating block last seen and reason for domain='{domain}',blocking='{blocking}' ...")
                    blocks.update_last_seen(domain, blocking, block_level)
                    blocks.update_reason(reason, domain, blocking, block_level)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()
    except (requests.exceptions.Timeout, requests.exceptions.ConnectionError) as exception:
        print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")

    # DEBUG: print("DEBUG: EXIT!")