]> git.mxchange.org Git - fba.git/blob - fba/networks/mastodon.py
WIP:
[fba.git] / fba / networks / mastodon.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import inspect
18
19 import bs4
20 import validators
21
22 from fba import blacklist
23 from fba import blocks
24 from fba import config
25 from fba import fba
26 from fba import instances
27 from fba import network
28 from fba.helpers import tidyup
29
# Translates the localized section headings found on a Mastodon "/about/more"
# page into the canonical English headings that fetch_blocks_from_about() uses
# as keys of its block list. Headings missing from this table are reported
# with a warning by that function.
language_mapping = {
    # English -> English
    "Silenced instances"            : "Silenced servers",
    "Suspended instances"           : "Suspended servers",
    "Limited instances"             : "Limited servers",
    "Filtered media"                : "Filtered media",
    # German -> English
    "Gesperrte Server"              : "Suspended servers",
    "Gefilterte Medien"             : "Filtered media",
    "Stummgeschaltete Server"       : "Silenced servers",
    # Japanese -> English
    "停止済みのサーバー"            : "Suspended servers",
    "制限中のサーバー"              : "Limited servers",
    "メディアを拒否しているサーバー": "Filtered media",
    "サイレンス済みのサーバー"      : "Silenced servers",
    # Hebrew -> English
    "שרתים מושעים"                  : "Suspended servers",
    "מדיה מסוננת"                   : "Filtered media",
    # "מוגבלים" means "limited", so this heading belongs with the other
    # "Limited servers" translations rather than with "Silenced servers".
    "שרתים מוגבלים"                 : "Limited servers",
    # French -> English
    "Serveurs suspendus"            : "Suspended servers",
    "Médias filtrés"                : "Filtered media",
    "Serveurs limités"              : "Limited servers",
    "Serveurs modérés"              : "Limited servers",
}
55
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the published block lists from an instance's "/about/more" page.

    Every <h3> heading on the page is translated to its canonical English
    form through ``language_mapping``. For each recognised heading the rows
    of the next <table> in the document (header row excluded) are collected
    as block entries.

    Args:
        domain: Host name of the instance to query.

    Returns:
        A dict with the keys "reject", "media_removal" and "followers_only",
        each holding a list of {"domain", "hash", "reason"} dicts. An empty
        dict is returned when the page could not be fetched or parsed.

    Raises:
        ValueError: If ``domain`` is not a string or is empty.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    try:
        doc = bs4.BeautifulSoup(
            network.fetch_response(
                domain,
                "/about/more",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text,
            "html.parser",
        )
    except Exception as exception:
        # Deliberately not BaseException: KeyboardInterrupt and SystemExit
        # must still be able to stop the program.
        print("ERROR: Cannot fetch from domain:", domain, exception)
        instances.update_last_error(domain, exception)
        return {}

    # Lower-cased heading -> canonical key, so that headings differing only in
    # letter case still end up in the right list.
    canonical_names = {name.lower(): name for name in blocklist}

    for header in doc.find_all("h3"):
        header_text = tidyup.reason(header.text)

        if header_text in language_mapping:
            header_text = language_mapping[header_text]
        else:
            print(f"WARNING: header_text='{header_text}' not found in language mapping table")

        bucket = canonical_names.get(header_text.lower())
        if bucket is None:
            print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
            continue

        # Search the rest of the document instead of only the siblings to
        # account for instances that e.g. hide the lists in a dropdown menu.
        table = header.find_next("table")
        if table is None:
            print(f"WARNING: header_text='{header_text}' is not followed by a table, domain='{domain}'")
            continue

        # The first row is skipped - presumably the table's header row.
        for line in table.find_all("tr")[1:]:
            span = line.find("span")
            cells = line.find_all("td")
            title = span.get("title") if span is not None else None

            if title is None or len(cells) < 2:
                # A row without the expected markup must not abort the scrape.
                print(f"WARNING: Skipping malformed row below header_text='{header_text}', domain='{domain}'")
                continue

            blocklist[bucket].append(
                {
                    "domain": tidyup.domain(span.text),
                    # The first 9 characters of the title are dropped -
                    # presumably a "SHA-256: " prefix in front of the digest.
                    "hash"  : tidyup.domain(title[9:]),
                    "reason": tidyup.reason(cells[1].text),
                }
            )

    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }
115
# Maps the "severity" of an entry returned by /api/v1/instance/domain_blocks
# to the block level under which it is stored.
_SEVERITY_TO_BLOCK_LEVEL = {
    "suspend"       : "reject",
    "silence"       : "followers_only",
    "reject_media"  : "media_removal",
    "reject_reports": "report_removal",
}

def _fetch_rows_from_api(domain: str):
    """Fetch the block list from the JSON API and group it by block level.

    Returns a dict mapping each block level to a list of
    {"domain", "hash", "reason"} dicts, or None when the API answered with an
    error. Exceptions caused by an unexpected response shape are not handled
    here; the caller treats them as "API not usable".
    """
    rows = {
        "reject"        : [],
        "media_removal" : [],
        "followers_only": [],
        "report_removal": [],
    }

    data = network.get_json_api(
        domain,
        "/api/v1/instance/domain_blocks",
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    if "error_message" in data:
        print(f"WARNING: Was not able to fetch domain_blocks from domain='{domain}': status_code='{data['status_code']}',error_message='{data['error_message']}'")
        instances.update_last_error(domain, data)
        # Signal the failure explicitly instead of relying on a later
        # KeyError for the missing payload.
        return None

    blocklist = data["json"]
    print(f"INFO: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon' ...")
    for block in blocklist:
        entry = {
            "domain": block["domain"],
            "hash"  : block["digest"],
            "reason": block["comment"],
        }

        block_level = _SEVERITY_TO_BLOCK_LEVEL.get(block["severity"])
        if block_level is None:
            print("WARNING: Unknown severity:", block['severity'], block['domain'])
            continue

        rows[block_level].append(entry)

    return rows

def _process_block(domain: str, block_level: str, block: dict, nodeinfo_url: str, command: str) -> None:
    """Validate a single block entry and store or refresh it in the database.

    ``nodeinfo_url`` and ``command`` are handed to instances.add() when the
    blocked instance is not registered yet.
    """
    # Read by key: relying on the order of block.values() silently breaks as
    # soon as a producer builds the dict in a different order.
    blocked = tidyup.domain(block["domain"])
    blocked_hash = block["hash"]
    reason = block["reason"]
    reason = tidyup.reason(reason) if reason is not None and reason != "" else None

    if blocked == "":
        print("WARNING: blocked is empty:", domain)
        return
    if blacklist.is_blacklisted(blocked):
        return

    if blocked.count("*") > 0:
        # The name is obfuscated; try to find the real one through the hash.
        fba.cursor.execute(
            "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
        )
        searchres = fba.cursor.fetchone()

        if searchres is None:
            print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
            return

        blocked = searchres[0]
        # Only a local rebinding, so the looked-up value applies to this
        # entry alone and cannot leak into the entries processed afterwards.
        nodeinfo_url = searchres[2]

    if not validators.domain(blocked):
        print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - skipped!")
        return
    if not instances.is_registered(blocked):
        instances.add(blocked, domain, command, nodeinfo_url)

    # From here on 'blocked' is a validated name without wildcards, so it is
    # used directly as the stored identifier.
    if not blocks.is_instance_blocked(domain, blocked, block_level):
        blocks.add_instance(domain, blocked, reason, block_level)
    else:
        blocks.update_last_seen(domain, blocked, block_level)
        blocks.update_reason(reason, domain, blocked, block_level)

def fetch_blocks(domain: str, origin: str, nodeinfo_url: str) -> None:
    """Fetch the block list of a Mastodon instance and store it.

    The JSON endpoint "/api/v1/instance/domain_blocks" is tried first. When it
    reports an error or its response cannot be processed, the lists are
    scraped through fetch_blocks_from_about() instead. Every entry is then
    validated and recorded via the ``instances`` and ``blocks`` modules, and
    the changes are committed at the end.

    Failures while fetching or storing are printed, not raised.

    Args:
        domain: Host name of the instance whose blocks are fetched.
        origin: Validated (string or None, not empty) but otherwise unused
            by this function.
        nodeinfo_url: Passed to instances.add() for blocked instances that
            are not registered yet.

    Raises:
        ValueError: If a parameter has the wrong type or is empty.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]={type(nodeinfo_url)} is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # Evaluated here, not in a helper, so that the name recorded by
    # instances.add() remains the name of this function.
    command = inspect.currentframe().f_code.co_name

    try:
        # JSON endpoint of newer Mastodon versions
        try:
            rows = _fetch_rows_from_api(domain)
        except Exception:
            # Exception instead of BaseException: Ctrl-C and SystemExit must
            # not be mistaken for an unusable API.
            rows = None

        if rows is None:
            # Fall back to scraping the about page
            rows = fetch_blocks_from_about(domain)

        print(f"INFO: Checking {len(rows)} block levels from domain='{domain}',software='mastodon' ...")
        for block_level, blocklist in rows.items():
            block_level = tidyup.domain(block_level)

            if block_level == "":
                print("WARNING: block_level is empty, domain:", domain)
                continue

            for block in blocklist:
                _process_block(domain, block_level, block, nodeinfo_url, command)

        fba.connection.commit()
    except Exception as exception:
        print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")