]> git.mxchange.org Git - fba.git/blob - fba/federation/mastodon.py
Continued:
[fba.git] / fba / federation / mastodon.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import bs4
18 import validators
19
20 from fba import blacklist
21 from fba import blocks
22 from fba import config
23 from fba import fba
24 from fba import instances
25
# Maps the localized <h3> section titles found on an instance's "/about/more"
# page to the canonical English titles used as keys by fetch_blocks_from_about().
language_mapping = {
    # English -> English
    "Silenced instances": "Silenced servers",
    "Suspended instances": "Suspended servers",
    "Limited instances": "Limited servers",
    "Filtered media": "Filtered media",
    # German -> English
    "Gesperrte Server": "Suspended servers",
    "Gefilterte Medien": "Filtered media",
    "Stummgeschaltete Server": "Silenced servers",
    # Japanese -> English
    "停止済みのサーバー": "Suspended servers",
    "制限中のサーバー": "Limited servers",
    "メディアを拒否しているサーバー": "Filtered media",
    "サイレンス済みのサーバー": "Silenced servers",
    # Hebrew -> English
    "שרתים מושעים": "Suspended servers",
    "מדיה מסוננת": "Filtered media",
    # "מוגבלים" means "limited", so this maps to the same canonical title as
    # the French and Japanese "limited" labels.
    "שרתים מוגבלים": "Limited servers",
    # French -> English
    "Serveurs suspendus": "Suspended servers",
    "Médias filtrés": "Filtered media",
    "Serveurs limités": "Limited servers",
    "Serveurs modérés": "Limited servers",
}
51
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the block lists published on an instance's "/about/more" page.

    Every <h3> heading on the page is translated through `language_mapping`;
    when the resulting title is one of the known block-list sections, the rows
    of the first table following that heading are collected.

    Args:
        domain: Host name of the instance to query. Must be a non-empty string.

    Returns:
        A dict with the keys "reject", "media_removal" and "followers_only",
        each holding a list of {"domain", "hash", "reason"} dicts. An empty
        dict is returned when the page could not be fetched or parsed.

    Raises:
        ValueError: If `domain` is not a string or is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain)
    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    try:
        doc = bs4.BeautifulSoup(
            fba.get_response(domain, "/about/more", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
            "html.parser",
        )
    except Exception as e:
        # Exception (not BaseException) so that Ctrl-C / SystemExit still
        # terminate the program instead of being recorded as a fetch error.
        print("ERROR: Cannot fetch from domain:", domain, e)
        fba.update_last_error(domain, e)
        return {}

    for header in doc.find_all("h3"):
        header_text = fba.tidyup_reason(header.text)

        # DEBUG: print(f"DEBUG: header_text='{header_text}'")
        if header_text in language_mapping:
            header_text = language_mapping[header_text]
        else:
            print(f"WARNING: header_text='{header_text}' not found in language mapping table")

        if header_text not in blocklist:
            print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
            continue

        # find_next() searches everything after the header (not only its
        # siblings) to account for instances that e.g. hide lists in a
        # dropdown menu. It returns None instead of raising when no table
        # follows.
        table = header.find_next("table")
        if table is None:
            print(f"WARNING: header_text='{header_text}' is not followed by a table - skipped!")
            continue

        # The first row is skipped (presumably the table's heading row).
        for line in table.find_all("tr")[1:]:
            span = line.find("span")
            cells = line.find_all("td")
            if span is None or len(cells) < 2:
                print(f"WARNING: Malformed row below header_text='{header_text}',domain='{domain}' - skipped!")
                continue

            blocklist[header_text].append(
                {
                    "domain": fba.tidyup_domain(span.text),
                    # NOTE(review): [9:] presumably strips a 9-character
                    # prefix such as "SHA-256: " from the title - confirm.
                    "hash"  : fba.tidyup_domain(span.get("title", "")[9:]),
                    # The reason is free text, not a host name, so it is
                    # cleaned with the reason helper.
                    "reason": fba.tidyup_reason(cells[1].text),
                }
            )

    # DEBUG: print("DEBUG: Returning blocklist for domain:", domain)
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }
106
def _fetch_blocks_from_api(domain: str) -> dict:
    """Fetch the block list of `domain` from its JSON API, grouped by block level.

    Returns a dict with the keys "reject", "media_removal", "followers_only"
    and "report_removal", each holding a list of {"domain", "hash", "reason"}
    dicts. Any fetch or decoding error propagates to the caller.
    """
    block_levels = {
        "reject"        : [],
        "media_removal" : [],
        "followers_only": [],
        "report_removal": [],
    }
    severity_to_level = {
        "suspend"       : "reject",
        "silence"       : "followers_only",
        "reject_media"  : "media_removal",
        "reject_reports": "report_removal",
    }
    timeout = (config.get("connection_timeout"), config.get("read_timeout"))

    # Handling CSRF: at least one server has been seen requiring the token to
    # access the endpoint.
    # DEBUG: print("DEBUG: Fetching meta:", domain)
    meta = bs4.BeautifulSoup(
        fba.get_response(domain, "/", fba.headers, timeout).text,
        "html.parser",
    )
    try:
        csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
        # DEBUG: print("DEBUG: Adding CSRF token:", domain, csrf)
        reqheaders = {**fba.api_headers, "X-CSRF-Token": csrf}
    except (TypeError, KeyError):
        # No <meta name="csrf-token"> tag (find() returned None) or the tag
        # has no "content" attribute: fall back to the plain API headers.
        reqheaders = fba.api_headers

    # DEBUG: print("DEBUG: Querying API domain_blocks:", domain)
    blocklist = fba.get_response(domain, "/api/v1/instance/domain_blocks", reqheaders, timeout).json()

    print(f"INFO: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon' ...")
    for block in blocklist:
        entry = {
            "domain": block["domain"],
            "hash"  : block["digest"],
            "reason": block["comment"],
        }

        block_level = severity_to_level.get(block["severity"])
        if block_level is None:
            print("WARNING: Unknown severity:", block["severity"], block["domain"])
            continue

        # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{block['severity']}' ...")
        block_levels[block_level].append(entry)

    return block_levels

def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Fetch the blocks published by a Mastodon instance and record them.

    The JSON endpoint of newer Mastodon versions is tried first; on any error
    the "/about/more" page is scraped instead. Obfuscated entries (containing
    "*") are resolved by looking up their hash in the `instances` table. Every
    resolved, valid and non-blacklisted domain is registered if unknown, and
    the block is either added or its last-seen time and reason are refreshed.
    Changes are committed at the end; any exception during processing is
    printed and aborts the run before the commit.

    Args:
        domain: Host name of the blocking instance. Non-empty string.
        origin: Origin of `domain`; a non-empty string or None.
        nodeinfo_url: Non-empty string, passed on to instances.add() for
            newly discovered domains.

    Raises:
        ValueError: If an argument has the wrong type or is empty.
    """
    # Imported here because the module does not import `inspect` at the top;
    # without it every attempt to register a new instance raised NameError.
    import inspect

    print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]={type(nodeinfo_url)} is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    try:
        # Collected "reject" blocks; not consumed within this function.
        blockdict = []

        try:
            # json endpoint for newer mastodons
            block_levels = _fetch_blocks_from_api(domain)
        except Exception:
            # DEBUG: print(f"DEBUG: Failed, trying mastodon-specific fetches: domain='{domain}'")
            block_levels = fetch_blocks_from_about(domain)

        print(f"INFO: Checking {len(block_levels.items())} entries from domain='{domain}',software='mastodon' ...")
        for block_level, blocklist in block_levels.items():
            # DEBUG: print("DEBUG: domain,block_level,blocklist():", domain, block_level, len(blocklist))
            block_level = fba.tidyup_domain(block_level)

            if block_level == "":
                print("WARNING: block_level is empty, domain:", domain)
                continue

            # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon',block_level='{block_level}' ...")
            for block in blocklist:
                # Read by key instead of unpacking .values(), so the result
                # does not depend on the order the entry dict was built in.
                blocked = fba.tidyup_domain(block["domain"])
                blocked_hash = block["hash"]
                reason = block["reason"]
                # DEBUG: print("DEBUG: blocked,hash,reason:", blocked, blocked_hash, reason)

                if blocked == "":
                    print("WARNING: blocked is empty:", domain)
                    continue
                elif blacklist.is_blacklisted(blocked):
                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                    continue
                elif blocked.count("*") > 0:
                    # Doing the hash search for instance names as well to tidy up DB
                    fba.cursor.execute(
                        "SELECT domain FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
                    )
                    searchres = fba.cursor.fetchone()

                    if searchres is None:
                        print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
                        continue

                    # Only the resolved name is taken over. The function's own
                    # `origin` / `nodeinfo_url` arguments are deliberately not
                    # overwritten, so values of one resolved row cannot leak
                    # into the handling of later entries.
                    # DEBUG: print("DEBUG: Updating domain: ", searchres[0])
                    blocked = searchres[0]

                # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                if not validators.domain(blocked):
                    print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - skipped!")
                    continue
                elif not instances.is_registered(blocked):
                    # DEBUG: print("DEBUG: Hash wasn't found, adding:", blocked, domain)
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)

                blocking = blocked if blocked.count("*") <= 1 else blocked_hash
                # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'")

                if not blocks.is_instance_blocked(domain, blocked, block_level):
                    # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
                    blocks.add_instance(domain, blocking, reason, block_level)

                    if block_level == "reject":
                        blockdict.append({
                            "blocked": blocked,
                            "reason" : reason
                        })
                else:
                    # DEBUG: print(f"DEBUG: Updating block last seen and reason for domain='{domain}',blocking='{blocking}' ...")
                    blocks.update_last_seen(domain, blocking, block_level)
                    blocks.update_reason(reason, domain, blocking, block_level)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()
    except Exception as e:
        print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(e)}]:'{str(e)}'")

    # DEBUG: print("DEBUG: EXIT!")