]> git.mxchange.org Git - fba.git/blob - mastodon.py
4752ca1792d41815b74b89a39262e155f7b152ff
[fba.git] / mastodon.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import bs4
18 import inspect
19 import validators
20
21 from fba import blacklist
22 from fba import blocks
23 from fba import config
24 from fba import fba
25 from fba import instances
26
# Maps the section headings found on an instance's "/about/more" page to the
# canonical English headings that fetch_blocks_from_about() uses as keys of its
# internal blocklist. Canonical headings map to themselves so that pages which
# already use them are not reported as missing from this table.
language_mapping = {
    # English -> English
    "Silenced instances"            : "Silenced servers",
    "Suspended instances"           : "Suspended servers",
    "Limited instances"             : "Limited servers",
    "Silenced servers"              : "Silenced servers",
    "Suspended servers"             : "Suspended servers",
    "Limited servers"               : "Limited servers",
    "Filtered media"                : "Filtered media",
    # German -> English
    "Gesperrte Server"              : "Suspended servers",
    "Gefilterte Medien"             : "Filtered media",
    "Stummgeschaltete Server"       : "Silenced servers",
    # Japanese -> English
    "停止済みのサーバー"            : "Suspended servers",
    "制限中のサーバー"              : "Limited servers",
    "メディアを拒否しているサーバー": "Filtered media",
    "サイレンス済みのサーバー"      : "Silenced servers",
    # Hebrew -> English
    "שרתים מושעים"                  : "Suspended servers",
    "מדיה מסוננת"                   : "Filtered media",
    "שרתים מוגבלים"                 : "Silenced servers",
    # French -> English
    "Serveurs suspendus"            : "Suspended servers",
    "Médias filtrés"                : "Filtered media",
    "Serveurs limités"              : "Limited servers",
    "Serveurs modérés"              : "Limited servers",
}
52
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the block tables from the "/about/more" page of an instance.

    Every <h3> heading on the page is translated through language_mapping;
    for each recognised heading, the rows of the first <table> that follows
    it are collected.

    Args:
        domain: Host name of the instance to query.

    Returns:
        A dict with the keys "reject", "media_removal" and "followers_only",
        each holding a list of {"domain", "hash", "reason"} dicts. An empty
        dict is returned when the page could not be fetched or parsed.

    Raises:
        ValueError: If domain is not a str or is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain)
    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    try:
        doc = bs4.BeautifulSoup(
            fba.get_response(domain, "/about/more", fba.headers, (config.get("connection_timeout"), config.get("read_timeout"))).text,
            "html.parser",
        )
    except Exception as e:
        # Only ordinary errors are handled here; KeyboardInterrupt and
        # SystemExit must still be able to stop the program.
        print("ERROR: Cannot fetch from domain:", domain, e)
        instances.update_last_error(domain, e)
        return {}

    for header in doc.find_all("h3"):
        header_text = fba.tidyup_reason(header.text)

        # DEBUG: print(f"DEBUG: header_text='{header_text}'")
        if header_text in language_mapping:
            header_text = language_mapping[header_text]
        else:
            print(f"WARNING: header_text='{header_text}' not found in language mapping table")

        # Only an exact canonical heading can be used to index blocklist.
        if header_text not in blocklist:
            print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
            continue

        # find_all_next() instead of find_next_siblings() to account for
        # instances that e.g. hide the lists in a dropdown menu.
        tables = header.find_all_next("table")
        if not tables:
            print(f"WARNING: header_text='{header_text}' is not followed by any table, domain='{domain}'")
            continue

        # The first row is skipped (presumably the table's header row).
        for line in tables[0].find_all("tr")[1:]:
            span = line.find("span")
            cells = line.find_all("td")
            if span is None or len(cells) < 2:
                print(f"WARNING: Skipping unexpected table row below header_text='{header_text}', domain='{domain}'")
                continue

            blocklist[header_text].append(
                {
                    "domain": fba.tidyup_domain(span.text),
                    # The first 9 characters of the title attribute are
                    # dropped - presumably a "SHA-256: " style label; verify
                    # against the page markup.
                    "hash"  : fba.tidyup_domain(span["title"][9:]),
                    # The reason is free text, so it gets the text tidy-up
                    # rather than the one meant for domain names.
                    "reason": fba.tidyup_reason(cells[1].text),
                }
            )

    # DEBUG: print("DEBUG: Returning blocklist for domain:", domain)
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        # Both headings describe the same block level, so they are merged.
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }
107
def _fetch_blocks_from_api(domain: str) -> dict:
    """Query "/api/v1/instance/domain_blocks" and group the entries by block level.

    Returns a dict with the keys "reject", "media_removal", "followers_only"
    and "report_removal", each holding a list of {"domain", "hash", "reason"}
    dicts. Any error (network, JSON decoding, unexpected payload) is left to
    the caller.
    """
    severity_to_level = {
        'suspend'       : 'reject',
        'silence'       : 'followers_only',
        'reject_media'  : 'media_removal',
        'reject_reports': 'report_removal',
    }
    grouped = {
        "reject"        : [],
        "media_removal" : [],
        "followers_only": [],
        "report_removal": [],
    }
    timeout = (config.get("connection_timeout"), config.get("read_timeout"))

    # handling CSRF, at least one server has been seen requiring it to access the endpoint
    # DEBUG: print("DEBUG: Fetching meta:", domain)
    meta = bs4.BeautifulSoup(
        fba.get_response(domain, "/", fba.headers, timeout).text,
        "html.parser",
    )
    try:
        csrf = meta.find("meta", attrs={"name": "csrf-token"})["content"]
        # DEBUG: print("DEBUG: Adding CSRF token:", domain, csrf)
        reqheaders = {**fba.api_headers, "X-CSRF-Token": csrf}
    except (TypeError, KeyError):
        # No such <meta> tag (find() returned None) or it has no "content"
        # attribute: continue with the plain API headers.
        reqheaders = fba.api_headers

    # DEBUG: print("DEBUG: Querying API domain_blocks:", domain)
    rows = fba.get_response(domain, "/api/v1/instance/domain_blocks", reqheaders, timeout).json()

    print(f"INFO: Checking {len(rows)} entries from domain='{domain}',software='mastodon' ...")
    for row in rows:
        entry = {
            'domain': row['domain'],
            'hash'  : row['digest'],
            'reason': row['comment']
        }

        level = severity_to_level.get(row['severity'])
        if level is None:
            print("WARNING: Unknown severity:", row['severity'], row['domain'])
            continue

        # DEBUG: print(f"DEBUG: Adding entry='{entry}' with severity='{row['severity']}' ...")
        grouped[level].append(entry)

    return grouped

def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Fetch the block list of a Mastodon instance and record it in the database.

    The JSON endpoint of newer Mastodon versions is tried first; if that
    fails for any reason, the "/about/more" page is scraped instead. Blocked
    domains containing "*" are looked up by their hash in the instances
    table; entries that cannot be resolved are skipped. Valid blocked domains
    that are not yet registered get added, and the block is either inserted
    or has its last-seen time and reason updated. Changes are committed at
    the end. Errors during fetching or processing are printed, not raised.

    Args:
        domain: Host name of the instance whose blocks are fetched.
        origin: Origin of the instance, or None. Only validated here.
        nodeinfo_url: Passed on when a blocked domain gets registered.

    Raises:
        ValueError: If a parameter has the wrong type or is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]={type(nodeinfo_url)} is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    try:
        try:
            # json endpoint for newer mastodons
            blocks_by_level = _fetch_blocks_from_api(domain)
        except Exception as e:
            # Ordinary failures fall back to scraping; KeyboardInterrupt and
            # SystemExit are deliberately not intercepted.
            # DEBUG: print(f"DEBUG: Failed, trying mastodon-specific fetches: domain='{domain}',exception[{type(e)}]={str(e)}")
            blocks_by_level = fetch_blocks_from_about(domain)

        print(f"INFO: Checking {len(blocks_by_level)} entries from domain='{domain}',software='mastodon' ...")
        for block_level, entries in blocks_by_level.items():
            # DEBUG: print("DEBUG: domain,block_level,entries():", domain, block_level, len(entries))
            block_level = fba.tidyup_domain(block_level)

            # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
            if block_level == "":
                print("WARNING: block_level is empty, domain:", domain)
                continue

            # DEBUG: print(f"DEBUG: Checking {len(entries)} entries from domain='{domain}',software='mastodon',block_level='{block_level}' ...")
            for block in entries:
                # Read by key so the result does not depend on key order.
                blocked      = fba.tidyup_domain(block["domain"])
                blocked_hash = block["hash"]
                reason       = fba.tidyup_reason(block["reason"])
                # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - AFTER!")

                # Per-entry value: a hash lookup below may replace it for
                # this entry only, never for the entries that follow.
                blocked_nodeinfo_url = nodeinfo_url

                if blocked == "":
                    print("WARNING: blocked is empty:", domain)
                    continue
                elif blacklist.is_blacklisted(blocked):
                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                    continue
                elif blocked.count("*") > 0:
                    # Doing the hash search for instance names as well to tidy up DB
                    fba.cursor.execute(
                        "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
                    )
                    searchres = fba.cursor.fetchone()

                    if searchres is None:
                        print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
                        continue

                    # DEBUG: print("DEBUG: Updating domain: ", searchres[0])
                    blocked = searchres[0]
                    blocked_nodeinfo_url = searchres[2]

                # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                if not validators.domain(blocked):
                    print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - skipped!")
                    continue
                elif not instances.is_registered(blocked):
                    # DEBUG: print("DEBUG: Domain wasn't found, adding:", blocked, domain)
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, blocked_nodeinfo_url)

                blocking = blocked if blocked.count("*") <= 1 else blocked_hash
                # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'")

                if not blocks.is_instance_blocked(domain, blocked, block_level):
                    # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
                    blocks.add_instance(domain, blocking, reason, block_level)
                else:
                    # DEBUG: print(f"DEBUG: Updating block last seen and reason for domain='{domain}',blocking='{blocking}' ...")
                    blocks.update_last_seen(domain, blocking, block_level)
                    blocks.update_reason(reason, domain, blocking, block_level)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()
    except Exception as e:
        print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(e)}]:'{str(e)}'")

    # DEBUG: print("DEBUG: EXIT!")
260
261     # DEBUG: print("DEBUG: EXIT!")