]> git.mxchange.org Git - fba.git/blob - fba/networks/mastodon.py
Continued:
[fba.git] / fba / networks / mastodon.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import inspect
18
19 import bs4
20 import requests
21 import validators
22
23 from fba import blacklist
24 from fba import blocks
25 from fba import config
26 from fba import csrf
27 from fba import fba
28 from fba import instances
29 from fba import network
30
31 from fba.helpers import tidyup
32
# Translation table for the section headings (<h3>) found on an instance's
# "/about/more" page: localized or legacy heading -> canonical English
# heading. The canonical values are the keys of the block list built by
# fetch_blocks_from_about().
language_mapping = {
    # English -> English
    "Silenced instances"            : "Silenced servers",
    "Suspended instances"           : "Suspended servers",
    "Limited instances"             : "Limited servers",
    "Filtered media"                : "Filtered media",
    # German -> English
    "Gesperrte Server"              : "Suspended servers",
    "Gefilterte Medien"             : "Filtered media",
    "Stummgeschaltete Server"       : "Silenced servers",
    # Japanese -> English
    "停止済みのサーバー"            : "Suspended servers",
    "制限中のサーバー"              : "Limited servers",
    "メディアを拒否しているサーバー": "Filtered media",
    "サイレンス済みのサーバー"      : "Silenced servers",
    # Hebrew -> English
    "שרתים מושעים"                  : "Suspended servers",
    "מדיה מסוננת"                   : "Filtered media",
    # Literally "limited servers"; mapped like the English/French "limited" headings.
    "שרתים מוגבלים"                 : "Limited servers",
    # French -> English
    "Serveurs suspendus"            : "Suspended servers",
    "Médias filtrés"                : "Filtered media",
    "Serveurs limités"              : "Limited servers",
    "Serveurs modérés"              : "Limited servers",
}
58
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the block list from the "/about/more" page of a Mastodon instance.

    Every <h3> heading of the page is translated through ``language_mapping``;
    for headings that name a known block category, the rows (minus the first,
    header row) of the next <table> in the document are collected.

    Args:
        domain: Domain name of the instance to fetch the page from.

    Returns:
        A dict with the keys ``reject``, ``media_removal`` and
        ``followers_only``, each holding a list of
        ``{"domain": ..., "hash": ..., "reason": ...}`` dicts. An empty dict
        is returned when the page could not be fetched or parsed.

    Raises:
        ValueError: If ``domain`` is not a string or is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    try:
        doc = bs4.BeautifulSoup(
            network.fetch_response(
                domain,
                "/about/more",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text,
            "html.parser",
        )
    except Exception as exception:
        # Deliberately broad (fetching and parsing are best-effort), but not
        # BaseException: Ctrl-C and interpreter shutdown must still propagate.
        print("ERROR: Cannot fetch from domain:", domain, exception)
        instances.update_last_error(domain, exception)
        return {}

    for header in doc.find_all("h3"):
        header_text = tidyup.reason(header.text)

        # DEBUG: print(f"DEBUG: header_text='{header_text}'")
        if header_text in language_mapping:
            header_text = language_mapping[header_text]
        else:
            print(f"WARNING: header_text='{header_text}' not found in language mapping table")

        if header_text not in blocklist:
            print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
            continue

        # find_all_next() instead of find_next_siblings() to account for
        # instances that e.g. hide lists in a dropdown menu
        tables = header.find_all_next("table")
        if not tables:
            print(f"WARNING: header_text='{header_text}' is not followed by any table, domain='{domain}' - SKIPPED!")
            continue

        # The first <tr> is skipped (presumably the table's header row)
        for line in tables[0].find_all("tr")[1:]:
            span = line.find("span")
            cells = line.find_all("td")
            if span is None or len(cells) < 2:
                print(f"WARNING: Unexpected table row below header_text='{header_text}', domain='{domain}' - SKIPPED!")
                continue

            # NOTE(review): the first 9 characters of the title are cut off,
            # presumably a "SHA-256: " prefix in front of the digest - confirm.
            title = span.get("title")
            blocklist[header_text].append(
                {
                    "domain": tidyup.domain(span.text),
                    "hash"  : tidyup.domain(title[9:]) if title else None,
                    "reason": tidyup.reason(cells[1].text),
                }
            )

    # DEBUG: print("DEBUG: Returning blocklist for domain:", domain)
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }
118
def _fetch_api_rows(domain: str, headers) -> dict:
    """Fetch the block entries of ``domain``, grouped by block level.

    Queries the JSON endpoint "/api/v1/instance/domain_blocks" and falls back
    to fetch_blocks_from_about() when it reports an error or returns no
    entries. Exceptions raised by the network call are not handled here.
    """
    # DEBUG: print("DEBUG: Querying API domain_blocks:", domain)
    data = network.get_json_api(
        domain,
        "/api/v1/instance/domain_blocks",
        headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    blocklist = list()
    if "error_message" in data:
        print(f"WARNING: Was not able to fetch domain_blocks from domain='{domain}': status_code='{data['status_code']}',error_message='{data['error_message']}'")
        instances.update_last_error(domain, data)
    else:
        blocklist = data["json"]

    if not blocklist:
        # DEBUG: print(f"DEBUG: domain='{domain}' has returned zero rows, trying /about/more page ...")
        return fetch_blocks_from_about(domain)

    # Severity reported by the API -> block level stored by this program
    severity_levels = {
        "suspend"       : "reject",
        "silence"       : "followers_only",
        "reject_media"  : "media_removal",
        "reject_reports": "report_removal",
    }

    rows = {
        "reject"        : [],
        "media_removal" : [],
        "followers_only": [],
        "report_removal": [],
    }

    for block in blocklist:
        block_level = severity_levels.get(block["severity"])
        if block_level is None:
            print("WARNING: Unknown severity:", block['severity'], block['domain'])
            continue

        # DEBUG: print(f"DEBUG: Adding block='{block}' with block_level='{block_level}' ...")
        rows[block_level].append({
            "domain": block["domain"],
            "hash"  : block["digest"],
            "reason": block.get("comment"),
        })

    return rows

def fetch_blocks(domain: str, origin: str, nodeinfo_url: str) -> None:
    """Fetch the domain blocks of a Mastodon instance and record them.

    The entries come from the instance's JSON API or, as a fallback, from its
    "/about/more" page. Entries whose domain contains "*" are looked up by
    hash in the ``instances`` table and skipped when no match exists. Blocked
    domains that are not registered yet are added as instances, and each
    block is either added or has its last-seen timestamp and reason updated.
    Changes are committed at the end; on a network exception the error is
    printed and nothing is committed by this function.

    Args:
        domain: Domain name of the blocking instance.
        origin: Origin of ``domain`` or None; only validated here.
        nodeinfo_url: Passed on to instances.add() for newly found instances.

    Raises:
        ValueError: If a parameter has the wrong type or is an empty string.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]={type(nodeinfo_url)} is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
    try:
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF - EXIT!")
        return

    try:
        rows = _fetch_api_rows(domain, headers)

        total = sum(len(entries) for entries in rows.values())
        print(f"INFO: Checking {total} entries from domain='{domain}',software='mastodon' ...")
        for block_level, blocklist in rows.items():
            # DEBUG: print("DEBUG: domain,block_level,blocklist():", domain, block_level, len(blocklist))
            block_level = tidyup.domain(block_level)

            if block_level == "":
                print("WARNING: block_level is empty, domain:", domain)
                continue

            for block in blocklist:
                # Read by key, not by position, so the order of the keys in
                # an entry does not matter.
                blocked      = tidyup.domain(block["domain"])
                blocked_hash = block["hash"]
                reason       = block["reason"]
                reason       = tidyup.reason(reason) if reason is not None and reason != "" else None
                # DEBUG: print(f"DEBUG: blocked='{blocked}',blocked_hash='{blocked_hash}',reason='{reason}'")

                # Per-entry copy: a value found during de-obfuscation must
                # not leak into the entries processed afterwards.
                blocked_nodeinfo_url = nodeinfo_url

                if blocked == "":
                    print("WARNING: blocked is empty:", domain)
                    continue
                elif blacklist.is_blacklisted(blocked):
                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                    continue
                elif "*" in blocked:
                    # Doing the hash search for instance names as well to tidy up DB
                    fba.cursor.execute(
                        "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
                    )
                    searchres = fba.cursor.fetchone()

                    if searchres is None:
                        print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
                        continue

                    # DEBUG: print("DEBUG: Updating domain: ", searchres[0])
                    blocked              = searchres[0]
                    blocked_nodeinfo_url = searchres[2]

                # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                if not validators.domain(blocked):
                    print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - skipped!")
                    continue
                elif not instances.is_registered(blocked):
                    # The name of this function is recorded along with the new instance
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, blocked_nodeinfo_url)

                blocking = blocked if blocked.count("*") <= 1 else blocked_hash
                # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'")

                if not blocks.is_instance_blocked(domain, blocked, block_level):
                    # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
                    blocks.add_instance(domain, blocking, reason, block_level)
                else:
                    # DEBUG: print(f"DEBUG: Updating block last seen and reason for domain='{domain}',blocking='{blocking}' ...")
                    blocks.update_last_seen(domain, blocking, block_level)
                    blocks.update_reason(reason, domain, blocking, block_level)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()
    except network.exceptions as exception:
        print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")

    # DEBUG: print("DEBUG: EXIT!")