]> git.mxchange.org Git - fba.git/blob - fba/networks/mastodon.py
Continued:
[fba.git] / fba / networks / mastodon.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import inspect
18
19 import bs4
20 import requests
21 import validators
22
23 from fba import blacklist
24 from fba import blocks
25 from fba import config
26 from fba import csrf
27 from fba import fba
28 from fba import instances
29 from fba import network
30
31 from fba.helpers import tidyup
32
# Section headers as they appear on a Mastodon "/about/more" page, grouped by
# the language they are written in. Every header is mapped to the English
# header that fetch_blocks_from_about() uses as key of its block list.

# Older English wording -> current English wording
_ENGLISH_HEADERS = {
    "Silenced instances": "Silenced servers",
    "Suspended instances": "Suspended servers",
    "Limited instances": "Limited servers",
    "Filtered media": "Filtered media",
}

# German -> English
_GERMAN_HEADERS = {
    "Gesperrte Server": "Suspended servers",
    "Gefilterte Medien": "Filtered media",
    "Stummgeschaltete Server": "Silenced servers",
}

# Japanese -> English
_JAPANESE_HEADERS = {
    "停止済みのサーバー": "Suspended servers",
    "制限中のサーバー": "Limited servers",
    "メディアを拒否しているサーバー": "Filtered media",
    "サイレンス済みのサーバー": "Silenced servers",
}

# Hebrew -> English
_HEBREW_HEADERS = {
    "שרתים מושעים": "Suspended servers",
    "מדיה מסוננת": "Filtered media",
    "שרתים מוגבלים": "Silenced servers",
}

# French -> English
_FRENCH_HEADERS = {
    "Serveurs suspendus": "Suspended servers",
    "Médias filtrés": "Filtered media",
    "Serveurs limités": "Limited servers",
    "Serveurs modérés": "Limited servers",
}

# Merged lookup table; the groups are merged in the order listed here.
language_mapping = {
    **_ENGLISH_HEADERS,
    **_GERMAN_HEADERS,
    **_JAPANESE_HEADERS,
    **_HEBREW_HEADERS,
    **_FRENCH_HEADERS,
}
58
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the block lists published on an instance's "/about/more" page.

    Every <h3> header of the page is translated through language_mapping to
    its English form. For each recognised header the data rows of the first
    <table> following it are collected.

    Args:
        domain: Host name of the instance the page is fetched from.

    Returns:
        A dict with the keys "reject", "media_removal" and "followers_only",
        each holding a list of {"domain", "hash", "reason"} dicts. An empty
        dict is returned when the page could not be fetched or parsed.

    Raises:
        ValueError: If domain is not a string or is an empty string.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # DEBUG: print("DEBUG: Fetching mastodon blocks from domain:", domain)
    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    try:
        doc = bs4.BeautifulSoup(
            network.fetch_response(
                domain,
                "/about/more",
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            ).text,
            "html.parser",
        )
    except Exception as exception:
        # Deliberately broad (best effort), but no longer BaseException so
        # that KeyboardInterrupt and SystemExit still reach the caller.
        print("ERROR: Cannot fetch from domain:", domain, exception)
        instances.update_last_error(domain, exception)
        return {}

    for header in doc.find_all("h3"):
        header_text = tidyup.reason(header.text)

        # DEBUG: print(f"DEBUG: header_text='{header_text}'")
        if header_text in language_mapping:
            # DEBUG: print(f"DEBUG: header_text='{header_text}'")
            header_text = language_mapping[header_text]
        else:
            print(f"WARNING: header_text='{header_text}' not found in language mapping table")

        if header_text not in blocklist:
            print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")
            continue

        # Search all following elements, not only siblings, to account for
        # instances that e.g. hide the lists in a dropdown menu.
        table = header.find_next("table")
        if table is None:
            print(f"WARNING: No table found below header_text='{header_text}',domain='{domain}'")
            continue

        # The first row is skipped - presumably the table's heading row.
        for line in table.find_all("tr")[1:]:
            span = line.find("span")
            cells = line.find_all("td")
            if span is None or len(cells) < 2:
                print(f"WARNING: Skipping malformed row below header_text='{header_text}',domain='{domain}'")
                continue

            # The first 9 characters of the title are cut off - presumably
            # a "SHA-256: " prefix in front of the digest; TODO confirm.
            title = span.get("title") or ""
            blocklist[header_text].append(
                {
                    "domain": tidyup.domain(span.text),
                    "hash"  : tidyup.domain(title[9:]) if title else None,
                    "reason": tidyup.reason(cells[1].text),
                }
            )

    # DEBUG: print("DEBUG: Returning blocklist for domain:", domain)
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }
118
def _fetch_rows_from_api(domain: str, headers: dict):
    """Fetch an instance's domain blocks through its JSON API.

    Queries "/api/v1/instance/domain_blocks" and sorts every returned entry
    into a block level according to its "severity" value.

    Args:
        domain: Host name of the instance to query.
        headers: HTTP headers to send with the request.

    Returns:
        A dict mapping block level to a list of {"domain", "hash", "reason"}
        dicts, or None when the API reported an error or did not return a
        non-empty list.
    """
    severity_to_level = {
        "suspend"       : "reject",
        "silence"       : "followers_only",
        "reject_media"  : "media_removal",
        "reject_reports": "report_removal",
    }

    # DEBUG: print("DEBUG: Querying API domain_blocks:", domain)
    data = network.get_json_api(
        domain,
        "/api/v1/instance/domain_blocks",
        headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    if "error_message" in data:
        print(f"WARNING: Was not able to fetch domain_blocks from domain='{domain}': status_code='{data['status_code']}',error_message='{data['error_message']}'")
        instances.update_last_error(domain, data)
        return None

    blocklist = data["json"]
    if not isinstance(blocklist, list):
        # Anything but a list cannot be iterated as block entries.
        print(f"WARNING: domain='{domain}' returned blocklist[]='{type(blocklist)}' instead of a list")
        return None
    elif len(blocklist) == 0:
        return None

    rows = {
        "reject"        : [],
        "media_removal" : [],
        "followers_only": [],
        "report_removal": [],
    }

    print(f"INFO: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon' ...")
    for block in blocklist:
        # DEBUG: print("DEBUG: severity,domain,hash,comment:", block['severity'], block['domain'], block['digest'], block['comment'])
        block_level = severity_to_level.get(block["severity"])
        if block_level is None:
            print("WARNING: Unknown severity:", block['severity'], block['domain'])
            continue

        # DEBUG: print(f"DEBUG: Adding entry with severity='{block['severity']}' ...")
        rows[block_level].append({
            "domain": block["domain"],
            "hash"  : block["digest"],
            "reason": block["comment"]
        })

    return rows

def fetch_blocks(domain: str, origin: str, nodeinfo_url: str) -> None:
    """Fetch the block lists of a Mastodon instance and store them.

    The JSON API is tried first; when it yields nothing the "/about/more"
    page is scraped through fetch_blocks_from_about() instead. Every blocked
    domain is cleaned up, de-obfuscated through its hash when it contains
    "*", registered as an instance if unknown, and then added as a block or
    refreshed (last seen and reason). Changes are committed at the end.
    Exceptions listed in network.exceptions are logged, not raised.

    Args:
        domain: Host name of the blocking instance.
        origin: Only validated here (str or None, not empty); not used
            otherwise by this function.
        nodeinfo_url: Passed on to instances.add() for blocked domains that
            were not de-obfuscated.
            NOTE(review): this is the URL handed in for the blocking
            instance - confirm that it is intended for the blocked one.

    Raises:
        ValueError: If a parameter has the wrong type or is empty.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]={type(domain)} is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]={type(origin)} is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]={type(nodeinfo_url)} is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # Name of this function, handed to instances.add(). It must be taken
    # here and not in a helper, otherwise a different name would be passed.
    function_name = inspect.currentframe().f_code.co_name

    print(f"DEBUG: Checking CSRF for domain='{domain}'")
    headers = csrf.determine(domain, dict())

    try:
        # JSON endpoint for newer Mastodon versions, about page as fallback
        rows = _fetch_rows_from_api(domain, headers)
        if rows is None:
            # DEBUG: print(f"DEBUG: domain='{domain}' has returned zero rows, trying /about/more page ...")
            rows = fetch_blocks_from_about(domain)

        print(f"INFO: Checking {len(rows.items())} entries from domain='{domain}',software='mastodon' ...")
        for block_level, blocklist in rows.items():
            # DEBUG: print("DEBUG: domain,block_level,blocklist():", domain, block_level, len(blocklist))
            block_level = tidyup.domain(block_level)

            # DEBUG: print("DEBUG: AFTER-block_level:", block_level)
            if block_level == "":
                print("WARNING: block_level is empty, domain:", domain)
                continue

            # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from domain='{domain}',software='mastodon',block_level='{block_level}' ...")
            for block in blocklist:
                # Access by key instead of relying on the dict's value order
                blocked      = tidyup.domain(block["domain"])
                blocked_hash = block["hash"]
                reason       = block["reason"]
                reason       = tidyup.reason(reason) if reason is not None and reason != "" else None
                # DEBUG: print(f"DEBUG: blocked='{blocked}',blocked_hash='{blocked_hash}',reason='{reason}'")

                # Per-entry value: the function's own parameter must not be
                # overwritten, or data looked up for one entry would leak
                # into all entries processed after it.
                blocked_nodeinfo_url = nodeinfo_url

                if blocked == "":
                    print("WARNING: blocked is empty:", domain)
                    continue
                elif blacklist.is_blacklisted(blocked):
                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                    continue
                elif blocked.count("*") > 0:
                    # Doing the hash search for instance names as well to tidy up DB
                    fba.cursor.execute(
                        "SELECT domain, origin, nodeinfo_url FROM instances WHERE hash = ? LIMIT 1", [blocked_hash]
                    )
                    searchres = fba.cursor.fetchone()

                    print(f"DEBUG: searchres[]='{type(searchres)}'")
                    if searchres is None:
                        print(f"WARNING: Cannot deobsfucate blocked='{blocked}',blocked_hash='{blocked_hash}' - SKIPPED!")
                        continue

                    # DEBUG: print("DEBUG: Updating domain: ", searchres[0])
                    blocked              = searchres[0]
                    blocked_nodeinfo_url = searchres[2]

                # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                if not validators.domain(blocked):
                    print(f"WARNING: blocked='{blocked}',software='mastodon' is not a valid domain name - skipped!")
                    continue
                elif not instances.is_registered(blocked):
                    # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}'")
                    instances.add(blocked, domain, function_name, blocked_nodeinfo_url)

                # NOTE(review): the existence check below uses 'blocked'
                # while add/update use 'blocking' - confirm they can never
                # differ once 'blocked' has passed validation above.
                blocking = blocked if blocked.count("*") <= 1 else blocked_hash
                # DEBUG: print(f"DEBUG: blocking='{blocking}',blocked='{blocked}',blocked_hash='{blocked_hash}'")

                if not blocks.is_instance_blocked(domain, blocked, block_level):
                    # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
                    blocks.add_instance(domain, blocking, reason, block_level)
                else:
                    # DEBUG: print(f"DEBUG: Updating block last seen and reason for domain='{domain}',blocking='{blocking}' ...")
                    blocks.update_last_seen(domain, blocking, block_level)
                    blocks.update_reason(reason, domain, blocking, block_level)

        # DEBUG: print("DEBUG: Committing changes ...")
        fba.connection.commit()
    except network.exceptions as exception:
        print(f"ERROR: domain='{domain}',software='mastodon',exception[{type(exception)}]:'{str(exception)}'")

    # DEBUG: print("DEBUG: EXIT!")