]> git.mxchange.org Git - fba.git/blob - fba/networks/mastodon.py
686ae9d67f21f502da459b6cd9f63dfc0db2ee8d
[fba.git] / fba / networks / mastodon.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import inspect
18 import logging
19
20 import bs4
21
22 from fba import csrf
23 from fba import database
24 from fba import utils
25
26 from fba.helpers import blacklist
27 from fba.helpers import config
28 from fba.helpers import domain as domain_helper
29 from fba.helpers import tidyup
30
31 from fba.http import network
32
33 from fba.models import blocks
34 from fba.models import instances
35
# NOTE: basicConfig() here configures the root logger at INFO level as a side
# effect of importing this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Language mapping X -> English.
# Maps localized <h3> headlines found on Mastodon /about(/more) pages to their
# canonical English form so the scraper can bucket the tables that follow each
# headline regardless of the instance's configured language.
language_mapping = {
    # English -> English
    "Silenced instances"            : "Silenced servers",
    "Suspended instances"           : "Suspended servers",
    "Limited instances"             : "Limited servers",
    "Filtered media"                : "Filtered media",
    # Mapping German -> English
    "Gesperrte Server"              : "Suspended servers",
    "Gefilterte Medien"             : "Filtered media",
    "Stummgeschaltete Server"       : "Silenced servers",
    # Japanese -> English
    "停止済みのサーバー"            : "Suspended servers",
    "制限中のサーバー"              : "Limited servers",
    "メディアを拒否しているサーバー": "Filtered media",
    "サイレンス済みのサーバー"      : "Silenced servers",
    # Hebrew -> English
    "שרתים מושעים"                  : "Suspended servers",
    "מדיה מסוננת"                   : "Filtered media",
    "שרתים מוגבלים"                 : "Silenced servers",
    # French -> English
    "Serveurs suspendus"            : "Suspended servers",
    "Médias filtrés"                : "Filtered media",
    "Serveurs limités"              : "Limited servers",
    "Serveurs modérés"              : "Limited servers",
}
65
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape a Mastodon instance's /about(/more) page for its block lists.

    Tries "/about/more" first, then "/about". Each <h3> headline is translated
    to English via language_mapping and the first table following it is parsed
    into block entries.

    Parameters:
        domain -- instance to scrape (validated by domain_helper.raise_on())

    Returns:
        dict with keys "reject", "media_removal" and "followers_only", each a
        list of {"domain", "hash", "reason"} dicts (all empty when no /about
        page could be fetched).
    """
    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
    domain_helper.raise_on(domain)

    # Fixed: previous call passed 'domain' without a '%s' placeholder, which
    # breaks logging's %-formatting.
    logger.debug("Fetching mastodon blocks from domain='%s' ...", domain)
    doc = None
    for path in ["/about/more", "/about"]:
        try:
            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            doc = bs4.BeautifulSoup(
                network.fetch_response(
                    domain,
                    path,
                    network.web_headers,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                ).text,
                "html.parser",
            )

            # A page with <h3> headlines is one we can parse - stop probing.
            if len(doc.find_all("h3")) > 0:
                logger.debug("path='%s' had some headlines - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception='%s'", domain, type(exception))
            instances.set_last_error(domain, exception)
            break

    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        return blocklist

    for header in doc.find_all("h3"):
        header_text = tidyup.reason(header.text)

        logger.debug("header_text='%s'", header_text)
        if header_text in language_mapping:
            logger.debug("Translating header_text='%s' ...", header_text)
            header_text = language_mapping[header_text]
        else:
            logger.warning("header_text='%s' not found in language mapping table", header_text)

        # Fixed: the former extra check "header_text.lower() in blocklist" was
        # dead (all keys are capitalized) and, had it ever matched on its own,
        # the blocklist[header_text] access below would have raised KeyError.
        if header_text not in blocklist:
            logger.warning("header_text='%s' not found in blocklist()=%d", header_text, len(blocklist))
            continue

        # find_all_next (instead of find_next_siblings) also catches tables
        # that e.g. hide lists in a dropdown menu.
        tables = header.find_all_next("table")
        if len(tables) == 0:
            # Fixed: unguarded [0] indexing raised IndexError when no table
            # followed the headline.
            logger.warning("No table found after header_text='%s' for domain='%s' - SKIPPED!", header_text, domain)
            continue

        # First <tr> is the column header row - skip it.
        for line in tables[0].find_all("tr")[1:]:
            span  = line.find("span")
            cells = line.find_all("td")
            if span is None or len(cells) < 2:
                # Malformed row - skip instead of raising AttributeError/IndexError
                logger.warning("Skipping malformed table row for domain='%s'", domain)
                continue

            blocklist[header_text].append({
                "domain": tidyup.domain(span.text),
                # [9:] presumably strips a fixed-width prefix (e.g. "SHA-256: ")
                # from the title attribute - TODO confirm against live markup
                "hash"  : tidyup.domain(span["title"][9:]),
                "reason": tidyup.reason(cells[1].text),
            })

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }
133
def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Fetch and store the domain blocks published by a Mastodon instance.

    First queries the JSON API endpoint /api/v1/instance/domain_blocks; when
    that returns zero rows, falls back to scraping the /about(/more) pages via
    fetch_blocks_from_about(). Discovered blocks are registered/updated in the
    database and the connection is committed at the end.

    Parameters:
        domain       -- instance to query (validated by domain_helper.raise_on())
        origin       -- originating domain (may be None, must not be "")
        nodeinfo_url -- nodeinfo URL recorded when registering new instances

    Raises:
        ValueError -- when origin or nodeinfo_url fail the type/emptiness checks
    """
    logger.debug("domain='%s',origin='%s',nodeinfo_url='%s' - CALLED!", domain, origin, nodeinfo_url)
    domain_helper.raise_on(domain)

    if not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_blocks,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        return

    # Maps the API's "severity" values onto the local block levels.
    severity_levels = {
        "suspend"       : "reject",
        "silence"       : "followers_only",
        "reject_media"  : "media_removal",
        "reject_reports": "report_removal",
    }

    try:
        # JSON endpoint for newer mastodons
        found_blocks = list()
        blocklist = list()

        rows = {
            "reject"        : [],
            "media_removal" : [],
            "followers_only": [],
            "report_removal": [],
        }

        logger.debug("Querying API domain_blocks: domain='%s'", domain)
        data = network.get_json_api(
            domain,
            "/api/v1/instance/domain_blocks",
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("data[]='%s'", type(data))
        if "error_message" in data:
            logger.debug("Was not able to fetch domain_blocks from domain='%s': status_code='%s',error_message='%s'", domain, data["status_code"], data["error_message"])
            instances.set_last_error(domain, data)
            return
        elif "json" in data and "error" in data["json"]:
            logger.warning("JSON API returned error message: '%s'", data["json"]["error"])
            instances.set_last_error(domain, data)
            return
        else:
            # Getting blocklist
            blocklist = data["json"]

        if len(blocklist) > 0:
            logger.info("Checking %d entries from domain='%s' ...", len(blocklist), domain)
            for block in blocklist:
                # Check type
                logger.debug("block[]='%s'", type(block))
                if not isinstance(block, dict):
                    # Fixed wording: we skip because it is NOT a dict
                    logger.debug("block[]='%s' is not of type 'dict' - SKIPPED!", type(block))
                    continue

                # "comment" is optional in the API response - .get() avoids the
                # KeyError the former block['comment'] access raised.
                logger.debug("severity='%s',domain='%s',hash='%s',comment='%s'", block["severity"], block["domain"], block["digest"], block.get("comment"))

                # Map block -> entry
                entry = {
                    "domain": block["domain"],
                    "hash"  : block["digest"],
                    "reason": block.get("comment")
                }

                level = severity_levels.get(block["severity"])
                if level is None:
                    logger.warning("Unknown severity='%s', domain='%s'", block["severity"], block["domain"])
                else:
                    logger.debug("Adding entry='%s' with severity='%s' ...", entry, block["severity"])
                    rows[level].append(entry)
        else:
            logger.debug("domain='%s' has returned zero rows, trying /about/more page ...", domain)
            rows = fetch_blocks_from_about(domain)

        logger.info("Checking %d entries from domain='%s' ...", len(rows.items()), domain)
        for block_level, blocklist in rows.items():
            logger.debug("domain='%s',block_level='%s',blocklist()=%d", domain, block_level, len(blocklist))
            block_level = tidyup.domain(block_level)

            logger.debug("block_level='%s' - AFTER!", block_level)
            if block_level == "":
                logger.warning("block_level is empty, domain='%s'", domain)
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            # Fixed: former message mixed %-style with never-expanded f-string
            # placeholders ('{domain}' was logged literally).
            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            for block in blocklist:
                logger.debug("block[]='%s'", type(block))
                # Relies on the entry dicts' insertion order: domain, hash, reason
                blocked, blocked_hash, reason = block.values()

                logger.debug("blocked='%s',blocked_hash='%s',reason='%s'", blocked, blocked_hash, reason)
                blocked = tidyup.domain(blocked)
                reason  = tidyup.reason(reason) if reason is not None and reason != "" else None
                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked == "":
                    logger.warning("blocked is empty, domain='%s'", domain)
                    continue
                elif "*" in blocked or "?" in blocked:
                    # Obscured domain (e.g. "example.*") - the two former
                    # copy-pasted branches are merged; '*' takes precedence as
                    # before. Doing the hash search for instance names as well
                    # to tidy up DB.
                    obscurer = "*" if "*" in blocked else "?"
                    row = instances.deobscure(obscurer, blocked, blocked_hash)

                    logger.debug("row[]='%s'", type(row))
                    if row is None:
                        logger.warning("Cannot deobsfucate blocked='%s',blocked_hash='%s' - SKIPPED!", blocked, blocked_hash)
                        continue

                    logger.debug("Updating domain: row[0]='%s'", row[0])
                    blocked      = row[0]
                    origin       = row[1]
                    nodeinfo_url = row[2]

                # NOTE: a second, identical wanted/registered check used to
                # follow this one; it was dead code (this block either skips
                # the domain or registers it) and has been removed.
                logger.debug("Looking up instance by domain: blocked='%s'", blocked)
                if not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue
                elif blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue
                elif not instances.is_registered(blocked):
                    logger.debug("Domain blocked='%s' wasn't found, adding ..., domain='%s',origin='%s',nodeinfo_url='%s'", blocked, domain, origin, nodeinfo_url)
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, nodeinfo_url)

                if not blocks.is_instance_blocked(domain, blocked, block_level):
                    logger.debug("Blocking domain='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                    blocks.add_instance(domain, blocked, reason, block_level)

                    if block_level == "reject":
                        found_blocks.append({
                            "blocked": blocked,
                            "reason" : reason
                        })
                else:
                    logger.debug("Updating block last seen and reason for domain='%s',blocked='%s' ...", domain, blocked)
                    blocks.update_last_seen(domain, blocked, block_level)
                    blocks.update_reason(reason, domain, blocked, block_level)

        logger.debug("Invoking commit() ...")
        database.connection.commit()
    except network.exceptions as exception:
        logger.warning("domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
        instances.set_last_error(domain, exception)

    logger.debug("EXIT!")