]> git.mxchange.org Git - fba.git/blob - fba/networks/pleroma.py
f8fb931f9c8f69c628eac9a661bc1ccc364818e1
[fba.git] / fba / networks / pleroma.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import logging
18
19 import bs4
20
21 from fba import database
22 from fba import utils
23
24 from fba.helpers import config
25 from fba.helpers import domain as domain_helper
26 from fba.helpers import tidyup
27
28 from fba.http import federation
29 from fba.http import network
30
31 from fba.models import instances
32
# Configure the root logger at INFO level and create this module's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Translation table used by fetch_blocks_from_about(): maps the text of an
# <h2> header found on the about page to the English section name under which
# the block list is collected. Currently only an English entry exists.
language_mapping = {"Reject": "Suspended servers"}
41
def _normalize_block_level(block_level: str, domain: str):
    """Tidy up a raw block level and map it to the name that gets stored.

    Returns the normalised block level, or None when the entry must be
    skipped (empty after tidy-up, or equal to 'accept').
    """
    block_level = tidyup.domain(block_level)
    logger.debug("block_level='%s' - AFTER!", block_level)

    if block_level == "":
        logger.warning("block_level is now empty!")
        return None
    if block_level == "accept":
        logger.debug("domain='%s' skipping block_level='accept'", domain)
        return None
    if block_level == "suspend":
        logger.debug("domain='%s', mapping 'suspend' to 'suspended'", domain)
        return "suspended"
    if block_level == "silence":
        logger.debug("domain='%s', mapping 'silence' to 'silenced'", domain)
        return "silenced"

    return block_level

def _update_reason(blockdict: list, blocked: str, reason) -> None:
    """Set 'reason' on every already collected record whose 'blocked' matches."""
    # NOTE(review): records are matched by blocked domain only, not by
    # block_level, so one reason is applied to all levels of that domain.
    logger.debug("Checking %d blockdict records ...", len(blockdict))
    for block in blockdict:
        logger.debug("block[blocked]='%s',blocked='%s'", block['blocked'], blocked)
        if block['blocked'] == blocked:
            logger.debug("Updating reason='%s' for blocker='%s'", reason, block['blocked'])
            block['reason'] = reason

def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
    """Collect the block list a Pleroma instance publishes.

    The nodeinfo document is fetched through federation.fetch_nodeinfo() and
    its 'metadata' -> 'federation' section is inspected for 'mrf_simple' /
    'quarantined_instances' (blocked domains) and 'mrf_simple_info' /
    'quarantined_instances_info' (reasons). When none of these keys is
    present, fetch_blocks_from_about() is used as a fallback.

    Parameters:
        domain: the blocking instance, validated by domain_helper.raise_on()
        nodeinfo_url: non-empty string handed to federation.fetch_nodeinfo()

    Returns:
        A list of dicts with the keys 'blocker', 'blocked', 'reason' and
        'block_level'. The list is empty when nodeinfo could not be fetched
        or lacks the 'metadata' / 'federation' keys.

    Raises:
        ValueError: nodeinfo_url is not a string or is empty, or a reason in
        'mrf_simple_info' has a type that cannot be handled.
    """
    logger.debug("domain='%s',nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
    domain_helper.raise_on(domain)

    if not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    blockdict = list()
    rows = None
    try:
        logger.debug("Fetching nodeinfo: domain='%s',nodeinfo_url='%s'", domain, nodeinfo_url)
        rows = federation.fetch_nodeinfo(domain, nodeinfo_url)
    except network.exceptions as exception:
        # Best effort: remember the error and fall through to the None check
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s'", domain)
        return list()
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s'", len(rows), domain)
        return list()
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s'", len(rows['metadata']), domain)
        return list()

    data = rows["metadata"]["federation"]
    found = False

    logger.debug("data[]='%s'", type(data))
    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True

        # 'quarantined_instances' is handled like one more block level. It is
        # optional, so a missing key must not abort the whole import.
        levels = {
            **data["mrf_simple"],
            "quarantined_instances": data.get("quarantined_instances", []),
        }

        for block_level, blocklist in levels.items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = _normalize_block_level(block_level, domain)
            if block_level is None:
                continue

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            for blocked in blocklist:
                logger.debug("blocked='%s' - BEFORE!", blocked)
                blocked = tidyup.domain(blocked)
                logger.debug("blocked='%s' - AFTER!", blocked)

                if blocked == "":
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue
                elif not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
                blocked = utils.deobfuscate_domain(blocked, domain)

                # The deobfuscated name may differ, so it is checked again
                logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
                if not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                blockdict.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": block_level,
                })

    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked)
            logger.debug("blocked='%s' - AFTER!", blocked)

            if blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue
            elif not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
            blocked = utils.deobfuscate_domain(blocked, domain)

            logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
            if not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    # NOTE(review): nothing above writes to the database directly; presumably
    # this flushes writes done inside the helpers - confirm.
    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Reasons
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True

        infos = {
            **data["mrf_simple_info"],
            **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
        }

        for block_level, info in infos.items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
            block_level = _normalize_block_level(block_level, domain)
            if block_level is None:
                continue

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked)
                logger.debug("blocked='%s' - AFTER!", blocked)

                # A reason is either a plain string or a dict carrying 'reason'
                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"])
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked == "":
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue
                elif not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
                blocked = utils.deobfuscate_domain(blocked, domain)
                logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)

                _update_reason(blockdict, blocked, reason)

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        quarantined = data["quarantined_instances_info"]["quarantined_instances"]
        # Iterate key and value together: the reason must be read with the
        # original key, because tidying the domain may change the string.
        for blocked, info in quarantined.items():
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked)
            logger.debug("blocked='%s' - AFTER!", blocked)

            # One malformed entry must not discard the remaining ones
            if not isinstance(info, dict) or "reason" not in info:
                logger.warning("Cannot find reason for blocked='%s' in rows()=%d,domain='%s' - SKIPPED!", blocked, len(quarantined), domain)
                continue

            reason = info["reason"]
            logger.debug("reason='%s'", reason)

            if blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue

            logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
            blocked = utils.deobfuscate_domain(blocked, domain)

            logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
            if not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            _update_reason(blockdict, blocked, reason)
    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    if not found:
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d record(s) ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("block_level='%s'", block_level)
                records = blocklist[block_level]

                logger.debug("rows[%s]()=%d'", type(records), len(records))
                for record in records:
                    logger.debug("record[]='%s'", type(record))
                    blocked = tidyup.domain(record["blocked"])
                    reason  = tidyup.reason(record["reason"])
                    logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                    if blocked == "":
                        logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                        continue
                    elif not utils.is_domain_wanted(blocked):
                        logger.warning("blocked='%s' is not wanted - SKIPPED!", blocked)
                        continue

                    logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
                    blocked = utils.deobfuscate_domain(blocked, domain)
                    logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)

                    # Same re-check as in the nodeinfo branches above
                    if not utils.is_domain_wanted(blocked):
                        logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                        continue

                    logger.debug("Appending blocker='%s',blocked='%s',reason='%s',block_level='%s' ...",domain, blocked, reason, block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : blocked,
                        "reason"     : reason,
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict
312
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape block lists from the instance's HTML about page.

    The page '/instance/about/index.html' is fetched and parsed with
    BeautifulSoup. Every <h2> header whose (translated) text names one of the
    known sections is followed to the next <table>; all rows except the first
    are read as 'blocked domain' / 'reason' pairs.

    Parameters:
        domain: the instance to query, validated by domain_helper.raise_on()

    Returns:
        A dict with the keys 'reject', 'media_removal' and 'followers_only',
        each holding a list of {'blocked': ..., 'reason': ...} dicts. An empty
        dict is returned when no page could be fetched.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    doc = None
    for path in ["/instance/about/index.html"]:
        try:
            # Resetting doc type
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code='%d',response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

    # Sections are collected under their English header text first and are
    # mapped to block levels in the return statement
    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        # Empty dict (not list) so the return type matches the annotation;
        # len() and iteration behave the same for callers
        return {}

    for header in doc.find_all("h2"):
        header_text = tidyup.reason(header.text)

        logger.debug("header_text='%s' - BEFORE!", header_text)
        if header_text in language_mapping:
            logger.debug("header_text='%s' - FOUND!", header_text)
            header_text = language_mapping[header_text]
        else:
            logger.warning("header_text='%s' not found in language mapping table", header_text)

        logger.debug("header_text='%s - AFTER!'", header_text)
        if header_text not in blocklist:
            logger.warning("header_text='%s' not found in blocklist()=%d", header_text, len(blocklist))
            continue

        # find_next() looks at everything following the header, not only at
        # its siblings, so a table nested in another element is found as well
        table = header.find_next("table")
        if table is None:
            logger.warning("Cannot find table for header_text='%s',domain='%s' - SKIPPED!", header_text, domain)
            continue

        logger.debug("Found header_text='%s', importing domain blocks ...", header_text)
        # The first row is skipped
        for line in table.find_all("tr")[1:]:
            logger.debug("line[]='%s'", type(line))
            cells = line.find_all("td")
            if len(cells) < 2:
                logger.warning("Row with %d cell(s) has no domain/reason pair: header_text='%s',domain='%s' - SKIPPED!", len(cells), header_text, domain)
                continue

            blocklist[header_text].append({
                "blocked": tidyup.domain(cells[0].text),
                "reason" : tidyup.reason(cells[1].text),
            })

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }