]> git.mxchange.org Git - fba.git/blob - fba/networks/pleroma.py
6ee00a6b83ab428f99f66dfcbbf5416484ec0cb3
[fba.git] / fba / networks / pleroma.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import logging
18
19 import bs4
20 import validators
21
22 from fba import database
23 from fba import utils
24
25 from fba.helpers import blacklist
26 from fba.helpers import config
27 from fba.helpers import domain as domain_helper
28 from fba.helpers import tidyup
29
30 from fba.http import network
31 from fba.http import nodeinfo
32
33 from fba.models import blocks
34 from fba.models import instances
35
# Module-wide logging: configure the root logger at INFO level and create
# the logger used by every function in this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Translation table: lower-cased section header text found on an /about page
# (left) -> block level name (right). fetch_blocks_from_about() looks headers
# up here before matching them against its own block level keys.
language_mapping = dict((
    # English -> English
    ("filtered media", "filtered_media"),
    ("limited servers", "followers_only"),
    ("followers-only", "followers_only"),
    ("media removal", "media_removal"),
    ("media_removal", "media_removal"),
    ("media force-set as sensitive", "media_nsfw"),
    ("nsfw", "media_nsfw"),
    ("reject", "reject"),
    ("suspended servers", "reject"),
    ("silenced servers", "silenced"),
    ("removal from \"the whole known network\" timeline", "federated_timeline_removal"),
))
54
def _update_reason(blockdict: list, blocked: str, reason) -> None:
    """Set `reason` on every record in `blockdict` whose "blocked" equals `blocked`."""
    # NOTE(review): records are matched on the blocked host only, not on the
    # block level, so one reason is applied to all levels of the same host.
    # This mirrors the previous inline loops - confirm that it is intended.
    logger.debug("Checking %d blockdict records ...", len(blockdict))
    for block in blockdict:
        logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
        if block["blocked"] == blocked:
            logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"])
            block["reason"] = reason

def fetch_blocks(domain: str) -> list:
    """
    Collects the block records published by `domain`.

    The instance's nodeinfo is fetched and metadata.federation is inspected:
    'mrf_simple' and 'quarantined_instances' provide the blocked hosts per
    block level, 'mrf_simple_info' and 'quarantined_instances_info' provide
    the reasons which are merged into the records found before. When none of
    these elements is present, fetch_blocks_from_about() is used instead.

    Args:
        domain: Host name of the instance to query. It is passed to
            domain_helper.raise_on() first.

    Returns:
        A list of dicts with the keys "blocker", "blocked", "reason" and
        "block_level". The list is empty when nodeinfo could not be fetched
        or lacks the 'metadata' / 'federation' elements.

    Raises:
        Exception: If `domain` is blacklisted or not registered.
        ValueError: If a reason in 'mrf_simple_info' is neither a string,
            a dict with a 'reason' key nor None.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    blockdict = list()
    rows = None

    try:
        logger.debug("Fetching nodeinfo: domain='%s'", domain)
        rows = nodeinfo.fetch(domain, update_mode=False)

        if "error_message" in rows:
            logger.warning("Error message '%s' during fetching nodeinfo for domain='%s'", rows["error_message"], domain)
            instances.set_last_error(domain, rows)
            instances.update(domain)

            logger.debug("Returning empty list ... - EXIT!")
            return list()
        elif "exception" in rows:
            logger.warning("Exception '%s' during fetching nodeinfo for domain='%s' - EXIT!", type(rows["exception"]), domain)
            return list()
        elif "json" in rows:
            logger.debug("rows[json] found for domain='%s'", domain)
            rows = rows["json"]

    except network.exceptions as exception:
        # rows stays None here, the check below then returns an empty list
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    logger.debug("rows[]='%s'", type(rows))
    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s' - EXIT!", domain)
        return list()
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s' - EXIT!", len(rows), domain)
        return list()
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s' - EXIT!", len(rows["metadata"]), domain)
        return list()

    found = False
    data = rows["metadata"]["federation"]
    logger.debug("data[]='%s'", type(data))

    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True

        # 'quarantined_instances' is optional, only merge it in when it is
        # really there instead of failing with a KeyError
        levels = dict(data["mrf_simple"])
        if "quarantined_instances" in data:
            levels["quarantined_instances"] = data["quarantined_instances"]

        for block_level, blocklist in levels.items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            # An empty key has been turned into None above, so check both
            if block_level in [None, ""]:
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            for blocked in blocklist:
                logger.debug("blocked='%s' - BEFORE!", blocked)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if blocked in [None, ""]:
                    logger.warning("blocked='%s' is empty after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", blocked, domain, block_level)
                    continue
                elif validators.domain(blocked) and blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue

                logger.debug("Invoking utils.deobfuscate(%s, %s) ...", blocked, domain)
                blocked = utils.deobfuscate(blocked, domain)
                logger.debug("blocked[%s]='%s' - DEOBFUSCATED!", type(blocked), blocked)

                if blocked in [None, ""]:
                    logger.warning("blocked='%s' is None or empty after utils.deobfuscate(): domain='%s',block_level='%s' - SKIPPED!", blocked, domain, block_level)
                    continue
                elif not domain_helper.is_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                blockdict.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": block_level,
                })

    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        logger.debug("Checking %d quarantined instance(s) ...", len(data["quarantined_instances"]))
        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked) if blocked != "" else None
            logger.debug("blocked='%s' - AFTER!", blocked)

            if blocked in [None, ""]:
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Reasons
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True
        for block_level, info in (
            {
                **data["mrf_simple_info"],
                **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
            }
        ).items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level in [None, ""]:
                logger.warning("block_level='%s' is now empty!", block_level)
                continue
            elif block_level == "accept":
                logger.debug("domain='%s': Skipping block_level='%s' ...", domain, block_level)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"]) if isinstance(reason["reason"], str) else None
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                # An empty key has been turned into None above, so check both
                if blocked in [None, ""]:
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue

                _update_reason(blockdict, blocked, reason)

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        quarantined = data["quarantined_instances_info"]["quarantined_instances"]
        for key in quarantined:
            logger.debug("key='%s' - BEFORE!", key)

            # Validate the entry with its untouched key before any value is
            # read from it; a malformed entry must not hide the others
            if not isinstance(quarantined[key], dict) or "reason" not in quarantined[key]:
                logger.warning("Cannot find reason for key='%s' in quarantined()=%d,domain='%s' - SKIPPED!", key, len(quarantined), domain)
                continue

            raw_reason = quarantined[key]["reason"]
            reason  = tidyup.reason(raw_reason) if isinstance(raw_reason, str) and raw_reason != "" else None
            blocked = tidyup.domain(key) if key != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked in [None, ""]:
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue

            _update_reason(blockdict, blocked, reason)
    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    if not found:
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d different blocklist(s) ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("Checking blocklist[%s]()=%d entries ...", block_level, len(blocklist[block_level]))
                for block in blocklist[block_level]:
                    logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...",domain, block["blocked"], block["reason"], block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : block["blocked"],
                        "reason"     : block["reason"],
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict
292
def fetch_blocks_from_about(domain: str) -> dict:
    """
    Scrapes block lists from the instance's HTML /about page.

    The page is fetched and parsed with BeautifulSoup. Every <h2> header is
    translated through language_mapping; when the result is a known block
    level, the rows (except the first one) of the next <table> after that
    header are imported as blocked host (first cell) and reason (second cell).

    Args:
        domain: Host name of the instance to query. It is passed to
            domain_helper.raise_on() first.

    Returns:
        A dict mapping each known block level to a list of dicts with the
        keys "blocked" and "reason". An empty dict is returned when the page
        could not be fetched.

    Raises:
        Exception: If `domain` is blacklisted or not registered.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    doc = None
    for path in ["/instance/about/index.html"]:
        try:
            # Resetting doc type
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        # Empty dict (not a list) to honor the declared return type; it has
        # a length of zero and iterates over nothing, just like an empty list
        return dict()

    blocklist = {
        "reject"        : [],
        "filtered_media": [],
        "followers_only": [],
        "silenced"      : [],
        "media_nsfw"    : [],
        "media_removal" : [],
        "federated_timeline_removal": [],
    }

    # find_all() returns an empty result set when nothing matches, never None
    headers = doc.find_all("h2")
    logger.debug("headers[]='%s'", type(headers))

    logger.info("Checking %d headers ...", len(headers))
    for header in headers:
        logger.debug("header[%s]='%s'", type(header), header)
        block_level = tidyup.reason(header.text).lower()

        logger.debug("block_level='%s' - BEFORE!", block_level)
        if block_level in language_mapping:
            logger.debug("block_level='%s' - FOUND!", block_level)
            block_level = language_mapping[block_level].lower()
        else:
            logger.warning("block_level='%s' not found in language mapping table", block_level)

        logger.debug("block_level='%s' - AFTER!", block_level)
        if block_level not in blocklist:
            logger.warning("block_level='%s' not found in blocklist()=%d", block_level, len(blocklist))
            continue

        # find_next() searches the whole remaining document, not only the
        # siblings, to account for instances that e.g. hide lists in a
        # dropdown menu
        logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
        table = header.find_next("table")
        if table is None:
            logger.warning("Cannot find any table after header for block_level='%s',domain='%s' - SKIPPED!", block_level, domain)
            continue

        # The first row is skipped (presumably the table's heading row)
        for line in table.find_all("tr")[1:]:
            logger.debug("line[]='%s'", type(line))
            cells = line.find_all("td")
            if len(cells) == 0:
                logger.debug("domain='%s',block_level='%s': line has no cells - SKIPPED!", domain, block_level)
                continue

            blocked = cells[0].text
            # The reason cell may be missing, treat that like an empty reason
            reason  = cells[1].text if len(cells) > 1 else ""

            logger.debug("blocked='%s',reason='%s'", blocked, reason)
            blocked = tidyup.domain(blocked) if blocked != "" else None
            reason = tidyup.reason(reason) if reason != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked in [None, ""]:
                logger.debug("domain='%s',block_level='%s': blocked='%s' is empty - SKIPPED!", domain, block_level, blocked)
                continue
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason)
            blocklist[block_level].append({
                "blocked": blocked,
                "reason" : reason,
            })

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return blocklist