]> git.mxchange.org Git - fba.git/blob - fba/networks/pleroma.py
3b580560cba02ddf154c3b5dae1ad95f30d4fc67
[fba.git] / fba / networks / pleroma.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import logging
18
19 import bs4
20
21 from fba import database
22 from fba import utils
23
24 from fba.helpers import blacklist
25 from fba.helpers import config
26 from fba.helpers import domain as domain_helper
27 from fba.helpers import tidyup
28
29 from fba.http import network
30 from fba.http import nodeinfo
31
32 from fba.models import blocks
33 from fba.models import instances
34
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Translation table: lower-cased heading text -> block level name.
# It is consulted by fetch_blocks_from_about() for each <h2> heading found on
# an instance's about page; several headings may share one block level.
language_mapping = dict((
    # Headings that describe media handling
    ("filtered media", "filtered_media"),
    ("media removal", "media_removal"),
    ("media_removal", "media_removal"),
    ("media force-set as sensitive", "media_nsfw"),
    ("nsfw", "media_nsfw"),
    # Headings that describe restricted delivery
    ("limited servers", "followers_only"),
    ("followers-only", "followers_only"),
    # Headings that describe rejected or silenced instances
    ("reject", "reject"),
    ("suspended servers", "reject"),
    ("silenced servers", "silenced"),
    # Heading that describes removal from the federated timeline
    ('removal from "the whole known network" timeline', "federated_timeline_removal"),
))
53
def _update_reason(blockdict: list, blocked: str, reason) -> None:
    """Set `reason` on every record of `blockdict` whose "blocked" value equals `blocked`."""
    logger.debug("Checking %d blockdict records ...", len(blockdict))
    for block in blockdict:
        logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
        if block["blocked"] == blocked:
            logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"])
            block["reason"] = reason


def fetch_blocks(domain: str) -> list:
    """Collect block records for `domain` from its nodeinfo federation metadata.

    The keys 'mrf_simple' / 'quarantined_instances' of
    rows["metadata"]["federation"] provide the blocked domains, the keys
    'mrf_simple_info' / 'quarantined_instances_info' provide the reasons.
    When none of these keys is present, fetch_blocks_from_about() is used
    instead.

    Args:
        domain: The blocker instance whose blocks should be fetched.

    Returns:
        A list of dicts with the keys "blocker", "blocked", "reason" and
        "block_level". The list is empty when nodeinfo could not be fetched
        or lacks the 'metadata' / 'federation' elements.

    Raises:
        Exception: If `domain` is blacklisted or not registered.
        ValueError: If a reason entry is neither a string, a dict with a
            "reason" key, nor None.

    NOTE(review): domain_helper.raise_on() presumably raises on an invalid
    domain - confirm against the helper.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    blockdict = list()
    rows = None

    try:
        logger.debug("Fetching nodeinfo: domain='%s'", domain)
        rows = nodeinfo.fetch(domain, update_mode=False)

        if "error_message" in rows:
            logger.warning("Error message '%s' during fetching nodeinfo for domain='%s'", rows["error_message"], domain)
            instances.set_last_error(domain, rows)
            instances.update(domain)

            logger.debug("Returning empty list ... - EXIT!")
            return list()
        elif "exception" in rows:
            logger.warning("Exception '%s' during fetching nodeinfo for domain='%s' - EXIT!", type(rows["exception"]), domain)
            return list()
        elif "json" in rows:
            logger.debug("rows[json] found for domain='%s'", domain)
            rows = rows["json"]

    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s' - EXIT!", domain)
        return list()
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s' - EXIT!", len(rows), domain)
        return list()
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s' - EXIT!", len(rows["metadata"]), domain)
        return list()

    data = rows["metadata"]["federation"]
    found = False

    logger.debug("data[]='%s'", type(data))
    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True

        # 'quarantined_instances' is not guaranteed to accompany 'mrf_simple',
        # so a missing (or null) value is treated as an empty list instead of
        # raising KeyError/TypeError.
        block_levels = {
            **data["mrf_simple"],
            "quarantined_instances": data.get("quarantined_instances") or [],
        }

        for block_level, blocklist in block_levels.items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            # An empty key was turned into None above, so both must be skipped
            # before the value reaches blocks.alias_block_level()
            if block_level is None or block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            for blocked in blocklist:
                logger.debug("blocked='%s' - BEFORE!", blocked)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if blocked is None or blocked == "":
                    logger.warning("blocked='%s' is empty after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", blocked, domain, block_level)
                    continue

                logger.debug("Invoking utils.deobfuscate(%s, %s) ...", blocked, domain)
                blocked = utils.deobfuscate(blocked, domain)

                logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
                if blocked is None or blocked == "":
                    logger.warning("blocked='%s' is None or empty after utils.deobfuscate(): domain='%s',block_level='%s' - SKIPPED!", blocked, domain, block_level)
                    continue
                elif not domain_helper.is_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                blockdict.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": block_level,
                })

    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked) if blocked != "" else None

            logger.debug("blocked='%s' - AFTER!", blocked)
            if blocked is None or blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Reasons
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True
        for block_level, info in (
            {
                **data["mrf_simple_info"],
                **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
            }
        ).items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level is None:
                logger.warning("block_level is now None!")
                continue
            elif block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s': Skipping block_level='%s' ...", domain, block_level)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"]) if isinstance(reason["reason"], str) else None
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                # An empty key was turned into None above, it can never match a record
                if blocked is None or blocked == "":
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue

                # NOTE: Records are matched by blocked domain only, not by block level
                _update_reason(blockdict, blocked, reason)

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        quarantined = data["quarantined_instances_info"]["quarantined_instances"]
        for blocked, info in quarantined.items():
            logger.debug("blocked='%s' - BEFORE!", blocked)

            # Validate the entry before reading its reason; the raw key (not the
            # tidied domain) is what exists in the mapping.
            if not isinstance(info, dict) or "reason" not in info:
                logger.warning("Cannot find reason for blocked='%s' in quarantined()=%d,domain='%s' - SKIPPED!", blocked, len(quarantined), domain)
                continue

            reason = tidyup.reason(info["reason"])
            blocked = tidyup.domain(blocked) if blocked != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked is None or blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue

            _update_reason(blockdict, blocked, reason)
    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    if not found:
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d different blocklists ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("block_level='%s'", block_level)
                rows = blocklist[block_level]

                logger.debug("rows[%s]()=%d'", type(rows), len(rows))
                for block in rows:
                    logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...",domain, block["blocked"], block["reason"], block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : block["blocked"],
                        "reason"     : block["reason"],
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict
293
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape block lists from the about page of `domain`.

    The page '/instance/about/index.html' is fetched and parsed. Each <h2>
    heading is translated through `language_mapping` and, when the result is
    a known block level, the rows of the next <table> after that heading
    (except the first row) are imported as blocked domain plus reason.

    Args:
        domain: The blocker instance whose about page should be parsed.

    Returns:
        A dict mapping each known block level to a list of dicts with the
        keys "blocked" and "reason". An empty dict is returned when no page
        could be fetched.

    Raises:
        Exception: If `domain` is blacklisted or not registered.

    NOTE(review): domain_helper.raise_on() presumably raises on an invalid
    domain - confirm against the helper.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    doc = None
    for path in ["/instance/about/index.html"]:
        try:
            # Resetting doc type
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

    blocklist = {
        "reject"        : [],
        "filtered_media": [],
        "followers_only": [],
        "silenced"      : [],
        "media_nsfw"    : [],
        "media_removal" : [],
        "federated_timeline_removal": [],
    }

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        # Same type as the regular return value; still empty for callers checking len()
        return dict()

    # find_all() yields an empty result set (never None) when no heading
    # exists, in which case the loop below is simply skipped.
    headers = doc.find_all("h2")

    logger.debug("headers[]='%s'", type(headers))
    logger.info("Checking %d headers ...", len(headers))
    for header in headers:
        logger.debug("header[%s]='%s'", type(header), header)
        block_level = tidyup.reason(header.text).lower()

        logger.debug("block_level='%s' - BEFORE!", block_level)
        if block_level in language_mapping:
            logger.debug("block_level='%s' - FOUND!", block_level)
            block_level = language_mapping[block_level].lower()
        else:
            logger.warning("block_level='%s' not found in language mapping table", block_level)

        logger.debug("block_level='%s' - AFTER!", block_level)
        if block_level not in blocklist:
            logger.warning("block_level='%s' not found in blocklist()=%d", block_level, len(blocklist))
            continue

        # find_next() searches everything following the header (not only its
        # siblings), which also covers tables nested in e.g. dropdown menus.
        logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
        table = header.find_next("table")
        if table is None:
            logger.warning("No table found after header for block_level='%s',domain='%s' - SKIPPED!", block_level, domain)
            continue

        # The first row is skipped (presumably the table's heading row)
        for line in table.find_all("tr")[1:]:
            logger.debug("line[]='%s'", type(line))
            cells = line.find_all("td")
            if len(cells) == 0:
                logger.debug("domain='%s',block_level='%s': row has no cells - SKIPPED!", domain, block_level)
                continue

            blocked = cells[0].text
            logger.debug("blocked='%s'", blocked)

            blocked = tidyup.domain(blocked) if blocked != "" else None
            # A row without a second cell carries no reason
            reason = tidyup.reason(cells[1].text) if len(cells) > 1 else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked is None or blocked == "":
                logger.debug("domain='%s',block_level='%s': blocked is empty - SKIPPED!", domain, block_level)
                continue
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason)
            blocklist[block_level].append({
                "blocked": blocked,
                "reason" : reason,
            })

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return blocklist