]> git.mxchange.org Git - fba.git/blob - fba/networks/pleroma.py
c637f65a9d0e59b6c45697b381cbb67e94da89f2
[fba.git] / fba / networks / pleroma.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import logging
18
19 import bs4
20
21 from fba import database
22 from fba import utils
23
24 from fba.helpers import blacklist
25 from fba.helpers import config
26 from fba.helpers import domain as domain_helper
27 from fba.helpers import tidyup
28
29 from fba.http import network
30 from fba.http import nodeinfo
31
32 from fba.models import blocks
33 from fba.models import instances
34
# Module-level logging setup; every function below logs through `logger`.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Translation table: lower-cased section header as found on an instance's
# /about page -> internal block level name. Headers that are not listed here
# are used unchanged by fetch_blocks_from_about().
language_mapping = {
    # English -> English
    "filtered media": "filtered_media",
    "limited servers": "followers_only",
    "followers-only": "followers_only",
    "media removal": "media_removal",
    "media_removal": "media_removal",
    "media force-set as sensitive": "media_nsfw",
    "nsfw": "media_nsfw",
    "reject": "reject",
    "suspended servers": "reject",
    "silenced servers": "silenced",
    'removal from "the whole known network" timeline': "federated_timeline_removal",
}
53
def fetch_blocks(domain: str) -> list:
    """Collect the blocks a Pleroma-style instance publishes.

    The instance's nodeinfo is fetched and `metadata.federation` is examined:
    `mrf_simple` and `quarantined_instances` provide the blocked domains per
    block level, `mrf_simple_info` and `quarantined_instances_info` provide
    the reasons. When none of these keys is present the instance's /about
    page is parsed instead (see fetch_blocks_from_about()).

    Args:
        domain: Domain of the blocking instance. It is checked with
            domain_helper.raise_on() first (presumably rejecting malformed
            values - confirm in that helper).

    Returns:
        A list of dicts with the keys "blocker", "blocked", "reason" and
        "block_level". The list is empty when nodeinfo could not be fetched
        or lacks the expected structure.

    Raises:
        Exception: If the domain is blacklisted or not registered.
        ValueError: If a reason in `mrf_simple_info` is neither a string,
            a dict with a "reason" key, nor None.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    blockdict = list()
    rows = None

    try:
        logger.debug("Fetching nodeinfo: domain='%s'", domain)
        rows = nodeinfo.fetch(domain, update_mode=False)

        if "error_message" in rows:
            logger.warning("Error message '%s' during fetching nodeinfo for domain='%s'", rows["error_message"], domain)
            instances.set_last_error(domain, rows)
            instances.update(domain)

            logger.debug("Returning empty list ... - EXIT!")
            return list()
        elif "exception" in rows:
            logger.warning("Exception '%s' during fetching nodeinfo for domain='%s' - EXIT!", type(rows["exception"]), domain)
            return list()
        elif "json" in rows:
            # Unwrap the actual nodeinfo document
            logger.debug("rows[json] found for domain='%s'", domain)
            rows = rows["json"]

    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s' - EXIT!", domain)
        return list()
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s' - EXIT!", len(rows), domain)
        return list()
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s' - EXIT!", len(rows["metadata"]), domain)
        return list()

    data = rows["metadata"]["federation"]

    # Becomes True as soon as any supported key is found in the JSON reply,
    # otherwise the /about page is used as a fall-back further below.
    found = False

    logger.debug("data[]='%s'", type(data))
    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True

        # Treat the quarantine list as one more block level, but only when
        # the instance actually publishes it; accessing the key blindly
        # raised a KeyError for instances exposing only 'mrf_simple'.
        block_lists = dict(data["mrf_simple"])
        if "quarantined_instances" in data:
            block_lists["quarantined_instances"] = data["quarantined_instances"]

        for block_level, blocklist in block_lists.items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            # None must be skipped as well, it is set above for empty keys
            if block_level in [None, ""]:
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            for blocked in blocklist:
                logger.debug("blocked='%s' - BEFORE!", blocked)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if blocked in [None, ""]:
                    logger.warning("blocked='%s' is empty after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", blocked, domain, block_level)
                    continue

                logger.debug("Invoking utils.deobfuscate(%s, %s) ...", blocked, domain)
                blocked = utils.deobfuscate(blocked, domain)
                logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)

                if blocked in [None, ""]:
                    # The former message referenced an undefined variable
                    # 'instance' and raised a NameError instead of skipping
                    logger.warning("blocked='%s' is None or empty after utils.deobfuscate(): domain='%s',block_level='%s' - SKIPPED!", blocked, domain, block_level)
                    continue
                elif not domain_helper.is_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                blockdict.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": block_level,
                })

    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        logger.debug("Checking %d quarantined instance(s) ...", len(data["quarantined_instances"]))
        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked) if blocked != "" else None

            logger.debug("blocked='%s' - AFTER!", blocked)
            if blocked in [None, ""]:
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Reasons
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True
        for block_level, info in (
            {
                **data["mrf_simple_info"],
                **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
            }
        ).items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level in [None, ""]:
                logger.warning("block_level='%s' is now empty!", block_level)
                continue
            elif block_level == "accept":
                logger.debug("domain='%s': Skipping block_level='%s' ...", domain, block_level)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                # A reason is either a plain string or wrapped in a dict
                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"]) if isinstance(reason["reason"], str) else None
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked in [None, ""]:
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue

                # NOTE(review): records are matched by blocked domain only,
                # not by block level - confirm this is intended.
                _update_reason(blockdict, blocked, reason)

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        quarantined = data["quarantined_instances_info"]["quarantined_instances"]
        for blocked, info in quarantined.items():
            logger.debug("blocked='%s' - BEFORE!", blocked)

            # Validate the entry before reading its reason. Formerly the
            # reason was accessed first (KeyError on malformed entries) and
            # the tidied domain was looked up in the raw mapping, aborting
            # the whole loop whenever tidying had changed the key.
            if not isinstance(info, dict) or "reason" not in info:
                logger.warning("Cannot find reason for blocked='%s' in rows()=%d,domain='%s' - SKIPPED!", blocked, len(quarantined), domain)
                continue

            reason = tidyup.reason(info["reason"])
            blocked = tidyup.domain(blocked) if blocked != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked in [None, ""]:
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue

            _update_reason(blockdict, blocked, reason)
    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    if not found:
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d different blocklist(s) ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("block_level='%s'", block_level)
                rows = blocklist[block_level]

                logger.debug("rows[%s]()=%d", type(rows), len(rows))
                for block in rows:
                    logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...",domain, block["blocked"], block["reason"], block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : block["blocked"],
                        "reason"     : block["reason"],
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict

def _update_reason(blockdict: list, blocked: str, reason) -> None:
    """Set `reason` on every record in `blockdict` whose "blocked" equals `blocked`."""
    logger.debug("Checking %d blockdict record(s) ...", len(blockdict))
    for block in blockdict:
        logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
        if block["blocked"] == blocked:
            logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"])
            block["reason"] = reason
290
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the blocks an instance lists on its /about page.

    The page "/instance/about/index.html" is fetched and parsed. Each <h2>
    header is translated through `language_mapping` into a block level and
    the rows of the next <table> after that header (the first row is skipped,
    presumably the table head) are read as blocked domain plus reason.

    Args:
        domain: Domain of the blocking instance. It is checked with
            domain_helper.raise_on() first (presumably rejecting malformed
            values - confirm in that helper).

    Returns:
        A dict mapping each supported block level to a list of dicts with the
        keys "blocked" and "reason". An empty dict is returned when the page
        could not be fetched.

    Raises:
        Exception: If the domain is blacklisted or not registered.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    doc = None
    for path in ["/instance/about/index.html"]:
        try:
            # Resetting doc type
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

    # Only these block levels are imported, everything else is ignored
    blocklist = {
        "reject"        : [],
        "filtered_media": [],
        "followers_only": [],
        "silenced"      : [],
        "media_nsfw"    : [],
        "media_removal" : [],
        "federated_timeline_removal": [],
    }

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        # Return an empty dict (not a list) to honour the declared return
        # type; it is still empty and iterable for existing callers.
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        return dict()

    headers = doc.find_all("h2")

    logger.debug("headers[]='%s'", type(headers))
    if headers is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        return dict()

    logger.info("Checking %d headers ...", len(headers))
    for header in headers:
        logger.debug("header[%s]='%s'", type(header), header)
        block_level = tidyup.reason(header.text).lower()

        logger.debug("block_level='%s' - BEFORE!", block_level)
        if block_level in language_mapping:
            logger.debug("block_level='%s' - FOUND!", block_level)
            block_level = language_mapping[block_level].lower()
        else:
            logger.warning("block_level='%s' not found in language mapping table", block_level)

        logger.debug("block_level='%s' - AFTER!", block_level)
        if block_level not in blocklist:
            logger.warning("block_level='%s' not found in blocklist()=%d", block_level, len(blocklist))
            continue

        # find_next() searches everything following the header, not only its
        # siblings, which accounts for instances that e.g. hide lists in a
        # dropdown menu
        logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
        table = header.find_next("table")
        if table is None:
            # A header without any table behind it must not abort the import
            logger.warning("Cannot find a table for block_level='%s',domain='%s' - SKIPPED!", block_level, domain)
            continue

        for line in table.find_all("tr")[1:]:
            logger.debug("line[]='%s'", type(line))
            cells = line.find_all("td")
            if len(cells) == 0:
                # E.g. additional header rows that only contain <th> cells
                logger.debug("domain='%s',block_level='%s': line has no cells - SKIPPED!", domain, block_level)
                continue

            blocked = cells[0].text
            logger.debug("blocked='%s'", blocked)

            blocked = tidyup.domain(blocked) if blocked != "" else None
            # The reason column is optional
            reason = tidyup.reason(cells[1].text) if len(cells) > 1 else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked is None or blocked == "":
                logger.debug("domain='%s',block_level='%s': blocked is empty - SKIPPED!", domain, block_level)
                continue
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason)
            blocklist[block_level].append({
                "blocked": blocked,
                "reason" : reason,
            })

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return blocklist