]> git.mxchange.org Git - fba.git/blob - fba/networks/pleroma.py
Continued:
[fba.git] / fba / networks / pleroma.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import logging
18
19 import bs4
20 import validators
21
22 from fba import database
23 from fba import utils
24
25 from fba.helpers import blacklist
26 from fba.helpers import config
27 from fba.helpers import domain as domain_helper
28 from fba.helpers import tidyup
29
30 from fba.http import network
31 from fba.http import nodeinfo
32
33 from fba.models import blocks
34 from fba.models import instances
35
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Lookup table: lower-cased section heading as found on an instance's
# about page -> internal block level name. fetch_blocks_from_about()
# consults it for every <h2> heading it encounters. Only English headings
# are currently listed; several spellings may share one block level.
language_mapping = {
    "filtered media": "filtered_media",
    "limited servers": "followers_only",
    "followers-only": "followers_only",
    "media removal": "media_removal",
    "media_removal": "media_removal",
    "media force-set as sensitive": "media_nsfw",
    "nsfw": "media_nsfw",
    "reject": "reject",
    "suspended servers": "reject",
    "silenced servers": "silenced",
    'removal from "the whole known network" timeline': "federated_timeline_removal",
}
54
def _set_block_reason(blockdict: list, blocked: str, reason) -> None:
    """Store `reason` on every record of `blockdict` whose "blocked" equals `blocked`.

    Matching is done on the blocked domain only; the record's block level
    is not taken into account.
    """
    logger.debug("Checking %d blockdict records ...", len(blockdict))
    for block in blockdict:
        logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
        if block["blocked"] == blocked:
            logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"])
            block["reason"] = reason

def fetch_blocks(domain: str) -> list:
    """Collect the blocks that `domain` publishes in its nodeinfo metadata.

    The nodeinfo document is fetched and its "metadata" -> "federation"
    section is inspected:

    * "mrf_simple" (merged with "quarantined_instances" when present), or
      otherwise "quarantined_instances" alone, supplies the blocked domains.
    * "mrf_simple_info" (merged with "quarantined_instances_info" when
      present), or otherwise "quarantined_instances_info", supplies reasons
      which are attached to the records collected before.
    * When none of these keys exist, fetch_blocks_from_about() is used as a
      fallback.

    Args:
        domain: The blocking instance. It is checked with
            domain_helper.raise_on() first.

    Returns:
        A list of dicts with the keys "blocker", "blocked", "reason" and
        "block_level". The list is empty when nodeinfo could not be fetched
        or lacks the "metadata"/"federation" section.

    Raises:
        Exception: If `domain` is blacklisted or not registered.
        ValueError: If a reason in "mrf_simple_info" is neither a string,
            a dict containing "reason", nor None.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    blockdict = list()
    rows = None

    try:
        logger.debug("Fetching nodeinfo: domain='%s'", domain)
        rows = nodeinfo.fetch(domain, update_mode=False)

        if "error_message" in rows:
            logger.warning("Error message '%s' during fetching nodeinfo for domain='%s'", rows["error_message"], domain)
            instances.set_last_error(domain, rows)
            instances.update(domain)

            logger.debug("Returning empty list ... - EXIT!")
            return list()
        elif "exception" in rows:
            logger.warning("Exception '%s' during fetching nodeinfo for domain='%s' - EXIT!", type(rows["exception"]), domain)
            return list()
        elif "json" in rows:
            logger.debug("rows[json] found for domain='%s'", domain)
            rows = rows["json"]

    except network.exceptions as exception:
        # rows stays None here, which is handled right below
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s' - EXIT!", domain)
        return list()
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s' - EXIT!", len(rows), domain)
        return list()
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s' - EXIT!", len(rows["metadata"]), domain)
        return list()

    data = rows["metadata"]["federation"]
    found = False

    logger.debug("data[]='%s'", type(data))
    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True

        # "quarantined_instances" is optional: an instance may publish
        # "mrf_simple" without it, so a missing (or null) value is treated
        # as an empty list instead of raising KeyError.
        block_levels = {
            **data["mrf_simple"],
            "quarantined_instances": data.get("quarantined_instances") or [],
        }

        for block_level, blocklist in block_levels.items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            # An empty key was turned into None above, so both must be skipped
            if block_level in [None, ""]:
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            for blocked in blocklist:
                logger.debug("blocked='%s' - BEFORE!", blocked)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if blocked in [None, ""]:
                    logger.warning("blocked='%s' is empty after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", blocked, domain, block_level)
                    continue
                elif validators.domain(blocked) and blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue

                logger.debug("Invoking utils.deobfuscate(%s, %s) ...", blocked, domain)
                blocked = utils.deobfuscate(blocked, domain)
                logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)

                if blocked in [None, ""]:
                    logger.warning("blocked='%s' is None or empty after utils.deobfuscate(): domain='%s',block_level='%s' - SKIPPED!", blocked, domain, block_level)
                    continue
                elif not domain_helper.is_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                blockdict.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": block_level,
                })

    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        logger.debug("Checking %d quarantined instance(s) ...", len(data["quarantined_instances"]))
        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked) if blocked != "" else None

            logger.debug("blocked='%s' - AFTER!", blocked)
            if blocked in [None, ""]:
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Reasons
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True
        for block_level, info in (
            {
                **data["mrf_simple_info"],
                **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
            }
        ).items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level in [None, ""]:
                logger.warning("block_level='%s' is now empty!", block_level)
                continue
            elif block_level == "accept":
                logger.debug("domain='%s': Skipping block_level='%s' ...", domain, block_level)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"]) if isinstance(reason["reason"], str) else None
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                # blocked is None when the key was an empty string
                if blocked in [None, ""]:
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue

                _set_block_reason(blockdict, blocked, reason)

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        rows = data["quarantined_instances_info"]["quarantined_instances"]
        for blocked in rows:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            entry = rows[blocked]

            # Validate the entry BEFORE reading its reason, and do so with
            # the untouched key: the tidied name may differ from the key.
            if not isinstance(entry, dict) or "reason" not in entry:
                logger.warning("Cannot find reason for blocked='%s' in rows()=%d,domain='%s' - SKIPPED!", blocked, len(rows), domain)
                continue

            reason = tidyup.reason(entry["reason"])
            blocked = tidyup.domain(blocked) if blocked != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked in [None, ""]:
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue

            _set_block_reason(blockdict, blocked, reason)
    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    if not found:
        # Nothing usable in nodeinfo, fall back to scraping the about page
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d different blocklist(s) ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("block_level='%s'", block_level)
                rows = blocklist[block_level]

                logger.debug("rows[%s]()=%d'", type(rows), len(rows))
                for block in rows:
                    logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...",domain, block["blocked"], block["reason"], block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : block["blocked"],
                        "reason"     : block["reason"],
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict
294
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the blocks listed on the about page of `domain`.

    The page "/instance/about/index.html" is fetched and parsed. Every
    <h2> heading is lower-cased, translated through `language_mapping` and,
    when the result is a known block level, the rows of the next <table>
    after that heading (the first row is skipped as header row) are read
    as "blocked domain" / "reason" pairs.

    Args:
        domain: The blocking instance. It is checked with
            domain_helper.raise_on() first.

    Returns:
        A dict mapping each known block level to a list of dicts with the
        keys "blocked" and "reason". An empty dict is returned when no page
        could be fetched and parsed.

    Raises:
        Exception: If `domain` is blacklisted or not registered.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    doc = None
    for path in ["/instance/about/index.html"]:
        try:
            # Resetting doc type
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

    blocklist = {
        "reject"        : [],
        "filtered_media": [],
        "followers_only": [],
        "silenced"      : [],
        "media_nsfw"    : [],
        "media_removal" : [],
        "federated_timeline_removal": [],
    }

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        # Empty dict (not list) to honour the declared return type; it is
        # still empty/falsy for callers that only check its length.
        return dict()

    # find_all() always returns a (possibly empty) result set, never None
    headers = doc.find_all("h2")

    logger.debug("headers[]='%s'", type(headers))
    logger.info("Checking %d headers ...", len(headers))
    for header in headers:
        logger.debug("header[%s]='%s'", type(header), header)
        block_level = tidyup.reason(header.text).lower()

        logger.debug("block_level='%s' - BEFORE!", block_level)
        if block_level in language_mapping:
            logger.debug("block_level='%s' - FOUND!", block_level)
            block_level = language_mapping[block_level].lower()
        else:
            logger.warning("block_level='%s' not found in language mapping table", block_level)

        logger.debug("block_level='%s' - AFTER!", block_level)
        if block_level not in blocklist:
            logger.warning("block_level='%s' not found in blocklist()=%d", block_level, len(blocklist))
            continue

        # find_next() looks at everything following the heading in document
        # order, not only at siblings, so tables that are nested deeper
        # (e.g. hidden in a drop-down menu) are found as well.
        logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
        table = header.find_next("table")
        if table is None:
            logger.warning("No table follows header for block_level='%s' on domain='%s' - SKIPPED!", block_level, domain)
            continue

        # First row is skipped (header row)
        for line in table.find_all("tr")[1:]:
            logger.debug("line[]='%s'", type(line))
            cells = line.find_all("td")
            if len(cells) == 0:
                logger.debug("domain='%s',block_level='%s': row has no cells - SKIPPED!", domain, block_level)
                continue

            blocked = cells[0].text
            logger.debug("blocked='%s'", blocked)

            blocked = tidyup.domain(blocked) if blocked != "" else None
            # The reason column may be missing in a row
            reason = tidyup.reason(cells[1].text) if len(cells) > 1 else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked is None or blocked == "":
                logger.debug("domain='%s',block_level='%s': blocked is empty - SKIPPED!", domain, block_level)
                continue
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason)
            blocklist[block_level].append({
                "blocked": blocked,
                "reason" : reason,
            })

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return blocklist