fba/networks/pleroma.py
# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import logging

import bs4

from fba import database
from fba import utils

from fba.helpers import config
from fba.helpers import domain as domain_helper
from fba.helpers import tidyup

from fba.http import federation
from fba.http import network

from fba.models import instances

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Language mapping X -> English
language_mapping = {
    # English -> English
    "filtered media"   : "filtered_media",
    "limited servers"  : "followers_only",
    "followers-only"   : "followers_only",
    "media removal"    : "media_removal",
    "media_removal"    : "media_removal",
    "media force-set as sensitive": "media_nsfw",
    "nsfw"             : "media_nsfw",
    "reject"           : "reject",
    "suspended servers": "reject",
    "silenced servers" : "silenced",
    "removal from \"the whole known network\" timeline": "federated_timeline_removal",
}

def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
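    """
    Fetches blocked (limited, silenced etc.) instances from a Pleroma/Akkoma
    node. The federation metadata from nodeinfo is tried first; if no usable
    keys are found there, the instance's /about page is parsed as a fallback.

    Returns a list of dicts with the keys "blocker", "blocked", "reason" and
    "block_level".
    """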
    logger.debug("domain='%s',nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
    domain_helper.raise_on(domain)

    if not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not of type 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    blockdict = list()
    rows = None
    try:
        logger.debug("Fetching nodeinfo: domain='%s',nodeinfo_url='%s'", domain, nodeinfo_url)
        rows = federation.fetch_nodeinfo(domain, nodeinfo_url)

        if "error_message" in rows:
            logger.warning("Error message '%s' during fetching nodeinfo for domain='%s',nodeinfo_url='%s' - EXIT!", rows["error_message"], domain, nodeinfo_url)
            instances.set_last_error(domain, rows)
            return list()
        elif "exception" in rows:
            logger.warning("Exception '%s' during fetching nodeinfo for domain='%s',nodeinfo_url='%s' - EXIT!", type(rows["exception"]), domain, nodeinfo_url)
            return list()
        elif "json" in rows:
            logger.debug("rows[json] found for domain='%s',nodeinfo_url='%s'", domain, nodeinfo_url)
            rows = rows["json"]
    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s'", domain)
        return list()
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s'", len(rows), domain)
        return list()
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s'", len(rows["metadata"]), domain)
        return list()

    data = rows["metadata"]["federation"]
    found = False

    logger.debug("data[]='%s'", type(data))
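    # "mrf_simple" maps MRF policy names (reject, media_removal, ...) to lists of
    # affected domains; quarantined instances are merged in below as their own
    # block level.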
    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True
        for block_level, blocklist in (
            {
                **data["mrf_simple"],
                **{
                    "quarantined_instances": data.get("quarantined_instances", [])
                }
            }
        ).items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = tidyup.domain(block_level)
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            block_level = utils.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            if len(blocklist) > 0:
                for blocked in blocklist:
                    logger.debug("blocked='%s' - BEFORE!", blocked)
                    blocked = tidyup.domain(blocked)
                    logger.debug("blocked='%s' - AFTER!", blocked)

                    if blocked == "":
                        logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", domain, block_level)
                        continue
                    elif not utils.is_domain_wanted(blocked):
                        logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                        continue

                    logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
                    blocked = utils.deobfuscate_domain(blocked, domain)

                    logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
                    if not utils.is_domain_wanted(blocked):
                        logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                        continue

                    logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : blocked,
                        "reason"     : None,
                        "block_level": block_level,
                    })

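    # Without "mrf_simple", fall back to the bare "quarantined_instances" list,
    # recorded here with block_level "quarantined".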
    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked)
            logger.debug("blocked='%s' - AFTER!", blocked)

            if blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue
            elif not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
            blocked = utils.deobfuscate_domain(blocked, domain)

            logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
            if not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Reasons
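    # "mrf_simple_info" and "quarantined_instances_info" carry per-domain reason
    # texts; they are matched against the entries collected in blockdict above.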
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True
        for block_level, info in (
            {
                **data["mrf_simple_info"],
                **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
            }
        ).items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
            block_level = tidyup.domain(block_level)
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s': Skipping block_level='%s' ...", domain, block_level)
                continue

            block_level = utils.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked)
                logger.debug("blocked='%s' - AFTER!", blocked)

                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"]) if isinstance(reason["reason"], str) else None
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked == "":
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue
                elif not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
                blocked = utils.deobfuscate_domain(blocked, domain)
                logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)

                logger.debug("Checking %d blockdict records ...", len(blockdict))
                for block in blockdict:
                    logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
                    if block["blocked"] == blocked:
                        logger.debug("Updating reason='%s' for blocked='%s'", reason, block["blocked"])
                        block["reason"] = reason

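    # Alternative layout: reasons nested under
    # data["quarantined_instances_info"]["quarantined_instances"].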
    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        rows = data["quarantined_instances_info"]["quarantined_instances"]
        for blocked in rows:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            if "reason" not in rows[blocked]:
                logger.warning("Cannot find 'reason' for blocked='%s' in rows()=%d,domain='%s' - BREAK!", blocked, len(rows), domain)
                break

            reason = tidyup.reason(rows[blocked]["reason"])
            blocked = tidyup.domain(blocked)
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue
            elif not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
            blocked = utils.deobfuscate_domain(blocked, domain)

            logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
            if not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Checking %d blockdict records ...", len(blockdict))
            for block in blockdict:
                logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
                if block["blocked"] == blocked:
                    logger.debug("Updating reason='%s' for blocked='%s'", reason, block["blocked"])
                    block["reason"] = reason
    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

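    # No usable keys found in nodeinfo at all: fall back to parsing the /about page.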
    if not found:
        logger.debug("Did not find any usable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d different blocklists ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("block_level='%s'", block_level)
                rows = blocklist[block_level]

                logger.debug("rows[%s]()=%d", type(rows), len(rows))
                for block in rows:
                    logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", block["blocked"], domain)
                    block["blocked"] = utils.deobfuscate_domain(block["blocked"], domain)

                    logger.debug("block[blocked]='%s' - DEOBFUSCATED!", block["blocked"])
                    if not utils.is_domain_wanted(block["blocked"]):
                        logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
                        continue

                    logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...", domain, block["blocked"], block["reason"], block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : block["blocked"],
                        "reason"     : block["reason"],
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict

def fetch_blocks_from_about(domain: str) -> dict:
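    """
    Parses the instance's /about page ("/instance/about/index.html") and collects
    blocked domains and reasons per block level from the HTML tables found there.

    Returns a dict mapping block level names to lists of
    {"blocked": ..., "reason": ...} entries.
    """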
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    logger.debug("Fetching blocks from /about page of domain='%s'", domain)
    doc = None
    for path in ["/instance/about/index.html"]:
        try:
            # Reset the parsed document for each path
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

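    # Block levels the /about page parser knows how to fill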
    blocklist = {
        "reject"        : [],
        "filtered_media": [],
        "followers_only": [],
        "silenced"      : [],
        "media_nsfw"    : [],
        "media_removal" : [],
        "federated_timeline_removal": [],
    }

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        return {}

    headers = doc.find_all("h2")

    logger.debug("headers[]='%s'", type(headers))
    if len(headers) == 0:
        logger.warning("Cannot find any 'h2' headers on /about page for domain='%s' - EXIT!", domain)
        return {}

    logger.info("Checking %d headers ...", len(headers))
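    # Each <h2> heading names a block level; the table following it lists the
    # blocked domains (first column) and reasons (second column).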
    for header in headers:
        logger.debug("header[%s]='%s'", type(header), header)
        block_level = tidyup.reason(header.text).lower()

        logger.debug("block_level='%s' - BEFORE!", block_level)
        if block_level in language_mapping:
            logger.debug("block_level='%s' - FOUND!", block_level)
            block_level = language_mapping[block_level].lower()
        else:
            logger.warning("block_level='%s' not found in language mapping table", block_level)

        logger.debug("block_level='%s' - AFTER!", block_level)
        if block_level in blocklist:
            # find_next() locates the table even when it is not a direct sibling,
            # e.g. when an instance hides the list in a dropdown menu
            logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
            for line in header.find_next("table").find_all("tr")[1:]:
                logger.debug("line[]='%s'", type(line))
                blocked = tidyup.domain(line.find_all("td")[0].text)
                reason = tidyup.reason(line.find_all("td")[1].text)

                logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason)
                blocklist[block_level].append({
                    "blocked": blocked,
                    "reason" : reason,
                })
        else:
            logger.warning("block_level='%s' not found in blocklist()=%d", block_level, len(blocklist))

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return blocklist