]> git.mxchange.org Git - fba.git/blob - fba/networks/pleroma.py
1a69c239a7d42d374f7da7968391c790b720a983
[fba.git] / fba / networks / pleroma.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import logging
18
19 import bs4
20
21 from fba import database
22 from fba import utils
23
24 from fba.helpers import config
25 from fba.helpers import domain as domain_helper
26 from fba.helpers import tidyup
27
28 from fba.http import network
29 from fba.http import nodeinfo
30
31 from fba.models import blocks
32 from fba.models import instances
33
# Module-wide logging setup: INFO threshold via basicConfig() plus a logger
# named after this module.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Language mapping X -> English
#
# Keys are lower-cased section headers as they appear on an instance's about
# page, values are the block level names used as keys of the blocklist that
# fetch_blocks_from_about() builds.
language_mapping = {
    # English -> English
    "filtered media": "filtered_media",
    "limited servers": "followers_only",
    "followers-only": "followers_only",
    "media removal": "media_removal",
    "media_removal": "media_removal",
    "media force-set as sensitive": "media_nsfw",
    "nsfw": "media_nsfw",
    "reject": "reject",
    "suspended servers": "reject",
    "silenced servers": "silenced",
    'removal from "the whole known network" timeline': "federated_timeline_removal",
}
52
def _update_reason(blockdict: list, blocked: str, reason) -> None:
    """Set `reason` on every record in `blockdict` whose "blocked" equals `blocked`."""
    logger.debug("Checking %d blockdict records ...", len(blockdict))
    for block in blockdict:
        logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
        if block["blocked"] == blocked:
            logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"])
            block["reason"] = reason

def fetch_blocks(domain: str) -> list:
    """
    Collects the block list of a Pleroma instance.

    The data is read from the "metadata" -> "federation" part of the
    instance's nodeinfo ("mrf_simple", "quarantined_instances" and their
    "*_info" counterparts carrying the reasons). When none of these keys is
    present, the about page is parsed via fetch_blocks_from_about() instead.

    Args:
        domain: Domain of the (already registered) instance to query.

    Returns:
        A list of dicts with the keys "blocker", "blocked", "reason" and
        "block_level". The list is empty when nodeinfo could not be fetched
        or lacks the federation metadata.

    Raises:
        Exception: If `domain` is not registered.
        ValueError: If a reason in "mrf_simple_info" is neither a string,
            a dict with a "reason" key, nor None.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    # NOTE(review): presumably raises on an invalid domain - confirm in fba.helpers.domain
    domain_helper.raise_on(domain)

    if not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    blockdict = list()
    rows = None

    try:
        logger.debug("Fetching nodeinfo: domain='%s'", domain)
        rows = nodeinfo.fetch(domain, update_mode=False)

        if "error_message" in rows:
            logger.warning("Error message '%s' during fetching nodeinfo for domain='%s'", rows["error_message"], domain)
            instances.set_last_error(domain, rows)
            instances.update(domain)

            logger.debug("Returning empty list ... - EXIT!")
            return list()
        elif "exception" in rows:
            logger.warning("Exception '%s' during fetching nodeinfo for domain='%s' - EXIT!", type(rows["exception"]), domain)
            return list()
        elif "json" in rows:
            logger.debug("rows[json] found for domain='%s'", domain)
            rows = rows["json"]

    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s' - EXIT!", domain)
        return list()
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s' - EXIT!", len(rows), domain)
        return list()
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s' - EXIT!", len(rows["metadata"]), domain)
        return list()

    data = rows["metadata"]["federation"]
    found = False

    logger.debug("data[]='%s'", type(data))
    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True

        # Quarantined instances are handled as one more block level, but only
        # when the node actually publishes them. Accessing the key
        # unconditionally raised KeyError on nodes without it.
        block_levels = dict(data["mrf_simple"])
        if "quarantined_instances" in data:
            block_levels["quarantined_instances"] = data["quarantined_instances"]

        for block_level, blocklist in block_levels.items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            # None stands for "was empty before tidyup", "" for "empty after tidyup"
            if block_level is None or block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            for blocked in blocklist:
                logger.debug("blocked='%s' - BEFORE!", blocked)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if blocked is None:
                    logger.warning("blocked is empty - SKIPPED!")
                    continue
                elif blocked == "":
                    logger.warning("blocked is an empty string after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", domain, block_level)
                    continue

                logger.debug("Invoking utils.deobfuscate(%s, %s) ...", blocked, domain)
                blocked = utils.deobfuscate(blocked, domain)

                logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
                if not domain_helper.is_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                blockdict.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": block_level,
                })

    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked) if blocked != "" else None
            logger.debug("blocked='%s' - AFTER!", blocked)

            # Also catch None here, otherwise it is handed to is_wanted()
            if blocked is None or blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Reasons
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True

        reason_levels = dict(data["mrf_simple_info"])
        if "quarantined_instances_info" in data:
            reason_levels.update(data["quarantined_instances_info"])

        for block_level, info in reason_levels.items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level is None or block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s': Skipping block_level='%s' ...", domain, block_level)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"]) if isinstance(reason["reason"], str) else None
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked is None or blocked == "":
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue

                # NOTE(review): records are matched by blocked domain only, not
                # by block level - confirm this is intended when a domain is
                # listed under several levels.
                _update_reason(blockdict, blocked, reason)

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        quarantined_info = data["quarantined_instances_info"]["quarantined_instances"]
        for blocked, info in quarantined_info.items():
            logger.debug("blocked='%s' - BEFORE!", blocked)

            # Validate the entry under its original key BEFORE reading the
            # reason and before the domain name gets normalised.
            if not isinstance(info, dict) or "reason" not in info:
                logger.warning("Cannot find reason for blocked='%s' in rows()=%d,domain='%s' - SKIPPED!", blocked, len(quarantined_info), domain)
                continue

            reason = tidyup.reason(info["reason"]) if isinstance(info["reason"], str) else None
            blocked = tidyup.domain(blocked) if blocked != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked is None or blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue

            _update_reason(blockdict, blocked, reason)
    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    if not found:
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d different blocklists ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("block_level='%s'", block_level)
                rows = blocklist[block_level]

                logger.debug("rows[%s]()=%d'", type(rows), len(rows))
                for block in rows:
                    logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...", domain, block["blocked"], block["reason"], block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : block["blocked"],
                        "reason"     : block["reason"],
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict
287
def fetch_blocks_from_about(domain: str) -> dict:
    """
    Parses the block lists published on an instance's about page.

    The page "/instance/about/index.html" is fetched and each <h2> header is
    translated through `language_mapping` into a block level. For known block
    levels the rows of the next <table> after the header are imported, taking
    the first cell as the blocked domain and the second cell as the reason.

    Args:
        domain: Domain of the (already registered) instance to query.

    Returns:
        A dict mapping each block level to a list of dicts with the keys
        "blocked" and "reason". An empty dict is returned when the page could
        not be fetched.

    Raises:
        Exception: If `domain` is not registered.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    # NOTE(review): presumably raises on an invalid domain - confirm in fba.helpers.domain
    domain_helper.raise_on(domain)

    if not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    doc = None
    for path in ["/instance/about/index.html"]:
        try:
            # Resetting doc type
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

    blocklist = {
        "reject"        : [],
        "filtered_media": [],
        "followers_only": [],
        "silenced"      : [],
        "media_nsfw"    : [],
        "media_removal" : [],
        "federated_timeline_removal": [],
    }

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        # Empty dict (not list) to honour the annotated return type; it still
        # has a length of zero for callers that only check len()
        return dict()

    # find_all() always returns a (possibly empty) result list, never None
    headers = doc.find_all("h2")

    logger.info("Checking %d headers ...", len(headers))
    for header in headers:
        logger.debug("header[%s]='%s'", type(header), header)
        block_level = tidyup.reason(header.text).lower()

        logger.debug("block_level='%s' - BEFORE!", block_level)
        if block_level in language_mapping:
            logger.debug("block_level='%s' - FOUND!", block_level)
            block_level = language_mapping[block_level].lower()
        else:
            logger.warning("block_level='%s' not found in language mapping table", block_level)

        logger.debug("block_level='%s' - AFTER!", block_level)
        if block_level not in blocklist:
            logger.warning("block_level='%s' not found in blocklist()=%d", block_level, len(blocklist))
            continue

        # find_next() looks at all following elements (not only siblings) to
        # account for instances that e.g. hide lists in dropdown menu
        logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
        table = header.find_next("table")
        if table is None:
            logger.warning("No table found after header for block_level='%s',domain='%s' - SKIPPED!", block_level, domain)
            continue

        # First row is skipped as table heading
        for line in table.find_all("tr")[1:]:
            logger.debug("line[]='%s'", type(line))
            cells = line.find_all("td")
            if len(cells) == 0:
                logger.debug("domain='%s',block_level='%s': row has no cells - SKIPPED!", domain, block_level)
                continue

            blocked = cells[0].text
            logger.debug("blocked='%s'", blocked)

            blocked = tidyup.domain(blocked) if blocked != "" else None
            # A row may lack the reason column
            reason = tidyup.reason(cells[1].text) if len(cells) > 1 else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked is None or blocked == "":
                logger.debug("domain='%s',block_level='%s': blocked is empty - SKIPPED!", domain, block_level)
                continue
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason)
            blocklist[block_level].append({
                "blocked": blocked,
                "reason" : reason,
            })

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return blocklist