]> git.mxchange.org Git - fba.git/blob - fba/networks/pleroma.py
5f403b9931eb1698a1cea68d2d138c2e5da69ea9
[fba.git] / fba / networks / pleroma.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import logging
18
19 import bs4
20
21 from fba import database
22 from fba import utils
23
24 from fba.helpers import config
25 from fba.helpers import domain as domain_helper
26 from fba.helpers import tidyup
27
28 from fba.http import network
29 from fba.http import nodeinfo
30
31 from fba.models import blocks
32 from fba.models import instances
33
# Configure logging on import and create this module's own logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Translation table: lower-cased headline text found on an instance's
# /about page -> internal block level name. Currently only English
# headlines are covered.
language_mapping = dict((
    # English -> English
    ("filtered media", "filtered_media"),
    ("limited servers", "followers_only"),
    ("followers-only", "followers_only"),
    ("media removal", "media_removal"),
    ("media_removal", "media_removal"),
    ("media force-set as sensitive", "media_nsfw"),
    ("nsfw", "media_nsfw"),
    ("reject", "reject"),
    ("suspended servers", "reject"),
    ("silenced servers", "silenced"),
    ('removal from "the whole known network" timeline', "federated_timeline_removal"),
))
52
def _set_reason(blockdict: list, blocked: str, reason) -> None:
    """Store reason on every record in blockdict whose 'blocked' value equals blocked."""
    logger.debug("Checking %d blockdict records ...", len(blockdict))
    for block in blockdict:
        logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
        if block["blocked"] == blocked:
            logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"])
            block["reason"] = reason

def fetch_blocks(domain: str) -> list:
    """
    Collects the block list of a Pleroma-like instance.

    The instance's nodeinfo is fetched and its metadata.federation section
    is read: 'mrf_simple' and 'quarantined_instances' provide the blocked
    domains per block level, 'mrf_simple_info' and
    'quarantined_instances_info' provide the reasons, which are attached to
    the already collected records. When none of these keys is present, the
    blocks are taken from the /about page via fetch_blocks_from_about().

    Parameters:
        domain: The blocker's domain, checked by domain_helper.raise_on()
                and required to be a registered instance.

    Returns:
        A list of dicts with the keys 'blocker', 'blocked', 'reason' and
        'block_level'. The list is empty when nodeinfo could not be fetched
        or lacks the needed sections.

    Raises:
        Exception: When the domain is not registered.
        ValueError: When a reason in 'mrf_simple_info' is neither a string,
                    a dict with a 'reason' key nor None.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    blockdict = list()
    rows = None

    try:
        logger.debug("Fetching nodeinfo: domain='%s'", domain)
        rows = nodeinfo.fetch(domain, update_mode=False)

        if "error_message" in rows:
            logger.warning("Error message '%s' during fetching nodeinfo for domain='%s'", rows["error_message"], domain)
            instances.set_last_error(domain, rows)
            instances.update(domain)

            logger.debug("Returning empty list ... - EXIT!")
            return list()
        elif "exception" in rows:
            logger.warning("Exception '%s' during fetching nodeinfo for domain='%s' - EXIT!", type(rows["exception"]), domain)
            return list()
        elif "json" in rows:
            logger.debug("rows[json] found for domain='%s'", domain)
            rows = rows["json"]

    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s' - EXIT!", domain)
        return list()
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s' - EXIT!", len(rows), domain)
        return list()
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s' - EXIT!", len(rows["metadata"]), domain)
        return list()

    data = rows["metadata"]["federation"]
    found = False

    logger.debug("data[]='%s'", type(data))
    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True

        # 'quarantined_instances' is optional: a missing (or null) entry is
        # treated as an empty list instead of raising KeyError
        levels = {
            **data["mrf_simple"],
            "quarantined_instances": data.get("quarantined_instances") or [],
        }

        for block_level, blocklist in levels.items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            # An empty key has been turned into None above, so both must be checked
            if block_level is None or block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            for blocked in blocklist:
                logger.debug("blocked='%s' - BEFORE!", blocked)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if blocked is None or blocked == "":
                    logger.warning("blocked='%s' is empty after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", blocked, domain, block_level)
                    continue

                logger.debug("Invoking utils.deobfuscate(%s, %s) ...", blocked, domain)
                blocked = utils.deobfuscate(blocked, domain)

                logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
                if not domain_helper.is_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                blockdict.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": block_level,
                })

    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked) if blocked != "" else None

            logger.debug("blocked='%s' - AFTER!", blocked)
            if blocked is None or blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Reasons
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True

        infos = {
            **data["mrf_simple_info"],
            **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
        }

        for block_level, info in infos.items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            # An empty key has been turned into None above, so both must be checked
            if block_level is None or block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s': Skipping block_level='%s' ...", domain, block_level)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"]) if isinstance(reason["reason"], str) else None
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked is None or blocked == "":
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue

                # NOTE(review): records are matched by domain only (not by
                # block level) and the domain is not deobfuscated here, while
                # the collected records are - confirm this is intended
                _set_reason(blockdict, blocked, reason)

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        reasons = data["quarantined_instances_info"]["quarantined_instances"]
        for raw_blocked, entry in reasons.items():
            logger.debug("blocked='%s' - BEFORE!", raw_blocked)

            # Validate the entry BEFORE reading its reason and by its raw key,
            # the tidied domain may differ from the key used in the JSON reply
            if not isinstance(entry, dict) or "reason" not in entry:
                logger.warning("Cannot find blocked='%s' in rows()=%d,domain='%s' - BREAK!", raw_blocked, len(reasons), domain)
                break

            reason = tidyup.reason(entry["reason"]) if isinstance(entry["reason"], str) else None
            blocked = tidyup.domain(raw_blocked) if raw_blocked != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked is None or blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue

            _set_reason(blockdict, blocked, reason)
    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    if not found:
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d different blocklists ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("block_level='%s'", block_level)
                entries = blocklist[block_level]

                logger.debug("rows[%s]()=%d'", type(entries), len(entries))
                for block in entries:
                    logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...",domain, block["blocked"], block["reason"], block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : block["blocked"],
                        "reason"     : block["reason"],
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict
284
def fetch_blocks_from_about(domain: str) -> dict:
    """
    Scrapes the block lists from an instance's /about page.

    The page "/instance/about/index.html" is fetched and parsed as HTML.
    Each <h2> headline is translated through language_mapping into a block
    level; for known block levels the rows (except the first one) of the
    next <table> element are read as "blocked domain" / "reason" pairs.

    Parameters:
        domain: The blocker's domain, checked by domain_helper.raise_on()
                and required to be a registered instance.

    Returns:
        A dict mapping block level names to lists of dicts with the keys
        'blocked' and 'reason'. An empty dict is returned when no page
        could be fetched.

    Raises:
        Exception: When the domain is not registered.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    doc = None
    for path in ["/instance/about/index.html"]:
        try:
            # Resetting doc type
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

    blocklist = {
        "reject"        : [],
        "filtered_media": [],
        "followers_only": [],
        "silenced"      : [],
        "media_nsfw"    : [],
        "media_removal" : [],
        "federated_timeline_removal": [],
    }

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        # An empty dict matches the annotated return type; callers that only
        # check len() or iterate behave the same as with an empty list
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        return {}

    # find_all() always returns a (possibly empty) list, never None
    headers = doc.find_all("h2")

    logger.info("Checking %d headers ...", len(headers))
    for header in headers:
        logger.debug("header[%s]='%s'", type(header), header)
        block_level = tidyup.reason(header.text).lower()

        logger.debug("block_level='%s' - BEFORE!", block_level)
        if block_level in language_mapping:
            logger.debug("block_level='%s' - FOUND!", block_level)
            block_level = language_mapping[block_level].lower()
        else:
            logger.warning("block_level='%s' not found in language mapping table", block_level)

        logger.debug("block_level='%s' - AFTER!", block_level)
        if block_level not in blocklist:
            logger.warning("block_level='%s' not found in blocklist()=%d", block_level, len(blocklist))
            continue

        # find_next() is used instead of a sibling lookup to account for
        # instances that e.g. hide lists in dropdown menu
        logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
        table = header.find_next("table")
        if table is None:
            logger.warning("domain='%s',block_level='%s': no table found after header - SKIPPED!", domain, block_level)
            continue

        # First row is skipped, it is expected to be the table's headline
        for line in table.find_all("tr")[1:]:
            logger.debug("line[]='%s'", type(line))
            cells = line.find_all("td")
            if len(cells) == 0:
                logger.debug("domain='%s',block_level='%s': row has no cells - SKIPPED!", domain, block_level)
                continue

            blocked = cells[0].text
            logger.debug("blocked='%s'", blocked)

            blocked = tidyup.domain(blocked) if blocked != "" else None
            # A row without a second cell carries no reason
            reason = tidyup.reason(cells[1].text) if len(cells) > 1 else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked is None or blocked == "":
                logger.debug("domain='%s',block_level='%s': blocked is empty - SKIPPED!", domain, block_level)
                continue
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason)
            blocklist[block_level].append({
                "blocked": blocked,
                "reason" : reason,
            })

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return blocklist