]> git.mxchange.org Git - fba.git/blob - fba/networks/pleroma.py
Continued/WIP:
[fba.git] / fba / networks / pleroma.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import logging
18
19 import bs4
20 import validators
21
22 from fba import database
23 from fba import utils
24
25 from fba.helpers import blacklist
26 from fba.helpers import config
27 from fba.helpers import domain as domain_helper
28 from fba.helpers import tidyup
29
30 from fba.http import network
31 from fba.http import nodeinfo
32
33 from fba.models import blocks
34 from fba.models import instances
35
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Translation table: lower-cased section heading as found on an instance's
# /about page -> internal block level name. Headings that are missing here
# are looked up unchanged by fetch_blocks_from_about().
language_mapping = {
    # English headings
    "filtered media": "filtered_media",
    "limited servers": "followers_only",
    "followers-only": "followers_only",
    "media removal": "media_removal",
    "media_removal": "media_removal",
    "media force-set as sensitive": "media_nsfw",
    "nsfw": "media_nsfw",
    "reject": "reject",
    "suspended servers": "reject",
    "silenced servers": "silenced",
    'removal from "the whole known network" timeline': "federated_timeline_removal",
}
54
def _update_reason(blockdict: list, blocked: str, reason) -> None:
    """Set `reason` on every record of `blockdict` whose "blocked" equals `blocked`."""
    logger.debug("Checking %d blockdict record(s) ...", len(blockdict))
    for block in blockdict:
        logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
        if block["blocked"] == blocked:
            logger.debug("Updating reason='%s' for blocked='%s'", reason, block["blocked"])
            block["reason"] = reason

def fetch_blocks(domain: str) -> list:
    """Collect the domain blocks published by `domain` in its nodeinfo.

    The block lists are read from `metadata.federation` of the nodeinfo
    document: `mrf_simple` (merged with `quarantined_instances` when that key
    is present) or, failing that, `quarantined_instances` alone. Reasons are
    merged in afterwards from `mrf_simple_info` / `quarantined_instances_info`.
    When none of these keys is present, fetch_blocks_from_about() is used
    instead.

    Args:
        domain: The blocking instance; validated by domain_helper.raise_on().

    Returns:
        A list of dicts with the keys "blocker", "blocked", "reason" and
        "block_level". The list is empty when nodeinfo could not be fetched
        or lacks the expected structure.

    Raises:
        Exception: If `domain` is blacklisted or not registered.
        ValueError: If a reason in `mrf_simple_info` is neither a string,
            a dict containing "reason", nor None.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    # Init variables
    blockdict = list()
    rows = None

    try:
        logger.debug("Fetching nodeinfo: domain='%s'", domain)
        rows = nodeinfo.fetch(domain, update_mode=False)

        if "error_message" in rows:
            logger.warning("Error message '%s' during fetching nodeinfo for domain='%s'", rows["error_message"], domain)
            instances.set_last_error(domain, rows)
            instances.update(domain)

            logger.debug("Returning empty list ... - EXIT!")
            return list()
        elif "exception" in rows:
            logger.warning("Exception '%s' during fetching nodeinfo for domain='%s' - EXIT!", type(rows["exception"]), domain)
            return list()
        elif "json" in rows:
            logger.debug("rows[json] found for domain='%s'", domain)
            rows = rows["json"]

    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    logger.debug("rows[]='%s'", type(rows))
    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s' - EXIT!", domain)
        return list()
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s' - EXIT!", len(rows), domain)
        return list()
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s' - EXIT!", len(rows["metadata"]), domain)
        return list()

    found = False
    data = rows["metadata"]["federation"]
    logger.debug("data[]='%s'", type(data))

    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True

        # 'quarantined_instances' is optional, only merge it when it is there
        block_lists = dict(data["mrf_simple"])
        if "quarantined_instances" in data:
            block_lists["quarantined_instances"] = data["quarantined_instances"]

        for block_level, blocklist in block_lists.items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level in [None, ""]:
                logger.warning("block_level='%s' is now empty!", block_level)
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            for blocked in blocklist:
                logger.debug("blocked='%s' - BEFORE!", blocked)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if blocked in [None, ""]:
                    logger.warning("blocked='%s' is empty after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", blocked, domain, block_level)
                    continue
                elif validators.domain(blocked) and blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue

                logger.debug("Invoking utils.deobfuscate(%s, %s) ...", blocked, domain)
                blocked = utils.deobfuscate(blocked, domain)
                logger.debug("blocked[%s]='%s' - DEOBFUSCATED!", type(blocked), blocked)

                if blocked in [None, ""]:
                    logger.warning("blocked='%s' is None or empty after utils.deobfuscate(): domain='%s',block_level='%s' - SKIPPED!", blocked, domain, block_level)
                    continue
                elif not domain_helper.is_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                blockdict.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": block_level,
                })

    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        logger.debug("Checking %d quarantined instance(s) ...", len(data["quarantined_instances"]))
        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked) if blocked != "" else None
            logger.debug("blocked='%s' - AFTER!", blocked)

            if blocked in [None, ""]:
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Reasons
    # NOTE(review): the records above hold deobfuscated names while the keys
    # below are only tidied up, so obfuscated entries will not get a reason.
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True
        for block_level, info in (
            {
                **data["mrf_simple_info"],
                **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
            }
        ).items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level in [None, ""]:
                logger.warning("block_level='%s' is now empty!", block_level)
                continue
            elif block_level == "accept":
                logger.debug("domain='%s': Skipping block_level='%s' ...", domain, block_level)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"]) if isinstance(reason["reason"], str) else None
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked in [None, ""]:
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue

                _update_reason(blockdict, blocked, reason)

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        quarantined = data["quarantined_instances_info"]["quarantined_instances"]
        for blocked, entry in quarantined.items():
            logger.debug("blocked='%s' - BEFORE!", blocked)

            # Validate the entry while the untouched key is still at hand,
            # tidying up the name afterwards must not affect the lookup.
            if not isinstance(entry, dict) or "reason" not in entry:
                logger.warning("Cannot find reason for blocked='%s' in quarantined()=%d,domain='%s' - SKIPPED!", blocked, len(quarantined), domain)
                continue

            reason  = tidyup.reason(entry["reason"]) if entry["reason"] != "" else None
            blocked = tidyup.domain(blocked) if blocked != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked in [None, ""]:
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue

            _update_reason(blockdict, blocked, reason)
    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    logger.debug("found='%s'", found)
    if not found:
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d different blocklist(s) ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("Checking blocklist[%s]()=%d entries ...", block_level, len(blocklist[block_level]))
                for block in blocklist[block_level]:
                    logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...",domain, block["blocked"], block["reason"], block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : block["blocked"],
                        "reason"     : block["reason"],
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict
294
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the domain blocks of `domain` from its HTML about page.

    The page "/instance/about/index.html" is fetched and parsed. Every <h2>
    heading is translated through `language_mapping` into a block level and
    the rows of the next <table> after that heading (first row skipped as
    table header) are imported as blocked domain plus reason.

    Args:
        domain: The blocking instance; validated by domain_helper.raise_on().

    Returns:
        A dict mapping each known block level to a list of dicts with the
        keys "blocked" and "reason". An empty dict is returned when the page
        could not be fetched.

    Raises:
        Exception: If `domain` is blacklisted or not registered.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    # Init variables
    doc = None

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    for path in ["/instance/about/index.html"]:
        try:
            # Resetting doc type
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

    blocklist = {
        "reject"        : [],
        "filtered_media": [],
        "followers_only": [],
        "silenced"      : [],
        "media_nsfw"    : [],
        "media_removal" : [],
        "federated_timeline_removal": [],
    }

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        # An empty dict keeps the annotated return type, callers testing
        # len() == 0 behave the same as with the former empty list.
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        return dict()

    headers = doc.find_all("h2")

    logger.debug("headers[]='%s'", type(headers))
    if headers is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        return dict()

    logger.info("Checking %d headers ...", len(headers))
    for header in headers:
        logger.debug("header[%s]='%s'", type(header), header)
        block_level = tidyup.reason(header.text).lower()

        logger.debug("block_level='%s' - BEFORE!", block_level)
        if block_level in language_mapping:
            logger.debug("block_level='%s' - FOUND!", block_level)
            block_level = language_mapping[block_level].lower()
        else:
            logger.warning("block_level='%s' not found in language mapping table", block_level)

        logger.debug("block_level='%s' - AFTER!", block_level)
        if block_level in blocklist:
            # find_next() is used so a table that is not a direct sibling of
            # the header (e.g. hidden inside a dropdown menu) is still found
            logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
            table = header.find_next("table")

            if table is None:
                logger.warning("No table found after header for block_level='%s',domain='%s' - SKIPPED!", block_level, domain)
                continue

            for line in table.find_all("tr")[1:]:
                logger.debug("line[]='%s'", type(line))
                cells = line.find_all("td")

                if len(cells) == 0:
                    logger.debug("domain='%s',block_level='%s': row has no cells - SKIPPED!", domain, block_level)
                    continue

                # A row without a second cell is taken as block without reason
                blocked = cells[0].text
                reason  = cells[1].text if len(cells) > 1 else ""

                logger.debug("blocked='%s',reason='%s' - BEFORE!", blocked, reason)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                reason  = tidyup.reason(reason)  if reason  != "" else None
                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked in [None, ""]:
                    logger.debug("domain='%s',block_level='%s': blocked='%s' is empty - SKIPPED!", domain, block_level, blocked)
                    continue
                elif not domain_helper.is_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason)
                blocklist[block_level].append({
                    "blocked": blocked,
                    "reason" : reason,
                })
        else:
            logger.warning("block_level='%s' not found in blocklist()=%d", block_level, len(blocklist))

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return blocklist