]> git.mxchange.org Git - fba.git/blob - fba/networks/pleroma.py
Continued:
[fba.git] / fba / networks / pleroma.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import logging
18
19 import bs4
20 import validators
21
22 from fba import database
23 from fba import utils
24
25 from fba.helpers import blacklist
26 from fba.helpers import config
27 from fba.helpers import domain as domain_helper
28 from fba.helpers import tidyup
29
30 from fba.http import network
31 from fba.http import nodeinfo
32
33 from fba.models import blocks
34 from fba.models import instances
35
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Maps a section heading of an instance's about page to the block level
# identifier it stands for. Headings are looked up in lower case, so every
# key here must be written in lower case as well.
language_mapping = {
    # English headings
    "filtered media": "filtered_media",
    "limited servers": "followers_only",
    "followers-only": "followers_only",
    "media removal": "media_removal",
    "media_removal": "media_removal",
    "media force-set as sensitive": "media_nsfw",
    "nsfw": "media_nsfw",
    "reject": "reject",
    "suspended servers": "reject",
    "silenced servers": "silenced",
    "removal from \"the whole known network\" timeline": "federated_timeline_removal",
}
54
def fetch_blocks(domain: str) -> list:
    """Collect the domain blocks published by the given instance.

    The instance's nodeinfo is fetched and its ``metadata.federation``
    section is inspected. Blocked domains are read from ``mrf_simple`` or,
    when that key is absent, from ``quarantined_instances``. Reasons are then
    merged in from ``mrf_simple_info`` / ``quarantined_instances_info``. When
    none of these keys is present, the instance's about page is parsed as a
    fallback via fetch_blocks_from_about().

    Args:
        domain: Domain of the blocking instance (the "blocker").

    Returns:
        A list of dicts, each having the keys ``blocker``, ``blocked``,
        ``reason`` and ``block_level``. The list is empty when nodeinfo could
        not be fetched or lacks the expected structure.

    Raises:
        Exception: If ``domain`` is blacklisted or not registered.
        ValueError: If a reason entry is neither a string, a dict with a
            ``reason`` key, nor None.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    # Init variables
    blockdict = list()
    rows = None

    try:
        logger.debug("Fetching nodeinfo: domain='%s'", domain)
        rows = nodeinfo.fetch(domain, update_mode=False)

        if "error_message" in rows:
            logger.warning("Error message '%s' during fetching nodeinfo for domain='%s'", rows["error_message"], domain)
            instances.set_last_error(domain, rows)
            instances.update(domain)

            logger.debug("Returning empty list ... - EXIT!")
            return list()
        elif "exception" in rows:
            logger.warning("Exception '%s' during fetching nodeinfo for domain='%s' - EXIT!", type(rows["exception"]), domain)
            return list()
        elif "json" in rows:
            # Unwrap the actual nodeinfo document
            logger.debug("rows[json] found for domain='%s'", domain)
            rows = rows["json"]

    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    logger.debug("rows[]='%s'", type(rows))
    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s' - EXIT!", domain)
        return list()
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s' - EXIT!", len(rows), domain)
        return list()
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s' - EXIT!", len(rows["metadata"]), domain)
        return list()

    # Becomes True as soon as any known key was found, otherwise the about
    # page is tried as a fallback at the end
    found = False
    data = rows["metadata"]["federation"]
    logger.debug("data[]='%s'", type(data))

    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True
        for block_level, blocklist in data["mrf_simple"].items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            # None must be caught here as well, it is what an empty key was
            # turned into one line above
            if block_level in [None, ""]:
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            for blocked in blocklist:
                logger.debug("blocked='%s' - BEFORE!", blocked)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                if blocked in [None, ""]:
                    logger.warning("blocked[%s]='%s' is empty after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", type(blocked), blocked, domain, block_level)
                    continue
                elif validators.domain(blocked) and blacklist.is_blacklisted(blocked):
                    logger.debug("blocked='%s' is blacklisted - SKIPPED!", blocked)
                    continue

                logger.debug("Invoking utils.deobfuscate(%s, %s) ...", blocked, domain)
                blocked = utils.deobfuscate(blocked, domain)
                logger.debug("blocked[%s]='%s' - DEOBFUSCATED!", type(blocked), blocked)

                if blocked in [None, ""]:
                    logger.warning("blocked[%s]='%s' is None or empty after utils.deobfuscate(): domain='%s',block_level='%s' - SKIPPED!", type(blocked), blocked, domain, block_level)
                    continue
                elif not domain_helper.is_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                blockdict.append({
                    "blocker"    : domain,
                    "blocked"    : blocked,
                    "reason"     : None,
                    "block_level": block_level,
                })

    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        logger.debug("Checking %d quarantined instance(s) ...", len(data["quarantined_instances"]))
        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked) if blocked != "" else None
            logger.debug("blocked='%s' - AFTER!", blocked)

            if blocked in [None, ""]:
                logger.warning("blocked[%s]='%s' is empty after tidyup.domain(): domain='%s',block_level='%s'", type(blocked), blocked, domain, block_level)
                continue
            elif not domain_helper.is_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    # NOTE(review): presumably flushes changes done by the helpers invoked
    # above (e.g. instances.set_last_error()) - confirm
    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Reasons
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True
        for block_level, info in (
            {
                **data["mrf_simple_info"],
                **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
            }
        ).items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
            block_level = tidyup.domain(block_level) if block_level != "" else None
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level in [None, ""]:
                logger.warning("block_level='%s' is now empty!", block_level)
                continue
            elif block_level == "accept":
                logger.debug("domain='%s': Skipping block_level='%s' ...", domain, block_level)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                logger.debug("blocked='%s' - AFTER!", blocked)

                # A reason is either a plain string or wrapped in a dict
                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"]) if isinstance(reason["reason"], str) else None
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked in [None, ""]:
                    logger.warning("blocked[%s]='%s' is empty after tidyup.domain(): domain='%s',block_level='%s'", type(blocked), blocked, domain, block_level)
                    continue

                # Attach the reason to every already collected record of
                # the same blocked domain
                logger.debug("Checking %d blockdict records ...", len(blockdict))
                for block in blockdict:
                    logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
                    if block["blocked"] == blocked:
                        logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"])
                        block["reason"] = reason

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        infos = data["quarantined_instances_info"]["quarantined_instances"]
        for blocked in infos:
            logger.debug("blocked='%s' - BEFORE!", blocked)

            # Validate the entry with its original key before reading from
            # it, tidying the key first may change it or turn it into None
            if "reason" not in infos[blocked]:
                logger.warning("Cannot find blocked='%s' in rows()=%d,domain='%s' - BREAK!", blocked, len(infos), domain)
                break

            reason  = tidyup.reason(infos[blocked]["reason"]) if infos[blocked]["reason"] != "" else None
            blocked = tidyup.domain(blocked) if blocked != "" else None
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked in [None, ""]:
                logger.warning("blocked[%s]='%s' is empty after tidyup.domain(): domain='%s',block_level='%s'", type(blocked), blocked, domain, block_level)
                continue

            logger.debug("Checking %d blockdict record(s) ...", len(blockdict))
            for block in blockdict:
                logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
                if block["blocked"] == blocked:
                    logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"])
                    block["reason"] = reason
    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    logger.debug("found='%s'", found)
    if not found:
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d different blocklist(s) ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("Checking blocklist[%s]()=%d entries ...", block_level, len(blocklist[block_level]))
                for block in blocklist[block_level]:
                    logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...",domain, block["blocked"], block["reason"], block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : block["blocked"],
                        "reason"     : block["reason"],
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict
287
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape domain blocks from the about page of the given instance.

    The page is fetched and parsed as HTML. Every ``<h2>`` header whose
    lower-cased text maps (through ``language_mapping``) to a known block
    level is followed to its next ``<table>``; every table row after the
    first one is read as a ``blocked`` / ``reason`` pair.

    Args:
        domain: Domain of the instance whose about page is fetched.

    Returns:
        A dict mapping each known block level to a list of dicts having the
        keys ``blocked`` and ``reason``. An empty dict is returned when the
        page could not be fetched.

    Raises:
        Exception: If ``domain`` is blacklisted or not registered.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    if blacklist.is_blacklisted(domain):
        raise Exception(f"domain='{domain}' is blacklisted but function is invoked.")
    elif not instances.is_registered(domain):
        raise Exception(f"domain='{domain}' is not registered but function is invoked.")

    # Init variables
    doc = None

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    for path in ["/instance/about/index.html"]:
        try:
            # Resetting doc type
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

    # Only these block levels are collected, anything else is ignored
    blocklist = {
        "reject"        : [],
        "filtered_media": [],
        "followers_only": [],
        "silenced"      : [],
        "media_nsfw"    : [],
        "media_removal" : [],
        "federated_timeline_removal": [],
    }

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        # An empty dict keeps the return type consistent with the regular
        # exit below while still being empty and falsy for callers
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        return dict()

    headers = doc.find_all("h2")

    logger.debug("headers[]='%s'", type(headers))
    if headers is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        return dict()

    logger.info("Checking %d headers ...", len(headers))
    for header in headers:
        logger.debug("header[%s]='%s'", type(header), header)
        block_level = tidyup.reason(header.text).lower()

        logger.debug("block_level='%s' - BEFORE!", block_level)
        if block_level in language_mapping:
            logger.debug("block_level='%s' - FOUND!", block_level)
            block_level = language_mapping[block_level].lower()
        else:
            logger.warning("block_level='%s' not found in language mapping table", block_level)

        logger.debug("block_level='%s' - AFTER!", block_level)
        if block_level in blocklist:
            # find_next() is used instead of a sibling lookup so that tables
            # nested deeper in the document (e.g. in a dropdown) are found
            logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
            table = header.find_next("table")

            if table is None:
                logger.warning("No table found after header for block_level='%s',domain='%s' - SKIPPED!", block_level, domain)
                continue

            # First row is skipped, it is expected to be the table's header
            for line in table.find_all("tr")[1:]:
                logger.debug("line[]='%s'", type(line))
                cells = line.find_all("td")

                if len(cells) == 0:
                    logger.debug("domain='%s',block_level='%s': row has no cells - SKIPPED!", domain, block_level)
                    continue

                blocked = cells[0].text
                # A missing second cell is handled like an empty reason
                reason  = cells[1].text if len(cells) > 1 else ""

                logger.debug("blocked='%s',reason='%s' - BEFORE!", blocked, reason)
                blocked = tidyup.domain(blocked) if blocked != "" else None
                reason  = tidyup.reason(reason)  if reason  != "" else None
                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked in [None, ""]:
                    logger.debug("domain='%s',block_level='%s': blocked='%s' is empty - SKIPPED!", domain, block_level, blocked)
                    continue
                elif not domain_helper.is_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason)
                blocklist[block_level].append({
                    "blocked": blocked,
                    "reason" : reason,
                })
        else:
            logger.warning("block_level='%s' not found in blocklist()=%d", block_level, len(blocklist))

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return blocklist