]> git.mxchange.org Git - fba.git/blob - fba/networks/pleroma.py
Continued:
[fba.git] / fba / networks / pleroma.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import logging
18
19 import bs4
20
21 from fba import database
22 from fba import utils
23
24 from fba.helpers import config
25 from fba.helpers import domain as domain_helper
26 from fba.helpers import tidyup
27
28 from fba.http import federation
29 from fba.http import network
30
31 from fba.models import instances
32
# Module-wide logging setup; the logger is named after this module
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Translation table used when scraping the about page: lower-cased section
# heading (as found in an <h2> element) -> internal block level name
language_mapping = {
    # English headings
    "limited servers": "followers_only",
    "suspended servers": "reject",
    "silenced servers": "silenced",
    "filtered media": "filtered_media",
}
44
def fetch_blocks(domain: str, nodeinfo_url: str) -> list:
    """
    Collects the domain blocks published by a Pleroma instance.

    The blocks are read from the 'metadata' -> 'federation' section of the
    instance's nodeinfo document ('mrf_simple' and 'quarantined_instances'),
    reasons are then merged in from 'mrf_simple_info' respectively
    'quarantined_instances_info'. When none of these elements is present the
    about page is scraped instead, see fetch_blocks_from_about().

    Parameters:
        domain       -- domain name of the blocking instance (checked by
                        domain_helper.raise_on(), presumably raising on
                        invalid values - confirm against that helper)
        nodeinfo_url -- URL of the nodeinfo document, must be a non-empty string

    Returns:
        A list of dicts with the keys 'blocker', 'blocked', 'reason' and
        'block_level'. The list is empty when nodeinfo could not be fetched
        or lacks the 'metadata'/'federation' elements.

    Raises:
        ValueError -- when nodeinfo_url is not a string or is empty, or when
                      a reason in 'mrf_simple_info' has an unsupported type
    """
    logger.debug("domain='%s',nodeinfo_url='%s' - CALLED!", domain, nodeinfo_url)
    domain_helper.raise_on(domain)

    if not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    blockdict = list()
    rows = None
    try:
        logger.debug("Fetching nodeinfo: domain='%s',nodeinfo_url='%s'", domain, nodeinfo_url)
        rows = federation.fetch_nodeinfo(domain, nodeinfo_url)
    except network.exceptions as exception:
        # Network problems are recorded at the instance, rows stays None
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s'", domain)
        return list()
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s'", len(rows), domain)
        return list()
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s'", len(rows["metadata"]), domain)
        return list()

    data = rows["metadata"]["federation"]

    # Becomes True as soon as any usable JSON element was found, otherwise
    # the about page is scraped as a fall-back at the end
    found = False

    logger.debug("data[]='%s'", type(data))
    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True

        # 'quarantined_instances' is not guaranteed to be present next to
        # 'mrf_simple', so fall back to an empty list instead of raising
        # a KeyError
        block_levels = {
            **data["mrf_simple"],
            **{
                "quarantined_instances": data.get("quarantined_instances") or []
            }
        }

        for block_level, blocklist in block_levels.items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = tidyup.domain(block_level)
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                # An accept-list is no block
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            block_level = utils.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            if len(blocklist) > 0:
                for blocked in blocklist:
                    logger.debug("blocked='%s' - BEFORE!", blocked)
                    blocked = tidyup.domain(blocked)
                    logger.debug("blocked='%s' - AFTER!", blocked)

                    if blocked == "":
                        logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", domain, block_level)
                        continue
                    elif not utils.is_domain_wanted(blocked):
                        logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                        continue

                    logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
                    blocked = utils.deobfuscate_domain(blocked, domain)

                    # The deobfuscated name may differ, so check it again
                    logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
                    if not utils.is_domain_wanted(blocked):
                        logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                        continue

                    logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : blocked,
                        "reason"     : None,
                        "block_level": block_level,
                    })

    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked)
            logger.debug("blocked='%s' - AFTER!", blocked)

            if blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue
            elif not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
            blocked = utils.deobfuscate_domain(blocked, domain)

            # The deobfuscated name may differ, so check it again
            logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
            if not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Reasons
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True
        for block_level, info in (
            {
                **data["mrf_simple_info"],
                **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
            }
        ).items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
            block_level = tidyup.domain(block_level)
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s': Skipping block_level='%s' ...", domain, block_level)
                continue

            block_level = utils.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked)
                logger.debug("blocked='%s' - AFTER!", blocked)

                # A reason is either a plain string or a dict with the
                # element 'reason', anything else except None is rejected
                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"]) if isinstance(reason["reason"], str) else None
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked == "":
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue
                elif not utils.is_domain_wanted(blocked):
                    logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                    continue

                logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
                blocked = utils.deobfuscate_domain(blocked, domain)
                logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)

                # Attach the reason to every already collected block of this domain
                logger.debug("Checking %d blockdict records ...", len(blockdict))
                for block in blockdict:
                    logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
                    if block["blocked"] == blocked:
                        logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"])
                        block["reason"] = reason

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        quarantined = data["quarantined_instances_info"]["quarantined_instances"]
        for raw_blocked, info in quarantined.items():
            logger.debug("blocked='%s' - BEFORE!", raw_blocked)
            blocked = tidyup.domain(raw_blocked)

            # Validate the record first and read it through the original
            # key, as the tidied domain may no longer match that key
            if not isinstance(info, dict) or "reason" not in info:
                logger.warning("Cannot find blocked='%s' in rows()=%d,domain='%s' - BREAK!", blocked, len(quarantined), domain)
                break

            reason = tidyup.reason(info["reason"])
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue
            elif not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", blocked, domain)
            blocked = utils.deobfuscate_domain(blocked, domain)

            # The deobfuscated name may differ, so check it again
            logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)
            if not utils.is_domain_wanted(blocked):
                logger.debug("blocked='%s' is not wanted - SKIPPED!", blocked)
                continue

            # Attach the reason to every already collected block of this domain
            logger.debug("Checking %d blockdict records ...", len(blockdict))
            for block in blockdict:
                logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
                if block["blocked"] == blocked:
                    logger.debug("Updating reason='%s' for blocker='%s'", reason, block["blocked"])
                    block["reason"] = reason
    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    if not found:
        # Nothing usable in nodeinfo, try the about page instead
        logger.debug("Did not find any useable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d different blocklists ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("block_level='%s'", block_level)
                rows = blocklist[block_level]

                logger.debug("rows[%s]()=%d'", type(rows), len(rows))
                for block in rows:
                    logger.debug("Invoking utils.deobfuscate_domain(%s, %s) ...", block["blocked"], domain)
                    block["blocked"] = utils.deobfuscate_domain(block["blocked"], domain)

                    logger.debug("block[blocked]='%s' - DEOBFUSCATED!", block["blocked"])
                    if not utils.is_domain_wanted(block["blocked"]):
                        logger.debug("block[blocked]='%s' is not wanted - SKIPPED!", block["blocked"])
                        continue

                    logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...",domain, block["blocked"], block["reason"], block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : block["blocked"],
                        "reason"     : block["reason"],
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict
299
def fetch_blocks_from_about(domain: str) -> dict:
    """
    Scrapes the domain blocks from the about page of an instance.

    The page '/instance/about/index.html' is fetched and parsed. Each <h2>
    header is translated through language_mapping into a block level, the
    rows of the next <table> following that header (except the first row)
    are taken as blocked domain and reason.

    Parameters:
        domain -- domain name of the instance (checked by
                  domain_helper.raise_on(), presumably raising on invalid
                  values - confirm against that helper)

    Returns:
        A dict with the keys 'reject', 'filtered_media', 'followers_only'
        and 'silenced', each holding a list of dicts with the keys
        'blocked' and 'reason'. An empty dict is returned when no page
        could be fetched and parsed.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    logger.debug("Fetching mastodon blocks from domain='%s'", domain)
    doc = None
    for path in ["/instance/about/index.html"]:
        try:
            # Forget any document parsed from a previous path
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            # Record the error at the instance and give up on further paths
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        # Return an empty dict (not a list) to honour the declared return type
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        return {}

    blocklist = {
        "reject"        : [],
        "filtered_media": [],
        "followers_only": [],
        "silenced"      : [],
    }

    # find_all() always returns a (possibly empty) result set, never None
    headers = doc.find_all("h2")
    logger.debug("headers[]='%s'", type(headers))

    logger.info("Checking %d headers ...", len(headers))
    for header in headers:
        logger.debug("header[%s]='%s'", type(header), header)
        block_level = tidyup.reason(header.text).lower()

        logger.debug("block_level='%s' - BEFORE!", block_level)
        if block_level in language_mapping:
            logger.debug("block_level='%s' - FOUND!", block_level)
            block_level = language_mapping[block_level].lower()
        else:
            logger.warning("block_level='%s' not found in language mapping table", block_level)

        logger.debug("block_level='%s - AFTER!'", block_level)
        if block_level in blocklist:
            # find_next() is used as the table is not necessarily a direct
            # sibling of the header
            logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
            table = header.find_next("table")

            if table is None:
                logger.warning("No table found after header for block_level='%s',domain='%s' - SKIPPED!", block_level, domain)
                continue

            # The first row is skipped, presumably the table's heading row
            for line in table.find_all("tr")[1:]:
                logger.debug("line[]='%s'", type(line))
                cells = line.find_all("td")

                if len(cells) < 2:
                    # Row does not carry both a domain and a reason cell
                    logger.debug("line has only %d cells, block_level='%s' - SKIPPED!", len(cells), block_level)
                    continue

                blocked = tidyup.domain(cells[0].text)
                reason = tidyup.reason(cells[1].text)

                logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason)
                blocklist[block_level].append({
                    "blocked": blocked,
                    "reason" : reason,
                })
        else:
            logger.warning("block_level='%s' not found in blocklist()=%d", block_level, len(blocklist))

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return blocklist