fba/networks/pleroma.py
# Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

import logging

import bs4

from fba import database
from fba import utils

from fba.helpers import config
from fba.helpers import domain as domain_helper
from fba.helpers import tidyup

from fba.http import network
from fba.http import nodeinfo

from fba.models import blocks
from fba.models import instances

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Language mapping X -> English: maps block list headings found on /about pages
# to internal block level names
language_mapping = {
    # English heading -> internal block level
    "filtered media"   : "filtered_media",
    "limited servers"  : "followers_only",
    "followers-only"   : "followers_only",
    "media removal"    : "media_removal",
    "media_removal"    : "media_removal",
    "media force-set as sensitive": "media_nsfw",
    "nsfw"             : "media_nsfw",
    "reject"           : "reject",
    "suspended servers": "reject",
    "silenced servers" : "silenced",
    "removal from \"the whole known network\" timeline": "federated_timeline_removal",
}
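
# Illustrative sketch only (not part of the original module): how a tidied,
# lower-cased heading from an /about page would be translated through the
# mapping above, as fetch_blocks_from_about() below does:
#
#     heading = "suspended servers"
#     if heading in language_mapping:
#         block_level = language_mapping[heading]  # -> "reject"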

def fetch_blocks(domain: str) -> list:
    """ Fetches blocks from the given Pleroma instance. Blocks are read from the
    instance's nodeinfo metadata where possible and from its /about page as a
    fallback. Returns a list of dicts with the keys 'blocker', 'blocked',
    'reason' and 'block_level'. """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    blockdict = list()
    rows = None
    try:
        logger.debug("Fetching nodeinfo: domain='%s'", domain)
        rows = nodeinfo.fetch(domain, update_mode=False)

        if "error_message" in rows:
            logger.warning("Error message '%s' during fetching nodeinfo for domain='%s'", rows["error_message"], domain)
            instances.set_last_error(domain, rows)
            instances.update(domain)

            logger.debug("Returning empty list ... - EXIT!")
            return list()
        elif "exception" in rows:
            logger.warning("Exception '%s' during fetching nodeinfo for domain='%s' - EXIT!", type(rows["exception"]), domain)
            return list()
        elif "json" in rows:
            logger.debug("rows[json] found for domain='%s'", domain)
            rows = rows["json"]

    except network.exceptions as exception:
        logger.warning("Exception '%s' during fetching nodeinfo from domain='%s'", type(exception), domain)
        instances.set_last_error(domain, exception)

    if rows is None:
        logger.warning("Could not fetch nodeinfo from domain='%s' - EXIT!", domain)
        return list()
    elif "metadata" not in rows:
        logger.warning("rows()=%d does not have key 'metadata', domain='%s' - EXIT!", len(rows), domain)
        return list()
    elif "federation" not in rows["metadata"]:
        logger.warning("rows()=%d does not have key 'federation', domain='%s' - EXIT!", len(rows["metadata"]), domain)
        return list()

    data = rows["metadata"]["federation"]
    found = False

    logger.debug("data[]='%s'", type(data))
    if "mrf_simple" in data:
        logger.debug("Found mrf_simple in API response from domain='%s'", domain)
        found = True
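        # Illustrative shape of the nodeinfo data consumed here (an assumption based
        # on how the keys are read below, not taken verbatim from any instance):
        #
        #     data = {
        #         "mrf_simple": {"reject": ["bad.example"], "media_removal": ["nsfw.example"]},
        #         "quarantined_instances": ["quarantine.example"],
        #         "mrf_simple_info": {"reject": {"bad.example": {"reason": "spam"}}},
        #     }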
        for block_level, blocklist in (
            {
                **data["mrf_simple"],
                **{
                    # Not every instance exposes 'quarantined_instances' here, so default to an empty list
                    "quarantined_instances": data.get("quarantined_instances", [])
                }
            }
        ).items():
            logger.debug("block_level='%s', blocklist()=%d", block_level, len(blocklist))
            block_level = tidyup.domain(block_level)
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s' skipping block_level='accept'", domain)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(blocklist), domain, block_level)
            if len(blocklist) > 0:
                for blocked in blocklist:
                    logger.debug("blocked='%s' - BEFORE!", blocked)
                    blocked = tidyup.domain(blocked)
                    logger.debug("blocked='%s' - AFTER!", blocked)

                    if blocked == "":
                        logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s' - SKIPPED!", domain, block_level)
                        continue

                    logger.debug("Invoking utils.deobfuscate(%s, %s) ...", blocked, domain)
                    blocked = utils.deobfuscate(blocked, domain)

                    logger.debug("blocked='%s' - DEOBFUSCATED!", blocked)

                    logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : blocked,
                        "reason"     : None,
                        "block_level": block_level,
                    })

    elif "quarantined_instances" in data:
        logger.debug("Found 'quarantined_instances' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        for blocked in data["quarantined_instances"]:
            logger.debug("blocked='%s' - BEFORE!", blocked)
            blocked = tidyup.domain(blocked)
            logger.debug("blocked='%s' - AFTER!", blocked)

            if blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue

            logger.debug("Appending blocker='%s',blocked='%s',block_level='%s' ...", domain, blocked, block_level)
            blockdict.append({
                "blocker"    : domain,
                "blocked"    : blocked,
                "reason"     : None,
                "block_level": block_level,
            })

    else:
        logger.warning("Cannot find 'mrf_simple' or 'quarantined_instances' in JSON reply: domain='%s'", domain)

    logger.debug("Invoking commit() ...")
    database.connection.commit()

    # Reasons
    if "mrf_simple_info" in data:
        logger.debug("Found mrf_simple_info in API response: domain='%s'", domain)
        found = True
        for block_level, info in (
            {
                **data["mrf_simple_info"],
                **(data["quarantined_instances_info"] if "quarantined_instances_info" in data else {})
            }
        ).items():
            logger.debug("block_level='%s', info.items()=%d", block_level, len(info.items()))
            block_level = tidyup.domain(block_level)
            logger.debug("block_level='%s' - AFTER!", block_level)

            if block_level == "":
                logger.warning("block_level is now empty!")
                continue
            elif block_level == "accept":
                logger.debug("domain='%s': Skipping block_level='%s' ...", domain, block_level)
                continue

            block_level = blocks.alias_block_level(block_level)

            logger.debug("Checking %d entries from domain='%s',block_level='%s' ...", len(info.items()), domain, block_level)
            for blocked, reason in info.items():
                logger.debug("blocked='%s',reason[%s]='%s' - BEFORE!", blocked, type(reason), reason)
                blocked = tidyup.domain(blocked)
                logger.debug("blocked='%s' - AFTER!", blocked)

                if isinstance(reason, str):
                    logger.debug("reason[] is a string")
                    reason = tidyup.reason(reason)
                elif isinstance(reason, dict) and "reason" in reason:
                    logger.debug("reason[] is a dict")
                    reason = tidyup.reason(reason["reason"]) if isinstance(reason["reason"], str) else None
                elif reason is not None:
                    raise ValueError(f"Cannot handle reason[]='{type(reason)}'")

                logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

                if blocked == "":
                    logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                    continue

                logger.debug("Checking %d blockdict records ...", len(blockdict))
                for block in blockdict:
                    logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
                    if block["blocked"] == blocked:
                        logger.debug("Updating reason='%s' for blocked='%s'", reason, block["blocked"])
                        block["reason"] = reason

    elif "quarantined_instances_info" in data and "quarantined_instances" in data["quarantined_instances_info"]:
        logger.debug("Found 'quarantined_instances_info' in JSON response: domain='%s'", domain)
        found = True
        block_level = "quarantined"

        rows = data["quarantined_instances_info"]["quarantined_instances"]
        for blocked in rows:
            logger.debug("blocked='%s' - BEFORE!", blocked)

            # Guard against entries without a reason before accessing it below
            if "reason" not in rows[blocked]:
                logger.warning("Cannot find 'reason' for blocked='%s' in rows()=%d,domain='%s' - SKIPPED!", blocked, len(rows), domain)
                continue

            reason = tidyup.reason(rows[blocked]["reason"])
            blocked = tidyup.domain(blocked)
            logger.debug("blocked='%s',reason='%s' - AFTER!", blocked, reason)

            if blocked == "":
                logger.warning("blocked is empty after tidyup.domain(): domain='%s',block_level='%s'", domain, block_level)
                continue

            logger.debug("Checking %d blockdict records ...", len(blockdict))
            for block in blockdict:
                logger.debug("block[blocked]='%s',blocked='%s'", block["blocked"], blocked)
                if block["blocked"] == blocked:
                    logger.debug("Updating reason='%s' for blocked='%s'", reason, block["blocked"])
                    block["reason"] = reason
    else:
        logger.warning("Cannot find 'mrf_simple_info' or 'quarantined_instances_info' in JSON reply: domain='%s'", domain)

    if not found:
        logger.debug("Did not find any usable JSON elements, domain='%s', continuing with /about page ...", domain)
        blocklist = fetch_blocks_from_about(domain)

        logger.debug("blocklist()=%d", len(blocklist))
        if len(blocklist) > 0:
            logger.info("Checking %d different blocklists ...", len(blocklist))
            for block_level in blocklist:
                logger.debug("block_level='%s'", block_level)
                rows = blocklist[block_level]

                logger.debug("rows[%s]()=%d", type(rows), len(rows))
                for block in rows:
                    logger.debug("Appending blocker='%s',block[blocked]='%s',block[reason]='%s',block_level='%s' ...", domain, block["blocked"], block["reason"], block_level)
                    blockdict.append({
                        "blocker"    : domain,
                        "blocked"    : block["blocked"],
                        "reason"     : block["reason"],
                        "block_level": block_level,
                    })

    logger.debug("blockdict()=%d - EXIT!", len(blockdict))
    return blockdict

def fetch_blocks_from_about(domain: str) -> dict:
    """ Fetches blocks from the instance's /about page, used as a fallback when
    the nodeinfo metadata does not expose any block lists. Returns a dict
    mapping block levels to lists of dicts with 'blocked' and 'reason' keys. """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    logger.debug("Fetching blocks from /about page of domain='%s' ...", domain)
    doc = None
    for path in ["/instance/about/index.html"]:
        try:
            # Reset the parsed document for each path
            doc = None

            logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
            response = network.fetch_response(
                domain,
                path,
                network.web_headers,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
            if not response.ok or response.text.strip() == "":
                logger.warning("path='%s' does not exist on domain='%s' - SKIPPED!", path, domain)
                continue

            logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
            doc = bs4.BeautifulSoup(
                response.text,
                "html.parser",
            )

            logger.debug("doc[]='%s'", type(doc))
            if doc.find("h2") is not None:
                logger.debug("Found 'h2' header in path='%s' - BREAK!", path)
                break

        except network.exceptions as exception:
            logger.warning("Cannot fetch from domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
            instances.set_last_error(domain, exception)
            break

    blocklist = {
        "reject"        : [],
        "filtered_media": [],
        "followers_only": [],
        "silenced"      : [],
        "media_nsfw"    : [],
        "media_removal" : [],
        "federated_timeline_removal": [],
    }

    logger.debug("doc[]='%s'", type(doc))
    if doc is None:
        logger.warning("Cannot fetch any /about pages for domain='%s' - EXIT!", domain)
        return dict()

    headers = doc.find_all("h2")

    logger.debug("headers[]='%s'", type(headers))
    if len(headers) == 0:
        logger.warning("Cannot find any 'h2' headers on /about page of domain='%s' - EXIT!", domain)
        return dict()

    logger.info("Checking %d headers ...", len(headers))
    for header in headers:
        logger.debug("header[%s]='%s'", type(header), header)
        block_level = tidyup.reason(header.text).lower()

        logger.debug("block_level='%s' - BEFORE!", block_level)
        if block_level in language_mapping:
            logger.debug("block_level='%s' - FOUND!", block_level)
            block_level = language_mapping[block_level].lower()
        else:
            logger.warning("block_level='%s' not found in language mapping table", block_level)

        logger.debug("block_level='%s' - AFTER!", block_level)
        if block_level in blocklist:
            # The block lists are rendered as tables following each "h2" header; some
            # instances hide them e.g. in a dropdown menu, so look up the next table in
            # document order rather than only among the header's siblings.
            logger.debug("Found block_level='%s', importing domain blocks ...", block_level)
            table = header.find_next("table")

            if table is None:
                logger.warning("Cannot find table after header for block_level='%s', domain='%s' - SKIPPED!", block_level, domain)
                continue

            for line in table.find_all("tr")[1:]:
                logger.debug("line[]='%s'", type(line))
                blocked = tidyup.domain(line.find_all("td")[0].text)
                reason = tidyup.reason(line.find_all("td")[1].text)

                if blocked is None or blocked == "":
                    logger.debug("domain='%s',block_level='%s': blocked is empty - SKIPPED!", domain, block_level)
                    continue

                logger.debug("Appending block_level='%s',blocked='%s',reason='%s' ...", block_level, blocked, reason)
                blocklist[block_level].append({
                    "blocked": blocked,
                    "reason" : reason,
                })
        else:
            logger.warning("block_level='%s' not found in blocklist()=%d", block_level, len(blocklist))

    logger.debug("Returning blocklist for domain='%s' - EXIT!", domain)
    return blocklist
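
# Usage sketch (illustrative only; the domain below is a placeholder and the fba
# database connection is assumed to be initialised elsewhere):
#
#     from fba.networks import pleroma
#
#     for block in pleroma.fetch_blocks("pleroma.example.org"):
#         print(block["blocker"], block["blocked"], block["block_level"], block["reason"])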