]> git.mxchange.org Git - fba.git/blob - fba/networks/pleroma.py
5a6f7ec3709578b687ae36353b9bd708f168e029
[fba.git] / fba / networks / pleroma.py
1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
3 #
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
8 #
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 # GNU Affero General Public License for more details.
13 #
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
16
17 import inspect
18 import validators
19
20 from fba import blacklist
21 from fba import blocks
22 from fba import fba
23 from fba import federation
24 from fba import instances
25 from fba.helpers import tidyup
26
def fetch_blocks(domain: str, origin: str, nodeinfo_url: str):
    """Record the blocks a Pleroma instance publishes in its nodeinfo document.

    The nodeinfo of ``domain`` is fetched with ``federation.fetch_nodeinfo()``
    and its ``metadata.federation`` section is read. Every entry listed under
    ``mrf_simple`` (plus ``quarantined_instances`` when that key is present) is
    stored with ``blocks.add_instance()`` or refreshed with
    ``blocks.update_last_seen()``; blocked domains that are not registered yet
    are added with ``instances.add()``. Afterwards the reasons found under
    ``mrf_simple_info`` (plus ``quarantined_instances_info`` when present) are
    attached with ``blocks.update_reason()``. Each of the two phases ends with
    a commit on ``fba.connection``.

    Args:
        domain: Instance whose block list is fetched; must be a non-empty str.
        origin: Non-empty str or None. It is only validated here.
        nodeinfo_url: Non-empty str handed to ``federation.fetch_nodeinfo()``
            and to ``instances.add()`` for newly seen blocked domains.

    Returns:
        None. The function returns early, without committing, when nodeinfo
        could not be fetched, lacks ``metadata``/``federation``, or the
        federation section contains the key ``enabled``.

    Raises:
        ValueError: If a parameter has the wrong type or is an empty string.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif origin == "":
        raise ValueError("Parameter 'origin' is empty")
    elif not isinstance(nodeinfo_url, str):
        raise ValueError(f"Parameter nodeinfo_url[]='{type(nodeinfo_url)}' is not 'str'")
    elif nodeinfo_url == "":
        raise ValueError("Parameter 'nodeinfo_url' is empty")

    # Blocks
    # NOTE(review): blockdict is filled and its reasons are updated below, but
    # it is neither returned nor read anywhere else in this function.
    blockdict = list()
    rows = federation.fetch_nodeinfo(domain, nodeinfo_url)

    if rows is None:
        print("WARNING: Could not fetch nodeinfo from domain:", domain)
        return
    elif "metadata" not in rows:
        print(f"WARNING: rows()={len(rows)} does not have key 'metadata', domain='{domain}'")
        return
    elif "federation" not in rows["metadata"]:
        print(f"WARNING: rows()={len(rows['metadata'])} does not have key 'federation', domain='{domain}'")
        return

    # DEBUG: print("DEBUG: Updating nodeinfo:", domain)
    instances.update_last_nodeinfo(domain)

    data = rows["metadata"]["federation"]

    # NOTE(review): this returns whenever the key exists, whatever its value is
    # - confirm that this is the intended condition.
    if "enabled" in data:
        # DEBUG: print("DEBUG: Instance has no block list to analyze:", domain)
        return

    if "mrf_simple" in data:
        # "quarantined_instances" is optional, so it is merged only when it is
        # present (same guard as used for the reasons further below).
        block_levels = {
            **data["mrf_simple"],
            **({"quarantined_instances": data["quarantined_instances"]}
            if "quarantined_instances" in data
            else {})
        }

        for block_level, blocklist in block_levels.items():
            # DEBUG: print("DEBUG: block_level, blocklist():", block_level, len(blocklist))
            block_level = tidyup.domain(block_level)

            if block_level == "":
                print("WARNING: block_level is now empty!")
                continue

            if len(blocklist) == 0:
                # DEBUG: print(f"DEBUG: domain='{domain}' has returned zero rows, trying /about/more page ...")
                # NOTE(review): the returned block list is not processed here.
                fetch_blocks_from_about(domain)
                continue

            # DEBUG: print(f"DEBUG: Checking {len(blocklist)} entries from domain='{domain}',block_level='{block_level}' ...")
            for blocked in blocklist:
                blocked = tidyup.domain(blocked)

                # Kept per entry so that a looked-up value never replaces the
                # caller's nodeinfo_url for the following entries.
                blocked_nodeinfo_url = nodeinfo_url

                if blocked == "":
                    print("WARNING: blocked is empty after tidyup.domain():", domain, block_level)
                    continue
                elif blacklist.is_blacklisted(blocked):
                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                    continue
                elif blocked.count("*") > 1:
                    # -ACK!-oma also started obscuring domains without hash
                    searchres = _lookup_obscured_domain(blocked)

                    if searchres is None:
                        print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                        continue

                    blocked, blocked_nodeinfo_url = searchres[0], searchres[1]
                    # DEBUG: print("DEBUG: Looked up domain:", blocked)

                # One validation covers both plain and looked-up domains.
                if not validators.domain(blocked):
                    print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - skipped!")
                    continue
                elif blocked.split(".")[-1] == "arpa":
                    print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
                    continue
                elif not instances.is_registered(blocked):
                    # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, blocked_nodeinfo_url)

                if not blocks.is_instance_blocked(domain, blocked, block_level):
                    # DEBUG: print("DEBUG: Blocking:", domain, blocked, block_level)
                    blocks.add_instance(domain, blocked, "unknown", block_level)

                    if block_level == "reject":
                        # DEBUG: print("DEBUG: Adding to blockdict:", blocked)
                        blockdict.append({
                            "blocked": blocked,
                            "reason" : None
                        })
                else:
                    # The block is already stored: only refresh its last-seen.
                    # DEBUG: print(f"DEBUG: Updating block last seen for domain='{domain}',blocked='{blocked}' ...")
                    blocks.update_last_seen(domain, blocked, block_level)

    # DEBUG: print("DEBUG: Committing changes ...")
    fba.connection.commit()

    # Reasons
    if "mrf_simple_info" in data:
        # DEBUG: print("DEBUG: Found mrf_simple_info:", domain)
        reason_levels = {
            **data["mrf_simple_info"],
            **(data["quarantined_instances_info"]
            if "quarantined_instances_info" in data
            else {})
        }

        for block_level, info in reason_levels.items():
            # DEBUG: print("DEBUG: block_level, info.items():", block_level, len(info.items()))
            block_level = tidyup.domain(block_level)

            if block_level == "":
                print("WARNING: block_level is now empty!")
                continue

            # DEBUG: print(f"DEBUG: Checking {len(info.items())} entries from domain='{domain}',software='pleroma',block_level='{block_level}' ...")
            for blocked, reason in info.items():
                # DEBUG: print(f"DEBUG: blocked='{blocked}',reason='{reason}' - BEFORE!")
                blocked = tidyup.domain(blocked)
                reason_text = _reason_text(reason)
                blocked_nodeinfo_url = nodeinfo_url
                # DEBUG: print(f"DEBUG: blocked='{blocked}',reason_text='{reason_text}' - AFTER!")

                if blocked == "":
                    print("WARNING: blocked is empty after tidyup.domain():", domain, block_level)
                    continue
                elif blacklist.is_blacklisted(blocked):
                    # DEBUG: print(f"DEBUG: blocked='{blocked}' is blacklisted - skipping!")
                    continue
                elif blocked.count("*") > 1:
                    # Same domain guess as above, but for the reasons field.
                    # It has to run before the domain validation, because a
                    # name containing '*' never validates.
                    searchres = _lookup_obscured_domain(blocked)

                    if searchres is None:
                        print(f"WARNING: Cannot deobsfucate blocked='{blocked}' - SKIPPED!")
                        continue

                    blocked, blocked_nodeinfo_url = searchres[0], searchres[1]

                # DEBUG: print("DEBUG: Looking up instance by domain:", blocked)
                if not validators.domain(blocked):
                    print(f"WARNING: blocked='{blocked}',software='pleroma' is not a valid domain name - skipped!")
                    continue
                elif blocked.split(".")[-1] == "arpa":
                    print(f"WARNING: blocked='{blocked}' is a reversed .arpa domain and should not be used generally.")
                    continue
                elif not instances.is_registered(blocked):
                    # DEBUG: print(f"DEBUG: Domain blocked='{blocked}' wasn't found, adding ..., domain='{domain}',origin='{origin}',nodeinfo_url='{nodeinfo_url}'")
                    instances.add(blocked, domain, inspect.currentframe().f_code.co_name, blocked_nodeinfo_url)

                if reason_text is None:
                    # Nothing usable was published for this entry.
                    continue

                # DEBUG: print("DEBUG: Updating block reason:", domain, blocked, reason_text)
                blocks.update_reason(reason_text, domain, blocked, block_level)

                # DEBUG: print(f"DEBUG: blockdict()={len(blockdict)}")
                for entry in blockdict:
                    if entry["blocked"] == blocked:
                        # DEBUG: print("DEBUG: Updating entry reason:", blocked)
                        entry["reason"] = reason_text

    fba.connection.commit()

    # DEBUG: print("DEBUG: EXIT!")


def _lookup_obscured_domain(pattern: str):
    """Look up a '*'-obscured domain in the local ``instances`` table.

    Every ``*`` is replaced by ``_`` before the value is bound to a SQL
    ``LIKE`` comparison; the first match ordered by ``rowid`` is used.

    Returns:
        The fetched row, indexable as ``(domain, nodeinfo_url)``, or ``None``
        when no row matches.
    """
    fba.cursor.execute(
        "SELECT domain, nodeinfo_url FROM instances WHERE domain LIKE ? ORDER BY rowid LIMIT 1", [pattern.replace("*", "_")]
    )
    searchres = fba.cursor.fetchone()
    # DEBUG: print(f"DEBUG: searchres[]='{type(searchres)}'")
    return searchres


def _reason_text(reason):
    """Reduce one reason entry to a tidied string, or ``None`` if there is none.

    Accepts a plain string as well as a dict carrying the text under the key
    ``"reason"``; anything else, and empty strings, yield ``None``.
    """
    if isinstance(reason, dict):
        reason = reason.get("reason")

    if not isinstance(reason, str) or reason == "":
        return None

    return tidyup.reason(reason)
210
def fetch_blocks_from_about(domain: str) -> dict:
    """Scrape the block tables from an instance's about page.

    The paths ``/about/more`` and ``/about`` are requested in that order; the
    first page containing at least one ``<h3>`` element is used. Each ``<h3>``
    whose (optionally translated) text is one of the known headings is followed
    to the next ``<table>``, and every row after the first one is recorded.

    Args:
        domain: Instance to fetch the about page from; must be a non-empty str.

    Returns:
        A dict with the keys ``reject``, ``media_removal`` and
        ``followers_only``. Each value is a list of dicts with the keys
        ``domain``, ``hash`` and ``reason``. The lists are empty when no page
        could be fetched.

    Raises:
        ValueError: If ``domain`` is not a str or is an empty string.
    """
    print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    print("DEBUG: Fetching mastodon blocks from domain:", domain)
    blocklist = {
        "Suspended servers": [],
        "Filtered media"   : [],
        "Limited servers"  : [],
        "Silenced servers" : [],
    }

    # NOTE(review): bs4, network, config and language_mapping are not among the
    # imports visible at the top of this file - confirm they are in scope.
    # Otherwise the resulting NameError is caught below and stored as a fetch
    # error for the domain.
    doc = None
    for path in ("/about/more", "/about"):
        try:
            print(f"DEBUG: Fetching path='{path}' from domain='{domain}' ...")
            doc = bs4.BeautifulSoup(
                network.fetch_response(
                    domain,
                    path,
                    network.web_headers,
                    (config.get("connection_timeout"), config.get("read_timeout"))
                ).text,
                "html.parser",
            )

            if len(doc.find_all("h3")) > 0:
                print(f"DEBUG: path='{path}' had some headlines - BREAK!")
                break

        except Exception as exception:
            # Exception instead of BaseException, so that KeyboardInterrupt and
            # SystemExit still propagate to the caller.
            print("ERROR: Cannot fetch from domain:", domain, exception)
            instances.update_last_error(domain, exception)
            break

    print(f"DEBUG: doc[]='{type(doc)}'")
    if doc is None:
        print(f"WARNING: Cannot find any 'h3' tags for domain='{domain}' - EXIT!")
    else:
        for header in doc.find_all("h3"):
            header_text = tidyup.reason(header.text)

            print(f"DEBUG: header_text='{header_text}'")
            if header_text in language_mapping:
                print(f"DEBUG: header_text='{header_text}'")
                header_text = language_mapping[header_text]
            else:
                print(f"WARNING: header_text='{header_text}' not found in language mapping table")

            # Only an exact key match can be used for the lookup below; every
            # key contains an upper-case letter, so a lower-cased text never
            # matched one of them.
            if header_text in blocklist:
                # replaced find_next_siblings with find_all_next to account for instances that e.g. hide lists in dropdown menu
                for line in header.find_all_next("table")[0].find_all("tr")[1:]:
                    blocklist[header_text].append(
                        {
                            "domain": tidyup.domain(line.find("span").text),
                            "hash"  : tidyup.domain(line.find("span")["title"][9:]),
                            "reason": tidyup.reason(line.find_all("td")[1].text),
                        }
                    )
            else:
                print(f"WARNING: header_text='{header_text}' not found in blocklist()={len(blocklist)}")

    # The same mapped shape is returned whether or not a page was fetched, so
    # callers can rely on one set of keys.
    print("DEBUG: Returning blocklist for domain:", domain)
    return {
        "reject"        : blocklist["Suspended servers"],
        "media_removal" : blocklist["Filtered media"],
        "followers_only": blocklist["Limited servers"] + blocklist["Silenced servers"],
    }