]> git.mxchange.org Git - fba.git/blob - fba/federation.py
Continued:
[fba.git] / fba / federation.py
1 # Copyright (C) 2023 Free Software Foundation
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License as published
5 # by the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU Affero General Public License for more details.
12 #
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
15
16 from urllib.parse import urlparse
17
18 import bs4
19 import validators
20
21 from fba import blacklist
22 from fba import config
23 from fba import csrf
24 from fba import network
25
26 from fba.helpers import tidyup
27 from fba.helpers import version
28
29 from fba.models import instances
30
31 from fba.networks import lemmy
32 from fba.networks import misskey
33 from fba.networks import peertube
34
35 # "rel" identifiers (no real URLs)
# Known nodeinfo schema identifiers as they appear in the "rel" field of a
# /.well-known/nodeinfo document. They are compared as opaque strings and are
# never fetched. Order: all https variants first (newest schema first),
# followed by the same versions with the http scheme.
nodeinfo_identifier = [
    f"{scheme}://nodeinfo.diaspora.software/ns/schema/{schema}"
    for scheme in ("https", "http")
    for schema in ("2.1", "2.0", "1.1", "1.0")
]
46
def fetch_instances(domain: str, origin: str, software: str, command: str, path: str = None):
    """Register 'domain' (when unknown) and every usable peer it reports.

    All parameters are validated first, regardless of whether 'software' is
    known. When 'software' is None it is determined through
    determine_software(); a network failure there is reported and the
    function continues with software=None.

    Parameters:
        domain:   Domain to fetch peers from, must be a valid, non-fake domain
        origin:   Domain where 'domain' was found, or None
        software: Software type of 'domain', or None to have it determined
        command:  Name of the invoking command, stored with new instances
        path:     Optional nodeinfo path handed to determine_software()

    Raises:
        ValueError: On wrong parameter types, empty 'domain'/'command',
                    fake (.tld) domains or invalid domain names
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',software='{software}',path='{path}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif not isinstance(software, str) and software is not None:
        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
    elif not isinstance(command, str):
        raise ValueError(f"Parameter command[]='{type(command)}' is not 'str'")
    elif command == "":
        raise ValueError("Parameter 'command' is empty")
    elif domain.endswith(".arpa"):
        print(f"WARNING: domain='{domain}' is a reversed .arpa domain and should not be used generally.")
        return
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")

    if software is None:
        # Software type is unknown, so it has to be determined first. This is
        # done only after validation so that an unknown software type can no
        # longer bypass the checks on 'command' and 'domain'.
        # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
        instances.set_last_instance_fetch(domain)

        # DEBUG: print(f"DEBUG: software for domain='{domain}' is not set, determining ...")
        try:
            software = determine_software(domain, path)
        except network.exceptions as exception:
            # Best effort only: continue with unknown software, but leave a trace
            print(f"WARNING: Exception '{type(exception)}' during determining software type for domain='{domain}'")

        # DEBUG: print(f"DEBUG: Determined software='{software}' for domain='{domain}'")

    if not instances.is_registered(domain.split("/")[0]):
        # DEBUG: print(f"DEBUG: Adding new domain='{domain}',origin='{origin}',command='{command}',path='{path}',software='{software}'")
        instances.add(domain, origin, command, path, software)

    # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
    instances.set_last_instance_fetch(domain)

    # DEBUG: print("DEBUG: Fetching instances for domain:", domain, software)
    peerlist = fetch_peers(domain, software)

    if peerlist is None:
        print("ERROR: Cannot fetch peers:", domain)
        return
    elif instances.has_pending(domain):
        # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo data, flushing ...")
        instances.update_data(domain)

    print(f"INFO: Checking {len(peerlist)} instances from domain='{domain}' ...")
    for instance in peerlist:
        # DEBUG: print(f"DEBUG: instance='{instance}'")
        if instance is None:
            # Skip "None" types as tidyup.domain() cannot parse them
            continue

        # DEBUG: print(f"DEBUG: instance='{instance}' - BEFORE")
        instance = tidyup.domain(instance)
        # DEBUG: print(f"DEBUG: instance='{instance}' - AFTER")

        if instance == "":
            print(f"WARNING: Empty instance after tidyup.domain(), domain='{domain}'")
        elif not validators.domain(instance.split("/")[0]):
            print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}',software='{software}'")
        elif instance.endswith(".arpa"):
            print(f"WARNING: instance='{instance}' is a reversed .arpa domain and should not be used generally.")
        elif blacklist.is_blacklisted(instance):
            # DEBUG: print("DEBUG: instance is blacklisted:", instance)
            pass
        elif instance.find("/profile/") > 0 or instance.find("/users/") > 0:
            # Link to a single user profile, not an instance
            # DEBUG: print(f"DEBUG: instance='{instance}' is a link to a single user profile - SKIPPED!")
            pass
        elif not instances.is_registered(instance):
            # DEBUG: print("DEBUG: Adding new instance:", instance, domain)
            instances.add(instance, domain, command)

    # DEBUG: print("DEBUG: EXIT!")
135
def fetch_peers(domain: str, software: str) -> list:
    """Fetch the list of peers known to 'domain'.

    misskey, lemmy and peertube are delegated to their own modules. For any
    other (or unknown) software "/api/v1/instance/peers" is queried first and
    "/api/v3/site" is used as fallback when the first request reports an
    error.

    Parameters:
        domain:   Domain to query, must be a non-empty string
        software: Software type of 'domain' or None

    Returns:
        Whatever the delegated module returns, or the peers found through the
        generic JSON API; an empty list when nothing could be fetched.

    Raises:
        ValueError: On wrong parameter types or an empty 'domain'
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")
    if software is not None and not isinstance(software, str):
        raise ValueError(f"software[]='{type(software)}' is not 'str'")

    # Software types with a dedicated peer-fetching implementation
    if software == "misskey":
        return misskey.fetch_peers(domain)
    if software == "lemmy":
        return lemmy.fetch_peers(domain)
    if software == "peertube":
        return peertube.fetch_peers(domain)

    found = []

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    try:
        api_headers = csrf.determine(domain, {})
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
        return found

    result = network.get_json_api(
        domain,
        "/api/v1/instance/peers",
        api_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    if "error_message" not in result:
        # First API answered, its JSON payload is taken as the peer list
        found = result["json"]
    else:
        # First API failed, try the alternative one
        result = network.get_json_api(
            domain,
            "/api/v3/site",
            api_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        if "error_message" in result:
            print(f"WARNING: Could not reach any JSON API at domain='{domain}',status_code='{result['status_code']}',error_message='{result['error_message']}'")
        elif "federated_instances" in result["json"]:
            found = found + add_peers(result["json"]["federated_instances"])
        else:
            message = "JSON response does not contain 'federated_instances' or 'error_message'"
            print(f"WARNING: {message},domain='{domain}'")
            instances.set_last_error(domain, message)

    # Remember how many peers were found for this domain
    instances.set_total_peers(domain, found)

    return found
207
def fetch_nodeinfo(domain: str, path: str = None) -> dict:
    """Fetch nodeinfo data of 'domain'.

    Auto-discovery through fetch_wellknown_nodeinfo() is tried first and its
    JSON payload is returned directly when it is non-empty. Otherwise a fixed
    list of well-known request paths is probed. When 'path' is given, only
    the request path equal to it is probed; 'path' may also be the absolute
    form "http(s)://<domain><request path>".

    Parameters:
        domain: Domain to query, must be a non-empty string
        path:   Optional nodeinfo path (or absolute URL on 'domain') to use

    Returns:
        The nodeinfo JSON from auto-discovery, or the result of the last
        network.get_json_api() call, or an empty dict when no request path
        was probed. When the CSRF check fails, a dict with the keys
        'status_code', 'error_message' and 'exception' is returned.

    Raises:
        ValueError: On wrong parameter types or an empty 'domain'
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',path={path} - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")

    # DEBUG: print(f"DEBUG: Fetching nodeinfo from domain='{domain}' ...")
    nodeinfo = fetch_wellknown_nodeinfo(domain)

    # DEBUG: print(f"DEBUG: nodeinfo[{type(nodeinfo)}]({len(nodeinfo)}='{nodeinfo}'")
    if "error_message" not in nodeinfo and "json" in nodeinfo and len(nodeinfo["json"]) > 0:
        # DEBUG: print(f"DEBUG: Found nodeinfo[json]()={len(nodeinfo['json'])} - EXIT!")
        return nodeinfo["json"]

    # Stays empty when 'path' matches none of the request paths below
    data = dict()

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    try:
        # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (nodeinfo,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
        return {
            "status_code"  : 500,
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    request_paths = [
        "/nodeinfo/2.1.json",
        "/nodeinfo/2.1",
        "/nodeinfo/2.0.json",
        "/nodeinfo/2.0",
        "/nodeinfo/1.0",
        "/api/v1/instance"
    ]

    for request in request_paths:
        # DEBUG: print(f"DEBUG: path[{type(path)}]='{path}',request='{request}'")
        if path is not None and path in (f"http://{domain}{request}", f"https://{domain}{request}"):
            # 'path' is the absolute URL of this request path. The previous
            # comparison against f"http://{domain}{path}" could never be true,
            # so reduce it to its path component here.
            # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}' has protocol in path, splitting ...")
            path = urlparse(path).path

        if path is not None and path != request:
            # A specific path was requested and this is not the one
            continue

        # DEBUG: print(f"DEBUG: Fetching request='{request}' from domain='{domain}' ...")
        data = network.get_json_api(
            domain,
            request,
            headers,
            (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
        )

        if "error_message" not in data:
            # DEBUG: print("DEBUG: Success:", request)
            instances.set_detection_mode(domain, "STATIC_CHECK")
            instances.set_nodeinfo_url(domain, request)
            break

        print(f"WARNING: Failed fetching nodeinfo from domain='{domain}',status_code='{data['status_code']}',error_message='{data['error_message']}'")

    # DEBUG: print(f"DEBUG: data()={len(data)} - EXIT!")
    return data
277
def fetch_wellknown_nodeinfo(domain: str) -> dict:
    """Fetch nodeinfo of 'domain' through "/.well-known/nodeinfo" discovery.

    The discovery document's 'links' are walked; for every link whose 'rel'
    is listed in nodeinfo_identifier the 'href' is fetched until one request
    succeeds.

    Parameters:
        domain: Domain to query, must be a non-empty string

    Returns:
        The result dict of the last network request that was made. When the
        CSRF check fails, a dict with the keys 'status_code', 'error_message'
        and 'exception' is returned.

    Raises:
        ValueError: On wrong parameter type or an empty 'domain'
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    try:
        # DEBUG: print(f"DEBUG: Checking CSRF for domain='{domain}'")
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_wellknown_nodeinfo,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
        return {
            "status_code"  : 500,
            # Same string form as used by fetch_nodeinfo(), not the bare type
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    # DEBUG: print("DEBUG: Fetching .well-known info for domain:", domain)
    data = network.get_json_api(
        domain,
        "/.well-known/nodeinfo",
        headers,
        (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
    )

    if "error_message" in data:
        # DEBUG: print("DEBUG: Returning data[]:", type(data))
        return data

    nodeinfo = data["json"]
    # DEBUG: print("DEBUG: Found entries:", len(nodeinfo), domain)
    if "links" not in nodeinfo:
        print("WARNING: nodeinfo does not contain 'links':", domain)
        return data

    # NOTE(review): when no link below gets fetched, 'data' still holds the
    # discovery document itself - confirm that callers expect this.
    # DEBUG: print("DEBUG: Found links in nodeinfo():", len(nodeinfo["links"]))
    for link in nodeinfo["links"]:
        # DEBUG: print(f"DEBUG: link[{type(link)}]='{link}'")
        if not isinstance(link, dict) or "rel" not in link:
            print(f"WARNING: link[]='{type(link)}' is not 'dict' or no element 'rel' found")
        elif link["rel"] not in nodeinfo_identifier:
            print("WARNING: Unknown 'rel' value:", domain, link["rel"])
        elif "href" not in link:
            # Malformed entry, skip it instead of failing with a KeyError
            print(f"WARNING: link with rel='{link['rel']}' has no element 'href', domain='{domain}'")
        else:
            # DEBUG: print("DEBUG: Fetching nodeinfo from:", link["href"])
            data = network.fetch_api_url(
                link["href"],
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            # DEBUG: print("DEBUG: href,data[]:", link["href"], type(data))
            if "error_message" not in data and "json" in data:
                # DEBUG: print("DEBUG: Found JSON nodeinfo():", len(data))
                instances.set_detection_mode(domain, "AUTO_DISCOVERY")
                instances.set_nodeinfo_url(domain, link["href"])
                break

            instances.set_last_error(domain, data)

    # DEBUG: print("DEBUG: Returning data[]:", type(data))
    return data
339
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
    """Detect the software name of 'domain' from the HTML found at 'path'.

    The page is searched for <meta name="generator"> first and for
    <meta property="og:site_name"> second. The content found is cleaned with
    tidyup.domain() and then passed through the version helpers to strip
    version numbers and phrases like "powered by", "hosted on", " by " and
    " see ".

    Parameters:
        domain: Domain to fetch from, must be a non-empty string
        path:   Path of the page to fetch, must be a non-empty string

    Returns:
        The detected software name or None when nothing was found

    Raises:
        ValueError: On wrong parameter types or empty parameters
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")
    if not isinstance(path, str):
        raise ValueError(f"path[]='{type(path)}' is not 'str'")
    if path == "":
        raise ValueError("Parameter 'path' is empty")

    software = None

    response = network.fetch_response(
        domain,
        path,
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    if response.ok and response.status_code < 300 and len(response.text) > 0:
        doc = bs4.BeautifulSoup(response.text, "html.parser")

        generator_tag = doc.find("meta", {"name": "generator"})
        site_name_tag = doc.find("meta", {"property": "og:site_name"})

        if isinstance(generator_tag, bs4.element.Tag) and isinstance(generator_tag.get("content"), str):
            print("DEBUG: Found generator meta tag:", domain)
            software = tidyup.domain(generator_tag.get("content"))
            if software is not None and software != "":
                print(f"INFO: domain='{domain}' is generated by '{software}'")
                instances.set_detection_mode(domain, "GENERATOR")
        elif isinstance(site_name_tag, bs4.element.Tag) and isinstance(site_name_tag.get("content"), str):
            software = tidyup.domain(site_name_tag.get("content"))
            if software is not None and software != "":
                print(f"INFO: domain='{domain}' has og:site_name='{software}'")
                instances.set_detection_mode(domain, "SITE_NAME")

    # First pass: an empty name means "not found", otherwise remove a
    # possible version number
    if isinstance(software, str):
        if software == "":
            software = None
        elif "." in software or " " in software:
            software = version.remove(software)

    # Second pass: strip well-known phrases around the software name, only
    # the first matching phrase is handled
    if isinstance(software, str):
        if "powered by " in software:
            software = version.remove(version.strip_powered_by(software))
        elif " hosted on " in software:
            software = version.remove(version.strip_hosted_on(software))
        elif " by " in software:
            software = version.strip_until(software, " by ")
        elif " see " in software:
            software = version.strip_until(software, " see ")

    return software
406
def determine_software(domain: str, path: str = None) -> str:
    """Determine the software type running on 'domain'.

    Nodeinfo is fetched through fetch_nodeinfo(). When it reports an error,
    contains only a message or lacks [software][name], the result of
    fetch_generator_from_path() is returned instead. Otherwise the reported
    name is cleaned, a few known forks are mapped to their parent software
    and version numbers / "powered by" phrases are removed. When cleaning
    leaves an empty name, fetch_generator_from_path() is used as fallback.

    Parameters:
        domain: Domain to check, must be a non-empty string
        path:   Optional nodeinfo path handed to fetch_nodeinfo()

    Returns:
        The software name or None when it could not be determined

    Raises:
        ValueError: On wrong parameter types or an empty 'domain'
        Exception:  Any exception that fetch_nodeinfo() returned under the
                    key 'exception' is re-raised
    """
    # DEBUG: print(f"DEBUG: domain({len(domain)})={domain},path={path} - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")

    # DEBUG: print(f"DEBUG: Fetching nodeinfo from '{domain}' ...")
    data = fetch_nodeinfo(domain, path)

    # DEBUG: print(f"DEBUG: data[{type(data)}]='{data}'")
    if "exception" in data:
        # Continue raising it
        raise data["exception"]
    elif "error_message" in data:
        # DEBUG: print(f"DEBUG: Returned error_message during fetching nodeinfo: '{data['error_message']}',status_code='{data['status_code']}'")
        return fetch_generator_from_path(domain)
    elif "status" in data and data["status"] == "error" and "message" in data:
        print("WARNING: JSON response is an error:", data["message"])
        instances.set_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "message" in data:
        print("WARNING: JSON response contains only a message:", data["message"])
        instances.set_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "software" not in data or "name" not in data["software"]:
        # DEBUG: print(f"DEBUG: JSON response from domain='{domain}' does not include [software][name], fetching / ...")
        return fetch_generator_from_path(domain)

    software = tidyup.domain(data["software"]["name"])

    # DEBUG: print("DEBUG: sofware after tidyup.domain():", software)
    if software in ["akkoma", "rebased"]:
        # DEBUG: print("DEBUG: Setting pleroma:", domain, software)
        software = "pleroma"
    elif software in ["hometown", "ecko"]:
        # DEBUG: print("DEBUG: Setting mastodon:", domain, software)
        software = "mastodon"
    elif software in ["calckey", "groundpolis", "foundkey", "cherrypick", "meisskey"]:
        # DEBUG: print("DEBUG: Setting misskey:", domain, software)
        software = "misskey"
    elif software == "runtube.re":
        # DEBUG: print("DEBUG: Setting peertube:", domain, software)
        software = "peertube"
    elif software == "nextcloud social":
        # DEBUG: print("DEBUG: Setting nextcloud:", domain, software)
        software = "nextcloud"
    elif software.find("/") > 0:
        print("WARNING: Spliting of slash:", software)
        software = tidyup.domain(software.split("/")[-1])
    elif software.find("|") > 0:
        print("WARNING: Spliting of pipe:", software)
        software = tidyup.domain(software.split("|")[0])
    elif "powered by" in software:
        # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
        software = version.strip_powered_by(software)
    elif isinstance(software, str) and " by " in software:
        # DEBUG: print(f"DEBUG: software='{software}' has ' by ' in it")
        software = version.strip_until(software, " by ")
    elif isinstance(software, str) and " see " in software:
        # DEBUG: print(f"DEBUG: software='{software}' has ' see ' in it")
        software = version.strip_until(software, " see ")

    # DEBUG: print(f"DEBUG: software[]='{type(software)}'")
    if software == "":
        # Nothing usable is left of the reported name, try the generator.
        # Previously the name was set to None first, which made this fallback
        # unreachable and let the '"." in software' test below fail with a
        # TypeError.
        print("WARNING: tidyup.domain() left no software name behind:", domain)
        # DEBUG: print(f"DEBUG: software for '{domain}' was not detected, trying generator ...")
        software = fetch_generator_from_path(domain)
    elif isinstance(software, str) and ("." in software or " " in software):
        # DEBUG: print(f"DEBUG: software='{software}' may contain a version number, domain='{domain}', removing it ...")
        software = version.remove(software)

    # DEBUG: print(f"DEBUG: software[]='{type(software)}'")
    if isinstance(software, str) and "powered by" in software:
        # DEBUG: print(f"DEBUG: software='{software}' has 'powered by' in it")
        software = version.remove(version.strip_powered_by(software))

    # DEBUG: print("DEBUG: Returning domain,software:", domain, software)
    return software
498
def find_domains(tag: bs4.element.Tag) -> list:
    """Extract domain/reason pairs from the rows of an HTML table.

    Only rows containing at least one <td> are considered: the first cell is
    taken as the domain and the second cell as the reason. Blacklisted and
    invalid domains are skipped with a warning. One known row listing several
    gab domains at once is expanded into three separate entries.

    Parameters:
        tag: Element whose <tr> descendants are examined

    Returns:
        List of dicts with the keys 'domain' and 'reason'

    Raises:
        ValueError: When 'tag' is not a bs4.element.Tag
        KeyError:   When 'tag' contains no table rows
    """
    if not isinstance(tag, bs4.element.Tag):
        raise ValueError(f"Parameter tag[]='{type(tag)}' is not type of bs4.element.Tag")

    rows = tag.select("tr")
    if len(rows) == 0:
        raise KeyError("No table rows found in table!")

    found = []
    for row in rows:
        first_cell = row.find("td")
        if not first_cell:
            # No <td> in this row, nothing to extract
            continue

        domain = tidyup.domain(first_cell.text)
        # NOTE(review): assumes every data row has a second cell - a row with
        # a single <td> raises IndexError here
        reason = tidyup.reason(row.findAll("td")[1].text)

        if blacklist.is_blacklisted(domain):
            print(f"WARNING: domain='{domain}' is blacklisted - SKIPPED!")
        elif domain == "gab.com/.ai, develop.gab.com":
            # Multiple domains in one row, add each of them with the same reason
            found.extend(
                {
                    "domain": name,
                    "reason": reason,
                }
                for name in ("gab.com", "gab.ai", "develop.gab.com")
            )
        elif not validators.domain(domain):
            print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!")
        else:
            found.append({
                "domain": domain,
                "reason": reason,
            })

    return found
548
def add_peers(rows: dict) -> list:
    """Collect all non-blacklisted peers from a federated instances mapping.

    The keys "linked", "allowed" and "blocked" are examined in this order;
    missing keys and keys set to None are ignored. Each peer is cleaned with
    tidyup.domain() before the blacklist check.

    Parameters:
        rows: Mapping that may contain the keys named above, each holding an
              iterable of peers

    Returns:
        List of cleaned peers in the order they were encountered
    """
    collected = []
    for section in ("linked", "allowed", "blocked"):
        entries = rows[section] if section in rows else None
        if entries is None:
            # Section is absent or explicitly empty
            continue

        for entry in entries:
            cleaned = tidyup.domain(entry)

            if not blacklist.is_blacklisted(cleaned):
                collected.append(cleaned)

    return collected