]> git.mxchange.org Git - fba.git/blob - fba/federation.py
Continued:
[fba.git] / fba / federation.py
1 # Copyright (C) 2023 Free Software Foundation
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License as published
5 # by the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU Affero General Public License for more details.
12 #
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
15
16 from urllib.parse import urlparse
17
18 import bs4
19 import validators
20
21 from fba import blacklist
22 from fba import config
23 from fba import csrf
24 from fba import network
25
26 from fba.helpers import tidyup
27 from fba.helpers import version
28
29 from fba.models import instances
30
31 from fba.networks import lemmy
32 from fba.networks import misskey
33 from fba.networks import peertube
34
35 # "rel" identifiers (no real URLs)
# HTTPS identifiers come first, then the plain HTTP ones; inside each group
# the schema versions are listed from the newest to the oldest.
nodeinfo_identifier = [
    f"{scheme}://nodeinfo.diaspora.software/ns/schema/{schema}"
    for scheme in ("https", "http")
    for schema in ("2.1", "2.0", "1.1", "1.0")
]
46
def fetch_instances(domain: str, origin: str, software: str, command: str, path: str = None):
    """Fetch the peers of ``domain`` and register all instances not yet known.

    All parameters are validated first. If ``software`` is None it is then
    determined through determine_software(). ``domain`` is added through
    instances.add() when it is not registered yet, its peer list is fetched
    with fetch_peers() and every acceptable, not yet registered peer is added
    with ``domain`` as its origin.

    Parameters:
        domain:   Domain to crawl; a non-empty string that must not end with
                  ".tld" and must pass validators.domain().
        origin:   Origin handed to instances.add() for ``domain``; str or None.
        software: Software name of ``domain``; str, or None to detect it.
        command:  Non-empty string handed to instances.add().
        path:     Optional path handed to determine_software() and
                  instances.add().

    Returns:
        None. The function returns early (after printing a message) for
        ".arpa" domains and when fetch_peers() returned None.

    Raises:
        ValueError: A parameter has the wrong type or is empty, or ``domain``
                    ends with ".tld" or is not a valid domain.
    """
    # Validate every parameter before anything is written or fetched, no
    # matter whether the software type is already known.
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif not isinstance(software, str) and software is not None:
        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
    elif not isinstance(command, str):
        raise ValueError(f"Parameter command[]='{type(command)}' is not 'str'")
    elif command == "":
        raise ValueError("Parameter 'command' is empty")
    elif domain.endswith(".arpa"):
        print(f"WARNING: domain='{domain}' is a reversed .arpa domain and should not be used generally.")
        return
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")

    if software is None:
        # The fetch attempt is recorded even when detection fails below
        instances.set_last_instance_fetch(domain)

        try:
            software = determine_software(domain, path)
        except network.exceptions as exception:
            # Best effort: continue with an unknown software type
            print(f"WARNING: Exception '{type(exception)}' during determining software type for domain='{domain}'")

    if not instances.is_registered(domain.split("/")[0]):
        instances.add(domain, origin, command, path, software)

    instances.set_last_instance_fetch(domain)

    peerlist = fetch_peers(domain, software)

    if peerlist is None:
        print("ERROR: Cannot fetch peers:", domain)
        return
    elif instances.has_pending(domain):
        # Flush pending nodeinfo data for this domain
        instances.update_data(domain)

    print(f"INFO: Checking {len(peerlist)} instances from domain='{domain}' ...")
    for instance in peerlist:
        if instance is None:
            # Skip "None" types as tidyup.domain() cannot parse them
            continue

        instance = tidyup.domain(instance)

        if instance == "":
            print(f"WARNING: Empty instance after tidyup.domain(), domain='{domain}'")
            continue
        elif not validators.domain(instance.split("/")[0]):
            print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}',software='{software}'")
            continue
        elif instance.endswith(".arpa"):
            print(f"WARNING: instance='{instance}' is a reversed .arpa domain and should not be used generally.")
            continue
        elif blacklist.is_blacklisted(instance):
            continue
        elif instance.find("/profile/") > 0 or instance.find("/users/") > 0:
            # Link to a single user profile, not an instance
            continue
        elif not instances.is_registered(instance):
            instances.add(instance, domain, command)
135
def fetch_peers(domain: str, software: str) -> list:
    """Return the peers of ``domain``.

    For "misskey", "lemmy" and "peertube" the call is forwarded to the
    fetch_peers() function of the matching module in fba.networks. For any
    other software "/api/v1/instance/peers" is queried and, if that fails,
    "/api/v3/site" is tried as an alternative. The result is stored through
    instances.set_total_peers() before it is returned.

    Parameters:
        domain:   Non-empty string, the domain to query.
        software: Software name of ``domain``; str or None.

    Returns:
        The peers found; an empty list when the CSRF check raised one of
        network.exceptions or no API delivered usable data.

    Raises:
        ValueError: ``domain`` is not a non-empty string or ``software`` is
                    neither a string nor None.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")
    if software is not None and not isinstance(software, str):
        raise ValueError(f"software[]='{type(software)}' is not 'str'")

    # Software types that bring their own peer-fetching implementation
    specialised = {
        "misskey" : misskey,
        "lemmy"   : lemmy,
        "peertube": peertube,
    }
    if software in specialised:
        return specialised[software].fetch_peers(domain)

    found = list()

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    try:
        csrf_headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
        return found

    response = network.get_json_api(
        domain,
        "/api/v1/instance/peers",
        csrf_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    if "error_message" not in response:
        # First API answered, its JSON payload is the peer list
        found = response["json"]
    else:
        # Was not able to fetch peers, trying alternative
        response = network.get_json_api(
            domain,
            "/api/v3/site",
            csrf_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        if "error_message" in response:
            print(f"WARNING: Could not reach any JSON API at domain='{domain}',status_code='{response['status_code']}',error_message='{response['error_message']}'")
        elif "federated_instances" in response["json"]:
            found = found + add_peers(response["json"]["federated_instances"])
        else:
            message = "JSON response does not contain 'federated_instances' or 'error_message'"
            print(f"WARNING: {message},domain='{domain}'")
            instances.set_last_error(domain, message)

    instances.set_total_peers(domain, found)

    return found
207
def fetch_nodeinfo(domain: str, path: str = None) -> dict:
    """Fetch nodeinfo data for ``domain``.

    Auto-discovery through fetch_wellknown_nodeinfo() is tried first. When
    that yields no usable JSON, a fixed list of request paths is queried. If
    ``path`` is given, only the request path matching it is queried; ``path``
    may be the bare request path or that path prefixed with
    "http://<domain>" or "https://<domain>".

    Parameters:
        domain: Non-empty string, the domain to query.
        path:   Optional nodeinfo path or URL limiting the static requests.

    Returns:
        The "json" element of the auto-discovery response when it is usable.
        Otherwise the response of the last static request, or an empty dict
        when no request path matched ``path``. When the CSRF check raised one
        of network.exceptions, a dict with the keys "status_code",
        "error_message" and "exception" is returned.

    Raises:
        ValueError: ``domain`` is not a non-empty string or ``path`` is
                    neither a string nor None.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")

    nodeinfo = fetch_wellknown_nodeinfo(domain)

    if "error_message" not in nodeinfo and "json" in nodeinfo and len(nodeinfo["json"]) > 0:
        return nodeinfo["json"]

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()
    data = dict()

    try:
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (nodeinfo,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
        return {
            "status_code"  : 500,
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    request_paths = [
        "/nodeinfo/2.1.json",
        "/nodeinfo/2.1",
        "/nodeinfo/2.0.json",
        "/nodeinfo/2.0",
        "/nodeinfo/1.0",
        "/api/v1/instance"
    ]

    for request in request_paths:
        # A given path may carry protocol and host name in front of the
        # request path, so compare against both absolute forms as well.
        http_url  = f"http://{domain}{request}"
        https_url = f"https://{domain}{request}"

        if path is None or path in (request, http_url, https_url):
            if path in (http_url, https_url):
                # Reduce the absolute URL to its path component
                path = urlparse(path).path

            data = network.get_json_api(
                domain,
                request,
                headers,
                (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
            )

            if "error_message" not in data:
                instances.set_detection_mode(domain, "STATIC_CHECK")
                instances.set_nodeinfo_url(domain, request)
                break

            print(f"WARNING: Failed fetching nodeinfo from domain='{domain}',status_code='{data['status_code']}',error_message='{data['error_message']}'")

    # NOTE(review): this returns the whole response of network.get_json_api()
    # while the auto-discovery branch above returns only its "json" element -
    # confirm that callers expect both shapes.
    return data
277
def fetch_wellknown_nodeinfo(domain: str) -> dict:
    """Fetch nodeinfo of ``domain`` through "/.well-known/nodeinfo".

    The well-known document is requested and its "links" are walked. For each
    link whose "rel" is listed in nodeinfo_identifier the "href" is fetched;
    an "href" without scheme and host name is prefixed with
    "https://<domain>". The first link that delivers JSON ends the search and
    detection mode "AUTO_DISCOVERY" plus the link's "href" are stored for the
    domain.

    Parameters:
        domain: Non-empty string, the domain to query.

    Returns:
        The response of the last request that was made: that of the linked
        nodeinfo URL if one was fetched, else that of the well-known request.
        When the CSRF check raised one of network.exceptions, a dict with the
        keys "status_code", "error_message" and "exception" is returned.

    Raises:
        ValueError: ``domain`` is not a non-empty string.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_wellknown_nodeinfo,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
        return {
            "status_code"  : 500,
            # A string, in the same format fetch_nodeinfo() reports
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    data = network.get_json_api(
        domain,
        "/.well-known/nodeinfo",
        headers,
        (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
    )

    if "error_message" in data:
        return data

    nodeinfo = data["json"]
    if "links" not in nodeinfo:
        print("WARNING: nodeinfo does not contain 'links':", domain)
        return data

    for link in nodeinfo["links"]:
        if not isinstance(link, dict) or "rel" not in link:
            print(f"WARNING: link[]='{type(link)}' is not 'dict' or no element 'rel' found")
        elif link["rel"] not in nodeinfo_identifier:
            print("WARNING: Unknown 'rel' value:", domain, link["rel"])
        elif not isinstance(link.get("href"), str):
            # Without a usable 'href' there is nothing to fetch for this link
            print(f"WARNING: link[rel]='{link['rel']}' has no usable 'href' element, domain='{domain}'")
        else:
            # Default is that 'href' has a complete URL, but some hosts don't send that
            url = link["href"]
            components = urlparse(url)

            if components.scheme == "" and components.netloc == "":
                url = f"https://{domain}{url}"

            data = network.fetch_api_url(
                url,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            if "error_message" not in data and "json" in data:
                instances.set_detection_mode(domain, "AUTO_DISCOVERY")
                instances.set_nodeinfo_url(domain, link["href"])
                break
            else:
                instances.set_last_error(domain, data)

    return data
348
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
    """Detect the software of ``domain`` from the HTML served at ``path``.

    The page is fetched and searched for <meta name="generator"> and, when
    that tag is not usable, <meta property="og:site_name">. The tag's content
    is passed through tidyup.domain() and then reduced with the helpers of
    fba.helpers.version (version number, "powered by", "hosted on", " by "
    and " see " parts).

    Parameters:
        domain: Non-empty string, the domain to query.
        path:   Non-empty string, the path to fetch (default "/").

    Returns:
        The software name found, or None when nothing was detected.

    Raises:
        ValueError: ``domain`` or ``path`` is not a non-empty string.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")
    if not isinstance(path, str):
        raise ValueError(f"path[]='{type(path)}' is not 'str'")
    if path == "":
        raise ValueError("Parameter 'path' is empty")

    software = None

    response = network.fetch_response(
        domain,
        path,
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    if response.ok and response.status_code < 300 and len(response.text) > 0:
        document = bs4.BeautifulSoup(response.text, "html.parser")

        generator_tag = document.find("meta", {"name"    : "generator"})
        site_name_tag = document.find("meta", {"property": "og:site_name"})

        if isinstance(generator_tag, bs4.element.Tag) and isinstance(generator_tag.get("content"), str):
            # The generator meta tag takes precedence
            software = tidyup.domain(generator_tag.get("content"))
            if not (software is None or software == ""):
                print(f"INFO: domain='{domain}' is generated by '{software}'")
                instances.set_detection_mode(domain, "GENERATOR")
        elif isinstance(site_name_tag, bs4.element.Tag) and isinstance(site_name_tag.get("content"), str):
            # Otherwise fall back to og:site_name
            software = tidyup.domain(site_name_tag.get("content"))
            if not (software is None or software == ""):
                print(f"INFO: domain='{domain}' has og:site_name='{software}'")
                instances.set_detection_mode(domain, "SITE_NAME")

    if isinstance(software, str):
        if software == "":
            # An empty string counts as "not detected"
            software = None
        elif "." in software or " " in software:
            # May contain a version number
            software = version.remove(software)

    if isinstance(software, str):
        if "powered by " in software:
            software = version.remove(version.strip_powered_by(software))
        elif " hosted on " in software:
            software = version.remove(version.strip_hosted_on(software))
        elif " by " in software:
            software = version.strip_until(software, " by ")
        elif " see " in software:
            software = version.strip_until(software, " see ")

    return software
415
def determine_software(domain: str, path: str = None) -> str:
    """Determine the software type running on ``domain``.

    The name is taken from data["software"]["name"] of the data returned by
    fetch_nodeinfo(). Whenever nodeinfo reports an error, only a message, or
    no software name, fetch_generator_from_path() is used instead. A name
    taken from nodeinfo is passed through tidyup.domain(), known forks are
    mapped to their parent software and extra parts ("/", "|", "powered by",
    " by ", " see ", version numbers) are stripped.

    Parameters:
        domain: Non-empty string, the domain to examine.
        path:   Optional nodeinfo path handed to fetch_nodeinfo().

    Returns:
        The software name, or None when none could be determined.

    Raises:
        ValueError: ``domain`` is not a non-empty string or ``path`` is
                    neither a string nor None.
        Exception:  Whatever fetch_nodeinfo() returned under the key
                    "exception" is raised again.
    """
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")

    software = None

    data = fetch_nodeinfo(domain, path)

    if "exception" in data:
        # Continue raising it
        raise data["exception"]
    elif "error_message" in data:
        return fetch_generator_from_path(domain)
    elif "status" in data and data["status"] == "error" and "message" in data:
        print("WARNING: JSON response is an error:", data["message"])
        instances.set_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "message" in data:
        print("WARNING: JSON response contains only a message:", data["message"])
        instances.set_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "software" not in data or "name" not in data["software"]:
        # No [software][name] in the JSON response, try the HTML of "/" instead
        software = fetch_generator_from_path(domain)
    else:
        software = data["software"]["name"]

    if software is None:
        return None

    # The alias checks below compare against the cleaned-up name
    software = tidyup.domain(software)

    if software in ["akkoma", "rebased"]:
        software = "pleroma"
    elif software in ["hometown", "ecko"]:
        software = "mastodon"
    elif software in ["slipfox calckey", "calckey", "groundpolis", "foundkey", "cherrypick", "meisskey"]:
        software = "misskey"
    elif software == "runtube.re":
        software = "peertube"
    elif software == "nextcloud social":
        software = "nextcloud"
    elif software.find("/") > 0:
        print("WARNING: Spliting of slash:", software)
        software = tidyup.domain(software.split("/")[-1])
    elif software.find("|") > 0:
        print("WARNING: Spliting of pipe:", software)
        software = tidyup.domain(software.split("|")[0])
    elif "powered by" in software:
        software = version.strip_powered_by(software)
    elif " by " in software:
        software = version.strip_until(software, " by ")
    elif " see " in software:
        software = version.strip_until(software, " see ")

    if software == "":
        # Nothing usable is left of the name, try the generator instead
        print("WARNING: tidyup.domain() left no software name behind:", domain)
        software = fetch_generator_from_path(domain)
    elif isinstance(software, str) and ("." in software or " " in software):
        # May contain a version number
        software = version.remove(software)

    if isinstance(software, str) and "powered by" in software:
        software = version.remove(version.strip_powered_by(software))

    return software
512
def find_domains(tag: bs4.element.Tag) -> list:
    """Extract domain/reason pairs from the rows of an HTML table.

    For every <tr> below ``tag`` the text of the first <td> is passed through
    tidyup.domain() and the text of the second <td> through tidyup.reason().
    Rows without any <td>, rows with only one <td>, blacklisted domains and
    invalid domains are skipped.

    Parameters:
        tag: The bs4 tag containing the table rows.

    Returns:
        A list of dicts, each with the keys "domain" and "reason".

    Raises:
        ValueError: ``tag`` is not a bs4.element.Tag.
        KeyError:   ``tag`` contains no <tr> element.
    """
    if not isinstance(tag, bs4.element.Tag):
        raise ValueError(f"Parameter tag[]='{type(tag)}' is not type of bs4.element.Tag")

    rows = tag.select("tr")
    if len(rows) == 0:
        raise KeyError("No table rows found in table!")

    domains = list()
    for element in rows:
        cells = element.find_all("td")
        if len(cells) == 0:
            # No <td> found, e.g. a header row
            continue
        elif len(cells) < 2:
            # Without a second cell there is no reason text to read
            print(f"WARNING: Table row has only {len(cells)} <td> element(s), no reason found - SKIPPED!")
            continue

        domain = tidyup.domain(cells[0].text)
        reason = tidyup.reason(cells[1].text)

        if blacklist.is_blacklisted(domain):
            print(f"WARNING: domain='{domain}' is blacklisted - SKIPPED!")
            continue
        elif domain == "gab.com/.ai, develop.gab.com":
            # Multiple domains in one row, add each of them with the same reason
            for gab_domain in ["gab.com", "gab.ai", "develop.gab.com"]:
                domains.append({
                    "domain": gab_domain,
                    "reason": reason,
                })
            continue
        elif not validators.domain(domain):
            print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!")
            continue

        domains.append({
            "domain": domain,
            "reason": reason,
        })

    return domains
562
def add_peers(rows: dict) -> list:
    """Collect the peers listed under "linked", "allowed" and "blocked".

    Each entry of those lists is passed through tidyup.domain(); entries for
    which blacklist.is_blacklisted() is true are left out. Keys that are
    missing or set to None are ignored.

    Parameters:
        rows: Container holding the optional peer lists.

    Returns:
        A list with all collected peers, in the order linked, allowed, blocked.
    """
    collected = list()

    for section in ["linked", "allowed", "blocked"]:
        if section not in rows or rows[section] is None:
            # Nothing listed under this key
            continue

        for entry in rows[section]:
            candidate = tidyup.domain(entry)

            if not blacklist.is_blacklisted(candidate):
                collected.append(candidate)

    return collected