]> git.mxchange.org Git - fba.git/blob - fba/federation.py
Continued:
[fba.git] / fba / federation.py
1 # Copyright (C) 2023 Free Software Foundation
2 #
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License as published
5 # by the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
7 #
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
11 # GNU Affero General Public License for more details.
12 #
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program.  If not, see <https://www.gnu.org/licenses/>.
15
16 from urllib.parse import urlparse
17
18 import bs4
19 import validators
20
21 from fba import blacklist
22 from fba import config
23 from fba import csrf
24 from fba import network
25
26 from fba.helpers import tidyup
27 from fba.helpers import version
28
29 from fba.models import instances
30
31 from fba.networks import lemmy
32 from fba.networks import misskey
33 from fba.networks import peertube
34
35 # "rel" identifiers (no real URLs)
# Every supported schema version is accepted with both URL schemes; the
# order (https before http, newest version first) is kept for any caller
# that depends on list order.
nodeinfo_identifier = [
    f"{scheme}://nodeinfo.diaspora.software/ns/schema/{schema}"
    for scheme in ("https", "http")
    for schema in ("2.1", "2.0", "1.1", "1.0")
]
46
def fetch_instances(domain: str, origin: str, software: str, command: str, path: str = None):
    """Register ``domain`` (if unknown) and every usable peer it reports.

    All parameters are validated before anything else happens. When
    ``software`` is None it is determined through determine_software();
    a network failure there is reported and the crawl continues with
    ``software`` still being None.

    Parameters:
        domain: Domain to crawl; non-empty string, not ending in '.tld'.
        origin: Domain that led to ``domain`` or None.
        software: Software name of ``domain`` or None to auto-detect it.
        command: Name of the invoking command; non-empty string.
        path: Optional nodeinfo path handed to determine_software() and
            instances.add().

    Returns:
        None. The function also returns early (after printing a message)
        for '.arpa' domains and when no peer list could be fetched.

    Raises:
        ValueError: If a parameter has the wrong type, is empty, or
            ``domain`` is a fake ('.tld') or otherwise invalid domain.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',origin='{origin}',software='{software}',path='{path}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif not isinstance(software, str) and software is not None:
        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
    elif not isinstance(command, str):
        raise ValueError(f"Parameter command[]='{type(command)}' is not 'str'")
    elif command == "":
        raise ValueError("Parameter 'command' is empty")
    elif domain.endswith(".arpa"):
        print(f"WARNING: domain='{domain}' is a reversed .arpa domain and should not be used generally.")
        return
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")

    # Software detection is deliberately done AFTER validation: it used to be
    # a branch of the chain above, which skipped every later check.
    if software is None:
        # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
        instances.set_last_instance_fetch(domain)

        try:
            # DEBUG: print(f"DEBUG: software for domain='{domain}' is not set, determining ...")
            software = determine_software(domain, path)
        except network.exceptions as exception:
            # Best effort: keep crawling with unknown software, but say so
            print(f"WARNING: Exception '{type(exception)}' during determining software type for domain='{domain}'")

        # DEBUG: print(f"DEBUG: Determined software='{software}' for domain='{domain}'")

    if not instances.is_registered(domain):
        # DEBUG: print(f"DEBUG: Adding new domain='{domain}',origin='{origin}',command='{command}',path='{path}',software='{software}'")
        instances.add(domain, origin, command, path, software)

    # DEBUG: print(f"DEBUG: Updating last_instance_fetch for domain='{domain}' ...")
    instances.set_last_instance_fetch(domain)

    # DEBUG: print("DEBUG: Fetching instances for domain:", domain, software)
    peerlist = fetch_peers(domain, software)

    if peerlist is None:
        print("ERROR: Cannot fetch peers:", domain)
        return
    elif instances.has_pending(domain):
        # DEBUG: print(f"DEBUG: domain='{domain}' has pending nodeinfo data, flushing ...")
        instances.update_data(domain)

    print(f"INFO: Checking {len(peerlist)} instances from domain='{domain}' ...")
    for instance in peerlist:
        # DEBUG: print(f"DEBUG: instance='{instance}'")
        if instance is None:
            # Skip "None" types as tidyup.domain() cannot parse them
            continue

        instance = tidyup.domain(instance)
        # DEBUG: print(f"DEBUG: instance='{instance}' - AFTER")

        if instance == "":
            print(f"WARNING: Empty instance after tidyup.domain(), domain='{domain}'")
            continue
        elif not validators.domain(instance.split("/")[0]):
            print(f"WARNING: Bad instance='{instance}' from domain='{domain}',origin='{origin}'")
            continue
        elif instance.endswith(".arpa"):
            print(f"WARNING: instance='{instance}' is a reversed .arpa domain and should not be used generally.")
            continue
        elif blacklist.is_blacklisted(instance):
            # DEBUG: print("DEBUG: instance is blacklisted:", instance)
            continue
        elif instance.find("/profile/") > 0 or instance.find("/users/") > 0:
            # Links to single user profiles are no instances
            continue
        elif instance.endswith(".tld"):
            # Fake domains must not be crawled
            continue
        elif not instances.is_registered(instance):
            # DEBUG: print("DEBUG: Adding new instance:", instance, domain)
            instances.add(instance, domain, command)

    # DEBUG: print("DEBUG: EXIT!")
138
def fetch_peers(domain: str, software: str) -> list:
    """Return the peers reported by ``domain``.

    Misskey, Lemmy and PeerTube are delegated to their network modules.
    For everything else '/api/v1/instance/peers' is queried first and
    '/api/v3/site' (key 'federated_instances') serves as fallback.

    Parameters:
        domain: Domain to query; non-empty string.
        software: Software name of ``domain`` or None.

    Returns:
        The peer list (possibly empty), or whatever the delegated
        network module returns.

    Raises:
        ValueError: If ``domain`` is not a non-empty string or
            ``software`` is neither a string nor None.
    """
    # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',software='{software}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")
    if software is not None and not isinstance(software, str):
        raise ValueError(f"software[]='{type(software)}' is not 'str'")

    # Software with its own peer-fetching implementation
    if software == "misskey":
        return misskey.fetch_peers(domain)
    if software == "lemmy":
        return lemmy.fetch_peers(domain)
    if software == "peertube":
        return peertube.fetch_peers(domain)

    found = list()

    try:
        # No need to add network.api_headers here, csrf.determine() provides the headers
        api_headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
        return found

    response = network.get_json_api(
        domain,
        "/api/v1/instance/peers",
        api_headers,
        (config.get("connection_timeout"), config.get("read_timeout"))
    )

    if "error_message" not in response:
        if isinstance(response["json"], list):
            # First API answered with a plain list of peers
            found = response["json"]
        else:
            print(f"WARNING: Cannot parse data[json][]='{type(response['json'])}'")
    else:
        # First API failed, try the alternative one
        response = network.get_json_api(
            domain,
            "/api/v3/site",
            api_headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        if "error_message" in response:
            print(f"WARNING: Could not reach any JSON API at domain='{domain}',status_code='{response['status_code']}',error_message='{response['error_message']}'")
        elif "federated_instances" in response["json"]:
            found = found + add_peers(response["json"]["federated_instances"])
        else:
            message = "JSON response does not contain 'federated_instances' or 'error_message'"
            print(f"WARNING: {message},domain='{domain}'")
            instances.set_last_error(domain, message)

    # DEBUG: print(f"DEBUG: Adding '{len(found)}' for domain='{domain}'")
    instances.set_total_peers(domain, found)

    # DEBUG: print("DEBUG: Returning peers[]:", type(found))
    return found
212
def fetch_nodeinfo(domain: str, path: str = None) -> dict:
    """Fetch the nodeinfo document of ``domain``.

    Auto-discovery via fetch_wellknown_nodeinfo() is tried first. If that
    gives no usable JSON, a fixed list of nodeinfo paths is probed; with
    ``path`` set, only the matching entry of that list is requested.

    Parameters:
        domain: Domain to query; non-empty string.
        path: Optional nodeinfo path, or the full http(s) URL of such a
            path on ``domain``.

    Returns:
        The value stored under 'json' of the first successful response.
        Otherwise the dict of the last failed request (it contains
        'status_code' and 'error_message', plus 'exception' when the CSRF
        check raised), a successful response without JSON payload as-is,
        or an empty dict when ``path`` matched no known nodeinfo path.

    Raises:
        ValueError: If ``domain`` is not a non-empty string or ``path`` is
            neither a string nor None.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}',path='{path}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")

    # DEBUG: print(f"DEBUG: Fetching nodeinfo from domain='{domain}' ...")
    nodeinfo = fetch_wellknown_nodeinfo(domain)

    if "error_message" not in nodeinfo and "json" in nodeinfo and len(nodeinfo["json"]) > 0:
        # DEBUG: print(f"DEBUG: Found nodeinfo[json]()={len(nodeinfo['json'])} - EXIT!")
        return nodeinfo["json"]

    data = dict()

    try:
        # No need to add network.api_headers here, csrf.determine() provides the headers
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (nodeinfo,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
        return {
            "status_code"  : 500,
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    request_paths = [
        "/nodeinfo/2.1.json",
        "/nodeinfo/2.1",
        "/nodeinfo/2.0.json",
        "/nodeinfo/2.0",
        "/nodeinfo/1.0",
        "/api/v1/instance"
    ]

    for request in request_paths:
        # DEBUG: print(f"DEBUG: path[{type(path)}]='{path}',request='{request}'")
        if path is not None and path in (f"http://{domain}{request}", f"https://{domain}{request}"):
            # A full URL was given; compare against the candidate 'request'
            # (the old code compared 'path' with itself and never matched)
            path = urlparse(path).path

        if path is not None and path != request:
            # Caller asked for one specific path and this is not it
            continue

        # DEBUG: print(f"DEBUG: Fetching request='{request}' from domain='{domain}' ...")
        data = network.get_json_api(
            domain,
            request,
            headers,
            (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
        )

        if "error_message" not in data:
            # DEBUG: print("DEBUG: Success:", request)
            instances.set_detection_mode(domain, "STATIC_CHECK")
            instances.set_nodeinfo_url(domain, request)
            break

        print(f"WARNING: Failed fetching nodeinfo from domain='{domain}',status_code='{data['status_code']}',error_message='{data['error_message']}'")

    if "error_message" not in data and data.get("json"):
        # Return the document itself, exactly like the auto-discovery branch
        # above, so callers can look up keys such as 'software' directly.
        return data["json"]

    # DEBUG: print(f"DEBUG: data()={len(data)} - EXIT!")
    return data
282
def fetch_wellknown_nodeinfo(domain: str) -> dict:
    """Discover and fetch nodeinfo through '/.well-known/nodeinfo'.

    The 'links' of the well-known document are walked in order; the first
    link whose 'rel' is listed in nodeinfo_identifier and whose 'href' can
    be fetched successfully wins.

    Parameters:
        domain: Domain to query; non-empty string.

    Returns:
        The response dict of the last request made: the fetched nodeinfo
        response on success, an error response otherwise, or the
        well-known response itself when no link was usable. When the CSRF
        check raises, a dict with 'status_code', 'error_message' and
        'exception' is returned.

    Raises:
        ValueError: If ``domain`` is not a non-empty string.
    """
    # DEBUG: print(f"DEBUG: domain='{domain}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")

    try:
        # No need to add network.api_headers here, csrf.determine() provides the headers
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        print(f"WARNING: Exception '{type(exception)}' during checking CSRF (fetch_wellknown_nodeinfo,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
        return {
            "status_code"  : 500,
            # A string, same format as fetch_nodeinfo() uses, not a type object
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    # DEBUG: print("DEBUG: Fetching .well-known info for domain:", domain)
    data = network.get_json_api(
        domain,
        "/.well-known/nodeinfo",
        headers,
        (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
    )

    if "error_message" in data:
        return data

    # The document comes from a remote host, so do not trust its structure
    nodeinfo = data["json"]
    links = nodeinfo.get("links") if isinstance(nodeinfo, dict) else None
    if not isinstance(links, list):
        print("WARNING: nodeinfo does not contain 'links':", domain)
        return data

    # DEBUG: print("DEBUG: Found links in nodeinfo():", len(links))
    for link in links:
        # DEBUG: print(f"DEBUG: link[{type(link)}]='{link}'")
        if not isinstance(link, dict) or "rel" not in link:
            print(f"WARNING: link[]='{type(link)}' is not 'dict' or no element 'rel' found")
        elif link["rel"] not in nodeinfo_identifier:
            print("WARNING: Unknown 'rel' value:", domain, link["rel"])
        elif not isinstance(link.get("href"), str) or link["href"] == "":
            print(f"WARNING: link[rel]='{link['rel']}' has no usable 'href', domain='{domain}'")
        else:
            # Default is that 'href' has a complete URL, but some hosts don't send that
            url = link["href"]
            components = urlparse(url)

            if components.scheme == "" and components.netloc == "":
                # Neither scheme nor host name, prepend them from domain
                url = f"https://{domain}{url}"

            # DEBUG: print("DEBUG: Fetching nodeinfo from:", url)
            data = network.fetch_api_url(
                url,
                (config.get("connection_timeout"), config.get("read_timeout"))
            )

            if "error_message" not in data and "json" in data:
                # DEBUG: print("DEBUG: Found JSON nodeinfo():", len(data))
                instances.set_detection_mode(domain, "AUTO_DISCOVERY")
                instances.set_nodeinfo_url(domain, link["href"])
                break

            instances.set_last_error(domain, data)

    # DEBUG: print("DEBUG: Returning data[]:", type(data))
    return data
353
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
    """Guess the software of ``domain`` from the HTML found at ``path``.

    The <meta name="generator"> tag is preferred; <meta property=
    "og:site_name"> is only consulted when no generator tag with string
    content exists. The found value is then stripped of version numbers
    and phrases such as 'powered by', 'hosted on', ' by ' and ' see '.

    Parameters:
        domain: Domain to fetch from; non-empty string.
        path: Path of the page to inspect; non-empty string.

    Returns:
        The detected software name, or None if nothing was found.

    Raises:
        ValueError: If ``domain`` or ``path`` is not a non-empty string.
    """
    # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',path='{path}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    if domain == "":
        raise ValueError("Parameter 'domain' is empty")
    if not isinstance(path, str):
        raise ValueError(f"path[]='{type(path)}' is not 'str'")
    if path == "":
        raise ValueError("Parameter 'path' is empty")

    software = None

    # DEBUG: print(f"DEBUG: Fetching path='{path}' from '{domain}' ...")
    timeout = (config.get("connection_timeout"), config.get("read_timeout"))
    response = network.fetch_response(domain, path, network.web_headers, timeout)

    if response.ok and response.status_code < 300 and len(response.text) > 0:
        doc = bs4.BeautifulSoup(response.text, "html.parser")

        generator = doc.find("meta", {"name"    : "generator"})
        site_name = doc.find("meta", {"property": "og:site_name"})

        # DEBUG: print(f"DEBUG: generator='{generator}',site_name='{site_name}'")
        if isinstance(generator, bs4.element.Tag) and isinstance(generator.get("content"), str):
            software = tidyup.domain(generator.get("content"))
            if software is not None and software != "":
                print(f"INFO: domain='{domain}' is generated by '{software}'")
                instances.set_detection_mode(domain, "GENERATOR")
        elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str):
            software = tidyup.domain(site_name.get("content"))
            if software is not None and software != "":
                print(f"INFO: domain='{domain}' has og:site_name='{software}'")
                instances.set_detection_mode(domain, "SITE_NAME")

    # First pass: empty means "nothing found", dots/spaces hint at a version number
    if isinstance(software, str):
        if software == "":
            software = None
        elif "." in software or " " in software:
            software = version.remove(software)

    # Second pass: strip marketing phrases, only one of them is handled
    if isinstance(software, str):
        if "powered by " in software:
            software = version.remove(version.strip_powered_by(software))
        elif " hosted on " in software:
            software = version.remove(version.strip_hosted_on(software))
        elif " by " in software:
            software = version.strip_until(software, " by ")
        elif " see " in software:
            software = version.strip_until(software, " see ")

    # DEBUG: print(f"DEBUG: software='{software}' - EXIT!")
    return software
420
def determine_software(domain: str, path: str = None) -> str:
    """Determine which software ``domain`` is running.

    Nodeinfo (key software/name) is preferred; fetch_generator_from_path()
    is the fallback when nodeinfo is unavailable, reports an error, or
    leaves no usable name. Known forks are mapped to their base software
    (e.g. 'akkoma' becomes 'pleroma') and version numbers or
    'powered by' phrases are stripped.

    Parameters:
        domain: Domain to inspect; non-empty string.
        path: Optional nodeinfo path passed on to fetch_nodeinfo().

    Returns:
        The software name, or None if it could not be determined.

    Raises:
        ValueError: If ``domain`` is not a non-empty string or ``path`` is
            neither a string nor None.
        Exception: Whatever exception fetch_nodeinfo() reported under the
            'exception' key is raised again.
    """
    # DEBUG: print(f"DEBUG: domain({len(domain)})='{domain}',path='{path}' - CALLED!")
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
    elif domain == "":
        raise ValueError("Parameter 'domain' is empty")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")

    software = None

    # DEBUG: print(f"DEBUG: Fetching nodeinfo from '{domain}' ...")
    data = fetch_nodeinfo(domain, path)

    # DEBUG: print(f"DEBUG: data[{type(data)}]='{data}'")
    if "exception" in data:
        # Continue raising it
        raise data["exception"]
    elif "error_message" in data:
        return fetch_generator_from_path(domain)
    elif "status" in data and data["status"] == "error" and "message" in data:
        print("WARNING: JSON response is an error:", data["message"])
        instances.set_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "message" in data:
        print("WARNING: JSON response contains only a message:", data["message"])
        instances.set_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "software" not in data or "name" not in data["software"]:
        # Nodeinfo without [software][name], try the generator instead
        software = fetch_generator_from_path(domain)
    else:
        software = data["software"]["name"]

    if software is None:
        # DEBUG: print("DEBUG: Returning None - EXIT!")
        return None

    # The result must be stored back; a misspelled variable name used to
    # throw the tidied value away.
    software = tidyup.domain(software)
    # DEBUG: print("DEBUG: software after tidyup.domain():", software)

    if software in ["akkoma", "rebased"]:
        software = "pleroma"
    elif software in ["hometown", "ecko"]:
        software = "mastodon"
    elif software in ["slipfox calckey", "calckey", "groundpolis", "foundkey", "cherrypick", "meisskey"]:
        software = "misskey"
    elif software == "runtube.re":
        software = "peertube"
    elif software == "nextcloud social":
        software = "nextcloud"
    elif software.find("/") > 0:
        print("WARNING: Spliting of slash:", software)
        software = tidyup.domain(software.split("/")[-1])
    elif software.find("|") > 0:
        print("WARNING: Spliting of pipe:", software)
        software = tidyup.domain(software.split("|")[0])
    elif "powered by" in software:
        software = version.strip_powered_by(software)
    elif " by " in software:
        software = version.strip_until(software, " by ")
    elif " see " in software:
        software = version.strip_until(software, " see ")

    if software == "":
        print("WARNING: tidyup.domain() left no software name behind:", domain)
        software = None

    if software is None:
        # Nothing usable left, fall back to the generator. The old check
        # 'str(software) == ""' could never match None, and the following
        # '"." in software' then raised TypeError.
        software = fetch_generator_from_path(domain)
    elif isinstance(software, str) and ("." in software or " " in software):
        # May contain a version number, remove it
        software = version.remove(software)

    if isinstance(software, str) and "powered by" in software:
        software = version.remove(version.strip_powered_by(software))

    # DEBUG: print("DEBUG: Returning domain,software:", domain, software)
    return software
517
def find_domains(tag: bs4.element.Tag) -> list:
    """Extract blocked domains and their reasons from an HTML table.

    The first <td> of a row is taken as the domain, the second one as the
    reason. Rows without <td> (e.g. header rows) are skipped silently;
    rows with a single cell, blacklisted domains and invalid domains are
    skipped with a warning.

    Parameters:
        tag: Element containing the <tr> rows to parse.

    Returns:
        A list of dicts, each with the keys 'domain' and 'reason'.

    Raises:
        ValueError: If ``tag`` is not a bs4.element.Tag.
        KeyError: If ``tag`` contains no <tr> element.
    """
    # DEBUG: print(f"DEBUG: tag[]='{type(tag)}' - CALLED!")
    if not isinstance(tag, bs4.element.Tag):
        raise ValueError(f"Parameter tag[]='{type(tag)}' is not type of bs4.element.Tag")

    # Select the rows once instead of once for the check and once for the loop
    rows = tag.select("tr")
    if len(rows) == 0:
        raise KeyError("No table rows found in table!")

    domains = list()
    for element in rows:
        # DEBUG: print(f"DEBUG: element[]='{type(element)}'")
        cells = element.find_all("td")
        if len(cells) == 0:
            # DEBUG: print("DEBUG: Skipping element, no <td> found")
            continue
        elif len(cells) < 2:
            # Without a second cell there is no reason column to read
            print(f"WARNING: Table row has only {len(cells)} cell(s), cannot extract reason - SKIPPED!")
            continue

        domain = tidyup.domain(cells[0].text)
        reason = tidyup.reason(cells[1].text)

        # DEBUG: print(f"DEBUG: domain='{domain}',reason='{reason}'")
        if blacklist.is_blacklisted(domain):
            print(f"WARNING: domain='{domain}' is blacklisted - SKIPPED!")
            continue
        elif domain == "gab.com/.ai, develop.gab.com":
            # Known row that lists several domains in one cell
            for single in ("gab.com", "gab.ai", "develop.gab.com"):
                domains.append({
                    "domain": single,
                    "reason": reason,
                })
            continue
        elif not validators.domain(domain):
            print(f"WARNING: domain='{domain}' is not a valid domain - SKIPPED!")
            continue

        # DEBUG: print(f"DEBUG: Adding domain='{domain}',reason='{reason}' ...")
        domains.append({
            "domain": domain,
            "reason": reason,
        })

    # DEBUG: print(f"DEBUG: domains()={len(domains)} - EXIT!")
    return domains
567
def add_peers(rows: dict) -> list:
    """Collect all non-blacklisted peers from a 'federated_instances' dict.

    The keys 'linked', 'allowed' and 'blocked' are read in that order;
    missing keys and keys set to None are ignored.

    Parameters:
        rows: Dict with peer lists under the keys named above, or None
            (fetch_peers() passes the remote value through unchecked, so
            a null 'federated_instances' arrives here as None).

    Returns:
        A list of peers, each passed through tidyup.domain(). Empty when
        ``rows`` is None or holds no peers.
    """
    # DEBUG: print(f"DEBUG: rows[]='{type(rows)}' - CALLED!")
    peers = list()
    if rows is None:
        # Nothing to collect; 'key in None' would raise TypeError below
        return peers

    for key in ["linked", "allowed", "blocked"]:
        # DEBUG: print(f"DEBUG: Checking key='{key}'")
        if key not in rows or rows[key] is None:
            continue

        # DEBUG: print(f"DEBUG: Adding {len(rows[key])} peer(s) to peers list ...")
        for peer in rows[key]:
            if peer is None:
                # Skip "None" types as tidyup.domain() cannot parse them
                continue

            # DEBUG: print(f"DEBUG: peer='{peer}' - BEFORE!")
            peer = tidyup.domain(peer)

            # DEBUG: print(f"DEBUG: peer='{peer}' - AFTER!")
            if blacklist.is_blacklisted(peer):
                # DEBUG: print(f"DEBUG: peer='{peer}' is blacklisted, skipped!")
                continue

            # DEBUG: print(f"DEBUG: Adding peer='{peer}' ...")
            peers.append(peer)

    # DEBUG: print(f"DEBUG: peers()={len(peers)} - EXIT!")
    return peers
588     return peers