1 # Copyright (C) 2023 Free Software Foundation
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License as published
5 # by the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU Affero General Public License for more details.
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program. If not, see <https://www.gnu.org/licenses/>.
18 from urllib.parse import urlparse
26 from fba.helpers import blacklist
27 from fba.helpers import config
28 from fba.helpers import tidyup
29 from fba.helpers import version
31 from fba.http import network
33 from fba.models import instances
35 from fba.networks import lemmy
36 from fba.networks import misskey
37 from fba.networks import peertube
# Module-wide logging setup; INFO is the default verbosity for this crawler.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Known nodeinfo schema "rel" identifiers used during .well-known
# auto-discovery, newest schema first (these are identifiers, no real URLs)
nodeinfo_identifier = [
    "https://nodeinfo.diaspora.software/ns/schema/2.1",
    "https://nodeinfo.diaspora.software/ns/schema/2.0",
    "https://nodeinfo.diaspora.software/ns/schema/1.1",
    "https://nodeinfo.diaspora.software/ns/schema/1.0",
    "http://nodeinfo.diaspora.software/ns/schema/2.1",
    "http://nodeinfo.diaspora.software/ns/schema/2.0",
    "http://nodeinfo.diaspora.software/ns/schema/1.1",
    "http://nodeinfo.diaspora.software/ns/schema/1.0",
def fetch_instances(domain: str, origin: str, software: str, command: str, path: str = None):
    """
    Crawls `domain` for its peer instances and registers both the domain
    itself and every newly seen peer in the instances table.

    Parameters:
    domain   -- instance domain to crawl (must be all lower-case, validated below)
    origin   -- domain this instance was first discovered from (or None)
    software -- software name; None triggers auto-detection via determine_software()
    command  -- name of the invoking command, stored with newly added records
    path     -- optional nodeinfo path hint forwarded to software detection

    Raises ValueError on invalid parameters. Returns nothing.
    """
    logger.debug(f"domain='{domain}',origin='{origin}',software='{software}',path='{path}' - CALLED!")
    # --- parameter validation: types plus domain sanity checks ---
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
        raise ValueError("Parameter 'domain' is empty")
    elif domain.lower() != domain:
        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")
    elif domain.endswith(".arpa"):
        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    elif not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not 'str'")
    elif software is None:
        # No software given: record this fetch attempt, then try to detect it.
        logger.debug(f"Updating last_instance_fetch for domain='{domain}' ...")
        instances.set_last_instance_fetch(domain)
        logger.debug(f"software for domain='{domain}' is not set, determining ...")
            software = determine_software(domain, path)
        except network.exceptions as exception:
            # Detection failure is non-fatal here; continue with software unset.
            logger.debug(f"Exception '{type(exception)}' during determining software type")
        logger.debug(f"Determined software='{software}' for domain='{domain}'")
    elif not isinstance(software, str):
        raise ValueError(f"Parameter software[]='{type(software)}' is not 'str'")
    elif not isinstance(command, str):
        raise ValueError(f"Parameter command[]='{type(command)}' is not 'str'")
        raise ValueError("Parameter 'command' is empty")
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")
    elif domain.endswith(".arpa"):
        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain")
    # Register unknown domains, then bump the fetch timestamp.
    if not instances.is_registered(domain):
        logger.debug(f"Adding new domain='{domain}',origin='{origin}',command='{command}',path='{path}',software='{software}'")
        instances.add(domain, origin, command, path, software)
    logger.debug(f"Updating last_instance_fetch for domain='{domain}' ...")
    instances.set_last_instance_fetch(domain)
    # NOTE(review): extra positional args without %s placeholders in the
    # message string cause stdlib logging to report a formatting error and
    # drop the values — this call (and similar ones below) logs nothing useful.
    logger.debug("Fetching instances for domain:", domain, software)
    peerlist = fetch_peers(domain, software)
        logger.warning("Cannot fetch peers:", domain)
    elif instances.has_pending(domain):
        # Flush any nodeinfo data gathered as a side effect of fetching peers.
        logger.debug(f"domain='{domain}' has pending nodeinfo data, flushing ...")
        instances.update_data(domain)
    logger.info("Checking %d instances from domain='%s' ...", len(peerlist), domain)
    for instance in peerlist:
        logger.debug(f"instance='{instance}'")
        # Skip "None" types as tidyup.domain() cannot parse them
        logger.debug(f"instance='{instance}' - BEFORE")
        instance = tidyup.domain(instance)
        logger.debug(f"instance='{instance}' - AFTER")
            logger.warning(f"Empty instance after tidyup.domain(), domain='{domain}'")
        elif not utils.is_domain_wanted(instance):
            logger.debug("instance='%s' is not wanted - SKIPPED!", instance)
        elif instance.find("/profile/") > 0 or instance.find("/users/") > 0:
            # Peer lists sometimes contain profile URLs instead of bare domains.
            logger.debug("instance='%s' is a link to a single user profile - SKIPPED!", instance)
        elif not instances.is_registered(instance):
            logger.debug("Adding new instance:", instance, domain)
            instances.add(instance, domain, command)
    logger.debug("EXIT!")
def fetch_peers(domain: str, software: str) -> list:
    """
    Returns the list of peer domains federated with `domain`.

    misskey/lemmy/peertube instances are delegated to their dedicated
    fetchers; everything else is queried through the generic JSON peers
    API (/api/v1/instance/peers, with a fallback endpoint). The total
    peer count is stored via instances.set_total_peers().

    Raises ValueError on invalid parameters.
    """
    logger.debug(f"domain({len(domain)})='{domain}',software='{software}' - CALLED!")
    # --- parameter validation ---
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
        raise ValueError("Parameter 'domain' is empty")
    elif domain.lower() != domain:
        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")
    elif domain.endswith(".arpa"):
        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    elif not isinstance(software, str) and software is not None:
        raise ValueError(f"software[]='{type(software)}' is not 'str'")
    # Delegate to software-specific peer fetchers where available.
    if software == "misskey":
        logger.debug(f"Invoking misskey.fetch_peers({domain}) ...")
        return misskey.fetch_peers(domain)
    elif software == "lemmy":
        logger.debug(f"Invoking lemmy.fetch_peers({domain}) ...")
        return lemmy.fetch_peers(domain)
    elif software == "peertube":
        logger.debug(f"Invoking peertube.fetch_peers({domain}) ...")
        return peertube.fetch_peers(domain)
    # Init peers variable
    # No CSRF by default, you don't have to add network.api_headers by yourself here
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        # CSRF probing failed: record the error and abort the fetch.
        logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_peers,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
    logger.debug(f"Fetching peers from '{domain}',software='{software}' ...")
    data = network.get_json_api(
        "/api/v1/instance/peers",
        (config.get("connection_timeout"), config.get("read_timeout"))
    logger.debug("data[]='%s'", type(data))
    if "error_message" in data:
        # Primary endpoint failed; retry against the alternative peers API.
        logger.debug("Was not able to fetch peers, trying alternative ...")
        data = network.get_json_api(
            (config.get("connection_timeout"), config.get("read_timeout"))
        logger.debug("data[]='%s'", type(data))
        if "error_message" in data:
            logger.warning(f"Could not reach any JSON API at domain='{domain}',status_code='{data['status_code']}',error_message='{data['error_message']}'")
        elif "federated_instances" in data["json"]:
            logger.debug(f"Found federated_instances for domain='{domain}'")
            peers = peers + add_peers(data["json"]["federated_instances"])
            logger.debug("Added instance(s) to peers")
            message = "JSON response does not contain 'federated_instances' or 'error_message'"
            logger.warning("message='%s',domain='%s'", message, domain)
            instances.set_last_error(domain, message)
    elif isinstance(data["json"], list):
        logger.debug("Querying API was successful: domain='%s',data[json]()=%d", domain, len(data['json']))
        logger.warning("Cannot parse data[json][]='%s'", type(data['json']))
    logger.debug(f"Adding '{len(peers)}' for domain='{domain}'")
    instances.set_total_peers(domain, peers)
    logger.debug("Returning peers[]:", type(peers))
def fetch_nodeinfo(domain: str, path: str = None) -> dict:
    """
    Fetches nodeinfo JSON for `domain`.

    Tries .well-known auto-discovery first (fetch_wellknown_nodeinfo);
    on failure falls back to probing a static list of common nodeinfo
    paths. Returns the parsed JSON (or an API-response dict that carries
    'error_message'/'exception' keys on failure).

    Raises ValueError on invalid parameters.
    """
    logger.debug(f"domain='{domain}',path='{path}' - CALLED!")
    # --- parameter validation ---
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
        raise ValueError("Parameter 'domain' is empty")
    elif domain.lower() != domain:
        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")
    elif domain.endswith(".arpa"):
        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")
    # Preferred route: .well-known auto-discovery.
    logger.debug(f"Fetching nodeinfo from domain='{domain}' ...")
    nodeinfo = fetch_wellknown_nodeinfo(domain)
    logger.debug(f"nodeinfo[{type(nodeinfo)}]({len(nodeinfo)}='{nodeinfo}'")
    if "error_message" not in nodeinfo and "json" in nodeinfo and len(nodeinfo["json"]) > 0:
        logger.debug(f"Found nodeinfo[json]()={len(nodeinfo['json'])} - EXIT!")
        return nodeinfo["json"]
    # No CSRF by default, you don't have to add network.api_headers by yourself here
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        # CSRF probing failed: record it and return an error dict.
        logger.warning(f"Exception '{type(exception)}' during checking CSRF (nodeinfo,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception" : exception,
        "/nodeinfo/2.1.json",
        "/nodeinfo/2.0.json",
    # Fallback route: probe each static nodeinfo path.
    for request in request_paths:
        logger.debug(f"path[{type(path)}]='{path}',request='{request}'")
        # NOTE(review): `path == f"https://{domain}{path}"` compares `path`
        # against a string that embeds `path` itself — a string can never
        # equal a non-empty prefix plus itself, so these two conditions are
        # dead for non-empty domains; `request` was likely intended instead.
        if path is None or path == request or path == f"http://{domain}{path}" or path == f"https://{domain}{path}":
            logger.debug(f"Fetching request='{request}' from domain='{domain}' ...")
            if path == f"http://{domain}{path}" or path == f"https://{domain}{path}":
                # Strip the protocol/host part so only the path remains.
                logger.debug(f"domain='{domain}',path='{path}' has protocol in path, splitting ...")
                components = urlparse(path)
                path = components.path
            data = network.get_json_api(
                (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
            logger.debug("data[]='%s'", type(data))
            if "error_message" not in data:
                logger.debug("Success:", request)
                instances.set_detection_mode(domain, "STATIC_CHECK")
                instances.set_nodeinfo_url(domain, request)
                logger.warning(f"Failed fetching nodeinfo from domain='{domain}',status_code='{data['status_code']}',error_message='{data['error_message']}'")
    logger.debug("data()=%d - EXIT!", len(data))
def fetch_wellknown_nodeinfo(domain: str) -> dict:
    """
    Auto-discovers nodeinfo for `domain` via /.well-known/nodeinfo.

    Follows the first link whose 'rel' matches a known schema identifier
    (see module-level nodeinfo_identifier) and fetches the referenced
    document. Returns the API response dict; on failure the dict carries
    'error_message' and 'exception' keys.

    Raises ValueError on invalid parameters.
    """
    logger.debug("domain(%d)='%s' - CALLED!", len(domain), domain)
    # --- parameter validation ---
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
        raise ValueError("Parameter 'domain' is empty")
    elif domain.lower() != domain:
        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")
    elif domain.endswith(".arpa"):
        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    # No CSRF by default, you don't have to add network.api_headers by yourself here
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        # CSRF probing failed: record it and return an error dict.
        logger.warning(f"Exception '{type(exception)}' during checking CSRF (fetch_wellknown_nodeinfo,{__name__}) - EXIT!")
        instances.set_last_error(domain, exception)
            "error_message": type(exception),
            "exception" : exception,
    logger.debug("Fetching .well-known info for domain:", domain)
    data = network.get_json_api(
        "/.well-known/nodeinfo",
        (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
    if "error_message" not in data:
        nodeinfo = data["json"]
        logger.debug("Found entries:", len(nodeinfo), domain)
        if "links" in nodeinfo:
            logger.debug("Found links in nodeinfo():", len(nodeinfo["links"]))
            for link in nodeinfo["links"]:
                logger.debug(f"link[{type(link)}]='{link}'")
                if not isinstance(link, dict) or not "rel" in link:
                    logger.warning(f"link[]='{type(link)}' is not 'dict' or no element 'rel' found")
                elif link["rel"] in nodeinfo_identifier:
                    # Default is that 'href' has a complete URL, but some hosts don't send that
                    components = urlparse(link["href"])
                    logger.debug(f"components[{type(components)}]='{components}'")
                    if components.scheme == "" and components.netloc == "":
                        # Relative href: prepend scheme and host from `domain`.
                        # NOTE(review): `url` is read here before any visible
                        # assignment in this view — presumably set from
                        # link["href"] earlier; confirm against full source.
                        logger.debug(f"link[href]='{link['href']}' has no scheme and host name in it, prepending from domain='{domain}'")
                        url = f"https://{domain}{url}"
                        components = urlparse(url)
                    if not utils.is_domain_wanted(components.netloc):
                        logger.debug("components.netloc='%s' is not wanted - SKIPPED!", components.netloc)
                    logger.debug("Fetching nodeinfo from:", url)
                    data = network.fetch_api_url(
                        (config.get("connection_timeout"), config.get("read_timeout"))
                    logger.debug("href,data[]:", link["href"], type(data))
                    if "error_message" not in data and "json" in data:
                        logger.debug("Found JSON nodeinfo():", len(data))
                        instances.set_detection_mode(domain, "AUTO_DISCOVERY")
                        instances.set_nodeinfo_url(domain, link["href"])
                        instances.set_last_error(domain, data)
                    logger.warning("Unknown 'rel' value:", domain, link["rel"])
            logger.warning("nodeinfo does not contain 'links':", domain)
    logger.debug("Returning data[]:", type(data))
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
    """
    Determines the software of `domain` by scraping the HTML page at `path`.

    Checks <meta name="generator"> first, then <meta property="og:site_name">,
    and post-processes the found value: version numbers and marketing
    suffixes ('powered by', 'hosted on', ' by ', ' see ') are stripped.
    Returns the cleaned software name.

    Raises ValueError on invalid parameters.
    """
    logger.debug(f"domain({len(domain)})='{domain}',path='{path}' - CALLED!")
    # --- parameter validation ---
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
        raise ValueError("Parameter 'domain' is empty")
    elif domain.lower() != domain:
        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")
    elif domain.endswith(".arpa"):
        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    elif not isinstance(path, str):
        raise ValueError(f"path[]='{type(path)}' is not 'str'")
        raise ValueError("Parameter 'path' is empty")
    logger.debug(f"domain='{domain}',path='{path}' - CALLED!")
    logger.debug(f"Fetching path='{path}' from '{domain}' ...")
    response = network.fetch_response(domain, path, network.web_headers, (config.get("connection_timeout"), config.get("read_timeout")))
    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    # Only parse responses that actually contain HTML.
    if response.ok and response.status_code < 300 and response.text.find("<html") > 0:
        logger.debug(f"Parsing response.text()={len(response.text)} Bytes ...")
        doc = bs4.BeautifulSoup(response.text, "html.parser")
        logger.debug("doc[]='%s'", type(doc))
        generator = doc.find("meta", {"name" : "generator"})
        site_name = doc.find("meta", {"property": "og:site_name"})
        logger.debug("generator[]='%s',site_name[]='%s'", type(generator), type(site_name))
        # Prefer the generator meta tag; og:site_name is only a fallback.
        if isinstance(generator, bs4.element.Tag) and isinstance(generator.get("content"), str):
            logger.debug("Found generator meta tag:", domain)
            software = tidyup.domain(generator.get("content"))
            logger.debug("software[%s]='%s'", type(software), software)
            if software is not None and software != "":
                logger.info("domain='%s' is generated by '%s'", domain, software)
                instances.set_detection_mode(domain, "GENERATOR")
        elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str):
            logger.debug("Found property=og:site_name:", domain)
            software = tidyup.domain(site_name.get("content"))
            logger.debug("software[%s]='%s'", type(software), software)
            if software is not None and software != "":
                logger.info("domain='%s' has og:site_name='%s'", domain, software)
                instances.set_detection_mode(domain, "SITE_NAME")
    logger.debug("software[]='%s'", type(software))
    if isinstance(software, str) and software == "":
        logger.debug("Corrected empty string to None for software of domain='%s'", domain)
    elif isinstance(software, str) and ("." in software or " " in software):
        logger.debug(f"software='{software}' may contain a version number, domain='{domain}', removing it ...")
        software = version.remove(software)
    logger.debug("software[]='%s'", type(software))
    # Strip marketing-style suffixes from the detected name.
    if isinstance(software, str) and "powered by " in software:
        logger.debug(f"software='{software}' has 'powered by' in it")
        software = version.remove(version.strip_powered_by(software))
    elif isinstance(software, str) and " hosted on " in software:
        logger.debug(f"software='{software}' has 'hosted on' in it")
        software = version.remove(version.strip_hosted_on(software))
    elif isinstance(software, str) and " by " in software:
        logger.debug(f"software='{software}' has ' by ' in it")
        software = version.strip_until(software, " by ")
    elif isinstance(software, str) and " see " in software:
        logger.debug(f"software='{software}' has ' see ' in it")
        software = version.strip_until(software, " see ")
    logger.debug(f"software='{software}' - EXIT!")
def determine_software(domain: str, path: str = None) -> str:
    """
    Determines the software name running on `domain`.

    Tries nodeinfo first (fetch_nodeinfo); falls back to HTML scraping
    (fetch_generator_from_path) when nodeinfo is unavailable, erroneous,
    or lacks [software][name]. The raw name is then normalized: known
    forks are mapped onto their upstream (e.g. 'hometown' -> 'mastodon'),
    slash/pipe-separated values are split, and version numbers plus
    'powered by' suffixes are stripped.

    Raises ValueError on invalid parameters, or re-raises a stored
    exception from the nodeinfo fetch.
    """
    logger.debug(f"domain({len(domain)})='{domain}',path='{path}' - CALLED!")
    # --- parameter validation ---
    if not isinstance(domain, str):
        raise ValueError(f"Parameter domain[]='{type(domain)}' is not 'str'")
        raise ValueError("Parameter 'domain' is empty")
    elif domain.lower() != domain:
        raise ValueError(f"Parameter domain='{domain}' must be all lower-case")
    elif not validators.domain(domain.split("/")[0]):
        raise ValueError(f"domain='{domain}' is not a valid domain")
    elif domain.endswith(".arpa"):
        raise ValueError(f"domain='{domain}' is a domain for reversed IP addresses, please don't crawl them!")
    elif domain.endswith(".tld"):
        raise ValueError(f"domain='{domain}' is a fake domain, please don't crawl them!")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not 'str'")
    logger.debug("Determining software for domain,path:", domain, path)
    logger.debug(f"Fetching nodeinfo from '{domain}' ...")
    data = fetch_nodeinfo(domain, path)
    logger.debug(f"data[{type(data)}]='{data}'")
    if "exception" in data:
        # Continue raising it
        raise data["exception"]
    elif "error_message" in data:
        logger.debug(f"Returned error_message during fetching nodeinfo: '{data['error_message']}',status_code='{data['status_code']}'")
        return fetch_generator_from_path(domain)
    elif "status" in data and data["status"] == "error" and "message" in data:
        logger.warning("JSON response is an error:", data["message"])
        instances.set_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "message" in data:
        logger.warning("JSON response contains only a message:", data["message"])
        instances.set_last_error(domain, data["message"])
        return fetch_generator_from_path(domain)
    elif "software" not in data or "name" not in data["software"]:
        logger.debug(f"JSON response from domain='{domain}' does not include [software][name], fetching / ...")
        software = fetch_generator_from_path(domain)
        logger.debug(f"Generator for domain='{domain}' is: '{software}'")
    elif "software" in data and "name" in data["software"]:
        logger.debug("Found data[software][name] in JSON response")
        software = data["software"]["name"]
        logger.debug("Returning None - EXIT!")
    # NOTE(review): typo — assigns to 'sofware', so the tidied value is
    # discarded and 'software' keeps its pre-tidyup value; almost certainly
    # meant 'software = tidyup.domain(software)'. Fix requires full context.
    sofware = tidyup.domain(software)
    logger.debug("sofware after tidyup.domain():", software)
    # Map known forks/renames onto their canonical upstream name.
    if software in ["akkoma", "rebased", "akkounfucked", "ched"]:
        logger.debug("Setting pleroma:", domain, software)
    elif software in ["hometown", "ecko"]:
        logger.debug("Setting mastodon:", domain, software)
        software = "mastodon"
    elif software in ["slipfox calckey", "calckey", "groundpolis", "foundkey", "cherrypick", "meisskey", "magnetar", "keybump"]:
        logger.debug("Setting misskey:", domain, software)
    elif software == "runtube.re":
        logger.debug("Setting peertube:", domain, software)
        software = "peertube"
    elif software == "nextcloud social":
        logger.debug("Setting nextcloud:", domain, software)
        software = "nextcloud"
    elif software.find("/") > 0:
        logger.warning("Spliting of slash:", software)
        software = tidyup.domain(software.split("/")[-1])
    elif software.find("|") > 0:
        logger.warning("Spliting of pipe:", software)
        software = tidyup.domain(software.split("|")[0])
    elif "powered by" in software:
        logger.debug(f"software='{software}' has 'powered by' in it")
        software = version.strip_powered_by(software)
    elif isinstance(software, str) and " by " in software:
        logger.debug(f"software='{software}' has ' by ' in it")
        software = version.strip_until(software, " by ")
    elif isinstance(software, str) and " see " in software:
        logger.debug(f"software='{software}' has ' see ' in it")
        software = version.strip_until(software, " see ")
    logger.debug("software[]='%s'", type(software))
        logger.warning("tidyup.domain() left no software name behind:", domain)
    logger.debug("software[]='%s'", type(software))
    if str(software) == "":
        # Nothing detected via nodeinfo: last resort is HTML scraping.
        logger.debug(f"software for '{domain}' was not detected, trying generator ...")
        software = fetch_generator_from_path(domain)
    elif len(str(software)) > 0 and ("." in software or " " in software):
        logger.debug(f"software='{software}' may contain a version number, domain='{domain}', removing it ...")
        software = version.remove(software)
    logger.debug("software[]='%s'", type(software))
    if isinstance(software, str) and "powered by" in software:
        logger.debug(f"software='{software}' has 'powered by' in it")
        software = version.remove(version.strip_powered_by(software))
    logger.debug("Returning domain,software:", domain, software)
def find_domains(tag: bs4.element.Tag) -> list:
    """
    Extracts blocked domains and block reasons from an HTML table.

    `tag` must contain <tr> rows whose first <td> holds the domain and
    second <td> the block reason; unwanted/invalid domains are skipped.
    Returns a list of record dicts.

    Raises ValueError if `tag` is not a bs4 Tag, KeyError if it has no rows.
    """
    logger.debug(f"tag[]='{type(tag)}' - CALLED!")
    if not isinstance(tag, bs4.element.Tag):
        raise ValueError(f"Parameter tag[]='{type(tag)}' is not type of bs4.element.Tag")
    elif len(tag.select("tr")) == 0:
        raise KeyError("No table rows found in table!")
    for element in tag.select("tr"):
        logger.debug(f"element[]='{type(element)}'")
        if not element.find("td"):
            # Header or empty row: nothing to extract.
            logger.debug("Skipping element, no <td> found")
        domain = tidyup.domain(element.find("td").text)
        reason = tidyup.reason(element.findAll("td")[1].text)
        logger.debug("domain='%s',reason='%s'", domain, reason)
        if not utils.is_domain_wanted(domain):
            logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
        elif domain == "gab.com/.ai, develop.gab.com":
            # Special case: one table row lists several gab domains at once.
            logger.debug("Multiple domains detected in one row")
                "domain": "develop.gab.com",
        elif not validators.domain(domain.split("/")[0]):
            logger.warning("domain='%s' is not a valid domain - SKIPPED!", domain)
        logger.debug(f"Adding domain='{domain}',reason='{reason}' ...")
    logger.debug(f"domains()={len(domains)} - EXIT!")
def add_peers(rows: dict) -> list:
    """
    Flattens a 'federated_instances' structure into a list of peer domains.

    Looks at the 'linked', 'allowed' and 'blocked' keys of `rows`; each
    entry may be a plain domain string or a dict carrying a 'domain' key.
    Entries are tidied via tidyup.domain() and unwanted domains skipped.
    Returns the collected peer list.

    Raises ValueError if `rows` is not a dict or an entry has an
    unsupported type.
    """
    logger.debug(f"rows[]={type(rows)} - CALLED!")
    if not isinstance(rows, dict):
        raise ValueError(f"Parameter rows[]='{type(rows)}' is not 'dict'")
    for key in ["linked", "allowed", "blocked"]:
        logger.debug(f"Checking key='{key}'")
        if key not in rows or rows[key] is None:
            logger.debug(f"Cannot find key='{key}' or it is NoneType - SKIPPED!")
        logger.debug(f"Adding {len(rows[key])} peer(s) to peers list ...")
        for peer in rows[key]:
            logger.debug(f"peer='{peer}' - BEFORE!")
            # Accept both dict entries ({'domain': ...}) and plain strings.
            if isinstance(peer, dict) and "domain" in peer:
                logger.debug(f"peer[domain]='{peer['domain']}'")
                peer = tidyup.domain(peer["domain"])
            elif isinstance(peer, str):
                logger.debug(f"peer='{peer}'")
                peer = tidyup.domain(peer)
                raise ValueError(f"peer[]='{type(peer)}' is not supported,key='{key}'")
            logger.debug(f"peer='{peer}' - AFTER!")
            if not utils.is_domain_wanted(peer):
                logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
            logger.debug(f"Adding peer='{peer}' ...")
    logger.debug(f"peers()={len(peers)} - EXIT!")