1 # Copyright (C) 2023 Free Software Foundation
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License as published
5 # by the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU Affero General Public License for more details.
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program. If not, see <https://www.gnu.org/licenses/>.
18 from urllib.parse import urlparse
26 from fba.helpers import config
27 from fba.helpers import cookies
28 from fba.helpers import domain as domain_helper
29 from fba.helpers import software as software_helper
30 from fba.helpers import tidyup
31 from fba.helpers import version
33 from fba.http import network
35 from fba.models import instances
37 from fba.networks import lemmy
38 from fba.networks import misskey
39 from fba.networks import peertube
# Configure the root logger at import time and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def fetch_instances(domain: str, origin: str, software: str, command: str, path: str = None):
    """Fetch the peers of `domain` and register every new, wanted one.

    When `software` is None it is determined first via determine_software().
    The domain itself is registered if it is not yet known, its peer list is
    fetched through fetch_peers(), and each peer is tidied up, IDNA-encoded,
    filtered and finally added with instances.add().

    NOTE(review): this block was restored from a damaged copy of the file;
    the `try:`, guard conditions, `continue` and `return` statements were
    reconstructed from context - verify against upstream.

    Args:
        domain: Domain whose peers are fetched; checked by domain_helper.raise_on().
        origin: Domain that led to `domain`, or None.
        software: Software type of `domain`, or None to auto-detect it.
        command: Non-empty name of the invoking command.
        path: Optional nodeinfo path handed to determine_software().

    Raises:
        ValueError: If `origin`, `command` or `software` has the wrong type
            or `command` is empty.
    """
    logger.debug("domain='%s',origin='%s',software='%s',command='%s',path='%s' - CALLED!", domain, origin, software, command, path)
    domain_helper.raise_on(domain)

    if not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not of type 'str'")
    elif not isinstance(command, str):
        raise ValueError(f"Parameter command[]='{type(command)}' is not of type 'str'")
    elif command == "":
        raise ValueError("Parameter 'command' is empty")
    elif software is None:
        try:
            logger.debug("Software for domain='%s' is not set, determining ...", domain)
            software = determine_software(domain, path)
        except network.exceptions as exception:
            # Best effort: record the error and carry on with software=None
            logger.warning("Exception '%s' during determining software type", type(exception))
            instances.set_last_error(domain, exception)

        logger.debug("Determined software='%s' for domain='%s'", software, domain)
    elif not isinstance(software, str):
        raise ValueError(f"Parameter software[]='{type(software)}' is not of type 'str'")

    logger.debug("Checking if domain='%s' is registered ...", domain)
    if not instances.is_registered(domain):
        logger.debug("Adding new domain='%s',origin='%s',command='%s',path='%s',software='%s'", domain, origin, command, path, software)
        instances.add(domain, origin, command, path, software)

    logger.debug("Updating last_instance_fetch for domain='%s' ...", domain)
    instances.set_last_instance_fetch(domain)

    # Stays empty when fetching raises one of network.exceptions
    peerlist = list()
    try:
        logger.debug("Fetching instances for domain='%s',software='%s',origin='%s'", domain, software, origin)
        peerlist = fetch_peers(domain, software, origin)
    except network.exceptions as exception:
        logger.warning("Cannot fetch peers from domain='%s': '%s'", domain, type(exception))

    logger.debug("peerlist[]='%s'", type(peerlist))
    if isinstance(peerlist, list):
        logger.debug("Invoking instances.set_total_peers(%s,%d) ...", domain, len(peerlist))
        instances.set_total_peers(domain, peerlist)

    logger.debug("peerlist[]='%s'", type(peerlist))
    if peerlist is None or len(peerlist) == 0:
        logger.warning("Cannot fetch peers: domain='%s'", domain)

        if instances.has_pending(domain):
            logger.debug("Flushing updates for domain='%s' ...", domain)
            instances.update_data(domain)

        logger.debug("Invoking cookies.clear(%s) ...", domain)
        cookies.clear(domain)

        logger.debug("EXIT!")
        return

    logger.info("Checking %d instance(s) from domain='%s',software='%s' ...", len(peerlist), domain, software)
    for instance in peerlist:
        logger.debug("instance='%s'", instance)
        if instance is None:
            # Skip "None" types as tidyup.domain() cannot parse them
            continue

        logger.debug("instance='%s' - BEFORE!", instance)
        instance = tidyup.domain(instance)
        logger.debug("instance='%s' - AFTER!", instance)

        if instance == "":
            logger.warning("Empty instance after tidyup.domain(), domain='%s'", domain)
            continue

        # Convert internationalized names to their ASCII (punycode) form
        logger.debug("instance='%s' - BEFORE!", instance)
        instance = instance.encode("idna").decode("utf-8")
        logger.debug("instance='%s' - AFTER!", instance)

        if not utils.is_domain_wanted(instance):
            logger.debug("instance='%s' is not wanted - SKIPPED!", instance)
            continue
        elif instance.find("/profile/") > 0 or instance.find("/users/") > 0 or (instances.is_registered(instance.split("/")[0]) and instance.find("/c/") > 0):
            logger.debug("instance='%s' is a link to a single user profile - SKIPPED!", instance)
            continue
        elif instance.find("/tag/") > 0:
            logger.debug("instance='%s' is a link to a tag - SKIPPED!", instance)
            continue
        elif not instances.is_registered(instance):
            logger.debug("Adding new instance='%s',domain='%s',command='%s'", instance, domain, command)
            instances.add(instance, domain, command)

    logger.debug("Invoking cookies.clear(%s) ...", domain)
    cookies.clear(domain)

    logger.debug("Checking if domain='%s' has pending updates ...", domain)
    if instances.has_pending(domain):
        logger.debug("Flushing updates for domain='%s' ...", domain)
        instances.update_data(domain)

    logger.debug("EXIT!")
def fetch_peers(domain: str, software: str, origin: str) -> list:
    """Return the peers reported by `domain`.

    misskey, lemmy and peertube are delegated to their network modules. For
    every other software the generic API paths are queried in order and the
    first non-empty JSON answer is used.

    NOTE(review): this block was restored from a damaged copy of the file;
    the `try:`, the `paths` list beyond its first entry, the arguments of
    network.get_json_api(), `break` and the `return` statements were
    reconstructed from context - verify against upstream.

    Args:
        domain: Domain to query; checked by domain_helper.raise_on().
        software: Software type of `domain`, or None.
        origin: Origin domain, only forwarded to lemmy.fetch_peers().

    Returns:
        The fetched peers, or an empty list when the CSRF check raised or
        no path delivered a list.

    Raises:
        ValueError: If `software` is neither a string nor None.
    """
    logger.debug("domain='%s',software='%s',origin='%s' - CALLED!", domain, software, origin)
    domain_helper.raise_on(domain)

    if not isinstance(software, str) and software is not None:
        raise ValueError(f"software[]='{type(software)}' is not of type 'str'")

    if software == "misskey":
        logger.debug("Invoking misskey.fetch_peers(%s) ...", domain)
        return misskey.fetch_peers(domain)
    elif software == "lemmy":
        logger.debug("Invoking lemmy.fetch_peers(%s,%s) ...", domain, origin)
        return lemmy.fetch_peers(domain, origin)
    elif software == "peertube":
        logger.debug("Invoking peertube.fetch_peers(%s) ...", domain)
        return peertube.fetch_peers(domain)

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        return list()

    paths = [
        "/api/v1/instance/peers",
        "/api/v3/site",
    ]

    # Init peers variable
    peers = list()

    logger.debug("Checking %d paths ...", len(paths))
    for path in paths:
        logger.debug("Fetching path='%s' from domain='%s',software='%s' ...", path, domain, software)
        data = network.get_json_api(
            domain,
            path,
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("data[]='%s'", type(data))
        if "error_message" in data:
            logger.debug("Was not able to fetch peers from path='%s',domain='%s' ...", path, domain)
            instances.set_last_error(domain, data)
        elif "json" in data and len(data["json"]) > 0:
            logger.debug("Querying API path='%s' was successful: domain='%s',data[json][%s]()=%d", path, domain, type(data['json']), len(data['json']))
            peers = data["json"]

            logger.debug("Marking domain='%s' as successfully handled ...", domain)
            instances.set_success(domain)
            break

    if not isinstance(peers, list):
        # A successful answer may still carry something other than a list
        logger.warning("peers[]='%s' is not of type 'list', maybe bad API response?", type(peers))
        peers = list()

    logger.debug("Invoking instances.set_total_peers(%s,%d) ...", domain, len(peers))
    instances.set_total_peers(domain, peers)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers
def fetch_nodeinfo(domain: str, path: str = None) -> dict:
    """Fetch nodeinfo for `domain`, preferring auto-discovery.

    fetch_wellknown_nodeinfo() is tried first. When it yields no usable
    JSON, the well-known static nodeinfo paths are requested one by one
    until one of them succeeds. If `path` is given, only the static path
    matching it (bare, or as a full http/https URL on `domain`) is tried.

    NOTE(review): this block was restored from a damaged copy of the file;
    the early `return`, the `headers`/`data` initialisers, `try:`, the
    "status_code" entry of the error dict, the odd entries of
    `request_paths`, the arguments of network.get_json_api(), `break` and
    the final `return` were reconstructed from context - verify against
    upstream.

    Args:
        domain: Domain to query; checked by domain_helper.raise_on().
        path: Optional nodeinfo path or URL restricting the static check.

    Returns:
        The response dict of the successful request, the last failed
        response, an empty dict when no request was made, or a dict with
        "error_message" and "exception" when the CSRF check raised.

    Raises:
        ValueError: If `path` is neither a string nor None.
    """
    logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
    domain_helper.raise_on(domain)

    if not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")

    logger.debug("Fetching nodeinfo from domain='%s' ...", domain)
    nodeinfo = fetch_wellknown_nodeinfo(domain)

    logger.debug("nodeinfo[%s](%d)='%s'", type(nodeinfo), len(nodeinfo), nodeinfo)
    if "error_message" not in nodeinfo and "json" in nodeinfo and len(nodeinfo["json"]) > 0:
        logger.debug("Invoking instances.set_last_nodeinfo(%s) ...", domain)
        instances.set_last_nodeinfo(domain)

        logger.debug("Found nodeinfo[json]()=%d - EXIT!", len(nodeinfo['json']))
        return nodeinfo

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()
    data = dict()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (nodeinfo,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        instances.set_software(domain, None)
        instances.set_detection_mode(domain, None)
        instances.set_nodeinfo_url(domain, None)

        return {
            "status_code"  : 500,
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    request_paths = [
        "/nodeinfo/2.1.json",
        "/nodeinfo/2.1",
        "/nodeinfo/2.0.json",
        "/nodeinfo/2.0",
        "/nodeinfo/1.0.json",
        "/nodeinfo/1.0",
        "/api/v1/instance",
    ]

    for request in request_paths:
        logger.debug("request='%s'", request)
        # Absolute forms of the candidate; they must be built from 'request',
        # building them from 'path' can never equal 'path' itself.
        http_url = f"http://{domain}{request}"
        https_url = f"https://{domain}{request}"

        logger.debug("path[%s]='%s',request='%s',http_url='%s',https_url='%s'", type(path), path, request, http_url, https_url)
        if path is None or path in [request, http_url, https_url]:
            logger.debug("path='%s',http_url='%s',https_url='%s'", path, http_url, https_url)
            if path in [http_url, https_url]:
                logger.debug("domain='%s',path='%s' has protocol in path, splitting ...", domain, path)
                components = urlparse(path)
                path = components.path

            logger.debug("Fetching request='%s' from domain='%s' ...", request, domain)
            data = network.get_json_api(
                domain,
                request,
                headers,
                (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
            )

            logger.debug("data[]='%s'", type(data))
            if "error_message" not in data and "json" in data:
                logger.debug("Success: request='%s' - Setting detection_mode=STATIC_CHECK ...", request)
                instances.set_last_nodeinfo(domain)
                instances.set_detection_mode(domain, "STATIC_CHECK")
                instances.set_nodeinfo_url(domain, request)
                break

            # .get() because a response without "json" need not carry these keys
            logger.warning("Failed fetching nodeinfo from domain='%s',status_code='%s',error_message='%s'", domain, data.get('status_code'), data.get('error_message'))

    logger.debug("data()=%d - EXIT!", len(data))
    return data
def fetch_wellknown_nodeinfo(domain: str) -> dict:
    """Discover and fetch nodeinfo through /.well-known/nodeinfo.

    The well-known document is fetched and its "links" are searched for the
    known schema identifiers, newest schema first. The first matching link
    whose host is wanted is fetched; on success the detection mode is set
    to AUTO_DISCOVERY and the link's href is stored as nodeinfo URL.

    NOTE(review): this block was restored from a damaged copy of the file;
    `try:`, the "status_code" entry of the error dict, the per-identifier
    reset of `data`, `url = link["href"]`, call arguments, `continue`,
    `break`, both `else:` and the final `return` were reconstructed from
    context - verify against upstream.

    Args:
        domain: Domain to query; checked by domain_helper.raise_on().

    Returns:
        The response dict of the last request made (possibly empty when no
        link matched), or a dict with "error_message" and "exception" when
        the CSRF check raised.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    # "rel" identifiers (no real URLs)
    nodeinfo_identifier = [
        "https://nodeinfo.diaspora.software/ns/schema/2.1",
        "http://nodeinfo.diaspora.software/ns/schema/2.1",
        "https://nodeinfo.diaspora.software/ns/schema/2.0",
        "http://nodeinfo.diaspora.software/ns/schema/2.0",
        "https://nodeinfo.diaspora.software/ns/schema/1.1",
        "http://nodeinfo.diaspora.software/ns/schema/1.1",
        "https://nodeinfo.diaspora.software/ns/schema/1.0",
        "http://nodeinfo.diaspora.software/ns/schema/1.0",
    ]

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_wellknown_nodeinfo,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)

        return {
            "status_code"  : 500,
            # Textual message, same shape as the one fetch_nodeinfo() builds
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    logger.debug("Fetching .well-known info for domain='%s'", domain)
    data = network.get_json_api(
        domain,
        "/.well-known/nodeinfo",
        headers,
        (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
    )

    logger.debug("data[]='%s'", type(data))
    if "error_message" not in data:
        nodeinfo = data["json"]

        logger.debug("Marking domain='%s' as successfully handled ...", domain)
        instances.set_success(domain)

        logger.debug("Found entries: nodeinfo()=%d,domain='%s'", len(nodeinfo), domain)
        if "links" in nodeinfo:
            logger.debug("Found nodeinfo[links]()=%d record(s),", len(nodeinfo["links"]))
            for niid in nodeinfo_identifier:
                # Forget the previous response so the success check below
                # only sees what was fetched for this identifier
                data = dict()

                logger.debug("Checking niid='%s' ...", niid)
                for link in nodeinfo["links"]:
                    logger.debug("link[%s]='%s'", type(link), link)
                    if not isinstance(link, dict) or not "rel" in link:
                        logger.debug("link[]='%s' is not of type 'dict' or no element 'rel' found - SKIPPED!", type(link))
                        continue
                    elif link["rel"] != niid:
                        logger.debug("link[re]='%s' does not matched niid='%s' - SKIPPED!", link["rel"], niid)
                        continue
                    elif "href" not in link:
                        logger.warning("link[rel]='%s' has no element 'href' - SKIPPED!", link["rel"])
                        continue
                    elif link["href"] is None:
                        logger.debug("link[href] is None, link[rel]='%s' - SKIPPED!", link["rel"])
                        continue

                    # Default is that 'href' has a complete URL, but some hosts don't send that
                    logger.debug("link[rel]='%s' matches niid='%s'", link["rel"], niid)
                    url = link["href"]
                    components = urlparse(url)

                    logger.debug("components[%s]='%s'", type(components), components)
                    if components.scheme == "" and components.netloc == "":
                        logger.warning("link[href]='%s' has no scheme and host name in it, prepending from domain='%s'", link['href'], domain)
                        url = f"https://{domain}{url}"
                        components = urlparse(url)
                    elif components.netloc == "":
                        logger.warning("link[href]='%s' has no netloc set, setting domain='%s'", link["href"], domain)
                        url = f"{components.scheme}://{domain}{components.path}"
                        components = urlparse(url)

                    logger.debug("components.netloc[]='%s'", type(components.netloc))
                    if not utils.is_domain_wanted(components.netloc):
                        logger.debug("components.netloc='%s' is not wanted - SKIPPED!", components.netloc)
                        continue

                    logger.debug("Fetching nodeinfo from url='%s' ...", url)
                    data = network.fetch_api_url(
                        url,
                        (config.get("connection_timeout"), config.get("read_timeout"))
                    )

                    logger.debug("link[href]='%s',data[]='%s'", link["href"], type(data))
                    if "error_message" not in data and "json" in data:
                        logger.debug("Found JSON data()=%d,link[href]='%s' - Setting detection_mode=AUTO_DISCOVERY ...", len(data), link["href"])
                        instances.set_detection_mode(domain, "AUTO_DISCOVERY")
                        instances.set_nodeinfo_url(domain, link["href"])

                        logger.debug("Marking domain='%s' as successfully handled ...", domain)
                        instances.set_success(domain)
                        break
                    else:
                        logger.debug("Setting last error for domain='%s',data[]='%s'", domain, type(data))
                        instances.set_last_error(domain, data)

                logger.debug("data()=%d", len(data))
                if "error_message" not in data and "json" in data:
                    # Stop at the first (newest) schema that could be fetched
                    logger.debug("Auto-discovery successful: domain='%s'", domain)
                    break
        else:
            logger.warning("nodeinfo does not contain 'links': domain='%s'", domain)

    logger.debug("Returning data[]='%s' - EXIT!", type(data))
    return data
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
    """Detect the software of `domain` from meta tags of an HTML page.

    The page at `path` is fetched (redirects allowed) and searched for, in
    this order, <meta name="generator">, <meta property="og:site_name"> and
    <meta property="og:platform">. The found value is tidied up and version
    numbers as well as phrases such as "powered by" are stripped.

    NOTE(review): this block was restored from a damaged copy of the file;
    the empty-path guard, `software = None`, the arguments of
    network.fetch_response() other than the timeout tuple, the assignment
    of None for an empty string and the final `return` were reconstructed
    from context - verify against upstream.

    Args:
        domain: Domain to query; checked by domain_helper.raise_on().
        path: Non-empty path of the page to inspect.

    Returns:
        The detected software name, or None if nothing was found.

    Raises:
        ValueError: If `path` is not a string or is empty.
        requests.exceptions.TooManyRedirects: If the response URL does not
            belong to `domain`.
    """
    logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
    domain_helper.raise_on(domain)

    if not isinstance(path, str):
        raise ValueError(f"path[]='{type(path)}' is not of type 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' is empty")

    logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
    software = None

    logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
    response = network.fetch_response(
        domain, path,
        # NOTE(review): headers argument presumed to be network.web_headers - confirm
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=True
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    # 'in' instead of find() > 0: a document may start directly with "<html"
    if response.ok and response.status_code < 300 and "<html" in response.text and domain_helper.is_in_url(domain, response.url):
        logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
        doc = bs4.BeautifulSoup(response.text, "html.parser")

        logger.debug("doc[]='%s'", type(doc))
        generator = doc.find("meta", {"name"    : "generator"})
        site_name = doc.find("meta", {"property": "og:site_name"})
        platform  = doc.find("meta", {"property": "og:platform"})

        logger.debug("generator[]='%s',site_name[]='%s',platform[]='%s'", type(generator), type(site_name), type(platform))
        if isinstance(generator, bs4.element.Tag) and isinstance(generator.get("content"), str):
            logger.debug("Found generator meta tag: domain='%s'", domain)
            software = tidyup.domain(generator.get("content"))

            logger.debug("software[%s]='%s'", type(software), software)
            if software is not None and software != "":
                logger.info("domain='%s' is generated by software='%s' - Setting detection_mode=GENERATOR ...", domain, software)
                instances.set_detection_mode(domain, "GENERATOR")
        elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str):
            logger.debug("Found property=og:site_name, domain='%s'", domain)
            software = tidyup.domain(site_name.get("content"))

            logger.debug("software[%s]='%s'", type(software), software)
            if software is not None and software != "":
                logger.debug("domain='%s' has og:site_name='%s' - Setting detection_mode=SITE_NAME ...", domain, software)
                instances.set_detection_mode(domain, "SITE_NAME")
        elif isinstance(platform, bs4.element.Tag) and isinstance(platform.get("content"), str):
            logger.debug("Found property=og:platform, domain='%s'", domain)
            software = tidyup.domain(platform.get("content"))

            logger.debug("software[%s]='%s'", type(software), software)
            if software is not None and software != "":
                logger.debug("domain='%s' has og:platform='%s' - Setting detection_mode=PLATFORM ...", domain, software)
                instances.set_detection_mode(domain, "PLATFORM")
    elif not domain_helper.is_in_url(domain, response.url):
        logger.warning("domain='%s' doesn't match response.url='%s', maybe redirect to other domain?", domain, response.url)

        components = urlparse(response.url)

        logger.debug("components[]='%s'", type(components))
        if not instances.is_registered(components.netloc):
            # Crawl the redirect target as an instance of its own
            logger.info("components.netloc='%s' is not registered, adding ...", components.netloc)
            fetch_instances(components.netloc, domain, None, "fetch_generator")

        message = f"Redirect from domain='{domain}' to response.url='{response.url}'"
        instances.set_last_error(domain, message)
        instances.set_software(domain, None)
        instances.set_detection_mode(domain, None)
        instances.set_nodeinfo_url(domain, None)

        raise requests.exceptions.TooManyRedirects(message)

    logger.debug("software[]='%s'", type(software))
    if isinstance(software, str) and software == "":
        logger.debug("Corrected empty string to None for software of domain='%s'", domain)
        software = None
    elif isinstance(software, str) and ("." in software or " " in software):
        logger.debug("software='%s' may contain a version number, domain='%s', removing it ...", software, domain)
        software = version.remove(software)

    logger.debug("software[]='%s'", type(software))
    if isinstance(software, str) and "powered by " in software:
        logger.debug("software='%s' has 'powered by' in it", software)
        software = version.remove(version.strip_powered_by(software))
    elif isinstance(software, str) and " hosted on " in software:
        logger.debug("software='%s' has 'hosted on' in it", software)
        software = version.remove(version.strip_hosted_on(software))
    elif isinstance(software, str) and " by " in software:
        logger.debug("software='%s' has ' by ' in it", software)
        software = version.strip_until(software, " by ")
    elif isinstance(software, str) and " see " in software:
        logger.debug("software='%s' has ' see ' in it", software)
        software = version.strip_until(software, " see ")

    logger.debug("software='%s' - EXIT!", software)
    return software
def determine_software(domain: str, path: str = None) -> str:
    """Determine which software `domain` runs.

    Nodeinfo is fetched via fetch_nodeinfo() and its [software][name]
    entry is used. Whenever nodeinfo is unavailable, reports an error or
    lacks that entry, fetch_generator_from_path() is used instead. The
    result is passed through software_helper.alias() and stripped of
    version numbers and "powered by" phrases.

    NOTE(review): this block was restored from a damaged copy of the file;
    `software = None`, the `elif "json" in data:` / `else:` branches with
    `data = data["json"]`, the `if software is None:` guard and both
    `return` statements were reconstructed from context - verify against
    upstream.

    Args:
        domain: Domain to inspect; checked by domain_helper.raise_on().
        path: Optional nodeinfo path forwarded to fetch_nodeinfo().

    Returns:
        The software name, or None if it could not be determined.

    Raises:
        ValueError: If `path` is neither a string nor None.
        Exception: Whatever exception fetch_nodeinfo() returned under the
            "exception" key is re-raised.
    """
    logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
    domain_helper.raise_on(domain)

    if not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")

    logger.debug("Determining software for domain='%s',path='%s'", domain, path)
    software = None

    logger.debug("Fetching nodeinfo from domain='%s' ...", domain)
    data = fetch_nodeinfo(domain, path)

    logger.debug("data[%s]='%s'", type(data), data)
    if "exception" in data:
        # Continue raising it
        logger.debug("data()=%d contains exception='%s' - raising ...", len(data), type(data["exception"]))
        raise data["exception"]
    elif "error_message" in data:
        logger.debug("Returned error_message during fetching nodeinfo: '%s',status_code=%d", data['error_message'], data['status_code'])
        software = fetch_generator_from_path(domain)
        logger.debug("Generator for domain='%s' is: '%s'", domain, software)
    elif "json" in data:
        logger.debug("domain='%s',path='%s',data[json] found ...", domain, path)
        data = data["json"]
    else:
        logger.debug("JSON response from domain='%s' does not include [software][name], fetching / ...", domain)
        software = fetch_generator_from_path(domain)
        logger.debug("Generator for domain='%s' is: '%s'", domain, software)

    # Only a dict can carry [software][name]; a null or string "software"
    # value must not be subscripted or substring-matched.
    software_info = data.get("software") if isinstance(data, dict) else None

    if "status" in data and data["status"] == "error" and "message" in data:
        logger.warning("JSON response is an error: '%s' - Resetting detection_mode,nodeinfo_url ...", data["message"])
        instances.set_last_error(domain, data["message"])
        instances.set_detection_mode(domain, None)
        instances.set_nodeinfo_url(domain, None)
        software = fetch_generator_from_path(domain)
        logger.debug("Generator for domain='%s' is: '%s'", domain, software)
    elif isinstance(software_info, dict) and "name" in software_info:
        logger.debug("Found data[json][software][name] in JSON response")
        software = software_info["name"]
        logger.debug("software[%s]='%s' - FOUND!", type(software), software)
    elif "message" in data:
        logger.warning("JSON response contains only a message: '%s' - Resetting detection_mode,nodeinfo_url ...", data["message"])
        instances.set_last_error(domain, data["message"])
        instances.set_detection_mode(domain, None)
        instances.set_nodeinfo_url(domain, None)

        logger.debug("Invoking fetch_generator_from_path(%s) ...", domain)
        software = fetch_generator_from_path(domain)
        logger.debug("Generator for domain='%s' is: '%s'", domain, software)
    else:
        # No usable [software][name] entry
        logger.debug("JSON response from domain='%s' does not include [software][name] - Resetting detection_mode,nodeinfo_url ...", domain)
        instances.set_detection_mode(domain, None)
        instances.set_nodeinfo_url(domain, None)

        logger.debug("Invoking fetch_generator_from_path(%s) ...", domain)
        software = fetch_generator_from_path(domain)
        logger.debug("Generator for domain='%s' is: '%s'", domain, software)

    logger.debug("software[%s]='%s'", type(software), software)
    if software is None:
        logger.debug("Returning None - EXIT!")
        return None

    logger.debug("software='%s'- BEFORE!", software)
    software = software_helper.alias(software)
    logger.debug("software['%s']='%s' - AFTER!", type(software), software)

    if str(software) == "":
        logger.debug("software for domain='%s' was not detected, trying generator ...", domain)
        software = fetch_generator_from_path(domain)
    elif len(str(software)) > 0 and ("." in software or " " in software):
        logger.debug("software='%s' may contain a version number, domain='%s', removing it ...", software, domain)
        software = version.remove(software)

    logger.debug("software[]='%s'", type(software))
    if isinstance(software, str) and "powered by" in software:
        logger.debug("software='%s' has 'powered by' in it", software)
        software = version.remove(version.strip_powered_by(software))

    logger.debug("software='%s' - EXIT!", software)
    return software
def find_domains(tag: bs4.element.Tag) -> list:
    """Extract blocked domains and their reasons from an HTML table.

    The first <td> of each row is taken as domain and the second one as
    reason. Rows without cells, unwanted domains and invalid domain names
    are skipped.

    NOTE(review): this block was restored from a damaged copy of the file;
    the `domains` list, all `continue` statements, the append calls
    (including the "gab.com" and "gab.ai" entries of the multi-domain
    special case) and the final `return` were reconstructed from context -
    verify against upstream.

    Args:
        tag: Table element whose <tr> rows are inspected.

    Returns:
        A list of dicts, each with the keys "domain" and "reason".

    Raises:
        ValueError: If `tag` is not a bs4.element.Tag.
        KeyError: If `tag` contains no <tr> element.
    """
    logger.debug("tag[]='%s' - CALLED!", type(tag))
    if not isinstance(tag, bs4.element.Tag):
        raise ValueError(f"Parameter tag[]='{type(tag)}' is not type of bs4.element.Tag")

    # Select once and reuse for both the emptiness check and the loop
    rows = tag.select("tr")
    if len(rows) == 0:
        raise KeyError("No table rows found in table!")

    domains = list()
    for element in rows:
        logger.debug("element[]='%s'", type(element))
        cells = element.find_all("td")
        if len(cells) == 0:
            logger.debug("Skipping element, no <td> found")
            continue

        domain = tidyup.domain(cells[0].text)
        # A row may consist of a single cell; it then has no reason.
        # NOTE(review): None is presumed to be an acceptable reason - confirm
        reason = tidyup.reason(cells[1].text) if len(cells) > 1 else None

        logger.debug("domain='%s',reason='%s'", domain, reason)

        if not utils.is_domain_wanted(domain):
            logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
            continue
        elif domain == "gab.com/.ai, develop.gab.com":
            # This one table cell names three domains at once
            logger.debug("Multiple domains detected in one row")
            domains.append({
                "domain": "gab.com",
                "reason": reason,
            })
            domains.append({
                "domain": "gab.ai",
                "reason": reason,
            })
            domains.append({
                "domain": "develop.gab.com",
                "reason": reason,
            })
            continue
        elif not validators.domain(domain.split("/")[0]):
            logger.warning("domain='%s' is not a valid domain - SKIPPED!", domain)
            continue

        logger.debug("Adding domain='%s',reason='%s' ...", domain, reason)
        domains.append({
            "domain": domain,
            "reason": reason,
        })

    logger.debug("domains()=%d - EXIT!", len(domains))
    return domains
def add_peers(rows: dict) -> list:
    """Collect all wanted peers from the federation lists in `rows`.

    The keys "linked", "allowed" and "blocked" are inspected. Each entry
    may be a plain domain string or a dict carrying a "domain" element; it
    is tidied up and kept only if utils.is_domain_wanted() accepts it.

    NOTE(review): this block was restored from a damaged copy of the file;
    the `peers` list, the `continue` statements, the `else:` before the
    inner raise, the append call and the final `return` were reconstructed
    from context - verify against upstream.

    Args:
        rows: Mapping of list name to its peer entries.

    Returns:
        The list of tidied-up, wanted peer domains.

    Raises:
        ValueError: If `rows` is not a dict or an entry is neither a
            string nor a dict with a "domain" element.
    """
    logger.debug("rows[]='%s' - CALLED!", type(rows))
    if not isinstance(rows, dict):
        raise ValueError(f"Parameter rows[]='{type(rows)}' is not of type 'dict'")

    peers = list()
    for key in ["linked", "allowed", "blocked"]:
        logger.debug("Checking key='%s'", key)
        if key not in rows or rows[key] is None:
            logger.debug("Cannot find key='%s' or it is NoneType - SKIPPED!", key)
            continue

        logger.debug("Adding %d peer(s) to peers list ...", len(rows[key]))
        for peer in rows[key]:
            logger.debug("peer[%s]='%s' - BEFORE!", type(peer), peer)
            if peer is None or peer == "":
                logger.debug("peer is empty - SKIPPED")
                continue
            elif isinstance(peer, dict) and "domain" in peer:
                logger.debug("peer[domain]='%s'", peer["domain"])
                peer = tidyup.domain(peer["domain"])
            elif isinstance(peer, str):
                logger.debug("peer='%s'", peer)
                peer = tidyup.domain(peer)
            else:
                raise ValueError(f"peer[]='{type(peer)}' is not supported,key='{key}'")

            logger.debug("peer[%s]='%s' - AFTER!", type(peer), peer)
            if not utils.is_domain_wanted(peer):
                logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                continue

            logger.debug("Appending peer='%s' ...", peer)
            peers.append(peer)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers