# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
import logging

from urllib.parse import urlparse

import bs4
import requests
import validators

from fba import csrf
from fba import utils

from fba.helpers import config
from fba.helpers import cookies
from fba.helpers import domain as domain_helper
from fba.helpers import software as software_helper
from fba.helpers import tidyup
from fba.helpers import version

from fba.http import network

from fba.models import instances

from fba.networks import lemmy
from fba.networks import misskey
from fba.networks import peertube
# Configure logging once at import time and create this module's logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def fetch_instances(domain: str, origin: str, software: str, command: str, path: str = None):
    """Register `domain` (when new) and every wanted peer it reports.

    Args:
        domain: Instance to query; validated by domain_helper.raise_on().
        origin: Domain that led to `domain`, or None.
        software: Software name of `domain`. When None it is detected with
            determine_software(); a network error during detection is
            recorded and processing continues with software=None.
        command: Non-empty name of the invoking command, stored with new rows.
        path: Optional nodeinfo path handed on to determine_software().

    Raises:
        ValueError: If `origin`, `command` or `software` has the wrong type,
            or `command` is an empty string.
    """
    logger.debug("domain='%s',origin='%s',software='%s',command='%s',path='%s' - CALLED!", domain, origin, software, command, path)
    domain_helper.raise_on(domain)

    if not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not of type 'str'")
    elif not isinstance(command, str):
        raise ValueError(f"Parameter command[]='{type(command)}' is not of type 'str'")
    elif command == "":
        raise ValueError("Parameter 'command' is empty")
    elif software is None:
        try:
            logger.debug("Software for domain='%s' is not set, determining ...", domain)
            software = determine_software(domain, path)
        except network.exceptions as exception:
            logger.warning("Exception '%s' during determining software type", type(exception))
            instances.set_last_error(domain, exception)

        logger.debug("Determined software='%s' for domain='%s'", software, domain)
    elif not isinstance(software, str):
        raise ValueError(f"Parameter software[]='{type(software)}' is not of type 'str'")

    logger.debug("Checking if domain='%s' is registered ...", domain)
    if not instances.is_registered(domain):
        logger.debug("Adding new domain='%s',origin='%s',command='%s',path='%s',software='%s'", domain, origin, command, path, software)
        instances.add(domain, origin, command, path, software)

    logger.debug("Updating last_instance_fetch for domain='%s' ...", domain)
    instances.set_last_instance_fetch(domain)

    # Start with an empty list so a failed fetch is handled like "no peers"
    peerlist = list()
    try:
        logger.debug("Fetching instances for domain='%s',software='%s',origin='%s'", domain, software, origin)
        peerlist = fetch_peers(domain, software, origin)
    except network.exceptions as exception:
        logger.warning("Cannot fetch peers from domain='%s': '%s'", domain, type(exception))

    logger.debug("peerlist[]='%s'", type(peerlist))
    if isinstance(peerlist, list):
        logger.debug("Invoking instances.set_total_peers(%s,%d) ...", domain, len(peerlist))
        instances.set_total_peers(domain, peerlist)

    logger.debug("peerlist[]='%s'", type(peerlist))
    if peerlist is None or len(peerlist) == 0:
        logger.warning("Cannot fetch peers: domain='%s'", domain)

        if instances.has_pending(domain):
            logger.debug("Flushing updates for domain='%s' ...", domain)
            instances.update_data(domain)

        logger.debug("Invoking cookies.clear(%s) ...", domain)
        cookies.clear(domain)

        logger.debug("EXIT!")
        return

    logger.info("Checking %d instance(s) from domain='%s',software='%s' ...", len(peerlist), domain, software)
    for instance in peerlist:
        logger.debug("instance='%s'", instance)
        if instance is None:
            # Skip "None" types as tidyup.domain() cannot parse them
            continue

        logger.debug("instance='%s' - BEFORE!", instance)
        instance = tidyup.domain(instance)
        logger.debug("instance='%s' - AFTER!", instance)

        if instance == "":
            logger.warning("Empty instance after tidyup.domain(), domain='%s'", domain)
            continue

        logger.debug("instance='%s' - BEFORE!", instance)
        try:
            # The "idna" codec raises UnicodeError on empty or over-long
            # labels; one malformed remote peer must not abort the whole run.
            instance = instance.encode("idna").decode("utf-8")
        except UnicodeError as exception:
            logger.warning("Cannot encode instance='%s' to IDNA: '%s' - SKIPPED!", instance, exception)
            continue
        logger.debug("instance='%s' - AFTER!", instance)

        if not utils.is_domain_wanted(instance):
            logger.debug("instance='%s' is not wanted - SKIPPED!", instance)
            continue
        elif instance.find("/profile/") > 0 or instance.find("/users/") > 0 or (instances.is_registered(instance.split("/")[0]) and instance.find("/c/") > 0):
            logger.debug("instance='%s' is a link to a single user profile - SKIPPED!", instance)
            continue
        elif instance.find("/tag/") > 0:
            logger.debug("instance='%s' is a link to a tag - SKIPPED!", instance)
            continue
        elif not instances.is_registered(instance):
            logger.debug("Adding new instance='%s',domain='%s',command='%s'", instance, domain, command)
            instances.add(instance, domain, command)

    logger.debug("Invoking cookies.clear(%s) ...", domain)
    cookies.clear(domain)

    logger.debug("Checking if domain='%s' has pending updates ...", domain)
    if instances.has_pending(domain):
        logger.debug("Flushing updates for domain='%s' ...", domain)
        instances.update_data(domain)

    logger.debug("EXIT!")
def fetch_peers(domain: str, software: str, origin: str) -> list:
    """Return the peer list reported by `domain`.

    misskey, lemmy and peertube are delegated to their network modules. Any
    other software is queried through the generic API paths below; the first
    path that yields a non-empty JSON body wins.

    Args:
        domain: Instance to query; validated by domain_helper.raise_on().
        software: Software name of `domain`, or None.
        origin: Origin domain, only forwarded to lemmy.fetch_peers().

    Returns:
        The peers as a list; an empty list when CSRF detection fails, no path
        succeeds, or the response is not a list.

    Raises:
        ValueError: If `software` is neither a str nor None.
    """
    logger.debug("domain='%s',software='%s',origin='%s' - CALLED!", domain, software, origin)
    domain_helper.raise_on(domain)

    if not isinstance(software, str) and software is not None:
        raise ValueError(f"software[]='{type(software)}' is not of type 'str'")

    if software == "misskey":
        logger.debug("Invoking misskey.fetch_peers(%s) ...", domain)
        return misskey.fetch_peers(domain)
    elif software == "lemmy":
        logger.debug("Invoking lemmy.fetch_peers(%s,%s) ...", domain, origin)
        return lemmy.fetch_peers(domain, origin)
    elif software == "peertube":
        logger.debug("Invoking peertube.fetch_peers(%s) ...", domain)
        return peertube.fetch_peers(domain)

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        return list()

    paths = [
        "/api/v1/instance/peers",
        "/api/v3/site",
    ]

    # Init peers variable
    peers = list()

    logger.debug("Checking %d paths ...", len(paths))
    for path in paths:
        logger.debug("Fetching path='%s' from domain='%s',software='%s' ...", path, domain, software)
        data = network.get_json_api(
            domain,
            path,
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("data[]='%s'", type(data))
        if "error_message" in data:
            logger.debug("Was not able to fetch peers from path='%s',domain='%s' ...", path, domain)
            instances.set_last_error(domain, data)
        elif "json" in data and len(data["json"]) > 0:
            logger.debug("Querying API path='%s' was successful: domain='%s',data[json][%s]()=%d", path, domain, type(data['json']), len(data['json']))
            peers = data["json"]

            logger.debug("Marking domain='%s' as successfully handled ...", domain)
            instances.set_success(domain)
            break

    # A non-list body is treated as "no peers"
    if not isinstance(peers, list):
        logger.warning("peers[]='%s' is not of type 'list', maybe bad API response?", type(peers))
        peers = list()

    logger.debug("Invoking instances.set_total_peers(%s,%d) ...", domain, len(peers))
    instances.set_total_peers(domain, peers)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers
def fetch_nodeinfo(domain: str, path: str = None) -> dict:
    """Fetch nodeinfo for `domain`, via auto-discovery first, then static paths.

    Args:
        domain: Instance to query; validated by domain_helper.raise_on().
        path: Optional nodeinfo location. May be one of the static request
            paths below or the same path prefixed with http(s)://<domain>.
            When None, every static path is tried in order.

    Returns:
        The response dict of the first successful request (it contains a
        "json" key), the last failed response, or an empty dict when no static
        path matched `path`. When CSRF detection fails, a dict with
        "status_code", "error_message" and "exception" is returned.

    Raises:
        ValueError: If `path` is neither a str nor None.
    """
    logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
    domain_helper.raise_on(domain)

    if not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")

    logger.debug("Fetching nodeinfo from domain='%s' ...", domain)
    nodeinfo = fetch_wellknown_nodeinfo(domain)

    logger.debug("nodeinfo[%s](%d='%s'", type(nodeinfo), len(nodeinfo), nodeinfo)
    if "error_message" not in nodeinfo and "json" in nodeinfo and len(nodeinfo["json"]) > 0:
        logger.debug("Invoking instances.set_last_nodeinfo(%s) ...", domain)
        instances.set_last_nodeinfo(domain)

        logger.debug("Found nodeinfo[json]()=%d - EXIT!", len(nodeinfo['json']))
        return nodeinfo

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()
    data = dict()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (nodeinfo,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        instances.set_software(domain, None)
        instances.set_detection_mode(domain, None)
        instances.set_nodeinfo_url(domain, None)
        return {
            "status_code"  : 500,
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    request_paths = [
        "/nodeinfo/2.1.json",
        "/nodeinfo/2.1",
        "/nodeinfo/2.0.json",
        "/nodeinfo/2.0",
        "/nodeinfo/1.0.json",
        "/nodeinfo/1.0",
        "/api/v1/instance",
    ]

    for request in request_paths:
        logger.debug("request='%s'", request)
        # Absolute forms of this request. They must be built from `request`:
        # built from `path`, they could never equal `path` itself.
        http_url  = f"http://{domain}{request}"
        https_url = f"https://{domain}{request}"

        logger.debug("path[%s]='%s',request='%s',http_url='%s',https_url='%s'", type(path), path, request, http_url, https_url)
        if path is None or path in [request, http_url, https_url]:
            logger.debug("path='%s',http_url='%s',https_url='%s'", path, http_url, https_url)
            if path in [http_url, https_url]:
                logger.debug("domain='%s',path='%s' has protocol in path, splitting ...", domain, path)
                components = urlparse(path)
                path = components.path

            logger.debug("Fetching request='%s' from domain='%s' ...", request, domain)
            data = network.get_json_api(
                domain,
                request,
                headers,
                (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
            )

            logger.debug("data[]='%s'", type(data))
            if "error_message" not in data and "json" in data:
                logger.debug("Success: request='%s' - Setting detection_mode=STATIC_CHECK ...", request)
                instances.set_last_nodeinfo(domain)
                instances.set_detection_mode(domain, "STATIC_CHECK")
                instances.set_nodeinfo_url(domain, request)
                break

            # .get(): a response without "json" need not carry "error_message"
            logger.warning("Failed fetching nodeinfo from domain='%s',status_code='%s',error_message='%s'", domain, data.get('status_code'), data.get('error_message'))

    logger.debug("data()=%d - EXIT!", len(data))
    return data
def fetch_wellknown_nodeinfo(domain: str) -> dict:
    """Discover and fetch nodeinfo through /.well-known/nodeinfo.

    The well-known document's "links" are matched against the known schema
    identifiers, newest schema first, and the first link that can be fetched
    successfully is used.

    Args:
        domain: Instance to query; validated by domain_helper.raise_on().

    Returns:
        The response dict of the fetched nodeinfo document on success.
        Otherwise the last response seen: the failed well-known response, the
        well-known response itself when it has no "links", or an empty dict
        when no link matched. When CSRF detection fails, a dict with
        "status_code", "error_message" and "exception" is returned.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    # "rel" identifiers (no real URLs)
    nodeinfo_identifier = [
        "https://nodeinfo.diaspora.software/ns/schema/2.1",
        "http://nodeinfo.diaspora.software/ns/schema/2.1",
        "https://nodeinfo.diaspora.software/ns/schema/2.0",
        "http://nodeinfo.diaspora.software/ns/schema/2.0",
        "https://nodeinfo.diaspora.software/ns/schema/1.1",
        "http://nodeinfo.diaspora.software/ns/schema/1.1",
        "https://nodeinfo.diaspora.software/ns/schema/1.0",
        "http://nodeinfo.diaspora.software/ns/schema/1.0",
    ]

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_wellknown_nodeinfo,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        return {
            "status_code"  : 500,
            # Same string format as fetch_nodeinfo() uses for this case
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    logger.debug("Fetching .well-known info for domain='%s'", domain)
    data = network.get_json_api(
        domain,
        "/.well-known/nodeinfo",
        headers,
        (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
    )

    logger.debug("data[]='%s'", type(data))
    if "error_message" not in data:
        nodeinfo = data["json"]

        logger.debug("Marking domain='%s' as successfully handled ...", domain)
        instances.set_success(domain)

        logger.debug("Found entries: nodeinfo()=%d,domain='%s'", len(nodeinfo), domain)
        if "links" in nodeinfo:
            logger.debug("Found nodeinfo[links]()=%d record(s),", len(nodeinfo["links"]))
            for niid in nodeinfo_identifier:
                # Reset per identifier so a stale response is never returned
                data = dict()

                logger.debug("Checking niid='%s' ...", niid)
                for link in nodeinfo["links"]:
                    logger.debug("link[%s]='%s'", type(link), link)
                    if not isinstance(link, dict) or "rel" not in link:
                        logger.debug("link[]='%s' is not of type 'dict' or no element 'rel' found - SKIPPED!", type(link))
                        continue
                    elif link["rel"] != niid:
                        logger.debug("link[re]='%s' does not matched niid='%s' - SKIPPED!", link["rel"], niid)
                        continue
                    elif "href" not in link:
                        logger.warning("link[rel]='%s' has no element 'href' - SKIPPED!", link["rel"])
                        continue
                    elif link["href"] is None:
                        logger.debug("link[href] is None, link[rel]='%s' - SKIPPED!", link["rel"])
                        continue

                    # Default is that 'href' has a complete URL, but some hosts don't send that
                    logger.debug("link[rel]='%s' matches niid='%s'", link["rel"], niid)
                    url = link["href"]
                    components = urlparse(url)

                    logger.debug("components[%s]='%s'", type(components), components)
                    if components.scheme == "" and components.netloc == "":
                        logger.warning("link[href]='%s' has no scheme and host name in it, prepending from domain='%s'", link['href'], domain)
                        url = f"https://{domain}{url}"
                        components = urlparse(url)
                    elif components.netloc == "":
                        logger.warning("link[href]='%s' has no netloc set, setting domain='%s'", link["href"], domain)
                        url = f"{components.scheme}://{domain}{components.path}"
                        components = urlparse(url)

                    logger.debug("components.netloc[]='%s'", type(components.netloc))
                    if not utils.is_domain_wanted(components.netloc):
                        logger.debug("components.netloc='%s' is not wanted - SKIPPED!", components.netloc)
                        continue

                    logger.debug("Fetching nodeinfo from url='%s' ...", url)
                    data = network.fetch_api_url(
                        url,
                        (config.get("connection_timeout"), config.get("read_timeout"))
                    )

                    logger.debug("link[href]='%s',data[]='%s'", link["href"], type(data))
                    if "error_message" not in data and "json" in data:
                        logger.debug("Found JSON data()=%d,link[href]='%s' - Setting detection_mode=AUTO_DISCOVERY ...", len(data), link["href"])
                        instances.set_detection_mode(domain, "AUTO_DISCOVERY")
                        instances.set_nodeinfo_url(domain, link["href"])

                        logger.debug("Marking domain='%s' as successfully handled ...", domain)
                        instances.set_success(domain)
                        break
                    else:
                        logger.debug("Setting last error for domain='%s',data[]='%s'", domain, type(data))
                        instances.set_last_error(domain, data)

                logger.debug("data()=%d", len(data))
                if "error_message" not in data and "json" in data:
                    logger.debug("Auto-discovery successful: domain='%s'", domain)
                    break
        else:
            logger.warning("nodeinfo does not contain 'links': domain='%s'", domain)

    logger.debug("Returning data[]='%s' - EXIT!", type(data))
    return data
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
    """Guess the software of `domain` from meta tags of an HTML page.

    The first of <meta name="generator">, <meta property="og:site_name"> and
    <meta property="og:platform"> that carries a string "content" is used,
    then version numbers and "powered by"/"hosted on"/" by "/" see " phrases
    are stripped from it.

    Args:
        domain: Instance to query; validated by domain_helper.raise_on().
        path: Non-empty path of the page to fetch.

    Returns:
        The detected software name, or None when nothing was found.

    Raises:
        ValueError: If `path` is not a str or is empty.
        requests.exceptions.TooManyRedirects: If the response URL does not
            belong to `domain` (redirect to a foreign host).
    """
    logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
    domain_helper.raise_on(domain)

    if not isinstance(path, str):
        raise ValueError(f"path[]='{type(path)}' is not of type 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' is empty")

    logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
    software = None

    logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
    # NOTE(review): argument list restored from a damaged listing - confirm
    # against the signature of network.fetch_response().
    response = network.fetch_response(
        domain,
        path,
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=True
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    # "in" rather than find() > 0: a document may start directly with "<html"
    if response.ok and response.status_code < 300 and "<html" in response.text and domain_helper.is_in_url(domain, response.url):
        logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
        doc = bs4.BeautifulSoup(response.text, "html.parser")

        logger.debug("doc[]='%s'", type(doc))
        generator = doc.find("meta", {"name"    : "generator"})
        site_name = doc.find("meta", {"property": "og:site_name"})
        platform  = doc.find("meta", {"property": "og:platform"})

        logger.debug("generator[]='%s',site_name[]='%s',platform[]='%s'", type(generator), type(site_name), type(platform))
        if isinstance(generator, bs4.element.Tag) and isinstance(generator.get("content"), str):
            logger.debug("Found generator meta tag: domain='%s'", domain)
            software = tidyup.domain(generator.get("content"))

            logger.debug("software[%s]='%s'", type(software), software)
            if software is not None and software != "":
                logger.info("domain='%s' is generated by software='%s' - Setting detection_mode=GENERATOR ...", domain, software)
                instances.set_detection_mode(domain, "GENERATOR")
        elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str):
            logger.debug("Found property=og:site_name, domain='%s'", domain)
            software = tidyup.domain(site_name.get("content"))

            logger.debug("software[%s]='%s'", type(software), software)
            if software is not None and software != "":
                logger.debug("domain='%s' has og:site_name='%s' - Setting detection_mode=SITE_NAME ...", domain, software)
                instances.set_detection_mode(domain, "SITE_NAME")
        elif isinstance(platform, bs4.element.Tag) and isinstance(platform.get("content"), str):
            logger.debug("Found property=og:platform, domain='%s'", domain)
            software = tidyup.domain(platform.get("content"))

            logger.debug("software[%s]='%s'", type(software), software)
            if software is not None and software != "":
                logger.debug("domain='%s' has og:platform='%s' - Setting detection_mode=PLATFORM ...", domain, software)
                instances.set_detection_mode(domain, "PLATFORM")
    elif not domain_helper.is_in_url(domain, response.url):
        logger.warning("domain='%s' doesn't match response.url='%s', maybe redirect to other domain?", domain, response.url)

        components = urlparse(response.url)

        logger.debug("components[]='%s'", type(components))
        if not instances.is_registered(components.netloc):
            logger.info("components.netloc='%s' is not registered, adding ...", components.netloc)
            fetch_instances(components.netloc, domain, None, "fetch_generator")

        # Record the redirect and drop everything detected for this domain
        message = f"Redirect from domain='{domain}' to response.url='{response.url}'"
        instances.set_last_error(domain, message)
        instances.set_software(domain, None)
        instances.set_detection_mode(domain, None)
        instances.set_nodeinfo_url(domain, None)

        raise requests.exceptions.TooManyRedirects(message)

    logger.debug("software[]='%s'", type(software))
    if isinstance(software, str) and software == "":
        logger.debug("Corrected empty string to None for software of domain='%s'", domain)
        software = None
    elif isinstance(software, str) and ("." in software or " " in software):
        logger.debug("software='%s' may contain a version number, domain='%s', removing it ...", software, domain)
        software = version.remove(software)

    logger.debug("software[]='%s'", type(software))
    if isinstance(software, str) and "powered by " in software:
        logger.debug("software='%s' has 'powered by' in it", software)
        software = version.remove(version.strip_powered_by(software))
    elif isinstance(software, str) and " hosted on " in software:
        logger.debug("software='%s' has 'hosted on' in it", software)
        software = version.remove(version.strip_hosted_on(software))
    elif isinstance(software, str) and " by " in software:
        logger.debug("software='%s' has ' by ' in it", software)
        software = version.strip_until(software, " by ")
    elif isinstance(software, str) and " see " in software:
        logger.debug("software='%s' has ' see ' in it", software)
        software = version.strip_until(software, " see ")

    logger.debug("software='%s' - EXIT!", software)
    return software
def determine_software(domain: str, path: str = None) -> str:
    """Determine which software `domain` runs.

    nodeinfo (see fetch_nodeinfo()) is consulted first; when it is missing,
    reports an error or lacks [software][name], the name is taken from the
    front page's meta tags via fetch_generator_from_path(). The result is
    passed through software_helper.alias() and has version numbers and
    "powered by" phrases removed.

    Args:
        domain: Instance to query; validated by domain_helper.raise_on().
        path: Optional nodeinfo path handed on to fetch_nodeinfo().

    Returns:
        The software name, or None when it could not be determined.

    Raises:
        ValueError: If `path` is neither a str nor None.
        Exception: Whatever exception fetch_nodeinfo() reported under the
            "exception" key is re-raised unchanged.
    """
    logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
    domain_helper.raise_on(domain)

    if not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")

    logger.debug("Determining software for domain='%s',path='%s'", domain, path)
    software = None

    logger.debug("Fetching nodeinfo from domain='%s' ...", domain)
    data = fetch_nodeinfo(domain, path)

    logger.debug("data[%s]='%s'", type(data), data)
    if "exception" in data:
        # Continue raising it
        logger.debug("data()=%d contains exception='%s' - raising ...", len(data), type(data["exception"]))
        raise data["exception"]
    elif "error_message" in data:
        logger.debug("Returned error_message during fetching nodeinfo: '%s',status_code=%d", data['error_message'], data['status_code'])
        software = fetch_generator_from_path(domain)
        logger.debug("Generator for domain='%s' is: '%s'", domain, software)
    elif "json" in data:
        logger.debug("domain='%s',path='%s',data[json] found ...", domain, path)
        data = data["json"]
    else:
        logger.debug("JSON response from domain='%s' does not include [software][name], fetching / ...", domain)
        software = fetch_generator_from_path(domain)
        logger.debug("Generator for domain='%s' is: '%s'", domain, software)

    # NOTE(review): this chain also runs after the generator fallbacks above,
    # so an error response ends up fetching the front page twice.
    if "status" in data and data["status"] == "error" and "message" in data:
        logger.warning("JSON response is an error: '%s' - Resetting detection_mode,nodeinfo_url ...", data["message"])
        instances.set_last_error(domain, data["message"])
        instances.set_detection_mode(domain, None)
        instances.set_nodeinfo_url(domain, None)
        software = fetch_generator_from_path(domain)
        logger.debug("Generator for domain='%s' is: '%s'", domain, software)
    elif "software" in data and "name" in data["software"]:
        logger.debug("Found data[json][software][name] in JSON response")
        software = data["software"]["name"]
        logger.debug("software[%s]='%s' - FOUND!", type(software), software)
    elif "message" in data:
        logger.warning("JSON response contains only a message: '%s' - Resetting detection_mode,nodeinfo_url ...", data["message"])
        instances.set_last_error(domain, data["message"])
        instances.set_detection_mode(domain, None)
        instances.set_nodeinfo_url(domain, None)

        logger.debug("Invoking fetch_generator_from_path(%s) ...", domain)
        software = fetch_generator_from_path(domain)
        logger.debug("Generator for domain='%s' is: '%s'", domain, software)
    elif "software" not in data or "name" not in data["software"]:
        logger.debug("JSON response from domain='%s' does not include [software][name] - Resetting detection_mode,nodeinfo_url ...", domain)
        instances.set_detection_mode(domain, None)
        instances.set_nodeinfo_url(domain, None)

        logger.debug("Invoking fetch_generator_from_path(%s) ...", domain)
        software = fetch_generator_from_path(domain)
        logger.debug("Generator for domain='%s' is: '%s'", domain, software)

    logger.debug("software[%s]='%s'", type(software), software)
    if software is None:
        logger.debug("Returning None - EXIT!")
        return None

    logger.debug("software='%s'- BEFORE!", software)
    software = software_helper.alias(software)
    logger.debug("software['%s']='%s' - AFTER!", type(software), software)

    if str(software) == "":
        logger.debug("software for domain='%s' was not detected, trying generator ...", domain)
        software = fetch_generator_from_path(domain)
    elif isinstance(software, str) and ("." in software or " " in software):
        # isinstance guard: str(None) is "None", so a length check on
        # str(software) would let a non-string reach the "in" tests.
        logger.debug("software='%s' may contain a version number, domain='%s', removing it ...", software, domain)
        software = version.remove(software)

    logger.debug("software[]='%s'", type(software))
    if isinstance(software, str) and "powered by" in software:
        logger.debug("software='%s' has 'powered by' in it", software)
        software = version.remove(version.strip_powered_by(software))

    logger.debug("software='%s' - EXIT!", software)
    return software
def find_domains(tag: bs4.element.Tag) -> list:
    """Extract blocked domains and their reasons from an HTML table.

    Each <tr> is expected to hold the domain in its first <td> and the reason
    in its second. Rows without cells, unwanted domains and invalid domains
    are skipped.

    Args:
        tag: Element containing the table rows.

    Returns:
        A list of dicts with the keys "domain" and "reason".

    Raises:
        ValueError: If `tag` is not a bs4.element.Tag.
        KeyError: If `tag` contains no <tr> at all.
    """
    logger.debug("tag[]='%s' - CALLED!", type(tag))
    if not isinstance(tag, bs4.element.Tag):
        raise ValueError(f"Parameter tag[]='{type(tag)}' is not type of bs4.element.Tag")

    # Select the rows once instead of once for the check and once for the loop
    table_rows = tag.select("tr")
    if len(table_rows) == 0:
        raise KeyError("No table rows found in table!")

    domains = list()
    for element in table_rows:
        logger.debug("element[]='%s'", type(element))
        cells = element.find_all("td")
        if len(cells) == 0:
            logger.debug("Skipping element, no <td> found")
            continue
        elif len(cells) < 2:
            # A row without a reason cell would raise IndexError below
            logger.warning("Row has only %d <td> element(s), no reason found - SKIPPED!", len(cells))
            continue

        domain = tidyup.domain(cells[0].text)
        reason = tidyup.reason(cells[1].text)

        logger.debug("domain='%s',reason='%s'", domain, reason)

        if not utils.is_domain_wanted(domain):
            logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
            continue
        elif domain == "gab.com/.ai, develop.gab.com":
            # This one cell lists three hosts; record each with the same reason
            logger.debug("Multiple domains detected in one row")
            domains.append({
                "domain": "gab.com",
                "reason": reason,
            })
            domains.append({
                "domain": "gab.ai",
                "reason": reason,
            })
            domains.append({
                "domain": "develop.gab.com",
                "reason": reason,
            })
            continue
        elif not validators.domain(domain.split("/")[0]):
            logger.warning("domain='%s' is not a valid domain - SKIPPED!", domain)
            continue

        logger.debug("Adding domain='%s',reason='%s' ...", domain, reason)
        domains.append({
            "domain": domain,
            "reason": reason,
        })

    logger.debug("domains()=%d - EXIT!", len(domains))
    return domains
def add_peers(rows: dict) -> list:
    """Collect the wanted peer domains from a federation listing.

    Args:
        rows: Dict that may hold the keys "linked", "allowed" and "blocked".
            Each value is an iterable of peers, where a peer is either a
            domain string or a dict with a "domain" key. Missing keys and
            None values are ignored.

    Returns:
        The tidied domains that utils.is_domain_wanted() accepts, in the
        order they were encountered.

    Raises:
        ValueError: If `rows` is not a dict, or a peer is neither a str nor a
            dict containing "domain".
    """
    logger.debug("rows[]='%s' - CALLED!", type(rows))
    if not isinstance(rows, dict):
        raise ValueError(f"Parameter rows[]='{type(rows)}' is not of type 'dict'")

    peers = list()
    for key in ["linked", "allowed", "blocked"]:
        logger.debug("Checking key='%s'", key)
        if key not in rows or rows[key] is None:
            logger.debug("Cannot find key='%s' or it is NoneType - SKIPPED!", key)
            continue

        logger.debug("Adding %d peer(s) to peers list ...", len(rows[key]))
        for peer in rows[key]:
            logger.debug("peer[%s]='%s' - BEFORE!", type(peer), peer)
            if peer is None or peer == "":
                logger.debug("peer is empty - SKIPPED")
                continue
            elif isinstance(peer, dict) and "domain" in peer:
                logger.debug("peer[domain]='%s'", peer["domain"])
                peer = tidyup.domain(peer["domain"])
            elif isinstance(peer, str):
                logger.debug("peer='%s'", peer)
                peer = tidyup.domain(peer)
            else:
                raise ValueError(f"peer[]='{type(peer)}' is not supported,key='{key}'")

            logger.debug("peer[%s]='%s' - AFTER!", type(peer), peer)
            if not utils.is_domain_wanted(peer):
                logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                continue

            logger.debug("Appending peer='%s' ...", peer)
            peers.append(peer)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers