1 # Copyright (C) 2023 Free Software Foundation
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License as published
5 # by the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU Affero General Public License for more details.
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program. If not, see <https://www.gnu.org/licenses/>.
18 from urllib.parse import urlparse
27 from fba.helpers import config
28 from fba.helpers import cookies
29 from fba.helpers import domain as domain_helper
30 from fba.helpers import software as software_helper
31 from fba.helpers import tidyup
32 from fba.helpers import version
34 from fba.http import network
36 from fba.models import instances
38 from fba.networks import lemmy
39 from fba.networks import misskey
40 from fba.networks import peertube
# Module-wide logging setup: INFO threshold plus a per-module logger
# (standard `logging.getLogger(__name__)` pattern used throughout the project).
44 logging.basicConfig(level=logging.INFO)
45 logger = logging.getLogger(__name__)
# Recursively discover and persist federated instances reachable from `domain`.
#
# Parameters:
#   domain   - start host (validated via domain_helper.raise_on)
#   origin   - referring host, or None for a root crawl
#   software - platform name; None triggers auto-detection via determine_software()
#   command  - caller identifier recorded with each added instance
#   path     - optional nodeinfo path hint forwarded to software detection
#
# Side effects only (no return value): rows are added/updated through the
# `instances` model and per-domain cookies are cleared when done.
#
# NOTE(review): this listing is truncated — the `try:` lines matching the two
# `except network.exceptions` handlers, several `continue`/`return` statements
# in the peer loop, and `_DEPTH` bookkeeping are missing from view. Confirm
# control flow against the canonical source before editing.
47 def fetch_instances(domain: str, origin: str, software: str, command: str, path: str = None):
49 logger.debug("domain='%s',origin='%s',software='%s',command='%s',path='%s',_DEPTH=%d - CALLED!", domain, origin, software, command, path, _DEPTH)
51 domain_helper.raise_on(domain)
# Strict parameter validation: wrong types raise ValueError immediately.
53 if not isinstance(origin, str) and origin is not None:
54 raise ValueError(f"Parameter origin[]='{type(origin)}' is not of type 'str'")
55 elif not isinstance(command, str):
56 raise ValueError(f"Parameter command[]='{type(command)}' is not of type 'str'")
58 raise ValueError("Parameter 'command' is empty")
# Auto-detect software when the caller did not supply it; detection failures
# are logged and recorded on the instance rather than propagated.
59 elif software is None:
61 logger.debug("Software for domain='%s' is not set, determining ...", domain)
62 software = determine_software(domain, path)
63 except network.exceptions as exception:
64 logger.warning("Exception '%s' during determining software type", type(exception))
65 instances.set_last_error(domain, exception)
67 logger.debug("Determined software='%s' for domain='%s'", software, domain)
68 elif not isinstance(software, str):
69 raise ValueError(f"Parameter software[]='{type(software)}' is not of type 'str'")
70 elif not isinstance(path, str) and path is not None:
71 raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")
# Register the starting domain itself before crawling its peers.
73 logger.debug("Checking if domain='%s' is registered ...", domain)
74 if not instances.is_registered(domain):
75 logger.debug("Adding new domain='%s',origin='%s',command='%s',path='%s',software='%s'", domain, origin, command, path, software)
76 instances.add(domain, origin, command, path, software)
78 logger.debug("Updating last_instance_fetch for domain='%s' ...", domain)
79 instances.set_last_instance_fetch(domain)
# Fetch the peer list only when a software type is known (fetch_peers
# dispatches on it); network errors are recorded, not raised.
82 logger.debug("software='%s'", software)
83 if software is not None:
85 logger.debug("Fetching instances for domain='%s',software='%s',origin='%s'", domain, software, origin)
86 peerlist = fetch_peers(domain, software, origin)
87 except network.exceptions as exception:
88 logger.warning("Cannot fetch peers from domain='%s',software='%s': '%s'", domain, software, type(exception))
90 logger.debug("peerlist[]='%s'", type(peerlist))
91 if isinstance(peerlist, list):
92 logger.debug("Invoking instances.set_total_peerlist(%s,%d) ...", domain, len(peerlist))
93 instances.set_total_peers(domain, peerlist)
# Empty/absent peer list: flush pending updates, clear cookies and bail out.
95 logger.debug("peerlist[]='%s'", type(peerlist))
96 if peerlist is None or len(peerlist) == 0:
97 logger.warning("Cannot fetch peers: domain='%s',software='%s'", domain, software)
99 if instances.has_pending(domain):
100 logger.debug("Flushing updates for domain='%s' ...", domain)
101 instances.update_data(domain)
103 logger.debug("Invoking cookies.clear(%s) ...", domain)
104 cookies.clear(domain)
107 logger.debug("EXIT!")
# Main crawl loop: normalize each peer, skip unwanted/profile/tag links, and
# either recurse (within depth/size limits) or just register the instance.
110 logger.info("Checking %d instance(s) from domain='%s',software='%s' ...", len(peerlist), domain, software)
111 for instance in peerlist:
112 logger.debug("instance='%s'", instance)
113 if instance is None or instance == "":
114 logger.debug("instance[%s]='%s' is either None or empty - SKIPPED!", type(instance), instance)
117 logger.debug("instance='%s' - BEFORE!", instance)
118 instance = tidyup.domain(instance)
119 logger.debug("instance='%s' - AFTER!", instance)
122 logger.warning("Empty instance after tidyup.domain(), domain='%s'", domain)
# Convert the hostname to its IDNA (punycode) form for consistent storage.
125 logger.debug("instance='%s' - BEFORE!", instance)
126 instance = instance.encode("idna").decode("utf-8")
127 logger.debug("instance='%s' - AFTER!", instance)
129 if not utils.is_domain_wanted(instance):
130 logger.debug("instance='%s' is not wanted - SKIPPED!", instance)
132 elif instance.find("/profile/") > 0 or instance.find("/users/") > 0 or (instances.is_registered(instance.split("/")[0]) and instance.find("/c/") > 0):
133 logger.debug("instance='%s' is a link to a single user profile - SKIPPED!", instance)
135 elif instance.find("/tag/") > 0:
136 logger.debug("instance='%s' is a link to a tag - SKIPPED!", instance)
138 elif not instances.is_registered(instance):
139 logger.debug("Checking if domain='%s' has pending updates ...", domain)
140 if instances.has_pending(domain):
141 logger.debug("Flushing updates for domain='%s' ...", domain)
142 instances.update_data(domain)
# Recurse only while within the configured crawl depth and when the peer
# list is large enough to be worth descending into.
144 logger.debug("instance='%s',origin='%s',_DEPTH=%d reached!", instance, origin, _DEPTH)
145 if _DEPTH <= config.get("max_crawl_depth") and len(peerlist) >= config.get("min_peers_length"):
146 logger.debug("Fetching instance='%s',origin='%s',command='%s',path='%s',_DEPTH=%d ...", instance, domain, command, path, _DEPTH)
147 fetch_instances(instance, domain, None, command, path)
149 logger.debug("Adding instance='%s',domain='%s',command='%s',_DEPTH=%d ...", instance, domain, command, _DEPTH)
150 instances.add(instance, domain, command)
# Cleanup: drop cookies for the crawled domain and persist pending changes.
152 logger.debug("Invoking cookies.clear(%s) ...", domain)
153 cookies.clear(domain)
155 logger.debug("Checking if domain='%s' has pending updates ...", domain)
156 if instances.has_pending(domain):
157 logger.debug("Flushing updates for domain='%s' ...", domain)
158 instances.update_data(domain)
161 logger.debug("EXIT!")
# Fetch the peer (federated neighbour) list for `domain`.
#
# Dispatches to platform-specific fetchers for misskey/lemmy/peertube;
# every other software falls through to a generic Mastodon-style API query
# (e.g. /api/v1/instance/peers) after a CSRF pre-check.
#
# Returns: list of peer domain names (possibly empty on errors).
#
# NOTE(review): listing is truncated — the `try:` for the CSRF check, the
# full `paths` list literal, the `peers` initialization and the per-path
# loop header/`break`/`return` lines are not visible here.
163 def fetch_peers(domain: str, software: str, origin: str) -> list:
164 logger.debug("domain='%s',software='%s',origin='%s' - CALLED!", domain, software, origin)
165 domain_helper.raise_on(domain)
167 if not isinstance(software, str) and software is not None:
168 raise ValueError(f"Parameter software[]='{type(software)}' is not of type 'str'")
169 elif not isinstance(origin, str) and origin is not None:
170 raise ValueError(f"Parameter origin[]='{type(origin)}' is not of type 'str'")
171 elif isinstance(origin, str) and origin == "":
172 raise ValueError("Parameter 'origin' is empty")
# Platform-specific short-circuits: these networks have dedicated modules.
174 if software == "misskey":
175 logger.debug("Invoking misskey.fetch_peers(%s) ...", domain)
176 return misskey.fetch_peers(domain)
177 elif software == "lemmy":
178 logger.debug("Invoking lemmy.fetch_peers(%s,%s) ...", domain, origin)
179 return lemmy.fetch_peers(domain, origin)
180 elif software == "peertube":
181 logger.debug("Invoking peertube.fetch_peers(%s) ...", domain)
182 return peertube.fetch_peers(domain)
# Generic path: determine CSRF headers first; a failure here aborts with an
# empty result instead of raising to the caller.
184 # No CSRF by default, you don't have to add network.api_headers by yourself here
188 logger.debug("Checking CSRF for domain='%s'", domain)
189 headers = csrf.determine(domain, dict())
190 except network.exceptions as exception:
191 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
192 instances.set_last_error(domain, exception)
194 logger.debug("Returning empty list ... - EXIT!")
198 "/api/v1/instance/peers",
202 # Init peers variable
205 logger.debug("Checking %d paths ...", len(paths))
207 logger.debug("Fetching path='%s' from domain='%s',software='%s' ...", path, domain, software)
208 data = network.get_json_api(
212 (config.get("connection_timeout"), config.get("read_timeout"))
# A successful JSON response marks the instance handled and supplies peers.
215 logger.debug("data[]='%s'", type(data))
216 if "error_message" in data:
217 logger.debug("Was not able to fetch peers from path='%s',domain='%s' ...", path, domain)
218 instances.set_last_error(domain, data)
219 elif "json" in data and len(data["json"]) > 0:
220 logger.debug("Querying API path='%s' was successful: domain='%s',data[json][%s]()=%d", path, domain, type(data['json']), len(data['json']))
223 logger.debug("Marking domain='%s' as successfully handled ...", domain)
224 instances.set_success(domain)
227 if not isinstance(peers, list):
228 logger.warning("peers[]='%s' is not of type 'list', maybe bad API response?", type(peers))
231 logger.debug("Invoking instances.set_total_peers(%s,%d) ...", domain, len(peers))
232 instances.set_total_peers(domain, peers)
234 logger.debug("peers()=%d - EXIT!", len(peers))
# Fetch a nodeinfo document for `domain`.
#
# Strategy: first try well-known auto-discovery (fetch_wellknown_nodeinfo);
# if that yields usable JSON, return it. Otherwise iterate a list of static
# nodeinfo paths (e.g. /nodeinfo/2.1.json .. /nodeinfo/1.0.json) after a
# CSRF pre-check.
#
# Returns: dict from network.get_json_api — contains "json" on success or
# "error_message"/"exception" keys on failure.
#
# NOTE(review): listing is truncated — the `try:` for the CSRF check, the
# full request_paths list, the surrounding `return` statements and the dict
# literal around lines 270-271 are not fully visible.
237 def fetch_nodeinfo(domain: str, path: str = None) -> dict:
238 logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
239 domain_helper.raise_on(domain)
241 if not isinstance(path, str) and path is not None:
242 raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")
# Preferred route: well-known auto-discovery.
244 logger.debug("Fetching nodeinfo from domain='%s' ...", domain)
245 nodeinfo = fetch_wellknown_nodeinfo(domain)
247 logger.debug("nodeinfo[%s](%d)='%s'", type(nodeinfo), len(nodeinfo), nodeinfo)
248 if "error_message" not in nodeinfo and "json" in nodeinfo and len(nodeinfo["json"]) > 0:
249 logger.debug("Invoking instances.set_last_nodeinfo(%s) ...", domain)
250 instances.set_last_nodeinfo(domain)
252 logger.debug("Found nodeinfo[json]()=%d - EXIT!", len(nodeinfo['json']))
# Fallback route: CSRF check, then probe static nodeinfo paths. A CSRF
# failure resets software/detection metadata and returns an error dict.
255 # No CSRF by default, you don't have to add network.api_headers by yourself here
260 logger.debug("Checking CSRF for domain='%s'", domain)
261 headers = csrf.determine(domain, dict())
262 except network.exceptions as exception:
263 logger.warning("Exception '%s' during checking CSRF (nodeinfo,%s) - EXIT!", type(exception), __name__)
264 instances.set_last_error(domain, exception)
265 instances.set_software(domain, None)
266 instances.set_detection_mode(domain, None)
267 instances.set_nodeinfo_url(domain, None)
270 "error_message": f"exception[{type(exception)}]='{str(exception)}'",
271 "exception" : exception,
275 "/nodeinfo/2.1.json",
277 "/nodeinfo/2.0.json",
279 "/nodeinfo/1.0.json",
284 for request in request_paths:
285 logger.debug("request='%s'", request)
# Build both scheme variants so a caller-supplied absolute `path` can be
# matched and reduced to its path component before fetching.
286 http_url = f"http://{domain}{path}"
287 https_url = f"https://{domain}{path}"
289 logger.debug("path[%s]='%s',request='%s',http_url='%s',https_url='%s'", type(path), path, request, http_url, https_url)
290 if path is None or path in [request, http_url, https_url]:
291 logger.debug("path='%s',http_url='%s',https_url='%s'", path, http_url, https_url)
292 if path in [http_url, https_url]:
293 logger.debug("domain='%s',path='%s' has protocol in path, splitting ...", domain, path)
294 components = urlparse(path)
295 path = components.path
297 logger.debug("Fetching request='%s' from domain='%s' ...", request, domain)
298 data = network.get_json_api(
302 (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
# A static hit records detection_mode=STATIC_CHECK plus the winning URL.
305 logger.debug("data[]='%s'", type(data))
306 if "error_message" not in data and "json" in data:
307 logger.debug("Success: request='%s' - Setting detection_mode=STATIC_CHECK ...", request)
308 instances.set_last_nodeinfo(domain)
309 instances.set_detection_mode(domain, "STATIC_CHECK")
310 instances.set_nodeinfo_url(domain, request)
313 logger.warning("Failed fetching nodeinfo from domain='%s',status_code='%s',error_message='%s'", domain, data['status_code'], data['error_message'])
315 logger.debug("data()=%d - EXIT!", len(data))
# Auto-discover nodeinfo for `domain` via /.well-known/nodeinfo (and the
# x-nodeinfo2 variant used by some platforms).
#
# Walks the "links" array of the well-known document, matching each entry's
# "rel" against the known nodeinfo schema identifiers (newest first), then
# fetches the matching "href". Records detection_mode=AUTO_DISCOVERY and the
# nodeinfo URL on success.
#
# Returns: dict with "json" on success or "error_message"/"exception" keys.
#
# NOTE(review): listing is truncated — the `try:` for the CSRF check, the
# `url = link["href"]` assignment before line 407, several `break`/`continue`
# statements and the final `return data` are not visible here.
318 def fetch_wellknown_nodeinfo(domain: str) -> dict:
319 logger.debug("domain='%s' - CALLED!", domain)
320 domain_helper.raise_on(domain)
# Known nodeinfo schema identifiers, newest first, so the best-supported
# schema version wins. These are "rel" values, not fetchable URLs.
322 # "rel" identifiers (no real URLs)
323 nodeinfo_identifier = [
324 "https://nodeinfo.diaspora.software/ns/schema/2.1",
325 "http://nodeinfo.diaspora.software/ns/schema/2.1",
326 "https://nodeinfo.diaspora.software/ns/schema/2.0",
327 "http://nodeinfo.diaspora.software/ns/schema/2.0",
328 "https://nodeinfo.diaspora.software/ns/schema/1.1",
329 "http://nodeinfo.diaspora.software/ns/schema/1.1",
330 "https://nodeinfo.diaspora.software/ns/schema/1.0",
331 "http://nodeinfo.diaspora.software/ns/schema/1.0",
334 # No CSRF by default, you don't have to add network.api_headers by yourself here
338 logger.debug("Checking CSRF for domain='%s'", domain)
339 headers = csrf.determine(domain, dict())
340 except network.exceptions as exception:
341 logger.warning("Exception '%s' during checking CSRF (fetch_wellknown_nodeinfo,%s) - EXIT!", type(exception), __name__)
342 instances.set_last_error(domain, exception)
345 "error_message": type(exception),
346 "exception" : exception,
# Probe both well-known endpoints; first one with valid JSON wins.
351 logger.debug("Fetching .well-known info for domain='%s'", domain)
352 for path in ["/.well-known/nodeinfo", "/.well-known/x-nodeinfo2"]:
353 logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
354 data = network.get_json_api(
358 (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
360 logger.debug("data[]='%s'", type(data))
362 if "error_message" not in data and "json" in data:
363 logger.debug("path='%s' returned valid json()=%d", path, len(data["json"]))
# Propagate transport exceptions; bail out on errors or missing JSON.
366 logger.debug("data[]='%s'", type(data))
367 if "exception" in data:
368 logger.warning("domain='%s' returned exception '%s'", domain, str(data["exception"]))
369 raise data["exception"]
370 elif "error_message" in data:
371 logger.warning("domain='%s' returned error message: '%s'", domain, data["error_message"])
373 elif "json" not in data:
374 logger.warning("domain='%s' returned no 'json' key", domain)
377 nodeinfo = data["json"]
378 logger.debug("nodeinfo()=%d has been returned", len(nodeinfo))
# Standard nodeinfo: a "links" array pointing at schema documents.
380 if "links" in nodeinfo:
381 logger.debug("Marking domain='%s' as successfully handled ...", domain)
382 instances.set_success(domain)
384 logger.debug("Found nodeinfo[links]()=%d record(s),", len(nodeinfo["links"]))
385 for niid in nodeinfo_identifier:
388 logger.debug("Checking niid='%s' ...", niid)
389 for link in nodeinfo["links"]:
390 logger.debug("link[%s]='%s'", type(link), link)
391 if not isinstance(link, dict) or not "rel" in link:
392 logger.debug("link[]='%s' is not of type 'dict' or no element 'rel' found - SKIPPED!", type(link))
394 elif link["rel"] != niid:
395 logger.debug("link[re]='%s' does not matched niid='%s' - SKIPPED!", link["rel"], niid)
397 elif "href" not in link:
398 logger.warning("link[rel]='%s' has no element 'href' - SKIPPED!", link["rel"])
400 elif link["href"] is None:
401 logger.debug("link[href] is None, link[rel]='%s' - SKIPPED!", link["rel"])
# Repair relative or scheme-less hrefs by prepending scheme/host so the
# link is always fetchable as an absolute URL.
404 # Default is that 'href' has a complete URL, but some hosts don't send that
405 logger.debug("link[rel]='%s' matches niid='%s'", link["rel"], niid)
407 components = urlparse(url)
409 logger.debug("components[%s]='%s'", type(components), components)
410 if components.scheme == "" and components.netloc == "":
411 logger.warning("link[href]='%s' has no scheme and host name in it, prepending from domain='%s'", link['href'], domain)
412 url = f"https://{domain}{url}"
413 components = urlparse(url)
414 elif components.netloc == "":
415 logger.warning("link[href]='%s' has no netloc set, setting domain='%s'", link["href"], domain)
416 url = f"{components.scheme}://{domain}{components.path}"
417 components = urlparse(url)
419 logger.debug("components.netloc[]='%s'", type(components.netloc))
420 if not utils.is_domain_wanted(components.netloc):
421 logger.debug("components.netloc='%s' is not wanted - SKIPPED!", components.netloc)
424 logger.debug("Fetching nodeinfo from url='%s' ...", url)
425 data = network.fetch_api_url(
427 (config.get("connection_timeout"), config.get("read_timeout"))
430 logger.debug("link[href]='%s',data[]='%s'", link["href"], type(data))
431 if "error_message" not in data and "json" in data:
432 logger.debug("Found JSON data()=%d,link[href]='%s' - Setting detection_mode=AUTO_DISCOVERY ...", len(data), link["href"])
433 instances.set_detection_mode(domain, "AUTO_DISCOVERY")
434 instances.set_nodeinfo_url(domain, link["href"])
436 logger.debug("Marking domain='%s' as successfully handled ...", domain)
437 instances.set_success(domain)
440 logger.debug("Setting last error for domain='%s',data[]='%s'", domain, type(data))
441 instances.set_last_error(domain, data)
443 logger.debug("data()=%d", len(data))
444 if "error_message" not in data and "json" in data:
445 logger.debug("Auto-discovery successful: domain='%s'", domain)
# x-nodeinfo2 variant: software info is embedded under "server" directly.
447 elif "server" in nodeinfo:
448 logger.debug("Found nodeinfo[server][software]='%s'", nodeinfo["server"]["software"])
449 instances.set_detection_mode(domain, "AUTO_DISCOVERY")
450 instances.set_nodeinfo_url(domain, f"https://{domain}/.well-known/x-nodeinfo2")
452 logger.debug("Marking domain='%s' as successfully handled ...", domain)
453 instances.set_success(domain)
455 logger.warning("nodeinfo does not contain 'links' or 'server': domain='%s'", domain)
457 logger.debug("Returning data[]='%s' - EXIT!", type(data))
# Determine `domain`'s software by scraping HTML meta tags from `path`.
#
# Checks, in priority order: <meta name="generator">, <meta
# property="og:platform">, <meta property="og:site_name">, recording the
# matching detection_mode (GENERATOR / PLATFORM / SITE_NAME). A redirect to
# a different domain registers the target and raises TooManyRedirects.
# The detected string is post-processed to strip version numbers and
# marketing suffixes ("powered by", "hosted on", " by ", " see ").
#
# Returns: software name string, or None (per the empty-string correction
# branch; the actual assignment/return lines are truncated in this listing).
#
# NOTE(review): listing is truncated — the `software = None` initialization,
# the `raise ValueError` condition before line 467, and the final `return`
# are not visible here.
460 def fetch_generator_from_path(domain: str, path: str = "/") -> str:
461 logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
462 domain_helper.raise_on(domain)
464 if not isinstance(path, str):
465 raise ValueError(f"path[]='{type(path)}' is not of type 'str'")
467 raise ValueError("Parameter 'path' is empty")
469 logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
472 logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
473 response = network.fetch_response(
477 (config.get("connection_timeout"), config.get("read_timeout")),
# Only parse real HTML responses that did not redirect off-domain.
481 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
482 if response.ok and response.status_code < 300 and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url):
483 logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
484 doc = bs4.BeautifulSoup(response.text, "html.parser")
486 logger.debug("doc[]='%s'", type(doc))
487 generator = doc.find("meta", {"name" : "generator"})
488 site_name = doc.find("meta", {"property": "og:site_name"})
489 platform = doc.find("meta", {"property": "og:platform"})
# Priority: generator > og:platform > og:site_name.
491 logger.debug("generator[]='%s',site_name[]='%s',platform[]='%s'", type(generator), type(site_name), type(platform))
492 if isinstance(generator, bs4.element.Tag) and isinstance(generator.get("content"), str):
493 logger.debug("Found generator meta tag: domain='%s'", domain)
494 software = tidyup.domain(generator.get("content"))
496 logger.debug("software[%s]='%s'", type(software), software)
497 if software is not None and software != "":
498 logger.info("domain='%s' is generated by software='%s' - Setting detection_mode=GENERATOR ...", domain, software)
499 instances.set_detection_mode(domain, "GENERATOR")
500 elif isinstance(platform, bs4.element.Tag) and isinstance(platform.get("content"), str):
501 logger.debug("Found property=og:platform, domain='%s'", domain)
502 software = tidyup.domain(platform.get("content"))
504 logger.debug("software[%s]='%s'", type(software), software)
505 if software is not None and software != "":
506 logger.debug("domain='%s' has og:platform='%s' - Setting detection_mode=PLATFORM ...", domain, software)
507 instances.set_detection_mode(domain, "PLATFORM")
508 elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str):
509 logger.debug("Found property=og:site_name, domain='%s'", domain)
510 software = tidyup.domain(site_name.get("content"))
512 logger.debug("software[%s]='%s'", type(software), software)
513 if software is not None and software != "":
514 logger.debug("domain='%s' has og:site_name='%s' - Setting detection_mode=SITE_NAME ...", domain, software)
515 instances.set_detection_mode(domain, "SITE_NAME")
# Off-domain redirect: register the redirect target as a new instance,
# record the redirect as this domain's last error, reset detection data,
# and abort with TooManyRedirects.
516 elif not domain_helper.is_in_url(domain, response.url):
517 logger.warning("domain='%s' doesn't match response.url='%s', maybe redirect to other domain?", domain, response.url)
519 components = urlparse(response.url)
521 logger.debug("components[]='%s'", type(components))
522 if not instances.is_registered(components.netloc):
523 logger.info("components.netloc='%s' is not registered, adding ...", components.netloc)
524 fetch_instances(components.netloc, domain, None, "fetch_generator")
526 message = f"Redirect from domain='{domain}' to response.url='{response.url}'"
527 instances.set_last_error(domain, message)
528 instances.set_software(domain, None)
529 instances.set_detection_mode(domain, None)
530 instances.set_nodeinfo_url(domain, None)
532 raise requests.exceptions.TooManyRedirects(message)
# Normalize: empty string -> None; strip version numbers and common
# marketing phrases from the detected name.
534 logger.debug("software[]='%s'", type(software))
535 if isinstance(software, str) and software == "":
536 logger.debug("Corrected empty string to None for software of domain='%s'", domain)
538 elif isinstance(software, str) and ("." in software or " " in software):
539 logger.debug("software='%s' may contain a version number, domain='%s', removing it ...", software, domain)
540 software = version.remove(software)
542 logger.debug("software[]='%s'", type(software))
543 if isinstance(software, str) and "powered by " in software:
544 logger.debug("software='%s' has 'powered by' in it", software)
545 software = version.remove(version.strip_powered_by(software))
546 elif isinstance(software, str) and " hosted on " in software:
547 logger.debug("software='%s' has 'hosted on' in it", software)
548 software = version.remove(version.strip_hosted_on(software))
549 elif isinstance(software, str) and " by " in software:
550 logger.debug("software='%s' has ' by ' in it", software)
551 software = version.strip_until(software, " by ")
552 elif isinstance(software, str) and " see " in software:
553 logger.debug("software='%s' has ' see ' in it", software)
554 software = version.strip_until(software, " see ")
556 logger.debug("software='%s' - EXIT!", software)
# Determine the software name for `domain`.
#
# Primary source is fetch_nodeinfo(); on error responses or missing
# software keys it falls back to fetch_generator_from_path() (HTML meta
# scraping). The result is normalized through software_helper.alias() and
# version.remove()/strip_powered_by().
#
# Returns: software name string, or None when nothing could be detected.
# Raises: re-raises any exception carried in the nodeinfo response dict.
#
# NOTE(review): listing is truncated — the `try:` around fetch_nodeinfo,
# the `data = data["json"]` style unwrapping, and the `return` statements
# are not visible here; confirm against the canonical source.
559 def determine_software(domain: str, path: str = None) -> str:
560 logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
561 domain_helper.raise_on(domain)
563 if not isinstance(path, str) and path is not None:
564 raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")
566 logger.debug("Determining software for domain='%s',path='%s'", domain, path)
569 logger.debug("Fetching nodeinfo from domain='%s' ...", domain)
570 data = fetch_nodeinfo(domain, path)
# Exceptions embedded in the response dict are re-raised to the caller;
# error messages trigger the HTML-generator fallback instead.
572 logger.debug("data[%s]='%s'", type(data), data)
573 if "exception" in data:
574 # Continue raising it
575 logger.debug("data()=%d contains exception='%s' - raising ...", len(data), type(data["exception"]))
576 raise data["exception"]
577 elif "error_message" in data:
578 logger.debug("Returned error_message during fetching nodeinfo: '%s',status_code=%d", data['error_message'], data['status_code'])
579 software = fetch_generator_from_path(domain)
580 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
582 logger.debug("domain='%s',path='%s',data[json] found ...", domain, path)
585 logger.debug("Auto-detection for domain='%s' was failing, fetching / ...", domain)
586 software = fetch_generator_from_path(domain)
587 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
# Interpret the nodeinfo payload: explicit errors and bare messages reset
# detection metadata and fall back to the generator scrape; otherwise pull
# the name from [software][name] or the x-nodeinfo2 [server][software].
589 if "status" in data and data["status"] == "error" and "message" in data:
590 logger.warning("JSON response is an error: '%s' - Resetting detection_mode,nodeinfo_url ...", data["message"])
591 instances.set_last_error(domain, data["message"])
592 instances.set_detection_mode(domain, None)
593 instances.set_nodeinfo_url(domain, None)
594 software = fetch_generator_from_path(domain)
595 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
596 elif "software" in data and "name" in data["software"]:
597 logger.debug("Found data[json][software][name] in JSON response")
598 software = data["software"]["name"]
599 logger.debug("software[%s]='%s' - FOUND!", type(software), software)
600 elif "message" in data:
601 logger.warning("JSON response contains only a message: '%s' - Resetting detection_mode,nodeinfo_url ...", data["message"])
602 instances.set_last_error(domain, data["message"])
603 instances.set_detection_mode(domain, None)
604 instances.set_nodeinfo_url(domain, None)
606 logger.debug("Invoking fetch_generator_from_path(%s) ...", domain)
607 software = fetch_generator_from_path(domain)
608 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
609 elif "server" in data and "software" in data["server"]:
610 logger.debug("Found data[server][software]='%s' for domain='%s'", data["server"]["software"].lower(), domain)
611 software = data["server"]["software"].lower()
612 logger.debug("Detected software for domain='%s' is: '%s'", domain, software)
613 elif "software" not in data or "name" not in data["software"]:
614 logger.debug("JSON response from domain='%s' does not include [software][name] - Resetting detection_mode,nodeinfo_url ...", domain)
615 instances.set_detection_mode(domain, None)
616 instances.set_nodeinfo_url(domain, None)
618 logger.debug("Invoking fetch_generator_from_path(%s) ...", domain)
619 software = fetch_generator_from_path(domain)
620 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
622 logger.debug("software[%s]='%s'", type(software), software)
624 logger.debug("Returning None - EXIT!")
# Normalization: map aliases to canonical names, then strip versions and
# "powered by" phrasing; empty detection retries the generator scrape.
627 logger.debug("software='%s'- BEFORE!", software)
628 software = software_helper.alias(software)
629 logger.debug("software['%s']='%s' - AFTER!", type(software), software)
631 if str(software) == "":
632 logger.debug("software for domain='%s' was not detected, trying generator ...", domain)
633 software = fetch_generator_from_path(domain)
634 elif len(str(software)) > 0 and ("." in software or " " in software):
635 logger.debug("software='%s' may contain a version number, domain='%s', removing it ...", software, domain)
636 software = version.remove(software)
638 logger.debug("software[]='%s'", type(software))
639 if isinstance(software, str) and "powered by" in software:
640 logger.debug("software='%s' has 'powered by' in it", software)
641 software = version.remove(version.strip_powered_by(software))
643 logger.debug("software='%s' - EXIT!", software)
# Extract (domain, reason) records from an HTML <table> of blocked domains.
#
# Parameters: tag — a bs4.element.Tag containing <tr> rows whose first two
# <td> cells hold the domain and the block reason.
# Returns: list of dicts (the append lines are truncated in this listing).
# Raises: ValueError on wrong tag type, KeyError when the table has no rows.
#
# NOTE(review): listing is truncated — the `domains = list()` initialization,
# the `continue` statements after the SKIPPED branches, the hard-coded
# special-case rows around lines 670-682, and the final `return` are not
# visible here. The "gab.com/.ai, develop.gab.com" branch appears to split a
# known multi-domain cell into separate records — confirm against source.
646 def find_domains(tag: bs4.element.Tag) -> list:
647 logger.debug("tag[]='%s' - CALLED!", type(tag))
648 if not isinstance(tag, bs4.element.Tag):
649 raise ValueError(f"Parameter tag[]='{type(tag)}' is not type of bs4.element.Tag")
650 elif len(tag.select("tr")) == 0:
651 raise KeyError("No table rows found in table!")
654 for element in tag.select("tr"):
655 logger.debug("element[]='%s'", type(element))
656 if not element.find("td"):
657 logger.debug("Skipping element, no <td> found")
# First cell is the domain, second cell the block reason; both are cleaned.
660 domain = tidyup.domain(element.find("td").text)
661 reason = tidyup.reason(element.findAll("td")[1].text)
663 logger.debug("domain='%s',reason='%s'", domain, reason)
665 if not utils.is_domain_wanted(domain):
666 logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
668 elif domain == "gab.com/.ai, develop.gab.com":
669 logger.debug("Multiple domains detected in one row")
679 "domain": "develop.gab.com",
683 elif not validators.domain(domain.split("/")[0]):
684 logger.warning("domain='%s' is not a valid domain - SKIPPED!", domain)
687 logger.debug("Adding domain='%s',reason='%s' ...", domain, reason)
693 logger.debug("domains()=%d - EXIT!", len(domains))
# Flatten and normalize peers from an API response dict.
#
# Parameters: rows — dict that may contain "linked", "allowed" and/or
# "blocked" keys, each a list of peers (either plain domain strings or
# dicts carrying a "domain" key).
# Returns: list of cleaned, wanted peer domains (the `peers` initialization,
# append line and final `return` are truncated in this listing).
# Raises: ValueError for a non-dict argument or an unsupported peer type.
696 def add_peers(rows: dict) -> list:
697 logger.debug("rows[]='%s' - CALLED!", type(rows))
698 if not isinstance(rows, dict):
699 raise ValueError(f"Parameter rows[]='{type(rows)}' is not of type 'dict'")
# Only these three well-known keys are inspected; absent/None keys skip.
702 for key in ["linked", "allowed", "blocked"]:
703 logger.debug("Checking key='%s'", key)
704 if key not in rows or rows[key] is None:
705 logger.debug("Cannot find key='%s' or it is NoneType - SKIPPED!", key)
708 logger.debug("Adding %d peer(s) to peers list ...", len(rows[key]))
709 for peer in rows[key]:
710 logger.debug("peer[%s]='%s' - BEFORE!", type(peer), peer)
711 if peer is None or peer == "":
712 logger.debug("peer is empty - SKIPPED")
# Accept both dict-shaped peers ({"domain": ...}) and plain strings;
# anything else is rejected with a ValueError.
714 elif isinstance(peer, dict) and "domain" in peer:
715 logger.debug("peer[domain]='%s'", peer["domain"])
716 peer = tidyup.domain(peer["domain"])
717 elif isinstance(peer, str):
718 logger.debug("peer='%s'", peer)
719 peer = tidyup.domain(peer)
721 raise ValueError(f"peer[]='{type(peer)}' is not supported,key='{key}'")
723 logger.debug("peer[%s]='%s' - AFTER!", type(peer), peer)
724 if not utils.is_domain_wanted(peer):
725 logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
728 logger.debug("Appending peer='%s' ...", peer)
731 logger.debug("peers()=%d - EXIT!", len(peers))