# Copyright (C) 2023 Free Software Foundation
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published
# by the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
18 from urllib.parse import urlparse
27 from fba.helpers import config
28 from fba.helpers import cookies
29 from fba.helpers import domain as domain_helper
30 from fba.helpers import software as software_helper
31 from fba.helpers import tidyup
32 from fba.helpers import version
34 from fba.http import network
36 from fba.models import instances
38 from fba.networks import lemmy
39 from fba.networks import misskey
40 from fba.networks import peertube
# Configure root logging at INFO level and create this module's logger,
# which every function below uses for its debug/warning output.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def fetch_instances(domain: str, origin: str, software: str, command: str, path: str = None):
    """Register `domain`, fetch its peer list and crawl every unregistered peer.

    The function validates its arguments, determines the software when it is
    not given, adds the domain to `instances` if it is not registered yet,
    fetches the peers through fetch_peers() and then calls itself for each
    wanted peer that is not registered.  Cookies are cleared and pending
    instance updates are flushed before returning.

    Parameters:
        domain: Domain to crawl; checked by domain_helper.raise_on().
        origin: Domain this one was discovered from, or None.
        software: Software name, or None to have it determined.
        command: Non-empty name of the invoking command.
        path: Optional nodeinfo path forwarded to determine_software().

    Raises:
        ValueError: If a parameter has a wrong type or `command` is empty.
    """
    logger.debug("domain='%s',origin='%s',software='%s',command='%s',path='%s' - CALLED!", domain, origin, software, command, path)
    domain_helper.raise_on(domain)

    # Validate all parameters first, independent of whether software is set
    if not isinstance(origin, str) and origin is not None:
        raise ValueError(f"Parameter origin[]='{type(origin)}' is not of type 'str'")
    elif not isinstance(command, str):
        raise ValueError(f"Parameter command[]='{type(command)}' is not of type 'str'")
    elif command == "":
        raise ValueError("Parameter 'command' is empty")
    elif not isinstance(software, str) and software is not None:
        raise ValueError(f"Parameter software[]='{type(software)}' is not of type 'str'")
    elif not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")

    if software is None:
        try:
            logger.debug("Software for domain='%s' is not set, determining ...", domain)
            software = determine_software(domain, path)
        except network.exceptions as exception:
            # Best effort: software stays None and the error is recorded
            logger.warning("Exception '%s' during determining software type", type(exception))
            instances.set_last_error(domain, exception)

        logger.debug("Determined software='%s' for domain='%s'", software, domain)

    logger.debug("Checking if domain='%s' is registered ...", domain)
    if not instances.is_registered(domain):
        logger.debug("Adding new domain='%s',origin='%s',command='%s',path='%s',software='%s'", domain, origin, command, path, software)
        instances.add(domain, origin, command, path, software)

    logger.debug("Updating last_instance_fetch for domain='%s' ...", domain)
    instances.set_last_instance_fetch(domain)

    peerlist = list()
    try:
        logger.debug("Fetching instances for domain='%s',software='%s',origin='%s'", domain, software, origin)
        peerlist = fetch_peers(domain, software, origin)
    except network.exceptions as exception:
        logger.warning("Cannot fetch peers from domain='%s': '%s'", domain, type(exception))

    logger.debug("peerlist[]='%s'", type(peerlist))
    if isinstance(peerlist, list):
        logger.debug("Invoking instances.set_total_peers(%s,%d) ...", domain, len(peerlist))
        instances.set_total_peers(domain, peerlist)

    logger.debug("peerlist[]='%s'", type(peerlist))
    if not peerlist:
        # Nothing to crawl: flush what is pending and leave early
        logger.warning("Cannot fetch peers: domain='%s'", domain)

        if instances.has_pending(domain):
            logger.debug("Flushing updates for domain='%s' ...", domain)
            instances.update_data(domain)

        logger.debug("Invoking cookies.clear(%s) ...", domain)
        cookies.clear(domain)

        logger.debug("EXIT!")
        return

    logger.info("Checking %d instance(s) from domain='%s',software='%s' ...", len(peerlist), domain, software)
    for instance in peerlist:
        logger.debug("instance='%s'", instance)

        # Skip "None" types as tidyup.domain() cannot parse them
        if instance is None:
            continue

        logger.debug("instance='%s' - BEFORE!", instance)
        instance = tidyup.domain(instance)
        logger.debug("instance='%s' - AFTER!", instance)

        if not instance:
            logger.warning("Empty instance after tidyup.domain(), domain='%s'", domain)
            continue

        logger.debug("instance='%s' - BEFORE!", instance)
        try:
            # Convert internationalized names to their ASCII (punycode) form
            instance = instance.encode("idna").decode("utf-8")
        except UnicodeError as exception:
            # A single malformed peer name must not abort the whole crawl
            logger.warning("Cannot IDNA-encode instance='%s': '%s' - SKIPPED!", instance, type(exception))
            continue
        logger.debug("instance='%s' - AFTER!", instance)

        if not utils.is_domain_wanted(instance):
            logger.debug("instance='%s' is not wanted - SKIPPED!", instance)
            continue
        elif instance.find("/profile/") > 0 or instance.find("/users/") > 0 or (instances.is_registered(instance.split("/")[0]) and instance.find("/c/") > 0):
            logger.debug("instance='%s' is a link to a single user profile - SKIPPED!", instance)
            continue
        elif instance.find("/tag/") > 0:
            logger.debug("instance='%s' is a link to a tag - SKIPPED!", instance)
            continue
        elif not instances.is_registered(instance):
            # Recurse with the current domain as origin of the new instance
            logger.debug("Fetching instance='%s',origin='%s',command='%s',path='%s' ...", instance, domain, command, path)
            fetch_instances(instance, domain, None, command, path)

    logger.debug("Invoking cookies.clear(%s) ...", domain)
    cookies.clear(domain)

    logger.debug("Checking if domain='%s' has pending updates ...", domain)
    if instances.has_pending(domain):
        logger.debug("Flushing updates for domain='%s' ...", domain)
        instances.update_data(domain)

    logger.debug("EXIT!")
def fetch_peers(domain: str, software: str, origin: str) -> list:
    """Return the peer list of `domain`.

    For "misskey", "lemmy" and "peertube" the work is delegated to the
    matching network module.  For everything else the generic API paths are
    queried in order and the first non-empty JSON response is used.

    Parameters:
        domain: Domain to query; checked by domain_helper.raise_on().
        software: Software name of the domain, or None.
        origin: Origin domain, only forwarded to lemmy.fetch_peers().

    Returns:
        A list of peers; empty when CSRF detection fails, no path yields
        data or the API response is not a list.

    Raises:
        ValueError: If `software` is neither a string nor None.
    """
    logger.debug("domain='%s',software='%s',origin='%s' - CALLED!", domain, software, origin)
    domain_helper.raise_on(domain)

    if not isinstance(software, str) and software is not None:
        raise ValueError(f"software[]='{type(software)}' is not of type 'str'")

    if software == "misskey":
        logger.debug("Invoking misskey.fetch_peers(%s) ...", domain)
        return misskey.fetch_peers(domain)
    elif software == "lemmy":
        logger.debug("Invoking lemmy.fetch_peers(%s,%s) ...", domain, origin)
        return lemmy.fetch_peers(domain, origin)
    elif software == "peertube":
        logger.debug("Invoking peertube.fetch_peers(%s) ...", domain)
        return peertube.fetch_peers(domain)

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        return list()

    # NOTE(review): the second entry was restored from context - confirm
    paths = [
        "/api/v1/instance/peers",
        "/api/v3/site",
    ]

    # Init peers variable
    peers = list()

    logger.debug("Checking %d paths ...", len(paths))
    for path in paths:
        logger.debug("Fetching path='%s' from domain='%s',software='%s' ...", path, domain, software)
        data = network.get_json_api(
            domain,
            path,
            headers,
            (config.get("connection_timeout"), config.get("read_timeout"))
        )

        logger.debug("data[]='%s'", type(data))
        if "error_message" in data:
            logger.debug("Was not able to fetch peers from path='%s',domain='%s' ...", path, domain)
            instances.set_last_error(domain, data)
        elif "json" in data and len(data["json"]) > 0:
            logger.debug("Querying API path='%s' was successful: domain='%s',data[json][%s]()=%d", path, domain, type(data['json']), len(data['json']))
            peers = data["json"]

            logger.debug("Marking domain='%s' as successfully handled ...", domain)
            instances.set_success(domain)
            break

    if not isinstance(peers, list):
        logger.warning("peers[]='%s' is not of type 'list', maybe bad API response?", type(peers))
        peers = list()

    logger.debug("Invoking instances.set_total_peers(%s,%d) ...", domain, len(peers))
    instances.set_total_peers(domain, peers)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers
def fetch_nodeinfo(domain: str, path: str = None) -> dict:
    """Fetch nodeinfo for `domain`, by auto-discovery first, then static paths.

    fetch_wellknown_nodeinfo() is tried first.  When it yields no usable JSON,
    the static request paths are probed in order; `path`, when given,
    restricts probing to the matching request (relative form or absolute
    http/https URL on `domain`).

    Parameters:
        domain: Domain to query; checked by domain_helper.raise_on().
        path: Optional known nodeinfo path or URL, or None to probe all.

    Returns:
        The response dict of the successful request, the last failed response,
        an empty dict when no request was made, or a dict with "status_code",
        "error_message" and "exception" when CSRF detection failed.

    Raises:
        ValueError: If `path` is neither a string nor None.
    """
    logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
    domain_helper.raise_on(domain)

    if not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")

    logger.debug("Fetching nodeinfo from domain='%s' ...", domain)
    nodeinfo = fetch_wellknown_nodeinfo(domain)

    logger.debug("nodeinfo[%s](%d='%s'", type(nodeinfo), len(nodeinfo), nodeinfo)
    if "error_message" not in nodeinfo and "json" in nodeinfo and len(nodeinfo["json"]) > 0:
        logger.debug("Invoking instances.set_last_nodeinfo(%s) ...", domain)
        instances.set_last_nodeinfo(domain)

        logger.debug("Found nodeinfo[json]()=%d - EXIT!", len(nodeinfo['json']))
        return nodeinfo

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()
    data = dict()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (nodeinfo,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        instances.set_software(domain, None)
        instances.set_detection_mode(domain, None)
        instances.set_nodeinfo_url(domain, None)
        # NOTE(review): "status_code" entry restored from context - confirm value
        return {
            "status_code"  : 500,
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    # NOTE(review): entries without ".json" suffix and "/api/v1/instance"
    # were restored from context - confirm
    request_paths = [
        "/nodeinfo/2.1.json",
        "/nodeinfo/2.1",
        "/nodeinfo/2.0.json",
        "/nodeinfo/2.0",
        "/nodeinfo/1.0.json",
        "/nodeinfo/1.0",
        "/api/v1/instance",
    ]

    for request in request_paths:
        logger.debug("request='%s'", request)
        # Absolute forms of this request; must be built from `request`, a URL
        # built from `path` could never be equal to `path` itself
        http_url = f"http://{domain}{request}"
        https_url = f"https://{domain}{request}"

        logger.debug("path[%s]='%s',request='%s',http_url='%s',https_url='%s'", type(path), path, request, http_url, https_url)
        if path is None or path in [request, http_url, https_url]:
            logger.debug("path='%s',http_url='%s',https_url='%s'", path, http_url, https_url)
            if path in [http_url, https_url]:
                logger.debug("domain='%s',path='%s' has protocol in path, splitting ...", domain, path)
                components = urlparse(path)
                path = components.path

            logger.debug("Fetching request='%s' from domain='%s' ...", request, domain)
            data = network.get_json_api(
                domain,
                request,
                headers,
                (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
            )

            logger.debug("data[]='%s'", type(data))
            if "error_message" not in data and "json" in data:
                logger.debug("Success: request='%s' - Setting detection_mode=STATIC_CHECK ...", request)
                instances.set_last_nodeinfo(domain)
                instances.set_detection_mode(domain, "STATIC_CHECK")
                instances.set_nodeinfo_url(domain, request)
                break

            # .get() as a response without "json" may lack these keys, too
            logger.warning("Failed fetching nodeinfo from domain='%s',status_code='%s',error_message='%s'", domain, data.get('status_code'), data.get('error_message'))

    logger.debug("data()=%d - EXIT!", len(data))
    return data
def fetch_wellknown_nodeinfo(domain: str) -> dict:
    """Discover and fetch nodeinfo through "/.well-known/nodeinfo".

    The well-known document's "links" are scanned for the known schema
    identifiers (newest first).  The first link whose "href" can be fetched
    successfully wins; detection mode and nodeinfo URL are stored for it.

    Parameters:
        domain: Domain to query; checked by domain_helper.raise_on().

    Returns:
        The response dict of the last request made: the nodeinfo response on
        success, the well-known response when it failed or had no "links", an
        empty dict when no link was fetched, or a dict with "status_code",
        "error_message" and "exception" when CSRF detection failed.
    """
    logger.debug("domain='%s' - CALLED!", domain)
    domain_helper.raise_on(domain)

    # "rel" identifiers (no real URLs)
    nodeinfo_identifier = [
        "https://nodeinfo.diaspora.software/ns/schema/2.1",
        "http://nodeinfo.diaspora.software/ns/schema/2.1",
        "https://nodeinfo.diaspora.software/ns/schema/2.0",
        "http://nodeinfo.diaspora.software/ns/schema/2.0",
        "https://nodeinfo.diaspora.software/ns/schema/1.1",
        "http://nodeinfo.diaspora.software/ns/schema/1.1",
        "https://nodeinfo.diaspora.software/ns/schema/1.0",
        "http://nodeinfo.diaspora.software/ns/schema/1.0",
    ]

    # No CSRF by default, you don't have to add network.api_headers by yourself here
    headers = tuple()

    try:
        logger.debug("Checking CSRF for domain='%s'", domain)
        headers = csrf.determine(domain, dict())
    except network.exceptions as exception:
        logger.warning("Exception '%s' during checking CSRF (fetch_wellknown_nodeinfo,%s) - EXIT!", type(exception), __name__)
        instances.set_last_error(domain, exception)
        # Same shape and string message as fetch_nodeinfo() returns
        # NOTE(review): "status_code" entry restored from context - confirm value
        return {
            "status_code"  : 500,
            "error_message": f"exception[{type(exception)}]='{str(exception)}'",
            "exception"    : exception,
        }

    logger.debug("Fetching .well-known info for domain='%s'", domain)
    data = network.get_json_api(
        domain,
        "/.well-known/nodeinfo",
        headers,
        (config.get("nodeinfo_connection_timeout"), config.get("nodeinfo_read_timeout"))
    )

    logger.debug("data[]='%s'", type(data))
    if "error_message" not in data:
        # A response without "json" must not raise KeyError
        nodeinfo = data.get("json")

        logger.debug("Marking domain='%s' as successfully handled ...", domain)
        instances.set_success(domain)

        if isinstance(nodeinfo, dict) and isinstance(nodeinfo.get("links"), list):
            logger.debug("Found entries: nodeinfo()=%d,domain='%s'", len(nodeinfo), domain)
            logger.debug("Found nodeinfo[links]()=%d record(s),", len(nodeinfo["links"]))
            for niid in nodeinfo_identifier:
                # Reset per identifier so a previous failure is not mistaken
                # for this identifier's result
                data = dict()

                logger.debug("Checking niid='%s' ...", niid)
                for link in nodeinfo["links"]:
                    logger.debug("link[%s]='%s'", type(link), link)
                    if not isinstance(link, dict) or "rel" not in link:
                        logger.debug("link[]='%s' is not of type 'dict' or no element 'rel' found - SKIPPED!", type(link))
                        continue
                    elif link["rel"] != niid:
                        logger.debug("link[re]='%s' does not matched niid='%s' - SKIPPED!", link["rel"], niid)
                        continue
                    elif "href" not in link:
                        logger.warning("link[rel]='%s' has no element 'href' - SKIPPED!", link["rel"])
                        continue
                    elif link["href"] is None:
                        logger.debug("link[href] is None, link[rel]='%s' - SKIPPED!", link["rel"])
                        continue

                    # Default is that 'href' has a complete URL, but some hosts don't send that
                    logger.debug("link[rel]='%s' matches niid='%s'", link["rel"], niid)
                    url = link["href"]
                    components = urlparse(url)

                    logger.debug("components[%s]='%s'", type(components), components)
                    if components.scheme == "" and components.netloc == "":
                        logger.warning("link[href]='%s' has no scheme and host name in it, prepending from domain='%s'", link['href'], domain)
                        url = f"https://{domain}{url}"
                        components = urlparse(url)
                    elif components.netloc == "":
                        logger.warning("link[href]='%s' has no netloc set, setting domain='%s'", link["href"], domain)
                        url = f"{components.scheme}://{domain}{components.path}"
                        components = urlparse(url)

                    logger.debug("components.netloc[]='%s'", type(components.netloc))
                    if not utils.is_domain_wanted(components.netloc):
                        logger.debug("components.netloc='%s' is not wanted - SKIPPED!", components.netloc)
                        continue

                    logger.debug("Fetching nodeinfo from url='%s' ...", url)
                    data = network.fetch_api_url(
                        url,
                        (config.get("connection_timeout"), config.get("read_timeout"))
                    )

                    logger.debug("link[href]='%s',data[]='%s'", link["href"], type(data))
                    if "error_message" not in data and "json" in data:
                        logger.debug("Found JSON data()=%d,link[href]='%s' - Setting detection_mode=AUTO_DISCOVERY ...", len(data), link["href"])
                        instances.set_detection_mode(domain, "AUTO_DISCOVERY")
                        instances.set_nodeinfo_url(domain, link["href"])

                        logger.debug("Marking domain='%s' as successfully handled ...", domain)
                        instances.set_success(domain)
                        break
                    else:
                        logger.debug("Setting last error for domain='%s',data[]='%s'", domain, type(data))
                        instances.set_last_error(domain, data)

                logger.debug("data()=%d", len(data))
                if "error_message" not in data and "json" in data:
                    # Stop at the first (newest) schema that could be fetched
                    logger.debug("Auto-discovery successful: domain='%s'", domain)
                    break
        else:
            logger.warning("nodeinfo does not contain 'links': domain='%s'", domain)

    logger.debug("Returning data[]='%s' - EXIT!", type(data))
    return data
def fetch_generator_from_path(domain: str, path: str = "/") -> str:
    """Detect the software of `domain` from meta tags of an HTML page.

    The page at `path` is fetched (following redirects) and inspected for, in
    this order, <meta name="generator">, <meta property="og:site_name"> and
    <meta property="og:platform">.  The matching detection mode is stored and
    version numbers and phrases such as "powered by" are stripped from the
    found value.

    Parameters:
        domain: Domain to query; checked by domain_helper.raise_on().
        path: Non-empty path of the page to fetch.

    Returns:
        The cleaned software name, or None when nothing was detected.

    Raises:
        ValueError: If `path` is not a string or is empty.
        requests.exceptions.TooManyRedirects: If the response URL is not on
            `domain` (redirect to another domain).
    """
    logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
    domain_helper.raise_on(domain)

    if not isinstance(path, str):
        raise ValueError(f"path[]='{type(path)}' is not of type 'str'")
    elif path == "":
        raise ValueError("Parameter 'path' is empty")

    software = None

    logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
    # NOTE(review): the headers argument was restored from context - confirm
    response = network.fetch_response(
        domain,
        path,
        network.web_headers,
        (config.get("connection_timeout"), config.get("read_timeout")),
        allow_redirects=True
    )

    logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
    # Membership test: find() > 0 would reject a document starting with "<html"
    if response.ok and response.status_code < 300 and "<html" in response.text and domain_helper.is_in_url(domain, response.url):
        logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
        doc = bs4.BeautifulSoup(response.text, "html.parser")

        logger.debug("doc[]='%s'", type(doc))
        generator = doc.find("meta", {"name"    : "generator"})
        site_name = doc.find("meta", {"property": "og:site_name"})
        platform  = doc.find("meta", {"property": "og:platform"})

        logger.debug("generator[]='%s',site_name[]='%s',platform[]='%s'", type(generator), type(site_name), type(platform))
        if isinstance(generator, bs4.element.Tag) and isinstance(generator.get("content"), str):
            logger.debug("Found generator meta tag: domain='%s'", domain)
            software = tidyup.domain(generator.get("content"))

            logger.debug("software[%s]='%s'", type(software), software)
            if software is not None and software != "":
                logger.info("domain='%s' is generated by software='%s' - Setting detection_mode=GENERATOR ...", domain, software)
                instances.set_detection_mode(domain, "GENERATOR")
        elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str):
            logger.debug("Found property=og:site_name, domain='%s'", domain)
            software = tidyup.domain(site_name.get("content"))

            logger.debug("software[%s]='%s'", type(software), software)
            if software is not None and software != "":
                logger.debug("domain='%s' has og:site_name='%s' - Setting detection_mode=SITE_NAME ...", domain, software)
                instances.set_detection_mode(domain, "SITE_NAME")
        elif isinstance(platform, bs4.element.Tag) and isinstance(platform.get("content"), str):
            logger.debug("Found property=og:platform, domain='%s'", domain)
            software = tidyup.domain(platform.get("content"))

            logger.debug("software[%s]='%s'", type(software), software)
            if software is not None and software != "":
                logger.debug("domain='%s' has og:platform='%s' - Setting detection_mode=PLATFORM ...", domain, software)
                instances.set_detection_mode(domain, "PLATFORM")
    elif not domain_helper.is_in_url(domain, response.url):
        logger.warning("domain='%s' doesn't match response.url='%s', maybe redirect to other domain?", domain, response.url)

        components = urlparse(response.url)

        logger.debug("components[]='%s'", type(components))
        if not instances.is_registered(components.netloc):
            # Crawl the redirect target as a new instance discovered from here
            logger.info("components.netloc='%s' is not registered, adding ...", components.netloc)
            fetch_instances(components.netloc, domain, None, "fetch_generator")

        message = f"Redirect from domain='{domain}' to response.url='{response.url}'"
        instances.set_last_error(domain, message)
        instances.set_software(domain, None)
        instances.set_detection_mode(domain, None)
        instances.set_nodeinfo_url(domain, None)

        raise requests.exceptions.TooManyRedirects(message)

    logger.debug("software[]='%s'", type(software))
    if isinstance(software, str) and software == "":
        logger.debug("Corrected empty string to None for software of domain='%s'", domain)
        software = None
    elif isinstance(software, str) and ("." in software or " " in software):
        logger.debug("software='%s' may contain a version number, domain='%s', removing it ...", software, domain)
        software = version.remove(software)

    logger.debug("software[]='%s'", type(software))
    if isinstance(software, str) and "powered by " in software:
        logger.debug("software='%s' has 'powered by' in it", software)
        software = version.remove(version.strip_powered_by(software))
    elif isinstance(software, str) and " hosted on " in software:
        logger.debug("software='%s' has 'hosted on' in it", software)
        software = version.remove(version.strip_hosted_on(software))
    elif isinstance(software, str) and " by " in software:
        logger.debug("software='%s' has ' by ' in it", software)
        software = version.strip_until(software, " by ")
    elif isinstance(software, str) and " see " in software:
        logger.debug("software='%s' has ' see ' in it", software)
        software = version.strip_until(software, " see ")

    logger.debug("software='%s' - EXIT!", software)
    return software
def determine_software(domain: str, path: str = None) -> str:
    """Determine the software name running on `domain`.

    Nodeinfo is fetched through fetch_nodeinfo(); its [software][name] entry
    is used when present.  In every other case detection mode and nodeinfo
    URL are reset and fetch_generator_from_path() is consulted (exactly
    once).  The result is passed through software_helper.alias() and
    stripped of version numbers and "powered by" phrases.

    Parameters:
        domain: Domain to query; checked by domain_helper.raise_on().
        path: Optional nodeinfo path forwarded to fetch_nodeinfo().

    Returns:
        The software name, or None when it could not be determined.

    Raises:
        ValueError: If `path` is neither a string nor None.
        Exception: Whatever exception fetch_nodeinfo() returned under the
            "exception" key is re-raised.
    """
    logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
    domain_helper.raise_on(domain)

    if not isinstance(path, str) and path is not None:
        raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")

    logger.debug("Determining software for domain='%s',path='%s'", domain, path)
    software = None

    logger.debug("Fetching nodeinfo from domain='%s' ...", domain)
    data = fetch_nodeinfo(domain, path)

    # JSON payload of the nodeinfo response, stays empty if none was returned
    payload = dict()

    logger.debug("data[%s]='%s'", type(data), data)
    if "exception" in data:
        # Continue raising it
        logger.debug("data()=%d contains exception='%s' - raising ...", len(data), type(data["exception"]))
        raise data["exception"]
    elif "error_message" in data:
        logger.debug("Returned error_message during fetching nodeinfo: '%s',status_code=%d", data['error_message'], data['status_code'])
    elif "json" in data:
        logger.debug("domain='%s',path='%s',data[json] found ...", domain, path)
        if isinstance(data["json"], dict):
            payload = data["json"]
    else:
        logger.debug("JSON response from domain='%s' does not include [software][name], fetching / ...", domain)

    # Only a found [software][name] makes the generator fallback unnecessary
    needs_generator = True
    if payload.get("status") == "error" and "message" in payload:
        logger.warning("JSON response is an error: '%s' - Resetting detection_mode,nodeinfo_url ...", payload["message"])
        instances.set_last_error(domain, payload["message"])
    elif isinstance(payload.get("software"), dict) and "name" in payload["software"]:
        logger.debug("Found data[json][software][name] in JSON response")
        software = payload["software"]["name"]
        needs_generator = False
        logger.debug("software[%s]='%s' - FOUND!", type(software), software)
    elif "message" in payload:
        logger.warning("JSON response contains only a message: '%s' - Resetting detection_mode,nodeinfo_url ...", payload["message"])
        instances.set_last_error(domain, payload["message"])
    else:
        logger.debug("JSON response from domain='%s' does not include [software][name] - Resetting detection_mode,nodeinfo_url ...", domain)

    if needs_generator:
        # Reset before fetching, as the generator check stores its own
        # detection mode on success
        instances.set_detection_mode(domain, None)
        instances.set_nodeinfo_url(domain, None)

        logger.debug("Invoking fetch_generator_from_path(%s) ...", domain)
        software = fetch_generator_from_path(domain)
        logger.debug("Generator for domain='%s' is: '%s'", domain, software)

    logger.debug("software[%s]='%s'", type(software), software)
    if software is None:
        logger.debug("Returning None - EXIT!")
        return None

    logger.debug("software='%s'- BEFORE!", software)
    software = software_helper.alias(software)
    logger.debug("software['%s']='%s' - AFTER!", type(software), software)

    if str(software) == "":
        logger.debug("software for domain='%s' was not detected, trying generator ...", domain)
        software = fetch_generator_from_path(domain)
    elif isinstance(software, str) and ("." in software or " " in software):
        # isinstance() guard: the `in` operator raises TypeError on non-strings
        logger.debug("software='%s' may contain a version number, domain='%s', removing it ...", software, domain)
        software = version.remove(software)

    logger.debug("software[]='%s'", type(software))
    if isinstance(software, str) and "powered by" in software:
        logger.debug("software='%s' has 'powered by' in it", software)
        software = version.remove(version.strip_powered_by(software))

    logger.debug("software='%s' - EXIT!", software)
    return software
def find_domains(tag: bs4.element.Tag) -> list:
    """Extract domain/reason records from the rows of an HTML table.

    The first <td> of each row is taken as domain, the second as reason.
    Rows without <td>, unwanted domains and invalid domains are skipped.

    Parameters:
        tag: Element containing the <tr> rows to parse.

    Returns:
        A list of dicts with the keys "domain" and "reason".

    Raises:
        ValueError: If `tag` is not a bs4.element.Tag.
        KeyError: If `tag` contains no <tr> element.
    """
    logger.debug("tag[]='%s' - CALLED!", type(tag))
    if not isinstance(tag, bs4.element.Tag):
        raise ValueError(f"Parameter tag[]='{type(tag)}' is not type of bs4.element.Tag")

    # Select the rows only once, they are needed for check and iteration
    rows = tag.select("tr")
    if len(rows) == 0:
        raise KeyError("No table rows found in table!")

    domains = list()
    for element in rows:
        logger.debug("element[]='%s'", type(element))
        cells = element.find_all("td")
        if len(cells) == 0:
            logger.debug("Skipping element, no <td> found")
            continue

        domain = tidyup.domain(cells[0].text)
        # A row with a single cell has no reason column
        # NOTE(review): None as reason for such rows - confirm consumers accept it
        reason = tidyup.reason(cells[1].text) if len(cells) > 1 else None

        logger.debug("domain='%s',reason='%s'", domain, reason)

        if not utils.is_domain_wanted(domain):
            logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
            continue
        elif domain == "gab.com/.ai, develop.gab.com":
            # One row lists three domains; add each of them separately
            # NOTE(review): first two entries restored from the matched text - confirm
            logger.debug("Multiple domains detected in one row")
            domains.append({
                "domain": "gab.com",
                "reason": reason,
            })
            domains.append({
                "domain": "gab.ai",
                "reason": reason,
            })
            domains.append({
                "domain": "develop.gab.com",
                "reason": reason,
            })
            continue
        elif not validators.domain(domain.split("/")[0]):
            logger.warning("domain='%s' is not a valid domain - SKIPPED!", domain)
            continue

        logger.debug("Adding domain='%s',reason='%s' ...", domain, reason)
        domains.append({
            "domain": domain,
            "reason": reason,
        })

    logger.debug("domains()=%d - EXIT!", len(domains))
    return domains
def add_peers(rows: dict) -> list:
    """Collect the wanted peer domains from a federation response.

    The keys "linked", "allowed" and "blocked" of `rows` are read in that
    order.  Each entry may be a string or a dict with a "domain" element; it
    is tidied up and kept only when utils.is_domain_wanted() accepts it.

    Parameters:
        rows: Dict that may contain the keys "linked", "allowed", "blocked".

    Returns:
        A list of tidied peer domains.

    Raises:
        ValueError: If `rows` is not a dict or an entry has an unsupported type.
    """
    logger.debug("rows[]='%s' - CALLED!", type(rows))
    if not isinstance(rows, dict):
        raise ValueError(f"Parameter rows[]='{type(rows)}' is not of type 'dict'")

    peers = list()
    for key in ["linked", "allowed", "blocked"]:
        logger.debug("Checking key='%s'", key)
        if key not in rows or rows[key] is None:
            logger.debug("Cannot find key='%s' or it is NoneType - SKIPPED!", key)
            continue

        logger.debug("Adding %d peer(s) to peers list ...", len(rows[key]))
        for peer in rows[key]:
            logger.debug("peer[%s]='%s' - BEFORE!", type(peer), peer)
            if peer is None or peer == "":
                logger.debug("peer is empty - SKIPPED")
                continue
            elif isinstance(peer, dict) and "domain" in peer:
                logger.debug("peer[domain]='%s'", peer["domain"])
                peer = tidyup.domain(peer["domain"])
            elif isinstance(peer, str):
                logger.debug("peer='%s'", peer)
                peer = tidyup.domain(peer)
            else:
                raise ValueError(f"peer[]='{type(peer)}' is not supported,key='{key}'")

            logger.debug("peer[%s]='%s' - AFTER!", type(peer), peer)
            if not utils.is_domain_wanted(peer):
                logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
                continue

            logger.debug("Appending peer='%s' ...", peer)
            peers.append(peer)

    logger.debug("peers()=%d - EXIT!", len(peers))
    return peers