1 # Copyright (C) 2023 Free Software Foundation
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License as published
5 # by the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU Affero General Public License for more details.
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program. If not, see <https://www.gnu.org/licenses/>.
18 from urllib.parse import urlparse
24 from fba.helpers import blacklist
25 from fba.helpers import config
26 from fba.helpers import cookies
27 from fba.helpers import domain as domain_helper
28 from fba.helpers import software as software_helper
29 from fba.helpers import tidyup
30 from fba.helpers import version
32 from fba.http import csrf
33 from fba.http import network
34 from fba.http import nodeinfo
36 from fba.models import blocks
37 from fba.models import instances
39 from fba.networks import lemmy
40 from fba.networks import misskey
41 from fba.networks import peertube
43 # Depth counter, being raised and lowered
# NOTE(review): the assignment this comment belongs to is not visible in this
# view. fetch_instances() reads a module-level name `_DEPTH`; presumably it is
# initialised here - TODO confirm.
# A single API path entry. Presumably an element of the `_api_paths` sequence
# that fetch_peers() iterates over; the enclosing literal is not visible here.
48 "/api/v1/instance/peers",
# Configure logging at INFO level and create the logger used by every function
# in this module.
52 logging.basicConfig(level=logging.INFO)
53 logger = logging.getLogger(__name__)
55 def fetch_instances(domain: str, origin: str, software: str, command: str, path: str = None) -> None:
# Crawl the peers of `domain`: validate the arguments, determine the software
# when none was passed, register the domain if it is unknown, fetch its peer
# list through fetch_peers() and then visit every returned peer, either by
# calling fetch_instances() again or by adding the peer via instances.add().
#
# Parameters:
#   domain   - checked with domain_helper.raise_on() and against the blacklist
#   origin   - str or None; must not be None for the commands listed below
#   software - str or None; None triggers determine_software()
#   command  - str, forwarded to instances.add() and to recursive calls
#   path     - str starting with "/" or None, forwarded to determine_software()
#
# Raises:
#   Exception  - when `domain` is blacklisted
#   ValueError - on wrongly typed or empty parameters, or when a nested call
#                hits a domain that has been fetched recently
#
# NOTE(review): several short lines of this function (branch headers such as
# try:/else: and return/continue statements) are not visible in this view, so
# the comments below only describe what the visible lines establish.
57 logger.debug("domain='%s',origin='%s',software='%s',command='%s',path='%s',_DEPTH=%d - CALLED!", domain, origin, software, command, path, _DEPTH)
58 domain_helper.raise_on(domain)
# Parameter validation: one if/elif chain, first failing check raises.
60 if blacklist.is_blacklisted(domain):
61 raise Exception(f"domain='{domain}' is blacklisted but function was invoked")
62 elif not isinstance(origin, str) and origin is not None:
63 raise ValueError(f"Parameter origin[]='{type(origin)}' is not of type 'str'")
64 elif not isinstance(command, str):
65 raise ValueError(f"Parameter command[]='{type(command)}' is not of type 'str'")
# NOTE(review): the condition guarding the next raise is not visible here;
# judging by the message it tests for an empty `command`.
67 raise ValueError("Parameter 'command' is empty")
# These commands always need an origin to attribute newly found instances to.
68 elif command in ["fetch_blocks", "fetch_cs", "fetch_bkali", "fetch_relays", "fetch_fedipact", "fetch_joinmobilizon", "fetch_joinmisskey", "fetch_joinfediverse", "fetch_relaylist"] and origin is None:
69 raise ValueError(f"Parameter command='{command}' but origin is None, please fix invoking this function.")
70 elif not isinstance(path, str) and path is not None:
71 raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")
72 elif path is not None and not path.startswith("/"):
73 raise ValueError(f"path='{path}' does not start with a slash")
# Only nested invocations (_DEPTH > 0) refuse a recently fetched domain.
74 elif _DEPTH > 0 and instances.is_recent(domain, "last_instance_fetch"):
75 raise ValueError(f"domain='{domain}' has recently been fetched but function was invoked")
# No software given and no recent fetch: detect it now.
76 elif software is None and not instances.is_recent(domain, "last_instance_fetch"):
78 logger.debug("Software for domain='%s',path='%s' is not set, determining ...", domain, path)
79 software = determine_software(domain, path)
# Network failures are logged and stored as the domain's last error.
80 except network.exceptions as exception:
81 logger.warning("Exception '%s' during determining software type", type(exception))
82 instances.set_last_error(domain, exception)
84 logger.debug("Determined software='%s' for domain='%s'", software, domain)
# Software stays None when the domain was fetched recently.
85 elif software is None:
86 logger.debug("domain='%s' has unknown software or nodeinfo has recently being fetched", domain)
87 elif not isinstance(software, str):
88 raise ValueError(f"Parameter software[]='{type(software)}' is not of type 'str'")
# Make sure the domain has a record before anything is stored for it.
93 logger.debug("Checking if domain='%s' is registered ...", domain)
94 if not instances.is_registered(domain):
95 logger.debug("Adding new domain='%s',origin='%s',command='%s',path='%s',software='%s'", domain, origin, command, path, software)
96 instances.add(domain, origin, command, path, software)
# Relay software is not crawled for peers by this function.
98 logger.debug("software='%s'", software)
99 if software is not None and software_helper.is_relay(software):
100 logger.debug("software='%s' is a relay software - EXIT!", software)
104 logger.debug("Updating last_instance_fetch for domain='%s' ...", domain)
105 instances.set_last_instance_fetch(domain)
# Peers are only fetched when the software is known.
108 logger.debug("software='%s'", software)
109 if software is not None:
111 logger.debug("Fetching instances for domain='%s',software='%s',origin='%s'", domain, software, origin)
112 peerlist = fetch_peers(domain, software, origin)
# NOTE(review): the body of this except clause is not visible in this view.
113 except network.exceptions as exception:
117 logger.debug("peerlist[]='%s'", type(peerlist))
118 if isinstance(peerlist, list):
# NOTE(review): the message below names "set_total_peerlist" while the call
# that follows is instances.set_total_peers().
119 logger.debug("Invoking instances.set_total_peerlist(%s,%d) ...", domain, len(peerlist))
120 instances.set_total_peers(domain, peerlist)
122 logger.debug("Invoking cookies.clear(%s) ...", domain)
123 cookies.clear(domain)
# Path taken when no peers could be fetched (the guarding condition is not
# visible in this view): pending updates are flushed before leaving.
125 logger.debug("peerlist[]='%s'", type(peerlist))
127 logger.warning("Cannot fetch peers: domain='%s',software='%s'", domain, software)
128 if instances.has_pending(domain):
129 logger.debug("Flushing updates for domain='%s' ...", domain)
130 instances.update(domain)
133 logger.debug("EXIT!")
# Empty peer list: same flush-then-leave handling.
135 elif len(peerlist) == 0:
136 logger.info("domain='%s' returned an empty peer list.", domain)
137 if instances.has_pending(domain):
138 logger.debug("Flushing updates for domain='%s' ...", domain)
139 instances.update(domain)
142 logger.debug("domain='%s',software='%s' has an empty peer list returned - EXIT!", domain, software)
# Walk over every peer; each one is normalised and filtered before use.
145 logger.info("Checking %d instance(s) from domain='%s',software='%s',depth=%d ...", len(peerlist), domain, software, _DEPTH)
146 for instance in peerlist:
147 logger.debug("instance[%s]='%s'", type(instance), instance)
148 if instance in [None, ""]:
149 logger.debug("instance[%s]='%s' is either None or empty - SKIPPED!", type(instance), instance)
# Peers given as a dict with a "url" key are reduced to their host name.
151 elif isinstance(instance, dict) and "url" in instance:
152 logger.debug("Found instance[url]='%s', extracting domain/host name ...", instance["url"])
153 if not validators.url(instance["url"]):
154 logger.warning("instance[url]='%s' is not a valid URL - SKIPPED!", instance["url"])
157 components = urlparse(instance["url"])
158 logger.debug("components[%s]()=%d", type(components), len(components))
# Lower-case the network location and drop any ":port" suffix.
160 instance = components.netloc.lower().split(":")[0]
161 logger.debug("instance='%s'", instance)
# Only non-empty strings go through tidyup.domain(); everything else is None.
163 logger.debug("instance='%s' - BEFORE!", instance)
164 instance = tidyup.domain(instance) if isinstance(instance, str) and instance != "" else None
165 logger.debug("instance='%s' - AFTER!", instance)
167 if instance in [None, ""]:
168 logger.debug("instance[%s]='%s' is empty after tidyup.domain(), domain='%s'", type(instance), instance, domain)
# Repair "a..b" style typos by collapsing the double dot.
170 elif ".." in instance:
171 logger.warning("instance='%s' contains double-dot, removing ...", instance)
172 instance = instance.replace("..", ".")
# `probe` is the part before the first "/", i.e. without any path component.
174 probe = instance.split("/")[0]
175 logger.debug("instance='%s',probe='%s'", instance, probe)
176 if not validators.domain(probe, rfc_2782=True):
177 logger.warning("probe='%s' is not a valid domain - SKIPPED!", probe)
179 elif not domain_helper.is_tld_wanted(probe):
180 logger.debug("probe='%s' has an unwanted TLD - SKIPPED!", probe)
# Cut off a query string, then pass the rest through encode_idna().
183 logger.debug("instance='%s' - BEFORE!", instance)
184 instance = domain_helper.encode_idna(instance.split("?")[0])
185 logger.debug("instance='%s' - AFTER!", instance)
187 if not domain_helper.is_wanted(instance):
188 logger.debug("instance='%s' is not wanted - SKIPPED!", instance)
# str.find() > 0 matches the marker anywhere except at the very start.
# "/c/" links only count when the host part is an already registered instance.
190 elif instance.find("/profile/") > 0 or instance.find("/users/") > 0 or (instances.is_registered(instance.split("/")[0]) and instance.find("/c/") > 0):
191 logger.debug("instance='%s' is a link to a single user profile - SKIPPED!", instance)
193 elif instance.find("/tag/") > 0:
194 logger.debug("instance='%s' is a link to a tag - SKIPPED!", instance)
# Unknown peer: flush pending updates of the current domain first.
196 elif not instances.is_registered(instance):
197 logger.debug("Checking if domain='%s' has pending updates ...", domain)
198 if instances.has_pending(domain):
199 logger.debug("Flushing updates for domain='%s' ...", domain)
200 instances.update(domain)
# Recurse only within the configured depth and for large enough peer lists;
# the current domain becomes the origin and the software is left undetermined.
202 logger.debug("instance='%s',origin='%s',_DEPTH=%d reached!", instance, origin, _DEPTH)
203 if _DEPTH <= config.get("max_crawl_depth") and len(peerlist) >= config.get("min_peers_length"):
204 logger.debug("Fetching instance='%s',origin='%s',command='%s',path='%s',_DEPTH=%d ...", instance, domain, command, path, _DEPTH)
205 fetch_instances(instance, domain, None, command, path)
# Presumably the alternative branch: the peer is only recorded, not crawled
# (the branch header is not visible in this view).
207 logger.debug("Adding instance='%s',domain='%s',command='%s',_DEPTH=%d ...", instance, domain, command, _DEPTH)
208 instances.add(instance, domain, command)
# Final flush of anything still pending for the current domain.
210 logger.debug("Checking if domain='%s' has pending updates ...", domain)
211 if instances.has_pending(domain):
212 logger.debug("Flushing updates for domain='%s' ...", domain)
213 instances.update(domain)
216 logger.debug("EXIT!")
218 def fetch_peers(domain: str, software: str, origin: str) -> list:
# Fetch the peer list of `domain`. Misskey, Lemmy and PeerTube are delegated to
# their own modules; for every other software the paths in `_api_paths` are
# queried through network.fetch_json_rows().
#
# Parameters:
#   domain   - checked with domain_helper.raise_on() and against the blacklist
#   software - str or None; must not be empty and must not be a relay software
#   origin   - str or None; must not be empty; only forwarded to Lemmy
#
# Raises:
#   Exception  - when `domain` is blacklisted
#   ValueError - on wrongly typed/empty parameters or relay software
#
# NOTE(review): the return statements are not visible in this view; the
# annotation and the log messages indicate a list is returned.
219 logger.debug("domain='%s',software='%s',origin='%s' - CALLED!", domain, software, origin)
220 domain_helper.raise_on(domain)
222 if blacklist.is_blacklisted(domain):
223 raise Exception(f"domain='{domain}' is blacklisted but function was invoked")
224 elif not isinstance(software, str) and software is not None:
225 raise ValueError(f"Parameter software[]='{type(software)}' is not of type 'str'")
226 elif isinstance(software, str) and software == "":
227 raise ValueError("Parameter 'software' is empty")
228 elif software is not None and software_helper.is_relay(software):
229 raise ValueError(f"domain='{domain}' is of software='{software}' and isn't supported here.")
230 elif not isinstance(origin, str) and origin is not None:
231 raise ValueError(f"Parameter origin[]='{type(origin)}' is not of type 'str'")
232 elif isinstance(origin, str) and origin == "":
233 raise ValueError("Parameter 'origin' is empty")
# Software with a dedicated implementation is handled by its own module.
235 if software == "misskey":
236 logger.debug("Invoking misskey.fetch_peers(%s) ...", domain)
237 return misskey.fetch_peers(domain)
238 elif software == "lemmy":
239 logger.debug("Invoking lemmy.fetch_peers(%s,%s) ...", domain, origin)
240 return lemmy.fetch_peers(domain, origin)
241 elif software == "peertube":
242 logger.debug("Invoking peertube.fetch_peers(%s) ...", domain)
243 return peertube.fetch_peers(domain)
245 # No CSRF by default, you don't have to add network.api_headers by yourself here
# Ask csrf.determine() for request headers, starting from an empty dict.
249 logger.debug("Checking CSRF for domain='%s'", domain)
250 headers = csrf.determine(domain, dict())
# A network failure here is logged and recorded; the log message below
# indicates an empty list is handed back in that case.
251 except network.exceptions as exception:
252 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
253 instances.set_last_error(domain, exception)
255 logger.debug("Returning empty list ... - EXIT!")
258 # Init peers variable
# Try the known API paths one by one.
# NOTE(review): the arguments of the fetch_json_rows() call and the condition
# under which set_success() runs are not visible in this view.
261 logger.debug("Checking %d API paths ...", len(_api_paths))
262 for path in _api_paths:
263 logger.debug("Fetching path='%s' from domain='%s',software='%s' ...", path, domain, software)
264 peers = network.fetch_json_rows(
270 logger.debug("peers()=%d", len(peers))
272 logger.debug("Marking domain='%s' as successfully handled ...", domain)
273 instances.set_success(domain)
# Guard against API responses that did not yield a list.
276 if not isinstance(peers, list):
277 logger.warning("peers[]='%s' is not of type 'list', maybe bad API response?", type(peers))
# Store the peer total for this domain.
280 logger.debug("Invoking instances.set_total_peers(%s,%d) ...", domain, len(peers))
281 instances.set_total_peers(domain, peers)
283 logger.debug("peers()=%d - EXIT!", len(peers))
286 def fetch_generator_from_path(domain: str, path: str = "/") -> str:
# Fetch an HTML page from `domain` and derive the software name from its
# <meta> tags. The tags are consulted in this order: property "og:platform",
# name "generator", name "application-name", property "og:site_name"; the
# matching detection mode is stored through instances.set_detection_mode().
#
# Parameters:
#   domain - checked with domain_helper.raise_on() and against the blacklist
#   path   - str that must start with "/" (default "/")
#
# Raises:
#   Exception  - when `domain` is blacklisted
#   ValueError - when `path` is not a str, is empty or lacks the leading "/"
#   requests.exceptions.TooManyRedirects - when the final response URL is
#                invalid or does not belong to `domain`
#
# NOTE(review): the initial value of `software`, the arguments of the
# fetch_response() call and the final return are not visible in this view.
287 logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
288 domain_helper.raise_on(domain)
290 if blacklist.is_blacklisted(domain):
291 raise Exception(f"domain='{domain}' is blacklisted but function was invoked")
292 elif not isinstance(path, str):
293 raise ValueError(f"path[]='{type(path)}' is not of type 'str'")
# NOTE(review): the condition guarding the next raise is not visible here;
# judging by the message it tests for an empty `path`.
295 raise ValueError("Parameter 'path' is empty")
296 elif not path.startswith("/"):
297 raise ValueError(f"path='{path}' does not start with / but should")
# Request the page with the web headers and the configured timeouts.
301 logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
302 response = network.fetch_response(
305 headers=network.web_headers,
306 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
# Drop any "#fragment" from the final URL before validating it.
310 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
311 response_url = response.url.split("#")[0]
312 logger.debug("response_url='%s'", response_url)
# Parse only when the status is 200 or 410, the body contains "<html", the URL
# is valid and still belongs to the requested domain.
314 if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") >= 0 and validators.url(response_url) and domain_helper.is_in_url(domain, response_url):
315 logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
316 doc = bs4.BeautifulSoup(response.text, "html.parser")
318 logger.debug("doc[]='%s'", type(doc))
319 platform = doc.find("meta", {"property": "og:platform"})
320 generator = doc.find("meta", {"name" : "generator"})
321 site_name = doc.find("meta", {"property": "og:site_name"})
322 app_name = doc.find("meta", {"name" : "application-name"})
# Each candidate must be a Tag whose "content" attribute is a non-empty str.
324 logger.debug("generator[]='%s',site_name[]='%s',platform[]='%s',app_name[]='%s'", type(generator), type(site_name), type(platform), type(app_name))
325 if isinstance(platform, bs4.element.Tag) and isinstance(platform.get("content"), str) and platform.get("content") != "":
326 logger.debug("Found property=og:platform, domain='%s'", domain)
327 software = tidyup.domain(platform.get("content"))
328 logger.debug("software[%s]='%s' after tidyup.domain() ...", type(software), software)
330 if software is not None and software != "":
331 logger.debug("domain='%s' has og:platform='%s' - Setting detection_mode=PLATFORM ...", domain, software)
332 instances.set_detection_mode(domain, "PLATFORM")
333 elif isinstance(generator, bs4.element.Tag) and isinstance(generator.get("content"), str) and generator.get("content") != "":
334 logger.debug("Found generator meta tag: domain='%s'", domain)
335 software = tidyup.domain(generator.get("content"))
337 logger.debug("software[%s]='%s'", type(software), software)
338 if software is not None and software != "":
339 logger.info("domain='%s' is generated by software='%s' - Setting detection_mode=GENERATOR ...", domain, software)
340 instances.set_detection_mode(domain, "GENERATOR")
341 elif isinstance(app_name, bs4.element.Tag) and isinstance(app_name.get("content"), str) and app_name.get("content") != "":
# NOTE(review): the message below says "og:app_name" although the tag looked
# up above is <meta name="application-name">.
342 logger.debug("Found property=og:app_name, domain='%s'", domain)
343 software = tidyup.domain(app_name.get("content"))
345 logger.debug("software[%s]='%s'", type(software), software)
346 if software is not None and software != "":
# NOTE(review): the message spells the mode "app_name", the stored value is
# "APP_NAME".
347 logger.debug("domain='%s' has application-name='%s' - Setting detection_mode=app_name ...", domain, software)
348 instances.set_detection_mode(domain, "APP_NAME")
349 elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str) and site_name.get("content") != "":
350 logger.debug("Found property=og:site_name, domain='%s'", domain)
351 software = tidyup.domain(site_name.get("content"))
353 logger.debug("software[%s]='%s'", type(software), software)
354 if software is not None and software != "":
355 logger.debug("domain='%s' has og:site_name='%s' - Setting detection_mode=SITE_NAME ...", domain, software)
356 instances.set_detection_mode(domain, "SITE_NAME")
# Invalid final URL: record the error and raise.
357 elif not validators.url(response_url):
358 logger.warning("response_url='%s' is not valid - Raising exception ...", response_url)
360 message = f"Redirect from domain='{domain}' to response_url='{response_url}'"
361 instances.set_last_error(domain, message)
362 raise requests.exceptions.TooManyRedirects(message)
# Redirect to a different domain: register the target when it is wanted and
# unknown, reset the stored detection data of `domain`, then raise.
363 elif not domain_helper.is_in_url(domain, response_url):
364 logger.warning("domain='%s' doesn't match response.url='%s', maybe redirect to other domain?", domain, response.url)
366 components = urlparse(response.url)
367 domain2 = components.netloc.lower().split(":")[0]
369 logger.debug("domain2='%s'", domain2)
370 if not domain_helper.is_wanted(domain2):
371 logger.debug("domain2='%s' is not wanted - EXIT!", domain2)
373 elif not instances.is_registered(domain2):
374 logger.info("components.netloc='%s' is not registered, adding ...", components.netloc)
375 instances.add(domain2, domain, "redirect_target")
377 message = f"Redirect from domain='{domain}' to response.url='{response.url}'"
378 instances.set_last_error(domain, message)
379 instances.set_software(domain, None)
380 instances.set_detection_mode(domain, None)
381 instances.set_nodeinfo_url(domain, None)
383 raise requests.exceptions.TooManyRedirects(message)
# Post-processing of the detected name.
# NOTE(review): the statement that performs the "empty string to None"
# correction announced in the message below is not visible in this view.
385 logger.debug("software[]='%s'", type(software))
386 if isinstance(software, str) and software == "":
387 logger.debug("Corrected empty string to None for software of domain='%s'", domain)
# A dot or a space hints at a trailing version number.
389 elif isinstance(software, str) and ("." in software or " " in software):
390 logger.debug("software='%s' may contain a version number, domain='%s', removing it ...", software, domain)
391 software = version.remove(software)
# Strip common decorations; only the first matching phrase is handled.
393 logger.debug("software[%s]='%s'", type(software), software)
394 if isinstance(software, str) and "powered by " in software:
395 logger.debug("software='%s' has 'powered by' in it", software)
396 software = version.remove(software_helper.strip_powered_by(software))
397 elif isinstance(software, str) and " hosted on " in software:
398 logger.debug("software='%s' has 'hosted on' in it", software)
399 software = version.remove(software_helper.strip_hosted_on(software))
400 elif isinstance(software, str) and " by " in software:
401 logger.debug("software='%s' has ' by ' in it", software)
402 software = software_helper.strip_until(software, " by ")
403 elif isinstance(software, str) and " see " in software:
404 logger.debug("software='%s' has ' see ' in it", software)
405 software = software_helper.strip_until(software, " see ")
407 logger.debug("software[%s]='%s' - EXIT!", type(software), software)
410 def determine_software(domain: str, path: str = None) -> str:
# Determine the software of `domain` from its nodeinfo data, falling back to
# fetch_generator_from_path() whenever nodeinfo gives no usable name. The
# name is stored through instances.set_original_software() and then mapped
# through software_helper.alias() and cleaned of version/"powered by" parts.
#
# Parameters:
#   domain - checked with domain_helper.raise_on() and against the blacklist
#   path   - str starting with "/" or None, forwarded to nodeinfo.fetch()
#
# Raises:
#   Exception  - when `domain` is blacklisted
#   ValueError - when `path` has a wrong type or lacks the leading "/"
#   whatever exception nodeinfo.fetch() stored under the "exception" key
#
# NOTE(review): some branch headers and the return statements are not visible
# in this view.
411 logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
412 domain_helper.raise_on(domain)
414 if blacklist.is_blacklisted(domain):
415 raise Exception(f"domain='{domain}' is blacklisted but function was invoked")
416 elif not isinstance(path, str) and path is not None:
417 raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")
418 elif path is not None and not path.startswith("/"):
419 raise ValueError(f"path='{path}' does not start with a slash")
421 logger.debug("Fetching nodeinfo from domain='%s',path='%s' ...", domain, path)
422 data = nodeinfo.fetch(domain, path)
# `data` is inspected by key: a stored exception is re-raised, an error
# message triggers the generator fallback.
425 logger.debug("data[%s]='%s'", type(data), data)
426 if "exception" in data:
427 # Continue raising it
428 logger.debug("data()=%d contains exception='%s' - raising ...", len(data), type(data["exception"]))
429 raise data["exception"]
430 elif "error_message" in data:
431 logger.debug("Returned error_message during fetching nodeinfo: '%s',status_code=%d", data['error_message'], data['status_code'])
432 software = fetch_generator_from_path(domain)
433 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
# NOTE(review): the headers of the next two branches are not visible here.
435 logger.debug("domain='%s',path='%s',data[json] found ...", domain, path)
438 logger.debug("Auto-detection for domain='%s' was failing, fetching / ...", domain)
439 software = fetch_generator_from_path(domain)
440 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
# Interpret the JSON: error responses reset detection_mode and nodeinfo_url
# and fall back to the generator; otherwise the name is read from
# data["software"]["name"] or data["server"]["software"].
442 if "status" in data and data["status"] == "error" and "message" in data:
443 logger.warning("JSON response is an error: '%s' - Resetting detection_mode,nodeinfo_url ...", data["message"])
444 instances.set_last_error(domain, data["message"])
445 instances.set_detection_mode(domain, None)
446 instances.set_nodeinfo_url(domain, None)
447 software = fetch_generator_from_path(domain)
448 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
449 elif "software" in data and "name" in data["software"]:
450 logger.debug("Found data[json][software][name] in JSON response")
451 software = data["software"]["name"]
452 logger.debug("software[%s]='%s' - FOUND!", type(software), software)
453 elif "message" in data:
454 logger.warning("JSON response contains only a message: '%s' - Resetting detection_mode,nodeinfo_url ...", data["message"])
455 instances.set_last_error(domain, data["message"])
456 instances.set_detection_mode(domain, None)
457 instances.set_nodeinfo_url(domain, None)
459 logger.debug("Invoking fetch_generator_from_path(%s) ...", domain)
460 software = fetch_generator_from_path(domain)
461 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
462 elif "server" in data and "software" in data["server"]:
463 logger.debug("Found data[server][software]='%s' for domain='%s'", data["server"]["software"].lower(), domain)
464 software = data["server"]["software"].lower()
465 logger.debug("Detected software for domain='%s' is: '%s'", domain, software)
# This condition is the exact negation of the software/name branch above, so
# once reached it always holds and works as the catch-all fallback.
466 elif "software" not in data or "name" not in data["software"]:
467 logger.debug("JSON response from domain='%s' does not include [software][name] - Resetting detection_mode,nodeinfo_url ...", domain)
468 instances.set_detection_mode(domain, None)
469 instances.set_nodeinfo_url(domain, None)
471 logger.debug("Invoking fetch_generator_from_path(%s) ...", domain)
472 software = fetch_generator_from_path(domain)
473 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
# Nothing detected: the message indicates None is handed back here.
475 logger.debug("software[%s]='%s'", type(software), software)
476 if software in [None, ""]:
477 logger.debug("Returning None - EXIT!")
# Keep the unmodified name before aliasing it.
480 logger.debug("Setting original software='%s' for domain='%s' ...", software, domain)
481 instances.set_original_software(domain, software)
483 logger.debug("software='%s'- BEFORE!", software)
484 software = software_helper.alias(software)
485 logger.debug("software['%s']='%s' - AFTER!", type(software), software)
# An alias that yields nothing triggers one more generator lookup; a dot or a
# space hints at a version number to strip.
487 if software in [None, ""]:
488 logger.debug("software for domain='%s' was not detected, trying generator ...", domain)
489 software = fetch_generator_from_path(domain)
490 elif len(str(software)) > 0 and ("." in software or " " in software):
491 logger.debug("software='%s' may contain a version number, domain='%s', removing it ...", software, domain)
492 software = version.remove(software)
494 logger.debug("software[]='%s'", type(software))
495 if isinstance(software, str) and "powered by" in software:
496 logger.debug("software='%s' has 'powered by' in it", software)
497 software = version.remove(software_helper.strip_powered_by(software))
# NOTE(review): str.strip() is called on `software` here; whether a guard
# against None precedes it is not visible in this view.
499 software = software.strip()
501 logger.debug("software[%s]='%s' - EXIT!", type(software), software)
504 def find_domains(tag: bs4.element.Tag, domainColumn: str = "dt", reasonColumn: str = "dd", reasonText: str = "Categories:") -> list:
# Extract domain/reason pairs from an HTML fragment: every `domainColumn`
# element supplies a domain, the next `reasonColumn` element supplies the
# text from which the reason is taken.
#
# Parameters:
#   tag          - bs4.element.Tag that is searched
#   domainColumn - non-empty tag name holding the domain (default "dt")
#   reasonColumn - non-empty tag name holding the reason (default "dd")
#   reasonText   - non-empty separator; the text after it is used
#
# Raises:
#   ValueError - on wrongly typed or empty parameters
#   KeyError   - when `tag` contains no domainColumn or reasonColumn elements
#
# NOTE(review): the initialisation of the result list, the reason loop, the
# append statements and the return are not visible in this view.
505 logger.debug("tag[]='%s',domainColumn='%s',reasonColumn='%s',reasonText='%s' - CALLED!", type(tag), domainColumn, reasonColumn, reasonText)
507 if not isinstance(tag, bs4.element.Tag):
508 raise ValueError(f"Parameter tag[]='{type(tag)}' is not type of bs4.element.Tag")
509 elif not isinstance(domainColumn, str):
510 raise ValueError(f"Parameter domainColumn[]='{type(domainColumn)}' is not type of 'str'")
511 elif domainColumn == "":
512 raise ValueError("Parameter 'domainColumn' is an empty string")
513 elif not isinstance(reasonColumn, str):
514 raise ValueError(f"Parameter reasonColumn[]='{type(reasonColumn)}' is not type of 'str'")
515 elif reasonColumn == "":
516 raise ValueError("Parameter 'reasonColumn' is an empty string")
# NOTE(review): the two KeyError messages below lack the f prefix, so the
# {domainColumn}/{reasonColumn} placeholders are emitted literally.
517 elif len(tag.find_all(domainColumn)) == 0:
518 raise KeyError("No domainColumn='{domainColumn}' rows found in table!")
519 elif len(tag.find_all(reasonColumn)) == 0:
520 raise KeyError("No reasonColumn='{reasonColumn}' rows found in table!")
521 elif not isinstance(reasonText, str):
522 raise ValueError(f"Parameter reasonText[]='{type(reasonText)}' is not type of 'str'")
523 elif reasonText == "":
524 raise ValueError("Parameter 'reasonText' is an empty string")
527 for element in tag.find_all(domainColumn):
528 logger.debug("element[%s]='%s'", type(element), element)
529 domain = tidyup.domain(element.text)
# Take the text after the first `reasonText` occurrence, split into lines.
# NOTE(review): index [1] raises IndexError when `reasonText` does not occur
# in the element's text.
530 reasons = element.find_next(reasonColumn).text.split(reasonText)[1].splitlines()
532 logger.debug("reasons(%d)='%s'", len(reasons), reasons)
535 logger.debug("r[%s]='%s'", type(r), r)
540 reason = tidyup.reason(reason)
541 logger.debug("domain='%s',reason='%s'", domain, reason)
# NOTE(review): the message says "blacklisted" while the check performed is
# domain_helper.is_wanted().
543 if not domain_helper.is_wanted(domain):
544 logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
# Special case: one row that lists several gab.com domains at once.
546 elif domain == "gab.com/.ai, develop.gab.com":
547 logger.debug("Multiple gab.com domains detected in one row")
557 "domain": "develop.gab.com",
# Only the part before the first "/" has to be a valid domain.
561 elif not validators.domain(domain.split("/")[0], rfc_2782=True):
562 logger.warning("domain='%s' is not a valid domain - SKIPPED!", domain)
565 logger.debug("Adding domain='%s',reason='%s' ...", domain, reason)
571 logger.debug("domains()=%d - EXIT!", len(domains))
574 def add_peers(rows: dict) -> list:
# Collect peers from the "linked", "allowed" and "blocked" entries of `rows`.
# Every peer is either a str or a dict with a "domain" key; it is passed
# through tidyup.domain() and kept only when domain_helper.is_wanted()
# accepts it.
#
# Parameters:
#   rows - dict; keys that are missing or None are skipped
#
# Raises:
#   ValueError - when `rows` is not a dict, is empty, or contains a peer of
#                an unsupported type
#
# NOTE(review): the initialisation of the result list, the append statement
# and the return are not visible in this view.
575 logger.debug("rows[]='%s' - CALLED!", type(rows))
577 if not isinstance(rows, dict):
578 raise ValueError(f"Parameter rows[]='{type(rows)}' is not of type 'dict'")
# NOTE(review): the condition guarding the next raise is not visible here;
# judging by the message it tests for an empty `rows`.
580 raise ValueError("Parameter 'rows' is empty")
585 for key in ["linked", "allowed", "blocked"]:
586 logger.debug("key='%s'", key)
587 if key not in rows or rows[key] is None:
588 logger.debug("Cannot find key='%s' or it is NoneType - SKIPPED!", key)
591 logger.debug("Adding %d peer(s) to peers list ...", len(rows[key]))
592 for peer in rows[key]:
593 logger.debug("peer[%s]='%s' - BEFORE!", type(peer), peer)
594 if peer in [None, ""]:
595 logger.debug("peer is empty - SKIPPED!")
# Dict entries carry the name under "domain"; an empty value becomes None.
597 elif isinstance(peer, dict) and "domain" in peer:
598 logger.debug("peer[domain]='%s'", peer["domain"])
599 peer = tidyup.domain(peer["domain"]) if peer["domain"] != "" else None
600 elif isinstance(peer, str):
601 logger.debug("peer='%s'", peer)
602 peer = tidyup.domain(peer)
# Anything that is neither str nor a dict with "domain" is rejected.
604 raise ValueError(f"peer[]='{type(peer)}' is not supported,key='{key}'")
# NOTE(review): `peer` can be None at this point (empty "domain" value above);
# confirm domain_helper.is_wanted() tolerates None.
606 logger.debug("peer[%s]='%s' - AFTER!", type(peer), peer)
607 if not domain_helper.is_wanted(peer):
608 logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
611 logger.debug("Appending peer='%s' ...", peer)
614 logger.debug("peers()=%d - EXIT!", len(peers))
617 def fetch_blocks(domain: str) -> list:
# Fetch the domain blocks published by `domain` under
# "/api/v1/instance/domain_blocks" and convert every usable entry into a
# record with the keys visible below ("blocked", "digest", "block_level", ...).
#
# Parameters:
#   domain - checked with domain_helper.raise_on(); must be registered and
#            must not be blacklisted
#
# Raises:
#   Exception - when `domain` is blacklisted or not registered
#
# Network errors (network.exceptions) are logged and stored through
# instances.set_last_error() instead of being propagated.
#
# NOTE(review): the initialisation of `blocklist`, the try: headers, the loop
# header, the start of the appended dict and the return are not visible in
# this view.
618 logger.debug("domain='%s' - CALLED!", domain)
619 domain_helper.raise_on(domain)
621 if blacklist.is_blacklisted(domain):
622 raise Exception(f"domain='{domain}' is blacklisted but function was invoked")
623 elif not instances.is_registered(domain):
624 raise Exception(f"domain='{domain}' is not registered but function is invoked.")
629 # No CSRF by default, you don't have to add network.api_headers by yourself here
# Ask csrf.determine() for request headers, starting from an empty dict.
633 logger.debug("Checking CSRF for domain='%s'", domain)
634 headers = csrf.determine(domain, dict())
# A network failure here is logged and recorded; the log message below
# indicates an empty list is handed back in that case.
635 except network.exceptions as exception:
636 logger.warning("Exception '%s' during checking CSRF (fetch_blocks,%s)", type(exception), __name__)
637 instances.set_last_error(domain, exception)
639 logger.debug("Returning empty list ... - EXIT!")
643 # json endpoint for newer mastodongs
644 logger.info("Fetching domain_blocks from domain='%s' ...", domain)
645 rows = network.fetch_json_rows(
647 "/api/v1/instance/domain_blocks",
651 logger.debug("Marking domain='%s' as successfully handled ...", domain)
652 instances.set_success(domain)
654 logger.debug("rows()=%d", len(rows))
# Filter the returned entries; the first failing check skips the entry.
656 logger.debug("Checking %d entries from domain='%s' ...", len(rows), domain)
659 logger.debug("block[]='%s'", type(block))
660 if not isinstance(block, dict):
# NOTE(review): the message below says "is of type 'dict'" although this
# branch handles entries that are NOT a dict.
661 logger.debug("block[]='%s' is of type 'dict' - SKIPPED!", type(block))
663 elif "domain" not in block:
664 logger.warning("block()=%d does not contain element 'domain' - SKIPPED!", len(block))
666 elif "severity" not in block:
667 logger.warning("block()=%d does not contain element 'severity' - SKIPPED!", len(block))
# Entries with severity "accept"/"accepted" are not recorded.
669 elif block["severity"] in ["accept", "accepted"]:
670 logger.debug("block[domain]='%s' has unwanted severity level '%s' - SKIPPED!", block["domain"], block["severity"])
# A digest, when present, must validate as SHA-256.
672 elif "digest" in block and not validators.hashes.sha256(block["digest"]):
673 logger.warning("block[domain]='%s' has invalid block[digest]='%s' - SKIPPED!", block["domain"], block["digest"])
# The reason is the tidied comment, or None when it is missing, None or empty.
676 reason = tidyup.reason(block["comment"]) if "comment" in block and block["comment"] is not None and block["comment"] != "" else None
678 logger.debug("Appending blocker='%s',blocked='%s',reason='%s',block_level='%s' ...", domain, block["domain"], reason, block["severity"])
681 "blocked" : block["domain"],
682 "digest" : block["digest"] if "digest" in block else None,
# The severity is mapped through blocks.alias_block_level().
684 "block_level": blocks.alias_block_level(block["severity"]),
687 logger.debug("domain='%s' has no block list", domain)
689 except network.exceptions as exception:
690 logger.warning("domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
691 instances.set_last_error(domain, exception)
693 logger.debug("blocklist()=%d - EXIT!", len(blocklist))