1 # Copyright (C) 2023 Free Software Foundation
3 # This program is free software: you can redistribute it and/or modify
4 # it under the terms of the GNU Affero General Public License as published
5 # by the Free Software Foundation, either version 3 of the License, or
6 # (at your option) any later version.
8 # This program is distributed in the hope that it will be useful,
9 # but WITHOUT ANY WARRANTY; without even the implied warranty of
10 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 # GNU Affero General Public License for more details.
13 # You should have received a copy of the GNU Affero General Public License
14 # along with this program. If not, see <https://www.gnu.org/licenses/>.
18 from urllib.parse import urlparse
24 from fba.helpers import blacklist
25 from fba.helpers import config
26 from fba.helpers import cookies
27 from fba.helpers import domain as domain_helper
28 from fba.helpers import software as software_helper
29 from fba.helpers import tidyup
30 from fba.helpers import version
32 from fba.http import csrf
33 from fba.http import network
34 from fba.http import nodeinfo
36 from fba.models import blocks
37 from fba.models import instances
39 from fba.networks import lemmy
40 from fba.networks import misskey
41 from fba.networks import peertube
43 # Depth counter, being raised and lowered
# NOTE(review): the definition this comment introduces is not visible in this
# extract; presumably it is the module-level `_DEPTH` that fetch_instances()
# logs and compares against config "max_crawl_depth" - confirm.
# NOTE(review): the next line looks like one entry of the `_api_paths` list
# that fetch_peers() iterates; the enclosing list literal is not visible here.
48 "/api/v1/instance/peers",
# Set up basic logging at level INFO and create this module's logger.
52 logging.basicConfig(level=logging.INFO)
53 logger = logging.getLogger(__name__)
55 def fetch_instances(domain: str, origin: str, software: str, command: str, path: str = None):
# Fetches the peer list of `domain` via fetch_peers() and walks it: every
# entry is tidied up, filtered and - when not yet registered - either crawled
# through a nested fetch_instances() call or just added with instances.add().
#
# Parameters, as enforced by the checks below:
#   domain   - passed to domain_helper.raise_on(); must not be blacklisted
#   origin   - str or None; must not be None for the commands listed below
#   software - str or None; None triggers detection via determine_software()
#              unless "last_nodeinfo" is recent for the domain
#   command  - must be a str
#   path     - None or a str starting with "/"
#
# Raises Exception for a blacklisted domain and ValueError when one of the
# other parameter checks fails.
#
# NOTE(review): this extract carries no indentation and several short lines
# (e.g. try:/else:/return/continue, presumably also the _DEPTH bookkeeping)
# are missing, so the nesting below is inferred, not shown - confirm against
# the full file.
57 logger.debug("domain='%s',origin='%s',software='%s',command='%s',path='%s',_DEPTH=%d - CALLED!", domain, origin, software, command, path, _DEPTH)
58 domain_helper.raise_on(domain)
# Parameter validation: the first failing check raises.
60 if blacklist.is_blacklisted(domain):
61 raise Exception(f"domain='{domain}' is blacklisted but function was invoked")
62 elif not isinstance(origin, str) and origin is not None:
63 raise ValueError(f"Parameter origin[]='{type(origin)}' is not of type 'str'")
64 elif not isinstance(command, str):
65 raise ValueError(f"Parameter command[]='{type(command)}' is not of type 'str'")
# NOTE(review): the condition guarding the next raise is not visible here
# (presumably an empty-string check on `command`) - confirm.
67 raise ValueError("Parameter 'command' is empty")
# These commands need to know which instance the domain was found on.
68 elif command in ["fetch_blocks", "fetch_cs", "fetch_bkali", "fetch_relays", "fetch_fedipact", "fetch_joinmobilizon", "fetch_joinmisskey", "fetch_joinfediverse", "fetch_relaylist"] and origin is None:
69 raise ValueError(f"Parameter command='{command}' but origin is None, please fix invoking this function.")
70 elif not isinstance(path, str) and path is not None:
71 raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")
72 elif path is not None and not path.startswith("/"):
73 raise ValueError(f"path='{path}' does not start with a slash")
# A recently fetched domain is only rejected when _DEPTH > 0.
74 elif _DEPTH > 0 and instances.is_recent(domain, "last_instance_fetch"):
75 raise ValueError(f"domain='{domain}' has recently been fetched but function was invoked")
# Software unknown and nodeinfo not fetched recently: try to detect it.
76 elif software is None and not instances.is_recent(domain, "last_nodeinfo"):
78 logger.debug("Software for domain='%s',path='%s' is not set, determining ...", domain, path)
79 software = determine_software(domain, path)
# Network failures are logged and handed to instances.set_last_error().
80 except network.exceptions as exception:
81 logger.warning("Exception '%s' during determining software type", type(exception))
82 instances.set_last_error(domain, exception)
84 logger.debug("Determined software='%s' for domain='%s'", software, domain)
85 elif software is None:
86 logger.debug("domain='%s' has unknown software or nodeinfo has recently being fetched", domain)
87 elif not isinstance(software, str):
88 raise ValueError(f"Parameter software[]='{type(software)}' is not of type 'str'")
# Make sure the crawled domain itself is registered.
93 logger.debug("Checking if domain='%s' is registered ...", domain)
94 if not instances.is_registered(domain):
95 logger.debug("Adding new domain='%s',origin='%s',command='%s',path='%s',software='%s'", domain, origin, command, path, software)
96 instances.add(domain, origin, command, path, software)
# NOTE(review): for relay software only the EXIT log message is visible; the
# early return is presumably on a missing line - confirm.
98 logger.debug("software='%s'", software)
99 if software is not None and software_helper.is_relay(software):
100 logger.debug("software='%s' is a relay software - EXIT!", software)
# Record that an instance fetch happened for this domain.
104 logger.debug("Updating last_instance_fetch for domain='%s' ...", domain)
105 instances.set_last_instance_fetch(domain)
# Peers are only fetched when the software is known.
108 logger.debug("software='%s'", software)
109 if software is not None:
110 logger.debug("Fetching instances for domain='%s',software='%s',origin='%s'", domain, software, origin)
111 peerlist = fetch_peers(domain, software, origin)
# Only a real list is passed on as the domain's total peers.
113 logger.debug("peerlist[]='%s'", type(peerlist))
114 if isinstance(peerlist, list):
115 logger.debug("Invoking instances.set_total_peerlist(%s,%d) ...", domain, len(peerlist))
116 instances.set_total_peers(domain, peerlist)
# Clear the cookies kept for this domain.
118 logger.debug("Invoking cookies.clear(%s) ...", domain)
119 cookies.clear(domain)
# NOTE(review): the condition for the failure path below is not visible
# (presumably peerlist is None) - pending updates are flushed before the
# EXIT log message.
121 logger.debug("peerlist[]='%s'", type(peerlist))
123 logger.warning("Cannot fetch peers: domain='%s',software='%s'", domain, software)
124 if instances.has_pending(domain):
125 logger.debug("Flushing updates for domain='%s' ...", domain)
126 instances.update(domain)
129 logger.debug("EXIT!")
# Empty peer list: flush pending updates and log the exit.
131 elif len(peerlist) == 0:
132 logger.info("domain='%s' returned an empty peer list.", domain)
133 if instances.has_pending(domain):
134 logger.debug("Flushing updates for domain='%s' ...", domain)
135 instances.update(domain)
138 logger.debug("domain='%s',software='%s' has an empty peer list returned - EXIT!", domain, software)
# Walk all peers returned for this domain.
141 logger.info("Checking %d instance(s) from domain='%s',software='%s',depth=%d ...", len(peerlist), domain, software, _DEPTH)
142 for instance in peerlist:
143 logger.debug("instance[%s]='%s'", type(instance), instance)
144 if instance in [None, ""]:
145 logger.debug("instance[%s]='%s' is either None or empty - SKIPPED!", type(instance), instance)
# Normalize the entry; anything that is not a non-empty str becomes None.
148 logger.debug("instance='%s' - BEFORE!", instance)
149 instance = tidyup.domain(instance) if isinstance(instance, str) and instance != "" else None
150 logger.debug("instance='%s' - AFTER!", instance)
152 if instance in [None, ""]:
153 logger.warning("instance='%s' is empty after tidyup.domain(), domain='%s'", instance, domain)
# Collapse a double dot into a single one.
155 elif ".." in instance:
156 logger.warning("instance='%s' contains double-dot, removing ...", instance)
157 instance = instance.replace("..", ".")
# Convert the name to its IDNA (ASCII-compatible) form.
159 logger.debug("instance='%s' - BEFORE!", instance)
160 instance = instance.encode("idna").decode("utf-8")
161 logger.debug("instance='%s' - AFTER!", instance)
# Filter chain: unwanted names, links to user profiles (or "/c/" links on an
# already registered host) and links to tags are logged as skipped.
163 if not domain_helper.is_wanted(instance):
164 logger.debug("instance='%s' is not wanted - SKIPPED!", instance)
166 elif instance.find("/profile/") > 0 or instance.find("/users/") > 0 or (instances.is_registered(instance.split("/")[0]) and instance.find("/c/") > 0):
167 logger.debug("instance='%s' is a link to a single user profile - SKIPPED!", instance)
169 elif instance.find("/tag/") > 0:
170 logger.debug("instance='%s' is a link to a tag - SKIPPED!", instance)
# Unknown instance: flush pending updates of the current domain first.
172 elif not instances.is_registered(instance):
173 logger.debug("Checking if domain='%s' has pending updates ...", domain)
174 if instances.has_pending(domain):
175 logger.debug("Flushing updates for domain='%s' ...", domain)
176 instances.update(domain)
# Crawl the new instance only while the depth limit holds and the peer list
# is large enough. NOTE(review): the instances.add() further down is
# presumably the else branch of this check - confirm.
178 logger.debug("instance='%s',origin='%s',_DEPTH=%d reached!", instance, origin, _DEPTH)
179 if _DEPTH <= config.get("max_crawl_depth") and len(peerlist) >= config.get("min_peers_length"):
180 logger.debug("Fetching instance='%s',origin='%s',command='%s',path='%s',_DEPTH=%d ...", instance, domain, command, path, _DEPTH)
181 fetch_instances(instance, domain, None, command, path)
183 logger.debug("Adding instance='%s',domain='%s',command='%s',_DEPTH=%d ...", instance, domain, command, _DEPTH)
184 instances.add(instance, domain, command)
# Final flush of pending updates for the crawled domain.
186 logger.debug("Checking if domain='%s' has pending updates ...", domain)
187 if instances.has_pending(domain):
188 logger.debug("Flushing updates for domain='%s' ...", domain)
189 instances.update(domain)
192 logger.debug("EXIT!")
194 def fetch_peers(domain: str, software: str, origin: str) -> list:
# Returns the peers of `domain`. misskey, lemmy and peertube are delegated to
# their own fetch_peers() implementations; for any other software the paths in
# `_api_paths` are queried through network.get_json_api().
#
# Parameters, as enforced by the checks below:
#   domain   - passed to domain_helper.raise_on(); must not be blacklisted
#   software - None or a non-empty str that is not a relay software
#   origin   - None or a non-empty str
#
# Raises Exception for a blacklisted domain and ValueError when one of the
# other parameter checks fails.
#
# NOTE(review): this extract carries no indentation and several short lines
# (e.g. try:, return statements, the `headers`/`peers` initialisation and the
# remaining get_json_api() arguments) are missing - confirm details against
# the full file.
195 logger.debug("domain='%s',software='%s',origin='%s' - CALLED!", domain, software, origin)
196 domain_helper.raise_on(domain)
# Parameter validation: the first failing check raises.
198 if blacklist.is_blacklisted(domain):
199 raise Exception(f"domain='{domain}' is blacklisted but function was invoked")
200 elif not isinstance(software, str) and software is not None:
201 raise ValueError(f"Parameter software[]='{type(software)}' is not of type 'str'")
202 elif isinstance(software, str) and software == "":
203 raise ValueError("Parameter 'software' is empty")
204 elif software is not None and software_helper.is_relay(software):
205 raise ValueError(f"domain='{domain}' is of software='{software}' and isn't supported here.")
206 elif not isinstance(origin, str) and origin is not None:
207 raise ValueError(f"Parameter origin[]='{type(origin)}' is not of type 'str'")
208 elif isinstance(origin, str) and origin == "":
209 raise ValueError("Parameter 'origin' is empty")
# Software with a dedicated implementation is handled by its own module.
211 if software == "misskey":
212 logger.debug("Invoking misskey.fetch_peers(%s) ...", domain)
213 return misskey.fetch_peers(domain)
214 elif software == "lemmy":
215 logger.debug("Invoking lemmy.fetch_peers(%s,%s) ...", domain, origin)
216 return lemmy.fetch_peers(domain, origin)
217 elif software == "peertube":
218 logger.debug("Invoking peertube.fetch_peers(%s) ...", domain)
219 return peertube.fetch_peers(domain)
221 # No CSRF by default, you don't have to add network.api_headers by yourself here
# Ask csrf.determine() for the request headers, starting from an empty dict.
225 logger.debug("Checking CSRF for domain='%s'", domain)
226 headers = csrf.determine(domain, dict())
# A network error is logged and handed to instances.set_last_error(); the log
# message says an empty list is returned (the return itself is not visible).
227 except network.exceptions as exception:
228 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
229 instances.set_last_error(domain, exception)
231 logger.debug("Returning empty list ... - EXIT!")
234 # Init peers variable
# Query the generic API paths one after another.
237 logger.debug("Checking %d API paths ...", len(_api_paths))
238 for path in _api_paths:
239 logger.debug("Fetching path='%s' from domain='%s',software='%s' ...", path, domain, software)
240 data = network.get_json_api(
244 timeout=(config.get("connection_timeout"), config.get("read_timeout"))
# An "error_message" entry marks a failed request; a non-empty "json" entry
# marks a successful one.
247 logger.debug("data(%d)[]='%s'", len(data), type(data))
248 if "error_message" in data:
249 logger.debug("Was not able to fetch peers from path='%s',domain='%s' ...", path, domain)
250 instances.set_last_error(domain, data)
251 elif "json" in data and len(data["json"]) > 0:
252 logger.debug("Querying API path='%s' was successful: domain='%s',data[json][%s]()=%d", path, domain, type(data['json']), len(data['json']))
# NOTE(review): the lines that take the peers out of data["json"] are not
# visible in this extract.
255 logger.debug("Marking domain='%s' as successfully handled ...", domain)
256 instances.set_success(domain)
# Guard against a result that is not a list.
259 if not isinstance(peers, list):
260 logger.warning("peers[]='%s' is not of type 'list', maybe bad API response?", type(peers))
# Hand the peers to instances.set_total_peers() and log their count.
263 logger.debug("Invoking instances.set_total_peers(%s,%d) ...", domain, len(peers))
264 instances.set_total_peers(domain, peers)
266 logger.debug("peers()=%d - EXIT!", len(peers))
269 def fetch_generator_from_path(domain: str, path: str = "/") -> str:
# Fetches `path` from `domain` and tries to read the software name from the
# meta tags of the returned HTML, checked in this order: og:platform,
# generator, application-name, og:site_name. When a non-empty name is found
# the matching detection mode is set via instances.set_detection_mode().
# Afterwards version numbers and phrases such as "powered by" are stripped.
#
# Parameters, as enforced by the checks below:
#   domain - passed to domain_helper.raise_on(); must not be blacklisted
#   path   - str starting with "/" (default "/")
#
# Raises Exception for a blacklisted domain, ValueError for a bad path and
# requests.exceptions.TooManyRedirects in the redirect branch further down.
#
# NOTE(review): this extract carries no indentation and several short lines
# (e.g. the initial value of `software`, returns and the remaining
# fetch_response() arguments) are missing - confirm against the full file.
270 logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
271 domain_helper.raise_on(domain)
# Parameter validation: the first failing check raises.
273 if blacklist.is_blacklisted(domain):
274 raise Exception(f"domain='{domain}' is blacklisted but function was invoked")
275 elif not isinstance(path, str):
276 raise ValueError(f"path[]='{type(path)}' is not of type 'str'")
# NOTE(review): the condition guarding the next raise is not visible here
# (presumably an empty-string check on `path`) - confirm.
278 raise ValueError("Parameter 'path' is empty")
279 elif not path.startswith("/"):
280 raise ValueError(f"path='{path}' does not start with / but should")
284 logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
285 response = network.fetch_response(
288 headers=network.web_headers,
289 timeout=(config.get("connection_timeout"), config.get("read_timeout")),
# Parse only when the status is 200 (or 410), "<html" is found past the first
# character of the body and the response URL still belongs to `domain`.
293 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
294 if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") > 0 and domain_helper.is_in_url(domain, response.url):
295 logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
296 doc = bs4.BeautifulSoup(response.text, "html.parser")
# Candidate meta tags that may name the software.
298 logger.debug("doc[]='%s'", type(doc))
299 platform = doc.find("meta", {"property": "og:platform"})
300 generator = doc.find("meta", {"name" : "generator"})
301 site_name = doc.find("meta", {"property": "og:site_name"})
302 app_name = doc.find("meta", {"name" : "application-name"})
# The first tag with a non-empty str "content" attribute wins.
304 logger.debug("generator[]='%s',site_name[]='%s',platform[]='%s',app_name[]='%s'", type(generator), type(site_name), type(platform), type(app_name))
305 if isinstance(platform, bs4.element.Tag) and isinstance(platform.get("content"), str) and platform.get("content") != "":
306 logger.debug("Found property=og:platform, domain='%s'", domain)
307 software = tidyup.domain(platform.get("content"))
308 logger.debug("software[%s]='%s' after tidyup.domain() ...", type(software), software)
310 if software is not None and software != "":
311 logger.debug("domain='%s' has og:platform='%s' - Setting detection_mode=PLATFORM ...", domain, software)
312 instances.set_detection_mode(domain, "PLATFORM")
313 elif isinstance(generator, bs4.element.Tag) and isinstance(generator.get("content"), str) and generator.get("content") != "":
314 logger.debug("Found generator meta tag: domain='%s'", domain)
315 software = tidyup.domain(generator.get("content"))
317 logger.debug("software[%s]='%s'", type(software), software)
318 if software is not None and software != "":
319 logger.info("domain='%s' is generated by software='%s' - Setting detection_mode=GENERATOR ...", domain, software)
320 instances.set_detection_mode(domain, "GENERATOR")
# NOTE(review): the message below says "og:app_name", but the tag queried
# above is <meta name="application-name">.
321 elif isinstance(app_name, bs4.element.Tag) and isinstance(app_name.get("content"), str) and app_name.get("content") != "":
322 logger.debug("Found property=og:app_name, domain='%s'", domain)
323 software = tidyup.domain(app_name.get("content"))
325 logger.debug("software[%s]='%s'", type(software), software)
326 if software is not None and software != "":
327 logger.debug("domain='%s' has application-name='%s' - Setting detection_mode=app_name ...", domain, software)
328 instances.set_detection_mode(domain, "APP_NAME")
329 elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str) and site_name.get("content") != "":
330 logger.debug("Found property=og:site_name, domain='%s'", domain)
331 software = tidyup.domain(site_name.get("content"))
333 logger.debug("software[%s]='%s'", type(software), software)
334 if software is not None and software != "":
335 logger.debug("domain='%s' has og:site_name='%s' - Setting detection_mode=SITE_NAME ...", domain, software)
336 instances.set_detection_mode(domain, "SITE_NAME")
# The response URL does not contain `domain`: handled as a redirect.
337 elif not domain_helper.is_in_url(domain, response.url):
338 logger.warning("domain='%s' doesn't match response.url='%s', maybe redirect to other domain?", domain, response.url)
# Host part of the final URL, lower-cased and with any ":port" cut off.
340 components = urlparse(response.url)
341 domain2 = components.netloc.lower().split(":")[0]
# A wanted but unregistered redirect target is added with `domain` as origin.
343 logger.debug("domain2='%s'", domain2)
344 if not domain_helper.is_wanted(domain2):
345 logger.debug("domain2='%s' is not wanted - EXIT!", domain2)
347 elif not instances.is_registered(domain2):
348 logger.info("components.netloc='%s' is not registered, adding ...", components.netloc)
349 instances.add(domain2, domain, "redirect_target")
# Record the redirect, reset software, detection mode and nodeinfo URL of
# `domain`, then raise.
351 message = f"Redirect from domain='{domain}' to response.url='{response.url}'"
352 instances.set_last_error(domain, message)
353 instances.set_software(domain, None)
354 instances.set_detection_mode(domain, None)
355 instances.set_nodeinfo_url(domain, None)
357 raise requests.exceptions.TooManyRedirects(message)
# Post-processing of the detected name. NOTE(review): the assignment that
# turns an empty string into None is not visible here, only its log message.
359 logger.debug("software[]='%s'", type(software))
360 if isinstance(software, str) and software == "":
361 logger.debug("Corrected empty string to None for software of domain='%s'", domain)
363 elif isinstance(software, str) and ("." in software or " " in software):
364 logger.debug("software='%s' may contain a version number, domain='%s', removing it ...", software, domain)
365 software = version.remove(software)
# Strip known phrases around the software name; only the first match applies.
367 logger.debug("software[%s]='%s'", type(software), software)
368 if isinstance(software, str) and "powered by " in software:
369 logger.debug("software='%s' has 'powered by' in it", software)
370 software = version.remove(software_helper.strip_powered_by(software))
371 elif isinstance(software, str) and " hosted on " in software:
372 logger.debug("software='%s' has 'hosted on' in it", software)
373 software = version.remove(software_helper.strip_hosted_on(software))
374 elif isinstance(software, str) and " by " in software:
375 logger.debug("software='%s' has ' by ' in it", software)
376 software = software_helper.strip_until(software, " by ")
377 elif isinstance(software, str) and " see " in software:
378 logger.debug("software='%s' has ' see ' in it", software)
379 software = software_helper.strip_until(software, " see ")
381 logger.debug("software[%s]='%s' - EXIT!", type(software), software)
384 def determine_software(domain: str, path: str = None) -> str:
# Determines the software name of `domain`. nodeinfo.fetch() is asked first;
# fetch_generator_from_path() serves as fallback when nodeinfo fails or does
# not name the software. A detected name is passed to
# instances.set_original_software(), mapped through software_helper.alias()
# and cleaned of version numbers and "powered by" phrases.
#
# Parameters, as enforced by the checks below:
#   domain - passed to domain_helper.raise_on(); must not be blacklisted
#   path   - None or a str starting with "/"
#
# Raises Exception for a blacklisted domain, ValueError for a bad path, and
# re-raises the exception found under data["exception"].
#
# NOTE(review): this extract carries no indentation and several short lines
# (e.g. else:, returns, presumably the unwrapping of data["json"]) are
# missing - confirm details against the full file.
385 logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
386 domain_helper.raise_on(domain)
# Parameter validation: the first failing check raises.
388 if blacklist.is_blacklisted(domain):
389 raise Exception(f"domain='{domain}' is blacklisted but function was invoked")
390 elif not isinstance(path, str) and path is not None:
391 raise ValueError(f"Parameter path[]='{type(path)}' is not of type 'str'")
392 elif path is not None and not path.startswith("/"):
393 raise ValueError(f"path='{path}' does not start with a slash")
395 logger.debug("Fetching nodeinfo from domain='%s',path='%s' ...", domain, path)
396 data = nodeinfo.fetch(domain, path)
# Failures are reported inside the returned data: an "exception" entry is
# re-raised, an "error_message" entry triggers the generator fallback.
399 logger.debug("data[%s]='%s'", type(data), data)
400 if "exception" in data:
401 # Continue raising it
402 logger.debug("data()=%d contains exception='%s' - raising ...", len(data), type(data["exception"]))
403 raise data["exception"]
404 elif "error_message" in data:
405 logger.debug("Returned error_message during fetching nodeinfo: '%s',status_code=%d", data['error_message'], data['status_code'])
406 software = fetch_generator_from_path(domain)
407 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
# NOTE(review): the conditions around the next two log groups are not visible
# in this extract.
409 logger.debug("domain='%s',path='%s',data[json] found ...", domain, path)
412 logger.debug("Auto-detection for domain='%s' was failing, fetching / ...", domain)
413 software = fetch_generator_from_path(domain)
414 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
# Interpret the JSON: an error status or a bare message resets detection mode
# and nodeinfo URL and falls back to the generator lookup.
416 if "status" in data and data["status"] == "error" and "message" in data:
417 logger.warning("JSON response is an error: '%s' - Resetting detection_mode,nodeinfo_url ...", data["message"])
418 instances.set_last_error(domain, data["message"])
419 instances.set_detection_mode(domain, None)
420 instances.set_nodeinfo_url(domain, None)
421 software = fetch_generator_from_path(domain)
422 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
# Regular case: the name is taken from data["software"]["name"].
423 elif "software" in data and "name" in data["software"]:
424 logger.debug("Found data[json][software][name] in JSON response")
425 software = data["software"]["name"]
426 logger.debug("software[%s]='%s' - FOUND!", type(software), software)
427 elif "message" in data:
428 logger.warning("JSON response contains only a message: '%s' - Resetting detection_mode,nodeinfo_url ...", data["message"])
429 instances.set_last_error(domain, data["message"])
430 instances.set_detection_mode(domain, None)
431 instances.set_nodeinfo_url(domain, None)
433 logger.debug("Invoking fetch_generator_from_path(%s) ...", domain)
434 software = fetch_generator_from_path(domain)
435 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
# Alternative layout: the name is under data["server"]["software"] and is
# lower-cased.
436 elif "server" in data and "software" in data["server"]:
437 logger.debug("Found data[server][software]='%s' for domain='%s'", data["server"]["software"].lower(), domain)
438 software = data["server"]["software"].lower()
439 logger.debug("Detected software for domain='%s' is: '%s'", domain, software)
440 elif "software" not in data or "name" not in data["software"]:
441 logger.debug("JSON response from domain='%s' does not include [software][name] - Resetting detection_mode,nodeinfo_url ...", domain)
442 instances.set_detection_mode(domain, None)
443 instances.set_nodeinfo_url(domain, None)
445 logger.debug("Invoking fetch_generator_from_path(%s) ...", domain)
446 software = fetch_generator_from_path(domain)
447 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
# Nothing detected: the log message says None is returned (the return itself
# is not visible here).
449 logger.debug("software[%s]='%s'", type(software), software)
450 if software in [None, ""]:
451 logger.debug("Returning None - EXIT!")
# Keep the raw name before it is mapped to an alias.
454 logger.debug("Setting original software='%s' ...", software)
455 instances.set_original_software(domain, software)
457 logger.debug("software='%s'- BEFORE!", software)
458 software = software_helper.alias(software)
459 logger.debug("software['%s']='%s' - AFTER!", type(software), software)
# Empty after aliasing: try the generator lookup; otherwise strip a possible
# version number when the name contains a dot or a space.
461 if str(software) == "":
462 logger.debug("software for domain='%s' was not detected, trying generator ...", domain)
463 software = fetch_generator_from_path(domain)
464 elif len(str(software)) > 0 and ("." in software or " " in software):
465 logger.debug("software='%s' may contain a version number, domain='%s', removing it ...", software, domain)
466 software = version.remove(software)
468 logger.debug("software[]='%s'", type(software))
469 if isinstance(software, str) and "powered by" in software:
470 logger.debug("software='%s' has 'powered by' in it", software)
471 software = version.remove(software_helper.strip_powered_by(software))
# Trim surrounding whitespace from the final name.
473 software = software.strip()
475 logger.debug("software[%s]='%s' - EXIT!", type(software), software)
478 def find_domains(tag: bs4.element.Tag) -> list:
# Reads domain/reason pairs from the <tr> rows of `tag`: the text of the first
# <td> is cleaned with tidyup.domain(), the text of the second <td> with
# tidyup.reason().
#
# Raises ValueError when `tag` is not a bs4.element.Tag and KeyError when it
# contains no <tr> element.
#
# NOTE(review): this extract carries no indentation and several short lines
# (e.g. the initialisation of `domains`, the append calls, continue and the
# return) are missing - confirm details against the full file.
479 logger.debug("tag[]='%s' - CALLED!", type(tag))
480 if not isinstance(tag, bs4.element.Tag):
481 raise ValueError(f"Parameter tag[]='{type(tag)}' is not type of bs4.element.Tag")
482 elif len(tag.select("tr")) == 0:
483 raise KeyError("No table rows found in table!")
# Rows without any <td> are logged as skipped.
486 for element in tag.select("tr"):
487 logger.debug("element[]='%s'", type(element))
488 if not element.find("td"):
489 logger.debug("Skipping element, no <td> found")
492 domain = tidyup.domain(element.find("td").text)
493 reason = tidyup.reason(element.findAll("td")[1].text)
495 logger.debug("domain='%s',reason='%s'", domain, reason)
# NOTE(review): the message below says "blacklisted", but the check performed
# is domain_helper.is_wanted().
497 if not domain_helper.is_wanted(domain):
498 logger.debug("domain='%s' is blacklisted - SKIPPED!", domain)
# Special case: one table cell that lists several gab.com domains at once.
500 elif domain == "gab.com/.ai, develop.gab.com":
501 logger.debug("Multiple gab.com domains detected in one row")
511 "domain": "develop.gab.com",
# Only the part before the first "/" is validated as a domain name.
515 elif not validators.domain(domain.split("/")[0]):
516 logger.warning("domain='%s' is not a valid domain - SKIPPED!", domain)
519 logger.debug("Adding domain='%s',reason='%s' ...", domain, reason)
525 logger.debug("domains()=%d - EXIT!", len(domains))
528 def add_peers(rows: dict) -> list:
# Collects peers from the "linked", "allowed" and "blocked" entries of `rows`.
# Each peer may be a str or a dict with a "domain" key; it is cleaned with
# tidyup.domain() and checked with domain_helper.is_wanted().
#
# Raises ValueError when `rows` is not a dict, and for a peer of an
# unsupported type (see the raise further down).
#
# NOTE(review): this extract carries no indentation and several short lines
# (e.g. the initialisation of `peers`, continue, else:, the append call and
# the return) are missing - confirm details against the full file.
529 logger.debug("rows[]='%s' - CALLED!", type(rows))
530 if not isinstance(rows, dict):
531 raise ValueError(f"Parameter rows[]='{type(rows)}' is not of type 'dict'")
# Keys that are absent or None are logged as skipped.
534 for key in ["linked", "allowed", "blocked"]:
535 logger.debug("key='%s'", key)
536 if key not in rows or rows[key] is None:
537 logger.debug("Cannot find key='%s' or it is NoneType - SKIPPED!", key)
540 logger.debug("Adding %d peer(s) to peers list ...", len(rows[key]))
541 for peer in rows[key]:
542 logger.debug("peer[%s]='%s' - BEFORE!", type(peer), peer)
543 if peer in [None, ""]:
544 logger.debug("peer is empty - SKIPPED")
# Accept either a dict carrying "domain" or a plain string.
546 elif isinstance(peer, dict) and "domain" in peer:
547 logger.debug("peer[domain]='%s'", peer["domain"])
548 peer = tidyup.domain(peer["domain"])
549 elif isinstance(peer, str):
550 logger.debug("peer='%s'", peer)
551 peer = tidyup.domain(peer)
# NOTE(review): the branch guarding the next raise is not visible here
# (presumably the final else of the chain above) - confirm.
553 raise ValueError(f"peer[]='{type(peer)}' is not supported,key='{key}'")
555 logger.debug("peer[%s]='%s' - AFTER!", type(peer), peer)
556 if not domain_helper.is_wanted(peer):
557 logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
560 logger.debug("Appending peer='%s' ...", peer)
563 logger.debug("peers()=%d - EXIT!", len(peers))
566 def fetch_blocks(domain: str) -> list:
# Fetches "/api/v1/instance/domain_blocks" from `domain` and turns the
# returned entries into block records (keys visible here: "blocked", "digest",
# "block_level").
#
# `domain` is passed to domain_helper.raise_on(), must be registered and must
# not be blacklisted; otherwise Exception is raised.
#
# NOTE(review): this extract carries no indentation and several short lines
# (e.g. the initialisation of `blocklist`/`headers`, try:, else:, the loop
# header over `rows`, continue and the returns) are missing - confirm details
# against the full file.
567 logger.debug("domain='%s' - CALLED!", domain)
568 domain_helper.raise_on(domain)
570 if not instances.is_registered(domain):
571 raise Exception(f"domain='{domain}' is not registered but function is invoked.")
572 elif blacklist.is_blacklisted(domain):
573 raise Exception(f"domain='{domain}' is blacklisted but function was invoked")
578 # No CSRF by default, you don't have to add network.api_headers by yourself here
# Ask csrf.determine() for the request headers, starting from an empty dict.
582 logger.debug("Checking CSRF for domain='%s'", domain)
583 headers = csrf.determine(domain, dict())
# A network error is logged and handed to instances.set_last_error(); the log
# message says an empty list is returned (the return itself is not visible).
584 except network.exceptions as exception:
585 logger.warning("Exception '%s' during checking CSRF (fetch_blocks,%s)", type(exception), __name__)
586 instances.set_last_error(domain, exception)
588 logger.debug("Returning empty list ... - EXIT!")
592 # json endpoint for newer mastodongs
593 logger.info("Fetching domain_blocks from domain='%s' ...", domain)
594 data = network.get_json_api(
596 "/api/v1/instance/domain_blocks",
598 timeout=(config.get("connection_timeout"), config.get("read_timeout"))
# An "error_message" entry marks a failed request; it is recorded as the
# domain's last error.
602 logger.debug("data(%d)[]='%s'", len(data), type(data))
603 if "error_message" in data:
604 logger.debug("Was not able to fetch domain_blocks from domain='%s': status_code=%d,error_message='%s'", domain, data['status_code'], data['error_message'])
605 instances.set_last_error(domain, data)
607 logger.debug("blocklist()=%d - EXIT!", len(blocklist))
# The JSON payload itself carries an "error" entry.
609 elif "json" in data and "error" in data["json"]:
610 logger.warning("JSON API returned error message: '%s'", data["json"]["error"])
611 instances.set_last_error(domain, data)
613 logger.debug("blocklist()=%d - EXIT!", len(blocklist))
# NOTE(review): the lines that take `rows` out of the response are not visible
# in this extract.
619 logger.debug("Marking domain='%s' as successfully handled ...", domain)
620 instances.set_success(domain)
622 logger.debug("rows(%d)[]='%s'", len(rows), type(rows))
624 logger.debug("Checking %d entries from domain='%s' ...", len(rows), domain)
# Validate each entry before using it.
# NOTE(review): the first message below says "is of type 'dict'", but that
# branch is taken when the entry is NOT a dict.
627 logger.debug("block[]='%s'", type(block))
628 if not isinstance(block, dict):
629 logger.debug("block[]='%s' is of type 'dict' - SKIPPED!", type(block))
631 elif "domain" not in block:
632 logger.warning("block()=%d does not contain element 'domain' - SKIPPED!", len(block))
634 elif "severity" not in block:
635 logger.warning("block()=%d does not contain element 'severity' - SKIPPED!", len(block))
# Severities "accept"/"accepted" are logged as unwanted.
637 elif block["severity"] in ["accept", "accepted"]:
638 logger.debug("block[domain]='%s' has unwanted severity level '%s' - SKIPPED!", block["domain"], block["severity"])
# A digest, when present, has to pass validators.hashes.sha256().
640 elif "digest" in block and not validators.hashes.sha256(block["digest"]):
641 logger.warning("block[domain]='%s' has invalid block[digest]='%s' - SKIPPED!", block["domain"], block["digest"])
# The reason is the tidied "comment"; a missing, None or empty one gives None.
644 reason = tidyup.reason(block["comment"]) if "comment" in block and block["comment"] is not None and block["comment"] != "" else None
646 logger.debug("Appending blocker='%s',blocked='%s',reason='%s',block_level='%s' ...", domain, block["domain"], reason, block["severity"])
# Fields of the appended record that are visible in this extract; the severity
# is mapped through blocks.alias_block_level().
649 "blocked" : block["domain"],
650 "digest" : block["digest"] if "digest" in block else None,
652 "block_level": blocks.alias_block_level(block["severity"]),
655 logger.debug("domain='%s' has no block list", domain)
# Network errors during the fetch are logged and recorded as last error.
657 except network.exceptions as exception:
658 logger.warning("domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
659 instances.set_last_error(domain, exception)
661 logger.debug("blocklist()=%d - EXIT!", len(blocklist))