1 # Fedi API Block - An aggregator for fetching blocking data from fediverse nodes
2 # Copyright (C) 2023 Free Software Foundation
4 # This program is free software: you can redistribute it and/or modify
5 # it under the terms of the GNU Affero General Public License as published
6 # by the Free Software Foundation, either version 3 of the License, or
7 # (at your option) any later version.
9 # This program is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # GNU Affero General Public License for more details.
14 # You should have received a copy of the GNU Affero General Public License
15 # along with this program. If not, see <https://www.gnu.org/licenses/>.
24 from fba.helpers import blacklist
25 from fba.helpers import blocks as blocks_helper
26 from fba.helpers import config
27 from fba.helpers import cookies
28 from fba.helpers import domain as domain_helper
29 from fba.helpers import software as software_helper
30 from fba.helpers import tidyup
31 from fba.helpers import version
33 from fba.http import csrf
34 from fba.http import network
35 from fba.http import nodeinfo
37 from fba.models import instances
39 from fba.networks import lemmy
40 from fba.networks import misskey
41 from fba.networks import peertube
43 # Depth counter, being raised and lowered
# NOTE(review): the assignment of the `_DEPTH` counter that the comment above
# refers to is not visible in this excerpt. The string below is presumably an
# entry of the `_api_paths` list iterated by fetch_peers() - confirm.
48 "/api/v1/instance/peers",
52 # Local "cache" to shorten intense debug output
# Both limits are read from the configuration once, when the module is loaded,
# and are compared against in fetch_instances() before descending into a peer.
53 _max_crawl_depth = config.get("max_crawl_depth")
54 _min_peers_length = config.get("min_peers_length")
# Configure logging at INFO level and create this module's logger.
56 logging.basicConfig(level=logging.INFO)
57 logger = logging.getLogger(__name__)
# Uncomment the next line to enable this module's debug output.
58 #logger.setLevel(logging.DEBUG)
# Crawl `domain` for its peer list and register every wanted peer that is not
# yet known, calling itself for new peers while the depth counter and the
# configured limits allow it.
#
# Parameters:
#   domain   - domain to crawl; validated by domain_helper.raise_on() and must
#              not be blacklisted
#   origin   - domain that led to this one; may be None except for the commands
#              listed in the validation chain below
#   software - software name of the instance, or None to have it determined by
#              determine_software() (skipped when the domain was fetched
#              recently)
#   command  - name of the invoking command, stored with newly added instances
#   path     - optional path handed to determine_software(); must start with
#              "/" when given
#
# Raises:
#   RuntimeError - domain is blacklisted, or was fetched recently while
#                  _DEPTH > 0
#   TypeError    - origin, command, path or software has an unexpected type
#   ValueError   - command is empty, origin is None for a command that needs
#                  one, or path does not start with a slash
#
# NOTE(review): several short statements of this function (the `try:` lines
# belonging to the `except` clauses, the statements following the
# "- SKIPPED!" / "- EXIT!" messages, some `else:`/`elif` guards) are not
# visible in this excerpt; the comments below describe only what is shown.
60 def fetch_instances(domain: str, origin: str, software: str, command: str, path: str = None) -> None:
62 logger.debug("domain='%s',origin='%s',software='%s',command='%s',path='%s',_DEPTH=%d - CALLED!", domain, origin, software, command, path, _DEPTH)
63 domain_helper.raise_on(domain)
# Parameter validation and, when no software was given, software detection.
65 if blacklist.is_blacklisted(domain):
66 raise RuntimeError(f"domain='{domain}' is blacklisted but function was invoked")
67 elif not isinstance(origin, str) and origin is not None:
68 raise TypeError(f"Parameter origin[]='{type(origin)}' has not expected type 'str'")
69 elif not isinstance(command, str):
70 raise TypeError(f"Parameter command[]='{type(command)}' has not expected type 'str'")
72 raise ValueError("Parameter 'command' is an empty string")
73 elif command in ["fetch_blocks", "fetch_cs", "fetch_bkali", "fetch_relays", "fetch_fedipact", "fetch_joinmobilizon", "fetch_joinmisskey", "fetch_joinfediverse", "fetch_relaylist"] and origin is None:
74 raise ValueError(f"Parameter command='{command}' but origin is None, please fix invoking this function.")
75 elif not isinstance(path, str) and path is not None:
76 raise TypeError(f"Parameter path[]='{type(path)}' has not expected type 'str'")
77 elif path is not None and not path.startswith("/"):
78 raise ValueError(f"path='{path}' does not start with a slash")
79 elif _DEPTH > 0 and instances.is_recent(domain, "last_instance_fetch"):
80 raise RuntimeError(f"domain='{domain}' has recently been fetched but function was invoked")
81 elif software is None and not instances.is_recent(domain, "last_instance_fetch"):
83 logger.debug("Software for domain='%s',path='%s' is not set, determining ...", domain, path)
84 software = determine_software(domain, path)
85 except network.exceptions as exception:
# A failed detection is recorded as the domain's last error.
86 logger.warning("Exception '%s' during determining software type", type(exception))
87 instances.set_last_error(domain, exception)
89 logger.debug("Determined software='%s' for domain='%s'", software, domain)
90 elif software is None:
91 logger.debug("domain='%s' has unknown software or nodeinfo has recently being fetched", domain)
92 elif not isinstance(software, str):
93 raise TypeError(f"Parameter software[]='{type(software)}' has not expected type 'str'")
# Drop any query string before IDNA-encoding the domain.
95 logger.debug("domain='%s' - BEFORE!", domain)
96 instance = domain_helper.encode_idna(domain.split("?")[0])
97 logger.debug("instance='%s' - AFTER!", instance)
102 logger.debug("Checking if instance='%s' is registered ...", instance)
103 if not instances.is_registered(instance):
104 logger.debug("Adding new instance='%s',origin='%s',command='%s',path='%s',software='%s'", instance, origin, command, path, software)
105 instances.add(instance, origin, command, path, software)
107 logger.debug("software='%s'", software)
108 if software is not None and software_helper.is_relay(software):
109 logger.debug("software='%s' is a relay software - EXIT!", software)
113 logger.debug("Updating last_instance_fetch for instance='%s' ...", instance)
114 instances.set_last_instance_fetch(instance)
# Peers are only fetched for known, non-relay software.
117 logger.debug("software='%s'", software)
118 if software is not None and not software_helper.is_relay(software):
120 logger.debug("Fetching instances for instance='%s',software='%s',origin='%s'", instance, software, origin)
121 peerlist = fetch_peers(instance, software, origin)
122 except network.exceptions as exception:
126 logger.debug("peerlist[]='%s'", type(peerlist))
127 if isinstance(peerlist, list):
# NOTE(review): the debug message below names instances.set_total_peerlist()
# although the call that follows it is instances.set_total_peers().
128 logger.debug("Invoking instances.set_total_peerlist(%s,%d) ...", instance, len(peerlist))
129 instances.set_total_peers(instance, peerlist)
131 logger.debug("Invoking cookies.clear(%s) ...", instance)
132 cookies.clear(instance)
134 logger.debug("peerlist[]='%s'", type(peerlist))
# No peer list at all: flush pending instance updates before leaving.
136 logger.warning("Cannot fetch peers: instance='%s',software='%s'", instance, software)
137 if instances.has_pending(instance):
138 logger.debug("Flushing updates for instance='%s' ...", instance)
139 instances.update(instance)
142 logger.debug("EXIT!")
# Empty peer list: same flush, reported at INFO level.
144 elif len(peerlist) == 0:
145 logger.info("instance='%s' returned an empty peer list.", instance)
146 if instances.has_pending(instance):
147 logger.debug("Flushing updates for instance='%s' ...", instance)
148 instances.update(instance)
151 logger.debug("instance='%s',software='%s' has an empty peer list returned - EXIT!", instance, software)
154 logger.info("Checking %d instance(s) from instance='%s',software='%s',depth=%d ...", len(peerlist), instance, software, _DEPTH)
155 for peer in peerlist:
156 logger.debug("peer[%s]='%s'", type(peer), peer)
157 if peer in [None, ""]:
158 logger.debug("peer[%s]='%s' is either None or empty - SKIPPED!", type(peer), peer)
# Peers given as dict carry a URL; it is reduced to the lower-cased host name
# without a port.
160 elif isinstance(peer, dict) and "url" in peer:
161 logger.debug("Found peer[url]='%s', extracting domain/host name ...", peer["url"])
162 if not validators.url(peer["url"]):
163 logger.warning("peer[url]='%s' is not a valid URL - SKIPPED!", peer["url"])
166 components = urllib.parse.urlparse(peer["url"])
167 logger.debug("components[%s]()=%d", type(components), len(components))
169 peer = components.netloc.lower().split(":")[0]
170 logger.debug("peer='%s'", peer)
# Non-string or empty peers become None here and are caught by the check that
# follows.
172 logger.debug("peer='%s' - BEFORE!", peer)
173 peer = tidyup.domain(peer) if isinstance(peer, str) and peer != "" else None
174 logger.debug("peer='%s' - AFTER!", peer)
176 if peer in [None, ""]:
177 logger.debug("peer[%s]='%s' is empty after tidyup.domain(), domain='%s'", type(peer), peer, domain)
180 logger.warning("peer='%s' contains double-dot, removing ...", peer)
181 peer = peer.replace("..", ".")
# Only the part before the first slash is validated as a domain name.
183 probe = peer.split("/")[0]
184 logger.debug("peer='%s',probe='%s'", peer, probe)
185 if not validators.domain(probe, rfc_2782=True):
186 logger.warning("probe='%s' is not a valid domain - SKIPPED!", probe)
188 elif not domain_helper.is_tld_wanted(probe):
189 logger.debug("probe='%s' has an unwanted TLD - SKIPPED!", probe)
192 logger.debug("peer='%s' - BEFORE!", peer)
193 peer = domain_helper.encode_idna(peer.split("?")[0])
194 logger.debug("peer='%s' - AFTER!", peer)
# Links to user profiles, to "/c/" paths on registered hosts and to tags are
# not instances and are skipped.
196 if not domain_helper.is_wanted(peer):
197 logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
199 elif peer.find("/profile/") > 0 or peer.find("/users/") > 0 or (instances.is_registered(peer.split("/")[0]) and peer.find("/c/") > 0):
200 logger.debug("peer='%s' is a link to a single user profile - SKIPPED!", peer)
202 elif peer.find("/tag/") > 0:
203 logger.debug("peer='%s' is a link to a tag - SKIPPED!", peer)
205 elif not instances.is_registered(peer):
# Pending updates of the current domain are flushed before handling the peer.
206 logger.debug("Checking if domain='%s' has pending updates ...", domain)
207 if instances.has_pending(domain):
208 logger.debug("Flushing updates for domain='%s' ...", domain)
209 instances.update(domain)
# The recursive call passes the current domain as origin and software=None, so
# the peer's software is determined by the callee.
211 logger.debug("peer='%s',origin='%s',_DEPTH=%d reached!", peer, origin, _DEPTH)
212 if _DEPTH <= _max_crawl_depth and len(peerlist) >= _min_peers_length:
213 logger.debug("Fetching peer='%s',origin='%s',command='%s',path='%s',_DEPTH=%d ...", peer, domain, command, path, _DEPTH)
214 fetch_instances(peer, domain, None, command, path)
# Presumably the alternative branch when the limits are exceeded: the peer is
# only registered, not crawled - confirm against the missing `else:`.
216 logger.debug("Adding peer='%s',domain='%s',command='%s',_DEPTH=%d ...", peer, domain, command, _DEPTH)
217 instances.add(peer, domain, command)
219 logger.debug("Checking if domain='%s' has pending updates ...", domain)
220 if instances.has_pending(domain):
221 logger.debug("Flushing updates for domain='%s' ...", domain)
222 instances.update(domain)
225 logger.debug("EXIT!")
# Fetch the peer list of `domain`.
#
# misskey, lemmy and peertube are delegated to their network modules. For any
# other software the paths in `_api_paths` are requested through
# network.fetch_json_rows().
#
# Parameters:
#   domain   - domain to query; validated by domain_helper.raise_on() and must
#              not be blacklisted
#   software - software name or None; must not be empty or a relay software
#   origin   - origin domain or None; must not be empty; only handed on to
#              lemmy.fetch_peers()
#
# Raises:
#   RuntimeError - domain is blacklisted or `software` is a relay software
#   TypeError    - software or origin has an unexpected type
#   ValueError   - software or origin is an empty string
#
# NOTE(review): the return statements, the arguments of the
# network.fetch_json_rows() call and some guards are not visible in this
# excerpt.
227 def fetch_peers(domain: str, software: str, origin: str) -> list:
228 logger.debug("domain='%s',software='%s',origin='%s' - CALLED!", domain, software, origin)
229 domain_helper.raise_on(domain)
231 if blacklist.is_blacklisted(domain):
232 raise RuntimeError(f"domain='{domain}' is blacklisted but function was invoked")
233 elif not isinstance(software, str) and software is not None:
234 raise TypeError(f"Parameter software[]='{type(software)}' has not expected type 'str'")
235 elif isinstance(software, str) and software == "":
236 raise ValueError("Parameter 'software' is an empty string")
237 elif software is not None and software_helper.is_relay(software):
238 raise RuntimeError(f"domain='{domain}' is of software='{software}' and isn't supported here.")
239 elif not isinstance(origin, str) and origin is not None:
240 raise TypeError(f"Parameter origin[]='{type(origin)}' has not expected type 'str'")
241 elif isinstance(origin, str) and origin == "":
242 raise ValueError("Parameter 'origin' is an empty string")
# Software with a dedicated network module is handled there.
244 if software == "misskey":
245 logger.debug("Invoking misskey.fetch_peers(%s) ...", domain)
246 return misskey.fetch_peers(domain)
247 elif software == "lemmy":
248 logger.debug("Invoking lemmy.fetch_peers(%s,%s) ...", domain, origin)
249 return lemmy.fetch_peers(domain, origin)
250 elif software == "peertube":
251 logger.debug("Invoking peertube.fetch_peers(%s) ...", domain)
252 return peertube.fetch_peers(domain)
254 # No CSRF by default, you don't have to add network.api_headers by yourself here
258 logger.debug("Checking CSRF for domain='%s'", domain)
259 headers = csrf.determine(domain, {})
260 except network.exceptions as exception:
# A failed CSRF check is stored as the domain's last error; according to the
# log message an empty list is returned afterwards.
261 logger.warning("Exception '%s' during checking CSRF (fetch_peers,%s)", type(exception), __name__)
262 instances.set_last_error(domain, exception)
264 logger.debug("Returning empty list ... - EXIT!")
267 # Init peers variable
270 logger.debug("Checking %d API paths ...", len(_api_paths))
271 for path in _api_paths:
272 logger.debug("Fetching path='%s' from domain='%s',software='%s' ...", path, domain, software)
273 peers = network.fetch_json_rows(
279 logger.debug("peers()=%d", len(peers))
281 logger.debug("Marking domain='%s' as successfully handled ...", domain)
282 instances.set_success(domain)
# Anything but a list is reported as a possibly bad API response.
285 if not isinstance(peers, list):
286 logger.warning("peers[]='%s' has not expected type 'list', maybe bad API response?", type(peers))
289 logger.debug("Invoking instances.set_total_peers(%s,%d) ...", domain, len(peers))
290 instances.set_total_peers(domain, peers)
292 logger.debug("peers()=%d - EXIT!", len(peers))
# Fetch `path` from `domain` and derive the software name from the HTML meta
# tags of the response.
#
# The tags are checked in this order and the first one with a non-empty string
# content is used: property="og:platform", name="generator",
# name="application-name", property="og:site_name". The matching detection mode
# (PLATFORM, GENERATOR, APP_NAME, SITE_NAME) is stored for the domain when the
# tidied value is not empty.
#
# Parameters:
#   domain - domain to query; validated by domain_helper.raise_on() and must
#            not be blacklisted
#   path   - path to fetch, must start with "/" (default "/")
#
# Raises:
#   RuntimeError - domain is blacklisted
#   TypeError    - path is not a string
#   ValueError   - path is empty or does not start with "/"
#   requests.exceptions.TooManyRedirects - the response URL is not a valid URL
#                  or does not contain `domain`
#
# NOTE(review): the return statement, the arguments of network.get_generic()
# and the initialisation of `software` are not visible in this excerpt.
295 def fetch_generator_from_path(domain: str, path: str = "/") -> str:
296 logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
297 domain_helper.raise_on(domain)
299 if blacklist.is_blacklisted(domain):
300 raise RuntimeError(f"domain='{domain}' is blacklisted but function was invoked")
301 elif not isinstance(path, str):
302 raise TypeError(f"path[]='{type(path)}' has not expected type 'str'")
304 raise ValueError("Parameter 'path' is an empty string")
305 elif not path.startswith("/"):
306 raise ValueError(f"path='{path}' does not start with / but should")
310 logger.debug("Fetching path='%s' from domain='%s' ...", path, domain)
311 response = network.get_generic(
317 logger.debug("response.ok='%s',response.status_code=%d,response.text()=%d", response.ok, response.status_code, len(response.text))
# The fragment part of the final URL is ignored for the checks below.
318 response_url = response.url.split("#")[0]
319 logger.debug("response_url='%s'", response_url)
# A 410 response is parsed like a 200 one, provided it contains HTML and the
# final URL is valid and still contains the requested domain.
321 if ((response.ok and response.status_code == 200) or response.status_code == 410) and response.text.find("<html") >= 0 and validators.url(response_url) and domain_helper.is_in_url(domain, response_url):
322 logger.debug("Parsing response.text()=%d Bytes ...", len(response.text))
323 doc = bs4.BeautifulSoup(response.text, features="html.parser")
325 logger.debug("doc[]='%s'", type(doc))
326 platform = doc.find("meta", {"property": "og:platform"})
327 generator = doc.find("meta", {"name" : "generator"})
328 site_name = doc.find("meta", {"property": "og:site_name"})
329 app_name = doc.find("meta", {"name" : "application-name"})
331 logger.debug("generator[]='%s',site_name[]='%s',platform[]='%s',app_name[]='%s'", type(generator), type(site_name), type(platform), type(app_name))
332 if isinstance(platform, bs4.element.Tag) and isinstance(platform.get("content"), str) and platform.get("content") != "":
333 logger.debug("Found property=og:platform, domain='%s'", domain)
334 software = tidyup.domain(platform.get("content"))
335 logger.debug("software[%s]='%s' after tidyup.domain() ...", type(software), software)
337 if software is not None and software != "":
338 logger.info("domain='%s' has og:platform='%s' - Setting detection_mode=PLATFORM ...", domain, software)
339 instances.set_detection_mode(domain, "PLATFORM")
340 elif isinstance(generator, bs4.element.Tag) and isinstance(generator.get("content"), str) and generator.get("content") != "":
341 logger.debug("Found generator meta tag: domain='%s'", domain)
342 software = tidyup.domain(generator.get("content"))
344 logger.debug("software[%s]='%s'", type(software), software)
345 if software is not None and software != "":
346 logger.info("domain='%s' is generated by software='%s' - Setting detection_mode=GENERATOR ...", domain, software)
347 instances.set_detection_mode(domain, "GENERATOR")
348 elif isinstance(app_name, bs4.element.Tag) and isinstance(app_name.get("content"), str) and app_name.get("content") != "":
# NOTE(review): the debug message below says "property=og:app_name" although
# this branch handles the meta tag with name="application-name".
349 logger.debug("Found property=og:app_name, domain='%s'", domain)
350 software = tidyup.domain(app_name.get("content"))
352 logger.debug("software[%s]='%s'", type(software), software)
353 if software is not None and software != "":
354 logger.info("domain='%s' has application-name='%s' - Setting detection_mode=APP_NAME ...", domain, software)
355 instances.set_detection_mode(domain, "APP_NAME")
356 elif isinstance(site_name, bs4.element.Tag) and isinstance(site_name.get("content"), str) and site_name.get("content") != "":
357 logger.debug("Found property=og:site_name, domain='%s'", domain)
358 software = tidyup.domain(site_name.get("content"))
360 logger.debug("software[%s]='%s'", type(software), software)
361 if software is not None and software != "":
362 logger.info("domain='%s' has og:site_name='%s' - Setting detection_mode=SITE_NAME ...", domain, software)
363 instances.set_detection_mode(domain, "SITE_NAME")
# An invalid final URL is recorded as the last error and reported as a
# redirect problem.
364 elif not validators.url(response_url):
365 logger.warning("response_url='%s' is not valid - Raising exception ...", response_url)
367 message = f"Redirect from domain='{domain}' to response_url='{response_url}'"
368 instances.set_last_error(domain, message)
369 raise requests.exceptions.TooManyRedirects(message)
# Redirect to a different host: the target is registered when it is wanted and
# unknown, and the stored software, detection mode and nodeinfo URL of the
# requested domain are reset.
370 elif not domain_helper.is_in_url(domain, response_url):
371 logger.warning("domain='%s' doesn't match response.url='%s', maybe redirect to other domain?", domain, response.url)
373 components = urllib.parse.urlparse(response.url)
374 domain2 = components.netloc.lower().split(":")[0]
376 logger.debug("domain2='%s'", domain2)
377 if not domain_helper.is_wanted(domain2):
378 logger.debug("domain2='%s' is not wanted - EXIT!", domain2)
380 elif not instances.is_registered(domain2):
381 logger.info("components.netloc='%s' is not registered, adding ...", components.netloc)
382 instances.add(domain2, domain, "redirect_target")
384 message = f"Redirect from domain='{domain}' to response.url='{response.url}'"
385 instances.set_last_error(domain, message)
386 instances.set_software(domain, None)
387 instances.set_detection_mode(domain, None)
388 instances.set_nodeinfo_url(domain, None)
390 raise requests.exceptions.TooManyRedirects(message)
# Any non-string result is normalised to None; strings are passed through
# software_helper.remove_extras().
392 logger.debug("software[]='%s'", type(software))
393 software = software_helper.remove_extras(software) if isinstance(software, str) else None
395 logger.debug("software[%s]='%s' - EXIT!", type(software), software)
# Determine the software name of `domain`.
#
# nodeinfo.fetch() is asked first. Whenever its result carries an error, only a
# message, or no software name, fetch_generator_from_path() is used instead.
# A found name is stored as the original software, then mapped through
# software_helper.alias() and cleaned of version numbers and "powered by"
# phrases.
#
# Parameters:
#   domain - domain to inspect; validated by domain_helper.raise_on() and must
#            not be blacklisted
#   path   - optional path handed to nodeinfo.fetch(); must start with "/"
#            when given
#
# Raises:
#   RuntimeError - domain is blacklisted
#   TypeError    - path has an unexpected type
#   ValueError   - path does not start with a slash
#   the exception stored under data["exception"] by nodeinfo.fetch()
#
# NOTE(review): the return statements and a few short guards are not visible in
# this excerpt.
398 def determine_software(domain: str, path: str = None) -> str:
399 logger.debug("domain='%s',path='%s' - CALLED!", domain, path)
400 domain_helper.raise_on(domain)
402 if blacklist.is_blacklisted(domain):
403 raise RuntimeError(f"domain='{domain}' is blacklisted but function was invoked")
404 elif not isinstance(path, str) and path is not None:
405 raise TypeError(f"Parameter path[]='{type(path)}' has not expected type 'str'")
406 elif path is not None and not path.startswith("/"):
407 raise ValueError(f"path='{path}' does not start with a slash")
409 logger.debug("Fetching nodeinfo from domain='%s',path='%s' ...", domain, path)
410 data = nodeinfo.fetch(domain, path)
413 logger.debug("data[%s]='%s'", type(data), data)
414 if "exception" in data:
415 # Continue raising it
416 logger.debug("data()=%d contains exception='%s' - raising ...", len(data), type(data["exception"]))
417 raise data["exception"]
418 elif "error_message" in data:
# nodeinfo reported an error: fall back to the HTML meta tags of "/".
419 logger.debug("Returned error_message during fetching nodeinfo: '%s',status_code=%d", data['error_message'], data['status_code'])
420 software = fetch_generator_from_path(domain)
421 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
423 logger.debug("domain='%s',path='%s',data[json] found ...", domain, path)
426 logger.debug("Auto-detection for domain='%s' was failing, fetching / ...", domain)
427 software = fetch_generator_from_path(domain)
428 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
# Inspect the response; on error or message-only responses the stored
# detection mode and nodeinfo URL are reset before falling back.
430 if "status" in data and data["status"] == "error" and "message" in data:
431 logger.warning("JSON response is an error: '%s' - Resetting detection_mode,nodeinfo_url ...", data["message"])
432 instances.set_last_error(domain, data["message"])
433 instances.set_detection_mode(domain, None)
434 instances.set_nodeinfo_url(domain, None)
435 software = fetch_generator_from_path(domain)
436 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
437 elif "software" in data and "name" in data["software"]:
438 logger.debug("Found data[json][software][name] in JSON response")
439 software = data["software"]["name"]
440 logger.debug("software[%s]='%s' - FOUND!", type(software), software)
441 elif "message" in data:
442 logger.warning("JSON response contains only a message: '%s' - Resetting detection_mode,nodeinfo_url ...", data["message"])
443 instances.set_last_error(domain, data["message"])
444 instances.set_detection_mode(domain, None)
445 instances.set_nodeinfo_url(domain, None)
447 logger.debug("Invoking fetch_generator_from_path(%s) ...", domain)
448 software = fetch_generator_from_path(domain)
449 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
450 elif "server" in data and "software" in data["server"]:
# Alternative layout: the name is found under data["server"]["software"] and
# is lower-cased.
451 logger.debug("Found data[server][software]='%s' for domain='%s'", data["server"]["software"].lower(), domain)
452 software = data["server"]["software"].lower()
453 logger.debug("Detected software for domain='%s' is: '%s'", domain, software)
454 elif "software" not in data or "name" not in data["software"]:
455 logger.debug("JSON response from domain='%s' does not include [software][name] - Resetting detection_mode,nodeinfo_url ...", domain)
456 instances.set_detection_mode(domain, None)
457 instances.set_nodeinfo_url(domain, None)
459 logger.debug("Invoking fetch_generator_from_path(%s) ...", domain)
460 software = fetch_generator_from_path(domain)
461 logger.debug("Generator for domain='%s' is: '%s'", domain, software)
463 logger.debug("software[%s]='%s'", type(software), software)
464 if software in [None, ""]:
465 logger.debug("Returning None - EXIT!")
# The name as found is kept before aliasing changes it.
468 logger.debug("Setting original software='%s' for domain='%s' ...", software, domain)
469 instances.set_original_software(domain, software)
471 logger.debug("software='%s'- BEFORE!", software)
472 software = software_helper.alias(software)
473 logger.debug("software['%s']='%s' - AFTER!", type(software), software)
475 if software in [None, ""]:
476 logger.debug("software for domain='%s' was not detected, trying generator ...", domain)
477 software = fetch_generator_from_path(domain)
478 elif len(str(software)) > 0 and ("." in software or " " in software):
479 logger.debug("software='%s' may contain a version number, domain='%s', removing it ...", software, domain)
480 software = version.remove(software)
482 logger.debug("software[]='%s'", type(software))
483 if isinstance(software, str) and "powered by" in software:
484 logger.debug("software='%s' has 'powered by' in it", software)
485 software = version.remove(software_helper.strip_powered_by(software))
# NOTE(review): if `software` can still be None here (for example when the
# fetch_generator_from_path() fallback above finds nothing), .strip() raises
# AttributeError - confirm that a guard exists.
487 software = software.strip()
489 logger.debug("software[%s]='%s' - EXIT!", type(software), software)
492 def find_domains(tag: bs4.element.Tag, domain_column: str = "dt", reason_column: str = "dd", reason_text: str = "Categories:") -> list:
493 logger.debug("tag[]='%s',domain_column='%s',reason_column='%s',reason_text='%s' - CALLED!", type(tag), domain_column, reason_column, reason_text)
495 if not isinstance(tag, bs4.element.Tag):
496 raise TypeError(f"Parameter tag[]='{type(tag)}' is not type of bs4.element.Tag")
497 elif not isinstance(domain_column, str):
498 raise TypeError(f"Parameter domain_column[]='{type(domain_column)}' is not type of 'str'")
499 elif domain_column == "":
500 raise ValueError("Parameter 'domain_column' is an empty string")
501 elif not isinstance(reason_column, str):
502 raise TypeError(f"Parameter reason_column[]='{type(reason_column)}' is not type of 'str'")
503 elif reason_column == "":
504 raise ValueError("Parameter 'reason_column' is an empty string")
505 elif len(tag.find_all(domain_column)) == 0:
506 raise KeyError("No domain_column='{domain_column}' rows found in table!")
507 elif len(tag.find_all(reason_column)) == 0:
508 raise KeyError("No reason_column='{reason_column}' rows found in table!")
509 elif not isinstance(reason_text, str):
510 raise TypeError(f"Parameter reason_text[]='{type(reason_text)}' is not type of 'str'")
511 elif reason_text == "":
512 raise ValueError("Parameter 'reason_text' is an empty string")
515 for element in tag.find_all(domain_column):
516 logger.debug("element[%s]='%s'", type(element), element)
518 domain = tidyup.domain(element.text)
519 reasons = element.find_next(reason_column).text.split(reason_text)[1].splitlines()
520 logger.debug("domain='%s',reasons(%d)='%s'", domain, len(reasons), reasons)
523 for reason in reasons:
524 logger.debug("reason[%s]='%s'", type(reason), reason)
525 if reason not in [None, ""]:
529 found = tidyup.reason(found)
530 logger.debug("domain='%s',found='%s'", domain, found)
532 if domain == "gab.com/.ai, develop.gab.com":
533 logger.debug("Multiple gab.com domains detected in one row")
543 "domain": "develop.gab.com",
547 elif not validators.domain(domain.split("/")[0], rfc_2782=True):
548 logger.warning("domain='%s' is not a valid domain - SKIPPED!", domain)
550 elif not domain_helper.is_wanted(domain):
551 logger.debug("domain='%s' is not wanted - SKIPPED!", domain)
554 logger.debug("Adding domain='%s',found='%s' ...", domain, found)
560 logger.debug("domains()=%d - EXIT!", len(domains))
# Collect peer domains from the "linked", "allowed" and "blocked" entries of
# `rows`.
#
# Each peer may be a dict with a "domain" element or a plain string. Its value
# is passed through tidyup.domain(), and peers rejected by
# domain_helper.is_wanted() are skipped.
#
# Parameters:
#   rows - dict that may contain the keys "linked", "allowed" and "blocked";
#          missing or None entries are skipped
#
# Raises:
#   TypeError  - rows is not a dict
#   ValueError - a peer is neither a dict with "domain" nor a string (see also
#                the note on the "empty string" message below)
#
# NOTE(review): the initialisation of `peers`, the append and the return
# statement are not visible in this excerpt.
563 def add_peers(rows: dict) -> list:
564 logger.debug("rows[]='%s' - CALLED!", type(rows))
566 if not isinstance(rows, dict):
567 raise TypeError(f"Parameter rows[]='{type(rows)}' has not expected type 'dict'")
# NOTE(review): the guard for this raise is not visible; the message speaks of
# an "empty string" although `rows` is a dict - confirm the wording.
569 raise ValueError("Parameter 'rows' is an empty string")
574 for key in ["linked", "allowed", "blocked"]:
575 logger.debug("key='%s'", key)
576 if key not in rows or rows[key] is None:
577 logger.debug("Cannot find key='%s' or it is NoneType - SKIPPED!", key)
580 logger.debug("Adding %d peer(s) to peers list ...", len(rows[key]))
581 for peer in rows[key]:
582 logger.debug("peer[%s]='%s' - BEFORE!", type(peer), peer)
583 if peer in [None, ""]:
584 logger.debug("peer is empty - SKIPPED!")
586 elif isinstance(peer, dict) and "domain" in peer:
# An empty "domain" element yields None instead of being tidied.
587 logger.debug("peer[domain]='%s'", peer["domain"])
588 peer = tidyup.domain(peer["domain"]) if peer["domain"] != "" else None
589 elif isinstance(peer, str):
590 logger.debug("peer='%s'", peer)
591 peer = tidyup.domain(peer)
593 raise ValueError(f"peer[]='{type(peer)}' is not supported,key='{key}'")
595 logger.debug("peer[%s]='%s' - AFTER!", type(peer), peer)
596 if not domain_helper.is_wanted(peer):
597 logger.debug("peer='%s' is not wanted - SKIPPED!", peer)
600 logger.debug("Appending peer='%s' ...", peer)
603 logger.debug("peers()=%d - EXIT!", len(peers))
606 def fetch_blocks(domain: str) -> list:
607 logger.debug("domain='%s' - CALLED!", domain)
608 domain_helper.raise_on(domain)
610 if blacklist.is_blacklisted(domain):
611 raise RuntimeError(f"domain='{domain}' is blacklisted but function was invoked")
612 elif not instances.is_registered(domain):
613 raise RuntimeError(f"domain='{domain}' is not registered but function was invoked")
618 # No CSRF by default, you don't have to add network.api_headers by yourself here
622 logger.debug("Checking CSRF for domain='%s'", domain)
623 headers = csrf.determine(domain, {})
624 except network.exceptions as exception:
625 logger.warning("Exception '%s' during checking CSRF (fetch_blocks,%s)", type(exception), __name__)
626 instances.set_last_error(domain, exception)
628 logger.debug("Returning empty list ... - EXIT!")
632 # json endpoint for newer mastodongs
633 logger.info("Fetching domain_blocks from domain='%s' ...", domain)
634 rows = network.fetch_json_rows(
636 "/api/v1/instance/domain_blocks",
640 logger.debug("Marking domain='%s' as successfully handled ...", domain)
641 instances.set_success(domain)
643 logger.debug("rows()=%d", len(rows))
645 logger.debug("Checking %d entries from domain='%s' ...", len(rows), domain)
648 logger.debug("block[]='%s'", type(block))
649 if not isinstance(block, dict):
650 logger.debug("block[]='%s' is of type 'dict' - SKIPPED!", type(block))
652 elif "domain" not in block:
653 logger.warning("block()=%d does not contain element 'domain' - SKIPPED!", len(block))
655 elif "severity" not in block:
656 logger.warning("block()=%d does not contain element 'severity' - SKIPPED!", len(block))
658 elif block["severity"] in ["accept", "accepted"]:
659 logger.debug("block[domain]='%s' has unwanted severity level '%s' - SKIPPED!", block["domain"], block["severity"])
661 elif "digest" in block and not block["digest"] is None and not validators.hashes.sha256(block["digest"]):
662 logger.warning("block[domain]='%s' has invalid block[digest]='%s' - SKIPPED!", block["domain"], block["digest"])
665 reason = tidyup.reason(block["comment"]) if "comment" in block and block["comment"] is not None and block["comment"] != "" else None
667 logger.debug("Appending blocker='%s',blocked='%s',reason='%s',block_level='%s' ...", domain, block["domain"], reason, block["severity"])
670 "blocked" : block["domain"],
671 "digest" : block["digest"] if "digest" in block else None,
673 "block_level": blocks_helper.alias_block_level(block["severity"]),
676 logger.debug("domain='%s' has no block list", domain)
678 except network.exceptions as exception:
679 logger.warning("domain='%s',exception[%s]='%s'", domain, type(exception), str(exception))
680 instances.set_last_error(domain, exception)
682 logger.debug("blocklist()=%d - EXIT!", len(blocklist))