2 resolver stuff (main module)
4 Copyright (C) 2005 Michael Tokarev <mjt@corpit.ru>
5 This file is part of UDNS library, an async DNS stub resolver.
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Lesser General Public
9 License as published by the Free Software Foundation; either
10 version 2.1 of the License, or (at your option) any later version.
12 This library is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public
18 License along with this library, in file named COPYING.LGPL; if not,
19 write to the Free Software Foundation, Inc., 59 Temple Place,
20 Suite 330, Boston, MA 02111-1307 USA
28 # include <winsock2.h> /* includes <windows.h> */
29 # include <ws2tcpip.h> /* needed for struct in6_addr */
31 # include <sys/types.h>
32 # include <sys/socket.h>
33 # include <netinet/in.h>
36 # include <sys/time.h>
38 # include <sys/poll.h>
40 # ifdef HAVE_SYS_SELECT_H
41 # include <sys/select.h>
45 # include <sys/times.h>
47 # define closesocket(sock) close(sock)
59 # define EAFNOSUPPORT EINVAL
62 # define MSG_DONTWAIT 0
66 struct dns_query *head, *tail;
70 struct dns_query *dnsq_next; /* double-linked list */
71 struct dns_query *dnsq_prev;
72 unsigned dnsq_origdnl0; /* original query DN len w/o last 0 */
73 unsigned dnsq_flags; /* control flags for this query */
74 unsigned dnsq_servi; /* index of next server to try */
75 unsigned dnsq_servwait; /* bitmask: servers left to wait */
76 unsigned dnsq_servskip; /* bitmask: servers to skip */
77 unsigned dnsq_servnEDNS0; /* bitmask: servers refusing EDNS0 */
78 unsigned dnsq_try; /* number of tries made so far */
79 dnscc_t *dnsq_nxtsrch; /* next search pointer @dnsc_srchbuf */
80 time_t dnsq_deadline; /* when current try will expire */
81 dns_parse_fn *dnsq_parse; /* parse: raw => application */
82 dns_query_fn *dnsq_cbck; /* the callback to call when done */
83 void *dnsq_cbdata; /* user data for the callback */
85 struct dns_ctx *dnsq_ctx; /* the resolver context */
87 /* char fields at the end to avoid padding */
88 dnsc_t dnsq_id[2]; /* query ID */
89 dnsc_t dnsq_typcls[4]; /* requested RR type+class */
90 dnsc_t dnsq_dn[DNS_MAXDN+DNS_DNPAD]; /* the query DN +alignment */
93 /* working with dns_query lists */
/* Put a query list into the empty state: both end pointers become NULL. */
95 static __inline void qlist_init(struct dns_qlist *list) {
96 list->head = list->tail = NULL;
/* Unlink q from list.
 * The neighbour on each side of q (or, when q is the first/last element,
 * the list's head/tail pointer) is redirected past q.  These statements do
 * not touch q's own dnsq_next/dnsq_prev fields. */
99 static __inline void qlist_remove(struct dns_qlist *list, struct dns_query *q) {
100 if (q->dnsq_prev) q->dnsq_prev->dnsq_next = q->dnsq_next;
101 else list->head = q->dnsq_next;
102 if (q->dnsq_next) q->dnsq_next->dnsq_prev = q->dnsq_prev;
103 else list->tail = q->dnsq_prev;
107 qlist_add_head(struct dns_qlist *list, struct dns_query *q) {
108 q->dnsq_next = list->head;
109 if (list->head) list->head->dnsq_prev = q;
116 qlist_insert_after(struct dns_qlist *list,
117 struct dns_query *q, struct dns_query *prev) {
118 if ((q->dnsq_prev = prev) != NULL) {
119 if ((q->dnsq_next = prev->dnsq_next) != NULL)
120 q->dnsq_next->dnsq_prev = q;
126 qlist_add_head(list, q);
131 struct sockaddr_in sin;
133 struct sockaddr_in6 sin6;
/* Non-zero when two IPv4 socket addresses (passed as struct lvalues, not
 * pointers) have the same port and the same address.  Each argument is
 * expanded twice. */
137 #define sin_eq(a,b) \
138 ((a).sin_port == (b).sin_port && \
139 (a).sin_addr.s_addr == (b).sin_addr.s_addr)
/* Same test for IPv6 socket addresses: the ports must match and the
 * sin6_addr members must be byte-for-byte identical.  No other member
 * is compared. */
140 #define sin6_eq(a,b) \
141 ((a).sin6_port == (b).sin6_port && \
142 memcmp(&(a).sin6_addr, &(b).sin6_addr, sizeof(struct in6_addr)) == 0)
144 struct dns_ctx { /* resolver context */
146 unsigned dnsc_flags; /* various flags */
147 unsigned dnsc_timeout; /* timeout (base value) for queries */
148 unsigned dnsc_ntries; /* number of retries */
149 unsigned dnsc_ndots; /* ndots to assume absolute name */
150 unsigned dnsc_port; /* default port (DNS_PORT) */
151 unsigned dnsc_udpbuf; /* size of UDP buffer */
152 /* array of nameserver addresses */
153 union sockaddr_ns dnsc_serv[DNS_MAXSERV];
154 unsigned dnsc_nserv; /* number of nameservers */
155 unsigned dnsc_salen; /* length of socket addresses */
156 dnsc_t dnsc_srchbuf[1024]; /* buffer for searchlist */
157 dnsc_t *dnsc_srchend; /* current end of srchbuf */
159 dns_utm_fn *dnsc_utmfn; /* register/cancel timer events */
160 void *dnsc_utmctx; /* user timer context for utmfn() */
161 time_t dnsc_utmexp; /* when user timer expires */
163 dns_dbgfn *dnsc_udbgfn; /* debugging function */
166 struct udns_jranctx dnsc_jran; /* random number generator state */
167 unsigned dnsc_nextid; /* next queue ID to use if !0 */
168 int dnsc_udpsock; /* UDP socket */
169 struct dns_qlist dnsc_qactive; /* active list sorted by deadline */
170 int dnsc_nactive; /* number entries in dnsc_qactive */
171 dnsc_t *dnsc_pbuf; /* packet buffer (udpbuf size) */
172 int dnsc_qstatus; /* last query status value */
175 static const struct {
181 #define opt(name,opt,field,min,max) \
182 {name,opt,offsetof(struct dns_ctx,field),min,max}
183 opt("retrans", DNS_OPT_TIMEOUT, dnsc_timeout, 1,300),
184 opt("timeout", DNS_OPT_TIMEOUT, dnsc_timeout, 1,300),
185 opt("retry", DNS_OPT_NTRIES, dnsc_ntries, 1,50),
186 opt("attempts", DNS_OPT_NTRIES, dnsc_ntries, 1,50),
187 opt("ndots", DNS_OPT_NDOTS, dnsc_ndots, 0,1000),
188 opt("port", DNS_OPT_PORT, dnsc_port, 1,0xffff),
189 opt("udpbuf", DNS_OPT_UDPSIZE, dnsc_udpbuf, DNS_MAXPACKET,65536),
/* Access, as an unsigned lvalue, the struct dns_ctx member described by
 * dns_opts[idx]: the member is located by adding the table's byte offset
 * to the context pointer.
 * NOTE(review): `ctx` is not parenthesized in the expansion, so the cast
 * binds only to the first operand of a compound argument - pass a plain
 * pointer variable. */
192 #define dns_ctxopt(ctx,idx) (*((unsigned*)(((char*)ctx)+dns_opts[idx].offset)))
/* True for space, tab, carriage return and newline.
 * NOTE(review): `x` is expanded up to four times and is not parenthesized;
 * avoid arguments with side effects or low-precedence operators. */
194 #define ISSPACE(x) (x == ' ' || x == '\t' || x == '\r' || x == '\n')
196 struct dns_ctx dns_defctx;
/* Context selection and state-check helpers.
 *  SETCTX          replace a NULL ctx with the default context &dns_defctx
 *  CTXINITED       non-zero when DNS_INITED is set in dnsc_flags
 *  CTXOPEN         non-zero when the context has a socket (dnsc_udpsock >= 0)
 *  SETCTXINITED    SETCTX, then assert the context is initialized
 *  SETCTXFRESH     SETCTXINITED, then assert the context is NOT open
 *  SETCTXINACTIVE  SETCTXINITED, then assert there are no active queries
 *  SETCTXOPEN      SETCTXINITED, then assert the context is open
 * NOTE(review): the SET* macros assign to their argument and expand to a
 * bare `if` followed by further statements (no do { } while (0) wrapper),
 * so use them only as a standalone statement on a modifiable pointer
 * variable - never as the body of an unbraced if/else. */
198 #define SETCTX(ctx) if (!ctx) ctx = &dns_defctx
199 #define SETCTXINITED(ctx) SETCTX(ctx); assert(CTXINITED(ctx))
200 #define CTXINITED(ctx) (ctx->dnsc_flags & DNS_INITED)
201 #define SETCTXFRESH(ctx) SETCTXINITED(ctx); assert(!CTXOPEN(ctx))
202 #define SETCTXINACTIVE(ctx) \
203 SETCTXINITED(ctx); assert(!ctx->dnsc_nactive)
204 #define SETCTXOPEN(ctx) SETCTXINITED(ctx); assert(CTXOPEN(ctx))
205 #define CTXOPEN(ctx) (ctx->dnsc_udpsock >= 0)
207 #if defined(NDEBUG) || !defined(DEBUG)
208 #define dns_assert_ctx(ctx)
210 static void dns_assert_ctx(const struct dns_ctx *ctx) {
212 const struct dns_query *q;
213 for(q = ctx->dnsc_qactive.head; q; q = q->dnsq_next) {
214 assert(q->dnsq_ctx == ctx);
215 assert(q == (q->dnsq_next ?
216 q->dnsq_next->dnsq_prev : ctx->dnsc_qactive.tail));
217 assert(q == (q->dnsq_prev ?
218 q->dnsq_prev->dnsq_next : ctx->dnsc_qactive.head));
221 assert(nactive == ctx->dnsc_nactive);
226 DNS_INTERNAL = 0xffff, /* internal flags mask */
227 DNS_INITED = 0x0001, /* the context is initialized */
228 DNS_ASIS_DONE = 0x0002, /* search: skip the last as-is query */
229 DNS_SEEN_NODATA = 0x0004, /* search: NODATA has been received */
/* Add a nameserver given as a textual address.
 * The next free slot of dnsc_serv[] is zeroed, then serv is parsed with
 * dns_pton() as an IPv4 address and, failing that, as an IPv6 address.
 * On success the slot's address family is set and the new number of
 * servers is returned.  The port member stays zero from the memset;
 * dns_open() substitutes the context's port for zero ports.
 * When the table already holds DNS_MAXSERV entries, errno is set to
 * ENFILE and -1 is returned.
 * NOTE(review): the `return (ctx->dnsc_nserv = 0)` path empties the server
 * list and returns 0; presumably it is taken for a NULL serv - confirm. */
232 int dns_add_serv(struct dns_ctx *ctx, const char *serv) {
233 union sockaddr_ns *sns;
236 return (ctx->dnsc_nserv = 0);
237 if (ctx->dnsc_nserv >= DNS_MAXSERV)
238 return errno = ENFILE, -1;
239 sns = &ctx->dnsc_serv[ctx->dnsc_nserv];
240 memset(sns, 0, sizeof(*sns));
241 if (dns_pton(AF_INET, serv, &sns->sin.sin_addr) > 0) {
242 sns->sin.sin_family = AF_INET;
243 return ++ctx->dnsc_nserv;
246 if (dns_pton(AF_INET6, serv, &sns->sin6.sin6_addr) > 0) {
247 sns->sin6.sin6_family = AF_INET6;
248 return ++ctx->dnsc_nserv;
/* Add a nameserver given as a ready-made socket address.
 * An AF_INET6 or AF_INET address is copied by value (whole sockaddr
 * structure, including its port) into the next free slot of dnsc_serv[],
 * and the new number of servers is returned.
 * Errors (return -1): ENFILE when the table already holds DNS_MAXSERV
 * entries, EAFNOSUPPORT for an address family that is not handled.
 * NOTE(review): the `return (ctx->dnsc_nserv = 0)` path empties the server
 * list and returns 0; presumably it is taken for a NULL sa - confirm. */
255 int dns_add_serv_s(struct dns_ctx *ctx, const struct sockaddr *sa) {
258 return (ctx->dnsc_nserv = 0);
259 if (ctx->dnsc_nserv >= DNS_MAXSERV)
260 return errno = ENFILE, -1;
262 else if (sa->sa_family == AF_INET6)
263 ctx->dnsc_serv[ctx->dnsc_nserv].sin6 = *(struct sockaddr_in6*)sa;
265 else if (sa->sa_family == AF_INET)
266 ctx->dnsc_serv[ctx->dnsc_nserv].sin = *(struct sockaddr_in*)sa;
268 return errno = EAFNOSUPPORT, -1;
269 return ++ctx->dnsc_nserv;
272 int dns_set_opts(struct dns_ctx *ctx, const char *opts) {
277 while(ISSPACE(*opts)) ++opts;
280 if (i >= sizeof(dns_opts)/sizeof(dns_opts[0])) { ++err; break; }
281 v = strlen(dns_opts[i].name);
282 if (strncmp(dns_opts[i].name, opts, v) != 0 ||
283 (opts[v] != ':' && opts[v] != '='))
287 if (*opts < '0' || *opts > '9') { ++err; break; }
288 do v = v * 10 + (*opts++ - '0');
289 while (*opts >= '0' && *opts <= '9');
290 if (v < dns_opts[i].min) v = dns_opts[i].min;
291 if (v > dns_opts[i].max) v = dns_opts[i].max;
292 dns_ctxopt(ctx, i) = v;
295 while(*opts && !ISSPACE(*opts)) ++opts;
300 int dns_set_opt(struct dns_ctx *ctx, enum dns_opt opt, int val) {
304 for(i = 0; i < sizeof(dns_opts)/sizeof(dns_opts[0]); ++i) {
305 if (dns_opts[i].opt != opt) continue;
306 prev = dns_ctxopt(ctx, i);
309 if (v < dns_opts[i].min || v > dns_opts[i].max) {
313 dns_ctxopt(ctx, i) = v;
317 if (opt == DNS_OPT_FLAGS) {
318 prev = ctx->dnsc_flags & ~DNS_INTERNAL;
321 (ctx->dnsc_flags & DNS_INTERNAL) | (val & ~DNS_INTERNAL);
/* Manage the context's search list, stored in dnsc_srchbuf.
 * Reset path: the whole buffer is zeroed and dnsc_srchend is moved back to
 * the start of the buffer (NOTE(review): presumably taken for a NULL
 * srch - confirm).
 * Append path: the space left in the buffer, less one byte kept back for
 * the terminator, is handed to dns_sptodn(), which converts srch into the
 * buffer at dnsc_srchend; dnsc_srchend then advances by the returned
 * length and a zero byte is stored at the new end. */
328 int dns_add_srch(struct dns_ctx *ctx, const char *srch) {
332 memset(ctx->dnsc_srchbuf, 0, sizeof(ctx->dnsc_srchbuf));
333 ctx->dnsc_srchend = ctx->dnsc_srchbuf;
337 sizeof(ctx->dnsc_srchbuf) - (ctx->dnsc_srchend - ctx->dnsc_srchbuf) - 1;
338 dnl = dns_sptodn(srch, ctx->dnsc_srchend, dnl);
340 ctx->dnsc_srchend += dnl;
341 ctx->dnsc_srchend[0] = '\0'; /* ensure the list always ends with "." */
348 static void dns_drop_utm(struct dns_ctx *ctx) {
350 ctx->dnsc_utmfn(NULL, -1, ctx->dnsc_utmctx);
351 ctx->dnsc_utmctx = NULL;
352 ctx->dnsc_utmexp = -1;
356 _dns_request_utm(struct dns_ctx *ctx, time_t now) {
360 q = ctx->dnsc_qactive.head;
362 deadline = -1, timeout = -1;
363 else if (!now || q->dnsq_deadline <= now)
364 deadline = 0, timeout = 0;
366 deadline = q->dnsq_deadline, timeout = (int)(deadline - now);
367 if (ctx->dnsc_utmexp == deadline)
369 ctx->dnsc_utmfn(ctx, timeout, ctx->dnsc_utmctx);
370 ctx->dnsc_utmexp = deadline;
374 dns_request_utm(struct dns_ctx *ctx, time_t now) {
376 _dns_request_utm(ctx, now);
379 void dns_set_dbgfn(struct dns_ctx *ctx, dns_dbgfn *dbgfn) {
381 ctx->dnsc_udbgfn = dbgfn;
385 dns_set_tmcbck(struct dns_ctx *ctx, dns_utm_fn *fn, void *data) {
388 ctx->dnsc_utmfn = fn;
389 ctx->dnsc_utmctx = data;
391 dns_request_utm(ctx, 0);
394 static unsigned dns_nonrandom_32(void) {
397 GetSystemTimeAsFileTime(&ft);
398 return ft.dwLowDateTime;
401 gettimeofday(&tv, NULL);
406 /* This is historic deprecated API */
/* Return a 16-bit value: the upper half of dns_nonrandom_32() is XOR-folded
 * into the lower half and the result is masked to 0xffff.
 * NOTE(review): as the helper's name says, the input is not random, so do
 * not rely on this value being unpredictable. */
407 UDNS_API unsigned dns_random16(void);
408 unsigned dns_random16(void) {
409 unsigned x = dns_nonrandom_32();
410 return (x ^ (x >> 16)) & 0xffff;
/* Seed the context's generator state (dnsc_jran) from dns_nonrandom_32()
 * and clear dnsc_nextid, the cache of not-yet-used ID bits; dns_newid()
 * draws a fresh value from the generator whenever that cache is zero. */
413 static void dns_init_rng(struct dns_ctx *ctx) {
414 udns_jraninit(&ctx->dnsc_jran, dns_nonrandom_32());
415 ctx->dnsc_nextid = 0;
418 void dns_close(struct dns_ctx *ctx) {
419 struct dns_query *q, *p;
421 if (CTXINITED(ctx)) {
422 if (ctx->dnsc_udpsock >= 0)
423 closesocket(ctx->dnsc_udpsock);
424 ctx->dnsc_udpsock = -1;
426 free(ctx->dnsc_pbuf);
427 ctx->dnsc_pbuf = NULL;
428 q = ctx->dnsc_qactive.head;
429 while((p = q) != NULL) {
433 qlist_init(&ctx->dnsc_qactive);
434 ctx->dnsc_nactive = 0;
/* Bring a context to its default configuration.
 * The whole structure is zeroed first, so every member not assigned below
 * ends up zero/NULL.  Then: timeout base 4, 3 tries, UDP buffer size
 * DNS_EDNS0PACKET, port DNS_PORT, no socket (dnsc_udpsock = -1), an empty
 * search list (dnsc_srchend at the start of dnsc_srchbuf) and an empty
 * active-query list.  Finally the flags are set to exactly DNS_INITED,
 * which is what CTXINITED() tests. */
439 void dns_reset(struct dns_ctx *ctx) {
442 memset(ctx, 0, sizeof(*ctx));
443 ctx->dnsc_timeout = 4;
444 ctx->dnsc_ntries = 3;
446 ctx->dnsc_udpbuf = DNS_EDNS0PACKET;
447 ctx->dnsc_port = DNS_PORT;
448 ctx->dnsc_udpsock = -1;
449 ctx->dnsc_srchend = ctx->dnsc_srchbuf;
450 qlist_init(&ctx->dnsc_qactive);
452 ctx->dnsc_flags = DNS_INITED;
455 struct dns_ctx *dns_new(const struct dns_ctx *copy) {
458 dns_assert_ctx(copy);
459 ctx = malloc(sizeof(*ctx));
463 ctx->dnsc_udpsock = -1;
464 qlist_init(&ctx->dnsc_qactive);
465 ctx->dnsc_nactive = 0;
466 ctx->dnsc_pbuf = NULL;
467 ctx->dnsc_qstatus = 0;
468 ctx->dnsc_srchend = ctx->dnsc_srchbuf +
469 (copy->dnsc_srchend - copy->dnsc_srchbuf);
470 ctx->dnsc_utmfn = NULL;
471 ctx->dnsc_utmctx = NULL;
476 void dns_free(struct dns_ctx *ctx) {
477 assert(ctx != NULL && ctx != &dns_defctx);
482 int dns_open(struct dns_ctx *ctx) {
486 union sockaddr_ns *sns;
488 unsigned have_inet6 = 0;
492 assert(!CTXOPEN(ctx));
494 port = htons((unsigned short)ctx->dnsc_port);
495 /* ensure we have at least one server */
496 if (!ctx->dnsc_nserv) {
497 sns = ctx->dnsc_serv;
498 sns->sin.sin_family = AF_INET;
499 sns->sin.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
503 for (i = 0; i < ctx->dnsc_nserv; ++i) {
504 sns = &ctx->dnsc_serv[i];
505 /* set port for each sockaddr */
507 if (sns->sa.sa_family == AF_INET6) {
508 if (!sns->sin6.sin6_port) sns->sin6.sin6_port = (unsigned short)port;
514 assert(sns->sa.sa_family == AF_INET);
515 if (!sns->sin.sin_port) sns->sin.sin_port = (unsigned short)port;
520 if (have_inet6 && have_inet6 < ctx->dnsc_nserv) {
521 /* convert all IPv4 addresses to IPv6 V4MAPPED */
522 struct sockaddr_in6 sin6;
523 memset(&sin6, 0, sizeof(sin6));
524 sin6.sin6_family = AF_INET6;
525 /* V4MAPPED: ::ffff:1.2.3.4 */
526 sin6.sin6_addr.s6_addr[10] = 0xff;
527 sin6.sin6_addr.s6_addr[11] = 0xff;
528 for(i = 0; i < ctx->dnsc_nserv; ++i) {
529 sns = &ctx->dnsc_serv[i];
530 if (sns->sa.sa_family == AF_INET) {
531 sin6.sin6_port = sns->sin.sin_port;
532 memcpy(sin6.sin6_addr.s6_addr + 4*3, &sns->sin.sin_addr, 4);
538 ctx->dnsc_salen = have_inet6 ?
539 sizeof(struct sockaddr_in6) : sizeof(struct sockaddr_in);
542 sock = socket(PF_INET6, SOCK_DGRAM, IPPROTO_UDP);
544 sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
545 #else /* !HAVE_IPv6 */
546 sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
547 ctx->dnsc_salen = sizeof(struct sockaddr_in);
548 #endif /* HAVE_IPv6 */
551 ctx->dnsc_qstatus = DNS_E_TEMPFAIL;
555 { unsigned long on = 1;
556 if (ioctlsocket(sock, FIONBIO, &on) == SOCKET_ERROR) {
558 ctx->dnsc_qstatus = DNS_E_TEMPFAIL;
563 if (fcntl(sock, F_SETFL, fcntl(sock, F_GETFL) | O_NONBLOCK) < 0 ||
564 fcntl(sock, F_SETFD, FD_CLOEXEC) < 0) {
566 ctx->dnsc_qstatus = DNS_E_TEMPFAIL;
570 /* allocate the packet buffer */
571 if ((ctx->dnsc_pbuf = malloc(ctx->dnsc_udpbuf)) == NULL) {
573 ctx->dnsc_qstatus = DNS_E_NOMEM;
578 ctx->dnsc_udpsock = sock;
579 dns_request_utm(ctx, 0);
/* Return the context's UDP socket descriptor (dnsc_udpsock); the value is
 * -1 while the context has no socket. */
583 int dns_sock(const struct dns_ctx *ctx) {
585 return ctx->dnsc_udpsock;
/* Return the number of entries in the context's active query list
 * (dnsc_nactive). */
588 int dns_active(const struct dns_ctx *ctx) {
591 return ctx->dnsc_nactive;
/* Return the last query status value recorded in the context
 * (dnsc_qstatus). */
594 int dns_status(const struct dns_ctx *ctx) {
596 return ctx->dnsc_qstatus;
/* Overwrite the context's last query status value (dnsc_qstatus) with
 * status; dns_status() returns this value. */
598 void dns_setstatus(struct dns_ctx *ctx, int status) {
600 ctx->dnsc_qstatus = status;
603 /* End the query: disconnect it from the active list, free it,
604 * and return the result to the caller.
607 dns_end_query(struct dns_ctx *ctx, struct dns_query *q,
608 int status, void *result) {
609 dns_query_fn *cbck = q->dnsq_cbck;
610 void *cbdata = q->dnsq_cbdata;
611 ctx->dnsc_qstatus = status;
612 assert((status < 0 && result == 0) || (status >= 0 && result != 0));
613 assert(cbck != 0); /*XXX callback may be NULL */
614 assert(ctx->dnsc_nactive > 0);
616 qlist_remove(&ctx->dnsc_qactive, q);
617 /* force the query to be unconnected */
618 /*memset(q, 0, sizeof(*q));*/
623 cbck(ctx, result, cbdata);
626 #define DNS_DBG(ctx, code, sa, slen, pkt, plen) \
628 if (ctx->dnsc_udbgfn) \
629 ctx->dnsc_udbgfn(code, (sa), slen, pkt, plen, 0, 0); \
631 #define DNS_DBGQ(ctx, q, code, sa, slen, pkt, plen) \
633 if (ctx->dnsc_udbgfn) \
634 ctx->dnsc_udbgfn(code, (sa), slen, pkt, plen, q, q->dnsq_cbdata); \
637 static void dns_newid(struct dns_ctx *ctx, struct dns_query *q) {
638 /* this is how we choose an identifier for a new query (qID).
639 * For now, it's just sequential number, incremented for every query, and
640 * thus obviously trivial to guess.
641 * There are two choices:
642 * a) use sequential numbers. It is plain insecure. In DNS, there are two
643 * places where random numbers are (or can be) used to increase security:
644 * random qID and random source port number. Without this randomness
645 * (udns uses fixed port for all queries), or when the randomness is weak,
646 * it's trivial to spoof query replies. With randomness however, it
647 * becomes a bit more difficult task. Too bad we only have 16 bits for
648 * our security, as qID is only two bytes. It isn't a security per se,
649 * to rely on those 16 bits - an attacker can just flood us with fake
650 * replies with all possible qIDs (only 65536 of them), and in this case,
651 * even if we'll use true random qIDs, we'll be in trouble (not protected
652 * against spoofing). Yes, this is only possible on a high-speed network
653 * (probably on the LAN only, since usually a border router for a LAN
654 * protects internal machines from packets with spoofed local addresses
655 * from outside, and usually a nameserver resides on LAN), but it's
656 * still very well possible to send us fake replies.
657 * In other words: there's nothing a DNS (stub) resolver can do against
658 * spoofing attacks, unless DNSSEC is in use, which helps here a lot.
659 * Too bad that DNSSEC isn't widespread, so relying on it isn't an
660 * option in almost all cases...
661 * b) use random qID, based on some random-number generation mechanism.
662 * This way, we increase our protection a bit (see above - it's very weak
663 * still), but we also increase risk of qID reuse and matching late replies
664 * that come to queries we've sent before against new queries. There are
665 * some more corner cases around that, as well - for example, normally,
666 * udns tries to find the query for a given reply by qID, *and* by
667 * verifying that the query DN and other parameters are also the same
668 * (so if the new query is against another domain name, old reply will
669 * be ignored automatically). But certain types of replies which we now
670 * handle - for example, FORMERR replies from servers which refuse to
671 * process EDNS0-enabled packets - come without all the query parameters
672 * but the qID - so we're forced to use qID only when determining which
673 * query the given reply corresponds to. This makes us even more
674 * vulnerable to spoofing attacks, because an attacker doesn't even need to
675 * know which queries we perform to spoof the replies - he only needs to
676 * flood us with fake FORMERR "replies".
678 * That's all to say: using sequential (or any other trivially guessable)
679 * numbers for qIDs is insecure, but the whole thing is inherently insecure
680 * as well, and this "extra weakness" that comes from weak qID choosing
681 * algorithm adds almost nothing to the underlying problem.
683 * It CAN NOT be made secure. Period. That's it.
684 * Unless we choose to implement DNSSEC, which is a whole different story.
685 * Forcing TCP mode makes it better, but who uses TCP for DNS anyway?
686 * (and it's hardly possible because of huge impact on the recursive
689 * Note that ALL stub resolvers (again, unless they implement and enforce
690 * DNSSEC) suffer from this same problem.
692 * Here, I use a pseudo-random number generator for qIDs, instead of a
693 * simpler sequential IDs. This is _not_ more secure than sequential
694 * ID, but some found random IDs more enjoyable for some reason. So
698 /* Use random number and check if it's unique.
699 * If it's not, try again up to 5 times.
703 for(loop = 0; loop < 5; ++loop) {
704 const struct dns_query *c;
705 if (!ctx->dnsc_nextid)
706 ctx->dnsc_nextid = udns_jranval(&ctx->dnsc_jran);
707 c0 = ctx->dnsc_nextid & 0xff;
708 c1 = (ctx->dnsc_nextid >> 8) & 0xff;
709 ctx->dnsc_nextid >>= 16;
710 for(c = ctx->dnsc_qactive.head; c; c = c->dnsq_next)
711 if (c->dnsq_id[0] == c0 && c->dnsq_id[1] == c1)
712 break; /* found such entry, try again */
716 q->dnsq_id[0] = c0; q->dnsq_id[1] = c1;
718 /* reset all parameters relevant for previous query lifetime */
721 /*XXX probably should keep dnsq_servnEDNS0 bits?
722 * See also comments in dns_ioevent() about FORMERR case */
723 q->dnsq_servwait = q->dnsq_servskip = q->dnsq_servnEDNS0 = 0;
726 /* Find the next search suffix and fill in q->dnsq_dn.
727 * Return 0 if no more to try. */
728 static int dns_next_srch(struct dns_ctx *ctx, struct dns_query *q) {
732 if (q->dnsq_nxtsrch > ctx->dnsc_srchend)
734 dnl = dns_dnlen(q->dnsq_nxtsrch);
735 if (dnl + q->dnsq_origdnl0 <= DNS_MAXDN &&
736 (*q->dnsq_nxtsrch || !(q->dnsq_flags & DNS_ASIS_DONE)))
738 q->dnsq_nxtsrch += dnl;
740 memcpy(q->dnsq_dn + q->dnsq_origdnl0, q->dnsq_nxtsrch, dnl);
741 if (!*q->dnsq_nxtsrch)
742 q->dnsq_flags |= DNS_ASIS_DONE;
743 q->dnsq_nxtsrch += dnl;
744 dns_newid(ctx, q); /* new ID for new qDN */
748 /* find the server to try for current iteration.
749 * Note that current dnsq_servi may point to a server we should skip --
750 * in that case advance to the next server.
751 * Return true if found, false if all tried.
753 static int dns_find_serv(const struct dns_ctx *ctx, struct dns_query *q) {
754 while(q->dnsq_servi < ctx->dnsc_nserv) {
755 if (!(q->dnsq_servskip & (1 << q->dnsq_servi)))
762 /* format and send the query to a given server.
763 * In case of network problem (sendto() fails), return -1,
767 dns_send_this(struct dns_ctx *ctx, struct dns_query *q,
768 unsigned servi, time_t now) {
772 { /* format the query buffer */
773 dnsc_t *p = ctx->dnsc_pbuf;
774 memset(p, 0, DNS_HSIZE);
775 if (!(q->dnsq_flags & DNS_NORD)) p[DNS_H_F1] |= DNS_HF1_RD;
776 if (q->dnsq_flags & DNS_AAONLY) p[DNS_H_F1] |= DNS_HF1_AA;
777 if (q->dnsq_flags & DNS_SET_CD) p[DNS_H_F2] |= DNS_HF2_CD;
779 memcpy(p + DNS_H_QID, q->dnsq_id, 2);
782 p += dns_dntodn(q->dnsq_dn, p, DNS_MAXDN);
783 /* query type and class */
784 memcpy(p, q->dnsq_typcls, 4); p += 4;
785 /* add EDNS0 record. DO flag requires it */
786 if (q->dnsq_flags & DNS_SET_DO ||
787 (ctx->dnsc_udpbuf > DNS_MAXPACKET &&
788 !(q->dnsq_servnEDNS0 & (1 << servi)))) {
789 *p++ = 0; /* empty (root) DN */
790 p = dns_put16(p, DNS_T_OPT);
791 p = dns_put16(p, ctx->dnsc_udpbuf);
792 /* EDNS0 RCODE & VERSION; rest of the TTL field; RDLEN */
794 if (q->dnsq_flags & DNS_SET_DO) p[2] |= DNS_EF1_DO;
796 ctx->dnsc_pbuf[DNS_H_ARCNT2] = 1;
798 qlen = p - ctx->dnsc_pbuf;
799 assert(qlen <= ctx->dnsc_udpbuf);
804 while (sendto(ctx->dnsc_udpsock, (void*)ctx->dnsc_pbuf, qlen, 0,
805 &ctx->dnsc_serv[servi].sa, ctx->dnsc_salen) < 0) {
806 /*XXX just ignore the sendto() error for now and try again.
807 * In the future, it may be possible to retrieve the error code
808 * and find which operation/query failed.
809 *XXX try the next server too? (if ENETUNREACH is returned immediately)
811 if (--tries) continue;
812 /* if we can't send the query, fail it. */
813 dns_end_query(ctx, q, DNS_E_TEMPFAIL, 0);
817 &ctx->dnsc_serv[servi].sa, sizeof(union sockaddr_ns),
818 ctx->dnsc_pbuf, qlen);
819 q->dnsq_servwait |= 1 << servi; /* expect reply from this ns */
821 q->dnsq_deadline = now +
822 (dns_find_serv(ctx, q) ? 1 : ctx->dnsc_timeout << q->dnsq_try);
824 /* move the query to the proper place, according to the new deadline */
825 qlist_remove(&ctx->dnsc_qactive, q);
826 { /* insert from the tail */
828 for(p = ctx->dnsc_qactive.tail; p; p = p->dnsq_prev)
829 if (p->dnsq_deadline <= q->dnsq_deadline)
831 qlist_insert_after(&ctx->dnsc_qactive, q, p);
837 /* send the query out using next available server
838 * and add it to the active list, or, if no servers available,
842 dns_send(struct dns_ctx *ctx, struct dns_query *q, time_t now) {
844 /* if we can't send the query, return TEMPFAIL even when searching:
845 * we can't be sure whether the name we tried to search exists or not,
846 * so don't continue searching, or we may find the wrong name. */
848 if (!dns_find_serv(ctx, q)) {
849 /* no more servers in this iteration. Try the next cycle */
850 q->dnsq_servi = 0; /* reset */
851 q->dnsq_try++; /* next try */
852 if (q->dnsq_try >= ctx->dnsc_ntries ||
853 !dns_find_serv(ctx, q)) {
854 /* no more servers and tries, fail the query */
855 /* return TEMPFAIL even when searching: no more tries for this
856 * searchlist, and no single definitive reply (handled in dns_ioevent()
857 * in NOERROR or NXDOMAIN cases) => all nameservers failed to process
858 * current search list element, so we don't know whether the name exists.
860 dns_end_query(ctx, q, DNS_E_TEMPFAIL, 0);
865 dns_send_this(ctx, q, q->dnsq_servi++, now);
868 static void dns_dummy_cb(struct dns_ctx *ctx, void *result, void *data) {
869 if (result) free(result);
870 data = ctx = 0; /* used */
873 /* The (only, main, real) query submission routine.
874 * Allocate new query structure, initialize it, check validity of
875 * parameters, and add it to the head of the active list, without
876 * trying to send it (to be picked up on next event).
877 * Error return (without calling the callback routine) -
878 * no memory or wrong parameters.
879 *XXX The `no memory' case probably should go to the callback anyway...
882 dns_submit_dn(struct dns_ctx *ctx,
883 dnscc_t *dn, int qcls, int qtyp, int flags,
884 dns_parse_fn *parse, dns_query_fn *cbck, void *data) {
889 q = calloc(sizeof(*q), 1);
891 ctx->dnsc_qstatus = DNS_E_NOMEM;
898 q->dnsq_parse = parse;
899 q->dnsq_cbck = cbck ? cbck : dns_dummy_cb;
900 q->dnsq_cbdata = data;
902 q->dnsq_origdnl0 = dns_dntodn(dn, q->dnsq_dn, sizeof(q->dnsq_dn));
903 assert(q->dnsq_origdnl0 > 0);
904 --q->dnsq_origdnl0; /* w/o the trailing 0 */
905 dns_put16(q->dnsq_typcls+0, qtyp);
906 dns_put16(q->dnsq_typcls+2, qcls);
907 q->dnsq_flags = (flags | ctx->dnsc_flags) & ~DNS_INTERNAL;
909 if (flags & DNS_NOSRCH ||
910 dns_dnlabels(q->dnsq_dn) > ctx->dnsc_ndots) {
911 q->dnsq_nxtsrch = flags & DNS_NOSRCH ?
912 ctx->dnsc_srchend /* end of the search list if no search requested */ :
913 ctx->dnsc_srchbuf /* beginning of the list, but try as-is first */;
914 q->dnsq_flags |= DNS_ASIS_DONE;
918 q->dnsq_nxtsrch = ctx->dnsc_srchbuf;
919 dns_next_srch(ctx, q);
922 /* q->dnsq_deadline is set to 0 (calloc above): the new query is
923 * "already expired" when first inserted into queue, so it's safe
924 * to insert it into the head of the list. Next call to dns_timeouts()
925 * will actually send it.
927 qlist_add_head(&ctx->dnsc_qactive, q);
929 dns_request_utm(ctx, 0);
935 dns_submit_p(struct dns_ctx *ctx,
936 const char *name, int qcls, int qtyp, int flags,
937 dns_parse_fn *parse, dns_query_fn *cbck, void *data) {
940 if (dns_ptodn(name, 0, ctx->dnsc_pbuf, DNS_MAXDN, &isabs) <= 0) {
941 ctx->dnsc_qstatus = DNS_E_BADQUERY;
947 dns_submit_dn(ctx, ctx->dnsc_pbuf, qcls, qtyp, flags, parse, cbck, data);
950 /* process readable fd condition.
951 * To be usable in edge-triggered environment, the routine
952 * should consume all input so it should loop over.
953 * Note it isn't really necessary to loop here, because
954 * an application may perform the loop just fine on its own,
955 * but in this case we should return some sensible result,
956 * to indicate when to stop calling and error conditions.
957 * Note also we may encounter all sorts of recvfrom()
958 * errors which aren't fatal, and at the same time we may
959 * loop forever if an error IS fatal.
961 void dns_ioevent(struct dns_ctx *ctx, time_t now) {
966 dnscc_t *pend, *pcur;
968 union sockaddr_ns sns;
975 pbuf = ctx->dnsc_pbuf;
977 if (!now) now = time(NULL);
979 again: /* receive the reply */
982 r = recvfrom(ctx->dnsc_udpsock, (void*)pbuf, ctx->dnsc_udpbuf,
983 MSG_DONTWAIT, &sns.sa, &slen);
985 /*XXX just ignore recvfrom() errors for now.
986 * in the future it may be possible to determine which
987 * query failed and requeue it.
988 * Note there may be various error conditions, triggered
989 * by both local problems and remote problems. It isn't
990 * quite trivial to determine whether an error is local
991 * or remote. On local errors, we should stop, while
992 * remote errors should be ignored (for now anyway).
995 if (WSAGetLastError() == WSAEWOULDBLOCK)
1000 dns_request_utm(ctx, now);
1007 pcur = dns_payload(pbuf);
1009 /* check reply header */
1010 if (pcur > pend || dns_numqd(pbuf) > 1 || dns_opcode(pbuf) != 0) {
1011 DNS_DBG(ctx, -1/*bad reply*/, &sns.sa, slen, pbuf, r);
1015 /* find the matching query, by qID */
1016 for (q = ctx->dnsc_qactive.head; ; q = q->dnsq_next) {
1018 /* no more requests: old reply? */
1019 DNS_DBG(ctx, -5/*no matching query*/, &sns.sa, slen, pbuf, r);
1022 if (pbuf[DNS_H_QID1] == q->dnsq_id[0] &&
1023 pbuf[DNS_H_QID2] == q->dnsq_id[1])
1027 /* if we have numqd, compare with our query qDN */
1028 if (dns_numqd(pbuf)) {
1029 /* decode the qDN */
1030 dnsc_t dn[DNS_MAXDN];
1031 if (dns_getdn(pbuf, &pcur, pend, dn, sizeof(dn)) < 0 ||
1033 DNS_DBG(ctx, -1/*bad reply*/, &sns.sa, slen, pbuf, r);
1036 if (!dns_dnequal(dn, q->dnsq_dn) ||
1037 memcmp(pcur, q->dnsq_typcls, 4) != 0) {
1038 /* not this query */
1039 DNS_DBG(ctx, -5/*no matching query*/, &sns.sa, slen, pbuf, r);
1042 /* here, query match, and pcur points past qDN in query section in pbuf */
1044 /* if no numqd, we only allow FORMERR rcode */
1045 else if (dns_rcode(pbuf) != DNS_R_FORMERR) {
1046 /* treat it as bad reply if !FORMERR */
1047 DNS_DBG(ctx, -1/*bad reply*/, &sns.sa, slen, pbuf, r);
1051 /* else it's FORMERR, handled below */
1056 if (sns.sa.sa_family == AF_INET6 && slen >= sizeof(sns.sin6)) {
1057 for(servi = 0; servi < ctx->dnsc_nserv; ++servi)
1058 if (sin6_eq(ctx->dnsc_serv[servi].sin6, sns.sin6))
1063 if (sns.sa.sa_family == AF_INET && slen >= sizeof(sns.sin)) {
1064 for(servi = 0; servi < ctx->dnsc_nserv; ++servi)
1065 if (sin_eq(ctx->dnsc_serv[servi].sin, sns.sin))
1069 servi = ctx->dnsc_nserv;
1071 /* check if we expect reply from this server.
1072 * Note we can receive reply from first try if we're already at next */
1073 if (!(q->dnsq_servwait & (1 << servi))) { /* if ever asked this NS */
1074 DNS_DBG(ctx, -2/*wrong server*/, &sns.sa, slen, pbuf, r);
1078 /* we got (some) reply for our query */
1080 DNS_DBGQ(ctx, q, 0, &sns.sa, slen, pbuf, r);
1081 q->dnsq_servwait &= ~(1 << servi); /* don't expect reply from this serv */
1083 /* process the RCODE */
1084 switch(dns_rcode(pbuf)) {
1088 /* possible truncation. We can't deal with it. */
1089 /*XXX for now, treat TC bit the same as SERVFAIL.
1090 * It is possible to:
1091 * a) try to decode the reply - may be ANSWER section is ok;
1092 * b) check if the server understands EDNS0, and if it does, and
1093 * the answer still doesn't fit, end the query.
1097 if (!dns_numan(pbuf)) { /* no data of requested type */
1098 if (dns_next_srch(ctx, q)) {
1099 /* if we're searching, try next searchlist element,
1100 * but remember NODATA reply. */
1101 q->dnsq_flags |= DNS_SEEN_NODATA;
1102 dns_send(ctx, q, now);
1105 /* else - nothing to search any more - finish the query.
1106 * It will be NODATA since we've seen a NODATA reply. */
1107 dns_end_query(ctx, q, DNS_E_NODATA, 0);
1109 /* we've got a positive reply here */
1110 else if (q->dnsq_parse) {
1111 /* if we have parsing routine, call it and return whatever it returned */
1112 /* don't try to re-search if NODATA here. For example,
1113 * if we asked for A but only received CNAME. Unless we'll
1114 * someday do recursive queries. And that's problematic too, since
1115 * we may be dealing with specific AA-only nameservers for a given
1116 * domain, but CNAME points elsewhere...
1118 r = q->dnsq_parse(q->dnsq_dn, pbuf, pcur, pend, &result);
1119 dns_end_query(ctx, q, r, r < 0 ? NULL : result);
1121 /* else just malloc+copy the raw DNS reply */
1122 else if ((result = malloc(r)) == NULL)
1123 dns_end_query(ctx, q, DNS_E_NOMEM, NULL);
1125 memcpy(result, pbuf, r);
1126 dns_end_query(ctx, q, r, result);
1130 case DNS_R_NXDOMAIN: /* Non-existing domain. */
1131 if (dns_next_srch(ctx, q))
1132 /* more search entries exist, try them. */
1133 dns_send(ctx, q, now);
1135 /* nothing to search anymore. End the query, returning either NODATA
1136 * if we've seen it before, or NXDOMAIN if not. */
1137 dns_end_query(ctx, q,
1138 q->dnsq_flags & DNS_SEEN_NODATA ? DNS_E_NODATA : DNS_E_NXDOMAIN, 0);
1143 /* for FORMERR and NOTIMPL rcodes, if we tried EDNS0-enabled query,
1145 if (ctx->dnsc_udpbuf > DNS_MAXPACKET &&
1146 !(q->dnsq_servnEDNS0 & (1 << servi))) {
1147 /* we always try EDNS0 first if enabled, and retry a given query
1148 * if not available. Maybe it's better to remember unavailability of
1149 * EDNS0 in ctx as a per-NS flag, and never try again for this NS.
1150 * For long-running applications.. maybe they will change the nameserver
1151 * while we're running? :) Also, since FORMERR is the only rcode we
1152 * allow to be header-only, and in this case the only check we do to
1153 * find a query it belongs to is qID (not qDN+qCLS+qTYP), it's much
1154 * easier to spoof and to force us to perform non-EDNS0 queries only...
1156 q->dnsq_servnEDNS0 |= 1 << servi;
1157 dns_send_this(ctx, q, servi, now);
1160 /* else we handle it the same as SERVFAIL etc */
1162 case DNS_R_SERVFAIL:
1164 /* for these rcodes, advance this request
1165 * to the next server and reschedule */
1166 default: /* unknown rcode? hmmm... */
1170 /* here, we received unexpected reply */
1171 q->dnsq_servskip |= (1 << servi); /* don't retry this server */
1173 /* we don't expect replies from this server anymore.
1174 * But there may be other servers. Some may be still processing our
1175 * query, and some may be left to try.
1176 * We just ignore this reply and wait a bit more if some NSes haven't
1177 * replied yet (dnsq_servwait != 0), and let the situation be handled
1178 * on next event processing. Timeout for this query is set correctly,
1179 * if not taking into account the one-second difference - we can try
1180 * next server in the same iteration sooner.
1183 /* try next server */
1184 if (!q->dnsq_servwait) {
1185 /* next retry: maybe some other servers will reply next time.
1186 * dns_send() will end the query for us if no more servers to try.
1187 * Note we can't continue with the next searchlist element here:
1188 * we don't know if the current qdn exists or not, there's no definitive
1189 * answer yet (which is seen in cases above).
1190 *XXX standard resolver also tries as-is query in case all nameservers
1191 * failed to process our query and if not tried before. We don't do it.
1193 dns_send(ctx, q, now);
1196 /* else don't do anything - not all servers replied yet */
1202 /* handle all timeouts */
/* dns_timeouts(ctx, maxwait, now)
 *
 * Repeatedly takes the head of ctx->dnsc_qactive.  A query whose
 * dnsq_deadline is not later than `now' is handed to dns_send(); for the
 * first query whose deadline is still in the future, the remaining time
 * (dnsq_deadline - now, as an int) is compared against maxwait.
 * Afterwards dns_request_utm(ctx, now) is called.
 *
 * dns_resolve() passes maxwait = -1, treats a negative return value as
 * "stop waiting", and uses a non-negative one as a wait time in whole
 * seconds.
 *
 * NOTE(review): the early exits, the assignment to maxwait and the return
 * statement are not visible in this excerpt - confirm against the full
 * function before relying on details.
 */
1203 int dns_timeouts(struct dns_ctx *ctx, int maxwait, time_t now) {
1204 /* this is a hot routine */
1205 struct dns_query *q;
1208 dns_assert_ctx(ctx);
1210 /* Pick up first entry from query list.
1211 * If its deadline has passed, (re)send it
1212 * (dns_send() will move it next in the list).
1213 * If not, this is the query which determines the closest deadline.
1216 q = ctx->dnsc_qactive.head;
1222 if (q->dnsq_deadline > now) { /* first non-expired query */
1223 int w = (int)(q->dnsq_deadline - now);
/* a negative maxwait is accepted here as "no limit given by the caller";
 * presumably maxwait is lowered to w on the following (not visible) line */
1224 if (maxwait < 0 || maxwait > w)
1229 /* process expired deadline */
1230 dns_send(ctx, q, now);
/* re-read the list head on every iteration: per the comment above,
 * dns_send() repositions the query it was given */
1232 } while((q = ctx->dnsc_qactive.head) != NULL);
1234 dns_request_utm(ctx, now); /* update timer with new deadline */
/* State shared between dns_resolve() and its completion callback
 * dns_resolve_cb().  dns_resolve() keeps one instance in a local variable
 * and passes its address as the query's callback data.
 * Members used in this file section: dnsrd_result (the pointer handed to
 * the callback, later returned by dns_resolve()) and dnsrd_done (tested by
 * the wait loop in dns_resolve()).
 * NOTE(review): the member declarations are not visible in this excerpt.
 */
1238 struct dns_resolve_data {
/* Completion callback that dns_resolve() installs on a query.
 * `data' is the struct dns_resolve_data supplied as callback data;
 * the `result' pointer is stored into its dnsrd_result member.
 * NOTE(review): the rest of the body is not visible in this excerpt;
 * presumably it also sets dnsrd_done, which dns_resolve() polls.
 */
1243 static void dns_resolve_cb(struct dns_ctx *ctx, void *result, void *data) {
1244 struct dns_resolve_data *d = data;
1245 d->dnsrd_result = result;
/* Run an already submitted query `q' to completion and return its result.
 *
 * ctx must be the context the query belongs to (asserted against
 * q->dnsq_ctx).  A query whose callback is already dns_resolve_cb is
 * rejected: ctx->dnsc_qstatus is set to DNS_E_BADQUERY.
 * Otherwise the query's callback is replaced by dns_resolve_cb and its
 * callback data by the address of a local struct dns_resolve_data, so that
 * pointer is only meaningful while this function is executing.
 *
 * Returns d.dnsrd_result, i.e. the pointer stored by dns_resolve_cb.
 *
 * NOTE(review): local declarations, the early returns and the preprocessor
 * conditionals choosing between poll() and select() are not visible in
 * this excerpt.
 */
1250 void *dns_resolve(struct dns_ctx *ctx, struct dns_query *q) {
1252 struct dns_resolve_data d;
1259 assert(ctx == q->dnsq_ctx);
1260 dns_assert_ctx(ctx);
1261 /* do not allow re-resolving synchronous queries */
1262 assert(q->dnsq_cbck != dns_resolve_cb && "can't resolve syncronous query");
/* same check again without assert, for builds where assert() is compiled out */
1263 if (q->dnsq_cbck == dns_resolve_cb) {
1264 ctx->dnsc_qstatus = DNS_E_BADQUERY;
/* take over completion notification for this query */
1267 q->dnsq_cbck = dns_resolve_cb;
1268 q->dnsq_cbdata = &d;
/* keep going until the callback has fired or dns_timeouts() returns a
 * negative value; n is then used as the wait time in whole seconds */
1272 while(!d.dnsrd_done && (n = dns_timeouts(ctx, -1, now)) >= 0) {
/* poll() variant: wait for the UDP socket to become readable,
 * timeout converted from seconds to milliseconds */
1275 pfd.fd = ctx->dnsc_udpsock;
1276 pfd.events = POLLIN;
1277 n = poll(&pfd, 1, n * 1000);
/* select() variant of the same wait */
1282 FD_SET(ctx->dnsc_udpsock, &rfd);
1283 tv.tv_sec = n; tv.tv_usec = 0;
1284 n = select(ctx->dnsc_udpsock + 1, &rfd, NULL, NULL, &tv);
/* presumably processes whatever arrived on the socket - confirm in dns_ioevent() */
1288 dns_ioevent(ctx, now);
1291 return d.dnsrd_result;
/* Convenience wrapper taking the query name as a DN (dnscc_t *).
 * Submits the query through dns_submit_dn() with NULL for the callback
 * and callback-data arguments.  The submit call is itself an argument of
 * an enclosing call whose start is not visible in this excerpt
 * (presumably dns_resolve(ctx, ...) - confirm).
 */
1294 void *dns_resolve_dn(struct dns_ctx *ctx,
1295 dnscc_t *dn, int qcls, int qtyp, int flags,
1296 dns_parse_fn *parse) {
1299 dns_submit_dn(ctx, dn, qcls, qtyp, flags, parse, NULL, NULL));
/* Convenience wrapper taking the query name as a C string.
 * Submits the query through dns_submit_p() with NULL for the callback
 * and callback-data arguments.  The submit call is itself an argument of
 * an enclosing call whose start is not visible in this excerpt
 * (presumably dns_resolve(ctx, ...) - confirm).
 */
1302 void *dns_resolve_p(struct dns_ctx *ctx,
1303 const char *name, int qcls, int qtyp, int flags,
1304 dns_parse_fn *parse) {
1307 dns_submit_p(ctx, name, qcls, qtyp, flags, parse, NULL, NULL));
1310 int dns_cancel(struct dns_ctx *ctx, struct dns_query *q) {
1312 dns_assert_ctx(ctx);
1313 assert(q->dnsq_ctx == ctx);
1314 /* do not allow cancelling synchronous queries */
1315 assert(q->dnsq_cbck != dns_resolve_cb && "can't cancel syncronous query");
1316 if (q->dnsq_cbck == dns_resolve_cb)
1317 return (ctx->dnsc_qstatus = DNS_E_BADQUERY);
1318 qlist_remove(&ctx->dnsc_qactive, q);
1319 --ctx->dnsc_nactive;
1320 dns_request_utm(ctx, 0);