"Fossies" - the Fresh Open Source Software Archive

Member "dnsmasq-2.85/src/forward.c" (7 Apr 2021, 77887 Bytes) of package /linux/misc/dns/dnsmasq-2.85.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file. For more information about "forward.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.84_vs_2.85.

    1 /* dnsmasq is Copyright (c) 2000-2021 Simon Kelley
    2 
    3    This program is free software; you can redistribute it and/or modify
    4    it under the terms of the GNU General Public License as published by
    5    the Free Software Foundation; version 2 dated June, 1991, or
    6    (at your option) version 3 dated 29 June, 2007.
    7  
    8    This program is distributed in the hope that it will be useful,
    9    but WITHOUT ANY WARRANTY; without even the implied warranty of
   10    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   11    GNU General Public License for more details.
   12      
   13    You should have received a copy of the GNU General Public License
   14    along with this program.  If not, see <http://www.gnu.org/licenses/>.
   15 */
   16 
   17 #include "dnsmasq.h"
   18 
   19 static struct frec *lookup_frec(unsigned short id, int fd, void *hash);
   20 static struct frec *lookup_frec_by_query(void *hash, unsigned int flags);
   21 
   22 static unsigned short get_id(void);
   23 static void free_frec(struct frec *f);
   24 static void query_full(time_t now);
   25 
   26 /* Send a UDP packet with its source address set as "source" 
   27    unless nowild is true, when we just send it with the kernel default */
   28 int send_from(int fd, int nowild, char *packet, size_t len, 
   29           union mysockaddr *to, union all_addr *source,
   30           unsigned int iface)
   31 {
   32   struct msghdr msg;
   33   struct iovec iov[1]; 
   34   union {
   35     struct cmsghdr align; /* this ensures alignment */
   36 #if defined(HAVE_LINUX_NETWORK)
   37     char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
   38 #elif defined(IP_SENDSRCADDR)
   39     char control[CMSG_SPACE(sizeof(struct in_addr))];
   40 #endif
   41     char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
   42   } control_u;
   43   
   44   iov[0].iov_base = packet;
   45   iov[0].iov_len = len;
   46 
   47   msg.msg_control = NULL;
   48   msg.msg_controllen = 0;
   49   msg.msg_flags = 0;
   50   msg.msg_name = to;
   51   msg.msg_namelen = sa_len(to);
   52   msg.msg_iov = iov;
   53   msg.msg_iovlen = 1;
   54   
   55   if (!nowild)
   56     {
   57       struct cmsghdr *cmptr;
   58       msg.msg_control = &control_u;
   59       msg.msg_controllen = sizeof(control_u);
   60       cmptr = CMSG_FIRSTHDR(&msg);
   61 
   62       if (to->sa.sa_family == AF_INET)
   63     {
   64 #if defined(HAVE_LINUX_NETWORK)
   65       struct in_pktinfo p;
   66       p.ipi_ifindex = 0;
   67       p.ipi_spec_dst = source->addr4;
   68       msg.msg_controllen = CMSG_SPACE(sizeof(struct in_pktinfo));
   69       memcpy(CMSG_DATA(cmptr), &p, sizeof(p));
   70       cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_pktinfo));
   71       cmptr->cmsg_level = IPPROTO_IP;
   72       cmptr->cmsg_type = IP_PKTINFO;
   73 #elif defined(IP_SENDSRCADDR)
   74       msg.msg_controllen = CMSG_SPACE(sizeof(struct in_addr));
   75       memcpy(CMSG_DATA(cmptr), &(source->addr4), sizeof(source->addr4));
   76       cmptr->cmsg_len = CMSG_LEN(sizeof(struct in_addr));
   77       cmptr->cmsg_level = IPPROTO_IP;
   78       cmptr->cmsg_type = IP_SENDSRCADDR;
   79 #endif
   80     }
   81       else
   82     {
   83       struct in6_pktinfo p;
   84       p.ipi6_ifindex = iface; /* Need iface for IPv6 to handle link-local addrs */
   85       p.ipi6_addr = source->addr6;
   86       msg.msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
   87       memcpy(CMSG_DATA(cmptr), &p, sizeof(p));
   88       cmptr->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
   89       cmptr->cmsg_type = daemon->v6pktinfo;
   90       cmptr->cmsg_level = IPPROTO_IPV6;
   91     }
   92     }
   93   
   94   while (retry_send(sendmsg(fd, &msg, 0)));
   95 
   96   if (errno != 0)
   97     {
   98 #ifdef HAVE_LINUX_NETWORK
   99       /* If interface is still in DAD, EINVAL results - ignore that. */
  100       if (errno != EINVAL)
  101     my_syslog(LOG_ERR, _("failed to send packet: %s"), strerror(errno));
  102 #endif
  103       return 0;
  104     }
  105   
  106   return 1;
  107 }
  108           
  /* Decide, from the entries in daemon->servers, whether a query for
     qdomain can be answered from configuration instead of being forwarded.

     now      - passed on to check_for_local_domain().
     addrpp   - out: when an address answer is selected, set to point at
                that address (either inside the matching server entry or
                at the function-static all-zero address).
     qtype    - flag bits describing the query; tested here against
                F_IPV4, F_IPV6, F_QUERY and F_DNSSECOK.
     qdomain  - the name being queried.
     type     - in/out: SERV_* bits of the best match found so far.
     domain   - out: domain string of the matching server entry; reset to
                NULL when the match has SERV_USE_RESOLV and nothing local
                applies.
     norebind - out, may be NULL: set to 1 when a matching entry carries
                SERV_NO_REBIND.

     Returns 0 when nothing local applies, otherwise the flags of the
     local answer (F_NXDOMAIN, F_NOERR, F_IPV4/F_IPV6, or qtype itself
     for the all-zero address case). A non-zero result is also logged. */
  109 static unsigned int search_servers(time_t now, union all_addr **addrpp, unsigned int qtype,
  110                    char *qdomain, int *type, char **domain, int *norebind)
  111                   
  112 {
  113   /* If the query ends in the domain in one of our servers, set
  114      domain to point to that name. We find the largest match to allow both
  115      domain.org and sub.domain.org to exist. */
  116   
  117   unsigned int namelen = strlen(qdomain);
  118   unsigned int matchlen = 0;
  119   struct server *serv;
  120   unsigned int flags = 0;
        /* static so that *addrpp can keep pointing at it after return */
  121   static union all_addr zero;
  122   
  123   for (serv = daemon->servers; serv; serv=serv->next)
        /* for a F_DNSSECOK lookup only entries with SERV_DO_DNSSEC are considered */
  124     if (qtype == F_DNSSECOK && !(serv->flags & SERV_DO_DNSSEC))
  125       continue;
  126     /* domain matches take priority over NODOTS matches */
  127     else if ((serv->flags & SERV_FOR_NODOTS) && *type != SERV_HAS_DOMAIN && !strchr(qdomain, '.') && namelen != 0)
  128       {
        /* address-family flag corresponding to this entry's address */
  129     unsigned int sflag = serv->addr.sa.sa_family == AF_INET ? F_IPV4 : F_IPV6; 
  130     *type = SERV_FOR_NODOTS;
  131     if ((serv->flags & SERV_NO_REBIND) && norebind)
  132       *norebind = 1;
  133     else if (serv->flags & SERV_NO_ADDR)
  134       flags = F_NXDOMAIN;
  135     else if (serv->flags & SERV_LITERAL_ADDRESS)
  136       { 
  137         /* literal address = '#' -> return all-zero address for IPv4 and IPv6 */
  138         if ((serv->flags & SERV_USE_RESOLV) && (qtype & (F_IPV6 | F_IPV4)))
  139           {
  140         memset(&zero, 0, sizeof(zero));
  141         flags = qtype;
  142         *addrpp = &zero;
  143           }
  144         else if (sflag & qtype)
  145           {
  146         flags = sflag;
  147         if (serv->addr.sa.sa_family == AF_INET) 
  148           *addrpp = (union all_addr *)&serv->addr.in.sin_addr;
  149         else
  150           *addrpp = (union all_addr *)&serv->addr.in6.sin6_addr;
  151           }
              /* address exists but for the other family: answer with no data,
                 unless an address answer was already chosen */
  152         else if (!flags || (flags & F_NXDOMAIN))
  153           flags = F_NOERR;
  154       } 
  155       }
  156     else if (serv->flags & SERV_HAS_DOMAIN)
  157       {
  158     unsigned int domainlen = strlen(serv->domain);
        /* NOTE(review): matchstart is computed before the namelen >= domainlen
           test below; it is only used once that test has passed. */
  159     char *matchstart = qdomain + namelen - domainlen;
        /* suffix match on a label boundary (or exact / empty-domain match) */
  160     if (namelen >= domainlen &&
  161         hostname_isequal(matchstart, serv->domain) &&
  162         (domainlen == 0 || namelen == domainlen || *(matchstart-1) == '.' ))
  163       {
  164         if ((serv->flags & SERV_NO_REBIND) && norebind) 
  165           *norebind = 1;
  166         else
  167           {
  168         unsigned int sflag = serv->addr.sa.sa_family == AF_INET ? F_IPV4 : F_IPV6;
  169         /* implement priority rules for --address and --server for same domain.
  170            --address wins if the address is for the correct AF
  171            --server wins otherwise. */
  172         if (domainlen != 0 && domainlen == matchlen)
  173           {
  174             if ((serv->flags & SERV_LITERAL_ADDRESS))
  175               {
  176             if (!(sflag & qtype) && flags == 0)
  177               continue;
  178               }
  179             else
  180               {
  181             if (flags & (F_IPV4 | F_IPV6))
  182               continue;
  183               }
  184           }
  185         
              /* >= means a later entry of equal length replaces an earlier one */
  186         if (domainlen >= matchlen)
  187           {
  188             *type = serv->flags & (SERV_HAS_DOMAIN | SERV_USE_RESOLV | SERV_NO_REBIND | SERV_DO_DNSSEC);
  189             *domain = serv->domain;
  190             matchlen = domainlen;
  191             if (serv->flags & SERV_NO_ADDR)
  192               flags = F_NXDOMAIN;
  193             else if (serv->flags & SERV_LITERAL_ADDRESS)
  194               {
  195              /* literal address = '#' -> return all-zero address for IPv4 and IPv6 */
  196             if ((serv->flags & SERV_USE_RESOLV) && (qtype & (F_IPV6 | F_IPV4)))
  197               {             
  198                 memset(&zero, 0, sizeof(zero));
  199                 flags = qtype;
  200                 *addrpp = &zero;
  201               }
  202             else if (sflag & qtype)
  203               {
  204                 flags = sflag;
  205                 if (serv->addr.sa.sa_family == AF_INET) 
  206                   *addrpp = (union all_addr *)&serv->addr.in.sin_addr;
  207                 else
  208                   *addrpp = (union all_addr *)&serv->addr.in6.sin6_addr;
  209               }
  210             else if (!flags || (flags & F_NXDOMAIN))
  211               flags = F_NOERR;
  212               }
  213             else
                    /* a real upstream server for this domain: no local answer */
  214               flags = 0;
  215           } 
  216           }
  217       }
  218       }
  219   
  220   if (flags == 0 && !(qtype & (F_QUERY | F_DNSSECOK)) && 
  221       option_bool(OPT_NODOTS_LOCAL) && !strchr(qdomain, '.') && namelen != 0)
  222     /* don't forward A or AAAA queries for simple names, except the empty name */
  223     flags = F_NOERR;
  224   
        /* downgrade NXDOMAIN to no-data when check_for_local_domain() reports the name */
  225   if (flags == F_NXDOMAIN && check_for_local_domain(qdomain, now))
  226     flags = F_NOERR;
  227 
  228   if (flags)
  229     {
  230        if (flags == F_NXDOMAIN || flags == F_NOERR)
  231      log_query(flags | qtype | F_NEG | F_CONFIG | F_FORWARD, qdomain, NULL, NULL);
  232        else
  233      {
  234        /* handle F_IPV4 and F_IPV6 set on ANY query to 0.0.0.0/:: domain. */
  235        if (flags & F_IPV4)
  236          log_query((flags | F_CONFIG | F_FORWARD) & ~F_IPV6, qdomain, *addrpp, NULL);
  237        if (flags & F_IPV6)
  238          log_query((flags | F_CONFIG | F_FORWARD) & ~F_IPV4, qdomain, *addrpp, NULL);
  239      }
  240     }
  241   else if ((*type) & SERV_USE_RESOLV)
  242     {
  243       *type = 0; /* use normal servers for this domain */
  244       *domain = NULL;
  245     }
  246   return  flags;
  247 }
  248 
  #ifdef HAVE_CONNTRACK
/* Copy connection mark of incoming query to outgoing connection.
   The socket is left untouched when get_incoming_mark() returns zero;
   the result of setsockopt() is not examined. */
static void set_outgoing_mark(struct frec *forward, int fd)
{
  unsigned int mark;

  if (!get_incoming_mark(&forward->frec_src.source, &forward->frec_src.dest, 0, &mark))
    return;

  setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark));
}
#endif
  258 
  259 static void log_query_mysockaddr(unsigned int flags, char *name, union mysockaddr *addr, char *arg)
  260 {
  261   if (addr->sa.sa_family == AF_INET)
  262     log_query(flags | F_IPV4, name, (union all_addr *)&addr->in.sin_addr, arg);
  263   else
  264     log_query(flags | F_IPV6, name, (union all_addr *)&addr->in6.sin6_addr, arg);
  265 }
  266 
  267 static void server_send(struct server *server, int fd,
  268             const void *header, size_t plen, int flags)
  269 {
  270   while (retry_send(sendto(fd, header, plen, flags,
  271                &server->addr.sa,
  272                sa_len(&server->addr))));
  273 }
  274 
  #ifdef HAVE_DNSSEC
/* Send a packet to server, recording it first: the packet is written to
   the dump file (HAVE_DUMPFILE builds only), then a query log line is
   emitted for the server's address, and finally the packet is sent with
   sendto() flags of zero. */
static void server_send_log(struct server *server, int fd,
                            const void *header, size_t plen, int dumpflags,
                            unsigned int logflags, char *name, char *arg)
{
  union mysockaddr *peer = &server->addr;

#ifdef HAVE_DUMPFILE
  dump_packet(dumpflags, (void *)header, plen, NULL, peer);
#endif

  log_query_mysockaddr(logflags, name, peer, arg);
  server_send(server, fd, header, plen, 0);
}
#endif
  287 
  288 static int server_test_type(const struct server *server,
  289                 const char *domain, int type, int extratype)
  290 {
  291   return (type == (server->flags & (SERV_TYPE | extratype)) &&
  292       (type != SERV_HAS_DOMAIN || hostname_isequal(domain, server->domain)) &&
  293       !(server->flags & (SERV_LITERAL_ADDRESS | SERV_LOOP)));
  294 }
  295 
  /* Forward a UDP query to upstream server(s), or answer it directly when
     it cannot or need not be forwarded.

     udpfd      - socket the query arrived on; when -1 no local reply is sent.
     udpaddr    - address of the client that sent the query.
     dst_addr,
     dst_iface  - address/interface the query was received on; stored in the
                  forwarding record and handed to send_from() for replies.
     header,
     plen       - the query packet and its length. The packet is modified in
                  place (id rewritten, EDNS0 data added); header + PACKETSZ
                  is passed to the helpers as the limit of the buffer.
     now        - current time.
     forward    - existing forwarding record when the caller is retrying,
                  otherwise NULL.
     ad_reqd,
     do_bit     - recorded as FREC_AD_QUESTION / FREC_DO_QUESTION in the
                  record's flags (the latter only in HAVE_DNSSEC builds).

     Returns 1 when the query was sent upstream (or the DNSSEC retry path
     was taken), 0 when it was answered locally, could not be sent, or is a
     duplicate that will be answered by an earlier forward. */
  296 static int forward_query(int udpfd, union mysockaddr *udpaddr,
  297              union all_addr *dst_addr, unsigned int dst_iface,
  298              struct dns_header *header, size_t plen, time_t now, 
  299              struct frec *forward, int ad_reqd, int do_bit)
  300 {
  301   char *domain = NULL;
  302   int type = SERV_DO_DNSSEC, norebind = 0;
  303   union all_addr *addrp = NULL;
  304   unsigned int flags = 0;
  305   unsigned int fwd_flags = 0;
  306   struct server *start = NULL;
        /* hash from hash_questions(); used below to find an identical in-flight query */
  307   void *hash = hash_questions(header, plen, daemon->namebuff);
  308 #ifdef HAVE_DNSSEC
  309   int do_dnssec = 0;
  310 #endif
  311   unsigned int gotname = extract_request(header, plen, daemon->namebuff, NULL);
        /* non-NULL if the client's query already carries a pseudoheader */
  312   unsigned char *oph = find_pseudoheader(header, plen, NULL, NULL, NULL, NULL);
        /* set when this query comes from a source already recorded against the frec */
  313   int old_src = 0;
  314   
        /* do_bit is otherwise only referenced under HAVE_DNSSEC and at frec_err */
  315   (void)do_bit;
  316   
        /* fwd_flags is both the lookup key for lookup_frec_by_query() and the
           initial flags of a newly created record */
  317   if (header->hb4 & HB4_CD)
  318     fwd_flags |= FREC_CHECKING_DISABLED;
  319   if (ad_reqd)
  320     fwd_flags |= FREC_AD_QUESTION;
  321   if (oph)
  322     fwd_flags |= FREC_HAS_PHEADER;
  323 #ifdef HAVE_DNSSEC
  324   if (do_bit)
  325     fwd_flags |= FREC_DO_QUESTION;
  326 #endif
  327   
  328   /* Check for retry on existing query */
  329   if (forward)
  330     old_src = 1;
  331   else if ((forward = lookup_frec_by_query(hash, fwd_flags)))
  332     {
  333       struct frec_src *src;
  334       
            /* is this client (same query id and source address) already on the list? */
  335       for (src = &forward->frec_src; src; src = src->next)
  336     if (src->orig_id == ntohs(header->id) && 
  337         sockaddr_isequal(&src->source, udpaddr))
  338       break;
  339       
  340       if (src)
  341     old_src = 1;
  342       else
  343     {
  344       /* Existing query, but from new source, just add this 
  345          client to the list that will get the reply.*/
  346       
  347       /* Note whine_malloc() zeros memory. */
            /* allocate only when the free list is empty and fewer than
               ftabsize source records have been created so far */
  348       if (!daemon->free_frec_src &&
  349           daemon->frec_src_count < daemon->ftabsize &&
  350           (daemon->free_frec_src = whine_malloc(sizeof(struct frec_src))))
  351         {
  352           daemon->frec_src_count++;
  353           daemon->free_frec_src->next = NULL;
  354         }
  355       
  356       /* If we've been spammed with many duplicates, return REFUSED. */
  357       if (!daemon->free_frec_src)
  358         {
  359           query_full(now);
  360           goto frec_err;
  361         }
  362       
            /* pop a record off the free list and link it in directly after
               the record's embedded first source */
  363       src = daemon->free_frec_src;
  364       daemon->free_frec_src = src->next;
  365       src->next = forward->frec_src.next;
  366       forward->frec_src.next = src;
  367       src->orig_id = ntohs(header->id);
  368       src->source = *udpaddr;
  369       src->dest = *dst_addr;
  370       src->log_id = daemon->log_id;
  371       src->iface = dst_iface;
  372       src->fd = udpfd;
  373 
  374       /* closely spaced identical queries cannot be a try and a retry, so
  375          it's safe to wait for the reply from the first without
  376          forwarding the second. */
  377       if (difftime(now, forward->time) < 2)
  378         return 0;
  379     }
  380     }
  381 
  382   /* retry existing query */
  383   if (forward)
  384     {
  385       /* If we didn't get an answer advertising a maximal packet in EDNS,
  386      fall back to 1280, which should work everywhere on IPv6.
  387      If that generates an answer, it will become the new default
  388      for this server */
  389       forward->flags |= FREC_TEST_PKTSZ;
  390       
  391 #ifdef HAVE_DNSSEC
  392       /* If we've already got an answer to this query, but we're awaiting keys for validation,
  393      there's no point retrying the query, retry the key query instead...... */
  394       if (forward->blocking_query)
  395     {
  396       int fd, is_sign;
  397       unsigned char *pheader;
  398       
  399       forward->flags &= ~FREC_TEST_PKTSZ;
  400       
            /* follow the chain to the record that is not itself blocked */
  401       while (forward->blocking_query)
  402         forward = forward->blocking_query;
  403        
            /* header is reused as the buffer for the stashed packet of that
               record (presumably the key query that was sent - confirm) */
  404       blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
  405       plen = forward->stash_len;
  406       
  407       forward->flags |= FREC_TEST_PKTSZ;
  408       if (find_pseudoheader(header, plen, NULL, &pheader, &is_sign, NULL) && !is_sign)
  409         PUTSHORT(SAFE_PKTSZ, pheader);
  410       
  411       if ((fd = allocate_rfd(&forward->rfds, forward->sentto)) != -1)
  412         server_send_log(forward->sentto, fd, header, plen,
  413                 DUMP_SEC_QUERY,
  414                 F_NOEXTRA | F_DNSSEC, "retry", "dnssec");
  415 
            /* 1 is returned even when allocate_rfd() failed and nothing was sent */
  416       return 1;
  417     }
  418 #endif
  419 
  420       /* retry on existing query, from original source. Send to all available servers  */
            /* NOTE(review): forward->sentto is dereferenced unchecked here -
               assumes every existing record has sentto set; confirm. */
  421       domain = forward->sentto->domain;
  422       forward->sentto->failed_queries++;
  423       if (!option_bool(OPT_ORDER) && old_src)
  424     {
  425       forward->forwardall = 1;
  426       daemon->last_server = NULL;
  427     }
  428       type = forward->sentto->flags & SERV_TYPE;
  429 #ifdef HAVE_DNSSEC
  430       do_dnssec = forward->sentto->flags & SERV_DO_DNSSEC;
  431 #endif
  432 
            /* resume with the server after the one tried last time */
  433       if (!(start = forward->sentto->next))
  434     start = daemon->servers; /* at end of list, recycle */
  435       header->id = htons(forward->new_id);
  436     }
  437   else 
  438     {
  439       /* new query */
  440 
            /* flags != 0 afterwards means the answer is known from configuration */
  441       if (gotname)
  442     flags = search_servers(now, &addrp, gotname, daemon->namebuff, &type, &domain, &norebind);
  443       
  444 #ifdef HAVE_DNSSEC
  445       do_dnssec = type & SERV_DO_DNSSEC;
  446 #endif
            /* SERV_DO_DNSSEC is stripped so type can be compared in server_test_type() */
  447       type &= ~SERV_DO_DNSSEC;      
  448       
  449       /* may be no servers available. */
  450       if (daemon->servers && !flags)
  451     forward = get_new_frec(now, NULL, NULL);
  452       /* table full - flags == 0, return REFUSED */
  453       
  454       if (forward)
  455     {
            /* record the client and the query so the reply can be routed back */
  456       forward->frec_src.source = *udpaddr;
  457       forward->frec_src.orig_id = ntohs(header->id);
  458       forward->frec_src.dest = *dst_addr;
  459       forward->frec_src.iface = dst_iface;
  460       forward->frec_src.next = NULL;
  461       forward->frec_src.fd = udpfd;
  462       forward->new_id = get_id();
  463       memcpy(forward->hash, hash, HASH_SIZE);
  464       forward->forwardall = 0;
  465       forward->flags = fwd_flags;
  466       if (norebind)
  467         forward->flags |= FREC_NOREBIND;
            /* these bits are already present in fwd_flags assigned above */
  468       if (header->hb4 & HB4_CD)
  469         forward->flags |= FREC_CHECKING_DISABLED;
  470       if (ad_reqd)
  471         forward->flags |= FREC_AD_QUESTION;
  472 #ifdef HAVE_DNSSEC
  473       forward->work_counter = DNSSEC_WORK;
  474       if (do_bit)
  475         forward->flags |= FREC_DO_QUESTION;
  476 #endif
  477       
            /* the upstream query goes out under our own id */
  478       header->id = htons(forward->new_id);
  479       
  480       /* In strict_order mode, always try servers in the order 
  481          specified in resolv.conf, if a domain is given 
  482          always try all the available servers,
  483          otherwise, use the one last known to work. */
  484       
  485       if (type == 0)
  486         {
  487           if (option_bool(OPT_ORDER))
  488         start = daemon->servers;
                /* no last_server, or the FORWARD_TEST count / FORWARD_TIME
                   interval has been exceeded: try all servers again */
  489           else if (!(start = daemon->last_server) ||
  490                daemon->forwardcount++ > FORWARD_TEST ||
  491                difftime(now, daemon->forwardtime) > FORWARD_TIME)
  492         {
  493           start = daemon->servers;
  494           forward->forwardall = 1;
  495           daemon->forwardcount = 0;
  496           daemon->forwardtime = now;
  497         }
  498         }
  499       else
  500         {
  501           start = daemon->servers;
  502           if (!option_bool(OPT_ORDER))
  503         forward->forwardall = 1;
  504         }
  505     }
  506     }
  507 
  508   /* check for send errors here (no route to host) 
  509      if we fail to send to all nameservers, send back an error
  510      packet straight away (helps modem users when offline)  */
  511   
  512   if (!flags && forward)
  513     {
            /* remembered so the circular walk over the server list can stop */
  514       struct server *firstsentto = start;
  515       int subnet, cacheable, forwarded = 0;
  516       size_t edns0_len;
  517       unsigned char *pheader;
  518       
  519       /* If a query is retried, use the log_id for the retry when logging the answer. */
  520       forward->frec_src.log_id = daemon->log_id;
  521       
  522       plen = add_edns0_config(header, plen, ((unsigned char *)header) + PACKETSZ, &forward->frec_src.source, now, &subnet, &cacheable);
  523       
  524       if (subnet)
  525     forward->flags |= FREC_HAS_SUBNET;
  526 
  527       if (!cacheable)
  528     forward->flags |= FREC_NO_CACHE;
  529 
  530 #ifdef HAVE_DNSSEC
  531       if (option_bool(OPT_DNSSEC_VALID) && do_dnssec)
  532     {
  533       plen = add_do_bit(header, plen, ((unsigned char *) header) + PACKETSZ);
  534               
  535       /* For debugging, set Checking Disabled, otherwise, have the upstream check too,
  536          this allows it to select auth servers when one is returning bad data. */
  537       if (option_bool(OPT_DNSSEC_DEBUG))
  538         header->hb4 |= HB4_CD;
  539 
  540     }
  541 #endif
  542 
  543       if (find_pseudoheader(header, plen, &edns0_len, &pheader, NULL, NULL))
  544     {
  545       /* If there wasn't a PH before, and there is now, we added it. */
  546       if (!oph)
  547         forward->flags |= FREC_ADDED_PHEADER;
  548 
  549       /* If we're sending an EDNS0 with any options, we can't recreate the query from a reply. */
  550       if (edns0_len > 11)
  551         forward->flags |= FREC_HAS_EXTRADATA;
  552 
  553       /* Reduce udp size on retransmits. */
  554       if (forward->flags & FREC_TEST_PKTSZ)
  555         PUTSHORT(SAFE_PKTSZ, pheader);
  556     }
  557       
            /* walk the server list circularly from start until we are back
               at firstsentto, or until one send succeeds when not forwardall */
  558       while (1)
  559     { 
  560       int fd;
  561 
  562       /* only send to servers dealing with our domain.
  563          domain may be NULL, in which case server->domain 
  564          must be NULL also. */
  565       
  566       if (server_test_type(start, domain, type, 0) &&
  567           ((fd = allocate_rfd(&forward->rfds, start)) != -1))
  568         {
  569           
  570 #ifdef HAVE_CONNTRACK
  571           /* Copy connection mark of incoming query to outgoing connection. */
  572           if (option_bool(OPT_CONNTRACK))
  573         set_outgoing_mark(forward, fd);
  574 #endif
  575           
  576 #ifdef HAVE_DNSSEC
  577           if (option_bool(OPT_DNSSEC_VALID) && (forward->flags & FREC_ADDED_PHEADER))
  578         {
  579           /* Difficult one here. If our client didn't send EDNS0, we will have set the UDP
  580              packet size to 512. But that won't provide space for the RRSIGS in many cases.
  581              The RRSIGS will be stripped out before the answer goes back, so the packet should
  582              shrink again. So, if we added a do-bit, bump the udp packet size to the value
  583              known to be OK for this server. We check returned size after stripping and set
  584              the truncated bit if it's still too big. */          
  585           unsigned char *pheader;
  586           int is_sign;
  587           if (find_pseudoheader(header, plen, NULL, &pheader, &is_sign, NULL) && !is_sign)
  588             PUTSHORT(start->edns_pktsz, pheader);
  589         }
  590 #endif
  591 
                /* continue restarts the loop body without advancing start,
                   so the same server is tried again */
  592           if (retry_send(sendto(fd, (char *)header, plen, 0,
  593                     &start->addr.sa,
  594                     sa_len(&start->addr))))
  595         continue;
  596         
  597           if (errno == 0)
  598         {
  599 #ifdef HAVE_DUMPFILE
  600           dump_packet(DUMP_UP_QUERY, (void *)header, plen, NULL, &start->addr);
  601 #endif
  602           
  603           /* Keep info in case we want to re-send this packet */
  604           daemon->srv_save = start;
  605           daemon->packet_len = plen;
  606           daemon->fd_save = fd;
  607           
  608           if (!gotname)
  609             strcpy(daemon->namebuff, "query");
  610           log_query_mysockaddr(F_SERVER | F_FORWARD, daemon->namebuff,
  611                        &start->addr, NULL);
  612           start->queries++;
  613           forwarded = 1;
  614           forward->sentto = start;
                /* one successful send is enough unless sending to all servers */
  615           if (!forward->forwardall) 
  616             break;
  617           forward->forwardall++;
  618         }
  619         }
  620       
  621       if (!(start = start->next))
  622         start = daemon->servers;
  623       
  624       if (start == firstsentto)
  625         break;
  626     }
  627       
  628       if (forwarded)
  629     return 1;
  630       
  631       /* could not send on, prepare to return */ 
            /* put the client's own id back before the packet is reused as the reply */
  632       header->id = htons(forward->frec_src.orig_id);
  633       free_frec(forward); /* cancel */
  634     }     
  635   
  636   /* could not send on, return empty answer or address if known for whole domain */
  637  frec_err:
        /* udpfd == -1 means there is nowhere to send a reply */
  638   if (udpfd != -1)
  639     {
  640       plen = setup_reply(header, plen, addrp, flags, daemon->local_ttl);
            /* give the client a pseudoheader back if its query had one */
  641       if (oph)
  642     plen = add_pseudoheader(header, plen, ((unsigned char *) header) + PACKETSZ, daemon->edns_pktsz, 0, NULL, 0, do_bit, 0);
  643       send_from(udpfd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND), (char *)header, plen, udpaddr, dst_addr, dst_iface);
  644     }
  645 
  646   return 0;
  647 }
  648 
  /* Post-process a reply received from upstream before it is passed on.

     header, n     - the reply packet and its length; modified in place.
     now           - current time, passed to the helper functions.
     server        - server the reply came from; may be NULL (it is tested
                     before use).
     check_rebind,
     no_cache,
     cache_secure  - passed to extract_addresses(); cache_secure is cleared
                     locally whenever the reply is altered here.
     bogusanswer   - HAVE_DNSSEC builds: turn the reply into SERVFAIL unless
                     the CD bit is set or OPT_DNSSEC_DEBUG is on.
     ad_reqd,
     do_bit        - HAVE_DNSSEC builds: control the AD bit and whether
                     rrfilter() is applied to the reply.
     added_pheader - non-zero if the pseudoheader in the reply should be
                     stripped because the client did not send one.
     check_subnet,
     query_source  - when check_subnet is set, the reply is validated with
                     check_source() against query_source.

     Returns the length of the packet to send as computed by
     resize_packet(), or 0 if the reply is to be discarded. */
  649 static size_t process_reply(struct dns_header *header, time_t now, struct server *server, size_t n, int check_rebind, 
  650                 int no_cache, int cache_secure, int bogusanswer, int ad_reqd, int do_bit, int added_pheader, 
  651                 int check_subnet, union mysockaddr *query_source)
  652 {
  653   unsigned char *pheader, *sizep;
  654   char **sets = 0;
        /* NOTE(review): is_sign and plen are only assigned through
           find_pseudoheader() below, yet is_sign is read even when no
           pseudoheader is found - presumably find_pseudoheader() always
           stores to *is_sign; confirm. */
  655   int munged = 0, is_sign;
  656   unsigned int rcode = RCODE(header);
  657   size_t plen; 
  658   
        /* these are only referenced in HAVE_DNSSEC builds */
  659   (void)ad_reqd;
  660   (void)do_bit;
  661   (void)bogusanswer;
  662 
  663 #ifdef HAVE_IPSET
  664   if (daemon->ipsets && extract_request(header, n, daemon->namebuff, NULL))
  665     {
  666       /* Similar algorithm to search_servers. */
  667       struct ipsets *ipset_pos;
  668       unsigned int namelen = strlen(daemon->namebuff);
  669       unsigned int matchlen = 0;
            /* pick the sets of the longest configured domain that is a
               label-boundary suffix of the queried name */
  670       for (ipset_pos = daemon->ipsets; ipset_pos; ipset_pos = ipset_pos->next) 
  671     {
  672       unsigned int domainlen = strlen(ipset_pos->domain);
            /* NOTE(review): computed before the namelen >= domainlen test;
               only used once that test has passed. */
  673       char *matchstart = daemon->namebuff + namelen - domainlen;
  674       if (namelen >= domainlen && hostname_isequal(matchstart, ipset_pos->domain) &&
  675           (domainlen == 0 || namelen == domainlen || *(matchstart - 1) == '.' ) &&
  676           domainlen >= matchlen) 
  677         {
  678           matchlen = domainlen;
  679           sets = ipset_pos->sets;
  680         }
  681     }
  682     }
  683 #endif
  684 
        /* sizep is set by find_pseudoheader(); the code below reads the
           advertised UDP size at sizep, the extended RCODE at sizep[2] and
           the 16-bit flags word at sizep + 4 */
  685   if ((pheader = find_pseudoheader(header, n, &plen, &sizep, &is_sign, NULL)))
  686     {
  687       /* Get extended RCODE. */
  688       rcode |= sizep[2] << 4;
  689 
  690       if (check_subnet && !check_source(header, plen, pheader, query_source))
  691     {
  692       my_syslog(LOG_WARNING, _("discarding DNS reply: subnet option mismatch"));
  693       return 0;
  694     }
  695       
  696       if (!is_sign)
  697     {
  698       if (added_pheader)
  699         {
  700           /* client didn't send EDNS0, we added one, strip it off before returning answer. */
  701           n = rrfilter(header, n, 0);
                /* NULL tells resize_packet() below there is no pseudoheader to restore */
  702           pheader = NULL;
  703         }
  704       else
  705         {
  706           unsigned short udpsz;
  707 
  708           /* If upstream is advertising a larger UDP packet size
  709          than we allow, trim it so that we don't get overlarge
  710          requests for the client. We can't do this for signed packets. */
  711           GETSHORT(udpsz, sizep);
  712           if (udpsz > daemon->edns_pktsz)
  713         {
                /* step back over the size field just read, then overwrite it */
  714           sizep -= 2;
  715           PUTSHORT(daemon->edns_pktsz, sizep);
  716         }
  717 
  718 #ifdef HAVE_DNSSEC
  719           /* If the client didn't set the do bit, but we did, reset it. */
  720           if (option_bool(OPT_DNSSEC_VALID) && !do_bit)
  721         {
  722           unsigned short flags;
  723           sizep += 2; /* skip RCODE */
  724           GETSHORT(flags, sizep);
                /* clear the top bit of the flags word and write it back in place */
  725           flags &= ~0x8000;
  726           sizep -= 2;
  727           PUTSHORT(flags, sizep);
  728         }
  729 #endif
  730         }
  731     }
  732     }
  733   
  734   /* RFC 4035 sect 4.6 para 3 */
  735   if (!is_sign && !option_bool(OPT_DNSSEC_PROXY))
  736      header->hb4 &= ~HB4_AD;
  737   
        /* replies to anything other than a standard query get no further processing */
  738   if (OPCODE(header) != QUERY)
  739     return resize_packet(header, n, pheader, plen);
  740 
        /* error replies other than NXDOMAIN are logged and passed through unchanged */
  741   if (rcode != NOERROR && rcode != NXDOMAIN)
  742     {
  743       union all_addr a;
  744       a.log.rcode = rcode;
  745       log_query(F_UPSTREAM | F_RCODE, "error", &a, NULL);
  746       
  747       return resize_packet(header, n, pheader, plen);
  748     }
  749   
  750   /* Complain loudly if the upstream server is non-recursive. */
  751   if (!(header->hb4 & HB4_RA) && rcode == NOERROR &&
  752       server && !(server->flags & SERV_WARNED_RECURSIVE))
  753     {
  754       (void)prettyprint_addr(&server->addr, daemon->namebuff);
  755       my_syslog(LOG_WARNING, _("nameserver %s refused to do a recursive query"), daemon->namebuff);
            /* warn once per server, unless OPT_LOG is set, in which case every time */
  756       if (!option_bool(OPT_LOG))
  757     server->flags |= SERV_WARNED_RECURSIVE;
  758     }  
  759 
        /* an answer flagged by check_for_bogus_wildcard() is rewritten to NXDOMAIN */
  760   if (daemon->bogus_addr && rcode != NXDOMAIN &&
  761       check_for_bogus_wildcard(header, n, daemon->namebuff, now))
  762     {
  763       munged = 1;
  764       SET_RCODE(header, NXDOMAIN);
  765       header->hb3 &= ~HB3_AA;
  766       cache_secure = 0;
  767     }
  768   else 
  769     {
  770       int doctored = 0;
  771       
  772       if (rcode == NXDOMAIN && 
  773       extract_request(header, n, daemon->namebuff, NULL) &&
  774       check_for_local_domain(daemon->namebuff, now))
  775     {
  776       /* if we forwarded a query for a locally known name (because it was for 
  777          an unknown type) and the answer is NXDOMAIN, convert that to NODATA,
  778          since we know that the domain exists, even if upstream doesn't */
  779       munged = 1;
  780       header->hb3 |= HB3_AA;
  781       SET_RCODE(header, NOERROR);
  782       cache_secure = 0;
  783     }
  784       
            /* a non-zero return from extract_addresses() is treated as a
               rebind detection: the answer is emptied further down */
  785       if (extract_addresses(header, n, daemon->namebuff, now, sets, is_sign, check_rebind, no_cache, cache_secure, &doctored))
  786     {
  787       my_syslog(LOG_WARNING, _("possible DNS-rebind attack detected: %s"), daemon->namebuff);
  788       munged = 1;
  789       cache_secure = 0;
  790     }
  791 
  792       if (doctored)
  793     cache_secure = 0;
  794     }
  795   
  796 #ifdef HAVE_DNSSEC
  797   if (bogusanswer && !(header->hb4 & HB4_CD) && !option_bool(OPT_DNSSEC_DEBUG))
  798     {
  799       /* Bogus reply, turn into SERVFAIL */
  800       SET_RCODE(header, SERVFAIL);
  801       munged = 1;
  802     }
  803 
  804   if (option_bool(OPT_DNSSEC_VALID))
  805     {
            /* AD is cleared, then set again only if CD is clear, the client
               asked for it and cache_secure is still set */
  806       header->hb4 &= ~HB4_AD;
  807       
  808       if (!(header->hb4 & HB4_CD) && ad_reqd && cache_secure)
  809     header->hb4 |= HB4_AD;
  810       
  811       /* If the requestor didn't set the DO bit, don't return DNSSEC info. */
  812       if (!do_bit)
  813     n = rrfilter(header, n, 1);
  814     }
  815 #endif
  816 
  817   /* do this after extract_addresses. Ensure NODATA reply and remove
  818      nameserver info. */
  819   
  820   if (munged)
  821     {
  822       header->ancount = htons(0);
  823       header->nscount = htons(0);
  824       header->arcount = htons(0);
  825       header->hb3 &= ~HB3_TC;
  826     }
  827   
  828   /* the bogus-nxdomain stuff, doctor and NXDOMAIN->NODATA munging can all elide
  829      sections of the packet. Find the new length here and put back pseudoheader
  830      if it was removed. */
  831   return resize_packet(header, n, pheader, plen);
  832 }
  833 
  834 /* Handle one UDP packet arriving on fd from an upstream server.
         The packet is read into daemon->packet, its sender is checked against
         the list of configured servers, and it is matched to a forwarding
         record (frec). REFUSED/SERVFAIL answers may be retried via another
         server; otherwise the reply is optionally DNSSEC-validated, passed
         through process_reply() and sent to every requester recorded in the
         frec, which is then freed.
         fd:  socket the reply is read from.
         now: current time; compared against server->pktsz_reduced and
              handed on to the helpers called from here.
         sets new last_server */
  835 void reply_query(int fd, time_t now)
  836 {
  837   /* packet from peer server, extract data for cache, and send to
  838      original requester */
  839   struct dns_header *header;
  840   union mysockaddr serveraddr;
  841   struct frec *forward;
  842   socklen_t addrlen = sizeof(serveraddr);
  843   ssize_t n = recvfrom(fd, daemon->packet, daemon->packet_buff_sz, 0, &serveraddr.sa, &addrlen);
  844   size_t nn;
  845   struct server *server;
  846   void *hash;
  847 
  848   /* packet buffer overwritten */
  849   daemon->srv_save = NULL;
  850 
  851   /* Determine the address of the server replying  so that we can mark that as good */
  852   if (serveraddr.sa.sa_family == AF_INET6)
  853     serveraddr.in6.sin6_flowinfo = 0;
  854   
  855   header = (struct dns_header *)daemon->packet;
  856 
        /* Discard on a receive error or a short packet (n below the header size),
           and anything that does not carry the HB3_QR flag. */
  857   if (n < (int)sizeof(struct dns_header) || !(header->hb3 & HB3_QR))
  858     return;
  859   
  860   /* spoof check: answer must come from known server, */
  861   for (server = daemon->servers; server; server = server->next)
  862     if (!(server->flags & (SERV_LITERAL_ADDRESS | SERV_NO_ADDR)) &&
  863     sockaddr_isequal(&server->addr, &serveraddr))
  864       break;
  865   
  866   if (!server)
  867     return;
  868 
  869   /* If sufficient time has elapsed, try and expand UDP buffer size again. */
  870   if (difftime(now, server->pktsz_reduced) > UDP_TEST_TIME)
  871     server->edns_pktsz = daemon->edns_pktsz;
  872 
        /* Locate the forwarding record for this reply using its id, the socket it
           arrived on and the value returned by hash_questions(); a reply that
           matches no record is dropped. */
  873   hash = hash_questions(header, n, daemon->namebuff);
  874   
  875   if (!(forward = lookup_frec(ntohs(header->id), fd, hash)))
  876     return;
  877   
  878 #ifdef HAVE_DUMPFILE
  879   dump_packet((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) ? DUMP_SEC_REPLY : DUMP_UP_REPLY,
  880           (void *)header, n, &serveraddr, NULL);
  881 #endif
  882 
  883   /* log_query gets called indirectly all over the place, so 
  884      pass these in global variables - sorry. */
  885   daemon->log_display_id = forward->frec_src.log_id;
  886   daemon->log_source_addr = &forward->frec_src.source;
  887   
        /* With ignore_addr configured, a NOERROR reply for which
           check_for_ignored_address() returns non-zero is dropped here;
           the forwarding record is left in place. */
  888   if (daemon->ignore_addr && RCODE(header) == NOERROR &&
  889       check_for_ignored_address(header, n))
  890     return;
  891 
  892   /* Note: if we send extra options in the EDNS0 header, we can't recreate
  893      the query from the reply. */
  894   if ((RCODE(header) == REFUSED || RCODE(header) == SERVFAIL) &&
  895       forward->forwardall == 0 &&
  896       !(forward->flags & FREC_HAS_EXTRADATA))
  897     /* for broken servers, attempt to send to another one. */
  898     {
  899       unsigned char *pheader;
  900       size_t plen;
  901       int is_sign;
  902 
  903 #ifdef HAVE_DNSSEC
            /* DNSKEY/DS queries were generated here, so the saved copy in
               forward->stash is resent rather than rebuilt from the reply. */
  904       if (forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY))
  905     {
  906       struct server *start;
  907       
  908       blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
  909       plen = forward->stash_len;
  910 
  911       forward->forwardall = 2; /* only retry once */
  912       start = forward->sentto;
  913 
  914       /* for non-domain specific servers, see if we can find another to try. */
  915       if ((forward->sentto->flags & SERV_TYPE) == 0)
  916         while (1)
  917           {
  918         if (!(start = start->next))
  919           start = daemon->servers;
  920         if (start == forward->sentto)
  921           break;
  922         
  923         if ((start->flags & SERV_TYPE) == 0 &&
  924             (start->flags & SERV_DO_DNSSEC))
  925           break;
  926           }
  927         
  928       
            /* fd is reused from here on as the socket the retry is sent from. */
  929       if ((fd = allocate_rfd(&forward->rfds, start)) != -1)
  930         server_send_log(start, fd, header, plen,
  931                 DUMP_SEC_QUERY,
  932                 F_NOEXTRA | F_DNSSEC, "retry", "dnssec");
  933       return;
  934     }
  935 #endif
  936       
  937       /* In strict order mode, there must be a server later in the chain
  938      left to send to, otherwise without the forwardall mechanism,
  939      code further on will cycle around the list forever if they
  940      all return REFUSED. Note that server is always non-NULL before 
  941      this executes. */
  942       if (option_bool(OPT_ORDER))
  943     for (server = forward->sentto->next; server; server = server->next)
  944       if (!(server->flags & (SERV_LITERAL_ADDRESS | SERV_HAS_DOMAIN | SERV_FOR_NODOTS | SERV_NO_ADDR | SERV_LOOP)))
  945         break;
  946 
  947       /* recreate query from reply */
  948       pheader = find_pseudoheader(header, (size_t)n, &plen, NULL, &is_sign, NULL);
  949       if (!is_sign && server)
  950     {
  951       header->ancount = htons(0);
  952       header->nscount = htons(0);
  953       header->arcount = htons(0);
  954       if ((nn = resize_packet(header, (size_t)n, pheader, plen)))
  955         {
                  /* Clear the response-only header bits, then restore the CD/AD/DO
                     settings recorded in the frec flags before re-forwarding. */
  956           header->hb3 &= ~(HB3_QR | HB3_AA | HB3_TC);
  957           header->hb4 &= ~(HB4_RA | HB4_RCODE | HB4_CD | HB4_AD);
  958           if (forward->flags & FREC_CHECKING_DISABLED)
  959         header->hb4 |= HB4_CD;
  960           if (forward->flags & FREC_AD_QUESTION)
  961         header->hb4 |= HB4_AD;
  962           if (forward->flags & FREC_DO_QUESTION)
  963         add_do_bit(header, nn,  (unsigned char *)pheader + plen);
  964           forward_query(-1, NULL, NULL, 0, header, nn, now, forward, forward->flags & FREC_AD_QUESTION, forward->flags & FREC_DO_QUESTION);
  965           return;
  966         }
  967     }
  968     }   
  969    
        /* For a server with no SERV_TYPE bits set, pick the server to record in
           daemon->last_server: none after REFUSED, otherwise the configured
           server whose address matches the sender (defaulting to forward->sentto).
           The record is only updated when OPT_ALL_SERVERS is off. */
  970   server = forward->sentto;
  971   if ((forward->sentto->flags & SERV_TYPE) == 0)
  972     {
  973       if (RCODE(header) == REFUSED)
  974     server = NULL;
  975       else
  976     {
  977       struct server *last_server;
  978       
  979       /* find good server by address if possible, otherwise assume the last one we sent to */ 
  980       for (last_server = daemon->servers; last_server; last_server = last_server->next)
  981         if (!(last_server->flags & (SERV_LITERAL_ADDRESS | SERV_HAS_DOMAIN | SERV_FOR_NODOTS | SERV_NO_ADDR)) &&
  982         sockaddr_isequal(&last_server->addr, &serveraddr))
  983           {
  984         server = last_server;
  985         break;
  986           }
  987     } 
  988       if (!option_bool(OPT_ALL_SERVERS))
  989     daemon->last_server = server;
  990     }
  991  
  992   /* We tried resending to this server with a smaller maximum size and got an answer.
  993      Make that permanent. To avoid reducing the packet size for a single dropped packet,
  994      only do this when we get a truncated answer, or one larger than the safe size. */
  995   if (forward->sentto->edns_pktsz > SAFE_PKTSZ && (forward->flags & FREC_TEST_PKTSZ) && 
  996       ((header->hb3 & HB3_TC) || n >= SAFE_PKTSZ))
  997     {
  998       forward->sentto->edns_pktsz = SAFE_PKTSZ;
  999       forward->sentto->pktsz_reduced = now;
 1000       (void)prettyprint_addr(&forward->sentto->addr, daemon->addrbuff);
 1001       my_syslog(LOG_WARNING, _("reducing DNS packet size for nameserver %s to %d"), daemon->addrbuff, SAFE_PKTSZ);
 1002     }
 1003 
 1004     
 1005   /* If the answer is an error, keep the forward record in place in case
 1006      we get a good reply from another server. Kill it when we've
 1007      had replies from all to avoid filling the forwarding table when
 1008      everything is broken */
        /* Because of short-circuit evaluation, forwardall is only decremented
           when it is non-zero on entry. */
 1009   if (forward->forwardall == 0 || --forward->forwardall == 1 || RCODE(header) != REFUSED)
 1010     {
 1011       int check_rebind = 0, no_cache_dnssec = 0, cache_secure = 0, bogusanswer = 0;
 1012       
 1013       if (option_bool(OPT_NO_REBIND))
 1014     check_rebind = !(forward->flags & FREC_NOREBIND);
 1015       
 1016       /*   Don't cache replies where DNSSEC validation was turned off, either
 1017        the upstream server told us so, or the original query specified it.  */
 1018       if ((header->hb4 & HB4_CD) || (forward->flags & FREC_CHECKING_DISABLED))
 1019     no_cache_dnssec = 1;
 1020       
 1021 #ifdef HAVE_DNSSEC
 1022       if ((forward->sentto->flags & SERV_DO_DNSSEC) && 
 1023       option_bool(OPT_DNSSEC_VALID) && !(forward->flags & FREC_CHECKING_DISABLED))
 1024     {
 1025       int status = 0;
 1026 
 1027       /* We've had a reply already, which we're validating. Ignore this duplicate */
 1028       if (forward->blocking_query)
 1029         return;
 1030       
 1031        /* Truncated answer can't be validated.
 1032           If this is an answer to a DNSSEC-generated query, we still
 1033           need to get the client to retry over TCP, so return
 1034           an answer with the TC bit set, even if the actual answer fits.
 1035        */
 1036       if (header->hb3 & HB3_TC)
 1037         status = STAT_TRUNCATED;
 1038       
            /* Each pass validates the packet currently in header. The loop is left
               by break once the original answer (a frec with no dependent) has a
               final status, or by return when a further DNSKEY/DS query has to be
               sent first. */
 1039       while (1)
 1040         {
 1041           /* As soon as anything returns BOGUS, we stop and unwind, to do otherwise
 1042          would invite infinite loops, since the answers to DNSKEY and DS queries
 1043          will not be cached, so they'll be repeated. */
 1044           if (status != STAT_BOGUS && status != STAT_TRUNCATED && status != STAT_ABANDONED)
 1045         {
 1046           if (forward->flags & FREC_DNSKEY_QUERY)
 1047             status = dnssec_validate_by_ds(now, header, n, daemon->namebuff, daemon->keyname, forward->class);
 1048           else if (forward->flags & FREC_DS_QUERY)
 1049             status = dnssec_validate_ds(now, header, n, daemon->namebuff, daemon->keyname, forward->class);
 1050           else
 1051             status = dnssec_validate_reply(now, header, n, daemon->namebuff, daemon->keyname, &forward->class, 
 1052                            !option_bool(OPT_DNSSEC_IGN_NS) && (forward->sentto->flags & SERV_DO_DNSSEC),
 1053                            NULL, NULL, NULL);
 1054 #ifdef HAVE_DUMPFILE
 1055           if (status == STAT_BOGUS)
 1056             dump_packet((forward->flags & (FREC_DNSKEY_QUERY | FREC_DS_QUERY)) ? DUMP_SEC_BOGUS : DUMP_BOGUS,
 1057                 header, (size_t)n, &serveraddr, NULL);
 1058 #endif
 1059         }
 1060           
 1061           /* Can't validate, as we're missing key data. Put this
 1062          answer aside, whilst we get that. */     
 1063           if (status == STAT_NEED_DS || status == STAT_NEED_KEY)
 1064         {
 1065           struct frec *new, *orig;
 1066           
 1067           /* Free any saved query */
 1068           if (forward->stash)
 1069             blockdata_free(forward->stash);
 1070           
 1071           /* Now save reply pending receipt of key data */
 1072           if (!(forward->stash = blockdata_alloc((char *)header, n)))
 1073             return;
 1074           forward->stash_len = n;
 1075           
 1076           /* Find the original query that started it all.... */
 1077           for (orig = forward; orig->dependent; orig = orig->dependent);
 1078           
 1079           /* Make sure we don't expire and free the orig frec during the
 1080              allocation of a new one. */
                /* Every subsidiary query decrements orig->work_counter; validation is
                   abandoned once it reaches zero or no new frec can be obtained. */
 1081           if (--orig->work_counter == 0 || !(new = get_new_frec(now, NULL, orig)))
 1082             status = STAT_ABANDONED;
 1083           else
 1084             {
 1085               int querytype, fd, type = SERV_DO_DNSSEC;
 1086               struct frec *next = new->next;
 1087               char *domain;
 1088               
 1089               *new = *forward; /* copy everything, then overwrite */
 1090               new->next = next;
 1091               new->blocking_query = NULL;
 1092 
 1093               /* Find server to forward to. This will normally be the 
 1094              same as for the original query, but may be another if
 1095              servers for domains are involved. */             
 1096               if (search_servers(now, NULL, F_DNSSECOK, daemon->keyname, &type, &domain, NULL) == 0)
 1097             {
 1098               struct server *start, *new_server = NULL;
 1099               start = server = forward->sentto;
 1100               
 1101               while (1)
 1102                 {
 1103                   if (server_test_type(start, domain, type, SERV_DO_DNSSEC))
 1104                 {
 1105                   new_server = start;
 1106                   if (server == start)
 1107                     {
 1108                       new_server = NULL;
 1109                       break;
 1110                     }
 1111                 }
 1112                   
 1113                   if (!(start = start->next))
 1114                 start = daemon->servers;
 1115                   if (start == server)
 1116                 break;
 1117                 }
 1118               
 1119               if (new_server)
 1120                 server = new_server;
 1121             }
 1122               
 1123               new->sentto = server;
 1124               new->rfds = NULL;
 1125               new->frec_src.next = NULL;
 1126               new->flags &= ~(FREC_DNSKEY_QUERY | FREC_DS_QUERY | FREC_HAS_EXTRADATA);
 1127               new->forwardall = 0;
 1128               
 1129               new->dependent = forward; /* to find query awaiting new one. */
 1130               forward->blocking_query = new; /* for garbage cleaning */
 1131               /* validate routines leave name of required record in daemon->keyname */
 1132               if (status == STAT_NEED_KEY)
 1133             {
 1134               new->flags |= FREC_DNSKEY_QUERY; 
 1135               querytype = T_DNSKEY;
 1136             }
 1137               else 
 1138             {
 1139               new->flags |= FREC_DS_QUERY;
 1140               querytype = T_DS;
 1141             }
 1142 
 1143               nn = dnssec_generate_query(header,((unsigned char *) header) + server->edns_pktsz,
 1144                          daemon->keyname, forward->class, querytype, server->edns_pktsz);
 1145 
 1146               memcpy(new->hash, hash_questions(header, nn, daemon->namebuff), HASH_SIZE);
 1147               new->new_id = get_id();
 1148               header->id = htons(new->new_id);
 1149               /* Save query for retransmission */
                    /* NOTE(review): the result of blockdata_alloc() is not checked here,
                       unlike the allocation of forward->stash above - confirm that a NULL
                       stash is tolerated by the retry path. */
 1150               new->stash = blockdata_alloc((char *)header, nn);
 1151               new->stash_len = nn;
 1152               
 1153               /* Don't resend this. */
 1154               daemon->srv_save = NULL;
 1155               
 1156               if ((fd = allocate_rfd(&new->rfds, server)) != -1)
 1157             {
 1158 #ifdef HAVE_CONNTRACK
 1159               if (option_bool(OPT_CONNTRACK))
 1160                 set_outgoing_mark(orig, fd);
 1161 #endif
 1162               server_send_log(server, fd, header, nn, DUMP_SEC_QUERY,
 1163                       F_NOEXTRA | F_DNSSEC, daemon->keyname,
 1164                       querystr("dnssec-query", querytype));
 1165               server->queries++;
 1166             }
 1167             }         
 1168           return;
 1169         }
 1170       
 1171           /* Validated original answer, all done. */
 1172           if (!forward->dependent)
 1173         break;
 1174           
 1175           /* validated subsidiary query, (and cached result)
 1176          pop that and return to the previous query we were working on. */
 1177           struct frec *prev = forward->dependent;
 1178           free_frec(forward);
 1179           forward = prev;
 1180           forward->blocking_query = NULL; /* already gone */
 1181           blockdata_retrieve(forward->stash, forward->stash_len, (void *)header);
 1182           n = forward->stash_len;
 1183         }
 1184     
 1185       
            /* Validation ran, so discard the value derived from the CD bit above;
               the flag is set again below for a BOGUS status or FREC_NO_CACHE. */
 1186       no_cache_dnssec = 0;
 1187       
 1188       if (status == STAT_TRUNCATED)
 1189         header->hb3 |= HB3_TC;
 1190       else
 1191         {
 1192           char *result, *domain = "result";
 1193           
                /* An abandoned validation is logged as ABANDONED but is treated
                   as BOGUS from here on. */
 1194           if (status == STAT_ABANDONED)
 1195         {
 1196           result = "ABANDONED";
 1197           status = STAT_BOGUS;
 1198         }
 1199           else
 1200         result = (status == STAT_SECURE ? "SECURE" : (status == STAT_INSECURE ? "INSECURE" : "BOGUS"));
 1201           
 1202           if (status == STAT_BOGUS && extract_request(header, n, daemon->namebuff, NULL))
 1203         domain = daemon->namebuff;
 1204           
 1205           log_query(F_SECSTAT, domain, NULL, result);
 1206         }
 1207       
 1208       if (status == STAT_SECURE)
 1209         cache_secure = 1;
 1210       else if (status == STAT_BOGUS)
 1211         {
 1212           no_cache_dnssec = 1;
 1213           bogusanswer = 1;
 1214         }
 1215     }
 1216 
 1217 #endif
 1218 
 1219       /* restore CD bit to the value in the query */
 1220       if (forward->flags & FREC_CHECKING_DISABLED)
 1221     header->hb4 |= HB4_CD;
 1222       else
 1223     header->hb4 &= ~HB4_CD;
 1224 
 1225       /* Never cache answers which are contingent on the source or MAC address EDNS0 option,
 1226      since the cache is ignorant of such things. */
 1227       if (forward->flags & FREC_NO_CACHE)
 1228     no_cache_dnssec = 1;
 1229       
            /* A zero return from process_reply() means nothing is sent; the frec
               is still freed below. */
 1230       if ((nn = process_reply(header, now, forward->sentto, (size_t)n, check_rebind, no_cache_dnssec, cache_secure, bogusanswer, 
 1231                   forward->flags & FREC_AD_QUESTION, forward->flags & FREC_DO_QUESTION, 
 1232                   forward->flags & FREC_ADDED_PHEADER, forward->flags & FREC_HAS_SUBNET, &forward->frec_src.source)))
 1233     {
 1234       struct frec_src *src;
 1235 
 1236       header->id = htons(forward->frec_src.orig_id);
 1237       header->hb4 |= HB4_RA; /* recursion if available */
 1238 #ifdef HAVE_DNSSEC
 1239       /* We added an EDNS0 header for the purpose of getting DNSSEC RRs, and set the value of the UDP payload size
 1240          greater than the no-EDNS0-implied 512 to have space for the RRSIGS. If, having stripped them and the EDNS0
 1241              header, the answer is still bigger than 512, truncate it and mark it so. The client then retries with TCP. */
 1242       if (option_bool(OPT_DNSSEC_VALID) && (forward->flags & FREC_ADDED_PHEADER) && (nn > PACKETSZ))
 1243         {
 1244           header->ancount = htons(0);
 1245           header->nscount = htons(0);
 1246           header->arcount = htons(0);
 1247           header->hb3 |= HB3_TC;
 1248           nn = resize_packet(header, nn, NULL, 0);
 1249         }
 1250 #endif
 1251 
            /* Send the same packet to each recorded requester, rewriting the id
               to the one that requester used. */
 1252       for (src = &forward->frec_src; src; src = src->next)
 1253         {
 1254           header->id = htons(src->orig_id);
 1255           
 1256 #ifdef HAVE_DUMPFILE
 1257           dump_packet(DUMP_REPLY, daemon->packet, (size_t)nn, NULL, &src->source);
 1258 #endif
 1259           
 1260           send_from(src->fd, option_bool(OPT_NOWILD) || option_bool (OPT_CLEVERBIND), daemon->packet, nn, 
 1261             &src->source, &src->dest, src->iface);
 1262 
 1263           if (option_bool(OPT_EXTRALOG) && src != &forward->frec_src)
 1264         {
 1265           daemon->log_display_id = src->log_id;
 1266           daemon->log_source_addr = &src->source;
 1267           log_query(F_UPSTREAM, "query", NULL, "duplicate");
 1268         }
 1269         }
 1270     }
 1271 
 1272       free_frec(forward); /* cancel */
 1273     }
 1274 }
 1275 
 1276 /* Receive one UDP query on listen->fd and deal with it.
         The datagram is read into daemon->packet with recvmsg(). Short or
         truncated datagrams, packets with HB3_QR set and packets with a zero
         source port are dropped. Depending on configuration the source network
         and the arrival interface/destination address are checked as well.
         The query is then answered by answer_auth() or answer_request(), or,
         failing a local answer, handed to forward_query(); the matching
         daemon->metrics counter is incremented.
         listen: listener whose socket has a datagram waiting.
         now:    current time, passed on to the answering/forwarding helpers. */
 1277 void receive_query(struct listener *listen, time_t now)
 1278 {
 1279   struct dns_header *header = (struct dns_header *)daemon->packet;
 1280   union mysockaddr source_addr;
 1281   unsigned char *pheader;
 1282   unsigned short type, udp_size = PACKETSZ; /* default if no EDNS0 */
 1283   union all_addr dst_addr;
 1284   struct in_addr netmask, dst_addr_4;
 1285   size_t m;
 1286   ssize_t n;
 1287   int if_index = 0, auth_dns = 0, do_bit = 0, have_pseudoheader = 0;
 1288 #ifdef HAVE_AUTH
 1289   int local_auth = 0;
 1290 #endif
 1291   struct iovec iov[1];
 1292   struct msghdr msg;
 1293   struct cmsghdr *cmptr;
        /* Buffer for recvmsg() ancillary data; the union is as large as its
           largest member for the platform being built. */
 1294   union {
 1295     struct cmsghdr align; /* this ensures alignment */
 1296     char control6[CMSG_SPACE(sizeof(struct in6_pktinfo))];
 1297 #if defined(HAVE_LINUX_NETWORK)
 1298     char control[CMSG_SPACE(sizeof(struct in_pktinfo))];
 1299 #elif defined(IP_RECVDSTADDR) && defined(HAVE_SOLARIS_NETWORK)
 1300     char control[CMSG_SPACE(sizeof(struct in_addr)) +
 1301          CMSG_SPACE(sizeof(unsigned int))];
 1302 #elif defined(IP_RECVDSTADDR)
 1303     char control[CMSG_SPACE(sizeof(struct in_addr)) +
 1304          CMSG_SPACE(sizeof(struct sockaddr_dl))];
 1305 #endif
 1306   } control_u;
 1307   int family = listen->addr.sa.sa_family;
 1308    /* Can always get recvd interface for IPv6 */
 1309   int check_dst = !option_bool(OPT_NOWILD) || family == AF_INET6;
 1310 
 1311   /* packet buffer overwritten */
 1312   daemon->srv_save = NULL;
 1313 
 1314   dst_addr_4.s_addr = dst_addr.addr4.s_addr = 0;
 1315   netmask.s_addr = 0;
 1316   
        /* With OPT_NOWILD and a listener tied to an interface, the destination
           address, netmask and auth flag are taken from that interface. */
 1317   if (option_bool(OPT_NOWILD) && listen->iface)
 1318     {
 1319       auth_dns = listen->iface->dns_auth;
 1320      
 1321       if (family == AF_INET)
 1322     {
 1323       dst_addr_4 = dst_addr.addr4 = listen->iface->addr.in.sin_addr;
 1324       netmask = listen->iface->netmask;
 1325     }
 1326     }
 1327   
        /* Accept at most daemon->edns_pktsz bytes; a longer datagram is flagged
           MSG_TRUNC by recvmsg() and dropped below. */
 1328   iov[0].iov_base = daemon->packet;
 1329   iov[0].iov_len = daemon->edns_pktsz;
 1330     
 1331   msg.msg_control = control_u.control;
 1332   msg.msg_controllen = sizeof(control_u);
 1333   msg.msg_flags = 0;
 1334   msg.msg_name = &source_addr;
 1335   msg.msg_namelen = sizeof(source_addr);
 1336   msg.msg_iov = iov;
 1337   msg.msg_iovlen = 1;
 1338   
 1339   if ((n = recvmsg(listen->fd, &msg, 0)) == -1)
 1340     return;
 1341   
        /* Drop packets shorter than a DNS header, truncated datagrams, and
           anything that has HB3_QR set. */
 1342   if (n < (int)sizeof(struct dns_header) || 
 1343       (msg.msg_flags & MSG_TRUNC) ||
 1344       (header->hb3 & HB3_QR))
 1345     return;
 1346 
 1347   /* Clear buffer beyond request to avoid risk of
 1348      information disclosure. */
 1349   memset(daemon->packet + n, 0, daemon->edns_pktsz - n);
 1350   
        /* The address family is overwritten with that of the listening socket. */
 1351   source_addr.sa.sa_family = family;
 1352   
 1353   if (family == AF_INET)
 1354     {
 1355        /* Source-port == 0 is an error, we can't send back to that. 
 1356       http://www.ietf.org/mail-archive/web/dnsop/current/msg11441.html */
 1357       if (source_addr.in.sin_port == 0)
 1358     return;
 1359     }
 1360   else
 1361     {
 1362       /* Source-port == 0 is an error, we can't send back to that. */
 1363       if (source_addr.in6.sin6_port == 0)
 1364     return;
 1365       source_addr.in6.sin6_flowinfo = 0;
 1366     }
 1367   
 1368   /* We can be configured to only accept queries from at-most-one-hop-away addresses. */
 1369   if (option_bool(OPT_LOCAL_SERVICE))
 1370     {
 1371       struct addrlist *addr;
 1372 
            /* Search daemon->interface_addrs for an entry on the same network as
               the source; addr is left NULL when none matches. */
 1373       if (family == AF_INET6) 
 1374     {
 1375       for (addr = daemon->interface_addrs; addr; addr = addr->next)
 1376         if ((addr->flags & ADDRLIST_IPV6) &&
 1377         is_same_net6(&addr->addr.addr6, &source_addr.in6.sin6_addr, addr->prefixlen))
 1378           break;
 1379     }
 1380       else
 1381     {
            /* This local shadows the function-level netmask, which is not
               modified in this branch. */
 1382       struct in_addr netmask;
 1383       for (addr = daemon->interface_addrs; addr; addr = addr->next)
 1384         {
                /* NOTE(review): if addr->prefixlen can be 0 the shift count is 32,
                   which is undefined when in_addr_t is 32 bits wide - confirm the
                   range of prefixlen for entries in this list. */
 1385           netmask.s_addr = htonl(~(in_addr_t)0 << (32 - addr->prefixlen));
 1386           if (!(addr->flags & ADDRLIST_IPV6) &&
 1387           is_same_net(addr->addr.addr4, source_addr.in.sin_addr, netmask))
 1388         break;
 1389         }
 1390     }
 1391       if (!addr)
 1392     {
            /* static flag: the warning is logged at most once. */
 1393       static int warned = 0;
 1394       if (!warned)
 1395         {
 1396           my_syslog(LOG_WARNING, _("Ignoring query from non-local network"));
 1397           warned = 1;
 1398         }
 1399       return;
 1400     }
 1401     }
 1402         
 1403   if (check_dst)
 1404     {
 1405       struct ifreq ifr;
 1406 
            /* No ancillary data was returned, so the destination cannot be
               determined: drop the query. */
 1407       if (msg.msg_controllen < sizeof(struct cmsghdr))
 1408     return;
 1409 
            /* Pull the destination address and arrival interface index out of the
               control messages; which message types are used depends on platform. */
 1410 #if defined(HAVE_LINUX_NETWORK)
 1411       if (family == AF_INET)
 1412     for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
 1413       if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_PKTINFO)
 1414         {
 1415           union {
 1416         unsigned char *c;
 1417         struct in_pktinfo *p;
 1418           } p;
 1419           p.c = CMSG_DATA(cmptr);
 1420           dst_addr_4 = dst_addr.addr4 = p.p->ipi_spec_dst;
 1421           if_index = p.p->ipi_ifindex;
 1422         }
 1423 #elif defined(IP_RECVDSTADDR) && defined(IP_RECVIF)
 1424       if (family == AF_INET)
 1425     {
 1426       for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
 1427         {
 1428           union {
 1429         unsigned char *c;
 1430         unsigned int *i;
 1431         struct in_addr *a;
 1432 #ifndef HAVE_SOLARIS_NETWORK
 1433         struct sockaddr_dl *s;
 1434 #endif
 1435           } p;
 1436            p.c = CMSG_DATA(cmptr);
 1437            if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVDSTADDR)
 1438          dst_addr_4 = dst_addr.addr4 = *(p.a);
 1439            else if (cmptr->cmsg_level == IPPROTO_IP && cmptr->cmsg_type == IP_RECVIF)
 1440 #ifdef HAVE_SOLARIS_NETWORK
 1441          if_index = *(p.i);
 1442 #else
 1443              if_index = p.s->sdl_index;
 1444 #endif
 1445         }
 1446     }
 1447 #endif
 1448       
 1449       if (family == AF_INET6)
 1450     {
 1451       for (cmptr = CMSG_FIRSTHDR(&msg); cmptr; cmptr = CMSG_NXTHDR(&msg, cmptr))
 1452         if (cmptr->cmsg_level == IPPROTO_IPV6 && cmptr->cmsg_type == daemon->v6pktinfo)
 1453           {
 1454         union {
 1455           unsigned char *c;
 1456           struct in6_pktinfo *p;
 1457         } p;
 1458         p.c = CMSG_DATA(cmptr);
 1459           
 1460         dst_addr.addr6 = p.p->ipi6_addr;
 1461         if_index = p.p->ipi6_ifindex;
 1462           }
 1463     }
 1464       
 1465       /* enforce available interface configuration */
 1466       
 1467       if (!indextoname(listen->fd, if_index, ifr.ifr_name))
 1468     return;
 1469       
            /* If iface_check() rejects the interface, re-enumerate interfaces
               (unless OPT_CLEVERBIND is set) and accept the query only if one of
               the loopback/label exceptions applies. */
 1470       if (!iface_check(family, &dst_addr, ifr.ifr_name, &auth_dns))
 1471     {
 1472        if (!option_bool(OPT_CLEVERBIND))
 1473          enumerate_interfaces(0); 
 1474        if (!loopback_exception(listen->fd, family, &dst_addr, ifr.ifr_name) &&
 1475            !label_exception(if_index, family, &dst_addr))
 1476          return;
 1477     }
 1478 
 1479       if (family == AF_INET && option_bool(OPT_LOCALISE))
 1480     {
 1481       struct irec *iface;
 1482       
 1483       /* get the netmask of the interface which has the address we were sent to.
 1484          This is not necessarily the interface we arrived on. */
 1485       
 1486       for (iface = daemon->interfaces; iface; iface = iface->next)
 1487         if (iface->addr.sa.sa_family == AF_INET &&
 1488         iface->addr.in.sin_addr.s_addr == dst_addr_4.s_addr)
 1489           break;
 1490       
 1491       /* interface may be new */
 1492       if (!iface && !option_bool(OPT_CLEVERBIND))
 1493         enumerate_interfaces(0); 
 1494       
            /* Repeat the search, now over the possibly refreshed interface list. */
 1495       for (iface = daemon->interfaces; iface; iface = iface->next)
 1496         if (iface->addr.sa.sa_family == AF_INET &&
 1497         iface->addr.in.sin_addr.s_addr == dst_addr_4.s_addr)
 1498           break;
 1499       
 1500       /* If we failed, abandon localisation */
 1501       if (iface)
 1502         netmask = iface->netmask;
 1503       else
 1504         dst_addr_4.s_addr = 0;
 1505     }
 1506     }
 1507    
 1508   /* log_query gets called indirectly all over the place, so 
 1509      pass these in global variables - sorry. */
 1510   daemon->log_display_id = ++daemon->log_id;
 1511   daemon->log_source_addr = &source_addr;
 1512 
 1513 #ifdef HAVE_DUMPFILE
 1514   dump_packet(DUMP_QUERY, daemon->packet, (size_t)n, &source_addr, NULL);
 1515 #endif
 1516       
        /* When a question can be extracted: log it, check whether it falls in a
           zone we are authoritative for, and run the forwarding-loop check. */
 1517   if (extract_request(header, (size_t)n, daemon->namebuff, &type))
 1518     {
 1519 #ifdef HAVE_AUTH
 1520       struct auth_zone *zone;
 1521 #endif
 1522       char *types = querystr(auth_dns ? "auth" : "query", type);
 1523 
 1524       log_query_mysockaddr(F_QUERY | F_FORWARD, daemon->namebuff,
 1525                &source_addr, types);
 1526 
 1527 #ifdef HAVE_AUTH
 1528       /* find queries for zones we're authoritative for, and answer them directly */
 1529       if (!auth_dns && !option_bool(OPT_LOCALISE))
 1530     for (zone = daemon->auth_zones; zone; zone = zone->next)
 1531       if (in_zone(zone, daemon->namebuff, NULL))
 1532         {
 1533           auth_dns = 1;
 1534           local_auth = 1;
 1535           break;
 1536         }
 1537 #endif
 1538       
 1539 #ifdef HAVE_LOOP
 1540       /* Check for forwarding loop */
 1541       if (detect_loop(daemon->namebuff, type))
 1542     return;
 1543 #endif
 1544     }
 1545   
        /* If the query carries an EDNS0 pseudoheader, take the advertised UDP
           size and the DO flag from it. */
 1546   if (find_pseudoheader(header, (size_t)n, NULL, &pheader, NULL, NULL))
 1547     { 
 1548       unsigned short flags;
 1549       
 1550       have_pseudoheader = 1;
 1551       GETSHORT(udp_size, pheader);
 1552       pheader += 2; /* ext_rcode */
 1553       GETSHORT(flags, pheader);
 1554       
 1555       if (flags & 0x8000)
 1556     do_bit = 1;/* do bit */ 
 1557     
 1558       /* If the client provides an EDNS0 UDP size, use that to limit our reply.
 1559      (bounded by the maximum configured). If no EDNS0, then it
 1560      defaults to 512 */
 1561       if (udp_size > daemon->edns_pktsz)
 1562     udp_size = daemon->edns_pktsz;
 1563       else if (udp_size < PACKETSZ)
 1564     udp_size = PACKETSZ; /* Sanity check - can't reduce below default. RFC 6891 6.2.3 */
 1565     }
 1566 
 1567 #ifdef HAVE_AUTH
        /* Authoritative path: reply only when answer_auth() produced a packet
           (m >= 1); nothing is forwarded from this branch. */
 1568   if (auth_dns)
 1569     {
 1570       m = answer_auth(header, ((char *) header) + udp_size, (size_t)n, now, &source_addr, 
 1571               local_auth, do_bit, have_pseudoheader);
 1572       if (m >= 1)
 1573     {
 1574       send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
 1575             (char *)header, m, &source_addr, &dst_addr, if_index);
 1576       daemon->metrics[METRIC_DNS_AUTH_ANSWERED]++;
 1577     }
 1578     }
 1579   else
 1580 #endif
 1581     {
 1582       int ad_reqd = do_bit;
 1583        /* RFC 6840 5.7 */
 1584       if (header->hb4 & HB4_AD)
 1585     ad_reqd = 1;
 1586 
 1587       m = answer_request(header, ((char *) header) + udp_size, (size_t)n, 
 1588              dst_addr_4, netmask, now, ad_reqd, do_bit, have_pseudoheader);
 1589       
            /* A local answer (m >= 1) is sent straight back; otherwise the query is
               passed to forward_query(), whose non-zero return is counted as
               forwarded and whose zero return is counted as locally answered. */
 1590       if (m >= 1)
 1591     {
 1592       send_from(listen->fd, option_bool(OPT_NOWILD) || option_bool(OPT_CLEVERBIND),
 1593             (char *)header, m, &source_addr, &dst_addr, if_index);
 1594       daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
 1595     }
 1596       else if (forward_query(listen->fd, &source_addr, &dst_addr, if_index,
 1597                  header, (size_t)n, now, NULL, ad_reqd, do_bit))
 1598     daemon->metrics[METRIC_DNS_QUERIES_FORWARDED]++;
 1599       else
 1600     daemon->metrics[METRIC_DNS_LOCAL_ANSWERED]++;
 1601     }
 1602 }
 1603 
 1604 #ifdef HAVE_DNSSEC
/* Recurse up the key hierarchy.

   Validate the DNS message in header (n bytes). Whenever the validator
   reports that it needs a DNSKEY or DS record, build a query for it, send
   that query to an upstream server over TCP, and call this function again
   on the answer.

   status selects the validator applied to header:
     STAT_NEED_KEY -> dnssec_validate_by_ds()
     STAT_NEED_DS  -> dnssec_validate_ds()
     anything else -> dnssec_validate_reply()
   name and keyname are scratch buffers handed to the validators; after a
   STAT_NEED_KEY/STAT_NEED_DS result keyname holds the name to query for.
   server is the upstream to try first; it may be advanced to other entries
   of daemon->servers while looking for one that accepts the query.
   have_mark/mark are only used when HAVE_CONNTRACK is defined, to set
   SO_MARK on newly created upstream sockets.
   keycount points to a work budget shared by all recursion levels; it is
   decremented once per validation attempt.

   Returns the last validation status, or STAT_ABANDONED when the work
   budget runs out, the query buffer cannot be allocated, search_servers()
   returns non-zero, or no server could be used. */
static int tcp_key_recurse(time_t now, int status, struct dns_header *header, size_t n,
                           int class, char *name, char *keyname, struct server *server,
                           int have_mark, unsigned int mark, int *keycount)
{
  int new_status;
  /* Query/answer buffer for sub-queries; allocated lazily on first need and
     freed on exit. payload, new_header and length all point into it. */
  unsigned char *packet = NULL;
  unsigned char *payload = NULL;
  struct dns_header *new_header = NULL;
  u16 *length = NULL;

  while (1)
    {
      int type = SERV_DO_DNSSEC;
      char *domain;
      size_t m;
      unsigned char c1, c2;
      struct server *firstsendto = NULL;

      /* limit the amount of work we do, to avoid cycling forever on loops in the DNS */
      if (--(*keycount) == 0)
        new_status = STAT_ABANDONED;
      else if (status == STAT_NEED_KEY)
        new_status = dnssec_validate_by_ds(now, header, n, name, keyname, class);
      else if (status == STAT_NEED_DS)
        new_status = dnssec_validate_ds(now, header, n, name, keyname, class);
      else
        new_status = dnssec_validate_reply(now, header, n, name, keyname, &class,
                                           !option_bool(OPT_DNSSEC_IGN_NS) && (server->flags & SERV_DO_DNSSEC),
                                           NULL, NULL, NULL);

      /* Any result other than "need more data" is final for this level. */
      if (new_status != STAT_NEED_DS && new_status != STAT_NEED_KEY)
        break;

      /* Can't validate because we need a key/DS whose name is now in keyname.
         Make query for same, and recurse to validate */
      if (!packet)
        {
          packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
          /* First two bytes hold the TCP length prefix, the DNS message follows. */
          payload = &packet[2];
          new_header = (struct dns_header *)payload;
          length = (u16 *)packet;
        }

      /* Still NULL here means the allocation above failed. */
      if (!packet)
        {
          new_status = STAT_ABANDONED;
          break;
        }

      m = dnssec_generate_query(new_header, ((unsigned char *) new_header) + 65536, keyname, class,
                                new_status == STAT_NEED_KEY ? T_DNSKEY : T_DS, server->edns_pktsz);

      *length = htons(m);

      /* Find server to forward to. This will normally be the
         same as for the original query, but may be another if
         servers for domains are involved. */
      if (search_servers(now, NULL, F_DNSSECOK, keyname, &type, &domain, NULL) != 0)
        {
          new_status = STAT_ABANDONED;
          break;
        }

      /* Walk the server list, starting at the current server, until one
         answers or we are back at the starting point. */
      while (1)
        {
          /* Set when the query has already gone out with the TCP fast-open
             send below. NOTE: not reset when jumping back to retry. */
          int data_sent = 0;

          if (!firstsendto)
            firstsendto = server;
          else
            {
              if (!(server = server->next))
                server = daemon->servers;
              if (server == firstsendto)
                {
                  /* can't find server to accept our query. */
                  new_status = STAT_ABANDONED;
                  break;
                }
            }

          if (!server_test_type(server, domain, type, SERV_DO_DNSSEC))
            continue;

        retry:
          /* may need to make new connection. */
          if (server->tcpfd == -1)
            {
              if ((server->tcpfd = socket(server->addr.sa.sa_family, SOCK_STREAM, 0)) == -1)
                continue; /* No good, next server */

#ifdef HAVE_CONNTRACK
              /* Copy connection mark of incoming query to outgoing connection. */
              if (have_mark)
                setsockopt(server->tcpfd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
#endif

              if (!local_bind(server->tcpfd,  &server->source_addr, server->interface, 0, 1))
                {
                  close(server->tcpfd);
                  server->tcpfd = -1;
                  continue; /* No good, next server */
                }

#ifdef MSG_FASTOPEN
              /* Try to connect and send in one step.
                 NOTE(review): the errno test assumes server_send() leaves
                 errno == 0 on success - confirm against its definition. */
              server_send(server, server->tcpfd, packet, m + sizeof(u16), MSG_FASTOPEN);

              if (errno == 0)
                data_sent = 1;
#endif

              if (!data_sent && connect(server->tcpfd, &server->addr.sa, sa_len(&server->addr)) == -1)
                {
                  close(server->tcpfd);
                  server->tcpfd = -1;
                  continue; /* No good, next server */
                }

              /* Fresh connection: nothing received on it yet. */
              server->flags &= ~SERV_GOT_TCP;
            }

          /* Send the query (unless already sent above), then read the
             two-byte length prefix followed by that many bytes of answer. */
          if ((!data_sent && !read_write(server->tcpfd, packet, m + sizeof(u16), 0)) ||
              !read_write(server->tcpfd, &c1, 1, 1) ||
              !read_write(server->tcpfd, &c2, 1, 1) ||
              !read_write(server->tcpfd, payload, (c1 << 8) | c2, 1))
            {
              close(server->tcpfd);
              server->tcpfd = -1;
              /* We get data then EOF, reopen connection to same server,
                 else try next. This avoids DoS from a server which accepts
                 connections and then closes them. */
              if (server->flags & SERV_GOT_TCP)
                goto retry;
              else
                continue;
            }

          log_query_mysockaddr(F_NOEXTRA | F_DNSSEC, keyname, &server->addr,
                               querystr("dnssec-query", new_status == STAT_NEED_KEY ? T_DNSKEY : T_DS));

          server->flags |= SERV_GOT_TCP;

          /* Validate the answer we just received; this may recurse further. */
          m = (c1 << 8) | c2;
          new_status = tcp_key_recurse(now, new_status, new_header, m, class, name, keyname, server, have_mark, mark, keycount);
          break;
        }

      /* STAT_OK from the sub-query means the missing key/DS was obtained, so
         loop and validate the original data again; anything else is final. */
      if (new_status != STAT_OK)
        break;
    }

  if (packet)
    free(packet);

  return new_status;
}
 1762 #endif
 1763 
 1764 
/* The daemon forks before calling this: it should deal with one connection,
   blocking as necessary, and then return. Note, need to be a bit careful
   about resources for debug mode, when the fork is suppressed: that's
   done by the caller.

   confd is the connected TCP socket to the client, local_addr the local
   address of that connection, netmask is passed through to
   answer_request(), and a non-zero auth_dns makes queries be answered by
   answer_auth() (only when HAVE_AUTH is defined).

   Reads length-prefixed queries from confd in a loop (at most
   TCP_MAX_QUERIES of them), answers each one locally or by forwarding it
   over TCP to an upstream server, and writes the length-prefixed reply back.

   Returns the packet buffer allocated here on every exit path; it is NULL
   if the allocation failed. Presumably the caller frees it - confirm
   against the call site.

   NOTE(review): have_pseudoheader, added_pheader, no_cache_dnssec,
   cache_secure, bogusanswer, norebind and (with HAVE_AUTH) auth_dns and
   local_auth are set inside the query loop but never cleared there, so a
   value set by one query is still in force for later queries on the same
   connection. Confirm this is intended. */
unsigned char *tcp_request(int confd, time_t now,
                           union mysockaddr *local_addr, struct in_addr netmask, int auth_dns)
{
  size_t size = 0;
  int norebind = 0;
#ifdef HAVE_AUTH
  int local_auth = 0;
#endif
  int checking_disabled, do_bit, added_pheader = 0, have_pseudoheader = 0;
  int check_subnet, cacheable, no_cache_dnssec = 0, cache_secure = 0, bogusanswer = 0;
  size_t m;
  unsigned short qtype;
  unsigned int gotname;
  unsigned char c1, c2;
  /* Max TCP packet + slop + size */
  unsigned char *packet = whine_malloc(65536 + MAXDNAME + RRFIXEDSZ + sizeof(u16));
  /* The DNS message starts after the two-byte TCP length prefix. */
  unsigned char *payload = &packet[2];
  /* largest field in header is 16-bits, so this is still sufficiently aligned */
  struct dns_header *header = (struct dns_header *)payload;
  u16 *length = (u16 *)packet;
  struct server *last_server;
  struct in_addr dst_addr_4;
  union mysockaddr peer_addr;
  socklen_t peer_len = sizeof(union mysockaddr);
  int query_count = 0;
  unsigned char *pheader;
  unsigned int mark = 0;
  int have_mark = 0;

  /* Silence unused-variable warnings in builds that never read these. */
  (void)mark;
  (void)have_mark;

  if (getpeername(confd, (struct sockaddr *)&peer_addr, &peer_len) == -1)
    return packet;

#ifdef HAVE_CONNTRACK
  /* Get connection mark of incoming query to set on outgoing connections. */
  if (option_bool(OPT_CONNTRACK))
    {
      union all_addr local;

      if (local_addr->sa.sa_family == AF_INET6)
        local.addr6 = local_addr->in6.sin6_addr;
      else
        local.addr4 = local_addr->in.sin_addr;

      have_mark = get_incoming_mark(&peer_addr, &local, 1, &mark);
    }
#endif

  /* We can be configured to only accept queries from at-most-one-hop-away addresses. */
  if (option_bool(OPT_LOCAL_SERVICE))
    {
      struct addrlist *addr;

      /* In both branches addr is left non-NULL only if the peer address is
         on the same network as one of daemon->interface_addrs. */
      if (peer_addr.sa.sa_family == AF_INET6)
        {
          for (addr = daemon->interface_addrs; addr; addr = addr->next)
            if ((addr->flags & ADDRLIST_IPV6) &&
                is_same_net6(&addr->addr.addr6, &peer_addr.in6.sin6_addr, addr->prefixlen))
              break;
        }
      else
        {
          /* Note: this local shadows the netmask parameter within this block. */
          struct in_addr netmask;
          for (addr = daemon->interface_addrs; addr; addr = addr->next)
            {
              netmask.s_addr = htonl(~(in_addr_t)0 << (32 - addr->prefixlen));
              if (!(addr->flags & ADDRLIST_IPV6) &&
                  is_same_net(addr->addr.addr4, peer_addr.in.sin_addr, netmask))
                break;
            }
        }
      if (!addr)
        {
          my_syslog(LOG_WARNING, _("Ignoring query from non-local network"));
          return packet;
        }
    }

  /* One iteration per query received on the connection. */
  while (1)
    {
      /* Stop on: query limit reached, no buffer, read failure/EOF on the
         length prefix, a zero length, or failure to read the message. */
      if (query_count == TCP_MAX_QUERIES ||
          !packet ||
          !read_write(confd, &c1, 1, 1) || !read_write(confd, &c2, 1, 1) ||
          !(size = c1 << 8 | c2) ||
          !read_write(confd, payload, size, 1))
        return packet;

      /* Too short to contain a DNS header: ignore it and read the next one. */
      if (size < (int)sizeof(struct dns_header))
        continue;

      /* Clear buffer beyond request to avoid risk of
         information disclosure. */
      memset(payload + size, 0, 65536 - size);

      query_count++;

      /* log_query gets called indirectly all over the place, so
         pass these in global variables - sorry. */
      daemon->log_display_id = ++daemon->log_id;
      daemon->log_source_addr = &peer_addr;

      /* save state of "cd" flag in query */
      if ((checking_disabled = header->hb4 & HB4_CD))
        no_cache_dnssec = 1;

      if ((gotname = extract_request(header, (unsigned int)size, daemon->namebuff, &qtype)))
        {
#ifdef HAVE_AUTH
          struct auth_zone *zone;
#endif
          char *types = querystr(auth_dns ? "auth" : "query", qtype);

          log_query_mysockaddr(F_QUERY | F_FORWARD, daemon->namebuff,
                               &peer_addr, types);

#ifdef HAVE_AUTH
          /* find queries for zones we're authoritative for, and answer them directly */
          if (!auth_dns && !option_bool(OPT_LOCALISE))
            for (zone = daemon->auth_zones; zone; zone = zone->next)
              if (in_zone(zone, daemon->namebuff, NULL))
                {
                  auth_dns = 1;
                  local_auth = 1;
                  break;
                }
#endif
        }

      /* IPv4 destination address handed to answer_request(); zero when the
         connection is not IPv4. */
      if (local_addr->sa.sa_family == AF_INET)
        dst_addr_4 = local_addr->in.sin_addr;
      else
        dst_addr_4.s_addr = 0;

      do_bit = 0;

      if (find_pseudoheader(header, (size_t)size, NULL, &pheader, NULL, NULL))
        {
          unsigned short flags;

          have_pseudoheader = 1;
          pheader += 4; /* udp_size, ext_rcode */
          GETSHORT(flags, pheader);

          if (flags & 0x8000)
            do_bit = 1; /* do bit */
        }

#ifdef HAVE_AUTH
      if (auth_dns)
        m = answer_auth(header, ((char *) header) + 65536, (size_t)size, now, &peer_addr,
                        local_auth, do_bit, have_pseudoheader);
      else
#endif
        {
          int ad_reqd = do_bit;
          /* RFC 6840 5.7 */
          if (header->hb4 & HB4_AD)
            ad_reqd = 1;

          /* m > 0 if answered from cache */
          m = answer_request(header, ((char *) header) + 65536, (size_t)size,
                             dst_addr_4, netmask, now, ad_reqd, do_bit, have_pseudoheader);

          /* Do this by steam now we're not in the select() loop */
          check_log_writer(1);

          /* No local answer: try to forward the query upstream. */
          if (m == 0)
            {
              unsigned int flags = 0;
              union all_addr *addrp = NULL;
              int type = SERV_DO_DNSSEC;
              char *domain = NULL;
              /* Remember whether the query arrived with a pseudoheader,
                 before anything is added to it below. */
              unsigned char *oph = find_pseudoheader(header, size, NULL, NULL, NULL, NULL);

              size = add_edns0_config(header, size, ((unsigned char *) header) + 65536, &peer_addr, now, &check_subnet, &cacheable);

              if (gotname)
                flags = search_servers(now, &addrp, gotname, daemon->namebuff, &type, &domain, &norebind);

#ifdef HAVE_DNSSEC
              if (option_bool(OPT_DNSSEC_VALID) && (type & SERV_DO_DNSSEC))
                {
                  size = add_do_bit(header, size, ((unsigned char *) header) + 65536);

                  /* For debugging, set Checking Disabled, otherwise, have the upstream check too,
                     this allows it to select auth servers when one is returning bad data. */
                  if (option_bool(OPT_DNSSEC_DEBUG))
                    header->hb4 |= HB4_CD;
                }
#endif

              /* Check if we added a pheader on forwarding - may need to
                 strip it from the reply. */
              if (!oph && find_pseudoheader(header, size, NULL, NULL, NULL, NULL))
                added_pheader = 1;

              type &= ~SERV_DO_DNSSEC;

              /* Start from the head of the server list unless a previously
                 used server is recorded and may be preferred. */
              if (type != 0  || option_bool(OPT_ORDER) || !daemon->last_server)
                last_server = daemon->servers;
              else
                last_server = daemon->last_server;

              if (!flags && last_server)
                {
                  struct server *firstsendto = NULL;
                  /* Hash of the question as sent, compared with the reply below. */
                  unsigned char hash[HASH_SIZE];
                  memcpy(hash, hash_questions(header, (unsigned int)size, daemon->namebuff), HASH_SIZE);

                  /* Loop round available servers until we succeed in connecting to one.
                     Note that this code subtly ensures that consecutive queries on this connection
                     which can go to the same server, do so. */
                  while (1)
                    {
                      /* Set when the query has already gone out with the
                         TCP fast-open send below. NOTE: not reset when
                         jumping back to retry. */
                      int data_sent = 0;

                      if (!firstsendto)
                        firstsendto = last_server;
                      else
                        {
                          if (!(last_server = last_server->next))
                            last_server = daemon->servers;

                          /* Back at the starting point: every server tried. */
                          if (last_server == firstsendto)
                            break;
                        }

                      /* server for wrong domain */
                      if (!server_test_type(last_server, domain, type, 0))
                        continue;

                    retry:
                      *length = htons(size);

                      if (last_server->tcpfd == -1)
                        {
                          if ((last_server->tcpfd = socket(last_server->addr.sa.sa_family, SOCK_STREAM, 0)) == -1)
                            continue;

#ifdef HAVE_CONNTRACK
                          /* Copy connection mark of incoming query to outgoing connection. */
                          if (have_mark)
                            setsockopt(last_server->tcpfd, SOL_SOCKET, SO_MARK, &mark, sizeof(unsigned int));
#endif

                          if ((!local_bind(last_server->tcpfd,  &last_server->source_addr, last_server->interface, 0, 1)))
                            {
                              close(last_server->tcpfd);
                              last_server->tcpfd = -1;
                              continue;
                            }

#ifdef MSG_FASTOPEN
                          /* Try to connect and send in one step.
                             NOTE(review): the errno test assumes server_send()
                             leaves errno == 0 on success - confirm against
                             its definition. */
                          server_send(last_server, last_server->tcpfd, packet, size + sizeof(u16), MSG_FASTOPEN);

                          if (errno == 0)
                            data_sent = 1;
#endif

                          if (!data_sent && connect(last_server->tcpfd, &last_server->addr.sa, sa_len(&last_server->addr)) == -1)
                            {
                              close(last_server->tcpfd);
                              last_server->tcpfd = -1;
                              continue;
                            }

                          /* Fresh connection: nothing received on it yet. */
                          last_server->flags &= ~SERV_GOT_TCP;
                        }

                      /* get query name again for logging - may have been overwritten */
                      if (!(gotname = extract_request(header, (unsigned int)size, daemon->namebuff, &qtype)))
                        strcpy(daemon->namebuff, "query");

                      /* Send the query (unless already sent above), then read
                         the two-byte length prefix and that many bytes of reply
                         into the same buffer. */
                      if ((!data_sent && !read_write(last_server->tcpfd, packet, size + sizeof(u16), 0)) ||
                          !read_write(last_server->tcpfd, &c1, 1, 1) ||
                          !read_write(last_server->tcpfd, &c2, 1, 1) ||
                          !read_write(last_server->tcpfd, payload, (c1 << 8) | c2, 1))
                        {
                          close(last_server->tcpfd);
                          last_server->tcpfd = -1;
                          /* We get data then EOF, reopen connection to same server,
                             else try next. This avoids DoS from a server which accepts
                             connections and then closes them. */
                          if (last_server->flags & SERV_GOT_TCP)
                            goto retry;
                          else
                            continue;
                        }

                      last_server->flags |= SERV_GOT_TCP;

                      /* m is now the length of the upstream reply. */
                      m = (c1 << 8) | c2;

                      log_query_mysockaddr(F_SERVER | F_FORWARD, daemon->namebuff,
                                           &last_server->addr, NULL);

#ifdef HAVE_DNSSEC
                      if (option_bool(OPT_DNSSEC_VALID) && !checking_disabled && (last_server->flags & SERV_DO_DNSSEC))
                        {
                          int keycount = DNSSEC_WORK; /* Limit to number of DNSSEC questions, to catch loops and avoid filling cache. */
                          int status = tcp_key_recurse(now, STAT_OK, header, m, 0, daemon->namebuff, daemon->keyname,
                                                       last_server, have_mark, mark, &keycount);
                          /* Note: this domain shadows the outer domain and is
                             only used for the log line below. */
                          char *result, *domain = "result";

                          /* An abandoned validation is logged as such but
                             then treated as bogus. */
                          if (status == STAT_ABANDONED)
                            {
                              result = "ABANDONED";
                              status = STAT_BOGUS;
                            }
                          else
                            result = (status == STAT_SECURE ? "SECURE" : (status == STAT_INSECURE ? "INSECURE" : "BOGUS"));

                          if (status == STAT_BOGUS && extract_request(header, m, daemon->namebuff, NULL))
                            domain = daemon->namebuff;

                          log_query(F_SECSTAT, domain, NULL, result);

                          if (status == STAT_BOGUS)
                            {
                              no_cache_dnssec = 1;
                              bogusanswer = 1;
                            }

                          if (status == STAT_SECURE)
                            cache_secure = 1;
                        }
#endif

                      /* restore CD bit to the value in the query */
                      if (checking_disabled)
                        header->hb4 |= HB4_CD;
                      else
                        header->hb4 &= ~HB4_CD;

                      /* There's no point in updating the cache, since this process will exit and
                         lose the information after a few queries. We make this call for the alias and
                         bogus-nxdomain side-effects. */
                      /* If the crc of the question section doesn't match the crc we sent, then
                         someone might be attempting to insert bogus values into the cache by
                         sending replies containing questions and bogus answers. */
                      if (memcmp(hash, hash_questions(header, (unsigned int)m, daemon->namebuff), HASH_SIZE) != 0)
                        {
                          /* m == 0 makes the code after the loop build a local reply. */
                          m = 0;
                          break;
                        }

                      /* Never cache answers which are contingent on the source or MAC address EDNS0 option,
                         since the cache is ignorant of such things. */
                      if (!cacheable)
                        no_cache_dnssec = 1;

                      m = process_reply(header, now, last_server, (unsigned int)m,
                                        option_bool(OPT_NO_REBIND) && !norebind, no_cache_dnssec, cache_secure, bogusanswer,
                                        ad_reqd, do_bit, added_pheader, check_subnet, &peer_addr);

                      break;
                    }
                }

              /* In case of local answer or no connections made. */
              if (m == 0)
                {
                  m = setup_reply(header, (unsigned int)size, addrp, flags, daemon->local_ttl);
                  if (have_pseudoheader)
                    m = add_pseudoheader(header, m, ((unsigned char *) header) + 65536, daemon->edns_pktsz, 0, NULL, 0, do_bit, 0);
                }
            }
        }

      check_log_writer(1);

      /* Fill in the TCP length prefix and send the reply to the client;
         give up on the connection if there is nothing to send or the
         write fails. */
      *length = htons(m);

      if (m == 0 || !read_write(confd, packet, m + sizeof(u16), 0))
        return packet;
    }
}
 2148 
 2149 static struct frec *allocate_frec(time_t now)
 2150 {
 2151   struct frec *f;
 2152   
 2153   if ((f = (struct frec *)whine_malloc(sizeof(struct frec))))
 2154     {
 2155       f->next = daemon->frec_list;
 2156       f->time = now;
 2157       f->sentto = NULL;
 2158       f->rfds = NULL;
 2159       f->flags = 0;
 2160 #ifdef HAVE_DNSSEC
 2161       f->dependent = NULL;
 2162       f->blocking_query = NULL;
 2163       f->stash = NULL;
 2164 #endif
 2165       daemon->frec_list = f;
 2166     }
 2167 
 2168   return f;
 2169 }
 2170 
 2171 /* return a UDP socket bound to a random port, have to cope with straying into
 2172    occupied port nos and reserved ones. */
 2173 static int random_sock(struct server *s)
 2174 {
 2175   int fd;
 2176 
 2177   if ((fd = socket(s->source_addr.sa.sa_family, SOCK_DGRAM, 0)) != -1)
 2178     {
 2179       if (local_bind(fd, &s->source_addr, s->interface, s->ifindex, 0))
 2180     return fd;
 2181 
 2182       if (s->interface[0] == 0)
 2183     (void)prettyprint_addr(&s->source_addr, daemon->namebuff);
 2184       else
 2185     strcpy(daemon->namebuff, s->interface);
 2186 
 2187       my_syslog(LOG_ERR, _("failed to bind server socket to %s: %s"),
 2188         daemon->namebuff, strerror(errno));
 2189       close(fd);
 2190     }
 2191   
 2192   return -1;
 2193 }
 2194 
 2195 /* compare source addresses and interface, serv2 can be null. */
 2196 static int server_isequal(const struct server *serv1,
 2197              const struct server *serv2)
 2198 {
 2199   return (serv2 &&
 2200     serv2->ifindex == serv1->ifindex &&
 2201     sockaddr_isequal(&serv2->source_addr, &serv1->source_addr) &&
 2202     strncmp(serv2->interface, serv1->interface, IF_NAMESIZE) == 0);
 2203 }
 2204 
 2205 /* fdlp points to chain of randomfds already in use by transaction.
 2206    If there's already a suitable one, return it, else allocate a 
 2207    new one and add it to the list. 
 2208 
 2209    Not leaking any resources in the face of allocation failures
 2210    is rather convoluted here.
 2211    
 2212    Note that rfd->serv may be NULL, when a server goes away.
 2213 */
 2214 int allocate_rfd(struct randfd_list **fdlp, struct server *serv)
 2215 {
 2216   static int finger = 0;
 2217   int i, j = 0;
 2218   struct randfd_list *rfl;
 2219   struct randfd *rfd = NULL;
 2220   int fd = 0;
 2221   
 2222   /* If server has a pre-allocated fd, use that. */
 2223   if (serv->sfd)
 2224     return serv->sfd->fd;
 2225   
 2226   /* existing suitable random port socket linked to this transaction? */
 2227   for (rfl = *fdlp; rfl; rfl = rfl->next)
 2228     if (server_isequal(serv, rfl->rfd->serv))
 2229       return rfl->rfd->fd;
 2230 
 2231   /* No. need new link. */
 2232   if ((rfl = daemon->rfl_spare))
 2233     daemon->rfl_spare = rfl->next;
 2234   else if (!(rfl = whine_malloc(sizeof(struct randfd_list))))
 2235     return -1;
 2236    
 2237   /* limit the number of sockets we have open to avoid starvation of 
 2238      (eg) TFTP. Once we have a reasonable number, randomness should be OK */
 2239   for (i = 0; i < daemon->numrrand; i++)
 2240     if (daemon->randomsocks[i].refcount == 0)
 2241       {
 2242     if ((fd = random_sock(serv)) != -1)
 2243           {
 2244         rfd = &daemon->randomsocks[i];
 2245         rfd->serv = serv;
 2246         rfd->fd = fd;
 2247         rfd->refcount = 1;
 2248       }
 2249     break;
 2250       }
 2251   
 2252   /* No free ones or cannot get new socket, grab an existing one */
 2253   if (!rfd)
 2254     for (j = 0; j < daemon->numrrand; j++)
 2255       {
 2256     i = (j + finger) % daemon->numrrand;
 2257     if (daemon->randomsocks[i].refcount != 0 &&
 2258         server_isequal(serv, daemon->randomsocks[i].serv) &&
 2259         daemon->randomsocks[i].refcount != 0xfffe)
 2260       {
 2261         finger = i + 1;
 2262         rfd = &daemon->randomsocks[i];
 2263         rfd->refcount++;
 2264         break;
 2265       }
 2266       }
 2267 
 2268   if (j == daemon->numrrand)
 2269     {
 2270       struct randfd_list *rfl_poll;
 2271 
 2272       /* there are no free slots, and non with the same parameters we can piggy-back on. 
 2273      We're going to have to allocate a new temporary record, distinguished by
 2274      refcount == 0xffff. This will exist in the frec randfd list, never be shared,
 2275      and be freed when no longer in use. It will also be held on 
 2276      the daemon->rfl_poll list so the poll system can find it. */
 2277 
 2278       if ((rfl_poll = daemon->rfl_spare))
 2279     daemon->rfl_spare = rfl_poll->next;
 2280       else
 2281     rfl_poll = whine_malloc(sizeof(struct randfd_list));
 2282       
 2283       if (!rfl_poll ||
 2284       !(rfd = whine_malloc(sizeof(struct randfd))) ||
 2285       (fd = random_sock(serv)) == -1)
 2286     {
 2287       
 2288       /* Don't leak anything we may already have */
 2289       rfl->next = daemon->rfl_spare;
 2290       daemon->rfl_spare = rfl;
 2291 
 2292       if (rfl_poll)
 2293         {
 2294           rfl_poll->next = daemon->rfl_spare;
 2295           daemon->rfl_spare = rfl_poll;
 2296         }
 2297       
 2298       if (rfd)
 2299         free(rfd);
 2300       
 2301       return -1; /* doom */
 2302     }
 2303 
 2304       /* Note rfd->serv not set here, since it's not reused */
 2305       rfd->fd = fd;
 2306       rfd->refcount = 0xffff; /* marker for temp record */
 2307 
 2308       rfl_poll->rfd = rfd;
 2309       rfl_poll->next = daemon->rfl_poll;
 2310       daemon->rfl_poll = rfl_poll;
 2311     }
 2312   
 2313   rfl->rfd = rfd;
 2314   rfl->next = *fdlp;
 2315   *fdlp = rfl;
 2316   
 2317   return rfl->rfd->fd;
 2318 }
 2319 
 2320 void free_rfds(struct randfd_list **fdlp)
 2321 {
 2322   struct randfd_list *tmp, *rfl, *poll, *next, **up;
 2323   
 2324   for (rfl = *fdlp; rfl; rfl = tmp)
 2325     {
 2326       if (rfl->rfd->refcount == 0xffff || --(rfl->rfd->refcount) == 0)
 2327     close(rfl->rfd->fd);
 2328 
 2329       /* temporary overflow record */
 2330       if (rfl->rfd->refcount == 0xffff)
 2331     {
 2332       free(rfl->rfd);
 2333       
 2334       /* go through the link of all these by steam to delete.
 2335          This list is expected to be almost always empty. */
 2336       for (poll = daemon->rfl_poll, up = &daemon->rfl_poll; poll; poll = next)
 2337         {
 2338           next = poll->next;
 2339           
 2340           if (poll->rfd == rfl->rfd)
 2341         {
 2342           *up = poll->next;
 2343           poll->next = daemon->rfl_spare;
 2344           daemon->rfl_spare = poll;
 2345         }
 2346           else
 2347         up = &poll->next;
 2348         }
 2349     }
 2350 
 2351       tmp = rfl->next;
 2352       rfl->next = daemon->rfl_spare;
 2353       daemon->rfl_spare = rfl;
 2354     }
 2355 
 2356   *fdlp = NULL;
 2357 }
 2358 
 2359 static void free_frec(struct frec *f)
 2360 {
 2361   struct frec_src *last;
 2362   
 2363   /* add back to freelist if not the record builtin to every frec. */
 2364   for (last = f->frec_src.next; last && last->next; last = last->next) ;
 2365   if (last)
 2366     {
 2367       last->next = daemon->free_frec_src;
 2368       daemon->free_frec_src = f->frec_src.next;
 2369     }
 2370     
 2371   f->frec_src.next = NULL;    
 2372   free_rfds(&f->rfds);
 2373   f->sentto = NULL;
 2374   f->flags = 0;
 2375 
 2376 #ifdef HAVE_DNSSEC
 2377   if (f->stash)
 2378     {
 2379       blockdata_free(f->stash);
 2380       f->stash = NULL;
 2381     }
 2382 
 2383   /* Anything we're waiting on is pointless now, too */
 2384   if (f->blocking_query)
 2385     free_frec(f->blocking_query);
 2386   f->blocking_query = NULL;
 2387   f->dependent = NULL;
 2388 #endif
 2389 }
 2390 
 2391 
 2392 
 2393 /* if wait==NULL return a free or older than TIMEOUT record.
 2394    else return *wait zero if one available, or *wait is delay to
 2395    when the oldest in-use record will expire. Impose an absolute
 2396    limit of 4*TIMEOUT before we wipe things (for random sockets).
 2397    If force is non-NULL, always return a result, even if we have
 2398    to allocate above the limit, and never free the record pointed
 2399    to by the force argument. */
 2400 struct frec *get_new_frec(time_t now, int *wait, struct frec *force)
 2401 {
 2402   struct frec *f, *oldest, *target;
 2403   int count;
 2404   
 2405   if (wait)
 2406     *wait = 0;
 2407 
 2408   for (f = daemon->frec_list, oldest = NULL, target =  NULL, count = 0; f; f = f->next, count++)
 2409     if (!f->sentto)
 2410       target = f;
 2411     else 
 2412       {
 2413 #ifdef HAVE_DNSSEC
 2414         /* Don't free DNSSEC sub-queries here, as we may end up with
 2415            dangling references to them. They'll go when their "real" query 
 2416            is freed. */
 2417         if (!f->dependent && f != force)
 2418 #endif
 2419           {
 2420         if (difftime(now, f->time) >= 4*TIMEOUT)
 2421           {
 2422             free_frec(f);
 2423             target = f;
 2424           }
 2425          
 2426         
 2427         if (!oldest || difftime(f->time, oldest->time) <= 0)
 2428           oldest = f;
 2429           }
 2430       }
 2431 
 2432   if (target)
 2433     {
 2434       target->time = now;
 2435       return target;
 2436     }
 2437   
 2438   /* can't find empty one, use oldest if there is one
 2439      and it's older than timeout */
 2440   if (!force && oldest && ((int)difftime(now, oldest->time)) >= TIMEOUT)
 2441     { 
 2442       /* keep stuff for twice timeout if we can by allocating a new
 2443      record instead */
 2444       if (difftime(now, oldest->time) < 2*TIMEOUT && 
 2445       count <= daemon->ftabsize &&
 2446       (f = allocate_frec(now)))
 2447     return f;
 2448 
 2449       if (!wait)
 2450     {
 2451       free_frec(oldest);
 2452       oldest->time = now;
 2453     }
 2454       return oldest;
 2455     }
 2456   
 2457   /* none available, calculate time 'till oldest record expires */
 2458   if (!force && count > daemon->ftabsize)
 2459     {
 2460       if (oldest && wait)
 2461     *wait = oldest->time + (time_t)TIMEOUT - now;
 2462       
 2463       query_full(now);
 2464       
 2465       return NULL;
 2466     }
 2467   
 2468   if (!(f = allocate_frec(now)) && wait)
 2469     /* wait one second on malloc failure */
 2470     *wait = 1;
 2471 
 2472   return f; /* OK if malloc fails and this is NULL */
 2473 }
 2474 
 2475 static void query_full(time_t now)
 2476 {
 2477   static time_t last_log = 0;
 2478   
 2479   if ((int)difftime(now, last_log) > 5)
 2480     {
 2481       last_log = now;
 2482       my_syslog(LOG_WARNING, _("Maximum number of concurrent DNS queries reached (max: %d)"), daemon->ftabsize);
 2483     }
 2484 }
 2485 
 2486 
 2487 static struct frec *lookup_frec(unsigned short id, int fd, void *hash)
 2488 {
 2489   struct frec *f;
 2490   struct server *s;
 2491   int type;
 2492   struct randfd_list *fdl;
 2493   
 2494   for(f = daemon->frec_list; f; f = f->next)
 2495     if (f->sentto && f->new_id == id && 
 2496     (memcmp(hash, f->hash, HASH_SIZE) == 0))
 2497       {
 2498     /* sent from random port */
 2499     for (fdl = f->rfds; fdl; fdl = fdl->next)
 2500       if (fdl->rfd->fd == fd)
 2501       return f;
 2502     
 2503     /* Sent to upstream from socket associated with a server. 
 2504        Note we have to iterate over all the possible servers, since they may
 2505        have different bound sockets. */
 2506     type = f->sentto->flags & SERV_TYPE;
 2507     s = f->sentto;
 2508     do {
 2509       if (server_test_type(s, f->sentto->domain, type, 0) &&
 2510           s->sfd && s->sfd->fd == fd)
 2511         return f;
 2512       
 2513       s = s->next ? s->next : daemon->servers;
 2514     } while (s != f->sentto);
 2515       }
 2516   
 2517   return NULL;
 2518 }
 2519 
 2520 static struct frec *lookup_frec_by_query(void *hash, unsigned int flags)
 2521 {
 2522   struct frec *f;
 2523 
 2524   /* FREC_DNSKEY and FREC_DS_QUERY are never set in flags, so the test below 
 2525      ensures that no frec created for internal DNSSEC query can be returned here.
 2526      
 2527      Similarly FREC_NO_CACHE is never set in flags, so a query which is
 2528      contigent on a particular source address EDNS0 option will never be matched. */
 2529 
 2530 #define FLAGMASK (FREC_CHECKING_DISABLED | FREC_AD_QUESTION | FREC_DO_QUESTION \
 2531           | FREC_HAS_PHEADER | FREC_DNSKEY_QUERY | FREC_DS_QUERY | FREC_NO_CACHE)
 2532   
 2533   for(f = daemon->frec_list; f; f = f->next)
 2534     if (f->sentto &&
 2535     (f->flags & FLAGMASK) == flags &&
 2536     memcmp(hash, f->hash, HASH_SIZE) == 0)
 2537       return f;
 2538   
 2539   return NULL;
 2540 }
 2541 
 2542 /* Send query packet again, if we can. */
 2543 void resend_query()
 2544 {
 2545   if (daemon->srv_save)
 2546     server_send(daemon->srv_save, daemon->fd_save,
 2547         daemon->packet, daemon->packet_len, 0);
 2548 }
 2549 
 2550 /* A server record is going away, remove references to it */
 2551 void server_gone(struct server *server)
 2552 {
 2553   struct frec *f;
 2554   int i;
 2555   
 2556   for (f = daemon->frec_list; f; f = f->next)
 2557     if (f->sentto && f->sentto == server)
 2558       free_frec(f);
 2559 
 2560   /* If any random socket refers to this server, NULL the reference.
 2561      No more references to the socket will be created in the future. */
 2562   for (i = 0; i < daemon->numrrand; i++)
 2563     if (daemon->randomsocks[i].refcount != 0 && daemon->randomsocks[i].serv == server)
 2564       daemon->randomsocks[i].serv = NULL;
 2565   
 2566   if (daemon->last_server == server)
 2567     daemon->last_server = NULL;
 2568   
 2569   if (daemon->srv_save == server)
 2570     daemon->srv_save = NULL;
 2571 }
 2572 
 2573 /* return unique random ids. */
 2574 static unsigned short get_id(void)
 2575 {
 2576   unsigned short ret = 0;
 2577   struct frec *f;
 2578   
 2579   while (1)
 2580     {
 2581       ret = rand16();
 2582 
 2583       /* ensure id is unique. */
 2584       for (f = daemon->frec_list; f; f = f->next)
 2585     if (f->sentto && f->new_id == ret)
 2586       break;
 2587 
 2588       if (!f)
 2589     return ret;
 2590     }
 2591 }
 2592 
 2593 
 2594 
 2595 
 2596