"Fossies" - the Fresh Open Source Software Archive

Member "nsd-4.3.6/server.c" (6 Apr 2021, 130713 Bytes) of package /linux/misc/dns/nsd-4.3.6.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively you can view or download the uninterpreted source code file here. For more information about "server.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 4.3.5_vs_4.3.6.

    1 /*
    2  * server.c -- nsd(8) network input/output
    3  *
    4  * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
    5  *
    6  * See LICENSE for the license.
    7  *
    8  */
    9 
   10 #include "config.h"
   11 
   12 #include <sys/types.h>
   13 #include <sys/param.h>
   14 #include <limits.h>
   15 #include <sys/socket.h>
   16 #include <sys/uio.h>
   17 #include <sys/wait.h>
   18 
   19 #include <netinet/in.h>
   20 #ifdef USE_TCP_FASTOPEN
   21   #include <netinet/tcp.h>
   22 #endif
   23 #include <arpa/inet.h>
   24 
   25 #include <assert.h>
   26 #include <ctype.h>
   27 #include <errno.h>
   28 #include <fcntl.h>
   29 #include <stddef.h>
   30 #include <stdio.h>
   31 #include <stdlib.h>
   32 #include <string.h>
   33 #include <time.h>
   34 #include <unistd.h>
   35 #include <signal.h>
   36 #include <netdb.h>
   37 #include <poll.h>
   38 #ifdef HAVE_SYS_RANDOM_H
   39 #include <sys/random.h>
   40 #endif
   41 #ifndef SHUT_WR
   42 #define SHUT_WR 1
   43 #endif
   44 #ifdef HAVE_MMAP
   45 #include <sys/mman.h>
   46 #endif /* HAVE_MMAP */
   47 #ifdef HAVE_OPENSSL_RAND_H
   48 #include <openssl/rand.h>
   49 #endif
   50 #ifdef HAVE_OPENSSL_SSL_H
   51 #include <openssl/ssl.h>
   52 #endif
   53 #ifdef HAVE_OPENSSL_ERR_H
   54 #include <openssl/err.h>
   55 #endif
   56 #ifdef HAVE_OPENSSL_OCSP_H
   57 #include <openssl/ocsp.h>
   58 #endif
   59 #ifndef USE_MINI_EVENT
   60 #  ifdef HAVE_EVENT_H
   61 #    include <event.h>
   62 #  else
   63 #    include <event2/event.h>
   64 #    include "event2/event_struct.h"
   65 #    include "event2/event_compat.h"
   66 #  endif
   67 #else
   68 #  include "mini_event.h"
   69 #endif
   70 
   71 #include "axfr.h"
   72 #include "namedb.h"
   73 #include "netio.h"
   74 #include "xfrd.h"
   75 #include "xfrd-tcp.h"
   76 #include "xfrd-disk.h"
   77 #include "difffile.h"
   78 #include "nsec3.h"
   79 #include "ipc.h"
   80 #include "udb.h"
   81 #include "remote.h"
   82 #include "lookup3.h"
   83 #include "rrl.h"
   84 #ifdef USE_DNSTAP
   85 #include "dnstap/dnstap_collector.h"
   86 #endif
   87 
   88 #define RELOAD_SYNC_TIMEOUT 25 /* seconds */
   89 
   90 #ifdef USE_DNSTAP
   91 /*
   92  * log_addr() - the function to print sockaddr_in/sockaddr_in6 structures content
   93  * just like its done in Unbound via the same log_addr(VERB_LEVEL, const char*, sockaddr_storage*)
   94  */
   95 static void
   96 log_addr(const char* descr,
   97 #ifdef INET6
   98     struct sockaddr_storage* addr,
   99 #else
  100     struct sockaddr_in* addr,
  101 #endif
  102     short family)
  103 {
  104     char str_buf[64];
  105     if(verbosity < 6)
  106         return;
  107     if(family == AF_INET) {
  108         struct sockaddr_in* s = (struct sockaddr_in*)addr;
  109         inet_ntop(AF_INET, &s->sin_addr.s_addr, str_buf, sizeof(str_buf));
  110         VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s->sin_port)));
  111 #ifdef INET6
  112     } else {
  113         struct sockaddr_in6* s6 = (struct sockaddr_in6*)addr;
  114         inet_ntop(AF_INET6, &s6->sin6_addr.s6_addr, str_buf, sizeof(str_buf));
  115         VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s6->sin6_port)));
  116 #endif
  117     }
  118 }
  119 #endif /* USE_DNSTAP */
  120 
  121 #ifdef USE_TCP_FASTOPEN
  122   #define TCP_FASTOPEN_FILE "/proc/sys/net/ipv4/tcp_fastopen"
  123   #define TCP_FASTOPEN_SERVER_BIT_MASK 0x2
  124 #endif
  125 
  126 /*
  127  * Data for the UDP handlers.
  128  */
struct udp_handler_data
{
	struct nsd        *nsd;    /* global server state */
	struct nsd_socket *socket; /* the UDP socket this handler serves */
	struct event       event;  /* event registered for this socket's fd */
};
  135 
/* Data for the TCP accept handlers. */
struct tcp_accept_handler_data {
	struct nsd        *nsd;    /* global server state */
	struct nsd_socket *socket; /* listening TCP socket */
	int                event_added; /* nonzero while event is registered */
	struct event       event;  /* accept event for the listening fd */
#ifdef HAVE_SSL
	/* handler accepts TLS connections on the dedicated port */
	int                tls_accept;
#endif
};
  146 
/*
 * These globals are used to enable the TCP accept handlers
 * when the number of TCP connection drops below the maximum
 * number of TCP connections.
 */
static size_t tcp_accept_handler_count;
static struct tcp_accept_handler_data *tcp_accept_handlers;

/* NOTE(review): timer event and flag, presumably used to back off on
 * transient accept() failures -- confirm against handle_tcp_accept() */
static struct event slowaccept_event;
static int slowaccept;

#ifdef HAVE_SSL
/* presumably an OCSP response for stapling; NULL/0 when none loaded */
static unsigned char *ocspdata = NULL;
static long ocspdata_len = 0;
#endif

#ifdef NONBLOCKING_IS_BROKEN
/* Define NUM_RECV_PER_SELECT to 1 (one) to avoid opportunistically trying to
   read multiple times from a socket when reported ready by select. */
# define NUM_RECV_PER_SELECT (1)
#else /* !NONBLOCKING_IS_BROKEN */
# define NUM_RECV_PER_SELECT (100)
#endif /* NONBLOCKING_IS_BROKEN */

#ifndef HAVE_MMSGHDR
/* fallback definition for platforms without struct mmsghdr */
struct mmsghdr {
	struct msghdr msg_hdr;
	unsigned int  msg_len;
};
#endif

/* batch buffers sized for receiving up to NUM_RECV_PER_SELECT UDP
 * messages per readiness notification */
static struct mmsghdr msgs[NUM_RECV_PER_SELECT];
static struct iovec iovecs[NUM_RECV_PER_SELECT];
static struct query *queries[NUM_RECV_PER_SELECT];
  181 
  182 /*
  183  * Data for the TCP connection handlers.
  184  *
  185  * The TCP handlers use non-blocking I/O.  This is necessary to avoid
  186  * blocking the entire server on a slow TCP connection, but does make
  187  * reading from and writing to the socket more complicated.
  188  *
  189  * Basically, whenever a read/write would block (indicated by the
  190  * EAGAIN errno variable) we remember the position we were reading
  191  * from/writing to and return from the TCP reading/writing event
  192  * handler.  When the socket becomes readable/writable again we
  193  * continue from the same position.
  194  */
struct tcp_handler_data
{
	/*
	 * The region used to allocate all TCP connection related
	 * data, including this structure.  This region is destroyed
	 * when the connection is closed.
	 */
	region_type*        region;

	/*
	 * The global nsd structure.
	 */
	struct nsd*         nsd;

	/*
	 * The current query data for this TCP connection.
	 */
	query_type*         query;

	/*
	 * The query_state is used to remember if we are performing an
	 * AXFR, if we're done processing, or if we should discard the
	 * query and connection.
	 */
	query_state_type    query_state;

	/*
	 * The event for the file descriptor and tcp timeout
	 */
	struct event event;

	/*
	 * The bytes_transmitted field is used to remember the number
	 * of bytes transmitted when receiving or sending a DNS
	 * packet.  The count includes the two additional bytes used
	 * to specify the packet length on a TCP connection.
	 */
	size_t              bytes_transmitted;

	/*
	 * The number of queries handled by this specific TCP connection.
	 */
	int                 query_count;
	
	/*
	 * The timeout in msec for this tcp connection
	 */
	int tcp_timeout;

	/*
	 * If the connection is allowed to have further queries on it.
	 */
	int tcp_no_more_queries;

#ifdef USE_DNSTAP
	/* the socket of the accept socket to find proper service (local) address the socket is bound to. */
	struct nsd_socket *socket;
#endif /* USE_DNSTAP */

#ifdef HAVE_SSL
	/*
	 * TLS object.
	 */
	SSL* tls;

	/*
	 * TLS handshake state.
	 * none: no handshake in progress; read/write: handshake wants
	 * that I/O direction; *_event: the corresponding event has
	 * been (re)registered while waiting.
	 */
	enum { tls_hs_none, tls_hs_read, tls_hs_write,
		tls_hs_read_event, tls_hs_write_event } shake_state;
#endif
	/* list of connections, for service of remaining tcp channels */
	struct tcp_handler_data *prev, *next;
};
/* global that is the list of active tcp channels (doubly linked,
 * NULL-terminated; head is the most recently added) */
static struct tcp_handler_data *tcp_active_list = NULL;
  271 
  272 /*
  273  * Handle incoming queries on the UDP server sockets.
  274  */
  275 static void handle_udp(int fd, short event, void* arg);
  276 
  277 /*
  278  * Handle incoming connections on the TCP sockets.  These handlers
  279  * usually wait for the NETIO_EVENT_READ event (indicating an incoming
  280  * connection) but are disabled when the number of current TCP
  281  * connections is equal to the maximum number of TCP connections.
  282  * Disabling is done by changing the handler to wait for the
  283  * NETIO_EVENT_NONE type.  This is done using the function
  284  * configure_tcp_accept_handlers.
  285  */
  286 static void handle_tcp_accept(int fd, short event, void* arg);
  287 
  288 /*
  289  * Handle incoming queries on a TCP connection.  The TCP connections
  290  * are configured to be non-blocking and the handler may be called
  291  * multiple times before a complete query is received.
  292  */
  293 static void handle_tcp_reading(int fd, short event, void* arg);
  294 
  295 /*
  296  * Handle outgoing responses on a TCP connection.  The TCP connections
  297  * are configured to be non-blocking and the handler may be called
  298  * multiple times before a complete response is sent.
  299  */
  300 static void handle_tcp_writing(int fd, short event, void* arg);
  301 
  302 #ifdef HAVE_SSL
  303 /* Create SSL object and associate fd */
  304 static SSL* incoming_ssl_fd(SSL_CTX* ctx, int fd);
  305 /*
  306  * Handle TLS handshake. May be called multiple times if incomplete.
  307  */
  308 static int tls_handshake(struct tcp_handler_data* data, int fd, int writing);
  309 
  310 /*
  311  * Handle incoming queries on a TLS over TCP connection.  The TLS
  312  * connections are configured to be non-blocking and the handler may
  313  * be called multiple times before a complete query is received.
  314  */
  315 static void handle_tls_reading(int fd, short event, void* arg);
  316 
  317 /*
  318  * Handle outgoing responses on a TLS over TCP connection.  The TLS
  319  * connections are configured to be non-blocking and the handler may
  320  * be called multiple times before a complete response is sent.
  321  */
  322 static void handle_tls_writing(int fd, short event, void* arg);
  323 #endif
  324 
  325 /*
  326  * Send all children the quit nonblocking, then close pipe.
  327  */
  328 static void send_children_quit(struct nsd* nsd);
  329 /* same, for shutdown time, waits for child to exit to avoid restart issues */
  330 static void send_children_quit_and_wait(struct nsd* nsd);
  331 
  332 /* set childrens flags to send NSD_STATS to them */
  333 #ifdef BIND8_STATS
  334 static void set_children_stats(struct nsd* nsd);
  335 #endif /* BIND8_STATS */
  336 
  337 /*
  338  * Change the event types the HANDLERS are interested in to EVENT_TYPES.
  339  */
  340 static void configure_handler_event_types(short event_types);
  341 
/* dname compression offset table, indexed by domain number; entry 0 is
 * the query name at QHEADERSZ (see initialize_dname_compression_tables) */
static uint16_t *compressed_dname_offsets = 0;
/* allocated capacity (entries) of compressed_dname_offsets */
static uint32_t compression_table_capacity = 0;
/* number of entries in use: domain table count + 1 */
static uint32_t compression_table_size = 0;
/* NOTE(review): presumably scratch list of domains touched while
 * compressing one packet -- confirm against query answer code */
static domain_type* compressed_dnames[MAXRRSPP];
  346 
  347 #ifdef USE_TCP_FASTOPEN
  348 /* Checks to see if the kernel value must be manually changed in order for
  349    TCP Fast Open to support server mode */
  350 static void report_tcp_fastopen_config() {
  351 
  352     int tcp_fastopen_fp;
  353     uint8_t tcp_fastopen_value;
  354 
  355     if ( (tcp_fastopen_fp = open(TCP_FASTOPEN_FILE, O_RDONLY)) == -1 ) {
  356         log_msg(LOG_INFO,"Error opening " TCP_FASTOPEN_FILE ": %s\n", strerror(errno));
  357     }
  358     if (read(tcp_fastopen_fp, &tcp_fastopen_value, 1) == -1 ) {
  359         log_msg(LOG_INFO,"Error reading " TCP_FASTOPEN_FILE ": %s\n", strerror(errno));
  360         close(tcp_fastopen_fp);
  361     }
  362     if (!(tcp_fastopen_value & TCP_FASTOPEN_SERVER_BIT_MASK)) {
  363         log_msg(LOG_WARNING, "Error: TCP Fast Open support is available and configured in NSD by default.\n");
  364         log_msg(LOG_WARNING, "However the kernel paramenters are not configured to support TCP_FASTOPEN in server mode.\n");
  365         log_msg(LOG_WARNING, "To enable TFO use the command:");
  366         log_msg(LOG_WARNING, "  'sudo sysctl -w net.ipv4.tcp_fastopen=2' for pure server mode or\n");
  367         log_msg(LOG_WARNING, "  'sudo sysctl -w net.ipv4.tcp_fastopen=3' for both client and server mode\n");
  368         log_msg(LOG_WARNING, "NSD will not have TCP Fast Open available until this change is made.\n");
  369         close(tcp_fastopen_fp);
  370     }
  371     close(tcp_fastopen_fp);
  372 }
  373 #endif
  374 
  375 /*
  376  * Remove the specified pid from the list of child pids.  Returns -1 if
  377  * the pid is not in the list, child_num otherwise.  The field is set to 0.
  378  */
  379 static int
  380 delete_child_pid(struct nsd *nsd, pid_t pid)
  381 {
  382     size_t i;
  383     for (i = 0; i < nsd->child_count; ++i) {
  384         if (nsd->children[i].pid == pid) {
  385             nsd->children[i].pid = 0;
  386             if(!nsd->children[i].need_to_exit) {
  387                 if(nsd->children[i].child_fd != -1)
  388                     close(nsd->children[i].child_fd);
  389                 nsd->children[i].child_fd = -1;
  390                 if(nsd->children[i].handler)
  391                     nsd->children[i].handler->fd = -1;
  392             }
  393             return i;
  394         }
  395     }
  396     return -1;
  397 }
  398 
/*
 * Restart child servers if necessary.
 * For every child slot whose pid is gone (<= 0), create a fresh IPC
 * socketpair and fork a replacement server child.
 * Returns 0 on success, -1 on socketpair or fork failure.
 */
static int
restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio,
	int* xfrd_sock_p)
{
	struct main_ipc_handler_data *ipc_data;
	size_t i;
	int sv[2];

	/* Fork the child processes... */
	for (i = 0; i < nsd->child_count; ++i) {
		if (nsd->children[i].pid <= 0) {
			/* discard the stale IPC fd of the dead child */
			if (nsd->children[i].child_fd != -1)
				close(nsd->children[i].child_fd);
			if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) {
				log_msg(LOG_ERR, "socketpair: %s",
					strerror(errno));
				return -1;
			}
			/* sv[0] stays in the parent, sv[1] goes to the child */
			nsd->children[i].child_fd = sv[0];
			nsd->children[i].parent_fd = sv[1];
			nsd->children[i].pid = fork();
			switch (nsd->children[i].pid) {
			default: /* SERVER MAIN */
				/* parent: close the child's end of the pair */
				close(nsd->children[i].parent_fd);
				nsd->children[i].parent_fd = -1;
				if (fcntl(nsd->children[i].child_fd, F_SETFL, O_NONBLOCK) == -1) {
					log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
				}
				if(!nsd->children[i].handler)
				{
					/* first fork for this slot: allocate
					 * the netio handler that services the
					 * child's IPC commands */
					ipc_data = (struct main_ipc_handler_data*) region_alloc(
						region, sizeof(struct main_ipc_handler_data));
					ipc_data->nsd = nsd;
					ipc_data->child = &nsd->children[i];
					ipc_data->child_num = i;
					ipc_data->xfrd_sock = xfrd_sock_p;
					ipc_data->packet = buffer_create(region, QIOBUFSZ);
					ipc_data->forward_mode = 0;
					ipc_data->got_bytes = 0;
					ipc_data->total_bytes = 0;
					ipc_data->acl_num = 0;
					nsd->children[i].handler = (struct netio_handler*) region_alloc(
						region, sizeof(struct netio_handler));
					nsd->children[i].handler->fd = nsd->children[i].child_fd;
					nsd->children[i].handler->timeout = NULL;
					nsd->children[i].handler->user_data = ipc_data;
					nsd->children[i].handler->event_types = NETIO_EVENT_READ;
					nsd->children[i].handler->event_handler = parent_handle_child_command;
					netio_add_handler(netio, nsd->children[i].handler);
				}
				/* clear any ongoing ipc */
				ipc_data = (struct main_ipc_handler_data*)
					nsd->children[i].handler->user_data;
				ipc_data->forward_mode = 0;
				/* restart - update fd */
				nsd->children[i].handler->fd = nsd->children[i].child_fd;
				break;
			case 0: /* CHILD */
				/* the child need not be able to access the
				 * nsd.db file */
				namedb_close_udb(nsd->db);
#ifdef MEMCLEAN /* OS collects memory pages */
				region_destroy(region);
#endif
				nsd->pid = 0;
				nsd->child_count = 0;
				nsd->server_kind = nsd->children[i].kind;
				nsd->this_child = &nsd->children[i];
				nsd->this_child->child_num = i;
				/* remove signal flags inherited from parent
				   the parent will handle them. */
				nsd->signal_hint_reload_hup = 0;
				nsd->signal_hint_reload = 0;
				nsd->signal_hint_child = 0;
				nsd->signal_hint_quit = 0;
				nsd->signal_hint_shutdown = 0;
				nsd->signal_hint_stats = 0;
				nsd->signal_hint_statsusr = 0;
				/* the child only uses parent_fd; close the
				 * xfrd socket and the parent's end */
				close(*xfrd_sock_p);
				close(nsd->this_child->child_fd);
				nsd->this_child->child_fd = -1;
				if (fcntl(nsd->this_child->parent_fd, F_SETFL, O_NONBLOCK) == -1) {
					log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
				}
				/* does not return during normal operation */
				server_child(nsd);
				/* NOTREACH */
				exit(0);
			case -1:
				log_msg(LOG_ERR, "fork failed: %s",
					strerror(errno));
				return -1;
			}
		}
	}
	return 0;
}
  498 
  499 #ifdef BIND8_STATS
  500 static void set_bind8_alarm(struct nsd* nsd)
  501 {
  502     /* resync so that the next alarm is on the next whole minute */
  503     if(nsd->st.period > 0) /* % by 0 gives divbyzero error */
  504         alarm(nsd->st.period - (time(NULL) % nsd->st.period));
  505 }
  506 #endif
  507 
  508 /* set zone stat ids for zones initially read in */
  509 static void
  510 zonestatid_tree_set(struct nsd* nsd)
  511 {
  512     struct radnode* n;
  513     for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) {
  514         zone_type* zone = (zone_type*)n->elem;
  515         zone->zonestatid = getzonestatid(nsd->options, zone->opts);
  516     }
  517 }
  518 
  519 #ifdef USE_ZONE_STATS
  520 void
  521 server_zonestat_alloc(struct nsd* nsd)
  522 {
  523     size_t num = (nsd->options->zonestatnames->count==0?1:
  524             nsd->options->zonestatnames->count);
  525     size_t sz = sizeof(struct nsdst)*num;
  526     char tmpfile[256];
  527     uint8_t z = 0;
  528 
  529     /* file names */
  530     nsd->zonestatfname[0] = 0;
  531     nsd->zonestatfname[1] = 0;
  532     snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.0",
  533         nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
  534     nsd->zonestatfname[0] = region_strdup(nsd->region, tmpfile);
  535     snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.1",
  536         nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
  537     nsd->zonestatfname[1] = region_strdup(nsd->region, tmpfile);
  538 
  539     /* file descriptors */
  540     nsd->zonestatfd[0] = open(nsd->zonestatfname[0], O_CREAT|O_RDWR, 0600);
  541     if(nsd->zonestatfd[0] == -1) {
  542         log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[0],
  543             strerror(errno));
  544         exit(1);
  545     }
  546     nsd->zonestatfd[1] = open(nsd->zonestatfname[1], O_CREAT|O_RDWR, 0600);
  547     if(nsd->zonestatfd[0] == -1) {
  548         log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[1],
  549             strerror(errno));
  550         close(nsd->zonestatfd[0]);
  551         unlink(nsd->zonestatfname[0]);
  552         exit(1);
  553     }
  554 
  555 #ifdef HAVE_MMAP
  556     if(lseek(nsd->zonestatfd[0], (off_t)sz-1, SEEK_SET) == -1) {
  557         log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[0],
  558             strerror(errno));
  559         exit(1);
  560     }
  561     if(write(nsd->zonestatfd[0], &z, 1) == -1) {
  562         log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
  563             nsd->zonestatfname[0], strerror(errno));
  564         exit(1);
  565     }
  566     if(lseek(nsd->zonestatfd[1], (off_t)sz-1, SEEK_SET) == -1) {
  567         log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[1],
  568             strerror(errno));
  569         exit(1);
  570     }
  571     if(write(nsd->zonestatfd[1], &z, 1) == -1) {
  572         log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
  573             nsd->zonestatfname[1], strerror(errno));
  574         exit(1);
  575     }
  576     nsd->zonestat[0] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE,
  577         MAP_SHARED, nsd->zonestatfd[0], 0);
  578     if(nsd->zonestat[0] == MAP_FAILED) {
  579         log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
  580         unlink(nsd->zonestatfname[0]);
  581         unlink(nsd->zonestatfname[1]);
  582         exit(1);
  583     }
  584     nsd->zonestat[1] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE,
  585         MAP_SHARED, nsd->zonestatfd[1], 0);
  586     if(nsd->zonestat[1] == MAP_FAILED) {
  587         log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
  588         unlink(nsd->zonestatfname[0]);
  589         unlink(nsd->zonestatfname[1]);
  590         exit(1);
  591     }
  592     memset(nsd->zonestat[0], 0, sz);
  593     memset(nsd->zonestat[1], 0, sz);
  594     nsd->zonestatsize[0] = num;
  595     nsd->zonestatsize[1] = num;
  596     nsd->zonestatdesired = num;
  597     nsd->zonestatsizenow = num;
  598     nsd->zonestatnow = nsd->zonestat[0];
  599 #endif /* HAVE_MMAP */
  600 }
  601 
/* Remap the zonestat shared array idx to the new byte size sz.  Uses
 * mremap where available, otherwise msync+munmap+mmap of the backing
 * file.  Exits the process on mapping failure. */
void
zonestat_remap(struct nsd* nsd, int idx, size_t sz)
{
#ifdef HAVE_MMAP
#ifdef MREMAP_MAYMOVE
	nsd->zonestat[idx] = (struct nsdst*)mremap(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx], sz,
		MREMAP_MAYMOVE);
	if(nsd->zonestat[idx] == MAP_FAILED) {
		log_msg(LOG_ERR, "mremap failed: %s", strerror(errno));
		exit(1);
	}
#else /* !MREMAP_MAYMOVE */
	/* flush pending changes to the file before tearing the map down */
	if(msync(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx], MS_ASYNC) != 0)
		log_msg(LOG_ERR, "msync failed: %s", strerror(errno));
	if(munmap(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx]) != 0)
		log_msg(LOG_ERR, "munmap failed: %s", strerror(errno));
	/* map the same backing file again at the new size */
	nsd->zonestat[idx] = (struct nsdst*)mmap(NULL, sz,
		PROT_READ|PROT_WRITE, MAP_SHARED, nsd->zonestatfd[idx], 0);
	if(nsd->zonestat[idx] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		exit(1);
	}
#endif /* MREMAP */
#endif /* HAVE_MMAP */
}
  630 
/* realloc the zonestat array for the one that is not currently in use,
 * to match the desired new size of the array (if applicable) */
void
server_zonestat_realloc(struct nsd* nsd)
{
#ifdef HAVE_MMAP
	uint8_t z = 0;
	size_t sz;
	int idx = 0; /* index of the zonestat array that is not in use */
	if(nsd->zonestatnow == nsd->zonestat[0])
		idx = 1;
	/* nothing to do when the spare array already has the right size */
	if(nsd->zonestatsize[idx] == nsd->zonestatdesired)
		return;
	sz = sizeof(struct nsdst)*nsd->zonestatdesired;
	/* extend the backing file first by writing a byte at sz-1, so the
	 * remapped region has backing store */
	if(lseek(nsd->zonestatfd[idx], (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[idx],
			strerror(errno));
		exit(1);
	}
	if(write(nsd->zonestatfd[idx], &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->zonestatfname[idx], strerror(errno));
		exit(1);
	}
	zonestat_remap(nsd, idx, sz);
	/* zero the newly allocated region */
	if(nsd->zonestatdesired > nsd->zonestatsize[idx]) {
		memset(((char*)nsd->zonestat[idx])+sizeof(struct nsdst) *
			nsd->zonestatsize[idx], 0, sizeof(struct nsdst) *
			(nsd->zonestatdesired - nsd->zonestatsize[idx]));
	}
	nsd->zonestatsize[idx] = nsd->zonestatdesired;
#endif /* HAVE_MMAP */
}
  665 
  666 /* switchover to use the other array for the new children, that
  667  * briefly coexist with the old children.  And we want to avoid them
  668  * both writing to the same statistics arrays. */
  669 void
  670 server_zonestat_switch(struct nsd* nsd)
  671 {
  672     if(nsd->zonestatnow == nsd->zonestat[0]) {
  673         nsd->zonestatnow = nsd->zonestat[1];
  674         nsd->zonestatsizenow = nsd->zonestatsize[1];
  675     } else {
  676         nsd->zonestatnow = nsd->zonestat[0];
  677         nsd->zonestatsizenow = nsd->zonestatsize[0];
  678     }
  679 }
  680 #endif /* USE_ZONE_STATS */
  681 
  682 static void
  683 cleanup_dname_compression_tables(void *ptr)
  684 {
  685     free(ptr);
  686     compressed_dname_offsets = NULL;
  687     compression_table_capacity = 0;
  688 }
  689 
/* Grow (if needed) and reset the dname compression offset table so it
 * can hold one entry per domain in the database plus extra numbers. */
static void
initialize_dname_compression_tables(struct nsd *nsd)
{
	size_t needed = domain_table_count(nsd->db->domains) + 1;
	needed += EXTRA_DOMAIN_NUMBERS;
	if(compression_table_capacity < needed) {
		if(compressed_dname_offsets) {
			/* deregister the old cleanup before freeing, so the
			 * region never frees a dangling pointer later */
			region_remove_cleanup(nsd->db->region,
				cleanup_dname_compression_tables,
				compressed_dname_offsets);
			free(compressed_dname_offsets);
		}
		compressed_dname_offsets = (uint16_t *) xmallocarray(
			needed, sizeof(uint16_t));
		/* free the new table when the db region is destroyed */
		region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables,
			compressed_dname_offsets);
		compression_table_capacity = needed;
		compression_table_size=domain_table_count(nsd->db->domains)+1;
	}
	/* zero all offsets; 0 means "no compression target stored" */
	memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t));
	compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */
}
  712 
  713 static int
  714 set_cloexec(struct nsd_socket *sock)
  715 {
  716     assert(sock != NULL);
  717 
  718     if(fcntl(sock->s, F_SETFD, FD_CLOEXEC) == -1) {
  719         const char *socktype =
  720             sock->addr.ai_family == SOCK_DGRAM ? "udp" : "tcp";
  721         log_msg(LOG_ERR, "fcntl(..., O_CLOEXEC) failed for %s: %s",
  722             socktype, strerror(errno));
  723         return -1;
  724     }
  725 
  726     return 1;
  727 }
  728 
/* Enable kernel load-balanced port reuse on the socket.
 * Returns 1 on success, -1 when setting failed (logged unless it is a
 * quiet ENOPROTOOPT at low verbosity), 0 when the option does not
 * exist at compile time. */
static int
set_reuseport(struct nsd_socket *sock)
{
#ifdef SO_REUSEPORT
	int on = 1;
#ifdef SO_REUSEPORT_LB
	/* FreeBSD 12 has SO_REUSEPORT_LB that does load balancing like
	 * SO_REUSEPORT on Linux. This is what the users want with the config
	 * option in nsd.conf; if we actually need local address and port reuse
	 * they'll also need to have SO_REUSEPORT set for them, assume it was
	 * _LB they want.
	 */
	int opt = SO_REUSEPORT_LB;
	static const char optname[] = "SO_REUSEPORT_LB";
#else /* !SO_REUSEPORT_LB */
	int opt = SO_REUSEPORT;
	static const char optname[] = "SO_REUSEPORT";
#endif /* SO_REUSEPORT_LB */

	if (0 == setsockopt(sock->s, SOL_SOCKET, opt, &on, sizeof(on))) {
		return 1;
	} else if(verbosity >= 3 || errno != ENOPROTOOPT) {
		log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s",
			optname, strerror(errno));
	}
	return -1;
#else
	(void)sock;
#endif /* SO_REUSEPORT */

	return 0;
}
  761 
  762 static int
  763 set_reuseaddr(struct nsd_socket *sock)
  764 {
  765 #ifdef SO_REUSEADDR
  766     int on = 1;
  767     if(setsockopt(sock->s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == 0) {
  768         return 1;
  769     }
  770     log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s",
  771         strerror(errno));
  772     return -1;
  773 #endif /* SO_REUSEADDR */
  774     return 0;
  775 }
  776 
/* Set the socket receive buffer to rcv bytes, preferring the force
 * variant where available.
 * Returns 1 on success; 0 on "soft" failures (EPERM/ENOSYS/ENOBUFS)
 * that the caller may ignore; -1 on other errors (logged); 0 when no
 * buffer option exists at compile time. */
static int
set_rcvbuf(struct nsd_socket *sock, int rcv)
{
#ifdef SO_RCVBUF
#ifdef SO_RCVBUFFORCE
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_RCVBUFFORCE, &rcv, sizeof(rcv)))
	{
		return 1;
	}
	if(errno == EPERM || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUFFORCE, ...) failed: %s",
		strerror(errno));
	return -1;
#else /* !SO_RCVBUFFORCE */
	if (0 == setsockopt(
		sock->s, SOL_SOCKET, SO_RCVBUF, &rcv, sizeof(rcv)))
	{
		return 1;
	}
	if(errno == ENOSYS || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUF, ...) failed: %s",
		strerror(errno));
	return -1;
#endif /* SO_RCVBUFFORCE */
#endif /* SO_RCVBUF */

	return 0;
}
  810 
/* Set the socket send buffer to snd bytes; mirror image of
 * set_rcvbuf() above, with the same return convention:
 * 1 success, 0 soft failure (EPERM/ENOSYS/ENOBUFS or option missing),
 * -1 other errors (logged). */
static int
set_sndbuf(struct nsd_socket *sock, int snd)
{
#ifdef SO_SNDBUF
#ifdef SO_SNDBUFFORCE
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_SNDBUFFORCE, &snd, sizeof(snd)))
	{
		return 1;
	}
	if(errno == EPERM || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUFFORCE, ...) failed: %s",
		strerror(errno));
	return -1;
#else /* !SO_SNDBUFFORCE */
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_SNDBUF, &snd, sizeof(snd)))
	{
		return 1;
	}
	if(errno == ENOSYS || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUF, ...) failed: %s",
		strerror(errno));
	return -1;
#endif /* SO_SNDBUFFORCE */
#endif /* SO_SNDBUF */

	return 0;
}
  844 
  845 static int
  846 set_nonblock(struct nsd_socket *sock)
  847 {
  848     const char *socktype =
  849         sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
  850 
  851     if(fcntl(sock->s, F_SETFL, O_NONBLOCK) == -1) {
  852         log_msg(LOG_ERR, "fctnl(..., O_NONBLOCK) failed for %s: %s",
  853             socktype, strerror(errno));
  854         return -1;
  855     }
  856 
  857     return 1;
  858 }
  859 
  860 static int
  861 set_ipv6_v6only(struct nsd_socket *sock)
  862 {
  863 #ifdef INET6
  864 #ifdef IPV6_V6ONLY
  865     int on = 1;
  866     const char *socktype =
  867         sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
  868 
  869     if(0 == setsockopt(
  870         sock->s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)))
  871     {
  872         return 1;
  873     }
  874 
  875     log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed for %s: %s",
  876         socktype, strerror(errno));
  877     return -1;
  878 #endif /* IPV6_V6ONLY */
  879 #endif /* INET6 */
  880 
  881     return 0;
  882 }
  883 
  884 static int
  885 set_ipv6_use_min_mtu(struct nsd_socket *sock)
  886 {
  887 #if defined(INET6) && (defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU))
  888 #if defined(IPV6_USE_MIN_MTU)
  889     /* There is no fragmentation of IPv6 datagrams during forwarding in the
  890      * network. Therefore we do not send UDP datagrams larger than the
  891      * minimum IPv6 MTU of 1280 octets. The EDNS0 message length can be
  892      * larger if the network stack supports IPV6_USE_MIN_MTU.
  893      */
  894     int opt = IPV6_USE_MIN_MTU;
  895     int optval = 1;
  896     static const char optname[] = "IPV6_USE_MIN_MTU";
  897 #elif defined(IPV6_MTU)
  898     /* On Linux, PMTUD is disabled by default for datagrams so set the MTU
  899      * to the MIN MTU to get the same.
  900      */
  901     int opt = IPV6_MTU;
  902     int optval = IPV6_MIN_MTU;
  903     static const char optname[] = "IPV6_MTU";
  904 #endif
  905     if(0 == setsockopt(
  906         sock->s, IPPROTO_IPV6, opt, &optval, sizeof(optval)))
  907     {
  908         return 1;
  909     }
  910 
  911     log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s",
  912         optname, strerror(errno));
  913     return -1;
  914 #else
  915     (void)sock;
  916 #endif /* INET6 */
  917 
  918     return 0;
  919 }
  920 
/* Disable path-MTU discovery on an IPv4 socket so responses go out with
 * DF=0 and may be fragmented in the network, rather than trusting
 * (possibly forged) PMTU information.
 * Returns 1 as soon as one option sticks, 0 if no applicable option is
 * compiled in, -1 if every applicable option failed (each failure logged).
 * The options are tried in order of preference; do not reorder. */
static int
set_ipv4_no_pmtu_disc(struct nsd_socket *sock)
{
    int ret = 0;

#if defined(IP_MTU_DISCOVER)
    int opt = IP_MTU_DISCOVER;
    int optval;
# if defined(IP_PMTUDISC_OMIT)
    /* Linux 3.15 has IP_PMTUDISC_OMIT which makes sockets ignore PMTU
     * information and send packets with DF=0. Fragmentation is allowed if
     * and only if the packet size exceeds the outgoing interface MTU or
     * the packet encounters smaller MTU link in network. This mitigates
     * DNS fragmentation attacks by preventing forged PMTU information.
     * FreeBSD already has same semantics without setting the option.
     */
    optval = IP_PMTUDISC_OMIT;
    if(0 == setsockopt(
        sock->s, IPPROTO_IP, opt, &optval, sizeof(optval)))
    {
        return 1;
    }

    log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
        "IP_MTU_DISCOVER", "IP_PMTUDISC_OMIT", strerror(errno));
# endif /* IP_PMTUDISC_OMIT */
# if defined(IP_PMTUDISC_DONT)
    /* Use IP_PMTUDISC_DONT if IP_PMTUDISC_OMIT failed / undefined. */
    optval = IP_PMTUDISC_DONT;
    if(0 == setsockopt(
        sock->s, IPPROTO_IP, opt, &optval, sizeof(optval)))
    {
        return 1;
    }

    log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
        "IP_MTU_DISCOVER", "IP_PMTUDISC_DONT", strerror(errno));
# endif
    /* both variants failed (or neither succeeded above) */
    ret = -1;
#elif defined(IP_DONTFRAG)
    /* BSD-style alternative: clear the don't-fragment flag directly. */
    int off = 0;
    if (0 == setsockopt(
        sock->s, IPPROTO_IP, IP_DONTFRAG, &off, sizeof(off)))
    {
        return 1;
    }

    log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s",
        strerror(errno));
    ret = -1;
#else
    (void)sock;
#endif

    return ret;
}
  977 
  978 static int
  979 set_ip_freebind(struct nsd_socket *sock)
  980 {
  981 #ifdef IP_FREEBIND
  982     int on = 1;
  983     const char *socktype =
  984         sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
  985     if(setsockopt(sock->s, IPPROTO_IP, IP_FREEBIND, &on, sizeof(on)) == 0)
  986     {
  987         return 1;
  988     }
  989     log_msg(LOG_ERR, "setsockopt(..., IP_FREEBIND, ...) failed for %s: %s",
  990         socktype, strerror(errno));
  991     return -1;
  992 #else
  993     (void)sock;
  994 #endif /* IP_FREEBIND */
  995 
  996     return 0;
  997 }
  998 
/* Allow the socket to bind to non-local addresses.
 * Returns 1 on success, 0 when the platform offers no such option,
 * -1 on failure (logged). The macro blob below only selects the
 * platform-specific option name/level; the setsockopt call itself
 * is the same everywhere. */
static int
set_ip_transparent(struct nsd_socket *sock)
{
    /*
    The scandalous preprocessor blob here calls for some explanation :)
    POSIX does not specify an option to bind non-local IPs, so
    platforms developed several implementation-specific options,
    all set in the same way, but with different names.
    For additional complexity, some platform manage this setting
    differently for different address families (IPv4 vs IPv6).
    This scandalous preprocessor blob below abstracts such variability
    in the way which leaves the C code as lean and clear as possible.
    */

#if defined(IP_TRANSPARENT)
#   define NSD_SOCKET_OPTION_TRANSPARENT                        IP_TRANSPARENT
#   define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL       IPPROTO_IP
#   define NSD_SOCKET_OPTION_TRANSPARENT_NAME           "IP_TRANSPARENT"
// as of 2020-01, Linux does not support this on IPv6 programmatically
#elif defined(SO_BINDANY)
#   define NSD_SOCKET_OPTION_TRANSPARENT                        SO_BINDANY
#   define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL       SOL_SOCKET
#   define NSD_SOCKET_OPTION_TRANSPARENT_NAME           "SO_BINDANY"
#elif defined(IP_BINDANY)
#   define NSD_SOCKET_OPTION_TRANSPARENT                        IP_BINDANY
#   define NSD_SOCKET_OPTION_TRANSPARENT6                       IPV6_BINDANY
#   define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL       IPPROTO_IP
#   define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6  IPPROTO_IPV6
#   define NSD_SOCKET_OPTION_TRANSPARENT_NAME           "IP_BINDANY"
#endif

#ifndef NSD_SOCKET_OPTION_TRANSPARENT
    (void)sock;
#else
    /* Default the IPv6 variants to the IPv4 ones on platforms that use
     * a single option for both address families. */
#   ifndef NSD_SOCKET_OPTION_TRANSPARENT6
#       define NSD_SOCKET_OPTION_TRANSPARENT6 NSD_SOCKET_OPTION_TRANSPARENT
#   endif
#   ifndef NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6
#       define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL
#   endif
#   ifndef NSD_SOCKET_OPTION_TRANSPARENT_NAME6
#       define NSD_SOCKET_OPTION_TRANSPARENT_NAME6 NSD_SOCKET_OPTION_TRANSPARENT_NAME
#   endif

    int on = 1;
    const char *socktype =
        sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
    const int is_ip6 = (sock->addr.ai_family == AF_INET6);

    if(0 == setsockopt(
        sock->s,
        is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 : NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL,
        is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT6 : NSD_SOCKET_OPTION_TRANSPARENT,
        &on, sizeof(on)))
    {
        return 1;
    }

    log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed for %s: %s",
        is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_NAME6 : NSD_SOCKET_OPTION_TRANSPARENT_NAME, socktype, strerror(errno));
    return -1;
#endif

    return 0;
}
 1064 
 1065 static int
 1066 set_tcp_maxseg(struct nsd_socket *sock, int mss)
 1067 {
 1068 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
 1069     if(setsockopt(sock->s, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == 0) {
 1070         return 1;
 1071     }
 1072     log_msg(LOG_ERR, "setsockopt(..., TCP_MAXSEG, ...) failed for tcp: %s",
 1073         strerror(errno));
 1074     return -1;
 1075 #else
 1076     log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported");
 1077 #endif
 1078     return 0;
 1079 }
 1080 
 1081 #ifdef USE_TCP_FASTOPEN
 1082 static int
 1083 set_tcp_fastopen(struct nsd_socket *sock)
 1084 {
 1085     /* qlen specifies how many outstanding TFO requests to allow. Limit is
 1086      * a defense against IP spoofing attacks as suggested in RFC7413.
 1087      */
 1088     int qlen;
 1089 
 1090 #ifdef __APPLE__
 1091     /* macOS X implementation only supports qlen of 1 via this call. The
 1092      * actual value is configured by the net.inet.tcp.fastopen_backlog
 1093      * kernel parameter.
 1094      */
 1095     qlen = 1;
 1096 #else
 1097     /* 5 is recommended on Linux. */
 1098     qlen = 5;
 1099 #endif
 1100     if (0 == setsockopt(
 1101         sock->s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)))
 1102     {
 1103         return 1;
 1104     }
 1105 
 1106     if (errno == EPERM) {
 1107         log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s "
 1108                  "; this could likely be because sysctl "
 1109                  "net.inet.tcp.fastopen.enabled, "
 1110                  "net.inet.tcp.fastopen.server_enable, or "
 1111                  "net.ipv4.tcp_fastopen is disabled",
 1112             strerror(errno));
 1113     /* Squelch ENOPROTOOPT: FreeBSD server mode with kernel support
 1114      * disabled, except when verbosity enabled for debugging
 1115      */
 1116     } else if(errno != ENOPROTOOPT || verbosity >= 3) {
 1117         log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s",
 1118             strerror(errno));
 1119     }
 1120 
 1121     return (errno == ENOPROTOOPT ? 0 : -1);
 1122 }
 1123 #endif /* USE_TCP_FASTOPEN */
 1124 
 1125 static int
 1126 set_bindtodevice(struct nsd_socket *sock)
 1127 {
 1128 #if defined(SO_BINDTODEVICE)
 1129     if(setsockopt(sock->s, SOL_SOCKET, SO_BINDTODEVICE,
 1130         sock->device, strlen(sock->device)) == -1)
 1131     {
 1132         log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
 1133                          "SO_BINDTODEVICE", sock->device, strerror(errno));
 1134         return -1;
 1135     }
 1136 
 1137     return 1;
 1138 #else
 1139     (void)sock;
 1140     return 0;
 1141 #endif
 1142 }
 1143 
 1144 static int
 1145 set_setfib(struct nsd_socket *sock)
 1146 {
 1147 #if defined(SO_SETFIB)
 1148     if(setsockopt(sock->s, SOL_SOCKET, SO_SETFIB,
 1149                   (const void *)&sock->fib, sizeof(sock->fib)) == -1)
 1150     {
 1151         log_msg(LOG_ERR, "setsockopt(..., %s, %d, ...) failed: %s",
 1152                          "SO_SETFIB", sock->fib, strerror(errno));
 1153         return -1;
 1154     }
 1155 
 1156     return 1;
 1157 #else
 1158     (void)sock;
 1159     return 0;
 1160 #endif
 1161 }
 1162 
/* Create, configure and bind one UDP server socket described by sock.
 * Returns 1 on success (sock->s holds the bound fd), 0 when an optional
 * IPv6 socket is unsupported on this host, -1 on error.
 * All socket options must be applied before bind(); do not reorder. */
static int
open_udp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works)
{
    /* default kernel buffer request: 1 MiB each way, overridable by config */
    int rcv = 1*1024*1024, snd = 1*1024*1024;

    if(-1 == (sock->s = socket(
        sock->addr.ai_family, sock->addr.ai_socktype, 0)))
    {
#ifdef INET6
        /* an optional IPv6 socket on a v4-only host is not an error */
        if((sock->flags & NSD_SOCKET_IS_OPTIONAL) &&
           (sock->addr.ai_family == AF_INET6) &&
           (errno == EAFNOSUPPORT))
        {
            log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: "
                "not supported");
            return 0;
        }
#endif
        log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
        return -1;
    }

    set_cloexec(sock);

    /* once SO_REUSEPORT fails on one socket, stop trying on the rest */
    if(nsd->reuseport && reuseport_works && *reuseport_works)
        *reuseport_works = (set_reuseport(sock) == 1);

    if(nsd->options->receive_buffer_size > 0)
        rcv = nsd->options->receive_buffer_size;
    if(set_rcvbuf(sock, rcv) == -1)
        return -1;

    if(nsd->options->send_buffer_size > 0)
        snd = nsd->options->send_buffer_size;
    if(set_sndbuf(sock, snd) == -1)
        return -1;
#ifdef INET6
    if(sock->addr.ai_family == AF_INET6) {
        if(set_ipv6_v6only(sock) == -1 ||
           set_ipv6_use_min_mtu(sock) == -1)
            return -1;
    } else
#endif /* INET6 */
    if(sock->addr.ai_family == AF_INET) {
        if(set_ipv4_no_pmtu_disc(sock) == -1)
            return -1;
    }

    /* Set socket to non-blocking. Otherwise, on operating systems
     * with thundering herd problems, the UDP recv could block
     * after select returns readable.
     */
    set_nonblock(sock);

    /* freebind/transparent failures are logged but deliberately non-fatal */
    if(nsd->options->ip_freebind)
        (void)set_ip_freebind(sock);
    if(nsd->options->ip_transparent)
        (void)set_ip_transparent(sock);
    if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1)
        return -1;
    if(sock->fib != -1 && set_setfib(sock) == -1)
        return -1;

    if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) {
        char buf[256];
        addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf));
        log_msg(LOG_ERR, "can't bind udp socket %s: %s",
            buf, strerror(errno));
        return -1;
    }

    return 1;
}
 1236 
/* Create, configure, bind and listen on one TCP server socket.
 * Returns 1 on success, 0 when an optional IPv6 socket is unsupported
 * on this host, -1 on error.
 * Options must be applied before bind()/listen(); do not reorder. */
static int
open_tcp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works)
{
#ifdef USE_TCP_FASTOPEN
    report_tcp_fastopen_config();
#endif

    (void)reuseport_works;

    if(-1 == (sock->s = socket(
        sock->addr.ai_family, sock->addr.ai_socktype, 0)))
    {
#ifdef INET6
        /* an optional IPv6 socket on a v4-only host is not an error */
        if((sock->flags & NSD_SOCKET_IS_OPTIONAL) &&
           (sock->addr.ai_family == AF_INET6) &&
           (errno == EAFNOSUPPORT))
        {
            log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: "
                                 "not supported");
            return 0;
        }
#endif /* INET6 */
        log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
        return -1;
    }

    set_cloexec(sock);

    /* once SO_REUSEPORT fails on one socket, stop trying on the rest */
    if(nsd->reuseport && reuseport_works && *reuseport_works)
        *reuseport_works = (set_reuseport(sock) == 1);

    (void)set_reuseaddr(sock);

#ifdef INET6
    if(sock->addr.ai_family == AF_INET6) {
        if (set_ipv6_v6only(sock) == -1 ||
            set_ipv6_use_min_mtu(sock) == -1)
            return -1;
    }
#endif

    if(nsd->tcp_mss > 0)
        set_tcp_maxseg(sock, nsd->tcp_mss);
    /* (StevensUNP p463), if TCP listening socket is blocking, then
       it may block in accept, even if select() says readable. */
    (void)set_nonblock(sock);
    /* freebind/transparent failures are logged but deliberately non-fatal */
    if(nsd->options->ip_freebind)
        (void)set_ip_freebind(sock);
    if(nsd->options->ip_transparent)
        (void)set_ip_transparent(sock);
    if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1)
        return -1;
    if(sock->fib != -1 && set_setfib(sock) == -1)
        return -1;

    if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) {
        char buf[256];
        addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf));
        log_msg(LOG_ERR, "can't bind tcp socket %s: %s",
            buf, strerror(errno));
        return -1;
    }

#ifdef USE_TCP_FASTOPEN
    /* TFO is best-effort; a failure leaves a working plain TCP socket */
    (void)set_tcp_fastopen(sock);
#endif

    if(listen(sock->s, TCP_BACKLOG) == -1) {
        log_msg(LOG_ERR, "can't listen: %s", strerror(errno));
        return -1;
    }

    return 1;
}
 1311 
 1312 /*
 1313  * Initialize the server, reuseport, create and bind the sockets.
 1314  */
 1315 int
 1316 server_init(struct nsd *nsd)
 1317 {
 1318     size_t i;
 1319     int reuseport = 1; /* Determine if REUSEPORT works. */
 1320 
 1321     /* open server interface ports */
 1322     for(i = 0; i < nsd->ifs; i++) {
 1323         if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1 ||
 1324            open_tcp_socket(nsd, &nsd->tcp[i], &reuseport) == -1)
 1325         {
 1326             return -1;
 1327         }
 1328     }
 1329 
 1330     if(nsd->reuseport && reuseport) {
 1331         size_t ifs = nsd->ifs * nsd->reuseport;
 1332 
 1333         /* increase the size of the interface arrays, there are going
 1334          * to be separate interface file descriptors for every server
 1335          * instance */
 1336         region_remove_cleanup(nsd->region, free, nsd->udp);
 1337         region_remove_cleanup(nsd->region, free, nsd->tcp);
 1338 
 1339         nsd->udp = xrealloc(nsd->udp, ifs * sizeof(*nsd->udp));
 1340         nsd->tcp = xrealloc(nsd->tcp, ifs * sizeof(*nsd->tcp));
 1341         region_add_cleanup(nsd->region, free, nsd->udp);
 1342         region_add_cleanup(nsd->region, free, nsd->tcp);
 1343         if(ifs > nsd->ifs) {
 1344             memset(&nsd->udp[nsd->ifs], 0,
 1345                 (ifs-nsd->ifs)*sizeof(*nsd->udp));
 1346             memset(&nsd->tcp[nsd->ifs], 0,
 1347                 (ifs-nsd->ifs)*sizeof(*nsd->tcp));
 1348         }
 1349 
 1350         for(i = nsd->ifs; i < ifs; i++) {
 1351             nsd->udp[i] = nsd->udp[i%nsd->ifs];
 1352             nsd->udp[i].s = -1;
 1353             if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1) {
 1354                 return -1;
 1355             }
 1356             /* Turn off REUSEPORT for TCP by copying the socket
 1357              * file descriptor.
 1358              * This means we should not close TCP used by
 1359              * other servers in reuseport enabled mode, in
 1360              * server_child().
 1361              */
 1362             nsd->tcp[i] = nsd->tcp[i%nsd->ifs];
 1363         }
 1364 
 1365         nsd->ifs = ifs;
 1366     } else {
 1367         nsd->reuseport = 0;
 1368     }
 1369 
 1370     return 0;
 1371 }
 1372 
 1373 /*
 1374  * Prepare the server for take off.
 1375  *
 1376  */
 1377 int
 1378 server_prepare(struct nsd *nsd)
 1379 {
 1380 #ifdef RATELIMIT
 1381     /* set secret modifier for hashing (udb ptr buckets and rate limits) */
 1382 #ifdef HAVE_GETRANDOM
 1383     uint32_t v;
 1384     if(getrandom(&v, sizeof(v), 0) == -1) {
 1385         log_msg(LOG_ERR, "getrandom failed: %s", strerror(errno));
 1386         exit(1);
 1387     }
 1388     hash_set_raninit(v);
 1389 #elif defined(HAVE_ARC4RANDOM)
 1390     hash_set_raninit(arc4random());
 1391 #else
 1392     uint32_t v = getpid() ^ time(NULL);
 1393     srandom((unsigned long)v);
 1394 #  ifdef HAVE_SSL
 1395     if(RAND_status() && RAND_bytes((unsigned char*)&v, sizeof(v)) > 0)
 1396         hash_set_raninit(v);
 1397     else
 1398 #  endif
 1399         hash_set_raninit(random());
 1400 #endif
 1401     rrl_mmap_init(nsd->child_count, nsd->options->rrl_size,
 1402         nsd->options->rrl_ratelimit,
 1403         nsd->options->rrl_whitelist_ratelimit,
 1404         nsd->options->rrl_slip,
 1405         nsd->options->rrl_ipv4_prefix_length,
 1406         nsd->options->rrl_ipv6_prefix_length);
 1407 #endif /* RATELIMIT */
 1408 
 1409     /* Open the database... */
 1410     if ((nsd->db = namedb_open(nsd->dbfile, nsd->options)) == NULL) {
 1411         log_msg(LOG_ERR, "unable to open the database %s: %s",
 1412             nsd->dbfile, strerror(errno));
 1413         unlink(nsd->task[0]->fname);
 1414         unlink(nsd->task[1]->fname);
 1415 #ifdef USE_ZONE_STATS
 1416         unlink(nsd->zonestatfname[0]);
 1417         unlink(nsd->zonestatfname[1]);
 1418 #endif
 1419         xfrd_del_tempdir(nsd);
 1420         return -1;
 1421     }
 1422     /* check if zone files have been modified */
 1423     /* NULL for taskudb because we send soainfo in a moment, batched up,
 1424      * for all zones */
 1425     if(nsd->options->zonefiles_check || (nsd->options->database == NULL ||
 1426         nsd->options->database[0] == 0))
 1427         namedb_check_zonefiles(nsd, nsd->options, NULL, NULL);
 1428     zonestatid_tree_set(nsd);
 1429 
 1430     compression_table_capacity = 0;
 1431     initialize_dname_compression_tables(nsd);
 1432 
 1433 #ifdef  BIND8_STATS
 1434     /* Initialize times... */
 1435     time(&nsd->st.boot);
 1436     set_bind8_alarm(nsd);
 1437 #endif /* BIND8_STATS */
 1438 
 1439     return 0;
 1440 }
 1441 
 1442 /*
 1443  * Fork the required number of servers.
 1444  */
 1445 static int
 1446 server_start_children(struct nsd *nsd, region_type* region, netio_type* netio,
 1447     int* xfrd_sock_p)
 1448 {
 1449     size_t i;
 1450 
 1451     /* Start all child servers initially.  */
 1452     for (i = 0; i < nsd->child_count; ++i) {
 1453         nsd->children[i].pid = 0;
 1454     }
 1455 
 1456     return restart_child_servers(nsd, region, netio, xfrd_sock_p);
 1457 }
 1458 
 1459 static void
 1460 server_close_socket(struct nsd_socket *sock)
 1461 {
 1462     if(sock->s != -1) {
 1463         close(sock->s);
 1464         sock->s = -1;
 1465     }
 1466 }
 1467 
 1468 void
 1469 server_close_all_sockets(struct nsd_socket sockets[], size_t n)
 1470 {
 1471     size_t i;
 1472 
 1473     /* Close all the sockets... */
 1474     for (i = 0; i < n; ++i) {
 1475         server_close_socket(&sockets[i]);
 1476     }
 1477 }
 1478 
 1479 /*
 1480  * Close the sockets, shutdown the server and exit.
 1481  * Does not return.
 1482  */
 1483 void
 1484 server_shutdown(struct nsd *nsd)
 1485 {
 1486     size_t i;
 1487 
 1488     server_close_all_sockets(nsd->udp, nsd->ifs);
 1489     server_close_all_sockets(nsd->tcp, nsd->ifs);
 1490     /* CHILD: close command channel to parent */
 1491     if(nsd->this_child && nsd->this_child->parent_fd != -1)
 1492     {
 1493         close(nsd->this_child->parent_fd);
 1494         nsd->this_child->parent_fd = -1;
 1495     }
 1496     /* SERVER: close command channels to children */
 1497     if(!nsd->this_child)
 1498     {
 1499         for(i=0; i < nsd->child_count; ++i)
 1500             if(nsd->children[i].child_fd != -1)
 1501             {
 1502                 close(nsd->children[i].child_fd);
 1503                 nsd->children[i].child_fd = -1;
 1504             }
 1505     }
 1506 
 1507     tsig_finalize();
 1508 #ifdef HAVE_SSL
 1509     daemon_remote_delete(nsd->rc); /* ssl-delete secret keys */
 1510     if (nsd->tls_ctx)
 1511         SSL_CTX_free(nsd->tls_ctx);
 1512 #endif
 1513 
 1514 #ifdef MEMCLEAN /* OS collects memory pages */
 1515 #ifdef RATELIMIT
 1516     rrl_mmap_deinit_keep_mmap();
 1517 #endif
 1518 #ifdef USE_DNSTAP
 1519     dt_collector_destroy(nsd->dt_collector, nsd);
 1520 #endif
 1521     udb_base_free_keep_mmap(nsd->task[0]);
 1522     udb_base_free_keep_mmap(nsd->task[1]);
 1523     namedb_close_udb(nsd->db); /* keeps mmap */
 1524     namedb_close(nsd->db);
 1525     nsd_options_destroy(nsd->options);
 1526     region_destroy(nsd->region);
 1527 #endif
 1528     log_finalize();
 1529     exit(0);
 1530 }
 1531 
/* Create the two task files (used to exchange work with the xfrd
 * process) and the xfrd listener IPC structure. Exits the process on
 * failure, removing any temp files already created. */
void
server_prepare_xfrd(struct nsd* nsd)
{
    char tmpfile[256];
    /* create task mmaps */
    nsd->mytask = 0;
    snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.0",
        nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
    nsd->task[0] = task_file_create(tmpfile);
    if(!nsd->task[0]) {
#ifdef USE_ZONE_STATS
        unlink(nsd->zonestatfname[0]);
        unlink(nsd->zonestatfname[1]);
#endif
        xfrd_del_tempdir(nsd);
        exit(1);
    }
    snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.1",
        nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
    nsd->task[1] = task_file_create(tmpfile);
    if(!nsd->task[1]) {
        /* second task file failed: also remove the first one */
        unlink(nsd->task[0]->fname);
#ifdef USE_ZONE_STATS
        unlink(nsd->zonestatfname[0]);
        unlink(nsd->zonestatfname[1]);
#endif
        xfrd_del_tempdir(nsd);
        exit(1);
    }
    /* both task lists must start out empty */
    assert(udb_base_get_userdata(nsd->task[0])->data == 0);
    assert(udb_base_get_userdata(nsd->task[1])->data == 0);
    /* create xfrd listener structure */
    nsd->xfrd_listener = region_alloc(nsd->region,
        sizeof(netio_handler_type));
    nsd->xfrd_listener->user_data = (struct ipc_handler_conn_data*)
        region_alloc(nsd->region, sizeof(struct ipc_handler_conn_data));
    nsd->xfrd_listener->fd = -1;
    ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->nsd =
        nsd;
    ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->conn =
        xfrd_tcp_create(nsd->region, QIOBUFSZ);
}
 1574 
 1575 
/* Fork the xfrd process and wire up the IPC socketpair.
 * Note the roles: the fork PARENT calls xfrd_init() and never returns
 * from it (it becomes the xfrd process), while the fork CHILD returns
 * from this function and carries on as the server-parent, with
 * nsd->xfrd_listener->fd connected to xfrd.
 * del_db: recreate the task db xfrd was using (it may be corrupt).
 * reload_active: passed through to xfrd_init. */
void
server_start_xfrd(struct nsd *nsd, int del_db, int reload_active)
{
    pid_t pid;
    int sockets[2] = {0,0};
    struct ipc_handler_conn_data *data;

    if(nsd->xfrd_listener->fd != -1)
        close(nsd->xfrd_listener->fd);
    if(del_db) {
        /* recreate taskdb that xfrd was using, it may be corrupt */
        /* we (or reload) use nsd->mytask, and xfrd uses the other */
        char* tmpfile = nsd->task[1-nsd->mytask]->fname;
        nsd->task[1-nsd->mytask]->fname = NULL;
        /* free alloc already, so udb does not shrink itself */
        udb_alloc_delete(nsd->task[1-nsd->mytask]->alloc);
        nsd->task[1-nsd->mytask]->alloc = NULL;
        udb_base_free(nsd->task[1-nsd->mytask]);
        /* create new file, overwrite the old one */
        nsd->task[1-nsd->mytask] = task_file_create(tmpfile);
        free(tmpfile);
    }
    if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) {
        log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno));
        return;
    }
    pid = fork();
    switch (pid) {
    case -1:
        log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno));
        break;
    default:
        /* PARENT: close first socket, use second one */
        close(sockets[0]);
        if (fcntl(sockets[1], F_SETFL, O_NONBLOCK) == -1) {
            log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
        }
        if(del_db) xfrd_free_namedb(nsd);
        /* use other task than I am using, since if xfrd died and is
         * restarted, the reload is using nsd->mytask */
        nsd->mytask = 1 - nsd->mytask;

#ifdef HAVE_SETPROCTITLE
        setproctitle("xfrd");
#endif
#ifdef HAVE_CPUSET_T
        if(nsd->use_cpu_affinity) {
            set_cpu_affinity(nsd->xfrd_cpuset);
        }
#endif

        /* this process becomes xfrd; xfrd_init does not return */
        xfrd_init(sockets[1], nsd, del_db, reload_active, pid);
        /* ENOTREACH */
        break;
    case 0:
        /* CHILD: close second socket, use first one */
        close(sockets[1]);
        if (fcntl(sockets[0], F_SETFL, O_NONBLOCK) == -1) {
            log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
        }
        nsd->xfrd_listener->fd = sockets[0];
        break;
    }
    /* server-parent only */
    nsd->xfrd_listener->timeout = NULL;
    nsd->xfrd_listener->event_types = NETIO_EVENT_READ;
    nsd->xfrd_listener->event_handler = parent_handle_xfrd_command;
    /* clear ongoing ipc reads */
    data = (struct ipc_handler_conn_data *) nsd->xfrd_listener->user_data;
    data->conn->is_reading = 0;
}
 1647 
 1648 /** add all soainfo to taskdb */
 1649 static void
 1650 add_all_soa_to_task(struct nsd* nsd, struct udb_base* taskudb)
 1651 {
 1652     struct radnode* n;
 1653     udb_ptr task_last; /* last task, mytask is empty so NULL */
 1654     /* add all SOA INFO to mytask */
 1655     udb_ptr_init(&task_last, taskudb);
 1656     for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) {
 1657         task_new_soainfo(taskudb, &task_last, (zone_type*)n->elem, 0);
 1658     }
 1659     udb_ptr_unlink(&task_last, taskudb);
 1660 }
 1661 
/* Exchange SOA info with the xfrd process over the task databases.
 * May not return: shuts the server down if a shutdown signal is
 * pending, and exits on a broken handshake with xfrd. */
void
server_send_soa_xfrd(struct nsd* nsd, int shortsoa)
{
    /* normally this exchanges the SOA from nsd->xfrd and the expire back.
     *   parent fills one taskdb with soas, xfrd fills other with expires.
     *   then they exchange and process.
     * shortsoa: xfrd crashes and needs to be restarted and one taskdb
     *   may be in use by reload.  Fill SOA in taskdb and give to xfrd.
     *   expire notifications can be sent back via a normal reload later
     *   (xfrd will wait for current running reload to finish if any).
     */
    sig_atomic_t cmd = 0;
    pid_t mypid;
    int xfrd_sock = nsd->xfrd_listener->fd;
    struct udb_base* taskudb = nsd->task[nsd->mytask];
    udb_ptr t;
    if(!shortsoa) {
        if(nsd->signal_hint_shutdown) {
        shutdown:
            /* also reached via goto below after the xfrd handshake */
            log_msg(LOG_WARNING, "signal received, shutting down...");
            server_close_all_sockets(nsd->udp, nsd->ifs);
            server_close_all_sockets(nsd->tcp, nsd->ifs);
#ifdef HAVE_SSL
            daemon_remote_close(nsd->rc);
#endif
            /* Unlink it if possible... */
            unlinkpid(nsd->pidfile);
            unlink(nsd->task[0]->fname);
            unlink(nsd->task[1]->fname);
#ifdef USE_ZONE_STATS
            unlink(nsd->zonestatfname[0]);
            unlink(nsd->zonestatfname[1]);
#endif
            /* write the nsd.db to disk, wait for it to complete */
            udb_base_sync(nsd->db->udb, 1);
            udb_base_close(nsd->db->udb);
            server_shutdown(nsd);
            /* ENOTREACH */
            exit(0);
        }
    }
    if(shortsoa) {
        /* put SOA in xfrd task because mytask may be in use */
        taskudb = nsd->task[1-nsd->mytask];
    }

    add_all_soa_to_task(nsd, taskudb);
    if(!shortsoa) {
        /* wait for xfrd to signal task is ready, RELOAD signal */
        if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd) ||
            cmd != NSD_RELOAD) {
            log_msg(LOG_ERR, "did not get start signal from xfrd");
            exit(1);
        }
        if(nsd->signal_hint_shutdown) {
            goto shutdown;
        }
    }
    /* give xfrd our task, signal it with RELOAD_DONE */
    task_process_sync(taskudb);
    cmd = NSD_RELOAD_DONE;
    if(!write_socket(xfrd_sock, &cmd,  sizeof(cmd))) {
        log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
            (int)nsd->pid, strerror(errno));
    }
    /* also tell xfrd our pid so it can signal us for reloads */
    mypid = getpid();
    if(!write_socket(nsd->xfrd_listener->fd, &mypid,  sizeof(mypid))) {
        log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
            strerror(errno));
    }

    if(!shortsoa) {
        /* process the xfrd task works (expiry data) */
        nsd->mytask = 1 - nsd->mytask;
        taskudb = nsd->task[nsd->mytask];
        task_remap(taskudb);
        /* walk the task list that xfrd filled with expire information */
        udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb));
        while(!udb_ptr_is_null(&t)) {
            task_process_expire(nsd->db, TASKLIST(&t));
            udb_ptr_set_rptr(&t, taskudb, &TASKLIST(&t)->next);
        }
        udb_ptr_unlink(&t, taskudb);
        task_clear(taskudb);

        /* tell xfrd that the task is emptied, signal with RELOAD_DONE */
        cmd = NSD_RELOAD_DONE;
        if(!write_socket(xfrd_sock, &cmd,  sizeof(cmd))) {
            log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
                (int)nsd->pid, strerror(errno));
        }
    }
}
 1754 
 1755 #ifdef HAVE_SSL
 1756 static void
 1757 log_crypto_from_err(const char* str, unsigned long err)
 1758 {
 1759     /* error:[error code]:[library name]:[function name]:[reason string] */
 1760     char buf[128];
 1761     unsigned long e;
 1762     ERR_error_string_n(err, buf, sizeof(buf));
 1763     log_msg(LOG_ERR, "%s crypto %s", str, buf);
 1764     while( (e=ERR_get_error()) ) {
 1765         ERR_error_string_n(e, buf, sizeof(buf));
 1766         log_msg(LOG_ERR, "and additionally crypto %s", buf);
 1767     }
 1768 }
 1769 
 1770 void
 1771 log_crypto_err(const char* str)
 1772 {
 1773     log_crypto_from_err(str, ERR_get_error());
 1774 }
 1775 
 1776 /** true if the ssl handshake error has to be squelched from the logs */
/** true if the ssl handshake error has to be squelched from the logs */
static int
squelch_err_ssl_handshake(unsigned long err)
{
    if(verbosity >= 3)
        return 0; /* only squelch on low verbosity */
    /* this is very specific, we could filter on ERR_GET_REASON()
     * (the third element in ERR_PACK) */
    /* Squelched cases, by reason code: plain HTTP(S) sent to the TLS
     * port, TLS version mismatches, a client alert rejecting our
     * certificate, and no cipher in common -- all routine noise from
     * scanners and misconfigured clients, not server-side problems.
     * The SSL_F_* function codes only exist in some OpenSSL versions,
     * hence the #ifdef ladder. */
    if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST) ||
        err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST) ||
        err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER) ||
        err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE)
#ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO
        || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER)
#endif
#ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO
        || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL)
        || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL)
#  ifdef SSL_R_VERSION_TOO_LOW
        || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW)
#  endif
#endif
        )
        return 1;
    return 0;
}
 1802 
void
perform_openssl_init(void)
{
    /*
     * One-time OpenSSL library initialization: load the error strings
     * and algorithm tables through whichever API this OpenSSL version
     * provides, and make sure the PRNG has at least some seed material.
     */
    /* init SSL library */
#ifdef HAVE_ERR_LOAD_CRYPTO_STRINGS
    ERR_load_crypto_strings();
#endif
    ERR_load_SSL_strings();
#if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_CRYPTO)
    /* pre-1.1.0 initialization API */
    OpenSSL_add_all_algorithms();
#else
    /* 1.1.0+ explicit initialization call */
    OPENSSL_init_crypto(OPENSSL_INIT_ADD_ALL_CIPHERS
        | OPENSSL_INIT_ADD_ALL_DIGESTS
        | OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL);
#endif
#if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_SSL)
    (void)SSL_library_init();
#else
    OPENSSL_init_ssl(0, NULL);
#endif

    if(!RAND_status()) {
        /* try to seed it */
        /* last-resort fallback: fill 256 bytes with a multiplicative
         * sequence derived from time and pid.  This is weak entropy,
         * hence the warning logged below. */
        unsigned char buf[256];
        unsigned int v, seed=(unsigned)time(NULL) ^ (unsigned)getpid();
        size_t i;
        v = seed;
        for(i=0; i<256/sizeof(v); i++) {
            memmove(buf+i*sizeof(v), &v, sizeof(v));
            v = v*seed + (unsigned int)i;
        }
        RAND_seed(buf, 256);
        log_msg(LOG_WARNING, "warning: no entropy, seeding openssl PRNG with time");
    }
}
 1838 
 1839 static int
 1840 get_ocsp(char *filename, unsigned char **ocsp)
 1841 {
 1842     BIO *bio;
 1843     OCSP_RESPONSE *response;
 1844     int len = -1;
 1845     unsigned char *p, *buf;
 1846     assert(filename);
 1847 
 1848     if ((bio = BIO_new_file(filename, "r")) == NULL) {
 1849         log_crypto_err("get_ocsp: BIO_new_file failed");
 1850         return -1;
 1851     }
 1852 
 1853     if ((response = d2i_OCSP_RESPONSE_bio(bio, NULL)) == NULL) {
 1854         log_crypto_err("get_ocsp: d2i_OCSP_RESPONSE_bio failed");
 1855         BIO_free(bio);
 1856         return -1;
 1857     }
 1858 
 1859     if ((len = i2d_OCSP_RESPONSE(response, NULL)) <= 0) {
 1860         log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #1 failed");
 1861         OCSP_RESPONSE_free(response);
 1862         BIO_free(bio);
 1863         return -1;
 1864     }
 1865 
 1866     if ((buf = malloc((size_t) len)) == NULL) {
 1867         log_msg(LOG_ERR, "get_ocsp: malloc failed");
 1868         OCSP_RESPONSE_free(response);
 1869         BIO_free(bio);
 1870         return -1;
 1871     }
 1872 
 1873     p = buf;
 1874     if ((len = i2d_OCSP_RESPONSE(response, &p)) <= 0) {
 1875         log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #2 failed");
 1876         free(buf);
 1877         OCSP_RESPONSE_free(response);
 1878         BIO_free(bio);
 1879         return -1;
 1880     }
 1881 
 1882     OCSP_RESPONSE_free(response);
 1883     BIO_free(bio);
 1884 
 1885     *ocsp = buf;
 1886     return len;
 1887 }
 1888 
 1889 /* further setup ssl ctx after the keys are loaded */
 1890 static void
 1891 listen_sslctx_setup_2(void* ctxt)
 1892 {
 1893     SSL_CTX* ctx = (SSL_CTX*)ctxt;
 1894     (void)ctx;
 1895 #if HAVE_DECL_SSL_CTX_SET_ECDH_AUTO
 1896     if(!SSL_CTX_set_ecdh_auto(ctx,1)) {
 1897         /* ENOTREACH */
 1898         log_crypto_err("Error in SSL_CTX_ecdh_auto, not enabling ECDHE");
 1899     }
 1900 #elif defined(HAVE_DECL_SSL_CTX_SET_TMP_ECDH) && defined(NID_X9_62_prime256v1) && defined(HAVE_EC_KEY_NEW_BY_CURVE_NAME)
 1901     if(1) {
 1902         EC_KEY *ecdh = EC_KEY_new_by_curve_name (NID_X9_62_prime256v1);
 1903         if (!ecdh) {
 1904             log_crypto_err("could not find p256, not enabling ECDHE");
 1905         } else {
 1906             if (1 != SSL_CTX_set_tmp_ecdh (ctx, ecdh)) {
 1907                 log_crypto_err("Error in SSL_CTX_set_tmp_ecdh, not enabling ECDHE");
 1908             }
 1909             EC_KEY_free (ecdh);
 1910         }
 1911     }
 1912 #endif
 1913 }
 1914 
 1915 static int
 1916 add_ocsp_data_cb(SSL *s, void* ATTR_UNUSED(arg))
 1917 {
 1918     if(ocspdata) {
 1919         unsigned char *p;
 1920         if ((p=malloc(ocspdata_len)) == NULL) {
 1921             log_msg(LOG_ERR, "add_ocsp_data_cb: malloc failure");
 1922             return SSL_TLSEXT_ERR_NOACK;
 1923         }
 1924         memcpy(p, ocspdata, ocspdata_len);
 1925         if ((SSL_set_tlsext_status_ocsp_resp(s, p, ocspdata_len)) != 1) {
 1926             log_crypto_err("Error in SSL_set_tlsext_status_ocsp_resp");
 1927             free(p);
 1928             return SSL_TLSEXT_ERR_NOACK;
 1929         }
 1930         return SSL_TLSEXT_ERR_OK;
 1931     } else {
 1932         return SSL_TLSEXT_ERR_NOACK;
 1933     }
 1934 }
 1935 
 1936 SSL_CTX*
 1937 server_tls_ctx_setup(char* key, char* pem, char* verifypem)
 1938 {
 1939     SSL_CTX *ctx = SSL_CTX_new(SSLv23_server_method());
 1940     if(!ctx) {
 1941         log_crypto_err("could not SSL_CTX_new");
 1942         return NULL;
 1943     }
 1944     /* no SSLv2, SSLv3 because has defects */
 1945 #if SSL_OP_NO_SSLv2 != 0
 1946     if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2) & SSL_OP_NO_SSLv2) != SSL_OP_NO_SSLv2){
 1947         log_crypto_err("could not set SSL_OP_NO_SSLv2");
 1948         SSL_CTX_free(ctx);
 1949         return NULL;
 1950     }
 1951 #endif
 1952     if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv3) & SSL_OP_NO_SSLv3)
 1953         != SSL_OP_NO_SSLv3){
 1954         log_crypto_err("could not set SSL_OP_NO_SSLv3");
 1955         SSL_CTX_free(ctx);
 1956         return 0;
 1957     }
 1958 #if defined(SSL_OP_NO_TLSv1) && defined(SSL_OP_NO_TLSv1_1)
 1959     /* if we have tls 1.1 disable 1.0 */
 1960     if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1) & SSL_OP_NO_TLSv1)
 1961         != SSL_OP_NO_TLSv1){
 1962         log_crypto_err("could not set SSL_OP_NO_TLSv1");
 1963         SSL_CTX_free(ctx);
 1964         return 0;
 1965     }
 1966 #endif
 1967 #if defined(SSL_OP_NO_TLSv1_1) && defined(SSL_OP_NO_TLSv1_2)
 1968     /* if we have tls 1.2 disable 1.1 */
 1969     if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_1) & SSL_OP_NO_TLSv1_1)
 1970         != SSL_OP_NO_TLSv1_1){
 1971         log_crypto_err("could not set SSL_OP_NO_TLSv1_1");
 1972         SSL_CTX_free(ctx);
 1973         return 0;
 1974     }
 1975 #endif
 1976 #if defined(SSL_OP_NO_RENEGOTIATION)
 1977     /* disable client renegotiation */
 1978     if((SSL_CTX_set_options(ctx, SSL_OP_NO_RENEGOTIATION) &
 1979         SSL_OP_NO_RENEGOTIATION) != SSL_OP_NO_RENEGOTIATION) {
 1980         log_crypto_err("could not set SSL_OP_NO_RENEGOTIATION");
 1981         SSL_CTX_free(ctx);
 1982         return 0;
 1983     }
 1984 #endif
 1985 #if defined(SHA256_DIGEST_LENGTH) && defined(SSL_TXT_CHACHA20)
 1986     /* if we have sha256, set the cipher list to have no known vulns */
 1987     if(!SSL_CTX_set_cipher_list(ctx, "ECDHE+AESGCM:ECDHE+CHACHA20"))
 1988         log_crypto_err("could not set cipher list with SSL_CTX_set_cipher_list");
 1989 #endif
 1990     if((SSL_CTX_set_options(ctx, SSL_OP_CIPHER_SERVER_PREFERENCE) &
 1991         SSL_OP_CIPHER_SERVER_PREFERENCE) !=
 1992         SSL_OP_CIPHER_SERVER_PREFERENCE) {
 1993         log_crypto_err("could not set SSL_OP_CIPHER_SERVER_PREFERENCE");
 1994         SSL_CTX_free(ctx);
 1995         return 0;
 1996     }
 1997 #ifdef HAVE_SSL_CTX_SET_SECURITY_LEVEL
 1998     SSL_CTX_set_security_level(ctx, 0);
 1999 #endif
 2000     if(!SSL_CTX_use_certificate_chain_file(ctx, pem)) {
 2001         log_msg(LOG_ERR, "error for cert file: %s", pem);
 2002         log_crypto_err("error in SSL_CTX use_certificate_chain_file");
 2003         SSL_CTX_free(ctx);
 2004         return NULL;
 2005     }
 2006     if(!SSL_CTX_use_PrivateKey_file(ctx, key, SSL_FILETYPE_PEM)) {
 2007         log_msg(LOG_ERR, "error for private key file: %s", key);
 2008         log_crypto_err("Error in SSL_CTX use_PrivateKey_file");
 2009         SSL_CTX_free(ctx);
 2010         return NULL;
 2011     }
 2012     if(!SSL_CTX_check_private_key(ctx)) {
 2013         log_msg(LOG_ERR, "error for key file: %s", key);
 2014         log_crypto_err("Error in SSL_CTX check_private_key");
 2015         SSL_CTX_free(ctx);
 2016         return NULL;
 2017     }
 2018     listen_sslctx_setup_2(ctx);
 2019     if(verifypem && verifypem[0]) {
 2020         if(!SSL_CTX_load_verify_locations(ctx, verifypem, NULL)) {
 2021             log_crypto_err("Error in SSL_CTX verify locations");
 2022             SSL_CTX_free(ctx);
 2023             return NULL;
 2024         }
 2025         SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file(verifypem));
 2026         SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, NULL);
 2027     }
 2028     return ctx;
 2029 }
 2030 
 2031 SSL_CTX*
 2032 server_tls_ctx_create(struct nsd* nsd, char* verifypem, char* ocspfile)
 2033 {
 2034     char *key, *pem;
 2035     SSL_CTX *ctx;
 2036 
 2037     key = nsd->options->tls_service_key;
 2038     pem = nsd->options->tls_service_pem;
 2039     if(!key || key[0] == 0) {
 2040         log_msg(LOG_ERR, "error: no tls-service-key file specified");
 2041         return NULL;
 2042     }
 2043     if(!pem || pem[0] == 0) {
 2044         log_msg(LOG_ERR, "error: no tls-service-pem file specified");
 2045         return NULL;
 2046     }
 2047 
 2048     /* NOTE:This mimics the existing code in Unbound 1.5.1 by supporting SSL but
 2049      * raft-ietf-uta-tls-bcp-08 recommends only using TLSv1.2*/
 2050     ctx = server_tls_ctx_setup(key, pem, verifypem);
 2051     if(!ctx) {
 2052         log_msg(LOG_ERR, "could not setup server TLS context");
 2053         return NULL;
 2054     }
 2055     if(ocspfile && ocspfile[0]) {
 2056         if ((ocspdata_len = get_ocsp(ocspfile, &ocspdata)) < 0) {
 2057             log_crypto_err("Error reading OCSPfile");
 2058             SSL_CTX_free(ctx);
 2059             return NULL;
 2060         } else {
 2061             VERBOSITY(2, (LOG_INFO, "ocspfile %s loaded", ocspfile));
 2062             if(!SSL_CTX_set_tlsext_status_cb(ctx, add_ocsp_data_cb)) {
 2063                 log_crypto_err("Error in SSL_CTX_set_tlsext_status_cb");
 2064                 SSL_CTX_free(ctx);
 2065                 return NULL;
 2066             }
 2067         }
 2068     }
 2069     return ctx;
 2070 }
 2071 
 2072 /* check if tcp_handler_accept_data created for TLS dedicated port */
 2073 int
 2074 using_tls_port(struct sockaddr* addr, const char* tls_port)
 2075 {
 2076     in_port_t port = 0;
 2077 
 2078     if (addr->sa_family == AF_INET)
 2079         port = ((struct sockaddr_in*)addr)->sin_port;
 2080 #ifndef HAVE_STRUCT_SOCKADDR_IN6
 2081     else
 2082         port = ((struct sockaddr_in6*)addr)->sin6_port;
 2083 #endif /* HAVE_STRUCT_SOCKADDR_IN6 */
 2084     if (atoi(tls_port) == ntohs(port))
 2085         return 1;
 2086 
 2087     return 0;
 2088 }
 2089 #endif
 2090 
 2091 /* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */
 2092 ssize_t
 2093 block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout)
 2094 {
 2095     uint8_t* buf = (uint8_t*) p;
 2096     ssize_t total = 0;
 2097     struct pollfd fd;
 2098     memset(&fd, 0, sizeof(fd));
 2099     fd.fd = s;
 2100     fd.events = POLLIN;
 2101     
 2102     while( total < sz) {
 2103         ssize_t ret;
 2104         ret = poll(&fd, 1, (timeout==-1)?-1:timeout*1000);
 2105         if(ret == -1) {
 2106             if(errno == EAGAIN)
 2107                 /* blocking read */
 2108                 continue;
 2109             if(errno == EINTR) {
 2110                 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown))
 2111                     return -1;
 2112                 /* other signals can be handled later */
 2113                 continue;
 2114             }
 2115             /* some error */
 2116             return -1;
 2117         }
 2118         if(ret == 0) {
 2119             /* operation timed out */
 2120             return -2;
 2121         }
 2122         ret = read(s, buf+total, sz-total);
 2123         if(ret == -1) {
 2124             if(errno == EAGAIN)
 2125                 /* blocking read */
 2126                 continue;
 2127             if(errno == EINTR) {
 2128                 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown))
 2129                     return -1;
 2130                 /* other signals can be handled later */
 2131                 continue;
 2132             }
 2133             /* some error */
 2134             return -1;
 2135         }
 2136         if(ret == 0) {
 2137             /* closed connection! */
 2138             return 0;
 2139         }
 2140         total += ret;
 2141     }
 2142     return total;
 2143 }
 2144 
static void
reload_process_tasks(struct nsd* nsd, udb_ptr* last_task, int cmdsocket)
{
    /*
     * Work through the task list xfrd queued in the current task udb:
     * detach the list from the udb userdata, process each task in
     * order (results are appended after last_task), and after each
     * task poll cmdsocket so an NSD_QUIT from the main process is
     * honored promptly mid-reload.
     */
    sig_atomic_t cmd = NSD_QUIT_SYNC;
    udb_ptr t, next;
    udb_base* u = nsd->task[nsd->mytask];
    udb_ptr_init(&next, u);
    udb_ptr_new(&t, u, udb_base_get_userdata(u));
    /* the list is consumed here; clear the udb's head pointer */
    udb_base_set_userdata(u, 0);
    while(!udb_ptr_is_null(&t)) {
        /* store next in list so this one can be deleted or reused */
        udb_ptr_set_rptr(&next, u, &TASKLIST(&t)->next);
        udb_rptr_zero(&TASKLIST(&t)->next, u);

        /* process task t */
        /* append results for task t and update last_task */
        task_process_in_reload(nsd, u, last_task, &t);

        /* go to next */
        udb_ptr_set_ptr(&t, u, &next);

        /* if the parent has quit, we must quit too, poll the fd for cmds */
        if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) {
            DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd));
            if(cmd == NSD_QUIT) {
                DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd"));
                /* sync to disk (if needed) */
                udb_base_sync(nsd->db->udb, 0);
                /* unlink files of remainder of tasks */
                while(!udb_ptr_is_null(&t)) {
                    if(TASKLIST(&t)->task_type == task_apply_xfr) {
                        xfrd_unlink_xfrfile(nsd, TASKLIST(&t)->yesno);
                    }
                    udb_ptr_set_rptr(&t, u, &TASKLIST(&t)->next);
                }
                /* release the udb pointer links before exiting */
                udb_ptr_unlink(&t, u);
                udb_ptr_unlink(&next, u);
                exit(0);
            }
        }

    }
    udb_ptr_unlink(&t, u);
    udb_ptr_unlink(&next, u);
}
 2190 
 2191 #ifdef BIND8_STATS
 2192 static void
 2193 parent_send_stats(struct nsd* nsd, int cmdfd)
 2194 {
 2195     size_t i;
 2196     if(!write_socket(cmdfd, &nsd->st, sizeof(nsd->st))) {
 2197         log_msg(LOG_ERR, "could not write stats to reload");
 2198         return;
 2199     }
 2200     for(i=0; i<nsd->child_count; i++)
 2201         if(!write_socket(cmdfd, &nsd->children[i].query_count,
 2202             sizeof(stc_type))) {
 2203             log_msg(LOG_ERR, "could not write stats to reload");
 2204             return;
 2205         }
 2206 }
 2207 
 2208 static void
 2209 reload_do_stats(int cmdfd, struct nsd* nsd, udb_ptr* last)
 2210 {
 2211     struct nsdst s;
 2212     stc_type* p;
 2213     size_t i;
 2214     if(block_read(nsd, cmdfd, &s, sizeof(s),
 2215         RELOAD_SYNC_TIMEOUT) != sizeof(s)) {
 2216         log_msg(LOG_ERR, "could not read stats from oldpar");
 2217         return;
 2218     }
 2219     s.db_disk = (nsd->db->udb?nsd->db->udb->base_size:0);
 2220     s.db_mem = region_get_mem(nsd->db->region);
 2221     p = (stc_type*)task_new_stat_info(nsd->task[nsd->mytask], last, &s,
 2222         nsd->child_count);
 2223     if(!p) return;
 2224     for(i=0; i<nsd->child_count; i++) {
 2225         if(block_read(nsd, cmdfd, p++, sizeof(stc_type), 1)!=
 2226             sizeof(stc_type))
 2227             return;
 2228     }
 2229 }
 2230 #endif /* BIND8_STATS */
 2231 
 2232 /*
 2233  * Reload the database, stop parent, re-fork children and continue.
 2234  * as server_main.
 2235  */
static void
server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio,
    int cmdsocket)
{
    /*
     * Runs in the forked reload process.  Sequence:
     *  1. apply the tasks xfrd queued in the task udb,
     *  2. start a fresh set of child server processes,
     *  3. tell the old main process (over cmdsocket) to quit and wait
     *     for its acknowledgement,
     *  4. hand statistics and the task udb back to xfrd.
     * On return the caller continues as the new server_main.
     */
    pid_t mypid;
    sig_atomic_t cmd = NSD_QUIT_SYNC;
    int ret;
    udb_ptr last_task;
    struct sigaction old_sigchld, ign_sigchld;
    /* ignore SIGCHLD from the previous server_main that used this pid */
    memset(&ign_sigchld, 0, sizeof(ign_sigchld));
    ign_sigchld.sa_handler = SIG_IGN;
    sigaction(SIGCHLD, &ign_sigchld, &old_sigchld);

#ifdef HAVE_SETPROCTITLE
    setproctitle("main");
#endif
#ifdef HAVE_CPUSET_T
    if(nsd->use_cpu_affinity) {
        set_cpu_affinity(nsd->cpuset);
    }
#endif

    /* see what tasks we got from xfrd */
    task_remap(nsd->task[nsd->mytask]);
    udb_ptr_init(&last_task, nsd->task[nsd->mytask]);
    /* keep the db from compacting while tasks modify it */
    udb_compact_inhibited(nsd->db->udb, 1);
    reload_process_tasks(nsd, &last_task, cmdsocket);
    udb_compact_inhibited(nsd->db->udb, 0);
    udb_compact(nsd->db->udb);

#ifndef NDEBUG
    if(nsd_debug_level >= 1)
        region_log_stats(nsd->db->region);
#endif /* NDEBUG */
    /* sync to disk (if needed) */
    udb_base_sync(nsd->db->udb, 0);

    /* compression tables depend on the zone data; rebuild them */
    initialize_dname_compression_tables(nsd);

#ifdef BIND8_STATS
    /* Restart dumping stats if required.  */
    time(&nsd->st.boot);
    set_bind8_alarm(nsd);
#endif
#ifdef USE_ZONE_STATS
    server_zonestat_realloc(nsd); /* realloc for new children */
    server_zonestat_switch(nsd);
#endif

    /* listen for the signals of failed children again */
    sigaction(SIGCHLD, &old_sigchld, NULL);
    /* Start new child processes */
    if (server_start_children(nsd, server_region, netio, &nsd->
        xfrd_listener->fd) != 0) {
        send_children_quit(nsd);
        exit(1);
    }

    /* if the parent has quit, we must quit too, poll the fd for cmds */
    if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) {
        DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd));
        if(cmd == NSD_QUIT) {
            DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd"));
            send_children_quit(nsd);
            exit(0);
        }
    }

    /* Send quit command to parent: blocking, wait for receipt. */
    /* -2 (timeout) from block_read is retried until the parent answers */
    do {
        DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main"));
        if (!write_socket(cmdsocket, &cmd, sizeof(cmd)))
        {
            log_msg(LOG_ERR, "problems sending command from reload to oldnsd: %s",
                strerror(errno));
        }
        /* blocking: wait for parent to really quit. (it sends RELOAD as ack) */
        DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main"));
        ret = block_read(nsd, cmdsocket, &cmd, sizeof(cmd),
            RELOAD_SYNC_TIMEOUT);
        if(ret == -2) {
            DEBUG(DEBUG_IPC, 1, (LOG_ERR, "reload timeout QUITSYNC. retry"));
        }
    } while (ret == -2);
    if(ret == -1) {
        log_msg(LOG_ERR, "reload: could not wait for parent to quit: %s",
            strerror(errno));
    }
    DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d %d", ret, (int)cmd));
    if(cmd == NSD_QUIT) {
        /* small race condition possible here, parent got quit cmd. */
        send_children_quit(nsd);
        exit(1);
    }
    assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD);
#ifdef BIND8_STATS
    reload_do_stats(cmdsocket, nsd, &last_task);
#endif
    udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]);
    task_process_sync(nsd->task[nsd->mytask]);
#ifdef USE_ZONE_STATS
    server_zonestat_realloc(nsd); /* realloc for next children */
#endif

    /* send soainfo to the xfrd process, signal it that reload is done,
     * it picks up the taskudb */
    cmd = NSD_RELOAD_DONE;
    if(!write_socket(nsd->xfrd_listener->fd, &cmd,  sizeof(cmd))) {
        log_msg(LOG_ERR, "problems sending reload_done xfrd: %s",
            strerror(errno));
    }
    mypid = getpid();
    if(!write_socket(nsd->xfrd_listener->fd, &mypid,  sizeof(mypid))) {
        log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
            strerror(errno));
    }

    /* try to reopen file */
    if (nsd->file_rotation_ok)
        log_reopen(nsd->log_filename, 1);
    /* exit reload, continue as new server_main */
}
 2359 
 2360 /*
 2361  * Get the mode depending on the signal hints that have been received.
 2362  * Multiple signal hints can be received and will be handled in turn.
 2363  */
 2364 static sig_atomic_t
 2365 server_signal_mode(struct nsd *nsd)
 2366 {
 2367     if(nsd->signal_hint_quit) {
 2368         nsd->signal_hint_quit = 0;
 2369         return NSD_QUIT;
 2370     }
 2371     else if(nsd->signal_hint_shutdown) {
 2372         nsd->signal_hint_shutdown = 0;
 2373         return NSD_SHUTDOWN;
 2374     }
 2375     else if(nsd->signal_hint_child) {
 2376         nsd->signal_hint_child = 0;
 2377         return NSD_REAP_CHILDREN;
 2378     }
 2379     else if(nsd->signal_hint_reload) {
 2380         nsd->signal_hint_reload = 0;
 2381         return NSD_RELOAD;
 2382     }
 2383     else if(nsd->signal_hint_reload_hup) {
 2384         nsd->signal_hint_reload_hup = 0;
 2385         return NSD_RELOAD_REQ;
 2386     }
 2387     else if(nsd->signal_hint_stats) {
 2388         nsd->signal_hint_stats = 0;
 2389 #ifdef BIND8_STATS
 2390         set_bind8_alarm(nsd);
 2391 #endif
 2392         return NSD_STATS;
 2393     }
 2394     else if(nsd->signal_hint_statsusr) {
 2395         nsd->signal_hint_statsusr = 0;
 2396         return NSD_STATS;
 2397     }
 2398     return NSD_RUN;
 2399 }
 2400 
 2401 /*
 2402  * The main server simply waits for signals and child processes to
 2403  * terminate.  Child processes are restarted as necessary.
 2404  */
 2405 void
 2406 server_main(struct nsd *nsd)
 2407 {
 2408     region_type *server_region = region_create(xalloc, free);
 2409     netio_type *netio = netio_create(server_region);
 2410     netio_handler_type reload_listener;
 2411     int reload_sockets[2] = {-1, -1};
 2412     struct timespec timeout_spec;
 2413     int status;
 2414     pid_t child_pid;
 2415     pid_t reload_pid = -1;
 2416     sig_atomic_t mode;
 2417 
 2418     /* Ensure we are the main process */
 2419     assert(nsd->server_kind == NSD_SERVER_MAIN);
 2420 
 2421     /* Add listener for the XFRD process */
 2422     netio_add_handler(netio, nsd->xfrd_listener);
 2423 
 2424     /* Start the child processes that handle incoming queries */
 2425     if (server_start_children(nsd, server_region, netio,
 2426         &nsd->xfrd_listener->fd) != 0) {
 2427         send_children_quit(nsd);
 2428         exit(1);
 2429     }
 2430     reload_listener.fd = -1;
 2431 
 2432     /* This_child MUST be 0, because this is the parent process */
 2433     assert(nsd->this_child == 0);
 2434 
 2435     /* Run the server until we get a shutdown signal */
 2436     while ((mode = nsd->mode) != NSD_SHUTDOWN) {
 2437         /* Did we receive a signal that changes our mode? */
 2438         if(mode == NSD_RUN) {
 2439             nsd->mode = mode = server_signal_mode(nsd);
 2440         }
 2441 
 2442         switch (mode) {
 2443         case NSD_RUN:
 2444             /* see if any child processes terminated */
 2445             while((child_pid = waitpid(-1, &status, WNOHANG)) != -1 && child_pid != 0) {
 2446                 int is_child = delete_child_pid(nsd, child_pid);
 2447                 if (is_child != -1 && nsd->children[is_child].need_to_exit) {
 2448                     if(nsd->children[is_child].child_fd == -1)
 2449                         nsd->children[is_child].has_exited = 1;
 2450                     parent_check_all_children_exited(nsd);
 2451                 } else if(is_child != -1) {
 2452                     log_msg(LOG_WARNING,
 2453                            "server %d died unexpectedly with status %d, restarting",
 2454                            (int) child_pid, status);
 2455                     restart_child_servers(nsd, server_region, netio,
 2456                         &nsd->xfrd_listener->fd);
 2457                 } else if (child_pid == reload_pid) {
 2458                     sig_atomic_t cmd = NSD_RELOAD_DONE;
 2459                     pid_t mypid;
 2460                     log_msg(LOG_WARNING,
 2461                            "Reload process %d failed with status %d, continuing with old database",
 2462                            (int) child_pid, status);
 2463                     reload_pid = -1;
 2464                     if(reload_listener.fd != -1) close(reload_listener.fd);
 2465                     reload_listener.fd = -1;
 2466                     reload_listener.event_types = NETIO_EVENT_NONE;
 2467                     task_process_sync(nsd->task[nsd->mytask]);
 2468                     /* inform xfrd reload attempt ended */
 2469                     if(!write_socket(nsd->xfrd_listener->fd,
 2470                         &cmd, sizeof(cmd))) {
 2471                         log_msg(LOG_ERR, "problems "
 2472                           "sending SOAEND to xfrd: %s",
 2473                           strerror(errno));
 2474                     }
 2475                     mypid = getpid();
 2476                     if(!write_socket(nsd->xfrd_listener->fd, &mypid,  sizeof(mypid))) {
 2477                         log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
 2478                             strerror(errno));
 2479                     }
 2480                 } else if(status != 0) {
 2481                     /* check for status, because we get
 2482                      * the old-servermain because reload
 2483                      * is the process-parent of old-main,
 2484                      * and we get older server-processes
 2485                      * that are exiting after a reload */
 2486                     log_msg(LOG_WARNING,
 2487                            "process %d terminated with status %d",
 2488                            (int) child_pid, status);
 2489                 }
 2490             }
 2491             if (child_pid == -1) {
 2492                 if (errno == EINTR) {
 2493                     continue;
 2494                 }
 2495                 if (errno != ECHILD)
 2496                     log_msg(LOG_WARNING, "wait failed: %s", strerror(errno));
 2497             }
 2498             if (nsd->mode != NSD_RUN)
 2499                 break;
 2500 
 2501             /* timeout to collect processes. In case no sigchild happens. */
 2502             timeout_spec.tv_sec = 60;
 2503             timeout_spec.tv_nsec = 0;
 2504 
 2505             /* listen on ports, timeout for collecting terminated children */
 2506             if(netio_dispatch(netio, &timeout_spec, 0) == -1) {
 2507                 if (errno != EINTR) {
 2508                     log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno));
 2509                 }
 2510             }
 2511             if(nsd->restart_children) {
 2512                 restart_child_servers(nsd, server_region, netio,
 2513                     &nsd->xfrd_listener->fd);
 2514                 nsd->restart_children = 0;
 2515             }
 2516             if(nsd->reload_failed) {
 2517                 sig_atomic_t cmd = NSD_RELOAD_DONE;
 2518                 pid_t mypid;
 2519                 nsd->reload_failed = 0;
 2520                 log_msg(LOG_WARNING,
 2521                        "Reload process %d failed, continuing with old database",
 2522                        (int) reload_pid);
 2523                 reload_pid = -1;
 2524                 if(reload_listener.fd != -1) close(reload_listener.fd);
 2525                 reload_listener.fd = -1;
 2526                 reload_listener.event_types = NETIO_EVENT_NONE;
 2527                 task_process_sync(nsd->task[nsd->mytask]);
 2528                 /* inform xfrd reload attempt ended */
 2529                 if(!write_socket(nsd->xfrd_listener->fd,
 2530                     &cmd, sizeof(cmd))) {
 2531                     log_msg(LOG_ERR, "problems "
 2532                       "sending SOAEND to xfrd: %s",
 2533                       strerror(errno));
 2534                 }
 2535                 mypid = getpid();
 2536                 if(!write_socket(nsd->xfrd_listener->fd, &mypid,  sizeof(mypid))) {
 2537                     log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
 2538                         strerror(errno));
 2539                 }
 2540             }
 2541 
 2542             break;
 2543         case NSD_RELOAD_REQ: {
 2544             sig_atomic_t cmd = NSD_RELOAD_REQ;
 2545             log_msg(LOG_WARNING, "SIGHUP received, reloading...");
 2546             DEBUG(DEBUG_IPC,1, (LOG_INFO,
 2547                 "main: ipc send reload_req to xfrd"));
 2548             if(!write_socket(nsd->xfrd_listener->fd,
 2549                 &cmd, sizeof(cmd))) {
 2550                 log_msg(LOG_ERR, "server_main: could not send "
 2551                 "reload_req to xfrd: %s", strerror(errno));
 2552             }
 2553             nsd->mode = NSD_RUN;
 2554             } break;
 2555         case NSD_RELOAD:
 2556             /* Continue to run nsd after reload */
 2557             nsd->mode = NSD_RUN;
 2558             DEBUG(DEBUG_IPC,1, (LOG_INFO, "reloading..."));
 2559             if (reload_pid != -1) {
 2560                 log_msg(LOG_WARNING, "Reload already in progress (pid = %d)",
 2561                        (int) reload_pid);
 2562                 break;
 2563             }
 2564 
 2565             /* switch the mytask to keep track of who owns task*/
 2566             nsd->mytask = 1 - nsd->mytask;
 2567             if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) {
 2568                 log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno));
 2569                 reload_pid = -1;
 2570                 break;
 2571             }
 2572 
 2573             /* Do actual reload */
 2574             reload_pid = fork();
 2575             switch (reload_pid) {
 2576             case -1:
 2577                 log_msg(LOG_ERR, "fork failed: %s", strerror(errno));
 2578                 break;
 2579             default:
 2580                 /* PARENT */
 2581                 close(reload_sockets[0]);
 2582                 server_reload(nsd, server_region, netio,
 2583                     reload_sockets[1]);
 2584                 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main"));
 2585                 close(reload_sockets[1]);
 2586                 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed"));
 2587                 /* drop stale xfrd ipc data */
 2588                 ((struct ipc_handler_conn_data*)nsd->
 2589                     xfrd_listener->user_data)
 2590                     ->conn->is_reading = 0;
 2591                 reload_pid = -1;
 2592                 reload_listener.fd = -1;
 2593                 reload_listener.event_types = NETIO_EVENT_NONE;
 2594                 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run"));
 2595                 break;
 2596             case 0:
 2597                 /* CHILD */
 2598                 /* server_main keep running until NSD_QUIT_SYNC
 2599                  * received from reload. */
 2600                 close(reload_sockets[1]);
 2601                 reload_listener.fd = reload_sockets[0];
 2602                 reload_listener.timeout = NULL;
 2603                 reload_listener.user_data = nsd;
 2604                 reload_listener.event_types = NETIO_EVENT_READ;
 2605                 reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */
 2606                 netio_add_handler(netio, &reload_listener);
 2607                 reload_pid = getppid();
 2608                 break;
 2609             }
 2610             break;
 2611         case NSD_QUIT_SYNC:
 2612             /* synchronisation of xfrd, parent and reload */
 2613             if(!nsd->quit_sync_done && reload_listener.fd != -1) {
 2614                 sig_atomic_t cmd = NSD_RELOAD;
 2615                 /* stop xfrd ipc writes in progress */
 2616                 DEBUG(DEBUG_IPC,1, (LOG_INFO,
 2617                     "main: ipc send indication reload"));
 2618                 if(!write_socket(nsd->xfrd_listener->fd,
 2619                     &cmd, sizeof(cmd))) {
 2620                     log_msg(LOG_ERR, "server_main: could not send reload "
 2621                     "indication to xfrd: %s", strerror(errno));
 2622                 }
 2623                 /* wait for ACK from xfrd */
 2624                 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd"));
 2625                 nsd->quit_sync_done = 1;
 2626             }
 2627             nsd->mode = NSD_RUN;
 2628             break;
 2629         case NSD_QUIT:
 2630             /* silent shutdown during reload */
 2631             if(reload_listener.fd != -1) {
 2632                 /* acknowledge the quit, to sync reload that we will really quit now */
 2633                 sig_atomic_t cmd = NSD_RELOAD;
 2634                 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload"));
 2635                 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) {
 2636                     log_msg(LOG_ERR, "server_main: "
 2637                         "could not ack quit: %s", strerror(errno));
 2638                 }
 2639 #ifdef BIND8_STATS
 2640                 parent_send_stats(nsd, reload_listener.fd);
 2641 #endif /* BIND8_STATS */
 2642                 close(reload_listener.fd);
 2643             }
 2644             DEBUG(DEBUG_IPC,1, (LOG_INFO, "server_main: shutdown sequence"));
 2645             /* only quit children after xfrd has acked */
 2646             send_children_quit(nsd);
 2647 
 2648 #ifdef MEMCLEAN /* OS collects memory pages */
 2649             region_destroy(server_region);
 2650 #endif
 2651             server_shutdown(nsd);
 2652 
 2653             /* ENOTREACH */
 2654             break;
 2655         case NSD_SHUTDOWN:
 2656             break;
 2657         case NSD_REAP_CHILDREN:
 2658             /* continue; wait for child in run loop */
 2659             nsd->mode = NSD_RUN;
 2660             break;
 2661         case NSD_STATS:
 2662 #ifdef BIND8_STATS
 2663             set_children_stats(nsd);
 2664 #endif
 2665             nsd->mode = NSD_RUN;
 2666             break;
 2667         default:
 2668             log_msg(LOG_WARNING, "NSD main server mode invalid: %d", (int)nsd->mode);
 2669             nsd->mode = NSD_RUN;
 2670             break;
 2671         }
 2672     }
 2673     log_msg(LOG_WARNING, "signal received, shutting down...");
 2674 
 2675     /* close opened ports to avoid race with restart of nsd */
 2676     server_close_all_sockets(nsd->udp, nsd->ifs);
 2677     server_close_all_sockets(nsd->tcp, nsd->ifs);
 2678 #ifdef HAVE_SSL
 2679     daemon_remote_close(nsd->rc);
 2680 #endif
 2681     send_children_quit_and_wait(nsd);
 2682 
 2683     /* Unlink it if possible... */
 2684     unlinkpid(nsd->pidfile);
 2685     unlink(nsd->task[0]->fname);
 2686     unlink(nsd->task[1]->fname);
 2687 #ifdef USE_ZONE_STATS
 2688     unlink(nsd->zonestatfname[0]);
 2689     unlink(nsd->zonestatfname[1]);
 2690 #endif
 2691 #ifdef USE_DNSTAP
 2692     dt_collector_close(nsd->dt_collector, nsd);
 2693 #endif
 2694 
 2695     if(reload_listener.fd != -1) {
 2696         sig_atomic_t cmd = NSD_QUIT;
 2697         DEBUG(DEBUG_IPC,1, (LOG_INFO,
 2698             "main: ipc send quit to reload-process"));
 2699         if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) {
 2700             log_msg(LOG_ERR, "server_main: could not send quit to reload: %s",
 2701                 strerror(errno));
 2702         }
 2703         fsync(reload_listener.fd);
 2704         close(reload_listener.fd);
 2705         /* wait for reload to finish processing */
 2706         while(1) {
 2707             if(waitpid(reload_pid, NULL, 0) == -1) {
 2708                 if(errno == EINTR) continue;
 2709                 if(errno == ECHILD) break;
 2710                 log_msg(LOG_ERR, "waitpid(reload %d): %s",
 2711                     (int)reload_pid, strerror(errno));
 2712             }
 2713             break;
 2714         }
 2715     }
 2716     if(nsd->xfrd_listener->fd != -1) {
 2717         /* complete quit, stop xfrd */
 2718         sig_atomic_t cmd = NSD_QUIT;
 2719         DEBUG(DEBUG_IPC,1, (LOG_INFO,
 2720             "main: ipc send quit to xfrd"));
 2721         if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) {
 2722             log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s",
 2723                 strerror(errno));
 2724         }
 2725         fsync(nsd->xfrd_listener->fd);
 2726         close(nsd->xfrd_listener->fd);
 2727         (void)kill(nsd->pid, SIGTERM);
 2728     }
 2729 
 2730 #ifdef MEMCLEAN /* OS collects memory pages */
 2731     region_destroy(server_region);
 2732 #endif
 2733     /* write the nsd.db to disk, wait for it to complete */
 2734     udb_base_sync(nsd->db->udb, 1);
 2735     udb_base_close(nsd->db->udb);
 2736     server_shutdown(nsd);
 2737 }
 2738 
 2739 static query_state_type
 2740 server_process_query(struct nsd *nsd, struct query *query)
 2741 {
 2742     return query_process(query, nsd);
 2743 }
 2744 
 2745 static query_state_type
 2746 server_process_query_udp(struct nsd *nsd, struct query *query)
 2747 {
 2748 #ifdef RATELIMIT
 2749     if(query_process(query, nsd) != QUERY_DISCARDED) {
 2750         if(rrl_process_query(query))
 2751             return rrl_slip(query);
 2752         else    return QUERY_PROCESSED;
 2753     }
 2754     return QUERY_DISCARDED;
 2755 #else
 2756     return query_process(query, nsd);
 2757 #endif
 2758 }
 2759 
/*
 * Return the version string of the event library in use.
 * With the builtin mini event implementation there is no version
 * string, so an empty string is returned.
 */
const char*
nsd_event_vs(void)
{
#ifdef USE_MINI_EVENT
    return "";
#else
    return event_get_version();
#endif
}
 2769 
 2770 #if !defined(USE_MINI_EVENT) && defined(EV_FEATURE_BACKENDS)
 2771 static const char* ub_ev_backend2str(int b)
 2772 {
 2773     switch(b) {
 2774     case EVBACKEND_SELECT:  return "select";
 2775     case EVBACKEND_POLL:    return "poll";
 2776     case EVBACKEND_EPOLL:   return "epoll";
 2777     case EVBACKEND_KQUEUE:  return "kqueue";
 2778     case EVBACKEND_DEVPOLL: return "devpoll";
 2779     case EVBACKEND_PORT:    return "evport";
 2780     }
 2781     return "unknown";
 2782 }
 2783 #endif
 2784 
/*
 * Return the name of the event mechanism (backend) in use,
 * e.g. "select", "epoll", "kqueue".  Returns "?" when the backend
 * cannot be determined.
 */
const char*
nsd_event_method(void)
{
#ifdef USE_MINI_EVENT
    return "select";
#else
    /* create a temporary event base just to query its backend */
    struct event_base* b = nsd_child_event_base();
    const char* m = "?";
#  ifdef EV_FEATURE_BACKENDS
    /* libev: ask the loop which backend it chose */
    m = ub_ev_backend2str(ev_backend((struct ev_loop*)b));
#  elif defined(HAVE_EVENT_BASE_GET_METHOD)
    m = event_base_get_method(b);
#  endif
#  ifdef MEMCLEAN
    /* NOTE(review): with libevent, event_base_get_method() may return
     * a pointer tied to the base freed here — confirm the returned
     * string is a static literal in the supported libevent versions */
    event_base_free(b);
#  endif
    return m;
#endif
}
 2804 
/*
 * Create the event base used by a server (child) process.
 * Selects the backing implementation at compile time: the builtin
 * mini event, libev, or libevent.  May return NULL on failure;
 * callers check for that.
 */
struct event_base*
nsd_child_event_base(void)
{
    struct event_base* base;
#ifdef USE_MINI_EVENT
    /* mini event stores its notion of "now" in caller-provided
     * storage; statics keep it valid for the life of the process */
    static time_t secs;
    static struct timeval now;
    base = event_init(&secs, &now);
#else
#  if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
    /* libev */
    base = (struct event_base *)ev_default_loop(EVFLAG_AUTO);
#  else
    /* libevent */
#    ifdef HAVE_EVENT_BASE_NEW
    base = event_base_new();
#    else
    base = event_init();
#    endif
#  endif
#endif
    return base;
}
 2828 
 2829 static void
 2830 add_udp_handler(
 2831     struct nsd *nsd,
 2832     struct nsd_socket *sock,
 2833     struct udp_handler_data *data)
 2834 {
 2835     struct event *handler = &data->event;
 2836 
 2837     data->nsd = nsd;
 2838     data->socket = sock;
 2839 
 2840     memset(handler, 0, sizeof(*handler));
 2841     event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_udp, data);
 2842     if(event_base_set(nsd->event_base, handler) != 0)
 2843         log_msg(LOG_ERR, "nsd udp: event_base_set failed");
 2844     if(event_add(handler, NULL) != 0)
 2845         log_msg(LOG_ERR, "nsd udp: event_add failed");
 2846 }
 2847 
 2848 void
 2849 add_tcp_handler(
 2850     struct nsd *nsd,
 2851     struct nsd_socket *sock,
 2852     struct tcp_accept_handler_data *data)
 2853 {
 2854     struct event *handler = &data->event;
 2855 
 2856     data->nsd = nsd;
 2857     data->socket = sock;
 2858 
 2859 #ifdef HAVE_SSL
 2860     if (nsd->tls_ctx &&
 2861         nsd->options->tls_port &&
 2862         using_tls_port((struct sockaddr *)&sock->addr.ai_addr, nsd->options->tls_port))
 2863     {
 2864         data->tls_accept = 1;
 2865         if(verbosity >= 2) {
 2866             char buf[48];
 2867             addrport2str((struct sockaddr_storage*)&sock->addr.ai_addr, buf, sizeof(buf));
 2868             VERBOSITY(2, (LOG_NOTICE, "setup TCP for TLS service on interface %s", buf));
 2869         }
 2870     } else {
 2871         data->tls_accept = 0;
 2872     }
 2873 #endif
 2874 
 2875     memset(handler, 0, sizeof(*handler));
 2876     event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_tcp_accept, data);
 2877     if(event_base_set(nsd->event_base, handler) != 0)
 2878         log_msg(LOG_ERR, "nsd tcp: event_base_set failed");
 2879     if(event_add(handler, NULL) != 0)
 2880         log_msg(LOG_ERR, "nsd tcp: event_add failed");
 2881     data->event_added = 1;
 2882 }
 2883 
 2884 /*
 2885  * Serve DNS requests.
 2886  */
/*
 * Main entry point for a UDP/TCP worker (child) process: sets up the
 * event base, the IPC channel to the parent, per-interface UDP/TCP
 * handlers, then runs the event loop until asked to quit.
 */
void
server_child(struct nsd *nsd)
{
    size_t i, from, numifs;
    region_type *server_region = region_create(xalloc, free);
    struct event_base* event_base = nsd_child_event_base();
    sig_atomic_t mode;

    if(!event_base) {
        log_msg(LOG_ERR, "nsd server could not create event base");
        exit(1);
    }
    nsd->event_base = event_base;
    nsd->server_region = server_region;

#ifdef RATELIMIT
    /* per-child rate-limit state */
    rrl_init(nsd->this_child->child_num);
#endif

    assert(nsd->server_kind != NSD_SERVER_MAIN);
    DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started"));

#ifdef HAVE_SETPROCTITLE
    setproctitle("server %d", nsd->this_child->child_num + 1);
#endif
#ifdef HAVE_CPUSET_T
    if(nsd->use_cpu_affinity) {
        set_cpu_affinity(nsd->this_child->cpuset);
    }
#endif

    /* close socket kinds this child does not serve */
    if (!(nsd->server_kind & NSD_SERVER_TCP)) {
        server_close_all_sockets(nsd->tcp, nsd->ifs);
    }
    if (!(nsd->server_kind & NSD_SERVER_UDP)) {
        server_close_all_sockets(nsd->udp, nsd->ifs);
    }

    /* register the IPC channel to the parent process, if any */
    if (nsd->this_child->parent_fd != -1) {
        struct event *handler;
        struct ipc_handler_conn_data* user_data =
            (struct ipc_handler_conn_data*)region_alloc(
            server_region, sizeof(struct ipc_handler_conn_data));
        user_data->nsd = nsd;
        user_data->conn = xfrd_tcp_create(server_region, QIOBUFSZ);

        handler = (struct event*) region_alloc(
            server_region, sizeof(*handler));
        memset(handler, 0, sizeof(*handler));
        event_set(handler, nsd->this_child->parent_fd, EV_PERSIST|
            EV_READ, child_handle_parent_command, user_data);
        if(event_base_set(event_base, handler) != 0)
            log_msg(LOG_ERR, "nsd ipcchild: event_base_set failed");
        if(event_add(handler, NULL) != 0)
            log_msg(LOG_ERR, "nsd ipcchild: event_add failed");
    }

    /* with reuseport, each child serves its own slice [from, from+numifs)
     * of the interface list; otherwise every child serves them all */
    if(nsd->reuseport) {
        numifs = nsd->ifs / nsd->reuseport;
        from = numifs * nsd->this_child->child_num;
        if(from+numifs > nsd->ifs) { /* should not happen */
            from = 0;
            numifs = nsd->ifs;
        }
    } else {
        from = 0;
        numifs = nsd->ifs;
    }

    if (nsd->server_kind & NSD_SERVER_UDP) {
        int child = nsd->this_child->child_num;
        /* preallocate the query/iovec/mmsghdr arrays used by
         * nsd_recvmmsg/nsd_sendmmsg in handle_udp */
        memset(msgs, 0, sizeof(msgs));
        for (i = 0; i < NUM_RECV_PER_SELECT; i++) {
            queries[i] = query_create(server_region,
                compressed_dname_offsets,
                compression_table_size, compressed_dnames);
            query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
            iovecs[i].iov_base          = buffer_begin(queries[i]->packet);
            iovecs[i].iov_len           = buffer_remaining(queries[i]->packet);
            msgs[i].msg_hdr.msg_iov     = &iovecs[i];
            msgs[i].msg_hdr.msg_iovlen  = 1;
            msgs[i].msg_hdr.msg_name    = &queries[i]->addr;
            msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
        }

        for (i = 0; i < nsd->ifs; i++) {
            int listen;
            struct udp_handler_data *data;

            listen = nsd_bitset_isset(nsd->udp[i].servers, child);

            if(i >= from && i < (from + numifs) && listen) {
                data = region_alloc_zero(
                    nsd->server_region, sizeof(*data));
                add_udp_handler(nsd, &nsd->udp[i], data);
            } else {
                /* close sockets intended for other servers */
                server_close_socket(&nsd->udp[i]);
            }
        }
    }

    /*
     * Keep track of all the TCP accept handlers so we can enable
     * and disable them based on the current number of active TCP
     * connections.
     */
    if (nsd->server_kind & NSD_SERVER_TCP) {
        int child = nsd->this_child->child_num;
        tcp_accept_handler_count = numifs;
        tcp_accept_handlers = region_alloc_array(server_region,
            numifs, sizeof(*tcp_accept_handlers));

        for (i = 0; i < nsd->ifs; i++) {
            int listen;
            struct tcp_accept_handler_data *data;

            listen = nsd_bitset_isset(nsd->tcp[i].servers, child);

            if(i >= from && i < (from + numifs) && listen) {
                data = &tcp_accept_handlers[i-from];
                memset(data, 0, sizeof(*data));
                add_tcp_handler(nsd, &nsd->tcp[i], data);
            } else {
                /* close sockets intended for other servers */
                /*
                 * uncomment this once tcp servers are no
                 * longer copied in the tcp fd copy line
                 * in server_init().
                server_close_socket(&nsd->tcp[i]);
                */
                /* close sockets not meant for this server*/
                if(!listen)
                    server_close_socket(&nsd->tcp[i]);
            }
        }
    } else {
        tcp_accept_handler_count = 0;
    }

    /* The main loop... */
    while ((mode = nsd->mode) != NSD_QUIT) {
        /* pick up any mode change requested by signal handlers */
        if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd);

        /* Do we need to do the statistics... */
        if (mode == NSD_STATS) {
#ifdef BIND8_STATS
            int p = nsd->st.period;
            nsd->st.period = 1; /* force stats printout */
            /* Dump the statistics */
            bind8_stats(nsd);
            nsd->st.period = p;
#else /* !BIND8_STATS */
            log_msg(LOG_NOTICE, "Statistics support not enabled at compile time.");
#endif /* BIND8_STATS */

            nsd->mode = NSD_RUN;
        }
        else if (mode == NSD_REAP_CHILDREN) {
            /* got signal, notify parent. parent reaps terminated children. */
            if (nsd->this_child->parent_fd != -1) {
                sig_atomic_t parent_notify = NSD_REAP_CHILDREN;
                if (write(nsd->this_child->parent_fd,
                    &parent_notify,
                    sizeof(parent_notify)) == -1)
                {
                    log_msg(LOG_ERR, "problems sending command from %d to parent: %s",
                        (int) nsd->this_child->pid, strerror(errno));
                }
            } else /* no parent, so reap 'em */
                while (waitpid(-1, NULL, WNOHANG) > 0) ;
            nsd->mode = NSD_RUN;
        }
        else if(mode == NSD_RUN) {
            /* Wait for a query... */
            if(event_base_loop(event_base, EVLOOP_ONCE) == -1) {
                if (errno != EINTR) {
                    log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno));
                    break;
                }
            }
        } else if(mode == NSD_QUIT) {
            /* ignore here, quit */
        } else {
            log_msg(LOG_ERR, "mode bad value %d, back to service.",
                (int)mode);
            nsd->mode = NSD_RUN;
        }
    }

    /* drain still-open TCP connections before exiting */
    service_remaining_tcp(nsd);
#ifdef  BIND8_STATS
    bind8_stats(nsd);
#endif /* BIND8_STATS */

#ifdef MEMCLEAN /* OS collects memory pages */
#ifdef RATELIMIT
    rrl_deinit(nsd->this_child->child_num);
#endif
    event_base_free(event_base);
    region_destroy(server_region);
#endif
    server_shutdown(nsd);
}
 3091 
 3092 static void remaining_tcp_timeout(int ATTR_UNUSED(fd), short event, void* arg)
 3093 {
 3094     int* timed_out = (int*)arg;
 3095         assert(event & EV_TIMEOUT); (void)event;
 3096     /* wake up the service tcp thread, note event is no longer
 3097      * registered */
 3098     *timed_out = 1;
 3099 }
 3100 
/*
 * Finish serving the TCP connections that are still open when the
 * child is told to stop: re-register each connection (with a capped
 * timeout and no new queries allowed) on a fresh event base and run
 * it until all connections complete, a quit signal arrives, or the
 * one-second watchdog timer expires.
 */
void
service_remaining_tcp(struct nsd* nsd)
{
    struct tcp_handler_data* p;
    struct event_base* event_base;
    /* check if it is needed */
    if(nsd->current_tcp_count == 0 || tcp_active_list == NULL)
        return;
    VERBOSITY(4, (LOG_INFO, "service remaining TCP connections"));

    /* setup event base */
    event_base = nsd_child_event_base();
    if(!event_base) {
        log_msg(LOG_ERR, "nsd remain tcp could not create event base");
        return;
    }
    /* register tcp connections */
    for(p = tcp_active_list; p != NULL; p = p->next) {
        struct timeval timeout;
        int fd = p->event.ev_fd;
        /* preserve the direction (read/write) the connection was in */
#ifdef USE_MINI_EVENT
        short event = p->event.ev_flags & (EV_READ|EV_WRITE);
#else
        short event = p->event.ev_events & (EV_READ|EV_WRITE);
#endif
        void (*fn)(int, short, void*);
#ifdef HAVE_SSL
        if(p->tls) {
            if((event&EV_READ))
                fn = handle_tls_reading;
            else    fn = handle_tls_writing;
        } else {
#endif
            if((event&EV_READ))
                fn = handle_tcp_reading;
            else    fn = handle_tcp_writing;
#ifdef HAVE_SSL
        }
#endif

        /* existing queries may finish, but accept no new ones */
        p->tcp_no_more_queries = 1;
        /* set timeout to 1/10 second */
        if(p->tcp_timeout > 100)
            p->tcp_timeout = 100;
        timeout.tv_sec = p->tcp_timeout / 1000;
        timeout.tv_usec = (p->tcp_timeout % 1000)*1000;
        /* move the event from the old (child) base to this one */
        event_del(&p->event);
        memset(&p->event, 0, sizeof(p->event));
        event_set(&p->event, fd, EV_PERSIST | event | EV_TIMEOUT,
            fn, p);
        if(event_base_set(event_base, &p->event) != 0)
            log_msg(LOG_ERR, "event base set failed");
        if(event_add(&p->event, &timeout) != 0)
            log_msg(LOG_ERR, "event add failed");
    }

    /* handle it */
    while(nsd->current_tcp_count > 0) {
        /* NOTE(review): mode_t here holds an NSD_* mode value, not a
         * file mode — presumably sig_atomic_t was intended; confirm */
        mode_t m = server_signal_mode(nsd);
        struct event timeout;
        struct timeval tv;
        int timed_out = 0;
        if(m == NSD_QUIT || m == NSD_SHUTDOWN ||
            m == NSD_REAP_CHILDREN) {
            /* quit */
            break;
        }
        /* timer */
        /* have to do something every second */
        tv.tv_sec = 1;
        tv.tv_usec = 0;
        memset(&timeout, 0, sizeof(timeout));
        event_set(&timeout, -1, EV_TIMEOUT, remaining_tcp_timeout,
            &timed_out);
        if(event_base_set(event_base, &timeout) != 0)
            log_msg(LOG_ERR, "remaintcp timer: event_base_set failed");
        if(event_add(&timeout, &tv) != 0)
            log_msg(LOG_ERR, "remaintcp timer: event_add failed");

        /* service loop */
        if(event_base_loop(event_base, EVLOOP_ONCE) == -1) {
            if (errno != EINTR) {
                log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno));
                break;
            }
        }
        if(!timed_out) {
            /* a connection event woke us; remove the pending timer */
            event_del(&timeout);
        } else {
            /* timed out, quit */
            VERBOSITY(4, (LOG_INFO, "service remaining TCP connections: timed out, quit"));
            break;
        }
    }
#ifdef MEMCLEAN
    event_base_free(event_base);
#endif
    /* continue to quit after return */
}
 3200 
 3201 /* Implement recvmmsg and sendmmsg if the platform does not. These functions
 3202  * are always used, even if nonblocking operations are broken, in which case
 3203  * NUM_RECV_PER_SELECT is defined to 1 (one).
 3204  */
 3205 #if defined(HAVE_RECVMMSG)
 3206 #define nsd_recvmmsg recvmmsg
 3207 #else /* !HAVE_RECVMMSG */
 3208 
 3209 static int
 3210 nsd_recvmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen,
 3211              int flags, struct timespec *timeout)
 3212 {
 3213     unsigned int vpos = 0;
 3214     ssize_t rcvd;
 3215 
 3216     /* timeout is ignored, ensure caller does not expect it to work */
 3217     assert(timeout == NULL); (void)timeout;
 3218 
 3219     while(vpos < vlen) {
 3220         rcvd = recvfrom(sockfd,
 3221                         msgvec[vpos].msg_hdr.msg_iov->iov_base,
 3222                         msgvec[vpos].msg_hdr.msg_iov->iov_len,
 3223                         flags,
 3224                         msgvec[vpos].msg_hdr.msg_name,
 3225                        &msgvec[vpos].msg_hdr.msg_namelen);
 3226         if(rcvd < 0) {
 3227             break;
 3228         } else {
 3229             assert((unsigned long long)rcvd <= (unsigned long long)UINT_MAX);
 3230             msgvec[vpos].msg_len = (unsigned int)rcvd;
 3231             vpos++;
 3232         }
 3233     }
 3234 
 3235     if(vpos) {
 3236         /* error will be picked up next time */
 3237         return (int)vpos;
 3238     } else if(errno == 0) {
 3239         return 0;
 3240     } else if(errno == EAGAIN) {
 3241         return 0;
 3242     }
 3243 
 3244     return -1;
 3245 }
 3246 #endif /* HAVE_RECVMMSG */
 3247 
 3248 #ifdef HAVE_SENDMMSG
 3249 #define nsd_sendmmsg(...) sendmmsg(__VA_ARGS__)
 3250 #else /* !HAVE_SENDMMSG */
 3251 
 3252 static int
 3253 nsd_sendmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags)
 3254 {
 3255     unsigned int vpos = 0;
 3256     ssize_t snd;
 3257 
 3258     while(vpos < vlen) {
 3259         assert(msgvec[vpos].msg_hdr.msg_iovlen == 1);
 3260         snd = sendto(sockfd,
 3261                      msgvec[vpos].msg_hdr.msg_iov->iov_base,
 3262                      msgvec[vpos].msg_hdr.msg_iov->iov_len,
 3263                      flags,
 3264                      msgvec[vpos].msg_hdr.msg_name,
 3265                      msgvec[vpos].msg_hdr.msg_namelen);
 3266         if(snd < 0) {
 3267             break;
 3268         } else {
 3269             msgvec[vpos].msg_len = (unsigned int)snd;
 3270             vpos++;
 3271         }
 3272     }
 3273 
 3274     if(vpos) {
 3275         return (int)vpos;
 3276     } else if(errno == 0) {
 3277         return 0;
 3278     }
 3279 
 3280     return -1;
 3281 }
 3282 #endif /* HAVE_SENDMMSG */
 3283 
 3284 static void
 3285 handle_udp(int fd, short event, void* arg)
 3286 {
 3287     struct udp_handler_data *data = (struct udp_handler_data *) arg;
 3288     int received, sent, recvcount, i;
 3289     struct query *q;
 3290 
 3291     if (!(event & EV_READ)) {
 3292         return;
 3293     }
 3294     recvcount = nsd_recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL);
 3295     /* this printf strangely gave a performance increase on Linux */
 3296     /* printf("recvcount %d \n", recvcount); */
 3297     if (recvcount == -1) {
 3298         if (errno != EAGAIN && errno != EINTR) {
 3299             log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno));
 3300             STATUP(data->nsd, rxerr);
 3301             /* No zone statup */
 3302         }
 3303         /* Simply no data available */
 3304         return;
 3305     }
 3306     for (i = 0; i < recvcount; i++) {
 3307     loopstart:
 3308         received = msgs[i].msg_len;
 3309         queries[i]->addrlen = msgs[i].msg_hdr.msg_namelen;
 3310         q = queries[i];
 3311         if (received == -1) {
 3312             log_msg(LOG_ERR, "recvmmsg %d failed %s", i, strerror(
 3313 #if defined(HAVE_RECVMMSG)
 3314                 msgs[i].msg_hdr.msg_flags
 3315 #else
 3316                 errno
 3317 #endif
 3318                 ));
 3319             STATUP(data->nsd, rxerr);
 3320             /* No zone statup */
 3321             query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
 3322             iovecs[i].iov_len = buffer_remaining(q->packet);
 3323             msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
 3324             goto swap_drop;
 3325         }
 3326 
 3327         /* Account... */
 3328 #ifdef BIND8_STATS
 3329         if (data->socket->addr.ai_family == AF_INET) {
 3330             STATUP(data->nsd, qudp);
 3331         } else if (data->socket->addr.ai_family == AF_INET6) {
 3332             STATUP(data->nsd, qudp6);
 3333         }
 3334 #endif
 3335 
 3336         buffer_skip(q->packet, received);
 3337         buffer_flip(q->packet);
 3338 #ifdef USE_DNSTAP
 3339         /*
 3340          * sending UDP-query with server address (local) and client address to dnstap process
 3341          */
 3342         log_addr("query from client", &q->addr, data->socket->addr.ai_family);
 3343         log_addr("to server (local)", &data->socket->addr.ai_addr, data->socket->addr.ai_family);
 3344         dt_collector_submit_auth_query(data->nsd, &data->socket->addr.ai_addr, &q->addr, q->addrlen,
 3345             q->tcp, q->packet);
 3346 #endif /* USE_DNSTAP */
 3347 
 3348         /* Process and answer the query... */
 3349         if (server_process_query_udp(data->nsd, q) != QUERY_DISCARDED) {
 3350             if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) {
 3351                 STATUP(data->nsd, nona);
 3352                 ZTATUP(data->nsd, q->zone, nona);
 3353             }
 3354 
 3355 #ifdef USE_ZONE_STATS
 3356             if (data->socket->addr.ai_family == AF_INET) {
 3357                 ZTATUP(data->nsd, q->zone, qudp);
 3358             } else if (data->socket->addr.ai_family == AF_INET6) {
 3359                 ZTATUP(data->nsd, q->zone, qudp6);
 3360             }
 3361 #endif
 3362 
 3363             /* Add EDNS0 and TSIG info if necessary.  */
 3364             query_add_optional(q, data->nsd);
 3365 
 3366             buffer_flip(q->packet);
 3367             iovecs[i].iov_len = buffer_remaining(q->packet);
 3368 #ifdef BIND8_STATS
 3369             /* Account the rcode & TC... */
 3370             STATUP2(data->nsd, rcode, RCODE(q->packet));
 3371             ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet));
 3372             if (TC(q->packet)) {
 3373                 STATUP(data->nsd, truncated);
 3374                 ZTATUP(data->nsd, q->zone, truncated);
 3375             }
 3376 #endif /* BIND8_STATS */
 3377 #ifdef USE_DNSTAP
 3378             /*
 3379              * sending UDP-response with server address (local) and client address to dnstap process
 3380              */
 3381             log_addr("from server (local)", &data->socket->addr.ai_addr, data->socket->addr.ai_family);
 3382             log_addr("response to client", &q->addr, data->socket->addr.ai_family);
 3383             dt_collector_submit_auth_response(data->nsd, &data->socket->addr.ai_addr,
 3384                 &q->addr, q->addrlen, q->tcp, q->packet,
 3385                 q->zone);
 3386 #endif /* USE_DNSTAP */
 3387         } else {
 3388             query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
 3389             iovecs[i].iov_len = buffer_remaining(q->packet);
 3390             msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
 3391         swap_drop:
 3392             STATUP(data->nsd, dropped);
 3393             ZTATUP(data->nsd, q->zone, dropped);
 3394             if(i != recvcount-1) {
 3395                 /* swap with last and decrease recvcount */
 3396                 struct mmsghdr mtmp = msgs[i];
 3397                 struct iovec iotmp = iovecs[i];
 3398                 recvcount--;
 3399                 msgs[i] = msgs[recvcount];
 3400                 iovecs[i] = iovecs[recvcount];
 3401                 queries[i] = queries[recvcount];
 3402                 msgs[recvcount] = mtmp;
 3403                 iovecs[recvcount] = iotmp;
 3404                 queries[recvcount] = q;
 3405                 msgs[i].msg_hdr.msg_iov = &iovecs[i];
 3406                 msgs[recvcount].msg_hdr.msg_iov = &iovecs[recvcount];
 3407                 goto loopstart;
 3408             } else { recvcount --; }
 3409         }
 3410     }
 3411 
 3412     /* send until all are sent */
 3413     i = 0;
 3414     while(i<recvcount) {
 3415         sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0);
 3416         if(sent == -1) {
 3417             if(errno == ENOBUFS ||
 3418 #ifdef EWOULDBLOCK
 3419                 errno == EWOULDBLOCK ||
 3420 #endif
 3421                 errno == EAGAIN) {
 3422                 /* block to wait until send buffer avail */
 3423                 int flag, errstore;
 3424                 if((flag = fcntl(fd, F_GETFL)) == -1) {
 3425                     log_msg(LOG_ERR, "cannot fcntl F_GETFL: %s", strerror(errno));
 3426                     flag = 0;
 3427                 }
 3428                 flag &= ~O_NONBLOCK;
 3429                 if(fcntl(fd, F_SETFL, flag) == -1)
 3430                     log_msg(LOG_ERR, "cannot fcntl F_SETFL 0: %s", strerror(errno));
 3431                 sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0);
 3432                 errstore = errno;
 3433                 flag |= O_NONBLOCK;
 3434                 if(fcntl(fd, F_SETFL, flag) == -1)
 3435                     log_msg(LOG_ERR, "cannot fcntl F_SETFL O_NONBLOCK: %s", strerror(errno));
 3436                 if(sent != -1) {
 3437                     i += sent;
 3438                     continue;
 3439                 }
 3440                 errno = errstore;
 3441             }
 3442             /* don't log transient network full errors, unless
 3443              * on higher verbosity */
 3444             if(!(errno == ENOBUFS && verbosity < 1) &&
 3445 #ifdef EWOULDBLOCK
 3446                errno != EWOULDBLOCK &&
 3447 #endif
 3448                errno != EAGAIN) {
 3449                 const char* es = strerror(errno);
 3450                 char a[64];
 3451                 addrport2str(&queries[i]->addr, a, sizeof(a));
 3452                 log_msg(LOG_ERR, "sendmmsg [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es);
 3453             }
 3454 #ifdef BIND8_STATS
 3455             data->nsd->st.txerr += recvcount-i;
 3456 #endif /* BIND8_STATS */
 3457             break;
 3458         }
 3459         i += sent;
 3460     }
 3461     for(i=0; i<recvcount; i++) {
 3462         query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
 3463         iovecs[i].iov_len = buffer_remaining(queries[i]->packet);
 3464         msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
 3465     }
 3466 }
 3467 
 3468 #ifdef HAVE_SSL
 3469 /*
 3470  * Setup an event for the tcp handler.
 3471  */
 3472 static void
 3473 tcp_handler_setup_event(struct tcp_handler_data* data, void (*fn)(int, short, void *),
 3474        int fd, short event)
 3475 {
 3476     struct timeval timeout;
 3477     struct event_base* ev_base;
 3478 
 3479     timeout.tv_sec = data->nsd->tcp_timeout;
 3480     timeout.tv_usec = 0L;
 3481 
 3482     ev_base = data->event.ev_base;
 3483     event_del(&data->event);
 3484     memset(&data->event, 0, sizeof(data->event));
 3485     event_set(&data->event, fd, event, fn, data);
 3486     if(event_base_set(ev_base, &data->event) != 0)
 3487         log_msg(LOG_ERR, "event base set failed");
 3488     if(event_add(&data->event, &timeout) != 0)
 3489         log_msg(LOG_ERR, "event add failed");
 3490 }
 3491 #endif /* HAVE_SSL */
 3492 
/*
 * Tear down one TCP (or TLS) connection handler: remove its event,
 * shut down TLS state, close the socket, unlink the handler from the
 * global tcp_active_list and destroy its memory region.  Re-arms the
 * TCP accept handlers when this close frees up connection capacity.
 */
static void
cleanup_tcp_handler(struct tcp_handler_data* data)
{
    event_del(&data->event);
#ifdef HAVE_SSL
    if(data->tls) {
        /* best-effort close_notify; the fd is closed below anyway */
        SSL_shutdown(data->tls);
        SSL_free(data->tls);
        data->tls = NULL;
    }
#endif
    close(data->event.ev_fd);
    /* unlink from the doubly linked list of active tcp handlers */
    if(data->prev)
        data->prev->next = data->next;
    else    tcp_active_list = data->next;
    if(data->next)
        data->next->prev = data->prev;

    /*
     * Enable the TCP accept handlers when the current number of
     * TCP connections is about to drop below the maximum number
     * of TCP connections.
     */
    if (slowaccept || data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) {
        configure_handler_event_types(EV_READ|EV_PERSIST);
        if(slowaccept) {
            /* stop the accept() backoff timer as well */
            event_del(&slowaccept_event);
            slowaccept = 0;
        }
    }
    --data->nsd->current_tcp_count;
    assert(data->nsd->current_tcp_count >= 0);

    /* NOTE(review): presumably data itself is allocated inside this
     * region -- data must not be touched after this call. */
    region_destroy(data->region);
}
 3528 
 3529 static void
 3530 handle_tcp_reading(int fd, short event, void* arg)
 3531 {
 3532     struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
 3533     ssize_t received;
 3534     struct event_base* ev_base;
 3535     struct timeval timeout;
 3536 
 3537     if ((event & EV_TIMEOUT)) {
 3538         /* Connection timed out.  */
 3539         cleanup_tcp_handler(data);
 3540         return;
 3541     }
 3542 
 3543     if ((data->nsd->tcp_query_count > 0 &&
 3544         data->query_count >= data->nsd->tcp_query_count) ||
 3545         data->tcp_no_more_queries) {
 3546         /* No more queries allowed on this tcp connection. */
 3547         cleanup_tcp_handler(data);
 3548         return;
 3549     }
 3550 
 3551     assert((event & EV_READ));
 3552 
 3553     if (data->bytes_transmitted == 0) {
 3554         query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1);
 3555     }
 3556 
 3557     /*
 3558      * Check if we received the leading packet length bytes yet.
 3559      */
 3560     if (data->bytes_transmitted < sizeof(uint16_t)) {
 3561         received = read(fd,
 3562                 (char *) &data->query->tcplen
 3563                 + data->bytes_transmitted,
 3564                 sizeof(uint16_t) - data->bytes_transmitted);
 3565         if (received == -1) {
 3566             if (errno == EAGAIN || errno == EINTR) {
 3567                 /*
 3568                  * Read would block, wait until more
 3569                  * data is available.
 3570                  */
 3571                 return;
 3572             } else {
 3573                 char buf[48];
 3574                 addr2str(&data->query->addr, buf, sizeof(buf));
 3575 #ifdef ECONNRESET
 3576                 if (verbosity >= 2 || errno != ECONNRESET)
 3577 #endif /* ECONNRESET */
 3578                 log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno));
 3579                 cleanup_tcp_handler(data);
 3580                 return;
 3581             }
 3582         } else if (received == 0) {
 3583             /* EOF */
 3584             cleanup_tcp_handler(data);
 3585             return;
 3586         }
 3587 
 3588         data->bytes_transmitted += received;
 3589         if (data->bytes_transmitted < sizeof(uint16_t)) {
 3590             /*
 3591              * Not done with the tcplen yet, wait for more
 3592              * data to become available.
 3593              */
 3594             return;
 3595         }
 3596 
 3597         assert(data->bytes_transmitted == sizeof(uint16_t));
 3598 
 3599         data->query->tcplen = ntohs(data->query->tcplen);
 3600 
 3601         /*
 3602          * Minimum query size is:
 3603          *
 3604          *     Size of the header (12)
 3605          *   + Root domain name   (1)
 3606          *   + Query class        (2)
 3607          *   + Query type         (2)
 3608          */
 3609         if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) {
 3610             VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection"));
 3611             cleanup_tcp_handler(data);
 3612             return;
 3613         }
 3614 
 3615         if (data->query->tcplen > data->query->maxlen) {
 3616             VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection"));
 3617             cleanup_tcp_handler(data);
 3618             return;
 3619         }
 3620 
 3621         buffer_set_limit(data->query->packet, data->query->tcplen);
 3622     }
 3623 
 3624     assert(buffer_remaining(data->query->packet) > 0);
 3625 
 3626     /* Read the (remaining) query data.  */
 3627     received = read(fd,
 3628             buffer_current(data->query->packet),
 3629             buffer_remaining(data->query->packet));
 3630     if (received == -1) {
 3631         if (errno == EAGAIN || errno == EINTR) {
 3632             /*
 3633              * Read would block, wait until more data is
 3634              * available.
 3635              */
 3636             return;
 3637         } else {
 3638             char buf[48];
 3639             addr2str(&data->query->addr, buf, sizeof(buf));
 3640 #ifdef ECONNRESET
 3641             if (verbosity >= 2 || errno != ECONNRESET)
 3642 #endif /* ECONNRESET */
 3643             log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno));
 3644             cleanup_tcp_handler(data);
 3645             return;
 3646         }
 3647     } else if (received == 0) {
 3648         /* EOF */
 3649         cleanup_tcp_handler(data);
 3650         return;
 3651     }
 3652 
 3653     data->bytes_transmitted += received;
 3654     buffer_skip(data->query->packet, received);
 3655     if (buffer_remaining(data->query->packet) > 0) {
 3656         /*
 3657          * Message not yet complete, wait for more data to
 3658          * become available.
 3659          */
 3660         return;
 3661     }
 3662 
 3663     assert(buffer_position(data->query->packet) == data->query->tcplen);
 3664 
 3665     /* Account... */
 3666 #ifdef BIND8_STATS
 3667 #ifndef INET6
 3668     STATUP(data->nsd, ctcp);
 3669 #else
 3670     if (data->query->addr.ss_family == AF_INET) {
 3671         STATUP(data->nsd, ctcp);
 3672     } else if (data->query->addr.ss_family == AF_INET6) {
 3673         STATUP(data->nsd, ctcp6);
 3674     }
 3675 #endif
 3676 #endif /* BIND8_STATS */
 3677 
 3678     /* We have a complete query, process it.  */
 3679 
 3680     /* tcp-query-count: handle query counter ++ */
 3681     data->query_count++;
 3682 
 3683     buffer_flip(data->query->packet);
 3684 #ifdef USE_DNSTAP
 3685     /*
 3686      * and send TCP-query with found address (local) and client address to dnstap process
 3687      */
 3688     log_addr("query from client", &data->query->addr, data->query->addr.ss_family);
 3689     log_addr("to server (local)", &data->socket->addr.ai_addr, data->query->addr.ss_family);
 3690     dt_collector_submit_auth_query(data->nsd, &data->socket->addr.ai_addr, &data->query->addr,
 3691         data->query->addrlen, data->query->tcp, data->query->packet);
 3692 #endif /* USE_DNSTAP */
 3693     data->query_state = server_process_query(data->nsd, data->query);
 3694     if (data->query_state == QUERY_DISCARDED) {
 3695         /* Drop the packet and the entire connection... */
 3696         STATUP(data->nsd, dropped);
 3697         ZTATUP(data->nsd, data->query->zone, dropped);
 3698         cleanup_tcp_handler(data);
 3699         return;
 3700     }
 3701 
 3702 #ifdef BIND8_STATS
 3703     if (RCODE(data->query->packet) == RCODE_OK
 3704         && !AA(data->query->packet))
 3705     {
 3706         STATUP(data->nsd, nona);
 3707         ZTATUP(data->nsd, data->query->zone, nona);
 3708     }
 3709 #endif /* BIND8_STATS */
 3710 
 3711 #ifdef USE_ZONE_STATS
 3712 #ifndef INET6
 3713     ZTATUP(data->nsd, data->query->zone, ctcp);
 3714 #else
 3715     if (data->query->addr.ss_family == AF_INET) {
 3716         ZTATUP(data->nsd, data->query->zone, ctcp);
 3717     } else if (data->query->addr.ss_family == AF_INET6) {
 3718         ZTATUP(data->nsd, data->query->zone, ctcp6);
 3719     }
 3720 #endif
 3721 #endif /* USE_ZONE_STATS */
 3722 
 3723     query_add_optional(data->query, data->nsd);
 3724 
 3725     /* Switch to the tcp write handler.  */
 3726     buffer_flip(data->query->packet);
 3727     data->query->tcplen = buffer_remaining(data->query->packet);
 3728 #ifdef BIND8_STATS
 3729     /* Account the rcode & TC... */
 3730     STATUP2(data->nsd, rcode, RCODE(data->query->packet));
 3731     ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet));
 3732     if (TC(data->query->packet)) {
 3733         STATUP(data->nsd, truncated);
 3734         ZTATUP(data->nsd, data->query->zone, truncated);
 3735     }
 3736 #endif /* BIND8_STATS */
 3737 #ifdef USE_DNSTAP
 3738     /*
 3739      * sending TCP-response with found (earlier) address (local) and client address to dnstap process
 3740      */
 3741     log_addr("from server (local)", &data->socket->addr.ai_addr, data->query->addr.ss_family);
 3742     log_addr("response to client", &data->query->addr, data->query->addr.ss_family);
 3743     dt_collector_submit_auth_response(data->nsd, &data->socket->addr.ai_addr, &data->query->addr,
 3744         data->query->addrlen, data->query->tcp, data->query->packet,
 3745         data->query->zone);
 3746 #endif /* USE_DNSTAP */
 3747     data->bytes_transmitted = 0;
 3748 
 3749     timeout.tv_sec = data->tcp_timeout / 1000;
 3750     timeout.tv_usec = (data->tcp_timeout % 1000)*1000;
 3751 
 3752     ev_base = data->event.ev_base;
 3753     event_del(&data->event);
 3754     memset(&data->event, 0, sizeof(data->event));
 3755     event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT,
 3756         handle_tcp_reading, data);
 3757     if(event_base_set(ev_base, &data->event) != 0)
 3758         log_msg(LOG_ERR, "event base set tcpr failed");
 3759     if(event_add(&data->event, &timeout) != 0)
 3760         log_msg(LOG_ERR, "event add tcpr failed");
 3761     /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/
 3762     handle_tcp_writing(fd, EV_WRITE, data);
 3763 }
 3764 
/*
 * Event callback that writes the response (two byte network-order
 * length prefix plus DNS message) back to the TCP client.
 * data->bytes_transmitted counts bytes of the current response
 * already written; on partial writes the handler returns and is
 * invoked again when the socket becomes writable.  AXFR answers span
 * multiple packets and loop back into this handler until query_axfr()
 * reports QUERY_PROCESSED.  Once the full answer is out, the
 * connection is switched back to handle_tcp_reading for the next
 * query (or half-closed if no more queries are allowed).
 */
static void
handle_tcp_writing(int fd, short event, void* arg)
{
    struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
    ssize_t sent;
    struct query *q = data->query;
    struct timeval timeout;
    struct event_base* ev_base;

    if ((event & EV_TIMEOUT)) {
        /* Connection timed out.  */
        cleanup_tcp_handler(data);
        return;
    }

    assert((event & EV_WRITE));

    if (data->bytes_transmitted < sizeof(q->tcplen)) {
        /* Writing the response packet length.  */
        uint16_t n_tcplen = htons(q->tcplen);
#ifdef HAVE_WRITEV
        /* gather-write the length prefix and the packet in one call */
        struct iovec iov[2];
        iov[0].iov_base = (uint8_t*)&n_tcplen + data->bytes_transmitted;
        iov[0].iov_len = sizeof(n_tcplen) - data->bytes_transmitted; 
        iov[1].iov_base = buffer_begin(q->packet);
        iov[1].iov_len = buffer_limit(q->packet);
        sent = writev(fd, iov, 2);
#else /* HAVE_WRITEV */
        sent = write(fd,
                 (const char *) &n_tcplen + data->bytes_transmitted,
                 sizeof(n_tcplen) - data->bytes_transmitted);
#endif /* HAVE_WRITEV */
        if (sent == -1) {
            if (errno == EAGAIN || errno == EINTR) {
                /*
                 * Write would block, wait until
                 * socket becomes writable again.
                 */
                return;
            } else {
                /* only the log message is conditional on verbosity;
                 * the connection is always cleaned up */
#ifdef ECONNRESET
                if(verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
#ifdef EPIPE
                  if(verbosity >= 2 || errno != EPIPE)
#endif /* EPIPE 'broken pipe' */
                    log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
                cleanup_tcp_handler(data);
                return;
            }
        }

        data->bytes_transmitted += sent;
        if (data->bytes_transmitted < sizeof(q->tcplen)) {
            /*
             * Writing not complete, wait until socket
             * becomes writable again.
             */
            return;
        }

#ifdef HAVE_WRITEV
        /* discount the prefix bytes so buffer_skip below only skips
         * payload.  NOTE(review): if an earlier call wrote part of the
         * 2-byte prefix, sent contains fewer than sizeof(n_tcplen)
         * prefix bytes and this subtracts too much -- confirm the
         * prefix is never split in practice. */
        sent -= sizeof(n_tcplen);
        /* handle potential 'packet done' code */
        goto packet_could_be_done;
#endif
    }
 
    sent = write(fd,
             buffer_current(q->packet),
             buffer_remaining(q->packet));
    if (sent == -1) {
        if (errno == EAGAIN || errno == EINTR) {
            /*
             * Write would block, wait until
             * socket becomes writable again.
             */
            return;
        } else {
#ifdef ECONNRESET
            if(verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
#ifdef EPIPE
                  if(verbosity >= 2 || errno != EPIPE)
#endif /* EPIPE 'broken pipe' */
            log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
            cleanup_tcp_handler(data);
            return;
        }
    }

    data->bytes_transmitted += sent;
#ifdef HAVE_WRITEV
  packet_could_be_done:
#endif
    buffer_skip(q->packet, sent);
    if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) {
        /*
         * Still more data to write when socket becomes
         * writable again.
         */
        return;
    }

    assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen));

    if (data->query_state == QUERY_IN_AXFR) {
        /* Continue processing AXFR and writing back results.  */
        buffer_clear(q->packet);
        data->query_state = query_axfr(data->nsd, q);
        if (data->query_state != QUERY_PROCESSED) {
            query_add_optional(data->query, data->nsd);

            /* Reset data. */
            buffer_flip(q->packet);
            q->tcplen = buffer_remaining(q->packet);
            data->bytes_transmitted = 0;
            /* Reset timeout.  */
            timeout.tv_sec = data->tcp_timeout / 1000;
            timeout.tv_usec = (data->tcp_timeout % 1000)*1000;
            ev_base = data->event.ev_base;
            event_del(&data->event);
            memset(&data->event, 0, sizeof(data->event));
            event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT,
                handle_tcp_writing, data);
            if(event_base_set(ev_base, &data->event) != 0)
                log_msg(LOG_ERR, "event base set tcpw failed");
            if(event_add(&data->event, &timeout) != 0)
                log_msg(LOG_ERR, "event add tcpw failed");

            /*
             * Write data if/when the socket is writable
             * again.
             */
            return;
        }
    }

    /*
     * Done sending, wait for the next request to arrive on the
     * TCP socket by installing the TCP read handler.
     */
    if ((data->nsd->tcp_query_count > 0 &&
        data->query_count >= data->nsd->tcp_query_count) ||
        data->tcp_no_more_queries) {

        /* half-close: signal the client no more answers follow */
        (void) shutdown(fd, SHUT_WR);
    }

    data->bytes_transmitted = 0;

    timeout.tv_sec = data->tcp_timeout / 1000;
    timeout.tv_usec = (data->tcp_timeout % 1000)*1000;
    ev_base = data->event.ev_base;
    event_del(&data->event);
    memset(&data->event, 0, sizeof(data->event));
    event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT,
        handle_tcp_reading, data);
    if(event_base_set(ev_base, &data->event) != 0)
        log_msg(LOG_ERR, "event base set tcpw failed");
    if(event_add(&data->event, &timeout) != 0)
        log_msg(LOG_ERR, "event add tcpw failed");
}
 3928 
 3929 #ifdef HAVE_SSL
 3930 /** create SSL object and associate fd */
 3931 static SSL*
 3932 incoming_ssl_fd(SSL_CTX* ctx, int fd)
 3933 {
 3934     SSL* ssl = SSL_new((SSL_CTX*)ctx);
 3935     if(!ssl) {
 3936         log_crypto_err("could not SSL_new");
 3937         return NULL;
 3938     }
 3939     SSL_set_accept_state(ssl);
 3940     (void)SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY);
 3941     if(!SSL_set_fd(ssl, fd)) {
 3942         log_crypto_err("could not SSL_set_fd");
 3943         SSL_free(ssl);
 3944         return NULL;
 3945     }
 3946     return ssl;
 3947 }
 3948 
/**
 * TLS handshake to upgrade TCP connection.
 *
 * Drives (or resumes) SSL_do_handshake() on data->tls.  When OpenSSL
 * reports WANT_READ/WANT_WRITE the libevent registration is flipped
 * so we wake on the condition the handshake needs.  The
 * tls_hs_read_event/tls_hs_write_event states mean an event direction
 * requested elsewhere (by the TLS read/write handlers) is now
 * satisfied, so only the event registration is restored here.
 *
 * @param data: the connection handler state.
 * @param fd: the connection socket.
 * @param writing: nonzero when called from the writing handler, zero
 *	from the reading handler; used to restore the proper event
 *	direction after the handshake completes.
 * @return 1 to continue (possibly after another event), 0 when the
 *	connection was cleaned up -- data must not be used after that.
 */
static int
tls_handshake(struct tcp_handler_data* data, int fd, int writing)
{
    int r;
    if(data->shake_state == tls_hs_read_event) {
        /* read condition satisfied back to writing */
        tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE);
        data->shake_state = tls_hs_none;
        return 1;
    }
    if(data->shake_state == tls_hs_write_event) {
        /* write condition satisfied back to reading */
        tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
        data->shake_state = tls_hs_none;
        return 1;
    }

    /* (continue to) setup the TLS connection */
    ERR_clear_error();
    r = SSL_do_handshake(data->tls);

    if(r != 1) {
        int want = SSL_get_error(data->tls, r);
        if(want == SSL_ERROR_WANT_READ) {
            if(data->shake_state == tls_hs_read) {
                /* try again later */
                return 1;
            }
            data->shake_state = tls_hs_read;
            /* switch back to reading mode */
            tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
            return 1;
        } else if(want == SSL_ERROR_WANT_WRITE) {
            if(data->shake_state == tls_hs_write) {
                /* try again later */
                return 1;
            }
            data->shake_state = tls_hs_write;
            /* switch back to writing mode */
            tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE);
            return 1;
        } else {
            /* fatal handshake failure: log (unless squelched) and
             * drop the connection */
            if(r == 0)
                VERBOSITY(3, (LOG_ERR, "TLS handshake: connection closed prematurely"));
            else {
                unsigned long err = ERR_get_error();
                if(!squelch_err_ssl_handshake(err)) {
                    char a[64], s[256];
                    addr2str(&data->query->addr, a, sizeof(a));
                    snprintf(s, sizeof(s), "TLS handshake failed from %s", a);
                    log_crypto_from_err(s, err);
                }
            }
            cleanup_tcp_handler(data);
            return 0;
        }
    }

    /* Use to log successful upgrade for testing - could be removed*/
    VERBOSITY(3, (LOG_INFO, "TLS handshake succeeded."));
    /* set back to the event we need to have when reading (or writing) */
    if(data->shake_state == tls_hs_read && writing) {
        tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE);
    } else if(data->shake_state == tls_hs_write && !writing) {
        tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
    }
    data->shake_state = tls_hs_none;
    return 1;
}
 4019 
 4020 /** handle TLS reading of incoming query */
 4021 static void
 4022 handle_tls_reading(int fd, short event, void* arg)
 4023 {
 4024     struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
 4025     ssize_t received;
 4026 
 4027     if ((event & EV_TIMEOUT)) {
 4028         /* Connection timed out.  */
 4029         cleanup_tcp_handler(data);
 4030         return;
 4031     }
 4032 
 4033     if ((data->nsd->tcp_query_count > 0 &&
 4034         data->query_count >= data->nsd->tcp_query_count) ||
 4035         data->tcp_no_more_queries) {
 4036         /* No more queries allowed on this tcp connection. */
 4037         cleanup_tcp_handler(data);
 4038         return;
 4039     }
 4040 
 4041     assert((event & EV_READ));
 4042 
 4043     if (data->bytes_transmitted == 0) {
 4044         query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1);
 4045     }
 4046 
 4047     if(data->shake_state != tls_hs_none) {
 4048         if(!tls_handshake(data, fd, 0))
 4049             return;
 4050         if(data->shake_state != tls_hs_none)
 4051             return;
 4052     }
 4053 
 4054     /*
 4055      * Check if we received the leading packet length bytes yet.
 4056      */
 4057     if(data->bytes_transmitted < sizeof(uint16_t)) {
 4058         ERR_clear_error();
 4059         if((received=SSL_read(data->tls, (char *) &data->query->tcplen
 4060             + data->bytes_transmitted,
 4061             sizeof(uint16_t) - data->bytes_transmitted)) <= 0) {
 4062             int want = SSL_get_error(data->tls, received);
 4063             if(want == SSL_ERROR_ZERO_RETURN) {
 4064                 cleanup_tcp_handler(data);
 4065                 return; /* shutdown, closed */
 4066             } else if(want == SSL_ERROR_WANT_READ) {
 4067                 /* wants to be called again */
 4068                 return;
 4069             }
 4070             else if(want == SSL_ERROR_WANT_WRITE) {
 4071                 /* switch to writing */
 4072                 data->shake_state = tls_hs_write_event;
 4073                 tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);
 4074                 return;
 4075             }
 4076             cleanup_tcp_handler(data);
 4077             log_crypto_err("could not SSL_read");
 4078             return;
 4079         }
 4080 
 4081         data->bytes_transmitted += received;
 4082         if (data->bytes_transmitted < sizeof(uint16_t)) {
 4083             /*
 4084              * Not done with the tcplen yet, wait for more
 4085              * data to become available.
 4086              */
 4087             return;
 4088         }
 4089 
 4090         assert(data->bytes_transmitted == sizeof(uint16_t));
 4091 
 4092         data->query->tcplen = ntohs(data->query->tcplen);
 4093 
 4094         /*
 4095          * Minimum query size is:
 4096          *
 4097          *     Size of the header (12)
 4098          *   + Root domain name   (1)
 4099          *   + Query class        (2)
 4100          *   + Query type         (2)
 4101          */
 4102         if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) {
 4103             VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection"));
 4104             cleanup_tcp_handler(data);
 4105             return;
 4106         }
 4107 
 4108         if (data->query->tcplen > data->query->maxlen) {
 4109             VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection"));
 4110             cleanup_tcp_handler(data);
 4111             return;
 4112         }
 4113 
 4114         buffer_set_limit(data->query->packet, data->query->tcplen);
 4115     }
 4116 
 4117     assert(buffer_remaining(data->query->packet) > 0);
 4118 
 4119     /* Read the (remaining) query data.  */
 4120     ERR_clear_error();
 4121     received = SSL_read(data->tls, (void*)buffer_current(data->query->packet),
 4122                 (int)buffer_remaining(data->query->packet));
 4123     if(received <= 0) {
 4124         int want = SSL_get_error(data->tls, received);
 4125         if(want == SSL_ERROR_ZERO_RETURN) {
 4126             cleanup_tcp_handler(data);
 4127             return; /* shutdown, closed */
 4128         } else if(want == SSL_ERROR_WANT_READ) {
 4129             /* wants to be called again */
 4130             return;
 4131         }
 4132         else if(want == SSL_ERROR_WANT_WRITE) {
 4133             /* switch back writing */
 4134             data->shake_state = tls_hs_write_event;
 4135             tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);
 4136             return;
 4137         }
 4138         cleanup_tcp_handler(data);
 4139         log_crypto_err("could not SSL_read");
 4140         return;
 4141     }
 4142 
 4143     data->bytes_transmitted += received;
 4144     buffer_skip(data->query->packet, received);
 4145     if (buffer_remaining(data->query->packet) > 0) {
 4146         /*
 4147          * Message not yet complete, wait for more data to
 4148          * become available.
 4149          */
 4150         return;
 4151     }
 4152 
 4153     assert(buffer_position(data->query->packet) == data->query->tcplen);
 4154 
 4155     /* Account... */
 4156 #ifndef INET6
 4157     STATUP(data->nsd, ctls);
 4158 #else
 4159     if (data->query->addr.ss_family == AF_INET) {
 4160         STATUP(data->nsd, ctls);
 4161     } else if (data->query->addr.ss_family == AF_INET6) {
 4162         STATUP(data->nsd, ctls6);
 4163     }
 4164 #endif
 4165 
 4166     /* We have a complete query, process it.  */
 4167 
 4168     /* tcp-query-count: handle query counter ++ */
 4169     data->query_count++;
 4170 
 4171     buffer_flip(data->query->packet);
 4172 #ifdef USE_DNSTAP
 4173     /*
 4174      * and send TCP-query with found address (local) and client address to dnstap process
 4175      */
 4176     log_addr("query from client", &data->query->addr, data->query->addr.ss_family);
 4177     log_addr("to server (local)", &data->socket->addr.ai_addr, data->query->addr.ss_family);
 4178     dt_collector_submit_auth_query(data->nsd, &data->socket->addr.ai_addr, &data->query->addr,
 4179         data->query->addrlen, data->query->tcp, data->query->packet);
 4180 #endif /* USE_DNSTAP */
 4181     data->query_state = server_process_query(data->nsd, data->query);
 4182     if (data->query_state == QUERY_DISCARDED) {
 4183         /* Drop the packet and the entire connection... */
 4184         STATUP(data->nsd, dropped);
 4185         ZTATUP(data->nsd, data->query->zone, dropped);
 4186         cleanup_tcp_handler(data);
 4187         return;
 4188     }
 4189 
 4190 #ifdef BIND8_STATS
 4191     if (RCODE(data->query->packet) == RCODE_OK
 4192         && !AA(data->query->packet))
 4193     {
 4194         STATUP(data->nsd, nona);
 4195         ZTATUP(data->nsd, data->query->zone, nona);
 4196     }
 4197 #endif /* BIND8_STATS */
 4198 
 4199 #ifdef USE_ZONE_STATS
 4200 #ifndef INET6
 4201     ZTATUP(data->nsd, data->query->zone, ctls);
 4202 #else
 4203     if (data->query->addr.ss_family == AF_INET) {
 4204         ZTATUP(data->nsd, data->query->zone, ctls);
 4205     } else if (data->query->addr.ss_family == AF_INET6) {
 4206         ZTATUP(data->nsd, data->query->zone, ctls6);
 4207     }
 4208 #endif
 4209 #endif /* USE_ZONE_STATS */
 4210 
 4211     query_add_optional(data->query, data->nsd);
 4212 
 4213     /* Switch to the tcp write handler.  */
 4214     buffer_flip(data->query->packet);
 4215     data->query->tcplen = buffer_remaining(data->query->packet);
 4216 #ifdef BIND8_STATS
 4217     /* Account the rcode & TC... */
 4218     STATUP2(data->nsd, rcode, RCODE(data->query->packet));
 4219     ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet));
 4220     if (TC(data->query->packet)) {
 4221         STATUP(data->nsd, truncated);
 4222         ZTATUP(data->nsd, data->query->zone, truncated);
 4223     }
 4224 #endif /* BIND8_STATS */
 4225 #ifdef USE_DNSTAP
 4226     /*
 4227      * sending TCP-response with found (earlier) address (local) and client address to dnstap process
 4228      */
 4229     log_addr("from server (local)", &data->socket->addr.ai_addr, data->query->addr.ss_family);
 4230     log_addr("response to client", &data->query->addr, data->query->addr.ss_family);
 4231     dt_collector_submit_auth_response(data->nsd, &data->socket->addr.ai_addr, &data->query->addr,
 4232         data->query->addrlen, data->query->tcp, data->query->packet,
 4233         data->query->zone);
 4234 #endif /* USE_DNSTAP */
 4235     data->bytes_transmitted = 0;
 4236 
 4237     tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);
 4238 
 4239     /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/
 4240     handle_tls_writing(fd, EV_WRITE, data);
 4241 }
 4242 
/** handle TLS writing of outgoing response.
 * Drives any pending TLS (re)handshake first, then SSL_write()s the
 * response.  The first write of a message goes through a reassembly
 * buffer so the 2-byte TCP length prefix and the packet are sent as
 * one unit (like writev).  When the full message is out, continues an
 * in-progress AXFR or switches the handler back to reading. */
static void
handle_tls_writing(int fd, short event, void* arg)
{
    struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
    ssize_t sent;
    struct query *q = data->query;
    /* static variable that holds reassembly buffer used to put the
     * TCP length in front of the packet, like writev. */
    static buffer_type* global_tls_temp_buffer = NULL;
    buffer_type* write_buffer;

    if ((event & EV_TIMEOUT)) {
        /* Connection timed out.  */
        cleanup_tcp_handler(data);
        return;
    }

    assert((event & EV_WRITE));

    if(data->shake_state != tls_hs_none) {
        /* a handshake is in progress; drive it before writing data */
        if(!tls_handshake(data, fd, 1))
            return;
        if(data->shake_state != tls_hs_none)
            return;
    }

    /* report partial SSL_write successes instead of retrying internally */
    (void)SSL_set_mode(data->tls, SSL_MODE_ENABLE_PARTIAL_WRITE);

    /* If we are writing the start of a message, we must include the length
     * this is done with a copy into write_buffer. */
    write_buffer = NULL;
    if (data->bytes_transmitted == 0) {
        if(!global_tls_temp_buffer) {
            /* gets deallocated when nsd shuts down from
             * nsd.region */
            global_tls_temp_buffer = buffer_create(nsd.region,
                QIOBUFSZ + sizeof(q->tcplen));
            if (!global_tls_temp_buffer) {
                return;
            }
        }
        write_buffer = global_tls_temp_buffer;
        buffer_clear(write_buffer);
        /* 2-byte length prefix in network order, then the packet data */
        buffer_write_u16(write_buffer, q->tcplen);
        buffer_write(write_buffer, buffer_current(q->packet),
            (int)buffer_remaining(q->packet));
        buffer_flip(write_buffer);
    } else {
        /* mid-message: the length prefix already went out, write the
         * remainder of the packet buffer directly */
        write_buffer = q->packet;
    }

    /* Write the response */
    ERR_clear_error();
    sent = SSL_write(data->tls, buffer_current(write_buffer), buffer_remaining(write_buffer));
    if(sent <= 0) {
        int want = SSL_get_error(data->tls, sent);
        if(want == SSL_ERROR_ZERO_RETURN) {
            cleanup_tcp_handler(data);
            /* closed */
        } else if(want == SSL_ERROR_WANT_READ) {
            /* switch back to reading */
            data->shake_state = tls_hs_read_event;
            tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT);
        } else if(want != SSL_ERROR_WANT_WRITE) {
            cleanup_tcp_handler(data);
            log_crypto_err("could not SSL_write");
        }
        /* SSL_ERROR_WANT_WRITE: just wait for the next writable event */
        return;
    }

    buffer_skip(write_buffer, sent);
    if(buffer_remaining(write_buffer) != 0) {
        /* If not all sent, sync up the real buffer if it wasn't used.*/
        if (data->bytes_transmitted == 0 && (ssize_t)sent > (ssize_t)sizeof(q->tcplen)) {
            buffer_skip(q->packet, (ssize_t)sent - (ssize_t)sizeof(q->tcplen));
        }
    }

    data->bytes_transmitted += sent;
    if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) {
        /*
         * Still more data to write when socket becomes
         * writable again.
         */
        return;
    }

    assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen));

    if (data->query_state == QUERY_IN_AXFR) {
        /* Continue processing AXFR and writing back results.  */
        buffer_clear(q->packet);
        data->query_state = query_axfr(data->nsd, q);
        if (data->query_state != QUERY_PROCESSED) {
            query_add_optional(data->query, data->nsd);

            /* Reset data. */
            buffer_flip(q->packet);
            q->tcplen = buffer_remaining(q->packet);
            data->bytes_transmitted = 0;
            /* Reset to writing mode.  */
            tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);

            /*
             * Write data if/when the socket is writable
             * again.
             */
            return;
        }
    }

    /*
     * Done sending, wait for the next request to arrive on the
     * TCP socket by installing the TCP read handler.
     */
    if ((data->nsd->tcp_query_count > 0 &&
        data->query_count >= data->nsd->tcp_query_count) ||
        data->tcp_no_more_queries) {
        /* per-connection query limit reached (or more queries are
         * disallowed): close our writing side; the client sees EOF
         * after it has read this reply */
        (void) shutdown(fd, SHUT_WR);
    }

    data->bytes_transmitted = 0;

    tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT);
}
 4370 #endif
 4371 
 4372 static void
 4373 handle_slowaccept_timeout(int ATTR_UNUSED(fd), short ATTR_UNUSED(event),
 4374     void* ATTR_UNUSED(arg))
 4375 {
 4376     if(slowaccept) {
 4377         configure_handler_event_types(EV_PERSIST | EV_READ);
 4378         slowaccept = 0;
 4379     }
 4380 }
 4381 
 4382 static int perform_accept(int fd, struct sockaddr *addr, socklen_t *addrlen)
 4383 {
 4384 #ifndef HAVE_ACCEPT4
 4385     int s = accept(fd, addr, addrlen);
 4386     if (s != -1) {
 4387         if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) {
 4388             log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno));
 4389             close(s);
 4390             s = -1;
 4391             errno=EINTR; /* stop error printout as error in accept4
 4392                 by setting this errno, it omits printout, in
 4393                 later code that calls nsd_accept4 */
 4394         }
 4395     }
 4396     return s;
 4397 #else
 4398     return accept4(fd, addr, addrlen, SOCK_NONBLOCK);
 4399 #endif /* HAVE_ACCEPT4 */
 4400 }
 4401 
 4402 /*
 4403  * Handle an incoming TCP connection.  The connection is accepted and
 4404  * a new TCP reader event handler is added.  The TCP handler
 4405  * is responsible for cleanup when the connection is closed.
 4406  */
 4407 static void
 4408 handle_tcp_accept(int fd, short event, void* arg)
 4409 {
 4410     struct tcp_accept_handler_data *data
 4411         = (struct tcp_accept_handler_data *) arg;
 4412     int s;
 4413     int reject = 0;
 4414     struct tcp_handler_data *tcp_data;
 4415     region_type *tcp_region;
 4416 #ifdef INET6
 4417     struct sockaddr_storage addr;
 4418 #else
 4419     struct sockaddr_in addr;
 4420 #endif
 4421     socklen_t addrlen;
 4422     struct timeval timeout;
 4423 
 4424     if (!(event & EV_READ)) {
 4425         return;
 4426     }
 4427 
 4428     if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) {
 4429         reject = data->nsd->options->tcp_reject_overflow;
 4430         if (!reject) {
 4431             return;
 4432         }
 4433     }
 4434 
 4435     /* Accept it... */
 4436     addrlen = sizeof(addr);
 4437     s = perform_accept(fd, (struct sockaddr *) &addr, &addrlen);
 4438     if (s == -1) {
 4439         /**
 4440          * EMFILE and ENFILE is a signal that the limit of open
 4441          * file descriptors has been reached. Pause accept().
 4442          * EINTR is a signal interrupt. The others are various OS ways
 4443          * of saying that the client has closed the connection.
 4444          */
 4445         if (errno == EMFILE || errno == ENFILE) {
 4446             if (!slowaccept) {
 4447                 /* disable accept events */
 4448                 struct timeval tv;
 4449                 configure_handler_event_types(0);
 4450                 tv.tv_sec = SLOW_ACCEPT_TIMEOUT;
 4451                 tv.tv_usec = 0L;
 4452                 memset(&slowaccept_event, 0,
 4453                     sizeof(slowaccept_event));
 4454                 event_set(&slowaccept_event, -1, EV_TIMEOUT,
 4455                     handle_slowaccept_timeout, NULL);
 4456                 (void)event_base_set(data->event.ev_base,
 4457                     &slowaccept_event);
 4458                 (void)event_add(&slowaccept_event, &tv);
 4459                 slowaccept = 1;
 4460                 /* We don't want to spam the logs here */
 4461             }
 4462         } else if (errno != EINTR
 4463             && errno != EWOULDBLOCK
 4464 #ifdef ECONNABORTED
 4465             && errno != ECONNABORTED
 4466 #endif /* ECONNABORTED */
 4467 #ifdef EPROTO
 4468             && errno != EPROTO
 4469 #endif /* EPROTO */
 4470             ) {
 4471             log_msg(LOG_ERR, "accept failed: %s", strerror(errno));
 4472         }
 4473         return;
 4474     }
 4475 
 4476     if (reject) {
 4477         shutdown(s, SHUT_RDWR);
 4478         close(s);
 4479         return;
 4480     }
 4481 
 4482     /*
 4483      * This region is deallocated when the TCP connection is
 4484      * closed by the TCP handler.
 4485      */
 4486     tcp_region = region_create(xalloc, free);
 4487     tcp_data = (struct tcp_handler_data *) region_alloc(
 4488         tcp_region, sizeof(struct tcp_handler_data));
 4489     tcp_data->region = tcp_region;
 4490     tcp_data->query = query_create(tcp_region, compressed_dname_offsets,
 4491         compression_table_size, compressed_dnames);
 4492     tcp_data->nsd = data->nsd;
 4493     tcp_data->query_count = 0;
 4494 #ifdef HAVE_SSL
 4495     tcp_data->shake_state = tls_hs_none;
 4496     tcp_data->tls = NULL;
 4497 #endif
 4498     tcp_data->prev = NULL;
 4499     tcp_data->next = NULL;
 4500 
 4501     tcp_data->query_state = QUERY_PROCESSED;
 4502     tcp_data->bytes_transmitted = 0;
 4503     memcpy(&tcp_data->query->addr, &addr, addrlen);
 4504     tcp_data->query->addrlen = addrlen;
 4505 
 4506     tcp_data->tcp_no_more_queries = 0;
 4507     tcp_data->tcp_timeout = data->nsd->tcp_timeout * 1000;
 4508     if (data->nsd->current_tcp_count > data->nsd->maximum_tcp_count/2) {
 4509         /* very busy, give smaller timeout */
 4510         tcp_data->tcp_timeout = 200;
 4511     }
 4512     memset(&tcp_data->event, 0, sizeof(tcp_data->event));
 4513     timeout.tv_sec = tcp_data->tcp_timeout / 1000;
 4514     timeout.tv_usec = (tcp_data->tcp_timeout % 1000)*1000;
 4515 
 4516 #ifdef USE_DNSTAP
 4517     /* save the address of the connection */
 4518     tcp_data->socket = data->socket;
 4519 #endif /* USE_DNSTAP */
 4520 
 4521 #ifdef HAVE_SSL
 4522     if (data->tls_accept) {
 4523         tcp_data->tls = incoming_ssl_fd(tcp_data->nsd->tls_ctx, s);
 4524         if(!tcp_data->tls) {
 4525             close(s);
 4526             return;
 4527         }
 4528         tcp_data->shake_state = tls_hs_read;
 4529         memset(&tcp_data->event, 0, sizeof(tcp_data->event));
 4530         event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT,
 4531               handle_tls_reading, tcp_data);
 4532     } else {
 4533 #endif
 4534         memset(&tcp_data->event, 0, sizeof(tcp_data->event));
 4535         event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT,
 4536               handle_tcp_reading, tcp_data);
 4537 #ifdef HAVE_SSL
 4538     }
 4539 #endif
 4540     if(event_base_set(data->event.ev_base, &tcp_data->event) != 0) {
 4541         log_msg(LOG_ERR, "cannot set tcp event base");
 4542         close(s);
 4543         region_destroy(tcp_region);
 4544         return;
 4545     }
 4546     if(event_add(&tcp_data->event, &timeout) != 0) {
 4547         log_msg(LOG_ERR, "cannot add tcp to event base");
 4548         close(s);
 4549         region_destroy(tcp_region);
 4550         return;
 4551     }
 4552     if(tcp_active_list) {
 4553         tcp_active_list->prev = tcp_data;
 4554         tcp_data->next = tcp_active_list;
 4555     }
 4556     tcp_active_list = tcp_data;
 4557 
 4558     /*
 4559      * Keep track of the total number of TCP handlers installed so
 4560      * we can stop accepting connections when the maximum number
 4561      * of simultaneous TCP connections is reached.
 4562      *
 4563      * If tcp-reject-overflow is enabled, however, then we do not
 4564      * change the handler event type; we keep it as-is and accept
 4565      * overflow TCP connections only so that we can forcibly kill
 4566      * them off.
 4567      */
 4568     ++data->nsd->current_tcp_count;
 4569     if (!data->nsd->options->tcp_reject_overflow &&
 4570          data->nsd->current_tcp_count == data->nsd->maximum_tcp_count)
 4571     {
 4572         configure_handler_event_types(0);
 4573     }
 4574 }
 4575 
 4576 static void
 4577 send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout)
 4578 {
 4579     size_t i;
 4580     assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0);
 4581     for (i = 0; i < nsd->child_count; ++i) {
 4582         if (nsd->children[i].pid > 0 && nsd->children[i].child_fd != -1) {
 4583             if (write(nsd->children[i].child_fd,
 4584                 &command,
 4585                 sizeof(command)) == -1)
 4586             {
 4587                 if(errno != EAGAIN && errno != EINTR)
 4588                     log_msg(LOG_ERR, "problems sending command %d to server %d: %s",
 4589                     (int) command,
 4590                     (int) nsd->children[i].pid,
 4591                     strerror(errno));
 4592             } else if (timeout > 0) {
 4593                 (void)block_read(NULL,
 4594                     nsd->children[i].child_fd,
 4595                     &command, sizeof(command), timeout);
 4596             }
 4597             fsync(nsd->children[i].child_fd);
 4598             close(nsd->children[i].child_fd);
 4599             nsd->children[i].child_fd = -1;
 4600         }
 4601     }
 4602 }
 4603 
/* Tell all child servers to quit; does not wait for acknowledgement. */
static void
send_children_quit(struct nsd* nsd)
{
    DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit"));
    send_children_command(nsd, NSD_QUIT, 0);
}
 4610 
/* Tell all child servers to quit and wait up to 3 seconds per child
 * for the acknowledgement before closing the IPC channels. */
static void
send_children_quit_and_wait(struct nsd* nsd)
{
    DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit and wait"));
    send_children_command(nsd, NSD_QUIT_CHILD, 3);
}
 4617 
 4618 #ifdef BIND8_STATS
 4619 static void
 4620 set_children_stats(struct nsd* nsd)
 4621 {
 4622     size_t i;
 4623     assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0);
 4624     DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children"));
 4625     for (i = 0; i < nsd->child_count; ++i) {
 4626         nsd->children[i].need_to_send_STATS = 1;
 4627         nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE;
 4628     }
 4629 }
 4630 #endif /* BIND8_STATS */
 4631 
 4632 static void
 4633 configure_handler_event_types(short event_types)
 4634 {
 4635     size_t i;
 4636 
 4637     for (i = 0; i < tcp_accept_handler_count; ++i) {
 4638         struct event* handler = &tcp_accept_handlers[i].event;
 4639         if(event_types) {
 4640             /* reassign */
 4641             int fd = handler->ev_fd;
 4642             struct event_base* base = handler->ev_base;
 4643             if(tcp_accept_handlers[i].event_added)
 4644                 event_del(handler);
 4645             memset(handler, 0, sizeof(*handler));
 4646             event_set(handler, fd, event_types,
 4647                 handle_tcp_accept, &tcp_accept_handlers[i]);
 4648             if(event_base_set(base, handler) != 0)
 4649                 log_msg(LOG_ERR, "conhand: cannot event_base");
 4650             if(event_add(handler, NULL) != 0)
 4651                 log_msg(LOG_ERR, "conhand: cannot event_add");
 4652             tcp_accept_handlers[i].event_added = 1;
 4653         } else {
 4654             /* remove */
 4655             if(tcp_accept_handlers[i].event_added) {
 4656                 event_del(handler);
 4657                 tcp_accept_handlers[i].event_added = 0;
 4658             }
 4659         }
 4660     }
 4661 }