"Fossies" - the Fresh Open Source Software Archive

Member "nsd-4.3.7/server.c" (22 Jul 2021, 133803 Bytes) of package /linux/misc/dns/nsd-4.3.7.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "server.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 4.3.6_vs_4.3.7.

    1 /*
    2  * server.c -- nsd(8) network input/output
    3  *
    4  * Copyright (c) 2001-2006, NLnet Labs. All rights reserved.
    5  *
    6  * See LICENSE for the license.
    7  *
    8  */
    9 
   10 #include "config.h"
   11 
   12 #include <sys/types.h>
   13 #include <sys/param.h>
   14 #include <limits.h>
   15 #include <sys/socket.h>
   16 #include <sys/uio.h>
   17 #include <sys/wait.h>
   18 
   19 #include <netinet/in.h>
   20 #ifdef USE_TCP_FASTOPEN
   21   #include <netinet/tcp.h>
   22 #endif
   23 #include <arpa/inet.h>
   24 
   25 #include <assert.h>
   26 #include <ctype.h>
   27 #include <errno.h>
   28 #include <fcntl.h>
   29 #include <stddef.h>
   30 #include <stdio.h>
   31 #include <stdlib.h>
   32 #include <string.h>
   33 #include <time.h>
   34 #include <unistd.h>
   35 #include <signal.h>
   36 #include <netdb.h>
   37 #include <poll.h>
   38 #ifdef HAVE_SYS_RANDOM_H
   39 #include <sys/random.h>
   40 #endif
   41 #ifndef SHUT_WR
   42 #define SHUT_WR 1
   43 #endif
   44 #ifdef HAVE_MMAP
   45 #include <sys/mman.h>
   46 #endif /* HAVE_MMAP */
   47 #ifdef HAVE_OPENSSL_RAND_H
   48 #include <openssl/rand.h>
   49 #endif
   50 #ifdef HAVE_OPENSSL_SSL_H
   51 #include <openssl/ssl.h>
   52 #endif
   53 #ifdef HAVE_OPENSSL_ERR_H
   54 #include <openssl/err.h>
   55 #endif
   56 #ifdef HAVE_OPENSSL_OCSP_H
   57 #include <openssl/ocsp.h>
   58 #endif
   59 #ifndef USE_MINI_EVENT
   60 #  ifdef HAVE_EVENT_H
   61 #    include <event.h>
   62 #  else
   63 #    include <event2/event.h>
   64 #    include "event2/event_struct.h"
   65 #    include "event2/event_compat.h"
   66 #  endif
   67 #else
   68 #  include "mini_event.h"
   69 #endif
   70 
   71 #include "axfr.h"
   72 #include "namedb.h"
   73 #include "netio.h"
   74 #include "xfrd.h"
   75 #include "xfrd-tcp.h"
   76 #include "xfrd-disk.h"
   77 #include "difffile.h"
   78 #include "nsec3.h"
   79 #include "ipc.h"
   80 #include "udb.h"
   81 #include "remote.h"
   82 #include "lookup3.h"
   83 #include "rrl.h"
   84 #ifdef USE_DNSTAP
   85 #include "dnstap/dnstap_collector.h"
   86 #endif
   87 
   88 #define RELOAD_SYNC_TIMEOUT 25 /* seconds */
   89 
#ifdef USE_DNSTAP
/*
 * log_addr() - print the content of a sockaddr_in/sockaddr_in6 structure,
 * just like it's done in Unbound via the same
 * log_addr(VERB_LEVEL, const char*, sockaddr_storage*).
 * Only emits output at verbosity level 6 or higher.
 */
static void
log_addr(const char* descr,
#ifdef INET6
	struct sockaddr_storage* addr
#else
	struct sockaddr_in* addr
#endif
	)
{
	char str_buf[64];
	/* cheap early-out: logging only happens at very high verbosity */
	if(verbosity < 6)
		return;
	if(
#ifdef INET6
		addr->ss_family == AF_INET
#else
		addr->sin_family == AF_INET
#endif
		) {
		struct sockaddr_in* s = (struct sockaddr_in*)addr;
		/* NOTE(review): inet_ntop return value is not checked; on
		 * failure str_buf contents would be undefined. */
		inet_ntop(AF_INET, &s->sin_addr.s_addr, str_buf, sizeof(str_buf));
		VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s->sin_port)));
#ifdef INET6
	} else {
		/* not AF_INET: assumed to be AF_INET6 */
		struct sockaddr_in6* s6 = (struct sockaddr_in6*)addr;
		inet_ntop(AF_INET6, &s6->sin6_addr.s6_addr, str_buf, sizeof(str_buf));
		VERBOSITY(6, (LOG_INFO, "%s: address is: %s, port is: %d", descr, str_buf, ntohs(s6->sin6_port)));
#endif
	}
}
#endif /* USE_DNSTAP */
  126 
  127 #ifdef USE_TCP_FASTOPEN
  128   #define TCP_FASTOPEN_FILE "/proc/sys/net/ipv4/tcp_fastopen"
  129   #define TCP_FASTOPEN_SERVER_BIT_MASK 0x2
  130 #endif
  131 
/*
 * Data for the UDP handlers.
 */
struct udp_handler_data
{
	struct nsd        *nsd;    /* global server state */
	struct nsd_socket *socket; /* the UDP socket this handler serves */
	struct event       event;  /* event registration for the socket fd */
};
  141 
/*
 * Data for the TCP accept handlers.
 */
struct tcp_accept_handler_data {
	struct nsd        *nsd;    /* global server state */
	struct nsd_socket *socket; /* the listening TCP socket */
	int                event_added; /* nonzero while event is registered */
	struct event       event;  /* event registration for the listen fd */
#ifdef HAVE_SSL
	/* handler accepts TLS connections on the dedicated port */
	int                tls_accept;
#endif
};
  152 
  153 /*
  154  * These globals are used to enable the TCP accept handlers
  155  * when the number of TCP connection drops below the maximum
  156  * number of TCP connections.
  157  */
  158 static size_t tcp_accept_handler_count;
  159 static struct tcp_accept_handler_data *tcp_accept_handlers;
  160 
  161 static struct event slowaccept_event;
  162 static int slowaccept;
  163 
  164 #ifdef HAVE_SSL
  165 static unsigned char *ocspdata = NULL;
  166 static long ocspdata_len = 0;
  167 #endif
  168 
  169 #ifdef NONBLOCKING_IS_BROKEN
  170 /* Define NUM_RECV_PER_SELECT to 1 (one) to avoid opportunistically trying to
  171    read multiple times from a socket when reported ready by select. */
  172 # define NUM_RECV_PER_SELECT (1)
  173 #else /* !NONBLOCKING_IS_BROKEN */
  174 # define NUM_RECV_PER_SELECT (100)
  175 #endif /* NONBLOCKING_IS_BROKEN */
  176 
  177 #ifndef HAVE_MMSGHDR
  178 struct mmsghdr {
  179     struct msghdr msg_hdr;
  180     unsigned int  msg_len;
  181 };
  182 #endif
  183 
  184 static struct mmsghdr msgs[NUM_RECV_PER_SELECT];
  185 static struct iovec iovecs[NUM_RECV_PER_SELECT];
  186 static struct query *queries[NUM_RECV_PER_SELECT];
  187 
/*
 * Data for the TCP connection handlers.
 *
 * The TCP handlers use non-blocking I/O.  This is necessary to avoid
 * blocking the entire server on a slow TCP connection, but does make
 * reading from and writing to the socket more complicated.
 *
 * Basically, whenever a read/write would block (indicated by the
 * EAGAIN errno variable) we remember the position we were reading
 * from/writing to and return from the TCP reading/writing event
 * handler.  When the socket becomes readable/writable again we
 * continue from the same position.
 */
struct tcp_handler_data
{
	/*
	 * The region used to allocate all TCP connection related
	 * data, including this structure.  This region is destroyed
	 * when the connection is closed.
	 */
	region_type*        region;

	/*
	 * The global nsd structure.
	 */
	struct nsd*         nsd;

	/*
	 * The current query data for this TCP connection.
	 */
	query_type*         query;

	/*
	 * The query_state is used to remember if we are performing an
	 * AXFR, if we're done processing, or if we should discard the
	 * query and connection.
	 */
	query_state_type    query_state;

	/*
	 * The event for the file descriptor and tcp timeout
	 */
	struct event event;

	/*
	 * The bytes_transmitted field is used to remember the number
	 * of bytes transmitted when receiving or sending a DNS
	 * packet.  The count includes the two additional bytes used
	 * to specify the packet length on a TCP connection.
	 */
	size_t              bytes_transmitted;

	/*
	 * The number of queries handled by this specific TCP connection.
	 */
	int                 query_count;

	/*
	 * The timeout in msec for this tcp connection
	 */
	int tcp_timeout;

	/*
	 * If the connection is allowed to have further queries on it.
	 */
	int tcp_no_more_queries;

#ifdef USE_DNSTAP
	/* the socket of the accept socket to find proper service (local)
	 * address the socket is bound to. */
	struct nsd_socket *socket;
#endif /* USE_DNSTAP */

#ifdef HAVE_SSL
	/*
	 * TLS object.
	 */
	SSL* tls;

	/*
	 * TLS handshake state.  The *_event values presumably mean the
	 * event registration already matches the needed direction —
	 * confirm against tls_handshake().
	 */
	enum { tls_hs_none, tls_hs_read, tls_hs_write,
		tls_hs_read_event, tls_hs_write_event } shake_state;
#endif
	/* list of connections, for service of remaining tcp channels */
	struct tcp_handler_data *prev, *next;
};
/* global that is the list of active tcp channels */
static struct tcp_handler_data *tcp_active_list = NULL;
  277 
  278 /*
  279  * Handle incoming queries on the UDP server sockets.
  280  */
  281 static void handle_udp(int fd, short event, void* arg);
  282 
  283 /*
  284  * Handle incoming connections on the TCP sockets.  These handlers
  285  * usually wait for the NETIO_EVENT_READ event (indicating an incoming
  286  * connection) but are disabled when the number of current TCP
  287  * connections is equal to the maximum number of TCP connections.
  288  * Disabling is done by changing the handler to wait for the
  289  * NETIO_EVENT_NONE type.  This is done using the function
  290  * configure_tcp_accept_handlers.
  291  */
  292 static void handle_tcp_accept(int fd, short event, void* arg);
  293 
  294 /*
  295  * Handle incoming queries on a TCP connection.  The TCP connections
  296  * are configured to be non-blocking and the handler may be called
  297  * multiple times before a complete query is received.
  298  */
  299 static void handle_tcp_reading(int fd, short event, void* arg);
  300 
  301 /*
  302  * Handle outgoing responses on a TCP connection.  The TCP connections
  303  * are configured to be non-blocking and the handler may be called
  304  * multiple times before a complete response is sent.
  305  */
  306 static void handle_tcp_writing(int fd, short event, void* arg);
  307 
  308 #ifdef HAVE_SSL
  309 /* Create SSL object and associate fd */
  310 static SSL* incoming_ssl_fd(SSL_CTX* ctx, int fd);
  311 /*
  312  * Handle TLS handshake. May be called multiple times if incomplete.
  313  */
  314 static int tls_handshake(struct tcp_handler_data* data, int fd, int writing);
  315 
  316 /*
  317  * Handle incoming queries on a TLS over TCP connection.  The TLS
  318  * connections are configured to be non-blocking and the handler may
  319  * be called multiple times before a complete query is received.
  320  */
  321 static void handle_tls_reading(int fd, short event, void* arg);
  322 
  323 /*
  324  * Handle outgoing responses on a TLS over TCP connection.  The TLS
  325  * connections are configured to be non-blocking and the handler may
  326  * be called multiple times before a complete response is sent.
  327  */
  328 static void handle_tls_writing(int fd, short event, void* arg);
  329 #endif
  330 
  331 /*
  332  * Send all children the quit nonblocking, then close pipe.
  333  */
  334 static void send_children_quit(struct nsd* nsd);
  335 /* same, for shutdown time, waits for child to exit to avoid restart issues */
  336 static void send_children_quit_and_wait(struct nsd* nsd);
  337 
  338 /* set childrens flags to send NSD_STATS to them */
  339 #ifdef BIND8_STATS
  340 static void set_children_stats(struct nsd* nsd);
  341 #endif /* BIND8_STATS */
  342 
  343 /*
  344  * Change the event types the HANDLERS are interested in to EVENT_TYPES.
  345  */
  346 static void configure_handler_event_types(short event_types);
  347 
  348 static uint16_t *compressed_dname_offsets = 0;
  349 static uint32_t compression_table_capacity = 0;
  350 static uint32_t compression_table_size = 0;
  351 static domain_type* compressed_dnames[MAXRRSPP];
  352 
  353 #ifdef USE_TCP_FASTOPEN
  354 /* Checks to see if the kernel value must be manually changed in order for
  355    TCP Fast Open to support server mode */
  356 static void report_tcp_fastopen_config() {
  357 
  358     int tcp_fastopen_fp;
  359     uint8_t tcp_fastopen_value;
  360 
  361     if ( (tcp_fastopen_fp = open(TCP_FASTOPEN_FILE, O_RDONLY)) == -1 ) {
  362         log_msg(LOG_INFO,"Error opening " TCP_FASTOPEN_FILE ": %s\n", strerror(errno));
  363     }
  364     if (read(tcp_fastopen_fp, &tcp_fastopen_value, 1) == -1 ) {
  365         log_msg(LOG_INFO,"Error reading " TCP_FASTOPEN_FILE ": %s\n", strerror(errno));
  366         close(tcp_fastopen_fp);
  367     }
  368     if (!(tcp_fastopen_value & TCP_FASTOPEN_SERVER_BIT_MASK)) {
  369         log_msg(LOG_WARNING, "Error: TCP Fast Open support is available and configured in NSD by default.\n");
  370         log_msg(LOG_WARNING, "However the kernel paramenters are not configured to support TCP_FASTOPEN in server mode.\n");
  371         log_msg(LOG_WARNING, "To enable TFO use the command:");
  372         log_msg(LOG_WARNING, "  'sudo sysctl -w net.ipv4.tcp_fastopen=2' for pure server mode or\n");
  373         log_msg(LOG_WARNING, "  'sudo sysctl -w net.ipv4.tcp_fastopen=3' for both client and server mode\n");
  374         log_msg(LOG_WARNING, "NSD will not have TCP Fast Open available until this change is made.\n");
  375         close(tcp_fastopen_fp);
  376     }
  377     close(tcp_fastopen_fp);
  378 }
  379 #endif
  380 
  381 /*
  382  * Remove the specified pid from the list of child pids.  Returns -1 if
  383  * the pid is not in the list, child_num otherwise.  The field is set to 0.
  384  */
  385 static int
  386 delete_child_pid(struct nsd *nsd, pid_t pid)
  387 {
  388     size_t i;
  389     for (i = 0; i < nsd->child_count; ++i) {
  390         if (nsd->children[i].pid == pid) {
  391             nsd->children[i].pid = 0;
  392             if(!nsd->children[i].need_to_exit) {
  393                 if(nsd->children[i].child_fd != -1)
  394                     close(nsd->children[i].child_fd);
  395                 nsd->children[i].child_fd = -1;
  396                 if(nsd->children[i].handler)
  397                     nsd->children[i].handler->fd = -1;
  398             }
  399             return i;
  400         }
  401     }
  402     return -1;
  403 }
  404 
/*
 * Restart child servers if necessary.  For every child slot without a
 * live process, create a socketpair for parent<->child IPC and fork a
 * new server child.  Returns 0 on success, -1 on socketpair/fork error.
 */
static int
restart_child_servers(struct nsd *nsd, region_type* region, netio_type* netio,
	int* xfrd_sock_p)
{
	struct main_ipc_handler_data *ipc_data;
	size_t i;
	int sv[2];

	/* Fork the child processes... */
	for (i = 0; i < nsd->child_count; ++i) {
		if (nsd->children[i].pid <= 0) {
			/* slot has no live child: (re)spawn one */
			if (nsd->children[i].child_fd != -1)
				close(nsd->children[i].child_fd);
			/* fresh IPC channel between server main and child */
			if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) == -1) {
				log_msg(LOG_ERR, "socketpair: %s",
					strerror(errno));
				return -1;
			}
			nsd->children[i].child_fd = sv[0];
			nsd->children[i].parent_fd = sv[1];
			nsd->children[i].pid = fork();
			switch (nsd->children[i].pid) {
			default: /* SERVER MAIN */
				/* parent keeps child_fd, closes the peer end */
				close(nsd->children[i].parent_fd);
				nsd->children[i].parent_fd = -1;
				if (fcntl(nsd->children[i].child_fd, F_SETFL, O_NONBLOCK) == -1) {
					log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
				}
				if(!nsd->children[i].handler)
				{
					/* first spawn for this slot: allocate the
					 * netio handler that services IPC from
					 * this child */
					ipc_data = (struct main_ipc_handler_data*) region_alloc(
						region, sizeof(struct main_ipc_handler_data));
					ipc_data->nsd = nsd;
					ipc_data->child = &nsd->children[i];
					ipc_data->child_num = i;
					ipc_data->xfrd_sock = xfrd_sock_p;
					ipc_data->packet = buffer_create(region, QIOBUFSZ);
					ipc_data->forward_mode = 0;
					ipc_data->got_bytes = 0;
					ipc_data->total_bytes = 0;
					ipc_data->acl_num = 0;
					nsd->children[i].handler = (struct netio_handler*) region_alloc(
						region, sizeof(struct netio_handler));
					nsd->children[i].handler->fd = nsd->children[i].child_fd;
					nsd->children[i].handler->timeout = NULL;
					nsd->children[i].handler->user_data = ipc_data;
					nsd->children[i].handler->event_types = NETIO_EVENT_READ;
					nsd->children[i].handler->event_handler = parent_handle_child_command;
					netio_add_handler(netio, nsd->children[i].handler);
				}
				/* clear any ongoing ipc */
				ipc_data = (struct main_ipc_handler_data*)
					nsd->children[i].handler->user_data;
				ipc_data->forward_mode = 0;
				/* restart - update fd */
				nsd->children[i].handler->fd = nsd->children[i].child_fd;
				break;
			case 0: /* CHILD */
				/* the child need not be able to access the
				 * nsd.db file */
				namedb_close_udb(nsd->db);
#ifdef MEMCLEAN /* OS collects memory pages */
				region_destroy(region);
#endif
				nsd->pid = 0;
				nsd->child_count = 0;
				nsd->server_kind = nsd->children[i].kind;
				nsd->this_child = &nsd->children[i];
				nsd->this_child->child_num = i;
				/* remove signal flags inherited from parent
				   the parent will handle them. */
				nsd->signal_hint_reload_hup = 0;
				nsd->signal_hint_reload = 0;
				nsd->signal_hint_child = 0;
				nsd->signal_hint_quit = 0;
				nsd->signal_hint_shutdown = 0;
				nsd->signal_hint_stats = 0;
				nsd->signal_hint_statsusr = 0;
				/* child talks over parent_fd only; drop the
				 * other descriptors inherited from the parent */
				close(*xfrd_sock_p);
				close(nsd->this_child->child_fd);
				nsd->this_child->child_fd = -1;
				if (fcntl(nsd->this_child->parent_fd, F_SETFL, O_NONBLOCK) == -1) {
					log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
				}
				/* enter the child server main loop; does not return */
				server_child(nsd);
				/* NOTREACH */
				exit(0);
			case -1:
				log_msg(LOG_ERR, "fork failed: %s",
					strerror(errno));
				return -1;
			}
		}
	}
	return 0;
}
  504 
  505 #ifdef BIND8_STATS
  506 static void set_bind8_alarm(struct nsd* nsd)
  507 {
  508     /* resync so that the next alarm is on the next whole minute */
  509     if(nsd->st.period > 0) /* % by 0 gives divbyzero error */
  510         alarm(nsd->st.period - (time(NULL) % nsd->st.period));
  511 }
  512 #endif
  513 
  514 /* set zone stat ids for zones initially read in */
  515 static void
  516 zonestatid_tree_set(struct nsd* nsd)
  517 {
  518     struct radnode* n;
  519     for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) {
  520         zone_type* zone = (zone_type*)n->elem;
  521         zone->zonestatid = getzonestatid(nsd->options, zone->opts);
  522     }
  523 }
  524 
  525 #ifdef USE_ZONE_STATS
  526 void
  527 server_zonestat_alloc(struct nsd* nsd)
  528 {
  529     size_t num = (nsd->options->zonestatnames->count==0?1:
  530             nsd->options->zonestatnames->count);
  531     size_t sz = sizeof(struct nsdst)*num;
  532     char tmpfile[256];
  533     uint8_t z = 0;
  534 
  535     /* file names */
  536     nsd->zonestatfname[0] = 0;
  537     nsd->zonestatfname[1] = 0;
  538     snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.0",
  539         nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
  540     nsd->zonestatfname[0] = region_strdup(nsd->region, tmpfile);
  541     snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.zstat.1",
  542         nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
  543     nsd->zonestatfname[1] = region_strdup(nsd->region, tmpfile);
  544 
  545     /* file descriptors */
  546     nsd->zonestatfd[0] = open(nsd->zonestatfname[0], O_CREAT|O_RDWR, 0600);
  547     if(nsd->zonestatfd[0] == -1) {
  548         log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[0],
  549             strerror(errno));
  550         exit(1);
  551     }
  552     nsd->zonestatfd[1] = open(nsd->zonestatfname[1], O_CREAT|O_RDWR, 0600);
  553     if(nsd->zonestatfd[0] == -1) {
  554         log_msg(LOG_ERR, "cannot create %s: %s", nsd->zonestatfname[1],
  555             strerror(errno));
  556         close(nsd->zonestatfd[0]);
  557         unlink(nsd->zonestatfname[0]);
  558         exit(1);
  559     }
  560 
  561 #ifdef HAVE_MMAP
  562     if(lseek(nsd->zonestatfd[0], (off_t)sz-1, SEEK_SET) == -1) {
  563         log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[0],
  564             strerror(errno));
  565         exit(1);
  566     }
  567     if(write(nsd->zonestatfd[0], &z, 1) == -1) {
  568         log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
  569             nsd->zonestatfname[0], strerror(errno));
  570         exit(1);
  571     }
  572     if(lseek(nsd->zonestatfd[1], (off_t)sz-1, SEEK_SET) == -1) {
  573         log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[1],
  574             strerror(errno));
  575         exit(1);
  576     }
  577     if(write(nsd->zonestatfd[1], &z, 1) == -1) {
  578         log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
  579             nsd->zonestatfname[1], strerror(errno));
  580         exit(1);
  581     }
  582     nsd->zonestat[0] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE,
  583         MAP_SHARED, nsd->zonestatfd[0], 0);
  584     if(nsd->zonestat[0] == MAP_FAILED) {
  585         log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
  586         unlink(nsd->zonestatfname[0]);
  587         unlink(nsd->zonestatfname[1]);
  588         exit(1);
  589     }
  590     nsd->zonestat[1] = (struct nsdst*)mmap(NULL, sz, PROT_READ|PROT_WRITE,
  591         MAP_SHARED, nsd->zonestatfd[1], 0);
  592     if(nsd->zonestat[1] == MAP_FAILED) {
  593         log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
  594         unlink(nsd->zonestatfname[0]);
  595         unlink(nsd->zonestatfname[1]);
  596         exit(1);
  597     }
  598     memset(nsd->zonestat[0], 0, sz);
  599     memset(nsd->zonestat[1], 0, sz);
  600     nsd->zonestatsize[0] = num;
  601     nsd->zonestatsize[1] = num;
  602     nsd->zonestatdesired = num;
  603     nsd->zonestatsizenow = num;
  604     nsd->zonestatnow = nsd->zonestat[0];
  605 #endif /* HAVE_MMAP */
  606 }
  607 
/* Remap the zonestat array at index idx to the new size sz.  Uses
 * mremap where available, otherwise msync+munmap+mmap.  The backing
 * file must already have been extended to sz bytes by the caller.
 * Exits the process when the (re)mapping fails. */
void
zonestat_remap(struct nsd* nsd, int idx, size_t sz)
{
#ifdef HAVE_MMAP
#ifdef MREMAP_MAYMOVE
	/* MREMAP_MAYMOVE lets the kernel relocate the mapping if it
	 * cannot be grown in place */
	nsd->zonestat[idx] = (struct nsdst*)mremap(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx], sz,
		MREMAP_MAYMOVE);
	if(nsd->zonestat[idx] == MAP_FAILED) {
		log_msg(LOG_ERR, "mremap failed: %s", strerror(errno));
		exit(1);
	}
#else /* !HAVE MREMAP */
	/* no mremap: flush the old mapping, unmap it and map again at
	 * the new size */
	if(msync(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx], MS_ASYNC) != 0)
		log_msg(LOG_ERR, "msync failed: %s", strerror(errno));
	if(munmap(nsd->zonestat[idx],
		sizeof(struct nsdst)*nsd->zonestatsize[idx]) != 0)
		log_msg(LOG_ERR, "munmap failed: %s", strerror(errno));
	nsd->zonestat[idx] = (struct nsdst*)mmap(NULL, sz,
		PROT_READ|PROT_WRITE, MAP_SHARED, nsd->zonestatfd[idx], 0);
	if(nsd->zonestat[idx] == MAP_FAILED) {
		log_msg(LOG_ERR, "mmap failed: %s", strerror(errno));
		exit(1);
	}
#endif /* MREMAP */
#endif /* HAVE_MMAP */
}
  636 
/* realloc the zonestat array for the one that is not currently in use,
 * to match the desired new size of the array (if applicable) */
void
server_zonestat_realloc(struct nsd* nsd)
{
#ifdef HAVE_MMAP
	uint8_t z = 0;
	size_t sz;
	int idx = 0; /* index of the zonestat array that is not in use */
	if(nsd->zonestatnow == nsd->zonestat[0])
		idx = 1;
	/* nothing to do if the spare array already has the desired size */
	if(nsd->zonestatsize[idx] == nsd->zonestatdesired)
		return;
	sz = sizeof(struct nsdst)*nsd->zonestatdesired;
	/* extend the backing file to sz bytes by writing one byte at the
	 * end, then remap */
	if(lseek(nsd->zonestatfd[idx], (off_t)sz-1, SEEK_SET) == -1) {
		log_msg(LOG_ERR, "lseek %s: %s", nsd->zonestatfname[idx],
			strerror(errno));
		exit(1);
	}
	if(write(nsd->zonestatfd[idx], &z, 1) == -1) {
		log_msg(LOG_ERR, "cannot extend stat file %s (%s)",
			nsd->zonestatfname[idx], strerror(errno));
		exit(1);
	}
	zonestat_remap(nsd, idx, sz);
	/* zero the newly allocated region */
	if(nsd->zonestatdesired > nsd->zonestatsize[idx]) {
		memset(((char*)nsd->zonestat[idx])+sizeof(struct nsdst) *
			nsd->zonestatsize[idx], 0, sizeof(struct nsdst) *
			(nsd->zonestatdesired - nsd->zonestatsize[idx]));
	}
	nsd->zonestatsize[idx] = nsd->zonestatdesired;
#endif /* HAVE_MMAP */
}
  671 
  672 /* switchover to use the other array for the new children, that
  673  * briefly coexist with the old children.  And we want to avoid them
  674  * both writing to the same statistics arrays. */
  675 void
  676 server_zonestat_switch(struct nsd* nsd)
  677 {
  678     if(nsd->zonestatnow == nsd->zonestat[0]) {
  679         nsd->zonestatnow = nsd->zonestat[1];
  680         nsd->zonestatsizenow = nsd->zonestatsize[1];
  681     } else {
  682         nsd->zonestatnow = nsd->zonestat[0];
  683         nsd->zonestatsizenow = nsd->zonestatsize[0];
  684     }
  685 }
  686 #endif /* USE_ZONE_STATS */
  687 
  688 static void
  689 cleanup_dname_compression_tables(void *ptr)
  690 {
  691     free(ptr);
  692     compressed_dname_offsets = NULL;
  693     compression_table_capacity = 0;
  694 }
  695 
/* Ensure the dname compression offsets table can hold an entry for every
 * domain currently in the database (plus EXTRA_DOMAIN_NUMBERS), growing
 * it if needed, and reset it for a fresh query. */
static void
initialize_dname_compression_tables(struct nsd *nsd)
{
	size_t needed = domain_table_count(nsd->db->domains) + 1;
	needed += EXTRA_DOMAIN_NUMBERS;
	if(compression_table_capacity < needed) {
		if(compressed_dname_offsets) {
			/* remove the old cleanup before freeing, so the
			 * region does not later free the stale pointer */
			region_remove_cleanup(nsd->db->region,
				cleanup_dname_compression_tables,
				compressed_dname_offsets);
			free(compressed_dname_offsets);
		}
		compressed_dname_offsets = (uint16_t *) xmallocarray(
			needed, sizeof(uint16_t));
		/* free the new table when the database region is destroyed */
		region_add_cleanup(nsd->db->region, cleanup_dname_compression_tables,
			compressed_dname_offsets);
		compression_table_capacity = needed;
		compression_table_size=domain_table_count(nsd->db->domains)+1;
	}
	memset(compressed_dname_offsets, 0, needed * sizeof(uint16_t));
	compressed_dname_offsets[0] = QHEADERSZ; /* The original query name */
}
  718 
  719 static int
  720 set_cloexec(struct nsd_socket *sock)
  721 {
  722     assert(sock != NULL);
  723 
  724     if(fcntl(sock->s, F_SETFD, FD_CLOEXEC) == -1) {
  725         const char *socktype =
  726             sock->addr.ai_family == SOCK_DGRAM ? "udp" : "tcp";
  727         log_msg(LOG_ERR, "fcntl(..., O_CLOEXEC) failed for %s: %s",
  728             socktype, strerror(errno));
  729         return -1;
  730     }
  731 
  732     return 1;
  733 }
  734 
/* Enable kernel load-balanced port reuse on the socket.
 * Returns 1 on success, -1 on error, 0 when SO_REUSEPORT is not
 * available at compile time. */
static int
set_reuseport(struct nsd_socket *sock)
{
#ifdef SO_REUSEPORT
	int on = 1;
#ifdef SO_REUSEPORT_LB
	/* FreeBSD 12 has SO_REUSEPORT_LB that does load balancing like
	 * SO_REUSEPORT on Linux. This is what the users want with the config
	 * option in nsd.conf; if we actually need local address and port reuse
	 * they'll also need to have SO_REUSEPORT set for them, assume it was
	 * _LB they want.
	 */
	int opt = SO_REUSEPORT_LB;
	static const char optname[] = "SO_REUSEPORT_LB";
#else /* !SO_REUSEPORT_LB */
	int opt = SO_REUSEPORT;
	static const char optname[] = "SO_REUSEPORT";
#endif /* SO_REUSEPORT_LB */

	if (0 == setsockopt(sock->s, SOL_SOCKET, opt, &on, sizeof(on))) {
		return 1;
	} else if(verbosity >= 3 || errno != ENOPROTOOPT) {
		/* ENOPROTOOPT (option unknown to the running kernel) is only
		 * logged at high verbosity; other errors are always logged */
		log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s",
			optname, strerror(errno));
	}
	return -1;
#else
	(void)sock;
#endif /* SO_REUSEPORT */

	return 0;
}
  767 
  768 static int
  769 set_reuseaddr(struct nsd_socket *sock)
  770 {
  771 #ifdef SO_REUSEADDR
  772     int on = 1;
  773     if(setsockopt(sock->s, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) == 0) {
  774         return 1;
  775     }
  776     log_msg(LOG_ERR, "setsockopt(..., SO_REUSEADDR, ...) failed: %s",
  777         strerror(errno));
  778     return -1;
  779 #endif /* SO_REUSEADDR */
  780     return 0;
  781 }
  782 
/* Set the socket receive buffer to rcv bytes.  Prefers SO_RCVBUFFORCE
 * (Linux; can exceed rmem_max but needs privilege) over SO_RCVBUF.
 * Returns 1 on success, 0 when the request could not be honored but is
 * not fatal, -1 on hard error. */
static int
set_rcvbuf(struct nsd_socket *sock, int rcv)
{
#ifdef SO_RCVBUF
#ifdef SO_RCVBUFFORCE
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_RCVBUFFORCE, &rcv, sizeof(rcv)))
	{
		return 1;
	}
	/* not privileged (EPERM) or out of buffer space (ENOBUFS):
	 * treat as non-fatal */
	if(errno == EPERM || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUFFORCE, ...) failed: %s",
		strerror(errno));
	return -1;
#else /* !SO_RCVBUFFORCE */
	if (0 == setsockopt(
		sock->s, SOL_SOCKET, SO_RCVBUF, &rcv, sizeof(rcv)))
	{
		return 1;
	}
	/* unsupported (ENOSYS) or out of buffer space (ENOBUFS):
	 * treat as non-fatal */
	if(errno == ENOSYS || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_RCVBUF, ...) failed: %s",
		strerror(errno));
	return -1;
#endif /* SO_RCVBUFFORCE */
#endif /* SO_RCVBUF */

	return 0;
}
  816 
/* Set the socket send buffer to snd bytes.  Prefers SO_SNDBUFFORCE
 * (Linux; can exceed wmem_max but needs privilege) over SO_SNDBUF.
 * Returns 1 on success, 0 when the request could not be honored but is
 * not fatal, -1 on hard error. */
static int
set_sndbuf(struct nsd_socket *sock, int snd)
{
#ifdef SO_SNDBUF
#ifdef SO_SNDBUFFORCE
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_SNDBUFFORCE, &snd, sizeof(snd)))
	{
		return 1;
	}
	/* not privileged (EPERM) or out of buffer space (ENOBUFS):
	 * treat as non-fatal */
	if(errno == EPERM || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUFFORCE, ...) failed: %s",
		strerror(errno));
	return -1;
#else /* !SO_SNDBUFFORCE */
	if(0 == setsockopt(
		sock->s, SOL_SOCKET, SO_SNDBUF, &snd, sizeof(snd)))
	{
		return 1;
	}
	/* unsupported (ENOSYS) or out of buffer space (ENOBUFS):
	 * treat as non-fatal */
	if(errno == ENOSYS || errno == ENOBUFS) {
		return 0;
	}
	log_msg(LOG_ERR, "setsockopt(..., SO_SNDBUF, ...) failed: %s",
		strerror(errno));
	return -1;
#endif /* SO_SNDBUFFORCE */
#endif /* SO_SNDBUF */

	return 0;
}
  850 
  851 static int
  852 set_nonblock(struct nsd_socket *sock)
  853 {
  854     const char *socktype =
  855         sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
  856 
  857     if(fcntl(sock->s, F_SETFL, O_NONBLOCK) == -1) {
  858         log_msg(LOG_ERR, "fctnl(..., O_NONBLOCK) failed for %s: %s",
  859             socktype, strerror(errno));
  860         return -1;
  861     }
  862 
  863     return 1;
  864 }
  865 
  866 #ifdef INET6
  867 static int
  868 set_ipv6_v6only(struct nsd_socket *sock)
  869 {
  870 #ifdef IPV6_V6ONLY
  871     int on = 1;
  872     const char *socktype =
  873         sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
  874 
  875     if(0 == setsockopt(
  876         sock->s, IPPROTO_IPV6, IPV6_V6ONLY, &on, sizeof(on)))
  877     {
  878         return 1;
  879     }
  880 
  881     log_msg(LOG_ERR, "setsockopt(..., IPV6_V6ONLY, ...) failed for %s: %s",
  882         socktype, strerror(errno));
  883     return -1;
  884 #else
  885     (void)sock;
  886 #endif /* IPV6_V6ONLY */
  887 
  888     return 0;
  889 }
  890 #endif /* INET6 */
  891 
#ifdef INET6
/* Clamp outgoing IPv6 UDP datagrams to the minimum IPv6 MTU (1280).
 * Returns 1 on success, -1 on failure (logged), 0 when neither
 * IPV6_USE_MIN_MTU nor IPV6_MTU is available on this platform. */
static int
set_ipv6_use_min_mtu(struct nsd_socket *sock)
{
#if defined(IPV6_USE_MIN_MTU) || defined(IPV6_MTU)
#if defined(IPV6_USE_MIN_MTU)
    /* There is no fragmentation of IPv6 datagrams during forwarding in the
     * network. Therefore we do not send UDP datagrams larger than the
     * minimum IPv6 MTU of 1280 octets. The EDNS0 message length can be
     * larger if the network stack supports IPV6_USE_MIN_MTU.
     */
    int opt = IPV6_USE_MIN_MTU;
    int optval = 1;
    static const char optname[] = "IPV6_USE_MIN_MTU";
#elif defined(IPV6_MTU)
    /* On Linux, PMTUD is disabled by default for datagrams so set the MTU
     * to the MIN MTU to get the same.
     */
    int opt = IPV6_MTU;
    int optval = IPV6_MIN_MTU;
    static const char optname[] = "IPV6_MTU";
#endif
    if(0 == setsockopt(
        sock->s, IPPROTO_IPV6, opt, &optval, sizeof(optval)))
    {
        return 1;
    }

    log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed: %s",
        optname, strerror(errno));
    return -1;
#else
    (void)sock;
#endif /* IPV6_USE_MIN_MTU || IPV6_MTU */

    return 0;
}
#endif /* INET6 */
  930 
/* Disable IPv4 path MTU discovery so responses are sent with DF=0,
 * mitigating DNS fragmentation attacks based on forged PMTU data.
 * Tries IP_PMTUDISC_OMIT first, falls back to IP_PMTUDISC_DONT, and
 * on systems without IP_MTU_DISCOVER clears IP_DONTFRAG instead.
 * Returns 1 on success, -1 when every available option failed (each
 * failure is logged), 0 when the platform offers no such option. */
static int
set_ipv4_no_pmtu_disc(struct nsd_socket *sock)
{
    int ret = 0;

#if defined(IP_MTU_DISCOVER)
    int opt = IP_MTU_DISCOVER;
    int optval;
# if defined(IP_PMTUDISC_OMIT)
    /* Linux 3.15 has IP_PMTUDISC_OMIT which makes sockets ignore PMTU
     * information and send packets with DF=0. Fragmentation is allowed if
     * and only if the packet size exceeds the outgoing interface MTU or
     * the packet encounters smaller MTU link in network. This mitigates
     * DNS fragmentation attacks by preventing forged PMTU information.
     * FreeBSD already has same semantics without setting the option.
     */
    optval = IP_PMTUDISC_OMIT;
    if(0 == setsockopt(
        sock->s, IPPROTO_IP, opt, &optval, sizeof(optval)))
    {
        return 1;
    }

    log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
        "IP_MTU_DISCOVER", "IP_PMTUDISC_OMIT", strerror(errno));
# endif /* IP_PMTUDISC_OMIT */
# if defined(IP_PMTUDISC_DONT)
    /* Use IP_PMTUDISC_DONT if IP_PMTUDISC_OMIT failed / undefined. */
    optval = IP_PMTUDISC_DONT;
    if(0 == setsockopt(
        sock->s, IPPROTO_IP, opt, &optval, sizeof(optval)))
    {
        return 1;
    }

    log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
        "IP_MTU_DISCOVER", "IP_PMTUDISC_DONT", strerror(errno));
# endif
    ret = -1;
#elif defined(IP_DONTFRAG)
    /* BSD-style: explicitly allow fragmentation by clearing DONTFRAG. */
    int off = 0;
    if (0 == setsockopt(
        sock->s, IPPROTO_IP, IP_DONTFRAG, &off, sizeof(off)))
    {
        return 1;
    }

    log_msg(LOG_ERR, "setsockopt(..., IP_DONTFRAG, ...) failed: %s",
        strerror(errno));
    ret = -1;
#else
    (void)sock;
#endif

    return ret;
}
  987 
  988 static int
  989 set_ip_freebind(struct nsd_socket *sock)
  990 {
  991 #ifdef IP_FREEBIND
  992     int on = 1;
  993     const char *socktype =
  994         sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
  995     if(setsockopt(sock->s, IPPROTO_IP, IP_FREEBIND, &on, sizeof(on)) == 0)
  996     {
  997         return 1;
  998     }
  999     log_msg(LOG_ERR, "setsockopt(..., IP_FREEBIND, ...) failed for %s: %s",
 1000         socktype, strerror(errno));
 1001     return -1;
 1002 #else
 1003     (void)sock;
 1004 #endif /* IP_FREEBIND */
 1005 
 1006     return 0;
 1007 }
 1008 
/* Enable binding to non-local addresses (transparent proxying style).
 * Returns 1 on success, -1 on failure (logged), 0 when the platform
 * has no such option. */
static int
set_ip_transparent(struct nsd_socket *sock)
{
    /*
    The scandalous preprocessor blob here calls for some explanation :)
    POSIX does not specify an option to bind non-local IPs, so
    platforms developed several implementation-specific options,
    all set in the same way, but with different names.
    For additional complexity, some platform manage this setting
    differently for different address families (IPv4 vs IPv6).
    This scandalous preprocessor blob below abstracts such variability
    in the way which leaves the C code as lean and clear as possible.
    */

#if defined(IP_TRANSPARENT)
#   define NSD_SOCKET_OPTION_TRANSPARENT                        IP_TRANSPARENT
#   define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL       IPPROTO_IP
#   define NSD_SOCKET_OPTION_TRANSPARENT_NAME           "IP_TRANSPARENT"
// as of 2020-01, Linux does not support this on IPv6 programmatically
#elif defined(SO_BINDANY)
#   define NSD_SOCKET_OPTION_TRANSPARENT                        SO_BINDANY
#   define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL       SOL_SOCKET
#   define NSD_SOCKET_OPTION_TRANSPARENT_NAME           "SO_BINDANY"
#elif defined(IP_BINDANY)
#   define NSD_SOCKET_OPTION_TRANSPARENT                        IP_BINDANY
#   define NSD_SOCKET_OPTION_TRANSPARENT6                       IPV6_BINDANY
#   define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL       IPPROTO_IP
#   define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6  IPPROTO_IPV6
#   define NSD_SOCKET_OPTION_TRANSPARENT_NAME           "IP_BINDANY"
#endif

#ifndef NSD_SOCKET_OPTION_TRANSPARENT
    (void)sock;
#else
    /* Default the IPv6 variants to the IPv4/common ones when the
     * platform does not distinguish between address families. */
#   ifndef NSD_SOCKET_OPTION_TRANSPARENT6
#       define NSD_SOCKET_OPTION_TRANSPARENT6 NSD_SOCKET_OPTION_TRANSPARENT
#   endif
#   ifndef NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6
#       define NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL
#   endif
#   ifndef NSD_SOCKET_OPTION_TRANSPARENT_NAME6
#       define NSD_SOCKET_OPTION_TRANSPARENT_NAME6 NSD_SOCKET_OPTION_TRANSPARENT_NAME
#   endif

    int on = 1;
    const char *socktype =
        sock->addr.ai_socktype == SOCK_DGRAM ? "udp" : "tcp";
    const int is_ip6 = (sock->addr.ai_family == AF_INET6);

    if(0 == setsockopt(
        sock->s,
        is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL6 : NSD_SOCKET_OPTION_TRANSPARENT_OPTLEVEL,
        is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT6 : NSD_SOCKET_OPTION_TRANSPARENT,
        &on, sizeof(on)))
    {
        return 1;
    }

    log_msg(LOG_ERR, "setsockopt(..., %s, ...) failed for %s: %s",
        is_ip6 ? NSD_SOCKET_OPTION_TRANSPARENT_NAME6 : NSD_SOCKET_OPTION_TRANSPARENT_NAME, socktype, strerror(errno));
    return -1;
#endif

    return 0;
}
 1074 
 1075 static int
 1076 set_tcp_maxseg(struct nsd_socket *sock, int mss)
 1077 {
 1078 #if defined(IPPROTO_TCP) && defined(TCP_MAXSEG)
 1079     if(setsockopt(sock->s, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == 0) {
 1080         return 1;
 1081     }
 1082     log_msg(LOG_ERR, "setsockopt(..., TCP_MAXSEG, ...) failed for tcp: %s",
 1083         strerror(errno));
 1084     return -1;
 1085 #else
 1086     log_msg(LOG_ERR, "setsockopt(TCP_MAXSEG) unsupported");
 1087 #endif
 1088     return 0;
 1089 }
 1090 
 1091 #ifdef USE_TCP_FASTOPEN
 1092 static int
 1093 set_tcp_fastopen(struct nsd_socket *sock)
 1094 {
 1095     /* qlen specifies how many outstanding TFO requests to allow. Limit is
 1096      * a defense against IP spoofing attacks as suggested in RFC7413.
 1097      */
 1098     int qlen;
 1099 
 1100 #ifdef __APPLE__
 1101     /* macOS X implementation only supports qlen of 1 via this call. The
 1102      * actual value is configured by the net.inet.tcp.fastopen_backlog
 1103      * kernel parameter.
 1104      */
 1105     qlen = 1;
 1106 #else
 1107     /* 5 is recommended on Linux. */
 1108     qlen = 5;
 1109 #endif
 1110     if (0 == setsockopt(
 1111         sock->s, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)))
 1112     {
 1113         return 1;
 1114     }
 1115 
 1116     if (errno == EPERM) {
 1117         log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s "
 1118                  "; this could likely be because sysctl "
 1119                  "net.inet.tcp.fastopen.enabled, "
 1120                  "net.inet.tcp.fastopen.server_enable, or "
 1121                  "net.ipv4.tcp_fastopen is disabled",
 1122             strerror(errno));
 1123     /* Squelch ENOPROTOOPT: FreeBSD server mode with kernel support
 1124      * disabled, except when verbosity enabled for debugging
 1125      */
 1126     } else if(errno != ENOPROTOOPT || verbosity >= 3) {
 1127         log_msg(LOG_ERR, "Setting TCP Fast Open as server failed: %s",
 1128             strerror(errno));
 1129     }
 1130 
 1131     return (errno == ENOPROTOOPT ? 0 : -1);
 1132 }
 1133 #endif /* USE_TCP_FASTOPEN */
 1134 
 1135 static int
 1136 set_bindtodevice(struct nsd_socket *sock)
 1137 {
 1138 #if defined(SO_BINDTODEVICE)
 1139     if(setsockopt(sock->s, SOL_SOCKET, SO_BINDTODEVICE,
 1140         sock->device, strlen(sock->device)) == -1)
 1141     {
 1142         log_msg(LOG_ERR, "setsockopt(..., %s, %s, ...) failed: %s",
 1143                          "SO_BINDTODEVICE", sock->device, strerror(errno));
 1144         return -1;
 1145     }
 1146 
 1147     return 1;
 1148 #else
 1149     (void)sock;
 1150     return 0;
 1151 #endif
 1152 }
 1153 
 1154 static int
 1155 set_setfib(struct nsd_socket *sock)
 1156 {
 1157 #if defined(SO_SETFIB)
 1158     if(setsockopt(sock->s, SOL_SOCKET, SO_SETFIB,
 1159                   (const void *)&sock->fib, sizeof(sock->fib)) == -1)
 1160     {
 1161         log_msg(LOG_ERR, "setsockopt(..., %s, %d, ...) failed: %s",
 1162                          "SO_SETFIB", sock->fib, strerror(errno));
 1163         return -1;
 1164     }
 1165 
 1166     return 1;
 1167 #else
 1168     (void)sock;
 1169     return 0;
 1170 #endif
 1171 }
 1172 
/* Create, configure and bind one UDP server socket described by sock.
 * Returns 1 on success, 0 when the socket is optional and the address
 * family is unsupported, -1 on fatal error. A failed SO_REUSEPORT
 * clears *reuseport_works so later sockets skip the option. */
static int
open_udp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works)
{
    /* Default 1MB buffers; overridden by config below. */
    int rcv = 1*1024*1024, snd = 1*1024*1024;

    if(-1 == (sock->s = socket(
        sock->addr.ai_family, sock->addr.ai_socktype, 0)))
    {
#ifdef INET6
        if((sock->flags & NSD_SOCKET_IS_OPTIONAL) &&
           (sock->addr.ai_family == AF_INET6) &&
           (errno == EAFNOSUPPORT))
        {
            /* Optional IPv6 socket on an IPv4-only host: not fatal. */
            log_msg(LOG_WARNING, "fallback to UDP4, no IPv6: "
                "not supported");
            return 0;
        }
#endif
        log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
        return -1;
    }

    set_cloexec(sock);

    if(nsd->reuseport && reuseport_works && *reuseport_works)
        *reuseport_works = (set_reuseport(sock) == 1);

    if(nsd->options->receive_buffer_size > 0)
        rcv = nsd->options->receive_buffer_size;
    if(set_rcvbuf(sock, rcv) == -1)
        return -1;

    if(nsd->options->send_buffer_size > 0)
        snd = nsd->options->send_buffer_size;
    if(set_sndbuf(sock, snd) == -1)
        return -1;
#ifdef INET6
    if(sock->addr.ai_family == AF_INET6) {
        if(set_ipv6_v6only(sock) == -1 ||
           set_ipv6_use_min_mtu(sock) == -1)
            return -1;
    } else
#endif /* INET6 */
    if(sock->addr.ai_family == AF_INET) {
        /* Disable PMTUD to mitigate fragmentation-based attacks. */
        if(set_ipv4_no_pmtu_disc(sock) == -1)
            return -1;
    }

    /* Set socket to non-blocking. Otherwise, on operating systems
     * with thundering herd problems, the UDP recv could block
     * after select returns readable.
     */
    set_nonblock(sock);

    /* Freebind/transparent failures are logged but tolerated. */
    if(nsd->options->ip_freebind)
        (void)set_ip_freebind(sock);
    if(nsd->options->ip_transparent)
        (void)set_ip_transparent(sock);
    if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1)
        return -1;
    if(sock->fib != -1 && set_setfib(sock) == -1)
        return -1;

    if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) {
        char buf[256];
        addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf));
        log_msg(LOG_ERR, "can't bind udp socket %s: %s",
            buf, strerror(errno));
        return -1;
    }

    return 1;
}
 1246 
/* Create, configure, bind and listen on one TCP server socket.
 * Returns 1 on success, 0 when the socket is optional and the address
 * family is unsupported, -1 on fatal error. */
static int
open_tcp_socket(struct nsd *nsd, struct nsd_socket *sock, int *reuseport_works)
{
#ifdef USE_TCP_FASTOPEN
    report_tcp_fastopen_config();
#endif

    (void)reuseport_works;

    if(-1 == (sock->s = socket(
        sock->addr.ai_family, sock->addr.ai_socktype, 0)))
    {
#ifdef INET6
        if((sock->flags & NSD_SOCKET_IS_OPTIONAL) &&
           (sock->addr.ai_family == AF_INET6) &&
           (errno == EAFNOSUPPORT))
        {
            /* Optional IPv6 socket on an IPv4-only host: not fatal. */
            log_msg(LOG_WARNING, "fallback to TCP4, no IPv6: "
                                 "not supported");
            return 0;
        }
#endif /* INET6 */
        log_msg(LOG_ERR, "can't create a socket: %s", strerror(errno));
        return -1;
    }

    set_cloexec(sock);

    if(nsd->reuseport && reuseport_works && *reuseport_works)
        *reuseport_works = (set_reuseport(sock) == 1);

    /* SO_REUSEADDR failure is logged by the helper but tolerated. */
    (void)set_reuseaddr(sock);

#ifdef INET6
    if(sock->addr.ai_family == AF_INET6) {
        if (set_ipv6_v6only(sock) == -1 ||
            set_ipv6_use_min_mtu(sock) == -1)
            return -1;
    }
#endif

    if(nsd->tcp_mss > 0)
        set_tcp_maxseg(sock, nsd->tcp_mss);
    /* (StevensUNP p463), if TCP listening socket is blocking, then
       it may block in accept, even if select() says readable. */
    (void)set_nonblock(sock);
    if(nsd->options->ip_freebind)
        (void)set_ip_freebind(sock);
    if(nsd->options->ip_transparent)
        (void)set_ip_transparent(sock);
    if((sock->flags & NSD_BIND_DEVICE) && set_bindtodevice(sock) == -1)
        return -1;
    if(sock->fib != -1 && set_setfib(sock) == -1)
        return -1;

    if(bind(sock->s, (struct sockaddr *)&sock->addr.ai_addr, sock->addr.ai_addrlen) == -1) {
        char buf[256];
        addrport2str((void*)&sock->addr.ai_addr, buf, sizeof(buf));
        log_msg(LOG_ERR, "can't bind tcp socket %s: %s",
            buf, strerror(errno));
        return -1;
    }

#ifdef USE_TCP_FASTOPEN
    /* Best effort: TFO failure must not prevent serving TCP. */
    (void)set_tcp_fastopen(sock);
#endif

    if(listen(sock->s, TCP_BACKLOG) == -1) {
        log_msg(LOG_ERR, "can't listen: %s", strerror(errno));
        return -1;
    }

    return 1;
}
 1321 
 1322 /*
 1323  * Initialize the server, reuseport, create and bind the sockets.
 1324  */
int
server_init(struct nsd *nsd)
{
    size_t i;
    int reuseport = 1; /* Determine if REUSEPORT works. */

    /* open server interface ports */
    for(i = 0; i < nsd->ifs; i++) {
        if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1 ||
           open_tcp_socket(nsd, &nsd->tcp[i], &reuseport) == -1)
        {
            return -1;
        }
    }

    if(nsd->reuseport && reuseport) {
        /* One full socket set per server process. */
        size_t ifs = nsd->ifs * nsd->reuseport;

        /* increase the size of the interface arrays, there are going
         * to be separate interface file descriptors for every server
         * instance */
        region_remove_cleanup(nsd->region, free, nsd->udp);
        region_remove_cleanup(nsd->region, free, nsd->tcp);

        nsd->udp = xrealloc(nsd->udp, ifs * sizeof(*nsd->udp));
        nsd->tcp = xrealloc(nsd->tcp, ifs * sizeof(*nsd->tcp));
        region_add_cleanup(nsd->region, free, nsd->udp);
        region_add_cleanup(nsd->region, free, nsd->tcp);
        if(ifs > nsd->ifs) {
            /* Zero the newly added tail of both arrays. */
            memset(&nsd->udp[nsd->ifs], 0,
                (ifs-nsd->ifs)*sizeof(*nsd->udp));
            memset(&nsd->tcp[nsd->ifs], 0,
                (ifs-nsd->ifs)*sizeof(*nsd->tcp));
        }

        for(i = nsd->ifs; i < ifs; i++) {
            /* Clone settings from the first set and open a fresh
             * UDP descriptor for this server instance. */
            nsd->udp[i] = nsd->udp[i%nsd->ifs];
            nsd->udp[i].s = -1;
            if(open_udp_socket(nsd, &nsd->udp[i], &reuseport) == -1) {
                return -1;
            }
            /* Turn off REUSEPORT for TCP by copying the socket
             * file descriptor.
             * This means we should not close TCP used by
             * other servers in reuseport enabled mode, in
             * server_child().
             */
            nsd->tcp[i] = nsd->tcp[i%nsd->ifs];
        }

        nsd->ifs = ifs;
    } else {
        /* REUSEPORT not requested or not functional: disable it. */
        nsd->reuseport = 0;
    }

    return 0;
}
 1382 
 1383 /*
 1384  * Prepare the server for take off.
 1385  *
 1386  */
int
server_prepare(struct nsd *nsd)
{
#ifdef RATELIMIT
    /* set secret modifier for hashing (udb ptr buckets and rate limits) */
#ifdef HAVE_GETRANDOM
    uint32_t v;
    if(getrandom(&v, sizeof(v), 0) == -1) {
        log_msg(LOG_ERR, "getrandom failed: %s", strerror(errno));
        exit(1);
    }
    hash_set_raninit(v);
#elif defined(HAVE_ARC4RANDOM)
    hash_set_raninit(arc4random());
#else
    /* Weak fallback seed; upgraded with OpenSSL RAND below if usable. */
    uint32_t v = getpid() ^ time(NULL);
    srandom((unsigned long)v);
#  ifdef HAVE_SSL
    if(RAND_status() && RAND_bytes((unsigned char*)&v, sizeof(v)) > 0)
        hash_set_raninit(v);
    else
#  endif
        hash_set_raninit(random());
#endif
    rrl_mmap_init(nsd->child_count, nsd->options->rrl_size,
        nsd->options->rrl_ratelimit,
        nsd->options->rrl_whitelist_ratelimit,
        nsd->options->rrl_slip,
        nsd->options->rrl_ipv4_prefix_length,
        nsd->options->rrl_ipv6_prefix_length);
#endif /* RATELIMIT */

    /* Open the database... */
    if ((nsd->db = namedb_open(nsd->dbfile, nsd->options)) == NULL) {
        log_msg(LOG_ERR, "unable to open the database %s: %s",
            nsd->dbfile, strerror(errno));
        /* Clean up temporary task/stat files before bailing out. */
        unlink(nsd->task[0]->fname);
        unlink(nsd->task[1]->fname);
#ifdef USE_ZONE_STATS
        unlink(nsd->zonestatfname[0]);
        unlink(nsd->zonestatfname[1]);
#endif
        xfrd_del_tempdir(nsd);
        return -1;
    }
    /* check if zone files have been modified */
    /* NULL for taskudb because we send soainfo in a moment, batched up,
     * for all zones */
    if(nsd->options->zonefiles_check || (nsd->options->database == NULL ||
        nsd->options->database[0] == 0))
        namedb_check_zonefiles(nsd, nsd->options, NULL, NULL);
    zonestatid_tree_set(nsd);

    compression_table_capacity = 0;
    initialize_dname_compression_tables(nsd);

#ifdef  BIND8_STATS
    /* Initialize times... */
    time(&nsd->st.boot);
    set_bind8_alarm(nsd);
#endif /* BIND8_STATS */

    return 0;
}
 1451 
 1452 /*
 1453  * Fork the required number of servers.
 1454  */
 1455 static int
 1456 server_start_children(struct nsd *nsd, region_type* region, netio_type* netio,
 1457     int* xfrd_sock_p)
 1458 {
 1459     size_t i;
 1460 
 1461     /* Start all child servers initially.  */
 1462     for (i = 0; i < nsd->child_count; ++i) {
 1463         nsd->children[i].pid = 0;
 1464     }
 1465 
 1466     return restart_child_servers(nsd, region, netio, xfrd_sock_p);
 1467 }
 1468 
 1469 static void
 1470 server_close_socket(struct nsd_socket *sock)
 1471 {
 1472     if(sock->s != -1) {
 1473         close(sock->s);
 1474         sock->s = -1;
 1475     }
 1476 }
 1477 
 1478 void
 1479 server_close_all_sockets(struct nsd_socket sockets[], size_t n)
 1480 {
 1481     size_t i;
 1482 
 1483     /* Close all the sockets... */
 1484     for (i = 0; i < n; ++i) {
 1485         server_close_socket(&sockets[i]);
 1486     }
 1487 }
 1488 
 1489 /*
 1490  * Close the sockets, shutdown the server and exit.
 1491  * Does not return.
 1492  */
void
server_shutdown(struct nsd *nsd)
{
    size_t i;

    server_close_all_sockets(nsd->udp, nsd->ifs);
    server_close_all_sockets(nsd->tcp, nsd->ifs);
    /* CHILD: close command channel to parent */
    if(nsd->this_child && nsd->this_child->parent_fd != -1)
    {
        close(nsd->this_child->parent_fd);
        nsd->this_child->parent_fd = -1;
    }
    /* SERVER: close command channels to children */
    if(!nsd->this_child)
    {
        for(i=0; i < nsd->child_count; ++i)
            if(nsd->children[i].child_fd != -1)
            {
                close(nsd->children[i].child_fd);
                nsd->children[i].child_fd = -1;
            }
    }

    tsig_finalize();
#ifdef HAVE_SSL
    daemon_remote_delete(nsd->rc); /* ssl-delete secret keys */
    if (nsd->tls_ctx)
        SSL_CTX_free(nsd->tls_ctx);
#endif

#ifdef MEMCLEAN /* OS collects memory pages */
    /* Explicit teardown, mainly to keep leak checkers quiet; the
     * mmap-backed structures are kept mapped for other processes. */
#ifdef RATELIMIT
    rrl_mmap_deinit_keep_mmap();
#endif
#ifdef USE_DNSTAP
    dt_collector_destroy(nsd->dt_collector, nsd);
#endif
    udb_base_free_keep_mmap(nsd->task[0]);
    udb_base_free_keep_mmap(nsd->task[1]);
    namedb_close_udb(nsd->db); /* keeps mmap */
    namedb_close(nsd->db);
    nsd_options_destroy(nsd->options);
    region_destroy(nsd->region);
#endif
    log_finalize();
    exit(0);
}
 1541 
/* Create the two task mmap files shared with xfrd and allocate the
 * xfrd listener structure. Exits the process on file creation failure
 * after removing temporary files. */
void
server_prepare_xfrd(struct nsd* nsd)
{
    char tmpfile[256];
    /* create task mmaps */
    nsd->mytask = 0;
    snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.0",
        nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
    nsd->task[0] = task_file_create(tmpfile);
    if(!nsd->task[0]) {
#ifdef USE_ZONE_STATS
        unlink(nsd->zonestatfname[0]);
        unlink(nsd->zonestatfname[1]);
#endif
        xfrd_del_tempdir(nsd);
        exit(1);
    }
    snprintf(tmpfile, sizeof(tmpfile), "%snsd-xfr-%d/nsd.%u.task.1",
        nsd->options->xfrdir, (int)getpid(), (unsigned)getpid());
    nsd->task[1] = task_file_create(tmpfile);
    if(!nsd->task[1]) {
        /* Second task file failed: remove the first one too. */
        unlink(nsd->task[0]->fname);
#ifdef USE_ZONE_STATS
        unlink(nsd->zonestatfname[0]);
        unlink(nsd->zonestatfname[1]);
#endif
        xfrd_del_tempdir(nsd);
        exit(1);
    }
    assert(udb_base_get_userdata(nsd->task[0])->data == 0);
    assert(udb_base_get_userdata(nsd->task[1])->data == 0);
    /* create xfrd listener structure */
    nsd->xfrd_listener = region_alloc(nsd->region,
        sizeof(netio_handler_type));
    nsd->xfrd_listener->user_data = (struct ipc_handler_conn_data*)
        region_alloc(nsd->region, sizeof(struct ipc_handler_conn_data));
    nsd->xfrd_listener->fd = -1;
    ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->nsd =
        nsd;
    ((struct ipc_handler_conn_data*)nsd->xfrd_listener->user_data)->conn =
        xfrd_tcp_create(nsd->region, QIOBUFSZ);
}
 1584 
 1585 
/* Fork the xfrd (zone transfer daemon) process and connect it to the
 * parent with a non-blocking socketpair. The child never returns from
 * xfrd_init(); the parent records the IPC fd in xfrd_listener.
 * del_db: recreate xfrd's task db (it may be corrupt after a crash).
 * reload_active: passed through to xfrd_init. */
void
server_start_xfrd(struct nsd *nsd, int del_db, int reload_active)
{
    pid_t pid;
    int sockets[2] = {0,0};
    struct ipc_handler_conn_data *data;

    if(nsd->xfrd_listener->fd != -1)
        close(nsd->xfrd_listener->fd);
    if(del_db) {
        /* recreate taskdb that xfrd was using, it may be corrupt */
        /* we (or reload) use nsd->mytask, and xfrd uses the other */
        char* tmpfile = nsd->task[1-nsd->mytask]->fname;
        nsd->task[1-nsd->mytask]->fname = NULL;
        /* free alloc already, so udb does not shrink itself */
        udb_alloc_delete(nsd->task[1-nsd->mytask]->alloc);
        nsd->task[1-nsd->mytask]->alloc = NULL;
        udb_base_free(nsd->task[1-nsd->mytask]);
        /* create new file, overwrite the old one */
        nsd->task[1-nsd->mytask] = task_file_create(tmpfile);
        free(tmpfile);
    }
    if (socketpair(AF_UNIX, SOCK_STREAM, 0, sockets) == -1) {
        log_msg(LOG_ERR, "startxfrd failed on socketpair: %s", strerror(errno));
        return;
    }
    pid = fork();
    switch (pid) {
    case -1:
        log_msg(LOG_ERR, "fork xfrd failed: %s", strerror(errno));
        break;
    default:
        /* PARENT: close first socket, use second one */
        close(sockets[0]);
        if (fcntl(sockets[1], F_SETFL, O_NONBLOCK) == -1) {
            log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
        }
        if(del_db) xfrd_free_namedb(nsd);
        /* use other task than I am using, since if xfrd died and is
         * restarted, the reload is using nsd->mytask */
        nsd->mytask = 1 - nsd->mytask;

#ifdef HAVE_SETPROCTITLE
        setproctitle("xfrd");
#endif
#ifdef HAVE_CPUSET_T
        if(nsd->use_cpu_affinity) {
            set_cpu_affinity(nsd->xfrd_cpuset);
        }
#endif

        /* NOTE(review): despite the "default" (parent) label, this
         * branch becomes the xfrd process; xfrd_init does not return. */
        xfrd_init(sockets[1], nsd, del_db, reload_active, pid);
        /* ENOTREACH */
        break;
    case 0:
        /* CHILD: close second socket, use first one */
        close(sockets[1]);
        if (fcntl(sockets[0], F_SETFL, O_NONBLOCK) == -1) {
            log_msg(LOG_ERR, "cannot fcntl pipe: %s", strerror(errno));
        }
        nsd->xfrd_listener->fd = sockets[0];
        break;
    }
    /* server-parent only */
    nsd->xfrd_listener->timeout = NULL;
    nsd->xfrd_listener->event_types = NETIO_EVENT_READ;
    nsd->xfrd_listener->event_handler = parent_handle_xfrd_command;
    /* clear ongoing ipc reads */
    data = (struct ipc_handler_conn_data *) nsd->xfrd_listener->user_data;
    data->conn->is_reading = 0;
}
 1657 
 1658 /** add all soainfo to taskdb */
 1659 static void
 1660 add_all_soa_to_task(struct nsd* nsd, struct udb_base* taskudb)
 1661 {
 1662     struct radnode* n;
 1663     udb_ptr task_last; /* last task, mytask is empty so NULL */
 1664     /* add all SOA INFO to mytask */
 1665     udb_ptr_init(&task_last, taskudb);
 1666     for(n=radix_first(nsd->db->zonetree); n; n=radix_next(n)) {
 1667         task_new_soainfo(taskudb, &task_last, (zone_type*)n->elem, 0);
 1668     }
 1669     udb_ptr_unlink(&task_last, taskudb);
 1670 }
 1671 
void
server_send_soa_xfrd(struct nsd* nsd, int shortsoa)
{
    /* normally this exchanges the SOA from nsd->xfrd and the expire back.
     *   parent fills one taskdb with soas, xfrd fills other with expires.
     *   then they exchange and process.
     * shortsoa: xfrd crashes and needs to be restarted and one taskdb
     *   may be in use by reload.  Fill SOA in taskdb and give to xfrd.
     *   expire notifications can be sent back via a normal reload later
     *   (xfrd will wait for current running reload to finish if any).
     */
    sig_atomic_t cmd = 0;
    pid_t mypid;
    int xfrd_sock = nsd->xfrd_listener->fd;
    struct udb_base* taskudb = nsd->task[nsd->mytask];
    udb_ptr t;
    if(!shortsoa) {
        if(nsd->signal_hint_shutdown) {
        shutdown:
            /* Shutdown requested: close sockets, remove temp files,
             * flush nsd.db and exit via server_shutdown(). */
            log_msg(LOG_WARNING, "signal received, shutting down...");
            server_close_all_sockets(nsd->udp, nsd->ifs);
            server_close_all_sockets(nsd->tcp, nsd->ifs);
#ifdef HAVE_SSL
            daemon_remote_close(nsd->rc);
#endif
            /* Unlink it if possible... */
            unlinkpid(nsd->pidfile);
            unlink(nsd->task[0]->fname);
            unlink(nsd->task[1]->fname);
#ifdef USE_ZONE_STATS
            unlink(nsd->zonestatfname[0]);
            unlink(nsd->zonestatfname[1]);
#endif
            /* write the nsd.db to disk, wait for it to complete */
            udb_base_sync(nsd->db->udb, 1);
            udb_base_close(nsd->db->udb);
            server_shutdown(nsd);
            /* ENOTREACH */
            exit(0);
        }
    }
    if(shortsoa) {
        /* put SOA in xfrd task because mytask may be in use */
        taskudb = nsd->task[1-nsd->mytask];
    }

    add_all_soa_to_task(nsd, taskudb);
    if(!shortsoa) {
        /* wait for xfrd to signal task is ready, RELOAD signal */
        if(block_read(nsd, xfrd_sock, &cmd, sizeof(cmd), -1) != sizeof(cmd) ||
            cmd != NSD_RELOAD) {
            log_msg(LOG_ERR, "did not get start signal from xfrd");
            exit(1);
        }
        /* A shutdown signal may have arrived while blocked above. */
        if(nsd->signal_hint_shutdown) {
            goto shutdown;
        }
    }
    /* give xfrd our task, signal it with RELOAD_DONE */
    task_process_sync(taskudb);
    cmd = NSD_RELOAD_DONE;
    if(!write_socket(xfrd_sock, &cmd,  sizeof(cmd))) {
        log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
            (int)nsd->pid, strerror(errno));
    }
    /* Tell xfrd the pid acting as the reload process. */
    mypid = getpid();
    if(!write_socket(nsd->xfrd_listener->fd, &mypid,  sizeof(mypid))) {
        log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
            strerror(errno));
    }

    if(!shortsoa) {
        /* process the xfrd task works (expiry data) */
        nsd->mytask = 1 - nsd->mytask;
        taskudb = nsd->task[nsd->mytask];
        task_remap(taskudb);
        udb_ptr_new(&t, taskudb, udb_base_get_userdata(taskudb));
        while(!udb_ptr_is_null(&t)) {
            task_process_expire(nsd->db, TASKLIST(&t));
            udb_ptr_set_rptr(&t, taskudb, &TASKLIST(&t)->next);
        }
        udb_ptr_unlink(&t, taskudb);
        task_clear(taskudb);

        /* tell xfrd that the task is emptied, signal with RELOAD_DONE */
        cmd = NSD_RELOAD_DONE;
        if(!write_socket(xfrd_sock, &cmd,  sizeof(cmd))) {
            log_msg(LOG_ERR, "problems sending soa end from reload %d to xfrd: %s",
                (int)nsd->pid, strerror(errno));
        }
    }
}
 1764 
 1765 #ifdef HAVE_SSL
 1766 static void
 1767 log_crypto_from_err(const char* str, unsigned long err)
 1768 {
 1769     /* error:[error code]:[library name]:[function name]:[reason string] */
 1770     char buf[128];
 1771     unsigned long e;
 1772     ERR_error_string_n(err, buf, sizeof(buf));
 1773     log_msg(LOG_ERR, "%s crypto %s", str, buf);
 1774     while( (e=ERR_get_error()) ) {
 1775         ERR_error_string_n(e, buf, sizeof(buf));
 1776         log_msg(LOG_ERR, "and additionally crypto %s", buf);
 1777     }
 1778 }
 1779 
 1780 void
 1781 log_crypto_err(const char* str)
 1782 {
 1783     log_crypto_from_err(str, ERR_get_error());
 1784 }
 1785 
 1786 /** true if the ssl handshake error has to be squelched from the logs */
static int
squelch_err_ssl_handshake(unsigned long err)
{
    /* Return 1 when err is a known harmless handshake failure (plain
     * HTTP sent to the TLS port, wrong/old TLS versions, no shared
     * cipher) that should not clutter the logs at normal verbosity. */
    if(verbosity >= 3)
        return 0; /* only squelch on low verbosity */
    /* this is very specific, we could filter on ERR_GET_REASON()
     * (the third element in ERR_PACK) */
    /* the SSL_F_* function codes are only defined by some OpenSSL
     * versions, hence the #ifdef guards around parts of the list */
    if(err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTPS_PROXY_REQUEST) ||
        err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_HTTP_REQUEST) ||
        err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_GET_RECORD, SSL_R_WRONG_VERSION_NUMBER) ||
        err == ERR_PACK(ERR_LIB_SSL, SSL_F_SSL3_READ_BYTES, SSL_R_SSLV3_ALERT_BAD_CERTIFICATE)
#ifdef SSL_F_TLS_POST_PROCESS_CLIENT_HELLO
        || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_POST_PROCESS_CLIENT_HELLO, SSL_R_NO_SHARED_CIPHER)
#endif
#ifdef SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO
        || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNKNOWN_PROTOCOL)
        || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_UNSUPPORTED_PROTOCOL)
#  ifdef SSL_R_VERSION_TOO_LOW
        || err == ERR_PACK(ERR_LIB_SSL, SSL_F_TLS_EARLY_POST_PROCESS_CLIENT_HELLO, SSL_R_VERSION_TOO_LOW)
#  endif
#endif
        )
        return 1;
    return 0;
}
 1812 
void
perform_openssl_init(void)
{
    /* init SSL library */
    /* Load error strings and algorithms; the API differs between
     * OpenSSL before and after 1.1.0, hence the version checks. */
#ifdef HAVE_ERR_LOAD_CRYPTO_STRINGS
    ERR_load_crypto_strings();
#endif
    ERR_load_SSL_strings();
#if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_CRYPTO)
    OpenSSL_add_all_algorithms();
#else
    OPENSSL_init_crypto(OPENSSL_INIT_ADD_ALL_CIPHERS
        | OPENSSL_INIT_ADD_ALL_DIGESTS
        | OPENSSL_INIT_LOAD_CRYPTO_STRINGS, NULL);
#endif
#if OPENSSL_VERSION_NUMBER < 0x10100000 || !defined(HAVE_OPENSSL_INIT_SSL)
    (void)SSL_library_init();
#else
    OPENSSL_init_ssl(0, NULL);
#endif

    if(!RAND_status()) {
        /* try to seed it */
        /* last-resort fallback: derive 256 bytes from time and pid.
         * This is weak entropy; a warning is logged below. */
        unsigned char buf[256];
        unsigned int v, seed=(unsigned)time(NULL) ^ (unsigned)getpid();
        size_t i;
        v = seed;
        for(i=0; i<256/sizeof(v); i++) {
            memmove(buf+i*sizeof(v), &v, sizeof(v));
            v = v*seed + (unsigned int)i;
        }
        RAND_seed(buf, 256);
        log_msg(LOG_WARNING, "warning: no entropy, seeding openssl PRNG with time");
    }
}
 1848 
 1849 static int
 1850 get_ocsp(char *filename, unsigned char **ocsp)
 1851 {
 1852     BIO *bio;
 1853     OCSP_RESPONSE *response;
 1854     int len = -1;
 1855     unsigned char *p, *buf;
 1856     assert(filename);
 1857 
 1858     if ((bio = BIO_new_file(filename, "r")) == NULL) {
 1859         log_crypto_err("get_ocsp: BIO_new_file failed");
 1860         return -1;
 1861     }
 1862 
 1863     if ((response = d2i_OCSP_RESPONSE_bio(bio, NULL)) == NULL) {
 1864         log_crypto_err("get_ocsp: d2i_OCSP_RESPONSE_bio failed");
 1865         BIO_free(bio);
 1866         return -1;
 1867     }
 1868 
 1869     if ((len = i2d_OCSP_RESPONSE(response, NULL)) <= 0) {
 1870         log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #1 failed");
 1871         OCSP_RESPONSE_free(response);
 1872         BIO_free(bio);
 1873         return -1;
 1874     }
 1875 
 1876     if ((buf = malloc((size_t) len)) == NULL) {
 1877         log_msg(LOG_ERR, "get_ocsp: malloc failed");
 1878         OCSP_RESPONSE_free(response);
 1879         BIO_free(bio);
 1880         return -1;
 1881     }
 1882 
 1883     p = buf;
 1884     if ((len = i2d_OCSP_RESPONSE(response, &p)) <= 0) {
 1885         log_crypto_err("get_ocsp: i2d_OCSP_RESPONSE #2 failed");
 1886         free(buf);
 1887         OCSP_RESPONSE_free(response);
 1888         BIO_free(bio);
 1889         return -1;
 1890     }
 1891 
 1892     OCSP_RESPONSE_free(response);
 1893     BIO_free(bio);
 1894 
 1895     *ocsp = buf;
 1896     return len;
 1897 }
 1898 
 1899 /* further setup ssl ctx after the keys are loaded */
static void
listen_sslctx_setup_2(void* ctxt)
{
    /* Enable ECDHE key exchange on the context after the keys were
     * loaded.  With SSL_CTX_set_ecdh_auto OpenSSL selects the curve
     * itself; otherwise an explicit p256 ephemeral key is installed
     * when the older APIs are available.  Failure only logs; the
     * context stays usable without ECDHE. */
    SSL_CTX* ctx = (SSL_CTX*)ctxt;
    (void)ctx;
#if HAVE_DECL_SSL_CTX_SET_ECDH_AUTO
    if(!SSL_CTX_set_ecdh_auto(ctx,1)) {
        /* ENOTREACH */
        log_crypto_err("Error in SSL_CTX_ecdh_auto, not enabling ECDHE");
    }
#elif defined(HAVE_DECL_SSL_CTX_SET_TMP_ECDH) && defined(NID_X9_62_prime256v1) && defined(HAVE_EC_KEY_NEW_BY_CURVE_NAME)
    if(1) {
        EC_KEY *ecdh = EC_KEY_new_by_curve_name (NID_X9_62_prime256v1);
        if (!ecdh) {
            log_crypto_err("could not find p256, not enabling ECDHE");
        } else {
            if (1 != SSL_CTX_set_tmp_ecdh (ctx, ecdh)) {
                log_crypto_err("Error in SSL_CTX_set_tmp_ecdh, not enabling ECDHE");
            }
            EC_KEY_free (ecdh);
        }
    }
#endif
}
 1924 
 1925 static int
 1926 add_ocsp_data_cb(SSL *s, void* ATTR_UNUSED(arg))
 1927 {
 1928     if(ocspdata) {
 1929         unsigned char *p;
 1930         if ((p=malloc(ocspdata_len)) == NULL) {
 1931             log_msg(LOG_ERR, "add_ocsp_data_cb: malloc failure");
 1932             return SSL_TLSEXT_ERR_NOACK;
 1933         }
 1934         memcpy(p, ocspdata, ocspdata_len);
 1935         if ((SSL_set_tlsext_status_ocsp_resp(s, p, ocspdata_len)) != 1) {
 1936             log_crypto_err("Error in SSL_set_tlsext_status_ocsp_resp");
 1937             free(p);
 1938             return SSL_TLSEXT_ERR_NOACK;
 1939         }
 1940         return SSL_TLSEXT_ERR_OK;
 1941     } else {
 1942         return SSL_TLSEXT_ERR_NOACK;
 1943     }
 1944 }
 1945 
 1946 SSL_CTX*
 1947 server_tls_ctx_setup(char* key, char* pem, char* verifypem)
 1948 {
 1949     SSL_CTX *ctx = SSL_CTX_new(SSLv23_server_method());
 1950     if(!ctx) {
 1951         log_crypto_err("could not SSL_CTX_new");
 1952         return NULL;
 1953     }
 1954     /* no SSLv2, SSLv3 because has defects */
 1955 #if SSL_OP_NO_SSLv2 != 0
 1956     if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv2) & SSL_OP_NO_SSLv2) != SSL_OP_NO_SSLv2){
 1957         log_crypto_err("could not set SSL_OP_NO_SSLv2");
 1958         SSL_CTX_free(ctx);
 1959         return NULL;
 1960     }
 1961 #endif
 1962     if((SSL_CTX_set_options(ctx, SSL_OP_NO_SSLv3) & SSL_OP_NO_SSLv3)
 1963         != SSL_OP_NO_SSLv3){
 1964         log_crypto_err("could not set SSL_OP_NO_SSLv3");
 1965         SSL_CTX_free(ctx);
 1966         return 0;
 1967     }
 1968 #if defined(SSL_OP_NO_TLSv1) && defined(SSL_OP_NO_TLSv1_1)
 1969     /* if we have tls 1.1 disable 1.0 */
 1970     if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1) & SSL_OP_NO_TLSv1)
 1971         != SSL_OP_NO_TLSv1){
 1972         log_crypto_err("could not set SSL_OP_NO_TLSv1");
 1973         SSL_CTX_free(ctx);
 1974         return 0;
 1975     }
 1976 #endif
 1977 #if defined(SSL_OP_NO_TLSv1_1) && defined(SSL_OP_NO_TLSv1_2)
 1978     /* if we have tls 1.2 disable 1.1 */
 1979     if((SSL_CTX_set_options(ctx, SSL_OP_NO_TLSv1_1) & SSL_OP_NO_TLSv1_1)
 1980         != SSL_OP_NO_TLSv1_1){
 1981         log_crypto_err("could not set SSL_OP_NO_TLSv1_1");
 1982         SSL_CTX_free(ctx);
 1983         return 0;
 1984     }
 1985 #endif
 1986 #if defined(SSL_OP_NO_RENEGOTIATION)
 1987     /* disable client renegotiation */
 1988     if((SSL_CTX_set_options(ctx, SSL_OP_NO_RENEGOTIATION) &
 1989         SSL_OP_NO_RENEGOTIATION) != SSL_OP_NO_RENEGOTIATION) {
 1990         log_crypto_err("could not set SSL_OP_NO_RENEGOTIATION");
 1991         SSL_CTX_free(ctx);
 1992         return 0;
 1993     }
 1994 #endif
 1995 #if defined(SHA256_DIGEST_LENGTH) && defined(SSL_TXT_CHACHA20)
 1996     /* if we have sha256, set the cipher list to have no known vulns */
 1997     if(!SSL_CTX_set_cipher_list(ctx, "ECDHE+AESGCM:ECDHE+CHACHA20"))
 1998         log_crypto_err("could not set cipher list with SSL_CTX_set_cipher_list");
 1999 #endif
 2000     if((SSL_CTX_set_options(ctx, SSL_OP_CIPHER_SERVER_PREFERENCE) &
 2001         SSL_OP_CIPHER_SERVER_PREFERENCE) !=
 2002         SSL_OP_CIPHER_SERVER_PREFERENCE) {
 2003         log_crypto_err("could not set SSL_OP_CIPHER_SERVER_PREFERENCE");
 2004         SSL_CTX_free(ctx);
 2005         return 0;
 2006     }
 2007 #ifdef HAVE_SSL_CTX_SET_SECURITY_LEVEL
 2008     SSL_CTX_set_security_level(ctx, 0);
 2009 #endif
 2010     if(!SSL_CTX_use_certificate_chain_file(ctx, pem)) {
 2011         log_msg(LOG_ERR, "error for cert file: %s", pem);
 2012         log_crypto_err("error in SSL_CTX use_certificate_chain_file");
 2013         SSL_CTX_free(ctx);
 2014         return NULL;
 2015     }
 2016     if(!SSL_CTX_use_PrivateKey_file(ctx, key, SSL_FILETYPE_PEM)) {
 2017         log_msg(LOG_ERR, "error for private key file: %s", key);
 2018         log_crypto_err("Error in SSL_CTX use_PrivateKey_file");
 2019         SSL_CTX_free(ctx);
 2020         return NULL;
 2021     }
 2022     if(!SSL_CTX_check_private_key(ctx)) {
 2023         log_msg(LOG_ERR, "error for key file: %s", key);
 2024         log_crypto_err("Error in SSL_CTX check_private_key");
 2025         SSL_CTX_free(ctx);
 2026         return NULL;
 2027     }
 2028     listen_sslctx_setup_2(ctx);
 2029     if(verifypem && verifypem[0]) {
 2030         if(!SSL_CTX_load_verify_locations(ctx, verifypem, NULL)) {
 2031             log_crypto_err("Error in SSL_CTX verify locations");
 2032             SSL_CTX_free(ctx);
 2033             return NULL;
 2034         }
 2035         SSL_CTX_set_client_CA_list(ctx, SSL_load_client_CA_file(verifypem));
 2036         SSL_CTX_set_verify(ctx, SSL_VERIFY_PEER, NULL);
 2037     }
 2038     return ctx;
 2039 }
 2040 
 2041 SSL_CTX*
 2042 server_tls_ctx_create(struct nsd* nsd, char* verifypem, char* ocspfile)
 2043 {
 2044     char *key, *pem;
 2045     SSL_CTX *ctx;
 2046 
 2047     key = nsd->options->tls_service_key;
 2048     pem = nsd->options->tls_service_pem;
 2049     if(!key || key[0] == 0) {
 2050         log_msg(LOG_ERR, "error: no tls-service-key file specified");
 2051         return NULL;
 2052     }
 2053     if(!pem || pem[0] == 0) {
 2054         log_msg(LOG_ERR, "error: no tls-service-pem file specified");
 2055         return NULL;
 2056     }
 2057 
 2058     /* NOTE:This mimics the existing code in Unbound 1.5.1 by supporting SSL but
 2059      * raft-ietf-uta-tls-bcp-08 recommends only using TLSv1.2*/
 2060     ctx = server_tls_ctx_setup(key, pem, verifypem);
 2061     if(!ctx) {
 2062         log_msg(LOG_ERR, "could not setup server TLS context");
 2063         return NULL;
 2064     }
 2065     if(ocspfile && ocspfile[0]) {
 2066         if ((ocspdata_len = get_ocsp(ocspfile, &ocspdata)) < 0) {
 2067             log_crypto_err("Error reading OCSPfile");
 2068             SSL_CTX_free(ctx);
 2069             return NULL;
 2070         } else {
 2071             VERBOSITY(2, (LOG_INFO, "ocspfile %s loaded", ocspfile));
 2072             if(!SSL_CTX_set_tlsext_status_cb(ctx, add_ocsp_data_cb)) {
 2073                 log_crypto_err("Error in SSL_CTX_set_tlsext_status_cb");
 2074                 SSL_CTX_free(ctx);
 2075                 return NULL;
 2076             }
 2077         }
 2078     }
 2079     return ctx;
 2080 }
 2081 
 2082 /* check if tcp_handler_accept_data created for TLS dedicated port */
 2083 int
 2084 using_tls_port(struct sockaddr* addr, const char* tls_port)
 2085 {
 2086     in_port_t port = 0;
 2087 
 2088     if (addr->sa_family == AF_INET)
 2089         port = ((struct sockaddr_in*)addr)->sin_port;
 2090 #ifndef HAVE_STRUCT_SOCKADDR_IN6
 2091     else
 2092         port = ((struct sockaddr_in6*)addr)->sin6_port;
 2093 #endif /* HAVE_STRUCT_SOCKADDR_IN6 */
 2094     if (atoi(tls_port) == ntohs(port))
 2095         return 1;
 2096 
 2097     return 0;
 2098 }
 2099 #endif
 2100 
 2101 /* pass timeout=-1 for blocking. Returns size, 0, -1(err), or -2(timeout) */
 2102 ssize_t
 2103 block_read(struct nsd* nsd, int s, void* p, ssize_t sz, int timeout)
 2104 {
 2105     uint8_t* buf = (uint8_t*) p;
 2106     ssize_t total = 0;
 2107     struct pollfd fd;
 2108     memset(&fd, 0, sizeof(fd));
 2109     fd.fd = s;
 2110     fd.events = POLLIN;
 2111     
 2112     while( total < sz) {
 2113         ssize_t ret;
 2114         ret = poll(&fd, 1, (timeout==-1)?-1:timeout*1000);
 2115         if(ret == -1) {
 2116             if(errno == EAGAIN)
 2117                 /* blocking read */
 2118                 continue;
 2119             if(errno == EINTR) {
 2120                 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown))
 2121                     return -1;
 2122                 /* other signals can be handled later */
 2123                 continue;
 2124             }
 2125             /* some error */
 2126             return -1;
 2127         }
 2128         if(ret == 0) {
 2129             /* operation timed out */
 2130             return -2;
 2131         }
 2132         ret = read(s, buf+total, sz-total);
 2133         if(ret == -1) {
 2134             if(errno == EAGAIN)
 2135                 /* blocking read */
 2136                 continue;
 2137             if(errno == EINTR) {
 2138                 if(nsd && (nsd->signal_hint_quit || nsd->signal_hint_shutdown))
 2139                     return -1;
 2140                 /* other signals can be handled later */
 2141                 continue;
 2142             }
 2143             /* some error */
 2144             return -1;
 2145         }
 2146         if(ret == 0) {
 2147             /* closed connection! */
 2148             return 0;
 2149         }
 2150         total += ret;
 2151     }
 2152     return total;
 2153 }
 2154 
static void
reload_process_tasks(struct nsd* nsd, udb_ptr* last_task, int cmdsocket)
{
    /* Walk the task list that xfrd queued in the current task udb and
     * apply every task to the database.  Between tasks, cmdsocket is
     * polled (0 timeout) so an NSD_QUIT from the old main process
     * makes this reload process clean up and exit immediately. */
    sig_atomic_t cmd = NSD_QUIT_SYNC;
    udb_ptr t, next;
    udb_base* u = nsd->task[nsd->mytask];
    udb_ptr_init(&next, u);
    udb_ptr_new(&t, u, udb_base_get_userdata(u));
    /* detach the list head; tasks are consumed destructively */
    udb_base_set_userdata(u, 0);
    while(!udb_ptr_is_null(&t)) {
        /* store next in list so this one can be deleted or reused */
        udb_ptr_set_rptr(&next, u, &TASKLIST(&t)->next);
        udb_rptr_zero(&TASKLIST(&t)->next, u);

        /* process task t */
        /* append results for task t and update last_task */
        task_process_in_reload(nsd, u, last_task, &t);

        /* go to next */
        udb_ptr_set_ptr(&t, u, &next);

        /* if the parent has quit, we must quit too, poll the fd for cmds */
        if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) {
            DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd));
            if(cmd == NSD_QUIT) {
                DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd"));
                /* sync to disk (if needed) */
                udb_base_sync(nsd->db->udb, 0);
                /* unlink files of remainder of tasks */
                while(!udb_ptr_is_null(&t)) {
                    if(TASKLIST(&t)->task_type == task_apply_xfr) {
                        xfrd_unlink_xfrfile(nsd, TASKLIST(&t)->yesno);
                    }
                    udb_ptr_set_rptr(&t, u, &TASKLIST(&t)->next);
                }
                udb_ptr_unlink(&t, u);
                udb_ptr_unlink(&next, u);
                exit(0);
            }
        }

    }
    udb_ptr_unlink(&t, u);
    udb_ptr_unlink(&next, u);
}
 2200 
 2201 #ifdef BIND8_STATS
 2202 static void
 2203 parent_send_stats(struct nsd* nsd, int cmdfd)
 2204 {
 2205     size_t i;
 2206     if(!write_socket(cmdfd, &nsd->st, sizeof(nsd->st))) {
 2207         log_msg(LOG_ERR, "could not write stats to reload");
 2208         return;
 2209     }
 2210     for(i=0; i<nsd->child_count; i++)
 2211         if(!write_socket(cmdfd, &nsd->children[i].query_count,
 2212             sizeof(stc_type))) {
 2213             log_msg(LOG_ERR, "could not write stats to reload");
 2214             return;
 2215         }
 2216 }
 2217 
static void
reload_do_stats(int cmdfd, struct nsd* nsd, udb_ptr* last)
{
    /* Receive the statistics block and the per-child query counters
     * that the old parent sends over cmdfd (see parent_send_stats),
     * and append them as a stat_info task after *last so xfrd picks
     * them up.  On any read failure the stats are silently dropped. */
    struct nsdst s;
    stc_type* p;
    size_t i;
    if(block_read(nsd, cmdfd, &s, sizeof(s),
        RELOAD_SYNC_TIMEOUT) != sizeof(s)) {
        log_msg(LOG_ERR, "could not read stats from oldpar");
        return;
    }
    /* fill in current database disk and memory usage */
    s.db_disk = (nsd->db->udb?nsd->db->udb->base_size:0);
    s.db_mem = region_get_mem(nsd->db->region);
    /* p points at space for child_count counters stored after the
     * stat block inside the new task */
    p = (stc_type*)task_new_stat_info(nsd->task[nsd->mytask], last, &s,
        nsd->child_count);
    if(!p) return;
    /* one counter per child, 1 second timeout each */
    for(i=0; i<nsd->child_count; i++) {
        if(block_read(nsd, cmdfd, p++, sizeof(stc_type), 1)!=
            sizeof(stc_type))
            return;
    }
}
 2240 #endif /* BIND8_STATS */
 2241 
 2242 /*
 2243  * Reload the database, stop parent, re-fork children and continue.
 2244  * as server_main.
 2245  */
static void
server_reload(struct nsd *nsd, region_type* server_region, netio_type* netio,
    int cmdsocket)
{
    /* Run the reload: apply xfrd's queued tasks to the database, start
     * new serve children, tell the old main process (via cmdsocket) to
     * quit, and finally notify xfrd that the reload is done.  On return
     * this process continues as the new server_main. */
    pid_t mypid;
    sig_atomic_t cmd = NSD_QUIT_SYNC;
    int ret;
    udb_ptr last_task;
    struct sigaction old_sigchld, ign_sigchld;
    /* ignore SIGCHLD from the previous server_main that used this pid */
    memset(&ign_sigchld, 0, sizeof(ign_sigchld));
    ign_sigchld.sa_handler = SIG_IGN;
    sigaction(SIGCHLD, &ign_sigchld, &old_sigchld);

#ifdef HAVE_SETPROCTITLE
    setproctitle("main");
#endif
#ifdef HAVE_CPUSET_T
    if(nsd->use_cpu_affinity) {
        set_cpu_affinity(nsd->cpuset);
    }
#endif

    /* see what tasks we got from xfrd */
    task_remap(nsd->task[nsd->mytask]);
    udb_ptr_init(&last_task, nsd->task[nsd->mytask]);
    /* keep udb compaction off while tasks are being applied */
    udb_compact_inhibited(nsd->db->udb, 1);
    reload_process_tasks(nsd, &last_task, cmdsocket);
    udb_compact_inhibited(nsd->db->udb, 0);
    udb_compact(nsd->db->udb);

#ifndef NDEBUG
    if(nsd_debug_level >= 1)
        region_log_stats(nsd->db->region);
#endif /* NDEBUG */
    /* sync to disk (if needed) */
    udb_base_sync(nsd->db->udb, 0);

    initialize_dname_compression_tables(nsd);

#ifdef BIND8_STATS
    /* Restart dumping stats if required.  */
    time(&nsd->st.boot);
    set_bind8_alarm(nsd);
#endif
#ifdef USE_ZONE_STATS
    server_zonestat_realloc(nsd); /* realloc for new children */
    server_zonestat_switch(nsd);
#endif

    /* listen for the signals of failed children again */
    sigaction(SIGCHLD, &old_sigchld, NULL);
#ifdef USE_DNSTAP
    if (nsd->dt_collector) {
        int *swap_fd_send;
        DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: swap dnstap collector pipes"));
        /* Swap fd_send with fd_swap so old serve child and new serve
         * childs will not write to the same pipe ends simultaneously */
        swap_fd_send = nsd->dt_collector_fd_send;
        nsd->dt_collector_fd_send = nsd->dt_collector_fd_swap;
        nsd->dt_collector_fd_swap = swap_fd_send;

    }
#endif
    /* Start new child processes */
    if (server_start_children(nsd, server_region, netio, &nsd->
        xfrd_listener->fd) != 0) {
        send_children_quit(nsd);
        exit(1);
    }

    /* if the parent has quit, we must quit too, poll the fd for cmds */
    if(block_read(nsd, cmdsocket, &cmd, sizeof(cmd), 0) == sizeof(cmd)) {
        DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc command from main %d", (int)cmd));
        if(cmd == NSD_QUIT) {
            DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: quit to follow nsd"));
            send_children_quit(nsd);
            exit(0);
        }
    }

    /* Send quit command to parent: blocking, wait for receipt. */
    /* retries while block_read keeps timing out (-2) */
    do {
        DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc send quit to main"));
        if (!write_socket(cmdsocket, &cmd, sizeof(cmd)))
        {
            log_msg(LOG_ERR, "problems sending command from reload to oldnsd: %s",
                strerror(errno));
        }
        /* blocking: wait for parent to really quit. (it sends RELOAD as ack) */
        DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc wait for ack main"));
        ret = block_read(nsd, cmdsocket, &cmd, sizeof(cmd),
            RELOAD_SYNC_TIMEOUT);
        if(ret == -2) {
            DEBUG(DEBUG_IPC, 1, (LOG_ERR, "reload timeout QUITSYNC. retry"));
        }
    } while (ret == -2);
    if(ret == -1) {
        log_msg(LOG_ERR, "reload: could not wait for parent to quit: %s",
            strerror(errno));
    }
    DEBUG(DEBUG_IPC,1, (LOG_INFO, "reload: ipc reply main %d %d", ret, (int)cmd));
    if(cmd == NSD_QUIT) {
        /* small race condition possible here, parent got quit cmd. */
        send_children_quit(nsd);
        exit(1);
    }
    assert(ret==-1 || ret == 0 || cmd == NSD_RELOAD);
#ifdef BIND8_STATS
    reload_do_stats(cmdsocket, nsd, &last_task);
#endif
    udb_ptr_unlink(&last_task, nsd->task[nsd->mytask]);
    task_process_sync(nsd->task[nsd->mytask]);
#ifdef USE_ZONE_STATS
    server_zonestat_realloc(nsd); /* realloc for next children */
#endif

    /* send soainfo to the xfrd process, signal it that reload is done,
     * it picks up the taskudb */
    cmd = NSD_RELOAD_DONE;
    if(!write_socket(nsd->xfrd_listener->fd, &cmd,  sizeof(cmd))) {
        log_msg(LOG_ERR, "problems sending reload_done xfrd: %s",
            strerror(errno));
    }
    mypid = getpid();
    if(!write_socket(nsd->xfrd_listener->fd, &mypid,  sizeof(mypid))) {
        log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
            strerror(errno));
    }

    /* try to reopen file */
    if (nsd->file_rotation_ok)
        log_reopen(nsd->log_filename, 1);
    /* exit reload, continue as new server_main */
}
 2381 
 2382 /*
 2383  * Get the mode depending on the signal hints that have been received.
 2384  * Multiple signal hints can be received and will be handled in turn.
 2385  */
 2386 static sig_atomic_t
 2387 server_signal_mode(struct nsd *nsd)
 2388 {
 2389     if(nsd->signal_hint_quit) {
 2390         nsd->signal_hint_quit = 0;
 2391         return NSD_QUIT;
 2392     }
 2393     else if(nsd->signal_hint_shutdown) {
 2394         nsd->signal_hint_shutdown = 0;
 2395         return NSD_SHUTDOWN;
 2396     }
 2397     else if(nsd->signal_hint_child) {
 2398         nsd->signal_hint_child = 0;
 2399         return NSD_REAP_CHILDREN;
 2400     }
 2401     else if(nsd->signal_hint_reload) {
 2402         nsd->signal_hint_reload = 0;
 2403         return NSD_RELOAD;
 2404     }
 2405     else if(nsd->signal_hint_reload_hup) {
 2406         nsd->signal_hint_reload_hup = 0;
 2407         return NSD_RELOAD_REQ;
 2408     }
 2409     else if(nsd->signal_hint_stats) {
 2410         nsd->signal_hint_stats = 0;
 2411 #ifdef BIND8_STATS
 2412         set_bind8_alarm(nsd);
 2413 #endif
 2414         return NSD_STATS;
 2415     }
 2416     else if(nsd->signal_hint_statsusr) {
 2417         nsd->signal_hint_statsusr = 0;
 2418         return NSD_STATS;
 2419     }
 2420     return NSD_RUN;
 2421 }
 2422 
 2423 /*
 2424  * The main server simply waits for signals and child processes to
 2425  * terminate.  Child processes are restarted as necessary.
 2426  */
 2427 void
 2428 server_main(struct nsd *nsd)
 2429 {
 2430     region_type *server_region = region_create(xalloc, free);
 2431     netio_type *netio = netio_create(server_region);
 2432     netio_handler_type reload_listener;
 2433     int reload_sockets[2] = {-1, -1};
 2434     struct timespec timeout_spec;
 2435     int status;
 2436     pid_t child_pid;
 2437     pid_t reload_pid = -1;
 2438     sig_atomic_t mode;
 2439 
 2440     /* Ensure we are the main process */
 2441     assert(nsd->server_kind == NSD_SERVER_MAIN);
 2442 
 2443     /* Add listener for the XFRD process */
 2444     netio_add_handler(netio, nsd->xfrd_listener);
 2445 
 2446     /* Start the child processes that handle incoming queries */
 2447     if (server_start_children(nsd, server_region, netio,
 2448         &nsd->xfrd_listener->fd) != 0) {
 2449         send_children_quit(nsd);
 2450         exit(1);
 2451     }
 2452     reload_listener.fd = -1;
 2453 
 2454     /* This_child MUST be 0, because this is the parent process */
 2455     assert(nsd->this_child == 0);
 2456 
 2457     /* Run the server until we get a shutdown signal */
 2458     while ((mode = nsd->mode) != NSD_SHUTDOWN) {
 2459         /* Did we receive a signal that changes our mode? */
 2460         if(mode == NSD_RUN) {
 2461             nsd->mode = mode = server_signal_mode(nsd);
 2462         }
 2463 
 2464         switch (mode) {
 2465         case NSD_RUN:
 2466             /* see if any child processes terminated */
 2467             while((child_pid = waitpid(-1, &status, WNOHANG)) != -1 && child_pid != 0) {
 2468                 int is_child = delete_child_pid(nsd, child_pid);
 2469                 if (is_child != -1 && nsd->children[is_child].need_to_exit) {
 2470                     if(nsd->children[is_child].child_fd == -1)
 2471                         nsd->children[is_child].has_exited = 1;
 2472                     parent_check_all_children_exited(nsd);
 2473                 } else if(is_child != -1) {
 2474                     log_msg(LOG_WARNING,
 2475                            "server %d died unexpectedly with status %d, restarting",
 2476                            (int) child_pid, status);
 2477                     restart_child_servers(nsd, server_region, netio,
 2478                         &nsd->xfrd_listener->fd);
 2479                 } else if (child_pid == reload_pid) {
 2480                     sig_atomic_t cmd = NSD_RELOAD_DONE;
 2481                     pid_t mypid;
 2482                     log_msg(LOG_WARNING,
 2483                            "Reload process %d failed with status %d, continuing with old database",
 2484                            (int) child_pid, status);
 2485                     reload_pid = -1;
 2486                     if(reload_listener.fd != -1) close(reload_listener.fd);
 2487                     reload_listener.fd = -1;
 2488                     reload_listener.event_types = NETIO_EVENT_NONE;
 2489                     task_process_sync(nsd->task[nsd->mytask]);
 2490                     /* inform xfrd reload attempt ended */
 2491                     if(!write_socket(nsd->xfrd_listener->fd,
 2492                         &cmd, sizeof(cmd))) {
 2493                         log_msg(LOG_ERR, "problems "
 2494                           "sending SOAEND to xfrd: %s",
 2495                           strerror(errno));
 2496                     }
 2497                     mypid = getpid();
 2498                     if(!write_socket(nsd->xfrd_listener->fd, &mypid,  sizeof(mypid))) {
 2499                         log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
 2500                             strerror(errno));
 2501                     }
 2502 #ifdef USE_DNSTAP
 2503                 } else if(nsd->dt_collector && child_pid == nsd->dt_collector->dt_pid) {
 2504                     log_msg(LOG_WARNING,
 2505                            "dnstap-collector %d terminated with status %d",
 2506                            (int) child_pid, status);
 2507                     if(nsd->dt_collector) {
 2508                         dt_collector_close(nsd->dt_collector, nsd);
 2509                         dt_collector_destroy(nsd->dt_collector, nsd);
 2510                         nsd->dt_collector = NULL;
 2511                     }
 2512                     /* Only respawn a crashed (or exited)
 2513                      * dnstap-collector when not reloading,
 2514                      * to not induce a reload during a
 2515                      * reload (which would seriously
 2516                      * disrupt nsd procedures and lead to
 2517                      * unpredictable results)!
 2518                      *
 2519                      * This will *leave* a dnstap-collector
 2520                      * process terminated, but because
 2521                      * signalling of the reload process to
 2522                      * the main process to respawn in this
 2523                      * situation will be cumbersome, and
 2524                      * because this situation is so
 2525                      * specific (and therefore hopefully
 2526                      * extremely rare or non-existing at
 2527                      * all), plus the fact that we are left
 2528                      * with a perfectly function NSD
 2529                      * (besides not logging dnstap
 2530                      * messages), I consider it acceptable
 2531                      * to leave this unresolved.
 2532                      */
 2533                     if(reload_pid == -1 && nsd->options->dnstap_enable) {
 2534                         nsd->dt_collector = dt_collector_create(nsd);
 2535                         dt_collector_start(nsd->dt_collector, nsd);
 2536                         nsd->mode = NSD_RELOAD_REQ;
 2537                     }
 2538 #endif
 2539                 } else if(status != 0) {
 2540                     /* check for status, because we get
 2541                      * the old-servermain because reload
 2542                      * is the process-parent of old-main,
 2543                      * and we get older server-processes
 2544                      * that are exiting after a reload */
 2545                     log_msg(LOG_WARNING,
 2546                            "process %d terminated with status %d",
 2547                            (int) child_pid, status);
 2548                 }
 2549             }
 2550             if (child_pid == -1) {
 2551                 if (errno == EINTR) {
 2552                     continue;
 2553                 }
 2554                 if (errno != ECHILD)
 2555                     log_msg(LOG_WARNING, "wait failed: %s", strerror(errno));
 2556             }
 2557             if (nsd->mode != NSD_RUN)
 2558                 break;
 2559 
 2560             /* timeout to collect processes. In case no sigchild happens. */
 2561             timeout_spec.tv_sec = 60;
 2562             timeout_spec.tv_nsec = 0;
 2563 
 2564             /* listen on ports, timeout for collecting terminated children */
 2565             if(netio_dispatch(netio, &timeout_spec, 0) == -1) {
 2566                 if (errno != EINTR) {
 2567                     log_msg(LOG_ERR, "netio_dispatch failed: %s", strerror(errno));
 2568                 }
 2569             }
 2570             if(nsd->restart_children) {
 2571                 restart_child_servers(nsd, server_region, netio,
 2572                     &nsd->xfrd_listener->fd);
 2573                 nsd->restart_children = 0;
 2574             }
 2575             if(nsd->reload_failed) {
 2576                 sig_atomic_t cmd = NSD_RELOAD_DONE;
 2577                 pid_t mypid;
 2578                 nsd->reload_failed = 0;
 2579                 log_msg(LOG_WARNING,
 2580                        "Reload process %d failed, continuing with old database",
 2581                        (int) reload_pid);
 2582                 reload_pid = -1;
 2583                 if(reload_listener.fd != -1) close(reload_listener.fd);
 2584                 reload_listener.fd = -1;
 2585                 reload_listener.event_types = NETIO_EVENT_NONE;
 2586                 task_process_sync(nsd->task[nsd->mytask]);
 2587                 /* inform xfrd reload attempt ended */
 2588                 if(!write_socket(nsd->xfrd_listener->fd,
 2589                     &cmd, sizeof(cmd))) {
 2590                     log_msg(LOG_ERR, "problems "
 2591                       "sending SOAEND to xfrd: %s",
 2592                       strerror(errno));
 2593                 }
 2594                 mypid = getpid();
 2595                 if(!write_socket(nsd->xfrd_listener->fd, &mypid,  sizeof(mypid))) {
 2596                     log_msg(LOG_ERR, "problems sending reloadpid to xfrd: %s",
 2597                         strerror(errno));
 2598                 }
 2599             }
 2600 
 2601             break;
 2602         case NSD_RELOAD_REQ: {
 2603             sig_atomic_t cmd = NSD_RELOAD_REQ;
 2604             log_msg(LOG_WARNING, "SIGHUP received, reloading...");
 2605             DEBUG(DEBUG_IPC,1, (LOG_INFO,
 2606                 "main: ipc send reload_req to xfrd"));
 2607             if(!write_socket(nsd->xfrd_listener->fd,
 2608                 &cmd, sizeof(cmd))) {
 2609                 log_msg(LOG_ERR, "server_main: could not send "
 2610                 "reload_req to xfrd: %s", strerror(errno));
 2611             }
 2612             nsd->mode = NSD_RUN;
 2613             } break;
 2614         case NSD_RELOAD:
 2615             /* Continue to run nsd after reload */
 2616             nsd->mode = NSD_RUN;
 2617             DEBUG(DEBUG_IPC,1, (LOG_INFO, "reloading..."));
 2618             if (reload_pid != -1) {
 2619                 log_msg(LOG_WARNING, "Reload already in progress (pid = %d)",
 2620                        (int) reload_pid);
 2621                 break;
 2622             }
 2623 
 2624             /* switch the mytask to keep track of who owns task*/
 2625             nsd->mytask = 1 - nsd->mytask;
 2626             if (socketpair(AF_UNIX, SOCK_STREAM, 0, reload_sockets) == -1) {
 2627                 log_msg(LOG_ERR, "reload failed on socketpair: %s", strerror(errno));
 2628                 reload_pid = -1;
 2629                 break;
 2630             }
 2631 
 2632             /* Do actual reload */
 2633             reload_pid = fork();
 2634             switch (reload_pid) {
 2635             case -1:
 2636                 log_msg(LOG_ERR, "fork failed: %s", strerror(errno));
 2637                 break;
 2638             default:
 2639                 /* PARENT */
 2640                 close(reload_sockets[0]);
 2641                 server_reload(nsd, server_region, netio,
 2642                     reload_sockets[1]);
 2643                 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload exited to become new main"));
 2644                 close(reload_sockets[1]);
 2645                 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload closed"));
 2646                 /* drop stale xfrd ipc data */
 2647                 ((struct ipc_handler_conn_data*)nsd->
 2648                     xfrd_listener->user_data)
 2649                     ->conn->is_reading = 0;
 2650                 reload_pid = -1;
 2651                 reload_listener.fd = -1;
 2652                 reload_listener.event_types = NETIO_EVENT_NONE;
 2653                 DEBUG(DEBUG_IPC,2, (LOG_INFO, "Reload resetup; run"));
 2654                 break;
 2655             case 0:
 2656                 /* CHILD */
 2657                 /* server_main keep running until NSD_QUIT_SYNC
 2658                  * received from reload. */
 2659                 close(reload_sockets[1]);
 2660                 reload_listener.fd = reload_sockets[0];
 2661                 reload_listener.timeout = NULL;
 2662                 reload_listener.user_data = nsd;
 2663                 reload_listener.event_types = NETIO_EVENT_READ;
 2664                 reload_listener.event_handler = parent_handle_reload_command; /* listens to Quit */
 2665                 netio_add_handler(netio, &reload_listener);
 2666                 reload_pid = getppid();
 2667                 break;
 2668             }
 2669             break;
 2670         case NSD_QUIT_SYNC:
 2671             /* synchronisation of xfrd, parent and reload */
 2672             if(!nsd->quit_sync_done && reload_listener.fd != -1) {
 2673                 sig_atomic_t cmd = NSD_RELOAD;
 2674                 /* stop xfrd ipc writes in progress */
 2675                 DEBUG(DEBUG_IPC,1, (LOG_INFO,
 2676                     "main: ipc send indication reload"));
 2677                 if(!write_socket(nsd->xfrd_listener->fd,
 2678                     &cmd, sizeof(cmd))) {
 2679                     log_msg(LOG_ERR, "server_main: could not send reload "
 2680                     "indication to xfrd: %s", strerror(errno));
 2681                 }
 2682                 /* wait for ACK from xfrd */
 2683                 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: wait ipc reply xfrd"));
 2684                 nsd->quit_sync_done = 1;
 2685             }
 2686             nsd->mode = NSD_RUN;
 2687             break;
 2688         case NSD_QUIT:
 2689             /* silent shutdown during reload */
 2690             if(reload_listener.fd != -1) {
 2691                 /* acknowledge the quit, to sync reload that we will really quit now */
 2692                 sig_atomic_t cmd = NSD_RELOAD;
 2693                 DEBUG(DEBUG_IPC,1, (LOG_INFO, "main: ipc ack reload"));
 2694                 if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) {
 2695                     log_msg(LOG_ERR, "server_main: "
 2696                         "could not ack quit: %s", strerror(errno));
 2697                 }
 2698 #ifdef BIND8_STATS
 2699                 parent_send_stats(nsd, reload_listener.fd);
 2700 #endif /* BIND8_STATS */
 2701                 close(reload_listener.fd);
 2702             }
 2703             DEBUG(DEBUG_IPC,1, (LOG_INFO, "server_main: shutdown sequence"));
 2704             /* only quit children after xfrd has acked */
 2705             send_children_quit(nsd);
 2706 
 2707 #ifdef MEMCLEAN /* OS collects memory pages */
 2708             region_destroy(server_region);
 2709 #endif
 2710             server_shutdown(nsd);
 2711 
 2712             /* ENOTREACH */
 2713             break;
 2714         case NSD_SHUTDOWN:
 2715             break;
 2716         case NSD_REAP_CHILDREN:
 2717             /* continue; wait for child in run loop */
 2718             nsd->mode = NSD_RUN;
 2719             break;
 2720         case NSD_STATS:
 2721 #ifdef BIND8_STATS
 2722             set_children_stats(nsd);
 2723 #endif
 2724             nsd->mode = NSD_RUN;
 2725             break;
 2726         default:
 2727             log_msg(LOG_WARNING, "NSD main server mode invalid: %d", (int)nsd->mode);
 2728             nsd->mode = NSD_RUN;
 2729             break;
 2730         }
 2731     }
 2732     log_msg(LOG_WARNING, "signal received, shutting down...");
 2733 
 2734     /* close opened ports to avoid race with restart of nsd */
 2735     server_close_all_sockets(nsd->udp, nsd->ifs);
 2736     server_close_all_sockets(nsd->tcp, nsd->ifs);
 2737 #ifdef HAVE_SSL
 2738     daemon_remote_close(nsd->rc);
 2739 #endif
 2740     send_children_quit_and_wait(nsd);
 2741 
 2742     /* Unlink it if possible... */
 2743     unlinkpid(nsd->pidfile);
 2744     unlink(nsd->task[0]->fname);
 2745     unlink(nsd->task[1]->fname);
 2746 #ifdef USE_ZONE_STATS
 2747     unlink(nsd->zonestatfname[0]);
 2748     unlink(nsd->zonestatfname[1]);
 2749 #endif
 2750 #ifdef USE_DNSTAP
 2751     dt_collector_close(nsd->dt_collector, nsd);
 2752 #endif
 2753 
 2754     if(reload_listener.fd != -1) {
 2755         sig_atomic_t cmd = NSD_QUIT;
 2756         DEBUG(DEBUG_IPC,1, (LOG_INFO,
 2757             "main: ipc send quit to reload-process"));
 2758         if(!write_socket(reload_listener.fd, &cmd, sizeof(cmd))) {
 2759             log_msg(LOG_ERR, "server_main: could not send quit to reload: %s",
 2760                 strerror(errno));
 2761         }
 2762         fsync(reload_listener.fd);
 2763         close(reload_listener.fd);
 2764         /* wait for reload to finish processing */
 2765         while(1) {
 2766             if(waitpid(reload_pid, NULL, 0) == -1) {
 2767                 if(errno == EINTR) continue;
 2768                 if(errno == ECHILD) break;
 2769                 log_msg(LOG_ERR, "waitpid(reload %d): %s",
 2770                     (int)reload_pid, strerror(errno));
 2771             }
 2772             break;
 2773         }
 2774     }
 2775     if(nsd->xfrd_listener->fd != -1) {
 2776         /* complete quit, stop xfrd */
 2777         sig_atomic_t cmd = NSD_QUIT;
 2778         DEBUG(DEBUG_IPC,1, (LOG_INFO,
 2779             "main: ipc send quit to xfrd"));
 2780         if(!write_socket(nsd->xfrd_listener->fd, &cmd, sizeof(cmd))) {
 2781             log_msg(LOG_ERR, "server_main: could not send quit to xfrd: %s",
 2782                 strerror(errno));
 2783         }
 2784         fsync(nsd->xfrd_listener->fd);
 2785         close(nsd->xfrd_listener->fd);
 2786         (void)kill(nsd->pid, SIGTERM);
 2787     }
 2788 
 2789 #ifdef MEMCLEAN /* OS collects memory pages */
 2790     region_destroy(server_region);
 2791 #endif
 2792     /* write the nsd.db to disk, wait for it to complete */
 2793     udb_base_sync(nsd->db->udb, 1);
 2794     udb_base_close(nsd->db->udb);
 2795     server_shutdown(nsd);
 2796 }
 2797 
/*
 * Process one incoming query; thin wrapper around query_process that,
 * unlike server_process_query_udp below, applies no rate limiting.
 * now_p points at a cached current-time value shared across queries.
 */
static query_state_type
server_process_query(struct nsd *nsd, struct query *query, uint32_t *now_p)
{
    return query_process(query, nsd, now_p);
}
 2803 
/*
 * Process one incoming UDP query. When compiled with RATELIMIT, the
 * answer is additionally subject to response rate limiting, except for
 * clients whose query carried a valid DNS cookie (cookie_status checks
 * below). now_p points at a cached current-time value.
 */
static query_state_type
server_process_query_udp(struct nsd *nsd, struct query *query, uint32_t *now_p)
{
#ifdef RATELIMIT
    if(query_process(query, nsd, now_p) != QUERY_DISCARDED) {
        /* clients that presented a valid cookie bypass RRL */
        if(query->edns.cookie_status != COOKIE_VALID
        && query->edns.cookie_status != COOKIE_VALID_REUSE
        && rrl_process_query(query))
            return rrl_slip(query); /* rate limited */
        else    return QUERY_PROCESSED;
    }
    return QUERY_DISCARDED;
#else
    return query_process(query, nsd, now_p);
#endif
}
 2820 
 2821 const char*
 2822 nsd_event_vs(void)
 2823 {
 2824 #ifdef USE_MINI_EVENT
 2825     return "";
 2826 #else
 2827     return event_get_version();
 2828 #endif
 2829 }
 2830 
 2831 #if !defined(USE_MINI_EVENT) && defined(EV_FEATURE_BACKENDS)
 2832 static const char* ub_ev_backend2str(int b)
 2833 {
 2834     switch(b) {
 2835     case EVBACKEND_SELECT:  return "select";
 2836     case EVBACKEND_POLL:    return "poll";
 2837     case EVBACKEND_EPOLL:   return "epoll";
 2838     case EVBACKEND_KQUEUE:  return "kqueue";
 2839     case EVBACKEND_DEVPOLL: return "devpoll";
 2840     case EVBACKEND_PORT:    return "evport";
 2841     }
 2842     return "unknown";
 2843 }
 2844 #endif
 2845 
/*
 * Return a human-readable name for the event mechanism (backend) in
 * use: "select" for the builtin mini event, otherwise whatever the
 * event library reports ("epoll", "kqueue", ...), or "?" if the
 * library offers no way to ask.
 */
const char*
nsd_event_method(void)
{
#ifdef USE_MINI_EVENT
    /* builtin mini event always uses select */
    return "select";
#else
    struct event_base* b = nsd_child_event_base();
    const char* m = "?";
#  ifdef EV_FEATURE_BACKENDS
    /* libev: translate the backend bit to a name */
    m = ub_ev_backend2str(ev_backend((struct ev_loop*)b));
#  elif defined(HAVE_EVENT_BASE_GET_METHOD)
    /* libevent */
    m = event_base_get_method(b);
#  endif
#  ifdef MEMCLEAN
    event_base_free(b);
#  endif
    /* NOTE(review): without MEMCLEAN the base created above is not
     * freed here; for libev this is the shared default loop, for
     * libevent it appears to leak — confirm intended. */
    return m;
#endif
}
 2865 
/*
 * Create an event base using whichever event library NSD was compiled
 * against: builtin mini event, libev, or libevent. May return NULL on
 * failure; callers check for that.
 */
struct event_base*
nsd_child_event_base(void)
{
    struct event_base* base;
#ifdef USE_MINI_EVENT
    /* mini event caches the current time through these statics */
    static time_t secs;
    static struct timeval now;
    base = event_init(&secs, &now);
#else
#  if defined(HAVE_EV_LOOP) || defined(HAVE_EV_DEFAULT_LOOP)
    /* libev */
    base = (struct event_base *)ev_default_loop(EVFLAG_AUTO);
#  else
    /* libevent */
#    ifdef HAVE_EVENT_BASE_NEW
    base = event_base_new();
#    else
    base = event_init();
#    endif
#  endif
#endif
    return base;
}
 2889 
 2890 static void
 2891 add_udp_handler(
 2892     struct nsd *nsd,
 2893     struct nsd_socket *sock,
 2894     struct udp_handler_data *data)
 2895 {
 2896     struct event *handler = &data->event;
 2897 
 2898     data->nsd = nsd;
 2899     data->socket = sock;
 2900 
 2901     memset(handler, 0, sizeof(*handler));
 2902     event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_udp, data);
 2903     if(event_base_set(nsd->event_base, handler) != 0)
 2904         log_msg(LOG_ERR, "nsd udp: event_base_set failed");
 2905     if(event_add(handler, NULL) != 0)
 2906         log_msg(LOG_ERR, "nsd udp: event_add failed");
 2907 }
 2908 
/*
 * Register a persistent read event on TCP listen socket sock so that
 * handle_tcp_accept runs for incoming connections; data is filled in
 * here and data->event_added marks successful registration. With SSL
 * support, the handler is flagged for TLS accept when the socket's
 * address uses the configured tls_port.
 */
void
add_tcp_handler(
    struct nsd *nsd,
    struct nsd_socket *sock,
    struct tcp_accept_handler_data *data)
{
    struct event *handler = &data->event;

    data->nsd = nsd;
    data->socket = sock;

#ifdef HAVE_SSL
    if (nsd->tls_ctx &&
        nsd->options->tls_port &&
        using_tls_port((struct sockaddr *)&sock->addr.ai_addr, nsd->options->tls_port))
    {
        data->tls_accept = 1;
        if(verbosity >= 2) {
            char buf[48];
            addrport2str((void*)(struct sockaddr_storage*)&sock->addr.ai_addr, buf, sizeof(buf));
            /* NOTE(review): buf is built at verbosity >= 2 but the
             * VERBOSITY macro below only logs at level 4; the two
             * thresholds look inconsistent — confirm which is meant. */
            VERBOSITY(4, (LOG_NOTICE, "setup TCP for TLS service on interface %s", buf));
        }
    } else {
        data->tls_accept = 0;
    }
#endif

    memset(handler, 0, sizeof(*handler));
    event_set(handler, sock->s, EV_PERSIST|EV_READ, handle_tcp_accept, data);
    if(event_base_set(nsd->event_base, handler) != 0)
        log_msg(LOG_ERR, "nsd tcp: event_base_set failed");
    if(event_add(handler, NULL) != 0)
        log_msg(LOG_ERR, "nsd tcp: event_add failed");
    data->event_added = 1;
}
 2944 
 2945 /*
 2946  * Serve DNS requests.
 2947  */
 2948 void
 2949 server_child(struct nsd *nsd)
 2950 {
 2951     size_t i, from, numifs;
 2952     region_type *server_region = region_create(xalloc, free);
 2953     struct event_base* event_base = nsd_child_event_base();
 2954     sig_atomic_t mode;
 2955 
 2956     if(!event_base) {
 2957         log_msg(LOG_ERR, "nsd server could not create event base");
 2958         exit(1);
 2959     }
 2960     nsd->event_base = event_base;
 2961     nsd->server_region = server_region;
 2962 
 2963 #ifdef RATELIMIT
 2964     rrl_init(nsd->this_child->child_num);
 2965 #endif
 2966 
 2967     assert(nsd->server_kind != NSD_SERVER_MAIN);
 2968     DEBUG(DEBUG_IPC, 2, (LOG_INFO, "child process started"));
 2969 
 2970 #ifdef HAVE_SETPROCTITLE
 2971     setproctitle("server %d", nsd->this_child->child_num + 1);
 2972 #endif
 2973 #ifdef HAVE_CPUSET_T
 2974     if(nsd->use_cpu_affinity) {
 2975         set_cpu_affinity(nsd->this_child->cpuset);
 2976     }
 2977 #endif
 2978 
 2979     if (!(nsd->server_kind & NSD_SERVER_TCP)) {
 2980         server_close_all_sockets(nsd->tcp, nsd->ifs);
 2981     }
 2982     if (!(nsd->server_kind & NSD_SERVER_UDP)) {
 2983         server_close_all_sockets(nsd->udp, nsd->ifs);
 2984     }
 2985 
 2986     if (nsd->this_child->parent_fd != -1) {
 2987         struct event *handler;
 2988         struct ipc_handler_conn_data* user_data =
 2989             (struct ipc_handler_conn_data*)region_alloc(
 2990             server_region, sizeof(struct ipc_handler_conn_data));
 2991         user_data->nsd = nsd;
 2992         user_data->conn = xfrd_tcp_create(server_region, QIOBUFSZ);
 2993 
 2994         handler = (struct event*) region_alloc(
 2995             server_region, sizeof(*handler));
 2996         memset(handler, 0, sizeof(*handler));
 2997         event_set(handler, nsd->this_child->parent_fd, EV_PERSIST|
 2998             EV_READ, child_handle_parent_command, user_data);
 2999         if(event_base_set(event_base, handler) != 0)
 3000             log_msg(LOG_ERR, "nsd ipcchild: event_base_set failed");
 3001         if(event_add(handler, NULL) != 0)
 3002             log_msg(LOG_ERR, "nsd ipcchild: event_add failed");
 3003     }
 3004 
 3005     if(nsd->reuseport) {
 3006         numifs = nsd->ifs / nsd->reuseport;
 3007         from = numifs * nsd->this_child->child_num;
 3008         if(from+numifs > nsd->ifs) { /* should not happen */
 3009             from = 0;
 3010             numifs = nsd->ifs;
 3011         }
 3012     } else {
 3013         from = 0;
 3014         numifs = nsd->ifs;
 3015     }
 3016 
 3017     if (nsd->server_kind & NSD_SERVER_UDP) {
 3018         int child = nsd->this_child->child_num;
 3019         memset(msgs, 0, sizeof(msgs));
 3020         for (i = 0; i < NUM_RECV_PER_SELECT; i++) {
 3021             queries[i] = query_create(server_region,
 3022                 compressed_dname_offsets,
 3023                 compression_table_size, compressed_dnames);
 3024             query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
 3025             iovecs[i].iov_base          = buffer_begin(queries[i]->packet);
 3026             iovecs[i].iov_len           = buffer_remaining(queries[i]->packet);
 3027             msgs[i].msg_hdr.msg_iov     = &iovecs[i];
 3028             msgs[i].msg_hdr.msg_iovlen  = 1;
 3029             msgs[i].msg_hdr.msg_name    = &queries[i]->addr;
 3030             msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
 3031         }
 3032 
 3033         for (i = 0; i < nsd->ifs; i++) {
 3034             int listen;
 3035             struct udp_handler_data *data;
 3036 
 3037             listen = nsd_bitset_isset(nsd->udp[i].servers, child);
 3038 
 3039             if(i >= from && i < (from + numifs) && listen) {
 3040                 data = region_alloc_zero(
 3041                     nsd->server_region, sizeof(*data));
 3042                 add_udp_handler(nsd, &nsd->udp[i], data);
 3043             } else {
 3044                 /* close sockets intended for other servers */
 3045                 server_close_socket(&nsd->udp[i]);
 3046             }
 3047         }
 3048     }
 3049 
 3050     /*
 3051      * Keep track of all the TCP accept handlers so we can enable
 3052      * and disable them based on the current number of active TCP
 3053      * connections.
 3054      */
 3055     if (nsd->server_kind & NSD_SERVER_TCP) {
 3056         int child = nsd->this_child->child_num;
 3057         tcp_accept_handler_count = numifs;
 3058         tcp_accept_handlers = region_alloc_array(server_region,
 3059             numifs, sizeof(*tcp_accept_handlers));
 3060 
 3061         for (i = 0; i < nsd->ifs; i++) {
 3062             int listen;
 3063             struct tcp_accept_handler_data *data;
 3064 
 3065             listen = nsd_bitset_isset(nsd->tcp[i].servers, child);
 3066 
 3067             if(i >= from && i < (from + numifs) && listen) {
 3068                 data = &tcp_accept_handlers[i-from];
 3069                 memset(data, 0, sizeof(*data));
 3070                 add_tcp_handler(nsd, &nsd->tcp[i], data);
 3071             } else {
 3072                 /* close sockets intended for other servers */
 3073                 /*
 3074                  * uncomment this once tcp servers are no
 3075                  * longer copied in the tcp fd copy line
 3076                  * in server_init().
 3077                 server_close_socket(&nsd->tcp[i]);
 3078                 */
 3079                 /* close sockets not meant for this server*/
 3080                 if(!listen)
 3081                     server_close_socket(&nsd->tcp[i]);
 3082             }
 3083         }
 3084     } else {
 3085         tcp_accept_handler_count = 0;
 3086     }
 3087 
 3088     /* The main loop... */
 3089     while ((mode = nsd->mode) != NSD_QUIT) {
 3090         if(mode == NSD_RUN) nsd->mode = mode = server_signal_mode(nsd);
 3091 
 3092         /* Do we need to do the statistics... */
 3093         if (mode == NSD_STATS) {
 3094 #ifdef BIND8_STATS
 3095             int p = nsd->st.period;
 3096             nsd->st.period = 1; /* force stats printout */
 3097             /* Dump the statistics */
 3098             bind8_stats(nsd);
 3099             nsd->st.period = p;
 3100 #else /* !BIND8_STATS */
 3101             log_msg(LOG_NOTICE, "Statistics support not enabled at compile time.");
 3102 #endif /* BIND8_STATS */
 3103 
 3104             nsd->mode = NSD_RUN;
 3105         }
 3106         else if (mode == NSD_REAP_CHILDREN) {
 3107             /* got signal, notify parent. parent reaps terminated children. */
 3108             if (nsd->this_child->parent_fd != -1) {
 3109                 sig_atomic_t parent_notify = NSD_REAP_CHILDREN;
 3110                 if (write(nsd->this_child->parent_fd,
 3111                     &parent_notify,
 3112                     sizeof(parent_notify)) == -1)
 3113                 {
 3114                     log_msg(LOG_ERR, "problems sending command from %d to parent: %s",
 3115                         (int) nsd->this_child->pid, strerror(errno));
 3116                 }
 3117             } else /* no parent, so reap 'em */
 3118                 while (waitpid(-1, NULL, WNOHANG) > 0) ;
 3119             nsd->mode = NSD_RUN;
 3120         }
 3121         else if(mode == NSD_RUN) {
 3122             /* Wait for a query... */
 3123             if(event_base_loop(event_base, EVLOOP_ONCE) == -1) {
 3124                 if (errno != EINTR) {
 3125                     log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno));
 3126                     break;
 3127                 }
 3128             }
 3129         } else if(mode == NSD_QUIT) {
 3130             /* ignore here, quit */
 3131         } else {
 3132             log_msg(LOG_ERR, "mode bad value %d, back to service.",
 3133                 (int)mode);
 3134             nsd->mode = NSD_RUN;
 3135         }
 3136     }
 3137 
 3138     service_remaining_tcp(nsd);
 3139 #ifdef  BIND8_STATS
 3140     bind8_stats(nsd);
 3141 #endif /* BIND8_STATS */
 3142 
 3143 #ifdef MEMCLEAN /* OS collects memory pages */
 3144 #ifdef RATELIMIT
 3145     rrl_deinit(nsd->this_child->child_num);
 3146 #endif
 3147     event_base_free(event_base);
 3148     region_destroy(server_region);
 3149 #endif
 3150     server_shutdown(nsd);
 3151 }
 3152 
 3153 static void remaining_tcp_timeout(int ATTR_UNUSED(fd), short event, void* arg)
 3154 {
 3155     int* timed_out = (int*)arg;
 3156         assert(event & EV_TIMEOUT); (void)event;
 3157     /* wake up the service tcp thread, note event is no longer
 3158      * registered */
 3159     *timed_out = 1;
 3160 }
 3161 
/*
 * Finish serving TCP connections that are still open when this server
 * child stops normal operation (called from server_child on exit).
 * Each remaining connection is moved onto a private event base with
 * its timeout capped at 100 ms and marked to accept no further
 * queries; the loop then services them until all are done, a quit
 * signal arrives, or roughly a second passes without progress.
 */
void
service_remaining_tcp(struct nsd* nsd)
{
    struct tcp_handler_data* p;
    struct event_base* event_base;
    /* check if it is needed */
    if(nsd->current_tcp_count == 0 || tcp_active_list == NULL)
        return;
    VERBOSITY(4, (LOG_INFO, "service remaining TCP connections"));
#ifdef USE_DNSTAP
    /* remove dnstap collector, we cannot write there because the new
     * child process is using the file descriptor, or the child
     * process after that. */
    dt_collector_destroy(nsd->dt_collector, nsd);
    nsd->dt_collector = NULL;
#endif
    /* setup event base */
    event_base = nsd_child_event_base();
    if(!event_base) {
        log_msg(LOG_ERR, "nsd remain tcp could not create event base");
        return;
    }
    /* register tcp connections */
    for(p = tcp_active_list; p != NULL; p = p->next) {
        struct timeval timeout;
        int fd = p->event.ev_fd;
#ifdef USE_MINI_EVENT
        short event = p->event.ev_flags & (EV_READ|EV_WRITE);
#else
        short event = p->event.ev_events & (EV_READ|EV_WRITE);
#endif
        /* pick the handler matching the read/write phase the
         * connection was in on the old event base */
        void (*fn)(int, short, void*);
#ifdef HAVE_SSL
        if(p->tls) {
            if((event&EV_READ))
                fn = handle_tls_reading;
            else    fn = handle_tls_writing;
        } else {
#endif
            if((event&EV_READ))
                fn = handle_tcp_reading;
            else    fn = handle_tcp_writing;
#ifdef HAVE_SSL
        }
#endif

        /* finish work in progress only; accept no new queries */
        p->tcp_no_more_queries = 1;
        /* set timeout to 1/10 second */
        if(p->tcp_timeout > 100)
            p->tcp_timeout = 100;
        timeout.tv_sec = p->tcp_timeout / 1000;
        timeout.tv_usec = (p->tcp_timeout % 1000)*1000;
        event_del(&p->event);
        memset(&p->event, 0, sizeof(p->event));
        event_set(&p->event, fd, EV_PERSIST | event | EV_TIMEOUT,
            fn, p);
        if(event_base_set(event_base, &p->event) != 0)
            log_msg(LOG_ERR, "event base set failed");
        if(event_add(&p->event, &timeout) != 0)
            log_msg(LOG_ERR, "event add failed");
    }

    /* handle it */
    while(nsd->current_tcp_count > 0) {
        /* NOTE(review): mode_t is the file-permission type; other
         * signal-mode code here uses sig_atomic_t — likely a typo,
         * harmless as long as the mode values fit. */
        mode_t m = server_signal_mode(nsd);
        struct event timeout;
        struct timeval tv;
        int timed_out = 0;
        if(m == NSD_QUIT || m == NSD_SHUTDOWN ||
            m == NSD_REAP_CHILDREN) {
            /* quit */
            break;
        }
        /* timer */
        /* have to do something every second */
        tv.tv_sec = 1;
        tv.tv_usec = 0;
        memset(&timeout, 0, sizeof(timeout));
        event_set(&timeout, -1, EV_TIMEOUT, remaining_tcp_timeout,
            &timed_out);
        if(event_base_set(event_base, &timeout) != 0)
            log_msg(LOG_ERR, "remaintcp timer: event_base_set failed");
        if(event_add(&timeout, &tv) != 0)
            log_msg(LOG_ERR, "remaintcp timer: event_add failed");

        /* service loop */
        if(event_base_loop(event_base, EVLOOP_ONCE) == -1) {
            if (errno != EINTR) {
                log_msg(LOG_ERR, "dispatch failed: %s", strerror(errno));
                break;
            }
        }
        if(!timed_out) {
            event_del(&timeout);
        } else {
            /* timed out, quit */
            VERBOSITY(4, (LOG_INFO, "service remaining TCP connections: timed out, quit"));
            break;
        }
    }
#ifdef MEMCLEAN
    event_base_free(event_base);
#endif
    /* continue to quit after return */
}
 3267 
 3268 /* Implement recvmmsg and sendmmsg if the platform does not. These functions
 3269  * are always used, even if nonblocking operations are broken, in which case
 3270  * NUM_RECV_PER_SELECT is defined to 1 (one).
 3271  */
 3272 #if defined(HAVE_RECVMMSG)
 3273 #define nsd_recvmmsg recvmmsg
 3274 #else /* !HAVE_RECVMMSG */
 3275 
 3276 static int
 3277 nsd_recvmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen,
 3278              int flags, struct timespec *timeout)
 3279 {
 3280     unsigned int vpos = 0;
 3281     ssize_t rcvd;
 3282 
 3283     /* timeout is ignored, ensure caller does not expect it to work */
 3284     assert(timeout == NULL); (void)timeout;
 3285 
 3286     while(vpos < vlen) {
 3287         rcvd = recvfrom(sockfd,
 3288                         msgvec[vpos].msg_hdr.msg_iov->iov_base,
 3289                         msgvec[vpos].msg_hdr.msg_iov->iov_len,
 3290                         flags,
 3291                         msgvec[vpos].msg_hdr.msg_name,
 3292                        &msgvec[vpos].msg_hdr.msg_namelen);
 3293         if(rcvd < 0) {
 3294             break;
 3295         } else {
 3296             assert((unsigned long long)rcvd <= (unsigned long long)UINT_MAX);
 3297             msgvec[vpos].msg_len = (unsigned int)rcvd;
 3298             vpos++;
 3299         }
 3300     }
 3301 
 3302     if(vpos) {
 3303         /* error will be picked up next time */
 3304         return (int)vpos;
 3305     } else if(errno == 0) {
 3306         return 0;
 3307     } else if(errno == EAGAIN) {
 3308         return 0;
 3309     }
 3310 
 3311     return -1;
 3312 }
 3313 #endif /* HAVE_RECVMMSG */
 3314 
 3315 #ifdef HAVE_SENDMMSG
 3316 #define nsd_sendmmsg(...) sendmmsg(__VA_ARGS__)
 3317 #else /* !HAVE_SENDMMSG */
 3318 
 3319 static int
 3320 nsd_sendmmsg(int sockfd, struct mmsghdr *msgvec, unsigned int vlen, int flags)
 3321 {
 3322     unsigned int vpos = 0;
 3323     ssize_t snd;
 3324 
 3325     while(vpos < vlen) {
 3326         assert(msgvec[vpos].msg_hdr.msg_iovlen == 1);
 3327         snd = sendto(sockfd,
 3328                      msgvec[vpos].msg_hdr.msg_iov->iov_base,
 3329                      msgvec[vpos].msg_hdr.msg_iov->iov_len,
 3330                      flags,
 3331                      msgvec[vpos].msg_hdr.msg_name,
 3332                      msgvec[vpos].msg_hdr.msg_namelen);
 3333         if(snd < 0) {
 3334             break;
 3335         } else {
 3336             msgvec[vpos].msg_len = (unsigned int)snd;
 3337             vpos++;
 3338         }
 3339     }
 3340 
 3341     if(vpos) {
 3342         return (int)vpos;
 3343     } else if(errno == 0) {
 3344         return 0;
 3345     }
 3346 
 3347     return -1;
 3348 }
 3349 #endif /* HAVE_SENDMMSG */
 3350 
 3351 static int
 3352 port_is_zero(
 3353 #ifdef INET6
 3354         struct sockaddr_storage *addr
 3355 #else
 3356         struct sockaddr_in *addr
 3357 #endif
 3358     )
 3359 {
 3360 #ifdef INET6
 3361     if(addr->ss_family == AF_INET6) {
 3362         return (((struct sockaddr_in6 *)addr)->sin6_port) == 0;
 3363     } else if(addr->ss_family == AF_INET) {
 3364         return (((struct sockaddr_in *)addr)->sin_port) == 0;
 3365     }
 3366     return 0;
 3367 #else
 3368     if(addr->sin_family == AF_INET) {
 3369         return addr->sin_port == 0;
 3370     }
 3371     return 0;
 3372 #endif
 3373 }
 3374 
/*
 * Event callback for a UDP server socket.  Receives a batch of up to
 * NUM_RECV_PER_SELECT datagrams into the file-scope parallel arrays
 * msgs/iovecs/queries (presumably sized NUM_RECV_PER_SELECT -- they are
 * declared elsewhere in this file), processes each query, compacts
 * dropped queries out of the batch, and sends all answers back with
 * nsd_sendmmsg.
 */
static void
handle_udp(int fd, short event, void* arg)
{
    struct udp_handler_data *data = (struct udp_handler_data *) arg;
    int received, sent, recvcount, i;
    struct query *q;
    uint32_t now = 0;

    if (!(event & EV_READ)) {
        return;
    }
    recvcount = nsd_recvmmsg(fd, msgs, NUM_RECV_PER_SELECT, 0, NULL);
    /* this printf strangely gave a performance increase on Linux */
    /* printf("recvcount %d \n", recvcount); */
    if (recvcount == -1) {
        if (errno != EAGAIN && errno != EINTR) {
            log_msg(LOG_ERR, "recvmmsg failed: %s", strerror(errno));
            STATUP(data->nsd, rxerr);
            /* No zone statup */
        }
        /* Simply no data available */
        return;
    }
    for (i = 0; i < recvcount; i++) {
    /* loopstart: reprocess slot i after a dropped query was swapped
     * away (the swap puts an unprocessed entry at index i) */
    loopstart:
        received = msgs[i].msg_len;
        queries[i]->addrlen = msgs[i].msg_hdr.msg_namelen;
        q = queries[i];
        if (received == -1) {
            log_msg(LOG_ERR, "recvmmsg %d failed %s", i, strerror(
#if defined(HAVE_RECVMMSG)
                msgs[i].msg_hdr.msg_flags
#else
                errno
#endif
                ));
            STATUP(data->nsd, rxerr);
            /* No zone statup */
            /* reset the slot so it is reusable, then drop it */
            query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
            iovecs[i].iov_len = buffer_remaining(q->packet);
            msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
            goto swap_drop;
        }

        /* Account... */
#ifdef BIND8_STATS
        if (data->socket->addr.ai_family == AF_INET) {
            STATUP(data->nsd, qudp);
        } else if (data->socket->addr.ai_family == AF_INET6) {
            STATUP(data->nsd, qudp6);
        }
#endif

        buffer_skip(q->packet, received);
        buffer_flip(q->packet);
#ifdef USE_DNSTAP
        /*
         * sending UDP-query with server address (local) and client address to dnstap process
         */
        log_addr("query from client", &q->addr);
        log_addr("to server (local)", (void*)&data->socket->addr.ai_addr);
        dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &q->addr, q->addrlen,
            q->tcp, q->packet);
#endif /* USE_DNSTAP */

        /* Process and answer the query... */
        if (server_process_query_udp(data->nsd, q, &now) != QUERY_DISCARDED) {
            if (RCODE(q->packet) == RCODE_OK && !AA(q->packet)) {
                STATUP(data->nsd, nona);
                ZTATUP(data->nsd, q->zone, nona);
            }

#ifdef USE_ZONE_STATS
            if (data->socket->addr.ai_family == AF_INET) {
                ZTATUP(data->nsd, q->zone, qudp);
            } else if (data->socket->addr.ai_family == AF_INET6) {
                ZTATUP(data->nsd, q->zone, qudp6);
            }
#endif

            /* Add EDNS0 and TSIG info if necessary.  */
            query_add_optional(q, data->nsd, &now);

            /* the answer is now in q->packet; expose it to sendmmsg */
            buffer_flip(q->packet);
            iovecs[i].iov_len = buffer_remaining(q->packet);
#ifdef BIND8_STATS
            /* Account the rcode & TC... */
            STATUP2(data->nsd, rcode, RCODE(q->packet));
            ZTATUP2(data->nsd, q->zone, rcode, RCODE(q->packet));
            if (TC(q->packet)) {
                STATUP(data->nsd, truncated);
                ZTATUP(data->nsd, q->zone, truncated);
            }
#endif /* BIND8_STATS */
#ifdef USE_DNSTAP
            /*
             * sending UDP-response with server address (local) and client address to dnstap process
             */
            log_addr("from server (local)", (void*)&data->socket->addr.ai_addr);
            log_addr("response to client", &q->addr);
            dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr,
                &q->addr, q->addrlen, q->tcp, q->packet,
                q->zone);
#endif /* USE_DNSTAP */
        } else {
            /* query discarded: recycle the slot and remove it from
             * the batch so it is not transmitted */
            query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
            iovecs[i].iov_len = buffer_remaining(q->packet);
            msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
        swap_drop:
            STATUP(data->nsd, dropped);
            ZTATUP(data->nsd, q->zone, dropped);
            if(i != recvcount-1) {
                /* swap with last and decrease recvcount */
                struct mmsghdr mtmp = msgs[i];
                struct iovec iotmp = iovecs[i];
                recvcount--;
                msgs[i] = msgs[recvcount];
                iovecs[i] = iovecs[recvcount];
                queries[i] = queries[recvcount];
                msgs[recvcount] = mtmp;
                iovecs[recvcount] = iotmp;
                queries[recvcount] = q;
                /* re-point msg_iov: the iovec structs were swapped by
                 * value, so the msg_hdrs must reference the new slots */
                msgs[i].msg_hdr.msg_iov = &iovecs[i];
                msgs[recvcount].msg_hdr.msg_iov = &iovecs[recvcount];
                goto loopstart;
            } else { recvcount --; }
        }
    }

    /* send until all are sent */
    i = 0;
    while(i<recvcount) {
        sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0);
        if(sent == -1) {
            if(errno == ENOBUFS ||
#ifdef EWOULDBLOCK
                errno == EWOULDBLOCK ||
#endif
                errno == EAGAIN) {
                /* block to wait until send buffer avail */
                int flag, errstore;
                /* temporarily clear O_NONBLOCK so the retry blocks
                 * instead of spinning, then restore it afterwards */
                if((flag = fcntl(fd, F_GETFL)) == -1) {
                    log_msg(LOG_ERR, "cannot fcntl F_GETFL: %s", strerror(errno));
                    flag = 0;
                }
                flag &= ~O_NONBLOCK;
                if(fcntl(fd, F_SETFL, flag) == -1)
                    log_msg(LOG_ERR, "cannot fcntl F_SETFL 0: %s", strerror(errno));
                sent = nsd_sendmmsg(fd, &msgs[i], recvcount-i, 0);
                /* save errno: the fcntl calls below may clobber it */
                errstore = errno;
                flag |= O_NONBLOCK;
                if(fcntl(fd, F_SETFL, flag) == -1)
                    log_msg(LOG_ERR, "cannot fcntl F_SETFL O_NONBLOCK: %s", strerror(errno));
                if(sent != -1) {
                    i += sent;
                    continue;
                }
                errno = errstore;
            }
            if(errno == EINVAL) {
                /* skip the invalid argument entry,
                 * send the remaining packets in the list */
                if(!(port_is_zero((void*)&queries[i]->addr) &&
                    verbosity < 3)) {
                    const char* es = strerror(errno);
                    char a[64];
                    addrport2str((void*)&queries[i]->addr, a, sizeof(a));
                    log_msg(LOG_ERR, "sendmmsg skip invalid argument [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es);
                }
                i += 1;
                continue;
            }
            /* don't log transient network full errors, unless
             * on higher verbosity */
            if(!(errno == ENOBUFS && verbosity < 1) &&
#ifdef EWOULDBLOCK
               errno != EWOULDBLOCK &&
#endif
               errno != EAGAIN) {
                const char* es = strerror(errno);
                char a[64];
                addrport2str((void*)&queries[i]->addr, a, sizeof(a));
                log_msg(LOG_ERR, "sendmmsg [0]=%s count=%d failed: %s", a, (int)(recvcount-i), es);
            }
#ifdef BIND8_STATS
            data->nsd->st.txerr += recvcount-i;
#endif /* BIND8_STATS */
            break;
        }
        i += sent;
    }
    /* reset all slots for the next receive batch */
    for(i=0; i<recvcount; i++) {
        query_reset(queries[i], UDP_MAX_MESSAGE_LEN, 0);
        iovecs[i].iov_len = buffer_remaining(queries[i]->packet);
        msgs[i].msg_hdr.msg_namelen = queries[i]->addrlen;
    }
}
 3572 
 3573 #ifdef HAVE_SSL
 3574 /*
 3575  * Setup an event for the tcp handler.
 3576  */
 3577 static void
 3578 tcp_handler_setup_event(struct tcp_handler_data* data, void (*fn)(int, short, void *),
 3579        int fd, short event)
 3580 {
 3581     struct timeval timeout;
 3582     struct event_base* ev_base;
 3583 
 3584     timeout.tv_sec = data->nsd->tcp_timeout;
 3585     timeout.tv_usec = 0L;
 3586 
 3587     ev_base = data->event.ev_base;
 3588     event_del(&data->event);
 3589     memset(&data->event, 0, sizeof(data->event));
 3590     event_set(&data->event, fd, event, fn, data);
 3591     if(event_base_set(ev_base, &data->event) != 0)
 3592         log_msg(LOG_ERR, "event base set failed");
 3593     if(event_add(&data->event, &timeout) != 0)
 3594         log_msg(LOG_ERR, "event add failed");
 3595 }
 3596 #endif /* HAVE_SSL */
 3597 
/*
 * Tear down a TCP connection handler: remove its event, shut down the
 * TLS session if present, close the socket, unlink it from the active
 * connection list, re-enable accepting when appropriate, and release
 * the handler's memory region.
 */
static void
cleanup_tcp_handler(struct tcp_handler_data* data)
{
    event_del(&data->event);
#ifdef HAVE_SSL
    if(data->tls) {
        SSL_shutdown(data->tls);
        SSL_free(data->tls);
        data->tls = NULL;
    }
#endif
    close(data->event.ev_fd);
    /* unlink from the doubly linked list of active tcp handlers */
    if(data->prev)
        data->prev->next = data->next;
    else    tcp_active_list = data->next;
    if(data->next)
        data->next->prev = data->prev;

    /*
     * Enable the TCP accept handlers when the current number of
     * TCP connections is about to drop below the maximum number
     * of TCP connections.
     */
    if (slowaccept || data->nsd->current_tcp_count == data->nsd->maximum_tcp_count) {
        configure_handler_event_types(EV_READ|EV_PERSIST);
        if(slowaccept) {
            /* cancel the pending slow-accept backoff timer */
            event_del(&slowaccept_event);
            slowaccept = 0;
        }
    }
    --data->nsd->current_tcp_count;
    assert(data->nsd->current_tcp_count >= 0);

    /* NOTE(review): region_destroy presumably frees data itself, so
     * data must not be touched after this point -- confirm that the
     * handler is allocated from data->region */
    region_destroy(data->region);
}
 3633 
/*
 * Event callback that reads a query from a plain TCP connection.
 * First accumulates the two byte length prefix into query->tcplen,
 * then the message body; returns early (progress kept in
 * data->bytes_transmitted) until a complete query has arrived, then
 * processes it and switches this fd's event to the TCP write handler
 * to transmit the answer.
 */
static void
handle_tcp_reading(int fd, short event, void* arg)
{
    struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
    ssize_t received;
    struct event_base* ev_base;
    struct timeval timeout;
    uint32_t now = 0;

    if ((event & EV_TIMEOUT)) {
        /* Connection timed out.  */
        cleanup_tcp_handler(data);
        return;
    }

    if ((data->nsd->tcp_query_count > 0 &&
        data->query_count >= data->nsd->tcp_query_count) ||
        data->tcp_no_more_queries) {
        /* No more queries allowed on this tcp connection. */
        cleanup_tcp_handler(data);
        return;
    }

    assert((event & EV_READ));

    /* bytes_transmitted == 0 means we are at the start of a fresh
     * query on this connection */
    if (data->bytes_transmitted == 0) {
        query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1);
    }

    /*
     * Check if we received the leading packet length bytes yet.
     */
    if (data->bytes_transmitted < sizeof(uint16_t)) {
        received = read(fd,
                (char *) &data->query->tcplen
                + data->bytes_transmitted,
                sizeof(uint16_t) - data->bytes_transmitted);
        if (received == -1) {
            if (errno == EAGAIN || errno == EINTR) {
                /*
                 * Read would block, wait until more
                 * data is available.
                 */
                return;
            } else {
                char buf[48];
                addr2str(&data->query->addr, buf, sizeof(buf));
#ifdef ECONNRESET
                if (verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
                log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno));
                cleanup_tcp_handler(data);
                return;
            }
        } else if (received == 0) {
            /* EOF */
            cleanup_tcp_handler(data);
            return;
        }

        data->bytes_transmitted += received;
        if (data->bytes_transmitted < sizeof(uint16_t)) {
            /*
             * Not done with the tcplen yet, wait for more
             * data to become available.
             */
            return;
        }

        assert(data->bytes_transmitted == sizeof(uint16_t));

        /* length prefix arrives in network byte order */
        data->query->tcplen = ntohs(data->query->tcplen);

        /*
         * Minimum query size is:
         *
         *     Size of the header (12)
         *   + Root domain name   (1)
         *   + Query class        (2)
         *   + Query type         (2)
         */
        if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) {
            VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection"));
            cleanup_tcp_handler(data);
            return;
        }

        if (data->query->tcplen > data->query->maxlen) {
            VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection"));
            cleanup_tcp_handler(data);
            return;
        }

        /* cap the packet buffer to exactly the announced length */
        buffer_set_limit(data->query->packet, data->query->tcplen);
    }

    assert(buffer_remaining(data->query->packet) > 0);

    /* Read the (remaining) query data.  */
    received = read(fd,
            buffer_current(data->query->packet),
            buffer_remaining(data->query->packet));
    if (received == -1) {
        if (errno == EAGAIN || errno == EINTR) {
            /*
             * Read would block, wait until more data is
             * available.
             */
            return;
        } else {
            char buf[48];
            addr2str(&data->query->addr, buf, sizeof(buf));
#ifdef ECONNRESET
            if (verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
            log_msg(LOG_ERR, "failed reading from %s tcp: %s", buf, strerror(errno));
            cleanup_tcp_handler(data);
            return;
        }
    } else if (received == 0) {
        /* EOF */
        cleanup_tcp_handler(data);
        return;
    }

    data->bytes_transmitted += received;
    buffer_skip(data->query->packet, received);
    if (buffer_remaining(data->query->packet) > 0) {
        /*
         * Message not yet complete, wait for more data to
         * become available.
         */
        return;
    }

    assert(buffer_position(data->query->packet) == data->query->tcplen);

    /* Account... */
#ifdef BIND8_STATS
#ifndef INET6
    STATUP(data->nsd, ctcp);
#else
    if (data->query->addr.ss_family == AF_INET) {
        STATUP(data->nsd, ctcp);
    } else if (data->query->addr.ss_family == AF_INET6) {
        STATUP(data->nsd, ctcp6);
    }
#endif
#endif /* BIND8_STATS */

    /* We have a complete query, process it.  */

    /* tcp-query-count: handle query counter ++ */
    data->query_count++;

    buffer_flip(data->query->packet);
#ifdef USE_DNSTAP
    /*
     * and send TCP-query with found address (local) and client address to dnstap process
     */
    log_addr("query from client", &data->query->addr);
    log_addr("to server (local)", (void*)&data->socket->addr.ai_addr);
    dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr,
        data->query->addrlen, data->query->tcp, data->query->packet);
#endif /* USE_DNSTAP */
    data->query_state = server_process_query(data->nsd, data->query, &now);
    if (data->query_state == QUERY_DISCARDED) {
        /* Drop the packet and the entire connection... */
        STATUP(data->nsd, dropped);
        ZTATUP(data->nsd, data->query->zone, dropped);
        cleanup_tcp_handler(data);
        return;
    }

#ifdef BIND8_STATS
    if (RCODE(data->query->packet) == RCODE_OK
        && !AA(data->query->packet))
    {
        STATUP(data->nsd, nona);
        ZTATUP(data->nsd, data->query->zone, nona);
    }
#endif /* BIND8_STATS */

#ifdef USE_ZONE_STATS
#ifndef INET6
    ZTATUP(data->nsd, data->query->zone, ctcp);
#else
    if (data->query->addr.ss_family == AF_INET) {
        ZTATUP(data->nsd, data->query->zone, ctcp);
    } else if (data->query->addr.ss_family == AF_INET6) {
        ZTATUP(data->nsd, data->query->zone, ctcp6);
    }
#endif
#endif /* USE_ZONE_STATS */

    query_add_optional(data->query, data->nsd, &now);

    /* Switch to the tcp write handler.  */
    buffer_flip(data->query->packet);
    data->query->tcplen = buffer_remaining(data->query->packet);
#ifdef BIND8_STATS
    /* Account the rcode & TC... */
    STATUP2(data->nsd, rcode, RCODE(data->query->packet));
    ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet));
    if (TC(data->query->packet)) {
        STATUP(data->nsd, truncated);
        ZTATUP(data->nsd, data->query->zone, truncated);
    }
#endif /* BIND8_STATS */
#ifdef USE_DNSTAP
    /*
     * sending TCP-response with found (earlier) address (local) and client address to dnstap process
     */
    log_addr("from server (local)", (void*)&data->socket->addr.ai_addr);
    log_addr("response to client", &data->query->addr);
    dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr,
        data->query->addrlen, data->query->tcp, data->query->packet,
        data->query->zone);
#endif /* USE_DNSTAP */
    /* restart the byte counter for the write phase */
    data->bytes_transmitted = 0;

    timeout.tv_sec = data->tcp_timeout / 1000;
    timeout.tv_usec = (data->tcp_timeout % 1000)*1000;

    ev_base = data->event.ev_base;
    event_del(&data->event);
    memset(&data->event, 0, sizeof(data->event));
    event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT,
        handle_tcp_reading, data);
    if(event_base_set(ev_base, &data->event) != 0)
        log_msg(LOG_ERR, "event base set tcpr failed");
    if(event_add(&data->event, &timeout) != 0)
        log_msg(LOG_ERR, "event add tcpr failed");
    /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/
    handle_tcp_writing(fd, EV_WRITE, data);
}
 3870 
/*
 * Event callback that writes a response on a plain TCP connection.
 * Sends the two byte length prefix (batched with the packet via
 * writev when available) and then the packet body, continues AXFR
 * output if one is in progress, and finally reinstalls the read
 * handler to wait for the next query.
 */
static void
handle_tcp_writing(int fd, short event, void* arg)
{
    struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
    ssize_t sent;
    struct query *q = data->query;
    struct timeval timeout;
    struct event_base* ev_base;
    uint32_t now = 0;

    if ((event & EV_TIMEOUT)) {
        /* Connection timed out.  */
        cleanup_tcp_handler(data);
        return;
    }

    assert((event & EV_WRITE));

    if (data->bytes_transmitted < sizeof(q->tcplen)) {
        /* Writing the response packet length.  */
        uint16_t n_tcplen = htons(q->tcplen);
#ifdef HAVE_WRITEV
        /* gather the (remaining) length prefix and the whole packet
         * into one writev call */
        struct iovec iov[2];
        iov[0].iov_base = (uint8_t*)&n_tcplen + data->bytes_transmitted;
        iov[0].iov_len = sizeof(n_tcplen) - data->bytes_transmitted; 
        iov[1].iov_base = buffer_begin(q->packet);
        iov[1].iov_len = buffer_limit(q->packet);
        sent = writev(fd, iov, 2);
#else /* HAVE_WRITEV */
        sent = write(fd,
                 (const char *) &n_tcplen + data->bytes_transmitted,
                 sizeof(n_tcplen) - data->bytes_transmitted);
#endif /* HAVE_WRITEV */
        if (sent == -1) {
            if (errno == EAGAIN || errno == EINTR) {
                /*
                 * Write would block, wait until
                 * socket becomes writable again.
                 */
                return;
            } else {
#ifdef ECONNRESET
                if(verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
#ifdef EPIPE
                  if(verbosity >= 2 || errno != EPIPE)
#endif /* EPIPE 'broken pipe' */
                    log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
                cleanup_tcp_handler(data);
                return;
            }
        }

        data->bytes_transmitted += sent;
        if (data->bytes_transmitted < sizeof(q->tcplen)) {
            /*
             * Writing not complete, wait until socket
             * becomes writable again.
             */
            return;
        }

#ifdef HAVE_WRITEV
        /* leave only the packet-body byte count in 'sent'.
         * NOTE(review): if an earlier call already wrote part of the
         * prefix, subtracting the full sizeof(n_tcplen) here looks
         * one byte too many (iov[0] covered fewer prefix bytes) --
         * verify against upstream */
        sent -= sizeof(n_tcplen);
        /* handle potential 'packet done' code */
        goto packet_could_be_done;
#endif
    }
 
    sent = write(fd,
             buffer_current(q->packet),
             buffer_remaining(q->packet));
    if (sent == -1) {
        if (errno == EAGAIN || errno == EINTR) {
            /*
             * Write would block, wait until
             * socket becomes writable again.
             */
            return;
        } else {
#ifdef ECONNRESET
            if(verbosity >= 2 || errno != ECONNRESET)
#endif /* ECONNRESET */
#ifdef EPIPE
                  if(verbosity >= 2 || errno != EPIPE)
#endif /* EPIPE 'broken pipe' */
            log_msg(LOG_ERR, "failed writing to tcp: %s", strerror(errno));
            cleanup_tcp_handler(data);
            return;
        }
    }

    data->bytes_transmitted += sent;
#ifdef HAVE_WRITEV
  packet_could_be_done:
#endif
    buffer_skip(q->packet, sent);
    if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) {
        /*
         * Still more data to write when socket becomes
         * writable again.
         */
        return;
    }

    assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen));

    if (data->query_state == QUERY_IN_AXFR) {
        /* Continue processing AXFR and writing back results.  */
        buffer_clear(q->packet);
        data->query_state = query_axfr(data->nsd, q);
        if (data->query_state != QUERY_PROCESSED) {
            query_add_optional(data->query, data->nsd, &now);

            /* Reset data. */
            buffer_flip(q->packet);
            q->tcplen = buffer_remaining(q->packet);
            data->bytes_transmitted = 0;
            /* Reset timeout.  */
            timeout.tv_sec = data->tcp_timeout / 1000;
            timeout.tv_usec = (data->tcp_timeout % 1000)*1000;
            ev_base = data->event.ev_base;
            event_del(&data->event);
            memset(&data->event, 0, sizeof(data->event));
            event_set(&data->event, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT,
                handle_tcp_writing, data);
            if(event_base_set(ev_base, &data->event) != 0)
                log_msg(LOG_ERR, "event base set tcpw failed");
            if(event_add(&data->event, &timeout) != 0)
                log_msg(LOG_ERR, "event add tcpw failed");

            /*
             * Write data if/when the socket is writable
             * again.
             */
            return;
        }
    }

    /*
     * Done sending, wait for the next request to arrive on the
     * TCP socket by installing the TCP read handler.
     */
    if ((data->nsd->tcp_query_count > 0 &&
        data->query_count >= data->nsd->tcp_query_count) ||
        data->tcp_no_more_queries) {

        (void) shutdown(fd, SHUT_WR);
    }

    data->bytes_transmitted = 0;

    timeout.tv_sec = data->tcp_timeout / 1000;
    timeout.tv_usec = (data->tcp_timeout % 1000)*1000;
    ev_base = data->event.ev_base;
    event_del(&data->event);
    memset(&data->event, 0, sizeof(data->event));
    event_set(&data->event, fd, EV_PERSIST | EV_READ | EV_TIMEOUT,
        handle_tcp_reading, data);
    if(event_base_set(ev_base, &data->event) != 0)
        log_msg(LOG_ERR, "event base set tcpw failed");
    if(event_add(&data->event, &timeout) != 0)
        log_msg(LOG_ERR, "event add tcpw failed");
}
 4035 
 4036 #ifdef HAVE_SSL
 4037 /** create SSL object and associate fd */
 4038 static SSL*
 4039 incoming_ssl_fd(SSL_CTX* ctx, int fd)
 4040 {
 4041     SSL* ssl = SSL_new((SSL_CTX*)ctx);
 4042     if(!ssl) {
 4043         log_crypto_err("could not SSL_new");
 4044         return NULL;
 4045     }
 4046     SSL_set_accept_state(ssl);
 4047     (void)SSL_set_mode(ssl, SSL_MODE_AUTO_RETRY);
 4048     if(!SSL_set_fd(ssl, fd)) {
 4049         log_crypto_err("could not SSL_set_fd");
 4050         SSL_free(ssl);
 4051         return NULL;
 4052     }
 4053     return ssl;
 4054 }
 4055 
/** TLS handshake to upgrade TCP connection.
 * Progresses a (possibly partial) SSL_do_handshake and flips the
 * libevent callback between read and write mode as OpenSSL requests
 * (tracked in data->shake_state).  'writing' is nonzero when invoked
 * from the write-side handler.  Returns 0 when the connection was
 * torn down (handler cleaned up), 1 otherwise. */
static int
tls_handshake(struct tcp_handler_data* data, int fd, int writing)
{
    int r;
    if(data->shake_state == tls_hs_read_event) {
        /* read condition satisfied back to writing */
        tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE);
        data->shake_state = tls_hs_none;
        return 1;
    }
    if(data->shake_state == tls_hs_write_event) {
        /* write condition satisfied back to reading */
        tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
        data->shake_state = tls_hs_none;
        return 1;
    }

    /* (continue to) setup the TLS connection */
    ERR_clear_error();
    r = SSL_do_handshake(data->tls);

    if(r != 1) {
        int want = SSL_get_error(data->tls, r);
        if(want == SSL_ERROR_WANT_READ) {
            if(data->shake_state == tls_hs_read) {
                /* try again later */
                return 1;
            }
            data->shake_state = tls_hs_read;
            /* switch back to reading mode */
            tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
            return 1;
        } else if(want == SSL_ERROR_WANT_WRITE) {
            if(data->shake_state == tls_hs_write) {
                /* try again later */
                return 1;
            }
            data->shake_state = tls_hs_write;
            /* switch back to writing mode */
            tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE);
            return 1;
        } else {
            /* a hard handshake failure; log (unless squelched) and
             * drop the connection */
            if(r == 0)
                VERBOSITY(3, (LOG_ERR, "TLS handshake: connection closed prematurely"));
            else {
                unsigned long err = ERR_get_error();
                if(!squelch_err_ssl_handshake(err)) {
                    char a[64], s[256];
                    addr2str(&data->query->addr, a, sizeof(a));
                    snprintf(s, sizeof(s), "TLS handshake failed from %s", a);
                    log_crypto_from_err(s, err);
                }
            }
            cleanup_tcp_handler(data);
            return 0;
        }
    }

    /* Use to log successful upgrade for testing - could be removed*/
    VERBOSITY(3, (LOG_INFO, "TLS handshake succeeded."));
    /* set back to the event we need to have when reading (or writing) */
    if(data->shake_state == tls_hs_read && writing) {
        tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST|EV_TIMEOUT|EV_WRITE);
    } else if(data->shake_state == tls_hs_write && !writing) {
        tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST|EV_TIMEOUT|EV_READ);
    }
    data->shake_state = tls_hs_none;
    return 1;
}
 4126 
/** handle TLS reading of incoming query.
 * Event callback for a DoT connection in reading state.  Resumes a
 * pending TLS handshake if needed, then reads the 2-byte TCP length
 * prefix followed by the DNS query over the TLS stream.  Partial reads
 * return and wait for the next EV_READ; SSL_ERROR_WANT_WRITE switches
 * the event to the writing handler with shake_state tls_hs_write_event.
 * A complete query is processed and the handler switches to
 * handle_tls_writing to send the response. */
static void
handle_tls_reading(int fd, short event, void* arg)
{
    struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
    ssize_t received;
    uint32_t now = 0;

    if ((event & EV_TIMEOUT)) {
        /* Connection timed out.  */
        cleanup_tcp_handler(data);
        return;
    }

    if ((data->nsd->tcp_query_count > 0 &&
        data->query_count >= data->nsd->tcp_query_count) ||
        data->tcp_no_more_queries) {
        /* No more queries allowed on this tcp connection. */
        cleanup_tcp_handler(data);
        return;
    }

    assert((event & EV_READ));

    /* start of a new query on this connection: reset the query buffer */
    if (data->bytes_transmitted == 0) {
        query_reset(data->query, TCP_MAX_MESSAGE_LEN, 1);
    }

    /* finish an in-progress TLS handshake (or renegotiation) first;
     * tls_handshake returns 0 on fatal error (handler cleaned up) and
     * leaves shake_state != tls_hs_none when it must be called again */
    if(data->shake_state != tls_hs_none) {
        if(!tls_handshake(data, fd, 0))
            return;
        if(data->shake_state != tls_hs_none)
            return;
    }

    /*
     * Check if we received the leading packet length bytes yet.
     */
    if(data->bytes_transmitted < sizeof(uint16_t)) {
        /* clear stale errors so SSL_get_error reflects this call */
        ERR_clear_error();
        if((received=SSL_read(data->tls, (char *) &data->query->tcplen
            + data->bytes_transmitted,
            sizeof(uint16_t) - data->bytes_transmitted)) <= 0) {
            int want = SSL_get_error(data->tls, received);
            if(want == SSL_ERROR_ZERO_RETURN) {
                cleanup_tcp_handler(data);
                return; /* shutdown, closed */
            } else if(want == SSL_ERROR_WANT_READ) {
                /* wants to be called again */
                return;
            }
            else if(want == SSL_ERROR_WANT_WRITE) {
                /* switch to writing */
                data->shake_state = tls_hs_write_event;
                tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);
                return;
            }
            cleanup_tcp_handler(data);
            log_crypto_err("could not SSL_read");
            return;
        }

        data->bytes_transmitted += received;
        if (data->bytes_transmitted < sizeof(uint16_t)) {
            /*
             * Not done with the tcplen yet, wait for more
             * data to become available.
             */
            return;
        }

        assert(data->bytes_transmitted == sizeof(uint16_t));

        /* length prefix arrives in network byte order */
        data->query->tcplen = ntohs(data->query->tcplen);

        /*
         * Minimum query size is:
         *
         *     Size of the header (12)
         *   + Root domain name   (1)
         *   + Query class        (2)
         *   + Query type         (2)
         */
        if (data->query->tcplen < QHEADERSZ + 1 + sizeof(uint16_t) + sizeof(uint16_t)) {
            VERBOSITY(2, (LOG_WARNING, "packet too small, dropping tcp connection"));
            cleanup_tcp_handler(data);
            return;
        }

        if (data->query->tcplen > data->query->maxlen) {
            VERBOSITY(2, (LOG_WARNING, "insufficient tcp buffer, dropping connection"));
            cleanup_tcp_handler(data);
            return;
        }

        buffer_set_limit(data->query->packet, data->query->tcplen);
    }

    assert(buffer_remaining(data->query->packet) > 0);

    /* Read the (remaining) query data.  */
    ERR_clear_error();
    received = SSL_read(data->tls, (void*)buffer_current(data->query->packet),
                (int)buffer_remaining(data->query->packet));
    if(received <= 0) {
        int want = SSL_get_error(data->tls, received);
        if(want == SSL_ERROR_ZERO_RETURN) {
            cleanup_tcp_handler(data);
            return; /* shutdown, closed */
        } else if(want == SSL_ERROR_WANT_READ) {
            /* wants to be called again */
            return;
        }
        else if(want == SSL_ERROR_WANT_WRITE) {
            /* switch back writing */
            data->shake_state = tls_hs_write_event;
            tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);
            return;
        }
        cleanup_tcp_handler(data);
        log_crypto_err("could not SSL_read");
        return;
    }

    data->bytes_transmitted += received;
    buffer_skip(data->query->packet, received);
    if (buffer_remaining(data->query->packet) > 0) {
        /*
         * Message not yet complete, wait for more data to
         * become available.
         */
        return;
    }

    assert(buffer_position(data->query->packet) == data->query->tcplen);

    /* Account... */
#ifndef INET6
    STATUP(data->nsd, ctls);
#else
    if (data->query->addr.ss_family == AF_INET) {
        STATUP(data->nsd, ctls);
    } else if (data->query->addr.ss_family == AF_INET6) {
        STATUP(data->nsd, ctls6);
    }
#endif

    /* We have a complete query, process it.  */

    /* tcp-query-count: handle query counter ++ */
    data->query_count++;

    buffer_flip(data->query->packet);
#ifdef USE_DNSTAP
    /*
     * and send TCP-query with found address (local) and client address to dnstap process
     */
    log_addr("query from client", &data->query->addr);
    log_addr("to server (local)", (void*)&data->socket->addr.ai_addr);
    dt_collector_submit_auth_query(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr,
        data->query->addrlen, data->query->tcp, data->query->packet);
#endif /* USE_DNSTAP */
    data->query_state = server_process_query(data->nsd, data->query, &now);
    if (data->query_state == QUERY_DISCARDED) {
        /* Drop the packet and the entire connection... */
        STATUP(data->nsd, dropped);
        ZTATUP(data->nsd, data->query->zone, dropped);
        cleanup_tcp_handler(data);
        return;
    }

#ifdef BIND8_STATS
    /* count non-authoritative answers that are not errors */
    if (RCODE(data->query->packet) == RCODE_OK
        && !AA(data->query->packet))
    {
        STATUP(data->nsd, nona);
        ZTATUP(data->nsd, data->query->zone, nona);
    }
#endif /* BIND8_STATS */

#ifdef USE_ZONE_STATS
#ifndef INET6
    ZTATUP(data->nsd, data->query->zone, ctls);
#else
    if (data->query->addr.ss_family == AF_INET) {
        ZTATUP(data->nsd, data->query->zone, ctls);
    } else if (data->query->addr.ss_family == AF_INET6) {
        ZTATUP(data->nsd, data->query->zone, ctls6);
    }
#endif
#endif /* USE_ZONE_STATS */

    query_add_optional(data->query, data->nsd, &now);

    /* Switch to the tcp write handler.  */
    buffer_flip(data->query->packet);
    data->query->tcplen = buffer_remaining(data->query->packet);
#ifdef BIND8_STATS
    /* Account the rcode & TC... */
    STATUP2(data->nsd, rcode, RCODE(data->query->packet));
    ZTATUP2(data->nsd, data->query->zone, rcode, RCODE(data->query->packet));
    if (TC(data->query->packet)) {
        STATUP(data->nsd, truncated);
        ZTATUP(data->nsd, data->query->zone, truncated);
    }
#endif /* BIND8_STATS */
#ifdef USE_DNSTAP
    /*
     * sending TCP-response with found (earlier) address (local) and client address to dnstap process
     */
    log_addr("from server (local)", (void*)&data->socket->addr.ai_addr);
    log_addr("response to client", &data->query->addr);
    dt_collector_submit_auth_response(data->nsd, (void*)&data->socket->addr.ai_addr, &data->query->addr,
        data->query->addrlen, data->query->tcp, data->query->packet,
        data->query->zone);
#endif /* USE_DNSTAP */
    /* restart the byte counter for the response transmission */
    data->bytes_transmitted = 0;

    tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);

    /* see if we can write the answer right away(usually so,EAGAIN ifnot)*/
    handle_tls_writing(fd, EV_WRITE, data);
}
 4350 
/** handle TLS writing of outgoing response.
 * Event callback for a DoT connection in writing state.  Resumes a
 * pending TLS handshake if needed, then writes the response prefixed
 * with its 2-byte TCP length.  At the start of a message the length and
 * packet are copied into one shared temporary buffer so a single
 * SSL_write covers both (SSL_write cannot gather like writev).  Partial
 * writes return and wait for the next EV_WRITE; SSL_ERROR_WANT_READ
 * switches back to the reading handler with tls_hs_read_event.  When an
 * AXFR is in progress, the next batch is generated and written; when
 * fully done, the handler switches back to handle_tls_reading. */
static void
handle_tls_writing(int fd, short event, void* arg)
{
    struct tcp_handler_data *data = (struct tcp_handler_data *) arg;
    ssize_t sent;
    struct query *q = data->query;
    /* static variable that holds reassembly buffer used to put the
     * TCP length in front of the packet, like writev. */
    static buffer_type* global_tls_temp_buffer = NULL;
    buffer_type* write_buffer;
    uint32_t now = 0;

    if ((event & EV_TIMEOUT)) {
        /* Connection timed out.  */
        cleanup_tcp_handler(data);
        return;
    }

    assert((event & EV_WRITE));

    /* finish an in-progress TLS handshake first; tls_handshake returns
     * 0 on fatal error (handler already cleaned up) */
    if(data->shake_state != tls_hs_none) {
        if(!tls_handshake(data, fd, 1))
            return;
        if(data->shake_state != tls_hs_none)
            return;
    }

    /* allow SSL_write to report partial progress instead of retrying
     * internally with the exact same buffer */
    (void)SSL_set_mode(data->tls, SSL_MODE_ENABLE_PARTIAL_WRITE);

    /* If we are writing the start of a message, we must include the length
     * this is done with a copy into write_buffer. */
    write_buffer = NULL;
    if (data->bytes_transmitted == 0) {
        if(!global_tls_temp_buffer) {
            /* gets deallocated when nsd shuts down from
             * nsd.region */
            global_tls_temp_buffer = buffer_create(nsd.region,
                QIOBUFSZ + sizeof(q->tcplen));
            if (!global_tls_temp_buffer) {
                return;
            }
        }
        write_buffer = global_tls_temp_buffer;
        buffer_clear(write_buffer);
        buffer_write_u16(write_buffer, q->tcplen);
        buffer_write(write_buffer, buffer_current(q->packet),
            (int)buffer_remaining(q->packet));
        buffer_flip(write_buffer);
    } else {
        /* length already sent; continue from the real packet buffer */
        write_buffer = q->packet;
    }

    /* Write the response */
    ERR_clear_error();
    sent = SSL_write(data->tls, buffer_current(write_buffer), buffer_remaining(write_buffer));
    if(sent <= 0) {
        int want = SSL_get_error(data->tls, sent);
        if(want == SSL_ERROR_ZERO_RETURN) {
            cleanup_tcp_handler(data);
            /* closed */
        } else if(want == SSL_ERROR_WANT_READ) {
            /* switch back to reading */
            data->shake_state = tls_hs_read_event;
            tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT);
        } else if(want != SSL_ERROR_WANT_WRITE) {
            cleanup_tcp_handler(data);
            log_crypto_err("could not SSL_write");
        }
        return;
    }

    buffer_skip(write_buffer, sent);
    if(buffer_remaining(write_buffer) != 0) {
        /* If not all sent, sync up the real buffer if it wasn't used.*/
        /* NOTE(review): a partial write of fewer than sizeof(q->tcplen)
         * bytes would leave q->packet unsynced for the next call;
         * presumably SSL_write never returns such a short count here
         * since partial writes happen at TLS record granularity —
         * TODO confirm */
        if (data->bytes_transmitted == 0 && (ssize_t)sent > (ssize_t)sizeof(q->tcplen)) {
            buffer_skip(q->packet, (ssize_t)sent - (ssize_t)sizeof(q->tcplen));
        }
    }

    data->bytes_transmitted += sent;
    if (data->bytes_transmitted < q->tcplen + sizeof(q->tcplen)) {
        /*
         * Still more data to write when socket becomes
         * writable again.
         */
        return;
    }

    assert(data->bytes_transmitted == q->tcplen + sizeof(q->tcplen));

    if (data->query_state == QUERY_IN_AXFR) {
        /* Continue processing AXFR and writing back results.  */
        buffer_clear(q->packet);
        data->query_state = query_axfr(data->nsd, q);
        if (data->query_state != QUERY_PROCESSED) {
            query_add_optional(data->query, data->nsd, &now);

            /* Reset data. */
            buffer_flip(q->packet);
            q->tcplen = buffer_remaining(q->packet);
            data->bytes_transmitted = 0;
            /* Reset to writing mode.  */
            tcp_handler_setup_event(data, handle_tls_writing, fd, EV_PERSIST | EV_WRITE | EV_TIMEOUT);

            /*
             * Write data if/when the socket is writable
             * again.
             */
            return;
        }
    }

    /*
     * Done sending, wait for the next request to arrive on the
     * TCP socket by installing the TCP read handler.
     */
    if ((data->nsd->tcp_query_count > 0 &&
        data->query_count >= data->nsd->tcp_query_count) ||
        data->tcp_no_more_queries) {

        /* half-close: no more responses will be written; let the
         * client read what was sent and close */
        (void) shutdown(fd, SHUT_WR);
    }

    data->bytes_transmitted = 0;

    tcp_handler_setup_event(data, handle_tls_reading, fd, EV_PERSIST | EV_READ | EV_TIMEOUT);
}
 4479 #endif
 4480 
 4481 static void
 4482 handle_slowaccept_timeout(int ATTR_UNUSED(fd), short ATTR_UNUSED(event),
 4483     void* ATTR_UNUSED(arg))
 4484 {
 4485     if(slowaccept) {
 4486         configure_handler_event_types(EV_PERSIST | EV_READ);
 4487         slowaccept = 0;
 4488     }
 4489 }
 4490 
 4491 static int perform_accept(int fd, struct sockaddr *addr, socklen_t *addrlen)
 4492 {
 4493 #ifndef HAVE_ACCEPT4
 4494     int s = accept(fd, addr, addrlen);
 4495     if (s != -1) {
 4496         if (fcntl(s, F_SETFL, O_NONBLOCK) == -1) {
 4497             log_msg(LOG_ERR, "fcntl failed: %s", strerror(errno));
 4498             close(s);
 4499             s = -1;
 4500             errno=EINTR; /* stop error printout as error in accept4
 4501                 by setting this errno, it omits printout, in
 4502                 later code that calls nsd_accept4 */
 4503         }
 4504     }
 4505     return s;
 4506 #else
 4507     return accept4(fd, addr, addrlen, SOCK_NONBLOCK);
 4508 #endif /* HAVE_ACCEPT4 */
 4509 }
 4510 
 4511 /*
 4512  * Handle an incoming TCP connection.  The connection is accepted and
 4513  * a new TCP reader event handler is added.  The TCP handler
 4514  * is responsible for cleanup when the connection is closed.
 4515  */
 4516 static void
 4517 handle_tcp_accept(int fd, short event, void* arg)
 4518 {
 4519     struct tcp_accept_handler_data *data
 4520         = (struct tcp_accept_handler_data *) arg;
 4521     int s;
 4522     int reject = 0;
 4523     struct tcp_handler_data *tcp_data;
 4524     region_type *tcp_region;
 4525 #ifdef INET6
 4526     struct sockaddr_storage addr;
 4527 #else
 4528     struct sockaddr_in addr;
 4529 #endif
 4530     socklen_t addrlen;
 4531     struct timeval timeout;
 4532 
 4533     if (!(event & EV_READ)) {
 4534         return;
 4535     }
 4536 
 4537     if (data->nsd->current_tcp_count >= data->nsd->maximum_tcp_count) {
 4538         reject = data->nsd->options->tcp_reject_overflow;
 4539         if (!reject) {
 4540             return;
 4541         }
 4542     }
 4543 
 4544     /* Accept it... */
 4545     addrlen = sizeof(addr);
 4546     s = perform_accept(fd, (struct sockaddr *) &addr, &addrlen);
 4547     if (s == -1) {
 4548         /**
 4549          * EMFILE and ENFILE is a signal that the limit of open
 4550          * file descriptors has been reached. Pause accept().
 4551          * EINTR is a signal interrupt. The others are various OS ways
 4552          * of saying that the client has closed the connection.
 4553          */
 4554         if (errno == EMFILE || errno == ENFILE) {
 4555             if (!slowaccept) {
 4556                 /* disable accept events */
 4557                 struct timeval tv;
 4558                 configure_handler_event_types(0);
 4559                 tv.tv_sec = SLOW_ACCEPT_TIMEOUT;
 4560                 tv.tv_usec = 0L;
 4561                 memset(&slowaccept_event, 0,
 4562                     sizeof(slowaccept_event));
 4563                 event_set(&slowaccept_event, -1, EV_TIMEOUT,
 4564                     handle_slowaccept_timeout, NULL);
 4565                 (void)event_base_set(data->event.ev_base,
 4566                     &slowaccept_event);
 4567                 (void)event_add(&slowaccept_event, &tv);
 4568                 slowaccept = 1;
 4569                 /* We don't want to spam the logs here */
 4570             }
 4571         } else if (errno != EINTR
 4572             && errno != EWOULDBLOCK
 4573 #ifdef ECONNABORTED
 4574             && errno != ECONNABORTED
 4575 #endif /* ECONNABORTED */
 4576 #ifdef EPROTO
 4577             && errno != EPROTO
 4578 #endif /* EPROTO */
 4579             ) {
 4580             log_msg(LOG_ERR, "accept failed: %s", strerror(errno));
 4581         }
 4582         return;
 4583     }
 4584 
 4585     if (reject) {
 4586         shutdown(s, SHUT_RDWR);
 4587         close(s);
 4588         return;
 4589     }
 4590 
 4591     /*
 4592      * This region is deallocated when the TCP connection is
 4593      * closed by the TCP handler.
 4594      */
 4595     tcp_region = region_create(xalloc, free);
 4596     tcp_data = (struct tcp_handler_data *) region_alloc(
 4597         tcp_region, sizeof(struct tcp_handler_data));
 4598     tcp_data->region = tcp_region;
 4599     tcp_data->query = query_create(tcp_region, compressed_dname_offsets,
 4600         compression_table_size, compressed_dnames);
 4601     tcp_data->nsd = data->nsd;
 4602     tcp_data->query_count = 0;
 4603 #ifdef HAVE_SSL
 4604     tcp_data->shake_state = tls_hs_none;
 4605     tcp_data->tls = NULL;
 4606 #endif
 4607     tcp_data->prev = NULL;
 4608     tcp_data->next = NULL;
 4609 
 4610     tcp_data->query_state = QUERY_PROCESSED;
 4611     tcp_data->bytes_transmitted = 0;
 4612     memcpy(&tcp_data->query->addr, &addr, addrlen);
 4613     tcp_data->query->addrlen = addrlen;
 4614 
 4615     tcp_data->tcp_no_more_queries = 0;
 4616     tcp_data->tcp_timeout = data->nsd->tcp_timeout * 1000;
 4617     if (data->nsd->current_tcp_count > data->nsd->maximum_tcp_count/2) {
 4618         /* very busy, give smaller timeout */
 4619         tcp_data->tcp_timeout = 200;
 4620     }
 4621     memset(&tcp_data->event, 0, sizeof(tcp_data->event));
 4622     timeout.tv_sec = tcp_data->tcp_timeout / 1000;
 4623     timeout.tv_usec = (tcp_data->tcp_timeout % 1000)*1000;
 4624 
 4625 #ifdef USE_DNSTAP
 4626     /* save the address of the connection */
 4627     tcp_data->socket = data->socket;
 4628 #endif /* USE_DNSTAP */
 4629 
 4630 #ifdef HAVE_SSL
 4631     if (data->tls_accept) {
 4632         tcp_data->tls = incoming_ssl_fd(tcp_data->nsd->tls_ctx, s);
 4633         if(!tcp_data->tls) {
 4634             close(s);
 4635             return;
 4636         }
 4637         tcp_data->shake_state = tls_hs_read;
 4638         memset(&tcp_data->event, 0, sizeof(tcp_data->event));
 4639         event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT,
 4640               handle_tls_reading, tcp_data);
 4641     } else {
 4642 #endif
 4643         memset(&tcp_data->event, 0, sizeof(tcp_data->event));
 4644         event_set(&tcp_data->event, s, EV_PERSIST | EV_READ | EV_TIMEOUT,
 4645               handle_tcp_reading, tcp_data);
 4646 #ifdef HAVE_SSL
 4647     }
 4648 #endif
 4649     if(event_base_set(data->event.ev_base, &tcp_data->event) != 0) {
 4650         log_msg(LOG_ERR, "cannot set tcp event base");
 4651         close(s);
 4652         region_destroy(tcp_region);
 4653         return;
 4654     }
 4655     if(event_add(&tcp_data->event, &timeout) != 0) {
 4656         log_msg(LOG_ERR, "cannot add tcp to event base");
 4657         close(s);
 4658         region_destroy(tcp_region);
 4659         return;
 4660     }
 4661     if(tcp_active_list) {
 4662         tcp_active_list->prev = tcp_data;
 4663         tcp_data->next = tcp_active_list;
 4664     }
 4665     tcp_active_list = tcp_data;
 4666 
 4667     /*
 4668      * Keep track of the total number of TCP handlers installed so
 4669      * we can stop accepting connections when the maximum number
 4670      * of simultaneous TCP connections is reached.
 4671      *
 4672      * If tcp-reject-overflow is enabled, however, then we do not
 4673      * change the handler event type; we keep it as-is and accept
 4674      * overflow TCP connections only so that we can forcibly kill
 4675      * them off.
 4676      */
 4677     ++data->nsd->current_tcp_count;
 4678     if (!data->nsd->options->tcp_reject_overflow &&
 4679          data->nsd->current_tcp_count == data->nsd->maximum_tcp_count)
 4680     {
 4681         configure_handler_event_types(0);
 4682     }
 4683 }
 4684 
 4685 static void
 4686 send_children_command(struct nsd* nsd, sig_atomic_t command, int timeout)
 4687 {
 4688     size_t i;
 4689     assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0);
 4690     for (i = 0; i < nsd->child_count; ++i) {
 4691         if (nsd->children[i].pid > 0 && nsd->children[i].child_fd != -1) {
 4692             if (write(nsd->children[i].child_fd,
 4693                 &command,
 4694                 sizeof(command)) == -1)
 4695             {
 4696                 if(errno != EAGAIN && errno != EINTR)
 4697                     log_msg(LOG_ERR, "problems sending command %d to server %d: %s",
 4698                     (int) command,
 4699                     (int) nsd->children[i].pid,
 4700                     strerror(errno));
 4701             } else if (timeout > 0) {
 4702                 (void)block_read(NULL,
 4703                     nsd->children[i].child_fd,
 4704                     &command, sizeof(command), timeout);
 4705             }
 4706             fsync(nsd->children[i].child_fd);
 4707             close(nsd->children[i].child_fd);
 4708             nsd->children[i].child_fd = -1;
 4709         }
 4710     }
 4711 }
 4712 
 4713 static void
 4714 send_children_quit(struct nsd* nsd)
 4715 {
 4716     DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit"));
 4717     send_children_command(nsd, NSD_QUIT, 0);
 4718 }
 4719 
 4720 static void
 4721 send_children_quit_and_wait(struct nsd* nsd)
 4722 {
 4723     DEBUG(DEBUG_IPC, 1, (LOG_INFO, "send children quit and wait"));
 4724     send_children_command(nsd, NSD_QUIT_CHILD, 3);
 4725 }
 4726 
 4727 #ifdef BIND8_STATS
 4728 static void
 4729 set_children_stats(struct nsd* nsd)
 4730 {
 4731     size_t i;
 4732     assert(nsd->server_kind == NSD_SERVER_MAIN && nsd->this_child == 0);
 4733     DEBUG(DEBUG_IPC, 1, (LOG_INFO, "parent set stats to send to children"));
 4734     for (i = 0; i < nsd->child_count; ++i) {
 4735         nsd->children[i].need_to_send_STATS = 1;
 4736         nsd->children[i].handler->event_types |= NETIO_EVENT_WRITE;
 4737     }
 4738 }
 4739 #endif /* BIND8_STATS */
 4740 
 4741 static void
 4742 configure_handler_event_types(short event_types)
 4743 {
 4744     size_t i;
 4745 
 4746     for (i = 0; i < tcp_accept_handler_count; ++i) {
 4747         struct event* handler = &tcp_accept_handlers[i].event;
 4748         if(event_types) {
 4749             /* reassign */
 4750             int fd = handler->ev_fd;
 4751             struct event_base* base = handler->ev_base;
 4752             if(tcp_accept_handlers[i].event_added)
 4753                 event_del(handler);
 4754             memset(handler, 0, sizeof(*handler));
 4755             event_set(handler, fd, event_types,
 4756                 handle_tcp_accept, &tcp_accept_handlers[i]);
 4757             if(event_base_set(base, handler) != 0)
 4758                 log_msg(LOG_ERR, "conhand: cannot event_base");
 4759             if(event_add(handler, NULL) != 0)
 4760                 log_msg(LOG_ERR, "conhand: cannot event_add");
 4761             tcp_accept_handlers[i].event_added = 1;
 4762         } else {
 4763             /* remove */
 4764             if(tcp_accept_handlers[i].event_added) {
 4765                 event_del(handler);
 4766                 tcp_accept_handlers[i].event_added = 0;
 4767             }
 4768         }
 4769     }
 4770 }