"Fossies" - the Fresh Open Source Software Archive

Member "bind-9.17.5/lib/dns/rbtdb.c" (4 Sep 2020, 290387 Bytes) of package /linux/misc/dns/bind9/9.17.5/bind-9.17.5.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "rbtdb.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 9.17.4_vs_9.17.5.

    1 /*
    2  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
    3  *
    4  * This Source Code Form is subject to the terms of the Mozilla Public
    5  * License, v. 2.0. If a copy of the MPL was not distributed with this
    6  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
    7  *
    8  * See the COPYRIGHT file distributed with this work for additional
    9  * information regarding copyright ownership.
   10  */
   11 
   12 /*! \file */
   13 
   14 /* #define inline */
   15 
   16 #include <inttypes.h>
   17 #include <stdbool.h>
   18 
   19 #include <isc/atomic.h>
   20 #include <isc/crc64.h>
   21 #include <isc/event.h>
   22 #include <isc/file.h>
   23 #include <isc/hash.h>
   24 #include <isc/heap.h>
   25 #include <isc/hex.h>
   26 #include <isc/mem.h>
   27 #include <isc/mutex.h>
   28 #include <isc/once.h>
   29 #include <isc/platform.h>
   30 #include <isc/print.h>
   31 #include <isc/random.h>
   32 #include <isc/refcount.h>
   33 #include <isc/rwlock.h>
   34 #include <isc/serial.h>
   35 #include <isc/socket.h>
   36 #include <isc/stdio.h>
   37 #include <isc/string.h>
   38 #include <isc/task.h>
   39 #include <isc/time.h>
   40 #include <isc/util.h>
   41 
   42 #include <dns/callbacks.h>
   43 #include <dns/db.h>
   44 #include <dns/dbiterator.h>
   45 #include <dns/events.h>
   46 #include <dns/fixedname.h>
   47 #include <dns/lib.h>
   48 #include <dns/log.h>
   49 #include <dns/masterdump.h>
   50 #include <dns/nsec.h>
   51 #include <dns/nsec3.h>
   52 #include <dns/rbt.h>
   53 #include <dns/rdata.h>
   54 #include <dns/rdataset.h>
   55 #include <dns/rdatasetiter.h>
   56 #include <dns/rdataslab.h>
   57 #include <dns/rdatastruct.h>
   58 #include <dns/result.h>
   59 #include <dns/stats.h>
   60 #include <dns/time.h>
   61 #include <dns/view.h>
   62 #include <dns/zone.h>
   63 #include <dns/zonekey.h>
   64 
   65 #ifndef WIN32
   66 #include <sys/mman.h>
   67 #else /* ifndef WIN32 */
   68 #define PROT_READ   0x01
   69 #define PROT_WRITE  0x02
   70 #define MAP_PRIVATE 0x0002
   71 #define MAP_FAILED  ((void *)-1)
   72 #endif /* ifndef WIN32 */
   73 
   74 #include "rbtdb.h"
   75 
   76 #define RBTDB_MAGIC ISC_MAGIC('R', 'B', 'D', '4')
   77 
   78 #define CHECK(op)                            \
   79     do {                                 \
   80         result = (op);               \
   81         if (result != ISC_R_SUCCESS) \
   82             goto failure;        \
   83     } while (0)
   84 
   85 /*
   86  * This is the map file header for RBTDB images.  It is populated, and then
   87  * written, as the LAST thing done to the file.  Writing this last (with
   88  * zeros in the header area initially) will ensure that the header is only
   89  * valid when the RBTDB image is also valid.
   90  */
   91 typedef struct rbtdb_file_header rbtdb_file_header_t;
   92 
   93 /* Header length, always the same size regardless of structure size */
   94 #define RBTDB_HEADER_LENGTH 1024
   95 
   96 struct rbtdb_file_header {
   97     char version1[32];
   98     uint32_t ptrsize;
   99     unsigned int bigendian : 1;
  100     uint64_t tree;
  101     uint64_t nsec;
  102     uint64_t nsec3;
  103 
  104     char version2[32]; /* repeated; must match version1 */
  105 };
  106 
  107 /*%
  108  * Note that "impmagic" is not the first four bytes of the struct, so
  109  * ISC_MAGIC_VALID cannot be used.
  110  */
  111 #define VALID_RBTDB(rbtdb) \
  112     ((rbtdb) != NULL && (rbtdb)->common.impmagic == RBTDB_MAGIC)
  113 
  114 typedef uint32_t rbtdb_serial_t;
  115 typedef uint32_t rbtdb_rdatatype_t;
  116 
  117 #define RBTDB_RDATATYPE_BASE(type) ((dns_rdatatype_t)((type)&0xFFFF))
  118 #define RBTDB_RDATATYPE_EXT(type)  ((dns_rdatatype_t)((type) >> 16))
  119 #define RBTDB_RDATATYPE_VALUE(base, ext)              \
  120     ((rbtdb_rdatatype_t)(((uint32_t)ext) << 16) | \
  121      (((uint32_t)base) & 0xffff))
  122 
  123 #define RBTDB_RDATATYPE_SIGNSEC \
  124     RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
  125 #define RBTDB_RDATATYPE_SIGNSEC3 \
  126     RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
  127 #define RBTDB_RDATATYPE_SIGNS \
  128     RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
  129 #define RBTDB_RDATATYPE_SIGCNAME \
  130     RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
  131 #define RBTDB_RDATATYPE_SIGDNAME \
  132     RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
  133 #define RBTDB_RDATATYPE_SIGDS \
  134     RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds)
  135 #define RBTDB_RDATATYPE_SIGSOA \
  136     RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_soa)
  137 #define RBTDB_RDATATYPE_NCACHEANY RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
  138 
  139 #define RBTDB_INITLOCK(l)    isc_rwlock_init((l), 0, 0)
  140 #define RBTDB_DESTROYLOCK(l) isc_rwlock_destroy(l)
  141 #define RBTDB_LOCK(l, t)     RWLOCK((l), (t))
  142 #define RBTDB_UNLOCK(l, t)   RWUNLOCK((l), (t))
  143 
  144 /*
  145  * Since node locking is sensitive to both performance and memory footprint,
  146  * we need some trick here.  If we have both high-performance rwlock and
  147  * high performance and small-memory reference counters, we use rwlock for
  148  * node lock and isc_refcount for node references.  In this case, we don't have
  149  * to protect the access to the counters by locks.
  150  * Otherwise, we simply use ordinary mutex lock for node locking, and use
  151  * simple integers as reference counters which is protected by the lock.
  152  * In most cases, we can simply use wrapper macros such as NODE_LOCK and
  153  * NODE_UNLOCK.  In some other cases, however, we need to protect reference
  154  * counters first and then protect other parts of a node as read-only data.
  155  * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
  156  * provided for these special cases.  When we can use the efficient backend
  157  * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
  158  * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
  159  * section including the access to the reference counter.
  160  * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
  161  * section is also protected by NODE_STRONGLOCK().
  162  */
  163 typedef isc_rwlock_t nodelock_t;
  164 
  165 #define NODE_INITLOCK(l)    isc_rwlock_init((l), 0, 0)
  166 #define NODE_DESTROYLOCK(l) isc_rwlock_destroy(l)
  167 #define NODE_LOCK(l, t)     RWLOCK((l), (t))
  168 #define NODE_UNLOCK(l, t)   RWUNLOCK((l), (t))
  169 #define NODE_TRYUPGRADE(l)  isc_rwlock_tryupgrade(l)
  170 #define NODE_DOWNGRADE(l)   isc_rwlock_downgrade(l)
  171 
  172 /*%
  173  * Whether to rate-limit updating the LRU to avoid possible thread contention.
  174  * Updating LRU requires write locking, so we don't do it every time the
  175  * record is touched - only after some time passes.
  176  */
  177 #ifndef DNS_RBTDB_LIMITLRUUPDATE
  178 #define DNS_RBTDB_LIMITLRUUPDATE 1
  179 #endif
  180 
  181 /*% Time after which we update LRU for glue records, 5 minutes */
  182 #define DNS_RBTDB_LRUUPDATE_GLUE 300
  183 /*% Time after which we update LRU for all other records, 10 minutes */
  184 #define DNS_RBTDB_LRUUPDATE_REGULAR 600
  185 
  186 /*
  187  * Allow clients with a virtual time of up to 5 minutes in the past to see
  188  * records that would have otherwise have expired.
  189  */
  190 #define RBTDB_VIRTUAL 300
  191 
  192 struct noqname {
  193     dns_name_t name;
  194     void *neg;
  195     void *negsig;
  196     dns_rdatatype_t type;
  197 };
  198 
  199 typedef struct rdatasetheader {
  200     /*%
  201      * Locked by the owning node's lock.
  202      */
  203     rbtdb_serial_t serial;
  204     dns_ttl_t rdh_ttl;
  205     rbtdb_rdatatype_t type;
  206     atomic_uint_least16_t attributes;
  207     dns_trust_t trust;
  208     struct noqname *noqname;
  209     struct noqname *closest;
  210     unsigned int is_mmapped : 1;
  211     unsigned int next_is_relative : 1;
  212     unsigned int node_is_relative : 1;
  213     unsigned int resign_lsb : 1;
  214     /*%<
  215      * We don't use the LIST macros, because the LIST structure has
  216      * both head and tail pointers, and is doubly linked.
  217      */
  218 
  219     struct rdatasetheader *next;
  220     /*%<
  221      * If this is the top header for an rdataset, 'next' points
  222      * to the top header for the next rdataset (i.e., the next type).
  223      * Otherwise, it points up to the header whose down pointer points
  224      * at this header.
  225      */
  226 
  227     struct rdatasetheader *down;
  228     /*%<
  229      * Points to the header for the next older version of
  230      * this rdataset.
  231      */
  232 
  233     atomic_uint_fast32_t count;
  234     /*%<
  235      * Monotonously increased every time this rdataset is bound so that
  236      * it is used as the base of the starting point in DNS responses
  237      * when the "cyclic" rrset-order is required.
  238      */
  239 
  240     dns_rbtnode_t *node;
  241     isc_stdtime_t last_used;
  242     ISC_LINK(struct rdatasetheader) link;
  243 
  244     unsigned int heap_index;
  245     /*%<
  246      * Used for TTL-based cache cleaning.
  247      */
  248     isc_stdtime_t resign;
  249     /*%<
  250      * Case vector.  If the bit is set then the corresponding
  251      * character in the owner name needs to be AND'd with 0x20,
  252      * rendering that character upper case.
  253      */
  254     unsigned char upper[32];
  255 } rdatasetheader_t;
  256 
  257 typedef ISC_LIST(rdatasetheader_t) rdatasetheaderlist_t;
  258 typedef ISC_LIST(dns_rbtnode_t) rbtnodelist_t;
  259 
  260 #define RDATASET_ATTR_NONEXISTENT 0x0001
  261 /*%< May be potentially served as stale data. */
  262 #define RDATASET_ATTR_STALE      0x0002
  263 #define RDATASET_ATTR_IGNORE         0x0004
  264 #define RDATASET_ATTR_RETAIN         0x0008
  265 #define RDATASET_ATTR_NXDOMAIN       0x0010
  266 #define RDATASET_ATTR_RESIGN         0x0020
  267 #define RDATASET_ATTR_STATCOUNT      0x0040
  268 #define RDATASET_ATTR_OPTOUT         0x0080
  269 #define RDATASET_ATTR_NEGATIVE       0x0100
  270 #define RDATASET_ATTR_PREFETCH       0x0200
  271 #define RDATASET_ATTR_CASESET        0x0400
  272 #define RDATASET_ATTR_ZEROTTL        0x0800
  273 #define RDATASET_ATTR_CASEFULLYLOWER 0x1000
  274 /*%< Ancient - awaiting cleanup. */
  275 #define RDATASET_ATTR_ANCIENT 0x2000
  276 
  277 /*
  278  * XXX
  279  * When the cache will pre-expire data (due to memory low or other
  280  * situations) before the rdataset's TTL has expired, it MUST
  281  * respect the RETAIN bit and not expire the data until its TTL is
  282  * expired.
  283  */
  284 
  285 #undef IGNORE /* WIN32 winbase.h defines this. */
  286 
  287 #define EXISTS(header)                                 \
  288     ((atomic_load_acquire(&(header)->attributes) & \
  289       RDATASET_ATTR_NONEXISTENT) == 0)
  290 #define NONEXISTENT(header)                            \
  291     ((atomic_load_acquire(&(header)->attributes) & \
  292       RDATASET_ATTR_NONEXISTENT) != 0)
  293 #define IGNORE(header)                                 \
  294     ((atomic_load_acquire(&(header)->attributes) & \
  295       RDATASET_ATTR_IGNORE) != 0)
  296 #define RETAIN(header)                                 \
  297     ((atomic_load_acquire(&(header)->attributes) & \
  298       RDATASET_ATTR_RETAIN) != 0)
  299 #define NXDOMAIN(header)                               \
  300     ((atomic_load_acquire(&(header)->attributes) & \
  301       RDATASET_ATTR_NXDOMAIN) != 0)
  302 #define STALE(header)                                                          \
  303     ((atomic_load_acquire(&(header)->attributes) & RDATASET_ATTR_STALE) != \
  304      0)
  305 #define RESIGN(header)                                 \
  306     ((atomic_load_acquire(&(header)->attributes) & \
  307       RDATASET_ATTR_RESIGN) != 0)
  308 #define OPTOUT(header)                                 \
  309     ((atomic_load_acquire(&(header)->attributes) & \
  310       RDATASET_ATTR_OPTOUT) != 0)
  311 #define NEGATIVE(header)                               \
  312     ((atomic_load_acquire(&(header)->attributes) & \
  313       RDATASET_ATTR_NEGATIVE) != 0)
  314 #define PREFETCH(header)                               \
  315     ((atomic_load_acquire(&(header)->attributes) & \
  316       RDATASET_ATTR_PREFETCH) != 0)
  317 #define CASESET(header)                                \
  318     ((atomic_load_acquire(&(header)->attributes) & \
  319       RDATASET_ATTR_CASESET) != 0)
  320 #define ZEROTTL(header)                                \
  321     ((atomic_load_acquire(&(header)->attributes) & \
  322       RDATASET_ATTR_ZEROTTL) != 0)
  323 #define CASEFULLYLOWER(header)                         \
  324     ((atomic_load_acquire(&(header)->attributes) & \
  325       RDATASET_ATTR_CASEFULLYLOWER) != 0)
  326 #define ANCIENT(header)                                \
  327     ((atomic_load_acquire(&(header)->attributes) & \
  328       RDATASET_ATTR_ANCIENT) != 0)
  329 #define STATCOUNT(header)                              \
  330     ((atomic_load_acquire(&(header)->attributes) & \
  331       RDATASET_ATTR_STATCOUNT) != 0)
  332 
  333 #define RDATASET_ATTR_GET(header, attribute) \
  334     (atomic_load_acquire(&(header)->attributes) & attribute)
  335 #define RDATASET_ATTR_SET(header, attribute) \
  336     atomic_fetch_or_release(&(header)->attributes, attribute)
  337 #define RDATASET_ATTR_CLR(header, attribute) \
  338     atomic_fetch_and_release(&(header)->attributes, ~(attribute))
  339 
  340 #define ACTIVE(header, now)             \
  341     (((header)->rdh_ttl > (now)) || \
  342      ((header)->rdh_ttl == (now) && ZEROTTL(header)))
  343 
  344 #define DEFAULT_NODE_LOCK_COUNT     53 /*%< Should be prime. */
  345 #define RBTDB_GLUE_TABLE_INIT_BITS  2U
  346 #define RBTDB_GLUE_TABLE_MAX_BITS   32U
  347 #define RBTDB_GLUE_TABLE_OVERCOMMIT 3
  348 
  349 #define GOLDEN_RATIO_32 0x61C88647
  350 #define HASHSIZE(bits)  (UINT64_C(1) << (bits))
  351 
  352 static inline uint32_t
  353 hash_32(uint32_t val, unsigned int bits) {
  354     REQUIRE(bits <= RBTDB_GLUE_TABLE_MAX_BITS);
  355     /* High bits are more random. */
  356     return (val * GOLDEN_RATIO_32 >> (32 - bits));
  357 }
  358 
  359 /*%
  360  * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
  361  * There is a tradeoff issue about configuring this value: if this is too
  362  * small, it may cause heavier contention between threads; if this is too large,
  363  * LRU purge algorithm won't work well (entries tend to be purged prematurely).
  364  * The default value should work well for most environments, but this can
  365  * also be configurable at compilation time via the
  366  * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
  367  * 1 due to the assumption of overmem_purge().
  368  */
  369 #ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
  370 #if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
  371 #error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
  372 #else /* if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 */
  373 #define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
  374 #endif /* if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1 */
  375 #else  /* ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
  376 #define DEFAULT_CACHE_NODE_LOCK_COUNT 97
  377 #endif /* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
  378 
  379 typedef struct {
  380     nodelock_t lock;
  381     /* Protected in the refcount routines. */
  382     isc_refcount_t references;
  383     /* Locked by lock. */
  384     bool exiting;
  385 } rbtdb_nodelock_t;
  386 
  387 typedef struct rbtdb_changed {
  388     dns_rbtnode_t *node;
  389     bool dirty;
  390     ISC_LINK(struct rbtdb_changed) link;
  391 } rbtdb_changed_t;
  392 
  393 typedef ISC_LIST(rbtdb_changed_t) rbtdb_changedlist_t;
  394 
  395 typedef enum { dns_db_insecure, dns_db_partial, dns_db_secure } dns_db_secure_t;
  396 
  397 typedef struct dns_rbtdb dns_rbtdb_t;
  398 
  399 /* Reason for expiring a record from cache */
  400 typedef enum { expire_lru, expire_ttl, expire_flush } expire_t;
  401 
  402 typedef struct rbtdb_glue rbtdb_glue_t;
  403 
  404 typedef struct rbtdb_glue_table_node {
  405     struct rbtdb_glue_table_node *next;
  406     dns_rbtnode_t *node;
  407     rbtdb_glue_t *glue_list;
  408 } rbtdb_glue_table_node_t;
  409 
  410 typedef enum {
  411     rdataset_ttl_fresh,
  412     rdataset_ttl_stale,
  413     rdataset_ttl_ancient
  414 } rdataset_ttl_t;
  415 
  416 typedef struct rbtdb_version {
  417     /* Not locked */
  418     rbtdb_serial_t serial;
  419     dns_rbtdb_t *rbtdb;
  420     /*
  421      * Protected in the refcount routines.
  422      * XXXJT: should we change the lock policy based on the refcount
  423      * performance?
  424      */
  425     isc_refcount_t references;
  426     /* Locked by database lock. */
  427     bool writer;
  428     bool commit_ok;
  429     rbtdb_changedlist_t changed_list;
  430     rdatasetheaderlist_t resigned_list;
  431     ISC_LINK(struct rbtdb_version) link;
  432     dns_db_secure_t secure;
  433     bool havensec3;
  434     /* NSEC3 parameters */
  435     dns_hash_t hash;
  436     uint8_t flags;
  437     uint16_t iterations;
  438     uint8_t salt_length;
  439     unsigned char salt[DNS_NSEC3_SALTSIZE];
  440 
  441     /*
  442      * records and xfrsize are covered by rwlock.
  443      */
  444     isc_rwlock_t rwlock;
  445     uint64_t records;
  446     uint64_t xfrsize;
  447 
  448     isc_rwlock_t glue_rwlock;
  449     size_t glue_table_bits;
  450     size_t glue_table_nodecount;
  451     rbtdb_glue_table_node_t **glue_table;
  452 } rbtdb_version_t;
  453 
  454 typedef ISC_LIST(rbtdb_version_t) rbtdb_versionlist_t;
  455 
  456 struct dns_rbtdb {
  457     /* Unlocked. */
  458     dns_db_t common;
  459     /* Locks the data in this struct */
  460     isc_rwlock_t lock;
  461     /* Locks the tree structure (prevents nodes appearing/disappearing) */
  462     isc_rwlock_t tree_lock;
  463     /* Locks for individual tree nodes */
  464     unsigned int node_lock_count;
  465     rbtdb_nodelock_t *node_locks;
  466     dns_rbtnode_t *origin_node;
  467     dns_rbtnode_t *nsec3_origin_node;
  468     dns_stats_t *rrsetstats;     /* cache DB only */
  469     isc_stats_t *cachestats;     /* cache DB only */
  470     isc_stats_t *gluecachestats; /* zone DB only */
  471     /* Locked by lock. */
  472     unsigned int active;
  473     isc_refcount_t references;
  474     unsigned int attributes;
  475     rbtdb_serial_t current_serial;
  476     rbtdb_serial_t least_serial;
  477     rbtdb_serial_t next_serial;
  478     rbtdb_version_t *current_version;
  479     rbtdb_version_t *future_version;
  480     rbtdb_versionlist_t open_versions;
  481     isc_task_t *task;
  482     dns_dbnode_t *soanode;
  483     dns_dbnode_t *nsnode;
  484 
  485     /*
  486      * Maximum length of time to keep using a stale answer past its
  487      * normal TTL expiry.
  488      */
  489     dns_ttl_t serve_stale_ttl;
  490 
  491     /*
  492      * This is a linked list used to implement the LRU cache.  There will
  493      * be node_lock_count linked lists here.  Nodes in bucket 1 will be
  494      * placed on the linked list rdatasets[1].
  495      */
  496     rdatasetheaderlist_t *rdatasets;
  497 
  498     /*%
  499      * Temporary storage for stale cache nodes and dynamically deleted
  500      * nodes that await being cleaned up.
  501      */
  502     rbtnodelist_t *deadnodes;
  503 
  504     /*
  505      * Heaps.  These are used for TTL based expiry in a cache,
  506      * or for zone resigning in a zone DB.  hmctx is the memory
  507      * context to use for the heap (which differs from the main
  508      * database memory context in the case of a cache).
  509      */
  510     isc_mem_t *hmctx;
  511     isc_heap_t **heaps;
  512 
  513     /*
  514      * Base values for the mmap() code.
  515      */
  516     void *mmap_location;
  517     size_t mmap_size;
  518 
  519     /* Locked by tree_lock. */
  520     dns_rbt_t *tree;
  521     dns_rbt_t *nsec;
  522     dns_rbt_t *nsec3;
  523 
  524     /* Unlocked */
  525     unsigned int quantum;
  526 };
  527 
  528 #define RBTDB_ATTR_LOADED  0x01
  529 #define RBTDB_ATTR_LOADING 0x02
  530 
  531 #define KEEPSTALE(rbtdb) ((rbtdb)->serve_stale_ttl > 0)
  532 
  533 /*%
  534  * Search Context
  535  */
  536 typedef struct {
  537     dns_rbtdb_t *rbtdb;
  538     rbtdb_version_t *rbtversion;
  539     rbtdb_serial_t serial;
  540     unsigned int options;
  541     dns_rbtnodechain_t chain;
  542     bool copy_name;
  543     bool need_cleanup;
  544     bool wild;
  545     dns_rbtnode_t *zonecut;
  546     rdatasetheader_t *zonecut_rdataset;
  547     rdatasetheader_t *zonecut_sigrdataset;
  548     dns_fixedname_t zonecut_name;
  549     isc_stdtime_t now;
  550 } rbtdb_search_t;
  551 
  552 /*%
  553  * Load Context
  554  */
  555 typedef struct {
  556     dns_rbtdb_t *rbtdb;
  557     isc_stdtime_t now;
  558 } rbtdb_load_t;
  559 
  560 static void
  561 delete_callback(void *data, void *arg);
  562 static void
  563 rdataset_disassociate(dns_rdataset_t *rdataset);
  564 static isc_result_t
  565 rdataset_first(dns_rdataset_t *rdataset);
  566 static isc_result_t
  567 rdataset_next(dns_rdataset_t *rdataset);
  568 static void
  569 rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
  570 static void
  571 rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
  572 static unsigned int
  573 rdataset_count(dns_rdataset_t *rdataset);
  574 static isc_result_t
  575 rdataset_getnoqname(dns_rdataset_t *rdataset, dns_name_t *name,
  576             dns_rdataset_t *neg, dns_rdataset_t *negsig);
  577 static isc_result_t
  578 rdataset_getclosest(dns_rdataset_t *rdataset, dns_name_t *name,
  579             dns_rdataset_t *neg, dns_rdataset_t *negsig);
  580 static inline bool
  581 need_headerupdate(rdatasetheader_t *header, isc_stdtime_t now);
  582 static void
  583 update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, isc_stdtime_t now);
  584 static void
  585 expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, bool tree_locked,
  586           expire_t reason);
  587 static void
  588 overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start, isc_stdtime_t now,
  589           bool tree_locked);
  590 static isc_result_t
  591 resign_insert(dns_rbtdb_t *rbtdb, int idx, rdatasetheader_t *newheader);
  592 static void
  593 resign_delete(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
  594           rdatasetheader_t *header);
  595 static void
  596 prune_tree(isc_task_t *task, isc_event_t *event);
  597 static void
  598 rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
  599 static void
  600 rdataset_expire(dns_rdataset_t *rdataset);
  601 static void
  602 rdataset_clearprefetch(dns_rdataset_t *rdataset);
  603 static void
  604 rdataset_setownercase(dns_rdataset_t *rdataset, const dns_name_t *name);
  605 static void
  606 rdataset_getownercase(const dns_rdataset_t *rdataset, dns_name_t *name);
  607 static isc_result_t
  608 rdataset_addglue(dns_rdataset_t *rdataset, dns_dbversion_t *version,
  609          dns_message_t *msg);
  610 static void
  611 free_gluetable(rbtdb_version_t *version);
  612 static isc_result_t
  613 nodefullname(dns_db_t *db, dns_dbnode_t *node, dns_name_t *name);
  614 
  615 static dns_rdatasetmethods_t rdataset_methods = { rdataset_disassociate,
  616                           rdataset_first,
  617                           rdataset_next,
  618                           rdataset_current,
  619                           rdataset_clone,
  620                           rdataset_count,
  621                           NULL, /* addnoqname */
  622                           rdataset_getnoqname,
  623                           NULL, /* addclosest */
  624                           rdataset_getclosest,
  625                           rdataset_settrust,
  626                           rdataset_expire,
  627                           rdataset_clearprefetch,
  628                           rdataset_setownercase,
  629                           rdataset_getownercase,
  630                           rdataset_addglue };
  631 
  632 static dns_rdatasetmethods_t slab_methods = {
  633     rdataset_disassociate,
  634     rdataset_first,
  635     rdataset_next,
  636     rdataset_current,
  637     rdataset_clone,
  638     rdataset_count,
  639     NULL, /* addnoqname */
  640     NULL, /* getnoqname */
  641     NULL, /* addclosest */
  642     NULL, /* getclosest */
  643     NULL, /* settrust */
  644     NULL, /* expire */
  645     NULL, /* clearprefetch */
  646     NULL, /* setownercase */
  647     NULL, /* getownercase */
  648     NULL  /* addglue */
  649 };
  650 
  651 static void
  652 rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
  653 static isc_result_t
  654 rdatasetiter_first(dns_rdatasetiter_t *iterator);
  655 static isc_result_t
  656 rdatasetiter_next(dns_rdatasetiter_t *iterator);
  657 static void
  658 rdatasetiter_current(dns_rdatasetiter_t *iterator, dns_rdataset_t *rdataset);
  659 
  660 static dns_rdatasetitermethods_t rdatasetiter_methods = {
  661     rdatasetiter_destroy, rdatasetiter_first, rdatasetiter_next,
  662     rdatasetiter_current
  663 };
  664 
  665 typedef struct rbtdb_rdatasetiter {
  666     dns_rdatasetiter_t common;
  667     rdatasetheader_t *current;
  668 } rbtdb_rdatasetiter_t;
  669 
  670 /*
  671  * Note that these iterators, unless created with either DNS_DB_NSEC3ONLY or
  672  * DNS_DB_NONSEC3, will transparently move between the last node of the
  673  * "regular" RBT ("chain" field) and the root node of the NSEC3 RBT
  674  * ("nsec3chain" field) of the database in question, as if the latter was a
  675  * successor to the former in lexical order.  The "current" field always holds
  676  * the address of either "chain" or "nsec3chain", depending on which RBT is
  677  * being traversed at given time.
  678  */
  679 static void
  680 dbiterator_destroy(dns_dbiterator_t **iteratorp);
  681 static isc_result_t
  682 dbiterator_first(dns_dbiterator_t *iterator);
  683 static isc_result_t
  684 dbiterator_last(dns_dbiterator_t *iterator);
  685 static isc_result_t
  686 dbiterator_seek(dns_dbiterator_t *iterator, const dns_name_t *name);
  687 static isc_result_t
  688 dbiterator_prev(dns_dbiterator_t *iterator);
  689 static isc_result_t
  690 dbiterator_next(dns_dbiterator_t *iterator);
  691 static isc_result_t
  692 dbiterator_current(dns_dbiterator_t *iterator, dns_dbnode_t **nodep,
  693            dns_name_t *name);
  694 static isc_result_t
  695 dbiterator_pause(dns_dbiterator_t *iterator);
  696 static isc_result_t
  697 dbiterator_origin(dns_dbiterator_t *iterator, dns_name_t *name);
  698 
  699 static dns_dbiteratormethods_t dbiterator_methods = {
  700     dbiterator_destroy, dbiterator_first, dbiterator_last,
  701     dbiterator_seek,    dbiterator_prev,  dbiterator_next,
  702     dbiterator_current, dbiterator_pause, dbiterator_origin
  703 };
  704 
  705 #define DELETION_BATCH_MAX 64
  706 
  707 /*
  708  * If 'paused' is true, then the tree lock is not being held.
  709  */
  710 typedef struct rbtdb_dbiterator {
  711     dns_dbiterator_t common;
  712     bool paused;
  713     bool new_origin;
  714     isc_rwlocktype_t tree_locked;
  715     isc_result_t result;
  716     dns_fixedname_t name;
  717     dns_fixedname_t origin;
  718     dns_rbtnodechain_t chain;
  719     dns_rbtnodechain_t nsec3chain;
  720     dns_rbtnodechain_t *current;
  721     dns_rbtnode_t *node;
  722     dns_rbtnode_t *deletions[DELETION_BATCH_MAX];
  723     int delcnt;
  724     bool nsec3only;
  725     bool nonsec3;
  726 } rbtdb_dbiterator_t;
  727 
  728 #define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB) != 0)
  729 #define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
  730 
  731 static void
  732 free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event);
  733 static void
  734 overmem(dns_db_t *db, bool over);
  735 static void
  736 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
  737 static void
  738 setownercase(rdatasetheader_t *header, const dns_name_t *name);
  739 
  740 static bool
  741 match_header_version(rbtdb_file_header_t *header);
  742 
  743 /* Pad to 32 bytes */
  744 static char FILE_VERSION[32] = "\0";
  745 
  746 /*%
  747  * 'init_count' is used to initialize 'newheader->count' which inturn
  748  * is used to determine where in the cycle rrset-order cyclic starts.
  749  * We don't lock this as we don't care about simultaneous updates.
  750  *
  751  * Note:
  752  *      Both init_count and header->count can be UINT32_MAX.
  753  *      The count on the returned rdataset however can't be as
  754  *      that indicates that the database does not implement cyclic
  755  *      processing.
  756  */
  757 static atomic_uint_fast32_t init_count;
  758 
  759 /*
  760  * Locking
  761  *
  762  * If a routine is going to lock more than one lock in this module, then
  763  * the locking must be done in the following order:
  764  *
  765  *      Tree Lock
  766  *
  767  *      Node Lock       (Only one from the set may be locked at one time by
  768  *                       any caller)
  769  *
  770  *      Database Lock
  771  *
  772  * Failure to follow this hierarchy can result in deadlock.
  773  */
  774 
  775 /*
  776  * Deleting Nodes
  777  *
  778  * For zone databases the node for the origin of the zone MUST NOT be deleted.
  779  */
  780 
  781 /*
  782  * Debugging routines
  783  */
  784 #ifdef DEBUG
  785 static void
  786 hexdump(const char *desc, unsigned char *data, size_t size) {
  787     char hexdump[BUFSIZ * 2 + 1];
  788     isc_buffer_t b;
  789     isc_region_t r;
  790     isc_result_t result;
  791     size_t bytes;
  792 
  793     fprintf(stderr, "%s: ", desc);
  794     do {
  795         isc_buffer_init(&b, hexdump, sizeof(hexdump));
  796         r.base = data;
  797         r.length = bytes = (size > BUFSIZ) ? BUFSIZ : size;
  798         result = isc_hex_totext(&r, 0, "", &b);
  799         RUNTIME_CHECK(result == ISC_R_SUCCESS);
  800         isc_buffer_putuint8(&b, 0);
  801         fprintf(stderr, "%s", hexdump);
  802         data += bytes;
  803         size -= bytes;
  804     } while (size > 0);
  805     fprintf(stderr, "\n");
  806 }
  807 #endif /* ifdef DEBUG */
  808 
  809 /* Fixed RRSet helper macros */
  810 
  811 #define DNS_RDATASET_LENGTH 2;
  812 
  813 #if DNS_RDATASET_FIXED
  814 #define DNS_RDATASET_ORDER 2
  815 #define DNS_RDATASET_COUNT (count * 4)
  816 #else /* !DNS_RDATASET_FIXED */
  817 #define DNS_RDATASET_ORDER 0
  818 #define DNS_RDATASET_COUNT 0
  819 #endif /* DNS_RDATASET_FIXED */
  820 
  821 /*
  822  * DB Routines
  823  */
  824 
  825 static void
  826 attach(dns_db_t *source, dns_db_t **targetp) {
  827     dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
  828 
  829     REQUIRE(VALID_RBTDB(rbtdb));
  830 
  831     isc_refcount_increment(&rbtdb->references);
  832 
  833     *targetp = source;
  834 }
  835 
  836 static void
  837 free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
  838     dns_rbtdb_t *rbtdb = event->ev_arg;
  839 
  840     UNUSED(task);
  841 
  842     free_rbtdb(rbtdb, true, event);
  843 }
  844 
  845 static void
  846 update_cachestats(dns_rbtdb_t *rbtdb, isc_result_t result) {
  847     INSIST(IS_CACHE(rbtdb));
  848 
  849     if (rbtdb->cachestats == NULL) {
  850         return;
  851     }
  852 
  853     switch (result) {
  854     case ISC_R_SUCCESS:
  855     case DNS_R_CNAME:
  856     case DNS_R_DNAME:
  857     case DNS_R_DELEGATION:
  858     case DNS_R_NCACHENXDOMAIN:
  859     case DNS_R_NCACHENXRRSET:
  860         isc_stats_increment(rbtdb->cachestats,
  861                     dns_cachestatscounter_hits);
  862         break;
  863     default:
  864         isc_stats_increment(rbtdb->cachestats,
  865                     dns_cachestatscounter_misses);
  866     }
  867 }
  868 
  869 static bool
  870 do_stats(rdatasetheader_t *header) {
  871     return (EXISTS(header) && STATCOUNT(header));
  872 }
  873 
  874 static void
  875 update_rrsetstats(dns_rbtdb_t *rbtdb, const rbtdb_rdatatype_t htype,
  876           const uint_least16_t hattributes, const bool increment) {
  877     dns_rdatastatstype_t statattributes = 0;
  878     dns_rdatastatstype_t base = 0;
  879     dns_rdatastatstype_t type;
  880     rdatasetheader_t *header = &(rdatasetheader_t){
  881         .type = htype, .attributes = ATOMIC_VAR_INIT(hattributes)
  882     };
  883 
  884     if (!do_stats(header)) {
  885         return;
  886     }
  887 
  888     /* At the moment we count statistics only for cache DB */
  889     INSIST(IS_CACHE(rbtdb));
  890 
  891     if (NEGATIVE(header)) {
  892         if (NXDOMAIN(header)) {
  893             statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
  894         } else {
  895             statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
  896             base = RBTDB_RDATATYPE_EXT(header->type);
  897         }
  898     } else {
  899         base = RBTDB_RDATATYPE_BASE(header->type);
  900     }
  901 
  902     if (STALE(header)) {
  903         statattributes |= DNS_RDATASTATSTYPE_ATTR_STALE;
  904     }
  905     if (ANCIENT(header)) {
  906         statattributes |= DNS_RDATASTATSTYPE_ATTR_ANCIENT;
  907     }
  908 
  909     type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
  910     if (increment) {
  911         dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
  912     } else {
  913         dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
  914     }
  915 }
  916 
  917 static void
  918 set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
  919     int idx;
  920     isc_heap_t *heap;
  921     dns_ttl_t oldttl;
  922 
  923     if (!IS_CACHE(rbtdb)) {
  924         header->rdh_ttl = newttl;
  925         return;
  926     }
  927 
  928     oldttl = header->rdh_ttl;
  929     header->rdh_ttl = newttl;
  930 
  931     /*
  932      * It's possible the rbtdb is not a cache.  If this is the case,
  933      * we will not have a heap, and we move on.  If we do, though,
  934      * we might need to adjust things.
  935      */
  936     if (header->heap_index == 0 || newttl == oldttl) {
  937         return;
  938     }
  939     idx = header->node->locknum;
  940     if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL) {
  941         return;
  942     }
  943     heap = rbtdb->heaps[idx];
  944 
  945     if (newttl < oldttl) {
  946         isc_heap_increased(heap, header->heap_index);
  947     } else {
  948         isc_heap_decreased(heap, header->heap_index);
  949     }
  950 }
  951 
  952 /*%
  953  * These functions allow the heap code to rank the priority of each
  954  * element.  It returns true if v1 happens "sooner" than v2.
  955  */
  956 static bool
  957 ttl_sooner(void *v1, void *v2) {
  958     rdatasetheader_t *h1 = v1;
  959     rdatasetheader_t *h2 = v2;
  960 
  961     return (h1->rdh_ttl < h2->rdh_ttl);
  962 }
  963 
  964 /*%
  965  * Return which RRset should be resigned sooner.  If the RRsets have the
  966  * same signing time, prefer the other RRset over the SOA RRset.
  967  */
  968 static bool
  969 resign_sooner(void *v1, void *v2) {
  970     rdatasetheader_t *h1 = v1;
  971     rdatasetheader_t *h2 = v2;
  972 
  973     return (h1->resign < h2->resign ||
  974         (h1->resign == h2->resign && h1->resign_lsb < h2->resign_lsb) ||
  975         (h1->resign == h2->resign && h1->resign_lsb == h2->resign_lsb &&
  976          h2->type == RBTDB_RDATATYPE_SIGSOA));
  977 }
  978 
  979 /*%
  980  * This function sets the heap index into the header.
  981  */
  982 static void
  983 set_index(void *what, unsigned int idx) {
  984     rdatasetheader_t *h = what;
  985 
  986     h->heap_index = idx;
  987 }
  988 
  989 /*%
  990  * Work out how many nodes can be deleted in the time between two
  991  * requests to the nameserver.  Smooth the resulting number and use it
  992  * as a estimate for the number of nodes to be deleted in the next
  993  * iteration.
  994  */
  995 static unsigned int
  996 adjust_quantum(unsigned int old, isc_time_t *start) {
  997     unsigned int pps = dns_pps; /* packets per second */
  998     unsigned int interval;
  999     uint64_t usecs;
 1000     isc_time_t end;
 1001     unsigned int nodes;
 1002 
 1003     if (pps < 100) {
 1004         pps = 100;
 1005     }
 1006     isc_time_now(&end);
 1007 
 1008     interval = 1000000 / pps; /* interval in usec */
 1009     if (interval == 0) {
 1010         interval = 1;
 1011     }
 1012     usecs = isc_time_microdiff(&end, start);
 1013     if (usecs == 0) {
 1014         /*
 1015          * We were unable to measure the amount of time taken.
 1016          * Double the nodes deleted next time.
 1017          */
 1018         old *= 2;
 1019         if (old > 1000) {
 1020             old = 1000;
 1021         }
 1022         return (old);
 1023     }
 1024     nodes = old * interval;
 1025     nodes /= (unsigned int)usecs;
 1026     if (nodes == 0) {
 1027         nodes = 1;
 1028     } else if (nodes > 1000) {
 1029         nodes = 1000;
 1030     }
 1031 
 1032     /* Smooth */
 1033     nodes = (nodes + old * 3) / 4;
 1034 
 1035     if (nodes != old) {
 1036         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
 1037                   DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
 1038                   "adjust_quantum: old=%d, new=%d", old, nodes);
 1039     }
 1040 
 1041     return (nodes);
 1042 }
 1043 
 1044 static void
 1045 free_rbtdb(dns_rbtdb_t *rbtdb, bool log, isc_event_t *event) {
 1046     unsigned int i;
 1047     isc_result_t result;
 1048     char buf[DNS_NAME_FORMATSIZE];
 1049     dns_rbt_t **treep;
 1050     isc_time_t start;
 1051     dns_dbonupdatelistener_t *listener, *listener_next;
 1052 
 1053     if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in) {
 1054         overmem((dns_db_t *)rbtdb, (bool)-1);
 1055     }
 1056 
 1057     REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
 1058     REQUIRE(rbtdb->future_version == NULL);
 1059 
 1060     if (rbtdb->current_version != NULL) {
 1061         isc_refcount_decrementz(&rbtdb->current_version->references);
 1062         UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
 1063         isc_rwlock_destroy(&rbtdb->current_version->glue_rwlock);
 1064         isc_refcount_destroy(&rbtdb->current_version->references);
 1065         isc_rwlock_destroy(&rbtdb->current_version->rwlock);
 1066         isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
 1067                 sizeof(rbtdb_version_t));
 1068     }
 1069 
 1070     /*
 1071      * We assume the number of remaining dead nodes is reasonably small;
 1072      * the overhead of unlinking all nodes here should be negligible.
 1073      */
 1074     for (i = 0; i < rbtdb->node_lock_count; i++) {
 1075         dns_rbtnode_t *node;
 1076 
 1077         node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
 1078         while (node != NULL) {
 1079             ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
 1080             node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
 1081         }
 1082     }
 1083 
 1084     if (event == NULL) {
 1085         rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
 1086     }
 1087 
 1088     for (;;) {
 1089         /*
 1090          * pick the next tree to (start to) destroy
 1091          */
 1092         treep = &rbtdb->tree;
 1093         if (*treep == NULL) {
 1094             treep = &rbtdb->nsec;
 1095             if (*treep == NULL) {
 1096                 treep = &rbtdb->nsec3;
 1097                 /*
 1098                  * we're finished after clear cutting
 1099                  */
 1100                 if (*treep == NULL) {
 1101                     break;
 1102                 }
 1103             }
 1104         }
 1105 
 1106         isc_time_now(&start);
 1107         result = dns_rbt_destroy2(treep, rbtdb->quantum);
 1108         if (result == ISC_R_QUOTA) {
 1109             INSIST(rbtdb->task != NULL);
 1110             if (rbtdb->quantum != 0) {
 1111                 rbtdb->quantum = adjust_quantum(rbtdb->quantum,
 1112                                 &start);
 1113             }
 1114             if (event == NULL) {
 1115                 event = isc_event_allocate(
 1116                     rbtdb->common.mctx, NULL,
 1117                     DNS_EVENT_FREESTORAGE,
 1118                     free_rbtdb_callback, rbtdb,
 1119                     sizeof(isc_event_t));
 1120             }
 1121             isc_task_send(rbtdb->task, &event);
 1122             return;
 1123         }
 1124         INSIST(result == ISC_R_SUCCESS && *treep == NULL);
 1125     }
 1126 
 1127     if (event != NULL) {
 1128         isc_event_free(&event);
 1129     }
 1130     if (log) {
 1131         if (dns_name_dynamic(&rbtdb->common.origin)) {
 1132             dns_name_format(&rbtdb->common.origin, buf,
 1133                     sizeof(buf));
 1134         } else {
 1135             strlcpy(buf, "<UNKNOWN>", sizeof(buf));
 1136         }
 1137         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
 1138                   DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
 1139                   "done free_rbtdb(%s)", buf);
 1140     }
 1141     if (dns_name_dynamic(&rbtdb->common.origin)) {
 1142         dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
 1143     }
 1144     for (i = 0; i < rbtdb->node_lock_count; i++) {
 1145         isc_refcount_destroy(&rbtdb->node_locks[i].references);
 1146         NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
 1147     }
 1148 
 1149     /*
 1150      * Clean up LRU / re-signing order lists.
 1151      */
 1152     if (rbtdb->rdatasets != NULL) {
 1153         for (i = 0; i < rbtdb->node_lock_count; i++) {
 1154             INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
 1155         }
 1156         isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
 1157                 rbtdb->node_lock_count *
 1158                     sizeof(rdatasetheaderlist_t));
 1159     }
 1160     /*
 1161      * Clean up dead node buckets.
 1162      */
 1163     if (rbtdb->deadnodes != NULL) {
 1164         for (i = 0; i < rbtdb->node_lock_count; i++) {
 1165             INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
 1166         }
 1167         isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
 1168                 rbtdb->node_lock_count * sizeof(rbtnodelist_t));
 1169     }
 1170     /*
 1171      * Clean up heap objects.
 1172      */
 1173     if (rbtdb->heaps != NULL) {
 1174         for (i = 0; i < rbtdb->node_lock_count; i++) {
 1175             isc_heap_destroy(&rbtdb->heaps[i]);
 1176         }
 1177         isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
 1178                 rbtdb->node_lock_count * sizeof(isc_heap_t *));
 1179     }
 1180 
 1181     if (rbtdb->rrsetstats != NULL) {
 1182         dns_stats_detach(&rbtdb->rrsetstats);
 1183     }
 1184     if (rbtdb->cachestats != NULL) {
 1185         isc_stats_detach(&rbtdb->cachestats);
 1186     }
 1187     if (rbtdb->gluecachestats != NULL) {
 1188         isc_stats_detach(&rbtdb->gluecachestats);
 1189     }
 1190 
 1191     isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
 1192             rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
 1193     isc_rwlock_destroy(&rbtdb->tree_lock);
 1194     isc_refcount_destroy(&rbtdb->references);
 1195     if (rbtdb->task != NULL) {
 1196         isc_task_detach(&rbtdb->task);
 1197     }
 1198 
 1199     RBTDB_DESTROYLOCK(&rbtdb->lock);
 1200     rbtdb->common.magic = 0;
 1201     rbtdb->common.impmagic = 0;
 1202     isc_mem_detach(&rbtdb->hmctx);
 1203 
 1204     if (rbtdb->mmap_location != NULL) {
 1205         isc_file_munmap(rbtdb->mmap_location, (size_t)rbtdb->mmap_size);
 1206     }
 1207 
 1208     for (listener = ISC_LIST_HEAD(rbtdb->common.update_listeners);
 1209          listener != NULL; listener = listener_next)
 1210     {
 1211         listener_next = ISC_LIST_NEXT(listener, link);
 1212         ISC_LIST_UNLINK(rbtdb->common.update_listeners, listener, link);
 1213         isc_mem_put(rbtdb->common.mctx, listener,
 1214                 sizeof(dns_dbonupdatelistener_t));
 1215     }
 1216 
 1217     isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
 1218 }
 1219 
 1220 static inline void
 1221 maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
 1222     bool want_free = false;
 1223     unsigned int i;
 1224     unsigned int inactive = 0;
 1225 
 1226     /* XXX check for open versions here */
 1227 
 1228     if (rbtdb->soanode != NULL) {
 1229         dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
 1230     }
 1231     if (rbtdb->nsnode != NULL) {
 1232         dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);
 1233     }
 1234 
 1235     /*
 1236      * The current version's glue table needs to be freed early
 1237      * so the nodes are dereferenced before we check the active
 1238      * node count below.
 1239      */
 1240     if (rbtdb->current_version != NULL) {
 1241         free_gluetable(rbtdb->current_version);
 1242     }
 1243 
 1244     /*
 1245      * Even though there are no external direct references, there still
 1246      * may be nodes in use.
 1247      */
 1248     for (i = 0; i < rbtdb->node_lock_count; i++) {
 1249         NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
 1250         rbtdb->node_locks[i].exiting = true;
 1251         NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
 1252         if (isc_refcount_current(&rbtdb->node_locks[i].references) == 0)
 1253         {
 1254             inactive++;
 1255         }
 1256     }
 1257 
 1258     if (inactive != 0) {
 1259         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
 1260         rbtdb->active -= inactive;
 1261         if (rbtdb->active == 0) {
 1262             want_free = true;
 1263         }
 1264         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
 1265         if (want_free) {
 1266             char buf[DNS_NAME_FORMATSIZE];
 1267             if (dns_name_dynamic(&rbtdb->common.origin)) {
 1268                 dns_name_format(&rbtdb->common.origin, buf,
 1269                         sizeof(buf));
 1270             } else {
 1271                 strlcpy(buf, "<UNKNOWN>", sizeof(buf));
 1272             }
 1273             isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
 1274                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
 1275                       "calling free_rbtdb(%s)", buf);
 1276             free_rbtdb(rbtdb, true, NULL);
 1277         }
 1278     }
 1279 }
 1280 
 1281 static void
 1282 detach(dns_db_t **dbp) {
 1283     REQUIRE(dbp != NULL && VALID_RBTDB((dns_rbtdb_t *)(*dbp)));
 1284     dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
 1285     *dbp = NULL;
 1286 
 1287     if (isc_refcount_decrement(&rbtdb->references) == 1) {
 1288         maybe_free_rbtdb(rbtdb);
 1289     }
 1290 }
 1291 
 1292 static void
 1293 currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
 1294     dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
 1295     rbtdb_version_t *version;
 1296 
 1297     REQUIRE(VALID_RBTDB(rbtdb));
 1298 
 1299     RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
 1300     version = rbtdb->current_version;
 1301     isc_refcount_increment(&version->references);
 1302     RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
 1303 
 1304     *versionp = (dns_dbversion_t *)version;
 1305 }
 1306 
 1307 static inline rbtdb_version_t *
 1308 allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
 1309          unsigned int references, bool writer) {
 1310     isc_result_t result;
 1311     rbtdb_version_t *version;
 1312     size_t size;
 1313 
 1314     version = isc_mem_get(mctx, sizeof(*version));
 1315     version->serial = serial;
 1316 
 1317     isc_refcount_init(&version->references, references);
 1318 
 1319     result = isc_rwlock_init(&version->glue_rwlock, 0, 0);
 1320     if (result != ISC_R_SUCCESS) {
 1321         isc_refcount_destroy(&version->references);
 1322         isc_mem_put(mctx, version, sizeof(*version));
 1323         return (NULL);
 1324     }
 1325 
 1326     version->glue_table_bits = RBTDB_GLUE_TABLE_INIT_BITS;
 1327     version->glue_table_nodecount = 0U;
 1328 
 1329     size = HASHSIZE(version->glue_table_bits) *
 1330            sizeof(version->glue_table[0]);
 1331     version->glue_table = isc_mem_get(mctx, size);
 1332     memset(version->glue_table, 0, size);
 1333 
 1334     version->writer = writer;
 1335     version->commit_ok = false;
 1336     ISC_LIST_INIT(version->changed_list);
 1337     ISC_LIST_INIT(version->resigned_list);
 1338     ISC_LINK_INIT(version, link);
 1339 
 1340     return (version);
 1341 }
 1342 
 1343 static isc_result_t
 1344 newversion(dns_db_t *db, dns_dbversion_t **versionp) {
 1345     isc_result_t result;
 1346     dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
 1347     rbtdb_version_t *version;
 1348 
 1349     REQUIRE(VALID_RBTDB(rbtdb));
 1350     REQUIRE(versionp != NULL && *versionp == NULL);
 1351     REQUIRE(rbtdb->future_version == NULL);
 1352 
 1353     RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
 1354     RUNTIME_CHECK(rbtdb->next_serial != 0); /* XXX Error? */
 1355     version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
 1356                    true);
 1357     if (version != NULL) {
 1358         version->rbtdb = rbtdb;
 1359         version->commit_ok = true;
 1360         version->secure = rbtdb->current_version->secure;
 1361         version->havensec3 = rbtdb->current_version->havensec3;
 1362         if (version->havensec3) {
 1363             version->flags = rbtdb->current_version->flags;
 1364             version->iterations =
 1365                 rbtdb->current_version->iterations;
 1366             version->hash = rbtdb->current_version->hash;
 1367             version->salt_length =
 1368                 rbtdb->current_version->salt_length;
 1369             memmove(version->salt, rbtdb->current_version->salt,
 1370                 version->salt_length);
 1371         } else {
 1372             version->flags = 0;
 1373             version->iterations = 0;
 1374             version->hash = 0;
 1375             version->salt_length = 0;
 1376             memset(version->salt, 0, sizeof(version->salt));
 1377         }
 1378         result = isc_rwlock_init(&version->rwlock, 0, 0);
 1379         if (result != ISC_R_SUCCESS) {
 1380             free_gluetable(version);
 1381             isc_rwlock_destroy(&version->glue_rwlock);
 1382             isc_refcount_destroy(&version->references);
 1383             isc_mem_put(rbtdb->common.mctx, version,
 1384                     sizeof(*version));
 1385             version = NULL;
 1386         } else {
 1387             RWLOCK(&rbtdb->current_version->rwlock,
 1388                    isc_rwlocktype_read);
 1389             version->records = rbtdb->current_version->records;
 1390             version->xfrsize = rbtdb->current_version->xfrsize;
 1391             RWUNLOCK(&rbtdb->current_version->rwlock,
 1392                  isc_rwlocktype_read);
 1393             rbtdb->next_serial++;
 1394             rbtdb->future_version = version;
 1395         }
 1396     } else {
 1397         result = ISC_R_NOMEMORY;
 1398     }
 1399     RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
 1400 
 1401     if (version == NULL) {
 1402         return (result);
 1403     }
 1404 
 1405     *versionp = version;
 1406 
 1407     return (ISC_R_SUCCESS);
 1408 }
 1409 
 1410 static void
 1411 attachversion(dns_db_t *db, dns_dbversion_t *source,
 1412           dns_dbversion_t **targetp) {
 1413     dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
 1414     rbtdb_version_t *rbtversion = source;
 1415 
 1416     REQUIRE(VALID_RBTDB(rbtdb));
 1417     INSIST(rbtversion != NULL && rbtversion->rbtdb == rbtdb);
 1418 
 1419     isc_refcount_increment(&rbtversion->references);
 1420 
 1421     *targetp = rbtversion;
 1422 }
 1423 
 1424 static rbtdb_changed_t *
 1425 add_changed(dns_rbtdb_t *rbtdb, rbtdb_version_t *version, dns_rbtnode_t *node) {
 1426     rbtdb_changed_t *changed;
 1427 
 1428     /*
 1429      * Caller must be holding the node lock if its reference must be
 1430      * protected by the lock.
 1431      */
 1432 
 1433     changed = isc_mem_get(rbtdb->common.mctx, sizeof(*changed));
 1434 
 1435     RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
 1436 
 1437     REQUIRE(version->writer);
 1438 
 1439     if (changed != NULL) {
 1440         isc_refcount_increment(&node->references);
 1441         changed->node = node;
 1442         changed->dirty = false;
 1443         ISC_LIST_INITANDAPPEND(version->changed_list, changed, link);
 1444     } else {
 1445         version->commit_ok = false;
 1446     }
 1447 
 1448     RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
 1449 
 1450     return (changed);
 1451 }
 1452 
 1453 static inline void
 1454 free_noqname(isc_mem_t *mctx, struct noqname **noqname) {
 1455     if (dns_name_dynamic(&(*noqname)->name)) {
 1456         dns_name_free(&(*noqname)->name, mctx);
 1457     }
 1458     if ((*noqname)->neg != NULL) {
 1459         isc_mem_put(mctx, (*noqname)->neg,
 1460                 dns_rdataslab_size((*noqname)->neg, 0));
 1461     }
 1462     if ((*noqname)->negsig != NULL) {
 1463         isc_mem_put(mctx, (*noqname)->negsig,
 1464                 dns_rdataslab_size((*noqname)->negsig, 0));
 1465     }
 1466     isc_mem_put(mctx, *noqname, sizeof(**noqname));
 1467     *noqname = NULL;
 1468 }
 1469 
 1470 static inline void
 1471 init_rdataset(dns_rbtdb_t *rbtdb, rdatasetheader_t *h) {
 1472     ISC_LINK_INIT(h, link);
 1473     h->heap_index = 0;
 1474     h->is_mmapped = 0;
 1475     h->next_is_relative = 0;
 1476     h->node_is_relative = 0;
 1477     atomic_init(&h->attributes, 0);
 1478 
 1479 #ifndef ISC_MUTEX_ATOMICS
 1480     STATIC_ASSERT((sizeof(h->attributes) == 2),
 1481               "The .attributes field of rdatasetheader_t needs to be "
 1482               "16-bit int type exactly.");
 1483 #endif /* !ISC_MUTEX_ATOMICS */
 1484 
 1485 #if TRACE_HEADER
 1486     if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in) {
 1487         fprintf(stderr, "initialized header: %p\n", h);
 1488     }
 1489 #else  /* if TRACE_HEADER */
 1490     UNUSED(rbtdb);
 1491 #endif /* if TRACE_HEADER */
 1492 }
 1493 
 1494 /*
 1495  * Update the copied values of 'next' and 'node' if they are relative.
 1496  */
 1497 static void
 1498 update_newheader(rdatasetheader_t *newh, rdatasetheader_t *old) {
 1499     char *p;
 1500 
 1501     if (old->next_is_relative) {
 1502         p = (char *)old;
 1503         p += (uintptr_t)old->next;
 1504         newh->next = (rdatasetheader_t *)p;
 1505     }
 1506     if (old->node_is_relative) {
 1507         p = (char *)old;
 1508         p += (uintptr_t)old->node;
 1509         newh->node = (dns_rbtnode_t *)p;
 1510     }
 1511     if (CASESET(old)) {
 1512         uint_least16_t attr = RDATASET_ATTR_GET(
 1513             old,
 1514             (RDATASET_ATTR_CASESET | RDATASET_ATTR_CASEFULLYLOWER));
 1515         RDATASET_ATTR_SET(newh, attr);
 1516         memmove(newh->upper, old->upper, sizeof(old->upper));
 1517     }
 1518 }
 1519 
 1520 static inline rdatasetheader_t *
 1521 new_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx) {
 1522     rdatasetheader_t *h;
 1523 
 1524     h = isc_mem_get(mctx, sizeof(*h));
 1525 
 1526 #if TRACE_HEADER
 1527     if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in) {
 1528         fprintf(stderr, "allocated header: %p\n", h);
 1529     }
 1530 #endif /* if TRACE_HEADER */
 1531     memset(h->upper, 0xeb, sizeof(h->upper));
 1532     init_rdataset(rbtdb, h);
 1533     h->rdh_ttl = 0;
 1534     return (h);
 1535 }
 1536 
 1537 static inline void
 1538 free_rdataset(dns_rbtdb_t *rbtdb, isc_mem_t *mctx, rdatasetheader_t *rdataset) {
 1539     unsigned int size;
 1540     int idx;
 1541 
 1542     update_rrsetstats(rbtdb, rdataset->type,
 1543               atomic_load_acquire(&rdataset->attributes), false);
 1544 
 1545     idx = rdataset->node->locknum;
 1546     if (ISC_LINK_LINKED(rdataset, link)) {
 1547         INSIST(IS_CACHE(rbtdb));
 1548         ISC_LIST_UNLINK(rbtdb->rdatasets[idx], rdataset, link);
 1549     }
 1550 
 1551     if (rdataset->heap_index != 0) {
 1552         isc_heap_delete(rbtdb->heaps[idx], rdataset->heap_index);
 1553     }
 1554     rdataset->heap_index = 0;
 1555 
 1556     if (rdataset->noqname != NULL) {
 1557         free_noqname(mctx, &rdataset->noqname);
 1558     }
 1559     if (rdataset->closest != NULL) {
 1560         free_noqname(mctx, &rdataset->closest);
 1561     }
 1562 
 1563     if (NONEXISTENT(rdataset)) {
 1564         size = sizeof(*rdataset);
 1565     } else {
 1566         size = dns_rdataslab_size((unsigned char *)rdataset,
 1567                       sizeof(*rdataset));
 1568     }
 1569 
 1570     if (rdataset->is_mmapped == 1) {
 1571         return;
 1572     }
 1573 
 1574     isc_mem_put(mctx, rdataset, size);
 1575 }
 1576 
 1577 static inline void
 1578 rollback_node(dns_rbtnode_t *node, rbtdb_serial_t serial) {
 1579     rdatasetheader_t *header, *dcurrent;
 1580     bool make_dirty = false;
 1581 
 1582     /*
 1583      * Caller must hold the node lock.
 1584      */
 1585 
 1586     /*
 1587      * We set the IGNORE attribute on rdatasets with serial number
 1588      * 'serial'.  When the reference count goes to zero, these rdatasets
 1589      * will be cleaned up; until that time, they will be ignored.
 1590      */
 1591     for (header = node->data; header != NULL; header = header->next) {
 1592         if (header->serial == serial) {
 1593             RDATASET_ATTR_SET(header, RDATASET_ATTR_IGNORE);
 1594             make_dirty = true;
 1595         }
 1596         for (dcurrent = header->down; dcurrent != NULL;
 1597              dcurrent = dcurrent->down) {
 1598             if (dcurrent->serial == serial) {
 1599                 RDATASET_ATTR_SET(dcurrent,
 1600                           RDATASET_ATTR_IGNORE);
 1601                 make_dirty = true;
 1602             }
 1603         }
 1604     }
 1605     if (make_dirty) {
 1606         node->dirty = 1;
 1607     }
 1608 }
 1609 
 1610 static inline void
 1611 mark_header_ancient(dns_rbtdb_t *rbtdb, rdatasetheader_t *header) {
 1612     uint_least16_t attributes = atomic_load_acquire(&header->attributes);
 1613     uint_least16_t newattributes = 0;
 1614 
 1615     /*
 1616      * If we are already ancient there is nothing to do.
 1617      */
 1618     do {
 1619         if ((attributes & RDATASET_ATTR_ANCIENT) != 0) {
 1620             return;
 1621         }
 1622         newattributes = attributes | RDATASET_ATTR_ANCIENT;
 1623     } while (!atomic_compare_exchange_weak_acq_rel(
 1624         &header->attributes, &attributes, newattributes));
 1625 
 1626     /*
 1627      * Decrement the stats counter for the appropriate RRtype.
 1628      * If the STALE attribute is set, this will decrement the
 1629      * stale type counter, otherwise it decrements the active
 1630      * stats type counter.
 1631      */
 1632     update_rrsetstats(rbtdb, header->type, attributes, false);
 1633     header->node->dirty = 1;
 1634 
 1635     /* Increment the stats counter for the ancient RRtype. */
 1636     update_rrsetstats(rbtdb, header->type, newattributes, true);
 1637 }
 1638 
 1639 static inline void
 1640 mark_header_stale(dns_rbtdb_t *rbtdb, rdatasetheader_t *header) {
 1641     uint_least16_t attributes = atomic_load_acquire(&header->attributes);
 1642     uint_least16_t newattributes = 0;
 1643 
 1644     INSIST((attributes & RDATASET_ATTR_ZEROTTL) == 0);
 1645 
 1646     /*
 1647      * If we are already stale there is nothing to do.
 1648      */
 1649     do {
 1650         if ((attributes & RDATASET_ATTR_STALE) != 0) {
 1651             return;
 1652         }
 1653         newattributes = attributes | RDATASET_ATTR_STALE;
 1654     } while (!atomic_compare_exchange_weak_acq_rel(
 1655         &header->attributes, &attributes, newattributes));
 1656 
 1657     /* Decrement the stats counter for the appropriate RRtype.
 1658      * If the ANCIENT attribute is set (although it is very
 1659      * unlikely that an RRset goes from ANCIENT to STALE), this
 1660      * will decrement the ancient stale type counter, otherwise it
 1661      * decrements the active stats type counter.
 1662      */
 1663 
 1664     update_rrsetstats(rbtdb, header->type, attributes, false);
 1665     update_rrsetstats(rbtdb, header->type, newattributes, true);
 1666 }
 1667 
 1668 static inline void
 1669 clean_stale_headers(dns_rbtdb_t *rbtdb, isc_mem_t *mctx,
 1670             rdatasetheader_t *top) {
 1671     rdatasetheader_t *d, *down_next;
 1672 
 1673     for (d = top->down; d != NULL; d = down_next) {
 1674         down_next = d->down;
 1675         free_rdataset(rbtdb, mctx, d);
 1676     }
 1677     top->down = NULL;
 1678 }
 1679 
 1680 static inline void
 1681 clean_cache_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
 1682     rdatasetheader_t *current, *top_prev, *top_next;
 1683     isc_mem_t *mctx = rbtdb->common.mctx;
 1684 
 1685     /*
 1686      * Caller must be holding the node lock.
 1687      */
 1688 
 1689     top_prev = NULL;
 1690     for (current = node->data; current != NULL; current = top_next) {
 1691         top_next = current->next;
 1692         clean_stale_headers(rbtdb, mctx, current);
 1693         /*
 1694          * If current is nonexistent, ancient, or stale and
 1695          * we are not keeping stale, we can clean it up.
 1696          */
 1697         if (NONEXISTENT(current) || ANCIENT(current) ||
 1698             (STALE(current) && !KEEPSTALE(rbtdb)))
 1699         {
 1700             if (top_prev != NULL) {
 1701                 top_prev->next = current->next;
 1702             } else {
 1703                 node->data = current->next;
 1704             }
 1705             free_rdataset(rbtdb, mctx, current);
 1706         } else {
 1707             top_prev = current;
 1708         }
 1709     }
 1710     node->dirty = 0;
 1711 }
 1712 
 1713 static inline void
 1714 clean_zone_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
 1715         rbtdb_serial_t least_serial) {
 1716     rdatasetheader_t *current, *dcurrent, *down_next, *dparent;
 1717     rdatasetheader_t *top_prev, *top_next;
 1718     isc_mem_t *mctx = rbtdb->common.mctx;
 1719     bool still_dirty = false;
 1720 
 1721     /*
 1722      * Caller must be holding the node lock.
 1723      */
 1724     REQUIRE(least_serial != 0);
 1725 
 1726     top_prev = NULL;
 1727     for (current = node->data; current != NULL; current = top_next) {
 1728         top_next = current->next;
 1729 
 1730         /*
 1731          * First, we clean up any instances of multiple rdatasets
 1732          * with the same serial number, or that have the IGNORE
 1733          * attribute.
 1734          */
 1735         dparent = current;
 1736         for (dcurrent = current->down; dcurrent != NULL;
 1737              dcurrent = down_next) {
 1738             down_next = dcurrent->down;
 1739             INSIST(dcurrent->serial <= dparent->serial);
 1740             if (dcurrent->serial == dparent->serial ||
 1741                 IGNORE(dcurrent)) {
 1742                 if (down_next != NULL) {
 1743                     down_next->next = dparent;
 1744                 }
 1745                 dparent->down = down_next;
 1746                 free_rdataset(rbtdb, mctx, dcurrent);
 1747             } else {
 1748                 dparent = dcurrent;
 1749             }
 1750         }
 1751 
 1752         /*
 1753          * We've now eliminated all IGNORE datasets with the possible
 1754          * exception of current, which we now check.
 1755          */
 1756         if (IGNORE(current)) {
 1757             down_next = current->down;
 1758             if (down_next == NULL) {
 1759                 if (top_prev != NULL) {
 1760                     top_prev->next = current->next;
 1761                 } else {
 1762                     node->data = current->next;
 1763                 }
 1764                 free_rdataset(rbtdb, mctx, current);
 1765                 /*
 1766                  * current no longer exists, so we can
 1767                  * just continue with the loop.
 1768                  */
 1769                 continue;
 1770             } else {
 1771                 /*
 1772                  * Pull up current->down, making it the new
 1773                  * current.
 1774                  */
 1775                 if (top_prev != NULL) {
 1776                     top_prev->next = down_next;
 1777                 } else {
 1778                     node->data = down_next;
 1779                 }
 1780                 down_next->next = top_next;
 1781                 free_rdataset(rbtdb, mctx, current);
 1782                 current = down_next;
 1783             }
 1784         }
 1785 
 1786         /*
 1787          * We now try to find the first down node less than the
 1788          * least serial.
 1789          */
 1790         dparent = current;
 1791         for (dcurrent = current->down; dcurrent != NULL;
 1792              dcurrent = down_next) {
 1793             down_next = dcurrent->down;
 1794             if (dcurrent->serial < least_serial) {
 1795                 break;
 1796             }
 1797             dparent = dcurrent;
 1798         }
 1799 
 1800         /*
 1801          * If there is a such an rdataset, delete it and any older
 1802          * versions.
 1803          */
 1804         if (dcurrent != NULL) {
 1805             do {
 1806                 down_next = dcurrent->down;
 1807                 INSIST(dcurrent->serial <= least_serial);
 1808                 free_rdataset(rbtdb, mctx, dcurrent);
 1809                 dcurrent = down_next;
 1810             } while (dcurrent != NULL);
 1811             dparent->down = NULL;
 1812         }
 1813 
 1814         /*
 1815          * Note.  The serial number of 'current' might be less than
 1816          * least_serial too, but we cannot delete it because it is
 1817          * the most recent version, unless it is a NONEXISTENT
 1818          * rdataset.
 1819          */
 1820         if (current->down != NULL) {
 1821             still_dirty = true;
 1822             top_prev = current;
 1823         } else {
 1824             /*
 1825              * If this is a NONEXISTENT rdataset, we can delete it.
 1826              */
 1827             if (NONEXISTENT(current)) {
 1828                 if (top_prev != NULL) {
 1829                     top_prev->next = current->next;
 1830                 } else {
 1831                     node->data = current->next;
 1832                 }
 1833                 free_rdataset(rbtdb, mctx, current);
 1834             } else {
 1835                 top_prev = current;
 1836             }
 1837         }
 1838     }
 1839     if (!still_dirty) {
 1840         node->dirty = 0;
 1841     }
 1842 }
 1843 
 1844 /*
 1845  * tree_lock(write) must be held.
 1846  */
 1847 static void
 1848 delete_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node) {
 1849     dns_rbtnode_t *nsecnode;
 1850     dns_fixedname_t fname;
 1851     dns_name_t *name;
 1852     isc_result_t result = ISC_R_UNEXPECTED;
 1853 
 1854     INSIST(!ISC_LINK_LINKED(node, deadlink));
 1855 
 1856     if (isc_log_wouldlog(dns_lctx, ISC_LOG_DEBUG(1))) {
 1857         char printname[DNS_NAME_FORMATSIZE];
 1858         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
 1859                   DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
 1860                   "delete_node(): %p %s (bucket %d)", node,
 1861                   dns_rbt_formatnodename(node, printname,
 1862                              sizeof(printname)),
 1863                   node->locknum);
 1864     }
 1865 
 1866     switch (node->nsec) {
 1867     case DNS_RBT_NSEC_NORMAL:
 1868         /*
 1869          * Though this may be wasteful, it has to be done before
 1870          * node is deleted.
 1871          */
 1872         name = dns_fixedname_initname(&fname);
 1873         dns_rbt_fullnamefromnode(node, name);
 1874 
 1875         result = dns_rbt_deletenode(rbtdb->tree, node, false);
 1876         break;
 1877     case DNS_RBT_NSEC_HAS_NSEC:
 1878         name = dns_fixedname_initname(&fname);
 1879         dns_rbt_fullnamefromnode(node, name);
 1880         /*
 1881          * Delete the corresponding node from the auxiliary NSEC
 1882          * tree before deleting from the main tree.
 1883          */
 1884         nsecnode = NULL;
 1885         result = dns_rbt_findnode(rbtdb->nsec, name, NULL, &nsecnode,
 1886                       NULL, DNS_RBTFIND_EMPTYDATA, NULL,
 1887                       NULL);
 1888         if (result != ISC_R_SUCCESS) {
 1889             isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
 1890                       DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
 1891                       "delete_node: "
 1892                       "dns_rbt_findnode(nsec): %s",
 1893                       isc_result_totext(result));
 1894         } else {
 1895             result = dns_rbt_deletenode(rbtdb->nsec, nsecnode,
 1896                             false);
 1897             if (result != ISC_R_SUCCESS) {
 1898                 isc_log_write(
 1899                     dns_lctx, DNS_LOGCATEGORY_DATABASE,
 1900                     DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
 1901                     "delete_node(): "
 1902                     "dns_rbt_deletenode(nsecnode): %s",
 1903                     isc_result_totext(result));
 1904             }
 1905         }
 1906         result = dns_rbt_deletenode(rbtdb->tree, node, false);
 1907         break;
 1908     case DNS_RBT_NSEC_NSEC:
 1909         result = dns_rbt_deletenode(rbtdb->nsec, node, false);
 1910         break;
 1911     case DNS_RBT_NSEC_NSEC3:
 1912         result = dns_rbt_deletenode(rbtdb->nsec3, node, false);
 1913         break;
 1914     }
 1915     if (result != ISC_R_SUCCESS) {
 1916         isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
 1917                   DNS_LOGMODULE_CACHE, ISC_LOG_WARNING,
 1918                   "delete_node(): "
 1919                   "dns_rbt_deletenode: %s",
 1920                   isc_result_totext(result));
 1921     }
 1922 }
 1923 
 1924 /*
 1925  * Caller must be holding the node lock.
 1926  */
 1927 static inline void
 1928 new_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
 1929           isc_rwlocktype_t locktype) {
 1930     if (locktype == isc_rwlocktype_write && ISC_LINK_LINKED(node, deadlink))
 1931     {
 1932         ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum], node,
 1933                 deadlink);
 1934     }
 1935     if (isc_refcount_increment0(&node->references) == 0) {
 1936         /* this is the first reference to the node */
 1937         isc_refcount_increment0(
 1938             &rbtdb->node_locks[node->locknum].references);
 1939     }
 1940 }
 1941 
 1942 /*%
 1943  * The tree lock must be held for the result to be valid.
 1944  */
 1945 static inline bool
 1946 is_leaf(dns_rbtnode_t *node) {
 1947     return (node->parent != NULL && node->parent->down == node &&
 1948         node->left == NULL && node->right == NULL);
 1949 }
 1950 
 1951 static inline void
 1952 send_to_prune_tree(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
 1953            isc_rwlocktype_t locktype) {
 1954     isc_event_t *ev;
 1955     dns_db_t *db;
 1956 
 1957     ev = isc_event_allocate(rbtdb->common.mctx, NULL, DNS_EVENT_RBTPRUNE,
 1958                 prune_tree, node, sizeof(isc_event_t));
 1959     new_reference(rbtdb, node, locktype);
 1960     db = NULL;
 1961     attach((dns_db_t *)rbtdb, &db);
 1962     ev->ev_sender = db;
 1963     isc_task_send(rbtdb->task, &ev);
 1964 }
 1965 
 1966 /*%
 1967  * Clean up dead nodes.  These are nodes which have no references, and
 1968  * have no data.  They are dead but we could not or chose not to delete
 1969  * them when we deleted all the data at that node because we did not want
 1970  * to wait for the tree write lock.
 1971  *
 1972  * The caller must hold a tree write lock and bucketnum'th node (write) lock.
 1973  */
 1974 static void
 1975 cleanup_dead_nodes(dns_rbtdb_t *rbtdb, int bucketnum) {
 1976     dns_rbtnode_t *node;
 1977     int count = 10; /* XXXJT: should be adjustable */
 1978 
 1979     node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
 1980     while (node != NULL && count > 0) {
 1981         ISC_LIST_UNLINK(rbtdb->deadnodes[bucketnum], node, deadlink);
 1982 
 1983         /*
 1984          * We might have reactivated this node without a tree write
 1985          * lock, so we couldn't remove this node from deadnodes then
 1986          * and we have to do it now.
 1987          */
 1988         if (isc_refcount_current(&node->references) != 0 ||
 1989             node->data != NULL) {
 1990             node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
 1991             count--;
 1992             continue;
 1993         }
 1994 
 1995         if (is_leaf(node) && rbtdb->task != NULL) {
 1996             send_to_prune_tree(rbtdb, node, isc_rwlocktype_write);
 1997         } else if (node->down == NULL && node->data == NULL) {
 1998             /*
 1999              * Not a interior node and not needing to be
 2000              * reactivated.
 2001              */
 2002             delete_node(rbtdb, node);
 2003         } else if (node->data == NULL) {
 2004             /*
 2005              * A interior node without data. Leave linked to
 2006              * to be cleaned up when node->down becomes NULL.
 2007              */
 2008             ISC_LIST_APPEND(rbtdb->deadnodes[bucketnum], node,
 2009                     deadlink);
 2010         }
 2011         node = ISC_LIST_HEAD(rbtdb->deadnodes[bucketnum]);
 2012         count--;
 2013     }
 2014 }
 2015 
 2016 /*
 2017  * This function is assumed to be called when a node is newly referenced
 2018  * and can be in the deadnode list.  In that case the node must be retrieved
 2019  * from the list because it is going to be used.  In addition, if the caller
 2020  * happens to hold a write lock on the tree, it's a good chance to purge dead
 2021  * nodes.
 2022  * Note: while a new reference is gained in multiple places, there are only very
 2023  * few cases where the node can be in the deadnode list (only empty nodes can
 2024  * have been added to the list).
 2025  */
 2026 static inline void
 2027 reactivate_node(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
 2028         isc_rwlocktype_t treelocktype) {
 2029     isc_rwlocktype_t locktype = isc_rwlocktype_read;
 2030     nodelock_t *nodelock = &rbtdb->node_locks[node->locknum].lock;
 2031     bool maybe_cleanup = false;
 2032 
 2033     POST(locktype);
 2034 
 2035     NODE_LOCK(nodelock, locktype);
 2036 
 2037     /*
 2038      * Check if we can possibly cleanup the dead node.  If so, upgrade
 2039      * the node lock below to perform the cleanup.
 2040      */
 2041     if (!ISC_LIST_EMPTY(rbtdb->deadnodes[node->locknum]) &&
 2042         treelocktype == isc_rwlocktype_write)
 2043     {
 2044         maybe_cleanup = true;
 2045     }
 2046 
 2047     if (ISC_LINK_LINKED(node, deadlink) || maybe_cleanup) {
 2048         /*
 2049          * Upgrade the lock and test if we still need to unlink.
 2050          */
 2051         NODE_UNLOCK(nodelock, locktype);
 2052         locktype = isc_rwlocktype_write;
 2053         POST(locktype);
 2054         NODE_LOCK(nodelock, locktype);
 2055         if (ISC_LINK_LINKED(node, deadlink)) {
 2056             ISC_LIST_UNLINK(rbtdb->deadnodes[node->locknum], node,
 2057                     deadlink);
 2058         }
 2059         if (maybe_cleanup) {
 2060             cleanup_dead_nodes(rbtdb, node->locknum);
 2061         }
 2062     }
 2063 
 2064     new_reference(rbtdb, node, locktype);
 2065 
 2066     NODE_UNLOCK(nodelock, locktype);
 2067 }
 2068 
 2069 /*
 2070  * Caller must be holding the node lock; either the "strong", read or write
 2071  * lock.  Note that the lock must be held even when node references are
 2072  * atomically modified; in that case the decrement operation itself does not
 2073  * have to be protected, but we must avoid a race condition where multiple
 2074  * threads are decreasing the reference to zero simultaneously and at least
 2075  * one of them is going to free the node.
 2076  *
 2077  * This function returns true if and only if the node reference decreases
 2078  * to zero.
 2079  *
 2080  * NOTE: Decrementing the reference count of a node to zero does not mean it
 2081  * will be immediately freed.
 2082  */
 2083 static bool
 2084 decrement_reference(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node,
 2085             rbtdb_serial_t least_serial, isc_rwlocktype_t nlock,
 2086             isc_rwlocktype_t tlock, bool pruning) {
 2087     isc_result_t result;
 2088     bool write_locked;
 2089     bool locked = tlock != isc_rwlocktype_none;
 2090     rbtdb_nodelock_t *nodelock;
 2091     int bucket = node->locknum;
 2092     bool no_reference = true;
 2093     uint_fast32_t refs;
 2094 
 2095     nodelock = &rbtdb->node_locks[bucket];
 2096 
 2097 #define KEEP_NODE(n, r, l)                                  \
 2098     ((n)->data != NULL || ((l) && (n)->down != NULL) || \
 2099      (n) == (r)->origin_node || (n) == (r)->nsec3_origin_node)
 2100 
 2101     /* Handle easy and typical case first. */
 2102     if (!node->dirty && KEEP_NODE(node, rbtdb, locked)) {
 2103         if (isc_refcount_decrement(&node->references) == 1) {
 2104             refs = isc_refcount_decrement(&nodelock->references);
 2105             INSIST(refs > 0);
 2106             return (true);
 2107         } else {
 2108             return (false);
 2109         }
 2110     }
 2111 
 2112     /* Upgrade the lock? */
 2113     if (nlock == isc_rwlocktype_read) {
 2114         NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
 2115         NODE_LOCK(&nodelock->lock, isc_rwlocktype_write);
 2116     }
 2117 
 2118     if (isc_refcount_decrement(&node->references) > 1) {
 2119         /* Restore the lock? */
 2120         if (nlock == isc_rwlocktype_read) {
 2121             NODE_DOWNGRADE(&nodelock->lock);
 2122         }
 2123         return (false);
 2124     }
 2125 
 2126     if (node->dirty) {
 2127         if (IS_CACHE(rbtdb)) {
 2128             clean_cache_node(rbtdb, node);
 2129         } else {
 2130             if (least_serial == 0) {
 2131                 /*
 2132                  * Caller doesn't know the least serial.
 2133                  * Get it.
 2134                  */
 2135                 RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
 2136                 least_serial = rbtdb->least_serial;
 2137                 RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
 2138             }
 2139             clean_zone_node(rbtdb, node, least_serial);
 2140         }
 2141     }
 2142 
 2143     /*
 2144      * Attempt to switch to a write lock on the tree.  If this fails,
 2145      * we will add this node to a linked list of nodes in this locking
 2146      * bucket which we will free later.
 2147      */
 2148     if (tlock != isc_rwlocktype_write) {
 2149         /*
 2150          * Locking hierarchy notwithstanding, we don't need to free
 2151          * the node lock before acquiring the tree write lock because
 2152          * we only do a trylock.
 2153          */
 2154         if (tlock == isc_rwlocktype_read) {
 2155             result = isc_rwlock_tryupgrade(&rbtdb->tree_lock);
 2156         } else {
 2157             result = isc_rwlock_trylock(&rbtdb->tree_lock,
 2158                             isc_rwlocktype_write);
 2159         }
 2160         RUNTIME_CHECK(result == ISC_R_SUCCESS ||
 2161                   result == ISC_R_LOCKBUSY);
 2162 
 2163         write_locked = (result == ISC_R_SUCCESS);
 2164     } else {
 2165         write_locked = true;
 2166     }
 2167 
 2168     refs = isc_refcount_decrement(&nodelock->references);
 2169     INSIST(refs > 0);
 2170 
 2171     if (KEEP_NODE(node, rbtdb, locked || write_locked)) {
 2172         goto restore_locks;
 2173     }
 2174 
 2175 #undef KEEP_NODE
 2176 
 2177     if (write_locked) {
 2178         /*
 2179          * We can now delete the node.
 2180          */
 2181 
 2182         /*
 2183          * If this node is the only one in the level it's in, deleting
 2184          * this node may recursively make its parent the only node in
 2185          * the parent level; if so, and if no one is currently using
 2186          * the parent node, this is almost the only opportunity to
 2187          * clean it up.  But the recursive cleanup is not that trivial
 2188          * since the child and parent may be in different lock buckets,
 2189          * which would cause a lock order reversal problem.  To avoid
 2190          * the trouble, we'll dispatch a separate event for batch
 2191          * cleaning.  We need to check whether we're deleting the node
 2192          * as a result of pruning to avoid infinite dispatching.
 2193          * Note: pruning happens only when a task has been set for the
 2194          * rbtdb.  If the user of the rbtdb chooses not to set a task,
 2195          * it's their responsibility to purge stale leaves (e.g. by
 2196          * periodic walk-through).
 2197          */
 2198         if (!pruning && is_leaf(node) && rbtdb->task != NULL) {
 2199             send_to_prune_tree(rbtdb, node, isc_rwlocktype_write);
 2200             no_reference = false;
 2201         } else {
 2202             delete_node(rbtdb, node);
 2203         }
 2204     } else {
 2205         INSIST(node->data == NULL);
 2206         if (!ISC_LINK_LINKED(node, deadlink)) {
 2207             ISC_LIST_APPEND(rbtdb->deadnodes[bucket], node,
 2208                     deadlink);
 2209         }
 2210     }
 2211 
 2212 restore_locks:
 2213     /* Restore the lock? */
 2214     if (nlock == isc_rwlocktype_read) {
 2215         NODE_DOWNGRADE(&nodelock->lock);
 2216     }
 2217 
 2218     /*
 2219      * Relock a read lock, or unlock the write lock if no lock was held.
 2220      */
 2221     if (tlock == isc_rwlocktype_none) {
 2222         if (write_locked) {
 2223             RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
 2224         }
 2225     }
 2226 
 2227     if (tlock == isc_rwlocktype_read) {
 2228         if (write_locked) {
 2229             isc_rwlock_downgrade(&rbtdb->tree_lock);
 2230         }
 2231     }
 2232 
 2233     return (no_reference);
 2234 }
 2235 
 2236 /*
 2237  * Prune the tree by recursively cleaning-up single leaves.  In the worst
 2238  * case, the number of iteration is the number of tree levels, which is at
 2239  * most the maximum number of domain name labels, i.e, 127.  In practice, this
 2240  * should be much smaller (only a few times), and even the worst case would be
 2241  * acceptable for a single event.
 2242  */
 2243 static void
 2244 prune_tree(isc_task_t *task, isc_event_t *event) {
 2245     dns_rbtdb_t *rbtdb = event->ev_sender;
 2246     dns_rbtnode_t *node = event->ev_arg;
 2247     dns_rbtnode_t *parent;
 2248     unsigned int locknum;
 2249 
 2250     UNUSED(task);
 2251 
 2252     isc_event_free(&event);
 2253 
 2254     RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
 2255     locknum = node->locknum;
 2256     NODE_LOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
 2257     do {
 2258         parent = node->parent;
 2259         decrement_reference(rbtdb, node, 0, isc_rwlocktype_write,
 2260                     isc_rwlocktype_write, true);
 2261 
 2262         if (parent != NULL && parent->down == NULL) {
 2263             /*
 2264              * node was the only down child of the parent and has
 2265              * just been removed.  We'll then need to examine the
 2266              * parent.  Keep the lock if possible; otherwise,
 2267              * release the old lock and acquire one for the parent.
 2268              */
 2269             if (parent->locknum != locknum) {
 2270                 NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
 2271                         isc_rwlocktype_write);
 2272                 locknum = parent->locknum;
 2273                 NODE_LOCK(&rbtdb->node_locks[locknum].lock,
 2274                       isc_rwlocktype_write);
 2275             }
 2276 
 2277             /*
 2278              * We need to gain a reference to the node before
 2279              * decrementing it in the next iteration.
 2280              */
 2281             if (ISC_LINK_LINKED(parent, deadlink)) {
 2282                 ISC_LIST_UNLINK(rbtdb->deadnodes[locknum],
 2283                         parent, deadlink);
 2284             }
 2285             new_reference(rbtdb, parent, isc_rwlocktype_write);
 2286         } else {
 2287             parent = NULL;
 2288         }
 2289 
 2290         node = parent;
 2291     } while (node != NULL);
 2292     NODE_UNLOCK(&rbtdb->node_locks[locknum].lock, isc_rwlocktype_write);
 2293     RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
 2294 
 2295     detach((dns_db_t **)&rbtdb);
 2296 }
 2297 
 2298 static inline void
 2299 make_least_version(dns_rbtdb_t *rbtdb, rbtdb_version_t *version,
 2300            rbtdb_changedlist_t *cleanup_list) {
 2301     /*
 2302      * Caller must be holding the database lock.
 2303      */
 2304 
 2305     rbtdb->least_serial = version->serial;
 2306     *cleanup_list = version->changed_list;
 2307     ISC_LIST_INIT(version->changed_list);
 2308 }
 2309 
 2310 static inline void
 2311 cleanup_nondirty(rbtdb_version_t *version, rbtdb_changedlist_t *cleanup_list) {
 2312     rbtdb_changed_t *changed, *next_changed;
 2313 
 2314     /*
 2315      * If the changed record is dirty, then
 2316      * an update created multiple versions of
 2317      * a given rdataset.  We keep this list
 2318      * until we're the least open version, at
 2319      * which point it's safe to get rid of any
 2320      * older versions.
 2321      *
 2322      * If the changed record isn't dirty, then
 2323      * we don't need it anymore since we're
 2324      * committing and not rolling back.
 2325      *
 2326      * The caller must be holding the database lock.
 2327      */
 2328     for (changed = HEAD(version->changed_list); changed != NULL;
 2329          changed = next_changed)
 2330     {
 2331         next_changed = NEXT(changed, link);
 2332         if (!changed->dirty) {
 2333             UNLINK(version->changed_list, changed, link);
 2334             APPEND(*cleanup_list, changed, link);
 2335         }
 2336     }
 2337 }
 2338 
 2339 static void
 2340 iszonesecure(dns_db_t *db, rbtdb_version_t *version, dns_dbnode_t *origin) {
 2341     dns_rdataset_t keyset;
 2342     dns_rdataset_t nsecset, signsecset;
 2343     bool haszonekey = false;
 2344     bool hasnsec = false;
 2345     isc_result_t result;
 2346 
 2347     dns_rdataset_init(&keyset);
 2348     result = dns_db_findrdataset(db, origin, version, dns_rdatatype_dnskey,
 2349                      0, 0, &keyset, NULL);
 2350     if (result == ISC_R_SUCCESS) {
 2351         result = dns_rdataset_first(&keyset);
 2352         while (result == ISC_R_SUCCESS) {
 2353             dns_rdata_t keyrdata = DNS_RDATA_INIT;
 2354             dns_rdataset_current(&keyset, &keyrdata);
 2355             if (dns_zonekey_iszonekey(&keyrdata)) {
 2356                 haszonekey = true;
 2357                 break;
 2358             }
 2359             result = dns_rdataset_next(&keyset);
 2360         }
 2361         dns_rdataset_disassociate(&keyset);
 2362     }
 2363     if (!haszonekey) {
 2364         version->secure = dns_db_insecure;
 2365         version->havensec3 = false;
 2366         return;
 2367     }
 2368 
 2369     dns_rdataset_init(&nsecset);
 2370     dns_rdataset_init(&signsecset);
 2371     result = dns_db_findrdataset(db, origin, version, dns_rdatatype_nsec, 0,
 2372                      0, &nsecset, &signsecset);
 2373     if (result == ISC_R_SUCCESS) {
 2374         if (dns_rdataset_isassociated(&signsecset)) {
 2375             hasnsec = true;
 2376             dns_rdataset_disassociate(&signsecset);
 2377         }
 2378         dns_rdataset_disassociate(&nsecset);
 2379     }
 2380 
 2381     setnsec3parameters(db, version);
 2382 
 2383     /*
 2384      * Do we have a valid NSEC/NSEC3 chain?
 2385      */
 2386     if (version->havensec3 || hasnsec) {
 2387         version->secure = dns_db_secure;
 2388     } else {
 2389         version->secure = dns_db_insecure;
 2390     }
 2391 }
 2392 
 2393 /*%<
 2394  * Walk the origin node looking for NSEC3PARAM records.
 2395  * Cache the nsec3 parameters.
 2396  */
 2397 static void
 2398 setnsec3parameters(dns_db_t *db, rbtdb_version_t *version) {
 2399     dns_rbtnode_t *node;
 2400     dns_rdata_nsec3param_t nsec3param;
 2401     dns_rdata_t rdata = DNS_RDATA_INIT;
 2402     isc_region_t region;
 2403     isc_result_t result;
 2404     rdatasetheader_t *header, *header_next;
 2405     unsigned char *raw; /* RDATASLAB */
 2406     unsigned int count, length;
 2407     dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
 2408 
 2409     RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
 2410     version->havensec3 = false;
 2411     node = rbtdb->origin_node;
 2412     NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
 2413           isc_rwlocktype_read);
 2414     for (header = node->data; header != NULL; header = header_next) {
 2415         header_next = header->next;
 2416         do {
 2417             if (header->serial <= version->serial &&
 2418                 !IGNORE(header)) {
 2419                 if (NONEXISTENT(header)) {
 2420                     header = NULL;
 2421                 }
 2422                 break;
 2423             } else {
 2424                 header = header->down;
 2425             }
 2426         } while (header != NULL);
 2427 
 2428         if (header != NULL &&
 2429             (header->type == dns_rdatatype_nsec3param)) {
 2430             /*
 2431              * Find A NSEC3PARAM with a supported algorithm.
 2432              */
 2433             raw = (unsigned char *)header + sizeof(*header);
 2434             count = raw[0] * 256 + raw[1]; /* count */
 2435             raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH;
 2436             while (count-- > 0U) {
 2437                 length = raw[0] * 256 + raw[1];
 2438                 raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH;
 2439                 region.base = raw;
 2440                 region.length = length;
 2441                 raw += length;
 2442                 dns_rdata_fromregion(
 2443                     &rdata, rbtdb->common.rdclass,
 2444                     dns_rdatatype_nsec3param, &region);
 2445                 result = dns_rdata_tostruct(&rdata, &nsec3param,
 2446                                 NULL);
 2447                 INSIST(result == ISC_R_SUCCESS);
 2448                 dns_rdata_reset(&rdata);
 2449 
 2450                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG &&
 2451                     !dns_nsec3_supportedhash(nsec3param.hash))
 2452                 {
 2453                     continue;
 2454                 }
 2455 
 2456                 if (nsec3param.flags != 0) {
 2457                     continue;
 2458                 }
 2459 
 2460                 memmove(version->salt, nsec3param.salt,
 2461                     nsec3param.salt_length);
 2462                 version->hash = nsec3param.hash;
 2463                 version->salt_length = nsec3param.salt_length;
 2464                 version->iterations = nsec3param.iterations;
 2465                 version->flags = nsec3param.flags;
 2466                 version->havensec3 = true;
 2467                 /*
 2468                  * Look for a better algorithm than the
 2469                  * unknown test algorithm.
 2470                  */
 2471                 if (nsec3param.hash != DNS_NSEC3_UNKNOWNALG) {
 2472                     goto unlock;
 2473                 }
 2474             }
 2475         }
 2476     }
 2477 unlock:
 2478     NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
 2479             isc_rwlocktype_read);
 2480     RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_read);
 2481 }
 2482 
 2483 static void
 2484 cleanup_dead_nodes_callback(isc_task_t *task, isc_event_t *event) {
 2485     dns_rbtdb_t *rbtdb = event->ev_arg;
 2486     bool again = false;
 2487     unsigned int locknum;
 2488 
 2489     RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
 2490     for (locknum = 0; locknum < rbtdb->node_lock_count; locknum++) {
 2491         NODE_LOCK(&rbtdb->node_locks[locknum].lock,
 2492               isc_rwlocktype_write);
 2493         cleanup_dead_nodes(rbtdb, locknum);
 2494         if (ISC_LIST_HEAD(rbtdb->deadnodes[locknum]) != NULL) {
 2495             again = true;
 2496         }
 2497         NODE_UNLOCK(&rbtdb->node_locks[locknum].lock,
 2498                 isc_rwlocktype_write);
 2499     }
 2500     RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
 2501     if (again) {
 2502         isc_task_send(task, &event);
 2503     } else {
 2504         isc_event_free(&event);
 2505         if (isc_refcount_decrement(&rbtdb->references) == 1) {
 2506             (void)isc_refcount_current(&rbtdb->references);
 2507             maybe_free_rbtdb(rbtdb);
 2508         }
 2509     }
 2510 }
 2511 
 2512 static void
 2513 closeversion(dns_db_t *db, dns_dbversion_t **versionp, bool commit) {
 2514     dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
 2515     rbtdb_version_t *version, *cleanup_version, *least_greater;
 2516     bool rollback = false;
 2517     rbtdb_changedlist_t cleanup_list;
 2518     rdatasetheaderlist_t resigned_list;
 2519     rbtdb_changed_t *changed, *next_changed;
 2520     rbtdb_serial_t serial, least_serial;
 2521     dns_rbtnode_t *rbtnode;
 2522     rdatasetheader_t *header;
 2523 
 2524     REQUIRE(VALID_RBTDB(rbtdb));
 2525     version = (rbtdb_version_t *)*versionp;
 2526     INSIST(version->rbtdb == rbtdb);
 2527 
 2528     cleanup_version = NULL;
 2529     ISC_LIST_INIT(cleanup_list);
 2530     ISC_LIST_INIT(resigned_list);
 2531 
 2532     if (isc_refcount_decrement(&version->references) > 1) {
 2533         /* typical and easy case first */
 2534         if (commit) {
 2535             RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
 2536             INSIST(!version->writer);
 2537             RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
 2538         }
 2539         goto end;
 2540     }
 2541 
 2542     /*
 2543      * Update the zone's secure status in version before making
 2544      * it the current version.
 2545      */
 2546     if (version->writer && commit && !IS_CACHE(rbtdb)) {
 2547         iszonesecure(db, version, rbtdb->origin_node);
 2548     }
 2549 
 2550     RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
 2551     serial = version->serial;
 2552     if (version->writer) {
 2553         if (commit) {
 2554             unsigned cur_ref;
 2555             rbtdb_version_t *cur_version;
 2556 
 2557             INSIST(version->commit_ok);
 2558             INSIST(version == rbtdb->future_version);
 2559             /*
 2560              * The current version is going to be replaced.
 2561              * Release the (likely last) reference to it from the
 2562              * DB itself and unlink it from the open list.
 2563              */
 2564             cur_version = rbtdb->current_version;
 2565             cur_ref = isc_refcount_decrement(
 2566                 &cur_version->references);
 2567             if (cur_ref == 1) {
 2568                 (void)isc_refcount_current(
 2569                     &cur_version->references);
 2570                 if (cur_version->serial == rbtdb->least_serial)
 2571                 {
 2572                     INSIST(EMPTY(
 2573                         cur_version->changed_list));
 2574                 }
 2575                 UNLINK(rbtdb->open_versions, cur_version, link);
 2576             }
 2577             if (EMPTY(rbtdb->open_versions)) {
 2578                 /*
 2579                  * We're going to become the least open
 2580                  * version.
 2581                  */
 2582                 make_least_version(rbtdb, version,
 2583                            &cleanup_list);
 2584             } else {
 2585                 /*
 2586                  * Some other open version is the
 2587                  * least version.  We can't cleanup
 2588                  * records that were changed in this
 2589                  * version because the older versions
 2590                  * may still be in use by an open
 2591                  * version.
 2592                  *
 2593                  * We can, however, discard the
 2594                  * changed records for things that
 2595                  * we've added that didn't exist in
 2596                  * prior versions.
 2597                  */
 2598                 cleanup_nondirty(version, &cleanup_list);
 2599             }
 2600             /*
 2601              * If the (soon to be former) current version
 2602              * isn't being used by anyone, we can clean
 2603              * it up.
 2604              */
 2605             if (cur_ref == 1) {
 2606                 cleanup_version = cur_version;
 2607                 APPENDLIST(version->changed_list,
 2608                        cleanup_version->changed_list, link);
 2609             }
 2610             /*
 2611              * Become the current version.
 2612              */
 2613             version->writer = false;
 2614             rbtdb->current_version = version;
 2615             rbtdb->current_serial = version->serial;
 2616             rbtdb->future_version = NULL;
 2617 
 2618             /*
 2619              * Keep the current version in the open list, and
 2620              * gain a reference for the DB itself (see the DB
 2621              * creation function below).  This must be the only
 2622              * case where we need to increment the counter from
 2623              * zero and need to use isc_refcount_increment0().
 2624              */
 2625             INSIST(isc_refcount_increment0(&version->references) ==
 2626                    0);
 2627             PREPEND(rbtdb->open_versions, rbtdb->current_version,
 2628                 link);
 2629             resigned_list = version->resigned_list;
 2630             ISC_LIST_INIT(version->resigned_list);
 2631         } else {
 2632             /*
 2633              * We're rolling back this transaction.
 2634              */
 2635             cleanup_list = version->changed_list;
 2636             ISC_LIST_INIT(version->changed_list);
 2637             resigned_list = version->resigned_list;
 2638             ISC_LIST_INIT(version->resigned_list);
 2639             rollback = true;
 2640             cleanup_version = version;
 2641             rbtdb->future_version = NULL;
 2642         }
 2643     } else {
 2644         if (version != rbtdb->current_version) {
 2645             /*
 2646              * There are no external or internal references
 2647              * to this version and it can be cleaned up.
 2648              */
 2649             cleanup_version = version;
 2650 
 2651             /*
 2652              * Find the version with the least serial
 2653              * number greater than ours.
 2654              */
 2655             least_greater = PREV(version, link);
 2656             if (least_greater == NULL) {
 2657                 least_greater = rbtdb->current_version;
 2658             }
 2659 
 2660             INSIST(version->serial < least_greater->serial);
 2661             /*
 2662              * Is this the least open version?
 2663              */
 2664             if (version->serial == rbtdb->least_serial) {
 2665                 /*
 2666                  * Yes.  Install the new least open
 2667                  * version.
 2668                  */
 2669                 make_least_version(rbtdb, least_greater,
 2670                            &cleanup_list);
 2671             } else {
 2672                 /*
 2673                  * Add any unexecuted cleanups to
 2674                  * those of the least greater version.
 2675                  */
 2676                 APPENDLIST(least_greater->changed_list,
 2677                        version->changed_list, link);
 2678             }
 2679         } else if (version->serial == rbtdb->least_serial) {
 2680             INSIST(EMPTY(version->changed_list));
 2681         }
 2682         UNLINK(rbtdb->open_versions, version, link);
 2683     }
 2684     least_serial = rbtdb->least_serial;
 2685     RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
 2686 
 2687     if (cleanup_version != NULL) {
 2688         INSIST(EMPTY(cleanup_version->changed_list));
 2689         free_gluetable(cleanup_version);
 2690         isc_rwlock_destroy(&cleanup_version->glue_rwlock);
 2691         isc_rwlock_destroy(&cleanup_version->rwlock);
 2692         isc_mem_put(rbtdb->common.mctx, cleanup_version,
 2693                 sizeof(*cleanup_version));
 2694     }
 2695 
 2696     /*
 2697      * Commit/rollback re-signed headers.
 2698      */
 2699     for (header = HEAD(resigned_list); header != NULL;
 2700          header = HEAD(resigned_list)) {
 2701         nodelock_t *lock;
 2702 
 2703         ISC_LIST_UNLINK(resigned_list, header, link);
 2704 
 2705         lock = &rbtdb->node_locks[header->node->locknum].lock;
 2706         NODE_LOCK(lock, isc_rwlocktype_write);
 2707         if (rollback && !IGNORE(header)) {
 2708             isc_result_t result;
 2709             result = resign_insert(rbtdb, header->node->locknum,
 2710                            header);
 2711             if (result != ISC_R_SUCCESS) {
 2712                 isc_log_write(dns_lctx,
 2713                           DNS_LOGCATEGORY_DATABASE,
 2714                           DNS_LOGMODULE_ZONE, ISC_LOG_ERROR,
 2715                           "Unable to reinsert header to "
 2716                           "re-signing heap: %s",
 2717                           dns_result_totext(result));
 2718             }
 2719         }
 2720         decrement_reference(rbtdb, header->node, least_serial,
 2721                     isc_rwlocktype_write, isc_rwlocktype_none,
 2722                     false);
 2723         NODE_UNLOCK(lock, isc_rwlocktype_write);
 2724     }
 2725 
 2726     if (!EMPTY(cleanup_list)) {
 2727         isc_event_t *event = NULL;
 2728         isc_rwlocktype_t tlock = isc_rwlocktype_none;
 2729 
 2730         if (rbtdb->task != NULL) {
 2731             event = isc_event_allocate(rbtdb->common.mctx, NULL,
 2732                            DNS_EVENT_RBTDEADNODES,
 2733                            cleanup_dead_nodes_callback,
 2734                            rbtdb, sizeof(isc_event_t));
 2735         }
 2736         if (event == NULL) {
 2737             /*
 2738              * We acquire a tree write lock here in order to make
 2739              * sure that stale nodes will be removed in
 2740              * decrement_reference().  If we didn't have the lock,
 2741              * those nodes could miss the chance to be removed
 2742              * until the server stops.  The write lock is
 2743              * expensive, but this event should be rare enough
 2744              * to justify the cost.
 2745              */
 2746             RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
 2747             tlock = isc_rwlocktype_write;
 2748         }
 2749 
 2750         for (changed = HEAD(cleanup_list); changed != NULL;
 2751              changed = next_changed) {
 2752             nodelock_t *lock;
 2753 
 2754             next_changed = NEXT(changed, link);
 2755             rbtnode = changed->node;
 2756             lock = &rbtdb->node_locks[rbtnode->locknum].lock;
 2757 
 2758             NODE_LOCK(lock, isc_rwlocktype_write);
 2759             /*
 2760              * This is a good opportunity to purge any dead nodes,
 2761              * so use it.
 2762              */
 2763             if (event == NULL) {
 2764                 cleanup_dead_nodes(rbtdb, rbtnode->locknum);
 2765             }
 2766 
 2767             if (rollback) {
 2768                 rollback_node(rbtnode, serial);
 2769             }
 2770             decrement_reference(rbtdb, rbtnode, least_serial,
 2771                         isc_rwlocktype_write, tlock, false);
 2772 
 2773             NODE_UNLOCK(lock, isc_rwlocktype_write);
 2774 
 2775             isc_mem_put(rbtdb->common.mctx, changed,
 2776                     sizeof(*changed));
 2777         }
 2778         if (event != NULL) {
 2779             isc_refcount_increment(&rbtdb->references);
 2780             isc_task_send(rbtdb->task, &event);
 2781         } else {
 2782             RWUNLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
 2783         }
 2784     }
 2785 
 2786 end:
 2787     *versionp = NULL;
 2788 }
 2789 
 2790 /*
 2791  * Add the necessary magic for the wildcard name 'name'
 2792  * to be found in 'rbtdb'.
 2793  *
 2794  * In order for wildcard matching to work correctly in
 2795  * zone_find(), we must ensure that a node for the wildcarding
 2796  * level exists in the database, and has its 'find_callback'
 2797  * and 'wild' bits set.
 2798  *
 2799  * E.g. if the wildcard name is "*.sub.example." then we
 2800  * must ensure that "sub.example." exists and is marked as
 2801  * a wildcard level.
 2802  *
 2803  * tree_lock(write) must be held.
 2804  */
 2805 static isc_result_t
 2806 add_wildcard_magic(dns_rbtdb_t *rbtdb, const dns_name_t *name) {
 2807     isc_result_t result;
 2808     dns_name_t foundname;
 2809     dns_offsets_t offsets;
 2810     unsigned int n;
 2811     dns_rbtnode_t *node = NULL;
 2812 
 2813     dns_name_init(&foundname, offsets);
 2814     n = dns_name_countlabels(name);
 2815     INSIST(n >= 2);
 2816     n--;
 2817     dns_name_getlabelsequence(name, 1, n, &foundname);
 2818     result = dns_rbt_addnode(rbtdb->tree, &foundname, &node);
 2819     if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) {
 2820         return (result);
 2821     }
 2822     if (result == ISC_R_SUCCESS) {
 2823         node->nsec = DNS_RBT_NSEC_NORMAL;
 2824     }
 2825     node->find_callback = 1;
 2826     node->wild = 1;
 2827     return (ISC_R_SUCCESS);
 2828 }
 2829 
 2830 /*
 2831  * tree_lock(write) must be held.
 2832  */
 2833 static isc_result_t
 2834 add_empty_wildcards(dns_rbtdb_t *rbtdb, const dns_name_t *name) {
 2835     isc_result_t result;
 2836     dns_name_t foundname;
 2837     dns_offsets_t offsets;
 2838     unsigned int n, l, i;
 2839 
 2840     dns_name_init(&foundname, offsets);
 2841     n = dns_name_countlabels(name);
 2842     l = dns_name_countlabels(&rbtdb->common.origin);
 2843     i = l + 1;
 2844     while (i < n) {
 2845         dns_rbtnode_t *node = NULL; /* dummy */
 2846         dns_name_getlabelsequence(name, n - i, i, &foundname);
 2847         if (dns_name_iswildcard(&foundname)) {
 2848             result = add_wildcard_magic(rbtdb, &foundname);
 2849             if (result != ISC_R_SUCCESS) {
 2850                 return (result);
 2851             }
 2852             result = dns_rbt_addnode(rbtdb->tree, &foundname,
 2853                          &node);
 2854             if (result != ISC_R_SUCCESS && result != ISC_R_EXISTS) {
 2855                 return (result);
 2856             }
 2857             if (result == ISC_R_SUCCESS) {
 2858                 node->nsec = DNS_RBT_NSEC_NORMAL;
 2859             }
 2860         }
 2861         i++;
 2862     }
 2863     return (ISC_R_SUCCESS);
 2864 }
 2865 
 2866 static isc_result_t
 2867 findnodeintree(dns_rbtdb_t *rbtdb, dns_rbt_t *tree, const dns_name_t *name,
 2868            bool create, dns_dbnode_t **nodep) {
 2869     dns_rbtnode_t *node = NULL;
 2870     dns_name_t nodename;
 2871     isc_result_t result;
 2872     isc_rwlocktype_t locktype = isc_rwlocktype_read;
 2873 
 2874     INSIST(tree == rbtdb->tree || tree == rbtdb->nsec3);
 2875 
 2876     dns_name_init(&nodename, NULL);
 2877     RWLOCK(&rbtdb->tree_lock, locktype);
 2878     result = dns_rbt_findnode(tree, name, NULL, &node, NULL,
 2879                   DNS_RBTFIND_EMPTYDATA, NULL, NULL);
 2880     if (result != ISC_R_SUCCESS) {
 2881         RWUNLOCK(&rbtdb->tree_lock, locktype);
 2882         if (!create) {
 2883             if (result == DNS_R_PARTIALMATCH) {
 2884                 result = ISC_R_NOTFOUND;
 2885             }
 2886             return (result);
 2887         }
 2888         /*
 2889          * It would be nice to try to upgrade the lock instead of
 2890          * unlocking then relocking.
 2891          */
 2892         locktype = isc_rwlocktype_write;
 2893         RWLOCK(&rbtdb->tree_lock, locktype);
 2894         node = NULL;
 2895         result = dns_rbt_addnode(tree, name, &node);
 2896         if (result == ISC_R_SUCCESS) {
 2897             dns_rbt_namefromnode(node, &nodename);
 2898             node->locknum = node->hashval % rbtdb->node_lock_count;
 2899             if (tree == rbtdb->tree) {
 2900                 add_empty_wildcards(rbtdb, name);
 2901 
 2902                 if (dns_name_iswildcard(name)) {
 2903                     result = add_wildcard_magic(rbtdb,
 2904                                     name);
 2905                     if (result != ISC_R_SUCCESS) {
 2906                         RWUNLOCK(&rbtdb->tree_lock,
 2907                              locktype);
 2908                         return (result);
 2909                     }
 2910                 }
 2911             }
 2912             if (tree == rbtdb->nsec3) {
 2913                 node->nsec = DNS_RBT_NSEC_NSEC3;
 2914             }
 2915         } else if (result != ISC_R_EXISTS) {
 2916             RWUNLOCK(&rbtdb->tree_lock, locktype);
 2917             return (result);
 2918         }
 2919     }
 2920 
 2921     if (tree == rbtdb->nsec3) {
 2922         INSIST(node->nsec == DNS_RBT_NSEC_NSEC3);
 2923     }
 2924 
 2925     reactivate_node(rbtdb, node, locktype);
 2926 
 2927     RWUNLOCK(&rbtdb->tree_lock, locktype);
 2928 
 2929     *nodep = (dns_dbnode_t *)node;
 2930 
 2931     return (ISC_R_SUCCESS);
 2932 }
 2933 
 2934 static isc_result_t
 2935 findnode(dns_db_t *db, const dns_name_t *name, bool create,
 2936      dns_dbnode_t **nodep) {
 2937     dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
 2938 
 2939     REQUIRE(VALID_RBTDB(rbtdb));
 2940 
 2941     return (findnodeintree(rbtdb, rbtdb->tree, name, create, nodep));
 2942 }
 2943 
 2944 static isc_result_t
 2945 findnsec3node(dns_db_t *db, const dns_name_t *name, bool create,
 2946           dns_dbnode_t **nodep) {
 2947     dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
 2948 
 2949     REQUIRE(VALID_RBTDB(rbtdb));
 2950 
 2951     return (findnodeintree(rbtdb, rbtdb->nsec3, name, create, nodep));
 2952 }
 2953 
 2954 static isc_result_t
 2955 zone_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
 2956     rbtdb_search_t *search = arg;
 2957     rdatasetheader_t *header, *header_next;
 2958     rdatasetheader_t *dname_header, *sigdname_header, *ns_header;
 2959     rdatasetheader_t *found;
 2960     isc_result_t result;
 2961     dns_rbtnode_t *onode;
 2962 
 2963     /*
 2964      * We only want to remember the topmost zone cut, since it's the one
 2965      * that counts, so we'll just continue if we've already found a
 2966      * zonecut.
 2967      */
 2968     if (search->zonecut != NULL) {
 2969         return (DNS_R_CONTINUE);
 2970     }
 2971 
 2972     found = NULL;
 2973     result = DNS_R_CONTINUE;
 2974     onode = search->rbtdb->origin_node;
 2975 
 2976     NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
 2977           isc_rwlocktype_read);
 2978 
 2979     /*
 2980      * Look for an NS or DNAME rdataset active in our version.
 2981      */
 2982     ns_header = NULL;
 2983     dname_header = NULL;
 2984     sigdname_header = NULL;
 2985     for (header = node->data; header != NULL; header = header_next) {
 2986         header_next = header->next;
 2987         if (header->type == dns_rdatatype_ns ||
 2988             header->type == dns_rdatatype_dname ||
 2989             header->type == RBTDB_RDATATYPE_SIGDNAME)
 2990         {
 2991             do {
 2992                 if (header->serial <= search->serial &&
 2993                     !IGNORE(header)) {
 2994                     /*
 2995                      * Is this a "this rdataset doesn't
 2996                      * exist" record?
 2997                      */
 2998                     if (NONEXISTENT(header)) {
 2999                         header = NULL;
 3000                     }
 3001                     break;
 3002                 } else {
 3003                     header = header->down;
 3004                 }
 3005             } while (header != NULL);
 3006             if (header != NULL) {
 3007                 if (header->type == dns_rdatatype_dname) {
 3008                     dname_header = header;
 3009                 } else if (header->type ==
 3010                        RBTDB_RDATATYPE_SIGDNAME) {
 3011                     sigdname_header = header;
 3012                 } else if (node != onode ||
 3013                        IS_STUB(search->rbtdb)) {
 3014                     /*
 3015                      * We've found an NS rdataset that
 3016                      * isn't at the origin node.  We check
 3017                      * that they're not at the origin node,
 3018                      * because otherwise we'd erroneously
 3019                      * treat the zone top as if it were
 3020                      * a delegation.
 3021                      */
 3022                     ns_header = header;
 3023                 }
 3024             }
 3025         }
 3026     }
 3027 
 3028     /*
 3029      * Did we find anything?
 3030      */
 3031     if (!IS_CACHE(search->rbtdb) && !IS_STUB(search->rbtdb) &&
 3032         ns_header != NULL) {
 3033         /*
 3034          * Note that NS has precedence over DNAME if both exist
 3035          * in a zone.  Otherwise DNAME take precedence over NS.
 3036          */
 3037         found = ns_header;
 3038         search->zonecut_sigrdataset = NULL;
 3039     } else if (dname_header != NULL) {
 3040         found = dname_header;
 3041         search->zonecut_sigrdataset = sigdname_header;
 3042     } else if (ns_header != NULL) {
 3043         found = ns_header;
 3044         search->zonecut_sigrdataset = NULL;
 3045     }
 3046 
 3047     if (found != NULL) {
 3048         /*
 3049          * We increment the reference count on node to ensure that
 3050          * search->zonecut_rdataset will still be valid later.
 3051          */
 3052         new_reference(search->rbtdb, node, isc_rwlocktype_read);
 3053         search->zonecut = node;
 3054         search->zonecut_rdataset = found;
 3055         search->need_cleanup = true;
 3056         /*
 3057          * Since we've found a zonecut, anything beneath it is
 3058          * glue and is not subject to wildcard matching, so we
 3059          * may clear search->wild.
 3060          */
 3061         search->wild = false;
 3062         if ((search->options & DNS_DBFIND_GLUEOK) == 0) {
 3063             /*
 3064              * If the caller does not want to find glue, then
 3065              * this is the best answer and the search should
 3066              * stop now.
 3067              */
 3068             result = DNS_R_PARTIALMATCH;
 3069         } else {
 3070             dns_name_t *zcname;
 3071 
 3072             /*
 3073              * The search will continue beneath the zone cut.
 3074              * This may or may not be the best match.  In case it
 3075              * is, we need to remember the node name.
 3076              */
 3077             zcname = dns_fixedname_name(&search->zonecut_name);
 3078             dns_name_copynf(name, zcname);
 3079             search->copy_name = true;
 3080         }
 3081     } else {
 3082         /*
 3083          * There is no zonecut at this node which is active in this
 3084          * version.
 3085          *
 3086          * If this is a "wild" node and the caller hasn't disabled
 3087          * wildcard matching, remember that we've seen a wild node
 3088          * in case we need to go searching for wildcard matches
 3089          * later on.
 3090          */
 3091         if (node->wild && (search->options & DNS_DBFIND_NOWILD) == 0) {
 3092             search->wild = true;
 3093         }
 3094     }
 3095 
 3096     NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
 3097             isc_rwlocktype_read);
 3098 
 3099     return (result);
 3100 }
 3101 
 3102 static inline void
 3103 bind_rdataset(dns_rbtdb_t *rbtdb, dns_rbtnode_t *node, rdatasetheader_t *header,
 3104           isc_stdtime_t now, isc_rwlocktype_t locktype,
 3105           dns_rdataset_t *rdataset) {
 3106     unsigned char *raw; /* RDATASLAB */
 3107 
 3108     /*
 3109      * Caller must be holding the node reader lock.
 3110      * XXXJT: technically, we need a writer lock, since we'll increment
 3111      * the header count below.  However, since the actual counter value
 3112      * doesn't matter, we prioritize performance here.  (We may want to
 3113      * use atomic increment when available).
 3114      */
 3115 
 3116     if (rdataset == NULL) {
 3117         return;
 3118     }
 3119 
 3120     new_reference(rbtdb, node, locktype);
 3121 
 3122     INSIST(rdataset->methods == NULL); /* We must be disassociated. */
 3123 
 3124     rdataset->methods = &rdataset_methods;
 3125     rdataset->rdclass = rbtdb->common.rdclass;
 3126     rdataset->type = RBTDB_RDATATYPE_BASE(header->type);
 3127     rdataset->covers = RBTDB_RDATATYPE_EXT(header->type);
 3128     rdataset->ttl = header->rdh_ttl - now;
 3129     rdataset->trust = header->trust;
 3130     if (NEGATIVE(header)) {
 3131         rdataset->attributes |= DNS_RDATASETATTR_NEGATIVE;
 3132     }
 3133     if (NXDOMAIN(header)) {
 3134         rdataset->attributes |= DNS_RDATASETATTR_NXDOMAIN;
 3135     }
 3136     if (OPTOUT(header)) {
 3137         rdataset->attributes |= DNS_RDATASETATTR_OPTOUT;
 3138     }
 3139     if (PREFETCH(header)) {
 3140         rdataset->attributes |= DNS_RDATASETATTR_PREFETCH;
 3141     }
 3142     if (STALE(header)) {
 3143         rdataset->attributes |= DNS_RDATASETATTR_STALE;
 3144         rdataset->stale_ttl =
 3145             (rbtdb->serve_stale_ttl + header->rdh_ttl) - now;
 3146         rdataset->ttl = 0;
 3147     }
 3148     rdataset->private1 = rbtdb;
 3149     rdataset->private2 = node;
 3150     raw = (unsigned char *)header + sizeof(*header);
 3151     rdataset->private3 = raw;
 3152     rdataset->count = atomic_fetch_add_relaxed(&header->count, 1);
 3153     if (rdataset->count == UINT32_MAX) {
 3154         rdataset->count = 0;
 3155     }
 3156 
 3157     /*
 3158      * Reset iterator state.
 3159      */
 3160     rdataset->privateuint4 = 0;
 3161     rdataset->private5 = NULL;
 3162 
 3163     /*
 3164      * Add noqname proof.
 3165      */
 3166     rdataset->private6 = header->noqname;
 3167     if (rdataset->private6 != NULL) {
 3168         rdataset->attributes |= DNS_RDATASETATTR_NOQNAME;
 3169     }
 3170     rdataset->private7 = header->closest;
 3171     if (rdataset->private7 != NULL) {
 3172         rdataset->attributes |= DNS_RDATASETATTR_CLOSEST;
 3173     }
 3174 
 3175     /*
 3176      * Copy out re-signing information.
 3177      */
 3178     if (RESIGN(header)) {
 3179         rdataset->attributes |= DNS_RDATASETATTR_RESIGN;
 3180         rdataset->resign = (header->resign << 1) | header->resign_lsb;
 3181     } else {
 3182         rdataset->resign = 0;
 3183     }
 3184 }
 3185 
 3186 static inline isc_result_t
 3187 setup_delegation(rbtdb_search_t *search, dns_dbnode_t **nodep,
 3188          dns_name_t *foundname, dns_rdataset_t *rdataset,
 3189          dns_rdataset_t *sigrdataset) {
 3190     dns_name_t *zcname;
 3191     rbtdb_rdatatype_t type;
 3192     dns_rbtnode_t *node;
 3193 
 3194     /*
 3195      * The caller MUST NOT be holding any node locks.
 3196      */
 3197 
 3198     node = search->zonecut;
 3199     type = search->zonecut_rdataset->type;
 3200 
 3201     /*
 3202      * If we have to set foundname, we do it before anything else.
 3203      * If we were to set foundname after we had set nodep or bound the
 3204      * rdataset, then we'd have to undo that work if dns_name_copy()
 3205      * failed.  By setting foundname first, there's nothing to undo if
 3206      * we have trouble.
 3207      */
 3208     if (foundname != NULL && search->copy_name) {
 3209         zcname = dns_fixedname_name(&search->zonecut_name);
 3210         dns_name_copynf(zcname, foundname);
 3211     }
 3212     if (nodep != NULL) {
 3213         /*
 3214          * Note that we don't have to increment the node's reference
 3215          * count here because we're going to use the reference we
 3216          * already have in the search block.
 3217          */
 3218         *nodep = node;
 3219         search->need_cleanup = false;
 3220     }
 3221     if (rdataset != NULL) {
 3222         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
 3223               isc_rwlocktype_read);
 3224         bind_rdataset(search->rbtdb, node, search->zonecut_rdataset,
 3225                   search->now, isc_rwlocktype_read, rdataset);
 3226         if (sigrdataset != NULL && search->zonecut_sigrdataset != NULL)
 3227         {
 3228             bind_rdataset(search->rbtdb, node,
 3229                       search->zonecut_sigrdataset, search->now,
 3230                       isc_rwlocktype_read, sigrdataset);
 3231         }
 3232         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
 3233                 isc_rwlocktype_read);
 3234     }
 3235 
 3236     if (type == dns_rdatatype_dname) {
 3237         return (DNS_R_DNAME);
 3238     }
 3239     return (DNS_R_DELEGATION);
 3240 }
 3241 
 3242 static inline bool
 3243 valid_glue(rbtdb_search_t *search, dns_name_t *name, rbtdb_rdatatype_t type,
 3244        dns_rbtnode_t *node) {
 3245     unsigned char *raw; /* RDATASLAB */
 3246     unsigned int count, size;
 3247     dns_name_t ns_name;
 3248     bool valid = false;
 3249     dns_offsets_t offsets;
 3250     isc_region_t region;
 3251     rdatasetheader_t *header;
 3252 
 3253     /*
 3254      * No additional locking is required.
 3255      */
 3256 
 3257     /*
 3258      * Valid glue types are A, AAAA, A6.  NS is also a valid glue type
 3259      * if it occurs at a zone cut, but is not valid below it.
 3260      */
 3261     if (type == dns_rdatatype_ns) {
 3262         if (node != search->zonecut) {
 3263             return (false);
 3264         }
 3265     } else if (type != dns_rdatatype_a && type != dns_rdatatype_aaaa &&
 3266            type != dns_rdatatype_a6)
 3267     {
 3268         return (false);
 3269     }
 3270 
 3271     header = search->zonecut_rdataset;
 3272     raw = (unsigned char *)header + sizeof(*header);
 3273     count = raw[0] * 256 + raw[1];
 3274     raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH;
 3275 
 3276     while (count > 0) {
 3277         count--;
 3278         size = raw[0] * 256 + raw[1];
 3279         raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH;
 3280         region.base = raw;
 3281         region.length = size;
 3282         raw += size;
 3283         /*
 3284          * XXX Until we have rdata structures, we have no choice but
 3285          * to directly access the rdata format.
 3286          */
 3287         dns_name_init(&ns_name, offsets);
 3288         dns_name_fromregion(&ns_name, &region);
 3289         if (dns_name_compare(&ns_name, name) == 0) {
 3290             valid = true;
 3291             break;
 3292         }
 3293     }
 3294 
 3295     return (valid);
 3296 }
 3297 
 3298 static inline bool
 3299 activeempty(rbtdb_search_t *search, dns_rbtnodechain_t *chain,
 3300         const dns_name_t *name) {
 3301     dns_fixedname_t fnext;
 3302     dns_fixedname_t forigin;
 3303     dns_name_t *next;
 3304     dns_name_t *origin;
 3305     dns_name_t prefix;
 3306     dns_rbtdb_t *rbtdb;
 3307     dns_rbtnode_t *node;
 3308     isc_result_t result;
 3309     bool answer = false;
 3310     rdatasetheader_t *header;
 3311 
 3312     rbtdb = search->rbtdb;
 3313 
 3314     dns_name_init(&prefix, NULL);
 3315     next = dns_fixedname_initname(&fnext);
 3316     origin = dns_fixedname_initname(&forigin);
 3317 
 3318     result = dns_rbtnodechain_next(chain, NULL, NULL);
 3319     while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
 3320         node = NULL;
 3321         result = dns_rbtnodechain_current(chain, &prefix, origin,
 3322                           &node);
 3323         if (result != ISC_R_SUCCESS) {
 3324             break;
 3325         }
 3326         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
 3327               isc_rwlocktype_read);
 3328         for (header = node->data; header != NULL; header = header->next)
 3329         {
 3330             if (header->serial <= search->serial &&
 3331                 !IGNORE(header) && EXISTS(header)) {
 3332                 break;
 3333             }
 3334         }
 3335         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
 3336                 isc_rwlocktype_read);
 3337         if (header != NULL) {
 3338             break;
 3339         }
 3340         result = dns_rbtnodechain_next(chain, NULL, NULL);
 3341     }
 3342     if (result == ISC_R_SUCCESS) {
 3343         result = dns_name_concatenate(&prefix, origin, next, NULL);
 3344     }
 3345     if (result == ISC_R_SUCCESS && dns_name_issubdomain(next, name)) {
 3346         answer = true;
 3347     }
 3348     return (answer);
 3349 }
 3350 
 3351 static inline bool
 3352 activeemptynode(rbtdb_search_t *search, const dns_name_t *qname,
 3353         dns_name_t *wname) {
 3354     dns_fixedname_t fnext;
 3355     dns_fixedname_t forigin;
 3356     dns_fixedname_t fprev;
 3357     dns_name_t *next;
 3358     dns_name_t *origin;
 3359     dns_name_t *prev;
 3360     dns_name_t name;
 3361     dns_name_t rname;
 3362     dns_name_t tname;
 3363     dns_rbtdb_t *rbtdb;
 3364     dns_rbtnode_t *node;
 3365     dns_rbtnodechain_t chain;
 3366     bool check_next = true;
 3367     bool check_prev = true;
 3368     bool answer = false;
 3369     isc_result_t result;
 3370     rdatasetheader_t *header;
 3371     unsigned int n;
 3372 
 3373     rbtdb = search->rbtdb;
 3374 
 3375     dns_name_init(&name, NULL);
 3376     dns_name_init(&tname, NULL);
 3377     dns_name_init(&rname, NULL);
 3378     next = dns_fixedname_initname(&fnext);
 3379     prev = dns_fixedname_initname(&fprev);
 3380     origin = dns_fixedname_initname(&forigin);
 3381 
 3382     /*
 3383      * Find if qname is at or below a empty node.
 3384      * Use our own copy of the chain.
 3385      */
 3386 
 3387     chain = search->chain;
 3388     do {
 3389         node = NULL;
 3390         result = dns_rbtnodechain_current(&chain, &name, origin, &node);
 3391         if (result != ISC_R_SUCCESS) {
 3392             break;
 3393         }
 3394         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
 3395               isc_rwlocktype_read);
 3396         for (header = node->data; header != NULL; header = header->next)
 3397         {
 3398             if (header->serial <= search->serial &&
 3399                 !IGNORE(header) && EXISTS(header)) {
 3400                 break;
 3401             }
 3402         }
 3403         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
 3404                 isc_rwlocktype_read);
 3405         if (header != NULL) {
 3406             break;
 3407         }
 3408         result = dns_rbtnodechain_prev(&chain, NULL, NULL);
 3409     } while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN);
 3410     if (result == ISC_R_SUCCESS) {
 3411         result = dns_name_concatenate(&name, origin, prev, NULL);
 3412     }
 3413     if (result != ISC_R_SUCCESS) {
 3414         check_prev = false;
 3415     }
 3416 
 3417     result = dns_rbtnodechain_next(&chain, NULL, NULL);
 3418     while (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
 3419         node = NULL;
 3420         result = dns_rbtnodechain_current(&chain, &name, origin, &node);
 3421         if (result != ISC_R_SUCCESS) {
 3422             break;
 3423         }
 3424         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
 3425               isc_rwlocktype_read);
 3426         for (header = node->data; header != NULL; header = header->next)
 3427         {
 3428             if (header->serial <= search->serial &&
 3429                 !IGNORE(header) && EXISTS(header)) {
 3430                 break;
 3431             }
 3432         }
 3433         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
 3434                 isc_rwlocktype_read);
 3435         if (header != NULL) {
 3436             break;
 3437         }
 3438         result = dns_rbtnodechain_next(&chain, NULL, NULL);
 3439     }
 3440     if (result == ISC_R_SUCCESS) {
 3441         result = dns_name_concatenate(&name, origin, next, NULL);
 3442     }
 3443     if (result != ISC_R_SUCCESS) {
 3444         check_next = false;
 3445     }
 3446 
 3447     dns_name_clone(qname, &rname);
 3448 
 3449     /*
 3450      * Remove the wildcard label to find the terminal name.
 3451      */
 3452     n = dns_name_countlabels(wname);
 3453     dns_name_getlabelsequence(wname, 1, n - 1, &tname);
 3454 
 3455     do {
 3456         if ((check_prev && dns_name_issubdomain(prev, &rname)) ||
 3457             (check_next && dns_name_issubdomain(next, &rname)))
 3458         {
 3459             answer = true;
 3460             break;
 3461         }
 3462         /*
 3463          * Remove the left hand label.
 3464          */
 3465         n = dns_name_countlabels(&rname);
 3466         dns_name_getlabelsequence(&rname, 1, n - 1, &rname);
 3467     } while (!dns_name_equal(&rname, &tname));
 3468     return (answer);
 3469 }
 3470 
 3471 static inline isc_result_t
 3472 find_wildcard(rbtdb_search_t *search, dns_rbtnode_t **nodep,
 3473           const dns_name_t *qname) {
 3474     unsigned int i, j;
 3475     dns_rbtnode_t *node, *level_node, *wnode;
 3476     rdatasetheader_t *header;
 3477     isc_result_t result = ISC_R_NOTFOUND;
 3478     dns_name_t name;
 3479     dns_name_t *wname;
 3480     dns_fixedname_t fwname;
 3481     dns_rbtdb_t *rbtdb;
 3482     bool done, wild, active;
 3483     dns_rbtnodechain_t wchain;
 3484 
 3485     /*
 3486      * Caller must be holding the tree lock and MUST NOT be holding
 3487      * any node locks.
 3488      */
 3489 
 3490     /*
 3491      * Examine each ancestor level.  If the level's wild bit
 3492      * is set, then construct the corresponding wildcard name and
 3493      * search for it.  If the wildcard node exists, and is active in
 3494      * this version, we're done.  If not, then we next check to see
 3495      * if the ancestor is active in this version.  If so, then there
 3496      * can be no possible wildcard match and again we're done.  If not,
 3497      * continue the search.
 3498      */
 3499 
 3500     rbtdb = search->rbtdb;
 3501     i = search->chain.level_matches;
 3502     done = false;
 3503     node = *nodep;
 3504     do {
 3505         NODE_LOCK(&(rbtdb->node_locks[node->locknum].lock),
 3506               isc_rwlocktype_read);
 3507 
 3508         /*
 3509          * First we try to figure out if this node is active in
 3510          * the search's version.  We do this now, even though we
 3511          * may not need the information, because it simplifies the
 3512          * locking and code flow.
 3513          */
 3514         for (header = node->data; header != NULL; header = header->next)
 3515         {
 3516             if (header->serial <= search->serial &&
 3517                 !IGNORE(header) && EXISTS(header)) {
 3518                 break;
 3519             }
 3520         }
 3521         if (header != NULL) {
 3522             active = true;
 3523         } else {
 3524             active = false;
 3525         }
 3526 
 3527         if (node->wild) {
 3528             wild = true;
 3529         } else {
 3530             wild = false;
 3531         }
 3532 
 3533         NODE_UNLOCK(&(rbtdb->node_locks[node->locknum].lock),
 3534                 isc_rwlocktype_read);
 3535 
 3536         if (wild) {
 3537             /*
 3538              * Construct the wildcard name for this level.
 3539              */
 3540             dns_name_init(&name, NULL);
 3541             dns_rbt_namefromnode(node, &name);
 3542             wname = dns_fixedname_initname(&fwname);
 3543             result = dns_name_concatenate(dns_wildcardname, &name,
 3544                               wname, NULL);
 3545             j = i;
 3546             while (result == ISC_R_SUCCESS && j != 0) {
 3547                 j--;
 3548                 level_node = search->chain.levels[j];
 3549                 dns_name_init(&name, NULL);
 3550                 dns_rbt_namefromnode(level_node, &name);
 3551                 result = dns_name_concatenate(wname, &name,
 3552                                   wname, NULL);
 3553             }
 3554             if (result != ISC_R_SUCCESS) {
 3555                 break;
 3556             }
 3557 
 3558             wnode = NULL;
 3559             dns_rbtnodechain_init(&wchain);
 3560             result = dns_rbt_findnode(
 3561                 rbtdb->tree, wname, NULL, &wnode, &wchain,
 3562                 DNS_RBTFIND_EMPTYDATA, NULL, NULL);
 3563             if (result == ISC_R_SUCCESS) {
 3564                 nodelock_t *lock;
 3565 
 3566                 /*
 3567                  * We have found the wildcard node.  If it
 3568                  * is active in the search's version, we're
 3569                  * done.
 3570                  */
 3571                 lock = &rbtdb->node_locks[wnode->locknum].lock;
 3572                 NODE_LOCK(lock, isc_rwlocktype_read);
 3573                 for (header = wnode->data; header != NULL;
 3574                      header = header->next) {
 3575                     if (header->serial <= search->serial &&
 3576                         !IGNORE(header) && EXISTS(header)) {
 3577                         break;
 3578                     }
 3579                 }
 3580                 NODE_UNLOCK(lock, isc_rwlocktype_read);
 3581                 if (header != NULL ||
 3582                     activeempty(search, &wchain, wname)) {
 3583                     if (activeemptynode(search, qname,
 3584                                 wname)) {
 3585                         return (ISC_R_NOTFOUND);
 3586                     }
 3587                     /*
 3588                      * The wildcard node is active!
 3589                      *
 3590                      * Note: result is still ISC_R_SUCCESS
 3591                      * so we don't have to set it.
 3592                      */
 3593                     *nodep = wnode;
 3594                     break;
 3595                 }
 3596             } else if (result != ISC_R_NOTFOUND &&
 3597                    result != DNS_R_PARTIALMATCH) {
 3598                 /*
 3599                  * An error has occurred.  Bail out.
 3600                  */
 3601                 break;
 3602             }
 3603         }
 3604 
 3605         if (active) {
 3606             /*
 3607              * The level node is active.  Any wildcarding
 3608              * present at higher levels has no
 3609              * effect and we're done.
 3610              */
 3611             result = ISC_R_NOTFOUND;
 3612             break;
 3613         }
 3614 
 3615         if (i > 0) {
 3616             i--;
 3617             node = search->chain.levels[i];
 3618         } else {
 3619             done = true;
 3620         }
 3621     } while (!done);
 3622 
 3623     return (result);
 3624 }
 3625 
 3626 static bool
 3627 matchparams(rdatasetheader_t *header, rbtdb_search_t *search) {
 3628     dns_rdata_t rdata = DNS_RDATA_INIT;
 3629     dns_rdata_nsec3_t nsec3;
 3630     unsigned char *raw; /* RDATASLAB */
 3631     unsigned int rdlen, count;
 3632     isc_region_t region;
 3633     isc_result_t result;
 3634 
 3635     REQUIRE(header->type == dns_rdatatype_nsec3);
 3636 
 3637     raw = (unsigned char *)header + sizeof(*header);
 3638     count = raw[0] * 256 + raw[1]; /* count */
 3639     raw += DNS_RDATASET_COUNT + DNS_RDATASET_LENGTH;
 3640 
 3641     while (count-- > 0) {
 3642         rdlen = raw[0] * 256 + raw[1];
 3643         raw += DNS_RDATASET_ORDER + DNS_RDATASET_LENGTH;
 3644         region.base = raw;
 3645         region.length = rdlen;
 3646         dns_rdata_fromregion(&rdata, search->rbtdb->common.rdclass,
 3647                      dns_rdatatype_nsec3, &region);
 3648         raw += rdlen;
 3649         result = dns_rdata_tostruct(&rdata, &nsec3, NULL);
 3650         INSIST(result == ISC_R_SUCCESS);
 3651         if (nsec3.hash == search->rbtversion->hash &&
 3652             nsec3.iterations == search->rbtversion->iterations &&
 3653             nsec3.salt_length == search->rbtversion->salt_length &&
 3654             memcmp(nsec3.salt, search->rbtversion->salt,
 3655                nsec3.salt_length) == 0)
 3656         {
 3657             return (true);
 3658         }
 3659         dns_rdata_reset(&rdata);
 3660     }
 3661     return (false);
 3662 }
 3663 
 3664 /*
 3665  * Find node of the NSEC/NSEC3 record that is 'name'.
 3666  */
 3667 static inline isc_result_t
 3668 previous_closest_nsec(dns_rdatatype_t type, rbtdb_search_t *search,
 3669               dns_name_t *name, dns_name_t *origin,
 3670               dns_rbtnode_t **nodep, dns_rbtnodechain_t *nsecchain,
 3671               bool *firstp) {
 3672     dns_fixedname_t ftarget;
 3673     dns_name_t *target;
 3674     dns_rbtnode_t *nsecnode;
 3675     isc_result_t result;
 3676 
 3677     REQUIRE(nodep != NULL && *nodep == NULL);
 3678 
 3679     if (type == dns_rdatatype_nsec3) {
 3680         result = dns_rbtnodechain_prev(&search->chain, NULL, NULL);
 3681         if (result != ISC_R_SUCCESS && result != DNS_R_NEWORIGIN) {
 3682             return (result);
 3683         }
 3684         result = dns_rbtnodechain_current(&search->chain, name, origin,
 3685                           nodep);
 3686         return (result);
 3687     }
 3688 
 3689     target = dns_fixedname_initname(&ftarget);
 3690 
 3691     for (;;) {
 3692         if (*firstp) {
 3693             /*
 3694              * Construct the name of the second node to check.
 3695              * It is the first node sought in the NSEC tree.
 3696              */
 3697             *firstp = false;
 3698             dns_rbtnodechain_init(nsecchain);
 3699             result = dns_name_concatenate(name, origin, target,
 3700                               NULL);
 3701             if (result != ISC_R_SUCCESS) {
 3702                 return (result);
 3703             }
 3704             nsecnode = NULL;
 3705             result = dns_rbt_findnode(
 3706                 search->rbtdb->nsec, target, NULL, &nsecnode,
 3707                 nsecchain, DNS_RBTFIND_NOOPTIONS, NULL, NULL);
 3708             if (result == ISC_R_SUCCESS) {
 3709                 /*
 3710                  * Since this was the first loop, finding the
 3711                  * name in the NSEC tree implies that the first
 3712                  * node checked in the main tree had an
 3713                  * unacceptable NSEC record.
 3714                  * Try the previous node in the NSEC tree.
 3715                  */
 3716                 result = dns_rbtnodechain_prev(nsecchain, name,
 3717                                    origin);
 3718                 if (result == DNS_R_NEWORIGIN) {
 3719                     result = ISC_R_SUCCESS;
 3720                 }
 3721             } else if (result == ISC_R_NOTFOUND ||
 3722                    result == DNS_R_PARTIALMATCH) {
 3723                 result = dns_rbtnodechain_current(
 3724                     nsecchain, name, origin, NULL);
 3725                 if (result == ISC_R_NOTFOUND) {
 3726                     result = ISC_R_NOMORE;
 3727                 }
 3728             }
 3729         } else {
 3730             /*
 3731              * This is a second or later trip through the auxiliary
 3732              * tree for the name of a third or earlier NSEC node in
 3733              * the main tree.  Previous trips through the NSEC tree
 3734              * must have found nodes in the main tree with NSEC
 3735              * records.  Perhaps they lacked signature records.
 3736              */
 3737             result = dns_rbtnodechain_prev(nsecchain, name, origin);
 3738             if (result == DNS_R_NEWORIGIN) {
 3739                 result = ISC_R_SUCCESS;
 3740             }
 3741         }
 3742         if (result != ISC_R_SUCCESS) {
 3743             return (result);
 3744         }
 3745 
 3746         /*
 3747          * Construct the name to seek in the main tree.
 3748          */
 3749         result = dns_name_concatenate(name, origin, target, NULL);
 3750         if (result != ISC_R_SUCCESS) {
 3751             return (result);
 3752         }
 3753 
 3754         *nodep = NULL;
 3755         result = dns_rbt_findnode(search->rbtdb->tree, target, NULL,
 3756                       nodep, &search->chain,
 3757                       DNS_RBTFIND_NOOPTIONS, NULL, NULL);
 3758         if (result == ISC_R_SUCCESS) {
 3759             return (result);
 3760         }
 3761 
 3762         /*
 3763          * There should always be a node in the main tree with the
 3764          * same name as the node in the auxiliary NSEC tree, except for
 3765          * nodes in the auxiliary tree that are awaiting deletion.
 3766          */
 3767         if (result != DNS_R_PARTIALMATCH && result != ISC_R_NOTFOUND) {
 3768             isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
 3769                       DNS_LOGMODULE_CACHE, ISC_LOG_ERROR,
 3770                       "previous_closest_nsec(): %s",
 3771                       isc_result_totext(result));
 3772             return (DNS_R_BADDB);
 3773         }
 3774     }
 3775 }
 3776 
 3777 /*
 3778  * Find the NSEC/NSEC3 which is or before the current point on the
 3779  * search chain.  For NSEC3 records only NSEC3 records that match the
 3780  * current NSEC3PARAM record are considered.
 3781  */
 3782 static inline isc_result_t
 3783 find_closest_nsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
 3784           dns_name_t *foundname, dns_rdataset_t *rdataset,
 3785           dns_rdataset_t *sigrdataset, dns_rbt_t *tree,
 3786           dns_db_secure_t secure) {
 3787     dns_rbtnode_t *node, *prevnode;
 3788     rdatasetheader_t *header, *header_next, *found, *foundsig;
 3789     dns_rbtnodechain_t nsecchain;
 3790     bool empty_node;
 3791     isc_result_t result;
 3792     dns_fixedname_t fname, forigin;
 3793     dns_name_t *name, *origin;
 3794     dns_rdatatype_t type;
 3795     rbtdb_rdatatype_t sigtype;
 3796     bool wraps;
 3797     bool first = true;
 3798     bool need_sig = (secure == dns_db_secure);
 3799 
 3800     if (tree == search->rbtdb->nsec3) {
 3801         type = dns_rdatatype_nsec3;
 3802         sigtype = RBTDB_RDATATYPE_SIGNSEC3;
 3803         wraps = true;
 3804     } else {
 3805         type = dns_rdatatype_nsec;
 3806         sigtype = RBTDB_RDATATYPE_SIGNSEC;
 3807         wraps = false;
 3808     }
 3809 
 3810     /*
 3811      * Use the auxiliary tree only starting with the second node in the
 3812      * hope that the original node will be right much of the time.
 3813      */
 3814     name = dns_fixedname_initname(&fname);
 3815     origin = dns_fixedname_initname(&forigin);
 3816 again:
 3817     node = NULL;
 3818     prevnode = NULL;
 3819     result = dns_rbtnodechain_current(&search->chain, name, origin, &node);
 3820     if (result != ISC_R_SUCCESS) {
 3821         return (result);
 3822     }
 3823     do {
 3824         NODE_LOCK(&(search->rbtdb->node_locks[node->locknum].lock),
 3825               isc_rwlocktype_read);
 3826         found = NULL;
 3827         foundsig = NULL;
 3828         empty_node = true;
 3829         for (header = node->data; header != NULL; header = header_next)
 3830         {
 3831             header_next = header->next;
 3832             /*
 3833              * Look for an active, extant NSEC or RRSIG NSEC.
 3834              */
 3835             do {
 3836                 if (header->serial <= search->serial &&
 3837                     !IGNORE(header)) {
 3838                     /*
 3839                      * Is this a "this rdataset doesn't
 3840                      * exist" record?
 3841                      */
 3842                     if (NONEXISTENT(header)) {
 3843                         header = NULL;
 3844                     }
 3845                     break;
 3846                 } else {
 3847                     header = header->down;
 3848                 }
 3849             } while (header != NULL);
 3850             if (header != NULL) {
 3851                 /*
 3852                  * We now know that there is at least one
 3853                  * active rdataset at this node.
 3854                  */
 3855                 empty_node = false;
 3856                 if (header->type == type) {
 3857                     found = header;
 3858                     if (foundsig != NULL) {
 3859                         break;
 3860                     }
 3861                 } else if (header->type == sigtype) {
 3862                     foundsig = header;
 3863                     if (found != NULL) {
 3864                         break;
 3865                     }
 3866                 }
 3867             }
 3868         }
 3869         if (!empty_node) {
 3870             if (found != NULL && search->rbtversion->havensec3 &&
 3871                 found->type == dns_rdatatype_nsec3 &&
 3872                 !matchparams(found, search))
 3873             {
 3874                 empty_node = true;
 3875                 found = NULL;
 3876                 foundsig = NULL;
 3877                 result = previous_closest_nsec(
 3878                     type, search, name, origin, &prevnode,
 3879                     NULL, NULL);
 3880             } else if (found != NULL &&
 3881                    (foundsig != NULL || !need_sig)) {
 3882                 /*
 3883                  * We've found the right NSEC/NSEC3 record.
 3884                  *
 3885                  * Note: for this to really be the right
 3886                  * NSEC record, it's essential that the NSEC
 3887                  * records of any nodes obscured by a zone
 3888                  * cut have been removed; we assume this is
 3889                  * the case.
 3890                  */
 3891                 result = dns_name_concatenate(name, origin,
 3892                                   foundname, NULL);
 3893                 if (result == ISC_R_SUCCESS) {
 3894                     if (nodep != NULL) {
 3895                         new_reference(
 3896                             search->rbtdb, node,
 3897                             isc_rwlocktype_read);
 3898                         *nodep = node;
 3899                     }
 3900                     bind_rdataset(search->rbtdb, node,
 3901                               found, search->now,
 3902                               isc_rwlocktype_read,
 3903                               rdataset);
 3904                     if (foundsig != NULL) {
 3905                         bind_rdataset(
 3906                             search->rbtdb, node,
 3907                             foundsig, search->now,
 3908                             isc_rwlocktype_read,
 3909                             sigrdataset);
 3910                     }
 3911                 }
 3912             } else if (found == NULL && foundsig == NULL) {
 3913                 /*
 3914                  * This node is active, but has no NSEC or
 3915                  * RRSIG NSEC.  That means it's glue or
 3916                  * other obscured zone data that isn't
 3917                  * relevant for our search.  Treat the
 3918                  * node as if it were empty and keep looking.
 3919                  */
 3920                 empty_node = true;
 3921                 result = previous_closest_nsec(
 3922                     type, search, name, origin, &prevnode,
 3923                     &nsecchain, &first);
 3924             } else {
 3925                 /*
 3926                  * We found an active node, but either the
 3927                  * NSEC or the RRSIG NSEC is missing.  This
 3928                  * shouldn't happen.
 3929                  */
 3930                 result = DNS_R_BADDB;
 3931             }
 3932         } else {
 3933             /*
 3934              * This node isn't active.  We've got to keep
 3935              * looking.
 3936              */
 3937             result = previous_closest_nsec(type, search, name,
 3938                                origin, &prevnode,
 3939                                &nsecchain, &first);
 3940         }
 3941         NODE_UNLOCK(&(search->rbtdb->node_locks[node->locknum].lock),
 3942                 isc_rwlocktype_read);
 3943         node = prevnode;
 3944         prevnode = NULL;
 3945     } while (empty_node && result == ISC_R_SUCCESS);
 3946 
 3947     if (!first) {
 3948         dns_rbtnodechain_invalidate(&nsecchain);
 3949     }
 3950 
 3951     if (result == ISC_R_NOMORE && wraps) {
 3952         result = dns_rbtnodechain_last(&search->chain, tree, NULL,
 3953                            NULL);
 3954         if (result == ISC_R_SUCCESS || result == DNS_R_NEWORIGIN) {
 3955             wraps = false;
 3956             goto again;
 3957         }
 3958     }
 3959 
 3960     /*
 3961      * If the result is ISC_R_NOMORE, then we got to the beginning of
 3962      * the database and didn't find a NSEC record.  This shouldn't
 3963      * happen.
 3964      */
 3965     if (result == ISC_R_NOMORE) {
 3966         result = DNS_R_BADDB;
 3967     }
 3968 
 3969     return (result);
 3970 }
 3971 
 3972 static isc_result_t
 3973 zone_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
 3974       dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
 3975       dns_dbnode_t **nodep, dns_name_t *foundname, dns_rdataset_t *rdataset,
 3976       dns_rdataset_t *sigrdataset) {
 3977     dns_rbtnode_t *node = NULL;
 3978     isc_result_t result;
 3979     rbtdb_search_t search;
 3980     bool cname_ok = true;
 3981     bool close_version = false;
 3982     bool maybe_zonecut = false;
 3983     bool at_zonecut = false;
 3984     bool wild;
 3985     bool empty_node;
 3986     rdatasetheader_t *header, *header_next, *found, *nsecheader;
 3987     rdatasetheader_t *foundsig, *cnamesig, *nsecsig;
 3988     rbtdb_rdatatype_t sigtype;
 3989     bool active;
 3990     nodelock_t *lock;
 3991     dns_rbt_t *tree;
 3992 
 3993     search.rbtdb = (dns_rbtdb_t *)db;
 3994 
 3995     REQUIRE(VALID_RBTDB(search.rbtdb));
 3996     INSIST(version == NULL ||
 3997            ((rbtdb_version_t *)version)->rbtdb == (dns_rbtdb_t *)db);
 3998 
 3999     /*
 4000      * We don't care about 'now'.
 4001      */
 4002     UNUSED(now);
 4003 
 4004     /*
 4005      * If the caller didn't supply a version, attach to the current
 4006      * version.
 4007      */
 4008     if (version == NULL) {
 4009         currentversion(db, &version);
 4010         close_version = true;
 4011     }
 4012 
 4013     search.rbtversion = version;
 4014     search.serial = search.rbtversion->serial;
 4015     search.options = options;
 4016     search.copy_name = false;
 4017     search.need_cleanup = false;
 4018     search.wild = false;
 4019     search.zonecut = NULL;
 4020     dns_fixedname_init(&search.zonecut_name);
 4021     dns_rbtnodechain_init(&search.chain);
 4022     search.now = 0;
 4023 
 4024     /*
 4025      * 'wild' will be true iff. we've matched a wildcard.
 4026      */
 4027     wild = false;
 4028 
 4029     RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
 4030 
 4031     /*
 4032      * Search down from the root of the tree.  If, while going down, we
 4033      * encounter a callback node, zone_zonecut_callback() will search the
 4034      * rdatasets at the zone cut for active DNAME or NS rdatasets.
 4035      */
 4036     tree = (options & DNS_DBFIND_FORCENSEC3) != 0 ? search.rbtdb->nsec3
 4037                               : search.rbtdb->tree;
 4038     result = dns_rbt_findnode(tree, name, foundname, &node, &search.chain,
 4039                   DNS_RBTFIND_EMPTYDATA, zone_zonecut_callback,
 4040                   &search);
 4041 
 4042     if (result == DNS_R_PARTIALMATCH) {
 4043     partial_match:
 4044         if (search.zonecut != NULL) {
 4045             result = setup_delegation(&search, nodep, foundname,
 4046                           rdataset, sigrdataset);
 4047             goto tree_exit;
 4048         }
 4049 
 4050         if (search.wild) {
 4051             /*
 4052              * At least one of the levels in the search chain
 4053              * potentially has a wildcard.  For each such level,
 4054              * we must see if there's a matching wildcard active
 4055              * in the current version.
 4056              */
 4057             result = find_wildcard(&search, &node, name);
 4058             if (result == ISC_R_SUCCESS) {
 4059                 dns_name_copynf(name, foundname);
 4060                 wild = true;
 4061                 goto found;
 4062             } else if (result != ISC_R_NOTFOUND) {
 4063                 goto tree_exit;
 4064             }
 4065         }
 4066 
 4067         active = false;
 4068         if ((options & DNS_DBFIND_FORCENSEC3) == 0) {
 4069             /*
 4070              * The NSEC3 tree won't have empty nodes,
 4071              * so it isn't necessary to check for them.
 4072              */
 4073             dns_rbtnodechain_t chain = search.chain;
 4074             active = activeempty(&search, &chain, name);
 4075         }
 4076 
 4077         /*
 4078          * If we're here, then the name does not exist, is not
 4079          * beneath a zonecut, and there's no matching wildcard.
 4080          */
 4081         if ((search.rbtversion->secure == dns_db_secure &&
 4082              !search.rbtversion->havensec3) ||
 4083             (search.options & DNS_DBFIND_FORCENSEC) != 0 ||
 4084             (search.options & DNS_DBFIND_FORCENSEC3) != 0)
 4085         {
 4086             result = find_closest_nsec(&search, nodep, foundname,
 4087                            rdataset, sigrdataset, tree,
 4088                            search.rbtversion->secure);
 4089             if (result == ISC_R_SUCCESS) {
 4090                 result = active ? DNS_R_EMPTYNAME
 4091                         : DNS_R_NXDOMAIN;
 4092             }
 4093         } else {
 4094             result = active ? DNS_R_EMPTYNAME : DNS_R_NXDOMAIN;
 4095         }
 4096         goto tree_exit;
 4097     } else if (result != ISC_R_SUCCESS) {
 4098         goto tree_exit;
 4099     }
 4100 
 4101 found:
 4102     /*
 4103      * We have found a node whose name is the desired name, or we
 4104      * have matched a wildcard.
 4105      */
 4106 
 4107     if (search.zonecut != NULL) {
 4108         /*
 4109          * If we're beneath a zone cut, we don't want to look for
 4110          * CNAMEs because they're not legitimate zone glue.
 4111          */
 4112         cname_ok = false;
 4113     } else {
 4114         /*
 4115          * The node may be a zone cut itself.  If it might be one,
 4116          * make sure we check for it later.
 4117          *
 4118          * DS records live above the zone cut in ordinary zone so
 4119          * we want to ignore any referral.
 4120          *
 4121          * Stub zones don't have anything "above" the delegation so
 4122          * we always return a referral.
 4123          */
 4124         if (node->find_callback &&
 4125             ((node != search.rbtdb->origin_node &&
 4126               !dns_rdatatype_atparent(type)) ||
 4127              IS_STUB(search.rbtdb)))
 4128         {
 4129             maybe_zonecut = true;
 4130         }
 4131     }
 4132 
 4133     /*
 4134      * Certain DNSSEC types are not subject to CNAME matching
 4135      * (RFC4035, section 2.5 and RFC3007).
 4136      *
 4137      * We don't check for RRSIG, because we don't store RRSIG records
 4138      * directly.
 4139      */
 4140     if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) {
 4141         cname_ok = false;
 4142     }
 4143 
 4144     /*
 4145      * We now go looking for rdata...
 4146      */
 4147 
 4148     lock = &search.rbtdb->node_locks[node->locknum].lock;
 4149     NODE_LOCK(lock, isc_rwlocktype_read);
 4150 
 4151     found = NULL;
 4152     foundsig = NULL;
 4153     sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
 4154     nsecheader = NULL;
 4155     nsecsig = NULL;
 4156     cnamesig = NULL;
 4157     empty_node = true;
 4158     for (header = node->data; header != NULL; header = header_next) {
 4159         header_next = header->next;
 4160         /*
 4161          * Look for an active, extant rdataset.
 4162          */
 4163         do {
 4164             if (header->serial <= search.serial && !IGNORE(header))
 4165             {
 4166                 /*
 4167                  * Is this a "this rdataset doesn't
 4168                  * exist" record?
 4169                  */
 4170                 if (NONEXISTENT(header)) {
 4171                     header = NULL;
 4172                 }
 4173                 break;
 4174             } else {
 4175                 header = header->down;
 4176             }
 4177         } while (header != NULL);
 4178         if (header != NULL) {
 4179             /*
 4180              * We now know that there is at least one active
 4181              * rdataset at this node.
 4182              */
 4183             empty_node = false;
 4184 
 4185             /*
 4186              * Do special zone cut handling, if requested.
 4187              */
 4188             if (maybe_zonecut && header->type == dns_rdatatype_ns) {
 4189                 /*
 4190                  * We increment the reference count on node to
 4191                  * ensure that search->zonecut_rdataset will
 4192                  * still be valid later.
 4193                  */
 4194                 new_reference(search.rbtdb, node,
 4195                           isc_rwlocktype_read);
 4196                 search.zonecut = node;
 4197                 search.zonecut_rdataset = header;
 4198                 search.zonecut_sigrdataset = NULL;
 4199                 search.need_cleanup = true;
 4200                 maybe_zonecut = false;
 4201                 at_zonecut = true;
 4202                 /*
 4203                  * It is not clear if KEY should still be
 4204                  * allowed at the parent side of the zone
 4205                  * cut or not.  It is needed for RFC3007
 4206                  * validated updates.
 4207                  */
 4208                 if ((search.options & DNS_DBFIND_GLUEOK) == 0 &&
 4209                     type != dns_rdatatype_nsec &&
 4210                     type != dns_rdatatype_key)
 4211                 {
 4212                     /*
 4213                      * Glue is not OK, but any answer we
 4214                      * could return would be glue.  Return
 4215                      * the delegation.
 4216                      */
 4217                     found = NULL;
 4218                     break;
 4219                 }
 4220                 if (found != NULL && foundsig != NULL) {
 4221                     break;
 4222                 }
 4223             }
 4224 
 4225             /*
 4226              * If the NSEC3 record doesn't match the chain
 4227              * we are using behave as if it isn't here.
 4228              */
 4229             if (header->type == dns_rdatatype_nsec3 &&
 4230                 !matchparams(header, &search)) {
 4231                 NODE_UNLOCK(lock, isc_rwlocktype_read);
 4232                 goto partial_match;
 4233             }
 4234             /*
 4235              * If we found a type we were looking for,
 4236              * remember it.
 4237              */
 4238             if (header->type == type || type == dns_rdatatype_any ||
 4239                 (header->type == dns_rdatatype_cname && cname_ok))
 4240             {
 4241                 /*
 4242                  * We've found the answer!
 4243                  */
 4244                 found = header;
 4245                 if (header->type == dns_rdatatype_cname &&
 4246                     cname_ok) {
 4247                     /*
 4248                      * We may be finding a CNAME instead
 4249                      * of the desired type.
 4250                      *
 4251                      * If we've already got the CNAME RRSIG,
 4252                      * use it, otherwise change sigtype
 4253                      * so that we find it.
 4254                      */
 4255                     if (cnamesig != NULL) {
 4256                         foundsig = cnamesig;
 4257                     } else {
 4258                         sigtype =
 4259                             RBTDB_RDATATYPE_SIGCNAME;
 4260                     }
 4261                 }
 4262                 /*
 4263                  * If we've got all we need, end the search.
 4264                  */
 4265                 if (!maybe_zonecut && foundsig != NULL) {
 4266                     break;
 4267                 }
 4268             } else if (header->type == sigtype) {
 4269                 /*
 4270                  * We've found the RRSIG rdataset for our
 4271                  * target type.  Remember it.
 4272                  */
 4273                 foundsig = header;
 4274                 /*
 4275                  * If we've got all we need, end the search.
 4276                  */
 4277                 if (!maybe_zonecut && found != NULL) {
 4278                     break;
 4279                 }
 4280             } else if (header->type == dns_rdatatype_nsec &&
 4281                    !search.rbtversion->havensec3) {
 4282                 /*
 4283                  * Remember a NSEC rdataset even if we're
 4284                  * not specifically looking for it, because
 4285                  * we might need it later.
 4286                  */
 4287                 nsecheader = header;
 4288             } else if (header->type == RBTDB_RDATATYPE_SIGNSEC &&
 4289                    !search.rbtversion->havensec3)
 4290             {
 4291                 /*
 4292                  * If we need the NSEC rdataset, we'll also
 4293                  * need its signature.
 4294                  */
 4295                 nsecsig = header;
 4296             } else if (cname_ok &&
 4297                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
 4298                 /*
 4299                  * If we get a CNAME match, we'll also need
 4300                  * its signature.
 4301                  */
 4302                 cnamesig = header;
 4303             }
 4304         }
 4305     }
 4306 
 4307     if (empty_node) {
 4308         /*
 4309          * We have an exact match for the name, but there are no
 4310          * active rdatasets in the desired version.  That means that
 4311          * this node doesn't exist in the desired version, and that
 4312          * we really have a partial match.
 4313          */
 4314         if (!wild) {
 4315             NODE_UNLOCK(lock, isc_rwlocktype_read);
 4316             goto partial_match;
 4317         }
 4318     }
 4319 
 4320     /*
 4321      * If we didn't find what we were looking for...
 4322      */
 4323     if (found == NULL) {
 4324         if (search.zonecut != NULL) {
 4325             /*
 4326              * We were trying to find glue at a node beneath a
 4327              * zone cut, but didn't.
 4328              *
 4329              * Return the delegation.
 4330              */
 4331             NODE_UNLOCK(lock, isc_rwlocktype_read);
 4332             result = setup_delegation(&search, nodep, foundname,
 4333                           rdataset, sigrdataset);
 4334             goto tree_exit;
 4335         }
 4336         /*
 4337          * The desired type doesn't exist.
 4338          */
 4339         result = DNS_R_NXRRSET;
 4340         if (search.rbtversion->secure == dns_db_secure &&
 4341             !search.rbtversion->havensec3 &&
 4342             (nsecheader == NULL || nsecsig == NULL))
 4343         {
 4344             /*
 4345              * The zone is secure but there's no NSEC,
 4346              * or the NSEC has no signature!
 4347              */
 4348             if (!wild) {
 4349                 result = DNS_R_BADDB;
 4350                 goto node_exit;
 4351             }
 4352 
 4353             NODE_UNLOCK(lock, isc_rwlocktype_read);
 4354             result = find_closest_nsec(&search, nodep, foundname,
 4355                            rdataset, sigrdataset,
 4356                            search.rbtdb->tree,
 4357                            search.rbtversion->secure);
 4358             if (result == ISC_R_SUCCESS) {
 4359                 result = DNS_R_EMPTYWILD;
 4360             }
 4361             goto tree_exit;
 4362         }
 4363         if ((search.options & DNS_DBFIND_FORCENSEC) != 0 &&
 4364             nsecheader == NULL) {
 4365             /*
 4366              * There's no NSEC record, and we were told
 4367              * to find one.
 4368              */
 4369             result = DNS_R_BADDB;
 4370             goto node_exit;
 4371         }
 4372         if (nodep != NULL) {
 4373             new_reference(search.rbtdb, node, isc_rwlocktype_read);
 4374             *nodep = node;
 4375         }
 4376         if ((search.rbtversion->secure == dns_db_secure &&
 4377              !search.rbtversion->havensec3) ||
 4378             (search.options & DNS_DBFIND_FORCENSEC) != 0)
 4379         {
 4380             bind_rdataset(search.rbtdb, node, nsecheader, 0,
 4381                       isc_rwlocktype_read, rdataset);
 4382             if (nsecsig != NULL) {
 4383                 bind_rdataset(search.rbtdb, node, nsecsig, 0,
 4384                           isc_rwlocktype_read, sigrdataset);
 4385             }
 4386         }
 4387         if (wild) {
 4388             foundname->attributes |= DNS_NAMEATTR_WILDCARD;
 4389         }
 4390         goto node_exit;
 4391     }
 4392 
 4393     /*
 4394      * We found what we were looking for, or we found a CNAME.
 4395      */
 4396 
 4397     if (type != found->type && type != dns_rdatatype_any &&
 4398         found->type == dns_rdatatype_cname)
 4399     {
 4400         /*
 4401          * We weren't doing an ANY query and we found a CNAME instead
 4402          * of the type we were looking for, so we need to indicate
 4403          * that result to the caller.
 4404          */
 4405         result = DNS_R_CNAME;
 4406     } else if (search.zonecut != NULL) {
 4407         /*
 4408          * If we're beneath a zone cut, we must indicate that the
 4409          * result is glue, unless we're actually at the zone cut
 4410          * and the type is NSEC or KEY.
 4411          */
 4412         if (search.zonecut == node) {
 4413             /*
 4414              * It is not clear if KEY should still be
 4415              * allowed at the parent side of the zone
 4416              * cut or not.  It is needed for RFC3007
 4417              * validated updates.
 4418              */
 4419             if (type == dns_rdatatype_nsec ||
 4420                 type == dns_rdatatype_nsec3 ||
 4421                 type == dns_rdatatype_key)
 4422             {
 4423                 result = ISC_R_SUCCESS;
 4424             } else if (type == dns_rdatatype_any) {
 4425                 result = DNS_R_ZONECUT;
 4426             } else {
 4427                 result = DNS_R_GLUE;
 4428             }
 4429         } else {
 4430             result = DNS_R_GLUE;
 4431         }
 4432         /*
 4433          * We might have found data that isn't glue, but was occluded
 4434          * by a dynamic update.  If the caller cares about this, they
 4435          * will have told us to validate glue.
 4436          *
 4437          * XXX We should cache the glue validity state!
 4438          */
 4439         if (result == DNS_R_GLUE &&
 4440             (search.options & DNS_DBFIND_VALIDATEGLUE) != 0 &&
 4441             !valid_glue(&search, foundname, type, node))
 4442         {
 4443             NODE_UNLOCK(lock, isc_rwlocktype_read);
 4444             result = setup_delegation(&search, nodep, foundname,
 4445                           rdataset, sigrdataset);
 4446             goto tree_exit;
 4447         }
 4448     } else {
 4449         /*
 4450          * An ordinary successful query!
 4451          */
 4452         result = ISC_R_SUCCESS;
 4453     }
 4454 
 4455     if (nodep != NULL) {
 4456         if (!at_zonecut) {
 4457             new_reference(search.rbtdb, node, isc_rwlocktype_read);
 4458         } else {
 4459             search.need_cleanup = false;
 4460         }
 4461         *nodep = node;
 4462     }
 4463 
 4464     if (type != dns_rdatatype_any) {
 4465         bind_rdataset(search.rbtdb, node, found, 0, isc_rwlocktype_read,
 4466                   rdataset);
 4467         if (foundsig != NULL) {
 4468             bind_rdataset(search.rbtdb, node, foundsig, 0,
 4469                       isc_rwlocktype_read, sigrdataset);
 4470         }
 4471     }
 4472 
 4473     if (wild) {
 4474         foundname->attributes |= DNS_NAMEATTR_WILDCARD;
 4475     }
 4476 
 4477 node_exit:
 4478     NODE_UNLOCK(lock, isc_rwlocktype_read);
 4479 
 4480 tree_exit:
 4481     RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
 4482 
 4483     /*
 4484      * If we found a zonecut but aren't going to use it, we have to
 4485      * let go of it.
 4486      */
 4487     if (search.need_cleanup) {
 4488         node = search.zonecut;
 4489         INSIST(node != NULL);
 4490         lock = &(search.rbtdb->node_locks[node->locknum].lock);
 4491 
 4492         NODE_LOCK(lock, isc_rwlocktype_read);
 4493         decrement_reference(search.rbtdb, node, 0, isc_rwlocktype_read,
 4494                     isc_rwlocktype_none, false);
 4495         NODE_UNLOCK(lock, isc_rwlocktype_read);
 4496     }
 4497 
 4498     if (close_version) {
 4499         closeversion(db, &version, false);
 4500     }
 4501 
 4502     dns_rbtnodechain_reset(&search.chain);
 4503 
 4504     return (result);
 4505 }
 4506 
 4507 static isc_result_t
 4508 zone_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options,
 4509          isc_stdtime_t now, dns_dbnode_t **nodep, dns_name_t *foundname,
 4510          dns_name_t *dcname, dns_rdataset_t *rdataset,
 4511          dns_rdataset_t *sigrdataset) {
 4512     UNUSED(db);
 4513     UNUSED(name);
 4514     UNUSED(options);
 4515     UNUSED(now);
 4516     UNUSED(nodep);
 4517     UNUSED(foundname);
 4518     UNUSED(dcname);
 4519     UNUSED(rdataset);
 4520     UNUSED(sigrdataset);
 4521 
 4522     FATAL_ERROR(__FILE__, __LINE__, "zone_findzonecut() called!");
 4523 
 4524     /* NOTREACHED */
 4525     return (ISC_R_NOTIMPLEMENTED);
 4526 }
 4527 
 4528 static bool
 4529 check_stale_header(dns_rbtnode_t *node, rdatasetheader_t *header,
 4530            isc_rwlocktype_t *locktype, nodelock_t *lock,
 4531            rbtdb_search_t *search, rdatasetheader_t **header_prev) {
 4532     if (!ACTIVE(header, search->now)) {
 4533         dns_ttl_t stale = header->rdh_ttl +
 4534                   search->rbtdb->serve_stale_ttl;
 4535         /*
 4536          * If this data is in the stale window keep it and if
 4537          * DNS_DBFIND_STALEOK is not set we tell the caller to
 4538          * skip this record.  We skip the records with ZEROTTL
 4539          * (these records should not be cached anyway).
 4540          */
 4541         if (!ZEROTTL(header) && KEEPSTALE(search->rbtdb) &&
 4542             stale > search->now) {
 4543             mark_header_stale(search->rbtdb, header);
 4544             *header_prev = header;
 4545             return ((search->options & DNS_DBFIND_STALEOK) == 0);
 4546         }
 4547 
 4548         /*
 4549          * This rdataset is stale.  If no one else is using the
 4550          * node, we can clean it up right now, otherwise we mark
 4551          * it as ancient, and the node as dirty, so it will get
 4552          * cleaned up later.
 4553          */
 4554         if ((header->rdh_ttl < search->now - RBTDB_VIRTUAL) &&
 4555             (*locktype == isc_rwlocktype_write ||
 4556              NODE_TRYUPGRADE(lock) == ISC_R_SUCCESS))
 4557         {
 4558             /*
 4559              * We update the node's status only when we can
 4560              * get write access; otherwise, we leave others
 4561              * to this work.  Periodical cleaning will
 4562              * eventually take the job as the last resort.
 4563              * We won't downgrade the lock, since other
 4564              * rdatasets are probably stale, too.
 4565              */
 4566             *locktype = isc_rwlocktype_write;
 4567 
 4568             if (isc_refcount_current(&node->references) == 0) {
 4569                 isc_mem_t *mctx;
 4570 
 4571                 /*
 4572                  * header->down can be non-NULL if the
 4573                  * refcount has just decremented to 0
 4574                  * but decrement_reference() has not
 4575                  * performed clean_cache_node(), in
 4576                  * which case we need to purge the stale
 4577                  * headers first.
 4578                  */
 4579                 mctx = search->rbtdb->common.mctx;
 4580                 clean_stale_headers(search->rbtdb, mctx,
 4581                             header);
 4582                 if (*header_prev != NULL) {
 4583                     (*header_prev)->next = header->next;
 4584                 } else {
 4585                     node->data = header->next;
 4586                 }
 4587                 free_rdataset(search->rbtdb, mctx, header);
 4588             } else {
 4589                 mark_header_ancient(search->rbtdb, header);
 4590                 *header_prev = header;
 4591             }
 4592         } else {
 4593             *header_prev = header;
 4594         }
 4595         return (true);
 4596     }
 4597     return (false);
 4598 }
 4599 
 4600 static isc_result_t
 4601 cache_zonecut_callback(dns_rbtnode_t *node, dns_name_t *name, void *arg) {
 4602     rbtdb_search_t *search = arg;
 4603     rdatasetheader_t *header, *header_prev, *header_next;
 4604     rdatasetheader_t *dname_header, *sigdname_header;
 4605     isc_result_t result;
 4606     nodelock_t *lock;
 4607     isc_rwlocktype_t locktype;
 4608 
 4609     /* XXX comment */
 4610 
 4611     REQUIRE(search->zonecut == NULL);
 4612 
 4613     /*
 4614      * Keep compiler silent.
 4615      */
 4616     UNUSED(name);
 4617 
 4618     lock = &(search->rbtdb->node_locks[node->locknum].lock);
 4619     locktype = isc_rwlocktype_read;
 4620     NODE_LOCK(lock, locktype);
 4621 
 4622     /*
 4623      * Look for a DNAME or RRSIG DNAME rdataset.
 4624      */
 4625     dname_header = NULL;
 4626     sigdname_header = NULL;
 4627     header_prev = NULL;
 4628     for (header = node->data; header != NULL; header = header_next) {
 4629         header_next = header->next;
 4630         if (check_stale_header(node, header, &locktype, lock, search,
 4631                        &header_prev)) {
 4632             /* Do nothing. */
 4633         } else if (header->type == dns_rdatatype_dname &&
 4634                EXISTS(header)) {
 4635             dname_header = header;
 4636             header_prev = header;
 4637         } else if (header->type == RBTDB_RDATATYPE_SIGDNAME &&
 4638                EXISTS(header)) {
 4639             sigdname_header = header;
 4640             header_prev = header;
 4641         } else {
 4642             header_prev = header;
 4643         }
 4644     }
 4645 
 4646     if (dname_header != NULL &&
 4647         (!DNS_TRUST_PENDING(dname_header->trust) ||
 4648          (search->options & DNS_DBFIND_PENDINGOK) != 0))
 4649     {
 4650         /*
 4651          * We increment the reference count on node to ensure that
 4652          * search->zonecut_rdataset will still be valid later.
 4653          */
 4654         new_reference(search->rbtdb, node, locktype);
 4655         search->zonecut = node;
 4656         search->zonecut_rdataset = dname_header;
 4657         search->zonecut_sigrdataset = sigdname_header;
 4658         search->need_cleanup = true;
 4659         result = DNS_R_PARTIALMATCH;
 4660     } else {
 4661         result = DNS_R_CONTINUE;
 4662     }
 4663 
 4664     NODE_UNLOCK(lock, locktype);
 4665 
 4666     return (result);
 4667 }
 4668 
 4669 static inline isc_result_t
 4670 find_deepest_zonecut(rbtdb_search_t *search, dns_rbtnode_t *node,
 4671              dns_dbnode_t **nodep, dns_name_t *foundname,
 4672              dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) {
 4673     unsigned int i;
 4674     dns_rbtnode_t *level_node;
 4675     rdatasetheader_t *header, *header_prev, *header_next;
 4676     rdatasetheader_t *found, *foundsig;
 4677     isc_result_t result = ISC_R_NOTFOUND;
 4678     dns_name_t name;
 4679     dns_rbtdb_t *rbtdb;
 4680     bool done;
 4681     nodelock_t *lock;
 4682     isc_rwlocktype_t locktype;
 4683 
 4684     /*
 4685      * Caller must be holding the tree lock.
 4686      */
 4687 
 4688     rbtdb = search->rbtdb;
 4689     i = search->chain.level_matches;
 4690     done = false;
 4691     do {
 4692         locktype = isc_rwlocktype_read;
 4693         lock = &rbtdb->node_locks[node->locknum].lock;
 4694         NODE_LOCK(lock, locktype);
 4695 
 4696         /*
 4697          * Look for NS and RRSIG NS rdatasets.
 4698          */
 4699         found = NULL;
 4700         foundsig = NULL;
 4701         header_prev = NULL;
 4702         for (header = node->data; header != NULL; header = header_next)
 4703         {
 4704             header_next = header->next;
 4705             if (check_stale_header(node, header, &locktype, lock,
 4706                            search, &header_prev)) {
 4707                 /* Do nothing. */
 4708             } else if (EXISTS(header)) {
 4709                 /*
 4710                  * We've found an extant rdataset.  See if
 4711                  * we're interested in it.
 4712                  */
 4713                 if (header->type == dns_rdatatype_ns) {
 4714                     found = header;
 4715                     if (foundsig != NULL) {
 4716                         break;
 4717                     }
 4718                 } else if (header->type ==
 4719                        RBTDB_RDATATYPE_SIGNS) {
 4720                     foundsig = header;
 4721                     if (found != NULL) {
 4722                         break;
 4723                     }
 4724                 }
 4725                 header_prev = header;
 4726             } else {
 4727                 header_prev = header;
 4728             }
 4729         }
 4730 
 4731         if (found != NULL) {
 4732             /*
 4733              * If we have to set foundname, we do it before
 4734              * anything else.  If we were to set foundname after
 4735              * we had set nodep or bound the rdataset, then we'd
 4736              * have to undo that work if dns_name_concatenate()
 4737              * failed.  By setting foundname first, there's
 4738              * nothing to undo if we have trouble.
 4739              */
 4740             if (foundname != NULL) {
 4741                 dns_name_init(&name, NULL);
 4742                 dns_rbt_namefromnode(node, &name);
 4743                 dns_name_copynf(&name, foundname);
 4744                 while (i > 0) {
 4745                     i--;
 4746                     level_node = search->chain.levels[i];
 4747                     dns_name_init(&name, NULL);
 4748                     dns_rbt_namefromnode(level_node, &name);
 4749                     result = dns_name_concatenate(
 4750                         foundname, &name, foundname,
 4751                         NULL);
 4752                     if (result != ISC_R_SUCCESS) {
 4753                         if (nodep != NULL) {
 4754                             *nodep = NULL;
 4755                         }
 4756                         goto node_exit;
 4757                     }
 4758                 }
 4759             }
 4760             result = DNS_R_DELEGATION;
 4761             if (nodep != NULL) {
 4762                 new_reference(search->rbtdb, node, locktype);
 4763                 *nodep = node;
 4764             }
 4765             bind_rdataset(search->rbtdb, node, found, search->now,
 4766                       locktype, rdataset);
 4767             if (foundsig != NULL) {
 4768                 bind_rdataset(search->rbtdb, node, foundsig,
 4769                           search->now, locktype,
 4770                           sigrdataset);
 4771             }
 4772             if (need_headerupdate(found, search->now) ||
 4773                 (foundsig != NULL &&
 4774                  need_headerupdate(foundsig, search->now)))
 4775             {
 4776                 if (locktype != isc_rwlocktype_write) {
 4777                     NODE_UNLOCK(lock, locktype);
 4778                     NODE_LOCK(lock, isc_rwlocktype_write);
 4779                     locktype = isc_rwlocktype_write;
 4780                     POST(locktype);
 4781                 }
 4782                 if (need_headerupdate(found, search->now)) {
 4783                     update_header(search->rbtdb, found,
 4784                               search->now);
 4785                 }
 4786                 if (foundsig != NULL &&
 4787                     need_headerupdate(foundsig, search->now)) {
 4788                     update_header(search->rbtdb, foundsig,
 4789                               search->now);
 4790                 }
 4791             }
 4792         }
 4793 
 4794     node_exit:
 4795         NODE_UNLOCK(lock, locktype);
 4796 
 4797         if (found == NULL && i > 0) {
 4798             i--;
 4799             node = search->chain.levels[i];
 4800         } else {
 4801             done = true;
 4802         }
 4803     } while (!done);
 4804 
 4805     return (result);
 4806 }
 4807 
 4808 static isc_result_t
 4809 find_coveringnsec(rbtdb_search_t *search, dns_dbnode_t **nodep,
 4810           isc_stdtime_t now, dns_name_t *foundname,
 4811           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) {
 4812     dns_rbtnode_t *node;
 4813     rdatasetheader_t *header, *header_next, *header_prev;
 4814     rdatasetheader_t *found, *foundsig;
 4815     bool empty_node;
 4816     isc_result_t result;
 4817     dns_fixedname_t fname, forigin;
 4818     dns_name_t *name, *origin;
 4819     rbtdb_rdatatype_t matchtype, sigmatchtype;
 4820     nodelock_t *lock;
 4821     isc_rwlocktype_t locktype;
 4822     dns_rbtnodechain_t chain;
 4823 
 4824     chain = search->chain;
 4825 
 4826     matchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_nsec, 0);
 4827     sigmatchtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig,
 4828                          dns_rdatatype_nsec);
 4829 
 4830     do {
 4831         node = NULL;
 4832         name = dns_fixedname_initname(&fname);
 4833         origin = dns_fixedname_initname(&forigin);
 4834         result = dns_rbtnodechain_current(&chain, name, origin, &node);
 4835         if (result != ISC_R_SUCCESS) {
 4836             return (result);
 4837         }
 4838         locktype = isc_rwlocktype_read;
 4839         lock = &(search->rbtdb->node_locks[node->locknum].lock);
 4840         NODE_LOCK(lock, locktype);
 4841         found = NULL;
 4842         foundsig = NULL;
 4843         empty_node = true;
 4844         header_prev = NULL;
 4845         for (header = node->data; header != NULL; header = header_next)
 4846         {
 4847             header_next = header->next;
 4848             if (check_stale_header(node, header, &locktype, lock,
 4849                            search, &header_prev)) {
 4850                 continue;
 4851             }
 4852             if (NONEXISTENT(header) ||
 4853                 RBTDB_RDATATYPE_BASE(header->type) == 0) {
 4854                 header_prev = header;
 4855                 continue;
 4856             }
 4857             /*
 4858              * Don't stop on provable noqname / RRSIG.
 4859              */
 4860             if (header->noqname == NULL &&
 4861                 RBTDB_RDATATYPE_BASE(header->type) !=
 4862                     dns_rdatatype_rrsig)
 4863             {
 4864                 empty_node = false;
 4865             }
 4866             if (header->type == matchtype) {
 4867                 found = header;
 4868             } else if (header->type == sigmatchtype) {
 4869                 foundsig = header;
 4870             }
 4871             header_prev = header;
 4872         }
 4873         if (found != NULL) {
 4874             result = dns_name_concatenate(name, origin, foundname,
 4875                               NULL);
 4876             if (result != ISC_R_SUCCESS) {
 4877                 goto unlock_node;
 4878             }
 4879             bind_rdataset(search->rbtdb, node, found, now, locktype,
 4880                       rdataset);
 4881             if (foundsig != NULL) {
 4882                 bind_rdataset(search->rbtdb, node, foundsig,
 4883                           now, locktype, sigrdataset);
 4884             }
 4885             new_reference(search->rbtdb, node, locktype);
 4886             *nodep = node;
 4887             result = DNS_R_COVERINGNSEC;
 4888         } else if (!empty_node) {
 4889             result = ISC_R_NOTFOUND;
 4890         } else {
 4891             result = dns_rbtnodechain_prev(&chain, NULL, NULL);
 4892         }
 4893     unlock_node:
 4894         NODE_UNLOCK(lock, locktype);
 4895     } while (empty_node && result == ISC_R_SUCCESS);
 4896     return (result);
 4897 }
 4898 
 4899 static isc_result_t
 4900 cache_find(dns_db_t *db, const dns_name_t *name, dns_dbversion_t *version,
 4901        dns_rdatatype_t type, unsigned int options, isc_stdtime_t now,
 4902        dns_dbnode_t **nodep, dns_name_t *foundname,
 4903        dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) {
 4904     dns_rbtnode_t *node = NULL;
 4905     isc_result_t result;
 4906     rbtdb_search_t search;
 4907     bool cname_ok = true;
 4908     bool empty_node;
 4909     nodelock_t *lock;
 4910     isc_rwlocktype_t locktype;
 4911     rdatasetheader_t *header, *header_prev, *header_next;
 4912     rdatasetheader_t *found, *nsheader;
 4913     rdatasetheader_t *foundsig, *nssig, *cnamesig;
 4914     rdatasetheader_t *update, *updatesig;
 4915     rdatasetheader_t *nsecheader, *nsecsig;
 4916     rbtdb_rdatatype_t sigtype, negtype;
 4917 
 4918     UNUSED(version);
 4919 
 4920     search.rbtdb = (dns_rbtdb_t *)db;
 4921 
 4922     REQUIRE(VALID_RBTDB(search.rbtdb));
 4923     REQUIRE(version == NULL);
 4924 
 4925     if (now == 0) {
 4926         isc_stdtime_get(&now);
 4927     }
 4928 
 4929     search.rbtversion = NULL;
 4930     search.serial = 1;
 4931     search.options = options;
 4932     search.copy_name = false;
 4933     search.need_cleanup = false;
 4934     search.wild = false;
 4935     search.zonecut = NULL;
 4936     dns_fixedname_init(&search.zonecut_name);
 4937     dns_rbtnodechain_init(&search.chain);
 4938     search.now = now;
 4939     update = NULL;
 4940     updatesig = NULL;
 4941 
 4942     RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
 4943 
 4944     /*
 4945      * Search down from the root of the tree.  If, while going down, we
 4946      * encounter a callback node, cache_zonecut_callback() will search the
 4947      * rdatasets at the zone cut for a DNAME rdataset.
 4948      */
 4949     result = dns_rbt_findnode(search.rbtdb->tree, name, foundname, &node,
 4950                   &search.chain, DNS_RBTFIND_EMPTYDATA,
 4951                   cache_zonecut_callback, &search);
 4952 
 4953     if (result == DNS_R_PARTIALMATCH) {
 4954         if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0) {
 4955             result = find_coveringnsec(&search, nodep, now,
 4956                            foundname, rdataset,
 4957                            sigrdataset);
 4958             if (result == DNS_R_COVERINGNSEC) {
 4959                 goto tree_exit;
 4960             }
 4961         }
 4962         if (search.zonecut != NULL) {
 4963             result = setup_delegation(&search, nodep, foundname,
 4964                           rdataset, sigrdataset);
 4965             goto tree_exit;
 4966         } else {
 4967         find_ns:
 4968             result = find_deepest_zonecut(&search, node, nodep,
 4969                               foundname, rdataset,
 4970                               sigrdataset);
 4971             goto tree_exit;
 4972         }
 4973     } else if (result != ISC_R_SUCCESS) {
 4974         goto tree_exit;
 4975     }
 4976 
 4977     /*
 4978      * Certain DNSSEC types are not subject to CNAME matching
 4979      * (RFC4035, section 2.5 and RFC3007).
 4980      *
 4981      * We don't check for RRSIG, because we don't store RRSIG records
 4982      * directly.
 4983      */
 4984     if (type == dns_rdatatype_key || type == dns_rdatatype_nsec) {
 4985         cname_ok = false;
 4986     }
 4987 
 4988     /*
 4989      * We now go looking for rdata...
 4990      */
 4991 
 4992     lock = &(search.rbtdb->node_locks[node->locknum].lock);
 4993     locktype = isc_rwlocktype_read;
 4994     NODE_LOCK(lock, locktype);
 4995 
 4996     found = NULL;
 4997     foundsig = NULL;
 4998     sigtype = RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, type);
 4999     negtype = RBTDB_RDATATYPE_VALUE(0, type);
 5000     nsheader = NULL;
 5001     nsecheader = NULL;
 5002     nssig = NULL;
 5003     nsecsig = NULL;
 5004     cnamesig = NULL;
 5005     empty_node = true;
 5006     header_prev = NULL;
 5007     for (header = node->data; header != NULL; header = header_next) {
 5008         header_next = header->next;
 5009         if (check_stale_header(node, header, &locktype, lock, &search,
 5010                        &header_prev)) {
 5011             /* Do nothing. */
 5012         } else if (EXISTS(header) && !ANCIENT(header)) {
 5013             /*
 5014              * We now know that there is at least one active
 5015              * non-stale rdataset at this node.
 5016              */
 5017             empty_node = false;
 5018 
 5019             /*
 5020              * If we found a type we were looking for, remember
 5021              * it.
 5022              */
 5023             if (header->type == type ||
 5024                 (type == dns_rdatatype_any &&
 5025                  RBTDB_RDATATYPE_BASE(header->type) != 0) ||
 5026                 (cname_ok && header->type == dns_rdatatype_cname))
 5027             {
 5028                 /*
 5029                  * We've found the answer.
 5030                  */
 5031                 found = header;
 5032                 if (header->type == dns_rdatatype_cname &&
 5033                     cname_ok && cnamesig != NULL) {
 5034                     /*
 5035                      * If we've already got the
 5036                      * CNAME RRSIG, use it.
 5037                      */
 5038                     foundsig = cnamesig;
 5039                 }
 5040             } else if (header->type == sigtype) {
 5041                 /*
 5042                  * We've found the RRSIG rdataset for our
 5043                  * target type.  Remember it.
 5044                  */
 5045                 foundsig = header;
 5046             } else if (header->type == RBTDB_RDATATYPE_NCACHEANY ||
 5047                    header->type == negtype) {
 5048                 /*
 5049                  * We've found a negative cache entry.
 5050                  */
 5051                 found = header;
 5052             } else if (header->type == dns_rdatatype_ns) {
 5053                 /*
 5054                  * Remember a NS rdataset even if we're
 5055                  * not specifically looking for it, because
 5056                  * we might need it later.
 5057                  */
 5058                 nsheader = header;
 5059             } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
 5060                 /*
 5061                  * If we need the NS rdataset, we'll also
 5062                  * need its signature.
 5063                  */
 5064                 nssig = header;
 5065             } else if (header->type == dns_rdatatype_nsec) {
 5066                 nsecheader = header;
 5067             } else if (header->type == RBTDB_RDATATYPE_SIGNSEC) {
 5068                 nsecsig = header;
 5069             } else if (cname_ok &&
 5070                    header->type == RBTDB_RDATATYPE_SIGCNAME) {
 5071                 /*
 5072                  * If we get a CNAME match, we'll also need
 5073                  * its signature.
 5074                  */
 5075                 cnamesig = header;
 5076             }
 5077             header_prev = header;
 5078         } else {
 5079             header_prev = header;
 5080         }
 5081     }
 5082 
 5083     if (empty_node) {
 5084         /*
 5085          * We have an exact match for the name, but there are no
 5086          * extant rdatasets.  That means that this node doesn't
 5087          * meaningfully exist, and that we really have a partial match.
 5088          */
 5089         NODE_UNLOCK(lock, locktype);
 5090         goto find_ns;
 5091     }
 5092 
 5093     /*
 5094      * If we didn't find what we were looking for...
 5095      */
 5096     if (found == NULL ||
 5097         (DNS_TRUST_ADDITIONAL(found->trust) &&
 5098          ((options & DNS_DBFIND_ADDITIONALOK) == 0)) ||
 5099         (found->trust == dns_trust_glue &&
 5100          ((options & DNS_DBFIND_GLUEOK) == 0)) ||
 5101         (DNS_TRUST_PENDING(found->trust) &&
 5102          ((options & DNS_DBFIND_PENDINGOK) == 0)))
 5103     {
 5104         /*
 5105          * Return covering NODATA NSEC record.
 5106          */
 5107         if ((search.options & DNS_DBFIND_COVERINGNSEC) != 0 &&
 5108             nsecheader != NULL) {
 5109             if (nodep != NULL) {
 5110                 new_reference(search.rbtdb, node, locktype);
 5111                 *nodep = node;
 5112             }
 5113             bind_rdataset(search.rbtdb, node, nsecheader,
 5114                       search.now, locktype, rdataset);
 5115             if (need_headerupdate(nsecheader, search.now)) {
 5116                 update = nsecheader;
 5117             }
 5118             if (nsecsig != NULL) {
 5119                 bind_rdataset(search.rbtdb, node, nsecsig,
 5120                           search.now, locktype,
 5121                           sigrdataset);
 5122                 if (need_headerupdate(nsecsig, search.now)) {
 5123                     updatesig = nsecsig;
 5124                 }
 5125             }
 5126             result = DNS_R_COVERINGNSEC;
 5127             goto node_exit;
 5128         }
 5129 
 5130         /*
 5131          * If there is an NS rdataset at this node, then this is the
 5132          * deepest zone cut.
 5133          */
 5134         if (nsheader != NULL) {
 5135             if (nodep != NULL) {
 5136                 new_reference(search.rbtdb, node, locktype);
 5137                 *nodep = node;
 5138             }
 5139             bind_rdataset(search.rbtdb, node, nsheader, search.now,
 5140                       locktype, rdataset);
 5141             if (need_headerupdate(nsheader, search.now)) {
 5142                 update = nsheader;
 5143             }
 5144             if (nssig != NULL) {
 5145                 bind_rdataset(search.rbtdb, node, nssig,
 5146                           search.now, locktype,
 5147                           sigrdataset);
 5148                 if (need_headerupdate(nssig, search.now)) {
 5149                     updatesig = nssig;
 5150                 }
 5151             }
 5152             result = DNS_R_DELEGATION;
 5153             goto node_exit;
 5154         }
 5155 
 5156         /*
 5157          * Go find the deepest zone cut.
 5158          */
 5159         NODE_UNLOCK(lock, locktype);
 5160         goto find_ns;
 5161     }
 5162 
 5163     /*
 5164      * We found what we were looking for, or we found a CNAME.
 5165      */
 5166 
 5167     if (nodep != NULL) {
 5168         new_reference(search.rbtdb, node, locktype);
 5169         *nodep = node;
 5170     }
 5171 
 5172     if (NEGATIVE(found)) {
 5173         /*
 5174          * We found a negative cache entry.
 5175          */
 5176         if (NXDOMAIN(found)) {
 5177             result = DNS_R_NCACHENXDOMAIN;
 5178         } else {
 5179             result = DNS_R_NCACHENXRRSET;
 5180         }
 5181     } else if (type != found->type && type != dns_rdatatype_any &&
 5182            found->type == dns_rdatatype_cname)
 5183     {
 5184         /*
 5185          * We weren't doing an ANY query and we found a CNAME instead
 5186          * of the type we were looking for, so we need to indicate
 5187          * that result to the caller.
 5188          */
 5189         result = DNS_R_CNAME;
 5190     } else {
 5191         /*
 5192          * An ordinary successful query!
 5193          */
 5194         result = ISC_R_SUCCESS;
 5195     }
 5196 
 5197     if (type != dns_rdatatype_any || result == DNS_R_NCACHENXDOMAIN ||
 5198         result == DNS_R_NCACHENXRRSET)
 5199     {
 5200         bind_rdataset(search.rbtdb, node, found, search.now, locktype,
 5201                   rdataset);
 5202         if (need_headerupdate(found, search.now)) {
 5203             update = found;
 5204         }
 5205         if (!NEGATIVE(found) && foundsig != NULL) {
 5206             bind_rdataset(search.rbtdb, node, foundsig, search.now,
 5207                       locktype, sigrdataset);
 5208             if (need_headerupdate(foundsig, search.now)) {
 5209                 updatesig = foundsig;
 5210             }
 5211         }
 5212     }
 5213 
 5214 node_exit:
 5215     if ((update != NULL || updatesig != NULL) &&
 5216         locktype != isc_rwlocktype_write) {
 5217         NODE_UNLOCK(lock, locktype);
 5218         NODE_LOCK(lock, isc_rwlocktype_write);
 5219         locktype = isc_rwlocktype_write;
 5220         POST(locktype);
 5221     }
 5222     if (update != NULL && need_headerupdate(update, search.now)) {
 5223         update_header(search.rbtdb, update, search.now);
 5224     }
 5225     if (updatesig != NULL && need_headerupdate(updatesig, search.now)) {
 5226         update_header(search.rbtdb, updatesig, search.now);
 5227     }
 5228 
 5229     NODE_UNLOCK(lock, locktype);
 5230 
 5231 tree_exit:
 5232     RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
 5233 
 5234     /*
 5235      * If we found a zonecut but aren't going to use it, we have to
 5236      * let go of it.
 5237      */
 5238     if (search.need_cleanup) {
 5239         node = search.zonecut;
 5240         INSIST(node != NULL);
 5241         lock = &(search.rbtdb->node_locks[node->locknum].lock);
 5242 
 5243         NODE_LOCK(lock, isc_rwlocktype_read);
 5244         decrement_reference(search.rbtdb, node, 0, isc_rwlocktype_read,
 5245                     isc_rwlocktype_none, false);
 5246         NODE_UNLOCK(lock, isc_rwlocktype_read);
 5247     }
 5248 
 5249     dns_rbtnodechain_reset(&search.chain);
 5250 
 5251     update_cachestats(search.rbtdb, result);
 5252     return (result);
 5253 }
 5254 
 5255 static isc_result_t
 5256 cache_findzonecut(dns_db_t *db, const dns_name_t *name, unsigned int options,
 5257           isc_stdtime_t now, dns_dbnode_t **nodep,
 5258           dns_name_t *foundname, dns_name_t *dcname,
 5259           dns_rdataset_t *rdataset, dns_rdataset_t *sigrdataset) {
 5260     dns_rbtnode_t *node = NULL;
 5261     nodelock_t *lock;
 5262     isc_result_t result;
 5263     rbtdb_search_t search;
 5264     rdatasetheader_t *header, *header_prev, *header_next;
 5265     rdatasetheader_t *found, *foundsig;
 5266     unsigned int rbtoptions = DNS_RBTFIND_EMPTYDATA;
 5267     isc_rwlocktype_t locktype;
 5268     bool dcnull = (dcname == NULL);
 5269 
 5270     search.rbtdb = (dns_rbtdb_t *)db;
 5271 
 5272     REQUIRE(VALID_RBTDB(search.rbtdb));
 5273 
 5274     if (now == 0) {
 5275         isc_stdtime_get(&now);
 5276     }
 5277 
 5278     search.rbtversion = NULL;
 5279     search.serial = 1;
 5280     search.options = options;
 5281     search.copy_name = false;
 5282     search.need_cleanup = false;
 5283     search.wild = false;
 5284     search.zonecut = NULL;
 5285     dns_fixedname_init(&search.zonecut_name);
 5286     dns_rbtnodechain_init(&search.chain);
 5287     search.now = now;
 5288 
 5289     if (dcnull) {
 5290         dcname = foundname;
 5291     }
 5292 
 5293     if ((options & DNS_DBFIND_NOEXACT) != 0) {
 5294         rbtoptions |= DNS_RBTFIND_NOEXACT;
 5295     }
 5296 
 5297     RWLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
 5298 
 5299     /*
 5300      * Search down from the root of the tree.
 5301      */
 5302     result = dns_rbt_findnode(search.rbtdb->tree, name, dcname, &node,
 5303                   &search.chain, rbtoptions, NULL, &search);
 5304 
 5305     if (result == DNS_R_PARTIALMATCH) {
 5306         result = find_deepest_zonecut(&search, node, nodep, foundname,
 5307                           rdataset, sigrdataset);
 5308         goto tree_exit;
 5309     } else if (result != ISC_R_SUCCESS) {
 5310         goto tree_exit;
 5311     } else if (!dcnull) {
 5312         dns_name_copynf(dcname, foundname);
 5313     }
 5314     /*
 5315      * We now go looking for an NS rdataset at the node.
 5316      */
 5317 
 5318     lock = &(search.rbtdb->node_locks[node->locknum].lock);
 5319     locktype = isc_rwlocktype_read;
 5320     NODE_LOCK(lock, locktype);
 5321 
 5322     found = NULL;
 5323     foundsig = NULL;
 5324     header_prev = NULL;
 5325     for (header = node->data; header != NULL; header = header_next) {
 5326         header_next = header->next;
 5327         if (check_stale_header(node, header, &locktype, lock, &search,
 5328                        &header_prev)) {
 5329             /* Do nothing. */
 5330         } else if (EXISTS(header)) {
 5331             /*
 5332              * If we found a type we were looking for, remember
 5333              * it.
 5334              */
 5335             if (header->type == dns_rdatatype_ns) {
 5336                 /*
 5337                  * Remember a NS rdataset even if we're
 5338                  * not specifically looking for it, because
 5339                  * we might need it later.
 5340                  */
 5341                 found = header;
 5342             } else if (header->type == RBTDB_RDATATYPE_SIGNS) {
 5343                 /*
 5344                  * If we need the NS rdataset, we'll also
 5345                  * need its signature.
 5346                  */
 5347                 foundsig = header;
 5348             }
 5349             header_prev = header;
 5350         } else {
 5351             header_prev = header;
 5352         }
 5353     }
 5354 
 5355     if (found == NULL) {
 5356         /*
 5357          * No NS records here.
 5358          */
 5359         NODE_UNLOCK(lock, locktype);
 5360         result = find_deepest_zonecut(&search, node, nodep, foundname,
 5361                           rdataset, sigrdataset);
 5362         goto tree_exit;
 5363     }
 5364 
 5365     if (nodep != NULL) {
 5366         new_reference(search.rbtdb, node, locktype);
 5367         *nodep = node;
 5368     }
 5369 
 5370     bind_rdataset(search.rbtdb, node, found, search.now, locktype,
 5371               rdataset);
 5372     if (foundsig != NULL) {
 5373         bind_rdataset(search.rbtdb, node, foundsig, search.now,
 5374                   locktype, sigrdataset);
 5375     }
 5376 
 5377     if (need_headerupdate(found, search.now) ||
 5378         (foundsig != NULL && need_headerupdate(foundsig, search.now)))
 5379     {
 5380         if (locktype != isc_rwlocktype_write) {
 5381             NODE_UNLOCK(lock, locktype);
 5382             NODE_LOCK(lock, isc_rwlocktype_write);
 5383             locktype = isc_rwlocktype_write;
 5384             POST(locktype);
 5385         }
 5386         if (need_headerupdate(found, search.now)) {
 5387             update_header(search.rbtdb, found, search.now);
 5388         }
 5389         if (foundsig != NULL && need_headerupdate(foundsig, search.now))
 5390         {
 5391             update_header(search.rbtdb, foundsig, search.now);
 5392         }
 5393     }
 5394 
 5395     NODE_UNLOCK(lock, locktype);
 5396 
 5397 tree_exit:
 5398     RWUNLOCK(&search.rbtdb->tree_lock, isc_rwlocktype_read);
 5399 
 5400     INSIST(!search.need_cleanup);
 5401 
 5402     dns_rbtnodechain_reset(&search.chain);
 5403 
 5404     if (result == DNS_R_DELEGATION) {
 5405         result = ISC_R_SUCCESS;
 5406     }
 5407 
 5408     return (result);
 5409 }
 5410 
 5411 static void
 5412 attachnode(dns_db_t *db, dns_dbnode_t *source, dns_dbnode_t **targetp) {
 5413     dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
 5414     dns_rbtnode_t *node = (dns_rbtnode_t *)source;
 5415 
 5416     REQUIRE(VALID_RBTDB(rbtdb));
 5417     REQUIRE(targetp != NULL && *targetp == NULL);
 5418 
 5419     isc_refcount_increment(&node->references);
 5420 
 5421     *targetp = source;
 5422 }
 5423 
 5424 static void
 5425 detachnode(dns_db_t *db, dns_dbnode_t **targetp) {
 5426     dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
 5427     dns_rbtnode_t *node;
 5428     bool want_free = false;
 5429     bool inactive = false;
 5430     rbtdb_nodelock_t *nodelock;
 5431 
 5432     REQUIRE(VALID_RBTDB(rbtdb));
 5433     REQUIRE(targetp != NULL && *targetp != NULL);
 5434 
 5435     node = (dns_rbtnode_t *)(*targetp);
 5436     nodelock = &rbtdb->node_locks[node->locknum];
 5437 
 5438     NODE_LOCK(&nodelock->lock, isc_rwlocktype_read);
 5439 
 5440     if (decrement_reference(rbtdb, node, 0, isc_rwlocktype_read,
 5441                 isc_rwlocktype_none, false))
 5442     {
 5443         if (isc_refcount_current(&nodelock->references) == 0 &&
 5444             nodelock->exiting) {
 5445             inactive = true;
 5446         }
 5447     }
 5448 
 5449     NODE_UNLOCK(&nodelock->lock, isc_rwlocktype_read);
 5450 
 5451     *targetp = NULL;
 5452 
 5453     if (inactive) {
 5454         RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
 5455         rbtdb->active--;
 5456         if (rbtdb->active == 0) {
 5457             want_free = true;
 5458         }
 5459         RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
 5460         if (want_free) {
 5461             char buf[DNS_NAME_FORMATSIZE];
 5462             if (dns_name_dynamic(&rbtdb->common.origin)) {
 5463                 dns_name_format(&rbtdb->common.origin, buf,
 5464                         sizeof(buf));
 5465             } else {
 5466                 strlcpy(buf, "<UNKNOWN>", sizeof(buf));
 5467             }
 5468             isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
 5469                       DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
 5470                       "calling free_rbtdb(%s)", buf);
 5471             free_rbtdb(rbtdb, true, NULL);
 5472         }
 5473     }
 5474 }
 5475 
 5476 static isc_result_t
 5477 expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
 5478     dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
 5479     dns_rbtnode_t *rbtnode = node;
 5480     rdatasetheader_t *header;
 5481     bool force_expire = false;
 5482     /*
 5483      * These are the category and module used by the cache cleaner.
 5484      */
 5485     bool log = false;
 5486     isc_logcategory_t *category = DNS_LOGCATEGORY_DATABASE;
 5487     isc_logmodule_t *module = DNS_LOGMODULE_CACHE;
 5488     int level = ISC_LOG_DEBUG(2);
 5489     char printname[DNS_NAME_FORMATSIZE];
 5490 
 5491     REQUIRE(VALID_RBTDB(rbtdb));
 5492 
 5493     /*
 5494      * Caller must hold a tree lock.
 5495      */
 5496 
 5497     if (now == 0) {
 5498         isc_stdtime_get(&now);
 5499     }
 5500 
 5501     if (isc_mem_isovermem(rbtdb->common.mctx)) {
 5502         /*
 5503          * Force expire with 25% probability.
 5504          * XXXDCL Could stand to have a better policy, like LRU.
 5505          */
 5506         force_expire = (rbtnode->down == NULL &&
 5507                 (isc_random32() % 4) == 0);
 5508 
 5509         /*
 5510          * Note that 'log' can be true IFF overmem is also true.
 5511          * overmem can currently only be true for cache
 5512          * databases -- hence all of the "overmem cache" log strings.
 5513          */
 5514         log = isc_log_wouldlog(dns_lctx, level);
 5515         if (log) {
 5516             isc_log_write(
 5517                 dns_lctx, category, module, level,
 5518                 "overmem cache: %s %s",
 5519                 force_expire ? "FORCE" : "check",
 5520                 dns_rbt_formatnodename(rbtnode, printname,
 5521                                sizeof(printname)));
 5522         }
 5523     }
 5524 
 5525     /*
 5526      * We may not need write access, but this code path is not performance
 5527      * sensitive, so it should be okay to always lock as a writer.
 5528      */
 5529     NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
 5530           isc_rwlocktype_write);
 5531 
 5532     for (header = rbtnode->data; header != NULL; header = header->next) {
 5533         if (header->rdh_ttl <= now - RBTDB_VIRTUAL) {
 5534             /*
 5535              * We don't check if refcurrent(rbtnode) == 0 and try
 5536              * to free like we do in cache_find(), because
 5537              * refcurrent(rbtnode) must be non-zero.  This is so
 5538              * because 'node' is an argument to the function.
 5539              */
 5540             mark_header_ancient(rbtdb, header);
 5541             if (log) {
 5542                 isc_log_write(dns_lctx, category, module, level,
 5543                           "overmem cache: ancient %s",
 5544                           printname);
 5545             }
 5546         } else if (force_expire) {
 5547             if (!RETAIN(header)) {
 5548                 set_ttl(rbtdb, header, 0);
 5549                 mark_header_ancient(rbtdb, header);
 5550             } else if (log) {
 5551                 isc_log_write(dns_lctx, category, module, level,
 5552                           "overmem cache: "
 5553                           "reprieve by RETAIN() %s",
 5554                           printname);
 5555             }
 5556         } else if (isc_mem_isovermem(rbtdb->common.mctx) && log) {
 5557             isc_log_write(dns_lctx, category, module, level,
 5558                       "overmem cache: saved %s", printname);
 5559         }
 5560     }
 5561 
 5562     NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
 5563             isc_rwlocktype_write);
 5564 
 5565     return (ISC_R_SUCCESS);
 5566 }
 5567 
 5568 static void
 5569 overmem(dns_db_t *db, bool over) {
 5570     /* This is an empty callback.  See adb.c:water() */
 5571 
 5572     UNUSED(db);
 5573     UNUSED(over);
 5574 
 5575     return;
 5576 }
 5577 
 5578 static void
 5579 printnode(dns_db_t *db, dns_dbnode_t *node, FILE *out) {
 5580     dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
 5581     dns_rbtnode_t *rbtnode = node;
 5582     bool first;
 5583     uint32_t refs;
 5584 
 5585     REQUIRE(VALID_RBTDB(rbtdb));
 5586 
 5587     NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
 5588           isc_rwlocktype_read);
 5589 
 5590     refs = isc_refcount_current(&rbtnode->references);
 5591     fprintf(out, "node %p, %" PRIu32 " references, locknum = %u\n", rbtnode,
 5592         refs, rbtnode->locknum);
 5593     if (rbtnode->data != NULL) {
 5594         rdatasetheader_t *current, *top_next;
 5595 
 5596         for (current = rbtnode->data; current != NULL;
 5597              current = top_next) {
 5598             top_next = current->next;
 5599             first = true;
 5600             fprintf(out, "\ttype %u", current->type);
 5601             do {
 5602                 uint_least16_t attributes = atomic_load_acquire(
 5603                     &current->attributes);
 5604                 if (!first) {
 5605                     fprintf(out, "\t");
 5606                 }
 5607                 first = false;
 5608                 fprintf(out,
 5609                     "\tserial = %lu, ttl = %u, "
 5610                     "trust = %u, attributes = %" PRIuLEAST16
 5611                     ", "
 5612                     "resign = %u\n",
 5613                     (unsigned long)current->serial,
 5614                     current->rdh_ttl, current->trust,
 5615                     attributes,
 5616                     (current->resign << 1) |
 5617                         current->resign_lsb);
 5618                 current = current->down;
 5619             } while (current != NULL);
 5620         }
 5621     } else {
 5622         fprintf(out, "(empty)\n");
 5623     }
 5624 
 5625     NODE_UNLOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
 5626             isc_rwlocktype_read);
 5627 }
 5628 
 5629 static isc_result_t
 5630 createiterator(dns_db_t *db, unsigned int options,
 5631            dns_dbiterator_t **iteratorp) {
 5632     dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
 5633     rbtdb_dbiterator_t *rbtdbiter;
 5634 
 5635     REQUIRE(VALID_RBTDB(rbtdb));
 5636 
 5637     rbtdbiter = isc_mem_get(rbtdb->common.mctx, sizeof(*rbtdbiter));
 5638 
 5639     rbtdbiter->common.methods = &dbiterator_methods;
 5640     rbtdbiter->common.db = NULL;
 5641     dns_db_attach(db, &rbtdbiter->common.db);
 5642     rbtdbiter->common.relative_names = ((options & DNS_DB_RELATIVENAMES) !=
 5643                         0);
 5644     rbtdbiter->common.magic = DNS_DBITERATOR_MAGIC;
 5645     rbtdbiter->common.cleaning = false;
 5646     rbtdbiter->paused = true;
 5647     rbtdbiter->tree_locked = isc_rwlocktype_none;
 5648     rbtdbiter->result = ISC_R_SUCCESS;
 5649     dns_fixedname_init(&rbtdbiter->name);
 5650     dns_fixedname_init(&rbtdbiter->origin);
 5651     rbtdbiter->node = NULL;
 5652     rbtdbiter->delcnt = 0;
 5653     rbtdbiter->nsec3only = ((options & DNS_DB_NSEC3ONLY) != 0);
 5654     rbtdbiter->nonsec3 = ((options & DNS_DB_NONSEC3) != 0);
 5655     memset(rbtdbiter->deletions, 0, sizeof(rbtdbiter->deletions));
 5656     dns_rbtnodechain_init(&rbtdbiter->chain);
 5657     dns_rbtnodechain_init(&rbtdbiter->nsec3chain);
 5658     if (rbtdbiter->nsec3only) {
 5659         rbtdbiter->current = &rbtdbiter->nsec3chain;
 5660     } else {
 5661         rbtdbiter->current = &rbtdbiter->chain;
 5662     }
 5663 
 5664     *iteratorp = (dns_dbiterator_t *)rbtdbiter;
 5665 
 5666     return (ISC_R_SUCCESS);
 5667 }
 5668 
 5669 static isc_result_t
 5670 zone_findrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
 5671           dns_rdatatype_t type, dns_rdatatype_t covers,
 5672