"Fossies" - the Fresh Open Source Software Archive

Member "knot-2.8.3/src/knot/modules/rrl/functions.c" (16 Jul 2019, 13586 Bytes) of package /linux/misc/dns/knot-2.8.3.tar.xz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "functions.c" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.8.2_vs_2.8.3.

    1 /*  Copyright (C) 2019 CZ.NIC, z.s.p.o. <knot-dns@labs.nic.cz>
    2 
    3     This program is free software: you can redistribute it and/or modify
    4     it under the terms of the GNU General Public License as published by
    5     the Free Software Foundation, either version 3 of the License, or
    6     (at your option) any later version.
    7 
    8     This program is distributed in the hope that it will be useful,
    9     but WITHOUT ANY WARRANTY; without even the implied warranty of
   10     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   11     GNU General Public License for more details.
   12 
   13     You should have received a copy of the GNU General Public License
   14     along with this program.  If not, see <https://www.gnu.org/licenses/>.
   15  */
   16 
   17 #include <assert.h>
   18 #include <time.h>
   19 
   20 #include "knot/modules/rrl/functions.h"
   21 #include "contrib/openbsd/strlcat.h"
   22 #include "contrib/sockaddr.h"
   23 #include "contrib/time.h"
   24 #include "libdnssec/error.h"
   25 #include "libdnssec/random.h"
   26 
   27 /* Hopscotch defines. */
   28 #define HOP_LEN (sizeof(unsigned)*8)
   29 /* Limits (class, ipv6 remote, dname) */
   30 #define RRL_CLSBLK_MAXLEN (1 + 8 + 255)
   31 /* CIDR block prefix lengths for v4/v6 */
   32 #define RRL_V4_PREFIX_LEN 3 /* /24 */
   33 #define RRL_V6_PREFIX_LEN 7 /* /56 */
   34 /* Defaults */
   35 #define RRL_SSTART 2 /* 1/Nth of the rate for slow start */
   36 #define RRL_PSIZE_LARGE 1024
   37 #define RRL_CAPACITY 4 /* Window size in seconds */
   38 #define RRL_LOCK_GRANULARITY 32 /* Last digit granularity */
   39 
   40 /* Classification */
   41 enum {
   42     CLS_NULL     = 0 << 0, /* Empty bucket. */
   43     CLS_NORMAL   = 1 << 0, /* Normal response. */
   44     CLS_ERROR    = 1 << 1, /* Error response. */
   45     CLS_NXDOMAIN = 1 << 2, /* NXDOMAIN (special case of error). */
   46     CLS_EMPTY    = 1 << 3, /* Empty response. */
   47     CLS_LARGE    = 1 << 4, /* Response size over threshold (1024k). */
   48     CLS_WILDCARD = 1 << 5, /* Wildcard query. */
   49     CLS_ANY      = 1 << 6, /* ANY query (spec. class). */
   50     CLS_DNSSEC   = 1 << 7  /* DNSSEC related RR query (spec. class) */
   51 };
   52 
   53 /* Classification string. */
   54 struct cls_name {
   55     int code;
   56     const char *name;
   57 };
   58 
   59 static const struct cls_name rrl_cls_names[] = {
   60     { CLS_NORMAL,   "POSITIVE" },
   61     { CLS_ERROR,    "ERROR" },
   62     { CLS_NXDOMAIN, "NXDOMAIN"},
   63     { CLS_EMPTY,    "EMPTY"},
   64     { CLS_LARGE,    "LARGE"},
   65     { CLS_WILDCARD, "WILDCARD"},
   66     { CLS_ANY,      "ANY"},
   67     { CLS_DNSSEC,   "DNSSEC"},
   68     { CLS_NULL,     "NULL"},
   69     { CLS_NULL,     NULL}
   70 };
   71 
   72 static inline const char *rrl_clsstr(int code)
   73 {
   74     for (const struct cls_name *c = rrl_cls_names; c->name; c++) {
   75         if (c->code == code) {
   76             return c->name;
   77         }
   78     }
   79 
   80     return "unknown class";
   81 }
   82 
   83 /* Bucket flags. */
   84 enum {
   85     RRL_BF_NULL   = 0 << 0, /* No flags. */
   86     RRL_BF_SSTART = 1 << 0, /* Bucket in slow-start after collision. */
   87     RRL_BF_ELIMIT = 1 << 1  /* Bucket is rate-limited. */
   88 };
   89 
   90 static uint8_t rrl_clsid(rrl_req_t *p)
   91 {
   92     /* Check error code */
   93     int ret = CLS_NULL;
   94     switch (knot_wire_get_rcode(p->w)) {
   95     case KNOT_RCODE_NOERROR: ret = CLS_NORMAL; break;
   96     case KNOT_RCODE_NXDOMAIN: return CLS_NXDOMAIN; break;
   97     default: return CLS_ERROR; break;
   98     }
   99 
  100     /* Check if answered from a qname */
  101     if (ret == CLS_NORMAL && p->flags & RRL_REQ_WILDCARD) {
  102         return CLS_WILDCARD;
  103     }
  104 
  105     /* Check query type for spec. classes. */
  106     if (p->query) {
  107         switch(knot_pkt_qtype(p->query)) {
  108         case KNOT_RRTYPE_ANY:      /* ANY spec. class */
  109             return CLS_ANY;
  110             break;
  111         case KNOT_RRTYPE_DNSKEY:
  112         case KNOT_RRTYPE_RRSIG:
  113         case KNOT_RRTYPE_DS:      /* DNSSEC-related RR class. */
  114             return CLS_DNSSEC;
  115             break;
  116         default:
  117             break;
  118         }
  119     }
  120 
  121     /* Check packet size for threshold. */
  122     if (p->len >= RRL_PSIZE_LARGE) {
  123         return CLS_LARGE;
  124     }
  125 
  126     /* Check ancount */
  127     if (knot_wire_get_ancount(p->w) == 0) {
  128         return CLS_EMPTY;
  129     }
  130 
  131     return ret;
  132 }
  133 
  134 static int rrl_clsname(uint8_t *dst, size_t maxlen, uint8_t cls, rrl_req_t *req,
  135                        const knot_dname_t *name)
  136 {
  137     if (name == NULL) {
  138         /* Fallback for errors etc. */
  139         name = (const knot_dname_t *)"\x00";
  140     }
  141 
  142     switch (cls) {
  143     case CLS_ERROR:    /* Could be a non-existent zone or garbage. */
  144     case CLS_NXDOMAIN: /* Queries to non-existent names in zone. */
  145     case CLS_WILDCARD: /* Queries to names covered by a wildcard. */
  146         break;
  147     default:
  148         /* Use QNAME */
  149         if (req->query) {
  150             name = knot_pkt_qname(req->query);
  151         }
  152         break;
  153     }
  154 
  155     /* Write to wire */
  156     return knot_dname_to_wire(dst, name, maxlen);
  157 }
  158 
  159 static int rrl_classify(uint8_t *dst, size_t maxlen, const struct sockaddr_storage *a,
  160                         rrl_req_t *req, const knot_dname_t *name)
  161 {
  162     /* Class */
  163     uint8_t cls = rrl_clsid(req);
  164     *dst = cls;
  165     int blklen = sizeof(cls);
  166 
  167     /* Address (in network byteorder, adjust masks). */
  168     uint64_t netblk = 0;
  169     if (a->ss_family == AF_INET6) {
  170         struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)a;
  171         memcpy(&netblk, &ipv6->sin6_addr, RRL_V6_PREFIX_LEN);
  172     } else {
  173         struct sockaddr_in *ipv4 = (struct sockaddr_in *)a;
  174         memcpy(&netblk, &ipv4->sin_addr, RRL_V4_PREFIX_LEN);
  175     }
  176     if (blklen + sizeof(netblk) > maxlen) {
  177         return KNOT_ESPACE;
  178     }
  179     memcpy(dst + blklen, &netblk, sizeof(netblk));
  180     blklen += sizeof(netblk);
  181 
  182     /* Name */
  183     int ret = rrl_clsname(dst + blklen, maxlen - blklen, cls, req, name);
  184     if (ret < 0) {
  185         return ret;
  186     }
  187     uint8_t len = ret;
  188     blklen += len;
  189 
  190     return blklen;
  191 }
  192 
  193 static int bucket_free(rrl_item_t *b, uint32_t now)
  194 {
  195     return b->cls == CLS_NULL || (b->time + 1 < now);
  196 }
  197 
  198 static int bucket_match(rrl_item_t *b, rrl_item_t *m)
  199 {
  200     return b->cls    == m->cls &&
  201            b->netblk == m->netblk &&
  202            b->qname  == m->qname;
  203 }
  204 
  205 static int find_free(rrl_table_t *t, unsigned i, uint32_t now)
  206 {
  207     rrl_item_t *np = t->arr + t->size;
  208     rrl_item_t *b = NULL;
  209     for (b = t->arr + i; b != np; ++b) {
  210         if (bucket_free(b, now)) {
  211             return b - (t->arr + i);
  212         }
  213     }
  214     np = t->arr + i;
  215     for (b = t->arr; b != np; ++b) {
  216         if (bucket_free(b, now)) {
  217             return (b - t->arr) + (t->size - i);
  218         }
  219     }
  220 
  221     /* this happens if table is full... force vacate current elm */
  222     return i;
  223 }
  224 
  225 static inline unsigned find_match(rrl_table_t *t, uint32_t id, rrl_item_t *m)
  226 {
  227     unsigned f = 0;
  228     unsigned d = 0;
  229     unsigned match = t->arr[id].hop;
  230     while (match != 0) {
  231         d = __builtin_ctz(match);
  232         f = (id + d) % t->size;
  233         if (bucket_match(t->arr + f, m)) {
  234             return d;
  235         } else {
  236             match &= ~(1<<d); /* clear potential match */
  237         }
  238     }
  239 
  240     return HOP_LEN + 1;
  241 }
  242 
  243 static inline unsigned reduce_dist(rrl_table_t *t, unsigned id, unsigned d, unsigned *f)
  244 {
  245     unsigned rd = HOP_LEN - 1;
  246     while (rd > 0) {
  247         unsigned s = (t->size + *f - rd) % t->size; /* bucket to be vacated */
  248         if (t->arr[s].hop != 0) {
  249             unsigned o = __builtin_ctz(t->arr[s].hop);  /* offset of first valid bucket */
  250             if (o < rd) {                               /* only offsets in <s, f> are interesting */
  251                 unsigned e = (s + o) % t->size;     /* this item will be displaced to [f] */
  252                 unsigned keep_hop = t->arr[*f].hop; /* unpredictable padding */
  253                 memcpy(t->arr + *f, t->arr + e, sizeof(rrl_item_t));
  254                 t->arr[*f].hop = keep_hop;
  255                 t->arr[e].cls = CLS_NULL;
  256                 t->arr[s].hop &= ~(1<<o);
  257                 t->arr[s].hop |= 1<<rd;
  258                 *f = e;
  259                 return d - (rd - o);
  260             }
  261         }
  262         --rd;
  263     }
  264 
  265     assert(rd == 0); /* this happens with p=1/fact(HOP_LEN) */
  266     *f = id;
  267     d = 0; /* force vacate initial element */
  268     return d;
  269 }
  270 
  271 static void subnet_tostr(char *dst, size_t maxlen, const struct sockaddr_storage *ss)
  272 {
  273     const char *suffix;
  274     uint8_t addr[16] = { 0 };
  275 
  276     if (ss->ss_family == AF_INET6) {
  277         struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)ss;
  278         memcpy(addr, &ipv6->sin6_addr, RRL_V6_PREFIX_LEN);
  279         suffix = "/56";
  280     } else {
  281         struct sockaddr_in *ipv4 = (struct sockaddr_in *)ss;
  282         memcpy(addr, &ipv4->sin_addr, RRL_V4_PREFIX_LEN);
  283         suffix = "/24";
  284     }
  285 
  286     if (inet_ntop(ss->ss_family, &addr, dst, maxlen) != NULL) {
  287         strlcat(dst, suffix, maxlen);
  288     } else {
  289         dst[0] = '\0';
  290     }
  291 }
  292 
  293 static void rrl_log_state(knotd_mod_t *mod, const struct sockaddr_storage *ss,
  294                           uint16_t flags, uint8_t cls)
  295 {
  296     if (mod == NULL || ss == NULL) {
  297         return;
  298     }
  299 
  300     char addr_str[SOCKADDR_STRLEN];
  301     subnet_tostr(addr_str, sizeof(addr_str), ss);
  302 
  303     const char *what = "leaves";
  304     if (flags & RRL_BF_ELIMIT) {
  305         what = "enters";
  306     }
  307 
  308     knotd_mod_log(mod, LOG_NOTICE, "subnet %s, class %s, %s limiting",
  309                   addr_str, rrl_clsstr(cls), what);
  310 }
  311 
  312 static void rrl_lock(rrl_table_t *t, int lk_id)
  313 {
  314     assert(lk_id > -1);
  315     pthread_mutex_lock(t->lk + lk_id);
  316 }
  317 
  318 static void rrl_unlock(rrl_table_t *t, int lk_id)
  319 {
  320     assert(lk_id > -1);
  321     pthread_mutex_unlock(t->lk + lk_id);
  322 }
  323 
  324 static int rrl_setlocks(rrl_table_t *rrl, uint32_t granularity)
  325 {
  326     assert(!rrl->lk); /* Cannot change while locks are used. */
  327     assert(granularity <= rrl->size / 10); /* Due to int. division err. */
  328 
  329     if (pthread_mutex_init(&rrl->ll, NULL) < 0) {
  330         return KNOT_ENOMEM;
  331     }
  332 
  333     /* Alloc new locks. */
  334     rrl->lk = malloc(granularity * sizeof(pthread_mutex_t));
  335     if (!rrl->lk) {
  336         return KNOT_ENOMEM;
  337     }
  338     memset(rrl->lk, 0, granularity * sizeof(pthread_mutex_t));
  339 
  340     /* Initialize. */
  341     for (size_t i = 0; i < granularity; ++i) {
  342         if (pthread_mutex_init(rrl->lk + i, NULL) < 0) {
  343             break;
  344         }
  345         ++rrl->lk_count;
  346     }
  347 
  348     /* Incomplete initialization */
  349     if (rrl->lk_count != granularity) {
  350         for (size_t i = 0; i < rrl->lk_count; ++i) {
  351             pthread_mutex_destroy(rrl->lk + i);
  352         }
  353         free(rrl->lk);
  354         rrl->lk_count = 0;
  355         return KNOT_ERROR;
  356     }
  357 
  358     return KNOT_EOK;
  359 }
  360 
  361 rrl_table_t *rrl_create(size_t size, uint32_t rate)
  362 {
  363     if (size == 0) {
  364         return NULL;
  365     }
  366 
  367     const size_t tbl_len = sizeof(rrl_table_t) + size * sizeof(rrl_item_t);
  368     rrl_table_t *t = calloc(1, tbl_len);
  369     if (!t) {
  370         return NULL;
  371     }
  372     t->size = size;
  373     t->rate = rate;
  374 
  375     if (dnssec_random_buffer((uint8_t *)&t->key, sizeof(t->key)) != DNSSEC_EOK) {
  376         free(t);
  377         return NULL;
  378     }
  379 
  380     if (rrl_setlocks(t, RRL_LOCK_GRANULARITY) != KNOT_EOK) {
  381         free(t);
  382         return NULL;
  383     }
  384 
  385     return t;
  386 }
  387 
  388 /*! \brief Get bucket for current combination of parameters. */
  389 static rrl_item_t *rrl_hash(rrl_table_t *t, const struct sockaddr_storage *a,
  390                             rrl_req_t *req, const knot_dname_t *zone, uint32_t stamp,
  391                             int *lock)
  392 {
  393     uint8_t buf[RRL_CLSBLK_MAXLEN];
  394     int len = rrl_classify(buf, sizeof(buf), a, req, zone);
  395     if (len < 0) {
  396         return NULL;
  397     }
  398 
  399     uint32_t id = SipHash24(&t->key, buf, len) % t->size;
  400 
  401     /* Lock for lookup. */
  402     pthread_mutex_lock(&t->ll);
  403 
  404     /* Find an exact match in <id, id + HOP_LEN). */
  405     uint8_t *qname = buf + sizeof(uint8_t) + sizeof(uint64_t);
  406     uint64_t netblk;
  407     memcpy(&netblk, buf + sizeof(uint8_t), sizeof(netblk));
  408     rrl_item_t match = {
  409         .hop = 0,
  410         .netblk = netblk,
  411         .ntok = t->rate * RRL_CAPACITY,
  412         .cls = buf[0],
  413         .flags = RRL_BF_NULL,
  414         .qname = SipHash24(&t->key, qname, knot_dname_size(qname)),
  415         .time = stamp
  416     };
  417 
  418     unsigned d = find_match(t, id, &match);
  419     if (d > HOP_LEN) { /* not an exact match, find free element [f] */
  420         d = find_free(t, id, stamp);
  421     }
  422 
  423     /* Reduce distance to fit <id, id + HOP_LEN) */
  424     unsigned f = (id + d) % t->size;
  425     while (d >= HOP_LEN) {
  426         d = reduce_dist(t, id, d, &f);
  427     }
  428 
  429     /* Assign granular lock and unlock lookup. */
  430     *lock = f % t->lk_count;
  431     rrl_lock(t, *lock);
  432     pthread_mutex_unlock(&t->ll);
  433 
  434     /* found free elm 'k' which is in <id, id + HOP_LEN) */
  435     t->arr[id].hop |= (1 << d);
  436     rrl_item_t *b = t->arr + f;
  437     assert(f == (id+d) % t->size);
  438 
  439     /* Inspect bucket state. */
  440     unsigned hop = b->hop;
  441     if (b->cls == CLS_NULL) {
  442         memcpy(b, &match, sizeof(rrl_item_t));
  443         b->hop = hop;
  444     }
  445     /* Check for collisions. */
  446     if (!bucket_match(b, &match)) {
  447         if (!(b->flags & RRL_BF_SSTART)) {
  448             memcpy(b, &match, sizeof(rrl_item_t));
  449             b->hop = hop;
  450             b->ntok = t->rate + t->rate / RRL_SSTART;
  451             b->flags |= RRL_BF_SSTART;
  452         }
  453     }
  454 
  455     return b;
  456 }
  457 
  458 int rrl_query(rrl_table_t *rrl, const struct sockaddr_storage *a, rrl_req_t *req,
  459               const knot_dname_t *zone, knotd_mod_t *mod)
  460 {
  461     if (!rrl || !req || !a) {
  462         return KNOT_EINVAL;
  463     }
  464 
  465     /* Calculate hash and fetch */
  466     int ret = KNOT_EOK;
  467     int lock = -1;
  468     uint32_t now = time_now().tv_sec;
  469     rrl_item_t *b = rrl_hash(rrl, a, req, zone, now, &lock);
  470     if (!b) {
  471         if (lock > -1) {
  472             rrl_unlock(rrl, lock);
  473         }
  474         return KNOT_ERROR;
  475     }
  476 
  477     /* Calculate rate for dT */
  478     uint32_t dt = now - b->time;
  479     if (dt > RRL_CAPACITY) {
  480         dt = RRL_CAPACITY;
  481     }
  482     /* Visit bucket. */
  483     b->time = now;
  484     if (dt > 0) { /* Window moved. */
  485 
  486         /* Check state change. */
  487         if ((b->ntok > 0 || dt > 1) && (b->flags & RRL_BF_ELIMIT)) {
  488             b->flags &= ~RRL_BF_ELIMIT;
  489             rrl_log_state(mod, a, b->flags, b->cls);
  490         }
  491 
  492         /* Add new tokens. */
  493         uint32_t dn = rrl->rate * dt;
  494         if (b->flags & RRL_BF_SSTART) { /* Bucket in slow-start. */
  495             b->flags &= ~RRL_BF_SSTART;
  496         }
  497         b->ntok += dn;
  498         if (b->ntok > RRL_CAPACITY * rrl->rate) {
  499             b->ntok = RRL_CAPACITY * rrl->rate;
  500         }
  501     }
  502 
  503     /* Last item taken. */
  504     if (b->ntok == 1 && !(b->flags & RRL_BF_ELIMIT)) {
  505         b->flags |= RRL_BF_ELIMIT;
  506         rrl_log_state(mod, a, b->flags, b->cls);
  507     }
  508 
  509     /* Decay current bucket. */
  510     if (b->ntok > 0) {
  511         --b->ntok;
  512     } else if (b->ntok == 0) {
  513         ret = KNOT_ELIMIT;
  514     }
  515 
  516     if (lock > -1) {
  517         rrl_unlock(rrl, lock);
  518     }
  519     return ret;
  520 }
  521 
  522 bool rrl_slip_roll(int n_slip)
  523 {
  524     /* Now n_slip means every Nth answer slips.
  525      * That represents a chance of 1/N that answer slips.
  526      * Therefore, on average, from 100 answers 100/N will slip. */
  527     int threshold = RRL_SLIP_MAX / n_slip;
  528     int roll = dnssec_random_uint16_t() % RRL_SLIP_MAX;
  529     return (roll < threshold);
  530 }
  531 
  532 void rrl_destroy(rrl_table_t *rrl)
  533 {
  534     if (rrl) {
  535         if (rrl->lk_count > 0) {
  536             pthread_mutex_destroy(&rrl->ll);
  537         }
  538         for (size_t i = 0; i < rrl->lk_count; ++i) {
  539             pthread_mutex_destroy(rrl->lk + i);
  540         }
  541         free(rrl->lk);
  542     }
  543 
  544     free(rrl);
  545 }