"Fossies" - the Fresh Open Source Software Archive

Member "bind-9.17.5/lib/isc/utf8.c" (4 Sep 2020, 2176 Bytes) of package /linux/misc/dns/bind9/9.17.5/bind-9.17.5.tar.xz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "utf8.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
    3  *
    4  * This Source Code Form is subject to the terms of the Mozilla Public
    5  * License, v. 2.0. If a copy of the MPL was not distributed with this
    6  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
    7  *
    8  * See the COPYRIGHT file distributed with this work for additional
    9  * information regarding copyright ownership.
   10  */
   11 
   12 #include <string.h>
   13 
   14 #include <isc/utf8.h>
   15 #include <isc/util.h>
   16 
   /*
    * UTF-8 is defined in "The Unicode Standard -- Version 4.0"
    * Also see RFC 3629.
    *
    * Char. number range  |        UTF-8 octet sequence
    *    (hexadecimal)    |              (binary)
    *  --------------------+---------------------------------------------
    * 0000 0000-0000 007F | 0xxxxxxx
    * 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
    * 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
    * 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    *
    * isc_utf8_valid() returns true when the 'len' octets starting at 'buf'
    * form a well-formed sequence of the encodings above, and false otherwise.
    * The input is rejected when:
    *  - a lead octet matches none of the four patterns, or the buffer ends
    *    before all of its continuation octets (10xxxxxx) have been seen;
    *  - the decoded value is below the minimum for the sequence length,
    *    i.e. it is an overlong encoding;
    *  - a three-octet sequence decodes to a UTF-16 surrogate (D800-DFFF),
    *    which RFC 3629 prohibits in UTF-8;
    *  - a four-octet sequence decodes to a value above 10FFFF.
    *
    * An empty buffer (len == 0) is valid.
    *
    * Requires:
    *  'buf' is not NULL.
    */
   bool
   isc_utf8_valid(const unsigned char *buf, size_t len) {
       REQUIRE(buf != NULL);

       for (size_t i = 0; i < len; i++) {
           /* Single octet: 0xxxxxxx. */
           if (buf[i] <= 0x7f) {
               continue;
           }

           /* Two octets: 110xxxxx 10xxxxxx. */
           if ((i + 1) < len && (buf[i] & 0xe0) == 0xc0 &&
               (buf[i + 1] & 0xc0) == 0x80)
           {
               unsigned int w;
               w = (buf[i] & 0x1f) << 6;
               w |= (buf[++i] & 0x3f);
               /* Overlong: the value fits in a single octet. */
               if (w < 0x80) {
                   return (false);
               }
               continue;
           }

           /* Three octets: 1110xxxx 10xxxxxx 10xxxxxx. */
           if ((i + 2) < len && (buf[i] & 0xf0) == 0xe0 &&
               (buf[i + 1] & 0xc0) == 0x80 && (buf[i + 2] & 0xc0) == 0x80)
           {
               unsigned int w;
               w = (buf[i] & 0x0f) << 12;
               w |= (buf[++i] & 0x3f) << 6;
               w |= (buf[++i] & 0x3f);
               /* Overlong: the value fits in two octets. */
               if (w < 0x0800) {
                   return (false);
               }
               /*
                * D800-DFFF are reserved for UTF-16 surrogate pairs
                * and must never appear in UTF-8.
                */
               if (w >= 0xd800 && w <= 0xdfff) {
                   return (false);
               }
               continue;
           }

           /* Four octets: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx. */
           if ((i + 3) < len && (buf[i] & 0xf8) == 0xf0 &&
               (buf[i + 1] & 0xc0) == 0x80 &&
               (buf[i + 2] & 0xc0) == 0x80 && (buf[i + 3] & 0xc0) == 0x80)
           {
               unsigned int w;
               w = (buf[i] & 0x07) << 18;
               w |= (buf[++i] & 0x3f) << 12;
               w |= (buf[++i] & 0x3f) << 6;
               w |= (buf[++i] & 0x3f);
               /* Overlong, or beyond the last code point 10FFFF. */
               if (w < 0x10000 || w > 0x10FFFF) {
                   return (false);
               }
               continue;
           }

           /*
            * Stray continuation octet, invalid lead octet, or a
            * sequence cut short by the end of the buffer.
            */
           return (false);
       }
       return (true);
   }
   77 
   /*
    * Report whether the buffer starts with the three-octet UTF-8 byte order
    * mark (EF BB BF).
    *
    * Returns true only when 'len' is at least 3 and the first three octets
    * of 'buf' equal EF BB BF; anything shorter, or any other prefix, yields
    * false.
    *
    * Requires:
    *  'buf' is not NULL.
    */
   bool
   isc_utf8_bom(const unsigned char *buf, size_t len) {
       REQUIRE(buf != NULL);

       /* Too short to hold the mark; do not read past the buffer. */
       if (len < 3U) {
           return (false);
       }
       return (memcmp(buf, "\xef\xbb\xbf", 3) == 0);
   }