"Fossies" - the Fresh Open Source Software Archive

Member "file-5.35/src/is_json.c" (15 Oct 2018, 9402 Bytes) of package /linux/misc/file-5.35.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file. For more information about "is_json.c" see the Fossies "Dox" file reference documentation.

    1 /*-
    2  * Copyright (c) 2018 Christos Zoulas
    3  * All rights reserved.
    4  *
    5  * Redistribution and use in source and binary forms, with or without
    6  * modification, are permitted provided that the following conditions
    7  * are met:
    8  * 1. Redistributions of source code must retain the above copyright
    9  *    notice, this list of conditions and the following disclaimer.
   10  * 2. Redistributions in binary form must reproduce the above copyright
   11  *    notice, this list of conditions and the following disclaimer in the
   12  *    documentation and/or other materials provided with the distribution.
   13  *
   14  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   15  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   16  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   17  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   18  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   19  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   20  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   21  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   22  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   23  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   24  * POSSIBILITY OF SUCH DAMAGE.
   25  */
   26 
   27 /*
   28  * Parse JSON object serialization format (RFC-7159)
   29  */
   30 
   31 #ifndef TEST
   32 #include "file.h"
   33 
   34 #ifndef lint
   35 FILE_RCSID("@(#)$File: is_json.c,v 1.11 2018/10/15 16:29:16 christos Exp $")
   36 #endif
   37 
   38 #include <string.h>
   39 #include "magic.h"
   40 #endif
   41 
   42 #ifdef DEBUG
   43 #include <stdio.h>
   44 #define DPRINTF(a, b, c)    \
   45     printf("%s [%.2x/%c] %.20s\n", (a), *(b), *(b), (const char *)(c))
   46 #else
   47 #define DPRINTF(a, b, c)    (void)0
   48 #endif
   49 
   50 #define JSON_ARRAY  0
   51 #define JSON_CONSTANT   1
   52 #define JSON_NUMBER 2
   53 #define JSON_OBJECT 3
   54 #define JSON_STRING 4
   55 #define JSON_MAX    5
   56 
   57 /*
   58  * if JSON_COUNT != 0:
   59  *  count all the objects, require that we have the whole data file
   60  * otherwise:
   61  *  stop if we find an object or an array
   62  */
   63 #ifndef JSON_COUNT
   64 #define JSON_COUNT 0
   65 #endif
   66 
   67 static int json_parse(const unsigned char **, const unsigned char *, size_t *,
   68     size_t);
   69 
   70 static int
   71 json_isspace(const unsigned char uc)
   72 {
   73     switch (uc) {
   74     case ' ':
   75     case '\n':
   76     case '\r':
   77     case '\t':
   78         return 1;
   79     default:
   80         return 0;
   81     }
   82 }
   83 
   84 static int
   85 json_isdigit(unsigned char uc)
   86 {
   87     switch (uc) {
   88     case '0': case '1': case '2': case '3': case '4':
   89     case '5': case '6': case '7': case '8': case '9':
   90         return 1;
   91     default:
   92         return 0;
   93     }
   94 }
   95 
   96 static int
   97 json_isxdigit(unsigned char uc)
   98 {
   99     if (json_isdigit(uc))
  100         return 1;
  101     switch (uc) {
  102     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
  103     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
  104         return 1;
  105     default:
  106         return 0;
  107     }
  108 }
  109 
  110 static const unsigned char *
  111 json_skip_space(const unsigned char *uc, const unsigned char *ue)
  112 {
  113     while (uc < ue && json_isspace(*uc))
  114         uc++;
  115     return uc;
  116 }
  117 
  118 static int
  119 json_parse_string(const unsigned char **ucp, const unsigned char *ue)
  120 {
  121     const unsigned char *uc = *ucp;
  122     size_t i;
  123 
  124     DPRINTF("Parse string: ", uc, *ucp);
  125     while (uc < ue) {
  126         switch (*uc++) {
  127         case '\0':
  128             goto out;
  129         case '\\':
  130             if (uc == ue)
  131                 goto out;
  132             switch (*uc++) {
  133             case '\0':
  134                 goto out;
  135             case '"':
  136             case '\\':
  137             case '/':
  138             case 'b':
  139             case 'f':
  140             case 'n':
  141             case 'r':
  142             case 't':
  143                 continue;
  144             case 'u':
  145                 if (ue - uc < 4) {
  146                     uc = ue;
  147                     goto out;
  148                 }
  149                 for (i = 0; i < 4; i++)
  150                     if (!json_isxdigit(*uc++))
  151                         goto out;
  152                 continue;
  153             default:
  154                 goto out;
  155             }
  156         case '"':
  157             *ucp = uc;
  158             return 1;
  159         default:
  160             continue;
  161         }
  162     }
  163 out:
  164     DPRINTF("Bad string: ", uc, *ucp);
  165     *ucp = uc;
  166     return 0;
  167 }
  168 
  169 static int
  170 json_parse_array(const unsigned char **ucp, const unsigned char *ue,
  171     size_t *st, size_t lvl)
  172 {
  173     const unsigned char *uc = *ucp;
  174 
  175     DPRINTF("Parse array: ", uc, *ucp);
  176     while (uc < ue) {
  177         if (!json_parse(&uc, ue, st, lvl + 1))
  178             goto out;
  179         if (uc == ue)
  180             goto out;
  181         switch (*uc) {
  182         case ',':
  183             uc++;
  184             continue;
  185         case ']':
  186             *ucp = uc + 1;
  187             return 1;
  188         default:
  189             goto out;
  190         }
  191     }
  192 out:
  193     DPRINTF("Bad array: ", uc,  *ucp);
  194     *ucp = uc;
  195     return 0;
  196 }
  197 
  198 static int
  199 json_parse_object(const unsigned char **ucp, const unsigned char *ue,
  200     size_t *st, size_t lvl)
  201 {
  202     const unsigned char *uc = *ucp;
  203     DPRINTF("Parse object: ", uc, *ucp);
  204     while (uc < ue) {
  205         uc = json_skip_space(uc, ue);
  206         if (uc == ue)
  207             goto out;
  208         if (*uc++ != '"') {
  209             DPRINTF("not string", uc, *ucp);
  210             goto out;
  211         }
  212         DPRINTF("next field", uc, *ucp);
  213         if (!json_parse_string(&uc, ue)) {
  214             DPRINTF("not string", uc, *ucp);
  215             goto out;
  216         }
  217         uc = json_skip_space(uc, ue);
  218         if (uc == ue)
  219             goto out;
  220         if (*uc++ != ':') {
  221             DPRINTF("not colon", uc, *ucp);
  222             goto out;
  223         }
  224         if (!json_parse(&uc, ue, st, lvl + 1)) {
  225             DPRINTF("not json", uc, *ucp);
  226             goto out;
  227         }
  228         if (uc == ue)
  229             goto out;
  230         switch (*uc++) {
  231         case ',':
  232             continue;
  233         case '}': /* { */
  234             *ucp = uc;
  235             DPRINTF("Good object: ", uc, *ucp);
  236             return 1;
  237         default:
  238             *ucp = uc - 1;
  239             DPRINTF("not more", uc, *ucp);
  240             goto out;
  241         }
  242     }
  243 out:
  244     DPRINTF("Bad object: ", uc, *ucp);
  245     *ucp = uc;
  246     return 0;
  247 }
  248 
  249 static int
  250 json_parse_number(const unsigned char **ucp, const unsigned char *ue)
  251 {
  252     const unsigned char *uc = *ucp;
  253     int got = 0;
  254 
  255     DPRINTF("Parse number: ", uc, *ucp);
  256     if (uc == ue)
  257         return 0;
  258     if (*uc == '-')
  259         uc++;
  260 
  261     for (; uc < ue; uc++) {
  262         if (!json_isdigit(*uc))
  263             break;
  264         got = 1;
  265     }
  266     if (uc == ue)
  267         goto out;
  268     if (*uc == '.')
  269         uc++;
  270     for (; uc < ue; uc++) {
  271         if (!json_isdigit(*uc))
  272             break;
  273         got = 1;
  274     }
  275     if (uc == ue)
  276         goto out;
  277     if (got && (*uc == 'e' || *uc == 'E')) {
  278         uc++;
  279         got = 0;
  280         if (uc == ue)
  281             goto out;
  282         if (*uc == '+' || *uc == '-')
  283             uc++;
  284         for (; uc < ue; uc++) {
  285             if (!json_isdigit(*uc))
  286                 break;
  287             got = 1;
  288         }
  289     }
  290 out:
  291     if (!got)
  292         DPRINTF("Bad number: ", uc, *ucp);
  293     else
  294         DPRINTF("Good number: ", uc, *ucp);
  295     *ucp = uc;
  296     return got;
  297 }
  298 
  299 static int
  300 json_parse_const(const unsigned char **ucp, const unsigned char *ue,
  301     const char *str, size_t len)
  302 {
  303     const unsigned char *uc = *ucp;
  304 
  305     DPRINTF("Parse const: ", uc, *ucp);
  306     for (len--; uc < ue && --len;) {
  307         if (*uc++ == *++str)
  308             continue;
  309     }
  310     if (len)
  311         DPRINTF("Bad const: ", uc, *ucp);
  312     *ucp = uc;
  313     return len == 0;
  314 }
  315 
  316 static int
  317 json_parse(const unsigned char **ucp, const unsigned char *ue,
  318     size_t *st, size_t lvl)
  319 {
  320     const unsigned char *uc;
  321     int rv = 0;
  322     int t;
  323 
  324     uc = json_skip_space(*ucp, ue);
  325     if (uc == ue)
  326         goto out;
  327 
  328     // Avoid recursion
  329     if (lvl > 20)
  330         return 0;
  331 #if JSON_COUNT
  332     /* bail quickly if not counting */
  333     if (lvl > 1 && (st[JSON_OBJECT] || st[JSON_ARRAY]))
  334         return 1;
  335 #endif
  336 
  337     DPRINTF("Parse general: ", uc, *ucp);
  338     switch (*uc++) {
  339     case '"':
  340         rv = json_parse_string(&uc, ue);
  341         t = JSON_STRING;
  342         break;
  343     case '[':
  344         rv = json_parse_array(&uc, ue, st, lvl + 1);
  345         t = JSON_ARRAY;
  346         break;
  347     case '{': /* '}' */
  348         rv = json_parse_object(&uc, ue, st, lvl + 1);
  349         t = JSON_OBJECT;
  350         break;
  351     case 't':
  352         rv = json_parse_const(&uc, ue, "true", sizeof("true"));
  353         t = JSON_CONSTANT;
  354         break;
  355     case 'f':
  356         rv = json_parse_const(&uc, ue, "false", sizeof("false"));
  357         t = JSON_CONSTANT;
  358         break;
  359     case 'n':
  360         rv = json_parse_const(&uc, ue, "null", sizeof("null"));
  361         t = JSON_CONSTANT;
  362         break;
  363     default:
  364         --uc;
  365         rv = json_parse_number(&uc, ue);
  366         t = JSON_NUMBER;
  367         break;
  368     }
  369     if (rv)
  370         st[t]++;
  371     uc = json_skip_space(uc, ue);
  372 out:
  373     *ucp = uc;
  374     DPRINTF("End general: ", uc, *ucp);
  375     if (lvl == 0)
  376         return rv && (st[JSON_ARRAY] || st[JSON_OBJECT]);
  377     return rv;
  378 }
  379 
  380 #ifndef TEST
  381 int
  382 file_is_json(struct magic_set *ms, const struct buffer *b)
  383 {
  384     const unsigned char *uc = CAST(const unsigned char *, b->fbuf);
  385     const unsigned char *ue = uc + b->flen;
  386     size_t st[JSON_MAX];
  387     int mime = ms->flags & MAGIC_MIME;
  388 
  389 
  390     if ((ms->flags & (MAGIC_APPLE|MAGIC_EXTENSION)) != 0)
  391         return 0;
  392 
  393     memset(st, 0, sizeof(st));
  394 
  395     if (!json_parse(&uc, ue, st, 0))
  396         return 0;
  397 
  398     if (mime == MAGIC_MIME_ENCODING)
  399         return 1;
  400     if (mime) {
  401         if (file_printf(ms, "application/json") == -1)
  402             return -1;
  403         return 1;
  404     }
  405     if (file_printf(ms, "JSON data") == -1)
  406         return -1;
  407 #if JSON_COUNT
  408 #define P(n) st[n], st[n] > 1 ? "s" : ""
  409     if (file_printf(ms, " (%" SIZE_T_FORMAT "u object%s, %" SIZE_T_FORMAT
  410         "u array%s, %" SIZE_T_FORMAT "u string%s, %" SIZE_T_FORMAT
  411         "u constant%s, %" SIZE_T_FORMAT "u number%s)", P(JSON_OBJECT),
  412         P(JSON_ARRAY), P(JSON_STRING), P(JSON_CONSTANT), P(JSON_NUMBER))
  413         == -1)
  414         return -1;
  415 #endif
  416     return 1;
  417 }
  418 
  419 #else
  420 
  421 #include <sys/types.h>
  422 #include <sys/stat.h>
  423 #include <stdio.h>
  424 #include <fcntl.h>
  425 #include <unistd.h>
  426 #include <stdlib.h>
  427 #include <stdint.h>
  428 #include <err.h>
  429 
  430 int
  431 main(int argc, char *argv[])
  432 {
  433     int fd, rv;
  434     struct stat st;
  435     unsigned char *p;
  436     size_t stats[JSON_MAX];
  437 
  438     if ((fd = open(argv[1], O_RDONLY)) == -1)
  439         err(EXIT_FAILURE, "Can't open `%s'", argv[1]);
  440 
  441     if (fstat(fd, &st) == -1)
  442         err(EXIT_FAILURE, "Can't stat `%s'", argv[1]);
  443 
  444     if ((p = malloc(st.st_size)) == NULL)
  445         err(EXIT_FAILURE, "Can't allocate %jd bytes",
  446             (intmax_t)st.st_size);
  447     if (read(fd, p, st.st_size) != st.st_size)
  448         err(EXIT_FAILURE, "Can't read %jd bytes",
  449             (intmax_t)st.st_size);
  450     memset(stats, 0, sizeof(stats));
  451     printf("is json %d\n", json_parse((const unsigned char **)&p,
  452         p + st.st_size, stats, 0));
  453     return 0;
  454 }
  455 #endif