"Fossies" - the Fresh Open Source Software Archive

Member "file-5.35/src/apprentice.c" (10 Oct 2018, 74021 Bytes) of package /linux/misc/file-5.35.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "apprentice.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 5.34_vs_5.35.

    1 /*
    2  * Copyright (c) Ian F. Darwin 1986-1995.
    3  * Software written by Ian F. Darwin and others;
    4  * maintained 1995-present by Christos Zoulas and others.
    5  *
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice immediately at the beginning of the file, without modification,
   11  *    this list of conditions, and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
   20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 /*
   29  * apprentice - make one pass through /etc/magic, learning its secrets.
   30  */
   31 
   32 #include "file.h"
   33 
   34 #ifndef lint
   35 FILE_RCSID("@(#)$File: apprentice.c,v 1.281 2018/10/10 17:41:10 christos Exp $")
   36 #endif  /* lint */
   37 
   38 #include "magic.h"
   39 #include <stdlib.h>
   40 #ifdef HAVE_UNISTD_H
   41 #include <unistd.h>
   42 #endif
   43 #include <stddef.h>
   44 #include <string.h>
   45 #include <assert.h>
   46 #include <ctype.h>
   47 #include <fcntl.h>
   48 #ifdef QUICK
   49 #include <sys/mman.h>
   50 #endif
   51 #include <dirent.h>
   52 #include <limits.h>
   53 
   54 
/*
 * Advance the pointer named `l' (which must be in scope at the point of
 * use) past any run of ASCII whitespace characters.
 */
#define EATAB {while (isascii((unsigned char) *l) && \
              isspace((unsigned char) *l))  ++l;}
/*
 * Map an upper-case character to lower case and leave anything else
 * untouched.  The argument is expanded more than once, so it must not
 * have side effects.
 */
#define LOWCASE(l) (isupper((unsigned char) (l)) ? \
            tolower((unsigned char) (l)) : (l))
/*
 * Work around a bug in headers on Digital Unix.
 * At least confirmed for: OSF1 V4.0 878
 */
#if defined(__osf__) && defined(__DECC)
#ifdef MAP_FAILED
#undef MAP_FAILED
#endif
#endif

/* Fallback for systems whose headers do not provide MAP_FAILED. */
#ifndef MAP_FAILED
#define MAP_FAILED (void *) -1
#endif

/* Fallback for systems whose headers do not provide MAP_FILE. */
#ifndef MAP_FILE
#define MAP_FILE 0
#endif

/*
 * Allocation step sizes.
 * NOTE(review): neither is used in the part of the file visible here;
 * presumably they control array growth further down -- confirm against
 * their users.
 */
#define ALLOC_CHUNK (size_t)10
#define ALLOC_INCR  (size_t)200

/*
 * Values for magic_map.type; apprentice_unmap() switches on them to
 * decide how the storage behind a map is released.
 */
#define MAP_TYPE_USER   0   /* only the map structure itself is freed */
#define MAP_TYPE_MALLOC 1   /* backing storage is released with free() */
#define MAP_TYPE_MMAP   2   /* backing storage is released with munmap() */
   83 
/*
 * One entry of the magic table while it is being assembled.
 * apprentice_sort() orders entries by the strength of the record that
 * `mp' points at.
 * NOTE(review): cont_count and max_count look like the used and
 * allocated element counts of the array behind `mp' -- confirm against
 * the parser, which is not visible here.
 */
struct magic_entry {
    struct magic *mp;
    uint32_t cont_count;
    uint32_t max_count;
};
   89 
/*
 * A collection of magic_entry records.
 * NOTE(review): presumably `count' is the number of entries in use and
 * `max' the number allocated in `me' -- confirm against the code that
 * grows the set, which is not visible here.
 */
struct magic_entry_set {
    struct magic_entry *me;
    uint32_t count;
    uint32_t max;
};
   95 
/*
 * A loaded magic database.  add_mlist() links the per-set arrays into
 * the magic_set's lists and apprentice_unmap() releases everything.
 */
struct magic_map {
    void *p;        /* backing storage; freed or unmapped per `type' */
    size_t len;     /* length in bytes of the region starting at p */
    int type;       /* one of the MAP_TYPE_* values */
    struct magic *magic[MAGIC_SETS];    /* magic array for each set */
    uint32_t nmagic[MAGIC_SETS];        /* element count of each array */
};
  103 
/*
 * Per-type lookup tables indexed by FILE_* type code.  Both arrays are
 * filled in by init_file_tables() from type_tbl; the companion size
 * constants expose the array lengths.
 */
int file_formats[FILE_NAMES_SIZE];
const size_t file_nformats = FILE_NAMES_SIZE;
const char *file_names[FILE_NAMES_SIZE];
const size_t file_nnames = FILE_NAMES_SIZE;
  108 
  109 private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
  110 private int hextoint(int);
  111 private const char *getstr(struct magic_set *, struct magic *, const char *,
  112     int);
  113 private int parse(struct magic_set *, struct magic_entry *, const char *,
  114     size_t, int);
  115 private void eatsize(const char **);
  116 private int apprentice_1(struct magic_set *, const char *, int);
  117 private size_t apprentice_magic_strength(const struct magic *);
  118 private int apprentice_sort(const void *, const void *);
  119 private void apprentice_list(struct mlist *, int );
  120 private struct magic_map *apprentice_load(struct magic_set *,
  121     const char *, int);
  122 private struct mlist *mlist_alloc(void);
  123 private void mlist_free(struct mlist *);
  124 private void byteswap(struct magic *, uint32_t);
  125 private void bs1(struct magic *);
  126 private uint16_t swap2(uint16_t);
  127 private uint32_t swap4(uint32_t);
  128 private uint64_t swap8(uint64_t);
  129 private char *mkdbname(struct magic_set *, const char *, int);
  130 private struct magic_map *apprentice_buf(struct magic_set *, struct magic *,
  131     size_t);
  132 private struct magic_map *apprentice_map(struct magic_set *, const char *);
  133 private int check_buffer(struct magic_set *, struct magic_map *, const char *);
  134 private void apprentice_unmap(struct magic_map *);
  135 private int apprentice_compile(struct magic_set *, struct magic_map *,
  136     const char *);
  137 private int check_format_type(const char *, int, const char **);
  138 private int check_format(struct magic_set *, struct magic *);
  139 private int get_op(char);
  140 private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
  141 private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
  142 private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
  143 private int parse_ext(struct magic_set *, struct magic_entry *, const char *);
  144 
  145 
/*
 * Size of struct magic as compiled into this object; apprentice_1()
 * refuses to proceed when it differs from FILE_MAGICSIZE.
 */
private size_t magicsize = sizeof(struct magic);

/*
 * Tab-separated column header.
 * NOTE(review): its user is not visible in this part of the file.
 */
private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
  149 
/*
 * Table of named fields and the parser for each.  DECLARE_FIELD(x)
 * expands to the string "x", its length without the terminating NUL,
 * and the function parse_x.  The table ends with an all-NULL sentinel.
 * NOTE(review): the lookup code is not visible here; presumably these
 * are the annotation lines of a magic file -- confirm against parse().
 */
private struct {
    const char *name;
    size_t len;
    int (*fun)(struct magic_set *, struct magic_entry *, const char *);
} bang[] = {
#define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
    DECLARE_FIELD(mime),
    DECLARE_FIELD(apple),
    DECLARE_FIELD(ext),
    DECLARE_FIELD(strength),
#undef  DECLARE_FIELD
    { NULL, 0, NULL }
};
  163 
  164 #ifdef COMPILE_ONLY
  165 
  166 int main(int, char *[]);
  167 
  168 int
  169 main(int argc, char *argv[])
  170 {
  171     int ret;
  172     struct magic_set *ms;
  173     char *progname;
  174 
  175     if ((progname = strrchr(argv[0], '/')) != NULL)
  176         progname++;
  177     else
  178         progname = argv[0];
  179 
  180     if (argc != 2) {
  181         (void)fprintf(stderr, "Usage: %s file\n", progname);
  182         return 1;
  183     }
  184 
  185     if ((ms = magic_open(MAGIC_CHECK)) == NULL) {
  186         (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
  187         return 1;
  188     }
  189     ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0;
  190     if (ret == 1)
  191         (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
  192     magic_close(ms);
  193     return ret;
  194 }
  195 #endif /* COMPILE_ONLY */
  196 
/*
 * One row of a type-keyword table (see type_tbl and special_tbl).
 */
struct type_tbl_s {
    const char name[16];    /* keyword text, NUL terminated */
    const size_t len;       /* length of name; 0 marks the end of a table */
    const int type;         /* FILE_* type code */
    const int format;       /* FILE_FMT_* format class */
};
  203 
/*
 * Table of type keywords, giving for each its FILE_* type code and its
 * FILE_FMT_* format class.  init_file_tables() walks the table up to
 * the zero-length sentinel row to fill file_names[] and file_formats[],
 * and asserts that the number of rows before the sentinel equals
 * FILE_NAMES_SIZE.  get_type() matches keywords by prefix, in table
 * order.
 *
 * XXX - the actual Single UNIX Specification says that "long" means "long",
 * as in the C data type, but we treat it as meaning "4-byte integer".
 * Given that the OS X version of file 5.04 did the same, I guess that passes
 * the actual test; having "long" be dependent on how big a "long" is on
 * the machine running "file" is silly.
 */
static const struct type_tbl_s type_tbl[] = {
/* XX(s) supplies both the name and len members of a row. */
# define XX(s)      s, (sizeof(s) - 1)
/* XX_NULL supplies the empty name and zero len of the sentinel row. */
# define XX_NULL    "", 0
    { XX("invalid"),    FILE_INVALID,       FILE_FMT_NONE },
    { XX("byte"),       FILE_BYTE,      FILE_FMT_NUM },
    { XX("short"),      FILE_SHORT,     FILE_FMT_NUM },
    { XX("default"),    FILE_DEFAULT,       FILE_FMT_NONE },
    { XX("long"),       FILE_LONG,      FILE_FMT_NUM },
    { XX("string"),     FILE_STRING,        FILE_FMT_STR },
    { XX("date"),       FILE_DATE,      FILE_FMT_STR },
    { XX("beshort"),    FILE_BESHORT,       FILE_FMT_NUM },
    { XX("belong"),     FILE_BELONG,        FILE_FMT_NUM },
    { XX("bedate"),     FILE_BEDATE,        FILE_FMT_STR },
    { XX("leshort"),    FILE_LESHORT,       FILE_FMT_NUM },
    { XX("lelong"),     FILE_LELONG,        FILE_FMT_NUM },
    { XX("ledate"),     FILE_LEDATE,        FILE_FMT_STR },
    { XX("pstring"),    FILE_PSTRING,       FILE_FMT_STR },
    { XX("ldate"),      FILE_LDATE,     FILE_FMT_STR },
    { XX("beldate"),    FILE_BELDATE,       FILE_FMT_STR },
    { XX("leldate"),    FILE_LELDATE,       FILE_FMT_STR },
    { XX("regex"),      FILE_REGEX,     FILE_FMT_STR },
    { XX("bestring16"), FILE_BESTRING16,    FILE_FMT_STR },
    { XX("lestring16"), FILE_LESTRING16,    FILE_FMT_STR },
    { XX("search"),     FILE_SEARCH,        FILE_FMT_STR },
    { XX("medate"),     FILE_MEDATE,        FILE_FMT_STR },
    { XX("meldate"),    FILE_MELDATE,       FILE_FMT_STR },
    { XX("melong"),     FILE_MELONG,        FILE_FMT_NUM },
    { XX("quad"),       FILE_QUAD,      FILE_FMT_QUAD },
    { XX("lequad"),     FILE_LEQUAD,        FILE_FMT_QUAD },
    { XX("bequad"),     FILE_BEQUAD,        FILE_FMT_QUAD },
    { XX("qdate"),      FILE_QDATE,     FILE_FMT_STR },
    { XX("leqdate"),    FILE_LEQDATE,       FILE_FMT_STR },
    { XX("beqdate"),    FILE_BEQDATE,       FILE_FMT_STR },
    { XX("qldate"),     FILE_QLDATE,        FILE_FMT_STR },
    { XX("leqldate"),   FILE_LEQLDATE,      FILE_FMT_STR },
    { XX("beqldate"),   FILE_BEQLDATE,      FILE_FMT_STR },
    { XX("float"),      FILE_FLOAT,     FILE_FMT_FLOAT },
    { XX("befloat"),    FILE_BEFLOAT,       FILE_FMT_FLOAT },
    { XX("lefloat"),    FILE_LEFLOAT,       FILE_FMT_FLOAT },
    { XX("double"),     FILE_DOUBLE,        FILE_FMT_DOUBLE },
    { XX("bedouble"),   FILE_BEDOUBLE,      FILE_FMT_DOUBLE },
    { XX("ledouble"),   FILE_LEDOUBLE,      FILE_FMT_DOUBLE },
    { XX("leid3"),      FILE_LEID3,     FILE_FMT_NUM },
    { XX("beid3"),      FILE_BEID3,     FILE_FMT_NUM },
    { XX("indirect"),   FILE_INDIRECT,      FILE_FMT_NUM },
    { XX("qwdate"),     FILE_QWDATE,        FILE_FMT_STR },
    { XX("leqwdate"),   FILE_LEQWDATE,      FILE_FMT_STR },
    { XX("beqwdate"),   FILE_BEQWDATE,      FILE_FMT_STR },
    { XX("name"),       FILE_NAME,      FILE_FMT_NONE },
    { XX("use"),        FILE_USE,       FILE_FMT_NONE },
    { XX("clear"),      FILE_CLEAR,     FILE_FMT_NONE },
    { XX("der"),        FILE_DER,       FILE_FMT_STR },
    { XX_NULL,      FILE_INVALID,       FILE_FMT_NONE },   /* sentinel */
};
  265 
/*
 * These are not types, and cannot be preceded by "u" to make them
 * unsigned.
 *
 * The table has the same row layout and zero-length sentinel as
 * type_tbl, so it can be searched with get_type() as well.
 * NOTE(review): "name" and "use" carry FILE_FMT_STR here but
 * FILE_FMT_NONE in type_tbl.  In the code visible here only type_tbl's
 * format column is consumed (by init_file_tables()) -- confirm whether
 * the difference is intentional.
 */
static const struct type_tbl_s special_tbl[] = {
    { XX("der"),        FILE_DER,       FILE_FMT_STR },
    { XX("name"),       FILE_NAME,      FILE_FMT_STR },
    { XX("use"),        FILE_USE,       FILE_FMT_STR },
    { XX_NULL,      FILE_INVALID,       FILE_FMT_NONE },   /* sentinel */
};
/* The row-building helpers are only meant for the two tables above. */
# undef XX
# undef XX_NULL
  278 
  279 private int
  280 get_type(const struct type_tbl_s *tbl, const char *l, const char **t)
  281 {
  282     const struct type_tbl_s *p;
  283 
  284     for (p = tbl; p->len; p++) {
  285         if (strncmp(l, p->name, p->len) == 0) {
  286             if (t)
  287                 *t = l + p->len;
  288             break;
  289         }
  290     }
  291     return p->type;
  292 }
  293 
  294 private off_t
  295 maxoff_t(void) {
  296     if (sizeof(off_t) == sizeof(int))
  297         return CAST(off_t, INT_MAX);
  298     if (sizeof(off_t) == sizeof(long))
  299         return CAST(off_t, LONG_MAX);
  300     return 0x7fffffff;
  301 }
  302 
  303 private int
  304 get_standard_integer_type(const char *l, const char **t)
  305 {
  306     int type;
  307 
  308     if (isalpha((unsigned char)l[1])) {
  309         switch (l[1]) {
  310         case 'C':
  311             /* "dC" and "uC" */
  312             type = FILE_BYTE;
  313             break;
  314         case 'S':
  315             /* "dS" and "uS" */
  316             type = FILE_SHORT;
  317             break;
  318         case 'I':
  319         case 'L':
  320             /*
  321              * "dI", "dL", "uI", and "uL".
  322              *
  323              * XXX - the actual Single UNIX Specification says
  324              * that "L" means "long", as in the C data type,
  325              * but we treat it as meaning "4-byte integer".
  326              * Given that the OS X version of file 5.04 did
  327              * the same, I guess that passes the actual SUS
  328              * validation suite; having "dL" be dependent on
  329              * how big a "long" is on the machine running
  330              * "file" is silly.
  331              */
  332             type = FILE_LONG;
  333             break;
  334         case 'Q':
  335             /* "dQ" and "uQ" */
  336             type = FILE_QUAD;
  337             break;
  338         default:
  339             /* "d{anything else}", "u{anything else}" */
  340             return FILE_INVALID;
  341         }
  342         l += 2;
  343     } else if (isdigit((unsigned char)l[1])) {
  344         /*
  345          * "d{num}" and "u{num}"; we only support {num} values
  346          * of 1, 2, 4, and 8 - the Single UNIX Specification
  347          * doesn't say anything about whether arbitrary
  348          * values should be supported, but both the Solaris 10
  349          * and OS X Mountain Lion versions of file passed the
  350          * Single UNIX Specification validation suite, and
  351          * neither of them support values bigger than 8 or
  352          * non-power-of-2 values.
  353          */
  354         if (isdigit((unsigned char)l[2])) {
  355             /* Multi-digit, so > 9 */
  356             return FILE_INVALID;
  357         }
  358         switch (l[1]) {
  359         case '1':
  360             type = FILE_BYTE;
  361             break;
  362         case '2':
  363             type = FILE_SHORT;
  364             break;
  365         case '4':
  366             type = FILE_LONG;
  367             break;
  368         case '8':
  369             type = FILE_QUAD;
  370             break;
  371         default:
  372             /* XXX - what about 3, 5, 6, or 7? */
  373             return FILE_INVALID;
  374         }
  375         l += 2;
  376     } else {
  377         /*
  378          * "d" or "u" by itself.
  379          */
  380         type = FILE_LONG;
  381         ++l;
  382     }
  383     if (t)
  384         *t = l;
  385     return type;
  386 }
  387 
  388 private void
  389 init_file_tables(void)
  390 {
  391     static int done = 0;
  392     const struct type_tbl_s *p;
  393 
  394     if (done)
  395         return;
  396     done++;
  397 
  398     for (p = type_tbl; p->len; p++) {
  399         assert(p->type < FILE_NAMES_SIZE);
  400         file_names[p->type] = p->name;
  401         file_formats[p->type] = p->format;
  402     }
  403     assert(p - type_tbl == FILE_NAMES_SIZE);
  404 }
  405 
  406 private int
  407 add_mlist(struct mlist *mlp, struct magic_map *map, size_t idx)
  408 {
  409     struct mlist *ml;
  410 
  411     mlp->map = NULL;
  412     if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL)
  413         return -1;
  414 
  415     ml->map = idx == 0 ? map : NULL;
  416     ml->magic = map->magic[idx];
  417     ml->nmagic = map->nmagic[idx];
  418 
  419     mlp->prev->next = ml;
  420     ml->prev = mlp->prev;
  421     ml->next = mlp;
  422     mlp->prev = ml;
  423     return 0;
  424 }
  425 
  426 /*
  427  * Handle one file or directory.
  428  */
  429 private int
  430 apprentice_1(struct magic_set *ms, const char *fn, int action)
  431 {
  432     struct magic_map *map;
  433 #ifndef COMPILE_ONLY
  434     struct mlist *ml;
  435     size_t i;
  436 #endif
  437 
  438     if (magicsize != FILE_MAGICSIZE) {
  439         file_error(ms, 0, "magic element size %lu != %lu",
  440             (unsigned long)sizeof(*map->magic[0]),
  441             (unsigned long)FILE_MAGICSIZE);
  442         return -1;
  443     }
  444 
  445     if (action == FILE_COMPILE) {
  446         map = apprentice_load(ms, fn, action);
  447         if (map == NULL)
  448             return -1;
  449         return apprentice_compile(ms, map, fn);
  450     }
  451 
  452 #ifndef COMPILE_ONLY
  453     map = apprentice_map(ms, fn);
  454     if (map == (struct magic_map *)-1)
  455         return -1;
  456     if (map == NULL) {
  457         if (ms->flags & MAGIC_CHECK)
  458             file_magwarn(ms, "using regular magic file `%s'", fn);
  459         map = apprentice_load(ms, fn, action);
  460         if (map == NULL)
  461             return -1;
  462     }
  463 
  464     for (i = 0; i < MAGIC_SETS; i++) {
  465         if (add_mlist(ms->mlist[i], map, i) == -1) {
  466             file_oomem(ms, sizeof(*ml));
  467             return -1;
  468         }
  469     }
  470 
  471     if (action == FILE_LIST) {
  472         for (i = 0; i < MAGIC_SETS; i++) {
  473             printf("Set %" SIZE_T_FORMAT "u:\nBinary patterns:\n",
  474                 i);
  475             apprentice_list(ms->mlist[i], BINTEST);
  476             printf("Text patterns:\n");
  477             apprentice_list(ms->mlist[i], TEXTTEST);
  478         }
  479     }
  480     return 0;
  481 #else
  482     return 0;
  483 #endif /* COMPILE_ONLY */
  484 }
  485 
  486 protected void
  487 file_ms_free(struct magic_set *ms)
  488 {
  489     size_t i;
  490     if (ms == NULL)
  491         return;
  492     for (i = 0; i < MAGIC_SETS; i++)
  493         mlist_free(ms->mlist[i]);
  494     free(ms->o.pbuf);
  495     free(ms->o.buf);
  496     free(ms->c.li);
  497     free(ms);
  498 }
  499 
  500 protected struct magic_set *
  501 file_ms_alloc(int flags)
  502 {
  503     struct magic_set *ms;
  504     size_t i, len;
  505 
  506     if ((ms = CAST(struct magic_set *, calloc((size_t)1,
  507         sizeof(struct magic_set)))) == NULL)
  508         return NULL;
  509 
  510     if (magic_setflags(ms, flags) == -1) {
  511         errno = EINVAL;
  512         goto free;
  513     }
  514 
  515     ms->o.buf = ms->o.pbuf = NULL;
  516     len = (ms->c.len = 10) * sizeof(*ms->c.li);
  517 
  518     if ((ms->c.li = CAST(struct level_info *, malloc(len))) == NULL)
  519         goto free;
  520 
  521     ms->event_flags = 0;
  522     ms->error = -1;
  523     for (i = 0; i < MAGIC_SETS; i++)
  524         ms->mlist[i] = NULL;
  525     ms->file = "unknown";
  526     ms->line = 0;
  527     ms->indir_max = FILE_INDIR_MAX;
  528     ms->name_max = FILE_NAME_MAX;
  529     ms->elf_shnum_max = FILE_ELF_SHNUM_MAX;
  530     ms->elf_phnum_max = FILE_ELF_PHNUM_MAX;
  531     ms->elf_notes_max = FILE_ELF_NOTES_MAX;
  532     ms->regex_max = FILE_REGEX_MAX;
  533     ms->bytes_max = FILE_BYTES_MAX;
  534     return ms;
  535 free:
  536     free(ms);
  537     return NULL;
  538 }
  539 
  540 private void
  541 apprentice_unmap(struct magic_map *map)
  542 {
  543     size_t i;
  544     if (map == NULL)
  545         return;
  546 
  547     switch (map->type) {
  548     case MAP_TYPE_USER:
  549         break;
  550     case MAP_TYPE_MALLOC:
  551         for (i = 0; i < MAGIC_SETS; i++) {
  552             void *b = map->magic[i];
  553             void *p = map->p;
  554             if (CAST(char *, b) >= CAST(char *, p) &&
  555                 CAST(char *, b) <= CAST(char *, p) + map->len)
  556                 continue;
  557             free(map->magic[i]);
  558         }
  559         free(map->p);
  560         break;
  561 #ifdef QUICK
  562     case MAP_TYPE_MMAP:
  563         if (map->p && map->p != MAP_FAILED)
  564             (void)munmap(map->p, map->len);
  565         break;
  566 #endif
  567     default:
  568         abort();
  569     }
  570     free(map);
  571 }
  572 
  573 private struct mlist *
  574 mlist_alloc(void)
  575 {
  576     struct mlist *mlist;
  577     if ((mlist = CAST(struct mlist *, calloc(1, sizeof(*mlist)))) == NULL) {
  578         return NULL;
  579     }
  580     mlist->next = mlist->prev = mlist;
  581     return mlist;
  582 }
  583 
  584 private void
  585 mlist_free_one(struct mlist *ml)
  586 {
  587     if (ml->map)
  588         apprentice_unmap(CAST(struct magic_map *, ml->map));
  589     free(ml);
  590 }
  591 
  592 private void
  593 mlist_free(struct mlist *mlist)
  594 {
  595     struct mlist *ml, *next;
  596 
  597     if (mlist == NULL)
  598         return;
  599 
  600     for (ml = mlist->next; ml != mlist; ml = next) {
  601         next = ml->next;
  602         mlist_free_one(ml);
  603     }
  604     mlist_free_one(mlist);
  605 }
  606 
  607 #ifndef COMPILE_ONLY
  608 /* void **bufs: an array of compiled magic files */
  609 protected int
  610 buffer_apprentice(struct magic_set *ms, struct magic **bufs,
  611     size_t *sizes, size_t nbufs)
  612 {
  613     size_t i, j;
  614     struct mlist *ml;
  615     struct magic_map *map;
  616 
  617     if (nbufs == 0)
  618         return -1;
  619 
  620     (void)file_reset(ms, 0);
  621 
  622     init_file_tables();
  623 
  624     for (i = 0; i < MAGIC_SETS; i++) {
  625         mlist_free(ms->mlist[i]);
  626         if ((ms->mlist[i] = mlist_alloc()) == NULL) {
  627             file_oomem(ms, sizeof(*ms->mlist[i]));
  628             goto fail;
  629         }
  630     }
  631 
  632     for (i = 0; i < nbufs; i++) {
  633         map = apprentice_buf(ms, bufs[i], sizes[i]);
  634         if (map == NULL)
  635             goto fail;
  636 
  637         for (j = 0; j < MAGIC_SETS; j++) {
  638             if (add_mlist(ms->mlist[j], map, j) == -1) {
  639                 file_oomem(ms, sizeof(*ml));
  640                 goto fail;
  641             }
  642         }
  643     }
  644 
  645     return 0;
  646 fail:
  647     for (i = 0; i < MAGIC_SETS; i++) {
  648         mlist_free(ms->mlist[i]);
  649         ms->mlist[i] = NULL;
  650     }
  651     return -1;
  652 }
  653 #endif
  654 
  655 /* const char *fn: list of magic files and directories */
  656 protected int
  657 file_apprentice(struct magic_set *ms, const char *fn, int action)
  658 {
  659     char *p, *mfn;
  660     int fileerr, errs = -1;
  661     size_t i;
  662 
  663     (void)file_reset(ms, 0);
  664 
  665     if ((fn = magic_getpath(fn, action)) == NULL)
  666         return -1;
  667 
  668     init_file_tables();
  669 
  670     if ((mfn = strdup(fn)) == NULL) {
  671         file_oomem(ms, strlen(fn));
  672         return -1;
  673     }
  674 
  675     for (i = 0; i < MAGIC_SETS; i++) {
  676         mlist_free(ms->mlist[i]);
  677         if ((ms->mlist[i] = mlist_alloc()) == NULL) {
  678             file_oomem(ms, sizeof(*ms->mlist[i]));
  679             while (i-- > 0) {
  680                 mlist_free(ms->mlist[i]);
  681                 ms->mlist[i] = NULL;
  682             }
  683             free(mfn);
  684             return -1;
  685         }
  686     }
  687     fn = mfn;
  688 
  689     while (fn) {
  690         p = strchr(fn, PATHSEP);
  691         if (p)
  692             *p++ = '\0';
  693         if (*fn == '\0')
  694             break;
  695         fileerr = apprentice_1(ms, fn, action);
  696         errs = MAX(errs, fileerr);
  697         fn = p;
  698     }
  699 
  700     free(mfn);
  701 
  702     if (errs == -1) {
  703         for (i = 0; i < MAGIC_SETS; i++) {
  704             mlist_free(ms->mlist[i]);
  705             ms->mlist[i] = NULL;
  706         }
  707         file_error(ms, 0, "could not find any valid magic files!");
  708         return -1;
  709     }
  710 
  711 #if 0
  712     /*
  713      * Always leave the database loaded
  714      */
  715     if (action == FILE_LOAD)
  716         return 0;
  717 
  718     for (i = 0; i < MAGIC_SETS; i++) {
  719         mlist_free(ms->mlist[i]);
  720         ms->mlist[i] = NULL;
  721     }
  722 #endif
  723 
  724     switch (action) {
  725     case FILE_LOAD:
  726     case FILE_COMPILE:
  727     case FILE_CHECK:
  728     case FILE_LIST:
  729         return 0;
  730     default:
  731         file_error(ms, 0, "Invalid action %d", action);
  732         return -1;
  733     }
  734 }
  735 
  736 /*
  737  * Compute the real length of a magic expression, for the purposes
  738  * of determining how "strong" a magic expression is (approximating
  739  * how specific its matches are):
  740  *  - magic characters count 0 unless escaped.
  741  *  - [] expressions count 1
  742  *  - {} expressions count 0
  743  *  - regular characters or escaped magic characters count 1
  744  *  - 0 length expressions count as one
  745  */
  746 private size_t
  747 nonmagic(const char *str)
  748 {
  749     const char *p;
  750     size_t rv = 0;
  751 
  752     for (p = str; *p; p++)
  753         switch (*p) {
  754         case '\\':  /* Escaped anything counts 1 */
  755             if (!*++p)
  756                 p--;
  757             rv++;
  758             continue;
  759         case '?':   /* Magic characters count 0 */
  760         case '*':
  761         case '.':
  762         case '+':
  763         case '^':
  764         case '$':
  765             continue;
  766         case '[':   /* Bracketed expressions count 1 the ']' */
  767             while (*p && *p != ']')
  768                 p++;
  769             p--;
  770             continue;
  771         case '{':   /* Braced expressions count 0 */
  772             while (*p && *p != '}')
  773                 p++;
  774             if (!*p)
  775                 p--;
  776             continue;
  777         default:    /* Anything else counts 1 */
  778             rv++;
  779             continue;
  780         }
  781 
  782     return rv == 0 ? 1 : rv;    /* Return at least 1 */
  783 }
  784 
  785 
  786 private size_t
  787 typesize(int type)
  788 {
  789     switch (type) {
  790     case FILE_BYTE:
  791         return 1;
  792 
  793     case FILE_SHORT:
  794     case FILE_LESHORT:
  795     case FILE_BESHORT:
  796         return 2;
  797 
  798     case FILE_LONG:
  799     case FILE_LELONG:
  800     case FILE_BELONG:
  801     case FILE_MELONG:
  802         return 4;
  803 
  804     case FILE_DATE:
  805     case FILE_LEDATE:
  806     case FILE_BEDATE:
  807     case FILE_MEDATE:
  808     case FILE_LDATE:
  809     case FILE_LELDATE:
  810     case FILE_BELDATE:
  811     case FILE_MELDATE:
  812     case FILE_FLOAT:
  813     case FILE_BEFLOAT:
  814     case FILE_LEFLOAT:
  815         return 4;
  816 
  817     case FILE_QUAD:
  818     case FILE_BEQUAD:
  819     case FILE_LEQUAD:
  820     case FILE_QDATE:
  821     case FILE_LEQDATE:
  822     case FILE_BEQDATE:
  823     case FILE_QLDATE:
  824     case FILE_LEQLDATE:
  825     case FILE_BEQLDATE:
  826     case FILE_QWDATE:
  827     case FILE_LEQWDATE:
  828     case FILE_BEQWDATE:
  829     case FILE_DOUBLE:
  830     case FILE_BEDOUBLE:
  831     case FILE_LEDOUBLE:
  832         return 8;
  833     default:
  834         return (size_t)~0;
  835     }
  836 }
  837 
  838 /*
  839  * Get weight of this magic entry, for sorting purposes.
  840  */
  841 private size_t
  842 apprentice_magic_strength(const struct magic *m)
  843 {
  844 #define MULT 10U
  845     size_t ts, v;
  846     ssize_t val = 2 * MULT; /* baseline strength */
  847 
  848     switch (m->type) {
  849     case FILE_DEFAULT:  /* make sure this sorts last */
  850         if (m->factor_op != FILE_FACTOR_OP_NONE)
  851             abort();
  852         return 0;
  853 
  854     case FILE_BYTE:
  855     case FILE_SHORT:
  856     case FILE_LESHORT:
  857     case FILE_BESHORT:
  858     case FILE_LONG:
  859     case FILE_LELONG:
  860     case FILE_BELONG:
  861     case FILE_MELONG:
  862     case FILE_DATE:
  863     case FILE_LEDATE:
  864     case FILE_BEDATE:
  865     case FILE_MEDATE:
  866     case FILE_LDATE:
  867     case FILE_LELDATE:
  868     case FILE_BELDATE:
  869     case FILE_MELDATE:
  870     case FILE_FLOAT:
  871     case FILE_BEFLOAT:
  872     case FILE_LEFLOAT:
  873     case FILE_QUAD:
  874     case FILE_BEQUAD:
  875     case FILE_LEQUAD:
  876     case FILE_QDATE:
  877     case FILE_LEQDATE:
  878     case FILE_BEQDATE:
  879     case FILE_QLDATE:
  880     case FILE_LEQLDATE:
  881     case FILE_BEQLDATE:
  882     case FILE_QWDATE:
  883     case FILE_LEQWDATE:
  884     case FILE_BEQWDATE:
  885     case FILE_DOUBLE:
  886     case FILE_BEDOUBLE:
  887     case FILE_LEDOUBLE:
  888         ts = typesize(m->type);
  889         if (ts == (size_t)~0)
  890             abort();
  891         val += ts * MULT;
  892         break;
  893 
  894     case FILE_PSTRING:
  895     case FILE_STRING:
  896         val += m->vallen * MULT;
  897         break;
  898 
  899     case FILE_BESTRING16:
  900     case FILE_LESTRING16:
  901         val += m->vallen * MULT / 2;
  902         break;
  903 
  904     case FILE_SEARCH:
  905         if (m->vallen == 0)
  906             break;
  907         val += m->vallen * MAX(MULT / m->vallen, 1);
  908         break;
  909 
  910     case FILE_REGEX:
  911         v = nonmagic(m->value.s);
  912         val += v * MAX(MULT / v, 1);
  913         break;
  914 
  915     case FILE_INDIRECT:
  916     case FILE_NAME:
  917     case FILE_USE:
  918         break;
  919 
  920     case FILE_DER:
  921         val += MULT;
  922         break;
  923 
  924     default:
  925         (void)fprintf(stderr, "Bad type %d\n", m->type);
  926         abort();
  927     }
  928 
  929     switch (m->reln) {
  930     case 'x':   /* matches anything penalize */
  931     case '!':       /* matches almost anything penalize */
  932         val = 0;
  933         break;
  934 
  935     case '=':   /* Exact match, prefer */
  936         val += MULT;
  937         break;
  938 
  939     case '>':
  940     case '<':   /* comparison match reduce strength */
  941         val -= 2 * MULT;
  942         break;
  943 
  944     case '^':
  945     case '&':   /* masking bits, we could count them too */
  946         val -= MULT;
  947         break;
  948 
  949     default:
  950         (void)fprintf(stderr, "Bad relation %c\n", m->reln);
  951         abort();
  952     }
  953 
  954     switch (m->factor_op) {
  955     case FILE_FACTOR_OP_NONE:
  956         break;
  957     case FILE_FACTOR_OP_PLUS:
  958         val += m->factor;
  959         break;
  960     case FILE_FACTOR_OP_MINUS:
  961         val -= m->factor;
  962         break;
  963     case FILE_FACTOR_OP_TIMES:
  964         val *= m->factor;
  965         break;
  966     case FILE_FACTOR_OP_DIV:
  967         val /= m->factor;
  968         break;
  969     default:
  970         abort();
  971     }
  972 
  973     if (val <= 0)   /* ensure we only return 0 for FILE_DEFAULT */
  974         val = 1;
  975 
  976     /*
  977      * Magic entries with no description get a bonus because they depend
  978      * on subsequent magic entries to print something.
  979      */
  980     if (m->desc[0] == '\0')
  981         val++;
  982     return val;
  983 }
  984 
  985 /*
  986  * Sort callback for sorting entries by "strength" (basically length)
  987  */
  988 private int
  989 apprentice_sort(const void *a, const void *b)
  990 {
  991     const struct magic_entry *ma = CAST(const struct magic_entry *, a);
  992     const struct magic_entry *mb = CAST(const struct magic_entry *, b);
  993     size_t sa = apprentice_magic_strength(ma->mp);
  994     size_t sb = apprentice_magic_strength(mb->mp);
  995     if (sa == sb)
  996         return 0;
  997     else if (sa > sb)
  998         return -1;
  999     else
 1000         return 1;
 1001 }
 1002 
 1003 /*
 1004  * Shows sorted patterns list in the order which is used for the matching
 1005  */
 1006 private void
 1007 apprentice_list(struct mlist *mlist, int mode)
 1008 {
 1009     uint32_t magindex = 0;
 1010     struct mlist *ml;
 1011     for (ml = mlist->next; ml != mlist; ml = ml->next) {
 1012         for (magindex = 0; magindex < ml->nmagic; magindex++) {
 1013             struct magic *m = &ml->magic[magindex];
 1014             if ((m->flag & mode) != mode) {
 1015                 /* Skip sub-tests */
 1016                 while (magindex + 1 < ml->nmagic &&
 1017                        ml->magic[magindex + 1].cont_level != 0)
 1018                     ++magindex;
 1019                 continue; /* Skip to next top-level test*/
 1020             }
 1021 
 1022             /*
 1023              * Try to iterate over the tree until we find item with
 1024              * description/mimetype.
 1025              */
 1026             while (magindex + 1 < ml->nmagic &&
 1027                    ml->magic[magindex + 1].cont_level != 0 &&
 1028                    *ml->magic[magindex].desc == '\0' &&
 1029                    *ml->magic[magindex].mimetype == '\0')
 1030                 magindex++;
 1031 
 1032             printf("Strength = %3" SIZE_T_FORMAT "u@%u: %s [%s]\n",
 1033                 apprentice_magic_strength(m),
 1034                 ml->magic[magindex].lineno,
 1035                 ml->magic[magindex].desc,
 1036                 ml->magic[magindex].mimetype);
 1037         }
 1038     }
 1039 }
 1040 
 1041 private void
 1042 set_test_type(struct magic *mstart, struct magic *m)
 1043 {
 1044     switch (m->type) {
 1045     case FILE_BYTE:
 1046     case FILE_SHORT:
 1047     case FILE_LONG:
 1048     case FILE_DATE:
 1049     case FILE_BESHORT:
 1050     case FILE_BELONG:
 1051     case FILE_BEDATE:
 1052     case FILE_LESHORT:
 1053     case FILE_LELONG:
 1054     case FILE_LEDATE:
 1055     case FILE_LDATE:
 1056     case FILE_BELDATE:
 1057     case FILE_LELDATE:
 1058     case FILE_MEDATE:
 1059     case FILE_MELDATE:
 1060     case FILE_MELONG:
 1061     case FILE_QUAD:
 1062     case FILE_LEQUAD:
 1063     case FILE_BEQUAD:
 1064     case FILE_QDATE:
 1065     case FILE_LEQDATE:
 1066     case FILE_BEQDATE:
 1067     case FILE_QLDATE:
 1068     case FILE_LEQLDATE:
 1069     case FILE_BEQLDATE:
 1070     case FILE_QWDATE:
 1071     case FILE_LEQWDATE:
 1072     case FILE_BEQWDATE:
 1073     case FILE_FLOAT:
 1074     case FILE_BEFLOAT:
 1075     case FILE_LEFLOAT:
 1076     case FILE_DOUBLE:
 1077     case FILE_BEDOUBLE:
 1078     case FILE_LEDOUBLE:
 1079     case FILE_DER:
 1080         mstart->flag |= BINTEST;
 1081         break;
 1082     case FILE_STRING:
 1083     case FILE_PSTRING:
 1084     case FILE_BESTRING16:
 1085     case FILE_LESTRING16:
 1086         /* Allow text overrides */
 1087         if (mstart->str_flags & STRING_TEXTTEST)
 1088             mstart->flag |= TEXTTEST;
 1089         else
 1090             mstart->flag |= BINTEST;
 1091         break;
 1092     case FILE_REGEX:
 1093     case FILE_SEARCH:
 1094         /* Check for override */
 1095         if (mstart->str_flags & STRING_BINTEST)
 1096             mstart->flag |= BINTEST;
 1097         if (mstart->str_flags & STRING_TEXTTEST)
 1098             mstart->flag |= TEXTTEST;
 1099 
 1100         if (mstart->flag & (TEXTTEST|BINTEST))
 1101             break;
 1102 
 1103         /* binary test if pattern is not text */
 1104         if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
 1105             NULL) <= 0)
 1106             mstart->flag |= BINTEST;
 1107         else
 1108             mstart->flag |= TEXTTEST;
 1109         break;
 1110     case FILE_DEFAULT:
 1111         /* can't deduce anything; we shouldn't see this at the
 1112            top level anyway */
 1113         break;
 1114     case FILE_INVALID:
 1115     default:
 1116         /* invalid search type, but no need to complain here */
 1117         break;
 1118     }
 1119 }
 1120 
 1121 private int
 1122 addentry(struct magic_set *ms, struct magic_entry *me,
 1123    struct magic_entry_set *mset)
 1124 {
 1125     size_t i = me->mp->type == FILE_NAME ? 1 : 0;
 1126     if (mset[i].count == mset[i].max) {
 1127         struct magic_entry *mp;
 1128 
 1129         mset[i].max += ALLOC_INCR;
 1130         if ((mp = CAST(struct magic_entry *,
 1131             realloc(mset[i].me, sizeof(*mp) * mset[i].max))) ==
 1132             NULL) {
 1133             file_oomem(ms, sizeof(*mp) * mset[i].max);
 1134             return -1;
 1135         }
 1136         (void)memset(&mp[mset[i].count], 0, sizeof(*mp) *
 1137             ALLOC_INCR);
 1138         mset[i].me = mp;
 1139     }
 1140     mset[i].me[mset[i].count++] = *me;
 1141     memset(me, 0, sizeof(*me));
 1142     return 0;
 1143 }
 1144 
 1145 /*
 1146  * Load and parse one file.
 1147  */
 1148 private void
 1149 load_1(struct magic_set *ms, int action, const char *fn, int *errs,
 1150    struct magic_entry_set *mset)
 1151 {
 1152     size_t lineno = 0, llen = 0;
 1153     char *line = NULL;
 1154     ssize_t len;
 1155     struct magic_entry me;
 1156 
 1157     FILE *f = fopen(ms->file = fn, "r");
 1158     if (f == NULL) {
 1159         if (errno != ENOENT)
 1160             file_error(ms, errno, "cannot read magic file `%s'",
 1161                    fn);
 1162         (*errs)++;
 1163         return;
 1164     }
 1165 
 1166     memset(&me, 0, sizeof(me));
 1167     /* read and parse this file */
 1168     for (ms->line = 1; (len = getline(&line, &llen, f)) != -1;
 1169         ms->line++) {
 1170         if (len == 0) /* null line, garbage, etc */
 1171             continue;
 1172         if (line[len - 1] == '\n') {
 1173             lineno++;
 1174             line[len - 1] = '\0'; /* delete newline */
 1175         }
 1176         switch (line[0]) {
 1177         case '\0':  /* empty, do not parse */
 1178         case '#':   /* comment, do not parse */
 1179             continue;
 1180         case '!':
 1181             if (line[1] == ':') {
 1182                 size_t i;
 1183 
 1184                 for (i = 0; bang[i].name != NULL; i++) {
 1185                     if ((size_t)(len - 2) > bang[i].len &&
 1186                         memcmp(bang[i].name, line + 2,
 1187                         bang[i].len) == 0)
 1188                         break;
 1189                 }
 1190                 if (bang[i].name == NULL) {
 1191                     file_error(ms, 0,
 1192                         "Unknown !: entry `%s'", line);
 1193                     (*errs)++;
 1194                     continue;
 1195                 }
 1196                 if (me.mp == NULL) {
 1197                     file_error(ms, 0,
 1198                         "No current entry for :!%s type",
 1199                         bang[i].name);
 1200                     (*errs)++;
 1201                     continue;
 1202                 }
 1203                 if ((*bang[i].fun)(ms, &me,
 1204                     line + bang[i].len + 2) != 0) {
 1205                     (*errs)++;
 1206                     continue;
 1207                 }
 1208                 continue;
 1209             }
 1210             /*FALLTHROUGH*/
 1211         default:
 1212         again:
 1213             switch (parse(ms, &me, line, lineno, action)) {
 1214             case 0:
 1215                 continue;
 1216             case 1:
 1217                 (void)addentry(ms, &me, mset);
 1218                 goto again;
 1219             default:
 1220                 (*errs)++;
 1221                 break;
 1222             }
 1223         }
 1224     }
 1225     if (me.mp)
 1226         (void)addentry(ms, &me, mset);
 1227     free(line);
 1228     (void)fclose(f);
 1229 }
 1230 
 /*
  * qsort(3) comparator for an array of C-string pointers: each argument
  * points at a `char *' element and the strings are ordered with strcmp().
  * Used to sort the magic file names collected from a directory.
  */
 private int
 cmpstrp(const void *p1, const void *p2)
 {
     const char *lhs = *(char *const *)p1;
     const char *rhs = *(char *const *)p2;

     return strcmp(lhs, rhs);
 }
 1240 
 1241 
 1242 private uint32_t
 1243 set_text_binary(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
 1244     uint32_t starttest)
 1245 {
 1246     static const char text[] = "text";
 1247     static const char binary[] = "binary";
 1248     static const size_t len = sizeof(text);
 1249 
 1250     uint32_t i = starttest;
 1251 
 1252     do {
 1253         set_test_type(me[starttest].mp, me[i].mp);
 1254         if ((ms->flags & MAGIC_DEBUG) == 0)
 1255             continue;
 1256         (void)fprintf(stderr, "%s%s%s: %s\n",
 1257             me[i].mp->mimetype,
 1258             me[i].mp->mimetype[0] == '\0' ? "" : "; ",
 1259             me[i].mp->desc[0] ? me[i].mp->desc : "(no description)",
 1260             me[i].mp->flag & BINTEST ? binary : text);
 1261         if (me[i].mp->flag & BINTEST) {
 1262             char *p = strstr(me[i].mp->desc, text);
 1263             if (p && (p == me[i].mp->desc ||
 1264                 isspace((unsigned char)p[-1])) &&
 1265                 (p + len - me[i].mp->desc == MAXstring
 1266                 || (p[len] == '\0' ||
 1267                 isspace((unsigned char)p[len]))))
 1268                 (void)fprintf(stderr, "*** Possible "
 1269                     "binary test for text type\n");
 1270         }
 1271     } while (++i < nme && me[i].mp->cont_level != 0);
 1272     return i;
 1273 }
 1274 
 1275 private void
 1276 set_last_default(struct magic_set *ms, struct magic_entry *me, uint32_t nme)
 1277 {
 1278     uint32_t i;
 1279     for (i = 0; i < nme; i++) {
 1280         if (me[i].mp->cont_level == 0 &&
 1281             me[i].mp->type == FILE_DEFAULT) {
 1282             while (++i < nme)
 1283                 if (me[i].mp->cont_level == 0)
 1284                     break;
 1285             if (i != nme) {
 1286                 /* XXX - Ugh! */
 1287                 ms->line = me[i].mp->lineno;
 1288                 file_magwarn(ms,
 1289                     "level 0 \"default\" did not sort last");
 1290             }
 1291             return;
 1292         }
 1293     }
 1294 }
 1295 
 1296 private int
 1297 coalesce_entries(struct magic_set *ms, struct magic_entry *me, uint32_t nme,
 1298     struct magic **ma, uint32_t *nma)
 1299 {
 1300     uint32_t i, mentrycount = 0;
 1301     size_t slen;
 1302 
 1303     for (i = 0; i < nme; i++)
 1304         mentrycount += me[i].cont_count;
 1305 
 1306     slen = sizeof(**ma) * mentrycount;
 1307     if ((*ma = CAST(struct magic *, malloc(slen))) == NULL) {
 1308         file_oomem(ms, slen);
 1309         return -1;
 1310     }
 1311 
 1312     mentrycount = 0;
 1313     for (i = 0; i < nme; i++) {
 1314         (void)memcpy(*ma + mentrycount, me[i].mp,
 1315             me[i].cont_count * sizeof(**ma));
 1316         mentrycount += me[i].cont_count;
 1317     }
 1318     *nma = mentrycount;
 1319     return 0;
 1320 }
 1321 
 1322 private void
 1323 magic_entry_free(struct magic_entry *me, uint32_t nme)
 1324 {
 1325     uint32_t i;
 1326     if (me == NULL)
 1327         return;
 1328     for (i = 0; i < nme; i++)
 1329         free(me[i].mp);
 1330     free(me);
 1331 }
 1332 
 1333 private struct magic_map *
 1334 apprentice_load(struct magic_set *ms, const char *fn, int action)
 1335 {
 1336     int errs = 0;
 1337     uint32_t i, j;
 1338     size_t files = 0, maxfiles = 0;
 1339     char **filearr = NULL, *mfn;
 1340     struct stat st;
 1341     struct magic_map *map;
 1342     struct magic_entry_set mset[MAGIC_SETS];
 1343     DIR *dir;
 1344     struct dirent *d;
 1345 
 1346     memset(mset, 0, sizeof(mset));
 1347     ms->flags |= MAGIC_CHECK;   /* Enable checks for parsed files */
 1348 
 1349 
 1350     if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL)
 1351     {
 1352         file_oomem(ms, sizeof(*map));
 1353         return NULL;
 1354     }
 1355     map->type = MAP_TYPE_MALLOC;
 1356 
 1357     /* print silly verbose header for USG compat. */
 1358     if (action == FILE_CHECK)
 1359         (void)fprintf(stderr, "%s\n", usg_hdr);
 1360 
 1361     /* load directory or file */
 1362     if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
 1363         dir = opendir(fn);
 1364         if (!dir) {
 1365             errs++;
 1366             goto out;
 1367         }
 1368         while ((d = readdir(dir)) != NULL) {
 1369             if (d->d_name[0] == '.')
 1370                 continue;
 1371             if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) {
 1372                 file_oomem(ms,
 1373                     strlen(fn) + strlen(d->d_name) + 2);
 1374                 errs++;
 1375                 closedir(dir);
 1376                 goto out;
 1377             }
 1378             if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
 1379                 free(mfn);
 1380                 continue;
 1381             }
 1382             if (files >= maxfiles) {
 1383                 size_t mlen;
 1384                 maxfiles = (maxfiles + 1) * 2;
 1385                 mlen = maxfiles * sizeof(*filearr);
 1386                 if ((filearr = CAST(char **,
 1387                     realloc(filearr, mlen))) == NULL) {
 1388                     file_oomem(ms, mlen);
 1389                     free(mfn);
 1390                     closedir(dir);
 1391                     errs++;
 1392                     goto out;
 1393                 }
 1394             }
 1395             filearr[files++] = mfn;
 1396         }
 1397         closedir(dir);
 1398         if (filearr) {
 1399             qsort(filearr, files, sizeof(*filearr), cmpstrp);
 1400             for (i = 0; i < files; i++) {
 1401                 load_1(ms, action, filearr[i], &errs, mset);
 1402                 free(filearr[i]);
 1403             }
 1404             free(filearr);
 1405         }
 1406     } else
 1407         load_1(ms, action, fn, &errs, mset);
 1408     if (errs)
 1409         goto out;
 1410 
 1411     for (j = 0; j < MAGIC_SETS; j++) {
 1412         /* Set types of tests */
 1413         for (i = 0; i < mset[j].count; ) {
 1414             if (mset[j].me[i].mp->cont_level != 0) {
 1415                 i++;
 1416                 continue;
 1417             }
 1418             i = set_text_binary(ms, mset[j].me, mset[j].count, i);
 1419         }
 1420         if (mset[j].me)
 1421             qsort(mset[j].me, mset[j].count, sizeof(*mset[j].me),
 1422                 apprentice_sort);
 1423 
 1424         /*
 1425          * Make sure that any level 0 "default" line is last
 1426          * (if one exists).
 1427          */
 1428         set_last_default(ms, mset[j].me, mset[j].count);
 1429 
 1430         /* coalesce per file arrays into a single one */
 1431         if (coalesce_entries(ms, mset[j].me, mset[j].count,
 1432             &map->magic[j], &map->nmagic[j]) == -1) {
 1433             errs++;
 1434             goto out;
 1435         }
 1436     }
 1437 
 1438 out:
 1439     for (j = 0; j < MAGIC_SETS; j++)
 1440         magic_entry_free(mset[j].me, mset[j].count);
 1441 
 1442     if (errs) {
 1443         apprentice_unmap(map);
 1444         return NULL;
 1445     }
 1446     return map;
 1447 }
 1448 
 1449 /*
 1450  * extend the sign bit if the comparison is to be signed
 1451  */
 1452 protected uint64_t
 1453 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
 1454 {
 1455     if (!(m->flag & UNSIGNED)) {
 1456         switch(m->type) {
 1457         /*
 1458          * Do not remove the casts below.  They are
 1459          * vital.  When later compared with the data,
 1460          * the sign extension must have happened.
 1461          */
 1462         case FILE_BYTE:
 1463             v = (signed char) v;
 1464             break;
 1465         case FILE_SHORT:
 1466         case FILE_BESHORT:
 1467         case FILE_LESHORT:
 1468             v = (short) v;
 1469             break;
 1470         case FILE_DATE:
 1471         case FILE_BEDATE:
 1472         case FILE_LEDATE:
 1473         case FILE_MEDATE:
 1474         case FILE_LDATE:
 1475         case FILE_BELDATE:
 1476         case FILE_LELDATE:
 1477         case FILE_MELDATE:
 1478         case FILE_LONG:
 1479         case FILE_BELONG:
 1480         case FILE_LELONG:
 1481         case FILE_MELONG:
 1482         case FILE_FLOAT:
 1483         case FILE_BEFLOAT:
 1484         case FILE_LEFLOAT:
 1485             v = (int32_t) v;
 1486             break;
 1487         case FILE_QUAD:
 1488         case FILE_BEQUAD:
 1489         case FILE_LEQUAD:
 1490         case FILE_QDATE:
 1491         case FILE_QLDATE:
 1492         case FILE_QWDATE:
 1493         case FILE_BEQDATE:
 1494         case FILE_BEQLDATE:
 1495         case FILE_BEQWDATE:
 1496         case FILE_LEQDATE:
 1497         case FILE_LEQLDATE:
 1498         case FILE_LEQWDATE:
 1499         case FILE_DOUBLE:
 1500         case FILE_BEDOUBLE:
 1501         case FILE_LEDOUBLE:
 1502             v = (int64_t) v;
 1503             break;
 1504         case FILE_STRING:
 1505         case FILE_PSTRING:
 1506         case FILE_BESTRING16:
 1507         case FILE_LESTRING16:
 1508         case FILE_REGEX:
 1509         case FILE_SEARCH:
 1510         case FILE_DEFAULT:
 1511         case FILE_INDIRECT:
 1512         case FILE_NAME:
 1513         case FILE_USE:
 1514         case FILE_CLEAR:
 1515         case FILE_DER:
 1516             break;
 1517         default:
 1518             if (ms->flags & MAGIC_CHECK)
 1519                 file_magwarn(ms, "cannot happen: m->type=%d\n",
 1520                     m->type);
 1521             return ~0U;
 1522         }
 1523     }
 1524     return v;
 1525 }
 1526 
 1527 private int
 1528 string_modifier_check(struct magic_set *ms, struct magic *m)
 1529 {
 1530     if ((ms->flags & MAGIC_CHECK) == 0)
 1531         return 0;
 1532 
 1533     if ((m->type != FILE_REGEX || (m->str_flags & REGEX_LINE_COUNT) == 0) &&
 1534         (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0)) {
 1535         file_magwarn(ms,
 1536             "'/BHhLl' modifiers are only allowed for pascal strings\n");
 1537         return -1;
 1538     }
 1539     switch (m->type) {
 1540     case FILE_BESTRING16:
 1541     case FILE_LESTRING16:
 1542         if (m->str_flags != 0) {
 1543             file_magwarn(ms,
 1544                 "no modifiers allowed for 16-bit strings\n");
 1545             return -1;
 1546         }
 1547         break;
 1548     case FILE_STRING:
 1549     case FILE_PSTRING:
 1550         if ((m->str_flags & REGEX_OFFSET_START) != 0) {
 1551             file_magwarn(ms,
 1552                 "'/%c' only allowed on regex and search\n",
 1553                 CHAR_REGEX_OFFSET_START);
 1554             return -1;
 1555         }
 1556         break;
 1557     case FILE_SEARCH:
 1558         if (m->str_range == 0) {
 1559             file_magwarn(ms,
 1560                 "missing range; defaulting to %d\n",
 1561                             STRING_DEFAULT_RANGE);
 1562             m->str_range = STRING_DEFAULT_RANGE;
 1563             return -1;
 1564         }
 1565         break;
 1566     case FILE_REGEX:
 1567         if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
 1568             file_magwarn(ms, "'/%c' not allowed on regex\n",
 1569                 CHAR_COMPACT_WHITESPACE);
 1570             return -1;
 1571         }
 1572         if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
 1573             file_magwarn(ms, "'/%c' not allowed on regex\n",
 1574                 CHAR_COMPACT_OPTIONAL_WHITESPACE);
 1575             return -1;
 1576         }
 1577         break;
 1578     default:
 1579         file_magwarn(ms, "coding error: m->type=%d\n",
 1580             m->type);
 1581         return -1;
 1582     }
 1583     return 0;
 1584 }
 1585 
 1586 private int
 1587 get_op(char c)
 1588 {
 1589     switch (c) {
 1590     case '&':
 1591         return FILE_OPAND;
 1592     case '|':
 1593         return FILE_OPOR;
 1594     case '^':
 1595         return FILE_OPXOR;
 1596     case '+':
 1597         return FILE_OPADD;
 1598     case '-':
 1599         return FILE_OPMINUS;
 1600     case '*':
 1601         return FILE_OPMULTIPLY;
 1602     case '/':
 1603         return FILE_OPDIVIDE;
 1604     case '%':
 1605         return FILE_OPMODULO;
 1606     default:
 1607         return -1;
 1608     }
 1609 }
 1610 
 1611 #ifdef ENABLE_CONDITIONALS
 1612 private int
 1613 get_cond(const char *l, const char **t)
 1614 {
 1615     static const struct cond_tbl_s {
 1616         char name[8];
 1617         size_t len;
 1618         int cond;
 1619     } cond_tbl[] = {
 1620         { "if",     2,  COND_IF },
 1621         { "elif",   4,  COND_ELIF },
 1622         { "else",   4,  COND_ELSE },
 1623         { "",       0,  COND_NONE },
 1624     };
 1625     const struct cond_tbl_s *p;
 1626 
 1627     for (p = cond_tbl; p->len; p++) {
 1628         if (strncmp(l, p->name, p->len) == 0 &&
 1629             isspace((unsigned char)l[p->len])) {
 1630             if (t)
 1631                 *t = l + p->len;
 1632             break;
 1633         }
 1634     }
 1635     return p->cond;
 1636 }
 1637 
 1638 private int
 1639 check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
 1640 {
 1641     int last_cond;
 1642     last_cond = ms->c.li[cont_level].last_cond;
 1643 
 1644     switch (cond) {
 1645     case COND_IF:
 1646         if (last_cond != COND_NONE && last_cond != COND_ELIF) {
 1647             if (ms->flags & MAGIC_CHECK)
 1648                 file_magwarn(ms, "syntax error: `if'");
 1649             return -1;
 1650         }
 1651         last_cond = COND_IF;
 1652         break;
 1653 
 1654     case COND_ELIF:
 1655         if (last_cond != COND_IF && last_cond != COND_ELIF) {
 1656             if (ms->flags & MAGIC_CHECK)
 1657                 file_magwarn(ms, "syntax error: `elif'");
 1658             return -1;
 1659         }
 1660         last_cond = COND_ELIF;
 1661         break;
 1662 
 1663     case COND_ELSE:
 1664         if (last_cond != COND_IF && last_cond != COND_ELIF) {
 1665             if (ms->flags & MAGIC_CHECK)
 1666                 file_magwarn(ms, "syntax error: `else'");
 1667             return -1;
 1668         }
 1669         last_cond = COND_NONE;
 1670         break;
 1671 
 1672     case COND_NONE:
 1673         last_cond = COND_NONE;
 1674         break;
 1675     }
 1676 
 1677     ms->c.li[cont_level].last_cond = last_cond;
 1678     return 0;
 1679 }
 1680 #endif /* ENABLE_CONDITIONALS */
 1681 
 1682 private int
 1683 parse_indirect_modifier(struct magic_set *ms, struct magic *m, const char **lp)
 1684 {
 1685     const char *l = *lp;
 1686 
 1687     while (!isspace((unsigned char)*++l))
 1688         switch (*l) {
 1689         case CHAR_INDIRECT_RELATIVE:
 1690             m->str_flags |= INDIRECT_RELATIVE;
 1691             break;
 1692         default:
 1693             if (ms->flags & MAGIC_CHECK)
 1694                 file_magwarn(ms, "indirect modifier `%c' "
 1695                     "invalid", *l);
 1696             *lp = l;
 1697             return -1;
 1698         }
 1699     *lp = l;
 1700     return 0;
 1701 }
 1702 
 1703 private void
 1704 parse_op_modifier(struct magic_set *ms, struct magic *m, const char **lp,
 1705     int op)
 1706 {
 1707     const char *l = *lp;
 1708     char *t;
 1709     uint64_t val;
 1710 
 1711     ++l;
 1712     m->mask_op |= op;
 1713     val = (uint64_t)strtoull(l, &t, 0);
 1714     l = t;
 1715     m->num_mask = file_signextend(ms, m, val);
 1716     eatsize(&l);
 1717     *lp = l;
 1718 }
 1719 
 1720 private int
 1721 parse_string_modifier(struct magic_set *ms, struct magic *m, const char **lp)
 1722 {
 1723     const char *l = *lp;
 1724     char *t;
 1725     int have_range = 0;
 1726 
 1727     while (!isspace((unsigned char)*++l)) {
 1728         switch (*l) {
 1729         case '0':  case '1':  case '2':
 1730         case '3':  case '4':  case '5':
 1731         case '6':  case '7':  case '8':
 1732         case '9':
 1733             if (have_range && (ms->flags & MAGIC_CHECK))
 1734                 file_magwarn(ms, "multiple ranges");
 1735             have_range = 1;
 1736             m->str_range = CAST(uint32_t, strtoul(l, &t, 0));
 1737             if (m->str_range == 0)
 1738                 file_magwarn(ms, "zero range");
 1739             l = t - 1;
 1740             break;
 1741         case CHAR_COMPACT_WHITESPACE:
 1742             m->str_flags |= STRING_COMPACT_WHITESPACE;
 1743             break;
 1744         case CHAR_COMPACT_OPTIONAL_WHITESPACE:
 1745             m->str_flags |= STRING_COMPACT_OPTIONAL_WHITESPACE;
 1746             break;
 1747         case CHAR_IGNORE_LOWERCASE:
 1748             m->str_flags |= STRING_IGNORE_LOWERCASE;
 1749             break;
 1750         case CHAR_IGNORE_UPPERCASE:
 1751             m->str_flags |= STRING_IGNORE_UPPERCASE;
 1752             break;
 1753         case CHAR_REGEX_OFFSET_START:
 1754             m->str_flags |= REGEX_OFFSET_START;
 1755             break;
 1756         case CHAR_BINTEST:
 1757             m->str_flags |= STRING_BINTEST;
 1758             break;
 1759         case CHAR_TEXTTEST:
 1760             m->str_flags |= STRING_TEXTTEST;
 1761             break;
 1762         case CHAR_TRIM:
 1763             m->str_flags |= STRING_TRIM;
 1764             break;
 1765         case CHAR_PSTRING_1_LE:
 1766 #define SET_LENGTH(a) m->str_flags = (m->str_flags & ~PSTRING_LEN) | (a)
 1767             if (m->type != FILE_PSTRING)
 1768                 goto bad;
 1769             SET_LENGTH(PSTRING_1_LE);
 1770             break;
 1771         case CHAR_PSTRING_2_BE:
 1772             if (m->type != FILE_PSTRING)
 1773                 goto bad;
 1774             SET_LENGTH(PSTRING_2_BE);
 1775             break;
 1776         case CHAR_PSTRING_2_LE:
 1777             if (m->type != FILE_PSTRING)
 1778                 goto bad;
 1779             SET_LENGTH(PSTRING_2_LE);
 1780             break;
 1781         case CHAR_PSTRING_4_BE:
 1782             if (m->type != FILE_PSTRING)
 1783                 goto bad;
 1784             SET_LENGTH(PSTRING_4_BE);
 1785             break;
 1786         case CHAR_PSTRING_4_LE:
 1787             switch (m->type) {
 1788             case FILE_PSTRING:
 1789             case FILE_REGEX:
 1790                 break;
 1791             default:
 1792                 goto bad;
 1793             }
 1794             SET_LENGTH(PSTRING_4_LE);
 1795             break;
 1796         case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
 1797             if (m->type != FILE_PSTRING)
 1798                 goto bad;
 1799             m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
 1800             break;
 1801         default:
 1802         bad:
 1803             if (ms->flags & MAGIC_CHECK)
 1804                 file_magwarn(ms, "string modifier `%c' "
 1805                     "invalid", *l);
 1806             goto out;
 1807         }
 1808         /* allow multiple '/' for readability */
 1809         if (l[1] == '/' && !isspace((unsigned char)l[2]))
 1810             l++;
 1811     }
 1812     if (string_modifier_check(ms, m) == -1)
 1813         goto out;
 1814     *lp = l;
 1815     return 0;
 1816 out:
 1817     *lp = l;
 1818     return -1;
 1819 }
 1820 
 1821 /*
 1822  * parse one line from magic file, put into magic[index++] if valid
 1823  */
 1824 private int
 1825 parse(struct magic_set *ms, struct magic_entry *me, const char *line,
 1826     size_t lineno, int action)
 1827 {
 1828 #ifdef ENABLE_CONDITIONALS
 1829     static uint32_t last_cont_level = 0;
 1830 #endif
 1831     size_t i;
 1832     struct magic *m;
 1833     const char *l = line;
 1834     char *t;
 1835     int op;
 1836     uint32_t cont_level;
 1837     int32_t diff;
 1838 
 1839     cont_level = 0;
 1840 
 1841     /*
 1842      * Parse the offset.
 1843      */
 1844     while (*l == '>') {
 1845         ++l;        /* step over */
 1846         cont_level++;
 1847     }
 1848 #ifdef ENABLE_CONDITIONALS
 1849     if (cont_level == 0 || cont_level > last_cont_level)
 1850         if (file_check_mem(ms, cont_level) == -1)
 1851             return -1;
 1852     last_cont_level = cont_level;
 1853 #endif
 1854     if (cont_level != 0) {
 1855         if (me->mp == NULL) {
 1856             file_magerror(ms, "No current entry for continuation");
 1857             return -1;
 1858         }
 1859         if (me->cont_count == 0) {
 1860             file_magerror(ms, "Continuations present with 0 count");
 1861             return -1;
 1862         }
 1863         m = &me->mp[me->cont_count - 1];
 1864         diff = (int32_t)cont_level - (int32_t)m->cont_level;
 1865         if (diff > 1)
 1866             file_magwarn(ms, "New continuation level %u is more "
 1867                 "than one larger than current level %u", cont_level,
 1868                 m->cont_level);
 1869         if (me->cont_count == me->max_count) {
 1870             struct magic *nm;
 1871             size_t cnt = me->max_count + ALLOC_CHUNK;
 1872             if ((nm = CAST(struct magic *, realloc(me->mp,
 1873                 sizeof(*nm) * cnt))) == NULL) {
 1874                 file_oomem(ms, sizeof(*nm) * cnt);
 1875                 return -1;
 1876             }
 1877             me->mp = nm;
 1878             me->max_count = CAST(uint32_t, cnt);
 1879         }
 1880         m = &me->mp[me->cont_count++];
 1881         (void)memset(m, 0, sizeof(*m));
 1882         m->cont_level = cont_level;
 1883     } else {
 1884         static const size_t len = sizeof(*m) * ALLOC_CHUNK;
 1885         if (me->mp != NULL)
 1886             return 1;
 1887         if ((m = CAST(struct magic *, malloc(len))) == NULL) {
 1888             file_oomem(ms, len);
 1889             return -1;
 1890         }
 1891         me->mp = m;
 1892         me->max_count = ALLOC_CHUNK;
 1893         (void)memset(m, 0, sizeof(*m));
 1894         m->factor_op = FILE_FACTOR_OP_NONE;
 1895         m->cont_level = 0;
 1896         me->cont_count = 1;
 1897     }
 1898     m->lineno = CAST(uint32_t, lineno);
 1899 
 1900     if (*l == '&') {  /* m->cont_level == 0 checked below. */
 1901                 ++l;            /* step over */
 1902                 m->flag |= OFFADD;
 1903         }
 1904     if (*l == '(') {
 1905         ++l;        /* step over */
 1906         m->flag |= INDIR;
 1907         if (m->flag & OFFADD)
 1908             m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
 1909 
 1910         if (*l == '&') {  /* m->cont_level == 0 checked below */
 1911             ++l;            /* step over */
 1912             m->flag |= OFFADD;
 1913         }
 1914     }
 1915     /* Indirect offsets are not valid at level 0. */
 1916     if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD))) {
 1917         if (ms->flags & MAGIC_CHECK)
 1918             file_magwarn(ms, "relative offset at level 0");
 1919         return -1;
 1920     }
 1921 
 1922     /* get offset, then skip over it */
 1923     m->offset = (int32_t)strtol(l, &t, 0);
 1924         if (l == t) {
 1925         if (ms->flags & MAGIC_CHECK)
 1926             file_magwarn(ms, "offset `%s' invalid", l);
 1927         return -1;
 1928     }
 1929 #if 0
 1930         if (m->offset < 0 && cont_level != 0 &&
 1931         (m->flag & (OFFADD | INDIROFFADD)) == 0) {
 1932         if (ms->flags & MAGIC_CHECK) {
 1933             file_magwarn(ms,
 1934                 "negative direct offset `%s' at level %u",
 1935                 l, cont_level);
 1936         }
 1937         return -1;
 1938     }
 1939 #endif
 1940         l = t;
 1941 
 1942     if (m->flag & INDIR) {
 1943         m->in_type = FILE_LONG;
 1944         m->in_offset = 0;
 1945         m->in_op = 0;
 1946         /*
 1947          * read [.,lbs][+-]nnnnn)
 1948          */
 1949         if (*l == '.' || *l == ',') {
 1950             if (*l == ',')
 1951                 m->in_op |= FILE_OPSIGNED;
 1952             l++;
 1953             switch (*l) {
 1954             case 'l':
 1955                 m->in_type = FILE_LELONG;
 1956                 break;
 1957             case 'L':
 1958                 m->in_type = FILE_BELONG;
 1959                 break;
 1960             case 'm':
 1961                 m->in_type = FILE_MELONG;
 1962                 break;
 1963             case 'h':
 1964             case 's':
 1965                 m->in_type = FILE_LESHORT;
 1966                 break;
 1967             case 'H':
 1968             case 'S':
 1969                 m->in_type = FILE_BESHORT;
 1970                 break;
 1971             case 'c':
 1972             case 'b':
 1973             case 'C':
 1974             case 'B':
 1975                 m->in_type = FILE_BYTE;
 1976                 break;
 1977             case 'e':
 1978             case 'f':
 1979             case 'g':
 1980                 m->in_type = FILE_LEDOUBLE;
 1981                 break;
 1982             case 'E':
 1983             case 'F':
 1984             case 'G':
 1985                 m->in_type = FILE_BEDOUBLE;
 1986                 break;
 1987             case 'i':
 1988                 m->in_type = FILE_LEID3;
 1989                 break;
 1990             case 'I':
 1991                 m->in_type = FILE_BEID3;
 1992                 break;
 1993             case 'q':
 1994                 m->in_type = FILE_LEQUAD;
 1995                 break;
 1996             case 'Q':
 1997                 m->in_type = FILE_BEQUAD;
 1998                 break;
 1999             default:
 2000                 if (ms->flags & MAGIC_CHECK)
 2001                     file_magwarn(ms,
 2002                         "indirect offset type `%c' invalid",
 2003                         *l);
 2004                 return -1;
 2005             }
 2006             l++;
 2007         }
 2008 
 2009         if (*l == '~') {
 2010             m->in_op |= FILE_OPINVERSE;
 2011             l++;
 2012         }
 2013         if ((op = get_op(*l)) != -1) {
 2014             m->in_op |= op;
 2015             l++;
 2016         }
 2017         if (*l == '(') {
 2018             m->in_op |= FILE_OPINDIRECT;
 2019             l++;
 2020         }
 2021         if (isdigit((unsigned char)*l) || *l == '-') {
 2022             m->in_offset = (int32_t)strtol(l, &t, 0);
 2023             if (l == t) {
 2024                 if (ms->flags & MAGIC_CHECK)
 2025                     file_magwarn(ms,
 2026                         "in_offset `%s' invalid", l);
 2027                 return -1;
 2028             }
 2029             l = t;
 2030         }
 2031         if (*l++ != ')' ||
 2032             ((m->in_op & FILE_OPINDIRECT) && *l++ != ')')) {
 2033             if (ms->flags & MAGIC_CHECK)
 2034                 file_magwarn(ms,
 2035                     "missing ')' in indirect offset");
 2036             return -1;
 2037         }
 2038     }
 2039     EATAB;
 2040 
 2041 #ifdef ENABLE_CONDITIONALS
 2042     m->cond = get_cond(l, &l);
 2043     if (check_cond(ms, m->cond, cont_level) == -1)
 2044         return -1;
 2045 
 2046     EATAB;
 2047 #endif
 2048 
 2049     /*
 2050      * Parse the type.
 2051      */
 2052     if (*l == 'u') {
 2053         /*
 2054          * Try it as a keyword type prefixed by "u"; match what
 2055          * follows the "u".  If that fails, try it as an SUS
 2056          * integer type.
 2057          */
 2058         m->type = get_type(type_tbl, l + 1, &l);
 2059         if (m->type == FILE_INVALID) {
 2060             /*
 2061              * Not a keyword type; parse it as an SUS type,
 2062              * 'u' possibly followed by a number or C/S/L.
 2063              */
 2064             m->type = get_standard_integer_type(l, &l);
 2065         }
 2066         /* It's unsigned. */
 2067         if (m->type != FILE_INVALID)
 2068             m->flag |= UNSIGNED;
 2069     } else {
 2070         /*
 2071          * Try it as a keyword type.  If that fails, try it as
 2072          * an SUS integer type if it begins with "d" or as an
 2073          * SUS string type if it begins with "s".  In any case,
 2074          * it's not unsigned.
 2075          */
 2076         m->type = get_type(type_tbl, l, &l);
 2077         if (m->type == FILE_INVALID) {
 2078             /*
 2079              * Not a keyword type; parse it as an SUS type,
 2080              * either 'd' possibly followed by a number or
 2081              * C/S/L, or just 's'.
 2082              */
 2083             if (*l == 'd')
 2084                 m->type = get_standard_integer_type(l, &l);
 2085             else if (*l == 's' && !isalpha((unsigned char)l[1])) {
 2086                 m->type = FILE_STRING;
 2087                 ++l;
 2088             }
 2089         }
 2090     }
 2091 
 2092     if (m->type == FILE_INVALID) {
 2093         /* Not found - try it as a special keyword. */
 2094         m->type = get_type(special_tbl, l, &l);
 2095     }
 2096 
 2097     if (m->type == FILE_INVALID) {
 2098         if (ms->flags & MAGIC_CHECK)
 2099             file_magwarn(ms, "type `%s' invalid", l);
 2100         return -1;
 2101     }
 2102 
 2103     /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
 2104     /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
 2105 
 2106     m->mask_op = 0;
 2107     if (*l == '~') {
 2108         if (!IS_STRING(m->type))
 2109             m->mask_op |= FILE_OPINVERSE;
 2110         else if (ms->flags & MAGIC_CHECK)
 2111             file_magwarn(ms, "'~' invalid for string types");
 2112         ++l;
 2113     }
 2114     m->str_range = 0;
 2115     m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
 2116     if ((op = get_op(*l)) != -1) {
 2117         if (IS_STRING(m->type)) {
 2118             int r;
 2119 
 2120             if (op != FILE_OPDIVIDE) {
 2121                 if (ms->flags & MAGIC_CHECK)
 2122                     file_magwarn(ms,
 2123                         "invalid string/indirect op: "
 2124                         "`%c'", *t);
 2125                 return -1;
 2126             }
 2127 
 2128             if (m->type == FILE_INDIRECT)
 2129                 r = parse_indirect_modifier(ms, m, &l);
 2130             else
 2131                 r = parse_string_modifier(ms, m, &l);
 2132             if (r == -1)
 2133                 return -1;
 2134         } else
 2135             parse_op_modifier(ms, m, &l, op);
 2136     }
 2137 
 2138     /*
 2139      * We used to set mask to all 1's here, instead let's just not do
 2140      * anything if mask = 0 (unless you have a better idea)
 2141      */
 2142     EATAB;
 2143 
 2144     switch (*l) {
 2145     case '>':
 2146     case '<':
 2147         m->reln = *l;
 2148         ++l;
 2149         if (*l == '=') {
 2150             if (ms->flags & MAGIC_CHECK) {
 2151                 file_magwarn(ms, "%c= not supported",
 2152                     m->reln);
 2153                 return -1;
 2154             }
 2155            ++l;
 2156         }
 2157         break;
 2158     /* Old-style anding: "0 byte &0x80 dynamically linked" */
 2159     case '&':
 2160     case '^':
 2161     case '=':
 2162         m->reln = *l;
 2163         ++l;
 2164         if (*l == '=') {
 2165            /* HP compat: ignore &= etc. */
 2166            ++l;
 2167         }
 2168         break;
 2169     case '!':
 2170         m->reln = *l;
 2171         ++l;
 2172         break;
 2173     default:
 2174         m->reln = '=';  /* the default relation */
 2175         if (*l == 'x' && ((isascii((unsigned char)l[1]) &&
 2176             isspace((unsigned char)l[1])) || !l[1])) {
 2177             m->reln = *l;
 2178             ++l;
 2179         }
 2180         break;
 2181     }
 2182     /*
 2183      * Grab the value part, except for an 'x' reln.
 2184      */
 2185     if (m->reln != 'x' && getvalue(ms, m, &l, action))
 2186         return -1;
 2187 
 2188     /*
 2189      * TODO finish this macro and start using it!
 2190      * #define offsetcheck {if (offset > ms->bytes_max -1)
 2191      *  magwarn("offset too big"); }
 2192      */
 2193 
 2194     /*
 2195      * Now get last part - the description
 2196      */
 2197     EATAB;
 2198     if (l[0] == '\b') {
 2199         ++l;
 2200         m->flag |= NOSPACE;
 2201     } else if ((l[0] == '\\') && (l[1] == 'b')) {
 2202         ++l;
 2203         ++l;
 2204         m->flag |= NOSPACE;
 2205     }
 2206     for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
 2207         continue;
 2208     if (i == sizeof(m->desc)) {
 2209         m->desc[sizeof(m->desc) - 1] = '\0';
 2210         if (ms->flags & MAGIC_CHECK)
 2211             file_magwarn(ms, "description `%s' truncated", m->desc);
 2212     }
 2213 
 2214         /*
 2215      * We only do this check while compiling, or if any of the magic
 2216      * files were not compiled.
 2217          */
 2218         if (ms->flags & MAGIC_CHECK) {
 2219         if (check_format(ms, m) == -1)
 2220             return -1;
 2221     }
 2222 #ifndef COMPILE_ONLY
 2223     if (action == FILE_CHECK) {
 2224         file_mdump(m);
 2225     }
 2226 #endif
 2227     m->mimetype[0] = '\0';      /* initialise MIME type to none */
 2228     return 0;
 2229 }
 2230 
 2231 /*
 2232  * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
 2233  * if valid
 2234  */
 2235 private int
 2236 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
 2237 {
 2238     const char *l = line;
 2239     char *el;
 2240     unsigned long factor;
 2241     struct magic *m = &me->mp[0];
 2242 
 2243     if (m->factor_op != FILE_FACTOR_OP_NONE) {
 2244         file_magwarn(ms,
 2245             "Current entry already has a strength type: %c %d",
 2246             m->factor_op, m->factor);
 2247         return -1;
 2248     }
 2249     if (m->type == FILE_NAME) {
 2250         file_magwarn(ms, "%s: Strength setting is not supported in "
 2251             "\"name\" magic entries", m->value.s);
 2252         return -1;
 2253     }
 2254     EATAB;
 2255     switch (*l) {
 2256     case FILE_FACTOR_OP_NONE:
 2257     case FILE_FACTOR_OP_PLUS:
 2258     case FILE_FACTOR_OP_MINUS:
 2259     case FILE_FACTOR_OP_TIMES:
 2260     case FILE_FACTOR_OP_DIV:
 2261         m->factor_op = *l++;
 2262         break;
 2263     default:
 2264         file_magwarn(ms, "Unknown factor op `%c'", *l);
 2265         return -1;
 2266     }
 2267     EATAB;
 2268     factor = strtoul(l, &el, 0);
 2269     if (factor > 255) {
 2270         file_magwarn(ms, "Too large factor `%lu'", factor);
 2271         goto out;
 2272     }
 2273     if (*el && !isspace((unsigned char)*el)) {
 2274         file_magwarn(ms, "Bad factor `%s'", l);
 2275         goto out;
 2276     }
 2277     m->factor = (uint8_t)factor;
 2278     if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
 2279         file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
 2280             m->factor_op, m->factor);
 2281         goto out;
 2282     }
 2283     return 0;
 2284 out:
 2285     m->factor_op = FILE_FACTOR_OP_NONE;
 2286     m->factor = 0;
 2287     return -1;
 2288 }
 2289 
 2290 private int
 2291 goodchar(unsigned char x, const char *extra)
 2292 {
 2293     return (isascii(x) && isalnum(x)) || strchr(extra, x);
 2294 }
 2295 
 2296 private int
 2297 parse_extra(struct magic_set *ms, struct magic_entry *me, const char *line,
 2298     off_t off, size_t len, const char *name, const char *extra, int nt)
 2299 {
 2300     size_t i;
 2301     const char *l = line;
 2302     struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
 2303     char *buf = CAST(char *, CAST(void *, m)) + off;
 2304 
 2305     if (buf[0] != '\0') {
 2306         len = nt ? strlen(buf) : len;
 2307         file_magwarn(ms, "Current entry already has a %s type "
 2308             "`%.*s', new type `%s'", name, (int)len, buf, l);
 2309         return -1;
 2310     }
 2311 
 2312     if (*m->desc == '\0') {
 2313         file_magwarn(ms, "Current entry does not yet have a "
 2314             "description for adding a %s type", name);
 2315         return -1;
 2316     }
 2317 
 2318     EATAB;
 2319     for (i = 0; *l && i < len && goodchar(*l, extra); buf[i++] = *l++)
 2320         continue;
 2321 
 2322     if (i == len && *l) {
 2323         if (nt)
 2324             buf[len - 1] = '\0';
 2325         if (ms->flags & MAGIC_CHECK)
 2326             file_magwarn(ms, "%s type `%s' truncated %"
 2327                 SIZE_T_FORMAT "u", name, line, i);
 2328     } else {
 2329         if (!isspace((unsigned char)*l) && !goodchar(*l, extra))
 2330             file_magwarn(ms, "%s type `%s' has bad char '%c'",
 2331                 name, line, *l);
 2332         if (nt)
 2333             buf[i] = '\0';
 2334     }
 2335 
 2336     if (i > 0)
 2337         return 0;
 2338 
 2339     file_magerror(ms, "Bad magic entry '%s'", line);
 2340     return -1;
 2341 }
 2342 
 2343 /*
 2344  * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
 2345  * magic[index - 1]
 2346  */
 2347 private int
 2348 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
 2349 {
 2350     struct magic *m = &me->mp[0];
 2351 
 2352     return parse_extra(ms, me, line,
 2353         CAST(off_t, offsetof(struct magic, apple)),
 2354         sizeof(m->apple), "APPLE", "!+-./?", 0);
 2355 }
 2356 
 2357 /*
 2358  * Parse a comma-separated list of extensions
 2359  */
 2360 private int
 2361 parse_ext(struct magic_set *ms, struct magic_entry *me, const char *line)
 2362 {
 2363     struct magic *m = &me->mp[0];
 2364 
 2365     return parse_extra(ms, me, line,
 2366         CAST(off_t, offsetof(struct magic, ext)),
 2367         sizeof(m->ext), "EXTENSION", ",!+-/@?_$", 0);
 2368 }
 2369 
 2370 /*
 2371  * parse a MIME annotation line from magic file, put into magic[index - 1]
 2372  * if valid
 2373  */
 2374 private int
 2375 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
 2376 {
 2377     struct magic *m = &me->mp[0];
 2378 
 2379     return parse_extra(ms, me, line,
 2380         CAST(off_t, offsetof(struct magic, mimetype)),
 2381         sizeof(m->mimetype), "MIME", "+-/.$?:{}", 1);
 2382 }
 2383 
 2384 private int
 2385 check_format_type(const char *ptr, int type, const char **estr)
 2386 {
 2387     int quad = 0, h;
 2388     size_t len, cnt;
 2389     if (*ptr == '\0') {
 2390         /* Missing format string; bad */
 2391         *estr = "missing format spec";
 2392         return -1;
 2393     }
 2394 
 2395     switch (file_formats[type]) {
 2396     case FILE_FMT_QUAD:
 2397         quad = 1;
 2398         /*FALLTHROUGH*/
 2399     case FILE_FMT_NUM:
 2400         if (quad == 0) {
 2401             switch (type) {
 2402             case FILE_BYTE:
 2403                 h = 2;
 2404                 break;
 2405             case FILE_SHORT:
 2406             case FILE_BESHORT:
 2407             case FILE_LESHORT:
 2408                 h = 1;
 2409                 break;
 2410             case FILE_LONG:
 2411             case FILE_BELONG:
 2412             case FILE_LELONG:
 2413             case FILE_MELONG:
 2414             case FILE_LEID3:
 2415             case FILE_BEID3:
 2416             case FILE_INDIRECT:
 2417                 h = 0;
 2418                 break;
 2419             default:
 2420                 abort();
 2421             }
 2422         } else
 2423             h = 0;
 2424         if (*ptr == '-')
 2425             ptr++;
 2426         if (*ptr == '.')
 2427             ptr++;
 2428         if (*ptr == '#')
 2429             ptr++;
 2430 #define CHECKLEN() do { \
 2431     for (len = cnt = 0; isdigit((unsigned char)*ptr); ptr++, cnt++) \
 2432         len = len * 10 + (*ptr - '0'); \
 2433     if (cnt > 5 || len > 1024) \
 2434         goto toolong; \
 2435 } while (/*CONSTCOND*/0)
 2436 
 2437         CHECKLEN();
 2438         if (*ptr == '.')
 2439             ptr++;
 2440         CHECKLEN();
 2441         if (quad) {
 2442             if (*ptr++ != 'l')
 2443                 goto invalid;
 2444             if (*ptr++ != 'l')
 2445                 goto invalid;
 2446         }
 2447 
 2448         switch (*ptr++) {
 2449 #ifdef STRICT_FORMAT    /* "long" formats are int formats for us */
 2450         /* so don't accept the 'l' modifier */
 2451         case 'l':
 2452             switch (*ptr++) {
 2453             case 'i':
 2454             case 'd':
 2455             case 'u':
 2456             case 'o':
 2457             case 'x':
 2458             case 'X':
 2459                 if (h == 0)
 2460                     return 0;
 2461                 /*FALLTHROUGH*/
 2462             default:
 2463                 goto invalid;
 2464             }
 2465 
 2466         /*
 2467          * Don't accept h and hh modifiers. They make writing
 2468          * magic entries more complicated, for very little benefit
 2469          */
 2470         case 'h':
 2471             if (h-- <= 0)
 2472                 goto invalid;
 2473             switch (*ptr++) {
 2474             case 'h':
 2475                 if (h-- <= 0)
 2476                     goto invalid;
 2477                 switch (*ptr++) {
 2478                 case 'i':
 2479                 case 'd':
 2480                 case 'u':
 2481                 case 'o':
 2482                 case 'x':
 2483                 case 'X':
 2484                     return 0;
 2485                 default:
 2486                     goto invalid;
 2487                 }
 2488             case 'i':
 2489             case 'd':
 2490             case 'u':
 2491             case 'o':
 2492             case 'x':
 2493             case 'X':
 2494                 if (h == 0)
 2495                     return 0;
 2496                 /*FALLTHROUGH*/
 2497             default:
 2498                 goto invalid;
 2499             }
 2500 #endif
 2501         case 'c':
 2502             if (h == 2)
 2503                 return 0;
 2504             goto invalid;
 2505         case 'i':
 2506         case 'd':
 2507         case 'u':
 2508         case 'o':
 2509         case 'x':
 2510         case 'X':
 2511 #ifdef STRICT_FORMAT
 2512             if (h == 0)
 2513                 return 0;
 2514             /*FALLTHROUGH*/
 2515 #else
 2516             return 0;
 2517 #endif
 2518         default:
 2519             goto invalid;
 2520         }
 2521 
 2522     case FILE_FMT_FLOAT:
 2523     case FILE_FMT_DOUBLE:
 2524         if (*ptr == '-')
 2525             ptr++;
 2526         if (*ptr == '.')
 2527             ptr++;
 2528         CHECKLEN();
 2529         if (*ptr == '.')
 2530             ptr++;
 2531         CHECKLEN();
 2532         switch (*ptr++) {
 2533         case 'e':
 2534         case 'E':
 2535         case 'f':
 2536         case 'F':
 2537         case 'g':
 2538         case 'G':
 2539             return 0;
 2540 
 2541         default:
 2542             goto invalid;
 2543         }
 2544 
 2545 
 2546     case FILE_FMT_STR:
 2547         if (*ptr == '-')
 2548             ptr++;
 2549         while (isdigit((unsigned char )*ptr))
 2550             ptr++;
 2551         if (*ptr == '.') {
 2552             ptr++;
 2553             while (isdigit((unsigned char )*ptr))
 2554                 ptr++;
 2555         }
 2556 
 2557         switch (*ptr++) {
 2558         case 's':
 2559             return 0;
 2560         default:
 2561             goto invalid;
 2562         }
 2563 
 2564     default:
 2565         /* internal error */
 2566         abort();
 2567     }
 2568 invalid:
 2569     *estr = "not valid";
 2570 toolong:
 2571     *estr = "too long";
 2572     return -1;
 2573 }
 2574 
 2575 /*
 2576  * Check that the optional printf format in description matches
 2577  * the type of the magic.
 2578  */
 2579 private int
 2580 check_format(struct magic_set *ms, struct magic *m)
 2581 {
 2582     char *ptr;
 2583     const char *estr;
 2584 
 2585     for (ptr = m->desc; *ptr; ptr++)
 2586         if (*ptr == '%')
 2587             break;
 2588     if (*ptr == '\0') {
 2589         /* No format string; ok */
 2590         return 1;
 2591     }
 2592 
 2593     assert(file_nformats == file_nnames);
 2594 
 2595     if (m->type >= file_nformats) {
 2596         file_magwarn(ms, "Internal error inconsistency between "
 2597             "m->type and format strings");
 2598         return -1;
 2599     }
 2600     if (file_formats[m->type] == FILE_FMT_NONE) {
 2601         file_magwarn(ms, "No format string for `%s' with description "
 2602             "`%s'", m->desc, file_names[m->type]);
 2603         return -1;
 2604     }
 2605 
 2606     ptr++;
 2607     if (check_format_type(ptr, m->type, &estr) == -1) {
 2608         /*
 2609          * TODO: this error message is unhelpful if the format
 2610          * string is not one character long
 2611          */
 2612         file_magwarn(ms, "Printf format is %s for type "
 2613             "`%s' in description `%s'", estr,
 2614             file_names[m->type], m->desc);
 2615         return -1;
 2616     }
 2617 
 2618     for (; *ptr; ptr++) {
 2619         if (*ptr == '%') {
 2620             file_magwarn(ms,
 2621                 "Too many format strings (should have at most one) "
 2622                 "for `%s' with description `%s'",
 2623                 file_names[m->type], m->desc);
 2624             return -1;
 2625         }
 2626     }
 2627     return 0;
 2628 }
 2629 
 2630 /*
 2631  * Read a numeric value from a pointer, into the value union of a magic
 2632  * pointer, according to the magic type.  Update the string pointer to point
 2633  * just after the number read.  Return 0 for success, non-zero for failure.
 2634  */
 2635 private int
 2636 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
 2637 {
 2638     char *ep;
 2639     uint64_t ull;
 2640 
 2641     switch (m->type) {
 2642     case FILE_BESTRING16:
 2643     case FILE_LESTRING16:
 2644     case FILE_STRING:
 2645     case FILE_PSTRING:
 2646     case FILE_REGEX:
 2647     case FILE_SEARCH:
 2648     case FILE_NAME:
 2649     case FILE_USE:
 2650     case FILE_DER:
 2651         *p = getstr(ms, m, *p, action == FILE_COMPILE);
 2652         if (*p == NULL) {
 2653             if (ms->flags & MAGIC_CHECK)
 2654                 file_magwarn(ms, "cannot get string from `%s'",
 2655                     m->value.s);
 2656             return -1;
 2657         }
 2658         if (m->type == FILE_REGEX) {
 2659             file_regex_t rx;
 2660             int rc = file_regcomp(&rx, m->value.s, REG_EXTENDED);
 2661             if (rc) {
 2662                 if (ms->flags & MAGIC_CHECK)
 2663                     file_regerror(&rx, rc, ms);
 2664             }
 2665             file_regfree(&rx);
 2666             return rc ? -1 : 0;
 2667         }
 2668         return 0;
 2669     default:
 2670         if (m->reln == 'x')
 2671             return 0;
 2672         break;
 2673     }
 2674 
 2675     switch (m->type) {
 2676     case FILE_FLOAT:
 2677     case FILE_BEFLOAT:
 2678     case FILE_LEFLOAT:
 2679         errno = 0;
 2680 #ifdef HAVE_STRTOF
 2681         m->value.f = strtof(*p, &ep);
 2682 #else
 2683         m->value.f = (float)strtod(*p, &ep);
 2684 #endif
 2685         if (errno == 0)
 2686             *p = ep;
 2687         return 0;
 2688     case FILE_DOUBLE:
 2689     case FILE_BEDOUBLE:
 2690     case FILE_LEDOUBLE:
 2691         errno = 0;
 2692         m->value.d = strtod(*p, &ep);
 2693         if (errno == 0)
 2694             *p = ep;
 2695         return 0;
 2696     default:
 2697         errno = 0;
 2698         ull = (uint64_t)strtoull(*p, &ep, 0);
 2699         m->value.q = file_signextend(ms, m, ull);
 2700         if (*p == ep) {
 2701             file_magwarn(ms, "Unparseable number `%s'", *p);
 2702         } else {
 2703             size_t ts = typesize(m->type);
 2704             uint64_t x;
 2705             const char *q;
 2706 
 2707             if (ts == (size_t)~0) {
 2708                 file_magwarn(ms,
 2709                     "Expected numeric type got `%s'",
 2710                     type_tbl[m->type].name);
 2711             }
 2712             for (q = *p; isspace((unsigned char)*q); q++)
 2713                 continue;
 2714             if (*q == '-')
 2715                 ull = -(int64_t)ull;
 2716             switch (ts) {
 2717             case 1:
 2718                 x = (uint64_t)(ull & ~0xffULL);
 2719                 break;
 2720             case 2:
 2721                 x = (uint64_t)(ull & ~0xffffULL);
 2722                 break;
 2723             case 4:
 2724                 x = (uint64_t)(ull & ~0xffffffffULL);
 2725                 break;
 2726             case 8:
 2727                 x = 0;
 2728                 break;
 2729             default:
 2730                 abort();
 2731             }
 2732             if (x) {
 2733                 file_magwarn(ms, "Overflow for numeric"
 2734                     " type `%s' value %#" PRIx64,
 2735                     type_tbl[m->type].name, ull);
 2736             }
 2737         }
 2738         if (errno == 0) {
 2739             *p = ep;
 2740             eatsize(p);
 2741         }
 2742         return 0;
 2743     }
 2744 }
 2745 
 2746 /*
 2747  * Convert a string containing C character escapes.  Stop at an unescaped
 2748  * space or tab.
 2749  * Copy the converted version to "m->value.s", and the length in m->vallen.
 2750  * Return updated scan pointer as function result. Warn if set.
 2751  */
 2752 private const char *
 2753 getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
 2754 {
 2755     const char *origs = s;
 2756     char    *p = m->value.s;
 2757     size_t  plen = sizeof(m->value.s);
 2758     char    *origp = p;
 2759     char    *pmax = p + plen - 1;
 2760     int c;
 2761     int val;
 2762 
 2763     while ((c = *s++) != '\0') {
 2764         if (isspace((unsigned char) c))
 2765             break;
 2766         if (p >= pmax) {
 2767             file_error(ms, 0, "string too long: `%s'", origs);
 2768             return NULL;
 2769         }
 2770         if (c == '\\') {
 2771             switch(c = *s++) {
 2772 
 2773             case '\0':
 2774                 if (warn)
 2775                     file_magwarn(ms, "incomplete escape");
 2776                 s--;
 2777                 goto out;
 2778 
 2779             case '\t':
 2780                 if (warn) {
 2781                     file_magwarn(ms,
 2782                         "escaped tab found, use \\t instead");
 2783                     warn = 0;   /* already did */
 2784                 }
 2785                 /*FALLTHROUGH*/
 2786             default:
 2787                 if (warn) {
 2788                     if (isprint((unsigned char)c)) {
 2789                         /* Allow escaping of
 2790                          * ``relations'' */
 2791                         if (strchr("<>&^=!", c) == NULL
 2792                             && (m->type != FILE_REGEX ||
 2793                             strchr("[]().*?^$|{}", c)
 2794                             == NULL)) {
 2795                             file_magwarn(ms, "no "
 2796                                 "need to escape "
 2797                                 "`%c'", c);
 2798                         }
 2799                     } else {
 2800                         file_magwarn(ms,
 2801                             "unknown escape sequence: "
 2802                             "\\%03o", c);
 2803                     }
 2804                 }
 2805                 /*FALLTHROUGH*/
 2806             /* space, perhaps force people to use \040? */
 2807             case ' ':
 2808 #if 0
 2809             /*
 2810              * Other things people escape, but shouldn't need to,
 2811              * so we disallow them
 2812              */
 2813             case '\'':
 2814             case '"':
 2815             case '?':
 2816 #endif
 2817             /* Relations */
 2818             case '>':
 2819             case '<':
 2820             case '&':
 2821             case '^':
 2822             case '=':
 2823             case '!':
 2824             /* and baskslash itself */
 2825             case '\\':
 2826                 *p++ = (char) c;
 2827                 break;
 2828 
 2829             case 'a':
 2830                 *p++ = '\a';
 2831                 break;
 2832 
 2833             case 'b':
 2834                 *p++ = '\b';
 2835                 break;
 2836 
 2837             case 'f':
 2838                 *p++ = '\f';
 2839                 break;
 2840 
 2841             case 'n':
 2842                 *p++ = '\n';
 2843                 break;
 2844 
 2845             case 'r':
 2846                 *p++ = '\r';
 2847                 break;
 2848 
 2849             case 't':
 2850                 *p++ = '\t';
 2851                 break;
 2852 
 2853             case 'v':
 2854                 *p++ = '\v';
 2855                 break;
 2856 
 2857             /* \ and up to 3 octal digits */
 2858             case '0':
 2859             case '1':
 2860             case '2':
 2861             case '3':
 2862             case '4':
 2863             case '5':
 2864             case '6':
 2865             case '7':
 2866                 val = c - '0';
 2867                 c = *s++;  /* try for 2 */
 2868                 if (c >= '0' && c <= '7') {
 2869                     val = (val << 3) | (c - '0');
 2870                     c = *s++;  /* try for 3 */
 2871                     if (c >= '0' && c <= '7')
 2872                         val = (val << 3) | (c-'0');
 2873                     else
 2874                         --s;
 2875                 }
 2876                 else
 2877                     --s;
 2878                 *p++ = (char)val;
 2879                 break;
 2880 
 2881             /* \x and up to 2 hex digits */
 2882             case 'x':
 2883                 val = 'x';  /* Default if no digits */
 2884                 c = hextoint(*s++); /* Get next char */
 2885                 if (c >= 0) {
 2886                     val = c;
 2887                     c = hextoint(*s++);
 2888                     if (c >= 0)
 2889                         val = (val << 4) + c;
 2890                     else
 2891                         --s;
 2892                 } else
 2893                     --s;
 2894                 *p++ = (char)val;
 2895                 break;
 2896             }
 2897         } else
 2898             *p++ = (char)c;
 2899     }
 2900     --s;
 2901 out:
 2902     *p = '\0';
 2903     m->vallen = CAST(unsigned char, (p - origp));
 2904     if (m->type == FILE_PSTRING)
 2905         m->vallen += (unsigned char)file_pstring_length_size(m);
 2906     return s;
 2907 }
 2908 
 2909 
 /*
  * Single hex char to int; -1 if not a hex char.
  *
  * c is the character code to classify.  Returns 0..15 for '0'-'9',
  * 'a'-'f' and 'A'-'F', and -1 for every other value, including negative
  * values and values that do not fit in an unsigned char.
  */
 private int
 hextoint(int c)
 {
     /*
      * Compare the int itself rather than a copy truncated to unsigned
      * char: classifying the truncated value while doing the arithmetic
      * on the untruncated one would let e.g. 0x130 pass as '0' and come
      * back as 256.  This also avoids the non-ISO isascii().
      */
     if (c >= '0' && c <= '9')
         return c - '0';
     if (c >= 'a' && c <= 'f')
         return c - 'a' + 10;
     if (c >= 'A' && c <= 'F')
         return c - 'A' + 10;
     return -1;
 }
 2924 
 2925 
 /*
  * Print a string containing C character escapes.
  *
  * Bytes in the range 040..0176 are written to fp unchanged.  Every other
  * byte is written as a backslash followed by its C escape letter (a, b,
  * f, n, r, t, v) or, when it has none, by three octal digits.
  *
  * len is the number of bytes of s to print.  An all-ones len, spelled
  * either as ~0U or as (size_t)~0, means that s is NUL-terminated and is
  * printed up to, but not including, the terminator.
  */
 protected void
 file_showstr(FILE *fp, const char *s, size_t len)
 {
     /*
      * ~0U is only UINT_MAX, which is not the largest size_t when size_t
      * is wider than unsigned int.  Accept both spellings so that a
      * caller passing (size_t)~0 is not sent scanning far past the end
      * of s.
      */
     const int nul_terminated = (len == ~0U || len == ~(size_t)0);
     char    c;

     for (;;) {
         if (nul_terminated) {
             c = *s++;
             if (c == '\0')
                 break;
         } else {
             if (len-- == 0)
                 break;
             c = *s++;
         }

         if (c >= 040 && c <= 0176) {  /* TODO isprint && !iscntrl */
             (void) fputc(c, fp);
             continue;
         }

         (void) fputc('\\', fp);
         switch (c) {
         case '\a':
             (void) fputc('a', fp);
             break;

         case '\b':
             (void) fputc('b', fp);
             break;

         case '\f':
             (void) fputc('f', fp);
             break;

         case '\n':
             (void) fputc('n', fp);
             break;

         case '\r':
             (void) fputc('r', fp);
             break;

         case '\t':
             (void) fputc('t', fp);
             break;

         case '\v':
             (void) fputc('v', fp);
             break;

         default:
             /* Mask so that a negative char prints as 000..377. */
             (void) fprintf(fp, "%.3o", c & 0377);
             break;
         }
     }
 }
 2985 
 /*
  * eatsize(): Eat the size spec from a number [eg. 10UL]
  *
  * *p points just past the digits of a number.  An optional 'u' and then
  * at most one of 'l', 's', 'h', 'b' or 'c' (all case-insensitive) are
  * skipped, and *p is left pointing at the first character that was not
  * consumed.
  */
 private void
 eatsize(const char **p)
 {
     const char *cur = *p;
     int suffix;

     /* Optional unsigned marker. */
     if (LOWCASE(*cur) == 'u')
         cur++;

     /* Optional width letter: long, short (s or h), char/byte (b or c). */
     suffix = LOWCASE(*cur);
     if (suffix == 'l' || suffix == 's' || suffix == 'h' ||
         suffix == 'b' || suffix == 'c')
         cur++;

     *p = cur;
 }
 3011 
 3012 /*
 3013  * handle a buffer containing a compiled file.
 3014  */
 3015 private struct magic_map *
 3016 apprentice_buf(struct magic_set *ms, struct magic *buf, size_t len)
 3017 {
 3018     struct magic_map *map;
 3019 
 3020     if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
 3021         file_oomem(ms, sizeof(*map));
 3022         return NULL;
 3023     }
 3024     map->len = len;
 3025     map->p = buf;
 3026     map->type = MAP_TYPE_USER;
 3027     if (check_buffer(ms, map, "buffer") != 0) {
 3028         apprentice_unmap(map);
 3029         return NULL;
 3030     }
 3031     return map;
 3032 }
 3033 
 3034 /*
 3035  * handle a compiled file.
 3036  */
 3037 
 3038 private struct magic_map *
 3039 apprentice_map(struct magic_set *ms, const char *fn)
 3040 {
 3041     int fd;
 3042     struct stat st;
 3043     char *dbname = NULL;
 3044     struct magic_map *map;
 3045     struct magic_map *rv = NULL;
 3046 
 3047     fd = -1;
 3048     if ((map = CAST(struct magic_map *, calloc(1, sizeof(*map)))) == NULL) {
 3049         file_oomem(ms, sizeof(*map));
 3050         goto error;
 3051     }
 3052     map->type = MAP_TYPE_USER;  /* unspecified */
 3053 
 3054     dbname = mkdbname(ms, fn, 0);
 3055     if (dbname == NULL)
 3056         goto error;
 3057 
 3058     if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1)
 3059         goto error;
 3060 
 3061     if (fstat(fd, &st) == -1) {
 3062         file_error(ms, errno, "cannot stat `%s'", dbname);
 3063         goto error;
 3064     }
 3065     if (st.st_size < 8 || st.st_size > maxoff_t()) {
 3066         file_error(ms, 0, "file `%s' is too %s", dbname,
 3067             st.st_size < 8 ? "small" : "large");
 3068         goto error;
 3069     }
 3070 
 3071     map->len = (size_t)st.st_size;
 3072 #ifdef QUICK
 3073     map->type = MAP_TYPE_MMAP;
 3074     if ((map->p = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
 3075         MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
 3076         file_error(ms, errno, "cannot map `%s'", dbname);
 3077         goto error;
 3078     }
 3079 #else
 3080     map->type = MAP_TYPE_MALLOC;
 3081     if ((map->p = CAST(void *, malloc(map->len))) == NULL) {
 3082         file_oomem(ms, map->len);
 3083         goto error;
 3084     }
 3085     if (read(fd, map->p, map->len) != (ssize_t)map->len) {
 3086         file_badread(ms);
 3087         goto error;
 3088     }
 3089 #define RET 1
 3090 #endif
 3091     (void)close(fd);
 3092     fd = -1;
 3093 
 3094     if (check_buffer(ms, map, dbname) != 0) {
 3095         rv = (struct magic_map *)-1;
 3096         goto error;
 3097     }
 3098 #ifdef QUICK
 3099     if (mprotect(map->p, (size_t)st.st_size, PROT_READ) == -1) {
 3100         file_error(ms, errno, "cannot mprotect `%s'", dbname);
 3101         goto error;
 3102     }
 3103 #endif
 3104 
 3105     free(dbname);
 3106     return map;
 3107 
 3108 error:
 3109     if (fd != -1)
 3110         (void)close(fd);
 3111     apprentice_unmap(map);
 3112     free(dbname);
 3113     return rv;
 3114 }
 3115 
 3116 private int
 3117 check_buffer(struct magic_set *ms, struct magic_map *map, const char *dbname)
 3118 {
 3119     uint32_t *ptr;
 3120     uint32_t entries, nentries;
 3121     uint32_t version;
 3122     int i, needsbyteswap;
 3123 
 3124     ptr = CAST(uint32_t *, map->p);
 3125     if (*ptr != MAGICNO) {
 3126         if (swap4(*ptr) != MAGICNO) {
 3127             file_error(ms, 0, "bad magic in `%s'", dbname);
 3128             return -1;
 3129         }
 3130         needsbyteswap = 1;
 3131     } else
 3132         needsbyteswap = 0;
 3133     if (needsbyteswap)
 3134         version = swap4(ptr[1]);
 3135     else
 3136         version = ptr[1];
 3137     if (version != VERSIONNO) {
 3138         file_error(ms, 0, "File %s supports only version %d magic "
 3139             "files. `%s' is version %d", VERSION,
 3140             VERSIONNO, dbname, version);
 3141         return -1;
 3142     }
 3143     entries = (uint32_t)(map->len / sizeof(struct magic));
 3144     if ((entries * sizeof(struct magic)) != map->len) {
 3145         file_error(ms, 0, "Size of `%s' %" SIZE_T_FORMAT "u is not "
 3146             "a multiple of %" SIZE_T_FORMAT "u",
 3147             dbname, map->len, sizeof(struct magic));
 3148         return -1;
 3149     }
 3150     map->magic[0] = CAST(struct magic *, map->p) + 1;
 3151     nentries = 0;
 3152     for (i = 0; i < MAGIC_SETS; i++) {
 3153         if (needsbyteswap)
 3154             map->nmagic[i] = swap4(ptr[i + 2]);
 3155         else
 3156             map->nmagic[i] = ptr[i + 2];
 3157         if (i != MAGIC_SETS - 1)
 3158             map->magic[i + 1] = map->magic[i] + map->nmagic[i];
 3159         nentries += map->nmagic[i];
 3160     }
 3161     if (entries != nentries + 1) {
 3162         file_error(ms, 0, "Inconsistent entries in `%s' %u != %u",
 3163             dbname, entries, nentries + 1);
 3164         return -1;
 3165     }
 3166     if (needsbyteswap)
 3167         for (i = 0; i < MAGIC_SETS; i++)
 3168             byteswap(map->magic[i], map->nmagic[i]);
 3169     return 0;
 3170 }
 3171 
 3172 /*
 3173  * handle an mmaped file.
 3174  */
 3175 private int
 3176 apprentice_compile(struct magic_set *ms, struct magic_map *map, const char *fn)
 3177 {
 3178     static const size_t nm = sizeof(*map->nmagic) * MAGIC_SETS;
 3179     static const size_t m = sizeof(**map->magic);
 3180     int fd = -1;
 3181     size_t len;
 3182     char *dbname;
 3183     int rv = -1;
 3184     uint32_t i;
 3185     union {
 3186         struct magic m;
 3187         uint32_t h[2 + MAGIC_SETS];
 3188     } hdr;
 3189 
 3190     dbname = mkdbname(ms, fn, 1);
 3191 
 3192     if (dbname == NULL)
 3193         goto out;
 3194 
 3195     if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1)
 3196     {
 3197         file_error(ms, errno, "cannot open `%s'", dbname);
 3198         goto out;
 3199     }
 3200     memset(&hdr, 0, sizeof(hdr));
 3201     hdr.h[0] = MAGICNO;
 3202     hdr.h[1] = VERSIONNO;
 3203     memcpy(hdr.h + 2, map->nmagic, nm);
 3204 
 3205     if (write(fd, &hdr, sizeof(hdr)) != (ssize_t)sizeof(hdr)) {
 3206         file_error(ms, errno, "error writing `%s'", dbname);
 3207         goto out2;
 3208     }
 3209 
 3210     for (i = 0; i < MAGIC_SETS; i++) {
 3211         len = m * map->nmagic[i];
 3212         if (write(fd, map->magic[i], len) != (ssize_t)len) {
 3213             file_error(ms, errno, "error writing `%s'", dbname);
 3214             goto out2;
 3215         }
 3216     }
 3217 
 3218     rv = 0;
 3219 out2:
 3220     if (fd != -1)
 3221         (void)close(fd);
 3222 out:
 3223     apprentice_unmap(map);
 3224     free(dbname);
 3225     return rv;
 3226 }
 3227 
 3228 private const char ext[] = ".mgc";
 3229 /*
 3230  * make a dbname
 3231  */
 3232 private char *
 3233 mkdbname(struct magic_set *ms, const char *fn, int strip)
 3234 {
 3235     const char *p, *q;
 3236     char *buf;
 3237 
 3238     if (strip) {
 3239         if ((p = strrchr(fn, '/')) != NULL)
 3240             fn = ++p;
 3241     }
 3242 
 3243     for (q = fn; *q; q++)
 3244         continue;
 3245     /* Look for .mgc */
 3246     for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
 3247         if (*p != *q)
 3248             break;
 3249 
 3250     /* Did not find .mgc, restore q */
 3251     if (p >= ext)
 3252         while (*q)
 3253             q++;
 3254 
 3255     q++;
 3256     /* Compatibility with old code that looked in .mime */
 3257     if (ms->flags & MAGIC_MIME) {
 3258         if (asprintf(&buf, "%.*s.mime%s", (int)(q - fn), fn, ext) < 0)
 3259             return NULL;
 3260         if (access(buf, R_OK) != -1) {
 3261             ms->flags &= MAGIC_MIME_TYPE;
 3262             return buf;
 3263         }
 3264         free(buf);
 3265     }
 3266     if (asprintf(&buf, "%.*s%s", (int)(q - fn), fn, ext) < 0)
 3267         return NULL;
 3268 
 3269     /* Compatibility with old code that looked in .mime */
 3270     if (strstr(fn, ".mime") != NULL)
 3271         ms->flags &= MAGIC_MIME_TYPE;
 3272     return buf;
 3273 }
 3274 
 3275 /*
 3276  * Byteswap an mmap'ed file if needed
 3277  */
 3278 private void
 3279 byteswap(struct magic *magic, uint32_t nmagic)
 3280 {
 3281     uint32_t i;
 3282     for (i = 0; i < nmagic; i++)
 3283         bs1(&magic[i]);
 3284 }
 3285 
 /*
  * swap a short
  *
  * Returns sv with its two bytes exchanged.
  */
 private uint16_t
 swap2(uint16_t sv)
 {
     return (uint16_t)(((sv & 0x00ffU) << 8) | ((sv & 0xff00U) >> 8));
 }
 3299 
 /*
  * swap an int
  *
  * Returns sv with the order of its four bytes reversed.
  */
 private uint32_t
 swap4(uint32_t sv)
 {
     return ((sv & 0x000000ffU) << 24) |
            ((sv & 0x0000ff00U) << 8) |
            ((sv & 0x00ff0000U) >> 8) |
            ((sv & 0xff000000U) >> 24);
 }
 3315 
 /*
  * swap a quad
  *
  * Returns sv with the order of its eight bytes fully reversed.
  */
 private uint64_t
 swap8(uint64_t sv)
 {
     uint64_t out = 0;
     int n;

     /* Peel bytes off the low end of sv and push them onto out. */
     for (n = 0; n < 8; n++) {
         out = (out << 8) | (sv & 0xffU);
         sv >>= 8;
     }
     return out;
 }
 3346 
 3347 /*
 3348  * byteswap a single magic entry
 3349  */
 3350 private void
 3351 bs1(struct magic *m)
 3352 {
 3353     m->cont_level = swap2(m->cont_level);
 3354     m->offset = swap4((uint32_t)m->offset);
 3355     m->in_offset = swap4((uint32_t)m->in_offset);
 3356     m->lineno = swap4((uint32_t)m->lineno);
 3357     if (IS_STRING(m->type)) {
 3358         m->str_range = swap4(m->str_range);
 3359         m->str_flags = swap4(m->str_flags);
 3360     }
 3361     else {
 3362         m->value.q = swap8(m->value.q);
 3363         m->num_mask = swap8(m->num_mask);
 3364     }
 3365 }
 3366 
 3367 protected size_t
 3368 file_pstring_length_size(const struct magic *m)
 3369 {
 3370     switch (m->str_flags & PSTRING_LEN) {
 3371     case PSTRING_1_LE:
 3372         return 1;
 3373     case PSTRING_2_LE:
 3374     case PSTRING_2_BE:
 3375         return 2;
 3376     case PSTRING_4_LE:
 3377     case PSTRING_4_BE:
 3378         return 4;
 3379     default:
 3380         abort();    /* Impossible */
 3381         return 1;
 3382     }
 3383 }
 3384 protected size_t
 3385 file_pstring_get_length(const struct magic *m, const char *ss)
 3386 {
 3387     size_t len = 0;
 3388     const unsigned char *s = (const unsigned char *)ss;
 3389     unsigned int s3, s2, s1, s0;
 3390 
 3391     switch (m->str_flags & PSTRING_LEN) {
 3392     case PSTRING_1_LE:
 3393         len = *s;
 3394         break;
 3395     case PSTRING_2_LE:
 3396         s0 = s[0];
 3397         s1 = s[1];
 3398         len = (s1 << 8) | s0;
 3399         break;
 3400     case PSTRING_2_BE:
 3401         s0 = s[0];
 3402         s1 = s[1];
 3403         len = (s0 << 8) | s1;
 3404         break;
 3405     case PSTRING_4_LE:
 3406         s0 = s[0];
 3407         s1 = s[1];
 3408         s2 = s[2];
 3409         s3 = s[3];
 3410         len = (s3 << 24) | (s2 << 16) | (s1 << 8) | s0;
 3411         break;
 3412     case PSTRING_4_BE:
 3413         s0 = s[0];
 3414         s1 = s[1];
 3415         s2 = s[2];
 3416         s3 = s[3];
 3417         len = (s0 << 24) | (s1 << 16) | (s2 << 8) | s3;
 3418         break;
 3419     default:
 3420         abort();    /* Impossible */
 3421     }
 3422 
 3423     if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF)
 3424         len -= file_pstring_length_size(m);
 3425 
 3426     return len;
 3427 }
 3428 
 3429 protected int
 3430 file_magicfind(struct magic_set *ms, const char *name, struct mlist *v)
 3431 {
 3432     uint32_t i, j;
 3433     struct mlist *mlist, *ml;
 3434 
 3435     mlist = ms->mlist[1];
 3436 
 3437     for (ml = mlist->next; ml != mlist; ml = ml->next) {
 3438         struct magic *ma = ml->magic;
 3439         uint32_t nma = ml->nmagic;
 3440         for (i = 0; i < nma; i++) {
 3441             if (ma[i].type != FILE_NAME)
 3442                 continue;
 3443             if (strcmp(ma[i].value.s, name) == 0) {
 3444                 v->magic = &ma[i];
 3445                 for (j = i + 1; j < nma; j++)
 3446                     if (ma[j].cont_level == 0)
 3447                         break;
 3448                 v->nmagic = j - i;
 3449                 return 0;
 3450             }
 3451         }
 3452     }
 3453     return -1;
 3454 }