"Fossies" - the Fresh Open Source Software Archive

Member "sleuthkit-4.7.0/framework/modules/c_FileTypeSigModule/file-5.08/src/apprentice.c" (11 Oct 2019, 54912 Bytes) of package /linux/privat/sleuthkit-4.7.0.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "apprentice.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * Copyright (c) Ian F. Darwin 1986-1995.
    3  * Software written by Ian F. Darwin and others;
    4  * maintained 1995-present by Christos Zoulas and others.
    5  * 
    6  * Redistribution and use in source and binary forms, with or without
    7  * modification, are permitted provided that the following conditions
    8  * are met:
    9  * 1. Redistributions of source code must retain the above copyright
   10  *    notice immediately at the beginning of the file, without modification,
   11  *    this list of conditions, and the following disclaimer.
   12  * 2. Redistributions in binary form must reproduce the above copyright
   13  *    notice, this list of conditions and the following disclaimer in the
   14  *    documentation and/or other materials provided with the distribution.
   15  *  
   16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
   17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   19  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
   20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   26  * SUCH DAMAGE.
   27  */
   28 /*
   29  * apprentice - make one pass through /etc/magic, learning its secrets.
   30  */
   31 
   32 #include "file.h"
   33 
   34 #ifndef lint
   35 FILE_RCSID("@(#)$File: apprentice.c,v 1.170 2011/06/10 09:23:28 christos Exp $")
   36 #endif  /* lint */
   37 
   38 #include "magic.h"
   39 #include <stdlib.h>
   40 #ifdef HAVE_UNISTD_H
   41 #include <unistd.h>
   42 #endif
   43 #include <string.h>
   44 #include <assert.h>
   45 #include <ctype.h>
   46 #include <fcntl.h>
   47 #ifdef QUICK
   48 #include <sys/mman.h>
   49 #endif
   50 #include <dirent.h>
   51 
   /*
    * EATAB: advance a pointer named `l' (which must be in scope at the
    * point of use) past any run of ASCII whitespace.  Expands to a braced
    * statement, not an expression.
    */
   52 #define EATAB {while (isascii((unsigned char) *l) && \
   53               isspace((unsigned char) *l))  ++l;}
   /*
    * LOWCASE: yield tolower() of an upper-case character and the argument
    * itself otherwise.  The argument is evaluated more than once.
    */
   54 #define LOWCASE(l) (isupper((unsigned char) (l)) ? \
   55             tolower((unsigned char) (l)) : (l))
   56 /*
   57  * Work around a bug in headers on Digital Unix.
   58  * At least confirmed for: OSF1 V4.0 878
   59  */
   60 #if defined(__osf__) && defined(__DECC)
   61 #ifdef MAP_FAILED
   62 #undef MAP_FAILED
   63 #endif
   64 #endif
   65 
   66 #ifndef MAP_FAILED
   67 #define MAP_FAILED (void *) -1
   68 #endif
   69 
   70 #ifndef MAP_FILE
   71 #define MAP_FILE 0
   72 #endif
   73 
   /*
    * One entry collected while parsing magic source.  apprentice_load()
    * sorts an array of these, copies cont_count elements from each `mp'
    * into the final table and then frees `mp'.
    */
   74 struct magic_entry {
   /* array of magic records belonging to this entry */
   75     struct magic *mp;   
   /* number of records in `mp' that are copied into the final table */
   76     uint32_t cont_count;
   /* NOTE(review): presumably the allocated capacity of `mp'; it is not
      used in the code visible here -- confirm against parse() */
   77     uint32_t max_count;
   78 };
   79 
   /*
    * Per-type lookup tables, indexed by the `type' field of type_tbl
    * entries.  init_file_tables() fills file_formats[] with each entry's
    * format code and file_names[] with a pointer to each entry's name.
    * file_nformats and file_nnames publish the array sizes.
    */
   80 int file_formats[FILE_NAMES_SIZE];
   81 const size_t file_nformats = FILE_NAMES_SIZE;
   82 const char *file_names[FILE_NAMES_SIZE];
   83 const size_t file_nnames = FILE_NAMES_SIZE;
   84 
   85 private int getvalue(struct magic_set *ms, struct magic *, const char **, int);
   86 private int hextoint(int);
   87 private const char *getstr(struct magic_set *, struct magic *, const char *,
   88     int);
   89 private int parse(struct magic_set *, struct magic_entry **, uint32_t *,
   90     const char *, size_t, int);
   91 private void eatsize(const char **);
   92 private int apprentice_1(struct magic_set *, const char *, int, struct mlist *);
   93 private size_t apprentice_magic_strength(const struct magic *);
   94 private int apprentice_sort(const void *, const void *);
   95 private void apprentice_list(struct mlist *, int );
   96 private int apprentice_load(struct magic_set *, struct magic **, uint32_t *,
   97     const char *, int);
   98 private void byteswap(struct magic *, uint32_t);
   99 private void bs1(struct magic *);
  100 private uint16_t swap2(uint16_t);
  101 private uint32_t swap4(uint32_t);
  102 private uint64_t swap8(uint64_t);
  103 private char *mkdbname(struct magic_set *, const char *, int);
  104 private int apprentice_map(struct magic_set *, struct magic **, uint32_t *,
  105     const char *);
  106 private int apprentice_compile(struct magic_set *, struct magic **, uint32_t *,
  107     const char *);
  108 private int check_format_type(const char *, int);
  109 private int check_format(struct magic_set *, struct magic *);
  110 private int get_op(char);
  111 private int parse_mime(struct magic_set *, struct magic_entry *, const char *);
  112 private int parse_strength(struct magic_set *, struct magic_entry *, const char *);
  113 private int parse_apple(struct magic_set *, struct magic_entry *, const char *);
  114 
  115 
  /* Element count of the entry array; apprentice_load() sets it to
     MAXMAGIS before calloc()ing that array. */
  116 private size_t maxmagic = 0;
  /* Size of struct magic as compiled; apprentice_1() refuses to run when
     it differs from FILE_MAGICSIZE. */
  117 private size_t magicsize = sizeof(struct magic);
  118 
  /* Column header that apprentice_load() prints to stderr when the action
     is FILE_CHECK. */
  119 private const char usg_hdr[] = "cont\toffset\ttype\topcode\tmask\tvalue\tdesc";
  120 
  /*
   * Table of "!:" directives.  load_1() compares the text following "!:"
   * on a line against `name' (first `len' bytes) and, on a match, calls
   * `fun' with the most recently parsed entry and the rest of the line.
   * The table ends with an all-NULL sentinel.
   */
  121 private struct {
  122     const char *name;
  123     size_t len;
  124     int (*fun)(struct magic_set *, struct magic_entry *, const char *);
  125 } bang[] = {
  /* DECLARE_FIELD(x) expands to { "x", strlen("x"), parse_x } */
  126 #define DECLARE_FIELD(name) { # name, sizeof(# name) - 1, parse_ ## name }
  127     DECLARE_FIELD(mime),
  128     DECLARE_FIELD(apple),
  129     DECLARE_FIELD(strength),
  130 #undef  DECLARE_FIELD
  131     { NULL, 0, NULL }
  132 };
  133 
  134 #ifdef COMPILE_ONLY
  135 
  136 int main(int, char *[]);
  137 
  138 int
  139 main(int argc, char *argv[])
  140 {
  141     int ret;
  142     struct magic_set *ms;
  143     char *progname;
  144 
  145     if ((progname = strrchr(argv[0], '/')) != NULL)
  146         progname++;
  147     else
  148         progname = argv[0];
  149 
  150     if (argc != 2) {
  151         (void)fprintf(stderr, "Usage: %s file\n", progname);
  152         return 1;
  153     }
  154 
  155     if ((ms = magic_open(MAGIC_CHECK)) == NULL) {
  156         (void)fprintf(stderr, "%s: %s\n", progname, strerror(errno));
  157         return 1;
  158     }
  159     ret = magic_compile(ms, argv[1]) == -1 ? 1 : 0;
  160     if (ret == 1)
  161         (void)fprintf(stderr, "%s: %s\n", progname, magic_error(ms));
  162     magic_close(ms);
  163     return ret;
  164 }
  165 #endif /* COMPILE_ONLY */
  166 
  /*
   * Table of magic type keywords.  Each row gives the keyword text, its
   * length (without the terminating NUL), the FILE_* type code and the
   * FILE_FMT_* format class.  get_type() matches a line prefix against
   * `name', and init_file_tables() copies name/format into file_names[]
   * and file_formats[] indexed by `type'.  Both walk the table until they
   * reach the final row, whose len is 0.
   */
  167 static const struct type_tbl_s {
  168     const char name[16];
  169     const size_t len;
  170     const int type;
  171     const int format;
  172 } type_tbl[] = {
  /* XX(s) supplies the name and len columns from one string literal */
  173 # define XX(s)      s, (sizeof(s) - 1)
  /* XX_NULL supplies the empty name / zero len of the sentinel row */
  174 # define XX_NULL    "", 0
  175     { XX("byte"),       FILE_BYTE,      FILE_FMT_NUM },
  176     { XX("short"),      FILE_SHORT,     FILE_FMT_NUM },
  177     { XX("default"),    FILE_DEFAULT,       FILE_FMT_STR },
  178     { XX("long"),       FILE_LONG,      FILE_FMT_NUM },
  179     { XX("string"),     FILE_STRING,        FILE_FMT_STR },
  180     { XX("date"),       FILE_DATE,      FILE_FMT_STR },
  181     { XX("beshort"),    FILE_BESHORT,       FILE_FMT_NUM },
  182     { XX("belong"),     FILE_BELONG,        FILE_FMT_NUM },
  183     { XX("bedate"),     FILE_BEDATE,        FILE_FMT_STR },
  184     { XX("leshort"),    FILE_LESHORT,       FILE_FMT_NUM },
  185     { XX("lelong"),     FILE_LELONG,        FILE_FMT_NUM },
  186     { XX("ledate"),     FILE_LEDATE,        FILE_FMT_STR },
  187     { XX("pstring"),    FILE_PSTRING,       FILE_FMT_STR },
  188     { XX("ldate"),      FILE_LDATE,     FILE_FMT_STR },
  189     { XX("beldate"),    FILE_BELDATE,       FILE_FMT_STR },
  190     { XX("leldate"),    FILE_LELDATE,       FILE_FMT_STR },
  191     { XX("regex"),      FILE_REGEX,     FILE_FMT_STR },
  192     { XX("bestring16"), FILE_BESTRING16,    FILE_FMT_STR },
  193     { XX("lestring16"), FILE_LESTRING16,    FILE_FMT_STR },
  194     { XX("search"),     FILE_SEARCH,        FILE_FMT_STR },
  195     { XX("medate"),     FILE_MEDATE,        FILE_FMT_STR },
  196     { XX("meldate"),    FILE_MELDATE,       FILE_FMT_STR },
  197     { XX("melong"),     FILE_MELONG,        FILE_FMT_NUM },
  198     { XX("quad"),       FILE_QUAD,      FILE_FMT_QUAD },
  199     { XX("lequad"),     FILE_LEQUAD,        FILE_FMT_QUAD },
  200     { XX("bequad"),     FILE_BEQUAD,        FILE_FMT_QUAD },
  201     { XX("qdate"),      FILE_QDATE,     FILE_FMT_STR },
  202     { XX("leqdate"),    FILE_LEQDATE,       FILE_FMT_STR },
  203     { XX("beqdate"),    FILE_BEQDATE,       FILE_FMT_STR },
  204     { XX("qldate"),     FILE_QLDATE,        FILE_FMT_STR },
  205     { XX("leqldate"),   FILE_LEQLDATE,      FILE_FMT_STR },
  206     { XX("beqldate"),   FILE_BEQLDATE,      FILE_FMT_STR },
  207     { XX("float"),      FILE_FLOAT,     FILE_FMT_FLOAT },
  208     { XX("befloat"),    FILE_BEFLOAT,       FILE_FMT_FLOAT },
  209     { XX("lefloat"),    FILE_LEFLOAT,       FILE_FMT_FLOAT },
  210     { XX("double"),     FILE_DOUBLE,        FILE_FMT_DOUBLE },
  211     { XX("bedouble"),   FILE_BEDOUBLE,      FILE_FMT_DOUBLE },
  212     { XX("ledouble"),   FILE_LEDOUBLE,      FILE_FMT_DOUBLE },
  213     { XX("leid3"),      FILE_LEID3,     FILE_FMT_NUM },
  214     { XX("beid3"),      FILE_BEID3,     FILE_FMT_NUM },
  215     { XX("indirect"),   FILE_INDIRECT,      FILE_FMT_NONE },
  /* sentinel: len == 0 stops the table walks; its type is what get_type()
     returns when no keyword matches */
  216     { XX_NULL,      FILE_INVALID,       FILE_FMT_NONE },
  217 # undef XX
  218 # undef XX_NULL
  219 };
  220 
  221 private int
  222 get_type(const char *l, const char **t)
  223 {
  224     const struct type_tbl_s *p;
  225 
  226     for (p = type_tbl; p->len; p++) {
  227         if (strncmp(l, p->name, p->len) == 0) {
  228             if (t)
  229                 *t = l + p->len;
  230             break;
  231         }
  232     }
  233     return p->type;
  234 }
  235 
  236 private void
  237 init_file_tables(void)
  238 {
  239     static int done = 0;
  240     const struct type_tbl_s *p;
  241 
  242     if (done)
  243         return;
  244     done++;
  245 
  246     for (p = type_tbl; p->len; p++) {
  247         assert(p->type < FILE_NAMES_SIZE);
  248         file_names[p->type] = p->name;
  249         file_formats[p->type] = p->format;
  250     }
  251 }
  252 
  253 /*
  254  * Handle one file or directory.
  255  */
  256 private int
  257 apprentice_1(struct magic_set *ms, const char *fn, int action,
  258     struct mlist *mlist)
  259 {
  260     struct magic *magic = NULL;
  261     uint32_t nmagic = 0;
  262     struct mlist *ml;
  263     int rv = -1;
  264     int mapped;
  265 
  266     if (magicsize != FILE_MAGICSIZE) {
  267         file_error(ms, 0, "magic element size %lu != %lu",
  268             (unsigned long)sizeof(*magic),
  269             (unsigned long)FILE_MAGICSIZE);
  270         return -1;
  271     }
  272 
  273     if (action == FILE_COMPILE) {
  274         rv = apprentice_load(ms, &magic, &nmagic, fn, action);
  275         if (rv != 0)
  276             return -1;
  277         rv = apprentice_compile(ms, &magic, &nmagic, fn);
  278         free(magic);
  279         return rv;
  280     }
  281 
  282 #ifndef COMPILE_ONLY
  283     if ((rv = apprentice_map(ms, &magic, &nmagic, fn)) == -1) {
  284         if (ms->flags & MAGIC_CHECK)
  285             file_magwarn(ms, "using regular magic file `%s'", fn);
  286         rv = apprentice_load(ms, &magic, &nmagic, fn, action);
  287         if (rv != 0)
  288             return -1;
  289     }
  290 
  291     mapped = rv;
  292          
  293     if (magic == NULL) {
  294         file_delmagic(magic, mapped, nmagic);
  295         return -1;
  296     }
  297 
  298     if ((ml = CAST(struct mlist *, malloc(sizeof(*ml)))) == NULL) {
  299         file_delmagic(magic, mapped, nmagic);
  300         file_oomem(ms, sizeof(*ml));
  301         return -1;
  302     }
  303 
  304     ml->magic = magic;
  305     ml->nmagic = nmagic;
  306     ml->mapped = mapped;
  307 
  308     mlist->prev->next = ml;
  309     ml->prev = mlist->prev;
  310     ml->next = mlist;
  311     mlist->prev = ml;
  312 
  313     if (action == FILE_LIST) {
  314         printf("Binary patterns:\n");
  315         apprentice_list(mlist, BINTEST);
  316         printf("Text patterns:\n");
  317         apprentice_list(mlist, TEXTTEST);
  318     }
  319     
  320     return 0;
  321 #endif /* COMPILE_ONLY */
  322 }
  323 
  324 protected void
  325 file_delmagic(struct magic *p, int type, size_t entries)
  326 {
  327     if (p == NULL)
  328         return;
  329     switch (type) {
  330     case 2:
  331 #ifdef QUICK
  332         p--;
  333         (void)munmap((void *)p, sizeof(*p) * (entries + 1));
  334         break;
  335 #else
  336         (void)&entries;
  337         abort();
  338         /*NOTREACHED*/
  339 #endif
  340     case 1:
  341         p--;
  342         /*FALLTHROUGH*/
  343     case 0:
  344         free(p);
  345         break;
  346     default:
  347         abort();
  348     }
  349 }
  350 
  351 /* const char *fn: list of magic files and directories */
  352 protected struct mlist *
  353 file_apprentice(struct magic_set *ms, const char *fn, int action)
  354 {
  355     char *p, *mfn;
  356     int file_err, errs = -1;
  357     struct mlist *mlist;
  358 
  359     if ((fn = magic_getpath(fn, action)) == NULL)
  360         return NULL;
  361 
  362     init_file_tables();
  363 
  364     if ((mfn = strdup(fn)) == NULL) {
  365         file_oomem(ms, strlen(fn));
  366         return NULL;
  367     }
  368     fn = mfn;
  369 
  370     if ((mlist = CAST(struct mlist *, malloc(sizeof(*mlist)))) == NULL) {
  371         free(mfn);
  372         file_oomem(ms, sizeof(*mlist));
  373         return NULL;
  374     }
  375     mlist->next = mlist->prev = mlist;
  376 
  377     while (fn) {
  378         p = strchr(fn, PATHSEP);
  379         if (p)
  380             *p++ = '\0';
  381         if (*fn == '\0')
  382             break;
  383         file_err = apprentice_1(ms, fn, action, mlist);
  384         errs = MAX(errs, file_err);
  385         fn = p;
  386     }
  387     if (errs == -1) {
  388         free(mfn);
  389         free(mlist);
  390         mlist = NULL;
  391         file_error(ms, 0, "could not find any magic files!");
  392         return NULL;
  393     }
  394     free(mfn);
  395     return mlist;
  396 }
  397 
  398 /*
  399  * Get weight of this magic entry, for sorting purposes.
  400  */
  401 private size_t
  402 apprentice_magic_strength(const struct magic *m)
  403 {
  404 #define MULT 10
  405     size_t val = 2 * MULT;  /* baseline strength */
  406 
  407     switch (m->type) {
  408     case FILE_DEFAULT:  /* make sure this sorts last */
  409         if (m->factor_op != FILE_FACTOR_OP_NONE)
  410             abort();
  411         return 0;
  412 
  413     case FILE_BYTE:
  414         val += 1 * MULT;
  415         break;
  416 
  417     case FILE_SHORT:
  418     case FILE_LESHORT:
  419     case FILE_BESHORT:
  420         val += 2 * MULT;
  421         break;
  422 
  423     case FILE_LONG:
  424     case FILE_LELONG:
  425     case FILE_BELONG:
  426     case FILE_MELONG:
  427         val += 4 * MULT;
  428         break;
  429 
  430     case FILE_PSTRING:
  431     case FILE_STRING:
  432         val += m->vallen * MULT;
  433         break;
  434 
  435     case FILE_BESTRING16:
  436     case FILE_LESTRING16:
  437         val += m->vallen * MULT / 2;
  438         break;
  439 
  440     case FILE_SEARCH:
  441     case FILE_REGEX:
  442         val += m->vallen * MAX(MULT / m->vallen, 1);
  443         break;
  444 
  445     case FILE_DATE:
  446     case FILE_LEDATE:
  447     case FILE_BEDATE:
  448     case FILE_MEDATE:
  449     case FILE_LDATE:
  450     case FILE_LELDATE:
  451     case FILE_BELDATE:
  452     case FILE_MELDATE:
  453     case FILE_FLOAT:
  454     case FILE_BEFLOAT:
  455     case FILE_LEFLOAT:
  456         val += 4 * MULT;
  457         break;
  458 
  459     case FILE_QUAD:
  460     case FILE_BEQUAD:
  461     case FILE_LEQUAD:
  462     case FILE_QDATE:
  463     case FILE_LEQDATE:
  464     case FILE_BEQDATE:
  465     case FILE_QLDATE:
  466     case FILE_LEQLDATE:
  467     case FILE_BEQLDATE:
  468     case FILE_DOUBLE:
  469     case FILE_BEDOUBLE:
  470     case FILE_LEDOUBLE:
  471         val += 8 * MULT;
  472         break;
  473 
  474     default:
  475         val = 0;
  476         (void)fprintf(stderr, "Bad type %d\n", m->type);
  477         abort();
  478     }
  479 
  480     switch (m->reln) {
  481     case 'x':   /* matches anything penalize */
  482     case '!':       /* matches almost anything penalize */
  483         val = 0;
  484         break;
  485 
  486     case '=':   /* Exact match, prefer */
  487         val += MULT;
  488         break;
  489 
  490     case '>':
  491     case '<':   /* comparison match reduce strength */
  492         val -= 2 * MULT;
  493         break;
  494 
  495     case '^':
  496     case '&':   /* masking bits, we could count them too */
  497         val -= MULT;
  498         break;
  499 
  500     default:
  501         (void)fprintf(stderr, "Bad relation %c\n", m->reln);
  502         abort();
  503     }
  504 
  505     if (val == 0)   /* ensure we only return 0 for FILE_DEFAULT */
  506         val = 1;
  507 
  508     switch (m->factor_op) {
  509     case FILE_FACTOR_OP_NONE:
  510         break;
  511     case FILE_FACTOR_OP_PLUS:
  512         val += m->factor;
  513         break;
  514     case FILE_FACTOR_OP_MINUS:
  515         val -= m->factor;
  516         break;
  517     case FILE_FACTOR_OP_TIMES:
  518         val *= m->factor;
  519         break;
  520     case FILE_FACTOR_OP_DIV:
  521         val /= m->factor;
  522         break;
  523     default:
  524         abort();
  525     }
  526 
  527     /*
  528      * Magic entries with no description get a bonus because they depend
  529      * on subsequent magic entries to print something.
  530      */
  531     if (m->desc[0] == '\0')
  532         val++;
  533     return val;
  534 }
  535 
  536 /*  
  537  * Sort callback for sorting entries by "strength" (basically length)
  538  */
  539 private int
  540 apprentice_sort(const void *a, const void *b)
  541 {
  542     const struct magic_entry *ma = CAST(const struct magic_entry *, a);
  543     const struct magic_entry *mb = CAST(const struct magic_entry *, b);
  544     size_t sa = apprentice_magic_strength(ma->mp);
  545     size_t sb = apprentice_magic_strength(mb->mp);
  546     if (sa == sb)
  547         return 0;
  548     else if (sa > sb)
  549         return -1;
  550     else
  551         return 1;
  552 }
  553 
  554 /*  
  555  * Shows sorted patterns list in the order which is used for the matching
  556  */
  557 private void
  558 apprentice_list(struct mlist *mlist, int mode)
  559 {
  560     uint32_t magindex = 0;
  561     struct mlist *ml;
  562     for (ml = mlist->next; ml != mlist; ml = ml->next) {
  563         for (magindex = 0; magindex < ml->nmagic; magindex++) {
  564             struct magic *m = &ml->magic[magindex];
  565             if ((m->flag & mode) != mode) {
  566                 /* Skip sub-tests */
  567                 while (magindex + 1 < ml->nmagic &&
  568                        ml->magic[magindex + 1].cont_level != 0)
  569                     ++magindex;
  570                 continue; /* Skip to next top-level test*/
  571             }
  572 
  573             /*
  574              * Try to iterate over the tree until we find item with
  575              * description/mimetype.
  576              */
  577             while (magindex + 1 < ml->nmagic &&
  578                    ml->magic[magindex + 1].cont_level != 0 &&
  579                    *ml->magic[magindex].desc == '\0' &&
  580                    *ml->magic[magindex].mimetype == '\0')
  581                 magindex++;
  582 
  583             printf("Strength = %3" SIZE_T_FORMAT "u : %s [%s]\n",
  584                 apprentice_magic_strength(m),
  585                 ml->magic[magindex].desc,
  586                 ml->magic[magindex].mimetype);
  587         }
  588     }
  589 }
  590 
  591 private void
  592 set_test_type(struct magic *mstart, struct magic *m)
  593 {
  594     switch (m->type) {
  595     case FILE_BYTE:
  596     case FILE_SHORT:
  597     case FILE_LONG:
  598     case FILE_DATE:
  599     case FILE_BESHORT:
  600     case FILE_BELONG:
  601     case FILE_BEDATE:
  602     case FILE_LESHORT:
  603     case FILE_LELONG:
  604     case FILE_LEDATE:
  605     case FILE_LDATE:
  606     case FILE_BELDATE:
  607     case FILE_LELDATE:
  608     case FILE_MEDATE:
  609     case FILE_MELDATE:
  610     case FILE_MELONG:
  611     case FILE_QUAD:
  612     case FILE_LEQUAD:
  613     case FILE_BEQUAD:
  614     case FILE_QDATE:
  615     case FILE_LEQDATE:
  616     case FILE_BEQDATE:
  617     case FILE_QLDATE:
  618     case FILE_LEQLDATE:
  619     case FILE_BEQLDATE:
  620     case FILE_FLOAT:
  621     case FILE_BEFLOAT:
  622     case FILE_LEFLOAT:
  623     case FILE_DOUBLE:
  624     case FILE_BEDOUBLE:
  625     case FILE_LEDOUBLE:
  626         mstart->flag |= BINTEST;
  627         break;
  628     case FILE_STRING:
  629     case FILE_PSTRING:
  630     case FILE_BESTRING16:
  631     case FILE_LESTRING16:
  632         /* Allow text overrides */
  633         if (mstart->str_flags & STRING_TEXTTEST)
  634             mstart->flag |= TEXTTEST;
  635         else
  636             mstart->flag |= BINTEST;
  637         break;
  638     case FILE_REGEX:
  639     case FILE_SEARCH:
  640         /* Check for override */
  641         if (mstart->str_flags & STRING_BINTEST)
  642             mstart->flag |= BINTEST;
  643         if (mstart->str_flags & STRING_TEXTTEST)
  644             mstart->flag |= TEXTTEST;
  645             
  646         if (mstart->flag & (TEXTTEST|BINTEST))
  647             break;
  648 
  649         /* binary test if pattern is not text */
  650         if (file_looks_utf8(m->value.us, (size_t)m->vallen, NULL,
  651             NULL) <= 0)
  652             mstart->flag |= BINTEST;
  653         else
  654             mstart->flag |= TEXTTEST;
  655         break;
  656     case FILE_DEFAULT:
  657         /* can't deduce anything; we shouldn't see this at the
  658            top level anyway */
  659         break;
  660     case FILE_INVALID:
  661     default:
  662         /* invalid search type, but no need to complain here */
  663         break;
  664     }
  665 }
  666 
  667 /*
  668  * Load and parse one file.
  669  */
  670 private void
  671 load_1(struct magic_set *ms, int action, const char *fn, int *errs,
  672    struct magic_entry **marray, uint32_t *marraycount)
  673 {
  674     size_t lineno = 0, llen = 0;
  675     char *line = NULL;
  676     ssize_t len;
  677 
  678     FILE *f = fopen(ms->file = fn, "r");
  679     if (f == NULL) {
  680         if (errno != ENOENT)
  681             file_error(ms, errno, "cannot read magic file `%s'",
  682                    fn);
  683         (*errs)++;
  684         return;
  685     }
  686 
  687     /* read and parse this file */
  688     for (ms->line = 1; (len = getline(&line, &llen, f)) != -1;
  689         ms->line++) {
  690         if (len == 0) /* null line, garbage, etc */
  691             continue;
  692         if (line[len - 1] == '\n') {
  693             lineno++;
  694             line[len - 1] = '\0'; /* delete newline */
  695         }
  696         switch (line[0]) {
  697         case '\0':  /* empty, do not parse */
  698         case '#':   /* comment, do not parse */
  699             continue;
  700         case '!':
  701             if (line[1] == ':') {
  702                 size_t i;
  703 
  704                 for (i = 0; bang[i].name != NULL; i++) {
  705                     if ((size_t)(len - 2) > bang[i].len &&
  706                         memcmp(bang[i].name, line + 2,
  707                         bang[i].len) == 0)
  708                         break;
  709                 }
  710                 if (bang[i].name == NULL) {
  711                     file_error(ms, 0,
  712                         "Unknown !: entry `%s'", line);
  713                     (*errs)++;
  714                     continue;
  715                 }
  716                 if (*marraycount == 0) {
  717                     file_error(ms, 0,
  718                         "No current entry for :!%s type",
  719                         bang[i].name);
  720                     (*errs)++;
  721                     continue;
  722                 }
  723                 if ((*bang[i].fun)(ms, 
  724                     &(*marray)[*marraycount - 1],
  725                     line + bang[i].len + 2) != 0) {
  726                     (*errs)++;
  727                     continue;
  728                 }
  729                 continue;
  730             }
  731             /*FALLTHROUGH*/
  732         default:
  733             if (parse(ms, marray, marraycount, line, lineno,
  734                 action) != 0)
  735                 (*errs)++;
  736             break;
  737         }
  738     }
  739     if (line)
  740         free(line);
  741     (void)fclose(f);
  742 }
  743 
  744 /*
  745  * parse a file or directory of files
  746  * const char *fn: name of magic file or directory
  747  */
  /*
   * qsort() comparison callback for an array of C strings: both arguments
   * point at a `char *' element and the strings are ordered with strcmp().
   */
  private int
  cmpstrp(const void *p1, const void *p2)
  {
      const char *lhs = *(char *const *)p1;
      const char *rhs = *(char *const *)p2;

      return strcmp(lhs, rhs);
  }
  753 
  754 private int
  755 apprentice_load(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
  756     const char *fn, int action)
  757 {
  758     int errs = 0;
  759     struct magic_entry *marray;
  760     uint32_t marraycount, i, mentrycount = 0, starttest;
  761     size_t slen, files = 0, maxfiles = 0;
  762     char **filearr = NULL, *mfn;
  763     struct stat st;
  764     DIR *dir;
  765     struct dirent *d;
  766 
  767     ms->flags |= MAGIC_CHECK;   /* Enable checks for parsed files */
  768 
  769         maxmagic = MAXMAGIS;
  770     if ((marray = CAST(struct magic_entry *, calloc(maxmagic,
  771         sizeof(*marray)))) == NULL) {
  772         file_oomem(ms, maxmagic * sizeof(*marray));
  773         return -1;
  774     }
  775     marraycount = 0;
  776 
  777     /* print silly verbose header for USG compat. */
  778     if (action == FILE_CHECK)
  779         (void)fprintf(stderr, "%s\n", usg_hdr);
  780 
  781     /* load directory or file */
  782     if (stat(fn, &st) == 0 && S_ISDIR(st.st_mode)) {
  783         dir = opendir(fn);
  784         if (!dir) {
  785             errs++;
  786             goto out;
  787         }
  788         while ((d = readdir(dir)) != NULL) {
  789             if (asprintf(&mfn, "%s/%s", fn, d->d_name) < 0) {
  790                 file_oomem(ms,
  791                     strlen(fn) + strlen(d->d_name) + 2);
  792                 errs++;
  793                 goto out;
  794             }
  795             if (stat(mfn, &st) == -1 || !S_ISREG(st.st_mode)) {
  796                 free(mfn);
  797                 continue;
  798             }
  799             if (files >= maxfiles) {
  800                 size_t mlen;
  801                 maxfiles = (maxfiles + 1) * 2;
  802                 mlen = maxfiles * sizeof(*filearr);
  803                 if ((filearr = CAST(char **,
  804                     realloc(filearr, mlen))) == NULL) {
  805                     file_oomem(ms, mlen);
  806                     free(mfn);
  807                     errs++;
  808                     goto out;
  809                 }
  810             }
  811             filearr[files++] = mfn;
  812         }
  813         closedir(dir);
  814         qsort(filearr, files, sizeof(*filearr), cmpstrp);
  815         for (i = 0; i < files; i++) {
  816             load_1(ms, action, filearr[i], &errs, &marray,
  817                 &marraycount);
  818             free(filearr[i]);
  819         }
  820         free(filearr);
  821     } else
  822         load_1(ms, action, fn, &errs, &marray, &marraycount);
  823     if (errs)
  824         goto out;
  825 
  826     /* Set types of tests */
  827     for (i = 0; i < marraycount; ) {
  828         if (marray[i].mp->cont_level != 0) {
  829             i++;
  830             continue;
  831         }
  832 
  833         starttest = i;
  834         do {
  835             static const char text[] = "text";
  836             static const char binary[] = "binary";
  837             static const size_t len = sizeof(text);
  838             set_test_type(marray[starttest].mp, marray[i].mp);
  839             if ((ms->flags & MAGIC_DEBUG) == 0)
  840                 continue;
  841             (void)fprintf(stderr, "%s%s%s: %s\n",
  842                 marray[i].mp->mimetype,
  843                 marray[i].mp->mimetype[0] == '\0' ? "" : "; ",
  844                 marray[i].mp->desc[0] ? marray[i].mp->desc :
  845                 "(no description)",
  846                 marray[i].mp->flag & BINTEST ? binary : text);
  847             if (marray[i].mp->flag & BINTEST) {
  848                 char *p = strstr(marray[i].mp->desc, text);
  849                 if (p && (p == marray[i].mp->desc ||
  850                     isspace((unsigned char)p[-1])) &&
  851                     (p + len - marray[i].mp->desc == 
  852                     MAXstring || (p[len] == '\0' ||
  853                     isspace((unsigned char)p[len]))))
  854                     (void)fprintf(stderr, "*** Possible "
  855                         "binary test for text type\n");
  856             }
  857         } while (++i < marraycount && marray[i].mp->cont_level != 0);
  858     }
  859 
  860     qsort(marray, marraycount, sizeof(*marray), apprentice_sort);
  861 
  862     /*
  863      * Make sure that any level 0 "default" line is last (if one exists).
  864      */
  865     for (i = 0; i < marraycount; i++) {
  866         if (marray[i].mp->cont_level == 0 &&
  867             marray[i].mp->type == FILE_DEFAULT) {
  868             while (++i < marraycount)
  869                 if (marray[i].mp->cont_level == 0)
  870                     break;
  871             if (i != marraycount) {
  872                 /* XXX - Ugh! */
  873                 ms->line = marray[i].mp->lineno;
  874                 file_magwarn(ms,
  875                     "level 0 \"default\" did not sort last");
  876             }
  877             break;                      
  878         }
  879     }
  880 
  881     for (i = 0; i < marraycount; i++)
  882         mentrycount += marray[i].cont_count;
  883 
  884     slen = sizeof(**magicp) * mentrycount;
  885     if ((*magicp = CAST(struct magic *, malloc(slen))) == NULL) {
  886         file_oomem(ms, slen);
  887         errs++;
  888         goto out;
  889     }
  890 
  891     mentrycount = 0;
  892     for (i = 0; i < marraycount; i++) {
  893         (void)memcpy(*magicp + mentrycount, marray[i].mp,
  894             marray[i].cont_count * sizeof(**magicp));
  895         mentrycount += marray[i].cont_count;
  896     }
  897 out:
  898     for (i = 0; i < marraycount; i++)
  899         free(marray[i].mp);
  900     free(marray);
  901     if (errs) {
  902         *magicp = NULL;
  903         *nmagicp = 0;
  904         return errs;
  905     } else {
  906         *nmagicp = mentrycount;
  907         return 0;
  908     }
  909 
  910 }
  911 
  912 /*
  913  * extend the sign bit if the comparison is to be signed
  914  */
  915 protected uint64_t
  916 file_signextend(struct magic_set *ms, struct magic *m, uint64_t v)
  917 {
  918     if (!(m->flag & UNSIGNED)) {
  919         switch(m->type) {
  920         /*
  921          * Do not remove the casts below.  They are
  922          * vital.  When later compared with the data,
  923          * the sign extension must have happened.
  924          */
  925         case FILE_BYTE:
  926             v = (char) v;
  927             break;
  928         case FILE_SHORT:
  929         case FILE_BESHORT:
  930         case FILE_LESHORT:
  931             v = (short) v;
  932             break;
  933         case FILE_DATE:
  934         case FILE_BEDATE:
  935         case FILE_LEDATE:
  936         case FILE_MEDATE:
  937         case FILE_LDATE:
  938         case FILE_BELDATE:
  939         case FILE_LELDATE:
  940         case FILE_MELDATE:
  941         case FILE_LONG:
  942         case FILE_BELONG:
  943         case FILE_LELONG:
  944         case FILE_MELONG:
  945         case FILE_FLOAT:
  946         case FILE_BEFLOAT:
  947         case FILE_LEFLOAT:
  948             v = (int32_t) v;
  949             break;
  950         case FILE_QUAD:
  951         case FILE_BEQUAD:
  952         case FILE_LEQUAD:
  953         case FILE_QDATE:
  954         case FILE_QLDATE:
  955         case FILE_BEQDATE:
  956         case FILE_BEQLDATE:
  957         case FILE_LEQDATE:
  958         case FILE_LEQLDATE:
  959         case FILE_DOUBLE:
  960         case FILE_BEDOUBLE:
  961         case FILE_LEDOUBLE:
  962             v = (int64_t) v;
  963             break;
  964         case FILE_STRING:
  965         case FILE_PSTRING:
  966         case FILE_BESTRING16:
  967         case FILE_LESTRING16:
  968         case FILE_REGEX:
  969         case FILE_SEARCH:
  970         case FILE_DEFAULT:
  971         case FILE_INDIRECT:
  972             break;
  973         default:
  974             if (ms->flags & MAGIC_CHECK)
  975                 file_magwarn(ms, "cannot happen: m->type=%d\n",
  976                     m->type);
  977             return ~0U;
  978         }
  979     }
  980     return v;
  981 }
  982 
  983 private int
  984 string_modifier_check(struct magic_set *ms, struct magic *m)
  985 {
  986     if ((ms->flags & MAGIC_CHECK) == 0)
  987         return 0;
  988 
  989     if (m->type != FILE_PSTRING && (m->str_flags & PSTRING_LEN) != 0) {
  990         file_magwarn(ms,
  991             "'/BHhLl' modifiers are only allowed for pascal strings\n");
  992         return -1;
  993     }
  994     switch (m->type) {
  995     case FILE_BESTRING16:
  996     case FILE_LESTRING16:
  997         if (m->str_flags != 0) {
  998             file_magwarn(ms,
  999                 "no modifiers allowed for 16-bit strings\n");
 1000             return -1;
 1001         }
 1002         break;
 1003     case FILE_STRING:
 1004     case FILE_PSTRING:
 1005         if ((m->str_flags & REGEX_OFFSET_START) != 0) {
 1006             file_magwarn(ms,
 1007                 "'/%c' only allowed on regex and search\n",
 1008                 CHAR_REGEX_OFFSET_START);
 1009             return -1;
 1010         }
 1011         break;
 1012     case FILE_SEARCH:
 1013         if (m->str_range == 0) {
 1014             file_magwarn(ms,
 1015                 "missing range; defaulting to %d\n",
 1016                             STRING_DEFAULT_RANGE);
 1017             m->str_range = STRING_DEFAULT_RANGE;
 1018             return -1;
 1019         }
 1020         break;
 1021     case FILE_REGEX:
 1022         if ((m->str_flags & STRING_COMPACT_WHITESPACE) != 0) {
 1023             file_magwarn(ms, "'/%c' not allowed on regex\n",
 1024                 CHAR_COMPACT_WHITESPACE);
 1025             return -1;
 1026         }
 1027         if ((m->str_flags & STRING_COMPACT_OPTIONAL_WHITESPACE) != 0) {
 1028             file_magwarn(ms, "'/%c' not allowed on regex\n",
 1029                 CHAR_COMPACT_OPTIONAL_WHITESPACE);
 1030             return -1;
 1031         }
 1032         break;
 1033     default:
 1034         file_magwarn(ms, "coding error: m->type=%d\n",
 1035             m->type);
 1036         return -1;
 1037     }
 1038     return 0;
 1039 }
 1040 
 1041 private int
 1042 get_op(char c)
 1043 {
 1044     switch (c) {
 1045     case '&':
 1046         return FILE_OPAND;
 1047     case '|':
 1048         return FILE_OPOR;
 1049     case '^':
 1050         return FILE_OPXOR;
 1051     case '+':
 1052         return FILE_OPADD;
 1053     case '-':
 1054         return FILE_OPMINUS;
 1055     case '*':
 1056         return FILE_OPMULTIPLY;
 1057     case '/':
 1058         return FILE_OPDIVIDE;
 1059     case '%':
 1060         return FILE_OPMODULO;
 1061     default:
 1062         return -1;
 1063     }
 1064 }
 1065 
 1066 #ifdef ENABLE_CONDITIONALS
 1067 private int
 1068 get_cond(const char *l, const char **t)
 1069 {
 1070     static const struct cond_tbl_s {
 1071         char name[8];
 1072         size_t len;
 1073         int cond;
 1074     } cond_tbl[] = {
 1075         { "if",     2,  COND_IF },
 1076         { "elif",   4,  COND_ELIF },
 1077         { "else",   4,  COND_ELSE },
 1078         { "",       0,  COND_NONE },
 1079     };
 1080     const struct cond_tbl_s *p;
 1081 
 1082     for (p = cond_tbl; p->len; p++) {
 1083         if (strncmp(l, p->name, p->len) == 0 &&
 1084             isspace((unsigned char)l[p->len])) {
 1085             if (t)
 1086                 *t = l + p->len;
 1087             break;
 1088         }
 1089     }
 1090     return p->cond;
 1091 }
 1092 
 1093 private int
 1094 check_cond(struct magic_set *ms, int cond, uint32_t cont_level)
 1095 {
 1096     int last_cond;
 1097     last_cond = ms->c.li[cont_level].last_cond;
 1098 
 1099     switch (cond) {
 1100     case COND_IF:
 1101         if (last_cond != COND_NONE && last_cond != COND_ELIF) {
 1102             if (ms->flags & MAGIC_CHECK)
 1103                 file_magwarn(ms, "syntax error: `if'");
 1104             return -1;
 1105         }
 1106         last_cond = COND_IF;
 1107         break;
 1108 
 1109     case COND_ELIF:
 1110         if (last_cond != COND_IF && last_cond != COND_ELIF) {
 1111             if (ms->flags & MAGIC_CHECK)
 1112                 file_magwarn(ms, "syntax error: `elif'");
 1113             return -1;
 1114         }
 1115         last_cond = COND_ELIF;
 1116         break;
 1117 
 1118     case COND_ELSE:
 1119         if (last_cond != COND_IF && last_cond != COND_ELIF) {
 1120             if (ms->flags & MAGIC_CHECK)
 1121                 file_magwarn(ms, "syntax error: `else'");
 1122             return -1;
 1123         }
 1124         last_cond = COND_NONE;
 1125         break;
 1126 
 1127     case COND_NONE:
 1128         last_cond = COND_NONE;
 1129         break;
 1130     }
 1131 
 1132     ms->c.li[cont_level].last_cond = last_cond;
 1133     return 0;
 1134 }
 1135 #endif /* ENABLE_CONDITIONALS */
 1136 
 1137 /*
 1138  * parse one line from magic file, put into magic[index++] if valid
 1139  */
 1140 private int
 1141 parse(struct magic_set *ms, struct magic_entry **mentryp, uint32_t *nmentryp, 
 1142     const char *line, size_t lineno, int action)
 1143 {
 1144 #ifdef ENABLE_CONDITIONALS
 1145     static uint32_t last_cont_level = 0;
 1146 #endif
 1147     size_t i;
 1148     struct magic_entry *me;
 1149     struct magic *m;
 1150     const char *l = line;
 1151     char *t;
 1152     int op;
 1153     uint32_t cont_level;
 1154 
 1155     cont_level = 0;
 1156 
 1157     while (*l == '>') {
 1158         ++l;        /* step over */
 1159         cont_level++; 
 1160     }
 1161 #ifdef ENABLE_CONDITIONALS
 1162     if (cont_level == 0 || cont_level > last_cont_level)
 1163         if (file_check_mem(ms, cont_level) == -1)
 1164             return -1;
 1165     last_cont_level = cont_level;
 1166 #endif
 1167 
 1168 #define ALLOC_CHUNK (size_t)10
 1169 #define ALLOC_INCR  (size_t)200
 1170 
 1171     if (cont_level != 0) {
 1172         if (*nmentryp == 0) {
 1173             file_error(ms, 0, "No current entry for continuation");
 1174             return -1;
 1175         }
 1176         me = &(*mentryp)[*nmentryp - 1];
 1177         if (me->cont_count == me->max_count) {
 1178             struct magic *nm;
 1179             size_t cnt = me->max_count + ALLOC_CHUNK;
 1180             if ((nm = CAST(struct magic *, realloc(me->mp,
 1181                 sizeof(*nm) * cnt))) == NULL) {
 1182                 file_oomem(ms, sizeof(*nm) * cnt);
 1183                 return -1;
 1184             }
 1185             me->mp = m = nm;
 1186             me->max_count = CAST(uint32_t, cnt);
 1187         }
 1188         m = &me->mp[me->cont_count++];
 1189         (void)memset(m, 0, sizeof(*m));
 1190         m->cont_level = cont_level;
 1191     } else {
 1192         if (*nmentryp == maxmagic) {
 1193             struct magic_entry *mp;
 1194 
 1195             maxmagic += ALLOC_INCR;
 1196             if ((mp = CAST(struct magic_entry *,
 1197                 realloc(*mentryp, sizeof(*mp) * maxmagic))) ==
 1198                 NULL) {
 1199                 file_oomem(ms, sizeof(*mp) * maxmagic);
 1200                 return -1;
 1201             }
 1202             (void)memset(&mp[*nmentryp], 0, sizeof(*mp) *
 1203                 ALLOC_INCR);
 1204             *mentryp = mp;
 1205         }
 1206         me = &(*mentryp)[*nmentryp];
 1207         if (me->mp == NULL) {
 1208             size_t len = sizeof(*m) * ALLOC_CHUNK;
 1209             if ((m = CAST(struct magic *, malloc(len))) == NULL) {
 1210                 file_oomem(ms, len);
 1211                 return -1;
 1212             }
 1213             me->mp = m;
 1214             me->max_count = ALLOC_CHUNK;
 1215         } else
 1216             m = me->mp;
 1217         (void)memset(m, 0, sizeof(*m));
 1218         m->factor_op = FILE_FACTOR_OP_NONE;
 1219         m->cont_level = 0;
 1220         me->cont_count = 1;
 1221     }
 1222     m->lineno = CAST(uint32_t, lineno);
 1223 
 1224     if (*l == '&') {  /* m->cont_level == 0 checked below. */
 1225                 ++l;            /* step over */
 1226                 m->flag |= OFFADD;
 1227         }
 1228     if (*l == '(') {
 1229         ++l;        /* step over */
 1230         m->flag |= INDIR;
 1231         if (m->flag & OFFADD)
 1232             m->flag = (m->flag & ~OFFADD) | INDIROFFADD;
 1233 
 1234         if (*l == '&') {  /* m->cont_level == 0 checked below */
 1235             ++l;            /* step over */
 1236             m->flag |= OFFADD;
 1237         }
 1238     }
 1239     /* Indirect offsets are not valid at level 0. */
 1240     if (m->cont_level == 0 && (m->flag & (OFFADD | INDIROFFADD)))
 1241         if (ms->flags & MAGIC_CHECK)
 1242             file_magwarn(ms, "relative offset at level 0");
 1243 
 1244     /* get offset, then skip over it */
 1245     m->offset = (uint32_t)strtoul(l, &t, 0);
 1246         if (l == t)
 1247         if (ms->flags & MAGIC_CHECK)
 1248             file_magwarn(ms, "offset `%s' invalid", l);
 1249         l = t;
 1250 
 1251     if (m->flag & INDIR) {
 1252         m->in_type = FILE_LONG;
 1253         m->in_offset = 0;
 1254         /*
 1255          * read [.lbs][+-]nnnnn)
 1256          */
 1257         if (*l == '.') {
 1258             l++;
 1259             switch (*l) {
 1260             case 'l':
 1261                 m->in_type = FILE_LELONG;
 1262                 break;
 1263             case 'L':
 1264                 m->in_type = FILE_BELONG;
 1265                 break;
 1266             case 'm':
 1267                 m->in_type = FILE_MELONG;
 1268                 break;
 1269             case 'h':
 1270             case 's':
 1271                 m->in_type = FILE_LESHORT;
 1272                 break;
 1273             case 'H':
 1274             case 'S':
 1275                 m->in_type = FILE_BESHORT;
 1276                 break;
 1277             case 'c':
 1278             case 'b':
 1279             case 'C':
 1280             case 'B':
 1281                 m->in_type = FILE_BYTE;
 1282                 break;
 1283             case 'e':
 1284             case 'f':
 1285             case 'g':
 1286                 m->in_type = FILE_LEDOUBLE;
 1287                 break;
 1288             case 'E':
 1289             case 'F':
 1290             case 'G':
 1291                 m->in_type = FILE_BEDOUBLE;
 1292                 break;
 1293             case 'i':
 1294                 m->in_type = FILE_LEID3;
 1295                 break;
 1296             case 'I':
 1297                 m->in_type = FILE_BEID3;
 1298                 break;
 1299             default:
 1300                 if (ms->flags & MAGIC_CHECK)
 1301                     file_magwarn(ms,
 1302                         "indirect offset type `%c' invalid",
 1303                         *l);
 1304                 break;
 1305             }
 1306             l++;
 1307         }
 1308 
 1309         m->in_op = 0;
 1310         if (*l == '~') {
 1311             m->in_op |= FILE_OPINVERSE;
 1312             l++;
 1313         }
 1314         if ((op = get_op(*l)) != -1) {
 1315             m->in_op |= op;
 1316             l++;
 1317         }
 1318         if (*l == '(') {
 1319             m->in_op |= FILE_OPINDIRECT;
 1320             l++;
 1321         }
 1322         if (isdigit((unsigned char)*l) || *l == '-') {
 1323             m->in_offset = (int32_t)strtol(l, &t, 0);
 1324             if (l == t)
 1325                 if (ms->flags & MAGIC_CHECK)
 1326                     file_magwarn(ms,
 1327                         "in_offset `%s' invalid", l);
 1328             l = t;
 1329         }
 1330         if (*l++ != ')' || 
 1331             ((m->in_op & FILE_OPINDIRECT) && *l++ != ')'))
 1332             if (ms->flags & MAGIC_CHECK)
 1333                 file_magwarn(ms,
 1334                     "missing ')' in indirect offset");
 1335     }
 1336     EATAB;
 1337 
 1338 #ifdef ENABLE_CONDITIONALS
 1339     m->cond = get_cond(l, &l);
 1340     if (check_cond(ms, m->cond, cont_level) == -1)
 1341         return -1;
 1342 
 1343     EATAB;
 1344 #endif
 1345 
 1346     if (*l == 'u') {
 1347         ++l;
 1348         m->flag |= UNSIGNED;
 1349     }
 1350 
 1351     m->type = get_type(l, &l);
 1352     if (m->type == FILE_INVALID) {
 1353         if (ms->flags & MAGIC_CHECK)
 1354             file_magwarn(ms, "type `%s' invalid", l);
 1355         return -1;
 1356     }
 1357 
 1358     /* New-style anding: "0 byte&0x80 =0x80 dynamically linked" */
 1359     /* New and improved: ~ & | ^ + - * / % -- exciting, isn't it? */
 1360 
 1361     m->mask_op = 0;
 1362     if (*l == '~') {
 1363         if (!IS_STRING(m->type))
 1364             m->mask_op |= FILE_OPINVERSE;
 1365         else if (ms->flags & MAGIC_CHECK)
 1366             file_magwarn(ms, "'~' invalid for string types");
 1367         ++l;
 1368     }
 1369     m->str_range = 0;
 1370     m->str_flags = m->type == FILE_PSTRING ? PSTRING_1_LE : 0;
 1371     if ((op = get_op(*l)) != -1) {
 1372         if (!IS_STRING(m->type)) {
 1373             uint64_t val;
 1374             ++l;
 1375             m->mask_op |= op;
 1376             val = (uint64_t)strtoull(l, &t, 0);
 1377             l = t;
 1378             m->num_mask = file_signextend(ms, m, val);
 1379             eatsize(&l);
 1380         }
 1381         else if (op == FILE_OPDIVIDE) {
 1382             int have_range = 0;
 1383             while (!isspace((unsigned char)*++l)) {
 1384                 switch (*l) {
 1385                 case '0':  case '1':  case '2':
 1386                 case '3':  case '4':  case '5':
 1387                 case '6':  case '7':  case '8':
 1388                 case '9':
 1389                     if (have_range &&
 1390                         (ms->flags & MAGIC_CHECK))
 1391                         file_magwarn(ms,
 1392                             "multiple ranges");
 1393                     have_range = 1;
 1394                     m->str_range = CAST(uint32_t,
 1395                         strtoul(l, &t, 0));
 1396                     if (m->str_range == 0)
 1397                         file_magwarn(ms,
 1398                             "zero range");
 1399                     l = t - 1;
 1400                     break;
 1401                 case CHAR_COMPACT_WHITESPACE:
 1402                     m->str_flags |=
 1403                         STRING_COMPACT_WHITESPACE;
 1404                     break;
 1405                 case CHAR_COMPACT_OPTIONAL_WHITESPACE:
 1406                     m->str_flags |=
 1407                         STRING_COMPACT_OPTIONAL_WHITESPACE;
 1408                     break;
 1409                 case CHAR_IGNORE_LOWERCASE:
 1410                     m->str_flags |= STRING_IGNORE_LOWERCASE;
 1411                     break;
 1412                 case CHAR_IGNORE_UPPERCASE:
 1413                     m->str_flags |= STRING_IGNORE_UPPERCASE;
 1414                     break;
 1415                 case CHAR_REGEX_OFFSET_START:
 1416                     m->str_flags |= REGEX_OFFSET_START;
 1417                     break;
 1418                 case CHAR_BINTEST:
 1419                     m->str_flags |= STRING_BINTEST;
 1420                     break;
 1421                 case CHAR_TEXTTEST:
 1422                     m->str_flags |= STRING_TEXTTEST;
 1423                     break;
 1424                 case CHAR_PSTRING_1_LE:
 1425                     if (m->type != FILE_PSTRING)
 1426                         goto bad;
 1427                     m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_1_LE;
 1428                     break;
 1429                 case CHAR_PSTRING_2_BE:
 1430                     if (m->type != FILE_PSTRING)
 1431                         goto bad;
 1432                     m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_BE;
 1433                     break;
 1434                 case CHAR_PSTRING_2_LE:
 1435                     if (m->type != FILE_PSTRING)
 1436                         goto bad;
 1437                     m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_2_LE;
 1438                     break;
 1439                 case CHAR_PSTRING_4_BE:
 1440                     if (m->type != FILE_PSTRING)
 1441                         goto bad;
 1442                     m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_BE;
 1443                     break;
 1444                 case CHAR_PSTRING_4_LE:
 1445                     if (m->type != FILE_PSTRING)
 1446                         goto bad;
 1447                     m->str_flags = (m->str_flags & ~PSTRING_LEN) | PSTRING_4_LE;
 1448                     break;
 1449                 case CHAR_PSTRING_LENGTH_INCLUDES_ITSELF:
 1450                     if (m->type != FILE_PSTRING)
 1451                         goto bad;
 1452                     m->str_flags |= PSTRING_LENGTH_INCLUDES_ITSELF;
 1453                     break;
 1454                 bad:
 1455                 default:
 1456                     if (ms->flags & MAGIC_CHECK)
 1457                         file_magwarn(ms,
 1458                             "string extension `%c' "
 1459                             "invalid", *l);
 1460                     return -1;
 1461                 }
 1462                 /* allow multiple '/' for readability */
 1463                 if (l[1] == '/' &&
 1464                     !isspace((unsigned char)l[2]))
 1465                     l++;
 1466             }
 1467             if (string_modifier_check(ms, m) == -1)
 1468                 return -1;
 1469         }
 1470         else {
 1471             if (ms->flags & MAGIC_CHECK)
 1472                 file_magwarn(ms, "invalid string op: %c", *t);
 1473             return -1;
 1474         }
 1475     }
 1476     /*
 1477      * We used to set mask to all 1's here, instead let's just not do
 1478      * anything if mask = 0 (unless you have a better idea)
 1479      */
 1480     EATAB;
 1481   
 1482     switch (*l) {
 1483     case '>':
 1484     case '<':
 1485         m->reln = *l;
 1486         ++l;
 1487         if (*l == '=') {
 1488             if (ms->flags & MAGIC_CHECK) {
 1489                 file_magwarn(ms, "%c= not supported",
 1490                     m->reln);
 1491                 return -1;
 1492             }
 1493            ++l;
 1494         }
 1495         break;
 1496     /* Old-style anding: "0 byte &0x80 dynamically linked" */
 1497     case '&':
 1498     case '^':
 1499     case '=':
 1500         m->reln = *l;
 1501         ++l;
 1502         if (*l == '=') {
 1503            /* HP compat: ignore &= etc. */
 1504            ++l;
 1505         }
 1506         break;
 1507     case '!':
 1508         m->reln = *l;
 1509         ++l;
 1510         break;
 1511     default:
 1512         m->reln = '=';  /* the default relation */
 1513         if (*l == 'x' && ((isascii((unsigned char)l[1]) && 
 1514             isspace((unsigned char)l[1])) || !l[1])) {
 1515             m->reln = *l;
 1516             ++l;
 1517         }
 1518         break;
 1519     }
 1520     /*
 1521      * Grab the value part, except for an 'x' reln.
 1522      */
 1523     if (m->reln != 'x' && getvalue(ms, m, &l, action))
 1524         return -1;
 1525 
 1526     /*
 1527      * TODO finish this macro and start using it!
 1528      * #define offsetcheck {if (offset > HOWMANY-1) 
 1529      *  magwarn("offset too big"); }
 1530      */
 1531 
 1532     /*
 1533      * Now get last part - the description
 1534      */
 1535     EATAB;
 1536     if (l[0] == '\b') {
 1537         ++l;
 1538         m->flag |= NOSPACE;
 1539     } else if ((l[0] == '\\') && (l[1] == 'b')) {
 1540         ++l;
 1541         ++l;
 1542         m->flag |= NOSPACE;
 1543     }
 1544     for (i = 0; (m->desc[i++] = *l++) != '\0' && i < sizeof(m->desc); )
 1545         continue;
 1546     if (i == sizeof(m->desc)) {
 1547         m->desc[sizeof(m->desc) - 1] = '\0';
 1548         if (ms->flags & MAGIC_CHECK)
 1549             file_magwarn(ms, "description `%s' truncated", m->desc);
 1550     }
 1551 
 1552         /*
 1553      * We only do this check while compiling, or if any of the magic
 1554      * files were not compiled.
 1555          */
 1556         if (ms->flags & MAGIC_CHECK) {
 1557         if (check_format(ms, m) == -1)
 1558             return -1;
 1559     }
 1560 #ifndef COMPILE_ONLY
 1561     if (action == FILE_CHECK) {
 1562         file_mdump(m);
 1563     }
 1564 #endif
 1565     m->mimetype[0] = '\0';      /* initialise MIME type to none */
 1566     if (m->cont_level == 0)
 1567         ++(*nmentryp);      /* make room for next */
 1568     return 0;
 1569 }
 1570 
 1571 /*
 1572  * parse a STRENGTH annotation line from magic file, put into magic[index - 1]
 1573  * if valid
 1574  */
 1575 private int
 1576 parse_strength(struct magic_set *ms, struct magic_entry *me, const char *line)
 1577 {
 1578     const char *l = line;
 1579     char *el;
 1580     unsigned long factor;
 1581     struct magic *m = &me->mp[0];
 1582 
 1583     if (m->factor_op != FILE_FACTOR_OP_NONE) {
 1584         file_magwarn(ms,
 1585             "Current entry already has a strength type: %c %d",
 1586             m->factor_op, m->factor);
 1587         return -1;
 1588     }
 1589     EATAB;
 1590     switch (*l) {
 1591     case FILE_FACTOR_OP_NONE:
 1592     case FILE_FACTOR_OP_PLUS:
 1593     case FILE_FACTOR_OP_MINUS:
 1594     case FILE_FACTOR_OP_TIMES:
 1595     case FILE_FACTOR_OP_DIV:
 1596         m->factor_op = *l++;
 1597         break;
 1598     default:
 1599         file_magwarn(ms, "Unknown factor op `%c'", *l);
 1600         return -1;
 1601     }
 1602     EATAB;
 1603     factor = strtoul(l, &el, 0);
 1604     if (factor > 255) {
 1605         file_magwarn(ms, "Too large factor `%lu'", factor);
 1606         goto out;
 1607     }
 1608     if (*el && !isspace((unsigned char)*el)) {
 1609         file_magwarn(ms, "Bad factor `%s'", l);
 1610         goto out;
 1611     }
 1612     m->factor = (uint8_t)factor;
 1613     if (m->factor == 0 && m->factor_op == FILE_FACTOR_OP_DIV) {
 1614         file_magwarn(ms, "Cannot have factor op `%c' and factor %u",
 1615             m->factor_op, m->factor);
 1616         goto out;
 1617     }
 1618     return 0;
 1619 out:
 1620     m->factor_op = FILE_FACTOR_OP_NONE;
 1621     m->factor = 0;
 1622     return -1;
 1623 }
 1624 
 1625 /*
 1626  * Parse an Apple CREATOR/TYPE annotation from magic file and put it into
 1627  * magic[index - 1]
 1628  */
 1629 private int
 1630 parse_apple(struct magic_set *ms, struct magic_entry *me, const char *line)
 1631 {
 1632     size_t i;
 1633     const char *l = line;
 1634     struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
 1635 
 1636     if (m->apple[0] != '\0') {
 1637         file_magwarn(ms, "Current entry already has a APPLE type "
 1638             "`%.8s', new type `%s'", m->mimetype, l);
 1639         return -1;
 1640     }   
 1641 
 1642     EATAB;
 1643     for (i = 0; *l && ((isascii((unsigned char)*l) &&
 1644         isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
 1645         i < sizeof(m->apple); m->apple[i++] = *l++)
 1646         continue;
 1647     if (i == sizeof(m->apple) && *l) {
 1648         /* We don't need to NUL terminate here, printing handles it */
 1649         if (ms->flags & MAGIC_CHECK)
 1650             file_magwarn(ms, "APPLE type `%s' truncated %"
 1651                 SIZE_T_FORMAT "u", line, i);
 1652     }
 1653 
 1654     if (i > 0)
 1655         return 0;
 1656     else
 1657         return -1;
 1658 }
 1659 
 1660 /*
 1661  * parse a MIME annotation line from magic file, put into magic[index - 1]
 1662  * if valid
 1663  */
 1664 private int
 1665 parse_mime(struct magic_set *ms, struct magic_entry *me, const char *line)
 1666 {
 1667     size_t i;
 1668     const char *l = line;
 1669     struct magic *m = &me->mp[me->cont_count == 0 ? 0 : me->cont_count - 1];
 1670 
 1671     if (m->mimetype[0] != '\0') {
 1672         file_magwarn(ms, "Current entry already has a MIME type `%s',"
 1673             " new type `%s'", m->mimetype, l);
 1674         return -1;
 1675     }   
 1676 
 1677     EATAB;
 1678     for (i = 0; *l && ((isascii((unsigned char)*l) &&
 1679         isalnum((unsigned char)*l)) || strchr("-+/.", *l)) &&
 1680         i < sizeof(m->mimetype); m->mimetype[i++] = *l++)
 1681         continue;
 1682     if (i == sizeof(m->mimetype)) {
 1683         m->mimetype[sizeof(m->mimetype) - 1] = '\0';
 1684         if (ms->flags & MAGIC_CHECK)
 1685             file_magwarn(ms, "MIME type `%s' truncated %"
 1686                 SIZE_T_FORMAT "u", m->mimetype, i);
 1687     } else
 1688         m->mimetype[i] = '\0';
 1689 
 1690     if (i > 0)
 1691         return 0;
 1692     else
 1693         return -1;
 1694 }
 1695 
 1696 private int
 1697 check_format_type(const char *ptr, int type)
 1698 {
 1699     int quad = 0;
 1700     if (*ptr == '\0') {
 1701         /* Missing format string; bad */
 1702         return -1;
 1703     }
 1704 
 1705     switch (type) {
 1706     case FILE_FMT_QUAD:
 1707         quad = 1;
 1708         /*FALLTHROUGH*/
 1709     case FILE_FMT_NUM:
 1710         if (*ptr == '-')
 1711             ptr++;
 1712         if (*ptr == '.')
 1713             ptr++;
 1714         while (isdigit((unsigned char)*ptr)) ptr++;
 1715         if (*ptr == '.')
 1716             ptr++;
 1717         while (isdigit((unsigned char)*ptr)) ptr++;
 1718         if (quad) {
 1719             if (*ptr++ != 'l')
 1720                 return -1;
 1721             if (*ptr++ != 'l')
 1722                 return -1;
 1723         }
 1724     
 1725         switch (*ptr++) {
 1726         case 'l':
 1727             switch (*ptr++) {
 1728             case 'i':
 1729             case 'd':
 1730             case 'u':
 1731             case 'x':
 1732             case 'X':
 1733                 return 0;
 1734             default:
 1735                 return -1;
 1736             }
 1737         
 1738         case 'h':
 1739             switch (*ptr++) {
 1740             case 'h':
 1741                 switch (*ptr++) {
 1742                 case 'i':
 1743                 case 'd':
 1744                 case 'u':
 1745                 case 'x':
 1746                 case 'X':
 1747                     return 0;
 1748                 default:
 1749                     return -1;
 1750                 }
 1751             case 'd':
 1752                 return 0;
 1753             default:
 1754                 return -1;
 1755             }
 1756 
 1757         case 'i':
 1758         case 'c':
 1759         case 'd':
 1760         case 'u':
 1761         case 'x':
 1762         case 'X':
 1763             return 0;
 1764             
 1765         default:
 1766             return -1;
 1767         }
 1768         
 1769     case FILE_FMT_FLOAT:
 1770     case FILE_FMT_DOUBLE:
 1771         if (*ptr == '-')
 1772             ptr++;
 1773         if (*ptr == '.')
 1774             ptr++;
 1775         while (isdigit((unsigned char)*ptr)) ptr++;
 1776         if (*ptr == '.')
 1777             ptr++;
 1778         while (isdigit((unsigned char)*ptr)) ptr++;
 1779     
 1780         switch (*ptr++) {
 1781         case 'e':
 1782         case 'E':
 1783         case 'f':
 1784         case 'F':
 1785         case 'g':
 1786         case 'G':
 1787             return 0;
 1788             
 1789         default:
 1790             return -1;
 1791         }
 1792         
 1793 
 1794     case FILE_FMT_STR:
 1795         if (*ptr == '-')
 1796             ptr++;
 1797         while (isdigit((unsigned char )*ptr))
 1798             ptr++;
 1799         if (*ptr == '.') {
 1800             ptr++;
 1801             while (isdigit((unsigned char )*ptr))
 1802                 ptr++;
 1803         }
 1804         
 1805         switch (*ptr++) {
 1806         case 's':
 1807             return 0;
 1808         default:
 1809             return -1;
 1810         }
 1811         
 1812     default:
 1813         /* internal error */
 1814         abort();
 1815     }
 1816     /*NOTREACHED*/
 1817     return -1;
 1818 }
 1819     
 1820 /*
 1821  * Check that the optional printf format in description matches
 1822  * the type of the magic.
 1823  */
 1824 private int
 1825 check_format(struct magic_set *ms, struct magic *m)
 1826 {
 1827     char *ptr;
 1828 
 1829     for (ptr = m->desc; *ptr; ptr++)
 1830         if (*ptr == '%')
 1831             break;
 1832     if (*ptr == '\0') {
 1833         /* No format string; ok */
 1834         return 1;
 1835     }
 1836 
 1837     assert(file_nformats == file_nnames);
 1838 
 1839     if (m->type >= file_nformats) {
 1840         file_magwarn(ms, "Internal error inconsistency between "
 1841             "m->type and format strings");      
 1842         return -1;
 1843     }
 1844     if (file_formats[m->type] == FILE_FMT_NONE) {
 1845         file_magwarn(ms, "No format string for `%s' with description "
 1846             "`%s'", m->desc, file_names[m->type]);
 1847         return -1;
 1848     }
 1849 
 1850     ptr++;
 1851     if (check_format_type(ptr, file_formats[m->type]) == -1) {
 1852         /*
 1853          * TODO: this error message is unhelpful if the format
 1854          * string is not one character long
 1855          */
 1856         file_magwarn(ms, "Printf format `%c' is not valid for type "
 1857             "`%s' in description `%s'", *ptr ? *ptr : '?',
 1858             file_names[m->type], m->desc);
 1859         return -1;
 1860     }
 1861     
 1862     for (; *ptr; ptr++) {
 1863         if (*ptr == '%') {
 1864             file_magwarn(ms,
 1865                 "Too many format strings (should have at most one) "
 1866                 "for `%s' with description `%s'",
 1867                 file_names[m->type], m->desc);
 1868             return -1;
 1869         }
 1870     }
 1871     return 0;
 1872 }
 1873 
 1874 /* 
 1875  * Read a numeric value from a pointer, into the value union of a magic 
 1876  * pointer, according to the magic type.  Update the string pointer to point 
 1877  * just after the number read.  Return 0 for success, non-zero for failure.
 1878  */
 1879 private int
 1880 getvalue(struct magic_set *ms, struct magic *m, const char **p, int action)
 1881 {
 1882     switch (m->type) {
 1883     case FILE_BESTRING16:
 1884     case FILE_LESTRING16:
 1885     case FILE_STRING:
 1886     case FILE_PSTRING:
 1887     case FILE_REGEX:
 1888     case FILE_SEARCH:
 1889         *p = getstr(ms, m, *p, action == FILE_COMPILE);
 1890         if (*p == NULL) {
 1891             if (ms->flags & MAGIC_CHECK)
 1892                 file_magwarn(ms, "cannot get string from `%s'",
 1893                     m->value.s);
 1894             return -1;
 1895         }
 1896         return 0;
 1897     case FILE_FLOAT:
 1898     case FILE_BEFLOAT:
 1899     case FILE_LEFLOAT:
 1900         if (m->reln != 'x') {
 1901             char *ep;
 1902 #ifdef HAVE_STRTOF
 1903             m->value.f = strtof(*p, &ep);
 1904 #else
 1905             m->value.f = (float)strtod(*p, &ep);
 1906 #endif
 1907             *p = ep;
 1908         }
 1909         return 0;
 1910     case FILE_DOUBLE:
 1911     case FILE_BEDOUBLE:
 1912     case FILE_LEDOUBLE:
 1913         if (m->reln != 'x') {
 1914             char *ep;
 1915             m->value.d = strtod(*p, &ep);
 1916             *p = ep;
 1917         }
 1918         return 0;
 1919     default:
 1920         if (m->reln != 'x') {
 1921             char *ep;
 1922             m->value.q = file_signextend(ms, m,
 1923                 (uint64_t)strtoull(*p, &ep, 0));
 1924             *p = ep;
 1925             eatsize(p);
 1926         }
 1927         return 0;
 1928     }
 1929 }
 1930 
 1931 /*
 1932  * Convert a string containing C character escapes.  Stop at an unescaped
 1933  * space or tab.
 1934  * Copy the converted version to "m->value.s", and the length in m->vallen.
 1935  * Return updated scan pointer as function result. Warn if set.
 1936  */
 1937 private const char *
 1938 getstr(struct magic_set *ms, struct magic *m, const char *s, int warn)
 1939 {
 1940     const char *origs = s;
 1941     char    *p = m->value.s;
 1942     size_t  plen = sizeof(m->value.s);
 1943     char    *origp = p;
 1944     char    *pmax = p + plen - 1;
 1945     int c;
 1946     int val;
 1947 
 1948     while ((c = *s++) != '\0') {
 1949         if (isspace((unsigned char) c))
 1950             break;
 1951         if (p >= pmax) {
 1952             file_error(ms, 0, "string too long: `%s'", origs);
 1953             return NULL;
 1954         }
 1955         if (c == '\\') {
 1956             switch(c = *s++) {
 1957 
 1958             case '\0':
 1959                 if (warn)
 1960                     file_magwarn(ms, "incomplete escape");
 1961                 goto out;
 1962 
 1963             case '\t':
 1964                 if (warn) {
 1965                     file_magwarn(ms,
 1966                         "escaped tab found, use \\t instead");
 1967                     warn = 0;   /* already did */
 1968                 }
 1969                 /*FALLTHROUGH*/
 1970             default:
 1971                 if (warn) {
 1972                     if (isprint((unsigned char)c)) {
 1973                         /* Allow escaping of 
 1974                          * ``relations'' */
 1975                         if (strchr("<>&^=!", c) == NULL
 1976                             && (m->type != FILE_REGEX ||
 1977                             strchr("[]().*?^$|{}", c)
 1978                             == NULL)) {
 1979                             file_magwarn(ms, "no "
 1980                                 "need to escape "
 1981                                 "`%c'", c);
 1982                         }
 1983                     } else {
 1984                         file_magwarn(ms,
 1985                             "unknown escape sequence: "
 1986                             "\\%03o", c);
 1987                     }
 1988                 }
 1989                 /*FALLTHROUGH*/
 1990             /* space, perhaps force people to use \040? */
 1991             case ' ':
 1992 #if 0
 1993             /*
 1994              * Other things people escape, but shouldn't need to,
 1995              * so we disallow them
 1996              */
 1997             case '\'':
 1998             case '"':
 1999             case '?':
 2000 #endif
 2001             /* Relations */
 2002             case '>':
 2003             case '<':
 2004             case '&':
 2005             case '^':
 2006             case '=':
 2007             case '!':
 2008             /* and baskslash itself */
 2009             case '\\':
 2010                 *p++ = (char) c;
 2011                 break;
 2012 
 2013             case 'a':
 2014                 *p++ = '\a';
 2015                 break;
 2016 
 2017             case 'b':
 2018                 *p++ = '\b';
 2019                 break;
 2020 
 2021             case 'f':
 2022                 *p++ = '\f';
 2023                 break;
 2024 
 2025             case 'n':
 2026                 *p++ = '\n';
 2027                 break;
 2028 
 2029             case 'r':
 2030                 *p++ = '\r';
 2031                 break;
 2032 
 2033             case 't':
 2034                 *p++ = '\t';
 2035                 break;
 2036 
 2037             case 'v':
 2038                 *p++ = '\v';
 2039                 break;
 2040 
 2041             /* \ and up to 3 octal digits */
 2042             case '0':
 2043             case '1':
 2044             case '2':
 2045             case '3':
 2046             case '4':
 2047             case '5':
 2048             case '6':
 2049             case '7':
 2050                 val = c - '0';
 2051                 c = *s++;  /* try for 2 */
 2052                 if (c >= '0' && c <= '7') {
 2053                     val = (val << 3) | (c - '0');
 2054                     c = *s++;  /* try for 3 */
 2055                     if (c >= '0' && c <= '7')
 2056                         val = (val << 3) | (c-'0');
 2057                     else
 2058                         --s;
 2059                 }
 2060                 else
 2061                     --s;
 2062                 *p++ = (char)val;
 2063                 break;
 2064 
 2065             /* \x and up to 2 hex digits */
 2066             case 'x':
 2067                 val = 'x';  /* Default if no digits */
 2068                 c = hextoint(*s++); /* Get next char */
 2069                 if (c >= 0) {
 2070                     val = c;
 2071                     c = hextoint(*s++);
 2072                     if (c >= 0)
 2073                         val = (val << 4) + c;
 2074                     else
 2075                         --s;
 2076                 } else
 2077                     --s;
 2078                 *p++ = (char)val;
 2079                 break;
 2080             }
 2081         } else
 2082             *p++ = (char)c;
 2083     }
 2084 out:
 2085     *p = '\0';
 2086     m->vallen = CAST(unsigned char, (p - origp));
 2087     if (m->type == FILE_PSTRING)
 2088         m->vallen += file_pstring_length_size(m);
 2089     return s;
 2090 }
 2091 
 2092 
 2093 /* Single hex char to int; -1 if not a hex char. */
 2094 private int
 2095 hextoint(int c)
 2096 {
 2097     if (!isascii((unsigned char) c))
 2098         return -1;
 2099     if (isdigit((unsigned char) c))
 2100         return c - '0';
 2101     if ((c >= 'a') && (c <= 'f'))
 2102         return c + 10 - 'a';
 2103     if (( c>= 'A') && (c <= 'F'))
 2104         return c + 10 - 'A';
 2105     return -1;
 2106 }
 2107 
 2108 
 2109 /*
 2110  * Print a string containing C character escapes.
 2111  */
 2112 protected void
 2113 file_showstr(FILE *fp, const char *s, size_t len)
 2114 {
 2115     char    c;
 2116 
 2117     for (;;) {
 2118         if (len == ~0U) {
 2119             c = *s++;
 2120             if (c == '\0')
 2121                 break;
 2122         }
 2123         else  {
 2124             if (len-- == 0)
 2125                 break;
 2126             c = *s++;
 2127         }
 2128         if (c >= 040 && c <= 0176)  /* TODO isprint && !iscntrl */
 2129             (void) fputc(c, fp);
 2130         else {
 2131             (void) fputc('\\', fp);
 2132             switch (c) {
 2133             case '\a':
 2134                 (void) fputc('a', fp);
 2135                 break;
 2136 
 2137             case '\b':
 2138                 (void) fputc('b', fp);
 2139                 break;
 2140 
 2141             case '\f':
 2142                 (void) fputc('f', fp);
 2143                 break;
 2144 
 2145             case '\n':
 2146                 (void) fputc('n', fp);
 2147                 break;
 2148 
 2149             case '\r':
 2150                 (void) fputc('r', fp);
 2151                 break;
 2152 
 2153             case '\t':
 2154                 (void) fputc('t', fp);
 2155                 break;
 2156 
 2157             case '\v':
 2158                 (void) fputc('v', fp);
 2159                 break;
 2160 
 2161             default:
 2162                 (void) fprintf(fp, "%.3o", c & 0377);
 2163                 break;
 2164             }
 2165         }
 2166     }
 2167 }
 2168 
 2169 /*
 2170  * eatsize(): Eat the size spec from a number [eg. 10UL]
 2171  */
 2172 private void
 2173 eatsize(const char **p)
 2174 {
 2175     const char *l = *p;
 2176 
 2177     if (LOWCASE(*l) == 'u') 
 2178         l++;
 2179 
 2180     switch (LOWCASE(*l)) {
 2181     case 'l':    /* long */
 2182     case 's':    /* short */
 2183     case 'h':    /* short */
 2184     case 'b':    /* char/byte */
 2185     case 'c':    /* char/byte */
 2186         l++;
 2187         /*FALLTHROUGH*/
 2188     default:
 2189         break;
 2190     }
 2191 
 2192     *p = l;
 2193 }
 2194 
 2195 /*
 2196  * handle a compiled file.
 2197  */
 2198 private int
 2199 apprentice_map(struct magic_set *ms, struct magic **magicp, uint32_t *nmagicp,
 2200     const char *fn)
 2201 {
 2202     int fd;
 2203     struct stat st;
 2204     uint32_t *ptr;
 2205     uint32_t version;
 2206     int needsbyteswap;
 2207     char *dbname = NULL;
 2208     void *mm = NULL;
 2209 
 2210     dbname = mkdbname(ms, fn, 0);
 2211     if (dbname == NULL)
 2212         goto error2;
 2213 
 2214     if ((fd = open(dbname, O_RDONLY|O_BINARY)) == -1)
 2215         goto error2;
 2216 
 2217     if (fstat(fd, &st) == -1) {
 2218         file_error(ms, errno, "cannot stat `%s'", dbname);
 2219         goto error1;
 2220     }
 2221     if (st.st_size < 8) {
 2222         file_error(ms, 0, "file `%s' is too small", dbname);
 2223         goto error1;
 2224     }
 2225 
 2226 #ifdef QUICK
 2227     if ((mm = mmap(0, (size_t)st.st_size, PROT_READ|PROT_WRITE,
 2228         MAP_PRIVATE|MAP_FILE, fd, (off_t)0)) == MAP_FAILED) {
 2229         file_error(ms, errno, "cannot map `%s'", dbname);
 2230         goto error1;
 2231     }
 2232 #define RET 2
 2233 #else
 2234     if ((mm = CAST(void *, malloc((size_t)st.st_size))) == NULL) {
 2235         file_oomem(ms, (size_t)st.st_size);
 2236         goto error1;
 2237     }
 2238     if (read(fd, mm, (size_t)st.st_size) != (ssize_t)st.st_size) {
 2239         file_badread(ms);
 2240         goto error1;
 2241     }
 2242 #define RET 1
 2243 #endif
 2244     *magicp = CAST(struct magic *, mm);
 2245     (void)close(fd);
 2246     fd = -1;
 2247     ptr = (uint32_t *)(void *)*magicp;
 2248     if (*ptr != MAGICNO) {
 2249         if (swap4(*ptr) != MAGICNO) {
 2250             file_error(ms, 0, "bad magic in `%s'", dbname);
 2251             goto error1;
 2252         }
 2253         needsbyteswap = 1;
 2254     } else
 2255         needsbyteswap = 0;
 2256     if (needsbyteswap)
 2257         version = swap4(ptr[1]);
 2258     else
 2259         version = ptr[1];
 2260     if (version != VERSIONNO) {
 2261         file_error(ms, 0, "File %s supports only version %d magic "
 2262             "files. `%s' is version %d", VERSION,
 2263             VERSIONNO, dbname, version);
 2264         goto error1;
 2265     }
 2266     *nmagicp = (uint32_t)(st.st_size / sizeof(struct magic));
 2267     if (*nmagicp > 0)
 2268         (*nmagicp)--;
 2269     (*magicp)++;
 2270     if (needsbyteswap)
 2271         byteswap(*magicp, *nmagicp);
 2272     free(dbname);
 2273     return RET;
 2274 
 2275 error1:
 2276     if (fd != -1)
 2277         (void)close(fd);
 2278     if (mm) {
 2279 #ifdef QUICK
 2280         (void)munmap((void *)mm, (size_t)st.st_size);
 2281 #else
 2282         free(mm);
 2283 #endif
 2284     } else {
 2285         *magicp = NULL;
 2286         *nmagicp = 0;
 2287     }
 2288 error2:
 2289     free(dbname);
 2290     return -1;
 2291 }
 2292 
 2293 private const uint32_t ar[] = {
 2294     MAGICNO, VERSIONNO
 2295 };
 2296 /*
 2297  * handle an mmaped file.
 2298  */
 2299 private int
 2300 apprentice_compile(struct magic_set *ms, struct magic **magicp,
 2301     uint32_t *nmagicp, const char *fn)
 2302 {
 2303     int fd;
 2304     char *dbname;
 2305     int rv = -1;
 2306 
 2307     dbname = mkdbname(ms, fn, 1);
 2308 
 2309     if (dbname == NULL) 
 2310         goto out;
 2311 
 2312     if ((fd = open(dbname, O_WRONLY|O_CREAT|O_TRUNC|O_BINARY, 0644)) == -1) {
 2313         file_error(ms, errno, "cannot open `%s'", dbname);
 2314         goto out;
 2315     }
 2316 
 2317     if (write(fd, ar, sizeof(ar)) != (ssize_t)sizeof(ar)) {
 2318         file_error(ms, errno, "error writing `%s'", dbname);
 2319         goto out;
 2320     }
 2321 
 2322     if (lseek(fd, (off_t)sizeof(struct magic), SEEK_SET)
 2323         != sizeof(struct magic)) {
 2324         file_error(ms, errno, "error seeking `%s'", dbname);
 2325         goto out;
 2326     }
 2327 
 2328     if (write(fd, *magicp, (sizeof(struct magic) * *nmagicp)) 
 2329         != (ssize_t)(sizeof(struct magic) * *nmagicp)) {
 2330         file_error(ms, errno, "error writing `%s'", dbname);
 2331         goto out;
 2332     }
 2333 
 2334     (void)close(fd);
 2335     rv = 0;
 2336 out:
 2337     free(dbname);
 2338     return rv;
 2339 }
 2340 
 2341 private const char ext[] = ".mgc";
 2342 /*
 2343  * make a dbname
 2344  */
 2345 private char *
 2346 mkdbname(struct magic_set *ms, const char *fn, int strip)
 2347 {
 2348     const char *p, *q;
 2349     char *buf;
 2350 
 2351     if (strip) {
 2352         if ((p = strrchr(fn, '/')) != NULL)
 2353             fn = ++p;
 2354     }
 2355 
 2356     for (q = fn; *q; q++)
 2357         continue;
 2358     /* Look for .mgc */
 2359     for (p = ext + sizeof(ext) - 1; p >= ext && q >= fn; p--, q--)
 2360         if (*p != *q)
 2361             break;
 2362 
 2363     /* Did not find .mgc, restore q */
 2364     if (p >= ext)
 2365         while (*q)
 2366             q++;
 2367 
 2368     q++;
 2369     /* Compatibility with old code that looked in .mime */
 2370     if (ms->flags & MAGIC_MIME) {
 2371         asprintf(&buf, "%.*s.mime%s", (int)(q - fn), fn, ext);
 2372         if (access(buf, R_OK) != -1) {
 2373             ms->flags &= MAGIC_MIME_TYPE;
 2374             return buf;
 2375         }
 2376         free(buf);
 2377     }
 2378     asprintf(&buf, "%.*s%s", (int)(q - fn), fn, ext);
 2379 
 2380     /* Compatibility with old code that looked in .mime */
 2381     if (strstr(p, ".mime") != NULL)
 2382         ms->flags &= MAGIC_MIME_TYPE;
 2383     return buf;
 2384 }
 2385 
 2386 /*
 2387  * Byteswap an mmap'ed file if needed
 2388  */
 2389 private void
 2390 byteswap(struct magic *magic, uint32_t nmagic)
 2391 {
 2392     uint32_t i;
 2393     for (i = 0; i < nmagic; i++)
 2394         bs1(&magic[i]);
 2395 }
 2396 
 2397 /*
 2398  * swap a short
 2399  */
 2400 private uint16_t
 2401 swap2(uint16_t sv)
 2402 {
 2403     uint16_t rv;
 2404     uint8_t *s = (uint8_t *)(void *)&sv; 
 2405     uint8_t *d = (uint8_t *)(void *)&rv; 
 2406     d[0] = s[1];
 2407     d[1] = s[0];
 2408     return rv;
 2409 }
 2410 
 2411 /*
 2412  * swap an int
 2413  */
 2414 private uint32_t
 2415 swap4(uint32_t sv)
 2416 {
 2417     uint32_t rv;
 2418     uint8_t *s = (uint8_t *)(void *)&sv; 
 2419     uint8_t *d = (uint8_t *)(void *)&rv; 
 2420     d[0] = s[3];
 2421     d[1] = s[2];
 2422     d[2] = s[1];
 2423     d[3] = s[0];
 2424     return rv;
 2425 }
 2426 
 2427 /*
 2428  * swap a quad
 2429  */
 2430 private uint64_t
 2431 swap8(uint64_t sv)
 2432 {
 2433     uint64_t rv;
 2434     uint8_t *s = (uint8_t *)(void *)&sv; 
 2435     uint8_t *d = (uint8_t *)(void *)&rv; 
 2436 #if 0
 2437     d[0] = s[3];
 2438     d[1] = s[2];
 2439     d[2] = s[1];
 2440     d[3] = s[0];
 2441     d[4] = s[7];
 2442     d[5] = s[6];
 2443     d[6] = s[5];
 2444     d[7] = s[4];
 2445 #else
 2446     d[0] = s[7];
 2447     d[1] = s[6];
 2448     d[2] = s[5];
 2449     d[3] = s[4];
 2450     d[4] = s[3];
 2451     d[5] = s[2];
 2452     d[6] = s[1];
 2453     d[7] = s[0];
 2454 #endif
 2455     return rv;
 2456 }
 2457 
 2458 /*
 2459  * byteswap a single magic entry
 2460  */
 2461 private void
 2462 bs1(struct magic *m)
 2463 {
 2464     m->cont_level = swap2(m->cont_level);
 2465     m->offset = swap4((uint32_t)m->offset);
 2466     m->in_offset = swap4((uint32_t)m->in_offset);
 2467     m->lineno = swap4((uint32_t)m->lineno);
 2468     if (IS_STRING(m->type)) {
 2469         m->str_range = swap4(m->str_range);
 2470         m->str_flags = swap4(m->str_flags);
 2471     }
 2472     else {
 2473         m->value.q = swap8(m->value.q);
 2474         m->num_mask = swap8(m->num_mask);
 2475     }
 2476 }
 2477 
 2478 protected size_t 
 2479 file_pstring_length_size(const struct magic *m)
 2480 {
 2481     switch (m->str_flags & PSTRING_LEN) {
 2482     case PSTRING_1_LE:
 2483         return 1;
 2484     case PSTRING_2_LE:
 2485     case PSTRING_2_BE:
 2486         return 2;
 2487     case PSTRING_4_LE:
 2488     case PSTRING_4_BE:
 2489         return 4;
 2490     default:
 2491         abort();    /* Impossible */
 2492         return 1;
 2493     }
 2494 }
 2495 protected size_t
 2496 file_pstring_get_length(const struct magic *m, const char *s)
 2497 {
 2498     size_t len = 0;
 2499 
 2500     switch (m->str_flags & PSTRING_LEN) {
 2501     case PSTRING_1_LE:
 2502         len = *s;
 2503         break;
 2504     case PSTRING_2_LE:
 2505         len = (s[1] << 8) | s[0];
 2506         break;
 2507     case PSTRING_2_BE:
 2508         len = (s[0] << 8) | s[1];
 2509         break;
 2510     case PSTRING_4_LE:
 2511         len = (s[3] << 24) | (s[2] << 16) | (s[1] << 8) | s[0];
 2512         break;
 2513     case PSTRING_4_BE:
 2514         len = (s[0] << 24) | (s[1] << 16) | (s[2] << 8) | s[3];
 2515         break;
 2516     default:
 2517         abort();    /* Impossible */
 2518     }
 2519 
 2520     if (m->str_flags & PSTRING_LENGTH_INCLUDES_ITSELF)
 2521         len -= file_pstring_length_size(m);
 2522 
 2523     return len;
 2524 }