"Fossies" - the Fresh Open Source Software Archive

Member "libextractor-1.11/src/plugins/deb_extractor.c" (30 Jan 2021, 12152 Bytes) of package /linux/privat/libextractor-1.11.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "deb_extractor.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.5_vs_1.6.

    1 /*
    2      This file is part of libextractor.
    3      Copyright (C) 2002, 2003, 2004, 2012 Vidyut Samanta and Christian Grothoff
    4 
    5      libextractor is free software; you can redistribute it and/or modify
    6      it under the terms of the GNU General Public License as published
    7      by the Free Software Foundation; either version 3, or (at your
    8      option) any later version.
    9 
   10      libextractor is distributed in the hope that it will be useful, but
   11      WITHOUT ANY WARRANTY; without even the implied warranty of
   12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13      General Public License for more details.
   14 
   15      You should have received a copy of the GNU General Public License
   16      along with libextractor; see the file COPYING.  If not, write to the
   17      Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   18      Boston, MA 02110-1301, USA.
   19  */
   20 /**
   21  * @file plugins/deb_extractor.c
   22  * @brief plugin to support Debian archives
   23  * @author Christian Grothoff
   24  *
   25  * The .deb is an ar-chive file.  It contains a tar.gz file
   26  * named "control.tar.gz" which then contains a file 'control'
   27  * that has the meta-data.  And which variant of the various
   28  * ar file formats is used is also not quite certain. Yuck.
   29  *
   30  * References:
   31  * http://www.mkssoftware.com/docs/man4/tar.4.asp
   32  * http://lists.debian.org/debian-policy/2003/12/msg00000.html
   33  * http://www.opengroup.org/onlinepubs/009695399/utilities/ar.html
   34  */
   35 #include "platform.h"
   36 #include "extractor.h"
   37 #include <zlib.h>
   38 
   39 
/**
 * Maximum size (in bytes, 1 MiB) we allow for control.tar.gz files.
 * This is a sanity check to avoid allocating huge amounts
 * of memory: 'processControlTGZ' applies it both to the size of
 * the compressed member and to the decompressed size it reads from
 * the last four bytes of that member.
 */
#define MAX_CONTROL_SIZE (1024 * 1024)
   46 
   47 
   48 /**
   49  * Re-implementation of 'strndup'.
   50  *
   51  * @param str string to duplicate
   52  * @param n maximum number of bytes to copy
   53  * @return NULL on error, otherwise 0-terminated copy of 'str'
   54  *         with at most n characters
   55  */
   56 static char *
   57 stndup (const char *str, size_t n)
   58 {
   59   char *tmp;
   60 
   61   if (NULL == (tmp = malloc (n + 1)))
   62     return NULL;
   63   tmp[n] = '\0';
   64   memcpy (tmp, str, n);
   65   return tmp;
   66 }
   67 
   68 
/**
 * Entry in the mapping from control data to LE types.
 */
struct Matches
{
  /**
   * Key in the Debian control file.  'processControl' compares this
   * with 'strcmp' against the field name including the colon and
   * the whitespace that follows it, so entries are spelled like
   * "Package: ".
   */
  const char *text;

  /**
   * Corresponding type in LE; passed to the meta data processor
   * when 'text' matches.
   */
  enum EXTRACTOR_MetaType type;
};
   84 
   85 
   86 /**
   87  * Map from deb-control entries to LE types.
   88  *
   89  * see also: "man 5 deb-control"
   90  */
   91 static struct Matches tmap[] = {
   92   {"Package: ",       EXTRACTOR_METATYPE_PACKAGE_NAME},
   93   {"Version: ",       EXTRACTOR_METATYPE_PACKAGE_VERSION},
   94   {"Section: ",       EXTRACTOR_METATYPE_SECTION},
   95   {"Priority: ",      EXTRACTOR_METATYPE_UPLOAD_PRIORITY},
   96   {"Architecture: ",  EXTRACTOR_METATYPE_TARGET_ARCHITECTURE},
   97   {"Depends: ",       EXTRACTOR_METATYPE_PACKAGE_DEPENDENCY},
   98   {"Recommends: ",    EXTRACTOR_METATYPE_PACKAGE_RECOMMENDS},
   99   {"Suggests: ",      EXTRACTOR_METATYPE_PACKAGE_SUGGESTS},
  100   {"Installed-Size: ",EXTRACTOR_METATYPE_PACKAGE_INSTALLED_SIZE},
  101   {"Maintainer: ",    EXTRACTOR_METATYPE_PACKAGE_MAINTAINER},
  102   {"Description: ",   EXTRACTOR_METATYPE_DESCRIPTION},
  103   {"Source: ",        EXTRACTOR_METATYPE_PACKAGE_SOURCE},
  104   {"Pre-Depends: ",   EXTRACTOR_METATYPE_PACKAGE_PRE_DEPENDENCY},
  105   {"Conflicts: ",     EXTRACTOR_METATYPE_PACKAGE_CONFLICTS},
  106   {"Replaces: ",      EXTRACTOR_METATYPE_PACKAGE_REPLACES},
  107   {"Provides: ",      EXTRACTOR_METATYPE_PACKAGE_PROVIDES},
  108   {"Essential: ",     EXTRACTOR_METATYPE_PACKAGE_ESSENTIAL},
  109   {NULL, 0}
  110 };
  111 
  112 
  113 /**
  114  * Process the "control" file from the control.tar.gz
  115  *
  116  * @param data decompressed control data
  117  * @param size number of bytes in data
  118  * @param proc function to call with meta data
  119  * @param proc_cls closure for 'proc'
  120  * @return 0 to continue extracting, 1 if we are done
  121  */
  122 static int
  123 processControl (const char *data,
  124                 const size_t size,
  125                 EXTRACTOR_MetaDataProcessor proc,
  126                 void *proc_cls)
  127 {
  128   size_t pos;
  129   char *key;
  130   char *val;
  131   size_t colon;
  132   size_t eol;
  133   unsigned int i;
  134 
  135   pos = 0;
  136   while (pos < size)
  137   {
  138     for (colon = pos; ':' != data[colon]; colon++)
  139       if ((colon > size) || ('\n' == data[colon]))
  140         return 0;
  141     colon++;
  142     while ((colon < size) && (isspace ((unsigned char) data[colon])))
  143       colon++;
  144     eol = colon;
  145     while ((eol < size) &&
  146            (('\n' != data[eol]) ||
  147             ((eol + 1 < size) && (' '  == data[eol + 1]))))
  148       eol++;
  149     if ((eol == colon) || (eol > size))
  150       return 0;
  151     if (NULL == (key = stndup (&data[pos], colon - pos)))
  152       return 0;
  153     for (i = 0; NULL != tmap[i].text; i++)
  154     {
  155       if (0 != strcmp (key, tmap[i].text))
  156         continue;
  157       if (NULL == (val = stndup (&data[colon], eol - colon)))
  158       {
  159         free (key);
  160         return 0;
  161       }
  162       if (0 != proc (proc_cls,
  163                      "deb",
  164                      tmap[i].type,
  165                      EXTRACTOR_METAFORMAT_UTF8,
  166                      "text/plain",
  167                      val,
  168                      strlen (val) + 1))
  169       {
  170         free (val);
  171         free (key);
  172         return 1;
  173       }
  174       free (val);
  175       break;
  176     }
  177     free (key);
  178     pos = eol + 1;
  179   }
  180   return 0;
  181 }
  182 
  183 
/**
 * Header of an entry in a TAR file.
 *
 * All members are 'char' arrays whose sizes add up to 257 bytes,
 * which is the amount 'processControlTar' skips for a header that
 * does not carry the "ustar" magic.
 */
struct TarHeader
{
  /**
   * Filename.  'processControlTar' looks for an entry whose name
   * starts with "./control".
   */
  char name[100];

  /**
   * File access modes.
   */
  char mode[8];

  /**
   * Owner of the file.
   */
  char userId[8];

  /**
   * Group of the file.
   */
  char groupId[8];

  /**
   * Size of the file, in octal (parsed with "%12llo").
   */
  char filesize[12];

  /**
   * Last modification time.
   */
  char lastModTime[12];

  /**
   * Checksum of the file.
   * NOTE(review): not verified anywhere in this file.
   */
  char chksum[8];

  /**
   * Is the file a link?
   */
  char link;

  /**
   * Destination of the link.
   */
  char linkName[100];
};
  234 
  235 
/**
 * Extended TAR header for USTar format.
 *
 * The members add up to 500 bytes; 'processControlTar' skips 512
 * bytes when it recognizes this header.
 */
struct USTarHeader
{
  /**
   * Original TAR header.
   */
  struct TarHeader tar;

  /**
   * Additional magic for USTar; 'processControlTar' checks that it
   * starts with "ustar".
   */
  char magic[6];

  /**
   * Format version.
   */
  char version[2];

  /**
   * User name.
   */
  char uname[32];

  /**
   * Group name.
   */
  char gname[32];

  /**
   * Device major number.
   */
  char devmajor[8];

  /**
   * Device minor number.
   */
  char devminor[8];

  /**
   * Unknown (padding?).
   * NOTE(review): the POSIX ustar layout appears to use this field
   * as a path name prefix -- confirm; it is not read in this file.
   */
  char prefix[155];
};
  281 
  282 
  283 /**
  284  * Process the control.tar file.
  285  *
  286  * @param data the deflated control.tar file data
  287  * @param size number of bytes in data
  288  * @param proc function to call with meta data
  289  * @param proc_cls closure for 'proc'
  290  * @return 0 to continue extracting, 1 if we are done
  291  */
  292 static int
  293 processControlTar (const char *data,
  294                    size_t size,
  295                    EXTRACTOR_MetaDataProcessor proc,
  296                    void *proc_cls)
  297 {
  298   struct TarHeader *tar;
  299   struct USTarHeader *ustar;
  300   size_t pos;
  301 
  302   pos = 0;
  303   while (pos + sizeof (struct TarHeader) < size)
  304   {
  305     unsigned long long fsize;
  306     char buf[13];
  307 
  308     tar = (struct TarHeader *) &data[pos];
  309     if (pos + sizeof (struct USTarHeader) < size)
  310     {
  311       ustar = (struct USTarHeader *) &data[pos];
  312       if (0 == strncmp ("ustar", &ustar->magic[0], strlen ("ustar")))
  313         pos += 512;             /* sizeof (struct USTarHeader); */
  314       else
  315         pos += 257;             /* sizeof (struct TarHeader); minus gcc alignment... */
  316     }
  317     else
  318     {
  319       pos += 257;               /* sizeof (struct TarHeader); minus gcc alignment... */
  320     }
  321 
  322     memcpy (buf, &tar->filesize[0], 12);
  323     buf[12] = '\0';
  324     if (1 != sscanf (buf, "%12llo", &fsize))    /* octal! Yuck yuck! */
  325       return 0;
  326     if ((pos + fsize > size) || (fsize > size) || (pos + fsize < pos))
  327       return 0;
  328 
  329     if (0 == strncmp (&tar->name[0], "./control", strlen ("./control")))
  330     {
  331       /* found the 'control' file we were looking for */
  332       return processControl (&data[pos], fsize, proc, proc_cls);
  333     }
  334     if (0 != (fsize & 511))
  335       fsize = (fsize | 511) + 1;        /* round up! */
  336     if (pos + fsize < pos)
  337       return 0;
  338     pos += fsize;
  339   }
  340   return 0;
  341 }
  342 
  343 
  344 /**
  345  * Process the control.tar.gz file.
  346  *
  347  * @param ec extractor context with control.tar.gz at current read position
  348  * @param size number of bytes in the control file
  349  * @return 0 to continue extracting, 1 if we are done
  350  */
  351 static int
  352 processControlTGZ (struct EXTRACTOR_ExtractContext *ec,
  353                    unsigned long long size)
  354 {
  355   uint32_t bufSize;
  356   char *buf;
  357   void *data;
  358   unsigned char *cdata;
  359   z_stream strm;
  360   int ret;
  361   ssize_t sret;
  362   unsigned long long off;
  363 
  364   if (size > MAX_CONTROL_SIZE)
  365     return 0;
  366   if (0 == size)
  367     return 0;
  368   if (size < 4)
  369     return 0;
  370   if (NULL == (cdata = malloc (size)))
  371     return 0;
  372   off = 0;
  373   while (off < size)
  374   {
  375     if (0 >= (sret = ec->read (ec->cls, &data, size - off)))
  376     {
  377       free (cdata);
  378       return 0;
  379     }
  380     memcpy (&cdata[off],
  381             data,
  382             sret);
  383     off += sret;
  384   }
  385   bufSize = cdata[size - 4] + (cdata[size - 3] << 8) + (cdata[size - 2] << 16)
  386             + (cdata[size - 1] << 24);
  387   if (bufSize > MAX_CONTROL_SIZE)
  388   {
  389     free (cdata);
  390     return 0;
  391   }
  392   if (NULL == (buf = malloc (bufSize)))
  393   {
  394     free (cdata);
  395     return 0;
  396   }
  397   ret = 0;
  398   memset (&strm, 0, sizeof (z_stream));
  399   strm.next_in = (Bytef *) data;
  400   strm.avail_in = size;
  401   if (Z_OK == inflateInit2 (&strm, 15 + 32))
  402   {
  403     strm.next_out = (Bytef *) buf;
  404     strm.avail_out = bufSize;
  405     inflate (&strm, Z_FINISH);
  406     if (strm.total_out > 0)
  407       ret = processControlTar (buf, strm.total_out,
  408                                ec->proc, ec->cls);
  409     inflateEnd (&strm);
  410   }
  411   free (buf);
  412   free (cdata);
  413   return ret;
  414 }
  415 
  416 
/**
 * Header of an object in an "AR"chive file.
 *
 * All members are 'char' arrays (60 bytes in total); the extract
 * method reads the header with a single 'read' of
 * sizeof (struct ObjectHeader) bytes.
 */
struct ObjectHeader
{
  /**
   * Name of the file.
   */
  char name[16];

  /**
   * Last modification time for the file.
   */
  char lastModTime[12];

  /**
   * User ID of the owner.
   */
  char userId[6];

  /**
   * Group ID of the owner.
   */
  char groupId[6];

  /**
   * File access modes.
   */
  char modeInOctal[8];

  /**
   * Size of the file (as decimal string, parsed with "%10llu";
   * not 0-terminated)
   */
  char filesize[10];

  /**
   * Trailer of the object header ("`\n")
   */
  char trailer[2];
};
  457 
  458 
  459 /**
  460  * Main entry method for the DEB extraction plugin.
  461  *
  462  * @param ec extraction context provided to the plugin
  463  */
  464 void
  465 EXTRACTOR_deb_extract_method (struct EXTRACTOR_ExtractContext *ec)
  466 {
  467   uint64_t pos;
  468   int done = 0;
  469   const struct ObjectHeader *hdr;
  470   uint64_t fsize;
  471   unsigned long long csize;
  472   char buf[11];
  473   void *data;
  474 
  475   fsize = ec->get_size (ec->cls);
  476   if (fsize < 128)
  477     return;
  478   if (8 !=
  479       ec->read (ec->cls, &data, 8))
  480     return;
  481   if (0 != strncmp ("!<arch>\n", data, 8))
  482     return;
  483   pos = 8;
  484   while (pos + sizeof (struct ObjectHeader) < fsize)
  485   {
  486     if (pos !=
  487         ec->seek (ec->cls, pos, SEEK_SET))
  488       return;
  489     if (sizeof (struct ObjectHeader) !=
  490         ec->read (ec->cls, &data, sizeof (struct ObjectHeader)))
  491       return;
  492     hdr = data;
  493     if (0 != strncmp (&hdr->trailer[0], "`\n", 2))
  494       return;
  495     memcpy (buf, &hdr->filesize[0], 10);
  496     buf[10] = '\0';
  497     if (1 != sscanf (buf, "%10llu", &csize))
  498       return;
  499     pos += sizeof (struct ObjectHeader);
  500     if ((pos + csize > fsize) || (csize > fsize) || (pos + csize < pos))
  501       return;
  502     if (0 == strncmp (&hdr->name[0],
  503                       "control.tar.gz",
  504                       strlen ("control.tar.gz")))
  505     {
  506       if (0 != processControlTGZ (ec,
  507                                   csize))
  508         return;
  509       done++;
  510     }
  511     if (0 == strncmp (&hdr->name[0],
  512                       "debian-binary", strlen ("debian-binary")))
  513     {
  514       if (0 != ec->proc (ec->cls,
  515                          "deb",
  516                          EXTRACTOR_METATYPE_MIMETYPE,
  517                          EXTRACTOR_METAFORMAT_UTF8,
  518                          "text/plain",
  519                          "application/x-debian-package",
  520                          strlen ("application/x-debian-package") + 1))
  521         return;
  522       done++;
  523     }
  524     pos += csize;
  525     if (2 == done)
  526       break;                    /* no need to process the rest of the archive */
  527   }
  528 }
  529 
  530 
  531 /* end of deb_extractor.c */