libextractor  1.11
About: GNU libextractor is a library used to extract meta-data from files of arbitrary type.
  Fossies Dox: libextractor-1.11.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

deb_extractor.c
Go to the documentation of this file.
1 /*
2  This file is part of libextractor.
3  Copyright (C) 2002, 2003, 2004, 2012 Vidyut Samanta and Christian Grothoff
4 
5  libextractor is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published
7  by the Free Software Foundation; either version 3, or (at your
8  option) any later version.
9 
10  libextractor is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with libextractor; see the file COPYING. If not, write to the
17  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  Boston, MA 02110-1301, USA.
19  */
20 /**
21  * @file plugins/deb_extractor.c
22  * @brief plugin to support Debian archives
23  * @author Christian Grothoff
24  *
25  * The .deb is an ar-chive file. It contains a tar.gz file
26  * named "control.tar.gz" which then contains a file 'control'
27  * that has the meta-data. And which variant of the various
28  * ar file formats is used is also not quite certain. Yuck.
29  *
30  * References:
31  * http://www.mkssoftware.com/docs/man4/tar.4.asp
32  * http://lists.debian.org/debian-policy/2003/12/msg00000.html
33  * http://www.opengroup.org/onlinepubs/009695399/utilities/ar.html
34  */
35 #include "platform.h"
36 #include "extractor.h"
37 #include <zlib.h>
38 
39 
/**
 * Upper bound (1 MiB) on the control archive.  The same limit is
 * applied to the compressed control.tar.gz member and to the
 * decompressed size announced in its trailer, so that a bogus
 * archive cannot make us allocate huge amounts of memory.
 */
#define MAX_CONTROL_SIZE (1024 * 1024)
46 
47 
/**
 * Duplicate the first 'n' bytes of a buffer as a 0-terminated string.
 *
 * Unlike the standard 'strndup', this helper does not look for a
 * terminator in 'str': exactly 'n' bytes are copied, so the caller
 * must guarantee that 'str' has at least 'n' readable bytes.
 *
 * @param str buffer to copy from
 * @param n number of bytes to copy
 * @return NULL if the allocation failed, otherwise a freshly
 *         allocated, 0-terminated copy that the caller must free
 */
static char *
stndup (const char *str, size_t n)
{
  char *copy = malloc (n + 1);

  if (NULL == copy)
    return NULL;
  memcpy (copy, str, n);
  copy[n] = '\0';
  return copy;
}
67 
68 
69 /**
70  * Entry in the mapping from control data to LE types.
71  */
72 struct Matches
73 {
74  /**
75  * Key in the Debian control file.
76  */
77  const char *text;
78 
79  /**
80  * Corresponding type in LE.
81  */
83 };
84 
85 
86 /**
87  * Map from deb-control entries to LE types.
88  *
89  * see also: "man 5 deb-control"
90  */
91 static struct Matches tmap[] = {
92  {"Package: ", EXTRACTOR_METATYPE_PACKAGE_NAME},
94  {"Section: ", EXTRACTOR_METATYPE_SECTION},
95  {"Priority: ", EXTRACTOR_METATYPE_UPLOAD_PRIORITY},
96  {"Architecture: ", EXTRACTOR_METATYPE_TARGET_ARCHITECTURE},
98  {"Recommends: ", EXTRACTOR_METATYPE_PACKAGE_RECOMMENDS},
100  {"Installed-Size: ",EXTRACTOR_METATYPE_PACKAGE_INSTALLED_SIZE},
101  {"Maintainer: ", EXTRACTOR_METATYPE_PACKAGE_MAINTAINER},
102  {"Description: ", EXTRACTOR_METATYPE_DESCRIPTION},
103  {"Source: ", EXTRACTOR_METATYPE_PACKAGE_SOURCE},
105  {"Conflicts: ", EXTRACTOR_METATYPE_PACKAGE_CONFLICTS},
106  {"Replaces: ", EXTRACTOR_METATYPE_PACKAGE_REPLACES},
107  {"Provides: ", EXTRACTOR_METATYPE_PACKAGE_PROVIDES},
108  {"Essential: ", EXTRACTOR_METATYPE_PACKAGE_ESSENTIAL},
109  {NULL, 0}
110 };
111 
112 
113 /**
114  * Process the "control" file from the control.tar.gz
115  *
116  * @param data decompressed control data
117  * @param size number of bytes in data
118  * @param proc function to call with meta data
119  * @param proc_cls closure for 'proc'
120  * @return 0 to continue extracting, 1 if we are done
121  */
122 static int
123 processControl (const char *data,
124  const size_t size,
126  void *proc_cls)
127 {
128  size_t pos;
129  char *key;
130  char *val;
131  size_t colon;
132  size_t eol;
133  unsigned int i;
134 
135  pos = 0;
136  while (pos < size)
137  {
138  for (colon = pos; ':' != data[colon]; colon++)
139  if ((colon > size) || ('\n' == data[colon]))
140  return 0;
141  colon++;
142  while ((colon < size) && (isspace ((unsigned char) data[colon])))
143  colon++;
144  eol = colon;
145  while ((eol < size) &&
146  (('\n' != data[eol]) ||
147  ((eol + 1 < size) && (' ' == data[eol + 1]))))
148  eol++;
149  if ((eol == colon) || (eol > size))
150  return 0;
151  if (NULL == (key = stndup (&data[pos], colon - pos)))
152  return 0;
153  for (i = 0; NULL != tmap[i].text; i++)
154  {
155  if (0 != strcmp (key, tmap[i].text))
156  continue;
157  if (NULL == (val = stndup (&data[colon], eol - colon)))
158  {
159  free (key);
160  return 0;
161  }
162  if (0 != proc (proc_cls,
163  "deb",
164  tmap[i].type,
166  "text/plain",
167  val,
168  strlen (val) + 1))
169  {
170  free (val);
171  free (key);
172  return 1;
173  }
174  free (val);
175  break;
176  }
177  free (key);
178  pos = eol + 1;
179  }
180  return 0;
181 }
182 
183 
/**
 * Leading part of the header that precedes every entry in a TAR
 * file.  All members are character arrays, so the structure
 * mirrors the on-disk byte layout.
 */
struct TarHeader
{
  /**
   * Name of the archived file.
   */
  char name[100];

  /**
   * Access mode bits of the file.
   */
  char mode[8];

  /**
   * User that owns the file.
   */
  char userId[8];

  /**
   * Group that owns the file.
   */
  char groupId[8];

  /**
   * Length of the file data, written as an octal number.
   */
  char filesize[12];

  /**
   * Time of the last modification.
   */
  char lastModTime[12];

  /**
   * Checksum field (never evaluated by this plugin).
   */
  char chksum[8];

  /**
   * Link indicator: tells whether the entry is a link.
   */
  char link;

  /**
   * Target the link points to.
   */
  char linkName[100];
};
234 
235 
236 /**
237  * Extended TAR header for USTar format.
238  */
240 {
241  /**
242  * Original TAR header.
243  */
244  struct TarHeader tar;
245 
246  /**
247  * Additinal magic for USTar.
248  */
249  char magic[6];
250 
251  /**
252  * Format version.
253  */
254  char version[2];
255 
256  /**
257  * User name.
258  */
259  char uname[32];
260 
261  /**
262  * Group name.
263  */
264  char gname[32];
265 
266  /**
267  * Device major number.
268  */
269  char devmajor[8];
270 
271  /**
272  * Device minor number.
273  */
274  char devminor[8];
275 
276  /**
277  * Unknown (padding?).
278  */
279  char prefix[155];
280 };
281 
282 
283 /**
284  * Process the control.tar file.
285  *
286  * @param data the deflated control.tar file data
287  * @param size number of bytes in data
288  * @param proc function to call with meta data
289  * @param proc_cls closure for 'proc'
290  * @return 0 to continue extracting, 1 if we are done
291  */
292 static int
293 processControlTar (const char *data,
294  size_t size,
296  void *proc_cls)
297 {
298  struct TarHeader *tar;
299  struct USTarHeader *ustar;
300  size_t pos;
301 
302  pos = 0;
303  while (pos + sizeof (struct TarHeader) < size)
304  {
305  unsigned long long fsize;
306  char buf[13];
307 
308  tar = (struct TarHeader *) &data[pos];
309  if (pos + sizeof (struct USTarHeader) < size)
310  {
311  ustar = (struct USTarHeader *) &data[pos];
312  if (0 == strncmp ("ustar", &ustar->magic[0], strlen ("ustar")))
313  pos += 512; /* sizeof (struct USTarHeader); */
314  else
315  pos += 257; /* sizeof (struct TarHeader); minus gcc alignment... */
316  }
317  else
318  {
319  pos += 257; /* sizeof (struct TarHeader); minus gcc alignment... */
320  }
321 
322  memcpy (buf, &tar->filesize[0], 12);
323  buf[12] = '\0';
324  if (1 != sscanf (buf, "%12llo", &fsize)) /* octal! Yuck yuck! */
325  return 0;
326  if ((pos + fsize > size) || (fsize > size) || (pos + fsize < pos))
327  return 0;
328 
329  if (0 == strncmp (&tar->name[0], "./control", strlen ("./control")))
330  {
331  /* found the 'control' file we were looking for */
332  return processControl (&data[pos], fsize, proc, proc_cls);
333  }
334  if (0 != (fsize & 511))
335  fsize = (fsize | 511) + 1; /* round up! */
336  if (pos + fsize < pos)
337  return 0;
338  pos += fsize;
339  }
340  return 0;
341 }
342 
343 
344 /**
345  * Process the control.tar.gz file.
346  *
347  * @param ec extractor context with control.tar.gz at current read position
348  * @param size number of bytes in the control file
349  * @return 0 to continue extracting, 1 if we are done
350  */
351 static int
353  unsigned long long size)
354 {
355  uint32_t bufSize;
356  char *buf;
357  void *data;
358  unsigned char *cdata;
359  z_stream strm;
360  int ret;
361  ssize_t sret;
362  unsigned long long off;
363 
364  if (size > MAX_CONTROL_SIZE)
365  return 0;
366  if (0 == size)
367  return 0;
368  if (size < 4)
369  return 0;
370  if (NULL == (cdata = malloc (size)))
371  return 0;
372  off = 0;
373  while (off < size)
374  {
375  if (0 >= (sret = ec->read (ec->cls, &data, size - off)))
376  {
377  free (cdata);
378  return 0;
379  }
380  memcpy (&cdata[off],
381  data,
382  sret);
383  off += sret;
384  }
385  bufSize = cdata[size - 4] + (cdata[size - 3] << 8) + (cdata[size - 2] << 16)
386  + (cdata[size - 1] << 24);
387  if (bufSize > MAX_CONTROL_SIZE)
388  {
389  free (cdata);
390  return 0;
391  }
392  if (NULL == (buf = malloc (bufSize)))
393  {
394  free (cdata);
395  return 0;
396  }
397  ret = 0;
398  memset (&strm, 0, sizeof (z_stream));
399  strm.next_in = (Bytef *) data;
400  strm.avail_in = size;
401  if (Z_OK == inflateInit2 (&strm, 15 + 32))
402  {
403  strm.next_out = (Bytef *) buf;
404  strm.avail_out = bufSize;
405  inflate (&strm, Z_FINISH);
406  if (strm.total_out > 0)
407  ret = processControlTar (buf, strm.total_out,
408  ec->proc, ec->cls);
409  inflateEnd (&strm);
410  }
411  free (buf);
412  free (cdata);
413  return ret;
414 }
415 
416 
/**
 * Header of an object in an "AR"chive file.
 * All members are character arrays, so the structure mirrors the
 * 60-byte on-disk layout.
 */
struct ObjectHeader
{
  /**
   * Name of the file.
   */
  char name[16];

  /**
   * Last modification time for the file.
   */
  char lastModTime[12];

  /**
   * User ID of the owner.
   */
  char userId[6];

  /**
   * Group ID of the owner.
   */
  char groupId[6];

  /**
   * File access modes.
   */
  char modeInOctal[8];

  /**
   * Size of the file (as decimal string)
   */
  char filesize[10];

  /**
   * Trailer of the object header ("`\n")
   */
  char trailer[2];
};
457 
458 
459 /**
460  * Main entry method for the DEB extraction plugin.
461  *
462  * @param ec extraction context provided to the plugin
463  */
464 void
466 {
467  uint64_t pos;
468  int done = 0;
469  const struct ObjectHeader *hdr;
470  uint64_t fsize;
471  unsigned long long csize;
472  char buf[11];
473  void *data;
474 
475  fsize = ec->get_size (ec->cls);
476  if (fsize < 128)
477  return;
478  if (8 !=
479  ec->read (ec->cls, &data, 8))
480  return;
481  if (0 != strncmp ("!<arch>\n", data, 8))
482  return;
483  pos = 8;
484  while (pos + sizeof (struct ObjectHeader) < fsize)
485  {
486  if (pos !=
487  ec->seek (ec->cls, pos, SEEK_SET))
488  return;
489  if (sizeof (struct ObjectHeader) !=
490  ec->read (ec->cls, &data, sizeof (struct ObjectHeader)))
491  return;
492  hdr = data;
493  if (0 != strncmp (&hdr->trailer[0], "`\n", 2))
494  return;
495  memcpy (buf, &hdr->filesize[0], 10);
496  buf[10] = '\0';
497  if (1 != sscanf (buf, "%10llu", &csize))
498  return;
499  pos += sizeof (struct ObjectHeader);
500  if ((pos + csize > fsize) || (csize > fsize) || (pos + csize < pos))
501  return;
502  if (0 == strncmp (&hdr->name[0],
503  "control.tar.gz",
504  strlen ("control.tar.gz")))
505  {
506  if (0 != processControlTGZ (ec,
507  csize))
508  return;
509  done++;
510  }
511  if (0 == strncmp (&hdr->name[0],
512  "debian-binary", strlen ("debian-binary")))
513  {
514  if (0 != ec->proc (ec->cls,
515  "deb",
518  "text/plain",
519  "application/x-debian-package",
520  strlen ("application/x-debian-package") + 1))
521  return;
522  done++;
523  }
524  pos += csize;
525  if (2 == done)
526  break; /* no need to process the rest of the archive */
527  }
528 }
529 
530 
531 /* end of deb_extractor.c */
static int processControlTar(const char *data, size_t size, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
static int processControl(const char *data, const size_t size, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
#define MAX_CONTROL_SIZE
Definition: deb_extractor.c:45
static char * stndup(const char *str, size_t n)
Definition: deb_extractor.c:57
static int processControlTGZ(struct EXTRACTOR_ExtractContext *ec, unsigned long long size)
void EXTRACTOR_deb_extract_method(struct EXTRACTOR_ExtractContext *ec)
static struct Matches tmap[]
Definition: deb_extractor.c:91
int(* EXTRACTOR_MetaDataProcessor)(void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
Definition: extractor.h:460
@ EXTRACTOR_METAFORMAT_UTF8
Definition: extractor.h:102
#define NULL
Definition: getopt1.c:60
EXTRACTOR_MetaType
Definition: extractor.h:126
@ EXTRACTOR_METATYPE_SECTION
Definition: extractor.h:212
@ EXTRACTOR_METATYPE_PACKAGE_INSTALLED_SIZE
Definition: extractor.h:221
@ EXTRACTOR_METATYPE_TARGET_ARCHITECTURE
Definition: extractor.h:224
@ EXTRACTOR_METATYPE_PACKAGE_PROVIDES
Definition: extractor.h:217
@ EXTRACTOR_METATYPE_PACKAGE_PRE_DEPENDENCY
Definition: extractor.h:225
@ EXTRACTOR_METATYPE_PACKAGE_ESSENTIAL
Definition: extractor.h:223
@ EXTRACTOR_METATYPE_PACKAGE_DEPENDENCY
Definition: extractor.h:214
@ EXTRACTOR_METATYPE_PACKAGE_RECOMMENDS
Definition: extractor.h:218
@ EXTRACTOR_METATYPE_PACKAGE_VERSION
Definition: extractor.h:211
@ EXTRACTOR_METATYPE_UPLOAD_PRIORITY
Definition: extractor.h:213
@ EXTRACTOR_METATYPE_PACKAGE_CONFLICTS
Definition: extractor.h:215
@ EXTRACTOR_METATYPE_PACKAGE_REPLACES
Definition: extractor.h:216
@ EXTRACTOR_METATYPE_PACKAGE_SUGGESTS
Definition: extractor.h:219
@ EXTRACTOR_METATYPE_MIMETYPE
Definition: extractor.h:129
@ EXTRACTOR_METATYPE_PACKAGE_NAME
Definition: extractor.h:210
@ EXTRACTOR_METATYPE_PACKAGE_SOURCE
Definition: extractor.h:222
@ EXTRACTOR_METATYPE_PACKAGE_MAINTAINER
Definition: extractor.h:220
@ EXTRACTOR_METATYPE_DESCRIPTION
Definition: extractor.h:182
enum EXTRACTOR_MetaType type
platform specifics
int64_t(* seek)(void *cls, int64_t pos, int whence)
Definition: extractor.h:509
uint64_t(* get_size)(void *cls)
Definition: extractor.h:520
EXTRACTOR_MetaDataProcessor proc
Definition: extractor.h:525
ssize_t(* read)(void *cls, void **data, size_t size)
Definition: extractor.h:494
const char * text
Definition: deb_extractor.c:77
enum EXTRACTOR_MetaType type
Definition: deb_extractor.c:82
char lastModTime[12]
char trailer[2]
char name[16]
char userId[6]
char modeInOctal[8]
char groupId[6]
char filesize[10]
char userId[8]
char mode[8]
char chksum[8]
char lastModTime[12]
char filesize[12]
char name[100]
char groupId[8]
char linkName[100]
char uname[32]
char devmajor[8]
char devminor[8]
char gname[32]
char magic[6]
char version[2]
char prefix[155]
struct TarHeader tar