libextractor  1.11
About: GNU libextractor is a library used to extract meta-data from files of arbitrary type.
  Fossies Dox: libextractor-1.11.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

png_extractor.c
Go to the documentation of this file.
1 /*
2  This file is part of libextractor.
3  Copyright (C) 2002, 2003, 2004, 2005, 2009, 2012 Vidyut Samanta and Christian Grothoff
4 
5  libextractor is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published
7  by the Free Software Foundation; either version 3, or (at your
8  option) any later version.
9 
10  libextractor is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with libextractor; see the file COPYING. If not, write to the
17  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  Boston, MA 02110-1301, USA.
19  */
20 /**
21  * @file plugins/png_extractor.c
22  * @brief plugin to support PNG files
23  * @author Christian Grothoff
24  */
25 #include "platform.h"
26 #include <zlib.h>
27 #include "extractor.h"
28 #include "convert.h"
29 
30 /**
31  * Header that every PNG file must start with: the 8-byte signature
    * 0x89 'P' 'N' 'G' CR LF 0x1A LF (0x89 and 0x1A are written as the
    * octal escapes \211 and \032). The literal contains no embedded 0
    * byte, so strlen() yields its full length.
32  */
33 #define PNG_HEADER "\211PNG\r\n\032\n"
34 
35 
/**
 * Duplicate the first n bytes of a buffer into a freshly
 * allocated, 0-terminated string.
 *
 * @param str buffer to copy from (need not be 0-terminated)
 * @param n number of bytes to copy from str
 * @return newly allocated copy of the n bytes followed by a
 *         0-terminator (release with free()); NULL if n + 1
 *         overflows or the allocation fails
 */
static char *
stndup (const char *str,
        size_t n)
{
  const size_t total = n + 1;
  char *copy;

  if (total < n)
    return NULL; /* n + 1 wrapped around */
  copy = malloc (total);
  if (NULL == copy)
    return NULL;
  memcpy (copy, str, n);
  copy[n] = '\0';
  return copy;
}
58 
59 
/**
 * Bounded string length: like strlen(), but never looks at more
 * than maxlen bytes.
 *
 * @param str input string (need not be 0-terminated within maxlen)
 * @param maxlen maximum number of bytes of str to inspect
 * @return offset of the first 0-terminator in str, or maxlen if
 *         none occurs within the first maxlen bytes
 */
static size_t
stnlen (const char *str,
        size_t maxlen)
{
  const char *end;

  /* memchr stops at the first match, so it never reads past the
     terminator even if maxlen is larger than the string */
  end = memchr (str, '\0', maxlen);
  if (NULL == end)
    return maxlen;
  return (size_t) (end - str);
}
80 
81 
/**
 * Interpret the 4 bytes at 'pos' as a big-endian (network byte
 * order) encoded 32-bit unsigned integer and return its value.
 *
 * The bytes are combined explicitly, so the result does not depend
 * on the host byte order and 'pos' may be unaligned.
 *
 * @param pos (unaligned) pointer to 4 byte integer
 * @return converted integer in host byte order
 */
static uint32_t
get_int_at (const void *pos)
{
  const unsigned char *bytes = pos;

  return ((uint32_t) bytes[0] << 24)
         | ((uint32_t) bytes[1] << 16)
         | ((uint32_t) bytes[2] << 8)
         | ((uint32_t) bytes[3]);
}
97 
98 
99 /**
100  * Map from PNG meta data descriptor strings
101  * to LE types.
102  */
103 static struct
104 {
105  /**
106  * PNG name.
107  */
108  const char *name;
109 
110  /**
111  * Corresponding LE type.
112  */
114 } tagmap[] = {
115  { "Author", EXTRACTOR_METATYPE_AUTHOR_NAME },
116  { "Description", EXTRACTOR_METATYPE_DESCRIPTION },
117  { "Comment", EXTRACTOR_METATYPE_COMMENT },
118  { "Copyright", EXTRACTOR_METATYPE_COPYRIGHT },
119  { "Source", EXTRACTOR_METATYPE_SOURCE_DEVICE },
120  { "Creation Time", EXTRACTOR_METATYPE_CREATION_DATE },
121  { "Title", EXTRACTOR_METATYPE_TITLE },
123  { "Disclaimer", EXTRACTOR_METATYPE_DISCLAIMER },
124  { "Warning", EXTRACTOR_METATYPE_WARNING },
125  { "Signature", EXTRACTOR_METATYPE_UNKNOWN },
127 };
128 
129 
/**
 * Give the given metadata to LE. Stores the return value of the
 * LE callback in "ret" and jumps to 'FINISH' if that value is
 * non-zero (LE says we are done).
 *
 * Expects "ec" and "ret" as well as a 'FINISH' label to be in scope
 * at the point of use. The string argument is evaluated only once.
 *
 * @param t type of the metadata
 * @param s 0-terminated utf8 string with the metadata
 */
#define ADD(t,s) do { \
    const char *add_str_ = (s); \
    if (0 != (ret = ec->proc (ec->cls, "png", (t), \
                              EXTRACTOR_METAFORMAT_UTF8, \
                              "text/plain", add_str_, \
                              strlen (add_str_) + 1))) \
      goto FINISH; \
} while (0)
142 
143 
/**
 * Give the given metadata to LE and free the memory. Stores the
 * return value of the LE callback in "ret" and jumps to 'FINISH'
 * if that value is non-zero (LE says we are done). If the string
 * is NULL, nothing happens and "ret" is left untouched.
 *
 * Expects "ec" and "ret" as well as a 'FINISH' label to be in scope
 * at the point of use. The string argument is evaluated only once
 * and is always freed before control leaves the macro.
 *
 * @param t type of the metadata
 * @param s utf8 string with the metadata, to be freed afterwards
 *          (may be NULL)
 */
#define ADDF(t,s) do { \
    char *addf_str_ = (s); \
    if (NULL != addf_str_) \
    { \
      ret = ec->proc (ec->cls, "png", (t), \
                      EXTRACTOR_METAFORMAT_UTF8, \
                      "text/plain", addf_str_, \
                      strlen (addf_str_) + 1); \
      free (addf_str_); \
      if (0 != ret) \
        goto FINISH; \
    } \
} while (0)
159 
160 
161 /**
162  * Process EXt tag.
163  *
164  * @param ec extraction context
165  * @param length length of the tag
166  * @return 0 to continue extracting, 1 if we are done
167  */
168 static int
170  uint32_t length)
171 {
172  void *ptr;
173  unsigned char *data;
174  char *keyword;
175  size_t off;
176  unsigned int i;
177  int ret;
178 
179  if (length != ec->read (ec->cls, &ptr, length))
180  return 1;
181  data = ptr;
182  off = stnlen ((char*) data, length) + 1;
183  if (off >= length)
184  return 0; /* failed to find '\0' */
185  if (NULL == (keyword = EXTRACTOR_common_convert_to_utf8 ((char*) &data[off],
186  length - off,
187  "ISO-8859-1")))
188  return 0;
189  ret = 0;
190  for (i = 0; NULL != tagmap[i].name; i++)
191  if (0 == strcmp (tagmap[i].name, (char*) data))
192  {
193  ADDF (tagmap[i].type, keyword);
194  return 0;
195  }
197 FINISH:
198  return ret;
199 }
200 
201 
202 /**
203  * Process iTXt tag.
204  *
205  * @param ec extraction context
206  * @param length length of the tag
207  * @return 0 to continue extracting, 1 if we are done
208  */
209 static int
211  uint32_t length)
212 {
213  void *ptr;
214  unsigned char *data;
215  size_t pos;
216  char *keyword;
217  const char *language;
218  const char *translated;
219  unsigned int i;
220  int compressed;
221  char *buf;
222  char *lan;
223  uLongf bufLen;
224  int ret;
225  int zret;
226 
227  if (length != ec->read (ec->cls, &ptr, length))
228  return 1;
229  data = ptr;
230  pos = stnlen ((char *) data, length) + 1;
231  if (pos >= length)
232  return 0;
233  compressed = data[pos++];
234  if (compressed && (0 != data[pos++]))
235  return 0; /* bad compression method */
236  if (pos > length)
237  return 0;
238  language = (char *) &data[pos];
239  ret = 0;
240  if ( (stnlen (language, length - pos) > 0) &&
241  (NULL != (lan = stndup (language, length - pos))) )
243  pos += stnlen (language, length - pos) + 1;
244  if (pos + 1 >= length)
245  return 0;
246  translated = (char*) &data[pos]; /* already in utf-8! */
247  if ( (stnlen (translated, length - pos) > 0) &&
248  (NULL != (lan = stndup (translated, length - pos))) )
250  pos += stnlen (translated, length - pos) + 1;
251  if (pos >= length)
252  return 0;
253 
254  if (compressed)
255  {
256  bufLen = 1024 + 2 * (length - pos);
257  while (1)
258  {
259  if (bufLen * 2 < bufLen)
260  return 0;
261  bufLen *= 2;
262  if (bufLen > 50 * (length - pos))
263  {
264  /* printf("zlib problem"); */
265  return 0;
266  }
267  if (NULL == (buf = malloc (bufLen)))
268  {
269  /* printf("out of memory"); */
270  return 0; /* out of memory */
271  }
272  if (Z_OK ==
273  (zret = uncompress ((Bytef *) buf,
274  &bufLen,
275  (const Bytef *) &data[pos], length - pos)))
276  {
277  /* printf("zlib ok"); */
278  break;
279  }
280  free (buf);
281  if (Z_BUF_ERROR != zret)
282  return 0; /* unknown error, abort */
283  }
284  keyword = stndup (buf, bufLen);
285  free (buf);
286  }
287  else
288  {
289  keyword = stndup ((char *) &data[pos], length - pos);
290  }
291  if (NULL == keyword)
292  return ret;
293  for (i = 0; NULL != tagmap[i].name; i++)
294  if (0 == strcmp (tagmap[i].name, (char*) data))
295  {
296  ADDF (tagmap[i].type, keyword /* already in utf8 */);
297  return 0;
298  }
299  ADDF (EXTRACTOR_METATYPE_COMMENT, keyword);
300 FINISH:
301  return ret;
302 }
303 
304 
305 /**
306  * Process IHDR tag.
307  *
308  * @param ec extraction context
309  * @param length length of the tag
310  * @return 0 to continue extracting, 1 if we are done
311  */
312 static int
314  uint32_t length)
315 {
316  void *ptr;
317  unsigned char *data;
318  char tmp[128];
319  int ret;
320 
321  if (length < 12)
322  return 0;
323  if (length != ec->read (ec->cls, &ptr, length))
324  return 1;
325  data = ptr;
326  ret = 0;
327  snprintf (tmp,
328  sizeof (tmp),
329  "%ux%u",
330  get_int_at (data), get_int_at (&data[4]));
332 FINISH:
333  return ret;
334 }
335 
336 
337 /**
338  * Process zTXt tag.
339  *
340  * @param ec extraction context
341  * @param length length of the tag
342  * @return 0 to continue extracting, 1 if we are done
343  */
344 static int
346  uint32_t length)
347 {
348  void *ptr;
349  unsigned char *data;
350  char *keyword;
351  size_t off;
352  unsigned int i;
353  char *buf;
354  uLongf bufLen;
355  int zret;
356  int ret;
357 
358  if (length != ec->read (ec->cls, &ptr, length))
359  return 1;
360  data = ptr;
361  off = stnlen ((char *) data, length) + 1;
362  if (off >= length)
363  return 0; /* failed to find '\0' */
364  if (0 != data[off])
365  return 0; /* compression method must be 0 */
366  off++;
367  ret = 0;
368  bufLen = 1024 + 2 * (length - off);
369  while (1)
370  {
371  if (bufLen * 2 < bufLen)
372  return 0;
373  bufLen *= 2;
374  if (bufLen > 50 * (length - off))
375  {
376  /* printf("zlib problem"); */
377  return 0;
378  }
379  if (NULL == (buf = malloc (bufLen)))
380  {
381  /* printf("out of memory"); */
382  return 0; /* out of memory */
383  }
384  if (Z_OK ==
385  (zret = uncompress ((Bytef *) buf,
386  &bufLen,
387  (const Bytef *) &data[off],
388  length - off)))
389  {
390  /* printf("zlib ok"); */
391  break;
392  }
393  free (buf);
394  if (Z_BUF_ERROR != zret)
395  return 0; /* unknown error, abort */
396  }
397  keyword = EXTRACTOR_common_convert_to_utf8 (buf,
398  bufLen,
399  "ISO-8859-1");
400  free (buf);
401  for (i = 0; NULL != tagmap[i].name; i++)
402  if (0 == strcmp (tagmap[i].name, (char*) data))
403  {
404  ADDF (tagmap[i].type, keyword);
405  return 0;
406  }
407  ADDF (EXTRACTOR_METATYPE_COMMENT, keyword);
408 FINISH:
409  return ret;
410 }
411 
412 
413 /**
414  * Process IME tag.
415  *
416  * @param ec extraction context
417  * @param length length of the tag
418  * @return 0 to continue extracting, 1 if we are done
419  */
420 static int
422  uint32_t length)
423 {
424  void *ptr;
425  unsigned char *data;
426  unsigned short y;
427  unsigned int year;
428  unsigned int mo;
429  unsigned int day;
430  unsigned int h;
431  unsigned int m;
432  unsigned int s;
433  char val[256];
434  int ret;
435 
436  if (length != 7)
437  return 0;
438  if (length != ec->read (ec->cls, &ptr, length))
439  return 1;
440  data = ptr;
441  ret = 0;
442  memcpy (&y, data, sizeof (uint16_t));
443  year = ntohs (y);
444  mo = (unsigned char) data[6];
445  day = (unsigned char) data[7];
446  h = (unsigned char) data[8];
447  m = (unsigned char) data[9];
448  s = (unsigned char) data[10];
449  snprintf (val,
450  sizeof (val),
451  "%04u-%02u-%02u %02d:%02d:%02d",
452  year, mo, day, h, m, s);
454 FINISH:
455  return ret;
456 }
457 
458 
459 /**
460  * Main entry method for the 'image/png' extraction plugin.
461  *
462  * @param ec extraction context provided to the plugin
463  */
464 void
466 {
467  void *data;
468  uint32_t length;
469  int64_t pos;
470  int ret;
471  ssize_t len;
472 
473  len = strlen (PNG_HEADER);
474  if (len != ec->read (ec->cls, &data, len))
475  return;
476  if (0 != strncmp ((const char*) data, PNG_HEADER, len))
477  return;
478  ADD (EXTRACTOR_METATYPE_MIMETYPE, "image/png");
479  ret = 0;
480  while (0 == ret)
481  {
482  if (sizeof (uint32_t) + 4 != ec->read (ec->cls,
483  &data,
484  sizeof (uint32_t) + 4))
485  break;
486  length = get_int_at (data);
487  if (0 > (pos = ec->seek (ec->cls, 0, SEEK_CUR)))
488  break;
489  pos += length + 4; /* Chunk type, data, crc */
490  if (0 == strncmp ((char*) data + sizeof (uint32_t), "IHDR", 4))
491  ret = processIHDR (ec, length);
492  if (0 == strncmp ((char*) data + sizeof (uint32_t), "iTXt", 4))
493  ret = processiTXt (ec, length);
494  if (0 == strncmp ((char*) data + sizeof (uint32_t), "tEXt", 4))
495  ret = processtEXt (ec, length);
496  if (0 == strncmp ((char*) data + sizeof (uint32_t), "zTXt", 4))
497  ret = processzTXt (ec, length);
498  if (0 == strncmp ((char*) data + sizeof (uint32_t), "tIME", 4))
499  ret = processtIME (ec, length);
500  if (ret != 0)
501  break;
502  if (pos != ec->seek (ec->cls, pos, SEEK_SET))
503  break;
504  }
505 FINISH:
506  return;
507 }
508 
509 
510 /* end of png_extractor.c */
char * EXTRACTOR_common_convert_to_utf8(const char *input, size_t len, const char *charset)
Definition: convert.c:39
#define NULL
Definition: getopt1.c:60
EXTRACTOR_MetaType
Definition: extractor.h:126
@ EXTRACTOR_METATYPE_PRODUCED_BY_SOFTWARE
Definition: extractor.h:258
@ EXTRACTOR_METATYPE_WARNING
Definition: extractor.h:294
@ EXTRACTOR_METATYPE_UNKNOWN
Definition: extractor.h:181
@ EXTRACTOR_METATYPE_AUTHOR_NAME
Definition: extractor.h:143
@ EXTRACTOR_METATYPE_LANGUAGE
Definition: extractor.h:157
@ EXTRACTOR_METATYPE_COMMENT
Definition: extractor.h:131
@ EXTRACTOR_METATYPE_TITLE
Definition: extractor.h:134
@ EXTRACTOR_METATYPE_CREATION_DATE
Definition: extractor.h:196
@ EXTRACTOR_METATYPE_KEYWORDS
Definition: extractor.h:185
@ EXTRACTOR_METATYPE_MODIFICATION_DATE
Definition: extractor.h:197
@ EXTRACTOR_METATYPE_COPYRIGHT
Definition: extractor.h:183
@ EXTRACTOR_METATYPE_SOURCE_DEVICE
Definition: extractor.h:292
@ EXTRACTOR_METATYPE_MIMETYPE
Definition: extractor.h:129
@ EXTRACTOR_METATYPE_DISCLAIMER
Definition: extractor.h:293
@ EXTRACTOR_METATYPE_RESERVED
Definition: extractor.h:128
@ EXTRACTOR_METATYPE_IMAGE_DIMENSIONS
Definition: extractor.h:257
@ EXTRACTOR_METATYPE_DESCRIPTION
Definition: extractor.h:182
platform specifics
static uint32_t get_int_at(const void *pos)
Definition: png_extractor.c:90
static int processzTXt(struct EXTRACTOR_ExtractContext *ec, uint32_t length)
static int processIHDR(struct EXTRACTOR_ExtractContext *ec, uint32_t length)
static int processiTXt(struct EXTRACTOR_ExtractContext *ec, uint32_t length)
static int processtIME(struct EXTRACTOR_ExtractContext *ec, uint32_t length)
static size_t stnlen(const char *str, size_t maxlen)
Definition: png_extractor.c:69
#define ADDF(t, s)
static char * stndup(const char *str, size_t n)
Definition: png_extractor.c:45
void EXTRACTOR_png_extract_method(struct EXTRACTOR_ExtractContext *ec)
const char * name
static int processtEXt(struct EXTRACTOR_ExtractContext *ec, uint32_t length)
static struct @2 tagmap[]
enum EXTRACTOR_MetaType type
#define ADD(t, s)
#define PNG_HEADER
Definition: png_extractor.c:33
int64_t(* seek)(void *cls, int64_t pos, int whence)
Definition: extractor.h:509
ssize_t(* read)(void *cls, void **data, size_t size)
Definition: extractor.h:494