libextractor  1.11
About: GNU libextractor is a library used to extract meta-data from files of arbitrary type.
  Fossies Dox: libextractor-1.11.tar.gz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

extract.c
Go to the documentation of this file.
1 /*
2  This file is part of libextractor.
3  Copyright (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff
4 
5  libextractor is free software; you can redistribute it and/or modify
6  it under the terms of the GNU General Public License as published
7  by the Free Software Foundation; either version 3, or (at your
8  option) any later version.
9 
10  libextractor is distributed in the hope that it will be useful, but
11  WITHOUT ANY WARRANTY; without even the implied warranty of
12  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13  General Public License for more details.
14 
15  You should have received a copy of the GNU General Public License
16  along with libextractor; see the file COPYING. If not, write to the
17  Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18  Boston, MA 02110-1301, USA.
19 */
20 /**
21  * @file main/extract.c
22  * @brief command-line tool to run GNU libextractor
23  * @author Christian Grothoff
24  */
25 #include "platform.h"
26 #include "extractor.h"
27 #include "getopt.h"
28 #include <signal.h>
29 
30 #define YES 1
31 #define NO 0
32 
33 
34 /**
35  * Which keyword types should we print?
36  */
37 static int *print;
38 
39 /**
40  * How verbose are we supposed to be?
41  */
42 static int verbose;
43 
44 /**
45  * Run plugins in-process.
46  */
47 static int in_process;
48 
49 /**
50  * Read file contents into memory, then feed them to extractor.
51  */
52 static int from_memory;
53 
54 #ifndef WINDOWS
55 /**
56  * Install a signal handler to ignore SIGPIPE.
57  */
58 static void
60 {
61  struct sigaction oldsig;
62  struct sigaction sig;
63 
64  memset (&sig, 0, sizeof (struct sigaction));
65  sig.sa_handler = SIG_IGN;
66  sigemptyset (&sig.sa_mask);
67 #ifdef SA_INTERRUPT
68  sig.sa_flags = SA_INTERRUPT; /* SunOS */
69 #else
70  sig.sa_flags = SA_RESTART;
71 #endif
72  if (0 != sigaction (SIGPIPE, &sig, &oldsig))
73  fprintf (stderr,
74  "Failed to install SIGPIPE handler: %s\n", strerror (errno));
75 }
76 
77 
78 #endif
79 
80 
/**
 * Description of a single command-line option, as shown by --help.
 * An array of these is terminated by an entry whose description
 * is NULL.
 */
struct Help
{
  /** One-letter option name; '\0' if the option has no short form. */
  char shortArg;

  /** Long option name (printed after "--"). */
  const char *longArg;

  /** Placeholder name of the required argument; NULL if there is none. */
  const char *mandatoryArg;

  /** Human-readable explanation of the option. */
  const char *description;
};
106 
107 
108 /**
109  * Indentation for descriptions.
110  */
111 #define BORDER 29
112 
113 
114 /**
115  * Display help text (--help).
116  *
117  * @param general binary name
118  * @param description program description
119  * @param opt program options (NULL-terminated array)
120  */
121 static void
122 format_help (const char *general,
123  const char *description,
124  const struct Help *opt)
125 {
126  size_t slen;
127  unsigned int i;
128  ssize_t j;
129  size_t ml;
130  size_t p;
131  char scp[80];
132  const char *trans;
133 
134  printf (_ ("Usage: %s\n%s\n\n"),
135  gettext (general),
136  gettext (description));
137  printf (_ (
138  "Arguments mandatory for long options are also mandatory for short options.\n"));
139  slen = 0;
140  i = 0;
141  while (NULL != opt[i].description)
142  {
143  if (0 == opt[i].shortArg)
144  printf (" ");
145  else
146  printf (" -%c, ",
147  opt[i].shortArg);
148  printf ("--%s",
149  opt[i].longArg);
150  slen = 8 + strlen (opt[i].longArg);
151  if (NULL != opt[i].mandatoryArg)
152  {
153  printf ("=%s",
154  opt[i].mandatoryArg);
155  slen += 1 + strlen (opt[i].mandatoryArg);
156  }
157  if (slen > BORDER)
158  {
159  printf ("\n%*s", BORDER, "");
160  slen = BORDER;
161  }
162  if (slen < BORDER)
163  {
164  printf ("%*s", (int) (BORDER - slen), "");
165  slen = BORDER;
166  }
167  trans = gettext (opt[i].description);
168  ml = strlen (trans);
169  p = 0;
170 OUTER:
171  while (ml - p > 78 - slen)
172  {
173  for (j = p + 78 - slen; j>p; j--)
174  {
175  if (isspace ( (unsigned char) trans[j]))
176  {
177  memcpy (scp,
178  &trans[p],
179  j - p);
180  scp[j - p] = '\0';
181  printf ("%s\n%*s",
182  scp,
183  BORDER + 2,
184  "");
185  p = j + 1;
186  slen = BORDER + 2;
187  goto OUTER;
188  }
189  }
190  /* could not find space to break line */
191  memcpy (scp,
192  &trans[p],
193  78 - slen);
194  scp[78 - slen] = '\0';
195  printf ("%s\n%*s",
196  scp,
197  BORDER + 2,
198  "");
199  slen = BORDER + 2;
200  p = p + 78 - slen;
201  }
202  /* print rest */
203  if (p < ml)
204  printf ("%s\n",
205  &trans[p]);
206  i++;
207  }
208 }
209 
210 
211 /**
212  * Run --help.
213  */
214 static void
216 {
217  static struct Help help[] = {
218  { 'b', "bibtex", NULL,
219  gettext_noop ("print output in bibtex format") },
220  { 'g', "grep-friendly", NULL,
221  gettext_noop (
222  "produce grep-friendly output (all results on one line per file)") },
223  { 'h', "help", NULL,
224  gettext_noop ("print this help") },
225  { 'i', "in-process", NULL,
226  gettext_noop ("run plugins in-process (simplifies debugging)") },
227  { 'm', "from-memory", NULL,
228  gettext_noop (
229  "read data from file into memory and extract from memory") },
230  { 'l', "library", "LIBRARY",
231  gettext_noop ("load an extractor plugin named LIBRARY") },
232  { 'L', "list", NULL,
233  gettext_noop ("list all keyword types") },
234  { 'n', "nodefault", NULL,
235  gettext_noop ("do not use the default set of extractor plugins") },
236  { 'p', "print", "TYPE",
237  gettext_noop (
238  "print only keywords of the given TYPE (use -L to get a list)") },
239  { 'v', "version", NULL,
240  gettext_noop ("print the version number") },
241  { 'V', "verbose", NULL,
242  gettext_noop ("be verbose") },
243  { 'x', "exclude", "TYPE",
244  gettext_noop ("do not print keywords of the given TYPE") },
245  { 0, NULL, NULL, NULL },
246  };
247  format_help (_ ("extract [OPTIONS] [FILENAME]*"),
248  _ ("Extract metadata from files."),
249  help);
250 
251 }
252 
253 
254 #if HAVE_ICONV
255 #include "iconv.c"
256 #endif
257 
258 /**
259  * Print a keyword list to a file.
260  *
261  * @param cls closure, not used
262  * @param plugin_name name of the plugin that produced this value;
263  * special values can be used (i.e. '<zlib>' for zlib being
264  * used in the main libextractor library and yielding
265  * meta data).
266  * @param type libextractor-type describing the meta data
267  * @param format basic format information about data
268  * @param data_mime_type mime-type of data (not of the original file);
269  * can be NULL (if mime-type is not known)
270  * @param data actual meta-data found
271  * @param data_len number of bytes in data
272  * @return 0 to continue extracting, 1 to abort
273  */
274 static int
276  const char *plugin_name,
278  enum EXTRACTOR_MetaFormat format,
279  const char *data_mime_type,
280  const char *data,
281  size_t data_len)
282 {
283  char *keyword;
284 #if HAVE_ICONV
285  iconv_t cd;
286 #endif
287  const char *stype;
288  const char *mt;
289 
290  if (YES != print[type])
291  return 0;
292  if (verbose > 3)
293  fprintf (stdout,
294  _ ("Found by `%s' plugin:\n"),
295  plugin_name);
297  stype = (NULL == mt) ? _ ("unknown") : gettext (mt);
298  switch (format)
299  {
301  fprintf (stdout,
302  _ ("%s - (unknown, %u bytes)\n"),
303  stype,
304  (unsigned int) data_len);
305  break;
307  if (0 == data_len)
308  break;
309 #if HAVE_ICONV
310  cd = iconv_open (nl_langinfo (CODESET), "UTF-8");
311  if (((iconv_t) -1) != cd)
312  keyword = iconv_helper (cd,
313  data,
314  data_len);
315  else
316 #endif
317  keyword = strdup (data);
318  if (NULL != keyword)
319  {
320  fprintf (stdout,
321  "%s - %s\n",
322  stype,
323  keyword);
324  free (keyword);
325  }
326 #if HAVE_ICONV
327  if (((iconv_t) -1) != cd)
328  iconv_close (cd);
329 #endif
330  break;
332  fprintf (stdout,
333  _ ("%s - (binary, %u bytes)\n"),
334  stype,
335  (unsigned int) data_len);
336  break;
338  fprintf (stdout,
339  "%s - %.*s\n",
340  stype,
341  (int) data_len,
342  data);
343  break;
344  default:
345  break;
346  }
347  return 0;
348 }
349 
350 
351 /**
352  * Print a keyword list to a file without new lines.
353  *
354  * @param cls closure, not used
355  * @param plugin_name name of the plugin that produced this value;
356  * special values can be used (i.e. '<zlib>' for zlib being
357  * used in the main libextractor library and yielding
358  * meta data).
359  * @param type libextractor-type describing the meta data
360  * @param format basic format information about data
361  * @param data_mime_type mime-type of data (not of the original file);
362  * can be NULL (if mime-type is not known)
363  * @param data actual meta-data found
364  * @param data_len number of bytes in data
365  * @return 0 to continue extracting, 1 to abort
366  */
367 static int
369  const char *plugin_name,
371  enum EXTRACTOR_MetaFormat format,
372  const char *data_mime_type,
373  const char *data,
374  size_t data_len)
375 {
376  char *keyword;
377 #if HAVE_ICONV
378  iconv_t cd;
379 #endif
380  const char *mt;
381 
382  if (YES != print[type])
383  return 0;
385  if (NULL == mt)
386  mt = gettext_noop ("unknown");
387  switch (format)
388  {
390  break;
392  if (0 == data_len)
393  return 0;
394  if (verbose > 1)
395  fprintf (stdout,
396  "%s: ",
397  gettext (mt));
398 #if HAVE_ICONV
399  cd = iconv_open (nl_langinfo (CODESET), "UTF-8");
400  if (((iconv_t) -1) != cd)
401  keyword = iconv_helper (cd,
402  data,
403  data_len);
404  else
405 #endif
406  keyword = strdup (data);
407  if (NULL != keyword)
408  {
409  fprintf (stdout,
410  "`%s' ",
411  keyword);
412  free (keyword);
413  }
414 #if HAVE_ICONV
415  if (((iconv_t) -1) != cd)
416  iconv_close (cd);
417 #endif
418  break;
420  break;
422  if (verbose > 1)
423  fprintf (stdout,
424  "%s ",
425  gettext (mt));
426  fprintf (stdout,
427  "`%s'",
428  data);
429  break;
430  default:
431  break;
432  }
433  return 0;
434 }
435 
436 
437 /**
438  * Entry in the map we construct for each file.
439  */
440 struct BibTexMap
441 {
442  /**
443  * Name in bibTeX
444  */
445  const char *bibTexName;
446 
447  /**
448  * Meta type for the value.
449  */
451 
452  /**
453  * The value itself.
454  */
455  char *value;
456 };
457 
458 
459 /**
460  * Type of the entry for bibtex.
461  */
462 static char *entry_type;
463 
464 /**
465  * Mapping between bibTeX strings, libextractor
466  * meta data types and values for the current document.
467  */
468 static struct BibTexMap btm[] = {
469  { "title", EXTRACTOR_METATYPE_TITLE, NULL},
471  { "author", EXTRACTOR_METATYPE_AUTHOR_NAME, NULL },
473  { "edition", EXTRACTOR_METATYPE_BOOK_EDITION, NULL},
475  { "journal", EXTRACTOR_METATYPE_JOURNAL_NAME, NULL},
478  { "pages", EXTRACTOR_METATYPE_PAGE_COUNT, NULL },
479  { "pages", EXTRACTOR_METATYPE_PAGE_RANGE, NULL },
481  { "publisher", EXTRACTOR_METATYPE_PUBLISHER, NULL },
486  { "url", EXTRACTOR_METATYPE_URL, NULL},
487  { "note", EXTRACTOR_METATYPE_COMMENT, NULL},
488  { "eprint", EXTRACTOR_METATYPE_BIBTEX_EPRINT, NULL },
490  { NULL, 0, NULL }
491 };
492 
493 
494 /**
495  * Clean up the bibtex processor in preparation for the next round.
496  */
497 static void
499 {
500  unsigned int i;
501 
502  for (i = 0; NULL != btm[i].bibTexName; i++)
503  {
504  free (btm[i].value);
505  btm[i].value = NULL;
506  }
507  free (entry_type);
508  entry_type = NULL;
509 }
510 
511 
512 /**
513  * Callback function for printing meta data in bibtex format.
514  *
515  * @param cls closure, not used
516  * @param plugin_name name of the plugin that produced this value;
517  * special values can be used (i.e. '<zlib>' for zlib being
518  * used in the main libextractor library and yielding
519  * meta data).
520  * @param type libextractor-type describing the meta data
521  * @param format basic format information about data
522  * @param data_mime_type mime-type of data (not of the original file);
523  * can be NULL (if mime-type is not known)
524  * @param data actual meta-data found
525  * @param data_len number of bytes in data
526  * @return 0 to continue extracting (always)
527  */
528 static int
529 print_bibtex (void *cls,
530  const char *plugin_name,
532  enum EXTRACTOR_MetaFormat format,
533  const char *data_mime_type,
534  const char *data,
535  size_t data_len)
536 {
537  unsigned int i;
538 
539  if (0 == data_len)
540  return 0;
541  if (YES != print[type])
542  return 0;
543  if (EXTRACTOR_METAFORMAT_UTF8 != format)
544  return 0;
546  {
547  entry_type = strdup (data);
548  return 0;
549  }
550  for (i = 0; NULL != btm[i].bibTexName; i++)
551  if ( (NULL == btm[i].value) &&
552  (btm[i].le_type == type) )
553  btm[i].value = strdup (data);
554  return 0;
555 }
556 
557 
558 /**
559  * Print the computed bibTeX entry.
560  *
561  * @param fn file for which the entry was created.
562  */
563 static void
564 finish_bibtex (const char *fn)
565 {
566  unsigned int i;
567  ssize_t n;
568  const char *et;
569  char temp[20];
570 
571  if (NULL != entry_type)
572  et = entry_type;
573  else
574  et = "misc";
575  if ( (NULL == btm[0].value) ||
576  (NULL == btm[1].value) ||
577  (NULL == btm[2].value) )
578  fprintf (stdout,
579  "@%s %s { ",
580  et,
581  fn);
582  else
583  {
584  snprintf (temp,
585  sizeof (temp),
586  "%.5s%.5s%.5s",
587  btm[2].value,
588  btm[1].value,
589  btm[0].value);
590  for (n = strlen (temp) - 1; n>=0; n--)
591  if (! isalnum ( (unsigned char) temp[n]) )
592  temp[n] = '_';
593  else
594  temp[n] = tolower ( (unsigned char) temp[n]);
595  fprintf (stdout,
596  "@%s %s { ",
597  et,
598  temp);
599  }
600  for (i = 0; NULL != btm[i].bibTexName; i++)
601  if (NULL != btm[i].value)
602  fprintf (stdout,
603  "\t%s = {%s},\n",
604  btm[i].bibTexName,
605  btm[i].value);
606  fprintf (stdout, "%s", "}\n\n");
607 }
608 
609 
#ifdef WINDOWS
/**
 * Convert a 0-terminated wide-character string to a newly allocated
 * multi-byte string in code page 'cp'.
 *
 * @param wstr string to convert
 * @param retstr where to store the result (to be released with
 *        free()); written only if 0 or 1 is returned
 * @param cp target code page
 * @return 0 on success,
 *         1 if the conversion succeeded but was lossy,
 *        -1 if the required length could not be determined,
 *        -2 if memory allocation failed,
 *        -3 if the conversion did not produce the expected length
 */
static int
_wchar_to_str (const wchar_t *wstr, char **retstr, UINT cp)
{
  char *str;
  int len;
  int lenc;
  BOOL lossy = FALSE;
  /* the "used default char" out-parameter is passed as NULL for
     UTF-7 and UTF-8, exactly as the two calls below did before */
  BOOL *plossy = ((CP_UTF8 == cp) || (CP_UTF7 == cp)) ? NULL : &lossy;

  /* first call: query the required buffer size (including the 0) */
  len = WideCharToMultiByte (cp, 0, wstr, -1, NULL, 0, NULL, plossy);
  if (len <= 0)
    return -1;

  str = malloc ((size_t) len);
  if (NULL == str)
    return -2;

  lenc = WideCharToMultiByte (cp, 0, wstr, -1, str, len, NULL, plossy);
  if (lenc != len)
  {
    free (str);
    return -3;
  }
  *retstr = str;
  return lossy ? 1 : 0;
}


#endif
647 
648 
/**
 * Makes a copy of argv that consists of a single memory chunk that can be
 * freed with a single call to free ().
 *
 * The chunk starts with argc + 1 pointers (the last one being NULL),
 * immediately followed by the 0-terminated argument strings.
 *
 * @param argc number of entries in argv; must not be negative
 * @param argv strings to copy
 * @return the copy, NULL on error (errno is set to EINVAL if argc is
 *         negative)
 */
static char **
_make_continuous_arg_copy (int argc, char *const *argv)
{
  size_t total;
  size_t len;
  int i;
  char **new_argv;
  char *p;

  if (argc < 0)
  {
    /* would otherwise write the terminator in front of the buffer */
    errno = EINVAL;
    return NULL;
  }
  total = ((size_t) argc + 1) * sizeof (char *);
  for (i = 0; i < argc; i++)
    total += strlen (argv[i]) + 1;
  new_argv = malloc (total);
  if (NULL == new_argv)
    return NULL;
  /* string storage begins right behind the pointer table */
  p = (char *) &new_argv[argc + 1];
  for (i = 0; i < argc; i++)
  {
    len = strlen (argv[i]) + 1;
    memcpy (p, argv[i], len);
    new_argv[i] = p;
    p += len;
  }
  new_argv[argc] = NULL;
  return new_argv;
}
675 
676 
/**
 * Returns utf-8 encoded arguments.
 * Returned argv has u8argv[u8argc] == NULL.
 * Returned argv is a single memory block, and can be freed with a single
 * free () call.
 *
 * On WINDOWS the arguments are taken from the process' wide-character
 * command line (argc and argv are ignored); elsewhere argv is copied
 * as-is.
 *
 * @param argc argc (as given by main())
 * @param argv argv (as given by main())
 * @param u8argc a location to store new argc in (though it's the same as argc)
 * @param u8argv a location to store new argv in
 * @return 0 on success, -1 on failure
 */
static int
_get_utf8_args (int argc, char *const *argv, int *u8argc, char ***u8argv)
{
#ifdef WINDOWS
  wchar_t *wcmd;
  wchar_t **wargv;
  char **split_u8argv;
  char **result = NULL;
  int wargc;
  int converted;
  int i;
  int e;

  (void) argc;
  (void) argv;
  wcmd = GetCommandLineW ();
  if (NULL == wcmd)
    return -1;
  wargv = CommandLineToArgvW (wcmd, &wargc);
  if (NULL == wargv)
    return -1;

  split_u8argv = malloc ((size_t) wargc * sizeof (char *));
  if (NULL == split_u8argv)
  {
    e = errno;
    LocalFree (wargv);
    errno = e;
    return -1;
  }

  /* convert argument by argument; stop at the first failure */
  for (converted = 0; converted < wargc; converted++)
    if (0 != _wchar_to_str (wargv[converted],
                            &split_u8argv[converted],
                            CP_UTF8))
      break;
  if (converted == wargc)
    result = _make_continuous_arg_copy (wargc, split_u8argv);

  /* the temporaries are released on success and on failure alike;
     keep errno intact for the caller's error message */
  e = errno;
  for (i = 0; i < converted; i++)
    free (split_u8argv[i]);
  free (split_u8argv);
  LocalFree (wargv);
  errno = e;

  *u8argv = result;
  if (NULL == result)
    return -1;
  *u8argc = wargc;
#else
  *u8argv = _make_continuous_arg_copy (argc, argv);
  if (NULL == *u8argv)
    return -1;
  *u8argc = argc;
#endif
  return 0;
}
742 
743 
744 /**
745  * Main function for the 'extract' tool. Invoke with a list of
746  * filenames to extract keywords from.
747  *
748  * @param argc number of arguments in argv
749  * @param argv command line options and filename to run on
750  * @return 0 on success
751  */
752 int
753 main (int argc, char *argv[])
754 {
755  unsigned int i;
756  struct EXTRACTOR_PluginList *plugins;
757  int option_index;
758  int c;
759  char *libraries = NULL;
760  int nodefault = NO;
761  int defaultAll = YES;
762  int bibtex = NO;
763  int grepfriendly = NO;
764  int ret = 0;
765  EXTRACTOR_MetaDataProcessor processor = NULL;
766  char **utf8_argv;
767  int utf8_argc;
768 
769 #if ENABLE_NLS
770  setlocale (LC_ALL, "");
771  textdomain (PACKAGE);
772 #endif
773 #ifndef WINDOWS
774  ignore_sigpipe ();
775 #endif
776  if (NULL == (print = malloc (sizeof (int) * EXTRACTOR_metatype_get_max ())))
777  {
778  fprintf (stderr,
779  "malloc failed: %s\n",
780  strerror (errno));
781  return 1;
782  }
783  for (i = 0; i < EXTRACTOR_metatype_get_max (); i++)
784  print[i] = YES; /* default: print everything */
785 
786  if (0 != _get_utf8_args (argc, argv, &utf8_argc, &utf8_argv))
787  {
788  fprintf (stderr, "Failed to get arguments: %s\n", strerror (errno));
789  return 1;
790  }
791 
792  while (1)
793  {
794  static struct option long_options[] = {
795  {"bibtex", 0, 0, 'b'},
796  {"grep-friendly", 0, 0, 'g'},
797  {"help", 0, 0, 'h'},
798  {"in-process", 0, 0, 'i'},
799  {"from-memory", 0, 0, 'm'},
800  {"list", 0, 0, 'L'},
801  {"library", 1, 0, 'l'},
802  {"nodefault", 0, 0, 'n'},
803  {"print", 1, 0, 'p'},
804  {"verbose", 0, 0, 'V'},
805  {"version", 0, 0, 'v'},
806  {"exclude", 1, 0, 'x'},
807  {0, 0, 0, 0}
808  };
809  option_index = 0;
810  c = getopt_long (utf8_argc,
811  utf8_argv,
812  "abghiml:Lnp:vVx:",
813  long_options,
814  &option_index);
815 
816  if (c == -1)
817  break; /* No more flags to process */
818  switch (c)
819  {
820  case 'b':
821  bibtex = YES;
822  if (NULL != processor)
823  {
824  fprintf (stderr,
825  "%s",
826  _ (
827  "Illegal combination of options, cannot combine multiple styles of printing.\n"));
828  free (utf8_argv);
829  return 0;
830  }
831  processor = &print_bibtex;
832  break;
833  case 'g':
834  grepfriendly = YES;
835  if (NULL != processor)
836  {
837  fprintf (stderr,
838  "%s",
839  _ (
840  "Illegal combination of options, cannot combine multiple styles of printing.\n"));
841  free (utf8_argv);
842  return 0;
843  }
845  break;
846  case 'h':
847  print_help ();
848  free (utf8_argv);
849  return 0;
850  case 'i':
851  in_process = YES;
852  break;
853  case 'm':
854  from_memory = YES;
855  break;
856  case 'l':
857  libraries = optarg;
858  break;
859  case 'L':
860  i = 0;
861  while (NULL != EXTRACTOR_metatype_to_string (i))
862  printf ("%s\n",
864  free (utf8_argv);
865  return 0;
866  case 'n':
867  nodefault = YES;
868  break;
869  case 'p':
870  if (NULL == optarg)
871  {
872  fprintf (stderr,
873  _ (
874  "You must specify an argument for the `%s' option (option ignored).\n"),
875  "-p");
876  break;
877  }
878  if (YES == defaultAll)
879  {
880  defaultAll = NO;
881  i = 0;
882  while (NULL != EXTRACTOR_metatype_to_string (i))
883  print[i++] = NO;
884  }
885  i = 0;
886  while (NULL != EXTRACTOR_metatype_to_string (i))
887  {
888  if ( (0 == strcmp (optarg,
890  (0 == strcmp (optarg,
892 
893  {
894  print[i] = YES;
895  break;
896  }
897  i++;
898  }
900  {
901  fprintf (stderr,
902  "Unknown keyword type `%s', use option `%s' to get a list.\n",
903  optarg,
904  "-L");
905  free (utf8_argv);
906  return -1;
907  }
908  break;
909  case 'v':
910  printf ("extract v%s\n", PACKAGE_VERSION);
911  free (utf8_argv);
912  return 0;
913  case 'V':
914  verbose++;
915  break;
916  case 'x':
917  i = 0;
918  while (NULL != EXTRACTOR_metatype_to_string (i))
919  {
920  if ( (0 == strcmp (optarg,
922  (0 == strcmp (optarg,
924  {
925  print[i] = NO;
926  break;
927  }
928  i++;
929  }
931  {
932  fprintf (stderr,
933  "Unknown keyword type `%s', use option `%s' to get a list.\n",
934  optarg,
935  "-L");
936  free (utf8_argv);
937  return -1;
938  }
939  break;
940  default:
941  fprintf (stderr,
942  "%s",
943  _ ("Use --help to get a list of options.\n"));
944  free (utf8_argv);
945  return -1;
946  } /* end of parsing commandline */
947  } /* while (1) */
948  if (optind < 0)
949  {
950  fprintf (stderr,
951  "%s", "Unknown error parsing options\n");
952  free (print);
953  free (utf8_argv);
954  return -1;
955  }
956  if (utf8_argc - optind < 1)
957  {
958  fprintf (stderr,
959  "%s", "Invoke with list of filenames to extract keywords form!\n");
960  free (print);
961  free (utf8_argv);
962  return -1;
963  }
964 
965  /* build list of libraries */
966  if (NO == nodefault)
970  else
971  plugins = NULL;
972  if (NULL != libraries)
973  plugins = EXTRACTOR_plugin_add_config (plugins,
974  libraries,
975  in_process
978  if (NULL == processor)
979  processor = &print_selected_keywords;
980 
981  /* extract keywords */
982  if (YES == bibtex)
983  fprintf (stdout,
984  "%s", _ ("% BiBTeX file\n"));
985  for (i = optind; i < utf8_argc; i++)
986  {
987  errno = 0;
988  if (YES == grepfriendly)
989  fprintf (stdout, "%s ", utf8_argv[i]);
990  else if (NO == bibtex)
991  fprintf (stdout,
992  _ ("Keywords for file %s:\n"),
993  utf8_argv[i]);
994  else
995  cleanup_bibtex ();
996  if (NO == from_memory)
997  EXTRACTOR_extract (plugins,
998  utf8_argv[i],
999  NULL, 0,
1000  processor,
1001  NULL);
1002  else
1003  {
1004  struct stat sb;
1005  unsigned char *data = NULL;
1006  int f = open (utf8_argv[i], O_RDONLY
1007 #if WINDOWS
1008  | O_BINARY
1009 #endif
1010  );
1011  if ( (-1 != f) &&
1012  (0 == fstat (f, &sb)) &&
1013  (NULL != (data = malloc ((size_t) sb.st_size))) &&
1014  (sb.st_size == read (f, data, (size_t) sb.st_size) ) )
1015  {
1016  EXTRACTOR_extract (plugins,
1017  NULL,
1018  data, sb.st_size,
1019  processor,
1020  NULL);
1021  }
1022  else
1023  {
1024  if (verbose > 0)
1025  fprintf (stderr,
1026  "%s: %s: %s\n",
1027  utf8_argv[0], utf8_argv[i], strerror (errno));
1028  ret = 1;
1029  }
1030  if (NULL != data)
1031  free (data);
1032  if (-1 != f)
1033  (void) close (f);
1034  }
1035  if (YES == grepfriendly)
1036  fprintf (stdout, "%s", "\n");
1037  continue;
1038  }
1039  if (YES == grepfriendly)
1040  fprintf (stdout, "%s", "\n");
1041  if (bibtex)
1042  finish_bibtex (utf8_argv[i]);
1043  if (verbose > 0)
1044  fprintf (stdout, "%s", "\n");
1045  free (print);
1046  free (utf8_argv);
1047  EXTRACTOR_plugin_remove_all (plugins);
1048  plugins = NULL;
1049  cleanup_bibtex (); /* actually free's stuff */
1050  return ret;
1051 }
1052 
1053 
1054 /* end of extract.c */
static int verbose
Definition: extract.c:42
int main(int argc, char *argv[])
Definition: extract.c:753
static struct BibTexMap btm[]
Definition: extract.c:468
static void finish_bibtex(const char *fn)
Definition: extract.c:564
static int print_selected_keywords(void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
Definition: extract.c:275
static void format_help(const char *general, const char *description, const struct Help *opt)
Definition: extract.c:122
static int print_bibtex(void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
Definition: extract.c:529
static void cleanup_bibtex()
Definition: extract.c:498
static void print_help()
Definition: extract.c:215
static int _get_utf8_args(int argc, char *const *argv, int *u8argc, char ***u8argv)
Definition: extract.c:690
#define BORDER
Definition: extract.c:111
static int in_process
Definition: extract.c:47
#define YES
Definition: extract.c:30
static char ** _make_continuous_arg_copy(int argc, char *const *argv)
Definition: extract.c:654
static int print_selected_keywords_grep_friendly(void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
Definition: extract.c:368
static int * print
Definition: extract.c:37
#define NO
Definition: extract.c:31
static char * entry_type
Definition: extract.c:462
static void ignore_sigpipe()
Definition: extract.c:59
static int from_memory
Definition: extract.c:52
struct EXTRACTOR_PluginList * EXTRACTOR_plugin_add_defaults(enum EXTRACTOR_Options flags)
@ EXTRACTOR_OPTION_DEFAULT_POLICY
Definition: extractor.h:67
@ EXTRACTOR_OPTION_IN_PROCESS
Definition: extractor.h:78
void EXTRACTOR_extract(struct EXTRACTOR_PluginList *plugins, const char *filename, const void *data, size_t size, EXTRACTOR_MetaDataProcessor proc, void *proc_cls)
Definition: extractor.c:597
void EXTRACTOR_plugin_remove_all(struct EXTRACTOR_PluginList *plugins)
int(* EXTRACTOR_MetaDataProcessor)(void *cls, const char *plugin_name, enum EXTRACTOR_MetaType type, enum EXTRACTOR_MetaFormat format, const char *data_mime_type, const char *data, size_t data_len)
Definition: extractor.h:460
struct EXTRACTOR_PluginList * EXTRACTOR_plugin_add_config(struct EXTRACTOR_PluginList *prev, const char *config, enum EXTRACTOR_Options flags)
EXTRACTOR_MetaFormat
Definition: extractor.h:92
@ EXTRACTOR_METAFORMAT_BINARY
Definition: extractor.h:107
@ EXTRACTOR_METAFORMAT_C_STRING
Definition: extractor.h:113
@ EXTRACTOR_METAFORMAT_UTF8
Definition: extractor.h:102
@ EXTRACTOR_METAFORMAT_UNKNOWN
Definition: extractor.h:96
#define NULL
Definition: getopt1.c:60
int optind
Definition: getopt.c:134
char * optarg
Definition: getopt.c:119
int getopt_long()
#define gettext_noop(String)
Definition: gettext.h:69
#define gettext(Msgid)
Definition: gettext.h:45
#define textdomain(Domainname)
Definition: gettext.h:56
enum EXTRACTOR_MetaType EXTRACTOR_metatype_get_max(void)
const char * EXTRACTOR_metatype_to_string(enum EXTRACTOR_MetaType type)
EXTRACTOR_MetaType
Definition: extractor.h:126
@ EXTRACTOR_METATYPE_BOOK_EDITION
Definition: extractor.h:136
@ EXTRACTOR_METATYPE_PUBLISHER_SERIES
Definition: extractor.h:149
@ EXTRACTOR_METATYPE_PUBLISHER_INSTITUTION
Definition: extractor.h:148
@ EXTRACTOR_METATYPE_JOURNAL_NAME
Definition: extractor.h:138
@ EXTRACTOR_METATYPE_JOURNAL_NUMBER
Definition: extractor.h:140
@ EXTRACTOR_METATYPE_BOOK_TITLE
Definition: extractor.h:135
@ EXTRACTOR_METATYPE_PUBLICATION_MONTH
Definition: extractor.h:152
@ EXTRACTOR_METATYPE_PUBLICATION_YEAR
Definition: extractor.h:151
@ EXTRACTOR_METATYPE_PUBLISHER_ADDRESS
Definition: extractor.h:147
@ EXTRACTOR_METATYPE_AUTHOR_NAME
Definition: extractor.h:143
@ EXTRACTOR_METATYPE_AUTHOR_INSTITUTION
Definition: extractor.h:145
@ EXTRACTOR_METATYPE_JOURNAL_VOLUME
Definition: extractor.h:139
@ EXTRACTOR_METATYPE_COMMENT
Definition: extractor.h:131
@ EXTRACTOR_METATYPE_TITLE
Definition: extractor.h:134
@ EXTRACTOR_METATYPE_PUBLICATION_TYPE
Definition: extractor.h:150
@ EXTRACTOR_METATYPE_PAGE_RANGE
Definition: extractor.h:142
@ EXTRACTOR_METATYPE_PAGE_COUNT
Definition: extractor.h:141
@ EXTRACTOR_METATYPE_BIBTEX_ENTRY_TYPE
Definition: extractor.h:156
@ EXTRACTOR_METATYPE_BOOK_CHAPTER_NUMBER
Definition: extractor.h:137
@ EXTRACTOR_METATYPE_PUBLISHER
Definition: extractor.h:146
@ EXTRACTOR_METATYPE_URL
Definition: extractor.h:159
@ EXTRACTOR_METATYPE_BIBTEX_EPRINT
Definition: extractor.h:155
enum EXTRACTOR_MetaType type
convenience functions for character conversion
static char * iconv_helper(iconv_t cd, const char *in, size_t inSize)
Definition: iconv.c:37
platform specifics
#define _(a)
Definition: platform.h:32
char * value
Definition: extract.c:455
enum EXTRACTOR_MetaType le_type
Definition: extract.c:450
const char * bibTexName
Definition: extract.c:445
Definition: extract.c:85
const char * mandatoryArg
Definition: extract.c:99
const char * longArg
Definition: extract.c:94
const char * description
Definition: extract.c:104
char shortArg
Definition: extract.c:89
Definition: getopt.h:84