"Fossies" - the Fresh Open Source Software Archive

Member "libextractor-1.11/src/main/extract.c" (30 Jan 2021, 26396 Bytes) of package /linux/privat/libextractor-1.11.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code-folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "extract.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.10_vs_1.11.

    1 /*
    2      This file is part of libextractor.
    3      Copyright (C) 2002, 2003, 2004, 2005, 2006, 2009, 2012 Vidyut Samanta and Christian Grothoff
    4 
    5      libextractor is free software; you can redistribute it and/or modify
    6      it under the terms of the GNU General Public License as published
    7      by the Free Software Foundation; either version 3, or (at your
    8      option) any later version.
    9 
   10      libextractor is distributed in the hope that it will be useful, but
   11      WITHOUT ANY WARRANTY; without even the implied warranty of
   12      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   13      General Public License for more details.
   14 
   15      You should have received a copy of the GNU General Public License
   16      along with libextractor; see the file COPYING.  If not, write to the
   17      Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   18      Boston, MA 02110-1301, USA.
   19 */
   20 /**
   21  * @file main/extract.c
   22  * @brief command-line tool to run GNU libextractor
   23  * @author Christian Grothoff
   24  */
   25 #include "platform.h"
   26 #include "extractor.h"
   27 #include "getopt.h"
   28 #include <signal.h>
   29 
   30 #define YES 1
   31 #define NO 0
   32 
   33 
   34 /**
   35  * Which keyword types should we print?
   36  */
   37 static int *print;
   38 
   39 /**
   40  * How verbose are we supposed to be?
   41  */
   42 static int verbose;
   43 
   44 /**
   45  * Run plugins in-process.
   46  */
   47 static int in_process;
   48 
   49 /**
   50  * Read file contents into memory, then feed them to extractor.
   51  */
   52 static int from_memory;
   53 
   54 #ifndef WINDOWS
   55 /**
   56  * Install a signal handler to ignore SIGPIPE.
   57  */
   58 static void
   59 ignore_sigpipe ()
   60 {
   61   struct sigaction oldsig;
   62   struct sigaction sig;
   63 
   64   memset (&sig, 0, sizeof (struct sigaction));
   65   sig.sa_handler = SIG_IGN;
   66   sigemptyset (&sig.sa_mask);
   67 #ifdef SA_INTERRUPT
   68   sig.sa_flags = SA_INTERRUPT;  /* SunOS */
   69 #else
   70   sig.sa_flags = SA_RESTART;
   71 #endif
   72   if (0 != sigaction (SIGPIPE, &sig, &oldsig))
   73     fprintf (stderr,
   74              "Failed to install SIGPIPE handler: %s\n", strerror (errno));
   75 }
   76 
   77 
   78 #endif
   79 
   80 
   81 /**
   82  * Information about command-line options.
   83  */
   84 struct Help
   85 {
   86   /**
   87    * Single-character option name, '\0' for none.
   88    */
   89   char shortArg;
   90 
   91   /**
   92    * Long name of the option.
   93    */
   94   const char *longArg;
   95 
   96   /**
   97    * Name of the mandatory argument, NULL for no argument.
   98    */
   99   const char *mandatoryArg;
  100 
  101   /**
  102    * Help text for the option.
  103    */
  104   const char *description;
  105 };
  106 
  107 
  108 /**
  109  * Indentation for descriptions.
  110  */
  111 #define BORDER 29
  112 
  113 
  114 /**
  115  * Display help text (--help).
  116  *
  117  * @param general binary name
  118  * @param description program description
  119  * @param opt program options (NULL-terminated array)
  120  */
  121 static void
  122 format_help (const char *general,
  123              const char *description,
  124              const struct Help *opt)
  125 {
  126   size_t slen;
  127   unsigned int i;
  128   ssize_t j;
  129   size_t ml;
  130   size_t p;
  131   char scp[80];
  132   const char *trans;
  133 
  134   printf (_ ("Usage: %s\n%s\n\n"),
  135           gettext (general),
  136           gettext (description));
  137   printf (_ (
  138             "Arguments mandatory for long options are also mandatory for short options.\n"));
  139   slen = 0;
  140   i = 0;
  141   while (NULL != opt[i].description)
  142   {
  143     if (0 == opt[i].shortArg)
  144       printf ("      ");
  145     else
  146       printf ("  -%c, ",
  147               opt[i].shortArg);
  148     printf ("--%s",
  149             opt[i].longArg);
  150     slen = 8 + strlen (opt[i].longArg);
  151     if (NULL != opt[i].mandatoryArg)
  152     {
  153       printf ("=%s",
  154               opt[i].mandatoryArg);
  155       slen += 1 + strlen (opt[i].mandatoryArg);
  156     }
  157     if (slen > BORDER)
  158     {
  159       printf ("\n%*s", BORDER, "");
  160       slen = BORDER;
  161     }
  162     if (slen < BORDER)
  163     {
  164       printf ("%*s", (int) (BORDER - slen), "");
  165       slen = BORDER;
  166     }
  167     trans = gettext (opt[i].description);
  168     ml = strlen (trans);
  169     p = 0;
  170 OUTER:
  171     while (ml - p > 78 - slen)
  172     {
  173       for (j = p + 78 - slen; j>p; j--)
  174       {
  175         if (isspace ( (unsigned char) trans[j]))
  176         {
  177           memcpy (scp,
  178                   &trans[p],
  179                   j - p);
  180           scp[j - p] = '\0';
  181           printf ("%s\n%*s",
  182                   scp,
  183                   BORDER + 2,
  184                   "");
  185           p = j + 1;
  186           slen = BORDER + 2;
  187           goto OUTER;
  188         }
  189       }
  190       /* could not find space to break line */
  191       memcpy (scp,
  192               &trans[p],
  193               78 - slen);
  194       scp[78 - slen] = '\0';
  195       printf ("%s\n%*s",
  196               scp,
  197               BORDER + 2,
  198               "");
  199       slen = BORDER + 2;
  200       p = p + 78 - slen;
  201     }
  202     /* print rest */
  203     if (p < ml)
  204       printf ("%s\n",
  205               &trans[p]);
  206     i++;
  207   }
  208 }
  209 
  210 
  211 /**
  212  * Run --help.
  213  */
  214 static void
  215 print_help ()
  216 {
  217   static struct Help help[] = {
  218     { 'b', "bibtex", NULL,
  219       gettext_noop ("print output in bibtex format") },
  220     { 'g', "grep-friendly", NULL,
  221       gettext_noop (
  222         "produce grep-friendly output (all results on one line per file)") },
  223     { 'h', "help", NULL,
  224       gettext_noop ("print this help") },
  225     { 'i', "in-process", NULL,
  226       gettext_noop ("run plugins in-process (simplifies debugging)") },
  227     { 'm', "from-memory", NULL,
  228       gettext_noop (
  229         "read data from file into memory and extract from memory") },
  230     { 'l', "library", "LIBRARY",
  231       gettext_noop ("load an extractor plugin named LIBRARY") },
  232     { 'L', "list", NULL,
  233       gettext_noop ("list all keyword types") },
  234     { 'n', "nodefault", NULL,
  235       gettext_noop ("do not use the default set of extractor plugins") },
  236     { 'p', "print", "TYPE",
  237       gettext_noop (
  238         "print only keywords of the given TYPE (use -L to get a list)") },
  239     { 'v', "version", NULL,
  240       gettext_noop ("print the version number") },
  241     { 'V', "verbose", NULL,
  242       gettext_noop ("be verbose") },
  243     { 'x', "exclude", "TYPE",
  244       gettext_noop ("do not print keywords of the given TYPE") },
  245     { 0, NULL, NULL, NULL },
  246   };
  247   format_help (_ ("extract [OPTIONS] [FILENAME]*"),
  248                _ ("Extract metadata from files."),
  249                help);
  250 
  251 }
  252 
  253 
  254 #if HAVE_ICONV
  255 #include "iconv.c"
  256 #endif
  257 
  258 /**
  259  * Print a keyword list to a file.
  260  *
  261  * @param cls closure, not used
  262  * @param plugin_name name of the plugin that produced this value;
  263  *        special values can be used (i.e. '<zlib>' for zlib being
  264  *        used in the main libextractor library and yielding
  265  *        meta data).
  266  * @param type libextractor-type describing the meta data
  267  * @param format basic format information about data
  268  * @param data_mime_type mime-type of data (not of the original file);
  269  *        can be NULL (if mime-type is not known)
  270  * @param data actual meta-data found
  271  * @param data_len number of bytes in data
  272  * @return 0 to continue extracting, 1 to abort
  273  */
  274 static int
  275 print_selected_keywords (void *cls,
  276                          const char *plugin_name,
  277                          enum EXTRACTOR_MetaType type,
  278                          enum EXTRACTOR_MetaFormat format,
  279                          const char *data_mime_type,
  280                          const char *data,
  281                          size_t data_len)
  282 {
  283   char *keyword;
  284 #if HAVE_ICONV
  285   iconv_t cd;
  286 #endif
  287   const char *stype;
  288   const char *mt;
  289 
  290   if (YES != print[type])
  291     return 0;
  292   if (verbose > 3)
  293     fprintf (stdout,
  294              _ ("Found by `%s' plugin:\n"),
  295              plugin_name);
  296   mt = EXTRACTOR_metatype_to_string (type);
  297   stype = (NULL == mt) ? _ ("unknown") : gettext (mt);
  298   switch (format)
  299   {
  300   case EXTRACTOR_METAFORMAT_UNKNOWN:
  301     fprintf (stdout,
  302              _ ("%s - (unknown, %u bytes)\n"),
  303              stype,
  304              (unsigned int) data_len);
  305     break;
  306   case EXTRACTOR_METAFORMAT_UTF8:
  307     if (0 == data_len)
  308       break;
  309 #if HAVE_ICONV
  310     cd = iconv_open (nl_langinfo (CODESET), "UTF-8");
  311     if (((iconv_t) -1) != cd)
  312       keyword = iconv_helper (cd,
  313                               data,
  314                               data_len);
  315     else
  316 #endif
  317     keyword = strdup (data);
  318     if (NULL != keyword)
  319     {
  320       fprintf (stdout,
  321                "%s - %s\n",
  322                stype,
  323                keyword);
  324       free (keyword);
  325     }
  326 #if HAVE_ICONV
  327     if (((iconv_t) -1) != cd)
  328       iconv_close (cd);
  329 #endif
  330     break;
  331   case EXTRACTOR_METAFORMAT_BINARY:
  332     fprintf (stdout,
  333              _ ("%s - (binary, %u bytes)\n"),
  334              stype,
  335              (unsigned int) data_len);
  336     break;
  337   case EXTRACTOR_METAFORMAT_C_STRING:
  338     fprintf (stdout,
  339              "%s - %.*s\n",
  340              stype,
  341              (int) data_len,
  342              data);
  343     break;
  344   default:
  345     break;
  346   }
  347   return 0;
  348 }
  349 
  350 
  351 /**
  352  * Print a keyword list to a file without new lines.
  353  *
  354  * @param cls closure, not used
  355  * @param plugin_name name of the plugin that produced this value;
  356  *        special values can be used (i.e. '<zlib>' for zlib being
  357  *        used in the main libextractor library and yielding
  358  *        meta data).
  359  * @param type libextractor-type describing the meta data
  360  * @param format basic format information about data
  361  * @param data_mime_type mime-type of data (not of the original file);
  362  *        can be NULL (if mime-type is not known)
  363  * @param data actual meta-data found
  364  * @param data_len number of bytes in data
  365  * @return 0 to continue extracting, 1 to abort
  366  */
  367 static int
  368 print_selected_keywords_grep_friendly (void *cls,
  369                                        const char *plugin_name,
  370                                        enum EXTRACTOR_MetaType type,
  371                                        enum EXTRACTOR_MetaFormat format,
  372                                        const char *data_mime_type,
  373                                        const char *data,
  374                                        size_t data_len)
  375 {
  376   char *keyword;
  377 #if HAVE_ICONV
  378   iconv_t cd;
  379 #endif
  380   const char *mt;
  381 
  382   if (YES != print[type])
  383     return 0;
  384   mt = EXTRACTOR_metatype_to_string (type);
  385   if (NULL == mt)
  386     mt = gettext_noop ("unknown");
  387   switch (format)
  388   {
  389   case EXTRACTOR_METAFORMAT_UNKNOWN:
  390     break;
  391   case EXTRACTOR_METAFORMAT_UTF8:
  392     if (0 == data_len)
  393       return 0;
  394     if (verbose > 1)
  395       fprintf (stdout,
  396                "%s: ",
  397                gettext (mt));
  398 #if HAVE_ICONV
  399     cd = iconv_open (nl_langinfo (CODESET), "UTF-8");
  400     if (((iconv_t) -1) != cd)
  401       keyword = iconv_helper (cd,
  402                               data,
  403                               data_len);
  404     else
  405 #endif
  406     keyword = strdup (data);
  407     if (NULL != keyword)
  408     {
  409       fprintf (stdout,
  410                "`%s' ",
  411                keyword);
  412       free (keyword);
  413     }
  414 #if HAVE_ICONV
  415     if (((iconv_t) -1) != cd)
  416       iconv_close (cd);
  417 #endif
  418     break;
  419   case EXTRACTOR_METAFORMAT_BINARY:
  420     break;
  421   case EXTRACTOR_METAFORMAT_C_STRING:
  422     if (verbose > 1)
  423       fprintf (stdout,
  424                "%s ",
  425                gettext (mt));
  426     fprintf (stdout,
  427              "`%s'",
  428              data);
  429     break;
  430   default:
  431     break;
  432   }
  433   return 0;
  434 }
  435 
  436 
  437 /**
  438  * Entry in the map we construct for each file.
  439  */
  440 struct BibTexMap
  441 {
  442   /**
  443    * Name in bibTeX
  444    */
  445   const char *bibTexName;
  446 
  447   /**
  448    * Meta type for the value.
  449    */
  450   enum EXTRACTOR_MetaType le_type;
  451 
  452   /**
  453    * The value itself.
  454    */
  455   char *value;
  456 };
  457 
  458 
  459 /**
  460  * Type of the entry for bibtex.
  461  */
  462 static char *entry_type;
  463 
  464 /**
  465  * Mapping between bibTeX strings, libextractor
  466  * meta data types and values for the current document.
  467  */
  468 static struct BibTexMap btm[] = {
  469   { "title", EXTRACTOR_METATYPE_TITLE, NULL},
  470   { "year", EXTRACTOR_METATYPE_PUBLICATION_YEAR, NULL },
  471   { "author", EXTRACTOR_METATYPE_AUTHOR_NAME, NULL },
  472   { "book", EXTRACTOR_METATYPE_BOOK_TITLE, NULL},
  473   { "edition", EXTRACTOR_METATYPE_BOOK_EDITION, NULL},
  474   { "chapter", EXTRACTOR_METATYPE_BOOK_CHAPTER_NUMBER, NULL},
  475   { "journal", EXTRACTOR_METATYPE_JOURNAL_NAME, NULL},
  476   { "volume", EXTRACTOR_METATYPE_JOURNAL_VOLUME, NULL},
  477   { "number", EXTRACTOR_METATYPE_JOURNAL_NUMBER, NULL},
  478   { "pages", EXTRACTOR_METATYPE_PAGE_COUNT, NULL },
  479   { "pages", EXTRACTOR_METATYPE_PAGE_RANGE, NULL },
  480   { "school", EXTRACTOR_METATYPE_AUTHOR_INSTITUTION, NULL},
  481   { "publisher", EXTRACTOR_METATYPE_PUBLISHER, NULL },
  482   { "address", EXTRACTOR_METATYPE_PUBLISHER_ADDRESS, NULL },
  483   { "institution", EXTRACTOR_METATYPE_PUBLISHER_INSTITUTION, NULL },
  484   { "series", EXTRACTOR_METATYPE_PUBLISHER_SERIES, NULL},
  485   { "month", EXTRACTOR_METATYPE_PUBLICATION_MONTH, NULL },
  486   { "url", EXTRACTOR_METATYPE_URL, NULL},
  487   { "note", EXTRACTOR_METATYPE_COMMENT, NULL},
  488   { "eprint", EXTRACTOR_METATYPE_BIBTEX_EPRINT, NULL },
  489   { "type", EXTRACTOR_METATYPE_PUBLICATION_TYPE, NULL },
  490   { NULL, 0, NULL }
  491 };
  492 
  493 
  494 /**
  495  * Clean up the bibtex processor in preparation for the next round.
  496  */
  497 static void
  498 cleanup_bibtex ()
  499 {
  500   unsigned int i;
  501 
  502   for (i = 0; NULL != btm[i].bibTexName; i++)
  503   {
  504     free (btm[i].value);
  505     btm[i].value = NULL;
  506   }
  507   free (entry_type);
  508   entry_type = NULL;
  509 }
  510 
  511 
  512 /**
  513  * Callback function for printing meta data in bibtex format.
  514  *
  515  * @param cls closure, not used
  516  * @param plugin_name name of the plugin that produced this value;
  517  *        special values can be used (i.e. '<zlib>' for zlib being
  518  *        used in the main libextractor library and yielding
  519  *        meta data).
  520  * @param type libextractor-type describing the meta data
  521  * @param format basic format information about data
  522  * @param data_mime_type mime-type of data (not of the original file);
  523  *        can be NULL (if mime-type is not known)
  524  * @param data actual meta-data found
  525  * @param data_len number of bytes in data
  526  * @return 0 to continue extracting (always)
  527  */
  528 static int
  529 print_bibtex (void *cls,
  530               const char *plugin_name,
  531               enum EXTRACTOR_MetaType type,
  532               enum EXTRACTOR_MetaFormat format,
  533               const char *data_mime_type,
  534               const char *data,
  535               size_t data_len)
  536 {
  537   unsigned int i;
  538 
  539   if (0 == data_len)
  540     return 0;
  541   if (YES != print[type])
  542     return 0;
  543   if (EXTRACTOR_METAFORMAT_UTF8 != format)
  544     return 0;
  545   if (EXTRACTOR_METATYPE_BIBTEX_ENTRY_TYPE == type)
  546   {
  547     entry_type = strdup (data);
  548     return 0;
  549   }
  550   for (i = 0; NULL != btm[i].bibTexName; i++)
  551     if ( (NULL == btm[i].value) &&
  552          (btm[i].le_type == type) )
  553       btm[i].value = strdup (data);
  554   return 0;
  555 }
  556 
  557 
  558 /**
  559  * Print the computed bibTeX entry.
  560  *
  561  * @param fn file for which the entry was created.
  562  */
  563 static void
  564 finish_bibtex (const char *fn)
  565 {
  566   unsigned int i;
  567   ssize_t n;
  568   const char *et;
  569   char temp[20];
  570 
  571   if (NULL != entry_type)
  572     et = entry_type;
  573   else
  574     et = "misc";
  575   if ( (NULL == btm[0].value) ||
  576        (NULL == btm[1].value) ||
  577        (NULL == btm[2].value) )
  578     fprintf (stdout,
  579              "@%s %s { ",
  580              et,
  581              fn);
  582   else
  583   {
  584     snprintf (temp,
  585               sizeof (temp),
  586               "%.5s%.5s%.5s",
  587               btm[2].value,
  588               btm[1].value,
  589               btm[0].value);
  590     for (n = strlen (temp) - 1; n>=0; n--)
  591       if (! isalnum ( (unsigned char) temp[n]) )
  592         temp[n] = '_';
  593       else
  594         temp[n] = tolower ( (unsigned char) temp[n]);
  595     fprintf (stdout,
  596              "@%s %s { ",
  597              et,
  598              temp);
  599   }
  600   for (i = 0; NULL != btm[i].bibTexName; i++)
  601     if (NULL != btm[i].value)
  602       fprintf (stdout,
  603                "\t%s = {%s},\n",
  604                btm[i].bibTexName,
  605                btm[i].value);
  606   fprintf (stdout, "%s", "}\n\n");
  607 }
  608 
  609 
  610 #ifdef WINDOWS
  611 static int
  612 _wchar_to_str (const wchar_t *wstr, char **retstr, UINT cp)
  613 {
  614   char *str;
  615   int len, lenc;
  616   BOOL lossy = FALSE;
  617   DWORD error;
  618 
  619   SetLastError (0);
  620   len = WideCharToMultiByte (cp, 0, wstr, -1, NULL, 0, NULL, (cp == CP_UTF8 ||
  621                                                               cp == CP_UTF7) ?
  622                              NULL : &lossy);
  623   error = GetLastError ();
  624   if (len <= 0)
  625     return -1;
  626 
  627   str = malloc (sizeof (char) * len);
  628 
  629   SetLastError (0);
  630   lenc = WideCharToMultiByte (cp, 0, wstr, -1, str, len, NULL, (cp == CP_UTF8 ||
  631                                                                 cp == CP_UTF7) ?
  632                               NULL : &lossy);
  633   error = GetLastError ();
  634   if (lenc != len)
  635   {
  636     free (str);
  637     return -3;
  638   }
  639   *retstr = str;
  640   if (lossy)
  641     return 1;
  642   return 0;
  643 }
  644 
  645 
  646 #endif
  647 
  648 
  649 /**
  650  * Makes a copy of argv that consists of a single memory chunk that can be
  651  * freed with a single call to free ();
  652  */
  653 static char **
  654 _make_continuous_arg_copy (int argc, char *const *argv)
  655 {
  656   size_t argvsize = 0;
  657   int i;
  658   char **new_argv;
  659   char *p;
  660   for (i = 0; i < argc; i++)
  661     argvsize += strlen (argv[i]) + 1 + sizeof (char *);
  662   new_argv = malloc (argvsize + sizeof (char *));
  663   if (NULL == new_argv)
  664     return NULL;
  665   p = (char *) &new_argv[argc + 1];
  666   for (i = 0; i < argc; i++)
  667   {
  668     new_argv[i] = p;
  669     strcpy (p, argv[i]);
  670     p += strlen (argv[i]) + 1;
  671   }
  672   new_argv[argc] = NULL;
  673   return (char **) new_argv;
  674 }
  675 
  676 
  677 /**
  678  * Returns utf-8 encoded arguments.
  679  * Returned argv has u8argv[u8argc] == NULL.
  680  * Returned argv is a single memory block, and can be freed with a single
  681  *   free () call.
  682  *
  683  * @param argc argc (as given by main())
  684  * @param argv argv (as given by main())
  685  * @param u8argc a location to store new argc in (though it's th same as argc)
  686  * @param u8argv a location to store new argv in
  687  * @return 0 on success, -1 on failure
  688  */
  689 static int
  690 _get_utf8_args (int argc, char *const *argv, int *u8argc, char ***u8argv)
  691 {
  692 #ifdef WINDOWS
  693   wchar_t *wcmd;
  694   wchar_t **wargv;
  695   int wargc;
  696   int i;
  697   char **split_u8argv;
  698 
  699   wcmd = GetCommandLineW ();
  700   if (NULL == wcmd)
  701     return -1;
  702   wargv = CommandLineToArgvW (wcmd, &wargc);
  703   if (NULL == wargv)
  704     return -1;
  705 
  706   split_u8argv = malloc (wargc * sizeof (char *));
  707 
  708   for (i = 0; i < wargc; i++)
  709   {
  710     if (_wchar_to_str (wargv[i], &split_u8argv[i], CP_UTF8) != 0)
  711     {
  712       int j;
  713       int e = errno;
  714       for (j = 0; j < i; j++)
  715         free (split_u8argv[j]);
  716       free (split_u8argv);
  717       LocalFree (wargv);
  718       errno = e;
  719       return -1;
  720     }
  721   }
  722 
  723   *u8argv = _make_continuous_arg_copy (wargc, split_u8argv);
  724   if (NULL == *u8argv)
  725   {
  726     free (split_u8argv);
  727     return -1;
  728   }
  729   *u8argc = wargc;
  730 
  731   for (i = 0; i < wargc; i++)
  732     free (split_u8argv[i]);
  733   free (split_u8argv);
  734 #else
  735   *u8argv = _make_continuous_arg_copy (argc, argv);
  736   if (NULL == *u8argv)
  737     return -1;
  738   *u8argc = argc;
  739 #endif
  740   return 0;
  741 }
  742 
  743 
  744 /**
  745  * Main function for the 'extract' tool.  Invoke with a list of
  746  * filenames to extract keywords from.
  747  *
  748  * @param argc number of arguments in argv
  749  * @param argv command line options and filename to run on
  750  * @return 0 on success
  751  */
  752 int
  753 main (int argc, char *argv[])
  754 {
  755   unsigned int i;
  756   struct EXTRACTOR_PluginList *plugins;
  757   int option_index;
  758   int c;
  759   char *libraries = NULL;
  760   int nodefault = NO;
  761   int defaultAll = YES;
  762   int bibtex = NO;
  763   int grepfriendly = NO;
  764   int ret = 0;
  765   EXTRACTOR_MetaDataProcessor processor = NULL;
  766   char **utf8_argv;
  767   int utf8_argc;
  768 
  769 #if ENABLE_NLS
  770   setlocale (LC_ALL, "");
  771   textdomain (PACKAGE);
  772 #endif
  773 #ifndef WINDOWS
  774   ignore_sigpipe ();
  775 #endif
  776   if (NULL == (print = malloc (sizeof (int) * EXTRACTOR_metatype_get_max ())))
  777   {
  778     fprintf (stderr,
  779              "malloc failed: %s\n",
  780              strerror (errno));
  781     return 1;
  782   }
  783   for (i = 0; i < EXTRACTOR_metatype_get_max (); i++)
  784     print[i] = YES;   /* default: print everything */
  785 
  786   if (0 != _get_utf8_args (argc, argv, &utf8_argc, &utf8_argv))
  787   {
  788     fprintf (stderr, "Failed to get arguments: %s\n", strerror (errno));
  789     return 1;
  790   }
  791 
  792   while (1)
  793   {
  794     static struct option long_options[] = {
  795       {"bibtex", 0, 0, 'b'},
  796       {"grep-friendly", 0, 0, 'g'},
  797       {"help", 0, 0, 'h'},
  798       {"in-process", 0, 0, 'i'},
  799       {"from-memory", 0, 0, 'm'},
  800       {"list", 0, 0, 'L'},
  801       {"library", 1, 0, 'l'},
  802       {"nodefault", 0, 0, 'n'},
  803       {"print", 1, 0, 'p'},
  804       {"verbose", 0, 0, 'V'},
  805       {"version", 0, 0, 'v'},
  806       {"exclude", 1, 0, 'x'},
  807       {0, 0, 0, 0}
  808     };
  809     option_index = 0;
  810     c = getopt_long (utf8_argc,
  811                      utf8_argv,
  812                      "abghiml:Lnp:vVx:",
  813                      long_options,
  814                      &option_index);
  815 
  816     if (c == -1)
  817       break;  /* No more flags to process */
  818     switch (c)
  819     {
  820     case 'b':
  821       bibtex = YES;
  822       if (NULL != processor)
  823       {
  824         fprintf (stderr,
  825                  "%s",
  826                  _ (
  827                    "Illegal combination of options, cannot combine multiple styles of printing.\n"));
  828         free (utf8_argv);
  829         return 0;
  830       }
  831       processor = &print_bibtex;
  832       break;
  833     case 'g':
  834       grepfriendly = YES;
  835       if (NULL != processor)
  836       {
  837         fprintf (stderr,
  838                  "%s",
  839                  _ (
  840                    "Illegal combination of options, cannot combine multiple styles of printing.\n"));
  841         free (utf8_argv);
  842         return 0;
  843       }
  844       processor = &print_selected_keywords_grep_friendly;
  845       break;
  846     case 'h':
  847       print_help ();
  848       free (utf8_argv);
  849       return 0;
  850     case 'i':
  851       in_process = YES;
  852       break;
  853     case 'm':
  854       from_memory = YES;
  855       break;
  856     case 'l':
  857       libraries = optarg;
  858       break;
  859     case 'L':
  860       i = 0;
  861       while (NULL != EXTRACTOR_metatype_to_string (i))
  862         printf ("%s\n",
  863                 gettext (EXTRACTOR_metatype_to_string (i++)));
  864       free (utf8_argv);
  865       return 0;
  866     case 'n':
  867       nodefault = YES;
  868       break;
  869     case 'p':
  870       if (NULL == optarg)
  871       {
  872         fprintf (stderr,
  873                  _ (
  874                    "You must specify an argument for the `%s' option (option ignored).\n"),
  875                  "-p");
  876         break;
  877       }
  878       if (YES == defaultAll)
  879       {
  880         defaultAll = NO;
  881         i = 0;
  882         while (NULL != EXTRACTOR_metatype_to_string (i))
  883           print[i++] = NO;
  884       }
  885       i = 0;
  886       while (NULL != EXTRACTOR_metatype_to_string (i))
  887       {
  888         if ( (0 == strcmp (optarg,
  889                            EXTRACTOR_metatype_to_string (i))) ||
  890              (0 == strcmp (optarg,
  891                            gettext (EXTRACTOR_metatype_to_string (i)))) )
  892 
  893         {
  894           print[i] = YES;
  895           break;
  896         }
  897         i++;
  898       }
  899       if (NULL == EXTRACTOR_metatype_to_string (i))
  900       {
  901         fprintf (stderr,
  902                  "Unknown keyword type `%s', use option `%s' to get a list.\n",
  903                  optarg,
  904                  "-L");
  905         free (utf8_argv);
  906         return -1;
  907       }
  908       break;
  909     case 'v':
  910       printf ("extract v%s\n", PACKAGE_VERSION);
  911       free (utf8_argv);
  912       return 0;
  913     case 'V':
  914       verbose++;
  915       break;
  916     case 'x':
  917       i = 0;
  918       while (NULL != EXTRACTOR_metatype_to_string (i))
  919       {
  920         if ( (0 == strcmp (optarg,
  921                            EXTRACTOR_metatype_to_string (i))) ||
  922              (0 == strcmp (optarg,
  923                            gettext (EXTRACTOR_metatype_to_string (i)))) )
  924         {
  925           print[i] = NO;
  926           break;
  927         }
  928         i++;
  929       }
  930       if (NULL == EXTRACTOR_metatype_to_string (i))
  931       {
  932         fprintf (stderr,
  933                  "Unknown keyword type `%s', use option `%s' to get a list.\n",
  934                  optarg,
  935                  "-L");
  936         free (utf8_argv);
  937         return -1;
  938       }
  939       break;
  940     default:
  941       fprintf (stderr,
  942                "%s",
  943                _ ("Use --help to get a list of options.\n"));
  944       free (utf8_argv);
  945       return -1;
  946     }   /* end of parsing commandline */
  947   }         /* while (1) */
  948   if (optind < 0)
  949   {
  950     fprintf (stderr,
  951              "%s", "Unknown error parsing options\n");
  952     free (print);
  953     free (utf8_argv);
  954     return -1;
  955   }
  956   if (utf8_argc - optind < 1)
  957   {
  958     fprintf (stderr,
  959              "%s", "Invoke with list of filenames to extract keywords form!\n");
  960     free (print);
  961     free (utf8_argv);
  962     return -1;
  963   }
  964 
  965   /* build list of libraries */
  966   if (NO == nodefault)
  967     plugins = EXTRACTOR_plugin_add_defaults (in_process
  968                                              ? EXTRACTOR_OPTION_IN_PROCESS
  969                                              : EXTRACTOR_OPTION_DEFAULT_POLICY);
  970   else
  971     plugins = NULL;
  972   if (NULL != libraries)
  973     plugins = EXTRACTOR_plugin_add_config (plugins,
  974                                            libraries,
  975                                            in_process
  976                                            ? EXTRACTOR_OPTION_IN_PROCESS
  977                                            : EXTRACTOR_OPTION_DEFAULT_POLICY);
  978   if (NULL == processor)
  979     processor = &print_selected_keywords;
  980 
  981   /* extract keywords */
  982   if (YES == bibtex)
  983     fprintf (stdout,
  984              "%s", _ ("% BiBTeX file\n"));
  985   for (i = optind; i < utf8_argc; i++)
  986   {
  987     errno = 0;
  988     if (YES == grepfriendly)
  989       fprintf (stdout, "%s ", utf8_argv[i]);
  990     else if (NO == bibtex)
  991       fprintf (stdout,
  992                _ ("Keywords for file %s:\n"),
  993                utf8_argv[i]);
  994     else
  995       cleanup_bibtex ();
  996     if (NO == from_memory)
  997       EXTRACTOR_extract (plugins,
  998                          utf8_argv[i],
  999                          NULL, 0,
 1000                          processor,
 1001                          NULL);
 1002     else
 1003     {
 1004       struct stat sb;
 1005       unsigned char *data = NULL;
 1006       int f = open (utf8_argv[i], O_RDONLY
 1007 #if WINDOWS
 1008                     | O_BINARY
 1009 #endif
 1010                     );
 1011       if ( (-1 != f) &&
 1012            (0 == fstat (f, &sb)) &&
 1013            (NULL != (data = malloc ((size_t) sb.st_size))) &&
 1014            (sb.st_size == read (f, data, (size_t) sb.st_size) ) )
 1015       {
 1016         EXTRACTOR_extract (plugins,
 1017                            NULL,
 1018                            data, sb.st_size,
 1019                            processor,
 1020                            NULL);
 1021       }
 1022       else
 1023       {
 1024         if (verbose > 0)
 1025           fprintf (stderr,
 1026                    "%s: %s: %s\n",
 1027                    utf8_argv[0], utf8_argv[i], strerror (errno));
 1028         ret = 1;
 1029       }
 1030       if (NULL != data)
 1031         free (data);
 1032       if (-1 != f)
 1033         (void) close (f);
 1034     }
 1035     if (YES == grepfriendly)
 1036       fprintf (stdout, "%s", "\n");
 1037     continue;
 1038   }
 1039   if (YES == grepfriendly)
 1040     fprintf (stdout, "%s", "\n");
 1041   if (bibtex)
 1042     finish_bibtex (utf8_argv[i]);
 1043   if (verbose > 0)
 1044     fprintf (stdout, "%s", "\n");
 1045   free (print);
 1046   free (utf8_argv);
 1047   EXTRACTOR_plugin_remove_all (plugins);
 1048   plugins = NULL;
 1049   cleanup_bibtex (); /* actually free's stuff */
 1050   return ret;
 1051 }
 1052 
 1053 
 1054 /* end of extract.c */