"Fossies" - the Fresh Open Source Software Archive

Member "odt2txt-0.5/regex.c" (18 Nov 2014, 5178 Bytes) of package /linux/privat/odt2txt-0.5.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "regex.c", see the Fossies "Dox" file reference documentation.

    1 /*
    2  * regex.c: String and regex operations for odt2txt
    3  *
    4  * Copyright (c) 2006-2009 Dennis Stosberg <dennis@stosberg.net>
    5  *
    6  * This program is free software; you can redistribute it and/or
    7  * modify it under the terms of the GNU General Public License,
    8  * version 2 as published by the Free Software Foundation
    9  */
   10 
   11 #include "mem.h"
   12 #include "regex.h"
   13 
   14 #define BUF_SZ 4096
   15 
   16 static char *headline(char line, const char *buf, regmatch_t matches[],
   17               size_t nmatch, size_t off);
   18 static size_t charlen_utf8(const char *s);
   19 
   20 static void print_regexp_err(int reg_errno, const regex_t *rx)
   21 {
   22     char *buf = ymalloc(BUF_SZ);
   23 
   24     regerror(reg_errno, rx, buf, BUF_SZ);
   25     fprintf(stderr, "%s\n", buf);
   26 
   27     yfree(buf);
   28 }
   29 
   30 int regex_subst(STRBUF *buf,
   31         const char *regex, int regopt,
   32         const void *subst)
   33 {
   34     int r;
   35     const char *bufp;
   36     size_t off = 0;
   37     const int i = 0;
   38     int match_count = 0;
   39 
   40     regex_t rx;
   41     const size_t nmatches = 10;
   42     regmatch_t matches[10];
   43 
   44     r = regcomp(&rx, regex, REG_EXTENDED);
   45     if (r) {
   46         print_regexp_err(r, &rx);
   47         exit(EXIT_FAILURE);
   48     }
   49 
   50     do {
   51         if (off > strbuf_len(buf))
   52             break;
   53 
   54         bufp = strbuf_get(buf) + off;
   55 
   56 #ifdef REG_STARTEND
   57         matches[0].rm_so = 0;
   58         matches[0].rm_eo = strbuf_len(buf) - off;
   59 
   60         if (0 != regexec(&rx, bufp, nmatches, matches, REG_STARTEND))
   61 #else
   62         if (0 != regexec(&rx, bufp, nmatches, matches, 0))
   63 #endif
   64             break;
   65 
   66         if (matches[i].rm_so != -1) {
   67             char *s;
   68             int subst_len;
   69 
   70             if (regopt & _REG_EXEC) {
   71                 s = (*(char *(*)
   72                        (const char *buf, regmatch_t matches[],
   73                     size_t nmatch, size_t off))subst)
   74                     (strbuf_get(buf), matches, nmatches, off);
   75             } else
   76                 s = (char*)subst;
   77 
   78             subst_len = strbuf_subst(buf,
   79                          matches[i].rm_so + off,
   80                          matches[i].rm_eo + off,
   81                          s);
   82             match_count++;
   83 
   84             if (regopt & _REG_EXEC)
   85                 yfree(s);
   86 
   87             off += matches[i].rm_so;
   88             if (subst_len >= 0)
   89                 off += subst_len + 1;
   90         }
   91     } while (regopt & _REG_GLOBAL);
   92 
   93     regfree(&rx);
   94     return match_count;
   95 }
   96 
   97 int regex_rm(STRBUF *buf,
   98          const char *regex, int regopt)
   99 {
  100     return regex_subst(buf, regex, regopt, "");
  101 }
  102 
  103 char *underline(char linechar, const char *str)
  104 {
  105     size_t i;
  106     char *tmp;
  107     STRBUF *line;
  108     size_t charlen = charlen_utf8(str);
  109 
  110     if (str[0] == '\0') {
  111         tmp = ymalloc(1);
  112         tmp[0] = '\0';
  113         return tmp;
  114     }
  115 
  116     line = strbuf_new();
  117     strbuf_append(line, str);
  118     strbuf_append(line, "\n");
  119 
  120     tmp = ymalloc(charlen);
  121     for (i = 0; i < charlen; i++) {
  122         tmp[i] = linechar;
  123     }
  124     strbuf_append_n(line, tmp, charlen);
  125     yfree(tmp);
  126 
  127     strbuf_append(line, "\n\n");
  128     return strbuf_spit(line);
  129 }
  130 
  131 static char *headline(char line, const char *buf, regmatch_t matches[],
  132               size_t nmatch, size_t off)
  133 {
  134     const int i = 1;
  135     char *result;
  136     size_t len;
  137     char *match;
  138 
  139     len = matches[i].rm_eo - matches[i].rm_so;
  140     match = ymalloc(len + 1);
  141 
  142     memcpy(match, buf + matches[i].rm_so + off, len);
  143     match[len] = '\0' ;
  144 
  145     result = underline(line, match);
  146 
  147     yfree(match);
  148     return result;
  149 }
  150 
  151 char *h1(const char *buf, regmatch_t matches[], size_t nmatch, size_t off)
  152 {
  153     return headline('=', buf, matches, nmatch, off);
  154 }
  155 
  156 char *h2(const char *buf, regmatch_t matches[], size_t nmatch, size_t off)
  157 {
  158     return headline('-', buf, matches, nmatch, off);
  159 }
  160 
  161 char *image(const char *buf, regmatch_t matches[], size_t nmatch, size_t off)
  162 {
  163     const int i = 1;
  164     const char *prefix = "[-- Image: ";
  165     const char *postfix = " --]";
  166     size_t pr_len, po_len, len;
  167     char *match;
  168 
  169     pr_len = strlen(prefix);
  170     len = matches[i].rm_eo - matches[i].rm_so;
  171     po_len = strlen(prefix);
  172 
  173     match = ymalloc(pr_len + len + po_len + 1);
  174     memcpy(match, prefix, pr_len);
  175     memcpy(match + pr_len, buf + matches[i].rm_so + off, len);
  176     memcpy(match + pr_len + len, postfix, po_len);
  177     match[pr_len + len + po_len] = '\0' ;
  178 
  179     return match;
  180 }
  181 
  182 static size_t charlen_utf8(const char *s)
  183 {
  184     size_t count = 0;
  185     unsigned char *t = (unsigned char*) s;
  186     while (*t != '\0') {
  187         if (*t > 0x80)
  188             t += utf8_length[*t - 0x80];
  189         count++;
  190         t++;
  191     }
  192     return count;
  193 }
  194 
  195 STRBUF *wrap(STRBUF *buf, int width)
  196 {
  197     const char *lf = "\n";
  198     const size_t lflen = strlen(lf);
  199     const char *bufp;
  200     const char *last;
  201     const char *lastspace = 0;
  202     size_t linelen = 0;
  203     STRBUF *out = strbuf_new();
  204 
  205     bufp = strbuf_get(buf);
  206     last = bufp;
  207 
  208     if (width == -1) {
  209         strbuf_append_n(out, strbuf_get(buf), strbuf_len(buf));
  210         return out;
  211     }
  212 
  213     strbuf_append_n(out, lf, lflen);
  214     while(bufp - strbuf_get(buf) < (ptrdiff_t)strbuf_len(buf)) {
  215         if (*bufp == ' ')
  216             lastspace = bufp;
  217         else if (*bufp == '\n') {
  218             strbuf_append_n(out, last, (size_t)(bufp - last));
  219             do {
  220                 strbuf_append_n(out, lf, lflen);
  221             } while (*++bufp == '\n');
  222             lastspace = NULL;
  223 
  224             while(*bufp == ' ') {
  225                 bufp++;
  226             }
  227             last = bufp;
  228             linelen = 0;
  229         }
  230 
  231         if (NULL != lastspace && (int)linelen > width) {
  232             strbuf_append_n(out, last, (size_t)(lastspace - last));
  233             strbuf_append_n(out, lf, lflen);
  234             last = lastspace;
  235             lastspace = NULL;
  236             linelen = (size_t)(bufp - last);
  237 
  238             while(*last == ' ') {
  239                 last++;
  240             }
  241             if(last > bufp)
  242                 bufp = last;
  243         }
  244 
  245         bufp++;
  246         linelen++;
  247         if ((unsigned char)*bufp > 0x80)
  248             bufp += utf8_length[(unsigned char)*bufp - 0x80];
  249     }
  250     strbuf_append_n(out, "\n", 1);
  251     return out;
  252 }
  253