"Fossies" - the Fresh Open Source Software Archive

Member "gentoo-0.20.7/src/strutil.c" (17 Oct 2015, 13168 Bytes) of package /linux/misc/gentoo-0.20.7.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "strutil.c", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 0.20.6_vs_0.20.7.

    1 /*
    2 ** 1998-08-02 - This module holds various utility functions sharing one common trait:
    3 **      they all deal with character strings in one way or another.
    4 */
    5 
    6 #include "gentoo.h"
    7 
    8 #include <ctype.h>
    9 #include <stdarg.h>
   10 #include <stdio.h>
   11 #include <stdlib.h>
   12 #include <string.h>
   13 #include <sys/stat.h>
   14 #include <unistd.h>
   15 
   16 #include "sizeutil.h"
   17 #include "strutil.h"
   18 
   19 /* ----------------------------------------------------------------------------------------- */
   20 
   21 /* 2002-04-28 - Create a textual representation of <n>, with <tick> inserted every
   22 **      3rd digit (counting from the right) into <buf>. Returns pointer to
   23 **      first digit.
   24 ** NOTE NOTE    That <buf> pointer should point at one position past where the **last**
   25 **      character of the tickified number is to appear. This is for speed reasons.
   26 */
   27 gchar * stu_tickify(gchar *buf, guint64 n, gchar tick)
   28 {
   29     register gint   cnt = 0;
   30 
   31     do
   32     {
   33         if(tick && cnt == 3)
   34             *--buf = tick, cnt = 0;
   35         *--buf = '0' + n % 10;
   36         n /= 10;
   37         cnt++;
   38     } while(n);
   39     return buf;
   40 }
   41 
   42 /* ----------------------------------------------------------------------------------------- */
   43 
   44 /* Attempt to find <string> in <vector> of strings. Returns vector index, or <def>. */
   45 gint stu_strcmp_vector(const gchar *string, const gchar **vector, gsize vector_size, gint def)
   46 {
   47     gsize   i;
   48 
   49     for(i = 0; i < vector_size && vector[i] != NULL; ++i)
   50     {
   51         if(strcmp(string, vector[i]) == 0)
   52             return (gint) i;
   53     }
   54     return def;
   55 }
   56 
   57 /* ----------------------------------------------------------------------------------------- */
   58 
   59 /* 1998-08-30 - Core glob->RE translator. Returns a pointer to a (dynamically allocated)
   60 **      piece of memory holding the RE. When done with that, please g_free() it.
   61 */
   62 gchar * stu_glob_to_re(const gchar *glob)
   63 {
   64     GString     *re;
   65     gchar       here, *ret;
   66     const gchar *ptr, *end;
   67 
   68     if((re = g_string_new(NULL)) == NULL)
   69         return NULL;
   70 
   71     for(ptr = glob; *ptr != '\0'; ptr++)
   72     {
   73         here = *ptr;
   74         if(here == '[' && ptr[1] != ']')        /* Character set begins? */
   75         {
   76             if((end = strchr(ptr + 1, ']')) != NULL)
   77             {
   78                 for(; ptr <= end; ptr++)
   79                     g_string_append_c(re, *ptr);
   80                 ptr--;
   81             }
   82         }
   83         else
   84         {
   85             switch(here)
   86             {
   87                 case '.':
   88                     g_string_append(re, "\\.");
   89                     break;
   90                 case '*':
   91                     g_string_append(re, ".*");
   92                     break;
   93                 case '+':
   94                     g_string_append(re, "\\+");
   95                     break;
   96                 case '?':
   97                     g_string_append_c(re, '.');
   98                     break;
   99                 default:
  100                     g_string_append_c(re, here);
  101             }
  102         }
  103     }
  104     ret = re->str;
  105     g_string_free(re, FALSE);   /* Keeps the buffer. */
  106     return ret;
  107 }
  108 
  109 /* 1998-08-30 - Translate a glob pattern into a System V8 regular expression. Thanks to the
  110 **      magic of GStrings, it will look to the caller as if the string is simply
  111 **      replaced by the translation. This code was moved from the good 'ol cmd_select
  112 **      module, since I wanted glob->RE translation in other places, too (types).
  113 **      This routine doesn't exactly fit in among the other, but since it deals with
  114 **      strings, I thought it could live here at least for a while.
  115 */
  116 void stu_gstring_glob_to_re(GString *glob)
  117 {
  118     gchar   *re;
  119 
  120     if((re = stu_glob_to_re(glob->str)) != NULL)
  121     {
  122         g_string_assign(glob, re);
  123         g_free(re);
  124     }
  125 }
  126 
  127 /* ----------------------------------------------------------------------------------------- */
  128 
  129 /* 1998-09-19 - Convert the protection <mode> to a (more) human-readable form stored at <buf>.
  130 **      Will not use more than <max> bytes of <buf>. Returns <buf>, or NULL on failure.
  131 ** 1999-01-05 - Finally sat down and experimentally deduced the way GNU 'ls' formats its mode
  132 **      strings, and did something similar here.
  133 ** 2000-09-14 - g_snprintf() is not scanf(). Remembered that.
  134 */
  135 gchar * stu_mode_to_text(gchar *buf, gsize buf_max, mode_t mode)
  136 {
  137     gchar   *grp[] = { "---", "--x", "-w-", "-wx", "r--", "r-x", "rw-", "rwx" };
  138     gint    u, g, o;
  139 
  140     if(buf_max < 12)            /* A lazy size limitation. */
  141         return NULL;
  142 
  143     u = (mode & S_IRWXU) >> 6;
  144     g = (mode & S_IRWXG) >> 3;
  145     o = (mode & S_IRWXO);
  146     if(g_snprintf(buf, buf_max, "-%s%s%s", grp[u], grp[g], grp[o]) < 0)
  147         return NULL;
  148 
  149     /* Set the left-most character according to the file's intrinsic type. */
  150     if(S_ISLNK(mode))
  151         buf[0] = 'l';
  152     else if(S_ISDIR(mode))
  153         buf[0] = 'd';
  154     else if(S_ISBLK(mode))
  155         buf[0] = 'b';
  156     else if(S_ISCHR(mode))
  157         buf[0] = 'c';
  158     else if(S_ISFIFO(mode))
  159         buf[0] = 'p';
  160     else if(S_ISSOCK(mode))     /* This is just a guess... */
  161         buf[0] = 's';
  162 
  163     /* This is magic until you understand how it works. The trick seems to be that one
  164     ** bit (e.g. "SETUID") is displayed on top of another bit (in this case user read)
  165     ** by changing that character either to 'S' (if it was not set) or 's' (if set).
  166     ** AFAIK, this is not documented anywhere (except perhaps in ls's source).
  167     */
  168     if(mode & S_ISVTX)      /* Sticky bit set? This is not POSIX... */
  169         buf[9] = (buf[9] == '-') ? 'T' : 't';
  170     if(mode & S_ISGID)      /* Set GID bit set? */
  171         buf[6] = (buf[6] == '-') ? 'S' : 's';
  172     if(mode & S_ISUID)      /* Set UID bit set? */
  173         buf[3] = (buf[3] == '-') ? 'S' : 's';
  174 
  175     return buf;
  176 }
  177 
  178 /* ----------------------------------------------------------------------------------------- */
  179 
  180 /* 1998-10-07 - Scan a string from <def>, and put a pointer to a dynamically allocated version
  181 **      of it into <str>. The string should be delimited by double quotes. Characters
  182 **      (such as commas and whitespace) between strings are ignored. Returns a pointer
  183 **      to the beginning of the next string (suitable for a repeat call), or NULL when
  184 **      no more strings were found.
  185 ** 1999-02-24 - Added support for backslash escaping. Might be useful when this routine is used
  186 **      to scan strings which are then parsed by the command argument stuff. Or, you
  187 **      could just use single quotes of course...
  188 */
  189 const gchar * stu_scan_string(const gchar *def, const gchar **str)
  190 {
  191     GString *tmp;
  192 
  193     if((def == NULL) || (str == NULL))
  194         return NULL;
  195 
  196     while(*def && *def != '"')
  197         def++;
  198 
  199     if(*def == '"')         /* Beginning of string actually found? */
  200     {
  201         def++;
  202         if((tmp = g_string_new(NULL)) != NULL)
  203         {
  204             while(*def && *def != '"')
  205             {
  206                 if(*def == '\\')
  207                 {
  208                     def++;
  209                     if(*def == '\0')
  210                         break;
  211                 }
  212                 g_string_append_c(tmp, *def++);
  213             }
  214             if(*def == '"')     /* Closing quote here, too? */
  215             {
  216                 *str = tmp->str;
  217                 g_string_free(tmp, FALSE);              
  218                 return ++def;   /* Then return with an OK status. */
  219             }
  220             g_string_free(tmp, TRUE);
  221         }
  222     }
  223     return NULL;
  224 }
  225 
  226 /* ----------------------------------------------------------------------------------------- */
  227 
  228 /* 1999-02-24 - Compute the length and content of the first word at <str>. Knows about
  229 **      quoting and backslash escapes. Stores word at <store>. Will typically be
  230 **      run twice on the same input, since you can't know how much space is going
  231 **      to be needed without running it once (with store == NULL).
  232 **      Quoting rules:  A word can contain whitespace (space, tab) only if quoted.
  233 **              Double (") and single (') quotes can both be used, and have
  234 **              the same "power". One quotes the other, so "'" and '"' are
  235 **              both legal 1-character words. To include the quote used for
  236 **              a word IN the word, it must be backslash escaped: "\"" is
  237 **              the 1-character word ".
  238 **      Returns pointer to first character after word, or NULL if there are no more
  239 **      words. Stores word length at <len> (if non-NULL).
  240 */
  241 const gchar * stu_word_length(const gchar *str, gsize *len, gchar *store)
  242 {
  243     gchar   quote = 0, here;
  244     gsize   l = 0;
  245 
  246     if(str == NULL)
  247         return NULL;
  248 
  249     while(*str && isspace((guchar) *str))       /* Skip inter-word spaces. */
  250         str++;
  251 
  252     if(*str == '\0')
  253         return NULL;
  254     for(; *str && !(quote == 0 && isspace((guchar) *str)); l++)
  255     {
  256         if((here = *str) == '\\')   /* Backslash escapade? */
  257         {
  258             here = *++str;
  259             if(here == '\0')    /* At end of string? */
  260                 break;
  261         }
  262         else if(here == '\'' || here == '"')
  263         {
  264             if(quote == 0 || quote == here) /* Ignore "other" quote. */
  265             {
  266                 if(quote == here)
  267                     quote = 0;
  268                 else
  269                     quote = here;
  270                 str++;
  271                 l--;            /* Don't count the quote. */
  272                 continue;       /* Avoid storing the quote. */
  273             }
  274         }
  275         if(store != NULL)
  276             *store++ = here;
  277         str++;
  278     }
  279     if(quote != '\0' && *str == quote)  /* Skip ending quote. */
  280         str++;
  281     if(len != NULL)
  282         *len = l;
  283 
  284     return str;
  285 }
  286 
  287 /* 1999-02-24 - This takes a string intended as a shell command and splits it into a word-
  288 **      vector as used by exec() functions. Think argv[]. Handles some quoting and
  289 **      escaped characters, too. The returned vector will be NULL-terminated, and
  290 **      can be freed by a single call to g_free().
  291 */
  292 gchar ** stu_split_args(const gchar *argstring)
  293 {
  294     gsize       wlen;
  295     gint        wtotlen, wnum, i;
  296     const gchar *ptr = argstring;
  297     gchar       **argv, *store;
  298 
  299     for(wnum = wlen = wtotlen = 0; (ptr = stu_word_length(ptr, &wlen, NULL)) != NULL; wnum++, wtotlen += wlen + 1)
  300         ;
  301 
  302     if(wnum == 0)       /* Nothing found? */
  303         return NULL;
  304 
  305     argv  = g_malloc((wnum + 1) * sizeof *argv + wtotlen);
  306     store = (gchar *) argv + (wnum + 1) * sizeof *argv;
  307     for(ptr = (gchar *) argstring, i = 0; (ptr = stu_word_length(ptr, &wlen, store)) != NULL; i++)
  308     {
  309         argv[i] = store;
  310         store[wlen] = '\0';
  311         store += (wlen + 1);
  312     }
  313     argv[i] = NULL;
  314 
  315     return argv;
  316 }
  317 
  318 /* ----------------------------------------------------------------------------------------- */
  319 
  320 /* 2003-11-25 - Create internal (static, be careful!) version of <string> where certain
  321 **      characters have been escaped by backslashes, and return pointer to it.
  322 */
  323 const gchar * stu_escape(const gchar *string)
  324 {
  325     static GString  *str = NULL;
  326 
  327     if(str == NULL)
  328         str = g_string_new("");
  329     else
  330         g_string_truncate(str, 0);
  331     for(; *string; string++)
  332     {
  333         if(*string == '"' || *string == '\'' || *string == '\\')
  334             g_string_append_c(str, '\\');
  335         g_string_append_c(str, *string);
  336     }
  337     return str->str;
  338 }
  339 
  340 /* ----------------------------------------------------------------------------------------- */
  341 
  342 /* 2009-10-11 - Do a simple "search and replace", on a single input string. The text in <find>
  343 **      is searched for, and replaced with <replace>. If <global>, all instances are
  344 **      replaced, otherwise only the first.
  345 **
  346 **      Note: All strings are assumed to be UTF-8 encoded.
  347 **      Example: input="fr�knar �r inte �ckligt", find="�", replace="e"
  348 */
  349 guint stu_replace_simple(GString *output, const gchar *input, const gchar *find, const gchar *replace, gboolean global, gboolean nocase)
  350 {
  351     gboolean    busy;
  352     gsize       flen;
  353     guint       count = 0;
  354 
  355     if(output == NULL || input == NULL || find == NULL || replace == NULL)
  356         return count;
  357 
  358     /* Make sure output is clean. */
  359     g_string_truncate(output, 0);
  360     flen = strlen(find);
  361 
  362     do
  363     {
  364         gchar   *hit;
  365 
  366         /* Plain old strstr() works on UTF-8. */
  367         if((hit = strstr(input, find)) != NULL)
  368         {
  369             gsize   pfx = hit - input;
  370             g_string_append_len(output, input, pfx);
  371             input += flen + pfx;
  372             g_string_append(output, replace);
  373             busy = global;
  374             count += 1;
  375         }
  376         else
  377             busy = FALSE;
  378     } while(busy);
  379     /* There might be a trailing tail, here. */
  380     g_string_append(output, input);
  381 
  382     return count;
  383 }
  384 
  385 /* ----------------------------------------------------------------------------------------- */
  386 
  387 /* 2010-03-07 - Tries to look up a character in the given string, by calling the filtering function for each
  388 **      character. If the function returns true, a pointer to the character is returned, else NULL.
  389 */
  390 const gchar * stu_utf8_find(const gchar *string, gboolean (*function)(gunichar ch, gpointer user), gpointer user)
  391 {
  392     if(string == NULL || function == NULL)
  393         return NULL;
  394 
  395     while(*string != '\0')
  396     {
  397         gunichar    here = g_utf8_get_char(string);
  398 
  399         if(function(here, user))
  400             return string;
  401         string = g_utf8_next_char(string);
  402     }
  403     return NULL;
  404 }
  405 
  406 /* ----------------------------------------------------------------------------------------- */
  407 
  408 /* 2010-12-03 - Interpolate things looking like "{this}" by replacing with the value for the key "this" from
  409 **      the dictionary. Fun for the whole family. To get an explicit brace, escape with backslash
  410 **      (which has the power to escape anything, for simplicity).
  411 */
  412 gboolean stu_interpolate_dictionary(gchar *out, gsize size, const gchar *format, const GHashTable *dictionary)
  413 {
  414     gchar       key[32], *kput = NULL, here, *out_end;
  415     gboolean    in_key = FALSE;
  416     gconstpointer   value;
  417 
  418     /* A NULL dictionary can be okay, but not if it's referenced. Lazy. */
  419     if(out == NULL || size <= 1 || format == NULL)
  420         return FALSE;
  421 
  422     out_end = out + size - 1;
  423     while((here = *format++) != '\0' && out < out_end)
  424     {
  425         if(in_key)
  426         {
  427             if(here == '}')
  428             {
  429                 *kput = '\0';
  430                 /* If we got this far without a hash, fail. */
  431                 if(!dictionary)
  432                     return FALSE;
  433                 if((value = g_hash_table_lookup((GHashTable *) dictionary, key)) != NULL)
  434                 {
  435                     const gsize vlen = strlen(value);
  436 
  437                     if(vlen <= (out_end - out))
  438                     {
  439                         strcpy(out, value);
  440                         out += vlen;
  441                     }
  442                 }
  443                 in_key = FALSE;
  444             }
  445             else if(kput < (key + sizeof key - 1))  /* Silently drop overflowing key characters. */
  446                 *kput++ = here;
  447             continue;
  448         }
  449         else if(here == '{')
  450         {
  451             kput = key;
  452             in_key = TRUE;
  453             continue;
  454         }
  455         else if(here == '\\')
  456         {
  457             if(*format != '\0')
  458             {
  459                 *out++ = *format;
  460                 format++;
  461                 continue;
  462             }
  463             else
  464                 break;
  465         }
  466         *out++ = here;
  467     }
  468     *out = '\0';
  469 
  470     return TRUE;
  471 }