"Fossies" - the Fresh Open Source Software Archive

Member "speech_tools/base_class/string/EST_String.cc" (4 Sep 2017, 28614 Bytes) of package /linux/misc/speech_tools-2.5.0-release.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "EST_String.cc", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 2.4-release_vs_2.5.0-release.

    1  /*************************************************************************/
    2  /*                                                                       */
    3  /*                Centre for Speech Technology Research                  */
    4  /*                     University of Edinburgh, UK                       */
    5  /*                        Copyright (c) 1997                             */
    6  /*                        All Rights Reserved.                           */
    7  /*                                                                       */
    8  /*  Permission is hereby granted, free of charge, to use and distribute */
    9  /*  this software and its documentation without restriction, including  */
   10  /*  without limitation the rights to use, copy, modify, merge, publish, */
   11  /*  distribute, sublicense, and/or sell copies of this work, and to     */
   12  /*  permit persons to whom this work is furnished to do so, subject to  */
   13  /*  the following conditions:                                           */
   14  /*   1. The code must retain the above copyright notice, this list of   */
   15  /*      conditions and the following disclaimer.                        */
   16  /*   2. Any modifications must be clearly marked as such.               */
   17  /*   3. Original authors' names are not deleted.                        */
   18  /*   4. The authors' names are not used to endorse or promote products  */
   19  /*      derived from this software without specific prior written       */
   20  /*      permission.                                                     */
   21  /*                                                                       */
   22  /*  THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK        */
   23  /*  DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING      */
   24  /*  ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT   */
   25  /*  SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE     */
   26  /*  FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES    */
   27  /*  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN   */
   28  /*  AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,          */
   29  /*  ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF       */
   30  /*  THIS SOFTWARE.                                                       */
   31  /*                                                                       */
   32  /*************************************************************************/
   33  /*               Authors :  Alan W Black (awb@cstr.ed.ac.uk)             */
   34  /*                Date   :  January, February 1997                       */
   35  /*  -------------------------------------------------------------------- */
   36  /*                                                                       */
   37  /*  A non-GNU implementation of a EST_String class to use with non-G++   */
   38  /*  compilers.                                                           */
   39  /*                                                                       */
   40  /*  Note this is not a full implementation of libg++'s EST_String class  */
   41  /*  just the bits we need                                                */
   42  /*                                                                       */
   43  /*************************************************************************/
   44 
   45 
   46 #include <iostream>
   47 #include <cstring>
   48 #include <cstdio>
   49 #include <cctype>
   50 #include "EST_String.h"
   51 // #include "EST_error.h"
   52 #include "string_version.h"
   53 #include "EST_math.h"
   54 
   55 extern "C" {
   56 #include "regexp.h"
   57 }
   58 
   59 const char *EST_String::version = "CSTR String Class " STRING_VERSION " " STRING_DATE;
   60 
   61 const EST_String EST_String::Empty("");
   62 
   63 EST_String EST_String_nullString = "";
   64 
   65 struct subst {
   66   EST_String::EST_string_size start, end;
   67   char *s;
   68   int slen;
   69 } ;
   70 
   71 #if !__GSUB_REENTRANT__
   72 static struct subst *substitutions=NULL;
   73 int num_substitutions=0;
   74 #endif
   75   
   76 
   77  /********************************************************************\
   78  *                                                                    *
   79  * Locate is the basic utility method behind many of the              *
   80  * manipulations, it finds something in a EST_String, returns a       *
   81  * success or failure flag and sets start and end to where it was.    *
   82  *                                                                    *
   83  \********************************************************************/
   84 
   85 int EST_String::locate(const char *s, int len, int from, int &start, int &end) const
   86 {
   87   CHECK_STRING_ARG(s);
   88       
   89   const char *sub=NULL;
   90 
   91   if (!s)
   92     return 0;
   93 
   94   if (from < 0 && -from < size)
   95     {
   96       int endpos=size+from+1;
   97       int p=0;
   98       const char *nextsub;
   99 
  100       while ((nextsub=strstr(str()+p, s)))
  101     {
  102       p=nextsub-str()+1;
  103       if (p > endpos)
  104         break;
  105       sub=nextsub;
  106     }
  107     }
  108   else if (from>=0 && from <= size)
  109     sub= strstr(str()+from, s);
  110   
  111   if (sub != NULL)
  112     {
  113       start = sub-str();
  114       end = start + len;
  115       return 1;
  116     }
  117   else
  118     {
  119       return 0;
  120     }
  121 
  122 }
  123 
  124 int EST_String::locate(EST_Regex &ex, int from, int &start, int &end, int *starts, int *ends) const
  125 {
  126   int match_start, match_end;
  127 
  128   if (from < 0 && -from < size)
  129     {
  130       int endpos=size+from+1;
  131       int p=0;
  132       int found=0;
  133 
  134       while (ex.run(str(), p, match_start, match_end, starts, ends))
  135     {
  136       found++;
  137       start=match_start;
  138       end=match_end;
  139       p = match_start+1;
  140       if (p > endpos)
  141         break;
  142     }
  143       return found >0;
  144     }
  145   else if (from >=0 && from <= size)
  146     {
  147       if (ex.run(str(), from, match_start, match_end, starts, ends))
  148     {
  149       start = match_start;
  150       end=match_end;
  151       return 1;
  152     }
  153       else
  154     return 0;
  155     }
  156   else
  157     return 0;
  158 }
  159 
  160 int EST_String::extract(const char *s, int len, int pos, int &start, int &end) const
  161 {
  162   CHECK_STRING_ARG(s);
  163       
  164   if (!s)
  165     return 0;
  166 
  167   if (pos < 0)
  168     return locate(s, len, 0, start, end);
  169 
  170   if (pos <= size-len && memcmp(str()+pos, s, len)==0)
  171     {
  172       start = pos;
  173       end = pos + len;
  174       return 1;
  175     }
  176   else
  177       return 0;
  178 }
  179 
  180 int EST_String::extract(EST_Regex &ex, int pos, int &start, int &end) const
  181 {
  182   int match_start, match_end;
  183 
  184   if (pos < 0)
  185     return locate(ex, 0, start, end);
  186 
  187   if (pos < size && ex.run(str(), pos, match_start, match_end) && match_start == pos)
  188     {
  189       start = match_start;
  190       end = match_end;
  191       return 1;
  192     }
  193   else
  194       return 0;
  195 }
  196 
  197 EST_String EST_String::chop_internal(int from, int len, EST_chop_direction mode) const
  198 {
  199   int start, end;
  200 
  201   if (from < 0)
  202     {
  203       start = size+from;
  204     }
  205   else
  206     {
  207       start = from;
  208     }
  209 
  210   end=start+len;
  211 
  212   if (start >=0 && end <=size && size > 0)
  213     switch (mode)
  214       {
  215       case Chop_Before:
  216     return EST_String(str(), size, 0, start); break;
  217       case Chop_At:
  218     return EST_String(str(), size, start, end-start); break;
  219       case Chop_After:
  220     return EST_String(str(), size, end, -1);
  221       }
  222   return EST_String();
  223 
  224 }
  225 
  226 EST_String EST_String::chop_internal(const char *it, int len, int from, EST_chop_direction mode) const
  227 {
  228   CHECK_STRING_ARG(it);
  229       
  230   int start, end;
  231   
  232   if (it && locate(it, len, from, start, end))
  233     switch (mode)
  234       {
  235       case Chop_Before:
  236     return EST_String(str(), size, 0, start); break;
  237       case Chop_At:
  238     return EST_String(str(), size, start, end-start); break;
  239       case Chop_After:
  240     return EST_String(str(), size, end, -1);
  241       }
  242   return EST_String();
  243 
  244 }
  245 
  246 EST_String EST_String::chop_internal (EST_Regex &it, int from, EST_chop_direction mode) const
  247 {
  248   int start=0, end=0;
  249   
  250   if (locate(it, from, start, end))
  251     switch (mode)
  252       {
  253       case Chop_Before:
  254     return EST_String(str(), size, 0, start); break;
  255       case Chop_At:
  256     return EST_String(str(), size, start, end-start); break;
  257       case Chop_After:
  258     return EST_String(str(), size, end, -1);
  259       }
  260   return EST_String();
  261 
  262 }
  263 
  264 
  265 int EST_String::gsub_internal (const char *os, int olength, const char *s, int length)
  266 {
  267   CHECK_STRING_ARG(os);
  268   CHECK_STRING_ARG(s);
  269       
  270   int pos=0, n=0, change=0;
  271   EST_ChunkPtr new_memory;
  272 
  273   const char *from;
  274   char *to;
  275   
  276 #if __GSUB_REENTRANT__
  277   struct subst {
  278     EST_String::EST_string_size start, end;
  279   } *substitutions=NULL;
  280 
  281   int num_substitutions=0;
  282 #endif
  283 
  284   if (s && os && size > 0 && *os != '\0')
  285     {
  286       {
  287     int start, end;
  288     while (locate(os, olength, pos, start, end))
  289       {
  290         if (num_substitutions <= n)
  291           substitutions = wrealloc(substitutions, struct subst, (num_substitutions +=10));
  292         
  293         substitutions[n].start = start;
  294         substitutions[n].end = end;
  295         
  296         change += length - (end-start);
  297         
  298         n++;
  299         pos=end;
  300       }
  301       }
  302 
  303       // dubious dealings with the inside of the string
  304 
  305       from = (const char *)memory;
  306 
  307       if (change > 0)
  308     {
  309       // Spurious braces make temporary ref to chunk go away
  310       {new_memory = chunk_allocate(size+change+1);}
  311       to = new_memory;
  312     }
  313       else
  314     {
  315       cp_make_updatable(memory, size);
  316       to = memory;
  317     }
  318 
  319       int i, at=0;
  320       char *p=to;
  321       
  322       for(i=0; i<n; i++)
  323     {
  324       int start = substitutions[i].start;
  325       int end = substitutions[i].end;
  326       memcpy(p, from+at, start-at);
  327       p += start-at;
  328       memcpy(p, s, length);
  329       p += length;
  330       at=end;
  331     }
  332       memcpy(p, from+at, size-at);
  333 
  334       p += size-at;
  335       *p = '\0';
  336   
  337       if (change > 0)
  338     memory = new_memory; 
  339       
  340 
  341       size += change;
  342     }
  343 
  344   //  cout << "gsub n=" << memory.count() << "\n";
  345 
  346 #if __GSUB_REENTRANT__
  347   if (substitutions)
  348     wfree(substitutions);
  349 #endif
  350 
  351   return n;
  352 
  353 }
  354 
  355 int EST_String::gsub_internal (EST_Regex &ex, const char *s, int length)
  356 {
  357 
  358   int bracket_num=-1;
  359 
  360   if (s==NULL)
  361     bracket_num = length;
  362       
  363   int pos=0, n=0, change=0;
  364   EST_ChunkPtr new_memory;
  365 
  366   const char *from;
  367   char *to;
  368   
  369 #if __GSUB_REENTRANT__
  370   struct subst *substitutions=NULL;
  371 
  372   int num_substitutions=0;
  373 #endif
  374 
  375   // printf("match '%s'\n", (const char *)(*this));
  376 
  377   if (size > 0)
  378     {
  379       {
  380     int start, starts[EST_Regex_max_subexpressions], ends[EST_Regex_max_subexpressions], mlen;
  381     while ((start = search(ex, mlen, pos, starts, ends))>=0)
  382       {
  383         // printf("match %d-%d, %d-%d, %d-%d\n", start, start+mlen, starts[0], ends[0], starts[1], ends[1]);
  384         if (num_substitutions <= n)
  385           substitutions = wrealloc(substitutions, struct subst, (num_substitutions +=10));
  386         
  387         substitutions[n].start = start;
  388         substitutions[n].end = start+mlen;
  389         
  390         if (s)
  391           change += length - mlen;
  392         else
  393           {
  394         int slen = ends[bracket_num]-starts[bracket_num];
  395         change += slen - mlen;
  396         substitutions[n].slen = slen;
  397         substitutions[n].s = walloc(char, slen);
  398         memcpy(substitutions[n].s, (const char *)memory+starts[bracket_num], slen);
  399           
  400           }
  401         
  402         n++;
  403         pos=start+mlen;
  404       }
  405       }
  406 
  407       // dubious dealings with the inside of the string
  408 
  409       from = (const char *)memory;
  410 
  411       if (change > 0)
  412     {
  413       // Spurious braces make temporary ref to chunk go away
  414       {new_memory = chunk_allocate(size+change+1);}
  415       to = new_memory;
  416     }
  417       else
  418     {
  419       cp_make_updatable(memory, size);
  420       to = memory;
  421     }
  422 
  423       int i, at=0;
  424       char *p=to;
  425       
  426       for(i=0; i<n; i++)
  427     {
  428       int start = substitutions[i].start;
  429       int end = substitutions[i].end;
  430       memcpy(p, from+at, start-at);
  431       p += start-at;
  432       if (s)
  433         {
  434           memcpy(p, s, length);
  435           p += length;
  436         }
  437       else
  438         {
  439           memcpy(p, substitutions[i].s, substitutions[i].slen);
  440           wfree(substitutions[i].s);
  441           substitutions[i].s=NULL;
  442           p += substitutions[i].slen;
  443         }
  444       at=end;
  445     }
  446       memcpy(p, from+at, size-at);
  447 
  448       p += size-at;
  449       *p = '\0';
  450   
  451       if (change > 0)
  452     memory = new_memory; 
  453       
  454       size += change;
  455     }
  456 
  457 #if __GSUB_REENTRANT__
  458   if (substitutions)
  459     wfree(substitutions);
  460 #endif
  461 
  462   return n;
  463 
  464 }
  465 
  466 int EST_String::subst(EST_String source, 
  467               int (&starts)[EST_Regex_max_subexpressions], 
  468               int (&ends)[EST_Regex_max_subexpressions])
  469 {
  470   int n=0, change=0;
  471   EST_ChunkPtr new_memory;
  472 
  473   const char *from;
  474   char *to;
  475   
  476 #if __GSUB_REENTRANT__
  477   struct subst *substitutions=NULL;
  478 
  479   int num_substitutions=0;
  480 #endif
  481 
  482   // printf("match '%s'\n", (const char *)(*this));
  483 
  484   int i;
  485   if (size > 0)
  486     {
  487     int escaped=0;
  488 
  489       for(i=0; i<size; i++)
  490     {
  491       if (escaped)
  492         {
  493           if (memory[i] >= '0' &&memory[i] <= '9')
  494         {
  495           int snum = memory[i] - '0';
  496           if (ends[snum] >= 0 && starts[snum] >=0)
  497             {
  498               if (num_substitutions <= n)
  499             substitutions = wrealloc(substitutions, struct subst, (num_substitutions +=10));
  500         
  501               substitutions[n].start = i-1;
  502               substitutions[n].end = i+1;
  503               substitutions[n].s = ((char *)(void *)(const char *)source.memory) + starts[snum];
  504               substitutions[n].slen = ends[snum] - starts[snum];
  505               change += substitutions[n].slen - 2;
  506 
  507               n++;
  508             }
  509         }
  510           escaped=0;
  511         }
  512       else if (memory[i] == '\\')
  513         escaped=1;
  514     }
  515 
  516 
  517       // dubious dealings with the inside of the string
  518 
  519       from = (const char *)memory;
  520 
  521       if (change > 0)
  522     {
  523       // Spurious braces make temporary ref to chunk go away
  524       {new_memory = chunk_allocate(size+change+1);}
  525       to = new_memory;
  526     }
  527       else
  528     {
  529       cp_make_updatable(memory, size);
  530       to = memory;
  531     }
  532 
  533       int at=0;
  534       char *p=to;
  535       
  536       for(i=0; i<n; i++)
  537     {
  538       int start = substitutions[i].start;
  539       int end = substitutions[i].end;
  540       memcpy(p, from+at, start-at);
  541       p += start-at;
  542 
  543       memcpy(p, substitutions[i].s, substitutions[i].slen);
  544       substitutions[i].s=NULL;
  545       p += substitutions[i].slen;
  546       at=end;
  547     }
  548       memcpy(p, from+at, size-at);
  549 
  550       p += size-at;
  551       *p = '\0';
  552   
  553       if (change > 0)
  554     memory = new_memory; 
  555       
  556       size += change;
  557     }
  558 
  559 #if __GSUB_REENTRANT__
  560   if (substitutions)
  561     wfree(substitutions);
  562 #endif
  563 
  564   return n;
  565 }
  566 
  567 // Pass in the two possible separators as pointers so we don't have to
  568 // duplicate all the code. Inline definitions of the friend functions
  569 // takes care of the pretty interface.
  570 
  571 int EST_String::split_internal(EST_String result[], int max, 
  572                    const char *s_seperator, int slen,
  573                    EST_Regex *re_seperator, 
  574                    char quote) const
  575 {
  576   int n=0;
  577   int pos=0;
  578   int start, end;
  579   int lastspace=0;
  580 
  581   if (size>0)
  582     {
  583       while (pos < length())
  584     {
  585       start= -1;
  586       end= -1;
  587       if ((*this)(pos) == quote)
  588         {
  589           start=pos;
  590           pos++;
  591           while (pos < length())
  592         {
  593           if ((*this)(pos) == quote)
  594             {
  595               pos++;
  596               if ((*this)(pos) != quote)
  597             break;
  598               else
  599             pos++;
  600             }
  601           else
  602             pos++;
  603         }
  604           end=pos;
  605         }
  606       else
  607         {
  608           int mstart, mend, matched;
  609           if (s_seperator)
  610         matched = locate(s_seperator, slen, pos, mstart, mend);
  611           else
  612         matched = locate(*re_seperator, pos, mstart, mend);
  613           
  614           if (matched)
  615         if (mstart != pos)
  616           {
  617             start=pos;
  618             end=mstart;
  619             pos=mend;
  620             lastspace=mend;
  621           }
  622         else if (pos ==lastspace)
  623           {
  624             start=pos;
  625             end=pos;
  626             pos=mend;
  627             lastspace=mend;
  628           }
  629         else
  630           {
  631             pos=mend;
  632             lastspace=mend;
  633           }
  634           else
  635         {
  636           start=pos;
  637           end=length();
  638           pos=end;
  639         }
  640         }
  641       if (start>=0)
  642         result[n++] = EST_String(*this, start, end-start);
  643       if (n==max)
  644         break;
  645     }
  646     }
  647 
  648   return n;
  649 }
  650 
  651 int EST_String::matches(const char *s, int pos) const
  652 {
  653   CHECK_STRING_ARG(s);
  654       
  655   int start, end;
  656 
  657   if (!s)
  658     return 0;
  659 
  660   int len=safe_strlen(s);
  661 
  662   if (extract(s, len, pos, start, end))
  663       return start==pos && end==len;
  664   else
  665       return 0;
  666 }
  667 
  668 int EST_String::matches(const EST_String &s, int pos) const
  669 {
  670   int start, end;
  671 
  672   if (extract(s.str(), s.size, pos, start, end))
  673       return start==pos && end==s.size;
  674   else
  675       return 0;
  676 }
  677 
  678 int EST_String::matches(EST_Regex &e, int pos, int *starts, int *ends) const
  679 {
  680   if (size==0)
  681     return e.run_match("", pos, starts, ends) >0;
  682   else
  683     return e.run_match(str(), pos, starts, ends) >0; 
  684 }
  685 
  686 
  687 EST_String operator + (const EST_String &a, const char *b)
  688 {
  689   CHECK_STRING_ARG(b);
  690 
  691     int al = a.size;
  692     int bl = safe_strlen(b);
  693 
  694     if (al == 0)
  695       return EST_String(b, 0, bl);
  696     if (bl == 0)
  697       return EST_String(a);
  698 
  699     EST_ChunkPtr c = chunk_allocate(al+bl+1, a.str(), al);
  700 
  701     if (bl>0)
  702       memmove((char *)c + al, b, bl);
  703     c(al+bl)='\0';
  704 
  705     return EST_String(al+bl, c);
  706 }
  707 
  708 EST_String operator + (const EST_String &a, const EST_String &b)
  709 {
  710     int al = a.size;
  711     int bl = b.size;
  712 
  713     if (al == 0)
  714       return EST_String(b);
  715     if (bl == 0)
  716       return EST_String(a);
  717 
  718     EST_ChunkPtr c = chunk_allocate(al+bl+1, a.str(), al);
  719 
  720     memmove((char *)c+al,b.str(),bl);
  721     c(al+bl)='\0';
  722 
  723     return EST_String(al+bl, c);
  724 }
  725 
  726 EST_String operator + (const char *a, const EST_String &b)
  727 {
  728   CHECK_STRING_ARG(a);
  729 
  730     int al = safe_strlen(a);
  731     int bl = b.size;
  732 
  733     if (bl == 0)
  734       return EST_String(a, 0, al);
  735     if (al == 0)
  736       return EST_String(b);
  737 
  738     EST_ChunkPtr c = chunk_allocate(al+bl+1, a, al);
  739 
  740     memmove((char *)c + al, b.str(), bl);
  741 
  742     c(al+bl)='\0';
  743 
  744     return EST_String(al+bl, c);
  745 }
  746 
  747 EST_String operator * (const EST_String &s, int n)
  748 {
  749 
  750   if (n<1)
  751     return "";
  752 
  753   int l = s.length();
  754   int sz = n * l;
  755 
  756   EST_String it(NULL, 0, sz);
  757   
  758   for(int j=0; j<n; j++)
  759     strncpy(((char *)it)+j*l, (const char *)s, l);
  760 
  761   return it;
  762 }
  763 
  764 EST_String &EST_String::operator +=(const char *b) 
  765 
  766 {
  767   CHECK_STRING_ARG(b);
  768       
  769     int bl = safe_strlen(b);
  770 
  771     if (size == 0)
  772       {
  773     memory = chunk_allocate(bl+1, b, bl);
  774     size = bl;
  775     return *this;
  776       }
  777 
  778     grow_chunk(memory, size, size+bl+1);
  779     
  780     memmove((char *)memory + size,b,bl);
  781     memory(size+bl)='\0';
  782     size += bl;
  783 
  784     return *this;
  785 }
  786 
  787 EST_String &EST_String::operator += (const EST_String b) 
  788 
  789 {
  790     int bl = b.size;
  791 
  792     if (size == 0)
  793       {
  794     memory = NON_CONST_CHUNKPTR(b.memory);
  795     size = b.size;
  796     return *this;
  797       }
  798 
  799     grow_chunk(memory, size, size+bl+1);
  800 
  801     if (bl >0)
  802       memmove((char *)memory + size,b.str(),bl);
  803 
  804     memory(size+bl)='\0';
  805     size += bl;
  806 
  807     return *this;
  808 }
  809 
  810 EST_String::EST_String(const char *s) 
  811 {
  812       CHECK_STRING_ARG(s);
  813       
  814       size=safe_strlen(s);
  815 
  816        if (size != 0)
  817      memory = chunk_allocate(size+1, s, size); 
  818        else 
  819      memory=NULL;
  820     }
  821 
  822 
  823 EST_String::EST_String(const char *s, int start_or_fill, int len) 
  824 {
  825 
  826   if (s)
  827     {
  828       int start= start_or_fill;
  829       if (len <0)
  830     len=safe_strlen(s)-start;
  831       
  832       size=len;
  833       if (size != 0)
  834     memory = chunk_allocate(len+1, s+start, len);
  835       else
  836     memory=NULL;
  837     }
  838   else
  839     {
  840       char fill = start_or_fill;
  841       if (len<0) len=0;
  842       size=len;
  843       if (size != 0)
  844     {
  845       memory = chunk_allocate(len+1);
  846       char *p = memory;
  847       for(int j=0; j<len;j++)
  848         p[j] = fill;
  849       p[len]='\0';
  850     }
  851       else
  852     memory=NULL;
  853     }
  854 }
  855 
  856 EST_String::EST_String(const char *s, int s_size, int start, int len) 
  857 {
  858   CHECK_STRING_ARG(s);
  859       
  860   if (len <0)
  861     len=s_size-start;
  862 
  863   size=len;
  864   if (size != 0)
  865     memory = chunk_allocate(len+1, s+start, len);
  866   else
  867     memory=NULL;
  868 }
  869 
  870 EST_String::EST_String(const EST_String &s, int start, int len) 
  871 {
  872   if (len <0)
  873     len=s.size-start;
  874       
  875   size=len;
  876 
  877   if (start == 0 && len == s.size)
  878     memory = NON_CONST_CHUNKPTR(s.memory);
  879   else if (size != 0)
  880     memory = chunk_allocate(len+1, s.memory, start, len);
  881   else
  882     memory = NULL;
  883 }
  884 
  885 /*
  886 EST_String::EST_String(const EST_String &s) 
  887 {
  888 #if 1
  889   static EST_ChunkPtr hack = NON_CONST_CHUNKPTR(s.memory);
  890   memory = NON_CONST_CHUNKPTR(s.memory);
  891   size = s.size;
  892 #else
  893     *(struct EST_dumb_string *)this = *(struct EST_dumb_string *)(&s);
  894 #endif
  895 }
  896 */
  897 
  898 #if __FSF_COMPATIBILITY__
  899 EST_String::EST_String(const char c) 
  900 {
  901       size=1;
  902       memory= chunk_allocate(2, &c, 1);
  903 }
  904 #endif
  905 
  906 EST_String &EST_String::operator = (const char *str) 
  907 {
  908       CHECK_STRING_ARG(str);
  909       int len = safe_strlen(str);
  910       if (!len)
  911     memory = NULL;
  912       else if (!shareing() && len < size)
  913     memcpy((char *)memory, str, len+1);
  914       else if (len)
  915     memory = chunk_allocate(len+1, str, len);
  916       size=len;
  917       return *this;
  918 }
  919 
  920 #if 0
  921 EST_String &EST_String::operator = (const char c)
  922 {
  923       memory = chunk_allocate(2, &c, 1);
  924       size=1;
  925       return *this;
  926 }
  927 
  928 EST_String &EST_String::operator = (const EST_String &s) 
  929 {
  930     const char *str = (const char *)s;
  931     CHECK_STRING_ARG(str);
  932     int len = safe_strlen(str);
  933     if (!len)
  934     memory = NULL;
  935     else if (!shareing() && len < size)
  936     memcpy((char *)memory, str, len+1);
  937     else if (len)
  938     memory = chunk_allocate(len+1, str, len);
  939     size=len;
  940     return *this;
  941     //      
  942     //#if 1
  943     ///*  static EST_ChunkPtr hack = s.memory;  */
  944     //  memory = NON_CONST_CHUNKPTR(s.memory);
  945     //  size = s.size;
  946     //#else
  947     //      *(struct EST_dumb_string *)this = *(struct EST_dumb_string *)(&s);
  948     //#endif
  949     //      return *this;
  950 }
  951 #endif
  952     
  953 EST_String downcase(const EST_String &s)
  954 {
  955     EST_String t = EST_String(s.size, chunk_allocate(s.size+1, s.str(), s.size));
  956     int i;
  957 
  958     for (i=0; i < s.length(); i++)
  959     if (isupper(s(i)))
  960         t[i] = tolower(s(i));
  961     else
  962         t[i] = s(i);
  963     return t;
  964 }   
  965 
  966 EST_String upcase(const EST_String &s)
  967 {
  968     EST_String t = EST_String(s.size, chunk_allocate(s.size+1, s.str(), s.size));
  969     int i;
  970 
  971     for (i=0; i < s.length(); i++)
  972     if (islower(s(i)))
  973         t[i] = toupper(s(i));
  974     else
  975         t[i] = s(i);
  976     return t;
  977 }   
  978 
  979 
  980 int
  981 EST_String::freq(const EST_String &s) const
  982 {
  983   int pos=0;
  984   int n=0;
  985   int start, end;
  986 
  987   while (locate(s, pos, start, end))
  988     {
  989       n++;
  990       pos=end;
  991     }
  992   return n;
  993 }
  994 
  995 int
  996 EST_String::freq(const char *s) const
  997 {
  998   CHECK_STRING_ARG(s);
  999       
 1000   int pos=0;
 1001   int n=0;
 1002   int start, end;
 1003   int len=safe_strlen(s);
 1004 
 1005   while (locate(s, len, pos, start, end))
 1006     {
 1007       n++;
 1008       pos=end;
 1009     }
 1010   return n;
 1011 }
 1012 
 1013 int
 1014 EST_String::freq(EST_Regex &ex) const
 1015 {
 1016   int pos=0;
 1017   int n=0;
 1018   int start, end=0;
 1019 
 1020   while (locate(ex, pos, start, end))
 1021     {
 1022       n++;
 1023       pos=end;
 1024     }
 1025   return n;
 1026 }
 1027 
 1028 EST_String EST_String::quote(const char quotec) const
 1029 {
 1030 
 1031   const char quotequote[3] = {quotec, quotec, '\0'};
 1032 
 1033   EST_String result(*this);
 1034 
 1035   result.gsub(quotequote+1, quotequote+0);
 1036 
 1037   return EST_String::cat(quotequote+1, result, quotequote+1);   
 1038 }
 1039 
 1040 EST_String EST_String::unquote(const char quotec) const
 1041 {
 1042 
 1043   const char quotequote[3] = {quotec, quotec, '\0'};
 1044 
 1045   EST_String result(*this);
 1046 
 1047   // cout << "before unqote '" << result << "'\n";
 1048 
 1049   result.gsub(quotequote+0, quotequote+1);
 1050 
 1051   //  cout << "after unqote '" << result << "'\n";
 1052 
 1053   if (result[0] == quotec && result[result.length()-1] == quotec )
 1054     {
 1055 #if 1
 1056       /* Spurious local variable to get arounf SunCC 4.0 being broken */
 1057       EST_String res= result.at(1, result.length()-2);
 1058       return res;
 1059 #else
 1060       return result.at(1, result.length()-2);
 1061 #endif
 1062     }
 1063   else
 1064     return result;
 1065 }
 1066 
 1067 EST_String EST_String::quote_if_needed(const char quotec) const
 1068 {
 1069 
 1070   if (contains(RXwhite) || contains(quotec))
 1071     return quote(quotec);
 1072 
 1073   return *this;
 1074 }
 1075 
 1076 
 1077 EST_String EST_String::unquote_if_needed(const char quotec) const
 1078 {
 1079 
 1080   if ((*this)(0) == quotec && (*this)(length()-1) == quotec )
 1081     return unquote(quotec);
 1082 
 1083   return *this;
 1084 }
 1085 
 1086 ostream &operator << (ostream &s, const EST_String &str)
 1087 
 1088 {
 1089   if (str.size > 0)
 1090     return (s << str.str());
 1091   else
 1092     return (s << "");
 1093 }
 1094 
 1095 EST_String EST_String::cat(const EST_String s1, 
 1096                const EST_String s2, 
 1097                const EST_String s3, 
 1098                const EST_String s4, 
 1099                const EST_String s5,
 1100                const EST_String s6,
 1101                const EST_String s7,
 1102                const EST_String s8,
 1103                const EST_String s9
 1104                )
 1105 {
 1106   int   len=(s1.length()+s2.length()+s3.length()+s4.length()+s5.length() +
 1107          s6.length()+s7.length()+s8.length()+s9.length());
 1108 
 1109   EST_String result;
 1110 
 1111   result.size=len;
 1112   result.memory= chunk_allocate(len+1, (const char *)s1, s1.length());
 1113 
 1114   int p = s1.length();
 1115   if (s2.length())
 1116     { strncpy((char *)result.memory + p, (const char *)s2, s2.length()); p += s2.length(); }
 1117   if (s3.length())
 1118     { strncpy((char *)result.memory + p, (const char *)s3, s3.length()); p += s3.length(); }
 1119   if (s4.length())
 1120     { strncpy((char *)result.memory + p, (const char *)s4, s4.length()); p += s4.length(); }
 1121   if (s5.length())
 1122     { strncpy((char *)result.memory + p, (const char *)s5, s5.length()); p += s5.length(); }
 1123   if (s6.length())
 1124     { strncpy((char *)result.memory + p, (const char *)s6, s6.length()); p += s6.length(); }
 1125   if (s7.length())
 1126     { strncpy((char *)result.memory + p, (const char *)s7, s7.length()); p += s7.length(); }
 1127   if (s8.length())
 1128     { strncpy((char *)result.memory + p, (const char *)s8, s8.length()); p += s8.length(); }
 1129   if (s9.length())
 1130     { strncpy((char *)result.memory + p, (const char *)s9, s9.length()); p += s9.length(); }
 1131 
 1132     result.memory(p) = '\0';
 1133 
 1134   return result;
 1135 }
 1136 
 1137 int compare(const EST_String &a, const EST_String &b)
 1138 {
 1139     if (a.size == 0 && b.size == 0)
 1140     return 0;
 1141     else if (a.size == 0)
 1142     return -1;
 1143     else if (b.size == 0)
 1144     return 1;
 1145     else 
 1146     return strcmp(a.str(), b.str());
 1147 }
 1148 
 1149 int compare(const EST_String &a, const char *b)
 1150 {
 1151     if (a.size == 0 && (b==NULL || *b == '\0'))
 1152     return 0;
 1153     else if (a.size == 0)
 1154     return -1;
 1155     else if (b == NULL || *b == '\0')
 1156     return 1;
 1157     else 
 1158     return strcmp(a.str(), b);
 1159 }
 1160 
 1161 int fcompare(const EST_String &a, const EST_String &b, 
 1162                 const unsigned char *table) 
 1163 {
 1164     if (a.size == 0 && b.size == 0)
 1165     return 0;
 1166     else if (a.size == 0)
 1167     return -1;
 1168     else if (b.size == 0)
 1169     return 1;
 1170     else 
 1171     return EST_strcasecmp(a.str(), b.str(), table);
 1172 }
 1173 
 1174 int fcompare(const EST_String &a, const char *b, 
 1175                 const unsigned char *table) 
 1176 {
 1177     int bsize = (b ? strlen((const char *)b) : 0);
 1178     if (a.size == 0 && bsize == 0)
 1179     return 0;
 1180     else if (a.size == 0)
 1181     return -1;
 1182     else if (bsize == 0)
 1183     return 1;
 1184     else 
 1185     return EST_strcasecmp(a.str(), (const char *)b, table);
 1186 }
 1187 
 1188 int operator == (const char *a, const EST_String &b)
 1189 {
 1190     CHECK_STRING_ARG(a);
 1191       
 1192     if (!a)
 1193     return 0;
 1194     else if (b.size==0)
 1195     return *a == '\0';
 1196     else 
 1197     return (*a == b(0)) && strcmp(a, b.str())==0;
 1198 }
 1199 
 1200 int operator == (const EST_String &a, const EST_String &b)
 1201 {
 1202     if (a.size==0)
 1203     return b.size == 0;
 1204     else if (b.size == 0)
 1205     return 0;
 1206     else 
 1207     return a.size == b.size && a(0) == b(0) && memcmp(a.str(),b.str(),a.size)==0;
 1208 };
 1209 
 1210 EST_String EST_String::Number(int i, int b)
 1211 {
 1212   char buf[64];
 1213   const char *format;
 1214 
 1215   switch (b)
 1216     {
 1217     case 8:
 1218       format="0%o";
 1219       break;
 1220     case 10:
 1221       format="%d";
 1222       break;
 1223     case 16:
 1224       format="0x%x";
 1225       break;
 1226     default:
 1227       format="??%d??";
 1228       break;
 1229     }
 1230   sprintf(buf, format, i);
 1231 
 1232   return EST_String(buf);
 1233 }
 1234 
 1235 EST_String EST_String::Number(long i, int b)
 1236 {
 1237   char buf[64];
 1238   const char *format;
 1239 
 1240   switch (b)
 1241     {
 1242     case 8:
 1243       format="0%lo";
 1244       break;
 1245     case 10:
 1246       format="%ld";
 1247       break;
 1248     case 16:
 1249       format="0x%lx";
 1250       break;
 1251     default:
 1252       format="??%ld??";
 1253       break;
 1254     }
 1255   sprintf(buf, format, i);
 1256 
 1257   return EST_String(buf);
 1258 }
 1259 
 1260 EST_String EST_String::Number(float f)
 1261 {
 1262   char buf[64];
 1263 
 1264   sprintf(buf, "%f", f);
 1265 
 1266   return EST_String(buf);
 1267 }
 1268 
 1269 EST_String EST_String::Number(double d)
 1270 {
 1271   char buf[64];
 1272 
 1273   sprintf(buf, "%f", d);
 1274 
 1275   return EST_String(buf);
 1276 }
 1277 
 1278 long EST_String::Long(bool *valid) const
 1279 {
 1280   char *end;
 1281 
 1282   long val = strtol(str(), &end, 10);
 1283 
 1284   if (end==NULL|| *end != '\0')
 1285   {
 1286     if (valid != NULL)
 1287       {
 1288     *valid=0;
 1289     return 0L;
 1290       }
 1291     else
 1292       {
 1293     printf("bad integer number format '%s'\n",
 1294         (const char *)str());
 1295     exit(0);
 1296       }
 1297   }
 1298 
 1299   if (valid)
 1300     *valid=1;
 1301   
 1302   return val;
 1303 }
 1304 
 1305 int EST_String::Int(bool *valid) const
 1306 {
 1307   long val = Long(valid);
 1308 
 1309   if (valid && !*valid)
 1310     return 0L;
 1311 
 1312   if (val > INT_MAX || val < INT_MIN)
 1313   {
 1314     if (valid != NULL)
 1315       {
 1316     *valid=0;
 1317     return 0L;
 1318       }
 1319     else
 1320       {
 1321     printf("number out of range for integer %ld",
 1322            val);
 1323     exit(0);
 1324       }
 1325   }
 1326 
 1327   return val;
 1328 }
 1329 
 1330 double EST_String::Double(bool *valid) const
 1331 {
 1332   char *end;
 1333 
 1334   double val = strtod(str(), &end);
 1335 
 1336   if (end==NULL|| *end != '\0')
 1337   {
 1338     if (valid != NULL)
 1339       {
 1340     *valid=0;
 1341     return 0.0;
 1342       }
 1343     else
 1344       {
 1345     printf("bad decimal number format '%s'",
 1346         (const char *)str());
 1347     exit(0);
 1348       }
 1349   }
 1350 
 1351   if (valid)
 1352     *valid=1;
 1353   
 1354   return val;
 1355 }
 1356 
 1357 float EST_String::Float(bool *valid) const
 1358 {
 1359   double val = Double(valid);
 1360 
 1361   if (valid && !*valid)
 1362     return 0.0;
 1363 
 1364   if (val > FLT_MAX || val < -FLT_MAX) 
 1365   {
 1366     if (valid != NULL)
 1367       {
 1368     *valid=0;
 1369     return 0.0;
 1370       }
 1371     else
 1372       {
 1373     printf("number out of range for float %f",
 1374            val);
 1375     exit(0);
 1376       }
 1377   }
 1378 
 1379   return val;
 1380 }
 1381 
 1382 
 1383