"Fossies" - the Fresh Open Source Software Archive

Member "qdiff-0.9.1/tstring.cc" (21 Oct 2008, 34818 Bytes) of package /linux/privat/old/qdiff-0.9.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) C and C++ source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "tstring.cc", see the Fossies "Dox" file reference documentation.

    1 /*GPL*START*
    2  *
    3  * NUL byte safe string implementation
    4  * 
    5  * Copyright (C) 1997-2001 by Johannes Overmann <Johannes.Overmann@gmx.de>
    6  * 
    7  * This program is free software; you can redistribute it and/or modify
    8  * it under the terms of the GNU General Public License as published by
    9  * the Free Software Foundation; either version 2 of the License, or
   10  * (at your option) any later version.
   11  * 
   12  * This program is distributed in the hope that it will be useful,
   13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   15  * GNU General Public License for more details.
   16  * 
   17  * You should have received a copy of the GNU General Public License
   18  * along with this program; if not, write to the Free Software
   19  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   20  * *GPL*END*/  
   21 
   22 #include <stdlib.h>
   23 #include <sys/stat.h>
   24 #include <unistd.h>
   25 #include <errno.h>
   26 #include "tstring.h"
   27 #include "texception.h"
   28 
   29 
   30 // todo:
   31 // - make Split,Unquote,ReadLine,extractFilename,extractPath 0 byte safe
   32 // - separate functions using tvector<> for better modularity
   33 
   34 
   35 // 1997:
   36 // 01:45 11 Jun split(): backslash behavior fixed (601 lines)
   37 // 23:50 11 Jun strings may contain 0 bytes
   38 // 12:00 19 Jun some filename extracting added
   39 // 17:00 19 Jun more sophisticated search: ignore_case and whole_words
   40 // 02:00 08 Jul substring extraction via operator() (start,end)
   41 // 02:00 31 Jul new ContainsNulChar, new ReadFile, fixed \ \\ in ExpUnPrint
   42 // 12:00 08 Aug new Upper Lower Capitalize
   43 // 23:30 19 Aug improved collapseSpace()
   44 // 00:00 27 Aug cropSpace() bug fixed (1 byte out of bound zero write)
   45 // 20:00 30 Aug now cons accept 0 pointer as empty string
   46 // 21:00 30 Aug addDirSlash() added (809 lines)
   47 // 13:00 02 Sep isLower ... added, preserve_case for SearchReplace added (867)
   48 // 23:45 16 Dec normalizePath() added
   49 // 15:00 24 Dec started conversion to Rep reference model
   50 // 18:00 27 Dec finished. debugging starts ... :)
   51 
   52 // 1998:
   53 // 00:30 09 Jan scanTools started (cc=817) (h=462)
   54 // 00:05 12 Jan compare operators fixed (0 byte ...)
   55 // 19:00 09 Oct zeroRep and fast string(int i) for i=0 
   56 // 14:30 10 Oct xc16emu emuwid.s problem solved: memset()
   57 // 14:36 10 Oct string(0) 80 times faster than string(1)! (zero_rep)
   58 // 01:53 17 Oct createNulRep and createZeroRep non inline
   59 
   60 // 1999:
   61 // 14:55 31 Jan +=string speedup for empty string (cc=919, h=532)
   62 // 15:08 31 Jan searchReplace: pre/post_padstring added
   63 // 00:36 03 Feb getFitWordsBlock added (954)
   64 // 23:02 04 Feb search/searchReplace match_pos added (954)
   65 // 23:49 15 Feb class string renamed to class tstring, tappframe simplified (1003)
   66 // 00:46 16 Feb toLong/toDouble/toInt/toBool added (from old str2value.cc) (1016)
   67 // 23:51 03 Mar cropSpaceEnd added, getFitWords space semantics change
   68 // 23:46 13 Apr trelops.h replaces != and > operator (1034)
   69 // 00:31 16 Apr started: replace fatalErrors by exceptions
   70 // 23:48 20 Aug remove html tags added
   71 // 22:17 09 Dec added operator != and > because trelops will not instantiate them for two different types
   72 
   73 // 2000:
   74 // 23:30 30 Jun loop changed from while(1) to for(;;) ;-)
   75 // 22:50 01 Jul toInt/Long pointer p initialized to 0, quotes feature added to expandUnprintable
   76 // 22:00 06 Jul progressBar() added
   77 
   78 // 2001:
   79 // 00:15 08 Feb extractPath now removed trailing slash (1090 lines)
   80 // 00:45 15 Mar searchReplace max_num parameter added
   81 // 22:00 18 Sep palmos fixes
   82 
   83 // 2002:
   84 // 22:25 08 Apr expandUnprintable: allow high ISO graphical characters (ASCII 161-255), better nul_mem and zero_mem sizes for 64 bit systems
   85 
   86 // 2003:
   87 // 22:20 27 Jan length of nul_mem and zero_mem fixed
   88 
   89 // 2006:
   90 // 27 Jul: palmos support removed
   91 
   92 
   93 // global static null and zero rep members
      // nul / zero start out as null pointers; createNulRep() / createZeroRep()
      // point them at the raw buffers nul_mem / zero_mem defined next to them.
   94 tstring::Rep* tstring::Rep::nul = 0;
      // backing storage for the empty string: one Rep header plus 1 extra byte
   95 char tstring::Rep::nul_mem[sizeof(Rep) + 1];
   96 tstring::Rep* tstring::Rep::zero = 0;
      // backing storage for the string "0": one Rep header plus 2 extra bytes
   97 char tstring::Rep::zero_mem[sizeof(Rep) + 2];
   98 
   99 
  100 // non inline Rep implementations
  101 
  102 // copy this representation
  103 tstring::Rep *tstring::Rep::clone(size_t minmem) {
  104    Rep *p = create(minmem >= len ? minmem : len);
  105    p->len = len;
  106    memcpy(p->data(), data(), len+1);
  107    return p; 
  108 }
  109 
  110 // create a new representation
  111 tstring::Rep *tstring::Rep::create(size_t tmem) {
  112    size_t m = sizeof(Rep) << 1;
  113    while((m - 1 - sizeof(Rep)) < tmem) m <<= 1;
  114    Rep *p = new (m - 1 - sizeof(Rep)) Rep;
  115    p->mem = m - 1 - sizeof(Rep); p->ref = 1; p->vulnerable = false;
  116    return p;
  117 }
  118 
  119 // create null string representation
  120 void tstring::Rep::createNulRep() {
  121    nul = (Rep *)nul_mem;
  122    nul->len = 0;
  123    nul->mem = 0;
  124    nul->ref = 1; // never modify/delete static object
  125    nul->vulnerable = false;
  126    nul->terminate();
  127 }
  128 
  129 // create zero string representation
  130 void tstring::Rep::createZeroRep() {
  131    zero = (Rep *)zero_mem;
  132    zero->len = 1;
  133    zero->mem = 1;
  134    zero->ref = 1; // never modify/delete static object
  135    zero->vulnerable = false;
  136    (*zero)[0] = '0';
  137    zero->terminate();
  138 }
  139       
  140       
  141 // non inline string implementation
  142 
  143 tstring::tstring(const char *s):rep(0) {
  144    if(s){
  145       int l = strlen(s);
  146       rep = Rep::create(l);
  147       rep->len = l;
  148       strcpy(rep->data(), s);
  149    } else rep = Rep::nulRep()->grab();
  150 }
  151 
  152 
  153 tstring::tstring(const char *s, size_t l):rep(0) {
  154    if(s && (l > 0)) {
  155       rep = Rep::create(l);
  156       rep->len = l;
  157       memcpy(rep->data(), s, l);      
  158       rep->terminate();
  159    } else rep = Rep::nulRep()->grab();
  160 }
  161 
  162 
  163 tstring::tstring(char c, size_t n):rep(0) {
  164    if(n) {
  165       rep = Rep::create(n);
  166       rep->len = n;
  167       if(n) memset(rep->data(), c, n);      
  168       rep->terminate();      
  169    } else rep = Rep::nulRep()->grab();
  170 }
  171 
  172 
  173 tstring::tstring(char c):rep(0) {
  174    rep = Rep::create(1); 
  175    rep->len = 1; 
  176    (*rep)[0] = c; 
  177    rep->terminate();
  178 }
  179 
  180 
  181 tstring::tstring(int i):rep((i==0)?(Rep::zeroRep()->grab()):(Rep::nulRep()->grab())) {
  182    if(i) sprintf("%d", i);
  183 }
  184 
  185 
  186 tstring::tstring(int i, const char *format):rep(Rep::nulRep()->grab()) {
  187    sprintf(format, i);
  188 }
  189 
  190 
  191 tstring::tstring(double d, const char *format):rep(Rep::nulRep()->grab()) {
  192    sprintf(format, d);
  193 }
  194 
  195 
  196 
  197 tstring operator + (const tstring& s1, const tstring& s2) {
  198    tstring r(s1); r += s2; return r; }
  199 tstring operator + (const char *s1, const tstring& s2) {
  200    tstring r(s1); r += s2; return r; }
  201 tstring operator + (const tstring& s1, const char *s2) {
  202    tstring r(s1); r += s2; return r; }
  203 tstring operator + (char s1, const tstring& s2) {
  204    tstring r(s1); r += s2; return r; }
  205 tstring operator + (const tstring& s1, char s2) {
  206    tstring r(s1); r += tstring(s2); return r; }
  207 
  208 bool operator == (const tstring& s1, const tstring& s2) {return tstring::_string_equ(s1, s2);}
  209 bool operator == (const tstring& s1, const char   *s2) {return (strcmp(s1.c_str(), s2)==0);}
  210 bool operator == (const char   *s1, const tstring& s2) {return (strcmp(s1, s2.c_str())==0);}
  211 bool operator != (const tstring& s1, const tstring& s2) {return !tstring::_string_equ(s1, s2);}
  212 bool operator != (const tstring& s1, const char   *s2) {return (strcmp(s1.c_str(), s2)!=0);}
  213 bool operator != (const char   *s1, const tstring& s2) {return (strcmp(s1, s2.c_str())!=0);}
  214 bool operator <  (const tstring& s1, const tstring& s2) {return (tstring::_string_cmp(s1, s2) < 0);}
  215 bool operator <  (const tstring& s1, const char   *s2) {return (strcmp(s1.c_str(), s2) < 0);}
  216 bool operator <  (const char   *s1, const tstring& s2) {return (strcmp(s1, s2.c_str()) < 0);}
  217 bool operator >  (const tstring& s1, const char   *s2) {return (strcmp(s1.c_str(), s2) > 0);}
  218 bool operator >  (const char   *s1, const tstring& s2) {return (strcmp(s1, s2.c_str()) > 0);}
  219 bool operator >  (const tstring& s1, const tstring& s2) {return (tstring::_string_cmp(s1, s2) > 0);}
  220 
  221 /// append string
  222 tstring& tstring::operator += (const tstring& a) {if(!a.empty()) {append(a.rep->data(), a.rep->len);} return *this;}
  223 /// append cstring
  224 tstring& tstring::operator += (const char *a) {if(a) append(a, strlen(a)); return *this;}
  225 /// append cstring
  226 tstring& tstring::operator += (char c) {detachResize(rep->len + 1); (*rep)[rep->len++]=c; (*rep)[rep->len]=0; return *this;}
  227 /// append byte array a of length len
  228 tstring& tstring::append(const char *a, int alen) {
  229    if(a) {
  230       detachResize(rep->len + alen);
  231       memcpy(rep->data() + rep->len, a, alen);
  232       rep->len += alen;
  233       rep->terminate();
  234    }
  235    return *this;
  236 }
  237 /// assign string a to this
  238 tstring& tstring::operator = (const tstring& a) 
  239 {if(&a != this) {rep->release(); rep = a.rep->grab();} return *this;}
  240 /// direct character access: const/readonly
  241 char tstring::operator [] (size_t i) const /* throw(IndexOutOfRange) */ {
  242    if(i <= rep->len) return (*rep)[i];
  243    else return 0;
  244 }
  245 /// direct character access: read/write
  246 char& tstring::operator[](size_t i) {
  247    if(i < rep->len) {detach(); return (*rep)[i];}
  248    detachResize(i + 1);
  249    for(; rep->len <= i; rep->len++) (*rep)[rep->len] = 0;
  250    return (*rep)[i];
  251 }
  252 
  253 /// substring extraction (len=end-start)
  254 tstring tstring::substr(size_t start, size_t end) const /* throw(InvalidRange) */ {
  255    if((end == npos) || (end > rep->len)) end = rep->len;
  256    if(start > rep->len) start = rep->len;
  257    if(start > end) start = end;
  258    return tstring(rep->data()+start, end-start); 
  259 }
  260 
  261 // compare helpers
  262 int tstring::_string_cmp(const tstring& s1, const tstring& s2) {
  263    int r = memcmp(s1.rep->data(), s2.rep->data(), s1.rep->len <= s2.rep->len ? s1.rep->len : s2.rep->len);
  264    if(r) return r;
  265    if(s1.rep->len > s2.rep->len) return +1;
  266    if(s1.rep->len < s2.rep->len) return -1;
  267    return 0;
  268 }
  269 
  270 bool tstring::_string_equ(const tstring& s1, const tstring& s2) {
  271    if(s1.rep->len != s2.rep->len) return false;
  272    return memcmp(s1.rep->data(), s2.rep->data(), s1.rep->len)==0;
  273 }
  274 
  275 /// detach from string pool, you should never need to call this
  276 void tstring::detach() { if(rep->ref > 1) { replaceRep(rep->clone()); } }
  277 // no, there is *not* a dangling pointer here (ref > 1)
  278 /** detach from string pool and make sure at least minsize bytes of mem are available
  279  (use this before the dirty version sprintf to make it clean)
  280  (use this before the clean version sprintf to make it fast)
  281  */
  282 void tstring::detachResize(size_t minsize) {
  283    if((rep->ref==1) && (minsize <= rep->mem)) return;
  284    replaceRep(rep->clone(minsize));
  285 }
  286 /// detach from string pool and declare that string might be externally modified (the string has become vulnerable)
  287 void tstring::invulnerableDetach() { detach(); rep->vulnerable = true; }
  288 
  289 /// check for 0 in string (then its not a real cstring anymore)
  290 bool tstring::containsNulChar() const {
  291    rep->terminate();
  292    if(strlen(rep->data()) != rep->len) 
  293      return true; 
  294    else 
  295      return false;
  296 }
  297 
  298 
  299 /// get a pointer to the at most max last chars (useful for printf)
  300 const char *tstring::pSuf(size_t max) const {
  301    return rep->data()+((max>=rep->len)?0:(rep->len-max));
  302 }
  303 
  304 
  305 /// sprintf into this string
  306 void tstring::sprintf(const char *format, ...) {
  307    va_list ap;
  308    int ret = -1;
  309    va_start(ap, format);
  310 #if defined(__STRICT_ANSI__)
  311    // this is the unsecure and dirty but ansi compatible version
  312    detachResize(256);      
  313    ret = vsprintf(rep->data(), format, ap); // not secure! may write out of bounds!
  314 #else
  315    // this is the clean version (never overflows)
  316    int s = 16/4;
  317    do { 
  318       if(ret <= s)
  319     s <<= 2; // fast increase, printf may be slow
  320       else 
  321     s = ret + 8; // C99 standard, after first iteration this should be large enough
  322       detachResize(s);
  323       ret = vsnprintf(rep->data(), s, format, ap); 
  324    } while((ret == -1) || (ret >= s));
  325 #endif
  326    va_end(ap);
  327    rep->len = ret;
  328 }
  329 
  330 
  331 // returns true on success! returns value in bool_out!
  332 bool tstring::toBool(bool& bool_out) const {
  333    char buf[7];
  334    int i;
  335    for(i=0; i<6; i++) {
  336       buf[i] = tolower((*rep)[i]);
  337       if((buf[i]==0) || isspace(buf[i])) break; 
  338    }
  339    buf[i]=0;
  340    switch(i) {
  341     case 1:
  342       if((buf[0]=='1')||(buf[0]=='t')) { bool_out = true;  return true; }
  343       if((buf[0]=='0')||(buf[0]=='f')) { bool_out = false; return true; }
  344       break;
  345     case 2:
  346       if(strcmp(buf,"on")==0)          { bool_out = true;  return true; }
  347       if(strcmp(buf,"no")==0)          { bool_out = false; return true; }
  348       break;
  349     case 3:
  350       if(strcmp(buf,"yes")==0)         { bool_out = true;  return true; }
  351       if(strcmp(buf,"off")==0)         { bool_out = false; return true; }
  352       break;
  353     case 4:
  354       if(strcmp(buf,"true")==0)        { bool_out = true;  return true; }
  355       break;
  356     case 5:
  357       if(strcmp(buf,"false")==0)       { bool_out = false; return true; }
  358       break;
  359    }   
  360    return false;
  361 }
  362 
  363 
  364 // returns true on success
  365 bool tstring::toLong(long& long_out, int base) const {
  366    char *p = 0;
  367    long r = strtoul(rep->data(), &p, base);
  368    if(p == rep->data()) return false;
  369    if(*p) if(!isspace(*p)) return false;
  370    long_out = r;
  371    return true;
  372 }
  373 
  374 
  375 // returns true on success
  376 bool tstring::toInt(int& int_out, int base) const {
  377    char *p = 0;
  378    int r = strtoul(rep->data(), &p, base);
  379    if(p == rep->data()) return false;
  380    if(*p) if(!isspace(*p)) return false;
  381    int_out = r;
  382    return true;
  383 }
  384 
  385 
  386 // returns true on success
  387 bool tstring::toDouble(double& double_out) const {
  388    char *p = 0;
  389    double r = strtod(rep->data(), &p);
  390    if(p == rep->data()) return false;
  391    if(*p) if(!isspace(*p)) return false;
  392    double_out = r;
  393    return true;
  394 }
  395 
  396 
  397 tstring tstring::scanToken(size_t& scanner, int flags, 
  398                const char *allow, const char *forbid,
  399                bool allow_quoted) const 
  400 {
  401    if(allow_quoted && (scanner < rep->len)) {
  402       char q = (*rep)[scanner];
  403       if((q=='\'')||(q=='\"')) {
  404      int st(++scanner);
  405      while((scanner < rep->len) && ((*rep)[scanner]!=q))
  406        ++scanner;
  407      tstring out = substr(st, scanner);  
  408      if(scanner < rep->len) ++scanner;
  409      return out;
  410       }
  411    }
  412    size_t start(scanner);
  413    for(; (scanner < rep->len); ++scanner) {
  414       char c = (*rep)[scanner];
  415       if(forbid && strchr(forbid, c)) break; 
  416       if((flags&ALL                )) continue;
  417       if(allow  && strchr(allow , c)) continue; 
  418       if((flags&ALPHA) && isalpha(c)) continue;
  419       if((flags&DIGIT) && isdigit(c)) continue;
  420       if((flags&LOWER) && islower(c)) continue;
  421       if((flags&UPPER) && isupper(c)) continue;
  422       if((flags&PRINT) && isprint(c)) continue;
  423       if((flags&GRAPH) && isgraph(c)) continue;
  424       if((flags&CNTRL) && iscntrl(c)) continue;
  425       if((flags&SPACE) && isspace(c)) continue;
  426       if((flags&XDIGIT)&&isxdigit(c)) continue;
  427       if((flags&PUNCT) && ispunct(c)) continue;
  428       break;
  429    }
  430    return substr(start, scanner);
  431 }
  432 
  433 
  434 tstring tstring::shortFilename(size_t maxchar) const {
  435    if(rep->len <= maxchar) return *this;
  436    if(maxchar < 3) return "";
  437    return "..." + substr(rep->len - maxchar + 3);
  438 }
  439 
  440 
  /// Normalize this path in place: drop empty and "." components and
  /// collapse "name/.." pairs. An absolute path (first char '/') is rebuilt
  /// as "/" + components, any other path as "./" + components (or just "."
  /// when no component is left).
  441 void tstring::normalizePath() {
  442    // split path
  443    tvector<tstring> a = split(*this, "/", false, false);
  444 
  445    // delete nul dirs 
            // (empty components and "." carry no information)
  446    for(tvector<tstring>::iterator i = a.begin(); i != a.end();) {
  447       if(i->empty() || (*i == ".")) i = a.erase(i);
  448       else i++;
  449    }
  450    
  451    // check for absolute
            // absolute: start from the empty string, relative: start from "."
  452    if((*rep)[0]=='/') clear();
  453    else operator=(".");
  454 
  455    // delete '..'
            // A ".." that is not the first component is looked at together with
            // its predecessor: if the predecessor is a real name both are erased,
            // if it is itself ".." both are kept and skipped. A leading ".." stays.
  456    for(tvector<tstring>::iterator i = a.begin(); i != a.end();) {
  457       if((*i == "..") && (i != a.begin())) {
  458      i--;
  459      if(*i != "..") {
  460         i = a.erase(i);
  461         i = a.erase(i);
  462      } else {
  463         i++;
  464         i++;
  465      }
  466       } else i++;
  467    }
  468       
  469    // assemble string
            // append "/" + components unless the result would be "./" with nothing
            // after it; an absolute path without components becomes "/"
  470    if((a.size() > 0) || (len() == 0))
  471      operator+=("/" + join(a, "/"));
  472 }
  473 void tstring::extractFilename() {
  474    const char *p = strrchr(rep->data(), '/');
  475    if(p) operator=(p+1);
  476 }
  477 
  478 
  479 void tstring::extractPath() {
  480    const char *p = strrchr(rep->data(), '/');
  481    if(p) {
  482       truncate((p - rep->data() + 1));
  483       removeDirSlash();
  484    }
  485    else clear();
  486 }
  487 
  488 
  489 void tstring::removeDirSlash() {
  490    if(*this == "/") return;
  491    while(lastChar() == '/') truncate(rep->len-1);   
  492 }
  493 
  494 
  495 void tstring::addDirSlash() {
  496    if(lastChar() != '/') operator += ("/");
  497 }
  498 
  499 
  500 void tstring::extractFilenameExtension() {
  501    extractFilename();  // get file name
  502    const char *p = strrchr(rep->data(), '.');
  503    if(p) {  // contains dot
  504       if(p > rep->data()) { // last dot not first char
  505      operator=(p+1);    // get extension
  506      return;
  507       }
  508    }
  509    clear(); // no extension
  510 }
  511 
  512 
  513 double tstring::binaryPercentage() const {
  514    double bin = 0;
  515    
  516    for(size_t i = 0; i < rep->len; i++) 
  517      if((!isprint((*rep)[i])) && (!isspace((*rep)[i]))) bin+=1.0;
  518    return (bin * 100.0) / double(rep->len);
  519 }
  520 
  521 
  522 bool tstring::isLower() const {
  523    if(rep->len == 0) return false;
  524    for(size_t i = 0; i < rep->len; i++) 
  525      if(isalpha((*rep)[i])) 
  526        if(isupper((*rep)[i])) 
  527      return false;
  528    return true;
  529 }
  530 
  531 
  532 bool tstring::isUpper() const {
  533    if(rep->len == 0) return false;
  534    for(size_t i = 0; i < rep->len; i++) 
  535      if(isalpha((*rep)[i])) 
  536        if(islower((*rep)[i])) 
  537      return false;
  538    return true;
  539 }
  540 
  541 
  542 bool tstring::isCapitalized() const {
  543    if(rep->len == 0) return false;
  544    if(isalpha((*rep)[0])) if(islower((*rep)[0])) return false;
  545    for(size_t i = 1; i < rep->len; i++)
  546      if(isalpha((*rep)[i])) 
  547        if(isupper((*rep)[i])) 
  548      return false;
  549    return true;   
  550 }
  551 
  552 
  553 void tstring::lower() {
  554    detach();
  555    for(size_t i = 0; i < rep->len; i++) (*rep)[i] = tolower((*rep)[i]);
  556 }
  557 
  558 
  559 void tstring::upper() {
  560    detach();
  561    for(size_t i = 0; i < rep->len; i++) (*rep)[i] = toupper((*rep)[i]);
  562 }
  563 
  564 
  565 void tstring::capitalize() {
  566    lower();
  567    if(rep->len) (*rep)[0] = toupper((*rep)[0]);
  568 }
  569 
  570 
  /// Find the first occurrence of the byte pattern pat in the memory block mem.
  /// @param mem         start of the block to search (may contain 0 bytes)
  /// @param mlen        number of bytes in mem
  /// @param pat         pattern bytes
  /// @param plen        number of bytes in pat
  /// @param ignore_case compare bytes after tolower()
  /// @param whole_words accept a match only if the bytes directly before and
  ///                    after it (where they exist inside mem) are neither
  ///                    alphanumeric nor '_'
  /// @return pointer to the first accepted match inside mem, or 0 if there is none
  static const char *bytesearch(const char *mem, int mlen,
                                const char *pat, int plen,
                                bool ignore_case, bool whole_words) {
     for(int i = 0; i <= mlen - plen; i++) {
        // compare pattern at offset i; j ends up as the number of matching bytes
        int j = 0;
        if(ignore_case) {
           // ctype functions need an unsigned char value; a negative plain char is undefined
           while((j < plen) &&
                 (tolower(static_cast<unsigned char>(mem[i+j])) ==
                  tolower(static_cast<unsigned char>(pat[j])))) j++;
        } else {
           while((j < plen) && (mem[i+j] == pat[j])) j++;
        }
        if(j != plen) continue; // mismatch at this offset
        if(!whole_words) return mem + i;

        bool left_ok = true;
        bool right_ok = true;
        if(i > 0) {
           const unsigned char before = static_cast<unsigned char>(mem[i-1]);
           if(isalnum(before) || (before == '_')) left_ok = false;
        }
        if(i < mlen - plen) {
           const unsigned char after = static_cast<unsigned char>(mem[i+plen]);
           if(isalnum(after) || (after == '_')) right_ok = false;
        }
        if(left_ok && right_ok) return mem + i;
     }
     return 0; // not found
  }
  598 
  599 
  600 int tstring::searchReplace(const tstring& tsearch, const tstring& replace_, 
  601                bool ignore_case, bool whole_words,
  602                bool preserve_case, int progress,
  603                const tstring& pre_padstring, const tstring& post_padstring, tvector<int> *match_pos, int max_num) {
  604    // get new length and positions
  605    if(progress) { putc('S', stderr);fflush(stderr); }
  606    int num = search(tsearch, ignore_case, whole_words, progress);
  607    if(progress) { putc('R', stderr);fflush(stderr); }   
  608    if(num==0) {
  609       return 0;
  610    }
  611    if(num >= max_num) num = max_num;
  612    int newlen = rep->len + num*(replace_.rep->len-tsearch.rep->len + 
  613                 pre_padstring.len()+post_padstring.len());
  614 
  615    // create new string 
  616    Rep *newrep = Rep::create(newlen);   
  617    const char *p = rep->data();  // read
  618    char *q =    newrep->data();  // write
  619    const char *r;                // found substring
  620    int mlen = rep->len;          // rest of read mem
  621    for(int i=0; i < num; i++) {
  622       if(progress>0) if((i%progress)==0) {putc('.', stderr);fflush(stderr);}
  623       r = bytesearch(p, mlen, tsearch.rep->data(), tsearch.rep->len, ignore_case, whole_words);
  624       memcpy(q, p, r-p); // add skipped part
  625       q += r-p;      
  626       if(match_pos) (*match_pos) += int(q-newrep->data()); // enter start
  627       memcpy(q, pre_padstring.rep->data(), pre_padstring.rep->len); // add pre pad
  628       q += pre_padstring.len();
  629       if(!preserve_case) { // add replaced part
  630      memcpy(q, replace_.rep->data(), replace_.rep->len);
  631       } else {
  632      tstring rr(preserveCase(tstring(r, tsearch.rep->len), replace_.rep->data()));
  633      memcpy(q, rr.rep->data(), rr.rep->len);
  634       }
  635       q += replace_.rep->len;      
  636       memcpy(q, post_padstring.rep->data(), post_padstring.rep->len); // add post pad
  637       q += post_padstring.len();
  638       if(match_pos) (*match_pos) += int(q-newrep->data()); // enter end
  639       mlen -= r-p;
  640       mlen -= tsearch.rep->len;
  641       p = r + tsearch.rep->len;
  642    }
  643    memcpy(q, p, mlen); // add rest
  644    replaceRep(newrep);
  645    rep->len = newlen;
  646    rep->terminate();
  647    return num;
  648 }
  649 
  650 
  651 int tstring::search(const tstring& pat, bool ignore_case, bool whole_words, int progress, tvector<int> *match_pos) const {
  652    if(pat.empty()) return -1;
  653    int num=0;
  654    int mlen=rep->len;
  655    const char *q;                         
  656    for(const char *p = rep->data(); (q=bytesearch(p, mlen, pat.rep->data(), pat.rep->len,
  657                     ignore_case, whole_words)); num++) {
  658       if(match_pos) (*match_pos) += int(q-rep->data());
  659       mlen -= q-p;
  660       mlen -= pat.rep->len;
  661       p = q + pat.rep->len;
  662       if(match_pos) (*match_pos) += int(p-rep->data());
  663       if(progress>0) if((num%progress)==0) {putc('.', stderr);fflush(stderr);}
  664    }
  665    return num;
  666 }
  667 
  668 
  669 /// replace substring
  670 void tstring::replace(size_t start, size_t len_, const tstring &str) {
  671    if(start > length()) return;
  672    if(start + len_ > length()) return;
  673    if(str.length() > len_)
  674      detachResize(length() + str.length() - len_);
  675    else
  676      detach();
  677    if(str.length() != len_)
  678      memmove(rep->data() + start + str.length(), rep->data() + start + len_, length() - start - len_);
  679    // insert string
  680    memcpy(rep->data() + start, str.data(), str.length());
  681    // fix length
  682    rep->len += str.length() - len_;
  683    rep->terminate();
  684 }
  685 
  686 
  687 bool tstring::hasPrefix(const tstring& pref) const {
  688    if(pref.rep->len > rep->len) return false;
  689    return memcmp(rep->data(), pref.rep->data(), pref.rep->len)==0;
  690 }
  691 
  692 
  693 bool tstring::hasSuffix(const tstring& suf) const {
  694    if(suf.rep->len > rep->len) return false;
  695    return memcmp(rep->data() + (rep->len - suf.rep->len), 
  696          suf.rep->data(), suf.rep->len)==0;
  697 }
  698 
  699 
  700 bool tstring::consistsOfSpace() const {
  701    for(size_t i = 0; i < rep->len; i++) {
  702       if(!isspace((*rep)[i])) return false;
  703    }
  704    return true;
  705 }
  706 
  707 
  708 void tstring::truncate(size_t max) {
  709    if(max < rep->len) {
  710       detach();
  711       rep->len = max;
  712       rep->terminate();
  713    }
  714 }
  715 
  716 
  717 void tstring::replaceUnprintable(bool only_ascii) {
  718    for(size_t i = 0; i < rep->len; i++) {
  719       unsigned char& c = (unsigned char &)(*rep)[i];
  720       if(!isprint(c)) {
  721      if(c < ' ') {
  722         c = '!';
  723      } else if(only_ascii || (c < 0xa0)) {
  724         c = '?';
  725      }
  726       }
  727    }
  728 }
  729 
  730 
  /// Remove quoting in place: the quote characters ' and " that open and
  /// close a quoted section are dropped, their contents are kept.
  /// @param allow_bslash if true, a backslash directly followed by the
  ///        currently open quote character is dropped and that quote
  ///        character is kept as an ordinary character
  /// @param crop_space   if true, leading whitespace is skipped and the result
  ///        is cut after the last character that was non-space or quoted
  /// The text is processed up to the first 0 byte only, and the length is
  /// recomputed with strlen() at the end.
  /// NOTE(review): isspace() is called with plain char values here.
  731 void tstring::unquote(bool allow_bslash, bool crop_space) {
  732    detach();
  733    
            // p reads and q writes within the same buffer (q never passes p);
            // quote holds the currently open quote character, 0 outside quotes;
            // nonspace remembers where the last non-space or quoted char was written
  734    char *p=rep->data();
  735    char *q=rep->data();
  736    char quote=0;
  737    char *nonspace=rep->data();
  738    
  739    if(crop_space) while(isspace(*p)) p++;
  740    for(; *p; p++) {
  741       if(allow_bslash && *p=='\\') {
               // backslash before the open quote char: skip the backslash so the
               // quote char is copied below. Outside quotes (quote == 0) this only
               // matches a backslash at the very end, which ends the loop.
               // Any other backslash is copied unchanged.
  742      if(p[1] == quote) {
  743         p++;
  744         if(*p == 0) break;
  745      }
  746       } else {
  747      if(quote) {
                  // closing quote: leave quoted mode, do not copy the quote char
  748         if(*p == quote) {
  749            quote = 0;
  750            continue;
  751         }
  752      } else {
                  // opening quote: remember which one, do not copy it
  753         if((*p == '\'') || (*p == '\"')) {
  754            quote = *p;
  755            continue;
  756         }
  757      }   
  758       }
  759       if(quote || (!isspace(*p))) nonspace = q;
  760       *(q++) = *p;
  761    }   
  762    *q = 0;
            // cut trailing whitespace: terminate right after the last kept char
  763    if(crop_space) if(*nonspace) nonspace[1] = 0;
  764    rep->len = strlen(rep->data());   
  765 }
  766 
  767 
  768 tstring tstring::getFitWordsBlock(size_t max) {
  769    tstring r = getFitWords(max);
  770    size_t spaces;
  771    size_t fill = max - r.len();
  772    if(fill > 8) return r;
  773    size_t i,j;
  774       
  775    for(i = 0; i < r.len(); i++)
  776      if(r[i] != ' ') break;
  777    for(spaces = 0; i < r.len(); i++)
  778      if(r[i] == ' ') spaces++;
  779    if(fill > spaces) return r;
  780    tstring t;
  781    t.detachResize(max);
  782    for(i = 0, j = 0; i < r.len(); i++) {
  783       if(r[i] != ' ') break;
  784       (*(t.rep))[j++] = r[i];
  785    }
  786    for(; i < r.len(); i++) {
  787       if((fill > 0)&&(r[i] == ' ')) {
  788      (*(t.rep))[j++] = ' ';
  789      (*(t.rep))[j++] = ' ';
  790      fill--;
  791       } else (*(t.rep))[j++] = r[i];
  792    }
  793    t.rep->len = j;
  794    t.rep->terminate();
  795    return t;
  796 }
  797 
  798 
  799 void tstring::cropSpaceEnd() {
  800    int e = rep->len;
  801    
  802    if(e == 0) return;
  803    else e--;
  804    while((e >= 0) && isspace((*rep)[e])) e--;
  805    truncate(e+1);               
  806 }
  807 
  808 
  809 tstring tstring::getFitWords(size_t max) {
  810    if(max < 1) return tstring();
  811 
  812    tstring r(*this); // return value
  813    
  814    // check for lf
  815    size_t lf = firstOccurence('\n');
  816    if((lf != npos) && (lf <= max)) {
  817       operator=(substr(lf + 1));
  818       r.truncate(lf);
  819       r.cropSpaceEnd();
  820       return r;
  821    }
  822    
  823    // string fits
  824    if(rep->len <= max) {
  825       clear();
  826       r.cropSpaceEnd();
  827       return r;
  828    }
  829    
  830    // find space
  831    size_t last_space = npos;
  832    for(size_t i = 0; i <= max; i++) {
  833       if((*rep)[i] == ' ') last_space = i;
  834    }
  835    if(last_space == npos) last_space = max;
  836    
  837    // return 
  838    r.truncate(last_space);
  839    while(isspace((*rep)[last_space])) last_space++;
  840    operator=(substr(last_space));
  841    r.cropSpaceEnd();
  842    return r;
  843 }
  844 
  845 
  /// Replace the contents by an escaped version: backslashes (and the
  /// character given in quotes, if non-zero) get a backslash prefix, and
  /// unprintable bytes become backslash escapes (\a \b \f \n \r \t \v, octal,
  /// or \xHH). Bytes above 160 are copied unchanged.
  /// @param quotes quote character to escape as well, or 0 for none
  /// NOTE(review): isprint()/isdigit()/isxdigit() are called with plain char values here.
  846 void tstring::expandUnprintable(char quotes) {
            // every input byte produces at most 4 output characters
  847    Rep *newrep = Rep::create(rep->len*4);
  848    char *q = newrep->data(); // write
  849    char *p = rep->data();    // read
            // l counts the characters written and becomes the new length
  850    size_t l = 0;
  851    
  852    // expand each char
  853    for(size_t j = 0; j < rep->len; ++j, ++p) {
  854       if(isprint(*p) || (((unsigned char)*p) > 160)) { // printable --> print
  855      if((*p=='\\') || (quotes && (*p==quotes))) { // backslashify backslash and quotes
  856         *(q++) = '\\';   
  857         l++;        
  858      } 
  859      *(q++) = *p;
  860      l++;
  861       } else { // unprintable --> expand
  862      *(q++) = '\\'; // leading backslash
  863      l++;
  864      switch(*p) {
  865       case '\a':
  866         *(q++) = 'a';
  867         l++;
  868         break;
  869       case '\b':
  870         *(q++) = 'b';
  871         l++;
  872         break;
  873       case '\f':
  874         *(q++) = 'f';
  875         l++;
  876         break;
  877       case '\n':
  878         *(q++) = 'n';
  879         l++;
  880         break;
  881       case '\r':
  882         *(q++) = 'r';
  883         l++;
  884         break;
  885       case '\t':
  886         *(q++) = 't';
  887         l++;
  888         break;
  889       case '\v':
  890         *(q++) = 'v';
  891         l++;
  892         break;
  893       default: // no single char control
  894         unsigned int i = (unsigned char)*p;
                  // assume 3 characters after the backslash; corrected below when
                  // the short octal form is used
  895         l += 3;
  896         if(i < 32) {  // print lower control octal
                     // the following input byte decides the width: before a digit
                     // the fixed 3-digit form is used (presumably so that the digit
                     // cannot be read as part of the escape)
  897            if(isdigit(p[1])) {
  898           q += ::sprintf(q, "%03o", i);
  899            } else {
  900           q += ::sprintf(q, "%o", i);
                        // "%o" gave 2 digits for i >= 8, otherwise 1
  901           if(i>=8) --l;
  902           else l-=2;
  903            }
  904         } else {    // print octal or hex
                     // before a hex digit the 3-digit octal form is used, otherwise xHH
  905            if(isxdigit(p[1])) {
  906           q += ::sprintf(q, "%03o", i);
  907            } else {
  908           q += ::sprintf(q, "x%02x", i);
  909            }
  910         }
  911      }
  912       }
  913    }
  914    
  915    // end
  916    replaceRep(newrep);
  917    rep->len = l;
  918    rep->terminate();
  919 }
  920 
  921 
  922 void tstring::backslashify() {
  923    Rep *newrep = Rep::create(rep->len*2);
  924    char *p = rep->data();
  925    char *q = newrep->data();
  926    int l = 0;
  927    
  928    // backslashify each char
  929    for(size_t i = 0; i < rep->len; i++, p++) {
  930       switch(*p) {
  931        case '\\':
  932      *(q++) = '\\';
  933      *(q++) = '\\';
  934      l+=2;
  935      break;
  936        case '\'':
  937      *(q++) = '\\';
  938      *(q++) = '\'';
  939      l+=2;
  940      break;
  941        case '\"':
  942      *(q++) = '\\';
  943      *(q++) = '\"';
  944      l+=2;
  945      break;
  946        default:
  947      *(q++) = *p;
  948      l++;
  949      break;
  950       }
  951    }
  952    
  953    // end
  954    replaceRep(newrep);
  955    rep->len = l;
  956    rep->terminate();
  957 }
  958 
  959 
  960 void tstring::compileCString() {
  961    detach();
  962 
  963    char *p = rep->data(); // read
  964    char *q = rep->data(); // write
  965    char c;                // tmp char
  966    size_t l = 0;          // write
  967    size_t i = 0;          // read
  968    
  969    while(i < rep->len) {
  970       c = *(p++); // read char
  971       i++;
  972       if(c == '\\') { // compile char
  973      if(i>=rep->len) break;
  974      c = *(p++);
  975      i++;
  976      switch(c) {
  977       case 'a':
  978         c = '\a';
  979         break;
  980       case 'b':
  981         c = '\b';
  982         break;
  983       case 'f':
  984         c = '\f';
  985         break;
  986       case 'n':
  987         c = '\n';
  988         break;
  989       case 'r':
  990         c = '\r';
  991         break;
  992       case 't':
  993         c = '\t';
  994         break;
  995       case 'v':
  996         c = '\v';
  997         break;
  998       case 'x': // hex
  999         char *qq;
 1000         c = strtol(p, &qq, 16);
 1001         i += qq-p;
 1002         p = qq;
 1003         break;      
 1004       case '0': // octal
 1005       case '1':
 1006       case '2':
 1007       case '3':
 1008       case '4':
 1009       case '5':
 1010       case '6':
 1011       case '7':
 1012         char buf[4];
 1013         buf[0] = c;
 1014         buf[1] = *p;
 1015         buf[2] = (i < rep->len) ? p[1] : 0;
 1016         buf[3] = 0;
 1017         char *t;
 1018         c = strtol(buf, &t, 8);
 1019         i += (t-buf)-1;
 1020         p += (t-buf)-1;
 1021         break;      
 1022      }   
 1023       } 
 1024       *(q++) = c; // write char
 1025       l++;
 1026    }
 1027    rep->len = l;
 1028    rep->terminate();
 1029 }
 1030 
 1031 
 1032 void tstring::removeHTMLTags(int& level) {
 1033    detach();
 1034 
 1035    char *p = rep->data(); // read
 1036    char *q = rep->data(); // write
 1037    size_t l = 0;          // write
 1038    size_t i = 0;          // read
 1039    
 1040    while(i < rep->len) {
 1041       switch(*p) {
 1042        case '<': 
 1043      level++;
 1044      break;
 1045 
 1046        case '>':
 1047      if(level > 0) level--;
 1048      break;
 1049      
 1050        default:
 1051      if(level == 0) {
 1052         *(q++) = *p;
 1053         l++;
 1054      }
 1055       }      
 1056       p++;
 1057       i++;
 1058    }
 1059    
 1060    rep->len = l;
 1061    rep->terminate();
 1062 }
 1063 
 1064 
 1065 void tstring::cropSpace(void) {
 1066    size_t first = rep->len;
 1067    size_t last = 0;
 1068    size_t i;
 1069    
 1070    // get first nonspace
 1071    for(i = 0; i < rep->len; ++i) 
 1072      if(!isspace((*rep)[i])) {
 1073     first = i;
 1074     break;
 1075      }
 1076    
 1077    // full of spaces   
 1078    if(first == rep->len) {
 1079       clear();
 1080       return;
 1081    }
 1082    
 1083    // get last nonspace
 1084    for(i = rep->len - 1; i >= first; --i) 
 1085      if(!isspace((*rep)[i])) {
 1086     last = i;
 1087     break;
 1088      }
 1089    ++last;
 1090    
 1091    // truncate
 1092    if(first == 0) {
 1093       truncate(last);
 1094       return;
 1095    }
 1096      
 1097    // extract substring
 1098    operator=(substr(first, last));   
 1099 }
 1100 
 1101 
 1102 void tstring::collapseSpace(void) {
 1103    detach();
 1104    
 1105    char *p = rep->data(); // read
 1106    char *q = rep->data(); // write
 1107    char last_char = ' ';
 1108    size_t l = 0;          // length
 1109    char c;
 1110    
 1111    for(size_t i = 0; i < rep->len; ++i, ++p) {
 1112       if((!isspace(*p)) || (!isspace(last_char))) {
 1113      c = *p;
 1114      if(isspace(c)) c=' ';
 1115      *(q++) = c;
 1116      last_char = c;
 1117      l++;
 1118       }
 1119    }
 1120    if(isspace(last_char)&&(l>0)) --l;
 1121    rep->len = l;
 1122    rep->terminate();
 1123 }
 1124 
 1125 
 1126 void tstring::translateChar(char from, char to) {
 1127    detach();   
 1128    char *p = rep->data();   
 1129    for(size_t i = 0; i < rep->len; ++i, ++p)
 1130      if(*p == from) *p = to;
 1131 }
 1132 
 1133 
 1134 size_t tstring::firstOccurence(char c) const {
 1135    size_t i;
 1136    
 1137    for(i = 0; (i < rep->len) && ((*rep)[i] != c); ++i);
 1138    if(i < rep->len) return i;
 1139    else return npos;
 1140 }
 1141 
 1142 
 1143 
 1144 // non member implementation
 1145 
 1146 
 1147 tvector<tstring> split(const tstring &s, const char *sep, bool allow_quoting, bool crop_space) {
 1148    tvector<tstring> r;
 1149    tstring buf;
 1150    const char *p = s.c_str();
 1151    p--; // bias
 1152    
 1153    do {
 1154       // next chunk
 1155       p++;    
 1156       
 1157       // collect chars to buf
 1158       while(*p) {
 1159      if(strchr(sep, *p)) {
 1160         break;
 1161      } else if(!allow_quoting) {
 1162         buf += *(p++);      
 1163      } else if(*p=='\\') {
 1164         p++;
 1165         if(strchr(sep, *p)==0) buf += '\\';
 1166         if(*p) buf += *(p++);
 1167      } else if(*p=='\'') {
 1168         buf += '\'';
 1169         for(p++; *p && *p!='\''; p++) {
 1170            if(*p=='\\') {
 1171           p++;
 1172           buf += '\\';
 1173           if(*p) buf += *p;
 1174            } else 
 1175          buf += *p;
 1176         }
 1177         buf += '\'';
 1178         if(*p=='\'') p++;
 1179      } else if(*p=='\"') {
 1180         buf += '\"';
 1181         for(p++; *p && *p!='\"'; p++) {
 1182            if(*p=='\\') {
 1183           p++;
 1184           buf += '\\';
 1185           if(*p) buf += *p;
 1186            } else 
 1187          buf += *p;
 1188         }
 1189         buf += '\"';
 1190         if(*p=='\"') p++;
 1191      } else {
 1192         buf += *(p++);
 1193      }
 1194       }
 1195       
 1196       // put buf to r
 1197       if(crop_space) buf.cropSpace();
 1198       r.push_back(buf);
 1199 
 1200       // cleanup
 1201       buf.clear();
 1202    } while(*p);
 1203    
 1204    return r;
 1205 }
 1206 
 1207 
 1208 tstring join(const tvector<tstring>& a, const tstring& sep) {
 1209    tstring r;
 1210    
 1211    if(a.empty()) return r;
 1212    else r = a[0];   
 1213    for(size_t i = 1; i < a.size(); i++) {
 1214       r += sep;
 1215       r += a[i]; 
 1216    }
 1217    return r;
 1218 }
 1219 
 1220 
 1221 tstring preserveCase(const tstring& from, const tstring& to) {
 1222    tstring r(to);
 1223    
 1224    if(from.len() == to.len()) { 
 1225       // same len
 1226       for(size_t i = 0; i < r.len(); i++) {
 1227      if(islower(from[i])) r[i] = tolower(r[i]);
 1228      else if(isupper(from[i])) r[i] = toupper(r[i]);
 1229       }
 1230    } else {   
 1231       // some heuristics
 1232       if(from.isLower()) r.lower();
 1233       if(from.isUpper()) r.upper();
 1234       if(from.isCapitalized()) r.capitalize();
 1235    }
 1236    
 1237    return r;
 1238 }
 1239 
 1240 
 1241 const char *progressBar(const char *message, unsigned int n, unsigned int max, int width) {
 1242    // max size of a buffer
 1243 #define size 1024
 1244    // number of static buffers (must be power of two)
 1245 #define numbuf 4
 1246    static char tbuf[size * numbuf];
 1247    static int tphase = 0;
 1248    static int phase = 0;
 1249    static char phasechar[] = "/-~-_-\\|";
 1250 
 1251    tphase++;
 1252    tphase &= numbuf - 1;
 1253    char *buf = tbuf + size * tphase;
 1254    
 1255    // limit width
 1256    if(width >= size) width = size - 1;
 1257    if(message == 0) {
 1258       // clear line
 1259       sprintf(buf, "%*s", width, "");
 1260       return buf;
 1261    }
 1262    if(max == 0) {
 1263       // open end progress
 1264       if(phasechar[phase] == 0) phase = 0;
 1265       sprintf(buf, "%.*s %11d %c", width - (11 - 3), message, n, phasechar[phase++]);
 1266       return buf;
 1267    }
 1268    
 1269    // proportional progress
 1270     
 1271    // get num chars for number and max
 1272    int nlen = 0, i;
 1273    for(i = max; i; i /= 10, nlen++);
 1274    
 1275    int l = sprintf(buf, "%.*s %*d/%*d (%5.1f%%) ", width - (12 + 2 * nlen), message, nlen, n, nlen, max, double(n)/double(max)*100.0);
 1276    int rest = width - l;
 1277    if(rest <= 0) return buf;
 1278    int done = int(double(n)/double(max)*double(rest));
 1279    if(done > rest) done = rest;
 1280    char *p = buf + l;
 1281    for(i = 0; i < done; i++) *(p++) = '*';
 1282    for(; i < rest; i++) *(p++) = '.';
 1283    *p = 0;
 1284    return buf;
 1285 #undef size
 1286 }
 1287 
 1288 
 1289 bool tstring::readLine(FILE *file) {
 1290    char buf[1024];
 1291    
 1292    clear();
 1293    for(;;) {     
 1294       buf[sizeof(buf)-2] = '\n';
 1295       if(!fgets(buf, sizeof(buf), file)) break;
 1296       operator+=(buf);
 1297       if(buf[sizeof(buf)-2] == '\n') break;
 1298    }
 1299    if(rep->len) return true;
 1300    else    return false;
 1301 }
 1302 
 1303 
 1304 size_t tstring::write(FILE *file) const {
 1305    return fwrite(rep->data(), 1, rep->len, file);
 1306 }
 1307 
 1308 
 1309 size_t tstring::read(FILE *file, size_t l) {
 1310    rep->release();
 1311    rep = Rep::create(l);
 1312    int r = fread(rep->data(), 1, l, file);
 1313    rep->len = r;
 1314    rep->terminate();
 1315    return r;
 1316 }
 1317 
 1318 
 1319 int tstring::readFile(const char *filename) {
 1320    struct stat buf;
 1321 
 1322    if(stat(filename, &buf)) return -1; // does not exist
 1323    FILE *f=fopen(filename, "rb");
 1324    if(f == 0) return -2;                 // no permission?
 1325    int r = read(f, buf.st_size);
 1326    fclose(f);
 1327    if(r != buf.st_size) return -3;     // read error
 1328    return 0;
 1329 }
 1330 
 1331 
 1332 int tstring::writeFile(const char *filename) {
 1333    FILE *f = fopen(filename, "wb");
 1334    if(f == 0) return -2;                 // no permission?
 1335    int r = write(f);
 1336    fclose(f);
 1337    if(r != int(length())) return -3;     // write error
 1338    return 0;
 1339 }
 1340 
 1341 
 1342 tvector<tstring> loadTextFile(const char *fname) {
 1343    FILE *f = fopen(fname, "r");
 1344    if(f==0) throw TFileOperationErrnoException(fname, "fopen(mode='r')", errno);
 1345    tvector<tstring> r;
 1346    for(size_t i = 0; r[i].readLine(f); i++);
 1347    fclose(f);
 1348    r.pop_back();
 1349    return r;
 1350 }
 1351 
 1352 
 1353 tvector<tstring> loadTextFile(FILE *file) {
 1354    tvector<tstring> r;
 1355    for(size_t i = 0; r[i].readLine(file); i++);
 1356    r.pop_back();
 1357    return r;
 1358 }
 1359