"Fossies" - the Fresh Open Source Software Archive

Member "qdiff-0.9.1/tstring.h" (21 Oct 2008, 16106 Bytes) of package /linux/privat/old/qdiff-0.9.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style: standard) with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "tstring.h", see the Fossies "Dox" file reference documentation.

    1 /*GPL*START*
    2  * 
    3  * tstring - NUL byte tolerant sophisticated string class
    4  * 
    5  * Copyright (C) 1997-2001 by Johannes Overmann <Johannes.Overmann@gmx.de>
    6  * 
    7  * This program is free software; you can redistribute it and/or modify
    8  * it under the terms of the GNU General Public License as published by
    9  * the Free Software Foundation; either version 2 of the License, or
   10  * (at your option) any later version.
   11  * 
   12  * This program is distributed in the hope that it will be useful,
   13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   15  * GNU General Public License for more details.
   16  * 
   17  * You should have received a copy of the GNU General Public License
   18  * along with this program; if not, write to the Free Software
   19  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
   20  * *GPL*END*/  
   21 
   22 #ifndef _ngw_tstring_h_
   23 #define _ngw_tstring_h_
   24 
   25 #include <stdio.h>
   26 #include <stdarg.h>
   27 #include <ctype.h>
   28 #include <string.h>
   29 #include <limits.h>
   30 #include "tvector.h"
   31 #include "texception.h"
   32 
   33 using namespace std;
   34 
   35 /**@name null tolerant string class */
   36 /*@{*/
   37 /// null tolerant string class
   38 class tstring {
   39  public:
   40    // invalid iterator
   41    static const size_t npos = static_cast<size_t>(-1);
   42    // flags for scanToken()
   43    enum {ALPHA=1, NUM=2, DIGIT=2, LOWER=4, UPPER=8, PRINT=16, XDIGIT=32, 
   44       SPACE=64, ALNUM=1|2, PUNCT=128, CNTRL=256, GRAPH=1024,
   45       ALL=2048, NONE=0};
   46    /// case flags for modify case
   47    enum {NOT=0, CAPITALIZE=-1};
   48  private:
   49    // internal string representation
   50    class Rep {
   51     public:
   52       size_t len; // length without term 0 byte
   53       size_t mem; // allocated mem without term 0 byte
   54       int ref; // reference count (>=1)
   55       bool vulnerable; // true == always grab by clone, never by reference
   56       //                  (the string has become vulnerable to the outside)
   57       // char data[mem+1]; string data follows (+1 for term 0 byte)
   58       
   59       // return pointer to string data
   60       char *data() {return (char *)(this + 1);} // 'this + 1' means 'the byte following this object'
   61       // character access
   62       char& operator[] (size_t i) {return data()[i];}
   63       // reference
   64       Rep* grab() {if(vulnerable) return clone(); ++ref; return this;}
   65       // dereference
   66       void release() {if(--ref == 0) delete this;}
   67       // copy this representation
   68       Rep *clone(size_t minmem = 0);
   69       // terminate string with 0 byte
   70       void terminate() {*(data()+len) = 0;} // set term 0 byte
   71       
   72       // static methods
   73       // operator new for this class
   74       static void * operator new (size_t size, size_t tmem) {
   75      return ::operator new (size + tmem + 1);}
   76       static void operator delete (void *p, size_t) {
   77      ::operator delete (p); }
   78       
   79       // create a new representation
   80       static Rep *create(size_t tmem);
   81             
   82       // return pointer to the null string representation
   83       static Rep * nulRep() {if(nul == 0) createNulRep(); return nul;}
   84 
   85       // return pointer to the zero string representation (string conatining a literal 0: "0" (and not "\0"))
   86       static Rep * zeroRep() {if(zero == 0) createZeroRep(); return zero;}
   87      
   88       // create null string representation
   89       static void createNulRep();
   90       
   91       // create zero string representation
   92       static void createZeroRep();
   93 
   94     private:
   95       // static null string ("") representation
   96       static Rep* nul;
   97       static char nul_mem[];
   98       // static zero string ("0") representation
   99       static Rep* zero;
  100       static char zero_mem[];
  101       
  102       // forbid assignement
  103       Rep& operator=(const Rep&);
  104    };
  105    
  106  public:
  107    /**@name constructor & destructor */
  108    /*@{*/
  109    /// default construction
  110    tstring(): rep(Rep::nulRep()->grab()) {}
  111    /// copy construction
  112    tstring(const tstring& a):rep(a.rep->grab()) {}
  113    /// init from cstring
  114    tstring(const char *s);
  115    /// extract bytearray s of length len 
  116    tstring(const char *s, size_t len);
  117    /// create string of chars c with length n
  118    explicit tstring(char c, size_t n);
  119    /// char to string conversion
  120    explicit tstring(char c);
  121    /// int to string conversion
  122    explicit tstring(int i);
  123    /// int to string conversion with format
  124    explicit tstring(int i, const char *format);
  125    /// double to string conversion
  126    explicit tstring(double d, const char *format = "%g");
  127    /// destructor
  128    ~tstring() {rep->release();}
  129    /*@}*/
  130       
  131    
  132    /**@name main interface */
  133    /*@{*/
  134    /// return length in bytes
  135    size_t len() const {return rep->len;}
  136    /// return length in bytes
  137    size_t length() const {return rep->len;}
  138    /// return length in bytes
  139    size_t size() const {return rep->len;}
  140    /// clear string
  141    void clear() {replaceRep(Rep::nulRep()->grab());}
  142    /// explicit conversion to c string
  143    // const char *operator*() const {return rep->data();}
  144    /// explicit conversion to c string
  145    const char *c_str() const {return rep->data();}
  146    /// explicit conversion to c string
  147    const char *data() const { return rep->data();}
  148    /// direct raw data access: user with caution
  149    char *rawdata() { invulnerableDetach(); return rep->data(); }
  150    /// return true if string is empty, else false
  151    bool empty() const {return rep->len == 0;}
  152    /// append string
  153    tstring& operator += (const tstring& a);
  154    /// append cstring
  155    tstring& operator += (const char *a);
  156    /// append cstring
  157    tstring& operator += (char c);
  158    /// append byte array a of length len
  159    tstring& append(const char *a, int alen);
  160    /// assign string a to this
  161    tstring& operator = (const tstring& a);
  162    /// direct character access: const/readonly
  163    char operator [] (size_t i) const;
  164    /// direct character access: read/write
  165    char& operator [] (size_t i);
  166    /// substring extraction (len=end-start)
  167    tstring substr(size_t start, size_t end = npos) const;
  168    /// ASCII to number conversion
  169    bool toLong(long& long_out, int base = 0) const;
  170    bool toInt(int& int_out, int base = 0) const;
  171    bool toDouble(double& double_out) const;
  172    bool toBool(bool& bool_out) const;
  173    /*@}*/
  174    
  175       
  176    /**@name scanning */
  177    /*@{*/
  178    /// return a scanned token with scanner
  179    tstring scanToken(size_t& scanner, int flags, 
  180           const char *allow=0, const char *forbid=0, 
  181           bool allow_quoted=false) const;
  182    /// scan a token or quoted string to out with scanner
  183    tstring scanString(size_t& scanner, int flags, 
  184           const char *allow=0, const char *forbid=0) const {
  185              return scanToken(scanner, flags, allow, forbid, true);}
  186    /// scan a token up to char upto
  187    tstring scanUpTo(size_t& scanner, char upto) const {
  188       int start(scanner);
  189       while((scanner < rep->len)&&((*rep)[scanner]!=upto)) ++scanner;
  190       return substr(start, scanner);}
  191    /// scan a token to out up to chars upto
  192    tstring scanUpTo(size_t& scanner, const char *upto) const {
  193       int start(scanner);
  194       while((scanner < rep->len)&&(strchr(upto, (*rep)[scanner])==0))
  195     ++scanner;
  196       return substr(start, scanner);}
  197    /// return the rest of the scanned string
  198    tstring scanRest(size_t& scanner) const {if(scanner < rep->len) {
  199       int start(scanner);scanner=rep->len;return substr(start, scanner);
  200    } return tstring();}   
  201    /// skip spaces
  202    void skipSpace(size_t& scanner) const
  203    {while((scanner < rep->len)&&isspace((*rep)[scanner]))++scanner;}
  204    /// perhaps skip one char c
  205    void perhapsSkipOneChar(size_t& scanner, char c) const 
  206    {if((scanner < rep->len)&&((*rep)[scanner]==c)) ++scanner;}
  207    /// return true if the end of string (eos) is reached
  208    bool scanEOS(size_t scanner) const
  209    {if(scanner >= rep->len) return true; else return false;}
  210    
  211    
  212    /// return the last character in the string or 0 if empty
  213    char lastChar() const {return rep->len?(*rep)[rep->len-1]:0;}
  214    /// return the first character in the string or 0 if empty
  215    char firstChar() const {return (*rep)[0];}
  216    /// return true if entire string consists of whitespace
  217    bool consistsOfSpace() const;
  218    /// return true if string has prefix 
  219    bool hasPrefix(const tstring& prefix) const;
  220    /// return true if string has suffix 
  221    bool hasSuffix(const tstring& suffix) const;
  222    /// return index of first occurence of char c or npos if not found
  223    size_t firstOccurence(char c) const;
  224    /// check whether char is contained or not
  225    bool contains(char c) const { return firstOccurence(c) != npos; }
  226    /// remove whitespace at beginning and end 
  227    void cropSpace();
  228    /// remove whitespace at end
  229    void cropSpaceEnd();
  230    /// collapse whitespace 
  231    void collapseSpace();
  232    /// replace char from with char to
  233    void translateChar(char from, char to);
  234    /// expand unprintable chars to C-style backslash sequences
  235    void expandUnprintable(char quotes = 0);
  236    /// backslashify backslash and quotes 
  237    void backslashify();
  238    /// compile C-style backslash sequences back to unprintable chars
  239    void compileCString();
  240    /// truncate to maximal length max
  241    void truncate(size_t max);
  242    /// replace unprintable characters for safe printing
  243    void replaceUnprintable(bool only_ascii = true);
  244    /**
  245     remove quotes
  246     @param allow_bslash true == backslashing allowed to protect quotes
  247     @param crop_space   true == remove leading/trailing spaces not protected by quotes
  248     */
  249    void unquote(bool allow_bslash = true, bool crop_space = true);
  250    /// return and remove the first words that fit into a string of length max
  251    tstring getFitWords(size_t max); // throw(InvalidWidth);
  252    /// remove the first words that fit into a string of length max and return in block format
  253    tstring getFitWordsBlock(size_t max); // throw(InvalidWidth);
  254    /// remove html tags (level == number of open brakets before call, init:0)
  255    void removeHTMLTags(int& level);
  256    /*@}*/
  257       
  258    /**@name search/replace */
  259    /*@{*/
  260    /// replace substring search with replace, return number of replacements (not regexp, use TRegEx to match regular expressions)
  261    int searchReplace(const tstring& search, const tstring& replace,
  262              bool ignore_case=false, bool whole_words=false, 
  263              bool preserve_case=false, int progress=0,
  264              const tstring& pre_padstring=tstring(), 
  265              const tstring& post_padstring=tstring(), tvector<int> *match_pos=0, int max_num = INT_MAX);
  266    /// return number of occurences of pat (not regexp) returns -1 on empty pat
  267    int search(const tstring& pat, 
  268           bool ignore_case=false, bool whole_words=false,
  269           int progress=0, tvector<int> *match_pos=0) const; // throw(StringIsEmpty);
  270    /// replace substring
  271    void replace(size_t start, size_t len, const tstring &str);
  272    /*@}*/
  273       
  274    /**@name file I/O */
  275    /*@{*/
  276    /// read line from file like fgets, no line length limit
  277    bool readLine(FILE *file);
  278    /// write string to file, return number of bytes written
  279    size_t write(FILE *file) const;
  280    /// read len bytes from file to string, return bytes read
  281    size_t read(FILE *file, size_t len); // throw(InvalidWidth);
  282    /// read whole file into one string, return 0 on success -x on error
  283    int readFile(const char *filename);
  284    /// write string into file, return 0 on success -x on error
  285    int writeFile(const char *filename);
  286    /*@}*/
  287    
  288    /**@name filename manipulation */
  289    /*@{*/
  290    /// remove leading path from filename
  291    void extractFilename();
  292    /// remove part after last slash
  293    void extractPath();   
  294    /// add a slash at the end if it is missing
  295    void addDirSlash();
  296    /// remove last char if last char is a slash
  297    void removeDirSlash();      
  298    /// extract part after the last dot (empty string if no extension, leading dot is ignored)
  299    void extractFilenameExtension();
  300    /// make paths comparable (kill multislash, dots and resolve '..')
  301    void normalizePath();
  302    /// check for absolute path
  303    bool isAbsolutePath() const {if((*rep)[0]=='/') return true; return false;}
  304    /// get truncated filename (for printing puroses)
  305    tstring shortFilename(size_t maxchar) const;
  306    /*@}*/
  307    
  308    /**@name misc */
  309    /*@{*/
  310    /// get percentage of nonprintable and nonspace chars (0.0 .. 100.0)
  311    double binaryPercentage() const;
  312    /// check for 0 in string (then its not a real cstring anymore)
  313    bool containsNulChar() const;
  314    /// get a pointer to the at most max last chars (useful for printf)
  315    const char *pSuf(size_t max) const;
  316    /// sprintf into this string
  317    void sprintf(const char *format, ...);
  318    /*@}*/
  319    
  320    /**@name case */
  321    /*@{*/
  322    /// convert to lower case
  323    void lower();
  324    /// convert to upper case
  325    void upper();
  326    /// convert to lower case, first char upper case
  327    void capitalize();
  328    /// check for lower case, empty string returns false      
  329    bool isLower() const;
  330    /// check for upper case, empty string returns false      
  331    bool isUpper() const;
  332    /// check for capitalized case, empty string returns false      
  333    bool isCapitalized() const;
  334    /*@}*/
  335       
  336  public:
  337    /**@name detach methods */
  338    /*@{*/
  339    /// detach from string pool, you should never need to call this
  340    void detach();
  341    // no, there is *not* a dangling pointer here (ref > 1)
  342    /** detach from string pool and make sure at least minsize bytes of mem are available
  343     (use this before the dirty version sprintf to make it clean)
  344     (use this before the clean version sprintf to make it fast)
  345     */
  346    void detachResize(size_t minsize);
  347    /// detach from string pool and declare that string might be externally modified (the string has become vulnerable)
  348    void invulnerableDetach();
  349    /*@}*/
  350    
  351  private:
  352    // hidden string representation
  353    Rep *rep;
  354    
  355    // private methods
  356    void replaceRep(Rep *p) {rep->release(); rep = p;}
  357 
  358  public:
  359    // compare helpers
  360    static int _string_cmp(const tstring& s1, const tstring& s2);
  361    static bool _string_equ(const tstring& s1, const tstring& s2);
  362 };
  363 
  364 
  365 
  366 
  367 /**@name concat operators */
  368 /*@{*/
  369 ///
  370 tstring operator + (const tstring& s1, const tstring& s2);
  371 ///
  372 tstring operator + (const char *s1, const tstring& s2);
  373 ///
  374 tstring operator + (const tstring& s1, const char *s2);
  375 ///
  376 tstring operator + (char s1, const tstring& s2);
  377 ///
  378 tstring operator + (const tstring& s1, char s2);
  379 /*@}*/
  380 
  381 
  382 
  383 /**@name compare operators */
  384 /*@{*/
  385 ///
  386 bool operator == (const tstring& s1, const tstring& s2);
  387 ///
  388 bool operator == (const tstring& s1, const char   *s2);
  389 ///
  390 bool operator == (const char   *s1, const tstring& s2);
  391 ///
  392 bool operator != (const tstring& s1, const tstring& s2);
  393 ///
  394 bool operator != (const tstring& s1, const char   *s2);
  395 ///
  396 bool operator != (const char   *s1, const tstring& s2);
  397 ///
  398 bool operator <  (const tstring& s1, const tstring& s2);
  399 ///
  400 bool operator <  (const tstring& s1, const char   *s2);
  401 ///
  402 bool operator <  (const char   *s1, const tstring& s2);
  403 ///
  404 bool operator >  (const tstring& s1, const char   *s2);
  405 ///
  406 bool operator >  (const char   *s1, const tstring& s2);
  407 ///
  408 bool operator >  (const tstring& s1, const tstring& s2);
  409 /*@}*/
  410 
  411 
  412 /**@name misc friends and nonmembers */
  413 /*@{*/
  414 /// split string into pieces by characters in c-str separator
  415 tvector<tstring> split(const tstring& s, const char *separator,
  416              bool allow_quoting=false,
  417              bool crop_space=false);
  418 
  419 /// join, reverse the effect of split
  420 tstring join(const tvector<tstring>& a, const tstring& separator);
  421 
  422 /// try to preserve case from 'from' to 'to' and return altered 'to' with case from 'from'
  423 tstring preserveCase(const tstring& from, const tstring& to);
  424 
  425 /// modify case 
  426 inline tstring modifyCase(const tstring& s, int _case) {
  427    tstring r(s);
  428    switch(_case) {
  429     case tstring::UPPER:      r.upper(); break;
  430     case tstring::LOWER:      r.lower(); break;
  431     case tstring::CAPITALIZE: r.capitalize(); break;
  432     default: break;      
  433    }
  434    return r;
  435 }
  436 
  437 /// Create progress bar
  438 const char *progressBar(const char *message = 0, unsigned int n = 0, unsigned int max = 0, int width = 79);
  439 
  440 /// load text file to array of strings
  441 tvector<tstring> loadTextFile(const char *fname);
  442 /// load text file to array of strings
  443 tvector<tstring> loadTextFile(FILE *file);
  444 
  445 /*@}*/
  446 /*@}*/
  447 
  448 #endif /* _ngw_tstring_h_ */