"Fossies" - the Fresh Open Source Software Archive 
Member "qdiff-0.9.1/tstring.h" (21 Oct 2008, 16106 Bytes) of package /linux/privat/old/qdiff-0.9.1.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "tstring.h" see the
Fossies "Dox" file reference documentation.
1 /*GPL*START*
2 *
3 * tstring - NUL byte tolerant sophisticated string class
4 *
5 * Copyright (C) 1997-2001 by Johannes Overmann <Johannes.Overmann@gmx.de>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 * *GPL*END*/
21
22 #ifndef _ngw_tstring_h_
23 #define _ngw_tstring_h_
24
25 #include <stdio.h>
26 #include <stdarg.h>
27 #include <ctype.h>
28 #include <string.h>
29 #include <limits.h>
30 #include "tvector.h"
31 #include "texception.h"
32
33 using namespace std;
34
35 /**@name null tolerant string class */
36 /*@{*/
37 /// null tolerant string class
38 class tstring {
39 public:
40 // invalid iterator
41 static const size_t npos = static_cast<size_t>(-1);
42 // flags for scanToken()
43 enum {ALPHA=1, NUM=2, DIGIT=2, LOWER=4, UPPER=8, PRINT=16, XDIGIT=32,
44 SPACE=64, ALNUM=1|2, PUNCT=128, CNTRL=256, GRAPH=1024,
45 ALL=2048, NONE=0};
46 /// case flags for modify case
47 enum {NOT=0, CAPITALIZE=-1};
48 private:
49 // internal string representation
50 class Rep {
51 public:
52 size_t len; // length without term 0 byte
53 size_t mem; // allocated mem without term 0 byte
54 int ref; // reference count (>=1)
55 bool vulnerable; // true == always grab by clone, never by reference
56 // (the string has become vulnerable to the outside)
57 // char data[mem+1]; string data follows (+1 for term 0 byte)
58
59 // return pointer to string data
60 char *data() {return (char *)(this + 1);} // 'this + 1' means 'the byte following this object'
61 // character access
62 char& operator[] (size_t i) {return data()[i];}
63 // reference
64 Rep* grab() {if(vulnerable) return clone(); ++ref; return this;}
65 // dereference
66 void release() {if(--ref == 0) delete this;}
67 // copy this representation
68 Rep *clone(size_t minmem = 0);
69 // terminate string with 0 byte
70 void terminate() {*(data()+len) = 0;} // set term 0 byte
71
72 // static methods
73 // operator new for this class
74 static void * operator new (size_t size, size_t tmem) {
75 return ::operator new (size + tmem + 1);}
76 static void operator delete (void *p, size_t) {
77 ::operator delete (p); }
78
79 // create a new representation
80 static Rep *create(size_t tmem);
81
82 // return pointer to the null string representation
83 static Rep * nulRep() {if(nul == 0) createNulRep(); return nul;}
84
85 // return pointer to the zero string representation (string conatining a literal 0: "0" (and not "\0"))
86 static Rep * zeroRep() {if(zero == 0) createZeroRep(); return zero;}
87
88 // create null string representation
89 static void createNulRep();
90
91 // create zero string representation
92 static void createZeroRep();
93
94 private:
95 // static null string ("") representation
96 static Rep* nul;
97 static char nul_mem[];
98 // static zero string ("0") representation
99 static Rep* zero;
100 static char zero_mem[];
101
102 // forbid assignement
103 Rep& operator=(const Rep&);
104 };
105
106 public:
107 /**@name constructor & destructor */
108 /*@{*/
109 /// default construction
110 tstring(): rep(Rep::nulRep()->grab()) {}
111 /// copy construction
112 tstring(const tstring& a):rep(a.rep->grab()) {}
113 /// init from cstring
114 tstring(const char *s);
115 /// extract bytearray s of length len
116 tstring(const char *s, size_t len);
117 /// create string of chars c with length n
118 explicit tstring(char c, size_t n);
119 /// char to string conversion
120 explicit tstring(char c);
121 /// int to string conversion
122 explicit tstring(int i);
123 /// int to string conversion with format
124 explicit tstring(int i, const char *format);
125 /// double to string conversion
126 explicit tstring(double d, const char *format = "%g");
127 /// destructor
128 ~tstring() {rep->release();}
129 /*@}*/
130
131
132 /**@name main interface */
133 /*@{*/
134 /// return length in bytes
135 size_t len() const {return rep->len;}
136 /// return length in bytes
137 size_t length() const {return rep->len;}
138 /// return length in bytes
139 size_t size() const {return rep->len;}
140 /// clear string
141 void clear() {replaceRep(Rep::nulRep()->grab());}
142 /// explicit conversion to c string
143 // const char *operator*() const {return rep->data();}
144 /// explicit conversion to c string
145 const char *c_str() const {return rep->data();}
146 /// explicit conversion to c string
147 const char *data() const { return rep->data();}
148 /// direct raw data access: user with caution
149 char *rawdata() { invulnerableDetach(); return rep->data(); }
150 /// return true if string is empty, else false
151 bool empty() const {return rep->len == 0;}
152 /// append string
153 tstring& operator += (const tstring& a);
154 /// append cstring
155 tstring& operator += (const char *a);
156 /// append cstring
157 tstring& operator += (char c);
158 /// append byte array a of length len
159 tstring& append(const char *a, int alen);
160 /// assign string a to this
161 tstring& operator = (const tstring& a);
162 /// direct character access: const/readonly
163 char operator [] (size_t i) const;
164 /// direct character access: read/write
165 char& operator [] (size_t i);
166 /// substring extraction (len=end-start)
167 tstring substr(size_t start, size_t end = npos) const;
168 /// ASCII to number conversion
169 bool toLong(long& long_out, int base = 0) const;
170 bool toInt(int& int_out, int base = 0) const;
171 bool toDouble(double& double_out) const;
172 bool toBool(bool& bool_out) const;
173 /*@}*/
174
175
176 /**@name scanning */
177 /*@{*/
178 /// return a scanned token with scanner
179 tstring scanToken(size_t& scanner, int flags,
180 const char *allow=0, const char *forbid=0,
181 bool allow_quoted=false) const;
182 /// scan a token or quoted string to out with scanner
183 tstring scanString(size_t& scanner, int flags,
184 const char *allow=0, const char *forbid=0) const {
185 return scanToken(scanner, flags, allow, forbid, true);}
186 /// scan a token up to char upto
187 tstring scanUpTo(size_t& scanner, char upto) const {
188 int start(scanner);
189 while((scanner < rep->len)&&((*rep)[scanner]!=upto)) ++scanner;
190 return substr(start, scanner);}
191 /// scan a token to out up to chars upto
192 tstring scanUpTo(size_t& scanner, const char *upto) const {
193 int start(scanner);
194 while((scanner < rep->len)&&(strchr(upto, (*rep)[scanner])==0))
195 ++scanner;
196 return substr(start, scanner);}
197 /// return the rest of the scanned string
198 tstring scanRest(size_t& scanner) const {if(scanner < rep->len) {
199 int start(scanner);scanner=rep->len;return substr(start, scanner);
200 } return tstring();}
201 /// skip spaces
202 void skipSpace(size_t& scanner) const
203 {while((scanner < rep->len)&&isspace((*rep)[scanner]))++scanner;}
204 /// perhaps skip one char c
205 void perhapsSkipOneChar(size_t& scanner, char c) const
206 {if((scanner < rep->len)&&((*rep)[scanner]==c)) ++scanner;}
207 /// return true if the end of string (eos) is reached
208 bool scanEOS(size_t scanner) const
209 {if(scanner >= rep->len) return true; else return false;}
210
211
212 /// return the last character in the string or 0 if empty
213 char lastChar() const {return rep->len?(*rep)[rep->len-1]:0;}
214 /// return the first character in the string or 0 if empty
215 char firstChar() const {return (*rep)[0];}
216 /// return true if entire string consists of whitespace
217 bool consistsOfSpace() const;
218 /// return true if string has prefix
219 bool hasPrefix(const tstring& prefix) const;
220 /// return true if string has suffix
221 bool hasSuffix(const tstring& suffix) const;
222 /// return index of first occurence of char c or npos if not found
223 size_t firstOccurence(char c) const;
224 /// check whether char is contained or not
225 bool contains(char c) const { return firstOccurence(c) != npos; }
226 /// remove whitespace at beginning and end
227 void cropSpace();
228 /// remove whitespace at end
229 void cropSpaceEnd();
230 /// collapse whitespace
231 void collapseSpace();
232 /// replace char from with char to
233 void translateChar(char from, char to);
234 /// expand unprintable chars to C-style backslash sequences
235 void expandUnprintable(char quotes = 0);
236 /// backslashify backslash and quotes
237 void backslashify();
238 /// compile C-style backslash sequences back to unprintable chars
239 void compileCString();
240 /// truncate to maximal length max
241 void truncate(size_t max);
242 /// replace unprintable characters for safe printing
243 void replaceUnprintable(bool only_ascii = true);
244 /**
245 remove quotes
246 @param allow_bslash true == backslashing allowed to protect quotes
247 @param crop_space true == remove leading/trailing spaces not protected by quotes
248 */
249 void unquote(bool allow_bslash = true, bool crop_space = true);
250 /// return and remove the first words that fit into a string of length max
251 tstring getFitWords(size_t max); // throw(InvalidWidth);
252 /// remove the first words that fit into a string of length max and return in block format
253 tstring getFitWordsBlock(size_t max); // throw(InvalidWidth);
254 /// remove html tags (level == number of open brakets before call, init:0)
255 void removeHTMLTags(int& level);
256 /*@}*/
257
258 /**@name search/replace */
259 /*@{*/
260 /// replace substring search with replace, return number of replacements (not regexp, use TRegEx to match regular expressions)
261 int searchReplace(const tstring& search, const tstring& replace,
262 bool ignore_case=false, bool whole_words=false,
263 bool preserve_case=false, int progress=0,
264 const tstring& pre_padstring=tstring(),
265 const tstring& post_padstring=tstring(), tvector<int> *match_pos=0, int max_num = INT_MAX);
266 /// return number of occurences of pat (not regexp) returns -1 on empty pat
267 int search(const tstring& pat,
268 bool ignore_case=false, bool whole_words=false,
269 int progress=0, tvector<int> *match_pos=0) const; // throw(StringIsEmpty);
270 /// replace substring
271 void replace(size_t start, size_t len, const tstring &str);
272 /*@}*/
273
274 /**@name file I/O */
275 /*@{*/
276 /// read line from file like fgets, no line length limit
277 bool readLine(FILE *file);
278 /// write string to file, return number of bytes written
279 size_t write(FILE *file) const;
280 /// read len bytes from file to string, return bytes read
281 size_t read(FILE *file, size_t len); // throw(InvalidWidth);
282 /// read whole file into one string, return 0 on success -x on error
283 int readFile(const char *filename);
284 /// write string into file, return 0 on success -x on error
285 int writeFile(const char *filename);
286 /*@}*/
287
288 /**@name filename manipulation */
289 /*@{*/
290 /// remove leading path from filename
291 void extractFilename();
292 /// remove part after last slash
293 void extractPath();
294 /// add a slash at the end if it is missing
295 void addDirSlash();
296 /// remove last char if last char is a slash
297 void removeDirSlash();
298 /// extract part after the last dot (empty string if no extension, leading dot is ignored)
299 void extractFilenameExtension();
300 /// make paths comparable (kill multislash, dots and resolve '..')
301 void normalizePath();
302 /// check for absolute path
303 bool isAbsolutePath() const {if((*rep)[0]=='/') return true; return false;}
304 /// get truncated filename (for printing puroses)
305 tstring shortFilename(size_t maxchar) const;
306 /*@}*/
307
308 /**@name misc */
309 /*@{*/
310 /// get percentage of nonprintable and nonspace chars (0.0 .. 100.0)
311 double binaryPercentage() const;
312 /// check for 0 in string (then its not a real cstring anymore)
313 bool containsNulChar() const;
314 /// get a pointer to the at most max last chars (useful for printf)
315 const char *pSuf(size_t max) const;
316 /// sprintf into this string
317 void sprintf(const char *format, ...);
318 /*@}*/
319
320 /**@name case */
321 /*@{*/
322 /// convert to lower case
323 void lower();
324 /// convert to upper case
325 void upper();
326 /// convert to lower case, first char upper case
327 void capitalize();
328 /// check for lower case, empty string returns false
329 bool isLower() const;
330 /// check for upper case, empty string returns false
331 bool isUpper() const;
332 /// check for capitalized case, empty string returns false
333 bool isCapitalized() const;
334 /*@}*/
335
336 public:
337 /**@name detach methods */
338 /*@{*/
339 /// detach from string pool, you should never need to call this
340 void detach();
341 // no, there is *not* a dangling pointer here (ref > 1)
342 /** detach from string pool and make sure at least minsize bytes of mem are available
343 (use this before the dirty version sprintf to make it clean)
344 (use this before the clean version sprintf to make it fast)
345 */
346 void detachResize(size_t minsize);
347 /// detach from string pool and declare that string might be externally modified (the string has become vulnerable)
348 void invulnerableDetach();
349 /*@}*/
350
351 private:
352 // hidden string representation
353 Rep *rep;
354
355 // private methods
356 void replaceRep(Rep *p) {rep->release(); rep = p;}
357
358 public:
359 // compare helpers
360 static int _string_cmp(const tstring& s1, const tstring& s2);
361 static bool _string_equ(const tstring& s1, const tstring& s2);
362 };
363
364
365
366
367 /**@name concat operators */
368 /*@{*/
369 ///
370 tstring operator + (const tstring& s1, const tstring& s2);
371 ///
372 tstring operator + (const char *s1, const tstring& s2);
373 ///
374 tstring operator + (const tstring& s1, const char *s2);
375 ///
376 tstring operator + (char s1, const tstring& s2);
377 ///
378 tstring operator + (const tstring& s1, char s2);
379 /*@}*/
380
381
382
383 /**@name compare operators */
384 /*@{*/
385 ///
386 bool operator == (const tstring& s1, const tstring& s2);
387 ///
388 bool operator == (const tstring& s1, const char *s2);
389 ///
390 bool operator == (const char *s1, const tstring& s2);
391 ///
392 bool operator != (const tstring& s1, const tstring& s2);
393 ///
394 bool operator != (const tstring& s1, const char *s2);
395 ///
396 bool operator != (const char *s1, const tstring& s2);
397 ///
398 bool operator < (const tstring& s1, const tstring& s2);
399 ///
400 bool operator < (const tstring& s1, const char *s2);
401 ///
402 bool operator < (const char *s1, const tstring& s2);
403 ///
404 bool operator > (const tstring& s1, const char *s2);
405 ///
406 bool operator > (const char *s1, const tstring& s2);
407 ///
408 bool operator > (const tstring& s1, const tstring& s2);
409 /*@}*/
410
411
412 /**@name misc friends and nonmembers */
413 /*@{*/
414 /// split string into pieces by characters in c-str separator
415 tvector<tstring> split(const tstring& s, const char *separator,
416 bool allow_quoting=false,
417 bool crop_space=false);
418
419 /// join, reverse the effect of split
420 tstring join(const tvector<tstring>& a, const tstring& separator);
421
422 /// try to preserve case from 'from' to 'to' and return altered 'to' with case from 'from'
423 tstring preserveCase(const tstring& from, const tstring& to);
424
425 /// modify case
426 inline tstring modifyCase(const tstring& s, int _case) {
427 tstring r(s);
428 switch(_case) {
429 case tstring::UPPER: r.upper(); break;
430 case tstring::LOWER: r.lower(); break;
431 case tstring::CAPITALIZE: r.capitalize(); break;
432 default: break;
433 }
434 return r;
435 }
436
437 /// Create progress bar
438 const char *progressBar(const char *message = 0, unsigned int n = 0, unsigned int max = 0, int width = 79);
439
440 /// load text file to array of strings
441 tvector<tstring> loadTextFile(const char *fname);
442 /// load text file to array of strings
443 tvector<tstring> loadTextFile(FILE *file);
444
445 /*@}*/
446 /*@}*/
447
448 #endif /* _ngw_tstring_h_ */