"Fossies" - the Fresh Open Source Software Archive 
Member "qdiff-0.9.1/tstring.cc" (21 Oct 2008, 34818 Bytes) of package /linux/privat/old/qdiff-0.9.1.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
For more information about "tstring.cc" see the
Fossies "Dox" file reference documentation.
1 /*GPL*START*
2 *
3 * NUL byte safe string implementation
4 *
5 * Copyright (C) 1997-2001 by Johannes Overmann <Johannes.Overmann@gmx.de>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 * *GPL*END*/
21
22 #include <stdlib.h>
23 #include <sys/stat.h>
24 #include <unistd.h>
25 #include <errno.h>
26 #include "tstring.h"
27 #include "texception.h"
28
29
30 // todo:
31 // - make Split,Unquote,ReadLine,extractFilename,extractPath 0 byte safe
32 // - separate the functions using tvector<> for better modularity
33
34
35 // 1997:
36 // 01:45 11 Jun split(): backslash behavior fixed (601 lines)
37 // 23:50 11 Jun strings may contain 0 bytes
38 // 12:00 19 Jun some filename extracting added
39 // 17:00 19 Jun more sophisticated search: ignore_case and whole_words
40 // 02:00 08 Jul substring extraction via operator() (start,end)
41 // 02:00 31 Jul new ContainsNulChar, new ReadFile, fixed \ \\ in ExpUnPrint
42 // 12:00 08 Aug new Upper Lower Capitalize
43 // 23:30 19 Aug improved collapseSpace()
44 // 00:00 27 Aug cropSpace() bug fixed (1 byte out of bound zero write)
45 // 20:00 30 Aug now cons accept 0 pointer as empty string
46 // 21:00 30 Aug addDirSlash() added (809 lines)
47 // 13:00 02 Sep isLower ... added, preserve_case for SearchReplace added (867)
48 // 23:45 16 Dec normalizePath() added
49 // 15:00 24 Dec started conversion to Rep reference model
50 // 18:00 27 Dec finished. debugging starts ... :)
51
52 // 1998:
53 // 00:30 09 Jan scanTools started (cc=817) (h=462)
54 // 00:05 12 Jan compare operators fixed (0 byte ...)
55 // 19:00 09 Oct zeroRep and fast string(int i) for i=0
56 // 14:30 10 Oct xc16emu emuwid.s problem solved: memset()
57 // 14:36 10 Oct string(0) 80 times faster than string(1)! (zero_rep)
58 // 01:53 17 Oct createNulRep and createZeroRep non inline
59
60 // 1999:
61 // 14:55 31 Jan +=string speedup for empty string (cc=919, h=532)
62 // 15:08 31 Jan searchReplace: pre/post_padstring added
63 // 00:36 03 Feb getFitWordsBlock added (954)
64 // 23:02 04 Feb search/searchReplace match_pos added (954)
65 // 23:49 15 Feb class string renamed to class tstring, tappframe simplified (1003)
66 // 00:46 16 Feb toLong/toDouble/toInt/toBool added (from old str2value.cc) (1016)
67 // 23:51 03 Mar cropSpaceEnd added, getFitWords space semantics change
68 // 23:46 13 Apr trelops.h replaces != and > operator (1034)
69 // 00:31 16 Apr started: replace fatalErrors by exceptions
70 // 23:48 20 Aug remove html tags added
71 // 22:17 09 Dec added operator != and > because trelops will not instantiate them for two different types
72
73 // 2000:
74 // 23:30 30 Jun loop changed from while(1) to for(;;) ;-)
75 // 22:50 01 Jul toInt/Long pointer p initialized to 0, quotes feature added to expandUnprintable
76 // 22:00 06 Jul progressBar() added
77
78 // 2001:
79 // 00:15 08 Feb extractPath now removed trailing slash (1090 lines)
80 // 00:45 15 Mar searchReplace max_num parameter added
81 // 22:00 18 Sep palmos fixes
82
83 // 2002:
84 // 22:25 08 Apr expandUnpritable: allow high ISO graphical characters (ASCII 161-255), better nul_mem and zero_mem sizes for 64 bit systems
85
86 // 2003:
87 // 22:20 27 Jan length of nul_mem and zero_mem fixed
88
89 // 2006:
90 // 27 Jul: palmos support removed
91
92
93 // global static null and zero rep members
94 tstring::Rep* tstring::Rep::nul = 0;
95 char tstring::Rep::nul_mem[sizeof(Rep) + 1];
96 tstring::Rep* tstring::Rep::zero = 0;
97 char tstring::Rep::zero_mem[sizeof(Rep) + 2];
98
99
100 // non inline Rep implementations
101
102 // copy this representation
103 tstring::Rep *tstring::Rep::clone(size_t minmem) {
104 Rep *p = create(minmem >= len ? minmem : len);
105 p->len = len;
106 memcpy(p->data(), data(), len+1);
107 return p;
108 }
109
110 // create a new representation
111 tstring::Rep *tstring::Rep::create(size_t tmem) {
112 size_t m = sizeof(Rep) << 1;
113 while((m - 1 - sizeof(Rep)) < tmem) m <<= 1;
114 Rep *p = new (m - 1 - sizeof(Rep)) Rep;
115 p->mem = m - 1 - sizeof(Rep); p->ref = 1; p->vulnerable = false;
116 return p;
117 }
118
119 // create null string representation
120 void tstring::Rep::createNulRep() {
121 nul = (Rep *)nul_mem;
122 nul->len = 0;
123 nul->mem = 0;
124 nul->ref = 1; // never modify/delete static object
125 nul->vulnerable = false;
126 nul->terminate();
127 }
128
129 // create zero string representation
130 void tstring::Rep::createZeroRep() {
131 zero = (Rep *)zero_mem;
132 zero->len = 1;
133 zero->mem = 1;
134 zero->ref = 1; // never modify/delete static object
135 zero->vulnerable = false;
136 (*zero)[0] = '0';
137 zero->terminate();
138 }
139
140
141 // non inline string implementation
142
143 tstring::tstring(const char *s):rep(0) {
144 if(s){
145 int l = strlen(s);
146 rep = Rep::create(l);
147 rep->len = l;
148 strcpy(rep->data(), s);
149 } else rep = Rep::nulRep()->grab();
150 }
151
152
153 tstring::tstring(const char *s, size_t l):rep(0) {
154 if(s && (l > 0)) {
155 rep = Rep::create(l);
156 rep->len = l;
157 memcpy(rep->data(), s, l);
158 rep->terminate();
159 } else rep = Rep::nulRep()->grab();
160 }
161
162
163 tstring::tstring(char c, size_t n):rep(0) {
164 if(n) {
165 rep = Rep::create(n);
166 rep->len = n;
167 if(n) memset(rep->data(), c, n);
168 rep->terminate();
169 } else rep = Rep::nulRep()->grab();
170 }
171
172
173 tstring::tstring(char c):rep(0) {
174 rep = Rep::create(1);
175 rep->len = 1;
176 (*rep)[0] = c;
177 rep->terminate();
178 }
179
180
181 tstring::tstring(int i):rep((i==0)?(Rep::zeroRep()->grab()):(Rep::nulRep()->grab())) {
182 if(i) sprintf("%d", i);
183 }
184
185
186 tstring::tstring(int i, const char *format):rep(Rep::nulRep()->grab()) {
187 sprintf(format, i);
188 }
189
190
191 tstring::tstring(double d, const char *format):rep(Rep::nulRep()->grab()) {
192 sprintf(format, d);
193 }
194
195
196
197 tstring operator + (const tstring& s1, const tstring& s2) {
198 tstring r(s1); r += s2; return r; }
199 tstring operator + (const char *s1, const tstring& s2) {
200 tstring r(s1); r += s2; return r; }
201 tstring operator + (const tstring& s1, const char *s2) {
202 tstring r(s1); r += s2; return r; }
203 tstring operator + (char s1, const tstring& s2) {
204 tstring r(s1); r += s2; return r; }
205 tstring operator + (const tstring& s1, char s2) {
206 tstring r(s1); r += tstring(s2); return r; }
207
208 bool operator == (const tstring& s1, const tstring& s2) {return tstring::_string_equ(s1, s2);}
209 bool operator == (const tstring& s1, const char *s2) {return (strcmp(s1.c_str(), s2)==0);}
210 bool operator == (const char *s1, const tstring& s2) {return (strcmp(s1, s2.c_str())==0);}
211 bool operator != (const tstring& s1, const tstring& s2) {return !tstring::_string_equ(s1, s2);}
212 bool operator != (const tstring& s1, const char *s2) {return (strcmp(s1.c_str(), s2)!=0);}
213 bool operator != (const char *s1, const tstring& s2) {return (strcmp(s1, s2.c_str())!=0);}
214 bool operator < (const tstring& s1, const tstring& s2) {return (tstring::_string_cmp(s1, s2) < 0);}
215 bool operator < (const tstring& s1, const char *s2) {return (strcmp(s1.c_str(), s2) < 0);}
216 bool operator < (const char *s1, const tstring& s2) {return (strcmp(s1, s2.c_str()) < 0);}
217 bool operator > (const tstring& s1, const char *s2) {return (strcmp(s1.c_str(), s2) > 0);}
218 bool operator > (const char *s1, const tstring& s2) {return (strcmp(s1, s2.c_str()) > 0);}
219 bool operator > (const tstring& s1, const tstring& s2) {return (tstring::_string_cmp(s1, s2) > 0);}
220
221 /// append string
222 tstring& tstring::operator += (const tstring& a) {if(!a.empty()) {append(a.rep->data(), a.rep->len);} return *this;}
223 /// append cstring
224 tstring& tstring::operator += (const char *a) {if(a) append(a, strlen(a)); return *this;}
225 /// append cstring
226 tstring& tstring::operator += (char c) {detachResize(rep->len + 1); (*rep)[rep->len++]=c; (*rep)[rep->len]=0; return *this;}
227 /// append byte array a of length len
228 tstring& tstring::append(const char *a, int alen) {
229 if(a) {
230 detachResize(rep->len + alen);
231 memcpy(rep->data() + rep->len, a, alen);
232 rep->len += alen;
233 rep->terminate();
234 }
235 return *this;
236 }
237 /// assign string a to this
238 tstring& tstring::operator = (const tstring& a)
239 {if(&a != this) {rep->release(); rep = a.rep->grab();} return *this;}
240 /// direct character access: const/readonly
241 char tstring::operator [] (size_t i) const /* throw(IndexOutOfRange) */ {
242 if(i <= rep->len) return (*rep)[i];
243 else return 0;
244 }
245 /// direct character access: read/write
246 char& tstring::operator[](size_t i) {
247 if(i < rep->len) {detach(); return (*rep)[i];}
248 detachResize(i + 1);
249 for(; rep->len <= i; rep->len++) (*rep)[rep->len] = 0;
250 return (*rep)[i];
251 }
252
253 /// substring extraction (len=end-start)
254 tstring tstring::substr(size_t start, size_t end) const /* throw(InvalidRange) */ {
255 if((end == npos) || (end > rep->len)) end = rep->len;
256 if(start > rep->len) start = rep->len;
257 if(start > end) start = end;
258 return tstring(rep->data()+start, end-start);
259 }
260
261 // compare helpers
262 int tstring::_string_cmp(const tstring& s1, const tstring& s2) {
263 int r = memcmp(s1.rep->data(), s2.rep->data(), s1.rep->len <= s2.rep->len ? s1.rep->len : s2.rep->len);
264 if(r) return r;
265 if(s1.rep->len > s2.rep->len) return +1;
266 if(s1.rep->len < s2.rep->len) return -1;
267 return 0;
268 }
269
270 bool tstring::_string_equ(const tstring& s1, const tstring& s2) {
271 if(s1.rep->len != s2.rep->len) return false;
272 return memcmp(s1.rep->data(), s2.rep->data(), s1.rep->len)==0;
273 }
274
275 /// detach from string pool, you should never need to call this
276 void tstring::detach() { if(rep->ref > 1) { replaceRep(rep->clone()); } }
277 // no, there is *not* a dangling pointer here (ref > 1)
278 /** detach from string pool and make sure at least minsize bytes of mem are available
279 (use this before the dirty version sprintf to make it clean)
280 (use this before the clean version sprintf to make it fast)
281 */
282 void tstring::detachResize(size_t minsize) {
283 if((rep->ref==1) && (minsize <= rep->mem)) return;
284 replaceRep(rep->clone(minsize));
285 }
286 /// detach from string pool and declare that string might be externally modified (the string has become vulnerable)
287 void tstring::invulnerableDetach() { detach(); rep->vulnerable = true; }
288
289 /// check for 0 in string (then its not a real cstring anymore)
290 bool tstring::containsNulChar() const {
291 rep->terminate();
292 if(strlen(rep->data()) != rep->len)
293 return true;
294 else
295 return false;
296 }
297
298
299 /// get a pointer to the at most max last chars (useful for printf)
300 const char *tstring::pSuf(size_t max) const {
301 return rep->data()+((max>=rep->len)?0:(rep->len-max));
302 }
303
304
305 /// sprintf into this string
306 void tstring::sprintf(const char *format, ...) {
307 va_list ap;
308 int ret = -1;
309 va_start(ap, format);
310 #if defined(__STRICT_ANSI__)
311 // this is the unsecure and dirty but ansi compatible version
312 detachResize(256);
313 ret = vsprintf(rep->data(), format, ap); // not secure! may write out of bounds!
314 #else
315 // this is the clean version (never overflows)
316 int s = 16/4;
317 do {
318 if(ret <= s)
319 s <<= 2; // fast increase, printf may be slow
320 else
321 s = ret + 8; // C99 standard, after first iteration this should be large enough
322 detachResize(s);
323 ret = vsnprintf(rep->data(), s, format, ap);
324 } while((ret == -1) || (ret >= s));
325 #endif
326 va_end(ap);
327 rep->len = ret;
328 }
329
330
331 // returns true on success! returns value in bool_out!
332 bool tstring::toBool(bool& bool_out) const {
333 char buf[7];
334 int i;
335 for(i=0; i<6; i++) {
336 buf[i] = tolower((*rep)[i]);
337 if((buf[i]==0) || isspace(buf[i])) break;
338 }
339 buf[i]=0;
340 switch(i) {
341 case 1:
342 if((buf[0]=='1')||(buf[0]=='t')) { bool_out = true; return true; }
343 if((buf[0]=='0')||(buf[0]=='f')) { bool_out = false; return true; }
344 break;
345 case 2:
346 if(strcmp(buf,"on")==0) { bool_out = true; return true; }
347 if(strcmp(buf,"no")==0) { bool_out = false; return true; }
348 break;
349 case 3:
350 if(strcmp(buf,"yes")==0) { bool_out = true; return true; }
351 if(strcmp(buf,"off")==0) { bool_out = false; return true; }
352 break;
353 case 4:
354 if(strcmp(buf,"true")==0) { bool_out = true; return true; }
355 break;
356 case 5:
357 if(strcmp(buf,"false")==0) { bool_out = false; return true; }
358 break;
359 }
360 return false;
361 }
362
363
364 // returns true on success
365 bool tstring::toLong(long& long_out, int base) const {
366 char *p = 0;
367 long r = strtoul(rep->data(), &p, base);
368 if(p == rep->data()) return false;
369 if(*p) if(!isspace(*p)) return false;
370 long_out = r;
371 return true;
372 }
373
374
375 // returns true on success
376 bool tstring::toInt(int& int_out, int base) const {
377 char *p = 0;
378 int r = strtoul(rep->data(), &p, base);
379 if(p == rep->data()) return false;
380 if(*p) if(!isspace(*p)) return false;
381 int_out = r;
382 return true;
383 }
384
385
386 // returns true on success
387 bool tstring::toDouble(double& double_out) const {
388 char *p = 0;
389 double r = strtod(rep->data(), &p);
390 if(p == rep->data()) return false;
391 if(*p) if(!isspace(*p)) return false;
392 double_out = r;
393 return true;
394 }
395
396
397 tstring tstring::scanToken(size_t& scanner, int flags,
398 const char *allow, const char *forbid,
399 bool allow_quoted) const
400 {
401 if(allow_quoted && (scanner < rep->len)) {
402 char q = (*rep)[scanner];
403 if((q=='\'')||(q=='\"')) {
404 int st(++scanner);
405 while((scanner < rep->len) && ((*rep)[scanner]!=q))
406 ++scanner;
407 tstring out = substr(st, scanner);
408 if(scanner < rep->len) ++scanner;
409 return out;
410 }
411 }
412 size_t start(scanner);
413 for(; (scanner < rep->len); ++scanner) {
414 char c = (*rep)[scanner];
415 if(forbid && strchr(forbid, c)) break;
416 if((flags&ALL )) continue;
417 if(allow && strchr(allow , c)) continue;
418 if((flags&ALPHA) && isalpha(c)) continue;
419 if((flags&DIGIT) && isdigit(c)) continue;
420 if((flags&LOWER) && islower(c)) continue;
421 if((flags&UPPER) && isupper(c)) continue;
422 if((flags&PRINT) && isprint(c)) continue;
423 if((flags&GRAPH) && isgraph(c)) continue;
424 if((flags&CNTRL) && iscntrl(c)) continue;
425 if((flags&SPACE) && isspace(c)) continue;
426 if((flags&XDIGIT)&&isxdigit(c)) continue;
427 if((flags&PUNCT) && ispunct(c)) continue;
428 break;
429 }
430 return substr(start, scanner);
431 }
432
433
434 tstring tstring::shortFilename(size_t maxchar) const {
435 if(rep->len <= maxchar) return *this;
436 if(maxchar < 3) return "";
437 return "..." + substr(rep->len - maxchar + 3);
438 }
439
440
441 void tstring::normalizePath() {
442 // split path
443 tvector<tstring> a = split(*this, "/", false, false);
444
445 // delete nul dirs
446 for(tvector<tstring>::iterator i = a.begin(); i != a.end();) {
447 if(i->empty() || (*i == ".")) i = a.erase(i);
448 else i++;
449 }
450
451 // check for absolute
452 if((*rep)[0]=='/') clear();
453 else operator=(".");
454
455 // delete '..'
456 for(tvector<tstring>::iterator i = a.begin(); i != a.end();) {
457 if((*i == "..") && (i != a.begin())) {
458 i--;
459 if(*i != "..") {
460 i = a.erase(i);
461 i = a.erase(i);
462 } else {
463 i++;
464 i++;
465 }
466 } else i++;
467 }
468
469 // assemble string
470 if((a.size() > 0) || (len() == 0))
471 operator+=("/" + join(a, "/"));
472 }
473 void tstring::extractFilename() {
474 const char *p = strrchr(rep->data(), '/');
475 if(p) operator=(p+1);
476 }
477
478
479 void tstring::extractPath() {
480 const char *p = strrchr(rep->data(), '/');
481 if(p) {
482 truncate((p - rep->data() + 1));
483 removeDirSlash();
484 }
485 else clear();
486 }
487
488
489 void tstring::removeDirSlash() {
490 if(*this == "/") return;
491 while(lastChar() == '/') truncate(rep->len-1);
492 }
493
494
495 void tstring::addDirSlash() {
496 if(lastChar() != '/') operator += ("/");
497 }
498
499
500 void tstring::extractFilenameExtension() {
501 extractFilename(); // get file name
502 const char *p = strrchr(rep->data(), '.');
503 if(p) { // contains dot
504 if(p > rep->data()) { // last dot not first char
505 operator=(p+1); // get extension
506 return;
507 }
508 }
509 clear(); // no extension
510 }
511
512
513 double tstring::binaryPercentage() const {
514 double bin = 0;
515
516 for(size_t i = 0; i < rep->len; i++)
517 if((!isprint((*rep)[i])) && (!isspace((*rep)[i]))) bin+=1.0;
518 return (bin * 100.0) / double(rep->len);
519 }
520
521
522 bool tstring::isLower() const {
523 if(rep->len == 0) return false;
524 for(size_t i = 0; i < rep->len; i++)
525 if(isalpha((*rep)[i]))
526 if(isupper((*rep)[i]))
527 return false;
528 return true;
529 }
530
531
532 bool tstring::isUpper() const {
533 if(rep->len == 0) return false;
534 for(size_t i = 0; i < rep->len; i++)
535 if(isalpha((*rep)[i]))
536 if(islower((*rep)[i]))
537 return false;
538 return true;
539 }
540
541
542 bool tstring::isCapitalized() const {
543 if(rep->len == 0) return false;
544 if(isalpha((*rep)[0])) if(islower((*rep)[0])) return false;
545 for(size_t i = 1; i < rep->len; i++)
546 if(isalpha((*rep)[i]))
547 if(isupper((*rep)[i]))
548 return false;
549 return true;
550 }
551
552
553 void tstring::lower() {
554 detach();
555 for(size_t i = 0; i < rep->len; i++) (*rep)[i] = tolower((*rep)[i]);
556 }
557
558
559 void tstring::upper() {
560 detach();
561 for(size_t i = 0; i < rep->len; i++) (*rep)[i] = toupper((*rep)[i]);
562 }
563
564
565 void tstring::capitalize() {
566 lower();
567 if(rep->len) (*rep)[0] = toupper((*rep)[0]);
568 }
569
570
/// True for bytes that count as part of a word: alphanumerics and '_'.
/// The byte is converted to unsigned char first, as the ctype functions
/// require.
static bool bytesearchIsWordByte(char c) {
    const unsigned char u = (unsigned char)c;
    return isalnum(u) || (u == '_');
}

/// Find the first occurrence of the plen bytes at pat inside the mlen bytes
/// at mem. Both buffers may contain NUL bytes.
/// ignore_case: compare via tolower()
/// whole_words: accept a match only if it is not directly preceded or
///              followed (inside mem) by a word byte
/// Returns a pointer to the match inside mem, or 0 if there is none.
static const char *bytesearch(const char *mem, int mlen,
                              const char *pat, int plen,
                              bool ignore_case, bool whole_words) {
    for(int i = 0; i <= mlen - plen; i++) {
        // length of the common prefix of pat and mem + i
        int j = 0;
        if(ignore_case) {
            while((j < plen) &&
                  (tolower((unsigned char)mem[i+j]) == tolower((unsigned char)pat[j]))) j++;
        } else {
            while((j < plen) && (mem[i+j] == pat[j])) j++;
        }
        if(j != plen) continue;               // mismatch at this position

        if(!whole_words) return mem + i;
        const bool left_ok  = (i == 0) || !bytesearchIsWordByte(mem[i-1]);
        const bool right_ok = (i >= mlen - plen) || !bytesearchIsWordByte(mem[i+plen]);
        if(left_ok && right_ok) return mem + i;
    }
    return 0; // not found
}
598
599
600 int tstring::searchReplace(const tstring& tsearch, const tstring& replace_,
601 bool ignore_case, bool whole_words,
602 bool preserve_case, int progress,
603 const tstring& pre_padstring, const tstring& post_padstring, tvector<int> *match_pos, int max_num) {
604 // get new length and positions
605 if(progress) { putc('S', stderr);fflush(stderr); }
606 int num = search(tsearch, ignore_case, whole_words, progress);
607 if(progress) { putc('R', stderr);fflush(stderr); }
608 if(num==0) {
609 return 0;
610 }
611 if(num >= max_num) num = max_num;
612 int newlen = rep->len + num*(replace_.rep->len-tsearch.rep->len +
613 pre_padstring.len()+post_padstring.len());
614
615 // create new string
616 Rep *newrep = Rep::create(newlen);
617 const char *p = rep->data(); // read
618 char *q = newrep->data(); // write
619 const char *r; // found substring
620 int mlen = rep->len; // rest of read mem
621 for(int i=0; i < num; i++) {
622 if(progress>0) if((i%progress)==0) {putc('.', stderr);fflush(stderr);}
623 r = bytesearch(p, mlen, tsearch.rep->data(), tsearch.rep->len, ignore_case, whole_words);
624 memcpy(q, p, r-p); // add skipped part
625 q += r-p;
626 if(match_pos) (*match_pos) += int(q-newrep->data()); // enter start
627 memcpy(q, pre_padstring.rep->data(), pre_padstring.rep->len); // add pre pad
628 q += pre_padstring.len();
629 if(!preserve_case) { // add replaced part
630 memcpy(q, replace_.rep->data(), replace_.rep->len);
631 } else {
632 tstring rr(preserveCase(tstring(r, tsearch.rep->len), replace_.rep->data()));
633 memcpy(q, rr.rep->data(), rr.rep->len);
634 }
635 q += replace_.rep->len;
636 memcpy(q, post_padstring.rep->data(), post_padstring.rep->len); // add post pad
637 q += post_padstring.len();
638 if(match_pos) (*match_pos) += int(q-newrep->data()); // enter end
639 mlen -= r-p;
640 mlen -= tsearch.rep->len;
641 p = r + tsearch.rep->len;
642 }
643 memcpy(q, p, mlen); // add rest
644 replaceRep(newrep);
645 rep->len = newlen;
646 rep->terminate();
647 return num;
648 }
649
650
651 int tstring::search(const tstring& pat, bool ignore_case, bool whole_words, int progress, tvector<int> *match_pos) const {
652 if(pat.empty()) return -1;
653 int num=0;
654 int mlen=rep->len;
655 const char *q;
656 for(const char *p = rep->data(); (q=bytesearch(p, mlen, pat.rep->data(), pat.rep->len,
657 ignore_case, whole_words)); num++) {
658 if(match_pos) (*match_pos) += int(q-rep->data());
659 mlen -= q-p;
660 mlen -= pat.rep->len;
661 p = q + pat.rep->len;
662 if(match_pos) (*match_pos) += int(p-rep->data());
663 if(progress>0) if((num%progress)==0) {putc('.', stderr);fflush(stderr);}
664 }
665 return num;
666 }
667
668
669 /// replace substring
670 void tstring::replace(size_t start, size_t len_, const tstring &str) {
671 if(start > length()) return;
672 if(start + len_ > length()) return;
673 if(str.length() > len_)
674 detachResize(length() + str.length() - len_);
675 else
676 detach();
677 if(str.length() != len_)
678 memmove(rep->data() + start + str.length(), rep->data() + start + len_, length() - start - len_);
679 // insert string
680 memcpy(rep->data() + start, str.data(), str.length());
681 // fix length
682 rep->len += str.length() - len_;
683 rep->terminate();
684 }
685
686
687 bool tstring::hasPrefix(const tstring& pref) const {
688 if(pref.rep->len > rep->len) return false;
689 return memcmp(rep->data(), pref.rep->data(), pref.rep->len)==0;
690 }
691
692
693 bool tstring::hasSuffix(const tstring& suf) const {
694 if(suf.rep->len > rep->len) return false;
695 return memcmp(rep->data() + (rep->len - suf.rep->len),
696 suf.rep->data(), suf.rep->len)==0;
697 }
698
699
700 bool tstring::consistsOfSpace() const {
701 for(size_t i = 0; i < rep->len; i++) {
702 if(!isspace((*rep)[i])) return false;
703 }
704 return true;
705 }
706
707
708 void tstring::truncate(size_t max) {
709 if(max < rep->len) {
710 detach();
711 rep->len = max;
712 rep->terminate();
713 }
714 }
715
716
717 void tstring::replaceUnprintable(bool only_ascii) {
718 for(size_t i = 0; i < rep->len; i++) {
719 unsigned char& c = (unsigned char &)(*rep)[i];
720 if(!isprint(c)) {
721 if(c < ' ') {
722 c = '!';
723 } else if(only_ascii || (c < 0xa0)) {
724 c = '?';
725 }
726 }
727 }
728 }
729
730
731 void tstring::unquote(bool allow_bslash, bool crop_space) {
732 detach();
733
734 char *p=rep->data();
735 char *q=rep->data();
736 char quote=0;
737 char *nonspace=rep->data();
738
739 if(crop_space) while(isspace(*p)) p++;
740 for(; *p; p++) {
741 if(allow_bslash && *p=='\\') {
742 if(p[1] == quote) {
743 p++;
744 if(*p == 0) break;
745 }
746 } else {
747 if(quote) {
748 if(*p == quote) {
749 quote = 0;
750 continue;
751 }
752 } else {
753 if((*p == '\'') || (*p == '\"')) {
754 quote = *p;
755 continue;
756 }
757 }
758 }
759 if(quote || (!isspace(*p))) nonspace = q;
760 *(q++) = *p;
761 }
762 *q = 0;
763 if(crop_space) if(*nonspace) nonspace[1] = 0;
764 rep->len = strlen(rep->data());
765 }
766
767
768 tstring tstring::getFitWordsBlock(size_t max) {
769 tstring r = getFitWords(max);
770 size_t spaces;
771 size_t fill = max - r.len();
772 if(fill > 8) return r;
773 size_t i,j;
774
775 for(i = 0; i < r.len(); i++)
776 if(r[i] != ' ') break;
777 for(spaces = 0; i < r.len(); i++)
778 if(r[i] == ' ') spaces++;
779 if(fill > spaces) return r;
780 tstring t;
781 t.detachResize(max);
782 for(i = 0, j = 0; i < r.len(); i++) {
783 if(r[i] != ' ') break;
784 (*(t.rep))[j++] = r[i];
785 }
786 for(; i < r.len(); i++) {
787 if((fill > 0)&&(r[i] == ' ')) {
788 (*(t.rep))[j++] = ' ';
789 (*(t.rep))[j++] = ' ';
790 fill--;
791 } else (*(t.rep))[j++] = r[i];
792 }
793 t.rep->len = j;
794 t.rep->terminate();
795 return t;
796 }
797
798
799 void tstring::cropSpaceEnd() {
800 int e = rep->len;
801
802 if(e == 0) return;
803 else e--;
804 while((e >= 0) && isspace((*rep)[e])) e--;
805 truncate(e+1);
806 }
807
808
809 tstring tstring::getFitWords(size_t max) {
810 if(max < 1) return tstring();
811
812 tstring r(*this); // return value
813
814 // check for lf
815 size_t lf = firstOccurence('\n');
816 if((lf != npos) && (lf <= max)) {
817 operator=(substr(lf + 1));
818 r.truncate(lf);
819 r.cropSpaceEnd();
820 return r;
821 }
822
823 // string fits
824 if(rep->len <= max) {
825 clear();
826 r.cropSpaceEnd();
827 return r;
828 }
829
830 // find space
831 size_t last_space = npos;
832 for(size_t i = 0; i <= max; i++) {
833 if((*rep)[i] == ' ') last_space = i;
834 }
835 if(last_space == npos) last_space = max;
836
837 // return
838 r.truncate(last_space);
839 while(isspace((*rep)[last_space])) last_space++;
840 operator=(substr(last_space));
841 r.cropSpaceEnd();
842 return r;
843 }
844
845
846 void tstring::expandUnprintable(char quotes) {
847 Rep *newrep = Rep::create(rep->len*4);
848 char *q = newrep->data(); // write
849 char *p = rep->data(); // read
850 size_t l = 0;
851
852 // expand each char
853 for(size_t j = 0; j < rep->len; ++j, ++p) {
854 if(isprint(*p) || (((unsigned char)*p) > 160)) { // printable --> print
855 if((*p=='\\') || (quotes && (*p==quotes))) { // backslashify backslash and quotes
856 *(q++) = '\\';
857 l++;
858 }
859 *(q++) = *p;
860 l++;
861 } else { // unprintable --> expand
862 *(q++) = '\\'; // leading backslash
863 l++;
864 switch(*p) {
865 case '\a':
866 *(q++) = 'a';
867 l++;
868 break;
869 case '\b':
870 *(q++) = 'b';
871 l++;
872 break;
873 case '\f':
874 *(q++) = 'f';
875 l++;
876 break;
877 case '\n':
878 *(q++) = 'n';
879 l++;
880 break;
881 case '\r':
882 *(q++) = 'r';
883 l++;
884 break;
885 case '\t':
886 *(q++) = 't';
887 l++;
888 break;
889 case '\v':
890 *(q++) = 'v';
891 l++;
892 break;
893 default: // no single char control
894 unsigned int i = (unsigned char)*p;
895 l += 3;
896 if(i < 32) { // print lower control octal
897 if(isdigit(p[1])) {
898 q += ::sprintf(q, "%03o", i);
899 } else {
900 q += ::sprintf(q, "%o", i);
901 if(i>=8) --l;
902 else l-=2;
903 }
904 } else { // print octal or hex
905 if(isxdigit(p[1])) {
906 q += ::sprintf(q, "%03o", i);
907 } else {
908 q += ::sprintf(q, "x%02x", i);
909 }
910 }
911 }
912 }
913 }
914
915 // end
916 replaceRep(newrep);
917 rep->len = l;
918 rep->terminate();
919 }
920
921
922 void tstring::backslashify() {
923 Rep *newrep = Rep::create(rep->len*2);
924 char *p = rep->data();
925 char *q = newrep->data();
926 int l = 0;
927
928 // backslashify each char
929 for(size_t i = 0; i < rep->len; i++, p++) {
930 switch(*p) {
931 case '\\':
932 *(q++) = '\\';
933 *(q++) = '\\';
934 l+=2;
935 break;
936 case '\'':
937 *(q++) = '\\';
938 *(q++) = '\'';
939 l+=2;
940 break;
941 case '\"':
942 *(q++) = '\\';
943 *(q++) = '\"';
944 l+=2;
945 break;
946 default:
947 *(q++) = *p;
948 l++;
949 break;
950 }
951 }
952
953 // end
954 replaceRep(newrep);
955 rep->len = l;
956 rep->terminate();
957 }
958
959
960 void tstring::compileCString() {
961 detach();
962
963 char *p = rep->data(); // read
964 char *q = rep->data(); // write
965 char c; // tmp char
966 size_t l = 0; // write
967 size_t i = 0; // read
968
969 while(i < rep->len) {
970 c = *(p++); // read char
971 i++;
972 if(c == '\\') { // compile char
973 if(i>=rep->len) break;
974 c = *(p++);
975 i++;
976 switch(c) {
977 case 'a':
978 c = '\a';
979 break;
980 case 'b':
981 c = '\b';
982 break;
983 case 'f':
984 c = '\f';
985 break;
986 case 'n':
987 c = '\n';
988 break;
989 case 'r':
990 c = '\r';
991 break;
992 case 't':
993 c = '\t';
994 break;
995 case 'v':
996 c = '\v';
997 break;
998 case 'x': // hex
999 char *qq;
1000 c = strtol(p, &qq, 16);
1001 i += qq-p;
1002 p = qq;
1003 break;
1004 case '0': // octal
1005 case '1':
1006 case '2':
1007 case '3':
1008 case '4':
1009 case '5':
1010 case '6':
1011 case '7':
1012 char buf[4];
1013 buf[0] = c;
1014 buf[1] = *p;
1015 buf[2] = (i < rep->len) ? p[1] : 0;
1016 buf[3] = 0;
1017 char *t;
1018 c = strtol(buf, &t, 8);
1019 i += (t-buf)-1;
1020 p += (t-buf)-1;
1021 break;
1022 }
1023 }
1024 *(q++) = c; // write char
1025 l++;
1026 }
1027 rep->len = l;
1028 rep->terminate();
1029 }
1030
1031
1032 void tstring::removeHTMLTags(int& level) {
1033 detach();
1034
1035 char *p = rep->data(); // read
1036 char *q = rep->data(); // write
1037 size_t l = 0; // write
1038 size_t i = 0; // read
1039
1040 while(i < rep->len) {
1041 switch(*p) {
1042 case '<':
1043 level++;
1044 break;
1045
1046 case '>':
1047 if(level > 0) level--;
1048 break;
1049
1050 default:
1051 if(level == 0) {
1052 *(q++) = *p;
1053 l++;
1054 }
1055 }
1056 p++;
1057 i++;
1058 }
1059
1060 rep->len = l;
1061 rep->terminate();
1062 }
1063
1064
1065 void tstring::cropSpace(void) {
1066 size_t first = rep->len;
1067 size_t last = 0;
1068 size_t i;
1069
1070 // get first nonspace
1071 for(i = 0; i < rep->len; ++i)
1072 if(!isspace((*rep)[i])) {
1073 first = i;
1074 break;
1075 }
1076
1077 // full of spaces
1078 if(first == rep->len) {
1079 clear();
1080 return;
1081 }
1082
1083 // get last nonspace
1084 for(i = rep->len - 1; i >= first; --i)
1085 if(!isspace((*rep)[i])) {
1086 last = i;
1087 break;
1088 }
1089 ++last;
1090
1091 // truncate
1092 if(first == 0) {
1093 truncate(last);
1094 return;
1095 }
1096
1097 // extract substring
1098 operator=(substr(first, last));
1099 }
1100
1101
1102 void tstring::collapseSpace(void) {
1103 detach();
1104
1105 char *p = rep->data(); // read
1106 char *q = rep->data(); // write
1107 char last_char = ' ';
1108 size_t l = 0; // length
1109 char c;
1110
1111 for(size_t i = 0; i < rep->len; ++i, ++p) {
1112 if((!isspace(*p)) || (!isspace(last_char))) {
1113 c = *p;
1114 if(isspace(c)) c=' ';
1115 *(q++) = c;
1116 last_char = c;
1117 l++;
1118 }
1119 }
1120 if(isspace(last_char)&&(l>0)) --l;
1121 rep->len = l;
1122 rep->terminate();
1123 }
1124
1125
1126 void tstring::translateChar(char from, char to) {
1127 detach();
1128 char *p = rep->data();
1129 for(size_t i = 0; i < rep->len; ++i, ++p)
1130 if(*p == from) *p = to;
1131 }
1132
1133
1134 size_t tstring::firstOccurence(char c) const {
1135 size_t i;
1136
1137 for(i = 0; (i < rep->len) && ((*rep)[i] != c); ++i);
1138 if(i < rep->len) return i;
1139 else return npos;
1140 }
1141
1142
1143
1144 // non member implementation
1145
1146
1147 tvector<tstring> split(const tstring &s, const char *sep, bool allow_quoting, bool crop_space) {
1148 tvector<tstring> r;
1149 tstring buf;
1150 const char *p = s.c_str();
1151 p--; // bias
1152
1153 do {
1154 // next chunk
1155 p++;
1156
1157 // collect chars to buf
1158 while(*p) {
1159 if(strchr(sep, *p)) {
1160 break;
1161 } else if(!allow_quoting) {
1162 buf += *(p++);
1163 } else if(*p=='\\') {
1164 p++;
1165 if(strchr(sep, *p)==0) buf += '\\';
1166 if(*p) buf += *(p++);
1167 } else if(*p=='\'') {
1168 buf += '\'';
1169 for(p++; *p && *p!='\''; p++) {
1170 if(*p=='\\') {
1171 p++;
1172 buf += '\\';
1173 if(*p) buf += *p;
1174 } else
1175 buf += *p;
1176 }
1177 buf += '\'';
1178 if(*p=='\'') p++;
1179 } else if(*p=='\"') {
1180 buf += '\"';
1181 for(p++; *p && *p!='\"'; p++) {
1182 if(*p=='\\') {
1183 p++;
1184 buf += '\\';
1185 if(*p) buf += *p;
1186 } else
1187 buf += *p;
1188 }
1189 buf += '\"';
1190 if(*p=='\"') p++;
1191 } else {
1192 buf += *(p++);
1193 }
1194 }
1195
1196 // put buf to r
1197 if(crop_space) buf.cropSpace();
1198 r.push_back(buf);
1199
1200 // cleanup
1201 buf.clear();
1202 } while(*p);
1203
1204 return r;
1205 }
1206
1207
1208 tstring join(const tvector<tstring>& a, const tstring& sep) {
1209 tstring r;
1210
1211 if(a.empty()) return r;
1212 else r = a[0];
1213 for(size_t i = 1; i < a.size(); i++) {
1214 r += sep;
1215 r += a[i];
1216 }
1217 return r;
1218 }
1219
1220
1221 tstring preserveCase(const tstring& from, const tstring& to) {
1222 tstring r(to);
1223
1224 if(from.len() == to.len()) {
1225 // same len
1226 for(size_t i = 0; i < r.len(); i++) {
1227 if(islower(from[i])) r[i] = tolower(r[i]);
1228 else if(isupper(from[i])) r[i] = toupper(r[i]);
1229 }
1230 } else {
1231 // some heuristics
1232 if(from.isLower()) r.lower();
1233 if(from.isUpper()) r.upper();
1234 if(from.isCapitalized()) r.capitalize();
1235 }
1236
1237 return r;
1238 }
1239
1240
// Format a one line progress indicator of at most 'width' characters.
//
// message  text shown at the start of the line (truncated to fit);
//          0 yields a line of 'width' spaces, suitable for clearing the line
// n        current count
// max      final count; 0 selects the open ended form "<message> <n> <spinner>"
// width    requested line width; values below 0 are treated as 0 and values
//          of 1024 or more are limited to 1023
//
// With max != 0 the line reads "<message> <n>/<max> (<percent>%) " followed by
// a bar of '*' (done) and '.' (remaining) filling the rest of the width. If
// the numeric part alone is wider than 'width', the bar is omitted and the
// result may be longer than 'width' (but never exceeds the buffer).
//
// Returns a pointer into one of four static buffers which are used in
// rotation, so a result is overwritten by the fourth following call.
const char *progressBar(const char *message, unsigned int n, unsigned int max, int width) {
    enum {
        kBufSize = 1024, // max size of a buffer
        kNumBuf = 4      // number of static buffers (must be power of two)
    };
    static char tbuf[kBufSize * kNumBuf];
    static int tphase = 0;
    static int phase = 0;
    static const char phasechar[] = "/-~-_-\\|";

    // select next buffer
    tphase = (tphase + 1) & (kNumBuf - 1);
    char *buf = tbuf + kBufSize * tphase;

    // limit width
    if(width >= kBufSize) width = kBufSize - 1;
    if(width < 0) width = 0;

    if(message == 0) {
        // clear line
        snprintf(buf, kBufSize, "%*s", width, "");
        return buf;
    }
    if(max == 0) {
        // open end progress: the suffix " %11u %c" needs 11 + 3 columns
        if(phasechar[phase] == 0) phase = 0;
        int msglen = width - (11 + 3);
        // a negative precision would disable the truncation of the message
        if(msglen < 0) msglen = 0;
        snprintf(buf, kBufSize, "%.*s %11u %c", msglen, message, n, phasechar[phase++]);
        return buf;
    }

    // proportional progress

    // get num chars for max (used as field width for both numbers)
    int nlen = 0;
    for(unsigned int m = max; m != 0; m /= 10) ++nlen;

    // " n/max (ppp.p%) " needs 12 + 2 * nlen columns
    int msglen = width - (12 + 2 * nlen);
    if(msglen < 0) msglen = 0;
    double fraction = double(n) / double(max);
    const int l = snprintf(buf, kBufSize, "%.*s %*u/%*u (%5.1f%%) ", msglen, message, nlen, n, nlen, max, fraction * 100.0);
    if(l < 0) {
        // formatting error: return an empty line rather than garbage
        buf[0] = 0;
        return buf;
    }

    // no room left for the bar (this also covers a truncated snprintf)
    const int rest = width - l;
    if(rest <= 0) return buf;

    // clamp before converting, n may be (much) larger than max
    if(fraction > 1.0) fraction = 1.0;
    const int done = int(fraction * double(rest));

    char *p = buf + l;
    int i = 0;
    for(; i < done; i++) *(p++) = '*';
    for(; i < rest; i++) *(p++) = '.';
    *p = 0;
    return buf;
}
1287
1288
1289 bool tstring::readLine(FILE *file) {
1290 char buf[1024];
1291
1292 clear();
1293 for(;;) {
1294 buf[sizeof(buf)-2] = '\n';
1295 if(!fgets(buf, sizeof(buf), file)) break;
1296 operator+=(buf);
1297 if(buf[sizeof(buf)-2] == '\n') break;
1298 }
1299 if(rep->len) return true;
1300 else return false;
1301 }
1302
1303
1304 size_t tstring::write(FILE *file) const {
1305 return fwrite(rep->data(), 1, rep->len, file);
1306 }
1307
1308
1309 size_t tstring::read(FILE *file, size_t l) {
1310 rep->release();
1311 rep = Rep::create(l);
1312 int r = fread(rep->data(), 1, l, file);
1313 rep->len = r;
1314 rep->terminate();
1315 return r;
1316 }
1317
1318
1319 int tstring::readFile(const char *filename) {
1320 struct stat buf;
1321
1322 if(stat(filename, &buf)) return -1; // does not exist
1323 FILE *f=fopen(filename, "rb");
1324 if(f == 0) return -2; // no permission?
1325 int r = read(f, buf.st_size);
1326 fclose(f);
1327 if(r != buf.st_size) return -3; // read error
1328 return 0;
1329 }
1330
1331
1332 int tstring::writeFile(const char *filename) {
1333 FILE *f = fopen(filename, "wb");
1334 if(f == 0) return -2; // no permission?
1335 int r = write(f);
1336 fclose(f);
1337 if(r != int(length())) return -3; // write error
1338 return 0;
1339 }
1340
1341
1342 tvector<tstring> loadTextFile(const char *fname) {
1343 FILE *f = fopen(fname, "r");
1344 if(f==0) throw TFileOperationErrnoException(fname, "fopen(mode='r')", errno);
1345 tvector<tstring> r;
1346 for(size_t i = 0; r[i].readLine(f); i++);
1347 fclose(f);
1348 r.pop_back();
1349 return r;
1350 }
1351
1352
1353 tvector<tstring> loadTextFile(FILE *file) {
1354 tvector<tstring> r;
1355 for(size_t i = 0; r[i].readLine(file); i++);
1356 r.pop_back();
1357 return r;
1358 }
1359