ucommon  7.0.0
About: GNU uCommon C++ is a portable and optimized class framework for writing C++ applications that need to use threads and support concurrent synchronization, and that use sockets, XML parsing, object serialization, thread-optimized string and data structure classes, etc.
  Fossies Dox: ucommon-7.0.0.tar.gz  ("inofficial" and yet experimental doxygen-generated source code documentation)  

unicode.h
Go to the documentation of this file.
1 // Copyright (C) 2009-2014 David Sugar, Tycho Softworks.
2 // Copyright (C) 2015 Cherokees of Idaho.
3 //
4 // This file is part of GNU uCommon C++.
5 //
6 // GNU uCommon C++ is free software: you can redistribute it and/or modify
7 // it under the terms of the GNU Lesser General Public License as published
8 // by the Free Software Foundation, either version 3 of the License, or
9 // (at your option) any later version.
10 //
11 // GNU uCommon C++ is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU Lesser General Public License for more details.
15 //
16 // You should have received a copy of the GNU Lesser General Public License
17 // along with GNU uCommon C++. If not, see <http://www.gnu.org/licenses/>.
18 
33 #ifndef _UCOMMON_UNICODE_H_
34 #define _UCOMMON_UNICODE_H_
35 
36 #ifndef _UCOMMON_STRING_H_
37 #include <ucommon/string.h>
38 #endif
39 
40 #ifdef nil
41 #undef nil
42 #endif
43 
44 namespace ucommon {
45 
// 32-bit signed integer type used throughout this header for a single
// character value (see utf8::codepoint, utf8::get, UString::at).
// Presumably one UCS-4 code point, going by the name -- confirm.
50 typedef int32_t ucs4_t;
51 
// 16-bit signed integer type; in this header it only appears as the element
// type returned by utf8::wdup / strwdup and released by dupfree<ucs2_t*>.
55 typedef int16_t ucs2_t;
56 
// Untyped pointer used wherever the API takes or returns a "unicode" buffer
// (utf8::chars, utf8::pack, utf8::unpack, unidup). The element width is not
// visible from this header.
// Note: because this is a pointer typedef, `const unicode_t` in the
// signatures below means `void * const` (a const pointer), not a pointer
// to const data.
60 typedef void *unicode_t;
61 
67 class __EXPORT utf8
// Body of ucommon::utf8 (the cross-reference index of this listing places the
// class definition at unicode.h:67). Every public member is static, and both
// constructors are protected and empty, so the class acts as a collection of
// helper functions for char-based text and as a base class (UString derives
// from it) rather than as a value type.
68 {
69 protected:
    // Empty default constructor; protected, so only derived classes (and
    // members) can construct a utf8 subobject.
70  inline utf8() {};
71 
    // Empty copy constructor; the parameter is unused because the class
    // declares no non-static data members.
72  inline utf8(const utf8& copy) {};
73 
74 public:
    // Constant defined outside this header. Presumably the byte size of one
    // element of a unicode_t buffer -- confirm in unicode.cpp.
78  static const unsigned ucsize;
79 
    // Constant string pointer defined outside this header. The `#undef nil`
    // near the top of the file exists so this member name cannot collide
    // with a `nil` macro.
83  static const char *nil;
84 
    // Takes a pointer into encoded text and returns an unsigned size.
    // Presumably the byte length of the encoded character at `codepoint`
    // -- confirm.
90  static unsigned size(const char *codepoint);
91 
    // Returns a count for the given string. UString::count() and
    // utf8_pointer::len() both forward to this function. Presumably counts
    // characters rather than bytes -- confirm.
97  static size_t count(const char *string);
98 
    // Returns a pointer into `string` for a signed `position`.
    // NOTE(review): the signed type suggests negative positions are
    // meaningful (e.g. relative to the end) -- confirm.
105  static char *offset(char *string, ssize_t position);
106 
    // Decodes and returns the character value found at `encoded`; used by
    // utf8_pointer::operator*().
112  static ucs4_t codepoint(const char *encoded);
113 
    // Overload taking a unicode buffer; returns a size_t. Presumably the
    // number of chars needed to encode the whole buffer -- confirm.
119  static size_t chars(const unicode_t string);
120 
    // Overload taking one character value; returns a size_t. Presumably the
    // number of chars needed to encode that one character -- confirm.
126  static size_t chars(ucs4_t character);
127 
    // Converts from a unicode buffer into the char buffer `text`, whose
    // capacity is given by `size`; returns a size_t (meaning not visible
    // here -- confirm).
134  static size_t unpack(const unicode_t string, char *text, size_t size);
135 
    // Converts from the char text `cp` into the unicode buffer `unicode`;
    // `len` is a size limit (units not visible here -- confirm).
143  static size_t pack(unicode_t unicode, const char *cp, size_t len);
144 
    // Returns a ucs4_t buffer built from `string`. The dupfree<ucs4_t*>
    // specialization in this header releases such buffers with ::free, so
    // the result is presumably heap-allocated with malloc -- confirm.
148  static ucs4_t *udup(const char *string);
149 
    // Same as udup() but returns a ucs2_t buffer; see dupfree<ucs2_t*>.
153  static ucs2_t *wdup(const char *string);
154 
    // Searches `string` for `character`; `start` defaults to 0. Returns a
    // pointer into the text (result when not found is not visible here).
162  static const char *find(const char *string, ucs4_t character, size_t start = 0);
163 
    // Reverse counterpart of find(); `end` defaults to (size_t)-1, i.e. the
    // largest size_t value.
171  static const char *rfind(const char *string, ucs4_t character, size_t end = (size_t)-1l);
172 
    // Returns an unsigned count relating `character` to `string`; presumably
    // the number of occurrences -- confirm.
179  static unsigned ccount(const char *string, ucs4_t character);
180 
    // Returns a character value read from `cp`.
    // NOTE(review): relationship to codepoint() is not visible here.
186  static ucs4_t get(const char *cp);
187 
    // Writes `character` into `buf`. The signature carries no buffer size,
    // so the caller must supply a large enough buffer.
194  static void put(ucs4_t character, char *buf);
195 };
196 
// String class that derives from both String (declared in ucommon/string.h)
// and the utf8 helper class, and adds operations expressed in terms of
// ucs4_t characters and unicode_t buffers.
// NOTE(review): every member below, including all constructors and the
// destructor, follows the `protected:` label; no `public:` label is visible
// in this listing. Confirm against the original header whether that is
// intended.
203 class __EXPORT UString : public String, public utf8
204 {
205 protected:
    // Default constructor (defined outside this header).
209  UString();
210 
    // Constructs from a size value; presumably a storage size -- confirm.
215  UString(size_t size);
216 
    // Constructs from a unicode buffer.
221  UString(const unicode_t text);
222 
    // Constructs from char text together with a size value.
229  UString(const char *text, size_t size);
230 
    // Constructs from a [text, end) style pair of pointers to unicode_t.
    // NOTE(review): these are pointers to unicode_t (i.e. void **) --
    // confirm that this is the intended element type.
237  UString(const unicode_t *text, const unicode_t *end);
238 
    // Copy constructor.
244  UString(const UString& existing);
245 
    // Virtual destructor.
250  virtual ~UString();
251 
    // Returns a UString selected by a starting `codepoint` index and a
    // `size` (default 0; meaning of 0 is not visible here -- confirm).
258  UString get(size_t codepoint, size_t size = 0) const;
259 
    // Overload that fills the caller's unicode buffer; `size` bounds it.
    // Returns a size_t (meaning not visible here).
266  size_t get(unicode_t unicode, size_t size) const;
267 
    // Replaces the content from a unicode buffer.
272  void set(const unicode_t unicode);
273 
    // Adds content from a unicode buffer.
278  void add(const unicode_t unicode);
279 
    // Returns the character value at a signed `position`; operator[]
    // forwards here.
285  ucs4_t at(int position) const;
286 
    // Call-operator shorthand for get(unicode, size).
293  inline size_t operator()(unicode_t unicode, size_t size) const {
294  return get(unicode, size);
295  }
296 
    // Returns a sub-string selected by a signed `codepoint` offset and a
    // `size`. left(), right() and copy() are all expressed through this
    // operator.
303  UString operator()(int codepoint, size_t size) const;
304 
    // Equivalent to operator()(0, size).
310  inline UString left(size_t size) const {
311  return operator()(0, size);
312  }
313 
    // Equivalent to operator()(-offset, 0): passes the negated offset and a
    // size of 0.
    // NOTE(review): `offset` is narrowed from size_t to int before negation.
319  inline UString right(size_t offset) const {
320  return operator()(-((int)offset), 0);
321  }
322 
    // Equivalent to operator()(offset, size) with `offset` cast to int.
329  inline UString copy(size_t offset, size_t size) const {
330  return operator()((int)offset, size);
331  }
332 
    // Removes content selected by `offset` and `size` (default 0).
338  void cut(size_t offset, size_t size = 0);
339 
    // Inserts `text` at `offset`; `size` defaults to 0.
346  void paste(size_t offset, const char *text, size_t size = 0);
347 
    // Returns a char pointer for a signed `offset`.
355  const char *operator()(int offset) const;
356 
    // Index shorthand for at(position); the call is qualified with
    // UString:: so it binds statically to this class's at().
362  inline ucs4_t operator[](int position) const {
363  return UString::at(position);
364  }
365 
    // Returns utf8::count() of the text held in `str` (`str` is not
    // declared in this header; presumably a member inherited from String).
    // NOTE(review): `str` is dereferenced without a null check -- confirm
    // that String guarantees it is non-null here.
370  inline size_t count(void) const {
371  return (size_t)utf8::count(str->text);
372  }
373 
    // Member counterpart of utf8::ccount for this string's own text.
379  unsigned ccount(ucs4_t character) const;
380 
    // Member counterpart of utf8::find; `start` defaults to 0.
387  const char *find(ucs4_t character, size_t start = 0) const;
388 
    // Member counterpart of utf8::rfind; `end` defaults to `npos`, which is
    // not declared in this header (presumably inherited from String).
395  const char *rfind(ucs4_t character, size_t end = npos) const;
396 };
397 
403 class __EXPORT utf8_pointer
// Body of ucommon::utf8_pointer (the cross-reference index of this listing
// places the class definition at unicode.h:403). The class wraps a single
// pointer into text and offers pointer-like operators; dereferencing decodes
// through utf8::codepoint.
404 {
405 protected:
    // The wrapped position, stored as a byte pointer; NULL means "no text"
    // (see operator bool / operator!).
406  uint8_t *text;
407 
408 public:
    // Default constructor (defined outside this header).
412  utf8_pointer();
413 
    // Constructs from a char string. Not marked explicit, so a const char*
    // converts implicitly.
418  utf8_pointer(const char *string);
419 
    // NOTE(review): listing line 424 is absent from this listing; a further
    // declaration may have been dropped by the extraction -- check the
    // original header.
425 
    // Prefix increment; returns *this by reference.
430  utf8_pointer& operator ++();
431 
    // Prefix decrement; returns *this by reference.
436  utf8_pointer& operator --();
437 
    // Moves by a signed `offset`; returns *this by reference. Presumably
    // counted in characters rather than bytes -- confirm.
443  utf8_pointer& operator +=(long offset);
444 
    // Reverse counterpart of operator+=.
450  utf8_pointer& operator -=(long offset);
451 
    // Returns a new utf8_pointer offset from this one; *this is unchanged
    // (const member).
457  utf8_pointer operator+(long offset) const;
458 
    // Reverse counterpart of operator+.
464  utf8_pointer operator-(long offset) const;
465 
    // True when the wrapped pointer is non-NULL.
470  inline operator bool() const {
471  return text != NULL;
472  }
473 
    // True when the wrapped pointer is NULL.
478  inline bool operator!() const {
479  return text == NULL;
480  }
481 
    // Returns the character value at a signed index relative to the current
    // position.
487  ucs4_t operator[](long codepoint) const;
488 
    // Re-targets the pointer at `string`; returns *this by reference.
494  utf8_pointer& operator=(const char *string);
495 
    // Named form of stepping forward (defined outside this header).
499  void inc(void);
500 
    // Named form of stepping backward (defined outside this header).
504  void dec(void);
505 
    // Compares addresses, not string contents.
511  inline bool operator==(const char *string) const {
512  return (const char *)text == string;
513  }
514 
    // Address inequality; the exact negation of operator==.
520  inline bool operator!=(const char *string) const {
521  return (const char *)text != string;
522  }
523 
    // Decodes the character at the current position via utf8::codepoint.
    // No NULL check is made here before the call.
528  inline ucs4_t operator*() const {
529  return utf8::codepoint((const char *)text);
530  }
531 
    // Returns the wrapped pointer as a mutable char*, even though the
    // member function is const.
536  inline char *c_str(void) const {
537  return (char *)text;
538  }
539 
    // Implicit conversion to char*; returns the same value as c_str().
544  inline operator char*() const {
545  return (char *)text;
546  }
547 
    // Returns utf8::count() of the text starting at the current position.
552  inline size_t len(void) const {
553  return utf8::count((const char *)text);
554  }
555 };
556 
// Free-function shorthand: forwards `string` to utf8::udup and returns its
// result unchanged. The dupfree<ucs4_t*> specialization in this header
// releases such a buffer with ::free.
557 inline ucs4_t *strudup(const char *string) {
558  return utf8::udup(string);
559 }
560 
// Free-function shorthand: forwards `string` to utf8::wdup and returns its
// result unchanged. The dupfree<ucs2_t*> specialization in this header
// releases such a buffer with ::free.
561 inline ucs2_t *strwdup(const char *string) {
562  return utf8::wdup(string);
563 }
564 
// Exported function that takes char text and returns a unicode_t buffer.
// Only declared here; the cross-reference index of this listing places the
// definition in unicode.cpp. The dupfree<unicode_t> specialization in this
// header releases such a buffer with ::free, so the result is presumably
// malloc-allocated -- confirm.
565 __EXPORT unicode_t unidup(const char *string);
566 
// Explicit specialization of the dupfree template (the primary template is
// not declared in this header) for ucs2_t buffers: hands the pointer to the
// global ::free.
567 template<>
568 inline void dupfree<ucs2_t*>(ucs2_t *string) {
569  ::free(string);
570 }
571 
// Explicit specialization of the dupfree template (the primary template is
// not declared in this header) for ucs4_t buffers: hands the pointer to the
// global ::free.
572 template<>
573 inline void dupfree<ucs4_t*>(ucs4_t *string) {
574  ::free(string);
575 }
576 
// Explicit specialization of the dupfree template (the primary template is
// not declared in this header) for untyped unicode_t buffers: hands the
// pointer to the global ::free.
577 template<>
578 inline void dupfree<unicode_t>(unicode_t string) {
579  ::free(string);
580 }
581 
585 typedef UString ustring_t;
586 
590 typedef utf8_pointer utf8_t;
591 
592 } // namespace ucommon
593 
594 #endif
ucommon::UString::operator()
size_t operator()(unicode_t unicode, size_t size) const
Definition: unicode.h:293
ucommon::UString::at
ucs4_t at(int position) const
Definition: unicode.cpp:557
ucommon::dupfree< unicode_t >
void dupfree< unicode_t >(unicode_t string)
Definition: unicode.h:578
ucommon::utf8_pointer::len
size_t len(void) const
Definition: unicode.h:552
ucommon::operator+
std::string & operator+(std::string &target, String &source)
Definition: stream.h:577
ucommon
Definition: access.cpp:23
ucommon::utf8_pointer::operator!=
bool operator!=(const char *string) const
Definition: unicode.h:520
ucommon::utf8::wdup
static ucs2_t * wdup(const char *string)
Definition: unicode.cpp:328
ucommon::unicode_t
void * unicode_t
Definition: unicode.h:60
ucommon::UString::copy
UString copy(size_t offset, size_t size) const
Definition: unicode.h:329
ucommon::String
Definition: string.h:78
ucommon::UString::left
UString left(size_t size) const
Definition: unicode.h:310
ucommon::utf8::count
static size_t count(const char *string)
Definition: unicode.cpp:158
ucommon::utf8
Definition: unicode.h:67
ucommon::copy
T copy(const T &src)
Definition: generics.h:395
ucommon::strwdup
ucs2_t * strwdup(const char *string)
Definition: unicode.h:561
ucommon::ucs4_t
int32_t ucs4_t
Definition: unicode.h:50
ucommon::dupfree< ucs4_t * >
void dupfree< ucs4_t * >(ucs4_t *string)
Definition: unicode.h:573
__EXPORT
#define __EXPORT
Definition: config.h:49
ucommon::utf8::udup
static ucs4_t * udup(const char *string)
Definition: unicode.cpp:309
ucommon::utf8_pointer::text
uint8_t * text
Definition: unicode.h:406
ucommon::utf8::nil
static const char * nil
Definition: unicode.h:83
ucommon::ustring_t
UString ustring_t
Definition: unicode.h:585
ucommon::utf8_pointer::operator==
bool operator==(const char *string) const
Definition: unicode.h:511
ucommon::utf8_pointer
Definition: unicode.h:403
ucommon::utf8_pointer::operator!
bool operator!() const
Definition: unicode.h:478
ucommon::UString::operator[]
ucs4_t operator[](int position) const
Definition: unicode.h:362
ucommon::UString::count
size_t count(void) const
Definition: unicode.h:370
ucommon::str
String str(Socket &so, size_t size)
Definition: socket.cpp:3507
ucommon::strudup
ucs4_t * strudup(const char *string)
Definition: unicode.h:557
ucommon::utf8_pointer::c_str
char * c_str(void) const
Definition: unicode.h:536
ucommon::utf8::codepoint
static ucs4_t codepoint(const char *encoded)
Definition: unicode.cpp:115
ucommon::utf8::utf8
utf8(const utf8 &copy)
Definition: unicode.h:72
ucommon::UString
Definition: unicode.h:203
string.h
ucommon::utf8_t
utf8_pointer utf8_t
Definition: unicode.h:590
ucommon::unidup
unicode_t unidup(const char *string)
Definition: unicode.cpp:235
ucommon::dupfree< ucs2_t * >
void dupfree< ucs2_t * >(ucs2_t *string)
Definition: unicode.h:568
ucommon::operator+=
std::string & operator+=(std::string &target, String &source)
Definition: stream.h:581
ucommon::UString::right
UString right(size_t offset) const
Definition: unicode.h:319
ucommon::utf8_pointer::operator*
ucs4_t operator*() const
Definition: unicode.h:528
ucommon::utf8::utf8
utf8()
Definition: unicode.h:70
ucommon::ucs2_t
int16_t ucs2_t
Definition: unicode.h:55