w32tex
About: TeX Live provides a comprehensive TeX system including all the major TeX-related programs, macro packages, and fonts that are free software. Windows sources.
  Fossies Dox: w32tex-src.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

UTF8.cc
Go to the documentation of this file.
1 //========================================================================
2 //
3 // UTF8.cc
4 //
5 // Copyright 2001-2017 Glyph & Cog, LLC
6 //
7 //========================================================================
8 
9 #include <aconf.h>
10 #include "UTF8.h"
11 
// Encode the code point <u> as UTF-8 into <buf>, which has room for
// <bufSize> bytes.
//
// Returns the number of bytes stored (1 to 4).  Returns 0, storing
// nothing, if <u> is greater than 0x10ffff or if the encoding needs
// more than <bufSize> bytes.  No terminating NUL is written.
//
// Values in the surrogate range (0xd800..0xdfff) are not rejected;
// they are emitted as ordinary three-byte sequences.
int mapUTF8(Unicode u, char *buf, int bufSize) {
  int nBytes;

  // pick the encoded length from the magnitude of the code point
  if (u <= 0x0000007f) {
    nBytes = 1;
  } else if (u <= 0x000007ff) {
    nBytes = 2;
  } else if (u <= 0x0000ffff) {
    nBytes = 3;
  } else if (u <= 0x0010ffff) {
    nBytes = 4;
  } else {
    return 0;
  }
  if (bufSize < nBytes) {
    return 0;
  }

  // lead byte carries the length marker plus the top bits; every
  // following byte is 10xxxxxx with the next six bits
  switch (nBytes) {
  case 1:
    buf[0] = (char)u;
    break;
  case 2:
    buf[0] = (char)(0xc0 + (u >> 6));
    buf[1] = (char)(0x80 + (u & 0x3f));
    break;
  case 3:
    buf[0] = (char)(0xe0 + (u >> 12));
    buf[1] = (char)(0x80 + ((u >> 6) & 0x3f));
    buf[2] = (char)(0x80 + (u & 0x3f));
    break;
  default:
    buf[0] = (char)(0xf0 + (u >> 18));
    buf[1] = (char)(0x80 + ((u >> 12) & 0x3f));
    buf[2] = (char)(0x80 + ((u >> 6) & 0x3f));
    buf[3] = (char)(0x80 + (u & 0x3f));
    break;
  }
  return nBytes;
}
47 
// Encode the code point <u> as one UCS-2 code unit, high-order byte
// first, into <buf>, which has room for <bufSize> bytes.
//
// Returns 2 on success.  Returns 0, storing nothing, if <u> is greater
// than 0xffff (no surrogate pair is generated) or if <bufSize> is less
// than 2.
int mapUCS2(Unicode u, char *buf, int bufSize) {
  if (u > 0xffff || bufSize < 2) {
    return 0;
  }
  buf[0] = (char)((u >> 8) & 0xff);
  buf[1] = (char)(u & 0xff);
  return 2;
}
60 
62  Guchar c0, c1, c2, c3, c4, c5;
63 
64  if (*i >= s->getLength()) {
65  return gFalse;
66  }
67  c0 = (Guchar)s->getChar((*i)++);
68  if (c0 < 0x80) {
69  *u = (Unicode)c0;
70  } else if (c0 < 0xe0) {
71  if (*i < s->getLength() &&
72  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80) {
73  *i += 1;
74  *u = (Unicode)(((c0 & 0x1f) << 6) |
75  (c1 & 0x3f));
76  } else {
77  *u = (Unicode)c0;
78  }
79  } else if (c0 < 0xf0) {
80  if (*i < s->getLength() - 1 &&
81  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
82  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80) {
83  *i += 2;
84  *u = (Unicode)(((c0 & 0x0f) << 12) |
85  ((c1 & 0x3f) << 6) |
86  (c2 & 0x3f));
87  } else {
88  *u = (Unicode)c0;
89  }
90  } else if (c0 < 0xf8) {
91  if (*i < s->getLength() - 2 &&
92  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
93  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 &&
94  ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80) {
95  *i += 3;
96  *u = (Unicode)(((c0 & 0x07) << 18) |
97  ((c1 & 0x3f) << 12) |
98  ((c2 & 0x3f) << 6) |
99  (c3 & 0x3f));
100  } else {
101  *u = (Unicode)c0;
102  }
103  } else if (c0 < 0xfc) {
104  if (*i < s->getLength() - 3 &&
105  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
106  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 &&
107  ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80 &&
108  ((c4 = (Guchar)s->getChar(*i + 3)) & 0xc0) == 0x80) {
109  *i += 4;
110  *u = (Unicode)(((c0 & 0x03) << 24) |
111  ((c1 & 0x3f) << 18) |
112  ((c2 & 0x3f) << 12) |
113  ((c3 & 0x3f) << 6) |
114  (c4 & 0x3f));
115  } else {
116  *u = (Unicode)c0;
117  }
118  } else if (c0 < 0xfe) {
119  if (*i < s->getLength() - 4 &&
120  ((c1 = (Guchar)s->getChar(*i)) & 0xc0) == 0x80 &&
121  ((c2 = (Guchar)s->getChar(*i + 1)) & 0xc0) == 0x80 &&
122  ((c3 = (Guchar)s->getChar(*i + 2)) & 0xc0) == 0x80 &&
123  ((c4 = (Guchar)s->getChar(*i + 3)) & 0xc0) == 0x80 &&
124  ((c5 = (Guchar)s->getChar(*i + 4)) & 0xc0) == 0x80) {
125  *i += 5;
126  *u = (Unicode)(((c0 & 0x01) << 30) |
127  ((c1 & 0x3f) << 24) |
128  ((c2 & 0x3f) << 18) |
129  ((c3 & 0x3f) << 12) |
130  ((c4 & 0x3f) << 6) |
131  (c5 & 0x3f));
132  } else {
133  *u = (Unicode)c0;
134  }
135  } else {
136  *u = (Unicode)c0;
137  }
138  return gTrue;
139 }
140 
142  int w0, w1;
143 
144  if (*i >= s->getLength() - 1) {
145  return gFalse;
146  }
147  w0 = ((s->getChar(*i) & 0xff) << 8) | (s->getChar(*i + 1) & 0xff);
148  *i += 2;
149  if (w0 < 0xd800 || w0 >= 0xe000) {
150  *u = (Unicode)w0;
151  } else {
152  if (*i < s->getLength() - 1) {
153  w1 = ((s->getChar(*i) & 0xff) << 8) | (s->getChar(*i + 1) & 0xff);
154  *i += 2;
155  *u = 0x10000 + ((w0 - 0xd800) << 10) + (w1 - 0xdc00);
156  } else {
157  *u = (Unicode)w0;
158  }
159  }
160  return gTrue;
161 }
162 
164  int w0, w1;
165 
166  if (*i >= s->getLength() - 1) {
167  return gFalse;
168  }
169  w0 = (s->getChar(*i) & 0xff) | ((s->getChar(*i + 1) & 0xff) << 8);
170  *i += 2;
171  if (w0 < 0xd800 || w0 >= 0xe000) {
172  *u = (Unicode)w0;
173  } else {
174  if (*i < s->getLength() - 1) {
175  w1 = (s->getChar(*i) & 0xff) | ((s->getChar(*i + 1) & 0xff) << 8);
176  *i += 2;
177  *u = 0x10000 + ((w0 - 0xd800) << 10) + (w1 - 0xdc00);
178  } else {
179  *u = (Unicode)w0;
180  }
181  }
182  return gTrue;
183 }
int mapUCS2(Unicode u, char *buf, int bufSize)
Definition: UTF8.cc:48
GBool getUTF16LE(GString *s, int *i, Unicode *u)
Definition: UTF8.cc:163
GBool getUTF16BE(GString *s, int *i, Unicode *u)
Definition: UTF8.cc:141
GBool getUTF8(GString *s, int *i, Unicode *u)
Definition: UTF8.cc:61
int mapUTF8(Unicode u, char *buf, int bufSize)
Definition: UTF8.cc:12
#define w1
#define s
Definition: afcover.h:80
#define w0
#define gFalse
Definition: gtypes.h:18
int GBool
Definition: gtypes.h:16
#define gTrue
Definition: gtypes.h:17
unsigned char Guchar
Definition: gtypes.h:24
small capitals from c petite p scientific f u
Definition: afcover.h:88
small capitals from c petite p scientific i
Definition: afcover.h:80
unsigned int Unicode
Definition: CharTypes.h:13
#define buf
int getLength(char *s)
Definition: lengths.c:99
#define c2
Definition: t1io.c:53
#define c1
Definition: t1io.c:52