"Fossies" - the Fresh Open Source Software Archive 
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) C and C++ source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively you can here
view or
download the uninterpreted source code file.
1 /* Copyright (C) 2002 Ghostgum Software Pty Ltd. All rights reserved.
2
3 This software is provided AS-IS with no warranty, either express or
4 implied.
5
6 This software is distributed under licence and may not be copied,
7 modified or distributed except as expressly authorised under the terms
8 of the licence contained in the file LICENCE in this distribution.
9
10 For more information about licensing, please refer to
11 http://www.ghostgum.com.au/ or contact Ghostgum Software Pty Ltd,
12 218 Gallaghers Rd, Glen Waverley VIC 3150, AUSTRALIA,
13 Fax +61 3 9886 6616.
14 */
15
16 /* $Id: cmbcs.c,v 1.6 2002/08/01 08:27:52 ghostgum Exp $ */
17 /* Multiple Byte Character Set */
18
19 /*
20 * GSview uses Unicode on Windows.
21 * On Linux it may use a multiple byte character set,
22 * such as UTF-8, EUC, Shift-JIS.
23 * This file provides support for stepping over multiple byte
24 * characters. This is needed when searching for particular
25 * characters such as a tab, space, slash or backslash.
26 * We assume that the null character will not occur within
27 * a MBCS string, and use the C strlen(str)+1 to get the byte
28 * count for allocating memory.
29 *
30 * For Japanese text on Unix, EUC is most commonly used, SJIS is often
31 * used, UTF-8 and UCS-2 are rarely used.
32 * For Japanese filenames on Unix, SJIS is most commonly used
33 * (for compatibility with Windows), EUC and UTF-8 are sometimes used,
34 * UCS-2 is rarely used.
35 * GSview only searches for characters in TCHAR strings, so we may be able
36 * to handle filenames in a different encoding by doing the translation
37 * in cs_to_narrow().
38 *
39 * FIX: explain TCHAR, cs, MBCS.
40 */
41
42 #include "common.h"
43
44 #ifndef UNICODE
45
46 CODEPAGE global_codepage = CODEPAGE_SBCS; /* GLOBAL: code page consulted by char_next(); starts as single byte (CODEPAGE_SBCS) */
47
48 /* Return number of bytes from current character to start of
49 * next character.
50 */
51 int char_next(const char *str)
52 {
53 int i;
54 const unsigned char *t = (const unsigned char *)str;
55 switch (global_codepage) {
56 default:
57 case CODEPAGE_SBCS:
58 i = 1;
59 break;
60 case CODEPAGE_UTF8:
61 if (t[0] == 0)
62 i = 0;
63 else if ((t[0] > 0) && (t[0] <= 0x7f))
64 i = 1;
65 else {
66 /* multiple byte UTF-8 */
67 /* scan until we find a byte in a suitable range */
68 i = 0;
69 while (t[i] && (t[i] >= 0x80) && (t[i] <= 0xbf))
70 i++;
71 }
72 break;
73 case CODEPAGE_EUC:
74 if (t[0] == 0x8f) {
75 /* 3 bytes */
76 if (t[1] == '\0')
77 i = 1;
78 else if (t[2] == '\0')
79 i = 2;
80 else
81 i = 3;
82 }
83 else if (t[0] & 0x80) {
84 /* 2 bytes */
85 if (str[1] == '\0')
86 i = 1;
87 else
88 i = 2;
89 }
90 else
91 i = 1;
92 case CODEPAGE_SJIS:
93 if (t[0] == 0) {
94 i = 0;
95 }
96 else if ((t[0] > 0) && (t[0] <= 0x7f)) {
97 i = 1;
98 }
99 else if ((t[0] >= 0x80) && (t[0] <= 0xbf)) {
100 if (t[1] == '\0')
101 i = 1;
102 else
103 i = 2;
104 }
105 else if ((t[0] >= 0xa0) && (t[0] <= 0xdf)) {
106 i = 1;
107 }
108 else if ((t[0] >= 0xe0) && (t[0] <= 0xef)) {
109 if (t[1] == '\0')
110 i = 1;
111 else
112 i = 2;
113 }
114 else
115 i = 1;
116 }
117 return i;
118 }
119
120 /* This implementation is for systems that don't support wide characters */
121 /* Convert a cs (wide or narrow) string to a narrow string.
122 * If the output narrow string needs to be null terminated,
123 * the input string length needs to include the null.
124 * Returns the number of characters written to the narrow string.
125 * If nlen is 0, the function returns the needed buffer size for nstr.
126 * If the function fails, it returns 0.
127 */
128 int
129 char_to_narrow(char *nstr, int nlen, LPCTSTR wstr, int wlen)
130 {
131 /* no translation */
132 if (nlen == 0)
133 return wlen;
134 if (nlen < wlen)
135 return 0;
136 memcpy(nstr, wstr, wlen);
137 return wlen;
138 }
139
140
141 /* opposite of char_to_narrow */
142 int
143 narrow_to_char(TCHAR *wstr, int wlen, const char *nstr, int nlen)
144 {
145 /* no translation */
146 if (wlen == 0)
147 return nlen;
148 if (wlen < nlen)
149 return 0;
150 memcpy(wstr, nstr, nlen);
151 return nlen;
152 }
153
154 #endif
155
/* Convert ISO-Latin1 str to UTF-8 ustr.
 * This is needed for the gtk+ user interface.
 *
 * ustr  - destination buffer, or NULL to only compute the length
 * ulen  - size of ustr in bytes
 * str   - ISO-Latin1 input
 * slen  - number of input bytes to convert; no terminator is added,
 *         so include the null in slen if the output must be terminated
 *
 * Returns the byte length of the UTF-8 string.
 * Nothing is written if ustr is NULL or ulen is smaller than the
 * returned length.
 */
int
latin1_to_utf8(char *ustr, int ulen, const char *str, int slen)
{
    /* treat input as unsigned bytes so the shifts below are well defined */
    const unsigned char *src = (const unsigned char *)str;
    int i, j;
    int len = slen;

    /* every byte >= 0x80 becomes a two byte UTF-8 sequence */
    for (i = 0; i < slen; i++) {
        if (src[i] & 0x80)
            len++;
    }

    /* copy only when the destination can hold the whole result */
    if ((ustr != NULL) && (ulen >= len)) {
        for (i = 0, j = 0; i < slen; i++) {
            unsigned char c = src[i];
            if (c & 0x80) {
                ustr[j++] = (char)(0xc0 | (c >> 6));
                ustr[j++] = (char)(0x80 | (c & 0x3f));
            }
            else {
                ustr[j++] = (char)c;
            }
        }
    }
    return len;
}
184