w32tex
About: TeX Live provides a comprehensive TeX system including all the major TeX-related programs, macro packages, and fonts that are free software. Windows sources.
  Fossies Dox: w32tex-src.tar.xz  ("unofficial" and yet experimental doxygen-generated source code documentation)  

aptex-unicode.c
Go to the documentation of this file.
1 /*
2  Copyright 2017, 2018 Clerk Ma
3 
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; either version 2 of the License, or
7  (at your option) any later version.
8 
9  This program is distributed in the hope that it will be useful, but
10  WITHOUT ANY WARRANTY; without even the implied warranty of
11  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  General Public License for more details.
13 
14  You should have received a copy of the GNU General Public License
15  along with this program; if not, write to the Free Software
16  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17  02110-1301 USA.
18 */
19 
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <stdint.h>
23 
24 /*
25  JIS X 4051 character classes
26 
27  1 Opening parentheses and quotation marks
28  1-1-42 0x214A U+0028 # LEFT PARENTHESIS Fullwidth: U+FF08
29  1-1-46 0x214E U+005B # LEFT SQUARE BRACKET Fullwidth: U+FF3B
30  1-1-48 0x2150 U+007B # LEFT CURLY BRACKET Fullwidth: U+FF5B
31  1-1-44 0x214C U+3014 # LEFT TORTOISE SHELL BRACKET
32  1-1-50 0x2152 U+3008 # LEFT ANGLE BRACKET
33  1-1-52 0x2154 U+300A # LEFT DOUBLE ANGLE BRACKET
34  1-1-54 0x2156 U+300C # LEFT CORNER BRACKET
35  1-1-56 0x2158 U+300E # LEFT WHITE CORNER BRACKET
36  1-1-58 0x215A U+3010 # LEFT BLACK LENTICULAR BRACKET
37  1-13-64 0x2D60 U+301D # REVERSED DOUBLE PRIME QUOTATION MARK
38  1-1-38 0x2146 U+2018 # LEFT SINGLE QUOTATION MARK
39  1-1-40 0x2148 U+201C # LEFT DOUBLE QUOTATION MARK
40  1-2-54 0x2256 U+FF5F # FULLWIDTH LEFT WHITE PARENTHESIS
41  1-9-8 0x2928 U+00AB # LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
42  2 Closing parentheses and quotation marks
43  1-1-4 0x2124 U+002C # COMMA Fullwidth: U+FF0C
44  1-1-43 0x214B U+0029 # RIGHT PARENTHESIS Fullwidth: U+FF09
45  1-1-47 0x214F U+005D # RIGHT SQUARE BRACKET Fullwidth: U+FF3D
46  1-1-49 0x2151 U+007D # RIGHT CURLY BRACKET Fullwidth: U+FF5D
47  1-1-2 0x2122 U+3001 # IDEOGRAPHIC COMMA
48  1-1-45 0x214D U+3015 # RIGHT TORTOISE SHELL BRACKET
49  1-1-51 0x2153 U+3009 # RIGHT ANGLE BRACKET
50  1-1-53 0x2155 U+300B # RIGHT DOUBLE ANGLE BRACKET
51  1-1-55 0x2157 U+300D # RIGHT CORNER BRACKET
52  1-1-57 0x2159 U+300F # RIGHT WHITE CORNER BRACKET
53  1-1-59 0x215B U+3011 # RIGHT BLACK LENTICULAR BRACKET
54  1-2-57 0x2259 U+3019 # RIGHT WHITE TORTOISE SHELL BRACKET
55  1-2-59 0x225B U+3017 # RIGHT WHITE LENTICULAR BRACKET
56  1-13-65 0x2D61 U+301F # LOW DOUBLE PRIME QUOTATION MARK
57  1-1-39 0x2147 U+2019 # RIGHT SINGLE QUOTATION MARK
58  1-1-41 0x2149 U+201D # RIGHT DOUBLE QUOTATION MARK
59  1-2-55 0x2257 U+FF60 # FULLWIDTH RIGHT WHITE PARENTHESIS
60  1-9-18 0x2932 U+00BB # RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
61  3 Japanese characters prohibited from starting lines
62  1-1-19 0x2133 U+30FD # KATAKANA ITERATION MARK
63  1-1-20 0x2134 U+30FE # KATAKANA VOICED ITERATION MARK
64  1-1-28 0x213C U+30FC # KATAKANA-HIRAGANA PROLONGED SOUND MARK
65  1-5-1 0x2521 U+30A1 # KATAKANA LETTER SMALL A
66  1-5-3 0x2523 U+30A3 # KATAKANA LETTER SMALL I
67  1-5-5 0x2525 U+30A5 # KATAKANA LETTER SMALL U
68  1-5-7 0x2527 U+30A7 # KATAKANA LETTER SMALL E
69  1-5-9 0x2529 U+30A9 # KATAKANA LETTER SMALL O
70  1-5-35 0x2543 U+30C3 # KATAKANA LETTER SMALL TU
71  1-5-67 0x2563 U+30E3 # KATAKANA LETTER SMALL YA
72  1-5-69 0x2565 U+30E5 # KATAKANA LETTER SMALL YU
73  1-5-71 0x2567 U+30E7 # KATAKANA LETTER SMALL YO
74  1-5-78 0x256E U+30EE # KATAKANA LETTER SMALL WA
75  1-5-85 0x2575 U+30F5 # KATAKANA LETTER SMALL KA
76  1-5-86 0x2576 U+30F6 # KATAKANA LETTER SMALL KE
77  1-1-21 0x2135 U+309D # HIRAGANA ITERATION MARK
78  1-1-22 0x2136 U+309E # HIRAGANA VOICED ITERATION MARK
79  1-4-1 0x2421 U+3041 # HIRAGANA LETTER SMALL A
80  1-4-3 0x2423 U+3043 # HIRAGANA LETTER SMALL I
81  1-4-5 0x2425 U+3045 # HIRAGANA LETTER SMALL U
82  1-4-7 0x2427 U+3047 # HIRAGANA LETTER SMALL E
83  1-4-9 0x2429 U+3049 # HIRAGANA LETTER SMALL O
84  1-4-35 0x2443 U+3063 # HIRAGANA LETTER SMALL TU
85  1-4-67 0x2463 U+3083 # HIRAGANA LETTER SMALL YA
86  1-4-69 0x2465 U+3085 # HIRAGANA LETTER SMALL YU
87  1-4-71 0x2467 U+3087 # HIRAGANA LETTER SMALL YO
88  1-4-78 0x246E U+308E # HIRAGANA LETTER SMALL WA
89  1-4-85 0x2475 U+3095 # HIRAGANA LETTER SMALL KA
90  1-4-86 0x2476 U+3096 # HIRAGANA LETTER SMALL KE
91  1-6-78 0x266E U+31F0 # KATAKANA LETTER SMALL KU
92  1-6-79 0x266F U+31F1 # KATAKANA LETTER SMALL SI
93  1-6-80 0x2670 U+31F2 # KATAKANA LETTER SMALL SU
94  1-6-81 0x2671 U+31F3 # KATAKANA LETTER SMALL TO
95  1-6-82 0x2672 U+31F4 # KATAKANA LETTER SMALL NU
96  1-6-83 0x2673 U+31F5 # KATAKANA LETTER SMALL HA
97  1-6-84 0x2674 U+31F6 # KATAKANA LETTER SMALL HI
98  1-6-85 0x2675 U+31F7 # KATAKANA LETTER SMALL HU
99  1-6-86 0x2676 U+31F8 # KATAKANA LETTER SMALL HE
100  1-6-87 0x2677 U+31F9 # KATAKANA LETTER SMALL HO
101  1-6-89 0x2679 U+31FA # KATAKANA LETTER SMALL MU
102  1-6-90 0x267A U+31FB # KATAKANA LETTER SMALL RA
103  1-6-91 0x267B U+31FC # KATAKANA LETTER SMALL RI
104  1-6-92 0x267C U+31FD # KATAKANA LETTER SMALL RU
105  1-6-93 0x267D U+31FE # KATAKANA LETTER SMALL RE
106  1-6-94 0x267E U+31FF # KATAKANA LETTER SMALL RO
107  1-1-25 0x2139 U+3005 # IDEOGRAPHIC ITERATION MARK
108  1-2-22 0x2236 U+303B # VERTICAL IDEOGRAPHIC ITERATION MARK
109  4 Hyphens and hyphen-like characters
110  1-1-30 0x213E U+2010 # HYPHEN
111  1-3-91 0x237B U+30A0 # KATAKANA-HIRAGANA DOUBLE HYPHEN
112  1-3-92 0x237C U+2013 # EN DASH
113  1-1-33 0x2141 U+301C # WAVE DASH Windows: U+FF5E
114  5 Question and exclamation marks
115  1-1-9 0x2129 U+003F # QUESTION MARK Fullwidth: U+FF1F
116  1-1-10 0x212A U+0021 # EXCLAMATION MARK Fullwidth: U+FF01
117  1-8-75 0x286B U+203C # DOUBLE EXCLAMATION MARK
118  1-8-76 0x286C U+2047 # DOUBLE QUESTION MARK
119  1-8-77 0x286D U+2048 # QUESTION EXCLAMATION MARK
120  1-8-78 0x286E U+2049 # EXCLAMATION QUESTION MARK
121  6 Bullets, colons, and semicolons
122  1-1-6 0x2126 U+30FB # KATAKANA MIDDLE DOT
123  1-1-7 0x2127 U+003A # COLON Fullwidth: U+FF1A
124  1-1-8 0x2128 U+003B # SEMICOLON Fullwidth: U+FF1B
125  7 Periods
126  1-1-3 0x2123 U+3002 # IDEOGRAPHIC FULL STOP
127  1-1-5 0x2125 U+002E # FULL STOP Fullwidth: U+FF0E
128  8 Inseparable characters
129  1-1-29 0x213D U+2014 # EM DASH Windows: U+2015
130  1-1-36 0x2144 U+2026 # HORIZONTAL ELLIPSIS
131  1-1-37 0x2145 U+2025 # TWO DOT LEADER
132  1-2-19 0x2233 U+3033 # VERTICAL KANA REPEAT MARK UPPER HALF
133  1-2-20 0x2234 U+3034 # VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF
134  1-2-21 0x2235 U+3035 # VERTICAL KANA REPEAT MARK LOWER HALF
135  9 Prefixed abbreviated symbols
136  1-1-79 0x216F U+00A5 # YEN SIGN Windows: U+FFE5
137  1-1-82 0x2172 U+00A3 # POUND SIGN Windows: U+FFE1
138  1-1-80 0x2170 U+0024 # DOLLAR SIGN Fullwidth: U+FF04
139  1-1-84 0x2174 U+0023 # NUMBER SIGN Fullwidth: U+FF03
140  1-9-1 0x2921 U+20AC # EURO SIGN
141  1-13-66 0x2D62 U+2116 # NUMERO SIGN
142  10 Suffixed abbreviated symbols
143  1-1-75 0x216B U+00B0 # DEGREE SIGN
144  1-1-81 0x2171 U+00A2 # CENT SIGN Windows: U+FFE0
145  1-1-76 0x216C U+2032 # PRIME
146  1-1-77 0x216D U+2033 # DOUBLE PRIME
147  1-2-82 0x2272 U+212B # ANGSTROM SIGN
148  1-1-78 0x216E U+2103 # DEGREE CELSIUS
149  1-3-63 0x235F U+2113 # SCRIPT SMALL L
150  1-1-83 0x2173 U+0025 # PERCENT SIGN Fullwidth: U+FF05
151  1-3-62 0x235E U+33CB # SQUARE HP
152  11 Ideographic space
153  1-1-1 0x2121 U+3000 # IDEOGRAPHIC SPACE
154  12 Hiragana
155  13 Japanese characters other than those in classes 1-12
156  14 Characters used in note references
157  15 Body characters of an attached sequence
158  16 Body characters of an attached ruby other than a compound ruby
159  17 Body characters of an attached compound ruby
160  18 Characters used in numeric sequences
161  19 Unit symbols
162  20 Latin space
163  21 Latin characters other than a space
164  22 Opening parentheses for inline notes
165  23 Closing parentheses for inline notes
166 */
167 
/*
  Return the version string reported by this module.

  The result is a pointer to a constant, NUL-terminated string ("10.0")
  with static storage duration; the caller must not modify or free it.
*/
const char * aptex_unicode_version (void)
{
  static const char version_string[] = "10.0";

  return version_string;
}
172 
/*
  Map a Unicode code point to a JIS X 4051 character class number.

  codepoint: the code point to classify.

  Returns the class number (1, 2, 5, 6, 7, 8 or 11) for the code points
  listed below, and 0 for every other code point.  Class numbers start
  at 1, so 0 unambiguously means "no class assigned by this function".

  Notes:
  - Punctuation that also exists in the ASCII range is matched through its
    fullwidth form only (e.g. U+FF08 is class 1, U+0028 is not listed).
  - Classes 3, 4, 9 and 10 are not mapped by this function.
*/
uint32_t aptex_get_jis4051_class (uint32_t codepoint)
{
  /* Start from "no class": without this the function returned an
     indeterminate value for any code point missing from the switch. */
  uint32_t jis4051_class = 0;

  switch (codepoint)
  {
    /* 1: opening parentheses and quotation marks */
    case 0xff08:
    case 0xff3b:
    case 0xff5b:
    case 0x3014:
    case 0x3008:
    case 0x300a:
    case 0x300c:
    case 0x300e:
    case 0x3010:
    case 0x301d:
    case 0x2018:
    case 0x201c:
    case 0xff5f:
    case 0x00ab:
      jis4051_class = 1;
      break;

    /* 2: closing parentheses and quotation marks (commas included) */
    case 0xff0c:
    case 0xff09:
    case 0xff3d:
    case 0xff5d:
    case 0x3001:
    case 0x3015:
    case 0x3009:
    case 0x300b:
    case 0x300d:
    case 0x300f:
    case 0x3011:
    case 0x3019:
    case 0x3017:
    case 0x301f:
    case 0x2019:
    case 0x201d:
    case 0xff60:
    case 0x00bb:
      jis4051_class = 2;
      break;

    /* 5: question and exclamation marks */
    case 0xff1f:
    case 0xff01:
    case 0x203c:
    case 0x2047:
    case 0x2048:
    case 0x2049:
      jis4051_class = 5;
      break;

    /* 6: bullets, colons, and semicolons */
    case 0x30fb:
    case 0xff1a:
    case 0xff1b:
      jis4051_class = 6;
      break;

    /* 7: periods */
    case 0x3002:
    case 0xff0e:
      jis4051_class = 7;
      break;

    /* 8: inseparable characters */
    case 0x2014:
    case 0x2026:
    case 0x2025:
    case 0x3033:
    case 0x3034:
    case 0x3035:
      jis4051_class = 8;
      break;

    /* 11: ideographic space */
    case 0x3000:
      jis4051_class = 11;
      break;

    /* Anything else keeps the "no class" value. */
    default:
      break;
  }

  return jis4051_class;
}
const char * aptex_unicode_version(void)
uint32_t aptex_get_jis4051_class(uint32_t codepoint)
unsigned int uint32_t
Definition: stdint.h:80
static int codepoint(lua_State *L)
Definition: lutf8lib.c:100