"Fossies" - the Fresh Open Source Software Archive 
Member "xterm-379/unicode/convmap.pl" (9 Sep 2018, 6472 Bytes) of package /linux/misc/xterm-379.tgz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style:
standard) with prefixed line numbers and
code folding option.
Alternatively, you can view or download the uninterpreted source code file here.
For more information about "convmap.pl" see the
Fossies "Dox" file reference documentation.
1 #!/usr/bin/perl -w
2 # $XTermId: convmap.pl,v 1.15 2018/09/09 17:22:24 tom Exp $
3 #
4 # Generate keysym2ucs.c file
5 #
6 # See also:
7 # http://mail.nl.linux.org/linux-utf8/2001-04/msg00248.html
8 #
9 # $XFree86: xc/programs/xterm/unicode/convmap.pl,v 1.5 2000/01/24 22:22:05 dawes Exp $
10
11 use strict;
12
13 our $keysym;
14 our %name;
15 our %keysym_to_ucs;
16 our %keysym_to_keysymname;
17
18 sub utf8 ($);
19
# utf8($c) - encode the integer code point $c as a UTF-8 byte string.
#
# Uses the original (ISO 10646 / RFC 2279) scheme, which allows sequences
# of up to six bytes and therefore covers values up to 0x7fffffff.
# Anything at or above 0x80000000 cannot be represented and is encoded
# as U+FFFD instead.
#
# Returns a string of one to six characters, each with an ordinal below
# 256, i.e. the raw bytes of the encoding.
sub utf8 ($) {
    my $c = shift(@_);

    if ($c < 0x80) {
        # 0xxxxxxx
        return sprintf("%c", $c);
    } elsif ($c < 0x800) {
        # 110xxxxx 10xxxxxx
        return sprintf("%c%c", 0xc0 | ($c >> 6), 0x80 | ($c & 0x3f));
    } elsif ($c < 0x10000) {
        # 1110xxxx + 2 continuation bytes
        return sprintf("%c%c%c",
                       0xe0 |  ($c >> 12),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 | ( $c        & 0x3f));
    } elsif ($c < 0x200000) {
        # 11110xxx + 3 continuation bytes
        return sprintf("%c%c%c%c",
                       0xf0 |  ($c >> 18),
                       0x80 | (($c >> 12) & 0x3f),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 | ( $c        & 0x3f));
    } elsif ($c < 0x4000000) {
        # 111110xx + 4 continuation bytes
        return sprintf("%c%c%c%c%c",
                       0xf8 |  ($c >> 24),
                       0x80 | (($c >> 18) & 0x3f),
                       0x80 | (($c >> 12) & 0x3f),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 | ( $c        & 0x3f));
    } elsif ($c < 0x80000000) {
        # 1111110x + 5 continuation bytes.  The lead marker is 0xfc;
        # 0xfe would set the bit that must stay clear and produce an
        # invalid lead byte.
        return sprintf("%c%c%c%c%c%c",
                       0xfc |  ($c >> 30),
                       0x80 | (($c >> 24) & 0x3f),
                       0x80 | (($c >> 18) & 0x3f),
                       0x80 | (($c >> 12) & 0x3f),
                       0x80 | (($c >>  6) & 0x3f),
                       0x80 | ( $c        & 0x3f));
    } else {
        # Out of range: substitute U+FFFD.
        return utf8(0xfffd);
    }
}
58
my $unicodedata = "UnicodeData.txt";

# Read the list of all Unicode character names into %name, keyed by the
# numeric code point.  Each line must consist of 15 semicolon-separated
# fields: the code point as 4-6 upper-case hex digits, the name, and 13
# further fields that are ignored here.  Any other line aborts the run.
open(my $udata, '<', $unicodedata)
    or die ("Can't open Unicode database '$unicodedata':\n$!\n\n" .
            "Please make sure that you have downloaded the file\n" .
            "ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt\n");
while (<$udata>) {
    if (/^([0-9A-F]{4,6});([^;]*)(?:;[^;]*){13}$/) {
        $name{hex($1)} = $2;
    } else {
        die("Syntax error in line '$_' in file '$unicodedata'");
    }
}
close($udata);
75
# read mapping (from http://wsinwp07.win.tue.nl:1234/unicode/keysym.map)
#
# Each data line has the form "0xKKKK UXXXX [# comment]" with lower-case
# hex digits.  The keysym -> UCS pair goes into %keysym_to_ucs and the
# comment text (without the leading '#') into %keysym_to_keysymname.
# Blank lines and comment-only lines are skipped; anything else is fatal.
open(my $map, '<', 'keysym.map') or die ("Can't open map file:\n$!\n");
while (<$map>) {
    if (/^0x([0-9a-f]{4})\s+U([0-9a-f]{4})\s*(\#.*)?$/) {
        my $keysym  = hex($1);
        my $ucs     = hex($2);
        # The trailing comment is optional; fall back to an empty name so
        # that neither the substitution below nor later output has to
        # deal with an undefined value.
        my $comment = defined($3) ? $3 : '';
        $comment =~ s/^#\s*//;
        $keysym_to_ucs{$keysym} = $ucs;
        $keysym_to_keysymname{$keysym} = $comment;
    } elsif (/^\s*\#/ || /^\s*$/) {
        # comment or blank line: nothing to do
    } else {
        die("Syntax error in 'list' in line\n$_\n");
    }
}
close($map);
92
# read entries in keysymdef.h
#
# Every "#define XK_<name> 0x<hex>" line records <name> as the name of
# that keysym in %keysym_to_keysymname, replacing any name already stored
# for it.  Lines carrying a "/* deprecated */" marker are ignored.
open(my $defs, '<', '/usr/include/X11/keysymdef.h')
    or die ("Can't open keysymdef.h:\n$!\n");
while (<$defs>) {
    # Filter first so that the capture variables below are read directly
    # after the match that set them.
    next if /\/\* deprecated \*\//;
    if (/^\#define\s+XK_([A-Za-z_0-9]+)\s+0x([0-9a-fA-F]+)\s*(\/.*)?$/) {
        my $keysymname = $1;
        my $keysym = hex($2);
        $keysym_to_keysymname{$keysym} = $keysymname;
    }
}
close($defs);
104
# Emit the fixed preamble of the generated C file: the explanatory header
# comment, the VISIBLE linkage macro (empty when keysym2ucs.h is included
# here, "static" when KEYSYM2UCS_INCLUDED is already defined), and the
# opening of the keysymtab[] initializer.  The heredoc is interpolating,
# which is why '$' and '@' are backslash-escaped inside it.
105 print <<EOT;
106 /* \$XTermId\$
107 * This module converts keysym values into the corresponding ISO 10646
108 * (UCS, Unicode) values.
109 *
110 * The array keysymtab[] contains pairs of X11 keysym values for graphical
111 * characters and the corresponding Unicode value. The function
112 * keysym2ucs() maps a keysym onto a Unicode value using a binary search,
113 * therefore keysymtab[] must remain SORTED by keysym value.
114 *
115 * The keysym -> UTF-8 conversion will hopefully one day be provided
116 * by Xlib via XmbLookupString() and should ideally not have to be
117 * done in X applications. But we are not there yet.
118 *
119 * We allow to represent any UCS character in the range U-00000000 to
120 * U-00FFFFFF by a keysym value in the range 0x01000000 to 0x01ffffff.
121 * This admittedly does not cover the entire 31-bit space of UCS, but
122 * it does cover all of the characters up to U-10FFFF, which can be
123 * represented by UTF-16, and more, and it is very unlikely that higher
124 * UCS codes will ever be assigned by ISO. So to get Unicode character
125 * U+ABCD you can directly use keysym 0x0100abcd.
126 *
127 * NOTE: The comments in the table below contain the actual character
128 * encoded in UTF-8, so for viewing and editing best use an editor in
129 * UTF-8 mode.
130 *
131 * Author: Markus G. Kuhn <mkuhn\@acm.org>, University of Cambridge, April 2001
132 *
133 * Special thanks to Richard Verhoeven <river\@win.tue.nl> for preparing
134 * an initial draft of the mapping table.
135 *
136 * This software is in the public domain. Share and enjoy!
137 *
138 * AUTOMATICALLY GENERATED FILE, DO NOT EDIT !!! (unicode/convmap.pl)
139 */
140
141 #ifndef KEYSYM2UCS_INCLUDED
142
143 #include "keysym2ucs.h"
144 #define VISIBLE /* */
145
146 #else
147
148 #define VISIBLE static
149
150 #endif
151
152 static struct codepair {
153 unsigned short keysym;
154 unsigned short ucs;
155 } keysymtab[] = {
156 EOT
157
# Write the body of keysymtab[]: one line per known keysym, in ascending
# numeric order.  Keysyms with a (non-zero) UCS mapping become table
# entries annotated with the keysym name, the character itself in UTF-8
# and its Unicode name; the others are emitted as comments only.
foreach $keysym (sort { $a <=> $b } keys(%keysym_to_keysymname)) {
    # Only keysyms in 0x0100 .. 0xefff are listed.
    next unless $keysym >= 0x100 && $keysym < 0xf000;

    my $ucs        = $keysym_to_ucs{$keysym};
    my $keysymname = $keysym_to_keysymname{$keysym};

    unless ($ucs) {
        # No mapping known: leave a placeholder comment in the table.
        printf("/* 0x%04x %39s ? ??? */\n", $keysym, $keysymname);
        next;
    }

    my $ucsname = defined($name{$ucs}) ? $name{$ucs} : "???";
    printf(" { 0x%04x, 0x%04x }, /*%28s %s %s */\n",
           $keysym, $ucs, $keysymname, utf8($ucs), $ucsname);
}
170
# Emit the fixed tail of the generated C file: the closing of the
# keysymtab[] initializer followed by the keysym2ucs() function.  As
# written below, that function returns Latin-1 keysyms unchanged, strips
# the 0x01000000 tag from directly encoded UCS keysyms, otherwise does a
# binary search over keysymtab[], and returns -1 when nothing matches.
171 print <<EOT;
172 };
173
174 VISIBLE
175 long keysym2ucs(KeySym keysym)
176 {
177 int min = 0;
178 int max = sizeof(keysymtab) / sizeof(struct codepair) - 1;
179
180 /* first check for Latin-1 characters (1:1 mapping) */
181 if ((keysym >= 0x0020 && keysym <= 0x007e) ||
182 (keysym >= 0x00a0 && keysym <= 0x00ff))
183 return (long) keysym;
184
185 /* also check for directly encoded 24-bit UCS characters */
186 if ((keysym & 0xff000000) == 0x01000000)
187 return (long) (keysym & 0x00ffffff);
188
189 /* binary search in table */
190 while (max >= min) {
191 int mid = (min + max) / 2;
192 if (keysymtab[mid].keysym < keysym)
193 min = mid + 1;
194 else if (keysymtab[mid].keysym > keysym)
195 max = mid - 1;
196 else {
197 /* found it */
198 return keysymtab[mid].ucs;
199 }
200 }
201
202 /* no matching Unicode value found */
203 return -1;
204 }
205 EOT