"Fossies" - the Fresh Open Source Software Archive

Member "xterm-379/unicode/convmap.pl" (9 Sep 2018, 6472 Bytes) of package /linux/misc/xterm-379.tgz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Perl source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "convmap.pl", see the Fossies "Dox" file reference documentation.

    1 #!/usr/bin/perl -w
    2 # $XTermId: convmap.pl,v 1.15 2018/09/09 17:22:24 tom Exp $
    3 #
    4 # Generate keysym2ucs.c file
    5 #
    6 # See also:
    7 # http://mail.nl.linux.org/linux-utf8/2001-04/msg00248.html
    8 #
    9 # $XFree86: xc/programs/xterm/unicode/convmap.pl,v 1.5 2000/01/24 22:22:05 dawes Exp $
   10 
   11 use strict;
   12 
   13 our $keysym;
   14 our %name;
   15 our %keysym_to_ucs;
   16 our %keysym_to_keysymname;
   17 
   18 sub utf8 ($);
   19 
   20 sub utf8 ($) {
   21     my $c = shift(@_);
   22 
   23     if ($c < 0x80) {
   24         return sprintf("%c", $c);
   25     } elsif ($c < 0x800) {
   26         return sprintf("%c%c", 0xc0 | ($c >> 6), 0x80 | ($c & 0x3f));
   27     } elsif ($c < 0x10000) {
   28         return sprintf("%c%c%c",
   29                        0xe0 |  ($c >> 12),
   30                        0x80 | (($c >>  6) & 0x3f),
   31                        0x80 | ( $c        & 0x3f));
   32     } elsif ($c < 0x200000) {
   33         return sprintf("%c%c%c%c",
   34                        0xf0 |  ($c >> 18),
   35                        0x80 | (($c >> 12) & 0x3f),
   36                        0x80 | (($c >>  6) & 0x3f),
   37                        0x80 | ( $c        & 0x3f));
   38     } elsif ($c < 0x4000000) {
   39         return sprintf("%c%c%c%c%c",
   40                        0xf8 |  ($c >> 24),
   41                        0x80 | (($c >> 18) & 0x3f),
   42                        0x80 | (($c >> 12) & 0x3f),
   43                        0x80 | (($c >>  6) & 0x3f),
   44                        0x80 | ( $c        & 0x3f));
   45 
   46     } elsif ($c < 0x80000000) {
   47         return sprintf("%c%c%c%c%c%c",
   48                        0xfe |  ($c >> 30),
   49                        0x80 | (($c >> 24) & 0x3f),
   50                        0x80 | (($c >> 18) & 0x3f),
   51                        0x80 | (($c >> 12) & 0x3f),
   52                        0x80 | (($c >> 6)  & 0x3f),
   53                        0x80 | ( $c        & 0x3f));
   54     } else {
   55         return utf8(0xfffd);
   56     }
   57 }
   58 
   59 my $unicodedata = "UnicodeData.txt";
   60 
   61 # read list of all Unicode names
   62 if (!open(UDATA, $unicodedata) && !open(UDATA, "$unicodedata")) {
   63     die ("Can't open Unicode database '$unicodedata':\n$!\n\n" .
   64          "Please make sure that you have downloaded the file\n" .
   65          "ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData.txt\n");
   66 }
   67 while (<UDATA>) {
   68     if (/^([0-9,A-F]{4,6});([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*)$/) {
   69         $name{hex($1)} = $2;
   70     } else {
   71         die("Syntax error in line '$_' in file '$unicodedata'");
   72     }
   73 }
   74 close(UDATA);
   75 
   76 # read mapping (from http://wsinwp07.win.tue.nl:1234/unicode/keysym.map)
   77 open(LIST, "<keysym.map") || die ("Can't open map file:\n$!\n");
   78 while (<LIST>) {
   79     if (/^0x([0-9a-f]{4})\s+U([0-9a-f]{4})\s*(\#.*)?$/){
   80         my $keysym = hex($1);
   81         my $ucs = hex($2);
   82         my $comment = $3;
   83         $comment =~ s/^#\s*//;
   84         $keysym_to_ucs{$keysym} = $ucs;
   85         $keysym_to_keysymname{$keysym} = $comment;
   86     } elsif (/^\s*\#/ || /^\s*$/) {
   87     } else {
   88         die("Syntax error in 'list' in line\n$_\n");
   89     }
   90 }
   91 close(LIST);
   92 
   93 # read entries in keysymdef.h
   94 open(LIST, "</usr/include/X11/keysymdef.h") || die ("Can't open keysymdef.h:\n$!\n");
   95 while (<LIST>) {
   96     if (/^\#define\s+XK_([A-Za-z_0-9]+)\s+0x([0-9a-fA-F]+)\s*(\/.*)?$/) {
   97         next if /\/\* deprecated \*\//;
   98         my $keysymname = $1;
   99         my $keysym = hex($2);
  100         $keysym_to_keysymname{$keysym} = $keysymname;
  101     }
  102 }
  103 close(LIST);
  104 
# Write the fixed first part of the generated C file to standard output:
# the explanatory comment, the selection of the VISIBLE macro (depending on
# whether KEYSYM2UCS_INCLUDED is defined) and the opening of the keysymtab[]
# array.  The heredoc interpolates, hence the backslashes in front of the
# literal '$' and '@' characters.
print <<EOT;
/* \$XTermId\$
 * This module converts keysym values into the corresponding ISO 10646
 * (UCS, Unicode) values.
 *
 * The array keysymtab[] contains pairs of X11 keysym values for graphical
 * characters and the corresponding Unicode value. The function
 * keysym2ucs() maps a keysym onto a Unicode value using a binary search,
 * therefore keysymtab[] must remain SORTED by keysym value.
 *
 * The keysym -> UTF-8 conversion will hopefully one day be provided
 * by Xlib via XmbLookupString() and should ideally not have to be
 * done in X applications. But we are not there yet.
 *
 * We allow to represent any UCS character in the range U-00000000 to
 * U-00FFFFFF by a keysym value in the range 0x01000000 to 0x01ffffff.
 * This admittedly does not cover the entire 31-bit space of UCS, but
 * it does cover all of the characters up to U-10FFFF, which can be
 * represented by UTF-16, and more, and it is very unlikely that higher
 * UCS codes will ever be assigned by ISO. So to get Unicode character
 * U+ABCD you can directly use keysym 0x0100abcd.
 *
 * NOTE: The comments in the table below contain the actual character
 * encoded in UTF-8, so for viewing and editing best use an editor in
 * UTF-8 mode.
 *
 * Author: Markus G. Kuhn <mkuhn\@acm.org>, University of Cambridge, April 2001
 *
 * Special thanks to Richard Verhoeven <river\@win.tue.nl> for preparing
 * an initial draft of the mapping table.
 *
 * This software is in the public domain. Share and enjoy!
 *
 * AUTOMATICALLY GENERATED FILE, DO NOT EDIT !!! (unicode/convmap.pl)
 */

#ifndef KEYSYM2UCS_INCLUDED

#include "keysym2ucs.h"
#define VISIBLE /* */

#else

#define VISIBLE static

#endif

static struct codepair {
  unsigned short keysym;
  unsigned short ucs;
} keysymtab[] = {
EOT
  157 
  158 for $keysym (sort {$a <=> $b} keys(%keysym_to_keysymname)) {
  159     my $ucs = $keysym_to_ucs{$keysym};
  160     next if $keysym >= 0xf000 || $keysym < 0x100;
  161     if ($ucs) {
  162         printf("  { 0x%04x, 0x%04x }, /*%28s %s %s */\n",
  163                $keysym, $ucs, $keysym_to_keysymname{$keysym}, utf8($ucs),
  164                defined($name{$ucs}) ? $name{$ucs} : "???" );
  165     } else {
  166         printf("/*  0x%04x   %39s ? ??? */\n",
  167                $keysym, $keysym_to_keysymname{$keysym});
  168     }
  169 }
  170 
# Write the fixed last part of the generated C file: the end of the
# keysymtab[] initializer and the function keysym2ucs().  As emitted, that
# function returns Latin-1 keysyms unchanged, strips the 0x01000000 tag from
# directly encoded keysyms, otherwise binary-searches keysymtab[], and
# returns -1 when nothing matches.
print <<EOT;
};

VISIBLE
long keysym2ucs(KeySym keysym)
{
    int min = 0;
    int max = sizeof(keysymtab) / sizeof(struct codepair) - 1;

    /* first check for Latin-1 characters (1:1 mapping) */
    if ((keysym >= 0x0020 && keysym <= 0x007e) ||
        (keysym >= 0x00a0 && keysym <= 0x00ff))
        return (long) keysym;

    /* also check for directly encoded 24-bit UCS characters */
    if ((keysym & 0xff000000) == 0x01000000)
        return (long) (keysym & 0x00ffffff);

    /* binary search in table */
    while (max >= min) {
        int mid = (min + max) / 2;
        if (keysymtab[mid].keysym < keysym)
            min = mid + 1;
        else if (keysymtab[mid].keysym > keysym)
            max = mid - 1;
        else {
            /* found it */
            return keysymtab[mid].ucs;
        }
    }

    /* no matching Unicode value found */
    return -1;
}
EOT
  205 EOT