"Fossies" - the Fresh Open Source Software Archive

Member "unifont-12.1.02/src/johab2ucs2" (20 Oct 2014, 11458 Bytes) of package /linux/misc/unifont-12.1.02.tar.gz:


As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Perl source code syntax highlighting (style: standard) with prefixed line numbers and code folding option. Alternatively you can here view or download the uninterpreted source code file.

    1 #!/usr/bin/perl  -w
    2 #
    3 # Copyright (C) 1998, 2013 Jungshik Shin, Paul Hardy
    4 #
    5 # johab2ucs2.pl 
    6 # This script(working as filter) converts  Hangul "Johab encoded  fonts"
    7 # with an unofficial XLFD name "-johab" in BDF format
    8 # to UCS-2 encoded font in a format defined by
    9 # Roman Czyborra <roman@czyborra.com> at 
   10 # http://czyborra.com/unifont/
   11 #
   12 # LICENSE:
   13 #
   14 #    This program is free software: you can redistribute it and/or modify
   15 #    it under the terms of the GNU General Public License as published by
   16 #    the Free Software Foundation, either version 2 of the License, or
   17 #    (at your option) any later version.
   18 #  
   19 #    This program is distributed in the hope that it will be useful,
   20 #    but WITHOUT ANY WARRANTY; without even the implied warranty of
   21 #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the 
   22 #    GNU General Public License for more details.
   23 #  
   24 #    You should have received a copy of the GNU General Public License
   25 #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
   26 #
   27 #
   28 # 'hanterm304font.tar.gz contains about a dozen of 
   29 # "Johab-encoded" fonts. The package is available 
   30 # ftp://ftp.kaist.ac.kr/hangul/terminal/hanterm/hanterm304beta/fonts
   31 # Please, note that this script only works with  fonts whose 
   32 # XLFD name ends with 
   33 #
   34 #  --16-160-75-75-c-160-johab-1
   35 # (and whose file name in the package doesn't include 's' or 'sh' preceding
   36 # '(m|g)16.bdf'. )
   37 #
   38 # There are  four of them : 
   39 #  johabg16.bdf,johabm16.bdf,johabp16.bdf,iyagi16.bdf.  
   40 #
   41 # Fonts in the package with other XLFD names
   42 # (johabs and johabsh) contain glyphs for about 5000 Hanjas and special symbols
   43 # defined in KS C 5601-1987. 
   44 
   45 #  Sep. 29, 1998
   46 #  Jungshik Shin <jshin@pantheon.yale.edu>
   47 
   48 # A more complete routine which not only covers
   49 # *modern* pre-composed Hangul syllables in UAC00-UD7A3
   50 # but also supports dynamic rendering of
   51 # Hangul syllables(medieval as well as modern)
   52 # using Hangul combining Jamos at [U1100-U11FF]
   53 # was made by Deog-tae Kim <dtkim@calab.kaist.ac.kr>
   54 # to be used in Java font-properties file. 
   55 # It's available at  http://calab.kaist.ac.kr/~dtkim/java/
   56 
   57 # 2 May 2008: changes by Paul Hardy (unifoundry <at> unifoundry.com):
   58 #
   59 #    - In tconBase, "459" index corrected to "449".
   60 #    - Modified subroutine get_ind to always return 0 for final
   61 #      if no final consonant is in the composite syllable.
   62 #      Previously it always added $tconMap[$m] to the final
   63 #      consonant location even if there was no final consonant.
   64 #    - Index arrays were extended to cover all of Johab encoded
   65 #      Hangul, even though not all glyphs are used to generate
   66 #      the Unicode Hangul Syllables range.
   67 #    - Added comments on the letters in the letter arrays
   68 
   69 
   70 # Conversion routine from Hangul Jamo index to glyph index 
   71 # of Hangul "Johab-encoded" fonts  as used by 
   72 # Hangul xterm, hanterm.
   73 # The following routine is based on Hanterm by Song, Jaekyung
   74 # available at ftp://ftp.kaist.ac.kr/hangul/terminal/hanterm
   75 
   76 # Leading Consonant index values:
   77 #
   78 #    Modern Letters:                     Archaic Letters (no Romanization):
   79 #
   80 #       0 G  (choseong kiyeok)              19 (choseong kapyeounpieup) 
   81 #       1 GG (choseong ssangkiyeok)         20 (choseong pieup-kiyeok)
   82 #       2 N  (choseong nieun)               21 (choseong sios-kiyeok)
   83 #       3 D  (choseong tikeut)              22 (choseong pieup-tikeut)
   84 #       4 DD (choseong ssangtikeut)         23 (choseong sios-tikeut)
   85 #       5 R  (choseong rieul)               24 (choseong sios-pieup)
   86 #       6 M  (choseong mieum)               25 (choseong pieup-sios)
   87 #       7 B  (choseong pieup)               26 (choseong pansios)
   88 #       8 BB (choseong ssangpieup)          27 (choseong yesieung)
   89 #       9 S  (choseong sios)                28 (choseong pieup-cieuc)
   90 #      10 SS (choseong ssangsios)           29 (choseong sios-cieuc)
   91 #      11 ieung (choseong ieung)            30 (choseong yeorinhieuh)
   92 #      12 J  (choseong cieuc)
   93 #      13 JJ (choseong ssangcieuc)
   94 #      14 C  (choseong chieuch)
   95 #      15 K  (choseong khieukh)
   96 #      16 T  (choseong thieuth)
   97 #      17 P  (choseong phieuph)
   98 #      18 H  (choseong hieuh)
   99 #
  100 
  101 # Middle Letter index values:
  102 #
  103 #    Modern Letters:                     Archaic Letters (no Romanization):
  104 #
  105 #       0 Filler (blank)                    22 YO-YA   (jungseong yo-ya)
  106 #       1 A   (jungseong a)                 23 YO-YAE  (jungseong yo-yae)
  107 #       2 AE  (jungseong ae)                24 YO-I    (jungseong yo-i)
  108 #       3 YA  (jungseong ya)                25 YU-YEO  (jungseong yu-yeo)
  109 #       4 YAE (jungseong yae)               26 YU-YE   (jungseong yu-ye)
  110 #       5 EO  (jungseong eo)                27 YU-I    (jungseong yu-i)
  111 #       6 E   (jungseong e)                 28 araea   (jungseong araea)
  112 #       7 YEO (jungseong yeo)               29 araea-i (jungseong araea-i)
  113 #       8 YE  (jungseong ye)
  114 #       9 O   (jungseong o)
  115 #      10 WA  (jungseong wa)
  116 #      11 WAE (jungseong wae)
  117 #      12 OE  (jungseong oe)
  118 #      13 YO  (jungseong yo)
  119 #      14 U   (jungseong u)
  120 #      15 WEO (jungseong weo)
  121 #      16 WE  (jungseong we)
  122 #      17 WI  (jungseong wi)
  123 #      18 YU  (jungseong yu)
  124 #      19 EU  (jungseong eu)
  125 #      20 YI  (jungseong yi)
  126 #      21 I   (jungseong i)
  127 #
  128 
  129 # Terminal (Final) Letter index values:
  130 #
  131 #    Modern Letters:                     Archaic Letters (no Romanization):
  132 #
  133 #       0 Filler (blank)                    28 (jongseong rieul-hieuh)
  134 #       1 G  (jongseong kiyeok)             29 (jongseong mieum-kiyeok)
  135 #       2 GG (jongseong ssangkiyeok)        30 (jongseong yeorinhieuh)
  136 #       3 GS (jongseong kiyeok-sios)        31 (jongseong yesieung)
  137 #       4 N  (jongseong nieun)
  138 #       5 NJ (jongseong nieun-cieuc)
  139 #       6 NH (jongseong nieun-hieuh)
  140 #       7 D  (jongseong tikeut)
  141 #       8 L  (jongseong rieul)
  142 #       9 LG (jongseong rieul-kiyeok)
  143 #      10 LM (jongseong rieul-mieum)
  144 #      11 LB (jongseong rieul-pieup)
  145 #      12 LS (jongseong rieul-sios)
  146 #      13 LT (jongseong rieul-thieuth)
  147 #      14 LP (jongseong rieul-phieuph)
  148 #      15 LH (jongseong rieul-hieuh)
  149 #      16 M  (jongseong mieum)
  150 #      17 B  (jongseong pieup)
  151 #      18 BS (jongseong pieup-sios)
  152 #      19 S  (jongseong sios)
  153 #      20 SS (jongseong ssangsios)
  154 #      21 NG (jongseong ieung)
  155 #      22 J  (jongseong cieuc)
  156 #      23 C  (jongseong chieuch)
  157 #      24 K  (jongseong khieukh)
  158 #      25 T  (jongseong thieuth)
  159 #      26 P  (jongseong phieuph)
  160 #      27 H  (jongseong hieuh)
  161 #
  162 
  163 
  164 # The base font index for leading consonants
  165   @lconBase= (
  166           1,  11,  21,  31,  41,  51,  # G, GG, N, D, DD, R
  167          61,  71,  81,  91, 101, 111,  # M, B, BB, S, SS, ieung
  168         121, 131, 141, 151, 161, 171,  # J, JJ, C, K, T, P
  169         181,                           # H -- end of modern set
  170         191, 201, 211, 221, 231, 241,  # 
  171         251, 261, 271, 281, 291, 301
  172   );
  173 
  174   # The base index for vowels
  175 
  176   @vowBase = (
  177         0,311,314,317,320,323,   # (Fill), A, AE, YA, YAE, EO
  178         326,329,332,335,339,343, # E, YEO, YE, O, WA, WAE
  179         347,351,355,358,361,364, # OI, YO, U, WEO, WE, WI
  180         367,370,374,378,         # YU, EU, UI, I     -- end of modern set
  181         381, 384, 387,           # YO-YA, YO-YAE, YO-YI
  182         390, 393, 396,           # YU-YEO, YU-YE, YU-I
  183         399, 402                 # araea, araea-i
  184   );
  185 
  186   # The base font index for trailing consonants
  187 
  188   @tconBase = (
  189         # modern trailing consonants (filler + 27)
  190         0,
  191         405, 409, 413, 417, 421,  #  G, GG, GS,  N, NJ
  192         425, 429, 433, 437, 441,  # NH,  D,  L, LG, LM
  193         445, 449, 453, 457, 461,  # LB, LS, LT, LP, LH
  194         465, 469, 473, 477, 481,  #  M,  B, BS,  S, SS
  195         485, 489, 493, 497, 501,  # NG,  J,  C,  K,  T
  196         505, 509,                 #  P,  H     -- end of modern set
  197         513, 517, 521, 525
  198    );
  199 
  200     # The mapping from vowels to leading consonant type
  201     # in absence of trailing consonant
  202 
  203     @lconMap1 = (
  204         0,0,0,0,0,0,     # (Fill), A, AE, YA, YAE, EO
  205         0,0,0,1,3,3,     # E, YEO, YE, O, WA, WAE
  206         3,1,2,4,4,4,     # OI, YO, U, WEO, WE, WI
  207         2,1,3,0,         # YU, EU, UI, I    -- end of modern set
  208         3,3,3,4,4,4,     # YO-YA, YO-YAE, YO-I, YU-YEO, YU-YE, YU-I
  209         1,3              # araea, araea-i
  210     );
  211 
  212     # The mapping from vowels to leading consonant type
  213     # in presence of trailing consonant
  214 
  215     @lconMap2 = (
  216         5,5,5,5,5,5,     #  (Fill), A, AE, YA, YAE, EO
  217         5,5,5,6,8,8,     #  E, YEO, YE, O, WA, WAE
  218         8,6,7,9,9,9,     #  OI, YO, U, WEO, WE, WI
  219         7,6,8,5,         #  YU, EU, UI, I    -- end of modern set
  220         8,8,8,9,9,9,     # YO-YA, YO-YAE, YO-I, YU-YEO, YU-YE, YU-I
  221         6,8              # araea, araea-i
  222     );
  223 
  224     #  vowel type ; 1 = o and its alikes, 0 = others
  225 
  226     @vowType = (
  227         0,0,0,0,0,0,
  228         0,0,0,1,1,1,
  229         1,1,0,0,0,0,
  230         0,1,1,0,         # end of modern set
  231         1,1,1,0,0,0,1,1
  232     );
  233 
  234     #  The mapping from trailing consonants to vowel type
  235 
  236     @tconType = (
  237         0, 1, 1, 1, 2, 1,
  238         1, 1, 1, 1, 1, 1,
  239         1, 1, 1, 1, 1, 1,
  240         1, 1, 1, 1, 1, 1,
  241         1, 1, 1, 1,        # end of moder set
  242         1, 1, 1, 1
  243     );
  244 
  245     #  The mapping from vowels to trailing consonant type
  246 
  247     @tconMap = (
  248         0, 0, 2, 0, 2, 1,  # (Fill), A, AE, YA, YAE, EO
  249         2, 1, 2, 3, 0, 0,  # E, YEO, YE, O, WA, WAE
  250         0, 3, 3, 1, 1, 1,  # OI, YO, U, WEO, WE, WI
  251         3, 3, 0, 1,        # YU, EU, UI, I   -- end of modern set
  252         0, 0, 0, 1, 1, 1,  # YO-YA, YO-YAE, YO-I, YU-YEO, YU-YE, YU-I
  253         3, 0               # araea, araea-i
  254     );
  255 
  256 
  257 
  258 # read in BITMAP patterns for Jamos from JOHAB-encoded BDF font file 
  259 # thru STDIN
  260 
  261 $BITMAP=0;
  262 while (<>) {
  263   if (/^ENCODING\s+(\d+)/) { $i = $1; $jamo[$i]=""; }
  264   elsif (/^BITMAP/) { $BITMAP=1; }
  265   elsif (/^ENDCHAR/) { $BITMAP=0; 
  266   }
  267   elsif ($BITMAP) { 
  268     y/a-f/A-F/; 
  269     s/\n$//; 
  270     $jamo[$i] = $jamo[$i] . $_;
  271   }
  272 }
  273 
  274 for ( $j=0 ; $j <  11172 ;  $j++ ) {
  275 
  276    $init = int( $j / 21 / 28) ;
  277    $medial = int($j / 28 ) % 21+1 ; 
  278    $final = $j % 28;
  279   
  280    printf ("%04X:%64s\n", $j+0xAC00, &compose_hangul($init,$medial,$final)) or die ("Cannot print to stdout.\n");
  281 
  282 }
  283 
  284 sub compose_hangul
  285 {
  286    local($l,$m,$f) = @_; 
  287 
  288    @l_bit = unpack("a2" x 32, $jamo[&get_ind($l,$m,$f,1)]);
  289    @m_bit = unpack("a2" x 32, $jamo[&get_ind($l,$m,$f,2)]);
  290    @f_bit = unpack("a2" x 32, $jamo[&get_ind($l,$m,$f,3)]);
  291  
  292 
  293    for ( $i = 0; $i < 32; $i++) {
  294       $bit[$i]=sprintf("%02X", 
  295          hex($l_bit[$i]) | hex($m_bit[$i]) | hex($f_bit[$i]) ) or die ("Cannot print to stdout.\n");
  296    }
  297 
  298    return pack("a2" x 32, @bit );
  299 
  300 }
  301 
  302 sub get_ind
  303 {
  304   local($l,$m,$f,$wh) = @_;
  305 
  306 # ($l = 0 && $l < 19 && $m =0 && $m < 21  && $f =0 && $f < 28) or 
  307 #   die ("$0: get_ind() : invalid Jamo index\n");
  308 
  309   if ( $wh == 1 ) {  # leading consonant index; no final consonant if $f==0
  310     $ind = $lconBase[$l] + 
  311            ($f > 0 ?  $lconMap2[$m] : $lconMap1[$m] ) ;
  312   } 
  313   elsif ( $wh == 2 ) { # medial vowel index
  314 
  315      $ind = $vowBase[$m];
  316      if ( $vowType[$m] == 1 ) {
  317      # For vowels 'o' and alikes,
  318      # Giyeok and Kieuk get special treatment
  319          $ind += ( ($l==0 || $l == 15) ? 0 : 1)
  320                  + ($f > 0 ? 2 : 0 );
  321      }
  322      else {
  323          $ind+= $tconType[$f];
  324      }
  325   }
  326   else {
  327       if ($f == 0) {
  328          $ind = 0;
  329       }
  330       else {
  331          $ind = $tconBase[$f] + $tconMap[$m];
  332       }
  333   }
  334   return $ind;
  335 }