"Fossies" - the Fresh Open Source Software Archive

Member "yudit-3.0.7/bin/emoji.pl" (6 Jun 2020, 1368 Bytes) of package /linux/misc/yudit-3.0.7.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Perl source code syntax highlighting (style: standard), with prefixed line numbers and a code folding option. Alternatively, you can view or download the uninterpreted source code file here. For more information about "emoji.pl", see the Fossies "Dox" file reference documentation.

    1 #!/usr/bin/perl
    2 # This script parses:
    3 # https://unicode.org/emoji/charts/full-emoji-list.html
    4 # and creates a mys kmap file from it.
    5 # Gaspar Sinai <gaspar yudit org>
    6 # 2020-06-06
    7 
    8 use HTML::TagParser;
    9 use Data::Dumper;{package Data::Dumper;sub qquote{return shift;}}$Data::Dumper::Useperl=1;
   10 
   11 
   12 # my $html = HTML::TagParser->new('https://unicode.org/emoji/charts/full-emoji-list.html');
   13 
   14 my $html = HTML::TagParser->new ('/home/gsinai/Documents/Unicode/ftp.unicode.org/full-emoji-list.html');
   15 
   16 my @tables = $html->getElementsByTagName("table");
   17 
   18 printf ("# Generated by emoji.pl from Unicode 13.0.0 CLDR Data\n");
   19 foreach my $table (@tables) {
   20     &parse ($table);
   21 }
   22 
   23 exit (0);
   24 
   25 sub parse { 
   26     my $table = $_[0];
   27     my @trs = $table->subTree()->getElementsByTagName("tr");;
   28     foreach my $tr (@trs) {
   29 # print Dumper $tr;
   30         my @tds = $tr->subTree()->getElementsByTagName("td");;
   31         # next unless ($tr);
   32         # next unless ($tr->can("tagName"));
   33         next unless ($#tds > 1);
   34         my $no = $tds[0]->innerText;
   35         next unless ($no =~ /\d+/);
   36         my $code = $tds[1]->innerText;
   37         next unless ($code =~ /U\+[0-8A-Z]+.*/);
   38         $code =~ s/U\+//g;
   39         # can be emtpy cells.
   40         #my $value = $tds[14]->innerText;
   41         my $value = $tds[$#tds]->innerText;
   42         $value =~ s/⊛ //g;
   43         print "$code,$value\n";
   44     }
   45 }