"Fossies" - the Fresh Open Source Software archive 
Member "latex2html-2002-2-1/IndicTeX-HTML/indica.perl" of archive latex2html-2002-2-1.tar.gz:
# $Id: indica.perl,v 1.4 1999/03/13 00:26:54 RRM Exp $
# INDICA.PERL by Ross Moore <ross@mpce.mq.edu.au> 14-1-98
# Mathematics Department, Macquarie University, Sydney, Australia.
#
# Style for LaTeX2HTML v98.1 to construct images of traditional
# Indic scripts, using:
#
# Indica pre-processor and sinhala fonts: sinha, sinhb, sinhc
# by Yannis Haralambous <Yannis.Haralambous@univ-lille1.fr>
#
# sinhala.sty package for LaTeX-2e
# by Dominik Wujastyk <D.Wujastyk@ucl.ac.uk>
#
# extended for Prasad Dharmasena's <pkd@isr.umd.edu>
# `samanala' transliteration scheme
# by Vasantha Saparamadu <vsaparam@ocs.mq.edu.au>
#
# These resources are *not* included with this package.
# Obtain them from CTAN: http://ctan.tug.org/ctan
#
# ===================================================================
# This package requires the corresponding LaTeX package: indica.sty .
#
# With LaTeX2HTML the options on the \usepackage line specify which
# preprocessor and transcription mode to use.
#
# Usage:
#
# \usepackage{indica} %| for text already pre-processed
# \usepackage[indica]{indica} %| for all supported languages
# \usepackage[preprocess]{indica}%| same as [indica]
#
# options affecting Input-forms
#
# \usepackage[7bit]{indica} %| Velthuis' Hindi/Sanskrit transcription
# \usepackage[csx]{indica} %| 8-bit Sanskrit extension of ISO 646
# \usepackage[latex]{indica} %| standardized LaTeX transcription form
# \usepackage[unicode]{indica} %| ISO 10646-1 + Sinhalese extension
# \usepackage[samanala]{indica}%| Prasad Dharmasena's transliteration
#
# options specifying languages:
#
# \usepackage[ben]{indica} %| Bengali
# \usepackage[guj]{indica} %| Gujarati
# \usepackage[gur]{indica} %| Gurmukhi
# \usepackage[hin]{indica} %| Hindi
# \usepackage[kan]{indica} %| Kannada
# \usepackage[mal]{indica} %| Malayalam
# \usepackage[ori]{indica} %| Oriya
# \usepackage[san]{indica} %| Sanskrit
# \usepackage[sin]{indica} %| Sinhala, Sinhalese
# \usepackage[tam]{indica} %| Tamil
# \usepackage[tel]{indica} %| Telugu
# \usepackage[tib]{indica} %| Tibetan
#
# LaTeX2HTML: create aliases to 3-letter abbreviations;
# e.g. #ALIAS SANSKRIT SAN
#
#
# \usepackage[bengali]{indica} %| Bengali
# \usepackage[gujarati]{indica} %| Gujarati
# \usepackage[gurmukhi]{indica} %| Gurmukhi
# \usepackage[hindi]{indica} %| Hindi
# \usepackage[kannada]{indica} %| Kannada
# \usepackage[malayalam]{indica} %| Malayalam
# \usepackage[oriya]{indica} %| Oriya
# \usepackage[sanskrit]{indica} %| Sanskrit
# \usepackage[sinhala]{indica} %| Sinhala, Sinhalese
# \usepackage[sinhalese]{indica} %| Sinhala, Sinhalese
# \usepackage[tamil]{indica} %| Tamil
# \usepackage[telugu]{indica} %| Telugu
# \usepackage[tibetan]{indica} %| Tibetan
#
# LaTeX2HTML: create aliases to 1-letter abbreviations;
# e.g. #ALIAS SANSKRIT S
#
# ===================================================================
# Warning
#
# This package works BOTH with source *before* pre-processing
# and also *after* having pre-processed.
# The latter may create more, smaller images of individual syllables,
# whereas the former tends to create larger images of whole lines,
# paragraphs, sections, etc.
# ===================================================================
#
# Change Log:
# ===========
# $Log: indica.perl,v $
# Revision 1.4 1999/03/13 00:26:54 RRM
# -- implement <SPAN> and <DIV> tags for HTML4.0
# -- include LANG= attribute with HTM4.0
# -- use \vbox with paragraphs
#
# Revision 1.3 1998/08/18 12:59:56 RRM
# -- allow for extra space after particular characters
# -- include the \diatop macro within images.tex in a better way
#
# Revision 1.2 1998/02/03 05:28:47 RRM
# -- changed file-names: gujrathi --> gujarati
#
# Revision 1.1 1998/01/22 04:33:20 RRM
# LaTeX2HTML interfaces to packages and pre-processors for including
# traditional Indic scripts (as images) in HTML documents
#
# see the .perl files for documentation on usage
# see the corresponding .sty file for the LaTeX-2e interface
#
#
package main;

### configuration variables ###
# these may be set in .latex2html-init files
# Every default below is applied only while the variable is still
# false/unset, so a value assigned before this file is loaded is kept.

# command-name for the Indica pre-processor
#$INDICA = 'Indica' unless $INDICA;
$INDICA = 'indica' unless $INDICA;

# input mode assumed until a mode option changes it
$INDICA_MODE = 'sevenbit' unless ($INDICA_MODE);

# pre-processor directives for header
$indica_default = "\#SEVENBIT\n\#ALIAS NIL N\n" unless ($indica_default);

# max characters in an inline string, one limit per input mode
$indica_inline = 200 unless ($indica_inline);
# Each limit is guarded by its own variable.  The CSX limit used to be
# guarded by $indica_inline, which is always set by the line above, so
# the CSX default was never applied.
$indica_csx = 100 unless ($indica_csx);
$indica_latex = 300 unless ($indica_latex);
$indica_unicode = 800 unless ($indica_unicode);

# matches directives to revert to normal (La)TeX
$indica_normal_rx = '\#(N(IL)?)' unless ($indica_normal_rx);

# list of recognised pre-processor directives
# (other than language switches)
$indica_commands_rx = '(SEVENBIT|CSX|LATEX|UNICODE|SAMANALA|ALIAS)'
    unless ($indica_commands_rx);
# preprocessor: indica
# Options [preprocess] and [indica]: both call alias_indica with an empty
# directive and an empty alias, i.e. no mode or language is selected.
sub do_indica_preprocess {
    alias_indica('', '');
}

sub do_indica_indica {
    alias_indica('', '');
}
# input modes
# One handler per input-form option; each passes the matching directive
# name to alias_indica with an empty alias.
sub do_indica_7bit {
    alias_indica('SEVENBIT', '');
}

sub do_indica_csx {
    alias_indica('CSX', '');
}

sub do_indica_latex {
    alias_indica('LATEX', '');
}

sub do_indica_unicode {
    alias_indica('UNICODE', '');
}

sub do_indica_samanala {
    alias_indica('SAMANALA', '');
}
# language short aliases
# Full-name language options; each asks alias_indica to declare a
# one-letter alias for the language directive.  Note that several
# languages share a letter (G, S and T).
sub do_indica_bengali {
    alias_indica('BENGALI', 'B');
}

sub do_indica_gujarati {
    alias_indica('GUJARATI', 'G');
}

sub do_indica_gurmukhi {
    alias_indica('GURMUKHI', 'G');
}

sub do_indica_hindi {
    alias_indica('HINDI', 'H');
}

sub do_indica_kannada {
    alias_indica('KANNADA', 'K');
}

sub do_indica_malayalam {
    alias_indica('MALAYALAM', 'M');
}

sub do_indica_oriya {
    alias_indica('ORIYA', 'O');
}

sub do_indica_sanskrit {
    alias_indica('SANSKRIT', 'S');
}

sub do_indica_sinhala {
    alias_indica('SINHALA', 'S');
}

sub do_indica_sinhalese {
    alias_indica('SINHALESE', 'S');
}

sub do_indica_tamil {
    alias_indica('TAMIL', 'T');
}

sub do_indica_telugu {
    alias_indica('TELUGU', 'T');
}

sub do_indica_tibetan {
    alias_indica('TIBETAN', 'T');
}
# language medium aliases
# Three-letter language options; each asks alias_indica to declare the
# same three letters as an alias for the full language directive.
sub do_indica_ben {
    alias_indica('BENGALI', 'BEN');
}

sub do_indica_guj {
    alias_indica('GUJARATI', 'GUJ');
}

sub do_indica_gur {
    alias_indica('GURMUKHI', 'GUR');
}

sub do_indica_hin {
    alias_indica('HINDI', 'HIN');
}

sub do_indica_kan {
    alias_indica('KANNADA', 'KAN');
}

sub do_indica_mal {
    alias_indica('MALAYALAM', 'MAL');
}

sub do_indica_ori {
    alias_indica('ORIYA', 'ORI');
}

sub do_indica_san {
    alias_indica('SANSKRIT', 'SAN');
}

sub do_indica_sin {
    alias_indica('SINHALA', 'SIN');
}

sub do_indica_tam {
    alias_indica('TAMIL', 'TAM');
}

sub do_indica_tel {
    alias_indica('TELUGU', 'TEL');
}

sub do_indica_tib {
    alias_indica('TIBETAN', 'TIB');
}
# Handle one package option: append the matching pre-processor directive
# to $prelatex, then make sure the pre-processing stage is set up.
#   first argument   directive name (language or input mode), may be empty
#   second argument  alias to declare for that directive, empty for modes
# Returns whatever pre_process_indica returns.
sub alias_indica {
    my ($directive, $short_name) = @_;

    # the default header goes in once, before any other directive
    $prelatex .= $indica_default unless $indica_loaded;

    if ($short_name) {
        # language option: declare the alias for the full directive name
        $prelatex .= "#ALIAS $directive $short_name\n";
    }
    elsif ($directive && $directive =~ /^$indica_commands_rx$/) {
        # input-mode option: emit the directive and remember the mode
        $prelatex .= '#' . $directive . "\n";
        $INDICA_MODE = $directive unless ($directive =~ /SAMANALA/);
        if    ($directive =~ /UNICODE/) { $indica_inline = 500; }
        elsif ($directive =~ /LATEX/)   { $indica_inline = 300; }
    }

    &pre_process_indica($short_name);
}
# Set up the pre-processing stage.  On the first call the pre-processor
# command line is queued in $preprocessor_cmds and the standard directive
# environments are registered; on every call a non-empty alias is
# registered as an extra 'indica[]' environment.
#   argument: alias name (without the leading '#'), may be empty
sub pre_process_indica {
    my ($alias) = @_;

    if (!$indica_loaded) {
        $preprocessor_cmds .= $INDICA . ' <' . $PREFIX . 'images.pre >'
            . $PREFIX . "images.tex\n";
        &indica_environments();
    }

    $other_environments{'#' . $alias . ':#'} = 'indica[]' if ($alias);

    $indica_loaded = 1;
    $PREPROCESS_IMAGES = 1;
}
# Register the pre-processor directives in %other_environments.
# Language directives are entered as "#NAME:#" => 'indica[]', mode and
# alias directives as "#NAME:" => 'nowrap'.  Entries already present in
# %other_environments under other keys are left alone.
#
# Both spellings GUJARATI and GUJARATHI are registered: the option
# handlers in this file emit "#ALIAS GUJARATI ...", while this table used
# to know only GUJARATHI, so text marked with the spelling the handlers
# use was not covered by the table.
# NOTE(review): which spelling the Indica pre-processor itself accepts is
# not visible from this file -- confirm against the Indica documentation.
sub indica_environments {
    my $name;

    foreach $name (qw(BENGALI GUJARATI GUJARATHI GURMUKHI HINDI KANNADA
                      MALAYALAM ORIYA SANSKRIT SINHALA SINHALESE TAMIL
                      TELUGU TIBETAN)) {
        $other_environments{'#' . $name . ':#'} = 'indica[]';
    }

    foreach $name (qw(SEVENBIT CSX LATEX UNICODE SAMANALA ALIAS)) {
        $other_environments{'#' . $name . ':'} = 'nowrap';
    }
}
# Maps a language directive name to the language code that
# do_env_pre_indica places in the LANG attribute.
# GUJARATI is listed next to GUJARATHI because the option handlers in
# this file use the former spelling and this table used to know only the
# latter.
%ISO_indic = (
      'BENGALI'   => 'bn'
    , 'GUJARATI'  => 'gu'
    , 'GUJARATHI' => 'gu'
    , 'GURMUKHI'  => 'pa'
    , 'HINDI'     => 'hi'
    , 'KANNADA'   => 'kn'
    , 'MALAYALAM' => 'ml'
    , 'ORIYA'     => 'or'
    , 'SANSKRIT'  => 'sa'
    , 'SINHALA'   => 'si'
    , 'SINHALESE' => 'si'
    , 'TAMIL'     => 'ta'
    , 'TELUGU'    => 'te'
    , 'TIBETAN'   => 'bo'
);
# Turn the contents of one Indica environment (text that still has to go
# through the pre-processor) into HTML.
#
# The argument is the environment body; the language directive is taken
# from get_next_optional_argument (presumably read from the front of $_
# -- confirm against LaTeX2HTML).  Text containing \par is split into
# paragraphs and each one is wrapped in <P>...</P>; other text is
# processed as a single piece, inline when it is shorter than the limit
# for the current $INDICA_MODE.  Returns the resulting HTML string.
#
# Changes from the earlier version:
#  * the 3-letter CSS class is kept in its own variable.  The paragraph
#    branch used to shorten $indic itself before using it as the
#    pre-processor directive, so "#SANSKRIT" was sent as "#SAN", which
#    only works when that alias happens to be declared.
#  * the LANG attribute is emitted only when %ISO_indic knows the
#    directive, instead of an empty LANG="".
sub do_env_pre_indica {
    local($_) = @_;
    local($inline_length) = $indica_inline;
    local($indic) = &get_next_optional_argument;

    # inline limit depends on the input mode
    if    ($INDICA_MODE =~ /UNICODE/) { $inline_length = $indica_unicode; }
    elsif ($INDICA_MODE =~ /CSX/)     { $inline_length = $indica_csx; }
    elsif ($INDICA_MODE =~ /LATEX/)   { $inline_length = $indica_latex; }
    else                              { $inline_length = $indica_inline; }

    local($par_start, $par_end, $ilang) = ('<P', "</P>\n", '');
    $ilang = join('', ' LANG="', $ISO_indic{$indic}, '"')
        if (defined $ISO_indic{$indic});

    # class name for style sheets: first three letters of an all-caps
    # name; $indic itself stays intact for use as the directive
    my $style_class = $indic;
    $style_class =~ s/^([A-Z]{3})\w*$/$1/;

    if (/\\par/) {
        local(@paragraphs, @indic_processed, $this_par);
        if ($USING_STYLES) {
            $env_style{$style_class} = " " unless ($env_style{$style_class});
            $par_start .= "$ilang CLASS=\"$style_class\">";
        } else {
            $par_start .= '>';
        }
        @paragraphs = (split(/$par_rx/, $_));
        while (@paragraphs) {
            $this_par = shift @paragraphs;
            # drop the six items that follow each paragraph in the split
            # result (presumably the capture groups of $par_rx -- confirm)
            foreach (1..6) { shift @paragraphs; }
            next unless ($this_par);
            $this_par =~ s/\s$//;
            if (($HTML_VERSION >= 4)&&(defined &process_object_in_latex)) {
                $_ = &process_object_in_latex(
                    "\#$indic\n" , $this_par , "\n\#NIL\n" );
                push(@indic_processed , $par_start , $_ , $par_end);
            } else {
                $_ = &process_in_latex("\\vbox{\#$indic\n$this_par\n\#NIL }\n");
                push(@indic_processed
                    , &make_comment( 'INDICA '.$indic, $this_par)
                    , $par_start , $_ , $par_end);
            }
        }
        return join('', @indic_processed );
    }

    local($comment) = ('');
    if (length($_) < $inline_length ) {
        # short enough to be set inline
        if (($HTML_VERSION >= 4)&&(defined &process_object_in_latex)) {
            $_ = &process_object_in_latex("\#$indic ", $_ , "\#NIL\n");
        } else {
            $_ = &process_undefined_environment('tex2html_ind_inline'
                , ++$global{'max_id'}, "\#$indic$_\#NIL\n");
        }
    } elsif (($HTML_VERSION >= 4)&&(defined &process_object_in_latex)) {
        $_ = &process_object_in_latex("\#$indic\n", $_ , "\n\#NIL\n");
    } else {
        $comment = join('', &make_comment( 'INDICA '.$indic, $_),"\n");
        $_ = &process_in_latex("\#$indic\n$_\n\#NIL\n");
    }

    if ($USING_STYLES) {
        $env_style{$style_class} = " " unless ($env_style{$style_class});
        return join('', $comment, "<SPAN$ilang CLASS=\"$style_class\">", $_, '</SPAN>');
    }
    return $comment . $_;
}
# for source already pre-processed
# $ACCENT_IMAGES .= 'rm'; # make images of unusual (not ISO-Latin1) accents
$SNHCURRM = 'rm';
$SNH_SIZE = '';

# add the three Sinhala font switches to the image-switch pattern
$image_switch_rx .= '|SH[abc]';

# \SHa, \SHb, \SHc: hand the text to process_indica_output together with
# the font name, its entry in %font_size and the inline limit.
# NOTE(review): $snh_inline is not assigned anywhere in this file; confirm
# where it is meant to be set.
sub do_cmd_SHa {
    &process_indica_output('SHa', $font_size{'SHa'}, $snh_inline, $_[0]);
}

sub do_cmd_SHb {
    &process_indica_output('SHb', $font_size{'SHb'}, $snh_inline, $_[0]);
}

sub do_cmd_SHc {
    &process_indica_output('SHc', $font_size{'SHc'}, $snh_inline, $_[0]);
}
# Make an image from already pre-processed text in one of the SH fonts.
#   arguments: font command name, font size (may be empty),
#              inline length limit, the text itself
# The text is wrapped as {\FONT text \kern...}%SIZE% and handed to
# process_undefined_environment when the wrapped string is shorter than
# the limit, otherwise to process_in_latex.  With $USING_STYLES the
# result is wrapped in <SPAN CLASS="INDIC">.  Returns the HTML string.
sub process_indica_output {
    my ($font, $size, $inline_limit, $text) = @_;

    # a wider kern follows SHa text that ends in "char7"
    my $kern = ($font =~ /SHa/ && $text =~ /char7$/)
        ? '\\kern.1em'
        : '\\kern.05em';

    # size defaults to $LATEX_FONT_SIZE
    my $tex = '{\\' . $font . $text . $kern . '}%'
        . ($size ? $size : $LATEX_FONT_SIZE) . '%';

    if (length($tex) < $inline_limit) {
        $tex = &process_undefined_environment('tex2html_snh_inline'
            , ++$global{'max_id'}, $tex);
    } else {
        $tex = &process_in_latex($tex);
    }

    return $tex unless ($USING_STYLES);

    $env_style{'INDIC'} = " " unless ($env_style{'INDIC'});
    return join('', '<SPAN CLASS="INDIC">', $tex, '</SPAN>');
}
# Apply the font commands named in $SNHCURRM (a backslash-separated list)
# to the given text, in order.  Each name is run through its do_cmd_NAME
# handler when one is defined, otherwise through declared_env.
# Returns the text as left by the last command.
#
# Changes from the earlier version: the handler is called inside a block
# eval rather than a string eval (an error in the handler is still
# swallowed and the text left unchanged), and the handler name is held in
# a lexical instead of overwriting the global $tmp.
sub do_cmd_snhcurrm {
    local($_) = $_[0];
    # $cmd stays a package variable, as before; foreach localises it
    foreach $cmd (split(/\\/,$SNHCURRM)) {
        my $handler = "do_cmd_$cmd";
        if (defined &$handler) {
            eval { $_ = &$handler($_) };
        } else {
            $_ = &declared_env($cmd,$_);
        }
    }
    $_;
}
# Explicitly include Thiele's \diatop in the preamble of images.tex:
# the macro definition is appended to $LaTeXmacros, one source line per
# list item, preceded by a blank line and followed by one.
$LaTeXmacros .= join("\n"
    , ''
    , '\\def\\diatop[#1|#2]{%'
    , '{\\setbox1=\\hbox{{#1{}}}\\setbox2=\\hbox{{#2{}}}%'
    , ' \\dimen0=\\ifdim\\wd1>\\wd2\\wd1\\else\\wd2\\fi%'
    , ' \\dimen1=\\ht2\\advance\\dimen1by-1ex%'
    , ' \\setbox1=\\hbox to1\\dimen0{\\hss#1\\hss}%'
    , ' \\hbox{\\rlap{\\raise1\\dimen1\\box1}%'
    , ' \\hbox to1\\dimen0{\\hss#2\\hss}}}}%'
    , "%e.g. of use: \\diatop[\\'|{\\=o}] gives o macron acute"
    , ''
    , ''
);
# Pass the one-line declaration for \diatop to process_commands_in_tex.
# NOTE(review): the "name # argument-pattern" format and its exact effect
# are defined by LaTeX2HTML, not by this file -- confirm there.
&process_commands_in_tex("diatop # []\n");

1;	# Not really necessary...