"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "tables.py" between
recode-3.7.4.tar.gz and recode-3.7.5.tar.gz

About: recode is a charset converter tool and library (a fork of the original, now unmaintained, GNU recode).

tables.py  (recode-3.7.4):tables.py  (recode-3.7.5)
#!/usr/bin/python #!/usr/bin/python3
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Automatically derive Recode table files from various sources. # Automatically derive Recode table files from various sources.
# Copyright © 1993, 1994, 1997, 1998, 1999, 2000 Free Software Foundation, Inc. # Copyright © 1993, 1994, 1997, 1998, 1999, 2000 Free Software Foundation, Inc.
# François Pinard <pinard@iro.umontreal.ca>, 1993. # François Pinard <pinard@iro.umontreal.ca>, 1993.
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by # it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3, or (at your option) # the Free Software Foundation; either version 3, or (at your option)
# any later version. # any later version.
skipping to change at line 43 skipping to change at line 43
Modality options: Modality options:
-C DIRECTORY Change to DIRECTORY prior to processing -C DIRECTORY Change to DIRECTORY prior to processing
-F Produce French versions for -n, -s or -t -F Produce French versions for -n, -s or -t
-v Increase verbosity -v Increase verbosity
DATA-FILEs may be rfc1345.txt, mnemonic[.,]ds, Unicode maps, or .def files DATA-FILEs may be rfc1345.txt, mnemonic[.,]ds, Unicode maps, or .def files
from Keld's chset* packages. The digesting order is usually important. from Keld's chset* packages. The digesting order is usually important.
When `-F' and `-n' are used, process Alain's tables. When `-F' and `-n' are used, process Alain's tables.
""" """
import re, sys import re, sys, io
# Character constants. # Character constants.
REPLACEMENT_CHARACTER = 0xFFFD REPLACEMENT_CHARACTER = 0xFFFD
NOT_A_CHARACTER = 0xFFFF NOT_A_CHARACTER = 0xFFFF
# Main driver. # Main driver.
class Main: class Main:
directory = None directory = None
charnames = None charnames = None
skipping to change at line 202 skipping to change at line 202
charname_map = {} charname_map = {}
# Maximum printable length of a character name. # Maximum printable length of a character name.
max_length = 0 max_length = 0
# Frequency of each word, then its crypt code. # Frequency of each word, then its crypt code.
code_map = {} code_map = {}
def digest_french(self, input): def digest_french(self, input):
self.preset_french() self.preset_french()
fold_table = range(256) fold_table = list(range(256))
for before, after in map( for before, after in zip(
None,
u'ABCDEFGHIJKLMNOPQRSTUVWXYZÀÂÇÈÉÊÎÏÑÔÖÛ'.encode('ISO-8859-1'), u'ABCDEFGHIJKLMNOPQRSTUVWXYZÀÂÇÈÉÊÎÏÑÔÖÛ'.encode('ISO-8859-1'),
u'abcdefghijklmnopqrstuvwxyzàâçèéêîïñôöû'.encode('ISO-8859-1')): u'abcdefghijklmnopqrstuvwxyzàâçèéêîïñôöû'.encode('ISO-8859-1')):
fold_table[ord(before)] = ord(after) fold_table[before] = after
folding = ''.join(map(chr, fold_table)) folding = ''.join(map(chr, fold_table))
ignorables = ( ignorables = (
u'<commande>'.encode('ISO-8859-1'), u'<commande>'.encode('ISO-8859-1'),
u'<réservé>'.encode('ISO-8859-1'), u'<réservé>'.encode('ISO-8859-1'),
u'<pas un caractère>'.encode('ISO-8859-1')) u'<pas un caractère>'.encode('ISO-8859-1'))
while True: while True:
line = input.readline() line = input.readline()
if not line: if not line:
break break
if input.begins('@@\t'): if input.begins('@@\t'):
skipping to change at line 269 skipping to change at line 268
u"fin de transmission de bloc (etb)", # 0017 u"fin de transmission de bloc (etb)", # 0017
u"annulation (can)", # 0018 u"annulation (can)", # 0018
u"fin de support (em)", # 0019 u"fin de support (em)", # 0019
u"caractère de substitution (sub)", # 001A u"caractère de substitution (sub)", # 001A
u"échappement (esc)", # 001B u"échappement (esc)", # 001B
u"séparateur de fichier (fs)", # 001C u"séparateur de fichier (fs)", # 001C
u"séparateur de groupe (gs)", # 001D u"séparateur de groupe (gs)", # 001D
u"séparateur d'article (rs)", # 001E u"séparateur d'article (rs)", # 001E
u"séparateur de sous-article (us)", # 001F u"séparateur de sous-article (us)", # 001F
): ):
self.declare(ucs, text.encode('ISO-8859-1')) self.declare(ucs, text)
ucs += 1 ucs += 1
ucs = 0x007F ucs = 0x007F
for text in ( for text in (
u"suppression (del)", # 007F u"suppression (del)", # 007F
u"caractère de bourre (pad)", # 0080 u"caractère de bourre (pad)", # 0080
u"octet supérieur prédéfini (hop)", # 0081 u"octet supérieur prédéfini (hop)", # 0081
u"arrêt permis ici (bph)", # 0082 u"arrêt permis ici (bph)", # 0082
u"aucun arrêt ici (nbh)", # 0083 u"aucun arrêt ici (nbh)", # 0083
u"index (ind)", # 0084 u"index (ind)", # 0084
u"à la ligne (nel)", # 0085 u"à la ligne (nel)", # 0085
skipping to change at line 307 skipping to change at line 306
u"fin de zone protégée (ega)", # 0097 u"fin de zone protégée (ega)", # 0097
u"début de chaîne (sos)", # 0098 u"début de chaîne (sos)", # 0098
u"introducteur de caractère graphique unique (sgci)",# 0099 u"introducteur de caractère graphique unique (sgci)",# 0099
u"introducteur de caractère unique (sci)", # 009A u"introducteur de caractère unique (sci)", # 009A
u"introducteur de séquence de commande (csi)", # 009B u"introducteur de séquence de commande (csi)", # 009B
u"fin de chaîne (st)", # 009C u"fin de chaîne (st)", # 009C
u"commande de système d'exploitation (osc)", # 009D u"commande de système d'exploitation (osc)", # 009D
u"message privé (pm)", # 009E u"message privé (pm)", # 009E
u"commande de progiciel (apc)", # 009F u"commande de progiciel (apc)", # 009F
): ):
self.declare(ucs, text.encode('ISO-8859-1')) self.declare(ucs, text)
ucs += 1 ucs += 1
def declare(self, ucs, text): def declare(self, ucs, text):
self.charname_map[ucs] = text self.charname_map[ucs] = text
if len(text) > self.max_length: if len(text) > self.max_length:
self.max_length = len(text) self.max_length = len(text)
for word in text.split(): for word in text.split():
self.code_map[word] = self.code_map.get(word, 0) + 1 self.code_map[word] = self.code_map.get(word, 0) + 1
def presort_word(self, word): def presort_word(self, word):
skipping to change at line 335 skipping to change at line 334
write = Output('fr-%s' % self.SOURCES).write write = Output('fr-%s' % self.SOURCES).write
else: else:
write = Output(self.SOURCES).write write = Output(self.SOURCES).write
# Establish a mild compression scheme. Words word[:singles] # Establish a mild compression scheme. Words word[:singles]
# will be represented by a single byte running from 1 to # will be represented by a single byte running from 1 to
# singles. All remaining words will be represented by two # singles. All remaining words will be represented by two
# bytes, the first one running slowly from singles+1 to 255, # bytes, the first one running slowly from singles+1 to 255,
# the second cycling faster from 1 to 255. # the second cycling faster from 1 to 255.
if run.verbose: if run.verbose:
sys.stdout.write(' sorting words...') sys.stdout.write(' sorting words...')
pairs = map(self.presort_word, self.code_map.keys()) pairs = list(map(self.presort_word, self.code_map.keys()))
pairs.sort() pairs.sort()
words = map(lambda pair: pair[1], pairs) words = list(map(lambda pair: pair[1], pairs))
pairs = None pairs = None
if run.verbose: if run.verbose:
sys.stdout.write(' %d of them\n' % len(words)) sys.stdout.write(' %d of them\n' % len(words))
count = len(words) count = len(words)
singles = (255 * 255 - count) / 254 singles = (255 * 255 - count) // 254
# Transmit a few values for further usage by the C code. # Transmit a few values for further usage by the C code.
if run.verbose: if run.verbose:
sys.stdout.write(' sorting names...') sys.stdout.write(' sorting names...')
ucs2_table = self.charname_map.keys() ucs2_table = list(self.charname_map.keys())
ucs2_table.sort() ucs2_table.sort()
if run.verbose: if run.verbose:
sys.stdout.write(' %d of them\n' % len(ucs2_table)) sys.stdout.write(' %d of them\n' % len(ucs2_table))
write('\n' write('\n'
'#define NUMBER_OF_SINGLES %d\n' '#define NUMBER_OF_SINGLES %d\n'
'#define MAX_CHARNAME_LENGTH %d\n' '#define MAX_CHARNAME_LENGTH %d\n'
'#define NUMBER_OF_CHARNAMES %d\n' '#define NUMBER_OF_CHARNAMES %d\n'
% (singles, self.max_length, len(ucs2_table))) % (singles, self.max_length, len(ucs2_table)))
# Establish a mild compression scheme (one or two bytes per word). # Establish a mild compression scheme (one or two bytes per word).
sys.stdout.write(" writing words\n") sys.stdout.write(" writing words\n")
skipping to change at line 398 skipping to change at line 397
'static const struct charname charname[NUMBER_OF_CHARNAMES] =\n' 'static const struct charname charname[NUMBER_OF_CHARNAMES] =\n'
' {\n') ' {\n')
for ucs2 in ucs2_table: for ucs2 in ucs2_table:
write(' {0x%04X, "' % ucs2) write(' {0x%04X, "' % ucs2)
for word in self.charname_map[ucs2].split(): for word in self.charname_map[ucs2].split():
if word in self.code_map: if word in self.code_map:
code = self.code_map[word] code = self.code_map[word]
if code < 256: if code < 256:
write('\\%0.3o' % code) write('\\%0.3o' % code)
else: else:
write('\\%0.3o\\%0.3o' % (code / 256, code % 256)) write('\\%0.3o\\%0.3o' % (code // 256, code % 256))
else: else:
sys.stdout.write('??? %s\n' % word) sys.stdout.write('??? %s\n' % word)
write('"},\n') write('"},\n')
write(' };\n') write(' };\n')
# Explodes. # Explodes.
class Explodes(Options): class Explodes(Options):
SOURCES = 'explode.c' SOURCES = 'explode.c'
def __init__(self): def __init__(self):
skipping to change at line 665 skipping to change at line 664
write('\n' write('\n'
'struct entry\n' 'struct entry\n'
' {\n' ' {\n'
' recode_ucs2 code;\n' ' recode_ucs2 code;\n'
' const char *rfc1345;\n' ' const char *rfc1345;\n'
' };\n' ' };\n'
'\n' '\n'
'static const struct entry table[TABLE_LENGTH] =\n' 'static const struct entry table[TABLE_LENGTH] =\n'
' {\n') ' {\n')
count = 0 count = 0
indices = self.mnemonic_map.keys() indices = list(self.mnemonic_map.keys())
indices.sort() indices.sort()
for ucs2 in indices: for ucs2 in indices:
text = self.mnemonic_map[ucs2] text = self.mnemonic_map[ucs2]
inverse_map[text] = count inverse_map[text] = count
write(' /* %4d */ {0x%04X, "%s"},\n' write(' /* %4d */ {0x%04X, "%s"},\n'
% (count, ucs2, re.sub(r'([\"])', r'\\\1', text))) % (count, ucs2, re.sub(r'([\"])', r'\\\1', text)))
count += 1 count += 1
write(' };\n') write(' };\n')
write('\n' write('\n'
'static const unsigned short inverse[TABLE_LENGTH] =\n' 'static const unsigned short inverse[TABLE_LENGTH] =\n'
' {') ' {')
count = 0 count = 0
keys = inverse_map.keys() keys = list(inverse_map.keys())
keys.sort() keys.sort()
for text in keys: for text in keys:
if count % 10 == 0: if count % 10 == 0:
if count != 0: if count != 0:
write(',') write(',')
write('\n /* %4d */ ' % count) write('\n /* %4d */ ' % count)
else: else:
write(', ') write(', ')
write('%4d' % inverse_map[text]) write('%4d' % inverse_map[text])
count += 1 count += 1
skipping to change at line 1122 skipping to change at line 1121
write('0x' + strip[pos:pos+4]) write('0x' + strip[pos:pos+4])
count += 1 count += 1
write('\n' write('\n'
' };\n') ' };\n')
def complete_texinfo(self, french): def complete_texinfo(self, french):
if french: if french:
write = Output('fr-%s' % self.TEXINFO, noheader=True).write write = Output('fr-%s' % self.TEXINFO, noheader=True).write
else: else:
write = Output(self.TEXINFO, noheader=True).write write = Output(self.TEXINFO, noheader=True).write
charsets = self.remark_map.keys() charsets = list(self.remark_map.keys())
charsets.sort() charsets.sort()
for charset in charsets: for charset in charsets:
write('\n' write('\n'
'@item %s\n' '@item %s\n'
'@tindex %s@r{, aliases and source}\n' '@tindex %s@r{, aliases and source}\n'
% (charset, re.sub(':([0-9]+)', r'(\1)', charset))) % (charset, re.sub(':([0-9]+)', r'(\1)', charset)))
aliases = self.aliases_map[charset] aliases = self.aliases_map[charset]
if aliases: if aliases:
if len(aliases) == 1: if len(aliases) == 1:
if aliases[0]: # FIXME: why empty sometimes? if aliases[0]: # FIXME: why empty sometimes?
skipping to change at line 1157 skipping to change at line 1156
write(line.replace('@', '@@')) write(line.replace('@', '@@'))
if line[-1] != '.': if line[-1] != '.':
write('.') write('.')
write('\n') write('\n')
# Handling basic input and output. # Handling basic input and output.
class Input: class Input:
def __init__(self, name): def __init__(self, name):
self.name = name self.name = name
self.input = file(name) self.input = io.open(name, encoding='latin-1')
self.line_count = 0 self.line_count = 0
sys.stdout.write("Reading %s\n" % name) sys.stdout.write("Reading %s\n" % name)
def readline(self): def readline(self):
self.line = self.input.readline() self.line = self.input.readline()
self.line_count += 1 self.line_count += 1
if type(self.line) == bytes:
self.line = self.line.decode('utf-8')
return self.line return self.line
def warn(self, format, *args): def warn(self, format, *args):
if run.verbose: if run.verbose:
sys.stdout.write('%s:%s: %s\n' sys.stdout.write('%s:%s: %s\n'
% (self.name, self.line_count, format % args)) % (self.name, self.line_count, format % args))
def die(self, format, *args): def die(self, format, *args):
sys.stdout.write('%s:%s: %s\n' sys.stdout.write('%s:%s: %s\n'
% (self.name, self.line_count, format % args)) % (self.name, self.line_count, format % args))
skipping to change at line 1189 skipping to change at line 1190
def match(self, pattern): def match(self, pattern):
return re.match(pattern, self.line) return re.match(pattern, self.line)
def search(self, pattern): def search(self, pattern):
return re.search(pattern, self.line) return re.search(pattern, self.line)
class Output: class Output:
def __init__(self, name, noheader=False): def __init__(self, name, noheader=False):
self.name = name self.name = name
self.write = file(name, 'w').write self.write = open(name, 'w', encoding='utf-8').write
sys.stdout.write("Writing %s\n" % name) sys.stdout.write("Writing %s\n" % name)
if not noheader: if not noheader:
self.write("""\ self.write("""\
/* DO NOT MODIFY THIS FILE! It was generated by `recode/tables.py'. */ /* DO NOT MODIFY THIS FILE! It was generated by `recode/tables.py'. */
/* Conversion of files between different charsets and surfaces. /* Conversion of files between different charsets and surfaces.
Copyright © 1999 Free Software Foundation, Inc. Copyright © 1999 Free Software Foundation, Inc.
Contributed by François Pinard <pinard@iro.umontreal.ca>, 1993, 1997. Contributed by François Pinard <pinard@iro.umontreal.ca>, 1993, 1997.
This library is free software; you can redistribute it and/or This library is free software; you can redistribute it and/or
End of changes. 17 change blocks.
18 lines changed or deleted, 19 lines changed or added.

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)