"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "src/fuglu/stringencode.py" between
fuglu-0.10.8.tar.gz and fuglu-1.0.0.tar.gz

About: FuGlu is a mail scanning daemon for Postfix written in Python. It acts as a glue application between the MTA and spam checkers and antivirus software.

stringencode.py  (fuglu-0.10.8):stringencode.py  (fuglu-1.0.0)
skipping to change at line 66 skipping to change at line 66
""" Try to decode an encoded string """ Try to decode an encoded string
This will raise exceptions if object can not be decoded. The calling This will raise exceptions if object can not be decoded. The calling
routine has to handle the exception. For example, "force_uString" has routine has to handle the exception. For example, "force_uString" has
to handle exceptions for sending non-encoded strings. to handle exceptions for sending non-encoded strings.
Args: Args:
b_inputstring (str/bytes): input byte string b_inputstring (str/bytes): input byte string
encodingGuess (str): guess for encoding used, default assume unicode encodingGuess (str): guess for encoding used, default assume unicode
errors (str): error handling as in standard bytes.decode -> strict, ignore, replace errors (str): error handling as in standard bytes.decode -> strict, ignore, replace
or strict+ignore, strict+replace (try strict first, on certain errors try ignore/replace)
Returns: Returns:
unicode string unicode string
""" """
if b_inputstring is None: if b_inputstring is None:
return None return None
# make sure encoding is not None or empty # make sure encoding is not None or empty
if not encodingGuess: if not encodingGuess:
encodingGuess = "utf-8" encodingGuess = "utf-8"
if '+' in errors:
firsterrors, seconderrors = errors.split('+', 1)
else:
firsterrors = errors
seconderrors = None
logger = logging.getLogger("%s.stringencode.try_decoding" % __package__) logger = logging.getLogger("%s.stringencode.try_decoding" % __package__)
u_outputstring = None u_outputstring = None
try: try:
u_outputstring = b_inputstring.decode(encodingGuess, errors=errors) u_outputstring = b_inputstring.decode(encodingGuess, errors=firsterrors)
except (UnicodeDecodeError, LookupError) as e: except (UnicodeDecodeError, LookupError) as e:
# if we get here we will also print either the chardet or trial&error de if seconderrors and str(e).endswith('invalid continuation byte'):
coding message anyway try:
logger.debug("found non %s encoding or encoding not found (msg: %s), try u_outputstring = b_inputstring.decode(encodingGuess, errors=seco
to detect encoding" nderrors)
% (str(e), encodingGuess)) except (UnicodeDecodeError, LookupError) as e:
pass pass
if not u_outputstring:
# if we get here we will also print either the chardet or trial&error decoding message anyway
logger.debug("found non %s encoding or encoding not found (msg: %s),
try to detect encoding"
% (str(e), encodingGuess))
if u_outputstring is None: if u_outputstring is None:
if CHARDET_AVAILABLE: if CHARDET_AVAILABLE:
encoding = chardet.detect(b_inputstring)['encoding'] # limit to analyse max 10'000 characters because it might become very expensive
encoding = chardet.detect(b_inputstring[:10000])['encoding']
logger.info("chardet -> encoding estimated as %s" % encoding) logger.info("chardet -> encoding estimated as %s" % encoding)
try: try:
u_outputstring = b_inputstring.decode(encoding, errors=errors) u_outputstring = b_inputstring.decode(encoding, errors=firsterro rs)
except (UnicodeDecodeError, LookupError): except (UnicodeDecodeError, LookupError):
logger.info("encoding found by chardet (%s) does not work" % enc oding) logger.info("encoding found by chardet (%s) does not work" % enc oding)
else: else:
logger.debug("module chardet not available -> skip autodetect") logger.debug("module chardet not available -> skip autodetect")
if u_outputstring is None: if u_outputstring is None:
trialerrorencoding = EncodingTrialError.test_all(b_inputstring, returnim mediately=True) trialerrorencoding = EncodingTrialError.test_all(b_inputstring, returnim mediately=True)
logger.info("trial&error -> encoding estimated as one of (selecting firs t) %s" % trialerrorencoding) logger.info("trial&error -> encoding estimated as one of (selecting firs t) %s" % trialerrorencoding)
if trialerrorencoding: if trialerrorencoding:
try: try:
u_outputstring = b_inputstring.decode(trialerrorencoding[0], err ors=errors) u_outputstring = b_inputstring.decode(trialerrorencoding[0], err ors=firsterrors)
except (UnicodeDecodeError, LookupError): except (UnicodeDecodeError, LookupError):
logger.info("encoding found by trial & error (%s) does not work" % trialerrorencoding) logger.info("encoding found by trial & error (%s) does not work" % trialerrorencoding)
if u_outputstring is None: if u_outputstring is None:
raise UnicodeDecodeError raise UnicodeDecodeError
return u_outputstring return u_outputstring
def force_uString(inputstring, encodingGuess="utf-8", errors="strict", convert_n one=False): def force_uString(inputstring, encodingGuess="utf-8", errors="strict", convert_n one=False):
"""Try to enforce a unicode string """Try to enforce a unicode string
Args: Args:
inputstring (str, unicode, list): input string or list of strings to be checked inputstring (str, unicode, list): input string or list of strings to be checked
encodingGuess (str): guess for encoding used, default assume unicode encodingGuess (str): guess for encoding used, default assume unicode
errors (str): error handling as in standard bytes.decode -> strict, ignore, replace errors (str): error handling as in standard bytes.decode -> strict, ignore, replace
or strict+ignore, strict+replace (try strict first, on certain errors try ignore/replace)
convert_none (bool): convert None to empty string if True convert_none (bool): convert None to empty string if True
Raises: Raises:
ForceUStringError: if input is not string/unicode/bytes (or list containing such elements) ForceUStringError: if input is not string/unicode/bytes (or list containing such elements)
Returns: unicode string (or list with unicode strings) Returns: unicode string (or list with unicode strings)
""" """
if inputstring is None: if inputstring is None:
return "" if convert_none else None return "" if convert_none else None
 End of changes. 8 change blocks. 
10 lines changed or deleted 26 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)