"Fossies" - the Fresh Open Source Software Archive

Member "getmail-5.16/getmailcore/imap_utf7.py" (26 May 2012, 3748 Bytes) of package /linux/misc/getmail-5.16.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. For more information about "imap_utf7.py", see the Fossies "Dox" file reference documentation.

    1 # -*- coding: utf-8 -*-
    2 """
    3 Modified utf-7 encoding as used in IMAP v4r1 for encoding mailbox names.
    4 Code from here; couldn't find a license statement:
    5 http://www.koders.com/python/fid744B4E448B1689C0963942A7928FA049084FAC86.aspx
    6 
    7 From the RFC:
    8 
    9 5.1.3.  Mailbox International Naming Convention
   10 
   11    By convention, international mailbox names are specified using a
   12    modified version of the UTF-7 encoding described in [UTF-7].  The
   13    purpose of these modifications is to correct the following problems
   14    with UTF-7:
   15 
   16       1) UTF-7 uses the "+" character for shifting; this conflicts with
   17          the common use of "+" in mailbox names, in particular USENET
   18          newsgroup names.
   19 
   20       2) UTF-7's encoding is BASE64 which uses the "/" character; this
   21          conflicts with the use of "/" as a popular hierarchy delimiter.
   22 
   23       3) UTF-7 prohibits the unencoded usage of "\"; this conflicts with
   24          the use of "\" as a popular hierarchy delimiter.
   25 
   26       4) UTF-7 prohibits the unencoded usage of "~"; this conflicts with
   27          the use of "~" in some servers as a home directory indicator.
   28 
   29       5) UTF-7 permits multiple alternate forms to represent the same
   30          string; in particular, printable US-ASCII characters can be
   31          represented in encoded form.
   32 
   33    In modified UTF-7, printable US-ASCII characters except for "&"
   34    represent themselves; that is, characters with octet values 0x20-0x25
   35    and 0x27-0x7e.  The character "&" (0x26) is represented by the two-
   36    octet sequence "&-".
   37 
   38    All other characters (octet values 0x00-0x1f, 0x7f-0xff, and all
   39    Unicode 16-bit octets) are represented in modified BASE64, with a
   40    further modification from [UTF-7] that "," is used instead of "/".
   41    Modified BASE64 MUST NOT be used to represent any printing US-ASCII
   42    character which can represent itself.
   43 
   44    "&" is used to shift to modified BASE64 and "-" to shift back to US-
   45    ASCII.  All names start in US-ASCII, and MUST end in US-ASCII (that
   46    is, a name that ends with a Unicode 16-bit octet MUST end with a "-
   47    ").
   48 """
   49 import binascii
   50 import codecs
   51 
   52 #
   53 # encoding
   54 #
   55 def modified_base64(s):
   56     s = s.encode('utf-16be')
   57     return binascii.b2a_base64(s).rstrip('\n=').replace('/', ',')
   58 
   59 def doB64(_in, r):
   60     if _in:
   61         r.append('&%s-' % modified_base64(''.join(_in)))
   62         del _in[:]
   63 
   64 def encoder(s):
   65     r = []
   66     _in = []
   67     for c in s:
   68         ordC = ord(c)
   69         if 0x20 <= ordC <= 0x25 or 0x27 <= ordC <= 0x7e:
   70             doB64(_in, r)
   71             r.append(c)
   72         elif c == '&':
   73             doB64(_in, r)
   74             r.append('&-')
   75         else:
   76             _in.append(c)
   77     doB64(_in, r)
   78     return (str(''.join(r)), len(s))
   79 
   80 #
   81 # decoding
   82 #
   83 def modified_unbase64(s):
   84     b = binascii.a2b_base64(s.replace(',', '/') + '===')
   85     return unicode(b, 'utf-16be')
   86 
   87 def decoder(s):
   88     r = []
   89     decode = []
   90     for c in s:
   91         if c == '&' and not decode:
   92             decode.append('&')
   93         elif c == '-' and decode:
   94             if len(decode) == 1:
   95                 r.append('&')
   96             else:
   97                 r.append(modified_unbase64(''.join(decode[1:])))
   98             decode = []
   99         elif decode:
  100             decode.append(c)
  101         else:
  102             r.append(c)
  103     if decode:
  104         r.append(modified_unbase64(''.join(decode[1:])))
  105     bin_str = ''.join(r)
  106     return (bin_str, len(s))
  107 
  108 
  109 class StreamReader(codecs.StreamReader):
  110     def decode(self, s, errors='strict'):
  111         return decoder(s)
  112 
  113 
  114 class StreamWriter(codecs.StreamWriter):
  115     def decode(self, s, errors='strict'):
  116         return encoder(s)
  117 
  118 
  119 def imap4_utf_7(name):
  120     if name == 'imap4-utf-7':
  121         return (encoder, decoder, StreamReader, StreamWriter)
  122 codecs.register(imap4_utf_7)
  123