"Fossies" - the Fresh Open Source Software Archive

Member "archivemail-0.9.0/archivemail" (9 Jul 2011, 75260 Bytes) of package /linux/privat/old/archivemail-0.9.0.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here.

    1 #! /usr/bin/env python
    2 ############################################################################
    3 # Copyright (C) 2002  Paul Rodger <paul@paulrodger.com>,
    4 #           (C) 2006  Peter Poeml <poeml@suse.de>,
    5 #           (C) 2006-2010  Nikolaus Schulz <microschulz@web.de>
    6 #
    7 # This program is free software; you can redistribute it and/or modify
    8 # it under the terms of the GNU General Public License as published by
    9 # the Free Software Foundation; either version 2 of the License, or
   10 # (at your option) any later version.
   11 #
   12 # This program is distributed in the hope that it will be useful,
   13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
   14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   15 # GNU General Public License for more details.
   16 #
   17 # You should have received a copy of the GNU General Public License
   18 # along with this program; if not, write to the Free Software
   19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
   20 ############################################################################
   21 """
   22 Archive and compress old mail in mbox, MH or maildir-format mailboxes.
   23 Website: http://archivemail.sourceforge.net/
   24 """
   25 
   26 # global administrivia 
   27 __version__ = "archivemail v0.9.0"
   28 __copyright__ = """\
   29 Copyright (C) 2002  Paul Rodger <paul@paulrodger.com>
   30           (C) 2006  Peter Poeml <poeml@suse.de>,
   31           (C) 2006-2011  Nikolaus Schulz <microschulz@web.de>
   32 This is free software; see the source for copying conditions. There is NO
   33 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."""
   34 
   35 import sys
   36 
   37 def check_python_version(): 
   38     """Abort if we are running on python < v2.3"""
   39     too_old_error = "This program requires python v2.3 or greater. " + \
   40       "Your version of python is:\n%s""" % sys.version
   41     try: 
   42         version = sys.version_info  # we might not even have this function! :)
   43         if (version[0] < 2) or (version[0] == 2 and version[1] < 3):
   44             print too_old_error
   45             sys.exit(1)
   46     except AttributeError:
   47         print too_old_error
   48         sys.exit(1)
   49 
   50 # define & run this early
   51 # (IMAP over SSL requires Python >= 2.3) 
   52 check_python_version()  
   53 
   54 import fcntl
   55 import getopt
   56 import gzip
   57 import mailbox
   58 import os
   59 import pwd
   60 import re
   61 import rfc822
   62 import shutil
   63 import signal
   64 import stat
   65 import string
   66 import tempfile
   67 import time
   68 import urlparse
   69 import errno
   70 import socket
   71 import locale
   72 
   73 # From_ mangling regex. 
   74 from_re = re.compile(r'^From ', re.MULTILINE)
   75 imapsize_re = re.compile(r'^(?P<msn>[0-9]+) \(RFC822\.SIZE (?P<size>[0-9]+)\)')
   76 
   77 userencoding = locale.getpreferredencoding()
   78 
   79 ############## class definitions ###############
   80 
   81 class ArchivemailException(Exception):
   82     pass
   83 class UserError(ArchivemailException): 
   84     pass
   85 class UnexpectedError(ArchivemailException): 
   86     pass
   87 class LockUnavailable(ArchivemailException):
   88     pass
   89 
   90 class Stats:
   91     """Class to collect and print statistics about mailbox archival"""
   92     __archived = 0
   93     __archived_size = 0
   94     __mailbox_name = None
   95     __archive_name = None
   96     __start_time = 0
   97     __total = 0
   98     __total_size = 0
   99 
  100     def __init__(self, mailbox_name, final_archive_name):
  101         """Constructor for a new set of statistics.
  102 
  103         Arguments: 
  104         mailbox_name -- filename/dirname of the original mailbox
  105         final_archive_name -- filename for the final 'mbox' archive, without
  106                               compression extension (eg .gz)
  107 
  108         """
  109         assert mailbox_name
  110         assert final_archive_name
  111         self.__start_time = time.time()
  112         self.__mailbox_name = mailbox_name
  113         self.__archive_name = final_archive_name + ".gz"
  114 
  115     def another_message(self, size):
  116         """Add one to the internal count of total messages processed 
  117         and record message size."""
  118         self.__total = self.__total + 1
  119         self.__total_size = self.__total_size + size
  120 
  121     def another_archived(self, size):
  122         """Add one to the internal count of messages archived
  123         and record message size."""
  124         self.__archived = self.__archived + 1
  125         self.__archived_size = self.__archived_size + size
  126 
  127     def display(self):
  128         """Print statistics about how many messages were archived"""
  129         end_time = time.time()
  130         time_seconds = end_time - self.__start_time
  131         action = "archived"
  132         if options.delete_old_mail:
  133             action = "deleted"
  134         if options.dry_run:
  135             action = "I would have " + action
  136         print "%s:\n    %s %d of %d message(s) (%s of %s) in %.1f seconds" % \
  137             (self.__mailbox_name, action, self.__archived, self.__total,
  138             nice_size_str(self.__archived_size), 
  139             nice_size_str(self.__total_size), time_seconds)
  140             
  141 
  142 class StaleFiles:
  143     """Class to keep track of files to be deleted on abnormal exit"""
  144     dotlock_files      = []    # dotlock files for source mbox and final archive
  145     temp_mboxes        = []    # temporary retain and archive mboxes
  146     temp_dir           = None  # our tempfile directory container
  147 
  148     def clean(self):
  149         """Delete any temporary files or lockfiles that exist"""
  150         while self.dotlock_files:
  151             dotlock = self.dotlock_files.pop()
  152             vprint("removing stale dotlock file '%s'" % dotlock)
  153             try: 
  154                 os.remove(dotlock)
  155             except (IOError, OSError): pass
  156         while self.temp_mboxes:
  157             mbox = self.temp_mboxes.pop()
  158             vprint("removing stale temporary mbox '%s'" % mbox)
  159             try: 
  160                 os.remove(mbox)
  161             except (IOError, OSError): pass
  162         if self.temp_dir:
  163             vprint("removing stale tempfile directory '%s'" % self.temp_dir)
  164             try: 
  165                 os.rmdir(self.temp_dir)
  166             except OSError, e:
  167                 if e.errno == errno.ENOTEMPTY: # Probably a bug
  168                     user_warning("cannot remove temporary directory '%s', "
  169                             "directory not empty" % self.temp_dir)
  170             except IOError: pass
  171             else: self.temp_dir = None
  172 
  173 
  174 
  175 class Options:
  176     """Class to store runtime options, including defaults"""
  177     archive_prefix       = None
  178     archive_suffix       = None
  179     archive_default_suffix = "_archive"
  180     archive_name         = None
  181     days_old_max         = 180
  182     date_old_max         = None
  183     delete_old_mail      = False
  184     dry_run              = False
  185     filter_append        = None
  186     include_flagged      = False
  187     locking_attempts     = 5
  188     lockfile_extension   = ".lock"
  189     lock_sleep           = True
  190     no_compress          = False
  191     only_archive_read    = False
  192     output_dir           = None
  193     pwfile               = None
  194     preserve_unread      = False
  195     mangle_from          = True
  196     quiet                = False
  197     read_buffer_size     = 8192
  198     script_name          = os.path.basename(sys.argv[0])
  199     min_size             = None
  200     verbose              = False
  201     debug_imap           = 0
  202     warn_duplicates      = False
  203     copy_old_mail        = False
  204     archive_all          = False
  205 
  206     def parse_args(self, args, usage):
  207         """Set our runtime options from the command-line arguments.
  208 
  209         Arguments:
  210         args -- this is sys.argv[1:]
  211         usage -- a usage message to display on '--help' or bad arguments
  212 
  213         Returns the remaining command-line arguments that have not yet been
  214         parsed as a string.
  215 
  216         """
  217         try:
  218             opts, args = getopt.getopt(args, '?D:S:Vd:hno:F:P:qs:p:a:uv',
  219                              ["date=", "days=", "delete", "dry-run", "help",
  220                              "include-flagged", "no-compress", "output-dir=",
  221                              "filter-append=", "pwfile=", "dont-mangle",
  222                              "preserve-unread", "quiet", "size=", "suffix=",
  223                              "prefix=", "archive-name=", "verbose",
  224                              "debug-imap=", "version", "warn-duplicate",
  225                              "copy", "all"])
  226         except getopt.error, msg:
  227             user_error(msg)
  228 
  229         archive_by = None 
  230 
  231         for o, a in opts:
  232             if o == '--delete':
  233                 if self.copy_old_mail: 
  234                     user_error("found conflicting options --copy and --delete")
  235                 self.delete_old_mail = True
  236             if o == '--include-flagged':
  237                 self.include_flagged = True
  238             if o == '--no-compress':
  239                 self.no_compress = True
  240             if o == '--warn-duplicate':
  241                 self.warn_duplicates = True
  242             if o in ('-D', '--date'):
  243                 if archive_by: 
  244                     user_error("you cannot specify both -d and -D options")
  245                 archive_by = "date"                        
  246                 self.date_old_max = self.date_argument(a)
  247             if o in ('-d', '--days'):
  248                 if archive_by: 
  249                     user_error("you cannot specify both -d and -D options")
  250                 archive_by = "days"                        
  251                 self.days_old_max = string.atoi(a)
  252             if o in ('-o', '--output-dir'):
  253                 self.output_dir = os.path.expanduser(a)
  254             if o in ('-P', '--pwfile'):
  255                 self.pwfile = os.path.expanduser(a)
  256             if o in ('-F', '--filter-append'):
  257                 self.filter_append = a
  258             if o in ('-h', '-?', '--help'):
  259                 print usage
  260                 sys.exit(0)
  261             if o in ('-n', '--dry-run'):
  262                 self.dry_run = True
  263             if o in ('-q', '--quiet'):
  264                 self.quiet = True
  265             if o in ('-s', '--suffix'):
  266                 self.archive_suffix = a
  267             if o in ('-p', '--prefix'):
  268                 self.archive_prefix = a
  269             if o in ('-a', '--archive-name'):
  270                 self.archive_name = os.path.expanduser(a)
  271             if o in ('-S', '--size'):
  272                 self.min_size = string.atoi(a)
  273             if o in ('-u', '--preserve-unread'):
  274                 self.preserve_unread = True
  275             if o == '--dont-mangle':
  276                 self.mangle_from = False
  277             if o in ('-v', '--verbose'):
  278                 self.verbose = True
  279             if o == '--debug-imap': 
  280                 self.debug_imap = int(a)
  281             if o == '--copy':
  282                 if self.delete_old_mail: 
  283                     user_error("found conflicting options --copy and --delete")
  284                 self.copy_old_mail = True
  285             if o == '--all': 
  286                 self.archive_all = True
  287             if o in ('-V', '--version'):
  288                 print __version__ + "\n\n" + __copyright__
  289                 sys.exit(0)
  290         return args
  291 
  292     def sanity_check(self, args):
  293         """Complain bitterly about our options now rather than later"""
  294         if self.output_dir:
  295             check_sane_destdir(self.output_dir)
  296         if self.days_old_max < 0:
  297             user_error("--days argument must be positive")
  298         if self.days_old_max >= 10000:
  299             user_error("--days argument must be less than 10000")
  300         if self.min_size is not None and self.min_size < 1:
  301             user_error("--size argument must be greater than zero")
  302         if self.quiet and self.verbose:
  303             user_error("you cannot use both the --quiet and --verbose options")
  304         if self.pwfile:
  305             if not os.path.isfile(self.pwfile):
  306                 user_error("pwfile %s does not exist" % self.pwfile)
  307         if self.archive_name and len(args) > 1:
  308             user_error("the --archive-name cannot be used with multiple " \
  309                 "mailboxes")
  310 
  311     def date_argument(self, string):
  312         """Converts a date argument string into seconds since the epoch"""
  313         date_formats = (
  314             "%Y-%m-%d",  # ISO format 
  315             "%d %b %Y" , # Internet format 
  316             "%d %B %Y" , # Internet format with full month names
  317         )
  318         time.accept2dyear = False  # I'm not going to support 2-digit years
  319         for format in date_formats:
  320             try:
  321                 date = time.strptime(string, format)
  322                 seconds = time.mktime(date)
  323                 return seconds
  324             except (ValueError, OverflowError):
  325                 pass
  326         user_error("cannot parse the date argument '%s'\n"
  327             "The date should be in ISO format (eg '2002-04-23'),\n"
  328             "Internet format (eg '23 Apr 2002') or\n"
  329             "Internet format with full month names (eg '23 April 2002')" % 
  330             string)
  331 
  332 
  333 class LockableMboxMixin:
  334     """Locking methods for mbox files."""
  335 
  336     def __init__(self, mbox_file, mbox_file_name):
  337         self.mbox_file = mbox_file
  338         self.mbox_file_name = mbox_file_name
  339         self._locked = False
  340         self._use_dotlock = True
  341 
  342     def lock(self):
  343         """Lock this mbox with both a dotlock and a posix lock."""
  344         assert not self._locked
  345         attempt = 1
  346         while True:
  347             try:
  348                 self._posix_lock()
  349                 self._dotlock_lock()
  350                 break
  351             except LockUnavailable, e:
  352                 self._posix_unlock()
  353                 attempt += 1
  354                 if (attempt > options.locking_attempts):
  355                     unexpected_error(str(e))
  356                 vprint("%s - sleeping..." % e)
  357                 time.sleep(options.lock_sleep)
  358             except:
  359                 self._posix_unlock()
  360                 raise
  361         self._locked = True
  362 
  363     def unlock(self):
  364         """Unlock this mbox."""
  365         assert self._locked
  366         self._dotlock_unlock()
  367         self._posix_unlock()
  368         self._locked = False
  369 
  370     def _posix_lock(self):
  371         """Set an exclusive posix lock on the 'mbox' mailbox"""
  372         vprint("trying to acquire posix lock on file '%s'" % self.mbox_file_name)
  373         try:
  374             fcntl.lockf(self.mbox_file, fcntl.LOCK_EX|fcntl.LOCK_NB)
  375         except IOError, e:
  376             if e.errno in (errno.EAGAIN, errno.EACCES):
  377                 raise LockUnavailable("posix lock for '%s' unavailable" % \
  378                     self.mbox_file_name)
  379             else:
  380                 raise
  381         vprint("acquired posix lock on file '%s'" % self.mbox_file_name)
  382 
  383     def _posix_unlock(self):
  384         """Unset any posix lock on the 'mbox' mailbox"""
  385         vprint("dropping posix lock on file '%s'" % self.mbox_file_name)
  386         fcntl.lockf(self.mbox_file, fcntl.LOCK_UN)
  387 
  388     def _dotlock_lock(self):
  389         """Create a dotlock file for the 'mbox' mailbox"""
  390         hostname = socket.gethostname()
  391         pid = os.getpid()
  392         box_dir, prelock_prefix = os.path.split(self.mbox_file_name)
  393         prelock_suffix = ".%s.%s%s" % (hostname, pid, options.lockfile_extension)
  394         lock_name = self.mbox_file_name + options.lockfile_extension
  395         vprint("trying to create dotlock file '%s'" % lock_name)
  396         try:
  397             plfd, prelock_name = tempfile.mkstemp(prelock_suffix, prelock_prefix,
  398                 dir=box_dir)
  399         except OSError, e:
  400             if e.errno == errno.EACCES:
  401                 if not options.quiet:
  402                     user_warning("no write permissions: omitting dotlock for '%s'" % \
  403                         self.mbox_file_name)
  404                 self._use_dotlock = False
  405                 return
  406             raise
  407         try:
  408             try:
  409                 os.link(prelock_name, lock_name)
  410                 # We've got the lock.
  411             except OSError, e:
  412                 if os.fstat(plfd)[stat.ST_NLINK] == 2:
  413                     # The Linux man page for open(2) claims that in this
  414                     # case we have actually succeeded to create the link,
  415                     # and this assumption seems to be folklore.
  416                     # So we've got the lock.
  417                     pass
  418                 elif e.errno == errno.EEXIST:
  419                     raise LockUnavailable("Dotlock for '%s' unavailable" % self.mbox_file_name)
  420                 else:
  421                     raise
  422             _stale.dotlock_files.append(lock_name)
  423         finally:
  424             os.close(plfd)
  425             os.unlink(prelock_name)
  426         vprint("acquired lockfile '%s'" % lock_name)
  427 
  428     def _dotlock_unlock(self):
  429         """Delete the dotlock file for the 'mbox' mailbox."""
  430         if not self._use_dotlock:
  431             return
  432         lock_name = self.mbox_file_name + options.lockfile_extension
  433         vprint("removing lockfile '%s'" % lock_name)
  434         os.remove(lock_name)
  435         _stale.dotlock_files.remove(lock_name)
  436 
  437     def commit(self):
  438         """Sync the mbox file to disk."""
  439         self.mbox_file.flush()
  440         os.fsync(self.mbox_file.fileno())
  441 
  442     def close(self):
  443         """Close the mbox file"""
  444         vprint("closing file '%s'" % self.mbox_file_name)
  445         assert not self._locked
  446         self.mbox_file.close()
  447 
  448 
  449 class Mbox(mailbox.UnixMailbox, LockableMboxMixin):
  450     """A mostly-read-only mbox with locking. The mbox content can only be
  451     modified by overwriting the entire underlying file."""
  452 
  453     def __init__(self, path):
  454         """Constructor for opening an existing 'mbox' mailbox.
  455         Extends constructor for mailbox.UnixMailbox()
  456 
  457         Named Arguments:
  458         path -- file name of the 'mbox' file to be opened
  459         """
  460         assert path
  461         fd = safe_open_existing(path)
  462         st = os.fstat(fd)
  463         self.original_atime = st.st_atime
  464         self.original_mtime = st.st_mtime
  465         self.starting_size = st.st_size
  466         self.mbox_file = os.fdopen(fd, "r+")
  467         self.mbox_file_name = path
  468         LockableMboxMixin.__init__(self, self.mbox_file, path)
  469         mailbox.UnixMailbox.__init__(self, self.mbox_file)
  470 
  471     def reset_timestamps(self):
  472         """Set the file timestamps to the original values"""
  473         assert self.original_atime
  474         assert self.original_mtime
  475         assert self.mbox_file_name
  476         os.utime(self.mbox_file_name, (self.original_atime,  \
  477             self.original_mtime))
  478 
  479     def get_size(self):
  480         """Return the current size of the mbox file on disk"""
  481         return os.path.getsize(self.mbox_file_name)
  482 
  483     def overwrite_with(self, mbox_filename):
  484         """Overwrite the mbox content with the content of the given mbox file."""
  485         fin = open(mbox_filename, "r")
  486         self.mbox_file.seek(0)
  487         shutil.copyfileobj(fin, self.mbox_file)
  488         self.mbox_file.truncate()
  489 
  490 
  491 class ArchiveMbox(LockableMboxMixin):
  492     """Simple append-only access to the archive mbox. Entirely content-agnostic."""
  493 
  494     def __init__(self, path):
  495         fd = safe_open(path)
  496         self.mbox_file = os.fdopen(fd, "a")
  497         LockableMboxMixin.__init__(self, self.mbox_file, path)
  498 
  499     def append(self, filename):
  500         """Append the content of the given file to the mbox."""
  501         assert self._locked
  502         fin = open(filename, "r")
  503         oldsize = os.fstat(self.mbox_file.fileno()).st_size
  504         try:
  505             shutil.copyfileobj(fin, self.mbox_file)
  506         except:
  507             # We can safely abort here without data loss, because
  508             # we have not yet changed the original mailbox
  509             self.mbox_file.truncate(oldsize)
  510             raise
  511         fin.close()
  512 
  513 
  514 class TempMbox:
  515     """A write-only temporary mbox. No locking methods."""
  516 
  517     def __init__(self, prefix=tempfile.template):
  518         """Creates a temporary mbox file."""
  519         fd, filename = tempfile.mkstemp(prefix=prefix)
  520         self.mbox_file_name = filename
  521         _stale.temp_mboxes.append(filename)
  522         self.mbox_file = os.fdopen(fd, "w")
  523         # an empty gzip file is not really empty (it contains the gzip header
  524         # and trailer), so we need to track manually if this mbox is empty
  525         self.empty = True
  526 
  527     def write(self, msg):
  528         """Write a rfc822 message object to the 'mbox' mailbox.
  529         If the rfc822 has no Unix 'From_' line, then one is constructed
  530         from other headers in the message.
  531 
  532         Arguments:
  533         msg -- rfc822 message object to be written
  534 
  535         """
  536         assert msg
  537         assert self.mbox_file
  538 
  539         self.empty = False
  540         vprint("saving message to file '%s'" % self.mbox_file_name)
  541         unix_from = msg.unixfrom
  542         if unix_from:
  543             msg_has_mbox_format = True
  544         else:
  545             msg_has_mbox_format = False
  546             unix_from = make_mbox_from(msg)
  547         self.mbox_file.write(unix_from)
  548         assert msg.headers
  549         self.mbox_file.writelines(msg.headers)
  550         self.mbox_file.write(os.linesep)
  551 
  552         # The following while loop is about twice as fast in
  553         # practice to 'self.mbox_file.writelines(msg.fp.readlines())'
  554         assert options.read_buffer_size > 0
  555         linebuf = ""
  556         while True:
  557             body = msg.fp.read(options.read_buffer_size)
  558             if (not msg_has_mbox_format) and options.mangle_from:
  559                 # Be careful not to break pattern matching
  560                 splitindex = body.rfind(os.linesep)
  561                 nicebody = linebuf + body[:splitindex]
  562                 linebuf = body[splitindex:]
  563                 body = from_re.sub('>From ', nicebody)
  564             if not body:
  565                 break
  566             self.mbox_file.write(body)
  567         if not msg_has_mbox_format:
  568             self.mbox_file.write(os.linesep)
  569 
  570     def commit(self):
  571         """Sync the mbox file to disk."""
  572         self.mbox_file.flush()
  573         os.fsync(self.mbox_file.fileno())
  574 
  575     def close(self):
  576         """Close the mbox file"""
  577         vprint("closing file '%s'" % self.mbox_file_name)
  578         self.mbox_file.close()
  579 
  580     def saveas(self, filename):
  581         """Rename this temporary mbox file to the given name, making it
  582         permanent.  Emergency use only."""
  583         os.rename(self.mbox_file_name, filename)
  584         _stale.temp_mboxes.remove(self.mbox_file_name)
  585 
  586     def remove(self):
  587         """Delete the temporary mbox file."""
  588         os.remove(self.mbox_file_name)
  589         _stale.temp_mboxes.remove(self.mbox_file_name)
  590 
  591 
  592 class CompressedTempMbox(TempMbox):
  593     """A compressed version of a TempMbox."""
  594 
  595     def __init__(self, prefix=tempfile.template):
  596         TempMbox.__init__(self, prefix)
  597         self.raw_file = self.mbox_file
  598         self.mbox_file = gzip.GzipFile(mode="a", fileobj=self.mbox_file)
  599         # Workaround that GzipFile.close() isn't idempotent in Python < 2.6
  600         # (python issue #2959).  There is no GzipFile.closed, so we need a
  601         # replacement.
  602         self.gzipfile_closed = False
  603 
  604     def commit(self):
  605         """Finish gzip file and sync it to disk."""
  606         # This method is currently not used
  607         self.mbox_file.close()  # close GzipFile, writing gzip trailer
  608         self.gzipfile_closed = True
  609         self.raw_file.flush()
  610         os.fsync(self.raw_file.fileno())
  611 
  612     def close(self):
  613         """Close the gzip file."""
  614         if not self.gzipfile_closed:
  615             self.mbox_file.close()
  616         self.raw_file.close()
  617 
  618 
  619 class IdentityCache:
  620     """Class used to remember Message-IDs and warn if they are seen twice"""
  621     seen_ids = {}
  622     mailbox_name = None
  623 
  624     def __init__(self, mailbox_name):
  625         """Constructor: takes the mailbox name as an argument"""
  626         assert mailbox_name
  627         self.mailbox_name = mailbox_name
  628 
  629     def warn_if_dupe(self, msg):
  630         """Print a warning message if the message has already appeared"""
  631         assert msg
  632         message_id = msg.get('Message-ID')
  633         assert message_id
  634         if self.seen_ids.has_key(message_id):
  635             user_warning("duplicate message id: '%s' in mailbox '%s'" % 
  636                 (message_id, self.mailbox_name))
  637         self.seen_ids[message_id] = True
  638 
  639 
  640 # global class instances
  641 options = Options()  # the run-time options object
  642 _stale = StaleFiles() # remember what we have to delete on abnormal exit
  643 
  644 
  645 def main(args = sys.argv[1:]):
  646     global _stale
  647 
  648     # this usage message is longer than 24 lines -- bad idea?
  649     usage = """Usage: %s [options] mailbox [mailbox...]
  650 Moves old mail in IMAP, mbox, MH or maildir-format mailboxes to an mbox-format
  651 mailbox compressed with gzip. 
  652 
  653 Options are as follows:
  654   -d, --days=NUM        archive messages older than NUM days (default: %d)
  655   -D, --date=DATE       archive messages older than DATE
  656   -o, --output-dir=DIR  directory to store archives (default: same as original)
  657   -P, --pwfile=FILE     file to read imap password from (default: None)
  658   -F, --filter-append=STRING  append arbitrary string to the IMAP filter string
  659   -p, --prefix=NAME     prefix for archive filename (default: none)
  660   -s, --suffix=NAME     suffix for archive filename (default: '%s')
  661   -a, --archive-name=NAME   specify complete archive filename
  662   -S, --size=NUM        only archive messages NUM bytes or larger
  663   -n, --dry-run         don't write to anything - just show what would be done
  664   -u, --preserve-unread never archive unread messages
  665       --dont-mangle     do not mangle From_ in message bodies
  666       --delete          delete rather than archive old mail (use with caution!)
  667       --copy            copy rather than archive old mail 
  668       --include-flagged messages flagged important can also be archived
  669       --all             archive all messages 
  670       --no-compress     do not compress archives with gzip
  671       --warn-duplicate  warn about duplicate Message-IDs in the same mailbox
  672   -v, --verbose         report lots of extra debugging information
  673       --debug-imap=NUM  set IMAP debugging output level (0 is none)
  674   -q, --quiet           quiet mode - print no statistics (suitable for crontab)
  675   -V, --version         display version information
  676   -h, --help            display this message
  677 
  678 Example: %s linux-kernel
  679   This will move all messages older than %s days to a 'mbox' mailbox called 
  680   'linux-kernel_archive.gz', deleting them from the original 'linux-kernel'
  681   mailbox. If the 'linux-kernel_archive.gz' mailbox already exists, the 
  682   newly archived messages are appended.
  683 
  684 To archive IMAP mailboxes, format your mailbox argument like this:
  685   imap://username:password@server/mailbox
  686   (substitute 'imap' with 'imaps' for an SSL connection)
  687 
  688 Website: http://archivemail.sourceforge.net/ """ %   \
  689     (options.script_name, options.days_old_max, options.archive_suffix,
  690     options.script_name, options.days_old_max)
  691 
  692     args = options.parse_args(args, usage)
  693     if len(args) == 0:
  694         print usage
  695         sys.exit(1)
  696 
  697     options.sanity_check(args)
  698 
  699     for mailbox_path in args:
  700         archive(mailbox_path)
  701 
  702 
  703 ######## errors and debug ##########
  704 
  705 def vprint(string):
  706     """Print the string argument if we are in verbose mode"""
  707     if options.verbose:
  708         print string
  709 
  710 
  711 def unexpected_error(string):
  712     """Print the string argument, a 'shutting down' message and abort.  Raise
  713     UnexpectedErrors if archivemail is run as a module. This function never
  714     returns."""
  715     if not __name__ == '__main__':
  716         raise UnexpectedError(string)
  717     sys.stderr.write("%s: %s\n" % (options.script_name, string))
  718     sys.stderr.write("%s: unexpected error encountered - shutting down\n" % 
  719         options.script_name)
  720     sys.exit(1)
  721 
  722 
  723 def user_error(string):
  724     """Print the string argument and abort. Raise UserError if archivemail is
  725     run as a module. This function never returns."""
  726     if not __name__ == '__main__':
  727         raise UserError(string)
  728     sys.stderr.write("%s: %s\n" % (options.script_name, string))
  729     sys.exit(1)
  730 
  731 
  732 def user_warning(string):
  733     """Print the string argument"""
  734     sys.stderr.write("%s: Warning - %s\n" % (options.script_name, string))
  735 
  736 ########### operations on a message ############
  737 
  738 def make_mbox_from(message):
  739     """Return a string suitable for use as a 'From_' mbox header for the
  740     message.
  741 
  742     Arguments:
  743     message -- the rfc822 message object
  744 
  745     """
  746     assert message
  747     address = guess_return_path(message)
  748     time_message = guess_delivery_time(message)
  749     date = time.localtime(time_message)
  750     assert date
  751     date_string = time.asctime(date)
  752     mbox_from = "From %s %s\n" % (address, date_string)
  753     return mbox_from
  754 
  755 
  756 def guess_return_path(message):
  757     """Return a guess at the Return Path address of an rfc822 message"""
  758     assert message
  759 
  760     for header in ('Return-path', 'From'):
  761         address_header = message.get(header)
  762         if address_header:
  763             (name, address) = rfc822.parseaddr(address_header)
  764             if address:
  765                 return address
  766     # argh, we can't find any valid 'Return-path' guesses - just 
  767     # just use the current unix username like mutt does
  768     login = pwd.getpwuid(os.getuid())[0]
  769     assert login
  770     return login
  771 
  772 
  773 def guess_delivery_time(message):
  774     """Return a guess at the delivery date of an rfc822 message""" 
  775     assert message
  776     # try to guess the delivery date from various headers
  777     # get more desparate as we go through the array
  778     for header in 'Delivery-date', 'Received', 'Resent-Date', 'Date':
  779         try: 
  780             if header == 'Received': 
  781                 # This should be good enough for almost all headers in the wild; 
  782                 # if we're guessing wrong, parsedate_tz() will fail graciously. 
  783                 token = message.getrawheader(header).rsplit(';', 1)[-1]
  784             else: 
  785                 token = message.get(header)
  786             date = rfc822.parsedate_tz(token)
  787             if date:
  788                 time_message = rfc822.mktime_tz(date)
  789                 vprint("using valid time found from '%s' header" % header)
  790                 return time_message
  791         except (AttributeError, IndexError, ValueError, OverflowError): pass
  792     # as a second-last resort, try the date from the 'From_' line (ugly)
  793     # this will only work from a mbox-format mailbox
  794     if (message.unixfrom):
  795         # Hmm. This will break with full-blown RFC 2822 addr-spec's. 
  796         header = message.unixfrom.split(None, 2)[-1]
  797         # Interpret no timezone as localtime
  798         date = rfc822.parsedate_tz(header)
  799         if date:
  800             try:
  801                 time_message = rfc822.mktime_tz(date)
  802                 vprint("using valid time found from unix 'From_' header")
  803                 return time_message
  804             except (ValueError, OverflowError): pass
  805     # the headers have no valid dates -- last resort, try the file timestamp
  806     # this will not work for mbox mailboxes
  807     try:
  808         file_name = get_filename(message)
  809     except AttributeError:
  810         # we are looking at a 'mbox' mailbox - argh! 
  811         # Just return the current time - this will never get archived :(
  812         vprint("no valid times found at all -- using current time!")
  813         return time.time()
  814     if not os.path.isfile(file_name):
  815         unexpected_error("mailbox file name '%s' has gone missing" % \
  816             file_name)    
  817     time_message = os.path.getmtime(file_name)
  818     vprint("using valid time found from '%s' last-modification time" % \
  819         file_name)
  820     return time_message
  821    
  822 
  823 def add_status_headers(message):
  824     """
  825     Add Status and X-Status headers to a message from a maildir mailbox.
  826 
  827     Maildir messages store their information about being read/replied/etc in
  828     the suffix of the filename rather than in Status and X-Status headers in
  829     the message. In order to archive maildir messages into mbox format, it is
  830     nice to preserve this information by putting it into the status headers.
  831 
  832     """
  833     status = ""
  834     x_status = ""
  835     file_name = get_filename(message)
  836     match = re.search(":2,(.+)$", file_name)
  837     if match:
  838         flags = match.group(1)
  839         for flag in flags: 
  840             if flag == "D": # (draft): the user considers this message a draft
  841                 pass # does this make any sense in mbox? 
  842             elif flag == "F": # (flagged): user-defined 'important' flag
  843                 x_status = x_status + "F"
  844             elif flag == "R": # (replied): the user has replied to this message
  845                 x_status = x_status + "A"
  846             elif flag == "S": # (seen): the user has viewed this message
  847                 status = status + "R"
  848             elif flag == "T": # (trashed): user has moved this message to trash
  849                 pass # is this Status: D ? 
  850             else:
  851                 pass # no whingeing here, although it could be a good experiment
  852 
  853     # files in the maildir 'cur' directory are no longer new,
  854     # they are the same as messages with 'Status: O' headers in mbox
  855     last_dir = os.path.basename(os.path.dirname(file_name))
  856     if last_dir == "cur":
  857         status = status + "O" 
  858 
  859     # Overwrite existing 'Status' and 'X-Status' headers.  They add no value in
  860     # maildirs, and we better don't listen to them.
  861     if status:
  862         vprint("converting maildir status into Status header '%s'" % status)
  863         message['Status'] = status
  864     else: 
  865         del message['Status']
  866     if x_status:
  867         vprint("converting maildir status into X-Status header '%s'" % x_status)
  868         message['X-Status'] = x_status
  869     else: 
  870         del message['X-Status']
  871 
  872 def add_status_headers_imap(message, flags):
  873     """Add Status and X-Status headers to a message from an imap mailbox."""
  874     status = ""
  875     x_status = ""
  876     for flag in flags: 
  877         if flag == "\\Draft": # (draft): the user considers this message a draft
  878             pass # does this make any sense in mbox? 
  879         elif flag == "\\Flagged": # (flagged): user-defined 'important' flag
  880             x_status = x_status + "F"
  881         elif flag == "\\Answered": # (replied): the user has replied to this message
  882             x_status = x_status + "A"
  883         elif flag == "\\Seen": # (seen): the user has viewed this message
  884             status = status + "R"
  885         elif flag == "\\Deleted": # (trashed): user has moved this message to trash
  886             pass # is this Status: D ? 
  887         else:
  888             pass # no whingeing here, although it could be a good experiment
  889     if not "\\Recent" in flags:
  890         status = status + "O" 
  891 
  892     # As with maildir folders, overwrite Status and X-Status headers 
  893     # if they exist.
  894     vprint("converting imap status (%s)..." % " ".join(flags))
  895     if status:
  896         vprint("generating Status header '%s'" % status)
  897         message['Status'] = status
  898     else: 
  899         vprint("not generating Status header")
  900         del message['Status']
  901     if x_status:
  902         vprint("generating X-Status header '%s'" % x_status)
  903         message['X-Status'] = x_status
  904     else: 
  905         vprint("not generating X-Status header")
  906         del message['X-Status']
  907 
  908 def is_flagged(message):
  909     """return true if the message is flagged important, false otherwise"""
  910     # MH and mbox mailboxes use the 'X-Status' header to indicate importance
  911     x_status = message.get('X-Status')
  912     if x_status and re.search('F', x_status):
  913         vprint("message is important (X-Status header='%s')" % x_status)
  914         return True
  915     file_name = None
  916     try:
  917         file_name = get_filename(message)
  918     except AttributeError:
  919         pass
  920     # maildir mailboxes use the filename suffix to indicate flagged status
  921     if file_name and re.search(":2,.*F.*$", file_name):
  922         vprint("message is important (filename info has 'F')")
  923         return True
  924     vprint("message is not flagged important")
  925     return False
  926 
  927 
  928 def is_unread(message):
  929     """return true if the message is unread, false otherwise"""
  930     # MH and mbox mailboxes use the 'Status' header to indicate read status
  931     status = message.get('Status')
  932     if status and re.search('R', status):
  933         vprint("message has been read (status header='%s')" % status)
  934         return False
  935     file_name = None
  936     try:
  937         file_name = get_filename(message)
  938     except AttributeError:
  939         pass
  940     # maildir mailboxes use the filename suffix to indicate read status
  941     if file_name and re.search(":2,.*S.*$", file_name):
  942         vprint("message has been read (filename info has 'S')")
  943         return False
  944     vprint("message is unread")
  945     return True
  946 
  947 
  948 def sizeof_message(message):
  949     """Return size of message in bytes (octets)."""
  950     assert message
  951     file_name = None
  952     message_size = None
  953     try:
  954         file_name = get_filename(message)
  955     except AttributeError:
  956         pass
  957     if file_name:
  958         # with maildir and MH mailboxes, we can just use the file size
  959         message_size = os.path.getsize(file_name)
  960     else:
  961         # with mbox mailboxes, not so easy
  962         message_size = 0
  963         if message.unixfrom:
  964             message_size = message_size + len(message.unixfrom)
  965         for header in message.headers:
  966             message_size = message_size + len(header)
  967         message_size = message_size + 1 # the blank line after the headers
  968         start_offset = message.fp.tell()
  969         message.fp.seek(0, 2) # seek to the end of the message
  970         end_offset = message.fp.tell()
  971         message.rewindbody()
  972         message_size = message_size + (end_offset - start_offset)
  973     return message_size
  974 
  975 def is_smaller(message, size):
  976     """Return true if the message is smaller than size bytes, false otherwise"""
  977     assert message
  978     assert size > 0
  979     message_size = sizeof_message(message) 
  980     if message_size < size:
  981         vprint("message is too small (%d bytes), minimum bytes : %d" % \
  982             (message_size, size))
  983         return True
  984     else:
  985         vprint("message is not too small (%d bytes), minimum bytes: %d" % \
  986             (message_size, size))
  987         return False
  988 
  989 
  990 def should_archive(message):
  991     """Return true if we should archive the message, false otherwise"""
  992     if options.archive_all:
  993         return True
  994     old = False
  995     time_message = guess_delivery_time(message)
  996     if options.date_old_max == None:
  997         old = is_older_than_days(time_message, options.days_old_max)
  998     else:
  999         old = is_older_than_time(time_message, options.date_old_max)
 1000 
 1001     # I could probably do this in one if statement, but then I wouldn't
 1002     # understand it. 
 1003     if not old:
 1004         return False
 1005     if not options.include_flagged and is_flagged(message):
 1006         return False
 1007     if options.min_size and is_smaller(message, options.min_size):
 1008         return False
 1009     if options.preserve_unread and is_unread(message):
 1010         return False
 1011     return True
 1012         
 1013     
 1014 def is_older_than_time(time_message, max_time):
 1015     """Return true if a message is older than the specified time,
 1016     false otherwise.
 1017 
 1018     Arguments:
 1019     time_message -- the delivery date of the message measured in seconds
 1020                     since the epoch
 1021     max_time -- maximum time allowed for message
 1022        
 1023     """
 1024     days_old = (max_time - time_message) / 24 / 60 / 60
 1025     if time_message < max_time:
 1026         vprint("message is %.2f days older than the specified date" % days_old)
 1027         return True
 1028     vprint("message is %.2f days younger than the specified date" % \
 1029         abs(days_old))
 1030     return False
 1031 
 1032 
 1033 def is_older_than_days(time_message, max_days):
 1034     """Return true if a message is older than the specified number of days,
 1035     false otherwise.
 1036 
 1037     Arguments:
 1038     time_message -- the delivery date of the message measured in seconds
 1039                     since the epoch
 1040     max_days -- maximum number of days before message is considered old
 1041     """
 1042     time_now = time.time()
 1043     if time_message > time_now:
 1044         vprint("warning: message has date in the future")
 1045         return False
 1046     secs_old_max = (max_days * 24 * 60 * 60)
 1047     days_old = (time_now - time_message) / 24 / 60 / 60
 1048     vprint("message is %.2f days old" % days_old)
 1049     if ((time_message + secs_old_max) < time_now):
 1050         return True
 1051     return False
 1052 
 1053 def build_imap_filter():
 1054     """Return an imap filter string"""
 1055 
 1056     imap_filter = []
 1057     if options.date_old_max == None:
 1058         time_now = time.time()
 1059         secs_old_max = (options.days_old_max * 24 * 60 * 60)
 1060         time_old = time.gmtime(time_now - secs_old_max)
 1061     else:
 1062         time_old = time.gmtime(options.date_old_max)
 1063     time_str = time.strftime('%d-%b-%Y', time_old)
 1064     imap_filter.append("BEFORE %s" % time_str)
 1065 
 1066     if not options.include_flagged:
 1067         imap_filter.append("UNFLAGGED")
 1068     if options.min_size:
 1069         imap_filter.append("LARGER %d" % options.min_size)
 1070     if options.preserve_unread:
 1071         imap_filter.append("SEEN")
 1072     if options.filter_append:
 1073         imap_filter.append(options.filter_append)
 1074 
 1075     return '(' + string.join(imap_filter, ' ') + ')'
 1076 
 1077 ###############  mailbox operations ###############
 1078 
def archive(mailbox_name):
    """Archives a mailbox.

    Arguments:
    mailbox_name -- the filename/dirname/url of the mailbox to be archived

    The mailbox type is guessed from the name: an 'imap'/'imaps' URL is
    handed to _archive_imap(), a regular file to _archive_mbox(), and a
    directory to _archive_dir() as 'maildir' (if it has both 'cur' and 'new'
    subdirectories) or as 'mh' otherwise.  Anything else is a user error.
    A local mailbox that cannot be stat()ed or is owned by another user is
    also a user error.

    All work happens with tempfile.tempdir pointing at a private temporary
    directory; the previous value is restored and clean_up() is called even
    if archiving fails.
    """
    assert mailbox_name

    # strip any trailing slash (we could be archiving a maildir or MH format
    # mailbox and somebody was pressing <tab> in bash) - we don't want to use
    # the trailing slash in the archive name
    mailbox_name = mailbox_name.rstrip("/")
    # a name consisting only of slashes would be empty by now
    assert mailbox_name

    set_signal_handlers()
    os.umask(077) # saves setting permissions on mailboxes/tempfiles

    vprint("processing '%s'" % mailbox_name)
    # only the URL scheme decides whether this is a remote mailbox
    is_imap = urlparse.urlparse(mailbox_name)[0] in ('imap', 'imaps')
    if not is_imap:
        # Check if the mailbox exists, and refuse to mess with other people's
        # stuff
        try:
            fuid = os.stat(mailbox_name).st_uid
        except OSError, e:
            user_error(str(e))
        else:
            if fuid != os.getuid():
                user_error("'%s' is owned by someone else!" % mailbox_name)

    # remember the module-wide setting so the 'finally' clause can restore it
    old_temp_dir = tempfile.tempdir
    try:
        # create a temporary directory for us to work in securely
        # (tempdir is reset first so that mkdtemp() picks the system default
        # location rather than a directory left over from an earlier call)
        tempfile.tempdir = None
        new_temp_dir = tempfile.mkdtemp('archivemail')
        assert new_temp_dir
        # record the directory in _stale -- presumably so that clean_up()
        # can remove it if we are interrupted; confirm against clean_up()
        _stale.temp_dir = new_temp_dir
        tempfile.tempdir = new_temp_dir
        vprint("set tempfile directory to '%s'" % new_temp_dir)

        if is_imap:
            vprint("guessing mailbox is of type: imap(s)")
            _archive_imap(mailbox_name)
        elif os.path.isfile(mailbox_name):
            vprint("guessing mailbox is of type: mbox")
            _archive_mbox(mailbox_name)
        elif os.path.isdir(mailbox_name):
            cur_path = os.path.join(mailbox_name, "cur")
            new_path = os.path.join(mailbox_name, "new")
            if os.path.isdir(cur_path) and os.path.isdir(new_path):
                vprint("guessing mailbox is of type: maildir")
                _archive_dir(mailbox_name, "maildir")
            else:
                vprint("guessing mailbox is of type: MH")
                _archive_dir(mailbox_name, "mh")
        else:
            user_error("'%s' is not a normal file or directory" % mailbox_name)

        # remove our special temp directory - hopefully empty
        # (os.rmdir() raises OSError if it is not)
        os.rmdir(new_temp_dir)
        _stale.temp_dir = None

    finally:
        # runs on success and on failure alike
        tempfile.tempdir = old_temp_dir
        clean_up()
 1144 
def _archive_mbox(mailbox_name):
    """Archive a 'mbox' style mailbox - used by archive().

    Arguments:
    mailbox_name -- path of the mbox file to process

    Every message is read once while the mailbox is locked.  Messages chosen
    by should_archive() are written to a temporary archive (unless
    options.delete_old_mail is set); all others are written to a temporary
    'retain' mbox.  The archive is committed first, and only then is the
    original mailbox overwritten with the retained messages.  With
    options.dry_run or options.copy_old_mail no 'retain' mbox is created and
    the original is not rewritten.
    """
    assert mailbox_name
    final_archive_name = make_archive_name(mailbox_name)
    vprint("archiving '%s' to '%s' ..." % (mailbox_name, final_archive_name))
    check_archive(final_archive_name)
    stats = Stats(mailbox_name, final_archive_name)
    cache = IdentityCache(mailbox_name)
    original = Mbox(path=mailbox_name)
    # retain is None whenever the original mailbox must stay untouched
    if options.dry_run or options.copy_old_mail:
        retain = None
    else:
        retain = TempMbox(prefix="retain")
    # NOTE(review): 'archive' is tested for truth before each write below, so
    # prepare_temp_archive() presumably returns None in some modes -- confirm
    archive = prepare_temp_archive()

    original.lock()
    msg = original.next()
    # a non-empty file that yields no first message cannot be an mbox
    if not msg and (original.starting_size > 0):
        user_error("'%s' is not a valid mbox-format mailbox" % mailbox_name)
    if msg and 'X-IMAP' in msg:
        # Dovecot and UW-IMAP pseudo message for mailbox meta data
        # (always retained, and not counted in the statistics)
        vprint("detected IMAP pseudo message")
        if retain:
            retain.write(msg)
        msg = original.next()
    while (msg):
        msg_size = sizeof_message(msg)
        stats.another_message(msg_size)
        vprint("processing message '%s'" % msg.get('Message-ID'))
        if options.warn_duplicates:
            cache.warn_if_dupe(msg)             
        if should_archive(msg):
            stats.another_archived(msg_size)
            if options.delete_old_mail:
                # deleting means: write the message neither to the archive
                # nor to the retain mbox
                vprint("decision: delete message")
            else:
                vprint("decision: archive message")
                if archive:
                    archive.write(msg)
        else:
            vprint("decision: retain message")
            if retain:
                retain.write(msg)
        msg = original.next()
    vprint("finished reading messages") 
    # bail out if the mailbox size differs from the one recorded when it was
    # opened
    if original.starting_size != original.get_size():
        unexpected_error("the mailbox '%s' changed size during reading!" % \
           mailbox_name)         
    # Write the new archive before modifying the mailbox, to prevent
    # losing data if something goes wrong
    commit_archive(archive, final_archive_name)
    if retain:
        # Differing file offsets are taken to mean that the retained copy is
        # not identical to the original, i.e. something was removed.
        pending_changes = original.mbox_file.tell() != retain.mbox_file.tell()
        if pending_changes:
            retain.commit()
            retain.close()
            vprint("writing back changed mailbox '%s'..." % \
                    original.mbox_file_name)
            # Prepare for recovery on error.
            # FIXME: tempfile.tempdir is our nested dir.
            saved_name = "%s/%s.%s.%s-%s-%s" % \
                (tempfile.tempdir, options.script_name,
                    os.path.basename(original.mbox_file_name),
                    socket.gethostname(), os.getuid(),
                    os.getpid())
            try:
                original.overwrite_with(retain.mbox_file_name)
                original.commit()
            except:
                # Deliberately catches everything: keep a good copy of the
                # retained messages, tell the user where, then re-raise.
                retain.saveas(saved_name)
                print "Error writing back changed mailbox; saved good copy to " \
                        "%s" % saved_name
                raise
        else:
            retain.close()
            vprint("no changes to mbox '%s'" %  original.mbox_file_name)
        retain.remove()
    original.unlock()
    original.close()
    original.reset_timestamps() # Minor race here; mutt has this too.
    if not options.quiet:
        stats.display()
 1227 
 1228 
def _archive_dir(mailbox_name, type):
    """Archive a 'maildir' or 'MH' style mailbox - used by archive().

    Arguments:
    mailbox_name -- path of the mailbox directory
    type -- either "maildir" or "mh"; any other value is an unexpected error

    Messages chosen by should_archive() are written to a temporary archive
    (unless options.delete_old_mail is set) and queued for deletion.  The
    archive is committed first, and only then are the queued message files
    removed.  With options.dry_run or options.copy_old_mail nothing is
    queued, so the mailbox is left untouched.
    """
    assert mailbox_name
    assert type
    final_archive_name = make_archive_name(mailbox_name)
    vprint("archiving '%s' to '%s' ..." % (mailbox_name, final_archive_name))
    check_archive(final_archive_name)
    stats = Stats(mailbox_name, final_archive_name)
    # file names of the messages to remove once the archive is safely written
    delete_queue = []

    if type == "maildir":
        original = mailbox.Maildir(mailbox_name)
    elif type == "mh":
        original = mailbox.MHMailbox(mailbox_name)
    else:
        unexpected_error("unknown type: %s" % type)        
    cache = IdentityCache(mailbox_name)
    # NOTE(review): 'archive' is tested for truth before each write below, so
    # prepare_temp_archive() presumably returns None in some modes -- confirm
    archive = prepare_temp_archive()

    for msg in original:
        # a message object that tests false is skipped and not counted
        if not msg: 
            vprint("ignoring invalid message '%s'" % get_filename(msg))
            continue
        msg_size = sizeof_message(msg)
        stats.another_message(msg_size)
        vprint("processing message '%s'" % msg.get('Message-ID'))
        if options.warn_duplicates:
            cache.warn_if_dupe(msg)             
        if should_archive(msg):
            stats.another_archived(msg_size)
            if options.delete_old_mail:
                vprint("decision: delete message")
            else:
                vprint("decision: archive message")
                if archive:
                    # maildir keeps its flags in the file name; turn them
                    # into Status/X-Status headers before writing to mbox
                    if type == "maildir":
                        add_status_headers(msg)
                    archive.write(msg)
            if not options.dry_run and not options.copy_old_mail:
                delete_queue.append(get_filename(msg)) 
        else:
            vprint("decision: retain message")
    vprint("finished reading messages") 
    # Write the new archive before modifying the mailbox, to prevent
    # losing data if something goes wrong
    commit_archive(archive, final_archive_name)
    for file_name in delete_queue:
        vprint("removing original message: '%s'" % file_name)
        # a file that has already disappeared (ENOENT) is not an error;
        # every other OSError is re-raised
        try: os.remove(file_name)
        except OSError, e:
            if e.errno != errno.ENOENT: raise
    if not options.quiet:
        stats.display()
 1282 
 1283 def _archive_imap(mailbox_name):
 1284     """Archive an imap mailbox - used by archive_mailbox()"""
 1285     assert mailbox_name
 1286     import imaplib
 1287     import cStringIO
 1288     import getpass
 1289 
 1290     vprint("Setting imaplib.Debug = %d" % options.debug_imap)
 1291     imaplib.Debug = options.debug_imap
 1292     archive = None
 1293     imap_username, imap_password, \
 1294             imap_server, imap_server_port, \
 1295             imap_folder_pattern = parse_imap_url(mailbox_name)
 1296     if not imap_password: 
 1297         if options.pwfile:
 1298             imap_password = open(options.pwfile).read().rstrip()
 1299         else:
 1300             if (not os.isatty(sys.stdin.fileno())) or options.quiet:
 1301                 unexpected_error("No imap password specified")
 1302             imap_password = getpass.getpass('IMAP password: ')
 1303 
 1304     is_ssl = mailbox_name[:5].lower() == 'imaps'
 1305     if is_ssl: 
 1306         vprint("establishing secure connection to server %s, port %s" %
 1307                 (imap_server, imap_server_port))
 1308         imap_srv = imaplib.IMAP4_SSL(imap_server, imap_server_port)
 1309     else:
 1310         vprint("establishing connection to server %s, port %s" %
 1311             (imap_server, imap_server_port))
 1312         imap_srv = imaplib.IMAP4(imap_server, imap_server_port)
 1313     if "AUTH=CRAM-MD5" in imap_srv.capabilities: 
 1314         vprint("authenticating (cram-md5) to server as %s" % imap_username)
 1315         result, response = imap_srv.login_cram_md5(imap_username, imap_password)
 1316     elif not "LOGINDISABLED" in imap_srv.capabilities: 
 1317         vprint("logging in to server as %s" % imap_username)
 1318         result, response = imap_srv.login(imap_username, imap_password)
 1319     else: 
 1320         user_error("imap server %s has login disabled (hint: "
 1321                              "try ssl/imaps)" % imap_server)
 1322 
 1323     mailboxes = imap_find_mailboxes(imap_srv, imap_folder_pattern)
 1324     for imap_folder in mailboxes:
 1325         final_archive_name = make_archive_name(imap_folder)
 1326         vprint("archiving mailbox '%s' on IMAP server '%s' to '%s' ..." %
 1327                 (imap_folder, imap_server, final_archive_name))
 1328         check_archive(final_archive_name)
 1329         cur_mailbox = mailbox_name[:-len(imap_folder_pattern)] + imap_folder
 1330         stats = Stats(cur_mailbox, final_archive_name)
 1331         cache = IdentityCache(cur_mailbox)
 1332 
 1333         imap_smart_select(imap_srv, imap_folder)
 1334         total_msg_count = int(imap_srv.response("EXISTS")[1][0])
 1335         vprint("folder has %d message(s)" % total_msg_count)
 1336 
 1337         # IIUIC the message sequence numbers are stable for the whole session, since
 1338         # we just send SEARCH, FETCH and STORE commands, which should prevent the
 1339         # server from sending untagged EXPUNGE responses -- see RFC 3501 (IMAP4rev1)
 1340         # 7.4.1 and RFC 2180 (Multi-Accessed Mailbox Practice).
 1341         # Worst thing should be that we bail out FETCHing a message that has been
 1342         # deleted.
 1343 
 1344         if options.archive_all:
 1345             message_list = [str(n) for n in range(1, total_msg_count+1)]
 1346         else:
 1347             imap_filter = build_imap_filter()
 1348             vprint("imap filter: '%s'" % imap_filter)
 1349             vprint("searching messages matching criteria")
 1350             result, response = imap_srv.search(None, imap_filter)
 1351             if result != 'OK': unexpected_error("imap search failed; server says '%s'" %
 1352                 response[0])
 1353             if response[0] is not None:
 1354                 # response is a list with a single item, listing message
 1355                 # sequence numbers like ['1 2 3 1016']
 1356                 message_list = response[0].split()
 1357             else:
 1358                 # Broken server has sent no untagged response; assume empty result set.
 1359                 message_list = []
 1360             vprint("%d messages are matching filter" % len(message_list))
 1361 
 1362         # First, gather data for the statistics.
 1363         if total_msg_count > 0 and not options.quiet:
 1364             vprint("fetching size of messages...")
 1365             result, response = imap_srv.fetch('1:*', '(RFC822.SIZE)')
 1366             if result != 'OK': unexpected_error("Failed to fetch message sizes; "
 1367                 "server says '%s'" % response[0])
 1368             # response is a list with entries like '1016 (RFC822.SIZE 3118)',
 1369             # where the first number is the message sequence number, the second is
 1370             # the size.
 1371             for x in response:
 1372                 m = imapsize_re.match(x)
 1373                 msn, msg_size = m.group('msn'), int(m.group('size'))
 1374                 stats.another_message(msg_size)
 1375                 if msn in message_list:
 1376                     stats.another_archived(msg_size)
 1377 
 1378         if not options.dry_run:
 1379             if not options.delete_old_mail:
 1380                 archive = prepare_temp_archive()
 1381                 vprint("fetching messages...")
 1382                 for msn in message_list:
 1383                     # Fetching message flags and body together always finds \Seen
 1384                     # set.  To check \Seen, we must fetch the flags first.
 1385                     result, response = imap_srv.fetch(msn, '(FLAGS)')
 1386                     if result != 'OK': unexpected_error("Failed to fetch message "
 1387                             "flags; server says '%s'" % response[0])
 1388                     msg_flags = imaplib.ParseFlags(response[0])
 1389                     result, response = imap_srv.fetch(msn, '(RFC822)')
 1390                     if result != 'OK': unexpected_error("Failed to fetch message; "
 1391                         "server says '%s'" % response[0])
 1392                     msg_str = response[0][1].replace("\r\n", os.linesep)
 1393                     msg = rfc822.Message(cStringIO.StringIO(msg_str))
 1394                     vprint("processing message '%s'" % msg.get('Message-ID'))
 1395                     add_status_headers_imap(msg, msg_flags)
 1396                     if options.warn_duplicates:
 1397                         cache.warn_if_dupe(msg)
 1398                     archive.write(msg)
 1399                 commit_archive(archive, final_archive_name)
 1400             if not options.copy_old_mail:
 1401                 vprint("Deleting %s messages" % len(message_list))
 1402                 # do not delete more than a certain number of messages at a time,
 1403                 # because the command length is limited. This avoids that servers
 1404                 # terminate the connection with EOF or TCP RST.
 1405                 max_delete = 100
 1406                 for i in range(0, len(message_list), max_delete):
 1407                     result, response = imap_srv.store( \
 1408                         string.join(message_list[i:i+max_delete], ','),
 1409                         '+FLAGS.SILENT', '\\Deleted')
 1410                     if result != 'OK': unexpected_error("Error while deleting "
 1411                         "messages; server says '%s'" % response[0])
 1412         vprint("Closing mailbox.")
 1413         imap_srv.close()
 1414         if not options.quiet:
 1415             stats.display()
 1416     vprint("Terminating connection.")
 1417     imap_srv.logout()
 1418     
 1419 
 1420 ###############  IMAP  functions  ###############
 1421 
 1422 
 1423 # First, some IMAP modified UTF-7 support functions.
 1424 
# The modified BASE64 alphabet: 64 characters, each of which encodes 6 bits.
# A character's position in this string is the 6-bit value it stands for.
# Note that the last character is ',' rather than the '/' of plain BASE64.
mb64alpha = string.ascii_uppercase + string.ascii_lowercase + string.digits + '+,'
 1427 
 1428 def isprint_ascii(char):
 1429     """Test for an ASCII printable character."""
 1430     return 0x20 <= ord(char) and ord(char) <= 0x7e
 1431 
 1432 def mod_utf7_encode(ustr):
 1433     """Encode unicode string object in modified UTF-7."""
 1434 
 1435     def mb64_encode(tomb64):
 1436         """Encode unicode string object as a modified UTF-7 shifted sequence
 1437         in modified BASE64."""
 1438         u16be = tomb64.encode('utf_16_be')
 1439         mb64 = ""
 1440         # Process 24-bit blocks, encoding them in 6-bit steps.
 1441         for block in [u16be[i:i+3] for i in range(0, len(u16be), 3)]:
 1442             idx = 0
 1443             shift = 2
 1444             for octet in block:
 1445                 mb64 += mb64alpha[idx | (ord(octet) >> shift)]
 1446                 idx = (ord(octet) << (6-shift)) & 0x3f
 1447                 shift += 2
 1448             mb64 += mb64alpha[idx]
 1449         return mb64
 1450 
 1451     mu7 = ""
 1452     tomb64 = u""
 1453     for c in ustr:
 1454         if not isprint_ascii(c):
 1455             tomb64 += c
 1456             continue
 1457         if tomb64:
 1458             mu7 += '&' + mb64_encode(tomb64) + '-'
 1459             tomb64 = u""
 1460         if c == '&':
 1461             mu7 += '&-'
 1462         else:
 1463             mu7 += str(c)
 1464     if tomb64:
 1465         mu7 += '&' + mb64_encode(tomb64) + '-'
 1466     return mu7
 1467 
 1468 def mod_utf7_decode(mu7):
 1469     """Decode a modified UTF-7 encoded string to an unicode string object."""
 1470 
 1471     def mb64_decode(mb64):
 1472         """Decode a modified UTF-7 shifted sequence from modified BASE64 to an
 1473         unicode string object."""
 1474         if not mb64:
 1475             # A null shift '&-' decodes to '&'.
 1476             return u"&"
 1477         u16be = ""
 1478         # Process blocks of 4 BASE64 characters, decoding each char to 6 bits.
 1479         for block in [mb64[i:i+4] for i in range(0, len(mb64), 4)]:
 1480             carrybits = mb64alpha.index(block[0]) << 2
 1481             shift = 4
 1482             for char in block[1:]:
 1483                 bits = mb64alpha.index(char)
 1484                 u16be += chr(carrybits | (bits >> shift))
 1485                 carrybits = (bits << (8-shift)) & 0xff
 1486                 shift -= 2
 1487         if carrybits:
 1488             raise ValueError("Ill-formed modified UTF-7 string: "
 1489                              "trailing bits in shifted sequence")
 1490         return u16be.decode('utf_16_be')
 1491 
 1492     ustr = u""
 1493     mb64 = ""
 1494     inmb64 = False
 1495     for octet in mu7:
 1496         if not isprint_ascii(octet):
 1497             raise ValueError("Ill-formed modified UTF-7 string: "
 1498                              "contains non-printable ASCII" % ord(octet))
 1499         if not inmb64:
 1500             if octet == '&':
 1501                 inmb64 = True
 1502             else:
 1503                 ustr += octet
 1504             continue
 1505 
 1506         if octet in mb64alpha:
 1507             mb64 += octet
 1508             continue
 1509 
 1510         if octet == '-':
 1511             inmb64 = False
 1512             ustr += mb64_decode(mb64)
 1513             mb64 = ""
 1514         else:
 1515             break   # This triggers the exception below.
 1516 
 1517     if inmb64:
 1518         raise ValueError("Ill-formed modified UTF-7 string: "
 1519                          "unterminated BASE64 sequence")
 1520     return ustr
 1521 
 1522 
 1523 def imap_quote(astring):
 1524     """Quote an IMAP `astring' string (see RFC 3501, section "Formal Syntax")."""
 1525     if astring.startswith('"') and astring.endswith('"'):
 1526         quoted = astring
 1527     else:
 1528         quoted = '"' + astring.replace('\\', '\\\\').replace('"', '\\"') + '"'
 1529     return quoted
 1530 
 1531 def imap_unquote(quoted):
 1532     """Un-quote a `quoted' IMAP string (see RFC 3501, section "Formal Syntax")."""
 1533     if not (quoted.startswith('"') and quoted.endswith('"')):
 1534         unquoted = quoted
 1535     else:
 1536         unquoted = re.sub(r'\\(\\|")', r'\1', quoted[1:-1])
 1537     return unquoted
 1538 
 1539 def parse_imap_url(url): 
 1540     """Parse IMAP URL and return username, password (if appliciable), servername
 1541     and foldername."""
 1542 
 1543     def split_qstr(string, delim): 
 1544         """Split string once at delim, keeping quoted substring intact.
 1545         Strip and unescape quotes where necessary."""
 1546         rm = re.match(r'"(.+?(?<!\\))"(.)(.*)', string)
 1547         if rm:
 1548             a, d, b = rm.groups()
 1549             if not d == delim: 
 1550                 raise ValueError
 1551             a = a.replace('\\"', '"')
 1552         else:
 1553             a, b = string.split(delim, 1)
 1554         return a, b
 1555 
 1556     scheme, url = url.split('://')
 1557     password = None
 1558     try: 
 1559         if options.pwfile: 
 1560             username, url = split_qstr(url, '@')
 1561         else: 
 1562             try:
 1563                 username, url = split_qstr(url, ':')
 1564             except ValueError: 
 1565                 # request password interactively later
 1566                 username, url = split_qstr(url, '@')
 1567             else: 
 1568                 password, url = split_qstr(url, '@')
 1569         server, folder = url.split('/', 1)
 1570     except ValueError:
 1571         unexpected_error("Invalid IMAP connection string")
 1572     try:
 1573         server, port = server.split(':')
 1574     except ValueError:
 1575         if scheme.lower() == 'imap':
 1576             port = 143
 1577         else:
 1578             port = 993
 1579     else:
 1580         port = int(port)
 1581     return username, password, server, port, folder
 1582 
 1583 
 1584 def imap_getdelim(imap_server): 
 1585     """Return the IMAP server's hierarchy delimiter. Assumes there is only one."""
 1586     # This function will break if the LIST reply doesn't meet our expectations. 
 1587     # Imaplib and IMAP itself are both little beasts, and I do not know how
 1588     # fragile this function will be in the wild.
 1589     try: 
 1590         result, response = imap_server.list(pattern='""')
 1591     except ValueError:
 1592         # Stolen from offlineimap: 
 1593         # Some buggy IMAP servers do not respond well to LIST "" ""
 1594         # Work around them.
 1595         result, response = imap_server.list(pattern='%')
 1596     if result != 'OK': unexpected_error("Error listing directory; "
 1597         "server says '%s'" % response[0])
 1598 
 1599     # Response should be a list of strings like 
 1600     # '(\\Noselect \\HasChildren) "." boxname'
 1601     # We parse only the first list item and just grab the delimiter. 
 1602     m = re.match(r'\([^\)]*\) (?P<delim>"."|NIL)', response[0])
 1603     if not m: 
 1604         unexpected_error("imap_getdelim(): cannot parse '%s'" % response[0])
 1605     delim = m.group('delim').strip('"')
 1606     vprint("Found mailbox hierarchy delimiter: '%s'" % delim)
 1607     if delim == "NIL": 
 1608         return None
 1609     return delim
 1610 
 1611 
 1612 def imap_get_namespace(srv):
 1613     """Return the IMAP namespace prefixes and hierarchy delimiters."""
 1614     assert 'NAMESPACE' in srv.capabilities
 1615     result, response = srv.namespace()
 1616     if result != 'OK': 
 1617         unexpected_error("Cannot retrieve IMAP namespace; server says: '%s'" 
 1618             % response[0])
 1619     vprint("NAMESPACE response: %s" % repr(response[0]))
 1620     # Typical response is e.g.
 1621     # ['(("INBOX." ".")) NIL (("#shared." ".")("shared." "."))'] or
 1622     # ['(("" ".")) NIL NIL'], see RFC 2342.
 1623     # Make a reasonable guess parsing this beast. 
 1624     try:
 1625         m = re.match(r'\(\("([^"]*)" (?:"(.)"|NIL)', response[0])
 1626         nsprefix, hdelim = m.groups()
 1627     except:
 1628         print "Cannot parse IMAP NAMESPACE response %s" % repr(response)
 1629         raise
 1630     return nsprefix, hdelim
 1631 
 1632 
 1633 def imap_smart_select(srv, mailbox): 
 1634     """Select the given mailbox on the IMAP server."""
 1635     roflag = options.dry_run or options.copy_old_mail
 1636     # Work around python bug #1277098 (still pending in python << 2.5)
 1637     if not roflag: 
 1638         roflag = None
 1639     if roflag:
 1640         vprint("examining imap folder '%s' read-only" % mailbox)
 1641     else:
 1642         vprint("selecting imap folder '%s'" % mailbox)
 1643     imap_mailbox = mod_utf7_encode(mailbox.decode(userencoding))
 1644     result, response = srv.select(imap_quote(imap_mailbox), roflag)
 1645     if result != 'OK':
 1646         unexpected_error("selecting '%s' failed; server says: '%s'." \
 1647                 % (mailbox, response[0]))
 1648     if not roflag: 
 1649         # Sanity check that we don't silently fail to delete messages. 
 1650         # As to the following indices: IMAP4.response(key) returns 
 1651         # a tuple (key, ['<all_items>']) if the key is found, (key, [None])
 1652         # otherwise.  Imaplib just *loves* to nest trivial lists!  
 1653         permflags = srv.response("PERMANENTFLAGS")[1][0]
 1654         if permflags: 
 1655             permflags = permflags.strip('()').lower().split()
 1656             if not '\\deleted' in permflags: 
 1657                 unexpected_error("Server doesn't allow deleting messages in " \
 1658                         "'%s'." % mailbox)
 1659         elif "IMAP4REV1" in srv.capabilities: 
 1660             vprint("Suspect IMAP4rev1 server, doesn't send PERMANENTFLAGS " \
 1661                     "upon SELECT")
 1662 
 1663 
 1664 def imap_find_mailboxes(srv, mailbox):
 1665     """Find matching mailboxes on the IMAP server, correcting an invalid
 1666     mailbox path if possible."""
 1667     for curbox in imap_guess_mailboxnames(srv, mailbox): 
 1668         if '%' in curbox or '*' in curbox:
 1669             vprint("Looking for mailboxes matching '%s'..." % curbox)
 1670         else:
 1671             vprint("Looking for mailbox '%s'..." % curbox)
 1672         curbox = mod_utf7_encode(curbox.decode(userencoding))
 1673         result, response = srv.list(pattern=imap_quote(curbox))
 1674         if result != 'OK': 
 1675             unexpected_error("LIST command failed; " \
 1676                 "server says: '%s'" % response[0])
 1677         # Say we queried for the mailbox "foo". 
 1678         # Upon success, response is e.g. ['(\\HasChildren) "." foo'].
 1679         # Upon failure, response is [None].  Funky imaplib!
 1680         if response[0] != None: 
 1681             break
 1682     else: 
 1683         user_error("Cannot find mailbox '%s' on server." % mailbox)
 1684     mailboxes = []
 1685     for mailbox_data in response:
 1686         if not mailbox_data:    # imaplib sometimes returns an empty string
 1687             continue
 1688         try:
 1689             m = re.match(r'\((.*?)\) (?:"."|NIL) (.+)', mailbox_data)
 1690         except TypeError:
 1691             # May be a literal.  For literals, imaplib returns a tuple like
 1692             # ('(\\HasNoChildren) "." {12}', 'with "quote"').
 1693             m = re.match(r'\((.*?)\) (?:"."|NIL) \{\d+\}$', mailbox_data[0])
 1694             if m is None:
 1695                 unexpected_error("cannot parse LIST reply %s" %
 1696                         (mailbox_data,))
 1697             attrs = m.group(1)
 1698             name = mailbox_data[1]
 1699         else:
 1700             attrs, name = m.groups()
 1701             name = imap_unquote(name)
 1702         try:
 1703             name = mod_utf7_decode(name)
 1704         except ValueError:
 1705             vprint("Mailbox name '%s' returned by server doesn't look like "
 1706                    "modified UTF-7" % name)
 1707             name = name.decode('utf-8')
 1708         name = name.encode(userencoding)
 1709         if '\\noselect' in attrs.lower().split():
 1710             vprint("skipping not selectable mailbox '%s'" % name)
 1711             continue
 1712         vprint("Found mailbox '%s'" % name)
 1713         mailboxes.append(name)
 1714     if not mailboxes:
 1715         user_error("No matching folder is selectable")
 1716     return mailboxes
 1717 
 1718 
 1719 def imap_guess_mailboxnames(srv, mailbox): 
 1720     """Return a list of possible real IMAP mailbox names in descending order
 1721     of preference, compiled by prepending an IMAP namespace prefix if necessary,
 1722     and by translating hierarchy delimiters."""
 1723     if 'NAMESPACE' in srv.capabilities: 
 1724         nsprefix, hdelim = imap_get_namespace(srv)
 1725     else: 
 1726         vprint("Server doesn't support NAMESPACE command.")
 1727         nsprefix = ""
 1728         hdelim = imap_getdelim(srv)
 1729     vprint("IMAP namespace prefix: '%s', hierarchy delimiter: '%s'" % \
 1730             (nsprefix, hdelim))
 1731     if mailbox.upper() == "INBOX" or \
 1732        (hdelim is not None and mailbox.upper().startswith("INBOX" + hdelim)):
 1733         # INBOX is not a real mailbox name, so namespace prefixes do not apply
 1734         # to INBOX and its children
 1735         boxnames = [mailbox]
 1736     elif mailbox.startswith(nsprefix):
 1737         boxnames = [mailbox]
 1738     else:
 1739         boxnames = [nsprefix + mailbox]
 1740     if os.path.sep in mailbox and hdelim is not None:
 1741         mailbox = mailbox.replace(os.path.sep, hdelim)
 1742         if mailbox.upper().startswith("INBOX" + hdelim):
 1743             boxnames.append(mailbox)
 1744         else:
 1745             if mailbox.startswith(nsprefix):
 1746                 boxnames.append(mailbox)
 1747             if nsprefix:
 1748                 boxnames.append(nsprefix + mailbox)
 1749     return boxnames
 1750 
 1751 
 1752 ###############  misc  functions  ###############
 1753 
 1754 
 1755 def set_signal_handlers():
 1756     """set signal handlers to clean up temporary files on unexpected exit"""
 1757     # Make sure we clean up nicely - we don't want to leave stale dotlock
 1758     # files about if something bad happens to us. This is quite
 1759     # important, even though procmail will delete stale files after a while.
 1760     signal.signal(signal.SIGHUP, clean_up_signal)   # signal 1
 1761     # SIGINT (signal 2) is handled as a python exception
 1762     signal.signal(signal.SIGQUIT, clean_up_signal)  # signal 3
 1763     signal.signal(signal.SIGTERM, clean_up_signal)  # signal 15
 1764 
 1765 
def clean_up():
    """Delete stale files.

    Announces the cleanup with vprint() and leaves the actual work to the
    clean() method of the module-level `_stale' object.
    """
    vprint("cleaning up ...")
    _stale.clean()
 1770 
 1771 
def clean_up_signal(signal_number, stack_frame):
    """Delete stale files -- to be registered as a signal handler.

    Arguments:
    signal_number -- signal number of the terminating signal
    stack_frame -- the current stack frame (not used)

    """
    # Nothing is cleaned up here directly.  Per the original author's note,
    # unexpected_error() aborts with sys.exit(), and clean_up() is expected
    # to be registered by the time a signal arrives, so that it runs on
    # exit.
    unexpected_error("received signal %s" % signal_number)
 1784 
 1785 def prepare_temp_archive():
 1786     """Create temporary archive mbox."""
 1787     if options.dry_run or options.delete_old_mail:
 1788         return None
 1789     if options.no_compress:
 1790         return TempMbox()
 1791     else:
 1792         return CompressedTempMbox()
 1793 
 1794 def commit_archive(archive, final_archive_name):
 1795     """Finalize temporary archive and write it to its final destination."""
 1796     if not options.no_compress:
 1797         final_archive_name = final_archive_name + '.gz'
 1798     if archive:
 1799         archive.close()
 1800         if not archive.empty:
 1801             final_archive = ArchiveMbox(final_archive_name)
 1802             final_archive.lock()
 1803             try:
 1804                 final_archive.append(archive.mbox_file_name)
 1805                 final_archive.commit()
 1806             finally:
 1807                 final_archive.unlock()
 1808                 final_archive.close()
 1809         archive.remove()
 1810 
 1811 def make_archive_name(mailbox_name):
 1812     """Derive archive name and (relative) path from the mailbox name."""
 1813     # allow the user to embed time formats such as '%B' in the archive name
 1814     if options.date_old_max == None:
 1815         tm = time.localtime(time.time() - options.days_old_max*24*60*60)
 1816     else:
 1817         tm = time.localtime(options.date_old_max)
 1818     prefix = suffix = ""
 1819     if options.archive_name:
 1820         archive_head = ""
 1821         archive_tail = time.strftime(options.archive_name, tm)
 1822     else:
 1823         if options.archive_prefix is None and options.archive_suffix is None:
 1824             suffix = options.archive_default_suffix
 1825         else:
 1826             if options.archive_prefix:
 1827                 prefix = time.strftime(options.archive_prefix, tm)
 1828             if options.archive_suffix:
 1829                 suffix = time.strftime(options.archive_suffix, tm)
 1830         archive_head, archive_tail = os.path.split(mailbox_name)
 1831         if not prefix:
 1832             # Don't create hidden archives, e.g. when processing Maildir++
 1833             # subfolders
 1834             archive_tail = archive_tail.lstrip('.')
 1835     if options.output_dir:
 1836         archive_head = options.output_dir
 1837     archive_name = os.path.join(archive_head, prefix + archive_tail + suffix)
 1838     return archive_name
 1839 
 1840 def check_sane_destdir(dir):
 1841     """Do a very primitive check if the given directory looks like a reasonable
 1842     destination directory and bail out if it doesn't."""
 1843     assert dir
 1844     if not os.path.isdir(dir):
 1845         user_error("output directory does not exist: '%s'" % dir)
 1846     if not os.access(dir, os.W_OK):
 1847         user_error("no write permission on output directory: '%s'" % dir)
 1848 
 1849 def check_archive(archive_name):
 1850     """Check if existing archive files are (not) compressed as expected and
 1851     check if we can work with the destination directory."""
 1852     compressed_archive = archive_name + ".gz"
 1853     if options.no_compress:
 1854         if os.path.isfile(compressed_archive):
 1855             user_error("There is already a file named '%s'!\n"
 1856                 "Have you been previously compressing this archive?\n"
 1857                 "You probably should uncompress it manually, and try running me "
 1858                 "again." % compressed_archive)
 1859     elif os.path.isfile(archive_name):
 1860         user_error("There is already a file named '%s'!\n"
 1861             "Have you been reading this archive?\n"
 1862             "You probably should re-compress it manually, and try running me "
 1863             "again." % archive_name)
 1864     dest_dir = os.path.dirname(archive_name)
 1865     if not dest_dir:
 1866         dest_dir = os.getcwd()
 1867     check_sane_destdir(dest_dir)
 1868 
 1869 def nice_size_str(size):
 1870     """Return given size in bytes as '12kB', '1.2MB'"""
 1871     kb = size / 1024.0
 1872     mb = kb / 1024.0
 1873     if mb >= 1.0: return str(round(mb, 1)) + 'MB'
 1874     if kb >= 1.0: return str(round(kb)) + 'kB'
 1875     return str(size) + 'B'
 1876 
 1877 
 1878 def get_filename(msg): 
 1879     """If the given rfc822.Message can be identified with a file (no mbox),
 1880     return the filename, otherwise raise AttributeError."""
 1881     try:
 1882         return msg.fp.name
 1883     except AttributeError:
 1884         # Ugh, that's ugly.  msg.fp is not a plain file, it may be an 
 1885         # instance of 
 1886         # a. mailbox._Subfile 
 1887         #    (msg from mailbox.UnixMailbox, Python <= 2.4) 
 1888         #    File object is msg.fp.fp, we don't want that
 1889         # b. mailbox._PartialFile, subclass of mailbox._ProxyFile
 1890         #    (msg from mailbox.UnixMailbox, Python >= 2.5)
 1891         #    File object is msg.fp._file, we don't want that
 1892         # c. mailbox._ProxyFile
 1893         #    (msg from mailbox.Maildir, Python >= 2.5)
 1894         #    File object is msg.fp._file, we do want that.
 1895         if msg.fp.__class__ == mailbox._ProxyFile: 
 1896             assert hasattr(mailbox, "_PartialFile")
 1897             return msg.fp._file.name
 1898         raise
 1899 
 1900 def safe_open_create(filename):
 1901     """Create and open a file in a NFSv2-safe way, and return a r/w file descriptor.
 1902     The new file is created with mode 600."""
 1903     # This is essentially a simplified version of the dotlocking function.
 1904     vprint("Creating file '%s'" % filename)
 1905     dir, basename = os.path.split(filename)
 1906     # We rely on tempfile.mkstemp to create files safely and with 600 mode.
 1907     fd, pre_name = tempfile.mkstemp(prefix=basename+".pre-", dir=dir)
 1908     try:
 1909         try:
 1910             os.link(pre_name, filename)
 1911         except OSError, e:
 1912             if os.fstat(fd).st_nlink == 2:
 1913                 pass
 1914             else:
 1915                 raise
 1916     finally:
 1917         os.unlink(pre_name)
 1918     return fd
 1919 
 1920 def safe_open_existing(filename):
 1921     """Safely open an existing file, and return a r/w file descriptor."""
 1922     lst = os.lstat(filename)
 1923     if stat.S_ISLNK(lst.st_mode):
 1924         unexpected_error("file '%s' is a symlink." % filename)
 1925     fd = os.open(filename, os.O_RDWR)
 1926     fst = os.fstat(fd)
 1927     if fst.st_nlink != 1:
 1928         unexpected_error("file '%s' has %d hard links." % \
 1929                 (filename, fst.st_nlink))
 1930     if stat.S_ISDIR(fst.st_mode):
 1931         unexpected_error("file '%s' is a directory." % filename)
 1932     for i in stat.ST_DEV, stat.ST_INO, stat.ST_UID, stat.ST_GID, stat.ST_MODE, stat.ST_NLINK:
 1933         if fst[i] != lst[i]:
 1934             unexpected_error("file status changed unexpectedly")
 1935     return fd
 1936 
 1937 def safe_open(filename):
 1938     """Safely open a file, creating it if it doesn't exist, and return a
 1939     r/w file descriptor."""
 1940     # This borrows from postfix code.
 1941     vprint("Opening archive...")
 1942     try:
 1943         fd = safe_open_existing(filename)
 1944     except OSError, e:
 1945         if e.errno != errno.ENOENT: raise
 1946         fd = safe_open_create(filename)
 1947     return fd
 1948 
# This is where it all happens, folks: main() is run only if this file is
# executed as a script, not if it is imported as a module.
if __name__ == '__main__':
    main()