"Fossies" - the Fresh Open Source Software Archive 
Member "archivemail-0.9.0/archivemail" (9 Jul 2011, 75260 Bytes) of package /linux/privat/old/archivemail-0.9.0.tar.gz:
As a special service "Fossies" has tried to format the requested source page into HTML format using (guessed) Python source code syntax highlighting (style:
standard) with prefixed line numbers.
Alternatively, you can view or download the uninterpreted source code file here.
1 #! /usr/bin/env python
2 ############################################################################
3 # Copyright (C) 2002 Paul Rodger <paul@paulrodger.com>,
4 # (C) 2006 Peter Poeml <poeml@suse.de>,
5 # (C) 2006-2010 Nikolaus Schulz <microschulz@web.de>
6 #
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
11 #
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
16 #
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 ############################################################################
21 """
22 Archive and compress old mail in mbox, MH or maildir-format mailboxes.
23 Website: http://archivemail.sourceforge.net/
24 """
25
26 # global administrivia
27 __version__ = "archivemail v0.9.0"
28 __copyright__ = """\
29 Copyright (C) 2002 Paul Rodger <paul@paulrodger.com>
30 (C) 2006 Peter Poeml <poeml@suse.de>,
31 (C) 2006-2011 Nikolaus Schulz <microschulz@web.de>
32 This is free software; see the source for copying conditions. There is NO
33 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."""
34
35 import sys
36
def check_python_version():
    """Exit with status 1 unless the interpreter is python v2.3 or newer.

    An interpreter that does not even provide sys.version_info is treated
    as too old as well.
    """
    complaint = ("This program requires python v2.3 or greater. "
                 "Your version of python is:\n%s" % sys.version)
    try:
        major, minor = sys.version_info[0], sys.version_info[1]
    except AttributeError:
        # we might not even have sys.version_info! :)
        print(complaint)
        sys.exit(1)
    if major < 2 or (major == 2 and minor < 3):
        print(complaint)
        sys.exit(1)
49
50 # define & run this early
51 # (IMAP over SSL requires Python >= 2.3)
52 check_python_version()
53
54 import fcntl
55 import getopt
56 import gzip
57 import mailbox
58 import os
59 import pwd
60 import re
61 import rfc822
62 import shutil
63 import signal
64 import stat
65 import string
66 import tempfile
67 import time
68 import urlparse
69 import errno
70 import socket
71 import locale
72
# From_ mangling regex: matches "From " at the start of any line.
from_re = re.compile(r'^From ', re.MULTILINE)
# Captures a message sequence number ('msn') and a byte count ('size') from
# text of the form "<msn> (RFC822.SIZE <size>)".
imapsize_re = re.compile(r'^(?P<msn>[0-9]+) \(RFC822\.SIZE (?P<size>[0-9]+)\)')

# Preferred encoding as reported by the locale module.
userencoding = locale.getpreferredencoding()
78
79 ############## class definitions ###############
80
class ArchivemailException(Exception):
    """Common base class of the exceptions defined in this file."""


class UserError(ArchivemailException):
    """Raised by user_error() when archivemail is run as a module."""


class UnexpectedError(ArchivemailException):
    """Raised by unexpected_error() when archivemail is run as a module."""


class LockUnavailable(ArchivemailException):
    """Raised when a posix lock or dotlock cannot be obtained right now."""
89
class Stats:
    """Collects counters about one mailbox run and prints a summary"""
    # class-level defaults; instances shadow them as soon as they count
    __archived = 0
    __archived_size = 0
    __mailbox_name = None
    __archive_name = None
    __start_time = 0
    __total = 0
    __total_size = 0

    def __init__(self, mailbox_name, final_archive_name):
        """Start the clock for a new set of statistics.

        Arguments:
        mailbox_name -- filename/dirname of the original mailbox
        final_archive_name -- filename for the final 'mbox' archive, without
                              compression extension (eg .gz)

        """
        assert mailbox_name
        assert final_archive_name
        self.__start_time = time.time()
        self.__mailbox_name = mailbox_name
        self.__archive_name = final_archive_name + ".gz"

    def another_message(self, size):
        """Count one more processed message of the given size."""
        self.__total += 1
        self.__total_size += size

    def another_archived(self, size):
        """Count one more archived message of the given size."""
        self.__archived += 1
        self.__archived_size += size

    def display(self):
        """Print how many messages (and bytes) were archived and how long
        it took, worded according to the delete/dry-run options."""
        elapsed = time.time() - self.__start_time
        if options.delete_old_mail:
            action = "deleted"
        else:
            action = "archived"
        if options.dry_run:
            action = "I would have " + action
        summary = "%s:\n %s %d of %d message(s) (%s of %s) in %.1f seconds" % \
            (self.__mailbox_name, action, self.__archived, self.__total,
             nice_size_str(self.__archived_size),
             nice_size_str(self.__total_size), elapsed)
        print(summary)
140
141
class StaleFiles:
    """Registry of files that must be deleted on abnormal exit"""
    dotlock_files = []  # dotlock files for source mbox and final archive
    temp_mboxes = []    # temporary retain and archive mboxes
    temp_dir = None     # our tempfile directory container

    def _remove_files(self, pending, notice):
        """Empty the list 'pending' from its end, deleting each named file.

        'notice' is the verbose message template, taking the file name.
        Files that cannot be removed are skipped silently.
        """
        while pending:
            path = pending.pop()
            vprint(notice % path)
            try:
                os.remove(path)
            except (IOError, OSError):
                pass

    def clean(self):
        """Delete any temporary files or lockfiles that exist"""
        self._remove_files(self.dotlock_files,
                           "removing stale dotlock file '%s'")
        self._remove_files(self.temp_mboxes,
                           "removing stale temporary mbox '%s'")
        if not self.temp_dir:
            return
        vprint("removing stale tempfile directory '%s'" % self.temp_dir)
        try:
            os.rmdir(self.temp_dir)
        except OSError:
            # exception fetched via sys.exc_info() so the syntax is accepted
            # by old and new interpreters alike
            problem = sys.exc_info()[1]
            if problem.errno == errno.ENOTEMPTY:  # Probably a bug
                user_warning("cannot remove temporary directory '%s', "
                             "directory not empty" % self.temp_dir)
        except IOError:
            pass
        else:
            # only forget the directory once it is really gone
            self.temp_dir = None
172
173
174
class Options:
    """Class to store runtime options, including defaults"""
    archive_prefix = None
    archive_suffix = None
    archive_default_suffix = "_archive"
    archive_name = None
    days_old_max = 180
    date_old_max = None
    delete_old_mail = False
    dry_run = False
    filter_append = None
    include_flagged = False
    locking_attempts = 5
    lockfile_extension = ".lock"
    lock_sleep = True       # handed to time.sleep(); True counts as 1 second
    no_compress = False
    only_archive_read = False
    output_dir = None
    pwfile = None
    preserve_unread = False
    mangle_from = True
    quiet = False
    read_buffer_size = 8192
    script_name = os.path.basename(sys.argv[0])
    min_size = None
    verbose = False
    debug_imap = 0
    warn_duplicates = False
    copy_old_mail = False
    archive_all = False

    def _int_argument(self, option, value):
        """Return 'value' converted to an integer.

        A value that is not an integer is reported through user_error()
        (which never returns) instead of surfacing as a ValueError traceback.
        """
        try:
            return int(value)
        except ValueError:
            user_error("option %s requires an integer argument, not '%s'" %
                       (option, value))

    def parse_args(self, args, usage):
        """Set our runtime options from the command-line arguments.

        Arguments:
        args -- this is sys.argv[1:]
        usage -- a usage message to display on '--help' or bad arguments

        Returns the list of remaining command-line arguments that were not
        consumed as options (the mailbox names).

        """
        try:
            opts, args = getopt.getopt(args, '?D:S:Vd:hno:F:P:qs:p:a:uv',
                             ["date=", "days=", "delete", "dry-run", "help",
                             "include-flagged", "no-compress", "output-dir=",
                             "filter-append=", "pwfile=", "dont-mangle",
                             "preserve-unread", "quiet", "size=", "suffix=",
                             "prefix=", "archive-name=", "verbose",
                             "debug-imap=", "version", "warn-duplicate",
                             "copy", "all"])
        except getopt.error:
            # exception fetched via sys.exc_info() so the syntax is accepted
            # by old and new interpreters alike
            user_error(sys.exc_info()[1])

        archive_by = None

        for o, a in opts:
            if o == '--delete':
                if self.copy_old_mail:
                    user_error("found conflicting options --copy and --delete")
                self.delete_old_mail = True
            elif o == '--include-flagged':
                self.include_flagged = True
            elif o == '--no-compress':
                self.no_compress = True
            elif o == '--warn-duplicate':
                self.warn_duplicates = True
            elif o in ('-D', '--date'):
                if archive_by:
                    user_error("you cannot specify both -d and -D options")
                archive_by = "date"
                self.date_old_max = self.date_argument(a)
            elif o in ('-d', '--days'):
                if archive_by:
                    user_error("you cannot specify both -d and -D options")
                archive_by = "days"
                self.days_old_max = self._int_argument(o, a)
            elif o in ('-o', '--output-dir'):
                self.output_dir = os.path.expanduser(a)
            elif o in ('-P', '--pwfile'):
                self.pwfile = os.path.expanduser(a)
            elif o in ('-F', '--filter-append'):
                self.filter_append = a
            elif o in ('-h', '-?', '--help'):
                print(usage)
                sys.exit(0)
            elif o in ('-n', '--dry-run'):
                self.dry_run = True
            elif o in ('-q', '--quiet'):
                self.quiet = True
            elif o in ('-s', '--suffix'):
                self.archive_suffix = a
            elif o in ('-p', '--prefix'):
                self.archive_prefix = a
            elif o in ('-a', '--archive-name'):
                self.archive_name = os.path.expanduser(a)
            elif o in ('-S', '--size'):
                self.min_size = self._int_argument(o, a)
            elif o in ('-u', '--preserve-unread'):
                self.preserve_unread = True
            elif o == '--dont-mangle':
                self.mangle_from = False
            elif o in ('-v', '--verbose'):
                self.verbose = True
            elif o == '--debug-imap':
                self.debug_imap = self._int_argument(o, a)
            elif o == '--copy':
                if self.delete_old_mail:
                    user_error("found conflicting options --copy and --delete")
                self.copy_old_mail = True
            elif o == '--all':
                self.archive_all = True
            elif o in ('-V', '--version'):
                print(__version__ + "\n\n" + __copyright__)
                sys.exit(0)
        return args

    def sanity_check(self, args):
        """Complain bitterly about our options now rather than later.

        Arguments:
        args -- the mailbox arguments left over after option parsing
        """
        if self.output_dir:
            check_sane_destdir(self.output_dir)
        if self.days_old_max < 0:
            user_error("--days argument must be positive")
        if self.days_old_max >= 10000:
            user_error("--days argument must be less than 10000")
        if self.min_size is not None and self.min_size < 1:
            user_error("--size argument must be greater than zero")
        if self.quiet and self.verbose:
            user_error("you cannot use both the --quiet and --verbose options")
        if self.pwfile:
            if not os.path.isfile(self.pwfile):
                user_error("pwfile %s does not exist" % self.pwfile)
        if self.archive_name and len(args) > 1:
            user_error("the --archive-name cannot be used with multiple " \
                       "mailboxes")

    def date_argument(self, string):
        """Converts a date argument string into seconds since the epoch.

        The accepted formats are tried in turn; if none fits, the problem is
        reported through user_error(), which never returns.
        """
        date_formats = (
            "%Y-%m-%d",  # ISO format
            "%d %b %Y",  # Internet format
            "%d %B %Y",  # Internet format with full month names
        )
        time.accept2dyear = False  # I'm not going to support 2-digit years
        for date_format in date_formats:
            try:
                date = time.strptime(string, date_format)
                seconds = time.mktime(date)
                return seconds
            except (ValueError, OverflowError):
                pass
        user_error("cannot parse the date argument '%s'\n"
            "The date should be in ISO format (eg '2002-04-23'),\n"
            "Internet format (eg '23 Apr 2002') or\n"
            "Internet format with full month names (eg '23 April 2002')" %
            string)
331
332
class LockableMboxMixin:
    """Mixin that adds posix and dotlock locking to an open mbox file."""

    def __init__(self, mbox_file, mbox_file_name):
        """Remember the open file object and its name; start out unlocked."""
        self.mbox_file = mbox_file
        self.mbox_file_name = mbox_file_name
        self._locked = False
        self._use_dotlock = True

    def lock(self):
        """Lock this mbox with both a dotlock and a posix lock.

        While a lock is unavailable the attempt is repeated after a sleep;
        once options.locking_attempts is exceeded the failure is handed to
        unexpected_error().
        """
        assert not self._locked
        attempt = 1
        while True:
            try:
                self._posix_lock()
                self._dotlock_lock()
            except LockUnavailable:
                # exception fetched via sys.exc_info() so the syntax is
                # accepted by old and new interpreters alike
                failure = sys.exc_info()[1]
                self._posix_unlock()
                attempt += 1
                if attempt > options.locking_attempts:
                    unexpected_error(str(failure))
                vprint("%s - sleeping..." % failure)
                time.sleep(options.lock_sleep)
            except:
                # never leave the posix lock behind, whatever went wrong
                self._posix_unlock()
                raise
            else:
                break
        self._locked = True

    def unlock(self):
        """Unlock this mbox."""
        assert self._locked
        self._dotlock_unlock()
        self._posix_unlock()
        self._locked = False

    def _posix_lock(self):
        """Set an exclusive, non-blocking posix lock on the 'mbox' mailbox.

        Raises LockUnavailable if the lock cannot be obtained right now.
        """
        vprint("trying to acquire posix lock on file '%s'" % self.mbox_file_name)
        try:
            fcntl.lockf(self.mbox_file, fcntl.LOCK_EX|fcntl.LOCK_NB)
        except IOError:
            problem = sys.exc_info()[1]
            if problem.errno not in (errno.EAGAIN, errno.EACCES):
                raise
            raise LockUnavailable("posix lock for '%s' unavailable" % \
                self.mbox_file_name)
        vprint("acquired posix lock on file '%s'" % self.mbox_file_name)

    def _posix_unlock(self):
        """Unset any posix lock on the 'mbox' mailbox"""
        vprint("dropping posix lock on file '%s'" % self.mbox_file_name)
        fcntl.lockf(self.mbox_file, fcntl.LOCK_UN)

    def _dotlock_lock(self):
        """Create a dotlock file for the 'mbox' mailbox.

        A uniquely named pre-lock file is created next to the mailbox and
        then hard-linked to the real lock name.  Without write permission
        for the pre-lock file, dotlocking is switched off for this mbox.
        Raises LockUnavailable if the lock file already exists.
        """
        hostname = socket.gethostname()
        pid = os.getpid()
        box_dir, prelock_prefix = os.path.split(self.mbox_file_name)
        prelock_suffix = ".%s.%s%s" % (hostname, pid, options.lockfile_extension)
        lock_name = self.mbox_file_name + options.lockfile_extension
        vprint("trying to create dotlock file '%s'" % lock_name)
        try:
            plfd, prelock_name = tempfile.mkstemp(prelock_suffix, prelock_prefix,
                                                  dir=box_dir)
        except OSError:
            problem = sys.exc_info()[1]
            if problem.errno != errno.EACCES:
                raise
            if not options.quiet:
                user_warning("no write permissions: omitting dotlock for '%s'" % \
                        self.mbox_file_name)
            self._use_dotlock = False
            return
        try:
            try:
                os.link(prelock_name, lock_name)
                # We've got the lock.
            except OSError:
                problem = sys.exc_info()[1]
                if os.fstat(plfd)[stat.ST_NLINK] == 2:
                    # The Linux man page for open(2) claims that in this
                    # case we have actually succeeded to create the link,
                    # and this assumption seems to be folklore.
                    # So we've got the lock.
                    pass
                elif problem.errno == errno.EEXIST:
                    raise LockUnavailable("Dotlock for '%s' unavailable" % self.mbox_file_name)
                else:
                    raise
            _stale.dotlock_files.append(lock_name)
        finally:
            # the pre-lock file has served its purpose either way
            os.close(plfd)
            os.unlink(prelock_name)
        vprint("acquired lockfile '%s'" % lock_name)

    def _dotlock_unlock(self):
        """Delete the dotlock file for the 'mbox' mailbox."""
        if not self._use_dotlock:
            return
        lock_name = self.mbox_file_name + options.lockfile_extension
        vprint("removing lockfile '%s'" % lock_name)
        os.remove(lock_name)
        _stale.dotlock_files.remove(lock_name)

    def commit(self):
        """Sync the mbox file to disk."""
        self.mbox_file.flush()
        os.fsync(self.mbox_file.fileno())

    def close(self):
        """Close the mbox file"""
        vprint("closing file '%s'" % self.mbox_file_name)
        assert not self._locked
        self.mbox_file.close()
447
448
class Mbox(mailbox.UnixMailbox, LockableMboxMixin):
    """A mostly-read-only mbox with locking.  The mbox content can only be
    modified by overwriting the entire underlying file."""

    def __init__(self, path):
        """Constructor for opening an existing 'mbox' mailbox.
        Extends constructor for mailbox.UnixMailbox()

        Named Arguments:
        path -- file name of the 'mbox' file to be opened
        """
        assert path
        fd = safe_open_existing(path)
        # remember timestamps and size as they were before we touch the file
        st = os.fstat(fd)
        self.original_atime = st.st_atime
        self.original_mtime = st.st_mtime
        self.starting_size = st.st_size
        self.mbox_file = os.fdopen(fd, "r+")
        self.mbox_file_name = path
        LockableMboxMixin.__init__(self, self.mbox_file, path)
        mailbox.UnixMailbox.__init__(self, self.mbox_file)

    def reset_timestamps(self):
        """Set the file timestamps to the original values"""
        assert self.original_atime
        assert self.original_mtime
        assert self.mbox_file_name
        os.utime(self.mbox_file_name, (self.original_atime, \
            self.original_mtime))

    def get_size(self):
        """Return the current size of the mbox file on disk"""
        return os.path.getsize(self.mbox_file_name)

    def overwrite_with(self, mbox_filename):
        """Overwrite the mbox content with the content of the given mbox file.

        The source file is closed again even if copying fails.
        """
        fin = open(mbox_filename, "r")
        try:
            self.mbox_file.seek(0)
            shutil.copyfileobj(fin, self.mbox_file)
            # drop whatever is left over from the previous, longer content
            self.mbox_file.truncate()
        finally:
            fin.close()
489
490
class ArchiveMbox(LockableMboxMixin):
    """Simple append-only access to the archive mbox.  Entirely content-agnostic."""

    def __init__(self, path):
        """Open the archive mbox at 'path' for appending."""
        fd = safe_open(path)
        self.mbox_file = os.fdopen(fd, "a")
        LockableMboxMixin.__init__(self, self.mbox_file, path)

    def append(self, filename):
        """Append the content of the given file to the mbox.

        The mbox must be locked.  If copying fails, the mbox is truncated
        back to its previous size and the exception is re-raised; the input
        file is closed in every case.
        """
        assert self._locked
        fin = open(filename, "r")
        try:
            oldsize = os.fstat(self.mbox_file.fileno()).st_size
            try:
                shutil.copyfileobj(fin, self.mbox_file)
            except:
                # We can safely abort here without data loss, because
                # we have not yet changed the original mailbox
                self.mbox_file.truncate(oldsize)
                raise
        finally:
            fin.close()
512
513
class TempMbox:
    """A write-only temporary mbox.  No locking methods."""

    def __init__(self, prefix=tempfile.template):
        """Creates a temporary mbox file and registers it for cleanup."""
        fd, filename = tempfile.mkstemp(prefix=prefix)
        self.mbox_file_name = filename
        _stale.temp_mboxes.append(filename)
        self.mbox_file = os.fdopen(fd, "w")
        # an empty gzip file is not really empty (it contains the gzip header
        # and trailer), so we need to track manually if this mbox is empty
        self.empty = True

    def write(self, msg):
        """Write a rfc822 message object to the 'mbox' mailbox.
        If the rfc822 has no Unix 'From_' line, then one is constructed
        from other headers in the message.

        For such messages the body is also From_-mangled (unless disabled
        by options.mangle_from) and a line separator is appended.

        Arguments:
        msg -- rfc822 message object to be written

        """
        assert msg
        assert self.mbox_file

        self.empty = False
        vprint("saving message to file '%s'" % self.mbox_file_name)
        unix_from = msg.unixfrom
        if unix_from:
            msg_has_mbox_format = True
        else:
            msg_has_mbox_format = False
            unix_from = make_mbox_from(msg)
        self.mbox_file.write(unix_from)
        assert msg.headers
        self.mbox_file.writelines(msg.headers)
        self.mbox_file.write(os.linesep)

        # Copying in chunks is about twice as fast in practice as
        # 'self.mbox_file.writelines(msg.fp.readlines())'
        assert options.read_buffer_size > 0
        mangle = (not msg_has_mbox_format) and options.mangle_from
        # 'carry' holds text that was read but not written yet.  It always
        # begins at the start of a line or with the line separator before
        # it, so the '^From ' pattern never sees a line torn in two.
        carry = ""
        while True:
            chunk = msg.fp.read(options.read_buffer_size)
            if not chunk:
                # stop on real end of input only, never because the mangled
                # text of one round happened to be empty
                break
            if mangle:
                cut = chunk.rfind(os.linesep)
                if cut < 0:
                    # no line boundary in this chunk: keep collecting
                    carry = carry + chunk
                    continue
                ready = carry + chunk[:cut]
                carry = chunk[cut:]
                chunk = from_re.sub('>From ', ready)
            if chunk:
                self.mbox_file.write(chunk)
        if carry:
            # flush what is still pending after the last chunk
            self.mbox_file.write(from_re.sub('>From ', carry))
        if not msg_has_mbox_format:
            self.mbox_file.write(os.linesep)

    def commit(self):
        """Sync the mbox file to disk."""
        self.mbox_file.flush()
        os.fsync(self.mbox_file.fileno())

    def close(self):
        """Close the mbox file"""
        vprint("closing file '%s'" % self.mbox_file_name)
        self.mbox_file.close()

    def saveas(self, filename):
        """Rename this temporary mbox file to the given name, making it
        permanent.  Emergency use only."""
        os.rename(self.mbox_file_name, filename)
        _stale.temp_mboxes.remove(self.mbox_file_name)

    def remove(self):
        """Delete the temporary mbox file."""
        os.remove(self.mbox_file_name)
        _stale.temp_mboxes.remove(self.mbox_file_name)
590
591
class CompressedTempMbox(TempMbox):
    """A compressed version of a TempMbox."""

    def __init__(self, prefix=tempfile.template):
        """Create the temporary file and wrap it in a gzip stream."""
        TempMbox.__init__(self, prefix)
        self.raw_file = self.mbox_file
        self.mbox_file = gzip.GzipFile(mode="a", fileobj=self.mbox_file)
        # Workaround that GzipFile.close() isn't idempotent in Python < 2.6
        # (python issue #2959).  There is no GzipFile.closed, so we need a
        # replacement.
        self.gzipfile_closed = False

    def commit(self):
        """Finish gzip file and sync it to disk."""
        # This method is currently not used
        self.mbox_file.close()  # close GzipFile, writing gzip trailer
        self.gzipfile_closed = True
        self.raw_file.flush()
        os.fsync(self.raw_file.fileno())

    def close(self):
        """Close the gzip file and the underlying raw file."""
        if not self.gzipfile_closed:
            self.mbox_file.close()
            # record the closure so that a repeated close() does not close
            # the GzipFile a second time
            self.gzipfile_closed = True
        self.raw_file.close()
617
618
class IdentityCache:
    """Class used to remember Message-IDs and warn if they are seen twice"""
    seen_ids = None     # per-instance dict, created in the constructor
    mailbox_name = None

    def __init__(self, mailbox_name):
        """Constructor: takes the mailbox name as an argument"""
        assert mailbox_name
        self.mailbox_name = mailbox_name
        # Every cache gets a dict of its own: a dict shared through the
        # class would report ids from one mailbox as duplicates in another.
        self.seen_ids = {}

    def warn_if_dupe(self, msg):
        """Print a warning message if the message has already appeared.

        Messages without a Message-ID header cannot be compared and are
        skipped.
        """
        assert msg
        message_id = msg.get('Message-ID')
        if not message_id:
            return
        if message_id in self.seen_ids:
            user_warning("duplicate message id: '%s' in mailbox '%s'" %
                (message_id, self.mailbox_name))
        self.seen_ids[message_id] = True
638
639
640 # global class instances
641 options = Options() # the run-time options object
642 _stale = StaleFiles() # remember what we have to delete on abnormal exit
643
644
def main(args = sys.argv[1:]):
    """Parse the command line and archive every mailbox named on it.

    Arguments:
    args -- the command-line arguments without the program name

    Prints the usage message and exits with status 1 if no mailbox is given.
    """
    # this usage message is longer than 24 lines -- bad idea?
    # The suffix shown as default is archive_default_suffix: archive_suffix
    # itself is still None at this point and would be printed as 'None'.
    usage = """Usage: %s [options] mailbox [mailbox...]
Moves old mail in IMAP, mbox, MH or maildir-format mailboxes to an mbox-format
mailbox compressed with gzip.

Options are as follows:
-d, --days=NUM archive messages older than NUM days (default: %d)
-D, --date=DATE archive messages older than DATE
-o, --output-dir=DIR directory to store archives (default: same as original)
-P, --pwfile=FILE file to read imap password from (default: None)
-F, --filter-append=STRING append arbitrary string to the IMAP filter string
-p, --prefix=NAME prefix for archive filename (default: none)
-s, --suffix=NAME suffix for archive filename (default: '%s')
-a, --archive-name=NAME specify complete archive filename
-S, --size=NUM only archive messages NUM bytes or larger
-n, --dry-run don't write to anything - just show what would be done
-u, --preserve-unread never archive unread messages
--dont-mangle do not mangle From_ in message bodies
--delete delete rather than archive old mail (use with caution!)
--copy copy rather than archive old mail
--include-flagged messages flagged important can also be archived
--all archive all messages
--no-compress do not compress archives with gzip
--warn-duplicate warn about duplicate Message-IDs in the same mailbox
-v, --verbose report lots of extra debugging information
--debug-imap=NUM set IMAP debugging output level (0 is none)
-q, --quiet quiet mode - print no statistics (suitable for crontab)
-V, --version display version information
-h, --help display this message

Example: %s linux-kernel
This will move all messages older than %s days to a 'mbox' mailbox called
'linux-kernel_archive.gz', deleting them from the original 'linux-kernel'
mailbox. If the 'linux-kernel_archive.gz' mailbox already exists, the
newly archived messages are appended.

To archive IMAP mailboxes, format your mailbox argument like this:
imap://username:password@server/mailbox
(substitute 'imap' with 'imaps' for an SSL connection)

Website: http://archivemail.sourceforge.net/ """ % \
    (options.script_name, options.days_old_max,
     options.archive_default_suffix,
     options.script_name, options.days_old_max)

    args = options.parse_args(args, usage)
    if not args:
        print(usage)
        sys.exit(1)

    options.sanity_check(args)

    for mailbox_path in args:
        archive(mailbox_path)
701
702
703 ######## errors and debug ##########
704
def vprint(string):
    """Print the string argument, but only when verbose mode is on"""
    if not options.verbose:
        return
    print(string)
709
710
def unexpected_error(string):
    """Report the string argument plus a 'shutting down' notice on stderr
    and exit with status 1.  When archivemail is run as a module, raise
    UnexpectedError instead.  This function never returns."""
    if __name__ != '__main__':
        raise UnexpectedError(string)
    script = options.script_name
    sys.stderr.write("%s: %s\n" % (script, string))
    sys.stderr.write("%s: unexpected error encountered - shutting down\n" %
        script)
    sys.exit(1)
721
722
def user_error(string):
    """Report the string argument on stderr and exit with status 1.  When
    archivemail is run as a module, raise UserError instead.  This function
    never returns."""
    if __name__ != '__main__':
        raise UserError(string)
    complaint = "%s: %s\n" % (options.script_name, string)
    sys.stderr.write(complaint)
    sys.exit(1)
730
731
def user_warning(string):
    """Write the string argument to stderr as a warning"""
    warning = "%s: Warning - %s\n" % (options.script_name, string)
    sys.stderr.write(warning)
735
736 ########### operations on a message ############
737
def make_mbox_from(message):
    """Build a 'From_' mbox header line for the message from its guessed
    return path and its guessed delivery time (shown as local time).

    Arguments:
    message -- the rfc822 message object

    """
    assert message
    sender = guess_return_path(message)
    delivered = time.localtime(guess_delivery_time(message))
    assert delivered
    return "From %s %s\n" % (sender, time.asctime(delivered))
754
755
def guess_return_path(message):
    """Return a guess at the Return Path address of an rfc822 message"""
    assert message

    for header in ('Return-path', 'From'):
        raw_value = message.get(header)
        if not raw_value:
            continue
        address = rfc822.parseaddr(raw_value)[1]
        if address:
            return address
    # argh, neither header gave us a usable address - just use the
    # current unix username like mutt does
    login = pwd.getpwuid(os.getuid())[0]
    assert login
    return login
771
772
def guess_delivery_time(message):
    """Return a guess at the delivery date of an rfc822 message.

    Sources are consulted in this order: a series of date-bearing headers,
    the unix 'From_' line, and finally the modification time of the file
    holding the message.  If nothing yields a time, the current time is
    returned.
    """
    assert message
    # try to guess the delivery date from various headers
    # get more desperate as we go through the list
    for header in ('Delivery-date', 'Received', 'Resent-Date', 'Date'):
        try:
            if header != 'Received':
                token = message.get(header)
            else:
                # This should be good enough for almost all headers in the
                # wild; if we're guessing wrong, parsedate_tz() will fail
                # gracefully.
                token = message.getrawheader(header).rsplit(';', 1)[-1]
            parsed = rfc822.parsedate_tz(token)
            if parsed:
                time_message = rfc822.mktime_tz(parsed)
                vprint("using valid time found from '%s' header" % header)
                return time_message
        except (AttributeError, IndexError, ValueError, OverflowError):
            pass
    # as a second-last resort, try the date from the 'From_' line (ugly)
    # this will only work from a mbox-format mailbox
    if message.unixfrom:
        # Hmm. This will break with full-blown RFC 2822 addr-spec's.
        from_date = message.unixfrom.split(None, 2)[-1]
        # Interpret no timezone as localtime
        parsed = rfc822.parsedate_tz(from_date)
        if parsed:
            try:
                time_message = rfc822.mktime_tz(parsed)
                vprint("using valid time found from unix 'From_' header")
                return time_message
            except (ValueError, OverflowError):
                pass
    # the headers have no valid dates -- last resort, try the file timestamp
    # this will not work for mbox mailboxes
    try:
        file_name = get_filename(message)
    except AttributeError:
        # we are looking at a 'mbox' mailbox - argh!
        # Just return the current time - this will never get archived :(
        vprint("no valid times found at all -- using current time!")
        return time.time()
    if not os.path.isfile(file_name):
        unexpected_error("mailbox file name '%s' has gone missing" % \
            file_name)
    time_message = os.path.getmtime(file_name)
    vprint("using valid time found from '%s' last-modification time" % \
        file_name)
    return time_message
821
822
def add_status_headers(message):
    """
    Add Status and X-Status headers to a message from a maildir mailbox.

    Maildir keeps the read/replied/etc. state of a message in the suffix of
    its file name instead of in Status and X-Status headers.  When such a
    message is archived into mbox format, the state is carried over into
    those headers.

    """
    # maildir flag -> letter for the Status header
    #   S (seen): the user has viewed this message
    status_letters = {"S": "R"}
    # maildir flag -> letter for the X-Status header
    #   F (flagged): user-defined 'important' flag
    #   R (replied): the user has replied to this message
    x_status_letters = {"F": "F", "R": "A"}
    # D (draft), T (trashed) and unknown flags are not translated

    status = ""
    x_status = ""
    file_name = get_filename(message)
    match = re.search(":2,(.+)$", file_name)
    if match:
        for flag in match.group(1):
            status = status + status_letters.get(flag, "")
            x_status = x_status + x_status_letters.get(flag, "")

    # files in the maildir 'cur' directory are no longer new,
    # they are the same as messages with 'Status: O' headers in mbox
    if os.path.basename(os.path.dirname(file_name)) == "cur":
        status = status + "O"

    # Overwrite existing 'Status' and 'X-Status' headers.  They add no value
    # in maildirs, and we better don't listen to them.
    if not status:
        del message['Status']
    else:
        vprint("converting maildir status into Status header '%s'" % status)
        message['Status'] = status
    if not x_status:
        del message['X-Status']
    else:
        vprint("converting maildir status into X-Status header '%s'" % x_status)
        message['X-Status'] = x_status
871
def add_status_headers_imap(message, flags):
    """Add Status and X-Status headers to a message from an imap mailbox.

    'flags' is the sequence of IMAP flag strings set on the message.
    """
    # IMAP flag -> letter for the Status header
    #   \Seen: the user has viewed this message
    status_letters = {"\\Seen": "R"}
    # IMAP flag -> letter for the X-Status header
    #   \Flagged: user-defined 'important' flag
    #   \Answered: the user has replied to this message
    x_status_letters = {"\\Flagged": "F", "\\Answered": "A"}
    # \Draft, \Deleted and unknown flags are not translated

    status = ""
    x_status = ""
    for flag in flags:
        status = status + status_letters.get(flag, "")
        x_status = x_status + x_status_letters.get(flag, "")
    if "\\Recent" not in flags:
        status = status + "O"

    # As with maildir folders, overwrite Status and X-Status headers
    # if they exist.
    vprint("converting imap status (%s)..." % " ".join(flags))
    if not status:
        vprint("not generating Status header")
        del message['Status']
    else:
        vprint("generating Status header '%s'" % status)
        message['Status'] = status
    if not x_status:
        vprint("not generating X-Status header")
        del message['X-Status']
    else:
        vprint("generating X-Status header '%s'" % x_status)
        message['X-Status'] = x_status
907
def is_flagged(message):
    """return true if the message is flagged important, false otherwise"""
    # MH and mbox mailboxes use the 'X-Status' header to indicate importance
    x_status = message.get('X-Status')
    if x_status and re.search('F', x_status):
        vprint("message is important (X-Status header='%s')" % x_status)
        return True
    try:
        file_name = get_filename(message)
    except AttributeError:
        file_name = None
    # maildir mailboxes use the filename suffix to indicate flagged status
    if file_name and re.search(":2,.*F.*$", file_name):
        vprint("message is important (filename info has 'F')")
        return True
    vprint("message is not flagged important")
    return False
926
927
def is_unread(message):
    """Return True if the message is unread, False otherwise.

    A message counts as read when its 'Status' header contains 'R' (MH and
    mbox mailboxes) or when the maildir info suffix of its file name
    contains 'S'.
    """
    header = message.get('Status')
    if header and re.search('R', header):
        vprint("message has been read (status header='%s')" % header)
        return False
    try:
        path = get_filename(message)
    except AttributeError:
        # No file name available for this message.
        path = None
    # maildir mailboxes use the filename suffix to indicate read status
    if path and re.search(":2,.*S.*$", path):
        vprint("message has been read (filename info has 'S')")
        return False
    vprint("message is unread")
    return True
946
947
def sizeof_message(message):
    """Return size of message in bytes (octets).

    If the message has a file name (maildir and MH mailboxes), the size of
    that file is returned.  Otherwise (mbox) the size is summed up from the
    'From ' line, the header lines, the blank separator line and the body.
    """
    assert message
    try:
        path = get_filename(message)
    except AttributeError:
        path = None
    if path:
        # with maildir and MH mailboxes, we can just use the file size
        return os.path.getsize(path)

    # with mbox mailboxes, not so easy
    size = 0
    if message.unixfrom:
        size = size + len(message.unixfrom)
    size = size + sum([len(line) for line in message.headers])
    size = size + 1  # the blank line after the headers
    # The body length is the distance from the current position of the
    # message's file object to its end.
    # NOTE(review): assumes message.fp is positioned at the start of the
    # body when we get here -- confirm against the callers.
    body_start = message.fp.tell()
    message.fp.seek(0, 2)  # seek to the end of the message
    body_end = message.fp.tell()
    message.rewindbody()
    return size + (body_end - body_start)
974
def is_smaller(message, size):
    """Return True if the message is smaller than size bytes, False otherwise.

    Arguments:
    message -- the message to measure (see sizeof_message())
    size -- the minimum size in bytes; must be positive
    """
    assert message
    assert size > 0
    actual = sizeof_message(message)
    too_small = actual < size
    if too_small:
        vprint("message is too small (%d bytes), minimum bytes : %d" %
               (actual, size))
    else:
        vprint("message is not too small (%d bytes), minimum bytes: %d" %
               (actual, size))
    return too_small
988
989
def should_archive(message):
    """Return True if we should archive the message, False otherwise.

    With options.archive_all every message qualifies.  Otherwise a message
    must be old enough (by date or by age in days, depending on
    options.date_old_max) and must not be protected by being flagged, too
    small or unread, subject to the corresponding options.
    """
    if options.archive_all:
        return True
    delivered = guess_delivery_time(message)
    if options.date_old_max is None:
        is_old = is_older_than_days(delivered, options.days_old_max)
    else:
        is_old = is_older_than_time(delivered, options.date_old_max)
    if not is_old:
        return False

    # The protection checks run in this order and stop at the first hit.
    protected = (
        (not options.include_flagged and is_flagged(message))
        or (options.min_size and is_smaller(message, options.min_size))
        or (options.preserve_unread and is_unread(message))
    )
    return not protected
1012
1013
def is_older_than_time(time_message, max_time):
    """Return True if a message was delivered before the specified time,
    False otherwise.

    Arguments:
    time_message -- the delivery date of the message measured in seconds
                    since the epoch
    max_time -- the cut-off time, measured in seconds since the epoch

    """
    # Signed distance to the cut-off in days; only used for reporting.
    distance_days = (max_time - time_message) / 24 / 60 / 60
    older = time_message < max_time
    if older:
        vprint("message is %.2f days older than the specified date" %
               distance_days)
    else:
        vprint("message is %.2f days younger than the specified date" %
               abs(distance_days))
    return older
1031
1032
def is_older_than_days(time_message, max_days):
    """Return True if a message is older than the specified number of days,
    False otherwise.  Messages dated in the future are never old.

    Arguments:
    time_message -- the delivery date of the message measured in seconds
                    since the epoch
    max_days -- maximum number of days before message is considered old
    """
    now = time.time()
    if time_message > now:
        vprint("warning: message has date in the future")
        return False
    age_days = (now - time_message) / 24 / 60 / 60
    vprint("message is %.2f days old" % age_days)
    max_age_secs = max_days * 24 * 60 * 60
    return (time_message + max_age_secs) < now
1052
def build_imap_filter():
    """Return an IMAP SEARCH criteria string derived from the global options.

    The result is a parenthesized, space-separated list that always holds a
    BEFORE date and, depending on the options, UNFLAGGED, LARGER, SEEN and
    the user-supplied options.filter_append.
    """
    # IMAP dates require English month abbreviations.  Do not use
    # strftime('%b'): its output depends on the current locale.
    months = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')

    if options.date_old_max is None:
        secs_old_max = options.days_old_max * 24 * 60 * 60
        time_old = time.gmtime(time.time() - secs_old_max)
    else:
        time_old = time.gmtime(options.date_old_max)
    year, month, day = time_old[0], time_old[1], time_old[2]
    criteria = ["BEFORE %02d-%s-%d" % (day, months[month - 1], year)]

    if not options.include_flagged:
        criteria.append("UNFLAGGED")
    if options.min_size:
        # LARGER matches messages strictly larger than its argument, while
        # the local check (is_smaller) keeps only messages strictly smaller
        # than min_size.  Subtract one so that a message of exactly
        # min_size bytes is archived here, too.
        criteria.append("LARGER %d" % (options.min_size - 1))
    if options.preserve_unread:
        criteria.append("SEEN")
    if options.filter_append:
        criteria.append(options.filter_append)

    return '(' + ' '.join(criteria) + ')'
1076
1077 ############### mailbox operations ###############
1078
def archive(mailbox_name):
    """Archives a mailbox.

    Arguments:
    mailbox_name -- the filename/dirname/url of the mailbox to be archived

    The mailbox type is guessed from the name: an imap:// or imaps:// URL is
    handled as IMAP, a regular file as mbox, a directory with 'cur' and
    'new' subdirectories as maildir and any other directory as MH.  All work
    is done inside a private temporary directory, which is removed again on
    success; stale files are cleaned up in any case.
    """
    assert mailbox_name

    # strip any trailing slash (we could be archiving a maildir or MH format
    # mailbox and somebody was pressing <tab> in bash) - we don't want to use
    # the trailing slash in the archive name
    mailbox_name = mailbox_name.rstrip("/")
    assert mailbox_name

    set_signal_handlers()
    os.umask(077) # saves setting permissions on mailboxes/tempfiles

    vprint("processing '%s'" % mailbox_name)
    # Only the URL scheme decides whether this is a remote mailbox.
    is_imap = urlparse.urlparse(mailbox_name)[0] in ('imap', 'imaps')
    if not is_imap:
        # Check if the mailbox exists, and refuse to mess with other people's
        # stuff
        try:
            fuid = os.stat(mailbox_name).st_uid
        except OSError, e:
            user_error(str(e))
        else:
            if fuid != os.getuid():
                user_error("'%s' is owned by someone else!" % mailbox_name)

    # Remember the tempfile module's directory so that it can be restored in
    # the finally clause below.
    old_temp_dir = tempfile.tempdir
    try:
        # create a temporary directory for us to work in securely
        tempfile.tempdir = None
        new_temp_dir = tempfile.mkdtemp('archivemail')
        assert new_temp_dir
        # Register the directory so that clean_up() can find it.
        _stale.temp_dir = new_temp_dir
        # From here on, temporary files are created in our private directory.
        tempfile.tempdir = new_temp_dir
        vprint("set tempfile directory to '%s'" % new_temp_dir)

        if is_imap:
            vprint("guessing mailbox is of type: imap(s)")
            _archive_imap(mailbox_name)
        elif os.path.isfile(mailbox_name):
            vprint("guessing mailbox is of type: mbox")
            _archive_mbox(mailbox_name)
        elif os.path.isdir(mailbox_name):
            cur_path = os.path.join(mailbox_name, "cur")
            new_path = os.path.join(mailbox_name, "new")
            if os.path.isdir(cur_path) and os.path.isdir(new_path):
                vprint("guessing mailbox is of type: maildir")
                _archive_dir(mailbox_name, "maildir")
            else:
                vprint("guessing mailbox is of type: MH")
                _archive_dir(mailbox_name, "mh")
        else:
            user_error("'%s' is not a normal file or directory" % mailbox_name)

        # remove our special temp directory - hopefully empty
        os.rmdir(new_temp_dir)
        _stale.temp_dir = None

    finally:
        # Runs on success and on error alike.
        tempfile.tempdir = old_temp_dir
        clean_up()
1144
def _archive_mbox(mailbox_name):
    """Archive a 'mbox' style mailbox - used by archive().

    Arguments:
    mailbox_name -- path of the mbox file

    Every message is read once and written either to a temporary archive or
    to a temporary 'retain' mbox.  The archive is committed first; only then
    is the original mailbox overwritten with the retained messages.
    """
    assert mailbox_name
    final_archive_name = make_archive_name(mailbox_name)
    vprint("archiving '%s' to '%s' ..." % (mailbox_name, final_archive_name))
    check_archive(final_archive_name)
    stats = Stats(mailbox_name, final_archive_name)
    cache = IdentityCache(mailbox_name)
    original = Mbox(path=mailbox_name)
    # When the original mailbox must stay untouched (dry run or copy mode),
    # there is no need to collect the messages to retain.
    if options.dry_run or options.copy_old_mail:
        retain = None
    else:
        retain = TempMbox(prefix="retain")
    # May be None, see prepare_temp_archive().
    archive = prepare_temp_archive()

    original.lock()
    msg = original.next()
    # A non-empty file that yields no first message cannot be an mbox.
    if not msg and (original.starting_size > 0):
        user_error("'%s' is not a valid mbox-format mailbox" % mailbox_name)
    if msg and 'X-IMAP' in msg:
        # Dovecot and UW-IMAP pseudo message for mailbox meta data
        # It is always retained and is neither counted nor archived.
        vprint("detected IMAP pseudo message")
        if retain:
            retain.write(msg)
        msg = original.next()
    while (msg):
        msg_size = sizeof_message(msg)
        stats.another_message(msg_size)
        vprint("processing message '%s'" % msg.get('Message-ID'))
        if options.warn_duplicates:
            cache.warn_if_dupe(msg)
        if should_archive(msg):
            stats.another_archived(msg_size)
            if options.delete_old_mail:
                # Deleting means: written neither to archive nor to retain.
                vprint("decision: delete message")
            else:
                vprint("decision: archive message")
                if archive:
                    archive.write(msg)
        else:
            vprint("decision: retain message")
            if retain:
                retain.write(msg)
        msg = original.next()
    vprint("finished reading messages")
    # Bail out if the mailbox was modified while we were reading it.
    if original.starting_size != original.get_size():
        unexpected_error("the mailbox '%s' changed size during reading!" % \
                mailbox_name)
    # Write the new archive before modifying the mailbox, to prevent
    # losing data if something goes wrong
    commit_archive(archive, final_archive_name)
    if retain:
        # If both files are at the same offset, everything was retained and
        # the original does not need to be rewritten.
        pending_changes = original.mbox_file.tell() != retain.mbox_file.tell()
        if pending_changes:
            retain.commit()
            retain.close()
            vprint("writing back changed mailbox '%s'..." % \
                    original.mbox_file_name)
            # Prepare for recovery on error.
            # FIXME: tempfile.tempdir is our nested dir.
            saved_name = "%s/%s.%s.%s-%s-%s" % \
                    (tempfile.tempdir, options.script_name,
                            os.path.basename(original.mbox_file_name),
                            socket.gethostname(), os.getuid(),
                            os.getpid())
            try:
                original.overwrite_with(retain.mbox_file_name)
                original.commit()
            except:
                # Keep a copy of the retained messages under saved_name
                # before propagating the error.
                retain.saveas(saved_name)
                print "Error writing back changed mailbox; saved good copy to " \
                        "%s" % saved_name
                raise
        else:
            retain.close()
            vprint("no changes to mbox '%s'" % original.mbox_file_name)
        retain.remove()
    original.unlock()
    original.close()
    original.reset_timestamps() # Minor race here; mutt has this too.
    if not options.quiet:
        stats.display()
1227
1228
def _archive_dir(mailbox_name, type):
    """Archive a 'maildir' or 'MH' style mailbox - used by archive().

    Arguments:
    mailbox_name -- path of the mailbox directory
    type -- either "maildir" or "mh"

    Messages to be archived are written to a temporary archive first.  Their
    files are deleted only after the archive has been committed.
    """
    assert mailbox_name
    assert type
    final_archive_name = make_archive_name(mailbox_name)
    vprint("archiving '%s' to '%s' ..." % (mailbox_name, final_archive_name))
    check_archive(final_archive_name)
    stats = Stats(mailbox_name, final_archive_name)
    # File names of the messages to remove once the archive is safely written.
    delete_queue = []

    if type == "maildir":
        original = mailbox.Maildir(mailbox_name)
    elif type == "mh":
        original = mailbox.MHMailbox(mailbox_name)
    else:
        unexpected_error("unknown type: %s" % type)
    cache = IdentityCache(mailbox_name)
    # May be None, see prepare_temp_archive().
    archive = prepare_temp_archive()

    for msg in original:
        if not msg:
            vprint("ignoring invalid message '%s'" % get_filename(msg))
            continue
        msg_size = sizeof_message(msg)
        stats.another_message(msg_size)
        vprint("processing message '%s'" % msg.get('Message-ID'))
        if options.warn_duplicates:
            cache.warn_if_dupe(msg)
        if should_archive(msg):
            stats.another_archived(msg_size)
            if options.delete_old_mail:
                vprint("decision: delete message")
            else:
                vprint("decision: archive message")
                if archive:
                    if type == "maildir":
                        # Only maildir messages get status headers added
                        # before they are written to the archive.
                        add_status_headers(msg)
                    archive.write(msg)
            # The original is left alone in dry-run and copy mode.
            if not options.dry_run and not options.copy_old_mail:
                delete_queue.append(get_filename(msg))
        else:
            vprint("decision: retain message")
    vprint("finished reading messages")
    # Write the new archive before modifying the mailbox, to prevent
    # losing data if something goes wrong
    commit_archive(archive, final_archive_name)
    for file_name in delete_queue:
        vprint("removing original message: '%s'" % file_name)
        try: os.remove(file_name)
        except OSError, e:
            # A file that is already gone is not an error.
            if e.errno != errno.ENOENT: raise
    if not options.quiet:
        stats.display()
1282
def _archive_imap(mailbox_name):
    """Archive an imap mailbox - used by archive().

    Arguments:
    mailbox_name -- the imap:// or imaps:// URL, see parse_imap_url()

    Logs in to the server, finds the folders matching the folder part of
    the URL and, for each of them, searches the messages to archive, copies
    them into a local archive and flags them as deleted on the server,
    subject to the dry_run, delete_old_mail and copy_old_mail options.
    """
    assert mailbox_name
    import imaplib
    import cStringIO
    import getpass

    vprint("Setting imaplib.Debug = %d" % options.debug_imap)
    imaplib.Debug = options.debug_imap
    archive = None
    imap_username, imap_password, \
        imap_server, imap_server_port, \
        imap_folder_pattern = parse_imap_url(mailbox_name)
    # Password sources, in order of precedence: the URL, the password file,
    # an interactive prompt.
    if not imap_password:
        if options.pwfile:
            imap_password = open(options.pwfile).read().rstrip()
        else:
            # Prompting needs a terminal and is pointless in quiet mode.
            if (not os.isatty(sys.stdin.fileno())) or options.quiet:
                unexpected_error("No imap password specified")
            imap_password = getpass.getpass('IMAP password: ')

    is_ssl = mailbox_name[:5].lower() == 'imaps'
    if is_ssl:
        vprint("establishing secure connection to server %s, port %s" %
                (imap_server, imap_server_port))
        imap_srv = imaplib.IMAP4_SSL(imap_server, imap_server_port)
    else:
        vprint("establishing connection to server %s, port %s" %
                (imap_server, imap_server_port))
        imap_srv = imaplib.IMAP4(imap_server, imap_server_port)
    # Prefer CRAM-MD5 if the server offers it; fall back to a plain LOGIN
    # unless the server has disabled that.
    if "AUTH=CRAM-MD5" in imap_srv.capabilities:
        vprint("authenticating (cram-md5) to server as %s" % imap_username)
        result, response = imap_srv.login_cram_md5(imap_username, imap_password)
    elif not "LOGINDISABLED" in imap_srv.capabilities:
        vprint("logging in to server as %s" % imap_username)
        result, response = imap_srv.login(imap_username, imap_password)
    else:
        user_error("imap server %s has login disabled (hint: "
                "try ssl/imaps)" % imap_server)

    mailboxes = imap_find_mailboxes(imap_srv, imap_folder_pattern)
    for imap_folder in mailboxes:
        final_archive_name = make_archive_name(imap_folder)
        vprint("archiving mailbox '%s' on IMAP server '%s' to '%s' ..." %
                (imap_folder, imap_server, final_archive_name))
        check_archive(final_archive_name)
        # Replace the folder pattern at the end of the URL with the name of
        # the folder actually processed.
        # NOTE(review): with an empty folder pattern the slice [:-0] yields
        # an empty string -- confirm that the pattern can never be empty.
        cur_mailbox = mailbox_name[:-len(imap_folder_pattern)] + imap_folder
        stats = Stats(cur_mailbox, final_archive_name)
        cache = IdentityCache(cur_mailbox)

        imap_smart_select(imap_srv, imap_folder)
        total_msg_count = int(imap_srv.response("EXISTS")[1][0])
        vprint("folder has %d message(s)" % total_msg_count)

        # IIUIC the message sequence numbers are stable for the whole session, since
        # we just send SEARCH, FETCH and STORE commands, which should prevent the
        # server from sending untagged EXPUNGE responses -- see RFC 3501 (IMAP4rev1)
        # 7.4.1 and RFC 2180 (Multi-Accessed Mailbox Practice).
        # Worst thing should be that we bail out FETCHing a message that has been
        # deleted.

        if options.archive_all:
            message_list = [str(n) for n in range(1, total_msg_count+1)]
        else:
            imap_filter = build_imap_filter()
            vprint("imap filter: '%s'" % imap_filter)
            vprint("searching messages matching criteria")
            result, response = imap_srv.search(None, imap_filter)
            if result != 'OK': unexpected_error("imap search failed; server says '%s'" %
                    response[0])
            if response[0] is not None:
                # response is a list with a single item, listing message
                # sequence numbers like ['1 2 3 1016']
                message_list = response[0].split()
            else:
                # Broken server has sent no untagged response; assume empty result set.
                message_list = []
        vprint("%d messages are matching filter" % len(message_list))

        # First, gather data for the statistics.
        # This is skipped in quiet mode, where the statistics are not shown.
        if total_msg_count > 0 and not options.quiet:
            vprint("fetching size of messages...")
            result, response = imap_srv.fetch('1:*', '(RFC822.SIZE)')
            if result != 'OK': unexpected_error("Failed to fetch message sizes; "
                    "server says '%s'" % response[0])
            # response is a list with entries like '1016 (RFC822.SIZE 3118)',
            # where the first number is the message sequence number, the second is
            # the size.
            for x in response:
                m = imapsize_re.match(x)
                msn, msg_size = m.group('msn'), int(m.group('size'))
                stats.another_message(msg_size)
                if msn in message_list:
                    stats.another_archived(msg_size)

        if not options.dry_run:
            if not options.delete_old_mail:
                archive = prepare_temp_archive()
                vprint("fetching messages...")
                for msn in message_list:
                    # Fetching message flags and body together always finds \Seen
                    # set. To check \Seen, we must fetch the flags first.
                    result, response = imap_srv.fetch(msn, '(FLAGS)')
                    if result != 'OK': unexpected_error("Failed to fetch message "
                            "flags; server says '%s'" % response[0])
                    msg_flags = imaplib.ParseFlags(response[0])
                    result, response = imap_srv.fetch(msn, '(RFC822)')
                    if result != 'OK': unexpected_error("Failed to fetch message; "
                            "server says '%s'" % response[0])
                    # Convert the CRLF line ends to the local convention.
                    msg_str = response[0][1].replace("\r\n", os.linesep)
                    msg = rfc822.Message(cStringIO.StringIO(msg_str))
                    vprint("processing message '%s'" % msg.get('Message-ID'))
                    add_status_headers_imap(msg, msg_flags)
                    if options.warn_duplicates:
                        cache.warn_if_dupe(msg)
                    archive.write(msg)
                # The archive is committed before anything is flagged as
                # deleted on the server.
                commit_archive(archive, final_archive_name)
            if not options.copy_old_mail:
                vprint("Deleting %s messages" % len(message_list))
                # do not delete more than a certain number of messages at a time,
                # because the command length is limited. This avoids that servers
                # terminate the connection with EOF or TCP RST.
                max_delete = 100
                for i in range(0, len(message_list), max_delete):
                    result, response = imap_srv.store( \
                            string.join(message_list[i:i+max_delete], ','),
                            '+FLAGS.SILENT', '\\Deleted')
                    if result != 'OK': unexpected_error("Error while deleting "
                            "messages; server says '%s'" % response[0])
        vprint("Closing mailbox.")
        imap_srv.close()
        if not options.quiet:
            stats.display()
    vprint("Terminating connection.")
    imap_srv.logout()
1418
1419
1420 ############### IMAP functions ###############
1421
1422
1423 # First, some IMAP modified UTF-7 support functions.
1424
# The modified BASE64 alphabet used by the modified UTF-7 functions below:
# 64 characters, so each one encodes 6 bits.
mb64alpha = "".join([string.ascii_uppercase, string.ascii_lowercase,
                     string.digits, '+,'])
1427
def isprint_ascii(char):
    """Return True if char is a printable ASCII character, i.e. its code
    lies in the range 0x20 (space) to 0x7e (tilde), False otherwise."""
    code = ord(char)
    return 0x20 <= code <= 0x7e
1431
def mod_utf7_encode(ustr):
    """Encode unicode string object in modified UTF-7.

    Printable ASCII characters are copied as they are, except for '&', which
    becomes '&-'.  Every run of other characters is encoded as UTF-16BE in
    modified BASE64 and wrapped in '&' ... '-'.
    """

    def mb64_encode(tomb64):
        """Encode unicode string object as a modified UTF-7 shifted sequence
        in modified BASE64."""
        octets = [ord(o) for o in tomb64.encode('utf_16_be')]
        chars = []
        # Process 24-bit blocks, encoding them in 6-bit steps.
        for start in range(0, len(octets), 3):
            carry = 0
            shift = 2
            for octet in octets[start:start+3]:
                chars.append(mb64alpha[carry | (octet >> shift)])
                # Keep the bits not emitted yet as the high bits of the
                # next 6-bit group.
                carry = (octet << (6 - shift)) & 0x3f
                shift = shift + 2
            chars.append(mb64alpha[carry])
        return "".join(chars)

    pieces = []
    pending = u""   # run of characters waiting to be BASE64-encoded
    for c in ustr:
        if isprint_ascii(c):
            if pending:
                pieces.append('&' + mb64_encode(pending) + '-')
                pending = u""
            if c == '&':
                pieces.append('&-')
            else:
                pieces.append(str(c))
        else:
            pending += c
    if pending:
        pieces.append('&' + mb64_encode(pending) + '-')
    return "".join(pieces)
1467
def mod_utf7_decode(mu7):
    """Decode a modified UTF-7 encoded string to an unicode string object.

    Arguments:
    mu7 -- the encoded string; must consist of printable ASCII only

    Raises ValueError if mu7 is not well-formed modified UTF-7.
    """

    def mb64_decode(mb64):
        """Decode a modified UTF-7 shifted sequence from modified BASE64 to an
        unicode string object."""
        if not mb64:
            # A null shift '&-' decodes to '&'.
            return u"&"
        u16be = ""
        # Process blocks of 4 BASE64 characters, decoding each char to 6 bits.
        for block in [mb64[i:i+4] for i in range(0, len(mb64), 4)]:
            carrybits = mb64alpha.index(block[0]) << 2
            shift = 4
            for char in block[1:]:
                bits = mb64alpha.index(char)
                u16be += chr(carrybits | (bits >> shift))
                carrybits = (bits << (8-shift)) & 0xff
                shift -= 2
            if carrybits:
                raise ValueError("Ill-formed modified UTF-7 string: "
                        "trailing bits in shifted sequence")
        return u16be.decode('utf_16_be')

    ustr = u""
    mb64 = ""
    inmb64 = False
    for octet in mu7:
        if not isprint_ascii(octet):
            # The message needs a placeholder for the offending octet.
            # Without one, the % operator raises TypeError instead of the
            # ValueError that callers catch.
            raise ValueError("Ill-formed modified UTF-7 string: "
                    "contains non-printable ASCII (octet 0x%02x)" % ord(octet))
        if not inmb64:
            if octet == '&':
                inmb64 = True
            else:
                ustr += octet
            continue

        if octet in mb64alpha:
            mb64 += octet
            continue

        if octet == '-':
            inmb64 = False
            ustr += mb64_decode(mb64)
            mb64 = ""
        else:
            break # This triggers the exception below.

    if inmb64:
        raise ValueError("Ill-formed modified UTF-7 string: "
                "unterminated BASE64 sequence")
    return ustr
1521
1522
def imap_quote(astring):
    """Quote an IMAP `astring' string (see RFC 3501, section "Formal Syntax").

    A string that already starts and ends with a double quote is returned
    unchanged.  Otherwise backslashes and double quotes are escaped with a
    backslash and the result is wrapped in double quotes.
    """
    if astring.startswith('"') and astring.endswith('"'):
        return astring
    escaped = astring.replace('\\', '\\\\').replace('"', '\\"')
    return '"' + escaped + '"'
1530
def imap_unquote(quoted):
    """Un-quote a `quoted' IMAP string (see RFC 3501, section "Formal Syntax").

    A string that is not enclosed in double quotes is returned unchanged.
    Otherwise the enclosing quotes are stripped and the backslash is removed
    from every escaped backslash or double quote.
    """
    if quoted.startswith('"') and quoted.endswith('"'):
        return re.sub(r'\\(\\|")', r'\1', quoted[1:-1])
    return quoted
1538
def parse_imap_url(url):
    """Parse IMAP URL and return username, password (if applicable), servername,
    port and foldername.

    The expected form is scheme://username[:password]@server[:port]/folder,
    where username and password may be enclosed in double quotes.  If
    options.pwfile is set, no password is looked for in the URL.  The
    returned password is None if the URL doesn't contain one.  The port
    defaults to 143 for the 'imap' scheme and to 993 otherwise.
    """

    def split_qstr(text, delim):
        """Split text once at delim, keeping quoted substring intact.
        Strip and unescape quotes where necessary.  Raise ValueError if
        text cannot be split at delim."""
        rm = re.match(r'"(.+?(?<!\\))"(.)(.*)', text)
        if rm:
            a, d, b = rm.groups()
            if not d == delim:
                raise ValueError
            a = a.replace('\\"', '"')
        else:
            a, b = text.split(delim, 1)
        return a, b

    # Split at the first '://' only, so that the sequence may occur again
    # further down the URL, e.g. in the password or in the folder name.
    scheme, url = url.split('://', 1)
    password = None
    try:
        if options.pwfile:
            username, url = split_qstr(url, '@')
        else:
            try:
                username, url = split_qstr(url, ':')
            except ValueError:
                # request password interactively later
                username, url = split_qstr(url, '@')
            else:
                password, url = split_qstr(url, '@')
        server, folder = url.split('/', 1)
    except ValueError:
        unexpected_error("Invalid IMAP connection string")
    try:
        server, port = server.split(':')
    except ValueError:
        # No explicit port given: use the default port of the scheme.
        if scheme.lower() == 'imap':
            port = 143
        else:
            port = 993
    else:
        port = int(port)
    return username, password, server, port, folder
1582
1583
def imap_getdelim(imap_server):
    """Return the IMAP server's hierarchy delimiter, or None if the server
    reports NIL.  Assumes there is only one delimiter."""
    # This function will break if the LIST reply doesn't meet our expectations.
    # Imaplib and IMAP itself are both little beasts, and I do not know how
    # fragile this function will be in the wild.
    try:
        result, response = imap_server.list(pattern='""')
    except ValueError:
        # Stolen from offlineimap:
        # Some buggy IMAP servers do not respond well to LIST "" ""
        # Work around them.
        result, response = imap_server.list(pattern='%')
    if result != 'OK':
        unexpected_error("Error listing directory; "
                "server says '%s'" % response[0])

    # Response should be a list of strings like
    # '(\\Noselect \\HasChildren) "." boxname'
    # We parse only the first list item and just grab the delimiter.
    first_reply = response[0]
    match = re.match(r'\([^\)]*\) (?P<delim>"."|NIL)', first_reply)
    if not match:
        unexpected_error("imap_getdelim(): cannot parse '%s'" % first_reply)
    delim = match.group('delim').strip('"')
    vprint("Found mailbox hierarchy delimiter: '%s'" % delim)
    if delim != "NIL":
        return delim
    return None
1610
1611
1612 def imap_get_namespace(srv):
1613 """Return the IMAP namespace prefixes and hierarchy delimiters."""
1614 assert 'NAMESPACE' in srv.capabilities
1615 result, response = srv.namespace()
1616 if result != 'OK':
1617 unexpected_error("Cannot retrieve IMAP namespace; server says: '%s'"
1618 % response[0])
1619 vprint("NAMESPACE response: %s" % repr(response[0]))
1620 # Typical response is e.g.
1621 # ['(("INBOX." ".")) NIL (("#shared." ".")("shared." "."))'] or
1622 # ['(("" ".")) NIL NIL'], see RFC 2342.
1623 # Make a reasonable guess parsing this beast.
1624 try:
1625 m = re.match(r'\(\("([^"]*)" (?:"(.)"|NIL)', response[0])
1626 nsprefix, hdelim = m.groups()
1627 except:
1628 print "Cannot parse IMAP NAMESPACE response %s" % repr(response)
1629 raise
1630 return nsprefix, hdelim
1631
1632
def imap_smart_select(srv, mailbox):
    """Select the given mailbox on the IMAP server.

    Arguments:
    srv -- the IMAP connection object
    mailbox -- the mailbox name, encoded in the user's encoding

    The mailbox is opened read-only if nothing is going to be deleted (dry
    run or copy mode).  Otherwise it is verified, if possible, that the
    server permits setting the \\Deleted flag.
    """
    # Pass None instead of a false value to work around python bug #1277098
    # (still pending in python << 2.5)
    readonly = (options.dry_run or options.copy_old_mail) or None
    if readonly:
        vprint("examining imap folder '%s' read-only" % mailbox)
    else:
        vprint("selecting imap folder '%s'" % mailbox)
    encoded = mod_utf7_encode(mailbox.decode(userencoding))
    result, response = srv.select(imap_quote(encoded), readonly)
    if result != 'OK':
        unexpected_error("selecting '%s' failed; server says: '%s'." \
                % (mailbox, response[0]))
    if readonly:
        return

    # Sanity check that we don't silently fail to delete messages.
    # As to the following indices: IMAP4.response(key) returns
    # a tuple (key, ['<all_items>']) if the key is found, (key, [None])
    # otherwise. Imaplib just *loves* to nest trivial lists!
    permflags = srv.response("PERMANENTFLAGS")[1][0]
    if not permflags:
        if "IMAP4REV1" in srv.capabilities:
            vprint("Suspect IMAP4rev1 server, doesn't send PERMANENTFLAGS " \
                    "upon SELECT")
        return
    if '\\deleted' not in permflags.strip('()').lower().split():
        unexpected_error("Server doesn't allow deleting messages in " \
                "'%s'." % mailbox)
1662
1663
def imap_find_mailboxes(srv, mailbox):
    """Find matching mailboxes on the IMAP server, correcting an invalid
    mailbox path if possible.

    Arguments:
    srv -- the IMAP connection object
    mailbox -- mailbox name or pattern, encoded in the user's encoding

    Returns the list of selectable mailbox names found, encoded in the
    user's encoding.  Reports an error if there is none.
    """
    for curbox in imap_guess_mailboxnames(srv, mailbox):
        if '%' in curbox or '*' in curbox:
            vprint("Looking for mailboxes matching '%s'..." % curbox)
        else:
            vprint("Looking for mailbox '%s'..." % curbox)
        curbox = mod_utf7_encode(curbox.decode(userencoding))
        result, response = srv.list(pattern=imap_quote(curbox))
        if result != 'OK':
            unexpected_error("LIST command failed; " \
                    "server says: '%s'" % response[0])
        # Say we queried for the mailbox "foo".
        # Upon success, response is e.g. ['(\\HasChildren) "." foo'].
        # Upon failure, response is [None].  Funky imaplib!
        if response[0] is not None:
            break
    else:
        # None of the candidate names was found on the server.
        user_error("Cannot find mailbox '%s' on server." % mailbox)
    mailboxes = []
    for mailbox_data in response:
        if not mailbox_data: # imaplib sometimes returns an empty string
            continue
        try:
            m = re.match(r'\((.*?)\) (?:"."|NIL) (.+)', mailbox_data)
        except TypeError:
            # May be a literal. For literals, imaplib returns a tuple like
            # ('(\\HasNoChildren) "." {12}', 'with "quote"').
            m = re.match(r'\((.*?)\) (?:"."|NIL) \{\d+\}$', mailbox_data[0])
            if m is None:
                unexpected_error("cannot parse LIST reply %s" %
                        (mailbox_data,))
            attrs = m.group(1)
            name = mailbox_data[1]
        else:
            # Report an unparsable reply the same way as for literals,
            # instead of failing with an AttributeError on None.
            if m is None:
                unexpected_error("cannot parse LIST reply %s" %
                        (mailbox_data,))
            attrs, name = m.groups()
            name = imap_unquote(name)
        try:
            name = mod_utf7_decode(name)
        except ValueError:
            vprint("Mailbox name '%s' returned by server doesn't look like "
                    "modified UTF-7" % name)
            # Fall back to interpreting the name as UTF-8.
            name = name.decode('utf-8')
        name = name.encode(userencoding)
        if '\\noselect' in attrs.lower().split():
            vprint("skipping not selectable mailbox '%s'" % name)
            continue
        vprint("Found mailbox '%s'" % name)
        mailboxes.append(name)
    if not mailboxes:
        user_error("No matching folder is selectable")
    return mailboxes
1717
1718
def imap_guess_mailboxnames(srv, mailbox):
    """Return a list of possible real IMAP mailbox names in descending order
    of preference, compiled by prepending an IMAP namespace prefix if necessary,
    and by translating hierarchy delimiters.

    Arguments:
    srv -- the IMAP connection object
    mailbox -- the mailbox name as given by the user
    """
    if 'NAMESPACE' in srv.capabilities:
        nsprefix, hdelim = imap_get_namespace(srv)
    else:
        vprint("Server doesn't support NAMESPACE command.")
        nsprefix = ""
        hdelim = imap_getdelim(srv)
    vprint("IMAP namespace prefix: '%s', hierarchy delimiter: '%s'" % \
            (nsprefix, hdelim))

    upper_name = mailbox.upper()
    # INBOX is not a real mailbox name, so namespace prefixes do not apply
    # to INBOX and its children
    in_inbox = upper_name == "INBOX" or \
        (hdelim is not None and upper_name.startswith("INBOX" + hdelim))
    if in_inbox or mailbox.startswith(nsprefix):
        boxnames = [mailbox]
    else:
        boxnames = [nsprefix + mailbox]

    # Also try the name with the local path separator translated into the
    # server's hierarchy delimiter.
    if hdelim is not None and os.path.sep in mailbox:
        translated = mailbox.replace(os.path.sep, hdelim)
        if translated.upper().startswith("INBOX" + hdelim):
            boxnames.append(translated)
        else:
            if translated.startswith(nsprefix):
                boxnames.append(translated)
            if nsprefix:
                boxnames.append(nsprefix + translated)
    return boxnames
1750
1751
1752 ############### misc functions ###############
1753
1754
def set_signal_handlers():
    """Install clean_up_signal() as the handler for SIGHUP, SIGQUIT and
    SIGTERM, to clean up temporary files on unexpected exit."""
    # Make sure we clean up nicely - we don't want to leave stale dotlock
    # files about if something bad happens to us. This is quite
    # important, even though procmail will delete stale files after a while.
    # SIGINT (signal 2) is handled as a python exception and therefore not
    # listed here.
    handled = (
        signal.SIGHUP,   # signal 1
        signal.SIGQUIT,  # signal 3
        signal.SIGTERM,  # signal 15
    )
    for signum in handled:
        signal.signal(signum, clean_up_signal)
1764
1765
def clean_up():
    """Announce the clean-up in verbose mode, then have the stale-files
    registry (_stale) delete whatever it still tracks."""
    vprint("cleaning up ...")
    _stale.clean()
1770
1771
def clean_up_signal(signal_number, stack_frame):
    """Abort with an error -- to be registered as a signal handler.

    Arguments:
    signal_number -- signal number of the terminating signal
    stack_frame -- the current stack frame (unused)

    """
    # this will run the above clean_up(), since unexpected_error()
    # will abort with sys.exit() and clean_up will be registered
    # at this stage
    complaint = "received signal %s" % signal_number
    unexpected_error(complaint)
1784
def prepare_temp_archive():
    """Create and return the temporary archive mbox.

    Returns None if no archive is going to be written (dry run or delete
    mode).  Otherwise the archive is compressed unless options.no_compress
    is set.
    """
    if options.dry_run or options.delete_old_mail:
        return None
    if not options.no_compress:
        return CompressedTempMbox()
    return TempMbox()
1793
def commit_archive(archive, final_archive_name):
    """Finalize temporary archive and write it to its final destination.

    Arguments:
    archive -- the temporary archive mbox, or None if there is none
    final_archive_name -- name of the final archive; '.gz' is appended
                          unless options.no_compress is set
    """
    if not options.no_compress:
        final_archive_name += '.gz'
    if not archive:
        return
    archive.close()
    if not archive.empty:
        destination = ArchiveMbox(final_archive_name)
        destination.lock()
        try:
            destination.append(archive.mbox_file_name)
            destination.commit()
        finally:
            # Release the final archive whether or not appending worked.
            destination.unlock()
            destination.close()
    archive.remove()
1810
def make_archive_name(mailbox_name):
    """Derive archive name and (relative) path from the mailbox name.

    Arguments:
    mailbox_name -- path of the mailbox being processed

    The name is built as follows:
    - If options.archive_name is set, it is expanded with time.strftime()
      and used as the file name, with no directory part.
    - Otherwise the mailbox's own file name is used, wrapped in the
      strftime-expanded options.archive_prefix / options.archive_suffix;
      if neither is given, options.archive_default_suffix is appended.
    - options.output_dir, if set, replaces the directory part.

    Returns the resulting archive path as a string.
    """
    # allow the user to embed time formats such as '%B' in the archive name
    if options.date_old_max is None:
        tm = time.localtime(time.time() - options.days_old_max*24*60*60)
    else:
        tm = time.localtime(options.date_old_max)
    prefix = suffix = ""
    if options.archive_name:
        archive_head = ""
        archive_tail = time.strftime(options.archive_name, tm)
    else:
        if options.archive_prefix is None and options.archive_suffix is None:
            suffix = options.archive_default_suffix
        else:
            if options.archive_prefix:
                prefix = time.strftime(options.archive_prefix, tm)
            if options.archive_suffix:
                suffix = time.strftime(options.archive_suffix, tm)
        archive_head, archive_tail = os.path.split(mailbox_name)
        if not prefix:
            # Don't create hidden archives, e.g. when processing Maildir++
            # subfolders
            archive_tail = archive_tail.lstrip('.')
    if options.output_dir:
        archive_head = options.output_dir
    archive_name = os.path.join(archive_head, prefix + archive_tail + suffix)
    return archive_name
1839
def check_sane_destdir(dir):
    """Run a very primitive sanity check on a destination directory.

    Reports a user error if the given path is not an existing directory
    or if we have no write permission on it.
    """
    assert dir
    is_directory = os.path.isdir(dir)
    if not is_directory:
        user_error("output directory does not exist: '%s'" % dir)
    is_writable = os.access(dir, os.W_OK)
    if not is_writable:
        user_error("no write permission on output directory: '%s'" % dir)
1848
def check_archive(archive_name):
    """Verify that no archive exists in the unexpected (un)compressed form,
    and that the destination directory is usable.

    Arguments:
    archive_name -- archive path without the '.gz' suffix

    """
    gz_name = archive_name + ".gz"
    if options.no_compress:
        # we are about to write an uncompressed archive, so a compressed
        # one must not be lying around
        if os.path.isfile(gz_name):
            message = ("There is already a file named '%s'!\n"
                       "Have you been previously compressing this archive?\n"
                       "You probably should uncompress it manually, and try running me "
                       "again." % gz_name)
            user_error(message)
    else:
        # ... and the other way round
        if os.path.isfile(archive_name):
            message = ("There is already a file named '%s'!\n"
                       "Have you been reading this archive?\n"
                       "You probably should re-compress it manually, and try running me "
                       "again." % archive_name)
            user_error(message)
    # an archive name without directory part ends up in the current directory
    target_dir = os.path.dirname(archive_name) or os.getcwd()
    check_sane_destdir(target_dir)
1868
def nice_size_str(size):
    """Return given size in bytes as '12kB', '1.2MB'

    Arguments:
    size -- size in bytes

    Sizes of at least 1 MB are shown with one decimal, sizes of at least
    1 kB as a whole number of kB, and anything smaller as plain bytes.
    """
    kb = size / 1024.0
    mb = kb / 1024.0
    if mb >= 1.0:
        return str(round(mb, 1)) + 'MB'
    if kb >= 1.0:
        # round() returns a float under Python 2, which would render as
        # '12.0kB'; convert to int to get the documented '12kB'
        return str(int(round(kb))) + 'kB'
    return str(size) + 'B'
1876
1877
def get_filename(msg):
    """If the given rfc822.Message can be identified with a file (no mbox),
    return the filename, otherwise raise AttributeError.

    Arguments:
    msg -- the message; its 'fp' attribute is inspected

    """
    try:
        # Simple case: msg.fp has a 'name' attribute of its own.
        return msg.fp.name
    except AttributeError:
        # Ugh, that's ugly.  msg.fp is not a plain file, it may be an
        # instance of
        # a. mailbox._Subfile
        #    (msg from mailbox.UnixMailbox, Python <= 2.4)
        #    File object is msg.fp.fp, we don't want that
        # b. mailbox._PartialFile, subclass of mailbox._ProxyFile
        #    (msg from mailbox.UnixMailbox, Python >= 2.5)
        #    File object is msg.fp._file, we don't want that
        # c. mailbox._ProxyFile
        #    (msg from mailbox.Maildir, Python >= 2.5)
        #    File object is msg.fp._file, we do want that.
        #
        # The class is compared for exact equality on purpose, so that
        # the _PartialFile subclass (case b) does not match.
        if msg.fp.__class__ == mailbox._ProxyFile:
            assert hasattr(mailbox, "_PartialFile")
            return msg.fp._file.name
        # Cases a and b: re-raise the AttributeError caught above.
        raise
1899
def safe_open_create(filename):
    """Create and open a file in a NFSv2-safe way, and return a r/w file descriptor.
    The new file is created with mode 600.

    Arguments:
    filename -- path of the file to create

    Raises OSError if the file cannot be created, e.g. because it already
    exists.  In that case the file descriptor opened along the way is
    closed again.
    """
    # This is essentially a simplified version of the dotlocking function.
    vprint("Creating file '%s'" % filename)
    dirname, basename = os.path.split(filename)
    # We rely on tempfile.mkstemp to create files safely and with 600 mode.
    fd, pre_name = tempfile.mkstemp(prefix=basename+".pre-", dir=dirname)
    try:
        try:
            # Hard-linking the temporary file to the final name only
            # succeeds if the final name does not exist yet.
            os.link(pre_name, filename)
        except OSError:
            # The link may exist although link() reported an error; a
            # link count of 2 tells us that it was created after all.
            if os.fstat(fd).st_nlink != 2:
                # Genuine failure: don't leak the descriptor.
                os.close(fd)
                raise
    finally:
        # The temporary name is no longer needed either way.
        os.unlink(pre_name)
    return fd
1919
def safe_open_existing(filename):
    """Safely open an existing file, and return a r/w file descriptor.

    The file is rejected with unexpected_error() if it is a symlink, has
    a hard link count other than 1, is a directory, or if its status
    after opening differs from the status seen before opening.

    Arguments:
    filename -- path of the file to open

    """
    lst = os.lstat(filename)
    if stat.S_ISLNK(lst.st_mode):
        unexpected_error("file '%s' is a symlink." % filename)
    fd = os.open(filename, os.O_RDWR)
    fst = os.fstat(fd)
    link_count = fst.st_nlink
    if link_count != 1:
        unexpected_error("file '%s' has %d hard links." % \
                (filename, link_count))
    if stat.S_ISDIR(fst.st_mode):
        unexpected_error("file '%s' is a directory." % filename)
    # The file we opened must be the one we inspected before opening it.
    compared_fields = (stat.ST_DEV, stat.ST_INO, stat.ST_UID, stat.ST_GID,
                       stat.ST_MODE, stat.ST_NLINK)
    for field in compared_fields:
        if fst[field] != lst[field]:
            unexpected_error("file status changed unexpectedly")
    return fd
1936
1937 def safe_open(filename):
1938 """Safely open a file, creating it if it doesn't exist, and return a
1939 r/w file descriptor."""
1940 # This borrows from postfix code.
1941 vprint("Opening archive...")
1942 try:
1943 fd = safe_open_existing(filename)
1944 except OSError, e:
1945 if e.errno != errno.ENOENT: raise
1946 fd = safe_open_create(filename)
1947 return fd
1948
# this is where it all happens, folks: run main() when this file is
# executed as a script (and not when it is imported as a module)
if __name__ == '__main__':
    main()