"Fossies" - the Fresh Open Source Software Archive

Member "bup-0.30/lib/bup/helpers.py" (28 Sep 2019, 37411 Bytes) of package /linux/privat/bup-0.30.tar.gz:


As a special service, "Fossies" has tried to format the requested source page as HTML using (guessed) Python source-code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. For more information about "helpers.py" see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code-changes report: 0.29.3_vs_0.30.

    1 """Helper functions and classes for bup."""
    2 
    3 from __future__ import absolute_import, division
    4 from collections import namedtuple
    5 from contextlib import contextmanager
    6 from ctypes import sizeof, c_void_p
    7 from os import environ
    8 from pipes import quote
    9 from subprocess import PIPE, Popen
   10 import sys, os, pwd, subprocess, errno, socket, select, mmap, stat, re, struct
   11 import hashlib, heapq, math, operator, time, grp, tempfile
   12 
   13 from bup import _helpers
   14 from bup import compat
   15 # This function should really be in helpers, not in bup.options.  But we
   16 # want options.py to be standalone so people can include it in other projects.
   17 from bup.options import _tty_width as tty_width
   18 
   19 
   20 class Nonlocal:
   21     """Helper to deal with Python scoping issues"""
   22     pass
   23 
   24 
# System page size; used by the mmap/mincore helpers below.
sc_page_size = os.sysconf('SC_PAGE_SIZE')
assert(sc_page_size > 0)

# Maximum combined size of argv + environment for exec(); batchpipe()
# uses this to split overly long command lines.
sc_arg_max = os.sysconf('SC_ARG_MAX')
if sc_arg_max == -1:  # "no definite limit" - let's choose 2M
    sc_arg_max = 2 * 1024 * 1024
   31 
   32 def last(iterable):
   33     result = None
   34     for result in iterable:
   35         pass
   36     return result
   37 
   38 
   39 def atoi(s):
   40     """Convert the string 's' to an integer. Return 0 if s is not a number."""
   41     try:
   42         return int(s or '0')
   43     except ValueError:
   44         return 0
   45 
   46 
   47 def atof(s):
   48     """Convert the string 's' to a float. Return 0 if s is not a number."""
   49     try:
   50         return float(s or '0')
   51     except ValueError:
   52         return 0
   53 
   54 
# Debug verbosity for debug1()/debug2(); taken from BUP_DEBUG (0 when unset).
buglvl = atoi(os.environ.get('BUP_DEBUG', 0))
   56 
   57 
# Prefer fdatasync (no forced metadata flush) when the platform has it,
# otherwise fall back to fsync.
try:
    _fdatasync = os.fdatasync
except AttributeError:
    _fdatasync = os.fsync

if sys.platform.startswith('darwin'):
    # Apparently os.fsync on OS X doesn't guarantee to sync all the way down
    import fcntl
    def fdatasync(fd):
        """Flush fd all the way to stable storage (F_FULLFSYNC on macOS)."""
        try:
            return fcntl.fcntl(fd, fcntl.F_FULLFSYNC)
        except IOError as e:
            # Fallback for file systems (SMB) that do not support F_FULLFSYNC
            if e.errno == errno.ENOTSUP:
                return _fdatasync(fd)
            else:
                raise
else:
    fdatasync = _fdatasync
   77 
   78 
   79 def partition(predicate, stream):
   80     """Returns (leading_matches_it, rest_it), where leading_matches_it
   81     must be completely exhausted before traversing rest_it.
   82 
   83     """
   84     stream = iter(stream)
   85     ns = Nonlocal()
   86     ns.first_nonmatch = None
   87     def leading_matches():
   88         for x in stream:
   89             if predicate(x):
   90                 yield x
   91             else:
   92                 ns.first_nonmatch = (x,)
   93                 break
   94     def rest():
   95         if ns.first_nonmatch:
   96             yield ns.first_nonmatch[0]
   97             for x in stream:
   98                 yield x
   99     return (leading_matches(), rest())
  100 
  101 
  102 def lines_until_sentinel(f, sentinel, ex_type):
  103     # sentinel must end with \n and must contain only one \n
  104     while True:
  105         line = f.readline()
  106         if not (line and line.endswith('\n')):
  107             raise ex_type('Hit EOF while reading line')
  108         if line == sentinel:
  109             return
  110         yield line
  111 
  112 
  113 def stat_if_exists(path):
  114     try:
  115         return os.stat(path)
  116     except OSError as e:
  117         if e.errno != errno.ENOENT:
  118             raise
  119     return None
  120 
  121 
  122 # Write (blockingly) to sockets that may or may not be in blocking mode.
  123 # We need this because our stderr is sometimes eaten by subprocesses
  124 # (probably ssh) that sometimes make it nonblocking, if only temporarily,
  125 # leading to race conditions.  Ick.  We'll do it the hard way.
  126 def _hard_write(fd, buf):
  127     while buf:
  128         (r,w,x) = select.select([], [fd], [], None)
  129         if not w:
  130             raise IOError('select(fd) returned without being writable')
  131         try:
  132             sz = os.write(fd, buf)
  133         except OSError as e:
  134             if e.errno != errno.EAGAIN:
  135                 raise
  136         assert(sz >= 0)
  137         buf = buf[sz:]
  138 
  139 
  140 _last_prog = 0
  141 def log(s):
  142     """Print a log message to stderr."""
  143     global _last_prog
  144     sys.stdout.flush()
  145     _hard_write(sys.stderr.fileno(), s)
  146     _last_prog = 0
  147 
  148 
  149 def debug1(s):
  150     if buglvl >= 1:
  151         log(s)
  152 
  153 
  154 def debug2(s):
  155     if buglvl >= 2:
  156         log(s)
  157 
  158 
  159 istty1 = os.isatty(1) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 1)
  160 istty2 = os.isatty(2) or (atoi(os.environ.get('BUP_FORCE_TTY')) & 2)
  161 _last_progress = ''
  162 def progress(s):
  163     """Calls log() if stderr is a TTY.  Does nothing otherwise."""
  164     global _last_progress
  165     if istty2:
  166         log(s)
  167         _last_progress = s
  168 
  169 
  170 def qprogress(s):
  171     """Calls progress() only if we haven't printed progress in a while.
  172     
  173     This avoids overloading the stderr buffer with excess junk.
  174     """
  175     global _last_prog
  176     now = time.time()
  177     if now - _last_prog > 0.1:
  178         progress(s)
  179         _last_prog = now
  180 
  181 
  182 def reprogress():
  183     """Calls progress() to redisplay the most recent progress message.
  184 
  185     Useful after you've printed some other message that wipes out the
  186     progress line.
  187     """
  188     if _last_progress and _last_progress.endswith('\r'):
  189         progress(_last_progress)
  190 
  191 
  192 def mkdirp(d, mode=None):
  193     """Recursively create directories on path 'd'.
  194 
  195     Unlike os.makedirs(), it doesn't raise an exception if the last element of
  196     the path already exists.
  197     """
  198     try:
  199         if mode:
  200             os.makedirs(d, mode)
  201         else:
  202             os.makedirs(d)
  203     except OSError as e:
  204         if e.errno == errno.EEXIST:
  205             pass
  206         else:
  207             raise
  208 
  209 
def merge_iter(iters, pfreq, pfunc, pfinal, key=None):
    """Merge the sorted sequences in 'iters', yielding each run of
    equal consecutive elements once (equality judged by attribute 'key'
    when given, otherwise by ==).  Calls pfunc(count, total) every
    pfreq elements for progress, and pfinal(count, total) at the end.

    NOTE(review): total is computed via len(), so 'iters' must contain
    sized sequences, not bare generators.
    """
    if key:
        samekey = lambda e, pe: getattr(e, key) == getattr(pe, key, None)
    else:
        samekey = operator.eq
    count = 0
    total = sum(len(it) for it in iters)
    iters = (iter(it) for it in iters)
    heap = ((next(it, None),it) for it in iters)
    # NOTE(review): "if e" drops exhausted iterators, but would also drop
    # a falsy first element; heap entries are (element, iterator) tuples,
    # so elements must be mutually comparable — presumably guaranteed by
    # the existing callers; verify before reusing elsewhere.
    heap = [(e,it) for e,it in heap if e]

    heapq.heapify(heap)
    pe = None
    while heap:
        if not count % pfreq:
            pfunc(count, total)
        e, it = heap[0]
        if not samekey(e, pe):
            pe = e
            yield e
        count += 1
        try:
            e = next(it)
        except StopIteration:
            heapq.heappop(heap) # remove current
        else:
            heapq.heapreplace(heap, (e, it)) # shift current to new location
    pfinal(count, total)
  238 
  239 
  240 def unlink(f):
  241     """Delete a file at path 'f' if it currently exists.
  242 
  243     Unlike os.unlink(), does not throw an exception if the file didn't already
  244     exist.
  245     """
  246     try:
  247         os.unlink(f)
  248     except OSError as e:
  249         if e.errno != errno.ENOENT:
  250             raise
  251 
  252 
  253 def shstr(cmd):
  254     if isinstance(cmd, compat.str_type):
  255         return cmd
  256     else:
  257         return ' '.join(map(quote, cmd))
  258 
# Convenience alias: run a command, raising CalledProcessError on failure.
exc = subprocess.check_call
  260 
  261 def exo(cmd,
  262         input=None,
  263         stdin=None,
  264         stderr=None,
  265         shell=False,
  266         check=True,
  267         preexec_fn=None):
  268     if input:
  269         assert stdin in (None, PIPE)
  270         stdin = PIPE
  271     p = Popen(cmd,
  272               stdin=stdin, stdout=PIPE, stderr=stderr,
  273               shell=shell,
  274               preexec_fn=preexec_fn)
  275     out, err = p.communicate(input)
  276     if check and p.returncode != 0:
  277         raise Exception('subprocess %r failed with status %d, stderr: %r'
  278                         % (' '.join(map(quote, cmd)), p.returncode, err))
  279     return out, err, p
  280 
  281 def readpipe(argv, preexec_fn=None, shell=False):
  282     """Run a subprocess and return its output."""
  283     p = subprocess.Popen(argv, stdout=subprocess.PIPE, preexec_fn=preexec_fn,
  284                          shell=shell)
  285     out, err = p.communicate()
  286     if p.returncode != 0:
  287         raise Exception('subprocess %r failed with status %d'
  288                         % (' '.join(argv), p.returncode))
  289     return out
  290 
  291 
  292 def _argmax_base(command):
  293     base_size = 2048
  294     for c in command:
  295         base_size += len(command) + 1
  296     for k, v in compat.items(environ):
  297         base_size += len(k) + len(v) + 2 + sizeof(c_void_p)
  298     return base_size
  299 
  300 
  301 def _argmax_args_size(args):
  302     return sum(len(x) + 1 + sizeof(c_void_p) for x in args)
  303 
  304 
  305 def batchpipe(command, args, preexec_fn=None, arg_max=sc_arg_max):
  306     """If args is not empty, yield the output produced by calling the
  307 command list with args as a sequence of strings (It may be necessary
  308 to return multiple strings in order to respect ARG_MAX)."""
  309     # The optional arg_max arg is a workaround for an issue with the
  310     # current wvtest behavior.
  311     base_size = _argmax_base(command)
  312     while args:
  313         room = arg_max - base_size
  314         i = 0
  315         while i < len(args):
  316             next_size = _argmax_args_size(args[i:i+1])
  317             if room - next_size < 0:
  318                 break
  319             room -= next_size
  320             i += 1
  321         sub_args = args[:i]
  322         args = args[i:]
  323         assert(len(sub_args))
  324         yield readpipe(command + sub_args, preexec_fn=preexec_fn)
  325 
  326 
  327 def resolve_parent(p):
  328     """Return the absolute path of a file without following any final symlink.
  329 
  330     Behaves like os.path.realpath, but doesn't follow a symlink for the last
  331     element. (ie. if 'p' itself is a symlink, this one won't follow it, but it
  332     will follow symlinks in p's directory)
  333     """
  334     try:
  335         st = os.lstat(p)
  336     except OSError:
  337         st = None
  338     if st and stat.S_ISLNK(st.st_mode):
  339         (dir, name) = os.path.split(p)
  340         dir = os.path.realpath(dir)
  341         out = os.path.join(dir, name)
  342     else:
  343         out = os.path.realpath(p)
  344     #log('realpathing:%r,%r\n' % (p, out))
  345     return out
  346 
  347 
  348 def detect_fakeroot():
  349     "Return True if we appear to be running under fakeroot."
  350     return os.getenv("FAKEROOTKEY") != None
  351 
  352 
  353 if sys.platform.startswith('cygwin'):
  354     def is_superuser():
  355         # https://cygwin.com/ml/cygwin/2015-02/msg00057.html
  356         groups = os.getgroups()
  357         return 544 in groups or 0 in groups
  358 else:
  359     def is_superuser():
  360         return os.geteuid() == 0
  361 
  362 
  363 def _cache_key_value(get_value, key, cache):
  364     """Return (value, was_cached).  If there is a value in the cache
  365     for key, use that, otherwise, call get_value(key) which should
  366     throw a KeyError if there is no value -- in which case the cached
  367     and returned value will be None.
  368     """
  369     try: # Do we already have it (or know there wasn't one)?
  370         value = cache[key]
  371         return value, True
  372     except KeyError:
  373         pass
  374     value = None
  375     try:
  376         cache[key] = value = get_value(key)
  377     except KeyError:
  378         cache[key] = None
  379     return value, False
  380 
  381 
  382 _uid_to_pwd_cache = {}
  383 _name_to_pwd_cache = {}
  384 
  385 def pwd_from_uid(uid):
  386     """Return password database entry for uid (may be a cached value).
  387     Return None if no entry is found.
  388     """
  389     global _uid_to_pwd_cache, _name_to_pwd_cache
  390     entry, cached = _cache_key_value(pwd.getpwuid, uid, _uid_to_pwd_cache)
  391     if entry and not cached:
  392         _name_to_pwd_cache[entry.pw_name] = entry
  393     return entry
  394 
  395 
  396 def pwd_from_name(name):
  397     """Return password database entry for name (may be a cached value).
  398     Return None if no entry is found.
  399     """
  400     global _uid_to_pwd_cache, _name_to_pwd_cache
  401     entry, cached = _cache_key_value(pwd.getpwnam, name, _name_to_pwd_cache)
  402     if entry and not cached:
  403         _uid_to_pwd_cache[entry.pw_uid] = entry
  404     return entry
  405 
  406 
  407 _gid_to_grp_cache = {}
  408 _name_to_grp_cache = {}
  409 
  410 def grp_from_gid(gid):
  411     """Return password database entry for gid (may be a cached value).
  412     Return None if no entry is found.
  413     """
  414     global _gid_to_grp_cache, _name_to_grp_cache
  415     entry, cached = _cache_key_value(grp.getgrgid, gid, _gid_to_grp_cache)
  416     if entry and not cached:
  417         _name_to_grp_cache[entry.gr_name] = entry
  418     return entry
  419 
  420 
  421 def grp_from_name(name):
  422     """Return password database entry for name (may be a cached value).
  423     Return None if no entry is found.
  424     """
  425     global _gid_to_grp_cache, _name_to_grp_cache
  426     entry, cached = _cache_key_value(grp.getgrnam, name, _name_to_grp_cache)
  427     if entry and not cached:
  428         _gid_to_grp_cache[entry.gr_gid] = entry
  429     return entry
  430 
  431 
  432 _username = None
  433 def username():
  434     """Get the user's login name."""
  435     global _username
  436     if not _username:
  437         uid = os.getuid()
  438         _username = pwd_from_uid(uid)[0] or 'user%d' % uid
  439     return _username
  440 
  441 
  442 _userfullname = None
  443 def userfullname():
  444     """Get the user's full name."""
  445     global _userfullname
  446     if not _userfullname:
  447         uid = os.getuid()
  448         entry = pwd_from_uid(uid)
  449         if entry:
  450             _userfullname = entry[4].split(',')[0] or entry[0]
  451         if not _userfullname:
  452             _userfullname = 'user%d' % uid
  453     return _userfullname
  454 
  455 
  456 _hostname = None
  457 def hostname():
  458     """Get the FQDN of this machine."""
  459     global _hostname
  460     if not _hostname:
  461         _hostname = socket.getfqdn()
  462     return _hostname
  463 
  464 
  465 _resource_path = None
  466 def resource_path(subdir=''):
  467     global _resource_path
  468     if not _resource_path:
  469         _resource_path = os.environ.get('BUP_RESOURCE_PATH') or '.'
  470     return os.path.join(_resource_path, subdir)
  471 
  472 def format_filesize(size):
  473     unit = 1024.0
  474     size = float(size)
  475     if size < unit:
  476         return "%d" % (size)
  477     exponent = int(math.log(size) // math.log(unit))
  478     size_prefix = "KMGTPE"[exponent - 1]
  479     return "%.1f%s" % (size // math.pow(unit, exponent), size_prefix)
  480 
  481 
  482 class NotOk(Exception):
  483     pass
  484 
  485 
  486 class BaseConn:
  487     def __init__(self, outp):
  488         self.outp = outp
  489 
  490     def close(self):
  491         while self._read(65536): pass
  492 
  493     def read(self, size):
  494         """Read 'size' bytes from input stream."""
  495         self.outp.flush()
  496         return self._read(size)
  497 
  498     def readline(self):
  499         """Read from input stream until a newline is found."""
  500         self.outp.flush()
  501         return self._readline()
  502 
  503     def write(self, data):
  504         """Write 'data' to output stream."""
  505         #log('%d writing: %d bytes\n' % (os.getpid(), len(data)))
  506         self.outp.write(data)
  507 
  508     def has_input(self):
  509         """Return true if input stream is readable."""
  510         raise NotImplemented("Subclasses must implement has_input")
  511 
  512     def ok(self):
  513         """Indicate end of output from last sent command."""
  514         self.write('\nok\n')
  515 
  516     def error(self, s):
  517         """Indicate server error to the client."""
  518         s = re.sub(r'\s+', ' ', str(s))
  519         self.write('\nerror %s\n' % s)
  520 
  521     def _check_ok(self, onempty):
  522         self.outp.flush()
  523         rl = ''
  524         for rl in linereader(self):
  525             #log('%d got line: %r\n' % (os.getpid(), rl))
  526             if not rl:  # empty line
  527                 continue
  528             elif rl == 'ok':
  529                 return None
  530             elif rl.startswith('error '):
  531                 #log('client: error: %s\n' % rl[6:])
  532                 return NotOk(rl[6:])
  533             else:
  534                 onempty(rl)
  535         raise Exception('server exited unexpectedly; see errors above')
  536 
  537     def drain_and_check_ok(self):
  538         """Remove all data for the current command from input stream."""
  539         def onempty(rl):
  540             pass
  541         return self._check_ok(onempty)
  542 
  543     def check_ok(self):
  544         """Verify that server action completed successfully."""
  545         def onempty(rl):
  546             raise Exception('expected "ok", got %r' % rl)
  547         return self._check_ok(onempty)
  548 
  549 
  550 class Conn(BaseConn):
  551     def __init__(self, inp, outp):
  552         BaseConn.__init__(self, outp)
  553         self.inp = inp
  554 
  555     def _read(self, size):
  556         return self.inp.read(size)
  557 
  558     def _readline(self):
  559         return self.inp.readline()
  560 
  561     def has_input(self):
  562         [rl, wl, xl] = select.select([self.inp.fileno()], [], [], 0)
  563         if rl:
  564             assert(rl[0] == self.inp.fileno())
  565             return True
  566         else:
  567             return None
  568 
  569 
  570 def checked_reader(fd, n):
  571     while n > 0:
  572         rl, _, _ = select.select([fd], [], [])
  573         assert(rl[0] == fd)
  574         buf = os.read(fd, n)
  575         if not buf: raise Exception("Unexpected EOF reading %d more bytes" % n)
  576         yield buf
  577         n -= len(buf)
  578 
  579 
# Largest payload allowed in a single mux packet.
MAX_PACKET = 128 * 1024
def mux(p, outfd, outr, errr):
    """Multiplex subprocess p's stdout (outr) and stderr (errr) onto
    outfd as length-prefixed packets until the process exits.

    Each packet is a '!IB' header (payload length, stream id) followed
    by the payload: id 1 is stdout, id 2 is stderr, and a final
    zero-length id-3 packet marks end of stream.
    """
    try:
        fds = [outr, errr]
        while p.poll() is None:
            rl, _, _ = select.select(fds, [], [])
            for fd in rl:
                if fd == outr:
                    buf = os.read(outr, MAX_PACKET)
                    # NOTE(review): this break only leaves the inner for
                    # loop; the outer loop runs until p exits.
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 1) + buf)
                elif fd == errr:
                    buf = os.read(errr, 1024)
                    if not buf: break
                    os.write(outfd, struct.pack('!IB', len(buf), 2) + buf)
    finally:
        # Always emit the close marker, even if we die with an exception.
        os.write(outfd, struct.pack('!IB', 0, 3))
  597 
  598 
class DemuxConn(BaseConn):
    """A helper class for bup's client-server protocol.

    Reads the length-prefixed packet stream produced by mux() from
    'infd': stderr packets are copied to our stderr, stdout packets are
    served through the BaseConn read interface.
    """
    def __init__(self, infd, outp):
        BaseConn.__init__(self, outp)
        # Anything that comes through before the sync string was not
        # multiplexed and can be assumed to be debug/log before mux init.
        tail = ''
        while tail != 'BUPMUX':
            # Read up to 6 bytes at first, then 1 at a time until the
            # 'BUPMUX' marker lines up at the end of 'tail'.
            b = os.read(infd, (len(tail) < 6) and (6-len(tail)) or 1)
            if not b:
                raise IOError('demux: unexpected EOF during initialization')
            tail += b
            sys.stderr.write(tail[:-6])  # pre-mux log messages
            tail = tail[-6:]
        self.infd = infd
        self.reader = None  # generator yielding the current stdout packet
        self.buf = None  # unconsumed bytes from the current packet
        self.closed = False  # True once the id-3 close marker is seen

    def write(self, data):
        # Process any pending packets (forwarding stderr) before writing,
        # so the server's diagnostics stay reasonably ordered.
        self._load_buf(0)
        BaseConn.write(self, data)

    def _next_packet(self, timeout):
        """Consume one packet header, dispatching stderr/close packets.

        Returns True when a packet was consumed, False on timeout or
        after the stream is closed.
        """
        if self.closed: return False
        rl, wl, xl = select.select([self.infd], [], [], timeout)
        if not rl: return False
        assert(rl[0] == self.infd)
        ns = ''.join(checked_reader(self.infd, 5))
        n, fdw = struct.unpack('!IB', ns)
        assert(n <= MAX_PACKET)
        if fdw == 1:
            # stdout data: leave a reader for _load_buf() to consume.
            self.reader = checked_reader(self.infd, n)
        elif fdw == 2:
            # stderr data: pass it straight through.
            for buf in checked_reader(self.infd, n):
                sys.stderr.write(buf)
        elif fdw == 3:
            # Close marker: no more packets will arrive.
            self.closed = True
            debug2("DemuxConn: marked closed\n")
        return True

    def _load_buf(self, timeout):
        """Ensure self.buf holds data; False when closed or timed out."""
        if self.buf is not None:
            return True
        while not self.closed:
            while not self.reader:
                if not self._next_packet(timeout):
                    return False
            try:
                self.buf = next(self.reader)
                return True
            except StopIteration:
                self.reader = None
        return False

    def _read_parts(self, ix_fn):
        """Yield buffered chunks until ix_fn reports a stopping index.

        ix_fn(buf) returns the split index, or None to keep consuming;
        any unconsumed remainder stays in self.buf for the next read.
        """
        while self._load_buf(None):
            assert(self.buf is not None)
            i = ix_fn(self.buf)
            if i is None or i == len(self.buf):
                yv = self.buf
                self.buf = None
            else:
                yv = self.buf[:i]
                self.buf = self.buf[i:]
            yield yv
            if i is not None:
                break

    def _readline(self):
        # Stop one past the first newline.
        def find_eol(buf):
            try:
                return buf.index('\n')+1
            except ValueError:
                return None
        return ''.join(self._read_parts(find_eol))

    def _read(self, size):
        # Mutable cell tracking how many bytes are still wanted.
        csize = [size]
        def until_size(buf): # Closes on csize
            if len(buf) < csize[0]:
                csize[0] -= len(buf)
                return None
            else:
                return csize[0]
        return ''.join(self._read_parts(until_size))

    def has_input(self):
        return self._load_buf(0)
  688 
  689 
  690 def linereader(f):
  691     """Generate a list of input lines from 'f' without terminating newlines."""
  692     while 1:
  693         line = f.readline()
  694         if not line:
  695             break
  696         yield line[:-1]
  697 
  698 
  699 def chunkyreader(f, count = None):
  700     """Generate a list of chunks of data read from 'f'.
  701 
  702     If count is None, read until EOF is reached.
  703 
  704     If count is a positive integer, read 'count' bytes from 'f'. If EOF is
  705     reached while reading, raise IOError.
  706     """
  707     if count != None:
  708         while count > 0:
  709             b = f.read(min(count, 65536))
  710             if not b:
  711                 raise IOError('EOF with %d bytes remaining' % count)
  712             yield b
  713             count -= len(b)
  714     else:
  715         while 1:
  716             b = f.read(65536)
  717             if not b: break
  718             yield b
  719 
  720 
  721 @contextmanager
  722 def atomically_replaced_file(name, mode='w', buffering=-1):
  723     """Yield a file that will be atomically renamed name when leaving the block.
  724 
  725     This contextmanager yields an open file object that is backed by a
  726     temporary file which will be renamed (atomically) to the target
  727     name if everything succeeds.
  728 
  729     The mode and buffering arguments are handled exactly as with open,
  730     and the yielded file will have very restrictive permissions, as
  731     per mkstemp.
  732 
  733     E.g.::
  734 
  735         with atomically_replaced_file('foo.txt', 'w') as f:
  736             f.write('hello jack.')
  737 
  738     """
  739 
  740     (ffd, tempname) = tempfile.mkstemp(dir=os.path.dirname(name),
  741                                        text=('b' not in mode))
  742     try:
  743         try:
  744             f = os.fdopen(ffd, mode, buffering)
  745         except:
  746             os.close(ffd)
  747             raise
  748         try:
  749             yield f
  750         finally:
  751             f.close()
  752         os.rename(tempname, name)
  753     finally:
  754         unlink(tempname)  # nonexistant file is ignored
  755 
  756 
  757 def slashappend(s):
  758     """Append "/" to 's' if it doesn't aleady end in "/"."""
  759     if s and not s.endswith('/'):
  760         return s + '/'
  761     else:
  762         return s
  763 
  764 
  765 def _mmap_do(f, sz, flags, prot, close):
  766     if not sz:
  767         st = os.fstat(f.fileno())
  768         sz = st.st_size
  769     if not sz:
  770         # trying to open a zero-length map gives an error, but an empty
  771         # string has all the same behaviour of a zero-length map, ie. it has
  772         # no elements :)
  773         return ''
  774     map = mmap.mmap(f.fileno(), sz, flags, prot)
  775     if close:
  776         f.close()  # map will persist beyond file close
  777     return map
  778 
  779 
  780 def mmap_read(f, sz = 0, close=True):
  781     """Create a read-only memory mapped region on file 'f'.
  782     If sz is 0, the region will cover the entire file.
  783     """
  784     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ, close)
  785 
  786 
  787 def mmap_readwrite(f, sz = 0, close=True):
  788     """Create a read-write memory mapped region on file 'f'.
  789     If sz is 0, the region will cover the entire file.
  790     """
  791     return _mmap_do(f, sz, mmap.MAP_SHARED, mmap.PROT_READ|mmap.PROT_WRITE,
  792                     close)
  793 
  794 
  795 def mmap_readwrite_private(f, sz = 0, close=True):
  796     """Create a read-write memory mapped region on file 'f'.
  797     If sz is 0, the region will cover the entire file.
  798     The map is private, which means the changes are never flushed back to the
  799     file.
  800     """
  801     return _mmap_do(f, sz, mmap.MAP_PRIVATE, mmap.PROT_READ|mmap.PROT_WRITE,
  802                     close)
  803 
  804 
  805 _mincore = getattr(_helpers, 'mincore', None)
  806 if _mincore:
  807     # ./configure ensures that we're on Linux if MINCORE_INCORE isn't defined.
  808     MINCORE_INCORE = getattr(_helpers, 'MINCORE_INCORE', 1)
  809 
  810     _fmincore_chunk_size = None
  811     def _set_fmincore_chunk_size():
  812         global _fmincore_chunk_size
  813         pref_chunk_size = 64 * 1024 * 1024
  814         chunk_size = sc_page_size
  815         if (sc_page_size < pref_chunk_size):
  816             chunk_size = sc_page_size * (pref_chunk_size // sc_page_size)
  817         _fmincore_chunk_size = chunk_size
  818 
  819     def fmincore(fd):
  820         """Return the mincore() data for fd as a bytearray whose values can be
  821         tested via MINCORE_INCORE, or None if fd does not fully
  822         support the operation."""
  823         st = os.fstat(fd)
  824         if (st.st_size == 0):
  825             return bytearray(0)
  826         if not _fmincore_chunk_size:
  827             _set_fmincore_chunk_size()
  828         pages_per_chunk = _fmincore_chunk_size // sc_page_size;
  829         page_count = (st.st_size + sc_page_size - 1) // sc_page_size;
  830         chunk_count = page_count // _fmincore_chunk_size
  831         if chunk_count < 1:
  832             chunk_count = 1
  833         result = bytearray(page_count)
  834         for ci in compat.range(chunk_count):
  835             pos = _fmincore_chunk_size * ci;
  836             msize = min(_fmincore_chunk_size, st.st_size - pos)
  837             try:
  838                 m = mmap.mmap(fd, msize, mmap.MAP_PRIVATE, 0, 0, pos)
  839             except mmap.error as ex:
  840                 if ex.errno == errno.EINVAL or ex.errno == errno.ENODEV:
  841                     # Perhaps the file was a pipe, i.e. "... | bup split ..."
  842                     return None
  843                 raise ex
  844             try:
  845                 _mincore(m, msize, 0, result, ci * pages_per_chunk)
  846             except OSError as ex:
  847                 if ex.errno == errno.ENOSYS:
  848                     return None
  849                 raise
  850         return result
  851 
  852 
  853 def parse_timestamp(epoch_str):
  854     """Return the number of nanoseconds since the epoch that are described
  855 by epoch_str (100ms, 100ns, ...); when epoch_str cannot be parsed,
  856 throw a ValueError that may contain additional information."""
  857     ns_per = {'s' :  1000000000,
  858               'ms' : 1000000,
  859               'us' : 1000,
  860               'ns' : 1}
  861     match = re.match(r'^((?:[-+]?[0-9]+)?)(s|ms|us|ns)$', epoch_str)
  862     if not match:
  863         if re.match(r'^([-+]?[0-9]+)$', epoch_str):
  864             raise ValueError('must include units, i.e. 100ns, 100ms, ...')
  865         raise ValueError()
  866     (n, units) = match.group(1, 2)
  867     if not n:
  868         n = 1
  869     n = int(n)
  870     return n * ns_per[units]
  871 
  872 
  873 def parse_num(s):
  874     """Parse data size information into a float number.
  875 
  876     Here are some examples of conversions:
  877         199.2k means 203981 bytes
  878         1GB means 1073741824 bytes
  879         2.1 tb means 2199023255552 bytes
  880     """
  881     g = re.match(r'([-+\d.e]+)\s*(\w*)', str(s))
  882     if not g:
  883         raise ValueError("can't parse %r as a number" % s)
  884     (val, unit) = g.groups()
  885     num = float(val)
  886     unit = unit.lower()
  887     if unit in ['t', 'tb']:
  888         mult = 1024*1024*1024*1024
  889     elif unit in ['g', 'gb']:
  890         mult = 1024*1024*1024
  891     elif unit in ['m', 'mb']:
  892         mult = 1024*1024
  893     elif unit in ['k', 'kb']:
  894         mult = 1024
  895     elif unit in ['', 'b']:
  896         mult = 1
  897     else:
  898         raise ValueError("invalid unit %r in number %r" % (unit, s))
  899     return int(num*mult)
  900 
  901 
  902 def count(l):
  903     """Count the number of elements in an iterator. (consumes the iterator)"""
  904     return reduce(lambda x,y: x+1, l)
  905 
  906 
  907 saved_errors = []
  908 def add_error(e):
  909     """Append an error message to the list of saved errors.
  910 
  911     Once processing is able to stop and output the errors, the saved errors are
  912     accessible in the module variable helpers.saved_errors.
  913     """
  914     saved_errors.append(e)
  915     log('%-70s\n' % e)
  916 
  917 
  918 def clear_errors():
  919     global saved_errors
  920     saved_errors = []
  921 
  922 
  923 def die_if_errors(msg=None, status=1):
  924     global saved_errors
  925     if saved_errors:
  926         if not msg:
  927             msg = 'warning: %d errors encountered\n' % len(saved_errors)
  928         log(msg)
  929         sys.exit(status)
  930 
  931 
  932 def handle_ctrl_c():
  933     """Replace the default exception handler for KeyboardInterrupt (Ctrl-C).
  934 
  935     The new exception handler will make sure that bup will exit without an ugly
  936     stacktrace when Ctrl-C is hit.
  937     """
  938     oldhook = sys.excepthook
  939     def newhook(exctype, value, traceback):
  940         if exctype == KeyboardInterrupt:
  941             log('\nInterrupted.\n')
  942         else:
  943             return oldhook(exctype, value, traceback)
  944     sys.excepthook = newhook
  945 
  946 
  947 def columnate(l, prefix):
  948     """Format elements of 'l' in columns with 'prefix' leading each line.
  949 
  950     The number of columns is determined automatically based on the string
  951     lengths.
  952     """
  953     if not l:
  954         return ""
  955     l = l[:]
  956     clen = max(len(s) for s in l)
  957     ncols = (tty_width() - len(prefix)) // (clen + 2)
  958     if ncols <= 1:
  959         ncols = 1
  960         clen = 0
  961     cols = []
  962     while len(l) % ncols:
  963         l.append('')
  964     rows = len(l) // ncols
  965     for s in compat.range(0, len(l), rows):
  966         cols.append(l[s:s+rows])
  967     out = ''
  968     for row in zip(*cols):
  969         out += prefix + ''.join(('%-*s' % (clen+2, s)) for s in row) + '\n'
  970     return out
  971 
  972 
  973 def parse_date_or_fatal(str, fatal):
  974     """Parses the given date or calls Option.fatal().
  975     For now we expect a string that contains a float."""
  976     try:
  977         date = float(str)
  978     except ValueError as e:
  979         raise fatal('invalid date format (should be a float): %r' % e)
  980     else:
  981         return date
  982 
  983 
  984 def parse_excludes(options, fatal):
  985     """Traverse the options and extract all excludes, or call Option.fatal()."""
  986     excluded_paths = []
  987 
  988     for flag in options:
  989         (option, parameter) = flag
  990         if option == '--exclude':
  991             excluded_paths.append(resolve_parent(parameter))
  992         elif option == '--exclude-from':
  993             try:
  994                 f = open(resolve_parent(parameter))
  995             except IOError as e:
  996                 raise fatal("couldn't read %s" % parameter)
  997             for exclude_path in f.readlines():
  998                 # FIXME: perhaps this should be rstrip('\n')
  999                 exclude_path = resolve_parent(exclude_path.strip())
 1000                 if exclude_path:
 1001                     excluded_paths.append(exclude_path)
 1002     return sorted(frozenset(excluded_paths))
 1003 
 1004 
 1005 def parse_rx_excludes(options, fatal):
 1006     """Traverse the options and extract all rx excludes, or call
 1007     Option.fatal()."""
 1008     excluded_patterns = []
 1009 
 1010     for flag in options:
 1011         (option, parameter) = flag
 1012         if option == '--exclude-rx':
 1013             try:
 1014                 excluded_patterns.append(re.compile(parameter))
 1015             except re.error as ex:
 1016                 fatal('invalid --exclude-rx pattern (%s): %s' % (parameter, ex))
 1017         elif option == '--exclude-rx-from':
 1018             try:
 1019                 f = open(resolve_parent(parameter))
 1020             except IOError as e:
 1021                 raise fatal("couldn't read %s" % parameter)
 1022             for pattern in f.readlines():
 1023                 spattern = pattern.rstrip('\n')
 1024                 if not spattern:
 1025                     continue
 1026                 try:
 1027                     excluded_patterns.append(re.compile(spattern))
 1028                 except re.error as ex:
 1029                     fatal('invalid --exclude-rx pattern (%s): %s' % (spattern, ex))
 1030     return excluded_patterns
 1031 
 1032 
 1033 def should_rx_exclude_path(path, exclude_rxs):
 1034     """Return True if path matches a regular expression in exclude_rxs."""
 1035     for rx in exclude_rxs:
 1036         if rx.search(path):
 1037             debug1('Skipping %r: excluded by rx pattern %r.\n'
 1038                    % (path, rx.pattern))
 1039             return True
 1040     return False
 1041 
 1042 
 1043 # FIXME: Carefully consider the use of functions (os.path.*, etc.)
 1044 # that resolve against the current filesystem in the strip/graft
 1045 # functions for example, but elsewhere as well.  I suspect bup's not
 1046 # always being careful about that.  For some cases, the contents of
 1047 # the current filesystem should be irrelevant, and consulting it might
 1048 # produce the wrong result, perhaps via unintended symlink resolution,
 1049 # for example.
 1050 
 1051 def path_components(path):
 1052     """Break path into a list of pairs of the form (name,
 1053     full_path_to_name).  Path must start with '/'.
 1054     Example:
 1055       '/home/foo' -> [('', '/'), ('home', '/home'), ('foo', '/home/foo')]"""
 1056     if not path.startswith('/'):
 1057         raise Exception('path must start with "/": %s' % path)
 1058     # Since we assume path startswith('/'), we can skip the first element.
 1059     result = [('', '/')]
 1060     norm_path = os.path.abspath(path)
 1061     if norm_path == '/':
 1062         return result
 1063     full_path = ''
 1064     for p in norm_path.split('/')[1:]:
 1065         full_path += '/' + p
 1066         result.append((p, full_path))
 1067     return result
 1068 
 1069 
 1070 def stripped_path_components(path, strip_prefixes):
 1071     """Strip any prefix in strip_prefixes from path and return a list
 1072     of path components where each component is (name,
 1073     none_or_full_fs_path_to_name).  Assume path startswith('/').
 1074     See thelpers.py for examples."""
 1075     normalized_path = os.path.abspath(path)
 1076     sorted_strip_prefixes = sorted(strip_prefixes, key=len, reverse=True)
 1077     for bp in sorted_strip_prefixes:
 1078         normalized_bp = os.path.abspath(bp)
 1079         if normalized_bp == '/':
 1080             continue
 1081         if normalized_path.startswith(normalized_bp):
 1082             prefix = normalized_path[:len(normalized_bp)]
 1083             result = []
 1084             for p in normalized_path[len(normalized_bp):].split('/'):
 1085                 if p: # not root
 1086                     prefix += '/'
 1087                 prefix += p
 1088                 result.append((p, prefix))
 1089             return result
 1090     # Nothing to strip.
 1091     return path_components(path)
 1092 
 1093 
def grafted_path_components(graft_points, path):
    """Return the (name, none_or_full_path) components for path after
    applying graft_points, a sequence of (old_prefix, new_prefix)
    pairs as produced by --graft old=new.  Components fabricated for
    new_prefix carry None instead of a filesystem path."""
    # Create a result that consists of some number of faked graft
    # directories before the graft point, followed by all of the real
    # directories from path that are after the graft point.  Arrange
    # for the directory at the graft point in the result to correspond
    # to the "orig" directory in --graft orig=new.  See t/thelpers.py
    # for some examples.

    # Note that given --graft orig=new, orig and new have *nothing* to
    # do with each other, even if some of their component names
    # match. i.e. --graft /foo/bar/baz=/foo/bar/bax is semantically
    # equivalent to --graft /foo/bar/baz=/x/y/z, or even
    # /foo/bar/baz=/x.

    # FIXME: This can't be the best solution...
    clean_path = os.path.abspath(path)
    for graft_point in graft_points:
        old_prefix, new_prefix = graft_point
        # Expand prefixes iff not absolute paths.
        old_prefix = os.path.normpath(old_prefix)
        new_prefix = os.path.normpath(new_prefix)
        if clean_path.startswith(old_prefix):
            escaped_prefix = re.escape(old_prefix)
            grafted_path = re.sub(r'^' + escaped_prefix, new_prefix, clean_path)
            # Handle /foo=/ (at least) -- which produces //whatever.
            grafted_path = '/' + grafted_path.lstrip('/')
            clean_path_components = path_components(clean_path)
            # Count the components that were stripped.
            strip_count = 0 if old_prefix == '/' else old_prefix.count('/')
            new_prefix_parts = new_prefix.split('/')
            result_prefix = grafted_path.split('/')[:new_prefix.count('/')]
            # The fabricated prefix components have no filesystem path.
            result = [(p, None) for p in result_prefix] \
                + clean_path_components[strip_count:]
            # Now set the graft point name to match the end of new_prefix.
            graft_point = len(result_prefix)
            result[graft_point] = \
                (new_prefix_parts[-1], clean_path_components[strip_count][1])
            if new_prefix == '/': # --graft ...=/ is a special case.
                return result[1:]
            return result
    return path_components(clean_path)
 1135 
 1136 
# Alias for the SHA-1 hash constructor.
Sha1 = hashlib.sha1


# Prefer the C helper's localtime when available (see bup_time below
# for the extra fields it provides).
_localtime = getattr(_helpers, 'localtime', None)

if _localtime:
    # time.struct_time-like tuple extended with tm_gmtoff and tm_zone.
    bup_time = namedtuple('bup_time', ['tm_year', 'tm_mon', 'tm_mday',
                                       'tm_hour', 'tm_min', 'tm_sec',
                                       'tm_wday', 'tm_yday',
                                       'tm_isdst', 'tm_gmtoff', 'tm_zone'])
 1147 
 1148 # Define a localtime() that returns bup_time when possible.  Note:
 1149 # this means that any helpers.localtime() results may need to be
 1150 # passed through to_py_time() before being passed to python's time
 1151 # module, which doesn't appear willing to ignore the extra items.
 1152 if _localtime:
 1153     def localtime(time):
 1154         return bup_time(*_helpers.localtime(time))
 1155     def utc_offset_str(t):
 1156         """Return the local offset from UTC as "+hhmm" or "-hhmm" for time t.
 1157         If the current UTC offset does not represent an integer number
 1158         of minutes, the fractional component will be truncated."""
 1159         off = localtime(t).tm_gmtoff
 1160         # Note: // doesn't truncate like C for negative values, it rounds down.
 1161         offmin = abs(off) // 60
 1162         m = offmin % 60
 1163         h = (offmin - m) // 60
 1164         return "%+03d%02d" % (-h if off < 0 else h, m)
 1165     def to_py_time(x):
 1166         if isinstance(x, time.struct_time):
 1167             return x
 1168         return time.struct_time(x[:9])
 1169 else:
 1170     localtime = time.localtime
 1171     def utc_offset_str(t):
 1172         return time.strftime('%z', localtime(t))
 1173     def to_py_time(x):
 1174         return x
 1175 
 1176 
 1177 _some_invalid_save_parts_rx = re.compile(r'[[ ~^:?*\\]|\.\.|//|@{')
 1178 
 1179 def valid_save_name(name):
 1180     # Enforce a superset of the restrictions in git-check-ref-format(1)
 1181     if name == '@' \
 1182        or name.startswith('/') or name.endswith('/') \
 1183        or name.endswith('.'):
 1184         return False
 1185     if _some_invalid_save_parts_rx.search(name):
 1186         return False
 1187     for c in name:
 1188         if ord(c) < 0x20 or ord(c) == 0x7f:
 1189             return False
 1190     for part in name.split('/'):
 1191         if part.startswith('.') or part.endswith('.lock'):
 1192             return False
 1193     return True
 1194 
 1195 
 1196 _period_rx = re.compile(r'^([0-9]+)(s|min|h|d|w|m|y)$')
 1197 
 1198 def period_as_secs(s):
 1199     if s == 'forever':
 1200         return float('inf')
 1201     match = _period_rx.match(s)
 1202     if not match:
 1203         return None
 1204     mag = int(match.group(1))
 1205     scale = match.group(2)
 1206     return mag * {'s': 1,
 1207                   'min': 60,
 1208                   'h': 60 * 60,
 1209                   'd': 60 * 60 * 24,
 1210                   'w': 60 * 60 * 24 * 7,
 1211                   'm': 60 * 60 * 24 * 31,
 1212                   'y': 60 * 60 * 24 * 366}[scale]