"Fossies" - the Fresh Open Source Software Archive

Member "viewvc-1.2.1/lib/vclib/ccvs/bincvs.py" (26 Mar 2020, 40708 Bytes) of package /linux/misc/viewvc-1.2.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML format using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. For more information about "bincvs.py", see the Fossies "Dox" file reference documentation and the latest Fossies "Diffs" side-by-side code changes report: 1.1.28_vs_1.2.1.

    1 # -*-python-*-
    2 #
    3 # Copyright (C) 1999-2020 The ViewCVS Group. All Rights Reserved.
    4 #
    5 # By using this file, you agree to the terms and conditions set forth in
    6 # the LICENSE.html file which can be found at the top level of the ViewVC
    7 # distribution or at http://viewvc.org/license-1.html.
    8 #
    9 # For more information, visit http://viewvc.org/
   10 #
   11 # -----------------------------------------------------------------------
   12 
   13 "Version Control lib driver for locally accessible cvs-repositories."
   14 
   15 import vclib
   16 import vcauth
   17 import os
   18 import os.path
   19 import sys
   20 import stat
   21 import re
   22 import time
   23 import calendar
   24 
   25 # ViewVC libs
   26 import popen
   27 import vclib.ccvs
   28 
   29 def _path_join(path_parts):
   30   return '/'.join(path_parts)
   31   
   32 class BaseCVSRepository(vclib.Repository):
   33   def __init__(self, name, rootpath, authorizer, utilities):
   34     if not os.path.isdir(rootpath):
   35       raise vclib.ReposNotFound(name) 
   36    
   37     self.name = name
   38     self.rootpath = rootpath
   39     self.auth = authorizer
   40     self.utilities = utilities
   41 
   42     # See if this repository is even viewable, authz-wise.
   43     if not vclib.check_root_access(self):
   44       raise vclib.ReposNotFound(name)
   45 
   46   def open(self):
   47     # See if a universal read access determination can be made.
   48     if self.auth and self.auth.check_universal_access(self.name) == 1:
   49       self.auth = None
   50 
   51   def rootname(self):
   52     return self.name
   53 
   54   def rootpath(self):
   55     return self.rootpath
   56 
   57   def roottype(self):
   58     return vclib.CVS
   59 
   60   def authorizer(self):
   61     return self.auth
   62   
   63   def itemtype(self, path_parts, rev):
   64     basepath = self._getpath(path_parts)
   65     kind = None
   66     if os.path.isdir(basepath):
   67       kind = vclib.DIR
   68     elif os.path.isfile(basepath + ',v'):
   69       kind = vclib.FILE
   70     else:
   71       atticpath = self._getpath(self._atticpath(path_parts))
   72       if os.path.isfile(atticpath + ',v'):
   73         kind = vclib.FILE
   74     if not kind:
   75       raise vclib.ItemNotFound(path_parts)
   76     if not vclib.check_path_access(self, path_parts, kind, rev):
   77       raise vclib.ItemNotFound(path_parts)
   78     return kind
   79 
   80   def itemprops(self, path_parts, rev):
   81     self.itemtype(path_parts, rev)  # does auth-check
   82     return {}  # CVS doesn't support properties
   83   
   84   def listdir(self, path_parts, rev, options):
   85     if self.itemtype(path_parts, rev) != vclib.DIR:  # does auth-check
   86       raise vclib.Error("Path '%s' is not a directory."
   87                         % (_path_join(path_parts)))
   88     
   89     # Only RCS files (*,v) and subdirs are returned.
   90     data = [ ]
   91     full_name = self._getpath(path_parts)
   92     for file in os.listdir(full_name):
   93       name = None
   94       kind, errors = _check_path(os.path.join(full_name, file))
   95       if kind == vclib.FILE:
   96         if file[-2:] == ',v':
   97           name = file[:-2]
   98       elif kind == vclib.DIR:
   99         if file != 'Attic' and file != 'CVS': # CVS directory is for fileattr
  100           name = file
  101       else:
  102         name = file
  103       if not name:
  104         continue
  105       if vclib.check_path_access(self, path_parts + [name], kind, rev):
  106         data.append(CVSDirEntry(name, kind, errors, 0))
  107 
  108     full_name = os.path.join(full_name, 'Attic')
  109     if os.path.isdir(full_name):
  110       for file in os.listdir(full_name):
  111         name = None
  112         kind, errors = _check_path(os.path.join(full_name, file))
  113         if kind == vclib.FILE:
  114           if file[-2:] == ',v':
  115             name = file[:-2]
  116         elif kind != vclib.DIR:
  117           name = file
  118         if not name:
  119           continue
  120         if vclib.check_path_access(self, path_parts + [name], kind, rev):
  121           data.append(CVSDirEntry(name, kind, errors, 1))
  122 
  123     return data
  124     
  125   def _getpath(self, path_parts):
  126     return apply(os.path.join, (self.rootpath,) + tuple(path_parts))
  127 
  128   def _atticpath(self, path_parts):
  129     return path_parts[:-1] + ['Attic'] + path_parts[-1:]
  130 
  131   def rcsfile(self, path_parts, root=0, v=1):
  132     "Return path to RCS file"
  133 
  134     ret_parts = path_parts
  135     ret_file = self._getpath(ret_parts)
  136     if not os.path.isfile(ret_file + ',v'):
  137       ret_parts = self._atticpath(path_parts)
  138       ret_file = self._getpath(ret_parts)
  139       if not os.path.isfile(ret_file + ',v'):
  140         raise vclib.ItemNotFound(path_parts)
  141     if root:
  142       ret = ret_file
  143     else:
  144       ret = _path_join(ret_parts)
  145     if v:
  146       ret = ret + ",v"
  147     return ret
  148 
  149   def isexecutable(self, path_parts, rev):
  150     if self.itemtype(path_parts, rev) != vclib.FILE:  # does auth-check
  151       raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts)))
  152     rcsfile = self.rcsfile(path_parts, 1)
  153     return os.access(rcsfile, os.X_OK)
  154   
  155   def filesize(self, path_parts, rev):
  156     if self.itemtype(path_parts, rev) != vclib.FILE:  # does auth-check
  157       raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts)))
  158     return -1
  159 
  160 
  161 class BinCVSRepository(BaseCVSRepository):
  162   def _get_tip_revision(self, rcs_file, rev=None):
  163     """Get the (basically) youngest revision (filtered by REV)."""
  164     args = rcs_file,
  165     fp = self.rcs_popen('rlog', args, 'rt', 0)
  166     filename, default_branch, tags, lockinfo, msg, eof = _parse_log_header(fp)
  167     revs = []
  168     while not eof:
  169       revision, eof = _parse_log_entry(fp)
  170       if revision:
  171         revs.append(revision)
  172     revs = _file_log(revs, tags, lockinfo, default_branch, rev)
  173     if revs:
  174       return revs[-1]
  175     return None
  176 
  177   def openfile(self, path_parts, rev, options):
  178     """see vclib.Repository.openfile docstring
  179 
  180     Option values recognized by this implementation:
  181 
  182       cvs_oldkeywords
  183         boolean. true to use the original keyword substitution values.
  184     """
  185     if self.itemtype(path_parts, rev) != vclib.FILE:  # does auth-check
  186       raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts)))
  187     if not rev or rev == 'HEAD' or rev == 'MAIN':
  188       rev_flag = '-p'
  189     else:
  190       rev_flag = '-p' + rev
  191     if options.get('cvs_oldkeywords', 0):
  192       kv_flag = '-ko'
  193     else:
  194       kv_flag = '-kkv'
  195     full_name = self.rcsfile(path_parts, root=1, v=0)
  196     used_rlog = 0
  197     tip_rev = None  # used only if we have to fallback to using rlog
  198     fp = self.rcs_popen('co', (kv_flag, rev_flag, full_name), 'rb') 
  199     try:
  200       filename, revision = _parse_co_header(fp)
  201     except COMissingRevision:
  202       # We got a "revision X.Y.Z absent" error from co.  This could be
  203       # because we were asked to find a tip of a branch, which co
  204       # doesn't seem to handle.  So we do rlog-gy stuff to figure out
  205       # which revision the tip of the branch currently maps to.
  206       ### TODO: Only do this when 'rev' is a branch symbol name?
  207       if not used_rlog:
  208         tip_rev = self._get_tip_revision(full_name + ',v', rev)
  209         used_rlog = 1
  210       if not tip_rev:
  211         raise vclib.Error("Unable to find valid revision")
  212       fp = self.rcs_popen('co', ('-p' + tip_rev.string, full_name), 'rb') 
  213       filename, revision = _parse_co_header(fp)
  214       
  215     if filename is None:
  216       # CVSNT's co exits without any output if a dead revision is requested.
  217       # Bug at http://www.cvsnt.org/cgi-bin/bugzilla/show_bug.cgi?id=190
  218       # As a workaround, we invoke rlog to find the first non-dead revision
  219       # that precedes it and check out that revision instead.  Of course, 
  220       # if we've already invoked rlog above, we just reuse its output.
  221       if not used_rlog:
  222         tip_rev = self._get_tip_revision(full_name + ',v', rev)
  223         used_rlog = 1
  224       if not (tip_rev and tip_rev.undead):
  225         raise vclib.Error(
  226           'Could not find non-dead revision preceding "%s"' % rev)
  227       fp = self.rcs_popen('co', ('-p' + tip_rev.undead.string,
  228                                  full_name), 'rb') 
  229       filename, revision = _parse_co_header(fp)
  230 
  231     if filename is None:
  232       raise vclib.Error('Missing output from co (filename = "%s")' % full_name)
  233 
  234     if not _paths_eq(filename, full_name):
  235       raise vclib.Error(
  236         'The filename from co ("%s") did not match (expected "%s")'
  237         % (filename, full_name))
  238 
  239     return fp, revision
  240 
  241   def dirlogs(self, path_parts, rev, entries, options):
  242     """see vclib.Repository.dirlogs docstring
  243 
  244     rev can be a tag name or None. if set only information from revisions
  245     matching the tag will be retrieved
  246 
  247     Option values recognized by this implementation:
  248 
  249       cvs_subdirs
  250         boolean. true to fetch logs of the most recently modified file in each
  251         subdirectory
  252 
  253     Option values returned by this implementation:
  254 
  255       cvs_tags, cvs_branches
  256         lists of tag and branch names encountered in the directory
  257     """
  258     if self.itemtype(path_parts, rev) != vclib.DIR:  # does auth-check
  259       raise vclib.Error("Path '%s' is not a directory."
  260                         % (_path_join(path_parts)))
  261 
  262     subdirs = options.get('cvs_subdirs', 0)
  263     entries_to_fetch = []
  264     for entry in entries:
  265       if vclib.check_path_access(self, path_parts + [entry.name], None, rev):
  266         entries_to_fetch.append(entry)
  267     alltags = _get_logs(self, path_parts, entries_to_fetch, rev, subdirs)
  268     branches = options['cvs_branches'] = []
  269     tags = options['cvs_tags'] = []
  270     for name, rev in alltags.items():
  271       if Tag(None, rev).is_branch:
  272         branches.append(name)
  273       else:
  274         tags.append(name)
  275 
  276   def itemlog(self, path_parts, rev, sortby, first, limit, options):
  277     """see vclib.Repository.itemlog docstring
  278 
  279     rev parameter can be a revision number, a branch number, a tag name,
  280     or None. If None, will return information about all revisions, otherwise,
  281     will only return information about the specified revision or branch.
  282 
  283     Option values recognized by this implementation:
  284 
  285       cvs_pass_rev
  286         boolean, default false. set to true to pass rev parameter as -r
  287         argument to rlog, this is more efficient but causes less
  288         information to be returned
  289 
  290     Option values returned by this implementation:
  291 
  292       cvs_tags
  293         dictionary of Tag objects for all tags encountered
  294     """
  295 
  296     if self.itemtype(path_parts, rev) != vclib.FILE:  # does auth-check
  297       raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts)))
  298     
  299     # Invoke rlog
  300     rcsfile = self.rcsfile(path_parts, 1)
  301     if rev and options.get('cvs_pass_rev', 0):
  302       args = '-r' + rev, rcsfile
  303     else:
  304       args = rcsfile,
  305 
  306     fp = self.rcs_popen('rlog', args, 'rt', 0)
  307     filename, default_branch, tags, lockinfo, msg, eof = _parse_log_header(fp)
  308 
  309     # Retrieve revision objects
  310     revs = []
  311     while not eof:
  312       revision, eof = _parse_log_entry(fp)
  313       if revision:
  314         revs.append(revision)
  315 
  316     filtered_revs = _file_log(revs, tags, lockinfo, default_branch, rev)
  317 
  318     options['cvs_tags'] = tags
  319     if sortby == vclib.SORTBY_DATE:
  320       filtered_revs.sort(_logsort_date_cmp)
  321     elif sortby == vclib.SORTBY_REV:
  322       filtered_revs.sort(_logsort_rev_cmp)
  323 
  324     if len(filtered_revs) < first:
  325       return []
  326     if limit:
  327       return filtered_revs[first:first+limit]
  328     return filtered_revs
  329 
  330   def rcs_popen(self, rcs_cmd, rcs_args, mode, capture_err=1):
  331     if self.utilities.cvsnt:
  332       cmd = self.utilities.cvsnt
  333       args = ['rcsfile', rcs_cmd]
  334       args.extend(list(rcs_args))
  335     else:
  336       cmd = os.path.join(self.utilities.rcs_dir, rcs_cmd)
  337       args = rcs_args
  338     return popen.popen(cmd, args, mode, capture_err)
  339 
  340   def annotate(self, path_parts, rev=None, include_text=False):
  341     if self.itemtype(path_parts, rev) != vclib.FILE:  # does auth-check
  342       raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts)))
  343                         
  344     from vclib.ccvs import blame
  345     source = blame.BlameSource(self.rcsfile(path_parts, 1), rev, include_text)
  346     return source, source.revision
  347 
  348   def revinfo(self, rev):
  349     raise vclib.UnsupportedFeature
  350   
  351   def rawdiff(self, path_parts1, rev1, path_parts2, rev2, type, options={}):
  352     """see vclib.Repository.rawdiff docstring
  353 
  354     Option values recognized by this implementation:
  355 
  356       ignore_keyword_subst - boolean, ignore keyword substitution
  357     """
  358     if self.itemtype(path_parts1, rev1) != vclib.FILE:  # does auth-check
  359       raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts1)))
  360     if self.itemtype(path_parts2, rev2) != vclib.FILE:  # does auth-check
  361       raise vclib.Error("Path '%s' is not a file." % (_path_join(path_parts2)))
  362     
  363     args = vclib._diff_args(type, options)
  364     if options.get('ignore_keyword_subst', 0):
  365       args.append('-kk')
  366 
  367     rcsfile = self.rcsfile(path_parts1, 1)
  368     if path_parts1 != path_parts2:
  369       raise NotImplementedError, "cannot diff across paths in cvs"
  370     args.extend(['-r' + rev1, '-r' + rev2, rcsfile])
  371     
  372     fp = self.rcs_popen('rcsdiff', args, 'rt')
  373 
  374     # Eat up the non-GNU-diff-y headers.
  375     while 1:
  376       line = fp.readline()
  377       if not line or line[0:5] == 'diff ':
  378         break
  379     return fp
  380   
  381 
  382 class CVSDirEntry(vclib.DirEntry):
  383   def __init__(self, name, kind, errors, in_attic, absent=0):
  384     vclib.DirEntry.__init__(self, name, kind, errors)
  385     self.in_attic = in_attic
  386     self.absent = absent # meaning, no revisions found on requested tag
  387 
  388 class Revision(vclib.Revision):
  389   def __init__(self, revstr, date=None, author=None, dead=None,
  390                changed=None, log=None):
  391     vclib.Revision.__init__(self, _revision_tuple(revstr), revstr,
  392                             date, author, changed, log, None, None)
  393     self.dead = dead
  394 
  395 class Tag:
  396   def __init__(self, name, revstr):
  397     self.name = name
  398     self.number = _tag_tuple(revstr)
  399     self.is_branch = len(self.number) % 2 == 1 or not self.number
  400 
  401 
  402 # ======================================================================
  403 # Functions for dealing with Revision and Tag objects
  404 
  405 def _logsort_date_cmp(rev1, rev2):
  406   # sort on date; secondary on revision number
  407   return -cmp(rev1.date, rev2.date) or -cmp(rev1.number, rev2.number)
  408 
  409 def _logsort_rev_cmp(rev1, rev2):
  410   # sort highest revision first
  411   return -cmp(rev1.number, rev2.number)
  412 
  413 def _match_revs_tags(revlist, taglist):
  414   """Match up a list of Revision objects with a list of Tag objects
  415 
  416   Sets the following properties on each Revision in revlist:
  417     "tags"
  418       list of non-branch tags which refer to this revision
  419       example: if revision is 1.2.3.4, tags is a list of all 1.2.3.4 tags
  420 
  421     "branches"
  422       list of branch tags which refer to this revision's branch
  423       example: if revision is 1.2.3.4, branches is a list of all 1.2.3 tags
  424 
  425     "branch_points"
  426       list of branch tags which branch off of this revision
  427       example: if revision is 1.2, it's a list of tags like 1.2.3 and 1.2.4
  428 
  429     "prev"
  430       reference to the previous revision, possibly None
  431       example: if revision is 1.2.3.4, prev is 1.2.3.3
  432 
  433     "next"
  434       reference to next revision, possibly None
  435       example: if revision is 1.2.3.4, next is 1.2.3.5
  436 
  437     "parent"
  438       reference to revision this one branches off of, possibly None
  439       example: if revision is 1.2.3.4, parent is 1.2
  440 
  441     "undead"
  442       If the revision is dead, then this is a reference to the first 
  443       previous revision which isn't dead, otherwise it's a reference
  444       to itself. If all the previous revisions are dead it's None. 
  445 
  446     "branch_number"
  447       tuple representing branch number or empty tuple if on trunk
  448       example: if revision is 1.2.3.4, branch_number is (1, 2, 3)
  449 
  450   Each tag in taglist gets these properties set:
  451     "co_rev"
  452       reference to revision that would be retrieved if tag were checked out
  453 
  454     "branch_rev"
  455       reference to revision branched off of, only set for branch tags
  456       example: if tag is 1.2.3, branch_rev points to 1.2 revision
  457 
  458     "aliases"
  459       list of tags that have the same number
  460   """
  461 
  462   # map of branch numbers to lists of corresponding branch Tags
  463   branch_dict = {}
  464 
  465   # map of revision numbers to lists of non-branch Tags
  466   tag_dict = {}
  467 
  468   # map of revision numbers to lists of branch Tags
  469   branch_point_dict = {}
  470 
  471   # toss tags into "branch_dict", "tag_dict", and "branch_point_dict"
  472   # set "aliases" property and default "co_rev" and "branch_rev" values
  473   for tag in taglist:
  474     tag.co_rev = None
  475     if tag.is_branch:
  476       tag.branch_rev = None
  477       _dict_list_add(branch_point_dict, tag.number[:-1], tag)
  478       tag.aliases = _dict_list_add(branch_dict, tag.number, tag)
  479     else:
  480       tag.aliases = _dict_list_add(tag_dict, tag.number, tag)
  481 
  482   # sort the revisions so the loop below can work properly
  483   revlist.sort()
  484 
  485   # array of the most recently encountered revision objects indexed by depth
  486   history = []
  487 
  488   # loop through revisions, setting properties and storing state in "history"
  489   for rev in revlist:
  490     depth = len(rev.number) / 2 - 1
  491 
  492     # set "prev" and "next" properties
  493     rev.prev = rev.next = None
  494     if depth < len(history):
  495       prev = history[depth]
  496       if prev and (depth == 0 or rev.number[:-1] == prev.number[:-1]):
  497         rev.prev = prev
  498         prev.next = rev
  499 
  500     # set "parent"
  501     rev.parent = None
  502     if depth and depth <= len(history):
  503       parent = history[depth-1]
  504       if parent and parent.number == rev.number[:-2]:
  505         rev.parent = history[depth-1]
  506 
  507     # set "undead"
  508     if rev.dead:
  509       prev = rev.prev or rev.parent
  510       rev.undead = prev and prev.undead
  511     else:
  512       rev.undead = rev
  513 
  514     # set "tags" and "branch_points"
  515     rev.tags = tag_dict.get(rev.number, [])
  516     rev.branch_points = branch_point_dict.get(rev.number, [])
  517 
  518     # set "branches" and "branch_number"
  519     if rev.prev:
  520       rev.branches = rev.prev.branches
  521       rev.branch_number = rev.prev.branch_number
  522     else:
  523       rev.branch_number = depth and rev.number[:-1] or ()
  524       try:
  525         rev.branches = branch_dict[rev.branch_number]
  526       except KeyError:
  527         rev.branches = []
  528 
  529     # set "co_rev" and "branch_rev"
  530     for tag in rev.tags:
  531       tag.co_rev = rev
  532 
  533     for tag in rev.branch_points:
  534       tag.co_rev = rev
  535       tag.branch_rev = rev
  536 
  537     # This loop only needs to be run for revisions at the heads of branches,
  538     # but for the simplicity's sake, it actually runs for every revision on
  539     # a branch. The later revisions overwrite values set by the earlier ones.
  540     for branch in rev.branches:
  541       branch.co_rev = rev
  542 
  543     # end of outer loop, store most recent revision in "history" array
  544     while len(history) <= depth:
  545       history.append(None)
  546     history[depth] = rev
  547 
  548 def _add_tag(tag_name, revision):
  549   """Create a new tag object and associate it with a revision"""
  550   if revision:
  551     tag = Tag(tag_name, revision.string)
  552     tag.aliases = revision.tags
  553     revision.tags.append(tag)
  554   else:
  555     tag = Tag(tag_name, None)
  556     tag.aliases = []
  557   tag.co_rev = revision
  558   tag.is_branch = 0
  559   return tag
  560 
  561 def _remove_tag(tag):
  562   """Remove a tag's associations"""
  563   tag.aliases.remove(tag)
  564   if tag.is_branch and tag.branch_rev:
  565     tag.branch_rev.branch_points.remove(tag)
  566 
  567 def _revision_tuple(revision_string):
  568   """convert a revision number into a tuple of integers"""
  569   t = tuple(map(int, revision_string.split('.')))
  570   if len(t) % 2 == 0:
  571     return t
  572   raise ValueError
  573 
  574 def _tag_tuple(revision_string):
  575   """convert a revision number or branch number into a tuple of integers"""
  576   if revision_string:
  577     t = map(int, revision_string.split('.'))
  578     l = len(t)
  579     if l == 1:
  580       return ()
  581     if l > 2 and t[-2] == 0 and l % 2 == 0:
  582       del t[-2]
  583     return tuple(t)
  584   return ()
  585 
  586 def _dict_list_add(dict, idx, elem):
  587   try:
  588     list = dict[idx]
  589   except KeyError:
  590     list = dict[idx] = [elem]
  591   else:
  592     list.append(elem)
  593   return list
  594 
  595 
  596 # ======================================================================
  597 # Functions for parsing output from RCS utilities
  598 
  599 
  600 class COMalformedOutput(vclib.Error):
  601   pass
  602 class COMissingRevision(vclib.Error):
  603   pass
  604 
# Patterns for the header lines that 'co' prints before the file contents.
### suck up other warnings in _re_co_warning?
# "<path>,v  -->  standard output" (or "stdout"); group 1 is the path
# without the ",v" suffix.
_re_co_filename = re.compile(r'^(.*),v\s+-->\s+(?:(?:standard output)|(?:stdout))\s*\n?$')
# Ignorable warning about unknown phrases in the RCS file.
_re_co_warning = re.compile(r'^.*co: .*,v: warning: Unknown phrases like .*\n$')
# "revision ... absent" error.
_re_co_missing_rev = re.compile(r'^.*co: .*,v: revision.*absent\n$')
# "no side branches present for <number>" error.
_re_co_side_branches = re.compile(r'^.*co: .*,v: no side branches present for [\d\.]+\n$')
# "revision <number>"; group 1 is the revision number.
_re_co_revision = re.compile(r'^revision\s+([\d\.]+)\s*\n$')
  611 
  612 def _parse_co_header(fp):
  613   """Parse RCS co header.
  614 
  615   fp is a file (pipe) opened for reading the co standard error stream.
  616 
  617   Returns: (filename, revision) or (None, None) if output is empty
  618   """
  619 
  620   # header from co:
  621   #
  622   #/home/cvsroot/mod_dav/dav_shared_stub.c,v  -->  standard output
  623   #revision 1.1
  624   #
  625   # Sometimes, the following line might occur at line 2:
  626   #co: INSTALL,v: warning: Unknown phrases like `permissions ...;' are present.
  627 
  628   # parse the output header
  629   filename = None
  630 
  631   # look for a filename in the first line (if there is a first line).
  632   line = fp.readline()
  633   if not line:
  634     return None, None
  635   match = _re_co_filename.match(line)
  636   if not match:
  637     raise COMalformedOutput, "Unable to find filename in co output stream"
  638   filename = match.group(1)
  639 
  640   # look through subsequent lines for a revision.  we might encounter
  641   # some ignorable or problematic lines along the way.
  642   while 1:
  643     line = fp.readline()
  644     if not line:
  645       break
  646     # look for a revision.
  647     match = _re_co_revision.match(line)
  648     if match:
  649       return filename, match.group(1)
  650     elif _re_co_missing_rev.match(line) or _re_co_side_branches.match(line):
  651       raise COMissingRevision, "Got missing revision error from co output stream"
  652     elif _re_co_warning.match(line):
  653       pass
  654     else:
  655       break
  656     
  657   raise COMalformedOutput, "Unable to find revision in co output stream"
  658 
# Separator lines that the log parsers in this file compare input lines
# against.
# if your rlog doesn't use 77 '=' characters, then this must change
LOG_END_MARKER = '=' * 77 + '\n'
ENTRY_END_MARKER = '-' * 28 + '\n'

# Values used as the "eof" flag returned by the log parsing functions.
_EOF_FILE = 'end of file entries'       # no more entries for this RCS file
_EOF_LOG = 'end of log'                 # hit the true EOF on the pipe
_EOF_ERROR = 'error message found'      # rlog issued an error

# rlog error messages look like
#
#   rlog: filename/goes/here,v: error message
#   rlog: filename/goes/here,v:123: error message
#
# so we should be able to match them with a regex like
#
#   ^rlog\: (.*)(?:\:\d+)?\: (.*)$
#
# But for some reason the windows version of rlog omits the "rlog: " prefix
# for the first error message when the standard error stream has been 
# redirected to a file or pipe. (the prefix is present in subsequent errors
# and when rlog is run from the console). So the expression below is more
# complicated
#
# Group 1 is the RCS file name (ending in ",v"), group 2 the message.
_re_log_error = re.compile(r'^(?:rlog\: )*(.*,v)(?:\:\d+)?\: (.*)$')

# CVSNT error messages look like:
# cvs rcsfile: `C:/path/to/file,v' does not appear to be a valid rcs file
# cvs [rcsfile aborted]: C:/path/to/file,v: No such file or directory
# cvs [rcsfile aborted]: cannot open C:/path/to/file,v: Permission denied
#
# Groups 1-3 are the three alternative positions of the file name (at most
# one of them matches); group 4 is the remaining message text.
_re_cvsnt_error = re.compile(r'^(?:cvs rcsfile\: |cvs \[rcsfile aborted\]: )'
                             r'(?:\`(.*,v)\' |cannot open (.*,v)\: |(.*,v)\: |)'
                             r'(.*)$')
  690 
  691 def _parse_log_header(fp):
  692   """Parse and RCS/CVS log header.
  693 
  694   fp is a file (pipe) opened for reading the log information.
  695 
  696   On entry, fp should point to the start of a log entry.
  697   On exit, fp will have consumed the separator line between the header and
  698   the first revision log.
  699 
  700   If there is no revision information (e.g. the "-h" switch was passed to
  701   rlog), then fp will consumed the file separator line on exit.
  702 
  703   Returns: filename, default branch, tag dictionary, lock dictionary,
  704   rlog error message, and eof flag
  705   """
  706   
  707   filename = head = branch = msg = ""
  708   taginfo = { }   # tag name => number
  709   lockinfo = { }  # revision => locker
  710   state = 0       # 0 = base, 1 = parsing symbols, 2 = parsing locks
  711   eof = None
  712 
  713   while 1:
  714     line = fp.readline()
  715     if not line:
  716       # the true end-of-file
  717       eof = _EOF_LOG
  718       break
  719 
  720     if state == 1:
  721       if line[0] == '\t':
  722         [ tag, rev ] = map(lambda x: x.strip(), line.split(':'))
  723         taginfo[tag] = rev
  724       else:
  725         # oops. this line isn't tag info. stop parsing tags.
  726         state = 0
  727 
  728     if state == 2:
  729       if line[0] == '\t':
  730         [ locker, rev ] = map(lambda x: x.strip(), line.split(':'))
  731         lockinfo[rev] = locker
  732       else:
  733         # oops. this line isn't lock info. stop parsing tags.
  734         state = 0
  735       
  736     if state == 0:
  737       if line[:9] == 'RCS file:':
  738         filename = line[10:-1]
  739       elif line[:5] == 'head:':
  740         head = line[6:-1]
  741       elif line[:7] == 'branch:':
  742         branch = line[8:-1]
  743       elif line[:6] == 'locks:':
  744         # start parsing the lock information
  745         state = 2
  746       elif line[:14] == 'symbolic names':
  747         # start parsing the tag information
  748         state = 1
  749       elif line == ENTRY_END_MARKER:
  750         # end of the headers
  751         break
  752       elif line == LOG_END_MARKER:
  753         # end of this file's log information
  754         eof = _EOF_FILE
  755         break
  756       else:
  757         error = _re_cvsnt_error.match(line)
  758         if error:
  759           p1, p2, p3, msg = error.groups()
  760           filename = p1 or p2 or p3
  761           if not filename:
  762             raise vclib.Error("Could not get filename from CVSNT error:\n%s"
  763                                % line)
  764           eof = _EOF_ERROR
  765           break
  766 
  767         error = _re_log_error.match(line)
  768         if error:
  769           filename, msg = error.groups()
  770           if msg[:30] == 'warning: Unknown phrases like ':
  771             # don't worry about this warning. it can happen with some RCS
  772             # files that have unknown fields in them (e.g. "permissions 644;"
  773             continue
  774           eof = _EOF_ERROR
  775           break
  776 
  777   return filename, branch, taginfo, lockinfo, msg, eof
  778 
# Matches the "date: ...; author: ...; state: ...;" line of an rlog entry.
# Groups: 1 = date, 2 = author, 3 = state, 5 = lines changed (optional),
# 7 = commitid (optional).
_re_log_info = re.compile(r'^date:\s+([^;]+);'
                          r'\s+author:\s+([^;]+);'
                          r'\s+state:\s+([^;]+);'
                          r'(\s+lines:\s+([0-9\s+-]+);?)?'
                          r'(\s+commitid:\s+([a-zA-Z0-9]+))?\n$')
### _re_rev should be updated to extract the "locked" flag
# Matches the "revision <number>" line; group 1 is the revision number.
_re_rev = re.compile(r'^revision\s+([0-9.]+).*')
  786 def _parse_log_entry(fp):
  787   """Parse a single log entry.
  788 
  789   On entry, fp should point to the first line of the entry (the "revision"
  790   line).
  791   On exit, fp will have consumed the log separator line (dashes) or the
  792   end-of-file marker (equals).
  793 
  794   Returns: Revision object and eof flag (see _EOF_*)
  795   """
  796   rev = None
  797   line = fp.readline()
  798   if not line:
  799     return None, _EOF_LOG
  800   if line == LOG_END_MARKER:
  801     # Needed because some versions of RCS precede LOG_END_MARKER
  802     # with ENTRY_END_MARKER
  803     return None, _EOF_FILE
  804   if line[:8] == 'revision':
  805     match = _re_rev.match(line)
  806     if not match:
  807       return None, _EOF_LOG
  808     rev = match.group(1)
  809 
  810     line = fp.readline()
  811     if not line:
  812       return None, _EOF_LOG
  813     match = _re_log_info.match(line)
  814 
  815   eof = None
  816   log = ''
  817   while 1:
  818     line = fp.readline()
  819     if not line:
  820       # true end-of-file
  821       eof = _EOF_LOG
  822       break
  823     if line[:9] == 'branches:':
  824       continue
  825     if line == ENTRY_END_MARKER:
  826       break
  827     if line == LOG_END_MARKER:
  828       # end of this file's log information
  829       eof = _EOF_FILE
  830       break
  831 
  832     log = log + line
  833 
  834   if not rev or not match:
  835     # there was a parsing error
  836     return None, eof
  837 
  838   # parse out a time tuple for the local time
  839   tm = vclib.ccvs.cvs_strptime(match.group(1))
  840 
  841   # rlog seems to assume that two-digit years are 1900-based (so, "04"
  842   # comes out as "1904", not "2004").
  843   EPOCH = 1970
  844   if tm[0] < EPOCH:
  845     tm = list(tm)
  846     if (tm[0] - 1900) < 70:
  847       tm[0] = tm[0] + 100
  848     if tm[0] < EPOCH:
  849       raise ValueError, 'invalid year'
  850   date = calendar.timegm(tm)
  851 
  852   return Revision(rev, date,
  853                   # author, state, lines changed
  854                   match.group(2), match.group(3) == "dead", match.group(5),
  855                   log), eof
  856 
  857 def _skip_file(fp):
  858   "Skip the rest of a file's log information."
  859   while 1:
  860     line = fp.readline()
  861     if not line:
  862       break
  863     if line == LOG_END_MARKER:
  864       break
  865 
  866 def _paths_eq(path1, path2):
  867   "See if two path strings are the same"
  868   # This function is neccessary because CVSNT (since version 2.0.29)
  869   # converts paths passed as arguments to use upper case drive
  870   # letter and forward slashes
  871   return os.path.normcase(path1) == os.path.normcase(path2)
  872 
  873 
  874 # ======================================================================
  875 # Functions for interpreting and manipulating log information
  876 
  877 def _file_log(revs, taginfo, lockinfo, cur_branch, filter):
  878   """Augment list of Revisions and a dictionary of Tags"""
  879 
  880   # Add artificial ViewVC tag MAIN. If the file has a default branch, then
  881   # MAIN acts like a branch tag pointing to that branch. Otherwise MAIN acts
  882   # like a branch tag that points to the trunk. (Note: A default branch is
  883   # just a branch number specified in an RCS file that tells CVS and RCS
  884   # what branch to use for checkout and update operations by default, when
  885   # there's no revision argument or sticky branch to override it. Default
  886   # branches get set by "cvs import" to point to newly created vendor
  887   # branches. Sometimes they are also set manually with "cvs admin -b")
  888   taginfo['MAIN'] = cur_branch
  889 
  890   # Create tag objects
  891   for name, num in taginfo.items():
  892     taginfo[name] = Tag(name, num)
  893   tags = taginfo.values()
  894 
  895   # Set view_tag to a Tag object in order to filter results. We can filter by
  896   # revision number or branch number
  897   if filter:
  898     try:
  899       view_tag = Tag(None, filter)
  900     except ValueError:
  901       view_tag = None
  902     else:
  903       tags.append(view_tag)  
  904 
  905   # Match up tags and revisions
  906   _match_revs_tags(revs, tags)
  907 
  908   # Match up lockinfo and revision
  909   for rev in revs:
  910     rev.lockinfo = lockinfo.get(rev.string)
  911       
  912   # Add artificial ViewVC tag HEAD, which acts like a non-branch tag pointing
  913   # at the latest revision on the MAIN branch. The HEAD revision doesn't have
  914   # anything to do with the "head" revision number specified in the RCS file
  915   # and in rlog output. HEAD refers to the revision that the CVS and RCS co
  916   # commands will check out by default, whereas the "head" field just refers
  917   # to the highest revision on the trunk.  
  918   taginfo['HEAD'] = _add_tag('HEAD', taginfo['MAIN'].co_rev)
  919 
  920   # Determine what revisions to return
  921   if filter:
  922     # If view_tag isn't set, it means filter is not a valid revision or
  923     # branch number. Check taginfo to see if filter is set to a valid tag
  924     # name. If so, filter by that tag, otherwise raise an error.
  925     if not view_tag:
  926       try:
  927         view_tag = taginfo[filter]
  928       except KeyError:
  929         raise vclib.Error('Invalid tag or revision number "%s"' % filter)
  930     filtered_revs = [ ]
  931 
  932     # only include revisions on the tag branch or it's parent branches
  933     if view_tag.is_branch:
  934       branch = view_tag.number
  935     elif len(view_tag.number) > 2:
  936       branch = view_tag.number[:-1]
  937     else:
  938       branch = ()
  939 
  940     # for a normal tag, include all tag revision and all preceding revisions.
  941     # for a branch tag, include revisions on branch, branch point revision,
  942     # and all preceding revisions
  943     for rev in revs:
  944       if (rev.number == view_tag.number
  945           or rev.branch_number == view_tag.number
  946           or (rev.number < view_tag.number
  947               and rev.branch_number == branch[:len(rev.branch_number)])):
  948         filtered_revs.append(rev)
  949 
  950     # get rid of the view_tag if it was only created for filtering
  951     if view_tag.name is None:
  952       _remove_tag(view_tag)
  953   else:
  954     filtered_revs = revs
  955   
  956   return filtered_revs
  957 
def _get_logs(repos, dir_path_parts, entries, view_tag, get_dirs):
  """Fill in log information for the directory entries in ENTRIES.

  For every entry for which _log_path() yields an RCS file path, rlog is
  run (through REPOS.rcs_popen) in batches of at most 100 paths and the
  output is parsed.  Each entry gets its rev, date, author, dead, log
  and lockinfo attributes set (to None when no log was retrieved);
  entries passed to rlog also get path and idx, and absent once their
  output has been processed.

  VIEW_TAG, if set, selects the tag or branch whose revision is wanted;
  otherwise rlog is invoked with '-r'.  GET_DIRS is handed to
  _log_path().

  Returns a dictionary whose keys are all the tag names seen in the
  parsed files, plus MAIN and HEAD.

  Raises vclib.Error if the rlog output ends early or names an
  unexpected file, and the file concerned has no recorded errors.
  """
  alltags = {           # all the tags seen in the files of this dir
    'MAIN' : '',
    'HEAD' : '1.1'
    }

  entries_idx = 0
  entries_len = len(entries)
  max_args = 100

  # each pass of this loop runs rlog on one batch of entries
  while 1:
    chunk = []

    # collect up to max_args entries that have an RCS file to log
    while len(chunk) < max_args and entries_idx < entries_len:
      entry = entries[entries_idx]
      path = _log_path(entry, repos._getpath(dir_path_parts), get_dirs)
      if path:
        entry.path = path
        entry.idx = entries_idx
        chunk.append(entry)

      # set properties even if we don't retrieve logs
      entry.rev = entry.date = entry.author = None
      entry.dead = entry.log = entry.lockinfo = None

      entries_idx = entries_idx + 1

    # nothing left to log: every entry has been visited
    if not chunk:
      return alltags

    args = []
    if not view_tag:
      # NOTE: can't pass tag on command line since a tag may contain "-"
      #       we'll search the output for the appropriate revision
      # fetch the latest revision on the default branch
      args.append('-r')
    args.extend(map(lambda x: x.path, chunk))
    rlog = repos.rcs_popen('rlog', args, 'rt')

    # consume each file found in the resulting log
    chunk_idx = 0
    while chunk_idx < len(chunk):
      file = chunk[chunk_idx]
      filename, default_branch, taginfo, lockinfo, msg, eof \
        = _parse_log_header(rlog)

      if eof == _EOF_LOG:
        # the rlog output ended early. this can happen on errors that rlog 
        # thinks are so serious that it stops parsing the current file and
        # refuses to parse any of the files that come after it. one of the
        # errors that triggers this obnoxious behavior looks like:
        #
        # rlog: c:\cvsroot\dir\file,v:8: unknown expand mode u
        # rlog aborted

        # if current file has errors, restart on the next one
        # (rewinding entries_idx makes the outer loop build its next
        # batch starting from the entry after the failed one)
        if file.errors:
          chunk_idx = chunk_idx + 1
          if chunk_idx < len(chunk):
            entries_idx = chunk[chunk_idx].idx
          break

        # otherwise just error out
        raise vclib.Error('Rlog output ended early. Expected RCS file "%s"'
                          % file.path)

      # if rlog filename doesn't match current file and we already have an
      # error message about this file, move on to the next file
      # (file becomes None once the batch is exhausted, which makes the
      # raise below fire)
      while not (file and _paths_eq(file.path, filename)):
        if file and file.errors:
          chunk_idx = chunk_idx + 1
          file = chunk_idx < len(chunk) and chunk[chunk_idx] or None
          continue

        raise vclib.Error('Error parsing rlog output. Expected RCS file %s'
                          ', found %s' % (file and file.path, filename))

      # if we get an rlog error message, restart loop without advancing
      # chunk_idx cause there might be more output about the same file
      if eof == _EOF_ERROR:
        file.errors.append("rlog error: %s" % msg)
        continue

      # work out which tag (if any) the wanted revision must match
      tag = None
      if view_tag == 'MAIN' or view_tag == 'HEAD':
        tag = Tag(None, default_branch)
      elif taginfo.has_key(view_tag):
        tag = Tag(None, taginfo[view_tag])
      elif view_tag and (eof != _EOF_FILE):
        # the tag wasn't found, so skip this file (unless we already
        # know there's nothing left of it to read)
        _skip_file(rlog)
        eof = _EOF_FILE

      # we don't care about the specific values -- just the keys and whether
      # the values point to branches or revisions. this the fastest way to 
      # merge the set of keys and keep values that allow us to make the 
      # distinction between branch tags and normal tags
      alltags.update(taginfo)

      # read all of the log entries until we find the revision we want
      wanted_entry = None
      while not eof:

        # fetch one of the log entries
        entry, eof = _parse_log_entry(rlog)

        if not entry:
          # parsing error
          break

        # A perfect match is a revision on the branch being viewed or
        # a revision having the tag being viewed or any revision
        # when nothing is being viewed. When there's a perfect match
        # we set the wanted_entry value and break out of the loop.
        # An imperfect match is a revision at the branch point of a
        # branch being viewed. When there's an imperfect match we
        # also set the wanted_entry value but keep looping in case
        # something better comes along.
        perfect = not tag or entry.number == tag.number or       \
                  (len(entry.number) == 2 and not tag.number) or \
                  entry.number[:-1] == tag.number
        if perfect or entry.number == tag.number[:-1]:
          wanted_entry = entry
          if perfect:
            break

      if wanted_entry:
        file.rev = wanted_entry.string
        file.date = wanted_entry.date
        file.author = wanted_entry.author
        # only entries of kind vclib.FILE can be flagged dead
        file.dead = file.kind == vclib.FILE and wanted_entry.dead
        file.absent = 0
        file.log = wanted_entry.log
        file.lockinfo = lockinfo.get(file.rev)
        # suppress rlog errors if we find a usable revision in the end
        del file.errors[:]
      elif file.kind == vclib.FILE:
        # no matching revision: flag the file as absent rather than
        # recording an error
        file.dead = 0
        #file.errors.append("No revisions exist on %s" % (view_tag or "MAIN"))
        file.absent = 1
        
      # done with this file now, skip the rest of this file's revisions
      if not eof:
        _skip_file(rlog)

      # end of while loop, advance index
      chunk_idx = chunk_idx + 1

    rlog.close()
 1108 
 1109 def _log_path(entry, dirpath, getdirs):
 1110   path = name = None
 1111   if not entry.errors:
 1112     if entry.kind == vclib.FILE:
 1113       path = entry.in_attic and 'Attic' or ''
 1114       name = entry.name
 1115     elif entry.kind == vclib.DIR and getdirs:
 1116       entry.newest_file = _newest_file(os.path.join(dirpath, entry.name))
 1117       if entry.newest_file:
 1118         path = entry.name
 1119         name = entry.newest_file
 1120 
 1121   if name:
 1122     return os.path.join(dirpath, path, name + ',v')
 1123   return None
 1124 
 1125 
 1126 # ======================================================================
 1127 # Functions for dealing with the filesystem
 1128 
 1129 if sys.platform == "win32":
 1130   def _check_path(path):
 1131     kind = None
 1132     errors = []
 1133 
 1134     if os.path.isfile(path):
 1135       kind = vclib.FILE
 1136     elif os.path.isdir(path):
 1137       kind = vclib.DIR
 1138     else:
 1139       errors.append("error: path is not a file or directory")
 1140 
 1141     if not os.access(path, os.R_OK):
 1142       errors.append("error: path is not accessible")
 1143 
 1144     return kind, errors
 1145 
 1146 else:
 1147   _uid = os.getuid()
 1148   _gid = os.getgid()
 1149 
 1150   def _check_path(pathname):
 1151     try:
 1152       info = os.stat(pathname)
 1153     except os.error, e:
 1154       return None, ["stat error: %s" % e]
 1155 
 1156     kind = None
 1157     errors = []
 1158 
 1159     mode = info[stat.ST_MODE]
 1160     isdir = stat.S_ISDIR(mode)
 1161     isreg = stat.S_ISREG(mode)
 1162     if isreg or isdir:
 1163       #
 1164       # Quick version of access() where we use existing stat() data.
 1165       #
 1166       # This might not be perfect -- the OS may return slightly different
 1167       # results for some bizarre reason. However, we make a good show of
 1168       # "can I read this file/dir?" by checking the various perm bits.
 1169       #
 1170       # NOTE: if the UID matches, then we must match the user bits -- we
 1171       # cannot defer to group or other bits. Similarly, if the GID matches,
 1172       # then we must have read access in the group bits.
 1173       #
 1174       # If the UID or GID don't match, we need to check the
 1175       # results of an os.access() call, in case the web server process
 1176       # is in the group that owns the directory.
 1177       #
 1178       if isdir:
 1179         mask = stat.S_IROTH | stat.S_IXOTH
 1180       else:
 1181         mask = stat.S_IROTH
 1182 
 1183       if info[stat.ST_UID] == _uid:
 1184         if ((mode >> 6) & mask) != mask:
 1185           errors.append("error: path is not accessible to user %i" % _uid)
 1186       elif info[stat.ST_GID] == _gid:
 1187         if ((mode >> 3) & mask) != mask:
 1188           errors.append("error: path is not accessible to group %i" % _gid)
 1189       # If the process running the web server is a member of
 1190       # the group stat.ST_GID access may be granted.
 1191       # so the fall back to os.access is needed to figure this out.
 1192       elif (mode & mask) != mask:
 1193         if not os.access(pathname, isdir and (os.R_OK | os.X_OK) or os.R_OK):
 1194           errors.append("error: path is not accessible")
 1195 
 1196       if isdir:
 1197         kind = vclib.DIR
 1198       else:
 1199         kind = vclib.FILE
 1200 
 1201     else:
 1202       errors.append("error: path is not a file or directory")
 1203 
 1204     return kind, errors
 1205 
 1206 def _newest_file(dirpath):
 1207   """Find the last modified RCS file in a directory"""
 1208   newest_file = None
 1209   newest_time = 0
 1210 
 1211   ### FIXME:  This sucker is leaking unauthorized paths! ###
 1212   
 1213   for subfile in os.listdir(dirpath):
 1214     ### filter CVS locks? stale NFS handles?
 1215     if subfile[-2:] != ',v':
 1216       continue
 1217     path = os.path.join(dirpath, subfile)
 1218     info = os.stat(path)
 1219     if not stat.S_ISREG(info[stat.ST_MODE]):
 1220       continue
 1221     if info[stat.ST_MTIME] > newest_time:
 1222       kind, verboten = _check_path(path)
 1223       if kind == vclib.FILE and not verboten:
 1224         newest_file = subfile[:-2]
 1225         newest_time = info[stat.ST_MTIME]
 1226 
 1227   return newest_file