"Fossies" - the Fresh Open Source Software Archive

Member "codespell-1.17.1/codespell_lib/tests/test_dictionary.py" (22 May 2020, 10356 Bytes) of package /linux/misc/codespell-1.17.1.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here. See also the latest Fossies "Diffs" side-by-side code changes report for "test_dictionary.py": 1.16.0_vs_1.17.1.

    1 # -*- coding: utf-8 -*-
    2 
    3 import glob
    4 import os.path as op
    5 import os
    6 import re
    7 import warnings
    8 
    9 import pytest
   10 
   11 from codespell_lib._codespell import _builtin_dictionaries
   12 
   13 try:
   14     import aspell
   15     speller = aspell.Speller('lang', 'en')
   16 except Exception as exp:  # probably ImportError, but maybe also language
   17     speller = None
   18     if os.getenv('REQUIRE_ASPELL', 'false').lower() == 'true':
   19         raise RuntimeError(
   20             'Cannot run complete tests without aspell when '
   21             'REQUIRE_ASPELL=true. Got error during import:\n%s'
   22             % (exp,))
   23     else:
   24         warnings.warn(
   25             'aspell not found, but not required, skipping aspell tests. Got '
   26             'error during import:\n%s' % (exp,))
   27 
   28 ws = re.compile(r'.*\s.*')  # whitespace
   29 comma = re.compile(r'.*,.*')  # comma
   30 
   31 global_err_dicts = dict()
   32 global_pairs = set()
   33 
   34 # Filename, should be seen as errors in aspell or not
   35 _data_dir = op.join(op.dirname(__file__), '..', 'data')
   36 _fnames_in_aspell = [
   37     (op.join(_data_dir, 'dictionary%s.txt' % d[2]), d[3:5])
   38     for d in _builtin_dictionaries]
   39 fname_params = pytest.mark.parametrize('fname, in_aspell', _fnames_in_aspell)
   40 
   41 
   42 def test_dictionaries_exist():
   43     """Test consistency of dictionaries."""
   44     doc_fnames = set(op.basename(f[0]) for f in _fnames_in_aspell)
   45     got_fnames = set(op.basename(f)
   46                      for f in glob.glob(op.join(_data_dir, '*.txt')))
   47     assert doc_fnames == got_fnames
   48 
   49 
   50 @fname_params
   51 def test_dictionary_formatting(fname, in_aspell):
   52     """Test that all dictionary entries are valid."""
   53     errors = list()
   54     with open(fname, 'rb') as fid:
   55         for line in fid:
   56             err, rep = line.decode('utf-8').split('->')
   57             err = err.lower()
   58             rep = rep.rstrip('\n')
   59             try:
   60                 _check_err_rep(err, rep, in_aspell, fname)
   61             except AssertionError as exp:
   62                 errors.append(str(exp).split('\n')[0])
   63     if len(errors):
   64         raise AssertionError('\n' + '\n'.join(errors))
   65 
   66 
   67 def _check_aspell(phrase, msg, in_aspell, fname):
   68     if speller is None:
   69         return  # cannot check
   70     if in_aspell is None:
   71         return  # don't check
   72     if ' ' in phrase:
   73         for word in phrase.split():
   74             _check_aspell(word, msg, in_aspell, fname)
   75         return  # stop normal checking as we've done each word above
   76     this_in_aspell = speller.check(
   77         phrase.encode(speller.ConfigKeys()['encoding'][1]))
   78     end = 'be in aspell for dictionary %s' % (fname,)
   79     if in_aspell:  # should be an error in aspell
   80         assert this_in_aspell, '%s should %s' % (msg, end)
   81     else:  # shouldn't be
   82         assert not this_in_aspell, '%s should not %s' % (msg, end)
   83 
   84 
   85 def _check_err_rep(err, rep, in_aspell, fname):
   86     assert ws.match(err) is None, 'error %r has whitespace' % err
   87     assert comma.match(err) is None, 'error %r has a comma' % err
   88     assert len(rep) > 0, ('error %s: correction %r must be non-empty'
   89                           % (err, rep))
   90     assert not re.match(r'^\s.*', rep), ('error %s: correction %r '
   91                                          'cannot start with whitespace'
   92                                          % (err, rep))
   93     _check_aspell(err, 'error %r' % (err,), in_aspell[0], fname)
   94     prefix = 'error %s: correction %r' % (err, rep)
   95     for (r, msg) in [
   96             (r'^,',
   97              '%s starts with a comma'),
   98             (r'\s,',
   99              '%s contains a whitespace character followed by a comma'),
  100             (r',\s\s',
  101              '%s contains a comma followed by multiple whitespace characters'),
  102             (r',[^ ]',
  103              '%s contains a comma *not* followed by a space'),
  104             (r'\s+$',
  105              '%s has a trailing space'),
  106             (r'^[^,]*,\s*$',
  107              '%s has a single entry but contains a trailing comma')]:
  108         assert not re.search(r, rep), (msg % (prefix,))
  109     del msg
  110     if rep.count(','):
  111         assert rep.endswith(','), ('error %s: multiple corrections must end '
  112                                    'with trailing ","' % (err,))
  113     reps = [r.strip() for r in rep.lower().split(',')]
  114     reps = [r for r in reps if len(r)]
  115     for r in reps:
  116         assert err != r.lower(), ('error %r corrects to itself amongst others'
  117                                   % (err,))
  118         _check_aspell(
  119             r, 'error %s: correction %r' % (err, r), in_aspell[1], fname)
  120     assert len(set(reps)) == len(reps), ('error %s: corrections "%s" are not '
  121                                          '(lower-case) unique' % (err, rep))
  122 
  123 
  124 @pytest.mark.parametrize('err, rep, match', [
  125     ('a a', 'bar', 'has whitespace'),
  126     ('a,a', 'bar', 'has a comma'),
  127     ('a', '', 'non-empty'),
  128     ('a', ' bar', 'start with whitespace'),
  129     ('a', ',bar', 'starts with a comma'),
  130     ('a', 'bar,bat', '.*not.*followed by a space'),
  131     ('a', 'bar ', 'trailing space'),
  132     ('a', 'b ,ar', 'contains a whitespace.*followed by a comma'),
  133     ('a', 'bar,', 'single entry.*comma'),
  134     ('a', 'bar, bat', 'must end with trailing ","'),
  135     ('a', 'a, bar,', 'corrects to itself amongst others'),
  136     ('a', 'a', 'corrects to itself'),
  137     ('a', 'bar, bar,', 'unique'),
  138 ])
  139 def test_error_checking(err, rep, match):
  140     """Test that our error checking works."""
  141     with pytest.raises(AssertionError, match=match):
  142         _check_err_rep(err, rep, (None, None), 'dummy')
  143 
  144 
  145 @pytest.mark.skipif(speller is None, reason='requires aspell')
  146 @pytest.mark.parametrize('err, rep, err_aspell, rep_aspell, match', [
  147     # This doesn't raise any exceptions, so skip for now:
  148     # pytest.param('a', 'uvw, bar,', None, None, 'should be in aspell'),
  149     ('abcdef', 'uvwxyz, bar,', True, None, 'should be in aspell'),
  150     ('a', 'uvwxyz, bar,', False, None, 'should not be in aspell'),
  151     ('a', 'abcdef, uvwxyz,', None, True, 'should be in aspell'),
  152     ('abcdef', 'uvwxyz, bar,', True, True, 'should be in aspell'),
  153     ('abcdef', 'uvwxyz, bar,', False, True, 'should be in aspell'),
  154     ('a', 'bar, back,', None, False, 'should not be in aspell'),
  155     ('abcdef', 'ghijkl, uvwxyz,', True, False, 'should be in aspell'),
  156     ('abcdef', 'uvwxyz, bar,', False, False, 'should not be in aspell'),
  157     # Multi-word corrections
  158     # One multi-word, both parts
  159     ('a', 'abcdef uvwxyz', None, True, 'should be in aspell'),
  160     ('a', 'bar back', None, False, 'should not be in aspell'),
  161     # Second multi-word, both parts
  162     ('a', 'bar back, abcdef uvwxyz, bar,', None, True, 'should be in aspell'),
  163     ('a', 'abcdef uvwxyz, bar back, ghijkl,', None, False, 'should not be in aspell'),  # noqa: E501
  164     # One multi-word, second part
  165     ('a', 'bar abcdef', None, True, 'should be in aspell'),
  166     ('a', 'abcdef back', None, False, 'should not be in aspell'),
  167 ])
  168 def test_error_checking_in_aspell(err, rep, err_aspell, rep_aspell, match):
  169     """Test that our error checking works with aspell."""
  170     with pytest.raises(AssertionError, match=match):
  171         _check_err_rep(err, rep, (err_aspell, rep_aspell), 'dummy')
  172 
  173 
  174 # allow some duplicates, like "m-i-n-i-m-i-s-e", or "c-a-l-c-u-l-a-t-a-b-l-e"
  175 allowed_dups = {
  176     ('dictionary.txt', 'dictionary_en-GB_to_en-US.txt'),
  177     ('dictionary.txt', 'dictionary_rare.txt'),
  178 }
  179 
  180 
  181 @fname_params
  182 @pytest.mark.dependency(name='dictionary loop')
  183 def test_dictionary_looping(fname, in_aspell):
  184     """Test that all dictionary entries are valid."""
  185     this_err_dict = dict()
  186     short_fname = op.basename(fname)
  187     with open(fname, 'rb') as fid:
  188         for line in fid:
  189             err, rep = line.decode('utf-8').split('->')
  190             err = err.lower()
  191             assert err not in this_err_dict, \
  192                 'error %r already exists in %s' % (err, short_fname)
  193             rep = rep.rstrip('\n')
  194             reps = [r.strip() for r in rep.lower().split(',')]
  195             reps = [r for r in reps if len(r)]
  196             this_err_dict[err] = reps
  197     # 1. check the dict against itself (diagonal)
  198     for err in this_err_dict:
  199         for r in this_err_dict[err]:
  200             assert r not in this_err_dict, \
  201                 ('error %s: correction %s is an error itself in the same '
  202                  'dictionary file %s' % (err, r, short_fname))
  203     pair = (short_fname, short_fname)
  204     assert pair not in global_pairs
  205     global_pairs.add(pair)
  206     for other_fname, other_err_dict in global_err_dicts.items():
  207         # error duplication (eventually maybe we should just merge?)
  208         for err in this_err_dict:
  209             assert err not in other_err_dict, \
  210                 ('error %r in dictionary %s already exists in dictionary '
  211                  '%s' % (err, short_fname, other_fname))
  212         # 2. check corrections in this dict against other dicts (upper)
  213         pair = (short_fname, other_fname)
  214         if pair not in allowed_dups:
  215             for err in this_err_dict:
  216                 assert err not in other_err_dict, \
  217                     ('error %r in dictionary %s already exists in dictionary '
  218                      '%s' % (err, short_fname, other_fname))
  219                 for r in this_err_dict[err]:
  220                     assert r not in other_err_dict, \
  221                         ('error %s: correction %s from dictionary %s is an '
  222                          'error itself in dictionary %s'
  223                          % (err, r, short_fname, other_fname))
  224         assert pair not in global_pairs
  225         global_pairs.add(pair)
  226         # 3. check corrections in other dicts against this dict (lower)
  227         pair = (other_fname, short_fname)
  228         if pair not in allowed_dups:
  229             for err in other_err_dict:
  230                 for r in other_err_dict[err]:
  231                     assert r not in this_err_dict, \
  232                         ('error %s: correction %s from dictionary %s is an '
  233                          'error itself in dictionary %s'
  234                          % (err, r, other_fname, short_fname))
  235         assert pair not in global_pairs
  236         global_pairs.add(pair)
  237     global_err_dicts[short_fname] = this_err_dict
  238 
  239 
  240 @pytest.mark.dependency(depends=['dictionary loop'])
  241 def test_ran_all():
  242     """Test that all pairwise tests ran."""
  243     for f1, _ in _fnames_in_aspell:
  244         f1 = op.basename(f1)
  245         for f2, _ in _fnames_in_aspell:
  246             f2 = op.basename(f2)
  247             assert (f1, f2) in global_pairs
  248     assert len(global_pairs) == len(_fnames_in_aspell) ** 2