"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "codespell_lib/tests/test_dictionary.py" between
codespell-1.17.1.tar.gz and codespell-2.0.0.tar.gz

About: codespell checks source code and other text files for common misspellings.

test_dictionary.py  (codespell-1.17.1):test_dictionary.py  (codespell-2.0.0)
skipping to change at line 12 skipping to change at line 12
import glob import glob
import os.path as op import os.path as op
import os import os
import re import re
import warnings import warnings
import pytest import pytest
from codespell_lib._codespell import _builtin_dictionaries from codespell_lib._codespell import _builtin_dictionaries
from codespell_lib._codespell import supported_languages
spellers = dict()
try: try:
import aspell import aspell
speller = aspell.Speller('lang', 'en') for lang in supported_languages:
spellers[lang] = aspell.Speller('lang', lang)
except Exception as exp: # probably ImportError, but maybe also language except Exception as exp: # probably ImportError, but maybe also language
speller = None
if os.getenv('REQUIRE_ASPELL', 'false').lower() == 'true': if os.getenv('REQUIRE_ASPELL', 'false').lower() == 'true':
raise RuntimeError( raise RuntimeError(
'Cannot run complete tests without aspell when ' 'Cannot run complete tests without aspell when '
'REQUIRE_ASPELL=true. Got error during import:\n%s' 'REQUIRE_ASPELL=true. Got error during import:\n%s'
% (exp,)) % (exp,))
else: else:
warnings.warn( warnings.warn(
'aspell not found, but not required, skipping aspell tests. Got ' 'aspell not found, but not required, skipping aspell tests. Got '
'error during import:\n%s' % (exp,)) 'error during import:\n%s' % (exp,))
ws = re.compile(r'.*\s.*') # whitespace ws = re.compile(r'.*\s.*') # whitespace
comma = re.compile(r'.*,.*') # comma comma = re.compile(r'.*,.*') # comma
global_err_dicts = dict() global_err_dicts = dict()
global_pairs = set() global_pairs = set()
# Filename, should be seen as errors in aspell or not # Filename, should be seen as errors in aspell or not
_data_dir = op.join(op.dirname(__file__), '..', 'data') _data_dir = op.join(op.dirname(__file__), '..', 'data')
_fnames_in_aspell = [ _fnames_in_aspell = [
(op.join(_data_dir, 'dictionary%s.txt' % d[2]), d[3:5]) (op.join(_data_dir, 'dictionary%s.txt' % d[2]), d[3:5], d[5:7])
for d in _builtin_dictionaries] for d in _builtin_dictionaries]
fname_params = pytest.mark.parametrize('fname, in_aspell', _fnames_in_aspell) fname_params = pytest.mark.parametrize('fname, in_aspell, in_dictionary', _fnames_in_aspell) # noqa: E501
def test_dictionaries_exist(): def test_dictionaries_exist():
"""Test consistency of dictionaries.""" """Test consistency of dictionaries."""
doc_fnames = set(op.basename(f[0]) for f in _fnames_in_aspell) doc_fnames = set(op.basename(f[0]) for f in _fnames_in_aspell)
got_fnames = set(op.basename(f) got_fnames = set(op.basename(f)
for f in glob.glob(op.join(_data_dir, '*.txt'))) for f in glob.glob(op.join(_data_dir, '*.txt')))
assert doc_fnames == got_fnames assert doc_fnames == got_fnames
@fname_params @fname_params
def test_dictionary_formatting(fname, in_aspell): def test_dictionary_formatting(fname, in_aspell, in_dictionary):
"""Test that all dictionary entries are valid.""" """Test that all dictionary entries are valid."""
errors = list() errors = list()
with open(fname, 'rb') as fid: with open(fname, 'rb') as fid:
for line in fid: for line in fid:
err, rep = line.decode('utf-8').split('->') err, rep = line.decode('utf-8').split('->')
err = err.lower() err = err.lower()
rep = rep.rstrip('\n') rep = rep.rstrip('\n')
try: try:
_check_err_rep(err, rep, in_aspell, fname) _check_err_rep(err, rep, in_aspell, fname, in_dictionary)
except AssertionError as exp: except AssertionError as exp:
errors.append(str(exp).split('\n')[0]) errors.append(str(exp).split('\n')[0])
if len(errors): if len(errors):
raise AssertionError('\n' + '\n'.join(errors)) raise AssertionError('\n' + '\n'.join(errors))
def _check_aspell(phrase, msg, in_aspell, fname): def _check_aspell(phrase, msg, in_aspell, fname, languages):
if speller is None: if not spellers: # if no spellcheckers exist
return # cannot check return # cannot check
if in_aspell is None: if in_aspell is None:
return # don't check return # don't check
if ' ' in phrase: if ' ' in phrase:
for word in phrase.split(): for word in phrase.split():
_check_aspell(word, msg, in_aspell, fname) _check_aspell(word, msg, in_aspell, fname, languages)
return # stop normal checking as we've done each word above return # stop normal checking as we've done each word above
this_in_aspell = speller.check( this_in_aspell = any(spellers[lang].check(phrase.encode(
phrase.encode(speller.ConfigKeys()['encoding'][1])) spellers[lang].ConfigKeys()['encoding'][1])) for lang in languages)
end = 'be in aspell for dictionary %s' % (fname,) end = 'be in aspell dictionaries (%s) for dictionary %s' % (
', '.join(languages), fname)
if in_aspell: # should be an error in aspell if in_aspell: # should be an error in aspell
assert this_in_aspell, '%s should %s' % (msg, end) assert this_in_aspell, '%s should %s' % (msg, end)
else: # shouldn't be else: # shouldn't be
assert not this_in_aspell, '%s should not %s' % (msg, end) assert not this_in_aspell, '%s should not %s' % (msg, end)
def _check_err_rep(err, rep, in_aspell, fname): def _check_err_rep(err, rep, in_aspell, fname, languages):
assert ws.match(err) is None, 'error %r has whitespace' % err assert ws.match(err) is None, 'error %r has whitespace' % err
assert comma.match(err) is None, 'error %r has a comma' % err assert comma.match(err) is None, 'error %r has a comma' % err
assert len(rep) > 0, ('error %s: correction %r must be non-empty' assert len(rep) > 0, ('error %s: correction %r must be non-empty'
% (err, rep)) % (err, rep))
assert not re.match(r'^\s.*', rep), ('error %s: correction %r ' assert not re.match(r'^\s.*', rep), ('error %s: correction %r '
'cannot start with whitespace' 'cannot start with whitespace'
% (err, rep)) % (err, rep))
_check_aspell(err, 'error %r' % (err,), in_aspell[0], fname) _check_aspell(err, 'error %r' % (err,), in_aspell[0], fname, languages[0])
prefix = 'error %s: correction %r' % (err, rep) prefix = 'error %s: correction %r' % (err, rep)
for (r, msg) in [ for (r, msg) in [
(r'^,', (r'^,',
'%s starts with a comma'), '%s starts with a comma'),
(r'\s,', (r'\s,',
'%s contains a whitespace character followed by a comma'), '%s contains a whitespace character followed by a comma'),
(r',\s\s', (r',\s\s',
'%s contains a comma followed by multiple whitespace characters'), '%s contains a comma followed by multiple whitespace characters'),
(r',[^ ]', (r',[^ ]',
'%s contains a comma *not* followed by a space'), '%s contains a comma *not* followed by a space'),
(r'\s+$', (r'\s+$',
'%s has a trailing space'), '%s has a trailing space'),
(r'^[^,]*,\s*$', (r'^[^,]*,\s*$',
'%s has a single entry but contains a trailing comma')]: '%s has a single entry but contains a trailing comma')]:
assert not re.search(r, rep), (msg % (prefix,)) assert not re.search(r, rep), (msg % (prefix,))
del msg del msg
if rep.count(','): if rep.count(','):
assert rep.endswith(','), ('error %s: multiple corrections must end ' assert rep.endswith(','), ('error %s: multiple corrections must end '
'with trailing ","' % (err,)) 'with trailing ","' % (err,))
reps = [r.strip() for r in rep.lower().split(',')] reps = [r.strip() for r in rep.split(',')]
reps = [r for r in reps if len(r)] reps = [r for r in reps if len(r)]
for r in reps: for r in reps:
assert err != r.lower(), ('error %r corrects to itself amongst others' assert err != r.lower(), ('error %r corrects to itself amongst others'
% (err,)) % (err,))
_check_aspell( _check_aspell(
r, 'error %s: correction %r' % (err, r), in_aspell[1], fname) r, 'error %s: correction %r' % (err, r),
in_aspell[1], fname, languages[1])
# aspell dictionary is case sensitive, so pass the original case into there
# we could ignore the case, but that would miss things like days of the
# week which we want to be correct
reps = [r.lower() for r in reps]
assert len(set(reps)) == len(reps), ('error %s: corrections "%s" are not ' assert len(set(reps)) == len(reps), ('error %s: corrections "%s" are not '
'(lower-case) unique' % (err, rep)) '(lower-case) unique' % (err, rep))
@pytest.mark.parametrize('err, rep, match', [ @pytest.mark.parametrize('err, rep, match', [
('a a', 'bar', 'has whitespace'), ('a a', 'bar', 'has whitespace'),
('a,a', 'bar', 'has a comma'), ('a,a', 'bar', 'has a comma'),
('a', '', 'non-empty'), ('a', '', 'non-empty'),
('a', ' bar', 'start with whitespace'), ('a', ' bar', 'start with whitespace'),
('a', ',bar', 'starts with a comma'), ('a', ',bar', 'starts with a comma'),
('a', 'bar,bat', '.*not.*followed by a space'), ('a', 'bar,bat', '.*not.*followed by a space'),
('a', 'bar ', 'trailing space'), ('a', 'bar ', 'trailing space'),
('a', 'b ,ar', 'contains a whitespace.*followed by a comma'), ('a', 'b ,ar', 'contains a whitespace.*followed by a comma'),
('a', 'bar,', 'single entry.*comma'), ('a', 'bar,', 'single entry.*comma'),
('a', 'bar, bat', 'must end with trailing ","'), ('a', 'bar, bat', 'must end with trailing ","'),
('a', 'a, bar,', 'corrects to itself amongst others'), ('a', 'a, bar,', 'corrects to itself amongst others'),
('a', 'a', 'corrects to itself'), ('a', 'a', 'corrects to itself'),
('a', 'bar, bar,', 'unique'), ('a', 'bar, Bar,', 'unique'),
]) ])
def test_error_checking(err, rep, match): def test_error_checking(err, rep, match):
"""Test that our error checking works.""" """Test that our error checking works."""
with pytest.raises(AssertionError, match=match): with pytest.raises(AssertionError, match=match):
_check_err_rep(err, rep, (None, None), 'dummy') _check_err_rep(err, rep, (None, None), 'dummy',
(supported_languages, supported_languages))
@pytest.mark.skipif(speller is None, reason='requires aspell') @pytest.mark.skipif(not spellers, reason='requires aspell-en')
@pytest.mark.parametrize('err, rep, err_aspell, rep_aspell, match', [ @pytest.mark.parametrize('err, rep, err_aspell, rep_aspell, match', [
# This doesn't raise any exceptions, so skip for now: # This doesn't raise any exceptions, so skip for now:
# pytest.param('a', 'uvw, bar,', None, None, 'should be in aspell'), # pytest.param('a', 'uvw, bar,', None, None, 'should be in aspell'),
('abcdef', 'uvwxyz, bar,', True, None, 'should be in aspell'), ('abcdef', 'uvwxyz, bar,', True, None, 'should be in aspell'),
('a', 'uvwxyz, bar,', False, None, 'should not be in aspell'), ('a', 'uvwxyz, bar,', False, None, 'should not be in aspell'),
('a', 'abcdef, uvwxyz,', None, True, 'should be in aspell'), ('a', 'abcdef, uvwxyz,', None, True, 'should be in aspell'),
('abcdef', 'uvwxyz, bar,', True, True, 'should be in aspell'), ('abcdef', 'uvwxyz, bar,', True, True, 'should be in aspell'),
('abcdef', 'uvwxyz, bar,', False, True, 'should be in aspell'), ('abcdef', 'uvwxyz, bar,', False, True, 'should be in aspell'),
('a', 'bar, back,', None, False, 'should not be in aspell'), ('a', 'bar, back,', None, False, 'should not be in aspell'),
('a', 'bar, back, Wednesday,', None, False, 'should not be in aspell'),
('abcdef', 'ghijkl, uvwxyz,', True, False, 'should be in aspell'), ('abcdef', 'ghijkl, uvwxyz,', True, False, 'should be in aspell'),
('abcdef', 'uvwxyz, bar,', False, False, 'should not be in aspell'), ('abcdef', 'uvwxyz, bar,', False, False, 'should not be in aspell'),
# Multi-word corrections # Multi-word corrections
# One multi-word, both parts # One multi-word, both parts
('a', 'abcdef uvwxyz', None, True, 'should be in aspell'), ('a', 'abcdef uvwxyz', None, True, 'should be in aspell'),
('a', 'bar back', None, False, 'should not be in aspell'), ('a', 'bar back', None, False, 'should not be in aspell'),
('a', 'bar back Wednesday', None, False, 'should not be in aspell'),
# Second multi-word, both parts # Second multi-word, both parts
('a', 'bar back, abcdef uvwxyz, bar,', None, True, 'should be in aspell'), ('a', 'bar back, abcdef uvwxyz, bar,', None, True, 'should be in aspell'),
('a', 'abcdef uvwxyz, bar back, ghijkl,', None, False, 'should not be in aspell'), # noqa: E501 ('a', 'abcdef uvwxyz, bar back, ghijkl,', None, False, 'should not be in aspell'), # noqa: E501
# One multi-word, second part # One multi-word, second part
('a', 'bar abcdef', None, True, 'should be in aspell'), ('a', 'bar abcdef', None, True, 'should be in aspell'),
('a', 'abcdef back', None, False, 'should not be in aspell'), ('a', 'abcdef back', None, False, 'should not be in aspell'),
]) ])
def test_error_checking_in_aspell(err, rep, err_aspell, rep_aspell, match): def test_error_checking_in_aspell(err, rep, err_aspell, rep_aspell, match):
"""Test that our error checking works with aspell.""" """Test that our error checking works with aspell."""
with pytest.raises(AssertionError, match=match): with pytest.raises(AssertionError, match=match):
_check_err_rep(err, rep, (err_aspell, rep_aspell), 'dummy') _check_err_rep(
err, rep, (err_aspell, rep_aspell), 'dummy',
(supported_languages, supported_languages))
# allow some duplicates, like "m-i-n-i-m-i-s-e", or "c-a-l-c-u-l-a-t-a-b-l-e" # allow some duplicates, like "m-i-n-i-m-i-s-e", or "c-a-l-c-u-l-a-t-a-b-l-e"
allowed_dups = { allowed_dups = {
('dictionary.txt', 'dictionary_en-GB_to_en-US.txt'), ('dictionary.txt', 'dictionary_en-GB_to_en-US.txt'),
('dictionary.txt', 'dictionary_rare.txt'), ('dictionary.txt', 'dictionary_rare.txt'),
('dictionary.txt', 'dictionary_usage.txt'),
('dictionary_rare.txt', 'dictionary_usage.txt'),
} }
@fname_params @fname_params
@pytest.mark.dependency(name='dictionary loop') @pytest.mark.dependency(name='dictionary loop')
def test_dictionary_looping(fname, in_aspell): def test_dictionary_looping(fname, in_aspell, in_dictionary):
"""Test that all dictionary entries are valid.""" """Test that all dictionary entries are valid."""
this_err_dict = dict() this_err_dict = dict()
short_fname = op.basename(fname) short_fname = op.basename(fname)
with open(fname, 'rb') as fid: with open(fname, 'rb') as fid:
for line in fid: for line in fid:
err, rep = line.decode('utf-8').split('->') err, rep = line.decode('utf-8').split('->')
err = err.lower() err = err.lower()
assert err not in this_err_dict, \ assert err not in this_err_dict, \
'error %r already exists in %s' % (err, short_fname) 'error %r already exists in %s' % (err, short_fname)
rep = rep.rstrip('\n') rep = rep.rstrip('\n')
skipping to change at line 234 skipping to change at line 251
('error %s: correction %s from dictionary %s is an ' ('error %s: correction %s from dictionary %s is an '
'error itself in dictionary %s' 'error itself in dictionary %s'
% (err, r, other_fname, short_fname)) % (err, r, other_fname, short_fname))
assert pair not in global_pairs assert pair not in global_pairs
global_pairs.add(pair) global_pairs.add(pair)
global_err_dicts[short_fname] = this_err_dict global_err_dicts[short_fname] = this_err_dict
@pytest.mark.dependency(depends=['dictionary loop']) @pytest.mark.dependency(depends=['dictionary loop'])
def test_ran_all(): def test_ran_all():
"""Test that all pairwise tests ran.""" """Test that all pairwise tests ran."""
for f1, _ in _fnames_in_aspell: for f1, _, _ in _fnames_in_aspell:
f1 = op.basename(f1) f1 = op.basename(f1)
for f2, _ in _fnames_in_aspell: for f2, _, _ in _fnames_in_aspell:
f2 = op.basename(f2) f2 = op.basename(f2)
assert (f1, f2) in global_pairs assert (f1, f2) in global_pairs
assert len(global_pairs) == len(_fnames_in_aspell) ** 2 assert len(global_pairs) == len(_fnames_in_aspell) ** 2
 End of changes. 24 change blocks. 
23 lines changed or deleted 40 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)