"Fossies" - the Fresh Open Source Software Archive

Member "numpy-1.16.4/numpy/core/tests/test_unicode.py" (22 Feb 2019, 13656 Bytes) of package /linux/misc/numpy-1.16.4.tar.gz:


As a special service, "Fossies" has tried to format the requested source page into HTML using (guessed) Python source code syntax highlighting (style: standard) with prefixed line numbers. Alternatively, you can view or download the uninterpreted source code file here.

    1 from __future__ import division, absolute_import, print_function
    2 
    3 import sys
    4 
    5 import numpy as np
    6 from numpy.compat import unicode
    7 from numpy.testing import assert_, assert_equal, assert_array_equal
    8 
    9 # Guess the UCS length for this python interpreter
   10 if sys.version_info[:2] >= (3, 3):
   11     # Python 3.3 uses a flexible string representation
   12     ucs4 = False
   13 
   14     def buffer_length(arr):
   15         if isinstance(arr, unicode):
   16             arr = str(arr)
   17             if not arr:
   18                 charmax = 0
   19             else:
   20                 charmax = max([ord(c) for c in arr])
   21             if charmax < 256:
   22                 size = 1
   23             elif charmax < 65536:
   24                 size = 2
   25             else:
   26                 size = 4
   27             return size * len(arr)
   28         v = memoryview(arr)
   29         if v.shape is None:
   30             return len(v) * v.itemsize
   31         else:
   32             return np.prod(v.shape) * v.itemsize
   33 else:
   34     if len(buffer(u'u')) == 4:
   35         ucs4 = True
   36     else:
   37         ucs4 = False
   38 
   39     def buffer_length(arr):
   40         if isinstance(arr, np.ndarray):
   41             return len(arr.data)
   42         return len(buffer(arr))
   43 
   44 # In both cases below we need to make sure that the byte swapped value (as
   45 # UCS4) is still a valid unicode:
   46 # Value that can be represented in UCS2 interpreters
   47 ucs2_value = u'\u0900'
   48 # Value that cannot be represented in UCS2 interpreters (but can in UCS4)
   49 ucs4_value = u'\U00100900'
   50 
   51 
   52 def test_string_cast():
   53     str_arr = np.array(["1234", "1234\0\0"], dtype='S')
   54     uni_arr1 = str_arr.astype('>U')
   55     uni_arr2 = str_arr.astype('<U')
   56 
   57     if sys.version_info[0] < 3:
   58         assert_array_equal(str_arr, uni_arr1)
   59         assert_array_equal(str_arr, uni_arr2)
   60     else:
   61         assert_(str_arr != uni_arr1)
   62         assert_(str_arr != uni_arr2)
   63     assert_array_equal(uni_arr1, uni_arr2)
   64 
   65 
   66 ############################################################
   67 #    Creation tests
   68 ############################################################
   69 
   70 class CreateZeros(object):
   71     """Check the creation of zero-valued arrays"""
   72 
   73     def content_check(self, ua, ua_scalar, nbytes):
   74 
   75         # Check the length of the unicode base type
   76         assert_(int(ua.dtype.str[2:]) == self.ulen)
   77         # Check the length of the data buffer
   78         assert_(buffer_length(ua) == nbytes)
   79         # Small check that data in array element is ok
   80         assert_(ua_scalar == u'')
   81         # Encode to ascii and double check
   82         assert_(ua_scalar.encode('ascii') == b'')
   83         # Check buffer lengths for scalars
   84         if ucs4:
   85             assert_(buffer_length(ua_scalar) == 0)
   86         else:
   87             assert_(buffer_length(ua_scalar) == 0)
   88 
   89     def test_zeros0D(self):
   90         # Check creation of 0-dimensional objects
   91         ua = np.zeros((), dtype='U%s' % self.ulen)
   92         self.content_check(ua, ua[()], 4*self.ulen)
   93 
   94     def test_zerosSD(self):
   95         # Check creation of single-dimensional objects
   96         ua = np.zeros((2,), dtype='U%s' % self.ulen)
   97         self.content_check(ua, ua[0], 4*self.ulen*2)
   98         self.content_check(ua, ua[1], 4*self.ulen*2)
   99 
  100     def test_zerosMD(self):
  101         # Check creation of multi-dimensional objects
  102         ua = np.zeros((2, 3, 4), dtype='U%s' % self.ulen)
  103         self.content_check(ua, ua[0, 0, 0], 4*self.ulen*2*3*4)
  104         self.content_check(ua, ua[-1, -1, -1], 4*self.ulen*2*3*4)
  105 
  106 
  107 class TestCreateZeros_1(CreateZeros):
  108     """Check the creation of zero-valued arrays (size 1)"""
  109     ulen = 1
  110 
  111 
  112 class TestCreateZeros_2(CreateZeros):
  113     """Check the creation of zero-valued arrays (size 2)"""
  114     ulen = 2
  115 
  116 
  117 class TestCreateZeros_1009(CreateZeros):
  118     """Check the creation of zero-valued arrays (size 1009)"""
  119     ulen = 1009
  120 
  121 
  122 class CreateValues(object):
  123     """Check the creation of unicode arrays with values"""
  124 
  125     def content_check(self, ua, ua_scalar, nbytes):
  126 
  127         # Check the length of the unicode base type
  128         assert_(int(ua.dtype.str[2:]) == self.ulen)
  129         # Check the length of the data buffer
  130         assert_(buffer_length(ua) == nbytes)
  131         # Small check that data in array element is ok
  132         assert_(ua_scalar == self.ucs_value*self.ulen)
  133         # Encode to UTF-8 and double check
  134         assert_(ua_scalar.encode('utf-8') ==
  135                         (self.ucs_value*self.ulen).encode('utf-8'))
  136         # Check buffer lengths for scalars
  137         if ucs4:
  138             assert_(buffer_length(ua_scalar) == 4*self.ulen)
  139         else:
  140             if self.ucs_value == ucs4_value:
  141                 # In UCS2, the \U0010FFFF will be represented using a
  142                 # surrogate *pair*
  143                 assert_(buffer_length(ua_scalar) == 2*2*self.ulen)
  144             else:
  145                 # In UCS2, the \uFFFF will be represented using a
  146                 # regular 2-byte word
  147                 assert_(buffer_length(ua_scalar) == 2*self.ulen)
  148 
  149     def test_values0D(self):
  150         # Check creation of 0-dimensional objects with values
  151         ua = np.array(self.ucs_value*self.ulen, dtype='U%s' % self.ulen)
  152         self.content_check(ua, ua[()], 4*self.ulen)
  153 
  154     def test_valuesSD(self):
  155         # Check creation of single-dimensional objects with values
  156         ua = np.array([self.ucs_value*self.ulen]*2, dtype='U%s' % self.ulen)
  157         self.content_check(ua, ua[0], 4*self.ulen*2)
  158         self.content_check(ua, ua[1], 4*self.ulen*2)
  159 
  160     def test_valuesMD(self):
  161         # Check creation of multi-dimensional objects with values
  162         ua = np.array([[[self.ucs_value*self.ulen]*2]*3]*4, dtype='U%s' % self.ulen)
  163         self.content_check(ua, ua[0, 0, 0], 4*self.ulen*2*3*4)
  164         self.content_check(ua, ua[-1, -1, -1], 4*self.ulen*2*3*4)
  165 
  166 
  167 class TestCreateValues_1_UCS2(CreateValues):
  168     """Check the creation of valued arrays (size 1, UCS2 values)"""
  169     ulen = 1
  170     ucs_value = ucs2_value
  171 
  172 
  173 class TestCreateValues_1_UCS4(CreateValues):
  174     """Check the creation of valued arrays (size 1, UCS4 values)"""
  175     ulen = 1
  176     ucs_value = ucs4_value
  177 
  178 
  179 class TestCreateValues_2_UCS2(CreateValues):
  180     """Check the creation of valued arrays (size 2, UCS2 values)"""
  181     ulen = 2
  182     ucs_value = ucs2_value
  183 
  184 
  185 class TestCreateValues_2_UCS4(CreateValues):
  186     """Check the creation of valued arrays (size 2, UCS4 values)"""
  187     ulen = 2
  188     ucs_value = ucs4_value
  189 
  190 
  191 class TestCreateValues_1009_UCS2(CreateValues):
  192     """Check the creation of valued arrays (size 1009, UCS2 values)"""
  193     ulen = 1009
  194     ucs_value = ucs2_value
  195 
  196 
  197 class TestCreateValues_1009_UCS4(CreateValues):
  198     """Check the creation of valued arrays (size 1009, UCS4 values)"""
  199     ulen = 1009
  200     ucs_value = ucs4_value
  201 
  202 
  203 ############################################################
  204 #    Assignment tests
  205 ############################################################
  206 
  207 class AssignValues(object):
  208     """Check the assignment of unicode arrays with values"""
  209 
  210     def content_check(self, ua, ua_scalar, nbytes):
  211 
  212         # Check the length of the unicode base type
  213         assert_(int(ua.dtype.str[2:]) == self.ulen)
  214         # Check the length of the data buffer
  215         assert_(buffer_length(ua) == nbytes)
  216         # Small check that data in array element is ok
  217         assert_(ua_scalar == self.ucs_value*self.ulen)
  218         # Encode to UTF-8 and double check
  219         assert_(ua_scalar.encode('utf-8') ==
  220                         (self.ucs_value*self.ulen).encode('utf-8'))
  221         # Check buffer lengths for scalars
  222         if ucs4:
  223             assert_(buffer_length(ua_scalar) == 4*self.ulen)
  224         else:
  225             if self.ucs_value == ucs4_value:
  226                 # In UCS2, the \U0010FFFF will be represented using a
  227                 # surrogate *pair*
  228                 assert_(buffer_length(ua_scalar) == 2*2*self.ulen)
  229             else:
  230                 # In UCS2, the \uFFFF will be represented using a
  231                 # regular 2-byte word
  232                 assert_(buffer_length(ua_scalar) == 2*self.ulen)
  233 
  234     def test_values0D(self):
  235         # Check assignment of 0-dimensional objects with values
  236         ua = np.zeros((), dtype='U%s' % self.ulen)
  237         ua[()] = self.ucs_value*self.ulen
  238         self.content_check(ua, ua[()], 4*self.ulen)
  239 
  240     def test_valuesSD(self):
  241         # Check assignment of single-dimensional objects with values
  242         ua = np.zeros((2,), dtype='U%s' % self.ulen)
  243         ua[0] = self.ucs_value*self.ulen
  244         self.content_check(ua, ua[0], 4*self.ulen*2)
  245         ua[1] = self.ucs_value*self.ulen
  246         self.content_check(ua, ua[1], 4*self.ulen*2)
  247 
  248     def test_valuesMD(self):
  249         # Check assignment of multi-dimensional objects with values
  250         ua = np.zeros((2, 3, 4), dtype='U%s' % self.ulen)
  251         ua[0, 0, 0] = self.ucs_value*self.ulen
  252         self.content_check(ua, ua[0, 0, 0], 4*self.ulen*2*3*4)
  253         ua[-1, -1, -1] = self.ucs_value*self.ulen
  254         self.content_check(ua, ua[-1, -1, -1], 4*self.ulen*2*3*4)
  255 
  256 
  257 class TestAssignValues_1_UCS2(AssignValues):
  258     """Check the assignment of valued arrays (size 1, UCS2 values)"""
  259     ulen = 1
  260     ucs_value = ucs2_value
  261 
  262 
  263 class TestAssignValues_1_UCS4(AssignValues):
  264     """Check the assignment of valued arrays (size 1, UCS4 values)"""
  265     ulen = 1
  266     ucs_value = ucs4_value
  267 
  268 
  269 class TestAssignValues_2_UCS2(AssignValues):
  270     """Check the assignment of valued arrays (size 2, UCS2 values)"""
  271     ulen = 2
  272     ucs_value = ucs2_value
  273 
  274 
  275 class TestAssignValues_2_UCS4(AssignValues):
  276     """Check the assignment of valued arrays (size 2, UCS4 values)"""
  277     ulen = 2
  278     ucs_value = ucs4_value
  279 
  280 
  281 class TestAssignValues_1009_UCS2(AssignValues):
  282     """Check the assignment of valued arrays (size 1009, UCS2 values)"""
  283     ulen = 1009
  284     ucs_value = ucs2_value
  285 
  286 
  287 class TestAssignValues_1009_UCS4(AssignValues):
  288     """Check the assignment of valued arrays (size 1009, UCS4 values)"""
  289     ulen = 1009
  290     ucs_value = ucs4_value
  291 
  292 
  293 ############################################################
  294 #    Byteorder tests
  295 ############################################################
  296 
  297 class ByteorderValues(object):
  298     """Check the byteorder of unicode arrays in round-trip conversions"""
  299 
  300     def test_values0D(self):
  301         # Check byteorder of 0-dimensional objects
  302         ua = np.array(self.ucs_value*self.ulen, dtype='U%s' % self.ulen)
  303         ua2 = ua.newbyteorder()
  304         # This changes the interpretation of the data region (but not the
  305         #  actual data), therefore the returned scalars are not
  306         #  the same (they are byte-swapped versions of each other).
  307         assert_(ua[()] != ua2[()])
  308         ua3 = ua2.newbyteorder()
  309         # Arrays must be equal after the round-trip
  310         assert_equal(ua, ua3)
  311 
  312     def test_valuesSD(self):
  313         # Check byteorder of single-dimensional objects
  314         ua = np.array([self.ucs_value*self.ulen]*2, dtype='U%s' % self.ulen)
  315         ua2 = ua.newbyteorder()
  316         assert_((ua != ua2).all())
  317         assert_(ua[-1] != ua2[-1])
  318         ua3 = ua2.newbyteorder()
  319         # Arrays must be equal after the round-trip
  320         assert_equal(ua, ua3)
  321 
  322     def test_valuesMD(self):
  323         # Check byteorder of multi-dimensional objects
  324         ua = np.array([[[self.ucs_value*self.ulen]*2]*3]*4,
  325                       dtype='U%s' % self.ulen)
  326         ua2 = ua.newbyteorder()
  327         assert_((ua != ua2).all())
  328         assert_(ua[-1, -1, -1] != ua2[-1, -1, -1])
  329         ua3 = ua2.newbyteorder()
  330         # Arrays must be equal after the round-trip
  331         assert_equal(ua, ua3)
  332 
  333     def test_values_cast(self):
  334         # Check byteorder of when casting the array for a strided and
  335         # contiguous array:
  336         test1 = np.array([self.ucs_value*self.ulen]*2, dtype='U%s' % self.ulen)
  337         test2 = np.repeat(test1, 2)[::2]
  338         for ua in (test1, test2):
  339             ua2 = ua.astype(dtype=ua.dtype.newbyteorder())
  340             assert_((ua == ua2).all())
  341             assert_(ua[-1] == ua2[-1])
  342             ua3 = ua2.astype(dtype=ua.dtype)
  343             # Arrays must be equal after the round-trip
  344             assert_equal(ua, ua3)
  345 
  346     def test_values_updowncast(self):
  347         # Check byteorder of when casting the array to a longer and shorter
  348         # string length for strided and contiguous arrays
  349         test1 = np.array([self.ucs_value*self.ulen]*2, dtype='U%s' % self.ulen)
  350         test2 = np.repeat(test1, 2)[::2]
  351         for ua in (test1, test2):
  352             # Cast to a longer type with zero padding
  353             longer_type = np.dtype('U%s' % (self.ulen+1)).newbyteorder()
  354             ua2 = ua.astype(dtype=longer_type)
  355             assert_((ua == ua2).all())
  356             assert_(ua[-1] == ua2[-1])
  357             # Cast back again with truncating:
  358             ua3 = ua2.astype(dtype=ua.dtype)
  359             # Arrays must be equal after the round-trip
  360             assert_equal(ua, ua3)
  361 
  362 
  363 class TestByteorder_1_UCS2(ByteorderValues):
  364     """Check the byteorder in unicode (size 1, UCS2 values)"""
  365     ulen = 1
  366     ucs_value = ucs2_value
  367 
  368 
  369 class TestByteorder_1_UCS4(ByteorderValues):
  370     """Check the byteorder in unicode (size 1, UCS4 values)"""
  371     ulen = 1
  372     ucs_value = ucs4_value
  373 
  374 
  375 class TestByteorder_2_UCS2(ByteorderValues):
  376     """Check the byteorder in unicode (size 2, UCS2 values)"""
  377     ulen = 2
  378     ucs_value = ucs2_value
  379 
  380 
  381 class TestByteorder_2_UCS4(ByteorderValues):
  382     """Check the byteorder in unicode (size 2, UCS4 values)"""
  383     ulen = 2
  384     ucs_value = ucs4_value
  385 
  386 
  387 class TestByteorder_1009_UCS2(ByteorderValues):
  388     """Check the byteorder in unicode (size 1009, UCS2 values)"""
  389     ulen = 1009
  390     ucs_value = ucs2_value
  391 
  392 
  393 class TestByteorder_1009_UCS4(ByteorderValues):
  394     """Check the byteorder in unicode (size 1009, UCS4 values)"""
  395     ulen = 1009
  396     ucs_value = ucs4_value