402 lines
13 KiB
Python
402 lines
13 KiB
Python
from __future__ import division, absolute_import, print_function
|
|
|
|
import sys
|
|
|
|
import numpy as np
|
|
from numpy.compat import unicode
|
|
from numpy.testing import (
|
|
run_module_suite, assert_, assert_equal, assert_array_equal)
|
|
|
|
# Guess the UCS length for this python interpreter
|
|
if sys.version_info[:2] >= (3, 3):
|
|
# Python 3.3 uses a flexible string representation
|
|
ucs4 = False
|
|
|
|
def buffer_length(arr):
|
|
if isinstance(arr, unicode):
|
|
arr = str(arr)
|
|
if not arr:
|
|
charmax = 0
|
|
else:
|
|
charmax = max([ord(c) for c in arr])
|
|
if charmax < 256:
|
|
size = 1
|
|
elif charmax < 65536:
|
|
size = 2
|
|
else:
|
|
size = 4
|
|
return size * len(arr)
|
|
v = memoryview(arr)
|
|
if v.shape is None:
|
|
return len(v) * v.itemsize
|
|
else:
|
|
return np.prod(v.shape) * v.itemsize
|
|
else:
|
|
if len(buffer(u'u')) == 4:
|
|
ucs4 = True
|
|
else:
|
|
ucs4 = False
|
|
|
|
def buffer_length(arr):
|
|
if isinstance(arr, np.ndarray):
|
|
return len(arr.data)
|
|
return len(buffer(arr))
|
|
|
|
# In both cases below we need to make sure that the byte swapped value (as
|
|
# UCS4) is still a valid unicode:
|
|
# Value that can be represented in UCS2 interpreters
|
|
ucs2_value = u'\u0900'
|
|
# Value that cannot be represented in UCS2 interpreters (but can in UCS4)
|
|
ucs4_value = u'\U00100900'
|
|
|
|
|
|
def test_string_cast():
|
|
str_arr = np.array(["1234", "1234\0\0"], dtype='S')
|
|
uni_arr1 = str_arr.astype('>U')
|
|
uni_arr2 = str_arr.astype('<U')
|
|
|
|
if sys.version_info[0] < 3:
|
|
assert_array_equal(str_arr, uni_arr1)
|
|
assert_array_equal(str_arr, uni_arr2)
|
|
else:
|
|
assert_(str_arr != uni_arr1)
|
|
assert_(str_arr != uni_arr2)
|
|
assert_array_equal(uni_arr1, uni_arr2)
|
|
|
|
|
|
############################################################
|
|
# Creation tests
|
|
############################################################
|
|
|
|
class CreateZeros(object):
|
|
"""Check the creation of zero-valued arrays"""
|
|
|
|
def content_check(self, ua, ua_scalar, nbytes):
|
|
|
|
# Check the length of the unicode base type
|
|
assert_(int(ua.dtype.str[2:]) == self.ulen)
|
|
# Check the length of the data buffer
|
|
assert_(buffer_length(ua) == nbytes)
|
|
# Small check that data in array element is ok
|
|
assert_(ua_scalar == u'')
|
|
# Encode to ascii and double check
|
|
assert_(ua_scalar.encode('ascii') == b'')
|
|
# Check buffer lengths for scalars
|
|
if ucs4:
|
|
assert_(buffer_length(ua_scalar) == 0)
|
|
else:
|
|
assert_(buffer_length(ua_scalar) == 0)
|
|
|
|
def test_zeros0D(self):
|
|
# Check creation of 0-dimensional objects
|
|
ua = np.zeros((), dtype='U%s' % self.ulen)
|
|
self.content_check(ua, ua[()], 4*self.ulen)
|
|
|
|
def test_zerosSD(self):
|
|
# Check creation of single-dimensional objects
|
|
ua = np.zeros((2,), dtype='U%s' % self.ulen)
|
|
self.content_check(ua, ua[0], 4*self.ulen*2)
|
|
self.content_check(ua, ua[1], 4*self.ulen*2)
|
|
|
|
def test_zerosMD(self):
|
|
# Check creation of multi-dimensional objects
|
|
ua = np.zeros((2, 3, 4), dtype='U%s' % self.ulen)
|
|
self.content_check(ua, ua[0, 0, 0], 4*self.ulen*2*3*4)
|
|
self.content_check(ua, ua[-1, -1, -1], 4*self.ulen*2*3*4)
|
|
|
|
|
|
class TestCreateZeros_1(CreateZeros):
|
|
"""Check the creation of zero-valued arrays (size 1)"""
|
|
ulen = 1
|
|
|
|
|
|
class TestCreateZeros_2(CreateZeros):
|
|
"""Check the creation of zero-valued arrays (size 2)"""
|
|
ulen = 2
|
|
|
|
|
|
class TestCreateZeros_1009(CreateZeros):
|
|
"""Check the creation of zero-valued arrays (size 1009)"""
|
|
ulen = 1009
|
|
|
|
|
|
class CreateValues(object):
|
|
"""Check the creation of unicode arrays with values"""
|
|
|
|
def content_check(self, ua, ua_scalar, nbytes):
|
|
|
|
# Check the length of the unicode base type
|
|
assert_(int(ua.dtype.str[2:]) == self.ulen)
|
|
# Check the length of the data buffer
|
|
assert_(buffer_length(ua) == nbytes)
|
|
# Small check that data in array element is ok
|
|
assert_(ua_scalar == self.ucs_value*self.ulen)
|
|
# Encode to UTF-8 and double check
|
|
assert_(ua_scalar.encode('utf-8') ==
|
|
(self.ucs_value*self.ulen).encode('utf-8'))
|
|
# Check buffer lengths for scalars
|
|
if ucs4:
|
|
assert_(buffer_length(ua_scalar) == 4*self.ulen)
|
|
else:
|
|
if self.ucs_value == ucs4_value:
|
|
# In UCS2, the \U0010FFFF will be represented using a
|
|
# surrogate *pair*
|
|
assert_(buffer_length(ua_scalar) == 2*2*self.ulen)
|
|
else:
|
|
# In UCS2, the \uFFFF will be represented using a
|
|
# regular 2-byte word
|
|
assert_(buffer_length(ua_scalar) == 2*self.ulen)
|
|
|
|
def test_values0D(self):
|
|
# Check creation of 0-dimensional objects with values
|
|
ua = np.array(self.ucs_value*self.ulen, dtype='U%s' % self.ulen)
|
|
self.content_check(ua, ua[()], 4*self.ulen)
|
|
|
|
def test_valuesSD(self):
|
|
# Check creation of single-dimensional objects with values
|
|
ua = np.array([self.ucs_value*self.ulen]*2, dtype='U%s' % self.ulen)
|
|
self.content_check(ua, ua[0], 4*self.ulen*2)
|
|
self.content_check(ua, ua[1], 4*self.ulen*2)
|
|
|
|
def test_valuesMD(self):
|
|
# Check creation of multi-dimensional objects with values
|
|
ua = np.array([[[self.ucs_value*self.ulen]*2]*3]*4, dtype='U%s' % self.ulen)
|
|
self.content_check(ua, ua[0, 0, 0], 4*self.ulen*2*3*4)
|
|
self.content_check(ua, ua[-1, -1, -1], 4*self.ulen*2*3*4)
|
|
|
|
|
|
class TestCreateValues_1_UCS2(CreateValues):
|
|
"""Check the creation of valued arrays (size 1, UCS2 values)"""
|
|
ulen = 1
|
|
ucs_value = ucs2_value
|
|
|
|
|
|
class TestCreateValues_1_UCS4(CreateValues):
|
|
"""Check the creation of valued arrays (size 1, UCS4 values)"""
|
|
ulen = 1
|
|
ucs_value = ucs4_value
|
|
|
|
|
|
class TestCreateValues_2_UCS2(CreateValues):
|
|
"""Check the creation of valued arrays (size 2, UCS2 values)"""
|
|
ulen = 2
|
|
ucs_value = ucs2_value
|
|
|
|
|
|
class TestCreateValues_2_UCS4(CreateValues):
|
|
"""Check the creation of valued arrays (size 2, UCS4 values)"""
|
|
ulen = 2
|
|
ucs_value = ucs4_value
|
|
|
|
|
|
class TestCreateValues_1009_UCS2(CreateValues):
|
|
"""Check the creation of valued arrays (size 1009, UCS2 values)"""
|
|
ulen = 1009
|
|
ucs_value = ucs2_value
|
|
|
|
|
|
class TestCreateValues_1009_UCS4(CreateValues):
|
|
"""Check the creation of valued arrays (size 1009, UCS4 values)"""
|
|
ulen = 1009
|
|
ucs_value = ucs4_value
|
|
|
|
|
|
############################################################
|
|
# Assignment tests
|
|
############################################################
|
|
|
|
class AssignValues(object):
|
|
"""Check the assignment of unicode arrays with values"""
|
|
|
|
def content_check(self, ua, ua_scalar, nbytes):
|
|
|
|
# Check the length of the unicode base type
|
|
assert_(int(ua.dtype.str[2:]) == self.ulen)
|
|
# Check the length of the data buffer
|
|
assert_(buffer_length(ua) == nbytes)
|
|
# Small check that data in array element is ok
|
|
assert_(ua_scalar == self.ucs_value*self.ulen)
|
|
# Encode to UTF-8 and double check
|
|
assert_(ua_scalar.encode('utf-8') ==
|
|
(self.ucs_value*self.ulen).encode('utf-8'))
|
|
# Check buffer lengths for scalars
|
|
if ucs4:
|
|
assert_(buffer_length(ua_scalar) == 4*self.ulen)
|
|
else:
|
|
if self.ucs_value == ucs4_value:
|
|
# In UCS2, the \U0010FFFF will be represented using a
|
|
# surrogate *pair*
|
|
assert_(buffer_length(ua_scalar) == 2*2*self.ulen)
|
|
else:
|
|
# In UCS2, the \uFFFF will be represented using a
|
|
# regular 2-byte word
|
|
assert_(buffer_length(ua_scalar) == 2*self.ulen)
|
|
|
|
def test_values0D(self):
|
|
# Check assignment of 0-dimensional objects with values
|
|
ua = np.zeros((), dtype='U%s' % self.ulen)
|
|
ua[()] = self.ucs_value*self.ulen
|
|
self.content_check(ua, ua[()], 4*self.ulen)
|
|
|
|
def test_valuesSD(self):
|
|
# Check assignment of single-dimensional objects with values
|
|
ua = np.zeros((2,), dtype='U%s' % self.ulen)
|
|
ua[0] = self.ucs_value*self.ulen
|
|
self.content_check(ua, ua[0], 4*self.ulen*2)
|
|
ua[1] = self.ucs_value*self.ulen
|
|
self.content_check(ua, ua[1], 4*self.ulen*2)
|
|
|
|
def test_valuesMD(self):
|
|
# Check assignment of multi-dimensional objects with values
|
|
ua = np.zeros((2, 3, 4), dtype='U%s' % self.ulen)
|
|
ua[0, 0, 0] = self.ucs_value*self.ulen
|
|
self.content_check(ua, ua[0, 0, 0], 4*self.ulen*2*3*4)
|
|
ua[-1, -1, -1] = self.ucs_value*self.ulen
|
|
self.content_check(ua, ua[-1, -1, -1], 4*self.ulen*2*3*4)
|
|
|
|
|
|
class TestAssignValues_1_UCS2(AssignValues):
|
|
"""Check the assignment of valued arrays (size 1, UCS2 values)"""
|
|
ulen = 1
|
|
ucs_value = ucs2_value
|
|
|
|
|
|
class TestAssignValues_1_UCS4(AssignValues):
|
|
"""Check the assignment of valued arrays (size 1, UCS4 values)"""
|
|
ulen = 1
|
|
ucs_value = ucs4_value
|
|
|
|
|
|
class TestAssignValues_2_UCS2(AssignValues):
|
|
"""Check the assignment of valued arrays (size 2, UCS2 values)"""
|
|
ulen = 2
|
|
ucs_value = ucs2_value
|
|
|
|
|
|
class TestAssignValues_2_UCS4(AssignValues):
|
|
"""Check the assignment of valued arrays (size 2, UCS4 values)"""
|
|
ulen = 2
|
|
ucs_value = ucs4_value
|
|
|
|
|
|
class TestAssignValues_1009_UCS2(AssignValues):
|
|
"""Check the assignment of valued arrays (size 1009, UCS2 values)"""
|
|
ulen = 1009
|
|
ucs_value = ucs2_value
|
|
|
|
|
|
class TestAssignValues_1009_UCS4(AssignValues):
|
|
"""Check the assignment of valued arrays (size 1009, UCS4 values)"""
|
|
ulen = 1009
|
|
ucs_value = ucs4_value
|
|
|
|
|
|
############################################################
|
|
# Byteorder tests
|
|
############################################################
|
|
|
|
class ByteorderValues(object):
|
|
"""Check the byteorder of unicode arrays in round-trip conversions"""
|
|
|
|
def test_values0D(self):
|
|
# Check byteorder of 0-dimensional objects
|
|
ua = np.array(self.ucs_value*self.ulen, dtype='U%s' % self.ulen)
|
|
ua2 = ua.newbyteorder()
|
|
# This changes the interpretation of the data region (but not the
|
|
# actual data), therefore the returned scalars are not
|
|
# the same (they are byte-swapped versions of each other).
|
|
assert_(ua[()] != ua2[()])
|
|
ua3 = ua2.newbyteorder()
|
|
# Arrays must be equal after the round-trip
|
|
assert_equal(ua, ua3)
|
|
|
|
def test_valuesSD(self):
|
|
# Check byteorder of single-dimensional objects
|
|
ua = np.array([self.ucs_value*self.ulen]*2, dtype='U%s' % self.ulen)
|
|
ua2 = ua.newbyteorder()
|
|
assert_((ua != ua2).all())
|
|
assert_(ua[-1] != ua2[-1])
|
|
ua3 = ua2.newbyteorder()
|
|
# Arrays must be equal after the round-trip
|
|
assert_equal(ua, ua3)
|
|
|
|
def test_valuesMD(self):
|
|
# Check byteorder of multi-dimensional objects
|
|
ua = np.array([[[self.ucs_value*self.ulen]*2]*3]*4,
|
|
dtype='U%s' % self.ulen)
|
|
ua2 = ua.newbyteorder()
|
|
assert_((ua != ua2).all())
|
|
assert_(ua[-1, -1, -1] != ua2[-1, -1, -1])
|
|
ua3 = ua2.newbyteorder()
|
|
# Arrays must be equal after the round-trip
|
|
assert_equal(ua, ua3)
|
|
|
|
def test_values_cast(self):
|
|
# Check byteorder of when casting the array for a strided and
|
|
# contiguous array:
|
|
test1 = np.array([self.ucs_value*self.ulen]*2, dtype='U%s' % self.ulen)
|
|
test2 = np.repeat(test1, 2)[::2]
|
|
for ua in (test1, test2):
|
|
ua2 = ua.astype(dtype=ua.dtype.newbyteorder())
|
|
assert_((ua == ua2).all())
|
|
assert_(ua[-1] == ua2[-1])
|
|
ua3 = ua2.astype(dtype=ua.dtype)
|
|
# Arrays must be equal after the round-trip
|
|
assert_equal(ua, ua3)
|
|
|
|
def test_values_updowncast(self):
|
|
# Check byteorder of when casting the array to a longer and shorter
|
|
# string length for strided and contiguous arrays
|
|
test1 = np.array([self.ucs_value*self.ulen]*2, dtype='U%s' % self.ulen)
|
|
test2 = np.repeat(test1, 2)[::2]
|
|
for ua in (test1, test2):
|
|
# Cast to a longer type with zero padding
|
|
longer_type = np.dtype('U%s' % (self.ulen+1)).newbyteorder()
|
|
ua2 = ua.astype(dtype=longer_type)
|
|
assert_((ua == ua2).all())
|
|
assert_(ua[-1] == ua2[-1])
|
|
# Cast back again with truncating:
|
|
ua3 = ua2.astype(dtype=ua.dtype)
|
|
# Arrays must be equal after the round-trip
|
|
assert_equal(ua, ua3)
|
|
|
|
|
|
class TestByteorder_1_UCS2(ByteorderValues):
|
|
"""Check the byteorder in unicode (size 1, UCS2 values)"""
|
|
ulen = 1
|
|
ucs_value = ucs2_value
|
|
|
|
|
|
class TestByteorder_1_UCS4(ByteorderValues):
|
|
"""Check the byteorder in unicode (size 1, UCS4 values)"""
|
|
ulen = 1
|
|
ucs_value = ucs4_value
|
|
|
|
|
|
class TestByteorder_2_UCS2(ByteorderValues):
|
|
"""Check the byteorder in unicode (size 2, UCS2 values)"""
|
|
ulen = 2
|
|
ucs_value = ucs2_value
|
|
|
|
|
|
class TestByteorder_2_UCS4(ByteorderValues):
|
|
"""Check the byteorder in unicode (size 2, UCS4 values)"""
|
|
ulen = 2
|
|
ucs_value = ucs4_value
|
|
|
|
|
|
class TestByteorder_1009_UCS2(ByteorderValues):
|
|
"""Check the byteorder in unicode (size 1009, UCS2 values)"""
|
|
ulen = 1009
|
|
ucs_value = ucs2_value
|
|
|
|
|
|
class TestByteorder_1009_UCS4(ByteorderValues):
|
|
"""Check the byteorder in unicode (size 1009, UCS4 values)"""
|
|
ulen = 1009
|
|
ucs_value = ucs4_value
|
|
|
|
|
|
if __name__ == "__main__":
|
|
run_module_suite()
|