"Fossies" - the Fresh Open Source Software Archive  

Source code changes of the file "numpy/core/src/multiarray/item_selection.c" between
numpy-1.21.0.tar.gz and numpy-1.21.1.tar.gz

About: NumPy is the fundamental package for scientific computing with Python.

item_selection.c  (numpy-1.21.0):item_selection.c  (numpy-1.21.1)
skipping to change at line 2122 skipping to change at line 2122
count += (c[i] != 0); count += (c[i] != 0);
} }
return count; return count;
} }
return r; return r;
} }
#if NPY_SIMD #if NPY_SIMD
/* Count the zero bytes between `*d` and `end`, updating `*d` to point to where to keep counting from. */ /* Count the zero bytes between `*d` and `end`, updating `*d` to point to where to keep counting from. */
static NPY_INLINE NPY_GCC_OPT_3 npyv_u8 NPY_FINLINE NPY_GCC_OPT_3 npyv_u8
count_zero_bytes_u8(const npy_uint8 **d, const npy_uint8 *end, npy_uint8 max_count) count_zero_bytes_u8(const npy_uint8 **d, const npy_uint8 *end, npy_uint8 max_count)
{ {
const npyv_u8 vone = npyv_setall_u8(1); const npyv_u8 vone = npyv_setall_u8(1);
const npyv_u8 vzero = npyv_zero_u8(); const npyv_u8 vzero = npyv_zero_u8();
npy_intp lane_max = 0; npy_intp lane_max = 0;
npyv_u8 vsum8 = npyv_zero_u8(); npyv_u8 vsum8 = npyv_zero_u8();
while (*d < end && lane_max <= max_count - 1) { while (*d < end && lane_max <= max_count - 1) {
// we count zeros because `cmpeq` cheaper than `cmpneq` for most archs // we count zeros because `cmpeq` cheaper than `cmpneq` for most archs
npyv_u8 vt = npyv_cvt_u8_b8(npyv_cmpeq_u8(npyv_load_u8(*d), vzero)); npyv_u8 vt = npyv_cvt_u8_b8(npyv_cmpeq_u8(npyv_load_u8(*d), vzero));
vt = npyv_and_u8(vt, vone); vt = npyv_and_u8(vt, vone);
vsum8 = npyv_add_u8(vsum8, vt); vsum8 = npyv_add_u8(vsum8, vt);
*d += npyv_nlanes_u8; *d += npyv_nlanes_u8;
lane_max += 1; lane_max += 1;
} }
return vsum8; return vsum8;
} }
static NPY_INLINE NPY_GCC_OPT_3 npyv_u16x2 NPY_FINLINE NPY_GCC_OPT_3 npyv_u16x2
count_zero_bytes_u16(const npy_uint8 **d, const npy_uint8 *end, npy_uint16 max_count) count_zero_bytes_u16(const npy_uint8 **d, const npy_uint8 *end, npy_uint16 max_count)
{ {
npyv_u16x2 vsum16; npyv_u16x2 vsum16;
vsum16.val[0] = vsum16.val[1] = npyv_zero_u16(); vsum16.val[0] = vsum16.val[1] = npyv_zero_u16();
npy_intp lane_max = 0; npy_intp lane_max = 0;
while (*d < end && lane_max <= max_count - NPY_MAX_UINT8) { while (*d < end && lane_max <= max_count - NPY_MAX_UINT8) {
npyv_u8 vsum8 = count_zero_bytes_u8(d, end, NPY_MAX_UINT8); npyv_u8 vsum8 = count_zero_bytes_u8(d, end, NPY_MAX_UINT8);
npyv_u16x2 part = npyv_expand_u16_u8(vsum8); npyv_u16x2 part = npyv_expand_u16_u8(vsum8);
vsum16.val[0] = npyv_add_u16(vsum16.val[0], part.val[0]); vsum16.val[0] = npyv_add_u16(vsum16.val[0], part.val[0]);
vsum16.val[1] = npyv_add_u16(vsum16.val[1], part.val[1]); vsum16.val[1] = npyv_add_u16(vsum16.val[1], part.val[1]);
 End of changes. 2 change blocks. 
2 lines changed or deleted 2 lines changed or added

Home  |  About  |  Features  |  All  |  Newest  |  Dox  |  Diffs  |  RSS Feeds  |  Screenshots  |  Comments  |  Imprint  |  Privacy  |  HTTP(S)