rawmemchr.c (bison-3.8.1.tar.xz) | : | rawmemchr.c (bison-3.8.2.tar.xz) | ||
---|---|---|---|---|
skipping to change at line 25 | skipping to change at line 25 | |||
along with this program. If not, see <https://www.gnu.org/licenses/>. */ | along with this program. If not, see <https://www.gnu.org/licenses/>. */ | |||
#include <config.h> | #include <config.h> | |||
/* Specification. */ | /* Specification. */ | |||
#include <string.h> | #include <string.h> | |||
/* A function definition is only needed if HAVE_RAWMEMCHR is not defined. */ | /* A function definition is only needed if HAVE_RAWMEMCHR is not defined. */ | |||
#if !HAVE_RAWMEMCHR | #if !HAVE_RAWMEMCHR | |||
# include <limits.h> | ||||
# include <stdalign.h> | ||||
# include <stdint.h> | ||||
# include "verify.h" | ||||
/* Find the first occurrence of C in S. */ | /* Find the first occurrence of C in S. */ | |||
void * | void * | |||
rawmemchr (const void *s, int c_in) | rawmemchr (const void *s, int c_in) | |||
{ | { | |||
/* On 32-bit hardware, choosing longword to be a 32-bit unsigned | /* Change this typedef to experiment with performance. */ | |||
long instead of a 64-bit uintmax_t tends to give better | typedef uintptr_t longword; | |||
performance. On 64-bit hardware, unsigned long is generally 64 | /* If you change the "uintptr_t", you should change UINTPTR_WIDTH to match. | |||
bits already. Change this typedef to experiment with | This verifies that the type does not have padding bits. */ | |||
performance. */ | verify (UINTPTR_WIDTH == UCHAR_WIDTH * sizeof (longword)); | |||
typedef unsigned long int longword; | ||||
const unsigned char *char_ptr; | const unsigned char *char_ptr; | |||
const longword *longword_ptr; | unsigned char c = c_in; | |||
longword repeated_one; | ||||
longword repeated_c; | ||||
unsigned char c; | ||||
c = (unsigned char) c_in; | ||||
/* Handle the first few bytes by reading one byte at a time. | /* Handle the first few bytes by reading one byte at a time. | |||
Do this until CHAR_PTR is aligned on a longword boundary. */ | Do this until CHAR_PTR is aligned on a longword boundary. */ | |||
for (char_ptr = (const unsigned char *) s; | for (char_ptr = (const unsigned char *) s; | |||
(size_t) char_ptr % sizeof (longword) != 0; | (uintptr_t) char_ptr % alignof (longword) != 0; | |||
++char_ptr) | ++char_ptr) | |||
if (*char_ptr == c) | if (*char_ptr == c) | |||
return (void *) char_ptr; | return (void *) char_ptr; | |||
longword_ptr = (const longword *) char_ptr; | longword const *longword_ptr = s = char_ptr; | |||
/* All these elucidatory comments refer to 4-byte longwords, | ||||
but the theory applies equally well to any size longwords. */ | ||||
/* Compute auxiliary longword values: | /* Compute auxiliary longword values: | |||
repeated_one is a value which has a 1 in every byte. | repeated_one is a value which has a 1 in every byte. | |||
repeated_c has c in every byte. */ | repeated_c has c in every byte. */ | |||
repeated_one = 0x01010101; | longword repeated_one = (longword) -1 / UCHAR_MAX; | |||
repeated_c = c | (c << 8); | longword repeated_c = repeated_one * c; | |||
repeated_c |= repeated_c << 16; | longword repeated_hibit = repeated_one * (UCHAR_MAX / 2 + 1); | |||
if (0xffffffffU < (longword) -1) | ||||
{ | ||||
repeated_one |= repeated_one << 31 << 1; | ||||
repeated_c |= repeated_c << 31 << 1; | ||||
if (8 < sizeof (longword)) | ||||
{ | ||||
size_t i; | ||||
for (i = 64; i < sizeof (longword) * 8; i *= 2) | ||||
{ | ||||
repeated_one |= repeated_one << i; | ||||
repeated_c |= repeated_c << i; | ||||
} | ||||
} | ||||
} | ||||
/* Instead of the traditional loop which tests each byte, we will | /* Instead of the traditional loop which tests each byte, we will | |||
test a longword at a time. The tricky part is testing if *any of | test a longword at a time. The tricky part is testing if any of | |||
the four* bytes in the longword in question are equal to NUL or | the bytes in the longword in question are equal to | |||
c. We first use an xor with repeated_c. This reduces the task | c. We first use an xor with repeated_c. This reduces the task | |||
to testing whether *any of the four* bytes in longword1 is zero. | to testing whether any of the bytes in longword1 is zero. | |||
(The following comments assume 8-bit bytes, as POSIX requires; | ||||
the code's use of UCHAR_MAX should work even if bytes have more | ||||
than 8 bits.) | ||||
We compute tmp = | We compute tmp = | |||
((longword1 - repeated_one) & ~longword1) & (repeated_one << 7). | ((longword1 - repeated_one) & ~longword1) & (repeated_one * 0x80). | |||
That is, we perform the following operations: | That is, we perform the following operations: | |||
1. Subtract repeated_one. | 1. Subtract repeated_one. | |||
2. & ~longword1. | 2. & ~longword1. | |||
3. & a mask consisting of 0x80 in every byte. | 3. & a mask consisting of 0x80 in every byte. | |||
Consider what happens in each byte: | Consider what happens in each byte: | |||
- If a byte of longword1 is zero, step 1 and 2 transform it into 0xff, | - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff, | |||
and step 3 transforms it into 0x80. A carry can also be propagated | and step 3 transforms it into 0x80. A carry can also be propagated | |||
to more significant bytes. | to more significant bytes. | |||
- If a byte of longword1 is nonzero, let its lowest 1 bit be at | - If a byte of longword1 is nonzero, let its lowest 1 bit be at | |||
position k (0 <= k <= 7); so the lowest k bits are 0. After step 1, | position k (0 <= k <= 7); so the lowest k bits are 0. After step 1, | |||
skipping to change at line 120 | skipping to change at line 106 | |||
This test can read beyond the end of a string, depending on where | This test can read beyond the end of a string, depending on where | |||
C_IN is encountered. However, this is considered safe since the | C_IN is encountered. However, this is considered safe since the | |||
initialization phase ensured that the read will be aligned, | initialization phase ensured that the read will be aligned, | |||
therefore, the read will not cross page boundaries and will not | therefore, the read will not cross page boundaries and will not | |||
cause a fault. */ | cause a fault. */ | |||
while (1) | while (1) | |||
{ | { | |||
longword longword1 = *longword_ptr ^ repeated_c; | longword longword1 = *longword_ptr ^ repeated_c; | |||
if ((((longword1 - repeated_one) & ~longword1) | if ((((longword1 - repeated_one) & ~longword1) & repeated_hibit) != 0) | |||
& (repeated_one << 7)) != 0) | ||||
break; | break; | |||
longword_ptr++; | longword_ptr++; | |||
} | } | |||
char_ptr = (const unsigned char *) longword_ptr; | char_ptr = s = longword_ptr; | |||
/* At this point, we know that one of the sizeof (longword) bytes | /* At this point, we know that one of the sizeof (longword) bytes | |||
starting at char_ptr is == c. On little-endian machines, we | starting at char_ptr is == c. If we knew endianness, we | |||
could determine the first such byte without any further memory | could determine the first such byte without any further memory | |||
accesses, just by looking at the tmp result from the last loop | accesses, just by looking at the tmp result from the last loop | |||
iteration. But this does not work on big-endian machines. | iteration. However, the following simple and portable code does | |||
Choose code that works in both cases. */ | not attempt this potential optimization. */ | |||
char_ptr = (unsigned char *) longword_ptr; | ||||
while (*char_ptr != c) | while (*char_ptr != c) | |||
char_ptr++; | char_ptr++; | |||
return (void *) char_ptr; | return (void *) char_ptr; | |||
} | } | |||
#endif | #endif | |||
End of changes. 14 change blocks. | ||||
46 lines changed or deleted | 30 lines changed or added |