int may be 16 bits, in which case shifting left by 16 is UB. Cast to uint32_t before doing the shift to avoid such issues. --- src/string/memmem.c | 8 ++++---- src/string/strstr.c | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/string/memmem.c b/src/string/memmem.c index 11eff86e..4d19a922 100644 --- a/src/string/memmem.c +++ b/src/string/memmem.c @@ -12,8 +12,8 @@ static char *twobyte_memmem(const unsigned char *h, size_t k, const unsigned cha static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned char *n) { - uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8; - uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8; + uint32_t nw = (uint32_t)n[0]<<24 | (uint32_t)n[1]<<16 | n[2]<<8; + uint32_t hw = (uint32_t)h[0]<<24 | (uint32_t)h[1]<<16 | h[2]<<8; for (h+=3, k-=3; k; k--, hw = (hw|*h++)<<8) if (hw == nw) return (char *)h-3; return hw == nw ? (char *)h-3 : 0; @@ -21,8 +21,8 @@ static char *threebyte_memmem(const unsigned char *h, size_t k, const unsigned c static char *fourbyte_memmem(const unsigned char *h, size_t k, const unsigned char *n) { - uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3]; - uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3]; + uint32_t nw = (uint32_t)n[0]<<24 | (uint32_t)n[1]<<16 | n[2]<<8 | n[3]; + uint32_t hw = (uint32_t)h[0]<<24 | (uint32_t)h[1]<<16 | h[2]<<8 | h[3]; for (h+=4, k-=4; k; k--, hw = hw<<8 | *h++) if (hw == nw) return (char *)h-4; return hw == nw ? 
(char *)h-4 : 0; diff --git a/src/string/strstr.c b/src/string/strstr.c index 96657bc2..a68c1adb 100644 --- a/src/string/strstr.c +++ b/src/string/strstr.c @@ -10,16 +10,16 @@ static char *twobyte_strstr(const unsigned char *h, const unsigned char *n) static char *threebyte_strstr(const unsigned char *h, const unsigned char *n) { - uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8; - uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8; + uint32_t nw = (uint32_t)n[0]<<24 | (uint32_t)n[1]<<16 | n[2]<<8; + uint32_t hw = (uint32_t)h[0]<<24 | (uint32_t)h[1]<<16 | h[2]<<8; for (h+=2; *h && hw != nw; hw = (hw|*++h)<<8); return *h ? (char *)h-2 : 0; } static char *fourbyte_strstr(const unsigned char *h, const unsigned char *n) { - uint32_t nw = (uint32_t)n[0]<<24 | n[1]<<16 | n[2]<<8 | n[3]; - uint32_t hw = (uint32_t)h[0]<<24 | h[1]<<16 | h[2]<<8 | h[3]; + uint32_t nw = (uint32_t)n[0]<<24 | (uint32_t)n[1]<<16 | n[2]<<8 | n[3]; + uint32_t hw = (uint32_t)h[0]<<24 | (uint32_t)h[1]<<16 | h[2]<<8 | h[3]; for (h+=3; *h && hw != nw; hw = hw<<8 | *++h); return *h ? (char *)h-3 : 0; } -- 2.35.1
> On 4 Jun 2022, at 19:26, NRK <nrk@disroot.org> wrote:
>
> + uint32_t nw = (uint32_t)n[0]<<24 | (uint32_t)n[1]<<16 | n[2]<<8;
> + uint32_t hw = (uint32_t)h[0]<<24 | (uint32_t)h[1]<<16 | h[2]<<8;
If it were a goal to support 16-bit ints in musl, then your patch would still have UB by shifting a 1 into the sign bit with {h,n}[2]<<8, which in C is a form of signed arithmetic overflow (the C++ standard makes a special case for this situation but the C standard doesn't).
However, I do not think it is a goal to support narrower than 32-bit ints in musl, and the original code is free of UB in these conditions.
On Sat, Jun 04, 2022 at 06:04:15PM +0000, Pascal Cuoq wrote:
>
> > On 4 Jun 2022, at 19:26, NRK <nrk@disroot.org> wrote:
> >
> > + uint32_t nw = (uint32_t)n[0]<<24 | (uint32_t)n[1]<<16 | n[2]<<8;
> > + uint32_t hw = (uint32_t)h[0]<<24 | (uint32_t)h[1]<<16 | h[2]<<8;
>
> If it were a goal to support 16-bit ints in musl, then your patch
> would still have UB by shifting a 1 into the sign bit with
> {h,n}[2]<<8, which in C is a form of signed arithmetic overflow (the
> C++ standard makes a special case for this situation but the C
> standard doesn't).
>
> However, I do not think it is a goal to support narrower than 32-bit
> ints in musl, and the original code is free of UB in these
> conditions.
Indeed, musl code assumes int is at least 32-bit since it assumes the
class of ABIs it supports.
It's arguable that the source files that are "pure library" code that
don't have anything to do with being part of a unified implementation
code base could/should be written with even fewer assumptions, but
there's a lot of subtle pain in environments where default promotions
don't do what you expect, and I don't think it's a good use of time to
try to maintain that in a good state. If you're really targeting some
tiny 8bit microcontroller or whatever, you don't want the code that's
in musl; you want either even more naive implementations of these
functions for minimal code size, or hand written asm.
Rich
On Sat, Jun 04, 2022 at 02:16:36PM -0400, Rich Felker wrote: > Indeed, musl code assumes int is at least 32-bit since it assumes the > class of ABIs it supports. Sorry, wasn't aware of that. On Sat, Jun 04, 2022 at 06:04:15PM +0000, Pascal Cuoq wrote: > If it were a goal to support 16-bit ints in musl, then your patch > would still have UB by shifting a 1 into the sign bit with > {h,n}[2]<<8, which in C is a form of signed arithmetic overflow (the > C++ standard makes a special case for this situation but the C > standard doesn't). That makes sense. I misunderstood what the first cast was doing and thought it was protection against int being 16bits. Thanks for the replies and sorry for the trouble! - NRK