mailing list of musl libc
 help / color / mirror / code / Atom feed
* strcasestr.c
@ 2013-02-14 14:59 Todd C. Miller
  2013-02-14 15:23 ` strcasestr.c Rich Felker
  0 siblings, 1 reply; 12+ messages in thread
From: Todd C. Miller @ 2013-02-14 14:59 UTC (permalink / raw)
  To: musl

When investigating using the musl strstr.c for OpenBSD I noticed
that musl only has a stub for strcasestr() that calls strstr().  I
was curious whether the twoway algorithm could be adapted to do a
case-insensitive search.  It turned out to be pretty trivial to
just add calls to tolower() in the right places, making sure to
avoid sign extension.

The changes are mostly mechanical.  You might wish to inline
_strcasechr() though the compiler will probably do that for you.

 - todd

#include <string.h>
#include <stdlib.h>
#include <stdint.h>
#include <ctype.h>

/* Fold a byte to lower case with tolower() and cast the result to
 * unsigned char, so the folded value cannot sign-extend when it is
 * shifted into a search word or used as a table index. */
#define LOWER(c) ((unsigned char)tolower((c)))

/*
 * Case-insensitive search for a two-byte needle.
 *
 * Both needle bytes are folded with LOWER() and packed into a 16-bit
 * word; a same-sized window is then slid across the haystack one folded
 * byte at a time. h[0] and h[1] are read unconditionally, so the caller
 * must have verified that h[1] is not the terminator.
 *
 * Returns a pointer to the start of the first match, or 0 if the
 * haystack ends before one is found.
 */
static char *twobyte_strcasestr(const unsigned char *h, const unsigned char *n)
{
	const uint16_t want = LOWER(n[0])<<8 | LOWER(n[1]);
	uint16_t window = LOWER(h[0])<<8 | LOWER(h[1]);

	/* h tracks the last byte currently held in the window */
	h++;
	while (*h && window != want) {
		h++;
		/* assignment to uint16_t drops the byte that left the window */
		window = window<<8 | LOWER(*h);
	}
	if (!*h) return 0;
	return (char *)h - 1;
}

/*
 * Case-insensitive search for a three-byte needle.
 *
 * The three folded needle bytes occupy the top 24 bits of a 32-bit word
 * (the low byte is always zero); the haystack window is kept in the same
 * layout and advanced one folded byte at a time. h[0..2] are read
 * unconditionally, so the caller must have verified that h[2] is not
 * the terminator.
 *
 * Returns a pointer to the start of the first match, or 0 if the
 * haystack ends before one is found.
 */
static char *threebyte_strcasestr(const unsigned char *h, const unsigned char *n)
{
	/* Cast before the 24-bit shift: LOWER() promotes to int, and shifting
	 * a value >= 0x80 left by 24 overflows a 32-bit int (undefined). */
	uint32_t nw = (uint32_t)LOWER(n[0])<<24 | LOWER(n[1])<<16 | LOWER(n[2])<<8;
	uint32_t hw = (uint32_t)LOWER(h[0])<<24 | LOWER(h[1])<<16 | LOWER(h[2])<<8;
	/* h points at the last byte in the window; the shift discards the oldest */
	for (h+=2; *h && hw != nw; hw = (hw|LOWER(*++h))<<8);
	return *h ? (char *)h-2 : 0;
}

/*
 * Case-insensitive search for a four-byte needle.
 *
 * The four folded needle bytes fill a 32-bit word; the haystack window
 * is kept in the same layout and advanced one folded byte at a time.
 * h[0..3] are read unconditionally, so the caller must have verified
 * that h[3] is not the terminator.
 *
 * Returns a pointer to the start of the first match, or 0 if the
 * haystack ends before one is found.
 */
static char *fourbyte_strcasestr(const unsigned char *h, const unsigned char *n)
{
	/* Cast before the 24-bit shift: LOWER() promotes to int, and shifting
	 * a value >= 0x80 left by 24 overflows a 32-bit int (undefined). */
	uint32_t nw = (uint32_t)LOWER(n[0])<<24 | LOWER(n[1])<<16 | LOWER(n[2])<<8 | LOWER(n[3]);
	uint32_t hw = (uint32_t)LOWER(h[0])<<24 | LOWER(h[1])<<16 | LOWER(h[2])<<8 | LOWER(h[3]);
	/* h points at the last byte in the window; the shift discards the oldest */
	for (h+=3; *h && hw != nw; hw = hw<<8 | LOWER(*++h));
	return *h ? (char *)h-3 : 0;
}

/* Larger / smaller of two values. Each argument may be evaluated twice,
 * so do not pass expressions with side effects. */
#define MAX(a,b) ((a)>(b)?(a):(b))
#define MIN(a,b) ((a)<(b)?(a):(b))

/* Apply `op` (for example |= or &) between the element of bit array `a`
 * that holds bit number `b` and the single-bit mask for that bit. */
#define BITOP(a,b,op) \
 ((a)[(size_t)(b)/(8*sizeof *(a))] op (size_t)1<<((size_t)(b)%(8*sizeof *(a))))

/*
 * Case-insensitive Two-Way string search.
 *
 * Every byte of the needle and haystack is folded with LOWER() before it
 * is compared or used as a table index. The function
 *   1. measures the needle while filling a set of the folded bytes it
 *      contains and, for each such byte, the position just past its
 *      last occurrence;
 *   2. computes the maximal suffix of the needle under both byte
 *      orderings and keeps the longer one as the split point;
 *   3. checks whether the needle is periodic around that split;
 *   4. scans the haystack, comparing the right half and then the left
 *      half of the needle at each candidate position.
 *
 * h: haystack, nul-terminated.
 * n: needle, nul-terminated.
 *
 * Returns a pointer to the first match in h, or 0 if there is none
 * (including when h is shorter than n).
 */
static char *twoway_strcasestr(const unsigned char *h, const unsigned char *n)
{
	const unsigned char *z;
	unsigned char l1, l2;
	size_t l, ip, jp, k, p, ms, p0, mem, mem0;
	size_t byteset[32 / sizeof(size_t)] = { 0 };
	size_t shift[256];

	/* Compute length of needle and fill shift table */
	for (l=0; n[l] && h[l]; l++) {
		l1 = LOWER(n[l]);
		BITOP(byteset, l1, |=), shift[l1] = l+1;
	}
	if (n[l]) return 0; /* hit the end of h */

	/* Compute maximal suffix; ip starts at (size_t)-1 and relies on
	 * unsigned wrap-around so that ip+k indexes from 0 */
	ip = -1; jp = 0; k = p = 1;
	while (jp+k<l) {
		l1 = LOWER(n[ip+k]);
		l2 = LOWER(n[jp+k]);
		if (l1 == l2) {
			if (k == p) {
				jp += p;
				k = 1;
			} else k++;
		} else if (l1 > l2) {
			jp += k;
			k = 1;
			p = jp - ip;
		} else {
			ip = jp++;
			k = p = 1;
		}
	}
	ms = ip;
	p0 = p;

	/* And with the opposite comparison */
	ip = -1; jp = 0; k = p = 1;
	while (jp+k<l) {
		l1 = LOWER(n[ip+k]);
		l2 = LOWER(n[jp+k]);
		if (l1 == l2) {
			if (k == p) {
				jp += p;
				k = 1;
			} else k++;
		} else if (l1 < l2) {
			jp += k;
			k = 1;
			p = jp - ip;
		} else {
			ip = jp++;
			k = p = 1;
		}
	}
	/* Compare ms+1 values so that (size_t)-1 orders as "shortest" */
	if (ip+1 > ms+1) ms = ip;
	else p = p0;

	/* Periodic needle? Compare the first ms+1 folded bytes against the
	 * bytes p positions later. The bound is written as ms+1 so that
	 * ms == (size_t)-1 yields zero iterations instead of wrapping into
	 * an always-true condition. */
	for (ip = 0; ip < ms+1; ip++) {
		if (LOWER(n[ip]) != LOWER(n[ip + p]))
			break;
	}
	if (ip < ms+1) {
		mem0 = 0;
		p = MAX(ms, l-ms-1) + 1;
	} else mem0 = l-p;
	mem = 0;

	/* Initialize incremental end-of-haystack pointer */
	z = h;

	/* Search loop */
	for (;;) {
		/* Update incremental end-of-haystack pointer */
		if (z-h < l) {
			/* Fast estimate for MAX(l,63) */
			size_t grow = l | 63;
			const unsigned char *z2 = memchr(z, 0, grow);
			if (z2) {
				z = z2;
				if (z-h < l) return 0;
			} else z += grow;
		}

		/* Check last byte first; advance by shift on mismatch */
		l1 = LOWER(h[l-1]);
		if (BITOP(byteset, l1, &)) {
			k = l-shift[l1];
			if (k) {
				if (mem0 && mem && k < p) k = l-p;
				h += k;
				mem = 0;
				continue;
			}
		} else {
			/* byte does not occur in the needle at all */
			h += l;
			mem = 0;
			continue;
		}

		/* Compare right half */
		for (k=MAX(ms+1,mem); n[k] && LOWER(n[k]) == LOWER(h[k]); k++);
		if (n[k]) {
			h += k-ms;
			mem = 0;
			continue;
		}
		/* Compare left half */
		for (k=ms+1; k>mem && LOWER(n[k-1]) == LOWER(h[k-1]); k--);
		if (k == mem) return (char *)h;
		h += p;
		mem = mem0;
	}
}

/*
 * Case-insensitive strchr().
 *
 * Scans s for the first byte whose tolower() value equals that of c
 * (c is converted to unsigned char first). The terminating nul takes
 * part in the comparison, so searching for 0 returns a pointer to the
 * terminator.
 *
 * Returns a pointer to the matching byte, or NULL if the terminator is
 * reached without a match.
 */
static char *_strcasechr(const char *s, int c)
{
	const int want = tolower((unsigned char)c);

	while (tolower((unsigned char)*s) != want) {
		if (*s == '\0')
			return NULL;
		s++;
	}
	return (char *)s;
}

/*
 * Locate the first occurrence of needle n in haystack h, comparing
 * bytes after folding them with tolower().
 *
 * An empty needle matches at the start of h. Otherwise the haystack is
 * first advanced to the first byte matching n[0]; needles of one to four
 * bytes are then handled by dedicated routines and longer ones by the
 * Two-Way search.
 *
 * Returns a pointer into h at the start of the match, or a null pointer
 * if n does not occur in h.
 */
char *strcasestr(const char *h, const char *n)
{
	const unsigned char *pat = (const unsigned char *)n;
	const unsigned char *hay;

	/* An empty needle matches immediately */
	if (n[0] == '\0') return (char *)h;

	/* Jump to the first candidate; a one-byte needle is already done */
	h = _strcasechr(h, *n);
	if (h == NULL || n[1] == '\0') return (char *)h;
	hay = (const unsigned char *)h;

	/* Before each dispatch, make sure the remaining haystack is at
	 * least as long as the needle length being tested */
	if (hay[1] == 0) return 0;
	if (pat[2] == 0) return twobyte_strcasestr(hay, pat);
	if (hay[2] == 0) return 0;
	if (pat[3] == 0) return threebyte_strcasestr(hay, pat);
	if (hay[3] == 0) return 0;
	if (pat[4] == 0) return fourbyte_strcasestr(hay, pat);

	/* Five or more bytes: general algorithm */
	return twoway_strcasestr(hay, pat);
}


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: strcasestr.c
  2013-02-14 14:59 strcasestr.c Todd C. Miller
@ 2013-02-14 15:23 ` Rich Felker
  2013-02-17 19:04   ` strcasestr.c Rich Felker
  0 siblings, 1 reply; 12+ messages in thread
From: Rich Felker @ 2013-02-14 15:23 UTC (permalink / raw)
  To: musl

On Thu, Feb 14, 2013 at 09:59:56AM -0500, Todd C. Miller wrote:
> When investigating using the musl strstr.c ofr OpenBSD I noticed
> that musl only has a stub for strcasestr() that calls strstr().  I
> was curious whether the twoway algorithm could be adapted to do a
> case-insensitive search.  It turned out to be pretty trivial to
> just add calls to tolower() in the right places, making sure to
> avoid sign extension.
> 
> The changes are mostly mechanical.  You might wish to inline
> _strcasechr() though the compiler will probably do that for you.

Unfortunately, as far as I can tell making this correct is nontrivial;
your version only works for ascii, not the rest of ucs. I don't see
any easy way to adapt 2way to the case where matching classes contain
characters of different lengths...

Rich


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: strcasestr.c
  2013-02-14 15:23 ` strcasestr.c Rich Felker
@ 2013-02-17 19:04   ` Rich Felker
  2013-02-20 22:28     ` strcasestr.c John Spencer
  0 siblings, 1 reply; 12+ messages in thread
From: Rich Felker @ 2013-02-17 19:04 UTC (permalink / raw)
  To: musl

On Thu, Feb 14, 2013 at 10:23:49AM -0500, Rich Felker wrote:
> On Thu, Feb 14, 2013 at 09:59:56AM -0500, Todd C. Miller wrote:
> > When investigating using the musl strstr.c ofr OpenBSD I noticed
> > that musl only has a stub for strcasestr() that calls strstr().  I
> > was curious whether the twoway algorithm could be adapted to do a
> > case-insensitive search.  It turned out to be pretty trivial to
> > just add calls to tolower() in the right places, making sure to
> > avoid sign extension.
> > 
> > The changes are mostly mechanical.  You might wish to inline
> > _strcasechr() though the compiler will probably do that for you.
> 
> Unfortunately, as far as I can tell making this correct is nontrivial;
> your version only works for ascii, not the rest of ucs. I don't see
> any easy way to adapt 2way to the case where matching classes contain
> characters of different lengths...

To elaborate, since strcasestr is not a standard function with a
rigorous specification, I find it difficult to determine what the
"correct" behavior should be. It's only documented to "ignore the
case". Does this mean it should operate like a literal regex search
with REG_ICASE? Or should it operate as if by using single-byte
tolower/toupper functions, in which case your code would be correct?

Actually we already have this problem for strcasecmp, even though it's
specified by POSIX. POSIX just says:

    When the LC_CTYPE category of the current locale is from the POSIX
    locale, strcasecmp() and strncasecmp() shall behave as if the
    strings had been converted to lowercase and then a byte comparison
    performed. Otherwise, the results are unspecified.

It seems POSIX clearly allows implementations to do whatever they like
in non-POSIX locale, but since musl is providing just a POSIX locale
for LC_CTYPE, we're under certain obligations. Note that XBD 7.2 POSIX
Locale allows us, as we're doing, to have character properties and
case mappings outside of those specified in POSIX as long as they're
for characters outside the portable character set:

    The tables in Locale Definition describe the characteristics and
    behavior of the POSIX locale for data consisting entirely of
    characters from the portable character set and the control
    character set. For other characters, the behavior is unspecified.
    For C-language programs, the POSIX locale shall be the default
    locale when the setlocale() function is not called.

Thus, it seems like since we have case mappings for all of Unicode,
strcasecmp needs to honor those, and thus the current implementation
is probably non-conforming.

Since strcasestr is nonstandard and not clearly specified, I don't see
any _obligation_ for it to do the same, but perhaps it should. Thus I
think my previous reply to your patch was premature; your code isn't
necessarily wrong, but it's a matter that needs further consideration
to decide what course of action is appropriate.

Comments from anybody else?

Rich


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: strcasestr.c
  2013-02-17 19:04   ` strcasestr.c Rich Felker
@ 2013-02-20 22:28     ` John Spencer
  2013-02-20 23:56       ` strcasestr.c Szabolcs Nagy
  2013-02-21  1:03       ` strcasestr.c Rich Felker
  0 siblings, 2 replies; 12+ messages in thread
From: John Spencer @ 2013-02-20 22:28 UTC (permalink / raw)
  To: musl

On 02/17/2013 08:04 PM, Rich Felker wrote:
> On Thu, Feb 14, 2013 at 10:23:49AM -0500, Rich Felker wrote:
>> On Thu, Feb 14, 2013 at 09:59:56AM -0500, Todd C. Miller wrote:
>>> When investigating using the musl strstr.c ofr OpenBSD I noticed
>>> that musl only has a stub for strcasestr() that calls strstr().

according to git log, this function dates back to the very first musl 
commit ever...

> Since strcasestr is nonstandard and not clearly specified,

it's so non-standard that nobody even uses it.
i looked up the usage of the function in codesearch.debian.net, and the 
only *user* (from all ~20K debian packages) of the function is gnu wget.
every other occurrence of the function is from gnulib embedded into other 
packages.
so the current status is: gnulib includes the function, but nobody 
besides gnu wget uses it.

> Comments from anybody else?

given the above findings, we can just leave the function as-is (plus a 
comment that nobody uses it anyway) or remove it entirely.
in any case it doesn't make sense to put much work and especially much 
code into it.
if it's gonna be implemented "correctly" at all, it should be as slim as 
possible, in the order of 3-5 LOC.



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: strcasestr.c
  2013-02-20 22:28     ` strcasestr.c John Spencer
@ 2013-02-20 23:56       ` Szabolcs Nagy
  2013-02-21  1:03       ` strcasestr.c Rich Felker
  1 sibling, 0 replies; 12+ messages in thread
From: Szabolcs Nagy @ 2013-02-20 23:56 UTC (permalink / raw)
  To: musl

* John Spencer <maillist-musl@barfooze.de> [2013-02-20 23:28:31 +0100]:
> On 02/17/2013 08:04 PM, Rich Felker wrote:
> >On Thu, Feb 14, 2013 at 10:23:49AM -0500, Rich Felker wrote:
> >>On Thu, Feb 14, 2013 at 09:59:56AM -0500, Todd C. Miller wrote:
> >>>When investigating using the musl strstr.c ofr OpenBSD I noticed
> >>>that musl only has a stub for strcasestr() that calls strstr().
> 
> according to git log, this function dates back to the very first
> musl commit ever...
> 
> >Since strcasestr is nonstandard and not clearly specified,
> 
> it's so non-standard that even nobody uses it.
> i looked up the usage of the function in codesearch.debian.net, and
> the only *user* (from all ~20K debian packages) of the function is
> gnu wget.

it seems wget only uses it for

  str = strcasestr(str, "charset=");

when parsing text mime headers

i'll just note here that glibc has sse4.2 optimization for
strcasestr.. in case someone runs into performance troubles
finding the charset in mime headers..

(they tolower by 16bytes and find match with vectorized operations)


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: strcasestr.c
  2013-02-20 22:28     ` strcasestr.c John Spencer
  2013-02-20 23:56       ` strcasestr.c Szabolcs Nagy
@ 2013-02-21  1:03       ` Rich Felker
  2013-02-21  1:30         ` strcasestr.c Kurt H Maier
                           ` (2 more replies)
  1 sibling, 3 replies; 12+ messages in thread
From: Rich Felker @ 2013-02-21  1:03 UTC (permalink / raw)
  To: musl

On Wed, Feb 20, 2013 at 11:28:31PM +0100, John Spencer wrote:
> On 02/17/2013 08:04 PM, Rich Felker wrote:
> >On Thu, Feb 14, 2013 at 10:23:49AM -0500, Rich Felker wrote:
> >>On Thu, Feb 14, 2013 at 09:59:56AM -0500, Todd C. Miller wrote:
> >>>When investigating using the musl strstr.c ofr OpenBSD I noticed
> >>>that musl only has a stub for strcasestr() that calls strstr().
> 
> according to git log, this function dates back to the very first
> musl commit ever...

Yes, it seems to have been an early mistake I made getting wget to
work. Unfortunately, I think it would be bad policy to remove it now
since that would break existing dynamic binaries using it, but one
could make an argument that breaking them is "right" since they were
already broken (not behaving as intended)...

> >Since strcasestr is nonstandard and not clearly specified,
> 
> it's so non-standard that even nobody uses it.
> i looked up the usage of the function in codesearch.debian.net, and
> the only *user* (from all ~20K debian packages) of the function is
> gnu wget.

Are you sure this search was correct? IIRC there were more...

> every other occurence of the function is from gnulib embedded into
> other packages.
> so the current status is: gnulib includes the function, but nobody
> besides gnu wget uses it.
> 
> >Comments from anybody else?
> 
> given the above findings, we can just leave the function as-is (plus
> a comment that nobody uses it anyway) or remove it entirely.

I think leaving it as-is is the worst case. It's impossible to detect
without runtime checks that it's incorrect, so it might encourage
configure scripts to add runtime checks for broken strcasestr that
break cross compiling. Or it might lead to programs just assuming it's
correct, then breaking.

> in any case it doesnt make sense to put much work and especially
> much code into it.
> if it's gonna be implemented "correctly" at all, it should be as
> slim as possible, in the order of 3-5 LOC.

As much as I appreciate Todd's interest in contributing a 2way-based
version, I tend to agree. Not only would adopting the 2way code be a
fairly large code addition for a never-used feature, but it would also
bind us to the choice to do ASCII-only case mapping or drop
performance drastically in the future if we want to change that
decision.

My leaning right now would be to write the naive strstr loop using
strcasecmp instead of strcmp (or an inline loop) for the inner loop.
This will cause strcasestr to have the exact same case-folding
semantics as strcasecmp, whatever those are in the future. This is
best for consistency. Unfortunately, it's very bad from a performance
standpoint, but I don't know of any code using this function for
high-performance use.

The other somewhat reasonable option would be removing the function,
which would expose breakage in programs that were already using the
broken version in musl. I'm mildly against this, but I'd be interested
in hearing arguments either way.

Rich


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: strcasestr.c
  2013-02-21  1:03       ` strcasestr.c Rich Felker
@ 2013-02-21  1:30         ` Kurt H Maier
  2013-02-21  1:34           ` strcasestr.c Rich Felker
  2013-02-21  6:18         ` strcasestr.c Isaac Dunham
  2013-02-22  5:20         ` strcasestr.c Rich Felker
  2 siblings, 1 reply; 12+ messages in thread
From: Kurt H Maier @ 2013-02-21  1:30 UTC (permalink / raw)
  To: musl

On Wed, Feb 20, 2013 at 08:03:28PM -0500, Rich Felker wrote:
>
> The other somewhat reasonable option would be removing the function,
> which would expose breakage in programs that were already using the
> broken version in musl. I'm mildly against this, but I'd be interested
> in hearing arguments either way.

Please remove it.  It's not world-ending to ask people to relink;
programs known to require the broken functionality can run against an
older version of musl until they're repaired.  If you start enshrining
mistakes now you're just driving down glibc avenue.  I'd rather have
musl be a correct libc than a quirky libc.

khm


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: strcasestr.c
  2013-02-21  1:30         ` strcasestr.c Kurt H Maier
@ 2013-02-21  1:34           ` Rich Felker
  0 siblings, 0 replies; 12+ messages in thread
From: Rich Felker @ 2013-02-21  1:34 UTC (permalink / raw)
  To: musl

On Wed, Feb 20, 2013 at 08:30:54PM -0500, Kurt H Maier wrote:
> On Wed, Feb 20, 2013 at 08:03:28PM -0500, Rich Felker wrote:
> >
> > The other somewhat reasonable option would be removing the function,
> > which would expose breakage in programs that were already using the
> > broken version in musl. I'm mildly against this, but I'd be interested
> > in hearing arguments either way.
> 
> Please remove it.  It's not world-ending to ask people to relink;
> programs known to require the broken functionality can run against an
> older version of musl until they're repaired.  If you start enshrining
> mistakes now you're just driving down glibc avenue.  I'd rather have
> musl be a correct libc than a quirky libc.

Well the other issue is using glibc-linked binaries/libs. If any of
them reference strcasestr (which they might in all those gnulib
packages mentioned earlier in this thread once gnulib detects that
glibc has its own strcasestr...), then we'd potentially want to
provide strcasestr to be able to use them..

I don't think "enshrining mistakes" is a good policy in itself, but if
they can be fixed (unbreaking the function) in ways that have other
practical benefits, that might be a better alternative.

With that said, opinion #1 noted. :)

Rich


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: strcasestr.c
  2013-02-21  1:03       ` strcasestr.c Rich Felker
  2013-02-21  1:30         ` strcasestr.c Kurt H Maier
@ 2013-02-21  6:18         ` Isaac Dunham
  2013-02-21 20:00           ` strcasestr.c John Spencer
  2013-02-22  5:20         ` strcasestr.c Rich Felker
  2 siblings, 1 reply; 12+ messages in thread
From: Isaac Dunham @ 2013-02-21  6:18 UTC (permalink / raw)
  To: musl

On Wed, 20 Feb 2013 20:03:28 -0500
Rich Felker <dalias@aerifal.cx> wrote:

> 
> Yes, it seems to have been an early mistake I made getting wget to
> work. Unfortunately, I think it would be bad policy to remove it now
> since that would break existing dynamic binaries using it, but one
> could make an argument that breaking them is "right" since they were
> already broken (not behaving as intended)...
> 
> > >Since strcasestr is nonstandard and not clearly specified,
> > 
> > it's so non-standard that even nobody uses it.
> > i looked up the usage of the function in codesearch.debian.net, and
> > the only *user* (from all ~20K debian packages) of the function is
> > gnu wget.
> 
> Are you sure this search was correct? IIRC there were more...

A quick check here indicates that busybox, mutt, git, midnight commander, sylpheed, foomatic-rip, elinks, and a couple libraries use it.
Busybox uses it in grep and for checking passwords (see libbb/obscure.c).
 
> I think leaving it as-is is the worst case. It's impossible to detect
> without runtime checks that it's incorrect, so it might encourage
> configure scripts to add runtime checks for broken strcasestr that
> break cross compiling. Or it might lead to programs just assuming it's
> correct, then breaking.
> 
> > in any case it doesnt make sense to put much work and especially
> > much code into it.
> > if it's gonna be implemented "correctly" at all, it should be as
> > slim as possible, in the order of 3-5 LOC.
> 
> As much as I appreciate Todd's interest in contributing a 2way-based
> version, I tend to agree. Not only would adopting the 2way code be a
> fairly large code addition for a never-used feature, but it would also
> bind us to the choice to do ASCII-only case mapping or drop
> performance drastically in the future if we want to change that
> decision.
> 
> My leaning right now would be to write the naive strstr loop using
> strcasecmp instead of strcmp (or an inline loop) for the inner loop.
> This will cause strcasestr to have the exact same case-folding
> semantics as strcasecmp, whatever those are in the future. This is
> best for consistency. Unfortunately, it's very bad from a performance
> standpoint, but I don't know of any code using this function for
> high-performance use.

Busybox implements their own version using this approach.
 
> The other somewhat reasonable option would be removing the function,
> which would expose breakage in programs that were already using the
> broken version in musl. I'm mildly against this, but I'd be interested
> in hearing arguments either way.

Were the claimed frequency correct, I would want it gone. As it stands, I think that a small but slow version is justifiable. A large one isn't.
-- 
Isaac Dunham <idunham@lavabit.com>



^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: strcasestr.c
  2013-02-21  6:18         ` strcasestr.c Isaac Dunham
@ 2013-02-21 20:00           ` John Spencer
  2013-02-21 20:13             ` strcasestr.c Szabolcs Nagy
  0 siblings, 1 reply; 12+ messages in thread
From: John Spencer @ 2013-02-21 20:00 UTC (permalink / raw)
  To: musl

On 02/21/2013 07:18 AM, Isaac Dunham wrote:
> On Wed, 20 Feb 2013 20:03:28 -0500
> Rich Felker<dalias@aerifal.cx>  wrote:
>>>> Since strcasestr is nonstandard and not clearly specified,
>>> it's so non-standard that even nobody uses it.
>>> i looked up the usage of the function in codesearch.debian.net, and
>>> the only *user* (from all ~20K debian packages) of the function is
>>> gnu wget.
>> Are you sure this search was correct? IIRC there were more...

i just checked again, and according to debian codesearch this is indeed 
the only call.

> A quick check here indicates that busybox, mutt, git, midnight commander, sylpheed, foomatic-rip, elinks, and a couple libraries use it.
> Busycox uses it in grep and for checking passwords (see libbb/obscure.c).

interesting findings. i can confirm that busybox does indeed use 
strcasestr unconditionally, and git seems to use it as well.
so it was probably added to make busybox happy.

i can't really believe that git is not in debian's 18K base packages, so 
i can't really explain why this is missing from codesearch results.

>> The other somewhat reasonable option would be removing the function,
>> which would expose breakage in programs that were already using the
>> broken version in musl. I'm mildly against this, but I'd be interested
>> in hearing arguments either way.
> Were the claimed frequency correct, I would want it gone. As it stands, I think that a small but slow version is justifiable. A large one isn't.
*nod*


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: strcasestr.c
  2013-02-21 20:00           ` strcasestr.c John Spencer
@ 2013-02-21 20:13             ` Szabolcs Nagy
  0 siblings, 0 replies; 12+ messages in thread
From: Szabolcs Nagy @ 2013-02-21 20:13 UTC (permalink / raw)
  To: musl

* John Spencer <maillist-musl@barfooze.de> [2013-02-21 21:00:27 +0100]:
> i can't really believe that git is not in debian's 18K base
> packages, so i can't really explain why this is missing from
> codesearch results.
> 

it seems if you search 'strcasestr package:git' then it
finds git, but not for the general search..

it would be nice if we had a proper code search..


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: strcasestr.c
  2013-02-21  1:03       ` strcasestr.c Rich Felker
  2013-02-21  1:30         ` strcasestr.c Kurt H Maier
  2013-02-21  6:18         ` strcasestr.c Isaac Dunham
@ 2013-02-22  5:20         ` Rich Felker
  2 siblings, 0 replies; 12+ messages in thread
From: Rich Felker @ 2013-02-22  5:20 UTC (permalink / raw)
  To: musl

On Wed, Feb 20, 2013 at 08:03:28PM -0500, Rich Felker wrote:
> My leaning right now would be to write the naive strstr loop using
> strcasecmp instead of strcmp (or an inline loop) for the inner loop.
> This will cause strcasestr to have the exact same case-folding
> semantics as strcasecmp, whatever those are in the future. This is
> best for consistency. Unfortunately, it's very bad from a performance
> standpoint, but I don't know of any code using this function for
> high-performance use.

I just committed an utterly trivial, working strcasestr based on this
principle. At least it's better than what we had before. I hope nobody
objects too much. It's 79 bytes of code (97 bytes for PIC) on x86 and
that's with -O3.

Rich


^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2013-02-22  5:20 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-02-14 14:59 strcasestr.c Todd C. Miller
2013-02-14 15:23 ` strcasestr.c Rich Felker
2013-02-17 19:04   ` strcasestr.c Rich Felker
2013-02-20 22:28     ` strcasestr.c John Spencer
2013-02-20 23:56       ` strcasestr.c Szabolcs Nagy
2013-02-21  1:03       ` strcasestr.c Rich Felker
2013-02-21  1:30         ` strcasestr.c Kurt H Maier
2013-02-21  1:34           ` strcasestr.c Rich Felker
2013-02-21  6:18         ` strcasestr.c Isaac Dunham
2013-02-21 20:00           ` strcasestr.c John Spencer
2013-02-21 20:13             ` strcasestr.c Szabolcs Nagy
2013-02-22  5:20         ` strcasestr.c Rich Felker

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).