mailing list of musl libc
 help / color / mirror / code / Atom feed
From: наб <nabijaczleweli@nabijaczleweli.xyz>
To: musl@lists.openwall.com
Subject: [musl] REG_STARTEND tests
Date: Fri, 21 Apr 2023 17:48:28 +0200	[thread overview]
Message-ID: <r2m7uapdl4n6p6bfksz7uyu3oj4p36ckfg2rdbkwqvywlurxwu@6os2hkymj5bp> (raw)
In-Reply-To: <73caac41e70db544c53b1aa947627206d3eb625b.1682024413.git.nabijaczleweli@nabijaczleweli.xyz>


[-- Attachment #1.1: Type: text/plain, Size: 783 bytes --]

I didn't formalise the tests last night, but see the attached file,
which passes cleanly on NetBSD and the illumos gate.

On musl with only patch 1/2 applied I get
$ ~/store/code/musl/prefix/bin/musl-clang tst-reg-startend.c -o \
    /tmp/tst-reg-startend  -DSTANDALONE  -static &&
	/tmp/tst-reg-startend 2>&1 | cat -A
tst-reg-startend.c: ^a.c$: ac: no match
tst-reg-startend.c: ^a.*c$: ac: no match
tst-reg-startend.c: ^a[^c]c$: ac: no match
tst-reg-startend.c: ^a..: ac: no match
tst-reg-startend.c: ..c: ac: no match
tst-reg-startend.c: [^z]c: ac: no match
tst-reg-startend.c: [^z]c: ac: wanted {2, 4}, got {1, 4}

And with patch 2/2 applied as well it passes cleanly.


glibc gives me a host of errors, but I'll post fixes and include this
test there later.

Best,
наб

Please keep me in CC.

[-- Attachment #1.2: Type: text/x-csrc, Size: 4029 bytes --]

/* Permission to use, copy, modify, and/or distribute this software for any
   purpose with or without fee is hereby granted.

   THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
   WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
   MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
   ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
   ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
   OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.  */

#include <assert.h>
#include <locale.h>
#include <string.h>
#include <regex.h>
#include <stdio.h>
#include <stdbool.h>


/* Build a regmatch_t with rm_so = S and rm_eo = E.  The expansion is a
   bare compound literal (not wrapped in parentheses) because it is also
   used to initialise objects with static storage duration below; the
   arguments themselves are parenthesised so any expression can be passed.  */
#define M(s, e) (regmatch_t) {.rm_so = (s), .rm_eo = (e)}
/* Nonzero iff L and R have the same start and end offsets.  Each argument
   is evaluated twice, so do not pass expressions with side effects.  */
#define MEQ(l, r) ((l).rm_so == (r).rm_so && (l).rm_eo == (r).rm_eo)

/* Offsets every test loads into the regmatch_t it hands to regexec ()
   together with REG_STARTEND.  */
static const regmatch_t bound = M(1, 4);

/* "ac" family.  Every pattern in regex_ac is run against both strings in
   data_ac; results_ac[n] is the match expected for regex_ac[n].  The two
   subject strings differ only in the third byte: NUL versus 'b'.  */
static const char *const regex_ac[] =
  {
    "^a",
    "c$",
    "^a.c$",
    "^a.*c$",
    "^a[^c]c$",
    "^a..",
    "..c",
    "[^z]c",
    NULL                        /* Terminator for the walk in testbunch.  */
  };
static const char *const data_ac[] =
  {
    "_a\0cdef",
    "_abcdef"
  };
static const regmatch_t results_ac[] =
  {
    M(1, 2),                    /* ^a  */
    M(3, 4),                    /* c$  */
    M(1, 4),                    /* ^a.c$  */
    M(1, 4),                    /* ^a.*c$  */
    M(1, 4),                    /* ^a[^c]c$  */
    M(1, 4),                    /* ^a..  */
    M(1, 4),                    /* ..c  */
    M(2, 4)                     /* [^z]c  */
  };

/* "aa" family, laid out the same way as the "ac" family.  */
static const char *const regex_aa[] =
  {
    "^a",
    "a$",
    "^\\(a\\).\\1$",
    "^a[^a]*",
    NULL
  };
static const char *const data_aa[] =
  {
    "_a\0adef",
    "_abadef"
  };
static const regmatch_t results_aa[] =
  {
    M(1, 2),                    /* ^a  */
    M(3, 4),                    /* a$  */
    M(1, 4),                    /* ^\(a\).\1$  */
    M(1, 3)                     /* ^a[^a]*  */
  };

/* Each pattern table (minus its NULL terminator) must have exactly one
   expected result per pattern.  */
static_assert(sizeof (regex_ac) / sizeof (regex_ac[0]) - 1
              == sizeof (results_ac) / sizeof (results_ac[0]), "");
static_assert(sizeof (regex_aa) / sizeof (regex_aa[0]) - 1
              == sizeof (results_aa) / sizeof (results_aa[0]), "");


/* Start a diagnostic on stderr: flag the failure, then print the file
   name, the current pattern and the bytes of DATA[i] that lie between
   bound.rm_so and bound.rm_eo (fwrite is used because those bytes may
   include NUL).  The caller appends the rest of the line.

   Expands to a comma expression and relies on the expanding function
   having `err', `regexes' and `i' in scope.  It is defined at file scope
   because functions further down the file use it as well.  */
#define BASEERR(data)                                   \
  err = true,                                           \
    fprintf (stderr, "%s: %s: ", __FILE__, *regexes),   \
    fwrite ((data)[i] + bound.rm_so, 1, bound.rm_eo - bound.rm_so, stderr)

/* Run every pattern of the NULL-terminated array REGEXES against both
   strings in DATA with REG_STARTEND and `bound' as the initial match, and
   compare the reported match with the corresponding entry of RESULTS.

   Returns true if anything went wrong (a pattern failed to compile, did
   not match, or matched at unexpected offsets), false otherwise.  */
static bool
testbunch (const char *const *regexes, const char *const data[static 2],
           const regmatch_t *results)
{
  bool err = false;
  for (; *regexes; ++regexes, ++results)
    {
      regex_t rgx;
      /* Do not compile inside assert (): with NDEBUG the call would vanish
         and regexec () below would operate on an uninitialised regex_t.  */
      const int rc = regcomp (&rgx, *regexes, 0);
      if (rc != 0)
        {
          char msg[128];
          regerror (rc, &rgx, msg, sizeof msg);
          fprintf (stderr, "%s: %s: regcomp: %s\n", __FILE__, *regexes, msg);
          err = true;
          continue;
        }

      for (size_t i = 0; i < 2; ++i)
        {
          regmatch_t match = bound;
          if (regexec (&rgx, data[i], 1, &match, REG_STARTEND))
            {
              BASEERR (data);
              fputs (": no match\n", stderr);
            }

          /* Compared even after a failed match, in which case `match'
             holds whatever regexec () left in it.  */
          if (!MEQ (match, *results))
            {
              BASEERR (data);
              fprintf (stderr, ": wanted {%d, %d}, got {%d, %d}\n",
                       (int) results->rm_so, (int) results->rm_eo,
                       (int) match.rm_so, (int) match.rm_eo);
            }
        }

      regfree (&rgx);
    }

  return err;
}


/* Subject strings for the multibyte test and, for each, whether the
   pattern is expected to match inside `bound'.  */
static const char *const ać_data[2] = {"_aaćdef", "_aćdef"};
static const bool ać_exp[] = {false, true};

/* Match the pattern "ać" against both ać_data strings with REG_STARTEND
   and `bound' as the initial match.  NOTE(review): assuming this file is
   UTF-8 encoded, ć occupies two bytes, so in the first string the bound
   ends in the middle of that character -- confirm.

   Returns true if a match/no-match outcome differs from ać_exp, if the
   regmatch_t no longer equals `bound' afterwards, or if the pattern does
   not compile; false otherwise.  */
static bool
testać (void)
{
  bool err = false;
  regex_t rgx;
  /* Kept as an array because BASEERR prints *regexes.  */
  const char *const regexes[] = {"ać"};

  /* Do not compile inside assert (): with NDEBUG the call would vanish
     and regexec () below would operate on an uninitialised regex_t.  */
  const int rc = regcomp (&rgx, *regexes, 0);
  if (rc != 0)
    {
      char msg[128];
      regerror (rc, &rgx, msg, sizeof msg);
      fprintf (stderr, "%s: %s: regcomp: %s\n", __FILE__, *regexes, msg);
      return true;
    }

  for (size_t i = 0; i < 2; ++i)
    {
      regmatch_t match = bound;
      /* regexec () returns 0 on a match and some nonzero code otherwise;
         reduce that to a bool instead of comparing the raw code with
         ać_exp[i], which would only work if REG_NOMATCH were exactly 1.  */
      const bool matched
        = regexec (&rgx, ać_data[i], 1, &match, REG_STARTEND) == 0;
      if (matched != ać_exp[i])
        {
          BASEERR (ać_data);
          fprintf (stderr, ": %s match\n", ać_exp[i] ? "no" : "yes");
        }

      /* Checked in both cases: a successful match is expected to span the
         whole bound, and after a failed one `match' is expected to still
         hold the value it was initialised with.  */
      if (!MEQ (match, bound))
        {
          BASEERR (ać_data);
          fprintf (stderr, ": wanted {%d, %d}, got {%d, %d}\n",
                   (int) bound.rm_so, (int) bound.rm_eo,
                   (int) match.rm_so, (int) match.rm_eo);
        }
    }

  regfree (&rgx);
  return err;
}


/* Test entry point: select the C.UTF-8 locale, then run the "ac" and "aa"
   pattern tables and the multibyte test.  ARGC and ARGV are unused.

   Returns 0 if everything passed and 1 otherwise, including when the
   locale cannot be set.  */
static int
do_test (int argc, char **argv)
{
  (void) argc, (void) argv;

  /* Do not call setlocale () inside assert (): with NDEBUG the call would
     vanish and the tests would run in the default "C" locale.  */
  if (setlocale (LC_ALL, "C.UTF-8") == NULL)
    {
      fprintf (stderr, "%s: cannot set locale C.UTF-8\n", __FILE__);
      return 1;
    }

  /* Run every group unconditionally so that a failure in one of them does
     not hide the diagnostics of the ones after it.  */
  const bool ac_failed = testbunch (regex_ac, data_ac, results_ac);
  const bool aa_failed = testbunch (regex_aa, data_aa, results_aa);
  const bool multibyte_failed = testać ();

  return ac_failed || aa_failed || multibyte_failed;
}


/* When STANDALONE is defined to a nonzero value (e.g. -DSTANDALONE) the
   file provides its own main (), which just forwards to do_test ().
   Otherwise ../test-skeleton.c is included instead; presumably that file
   supplies main () and invokes do_test () -- confirm against the skeleton.  */
#if STANDALONE
int
main (int argc, char **argv)
{
  const int status = do_test (argc, argv);
  return status;
}
#else
#include "../test-skeleton.c"
#endif

[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 833 bytes --]

  parent reply	other threads:[~2023-04-21 15:50 UTC|newest]

Thread overview: 7+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-20 21:01 [musl] [PATCH 1/2] regex: add BSD-style REG_STARTEND наб
2023-04-20 21:04 ` [musl] [PATCH 2/2] regex: increase TRE_CHAR_MAX and use it for NUL with REG_STARTEND наб
2023-04-21 15:48 ` наб [this message]
2023-04-28 11:39 ` [musl] [PATCH v2 1/2] regex: add BSD-style REG_STARTEND наб
2023-05-14 15:17   ` [musl] [PATCH v3 " наб
2023-05-14 15:17   ` [musl] [PATCH v3 2/2] regex: increase TRE_CHAR_MAX and use it for NUL with REG_STARTEND наб
2023-04-28 11:40 ` [musl] [PATCH v2 " наб

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=r2m7uapdl4n6p6bfksz7uyu3oj4p36ckfg2rdbkwqvywlurxwu@6os2hkymj5bp \
    --to=nabijaczleweli@nabijaczleweli.xyz \
    --cc=musl@lists.openwall.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/musl/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).