This character cannot be named normally, but can be matched with catch-alls like . and [^]

This brings us to feature parity with NetBSD:
$ ./a.out '^a[^w]c$' # matching "a\0c"
0 1, 4; -1, -1
$ ./a.out '^a.c$'
0 1, 4; -1, -1
$ ./a.out '.c$'
0 2, 4; -1, -1
$ ./a.out '.*'
0 1, 4; -1, -1
$ sed -i 's/cdef/adef/' a.c
$ ./a.out '^\(a\).\1$' # matching "a\0a"
0 1, 4; 1, 2
---
Please keep me in CC, as I'm not subscribed.

I haven't encountered an issue with this, and TRE_CHAR_MAX seems to be "domain of characters from GET_NEXT_WCHAR()", not "real characters in the current locale's encoding", so expanding the domain with a special character for NUL seems fine.

 src/regex/regexec.c | 2 +-
 src/regex/tre.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/regex/regexec.c b/src/regex/regexec.c
index 2a2bded5..f09fdae1 100644
--- a/src/regex/regexec.c
+++ b/src/regex/regexec.c
@@ -60,7 +60,7 @@ tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
     if(!max_len) { next_c = '\0'; pos_add_next = 1; } \
     else if ((pos_add_next = mbtowc(&next_c, str_byte, max_len)) <= 0) { \
       if (pos_add_next < 0) { ret = REG_NOMATCH; goto error_exit; } \
-      else { pos_add_next++; if (startend) next_c = -1; }; \
+      else { pos_add_next++; if (startend) next_c = TRE_CHAR_MAX; }; \
     } \
     str_byte += pos_add_next; \
   } while (0)
diff --git a/src/regex/tre.h b/src/regex/tre.h
index 9aae851f..e913899a 100644
--- a/src/regex/tre.h
+++ b/src/regex/tre.h
@@ -50,7 +50,7 @@ typedef wchar_t tre_char_t;

 /* Wide characters. */
 typedef wint_t tre_cint_t;
-#define TRE_CHAR_MAX 0x10ffff
+#define TRE_CHAR_MAX (0x10ffff + 1)

 #define tre_isalnum iswalnum
 #define tre_isalpha iswalpha
-- 
2.30.2