From: Phil Pennock <zsh-workers+phil.pennock@spodhuis.org>
To: zsh-workers@zsh.org
Subject: [PATCH] PCRE/NUL: pass NUL in for text, handle NUL out
Date: Thu, 15 Jun 2017 16:40:50 -0400 [thread overview]
Message-ID: <20170615204050.GA27003@breadbox.private.spodhuis.org> (raw)
[-- Attachment #1: Type: text/plain, Size: 4975 bytes --]
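pcre_compile() takes the regexp as a NUL-terminated C string, so an
embedded NUL silently ends the pattern early; detect embedded NULs in
the pattern and zwarn on their presence.

The text being matched is different: its unmetafied length is now passed
to PCRE explicitly instead of being recomputed with strlen(), and the
length of each returned capture is taken from the ovec offsets, so NULs
in the subject text and in the match results are preserved.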
---
Src/Modules/pcre.c | 37 ++++++++++++++++++++++++++-----------
Test/V07pcre.ztst | 5 +++++
2 files changed, 31 insertions(+), 11 deletions(-)
diff --git a/Src/Modules/pcre.c b/Src/Modules/pcre.c
index 5fd67963d..27191d709 100644
--- a/Src/Modules/pcre.c
+++ b/Src/Modules/pcre.c
@@ -75,7 +75,7 @@ zpcre_utf8_enabled(void)
static int
bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func))
{
- int pcre_opts = 0, pcre_errptr;
+ int pcre_opts = 0, pcre_errptr, target_len;
const char *pcre_error;
char *target;
@@ -89,15 +89,19 @@ bin_pcre_compile(char *nam, char **args, Options ops, UNUSED(int func))
pcre_opts |= PCRE_UTF8;
pcre_hints = NULL; /* Is this necessary? */
-
+
if (pcre_pattern)
pcre_free(pcre_pattern);
target = ztrdup(*args);
- unmetafy(target, NULL);
+ unmetafy(target, &target_len);
+
+ if ((int)strlen(target) != target_len) {
+ zwarnnam(nam, "embedded NULs in PCRE pattern terminate pattern");
+ }
pcre_pattern = pcre_compile(target, pcre_opts, &pcre_error, &pcre_errptr, NULL);
-
+
free(target);
if (pcre_pattern == NULL)
@@ -167,7 +171,12 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
sprintf(offset_all, "%d %d", ovec[0], ovec[1]);
setsparam("ZPCRE_OP", ztrdup(offset_all));
}
- match_all = metafy(captures[0], -1, META_DUP);
+ /*
+ * Result strings can contain embedded NULs; the length of each is the
+ * difference between the two values in each paired entry in ovec.
+ * ovec is length 2*(1+capture_list_length)
+ */
+ match_all = metafy(captures[0], ovec[1] - ovec[0], META_DUP);
setsparam(matchvar, match_all);
/*
* If we're setting match, mbegin, mend we only do
@@ -176,13 +185,16 @@ zpcre_get_substrings(char *arg, int *ovec, int ret, char *matchvar,
*/
if (!want_begin_end || nelem) {
char **x, **y;
+ int vec_off;
y = &captures[capture_start];
matches = x = (char **) zalloc(sizeof(char *) * (arrlen(y) + 1));
+ vec_off = 2;
do {
if (*y)
- *x++ = metafy(*y, -1, META_DUP);
+ *x++ = metafy(*y, ovec[vec_off+1]-ovec[vec_off], META_DUP);
else
*x++ = NULL;
+ vec_off += 2;
} while (*y++);
setaparam(substravar, matches);
}
@@ -318,8 +330,7 @@ bin_pcre_match(char *nam, char **args, Options ops, UNUSED(int func))
ovec = zalloc(ovecsize*sizeof(int));
plaintext = ztrdup(*args);
- unmetafy(plaintext, NULL);
- subject_len = (int)strlen(plaintext);
+ unmetafy(plaintext, &subject_len);
if (offset_start > 0 && offset_start >= subject_len)
ret = PCRE_ERROR_NOMATCH;
@@ -351,6 +362,7 @@ cond_pcre_match(char **a, int id)
const char *pcre_err;
char *lhstr, *rhre, *lhstr_plain, *rhre_plain, *avar=NULL;
int r = 0, pcre_opts = 0, pcre_errptr, capcnt, *ov, ovsize;
+ int lhstr_plain_len, rhre_plain_len;
int return_value = 0;
if (zpcre_utf8_enabled())
@@ -362,8 +374,8 @@ cond_pcre_match(char **a, int id)
rhre = cond_str(a,1,0);
lhstr_plain = ztrdup(lhstr);
rhre_plain = ztrdup(rhre);
- unmetafy(lhstr_plain, NULL);
- unmetafy(rhre_plain, NULL);
+ unmetafy(lhstr_plain, &lhstr_plain_len);
+ unmetafy(rhre_plain, &rhre_plain_len);
pcre_pat = NULL;
ov = NULL;
ovsize = 0;
@@ -373,6 +385,9 @@ cond_pcre_match(char **a, int id)
switch(id) {
case CPCRE_PLAIN:
+ if ((int)strlen(rhre_plain) != rhre_plain_len) {
+ zwarn("embedded NULs in PCRE pattern terminate pattern");
+ }
pcre_pat = pcre_compile(rhre_plain, pcre_opts, &pcre_err, &pcre_errptr, NULL);
if (pcre_pat == NULL) {
zwarn("failed to compile regexp /%s/: %s", rhre, pcre_err);
@@ -381,7 +396,7 @@ cond_pcre_match(char **a, int id)
pcre_fullinfo(pcre_pat, NULL, PCRE_INFO_CAPTURECOUNT, &capcnt);
ovsize = (capcnt+1)*3;
ov = zalloc(ovsize*sizeof(int));
- r = pcre_exec(pcre_pat, NULL, lhstr_plain, strlen(lhstr_plain), 0, 0, ov, ovsize);
+ r = pcre_exec(pcre_pat, NULL, lhstr_plain, lhstr_plain_len, 0, 0, ov, ovsize);
/* r < 0 => error; r==0 match but not enough size in ov
* r > 0 => (r-1) substrings found; r==1 => no substrings
*/
diff --git a/Test/V07pcre.ztst b/Test/V07pcre.ztst
index ad1770712..03cb95791 100644
--- a/Test/V07pcre.ztst
+++ b/Test/V07pcre.ztst
@@ -131,6 +131,11 @@
>78884; ZPCRE_OP: 25 30
>90210; ZPCRE_OP: 31 36
+# Embedded NULs allowed in plaintext, but not in RE (although \0 as two-chars allowed)
+ [[ $'a\0bc\0d' =~ '^(a\0.)(.+)$' ]]
+ print "${#MATCH}; ${#match[1]}; ${#match[2]}"
+>6; 3; 3
+
# Subshell because crash on failure
( setopt re_match_pcre
[[ test.txt =~ '^(.*_)?(test)' ]]
--
2.13.1
[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 996 bytes --]
next reply other threads:[~2017-06-15 20:41 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
[not found] <CGME20170615204203epcas5p4b0a2bcbcfe0843d979c653df6b8352db@epcas5p4.samsung.com>
2017-06-15 20:40 ` Phil Pennock [this message]
2017-06-16 6:41 ` Stephane Chazelas
2017-06-17 3:10 ` Bart Schaefer
2017-06-17 6:31 ` Stephane Chazelas
2017-06-20 20:07 ` Phil Pennock
2017-06-21 8:59 ` Peter Stephenson
2017-06-21 22:48 ` Phil Pennock
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20170615204050.GA27003@breadbox.private.spodhuis.org \
--to=zsh-workers+phil.pennock@spodhuis.org \
--cc=zsh-workers@zsh.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
Code repositories for project(s) associated with this public inbox
https://git.vuxu.org/mirror/zsh/
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).