%prep if ! zmodload zsh/pcre 2>/dev/null then ZTST_unimplemented="the zsh/pcre module was disabled by configure (see config.modules)" return 0 fi setopt rematch_pcre LANG=$(ZTST_find_UTF8) if [[ -z $LANG ]]; then ZTST_unimplemented="no UTF-8 locale or multibyte mode is not implemented" else print -u $ZTST_fd Testing PCRE multibyte with locale $LANG mkdir multibyte.tmp && cd multibyte.tmp fi %test [[ 'foo→bar' =~ .([^[:ascii:]]). ]] print $MATCH print $match[1] 0:Basic non-ASCII regexp matching >o→b >→ unset match mend s=$'\u00a0' [[ $s =~ '^.$' ]] && print OK [[ A${s}B =~ .(.). && $match[1] == $s ]] && print OK [[ A${s}${s}B =~ A([^[:ascii:]]*)B && $mend[1] == 3 ]] && print OK unset s 0:Raw IMETA characters in input string >OK >OK >OK [[ foo =~ f.+ ]] ; print $? [[ foo =~ x.+ ]] ; print $? [[ ! foo =~ f.+ ]] ; print $? [[ ! foo =~ x.+ ]] ; print $? [[ foo =~ f.+ && bar =~ b.+ ]] ; print $? [[ foo =~ x.+ && bar =~ b.+ ]] ; print $? [[ foo =~ f.+ && bar =~ x.+ ]] ; print $? [[ ! foo =~ f.+ && bar =~ b.+ ]] ; print $? [[ foo =~ f.+ && ! bar =~ b.+ ]] ; print $? [[ ! ( foo =~ f.+ && bar =~ b.+ ) ]] ; print $? [[ ! foo =~ x.+ && bar =~ b.+ ]] ; print $? [[ foo =~ x.+ && ! bar =~ b.+ ]] ; print $? [[ ! ( foo =~ x.+ && bar =~ b.+ ) ]] ; print $? 0:Regex result inversion detection >0 >1 >1 >0 >0 >1 >1 >1 >1 >1 >0 >1 >0 # Note that PCRE_ANCHORED only means anchored at the start # Also note that we don't unset MATCH/match on failed match (and it's an # open issue as to whether or not we should) pcre_compile '.(→.)' pcre_match foo→bar print $? $MATCH $match ; unset MATCH match pcre_match foo.bar print $? $MATCH $match ; unset MATCH match pcre_match foo†bar print $? $MATCH $match ; unset MATCH match pcre_match foo→†ar print $? $MATCH $match ; unset MATCH match pcre_study pcre_match foo→bar print $? $MATCH $match ; unset MATCH match pcre_compile -a '.(→.)' pcre_match foo→bar print $? $MATCH $match ; unset MATCH match pcre_match o→bar print $? $MATCH $match ; unset MATCH match pcre_match o→b print $? $MATCH $match ; unset MATCH match pcre_compile 'x.(→.)' pcre_match xo→t print $? $MATCH $match ; unset MATCH match pcre_match Xo→t print $? $MATCH $match ; unset MATCH match pcre_compile -i 'x.(→.)' pcre_match xo→t print $? $MATCH $match ; unset MATCH match pcre_match Xo→t print $? $MATCH $match ; unset MATCH match 0:pcre_compile interface testing: basic, anchored & case-insensitive >0 o→b →b >1 >1 >0 o→† →† >0 o→b →b >1 >0 o→b →b >0 o→b →b >0 xo→t →t >1 >0 xo→t →t >0 Xo→t →t string="The following zip codes: 78884 90210 99513" pcre_compile -m "\d{5}" pcre_match -b -- $string && print "$MATCH; ZPCRE_OP: $ZPCRE_OP" pcre_match -b -n $ZPCRE_OP[(w)2] -- $string || print failed print "$MATCH; ZPCRE_OP: $ZPCRE_OP" 0:pcre_match -b and pcre_match -n >78884; ZPCRE_OP: 25 30 >90210; ZPCRE_OP: 31 36 # Embedded NULs allowed in plaintext, in RE, pcre supports \0 as two-chars [[ $'a\0bc\0d' =~ '^(a\0.)(.+)$' ]] print "${#MATCH}; ${#match[1]}; ${#match[2]}" 0:ensure ASCII NUL passes in and out of matched plaintext >6; 3; 3 # PCRE2 supports NULs also in the RE [[ $'a\0b\0c' =~ $'^(.\0)+' ]] && print "${#MATCH}; ${#match[1]}" 0:ensure ASCII NUL works also in the regex >4; 2 # Ensure the long-form infix operator works [[ foo -pcre-match ^f..$ ]] print $? [[ foo -pcre-match ^g..$ ]] print $? [[ ! foo -pcre-match ^g..$ ]] print $? 0:infix -pcre-match works >0 >1 >0 # Bash mode; note zsh documents that variables not updated on match failure, # which remains different from bash setopt bash_rematch [[ "goo" -pcre-match ^f.+$ ]] ; print $? [[ "foo" -pcre-match ^f.+$ ]] ; print -l $? _${^BASH_REMATCH[@]} [[ "foot" -pcre-match ^f([aeiou]+)(.)$ ]]; print -l $? _${^BASH_REMATCH[@]} [[ "foo" -pcre-match ^f.+$ ]] ; print -l $? _${^BASH_REMATCH[@]} [[ ! "goo" -pcre-match ^f.+$ ]] ; print $? unsetopt bash_rematch 0:bash-compatibility works >1 >0 >_foo >0 >_foot >_oo >_t >0 >_foo >0 # Subshell because crash on failure ( setopt re_match_pcre [[ test.txt =~ '^(.*_)?(test)' ]] echo $match[2] ) 0:regression for segmentation fault, workers/38307 >test LANG_SAVE=$LANG [[ é =~ '^.\z' ]]; echo $? LANG=C [[ é =~ '^..\z' ]]; echo $? LANG=$LANG_SAVE [[ é =~ '^.\z' ]]; echo $? 0:switch between C/UTF-8 locales >0 >0 >0 [[ abc =~ 'a(d*)bc' ]] && print "$#MATCH; $#match; ${#match[1]}" 0:empty capture >3; 1; 0 [[ category/name-12345 =~ '(?x)^ (? [^/]* ) / (? (? \w+ ) - (? \d+ ))$' ]] typeset -p1 .pcre.match 0:named captures >typeset -g -A .pcre.match=( > [category]=category > [name]=name > [package]=name-12345 > [version]=12345 >) pcre_compile 'cat(er(pillar)?)?' pcre_match -d 'the caterpillar catchment' && print $match 0:pcre_match -d >caterpillar cater cat