zsh-workers
 help / color / mirror / code / Atom feed
860233e54b7963e77ff6b394ddce8d5e7c4488c3 blob 10533 bytes (raw)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
 
%prep

  # Probe for the module by loading only the -re2-match condition; if that
  # fails, mark the whole file as unimplemented and stop preparing.
  if ! zmodload -F zsh/re2 C:re2-match 2>/dev/null
  then
    ZTST_unimplemented="the zsh/re2 module is not available"
    return 0
  fi
# Load the rest of the builtins
  zmodload zsh/re2
  # TODO: use future mechanism to switch =~ to use re2 and test =~ too
# Find a UTF-8 locale.
  setopt multibyte
# Don't let LC_* override our choice of locale.
  unset -m LC_\*
  mb_ok=
# Candidate locales: a few common names first, then whatever "locale -a"
# reports as UTF-8.  "grep -E" is used instead of the obsolescent "egrep",
# which newer GNU grep releases complain about on stderr.
  langs=(en_{US,GB}.{UTF-,utf}8 en.UTF-8
         $(locale -a 2>/dev/null | grep -E 'utf8|UTF-8'))
# A candidate is accepted once the two-byte character is seen as exactly
# one character by the "?" glob.
  for LANG in $langs; do
    if [[ é = ? ]]; then
      mb_ok=1
      break;
    fi
  done
  if [[ -z $mb_ok ]]; then
    ZTST_unimplemented="no UTF-8 locale or multibyte mode is not implemented"
  else
    print -u $ZTST_fd Testing RE2 multibyte with locale $LANG
    mkdir multibyte.tmp && cd multibyte.tmp
  fi

%test

  # One non-ASCII character (captured) with any single character on each
  # side: MATCH should hold the three-character match, match[1] the arrow.
  [[ 'foo→bar' -re2-match .([^[:ascii:]]). ]]
  print $MATCH
  print $match[1]
0:Basic non-ASCII regexp matching
>o→b
>→

  # Uses the \p{...} Unicode class names; the expected output requires the
  # multibyte first character and the ASCII second character to land in
  # separate capture groups, each intact.
  MATCH=''
  [[ ÷x -re2-match '^(\p{Sm})(\p{Latin})$' ]]
  print "$? <$MATCH> .${match[1]}|${match[2]}."
0:Unicode character class names & extracting correct widths
>0 <÷x> .÷|x.

  # Walks through the -re2-match, -re2-match-longest, -re2-match-posix and
  # -re2-match-posixperl conditions, printing the exit status after each.
  [[ alphabeta -re2-match a([^a]+)a ]]
  echo "$? basic"
  print $MATCH
  print $match[1]
  [[ ! alphabeta -re2-match a(.+)a ]]
  echo "$? negated op"
  [[ alphabeta -re2-match ^b ]]
  echo "$? failed match"
# default matches on first, then takes longest substring
# -longest keeps looking
  [[ abb -re2-match a(b|bb) ]]
  echo "$? first .${MATCH}.${match[1]}."
  [[ abb -re2-match-longest a(b|bb) ]]
  echo "$? longest .${MATCH}.${match[1]}."
  [[ alphabeta -re2-match ab ]]; echo "$? unanchored"
  [[ alphabeta -re2-match ^ab ]]; echo "$? anchored"
  [[ alphabeta -re2-match '^a(\w+)a$' ]]
  echo "$? perl class used"
  echo ".${MATCH}. .${match[1]}."
# In POSIX mode \w fails to compile; the diagnostic goes to stderr and is
# matched by the single "*?" pattern line in the expected output.  Because
# that line is a pattern, the backslash is doubled there to be literal.
  [[ alphabeta -re2-match-posix '^a(\w+)a$' ]]
  echo "$? POSIX-mode, should inhibit Perl class"
  [[ alphabeta -re2-match-posixperl '^a(\w+)a$' ]]
  echo "$? POSIX-mode with Perl classes enabled .${match[1]}."
  unset MATCH match
  [[ alphabeta -re2-match ^a([^a]+)a([^a]+)a$ ]]
  echo "$? matched, set vars"
  echo ".$MATCH. ${#MATCH}"
  echo ".${(j:|:)match[*]}."
  unset MATCH match
  [[ alphabeta -re2-match fr(.+)d ]]
  echo "$? unmatched, not setting MATCH/match"
  echo ".$MATCH. ${#MATCH}"
  echo ".${(j:|:)match[*]}."
0:Basic matching & result codes
>0 basic
>alpha
>lph
>1 negated op
>1 failed match
>0 first .ab.b.
>0 longest .abb.bb.
>0 unanchored
>1 anchored
>0 perl class used
>.alphabeta. .lphabet.
*?\(eval\):*: re2 rexexp compilation failed: invalid escape sequence: \\w
>1 POSIX-mode, should inhibit Perl class
>0 POSIX-mode with Perl classes enabled .lphabet.
>0 matched, set vars
>.alphabeta. 9
>.lph|bet.
>1 unmatched, not setting MATCH/match
>.. 0
>..

  m() {
    # Run a single -re2-match test and dump the status together with every
    # match-related variable.  Arguments: label, subject string, pattern.
    local label="$1" subject="$2" pattern="$3"
    unset MATCH MBEGIN MEND match mbegin mend
    [[ $subject -re2-match $pattern ]]
    print $? ${label}: m:${MATCH}: ma:${(j:|:)match}: MBEGIN=${MBEGIN} MEND=${MEND} mbegin="(${mbegin[*]})" mend="(${mend[*]})"
  }
  data='alpha beta gamma delta'
  m uncapturing      "$data" '\b\w+\b'
  m capturing        "$data" '\b(\w+)\b'
  m 'capture 2'      "$data" '\b(\w+)\s+(\w+)\b'
  m 'capture repeat' "$data" '\b(?:(\w+)\s+)+(\w+)\b'
0:Beginning and end testing
>0 uncapturing: m:alpha: ma:: MBEGIN=1 MEND=5 mbegin=() mend=()
>0 capturing: m:alpha: ma:alpha: MBEGIN=1 MEND=5 mbegin=(1) mend=(5)
>0 capture 2: m:alpha beta: ma:alpha|beta: MBEGIN=1 MEND=10 mbegin=(1 7) mend=(5 10)
>0 capture repeat: m:alpha beta gamma delta: ma:gamma|delta: MBEGIN=1 MEND=22 mbegin=(12 18) mend=(16 22)


  # s is the single character U+00A0.  NOTE(review): per the test title this
  # is meant to exercise raw IMETA bytes in the subject, presumably via the
  # character's encoded bytes -- confirm.
  unset match mend
  s=$'\u00a0'
  [[ $s -re2-match '^.$' ]] && print OK
  [[ A${s}B -re2-match .(.). && $match[1] == $s ]] && print OK
# Two such characters between A and B: the capture must end at character
# index 3, i.e. offsets are counted in characters.
  [[ A${s}${s}B -re2-match A([^[:ascii:]]*)B && $mend[1] == 3 ]] && print OK
  unset s
0:Raw IMETA characters in input string
>OK
>OK
>OK

  # Each condition is followed by a print of its exit status; "f.+" and
  # "b.+" match their subjects, "x.+" never does.
  [[ foo -re2-match f.+ ]]
  print $?
  [[ foo -re2-match x.+ ]]
  print $?
  [[ ! foo -re2-match f.+ ]]
  print $?
  [[ ! foo -re2-match x.+ ]]
  print $?
  [[ foo -re2-match f.+ && bar -re2-match b.+ ]]
  print $?
  [[ foo -re2-match x.+ && bar -re2-match b.+ ]]
  print $?
  [[ foo -re2-match f.+ && bar -re2-match x.+ ]]
  print $?
  [[ ! foo -re2-match f.+ && bar -re2-match b.+ ]]
  print $?
  [[ foo -re2-match f.+ && ! bar -re2-match b.+ ]]
  print $?
  [[ ! ( foo -re2-match f.+ && bar -re2-match b.+ ) ]]
  print $?
  [[ ! foo -re2-match x.+ && bar -re2-match b.+ ]]
  print $?
  [[ foo -re2-match x.+ && ! bar -re2-match b.+ ]]
  print $?
  [[ ! ( foo -re2-match x.+ && bar -re2-match b.+ ) ]]
  print $?
0:Regex result inversion detection
>0
>1
>1
>0
>0
>1
>1
>1
>1
>1
>0
>1
>0

# Run in a subshell: if the bug regresses the shell crashes, and the
# subshell keeps that from taking the test runner down with it.
# The first group is optional and does not participate in the match; the
# second group must still be reported correctly.
  ( [[ test.txt -re2-match '^(.*_)?(test)' ]]
    echo $match[2] )
0:regression for segmentation fault (pcre, dup for re2), workers/38307
>test

  # With BASH_REMATCH (and KSH_ARRAYS for zero-based indexing) set, the
  # expected output shows the results landing in the BASH_REMATCH array only:
  # element 0 is the whole match, and MATCH/match/MBEGIN/MEND/mbegin/mend
  # all stay empty.
  setopt BASH_REMATCH KSH_ARRAYS
  unset MATCH MBEGIN MEND match mbegin mend BASH_REMATCH
  [[ alphabeta -re2-match '^a([^a]+)(a)([^a]+)a$' ]]
  print "$? bash_rematch"
  print "m:${MATCH}: ma:${(j:|:)match}:"
  print MBEGIN=$MBEGIN MEND=$MEND mbegin="(${mbegin[*]})" mend="(${mend[*]})"
  print "BASH_REMATCH=[${(j:, :)BASH_REMATCH[@]}]"
  print "[0]=${BASH_REMATCH[0]} [1]=${BASH_REMATCH[1]}"
0:bash_rematch works
>0 bash_rematch
>m:: ma::
>MBEGIN= MEND= mbegin=() mend=()
>BASH_REMATCH=[alphabeta, lph, a, bet]
>[0]=alphabeta [1]=lph

  # Undo the options set by the previous test before redefining the helper.
  unsetopt BASH_REMATCH KSH_ARRAYS
  m() {
    # Compile a pattern with re2_compile, then match it with re2_match.
    #   $1      label printed in the result line
    #   $2      subject text to match
    #   $3...   options and pattern handed to re2_compile
    # Prints "<compile status>:<match status> label: m:<MATCH>: ma:<match>:",
    # or "<status>-NoCompile label" (returning 1) if compilation fails.
    local label="$1" text="$2" rc
    shift 2
    unset MATCH match
    # can't capture stderr sanely for amalgamation, need compile to happen in parent
    re2_compile "$@"
    rc=$?
    if (( rc )); then print "${rc}-NoCompile $label"; return 1; fi
    print -n "$rc:"
    re2_match "$text"
    print $? $label: m:${MATCH}: ma:${(j:|:)match}:
  }
  #
  m cmd-clean                 alphabeta  lph
  m cmd-anchored-nomatch      alphabeta  -a lph.+
  m cmd-anchored-match        alphabeta  -a alp.+
  m case-mismatch             alphabeta  'A\w+'
  m case-insensitive-pattern  alphabeta  -i 'A\w+'
  m case-insensitive-text     Alphabeta  -i 'a\w+'
  m case-sensitive-text       Alphabeta  'a\w+'
  m non-posix-okay-normal     ÷1   '^(\p{Sm})\d$'
  m non-posix-reject-normal   ÷x   '^(\p{Sm})\d$'
# The markers on stderr bracket the expected compile error so that the
# diagnostic can be tied to this particular call.
  print -u2 'stderr start non-posix-posixmode'
  m non-posix-posixmode       ÷1   -P '^(\p{Sm})\d$'
  print -u2 'stderr end non-posix-posixmode'
  m literal-match             x1   -L  x1
  m literal-nomatch           x1   -L  .1
  m literal-match-substr      abcd -L  bc
  m literal-nomatch-anchored  abcd -aL bc
  m not-longest               abb      'a(b|bb)'
  m longest                   abb  -l  'a(b|bb)'
0:re2 compile/match testing with anonymous var
>0:0 cmd-clean: m:lph: ma::
>0:1 cmd-anchored-nomatch: m:: ma::
>0:0 cmd-anchored-match: m:alphabeta: ma::
>0:1 case-mismatch: m:: ma::
>0:0 case-insensitive-pattern: m:alphabeta: ma::
>0:0 case-insensitive-text: m:Alphabeta: ma::
>0:0 case-sensitive-text: m:abeta: ma::
>0:0 non-posix-okay-normal: m:÷1: ma:÷:
>0:1 non-posix-reject-normal: m:: ma::
>1-NoCompile non-posix-posixmode
?stderr start non-posix-posixmode
*?m:re2_compile:*: re2 rexexp compilation failed: invalid escape sequence: \\p
?stderr end non-posix-posixmode
>0:0 literal-match: m:x1: ma::
>0:1 literal-nomatch: m:: ma::
>0:0 literal-match-substr: m:bc: ma::
>0:1 literal-nomatch-anchored: m:: ma::
>0:0 not-longest: m:ab: ma:b:
>0:0 longest: m:abb: ma:bb:

### We've dropped multi-line support for now, rather than debug RE2/cre2
### interactions and figure out how I (pdp) am mis-reading docs.  Should
### we add it, this is the test which exposed the presence of problems:
#  m multiline-reject-nom      $'ab\ncd'      '^cd'
#  set -x
#  m multiline-okay            $'ab\ncd'   -m '^cd'
#  set +x
#0:re2 multiline matching
#>0:1 multiline-reject-nom: m:: ma::
#>0:0 multiline-okay: m:cd: ma::

  # Relies on the m helper (label, text, re2_compile arguments) defined by
  # the preceding compile/match test case.
  # NOTE(review): judging by the labels, -P selects POSIX mode while c and w
  # re-enable Perl classes and word boundaries respectively -- confirm
  # against the module documentation.
  m posix-simple       a1d  -Pa   '([[:alpha:]])([[:digit:]])([[:alpha:]])'
  #
  print -u2 'stderr start posix-reject-perlclass'
  m posix-reject-perlclass      a1d  -Pa   '(\w)(\d)(\w)'
  print -u2 'stderr end posix-reject-perlclass'
  m posix-perlclass-enabled     a1d  -Pac  '(\w)(\d)(\w)'
  m boundaries-normal           'def  efg'   '\be(.)'
  print -u2 'stderr start posix-reject-boundaries'
  m posix-reject-boundaries     'def  efg'   -P   '\be(.)'
  print -u2 'stderr end posix-reject-boundaries'
  m posix-boundaries-enabled    'def  efg'   -Pw  '\be(.)'
# Same subject with and without \b: the expected matches differ (e2h versus
# e1g), which shows that the boundary assertion was honoured.
  m posix-perlclass-boundaries  'de1g e2h' -Pcw '\be(\d)(\w)'
  m posix-pcb-mattered          'de1g e2h' -Pcw   'e(\d)(\w)'
0:re2 POSIX mode with various features added back
>0:0 posix-simple: m:a1d: ma:a|1|d:
?stderr start posix-reject-perlclass
*?m:re2_compile:*: re2 rexexp compilation failed: invalid escape sequence: \\w
?stderr end posix-reject-perlclass
>1-NoCompile posix-reject-perlclass
>0:0 posix-perlclass-enabled: m:a1d: ma:a|1|d:
>0:0 boundaries-normal: m:ef: ma:f:
?stderr start posix-reject-boundaries
*?m:re2_compile:*: re2 rexexp compilation failed: invalid escape sequence: \\b
?stderr end posix-reject-boundaries
>1-NoCompile posix-reject-boundaries
>0:0 posix-boundaries-enabled: m:ef: ma:f:
>0:0 posix-perlclass-boundaries: m:e2h: ma:2|h:
>0:0 posix-pcb-mattered: m:e1g: ma:1|g:

  # Compile once; every mintov call below matches against this pattern.
  re2_compile -i '^([aeiou])(\w{2})'
  mintov() {
    # First argument is the label; everything after it is handed straight
    # to re2_match.  Reports the status plus both the default result
    # variables (MATCH/match) and the redirected ones (T1/t1).
    local tag="$1"
    shift
    unset MATCH match T1 t1
    re2_match "$@"
    print "$? ${tag} MATCH=<${MATCH}> match=<${(j:|:)match}> T1=<${T1}> t1=<${(j:|:)t1}>"
  }
  mintov not-first    not_first
  mintov simple       orange
  mintov redir-arr    -a t1 orange
  mintov redir-var    -v T1 orange
  mintov redir-both   -v T1 -a t1 orange
  mintov normal-after orange
0:re2_match capturing to named vars
>1 not-first MATCH=<> match=<> T1=<> t1=<>
>0 simple MATCH=<ora> match=<o|ra> T1=<> t1=<>
>0 redir-arr MATCH=<ora> match=<> T1=<> t1=<o|ra>
>0 redir-var MATCH=<> match=<o|ra> T1=<ora> t1=<>
>0 redir-both MATCH=<> match=<> T1=<ora> t1=<o|ra>
>0 normal-after MATCH=<ora> match=<o|ra> T1=<> t1=<>


  # Interleaves matches against the compiled ("anonymous") pattern with
  # matches that supply their own pattern via -P.  The final call must still
  # use the originally compiled pattern, as shown by match[2] being "ra".
  re2_compile '^([aeiou])(\w{2})'
  re2_match orange  && echo "yes-1"
  re2_match -P '^t.{3}' orange || echo "no-2"
  re2_match -P '^t.{3}' tangerine && echo "yes-3"
  re2_match tangerine || echo "no-4"
  re2_match orange && echo "yes-5 ${match[2]}"
0:re2_match -P pattern works & doesn't mess with anonymous
>yes-1
>no-2
>yes-3
>no-4
>yes-5 ra


  re2_compile '^(\p{Sm})(?!\d+)(?:.)$'
# The "(?!" construct is what the expected diagnostic rejects; the test only
# requires a clean failure (status 1) rather than a crash.  Keep the command
# on the first line of the chunk: the expected stderr pins line number 1.
1:re2 check no crash on unsupported syntax
?(eval):re2_compile:1: re2 rexexp compilation failed: invalid perl operator: (?!

  re2_compile '(fred'
# Unbalanced parenthesis: compilation must fail with status 1 and report the
# "missing )" diagnostic.  Keep the command on the first line of the chunk:
# the expected stderr pins line number 1.
1:re2 complain parens not closed
?(eval):re2_compile:1: re2 rexexp compilation failed: missing ): (fred


%clean
  # Drop every function whose name starts with "m"; this covers the m and
  # mintov helpers defined by the tests above.
  unfunction -m 'm*'
debug log:

solving 860233e ...
found 860233e in https://inbox.vuxu.org/zsh-workers/20160909011242.GC12371@breadbox.private.spodhuis.org/

applying [1/1] https://inbox.vuxu.org/zsh-workers/20160909011242.GC12371@breadbox.private.spodhuis.org/
diff --git a/Test/V11re2.ztst b/Test/V11re2.ztst
new file mode 100644
index 0000000..860233e

Checking patch Test/V11re2.ztst...
Applied patch Test/V11re2.ztst cleanly.

index at:
100644 860233e54b7963e77ff6b394ddce8d5e7c4488c3	Test/V11re2.ztst

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).