zsh-workers
 help / color / mirror / code / Atom feed
f6a5283d3ce65e6c45d84ef457f4bc7821b12109 blob 9131 bytes (raw)

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
 
/*
 * re2.c
 *
 * This file is part of zsh, the Z shell.
 *
 * Copyright (c) 2016 Phil Pennock
 * All Rights Reserved.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and to distribute modified versions of this software for any
 * purpose, provided that the above copyright notice and the following
 * two paragraphs appear in all copies of this software.
 *
 * In no event shall Phil Pennock or the Zsh Development Group be liable
 * to any party for direct, indirect, special, incidental, or consequential
 * damages arising out of the use of this software and its documentation,
 * even if Phil Pennock and the Zsh Development Group have been advised of
 * the possibility of such damage.
 *
 * Phil Pennock and the Zsh Development Group specifically disclaim any
 * warranties, including, but not limited to, the implied warranties of
 * merchantability and fitness for a particular purpose.  The software
 * provided hereunder is on an "as is" basis, and Phil Pennock and the
 * Zsh Development Group have no obligation to provide maintenance,
 * support, updates, enhancements, or modifications.
 *
 */

/* This is heavily based upon my earlier regex module, with Peter's fixes
 * for the tougher stuff I had skipped / gotten wrong. */

#include "re2.mdh"
#include "re2.pro"

/*
 * re2 itself is a C++ library; zsh needs C language bindings.
 * These come from <https://github.com/marcomaggi/cre2>.
 */
#include <cre2.h>

/*
 * Identifiers for the condition operators this module supports.  Each
 * value is attached to one entry in the condition table below and is
 * received by zcond_re2_match() as its `id' argument, where it selects
 * the re2 option set used to compile the pattern.
 */
#define ZRE2_COND_RE2		0
#define ZRE2_COND_POSIX		1
#define ZRE2_COND_POSIXPERL	2
#define ZRE2_COND_LONGEST	3

/**/
static int
zcond_re2_match(char **a, int id)
{
    cre2_regexp_t *rex;
    cre2_options_t *opt;
    cre2_string_t *m, *matches = NULL;
    char *lhstr, *lhstr_zshmeta, *rhre, *rhre_zshmeta;
    char **result_array, **x;
    char *s;
    char **mbegin, **mend, **bptr, **eptr;
    size_t matchessz = 0;
    int return_value, ncaptures, matched, nelem, start, n, indexing_base;
    int remaining_len, charlen;
    zlong offs;

    return_value = 0; /* 1 => matched successfully */

    lhstr_zshmeta = cond_str(a,0,0);
    rhre_zshmeta = cond_str(a,1,0);
    lhstr = ztrdup(lhstr_zshmeta);
    unmetafy(lhstr, NULL);
    rhre = ztrdup(rhre_zshmeta);
    unmetafy(rhre, NULL);

    opt = cre2_opt_new();
    if (!opt) {
	zwarn("re2 opt memory allocation failure");
	goto CLEANUP_UNMETAONLY;
    }
    /* nb: we can set encoding here; re2 assumes UTF-8 by default */
    cre2_opt_set_log_errors(opt, 0); /* don't hit stderr by default */
    if (!isset(CASEMATCH)) {
	cre2_opt_set_case_sensitive(opt, 0);
    }

    /* "The following options are only consulted when POSIX syntax is enabled;
     * when POSIX syntax is disabled: these features are always enabled and
     * cannot be turned off."
     * Seems hard to mis-parse, but I did.  Okay, Perl classes \d,\w and friends
     * always on normally, can _also_ be enabled in POSIX mode. */

    switch (id) {
    case ZRE2_COND_RE2:
	/* nothing to do, this is default */
	break;
    case ZRE2_COND_POSIX:
	cre2_opt_set_posix_syntax(opt, 1);
	break;
    case ZRE2_COND_POSIXPERL:
	cre2_opt_set_posix_syntax(opt, 1);
	/* we enable Perl classes (\d, \s, \w, \D, \S, \W)
	 * and boundaries/not (\b \B) */
	cre2_opt_set_perl_classes(opt, 1);
	cre2_opt_set_word_boundary(opt, 1);
	break;
    case ZRE2_COND_LONGEST:
	cre2_opt_set_longest_match(opt, 1);
	break;
    default:
	DPUTS(1, "bad re2 option");
	goto CLEANUP_OPT;
    }

    rex = cre2_new(rhre, strlen(rhre), opt);
    if (!rex) {
	zwarn("re2 regular expression memory allocation failure");
	goto CLEANUP_OPT;
    }
    if (cre2_error_code(rex)) {
	zwarn("re2 rexexp compilation failed: %s", cre2_error_string(rex));
	goto CLEANUP;
    }

    ncaptures = cre2_num_capturing_groups(rex);
    /* the nmatch for cre2_match follows the usual pattern of index 0 holding
     * the entire matched substring, index 1 holding the first capturing
     * sub-expression, etc.  So we need ncaptures+1 elements. */
    matchessz = (ncaptures + 1) * sizeof(cre2_string_t);
    matches = zalloc(matchessz);

    matched = cre2_match(rex,
			 lhstr, strlen(lhstr), /* text to match against */
			 0, strlen(lhstr), /* substring of text to consider */
			 CRE2_UNANCHORED, /* user should explicitly anchor */
			 matches, (ncaptures+1));
    if (!matched)
	goto CLEANUP;
    return_value = 1;

    /* We have a match, we will return success, we have array of cre2_string_t
     * items, each with .data and .length fields pointing into the matched text,
     * all in unmetafied format.
     *
     * We need to collect the results, put together various arrays and offset
     * variables, while respecting options to change the array set, the indexing
     * of that array and everything else that 26 years of history has endowed
     * upon us. */
    /* option BASHREMATCH set:
     *    set $BASH_REMATCH instead of $MATCH/$match
     *    entire matched portion in index 0 (useful with option KSH_ARRAYS)
     * option _not_ set:
     *    $MATCH scalar gets entire string
     *    $match array gets substrings
     *    $MBEGIN $MEND scalars get offsets of entire match
     *    $mbegin $mend arrays get offsets of substrings
     *    all of the offsets depend upon KSHARRAYS to determine indexing!
     */

    if (isset(BASHREMATCH)) {
	start = 0;
	nelem = ncaptures + 1;
    } else {
	start = 1;
	nelem = ncaptures;
    }
    result_array = NULL;
    if (nelem) {
	result_array = x = (char **) zalloc(sizeof(char *) * (nelem + 1));
	for (m = matches + start, n = start; n <= ncaptures; ++n, ++m, ++x) {
	    /* .data is (const char *), metafy can modify in-place so takes
	     * (char *) but doesn't modify given META_DUP, so safe to drop
	     * the const. */
	    *x = metafy((char *)m->data, m->length, META_DUP);
	}
	*x = NULL;
    }

    if (isset(BASHREMATCH)) {
	setaparam("BASH_REMATCH", result_array);
	goto CLEANUP;
    }

    indexing_base = isset(KSHARRAYS) ? 0 : 1;

    setsparam("MATCH", metafy((char *)matches[0].data, matches[0].length, META_DUP));
    /* count characters before the match */
    s = lhstr;
    remaining_len = matches[0].data - lhstr;
    offs = 0;
    MB_CHARINIT();
    while (remaining_len) {
	offs++;
	charlen = MB_CHARLEN(s, remaining_len);
	s += charlen;
	remaining_len -= charlen;
    }
    setiparam("MBEGIN", offs + indexing_base);
    /* then the characters within the match */
    remaining_len = matches[0].length;
    while (remaining_len) {
	offs++;
	charlen = MB_CHARLEN(s, remaining_len);
	s += charlen;
	remaining_len -= charlen;
    }
    /* zsh ${foo[a,b]} is inclusive of end-points, [a,b] not [a,b) */
    setiparam("MEND", offs + indexing_base - 1);
    if (!nelem) {
	goto CLEANUP;
    }

    bptr = mbegin = (char **)zalloc(sizeof(char *)*(nelem+1));
    eptr = mend = (char **)zalloc(sizeof(char *)*(nelem+1));
    for (m = matches + start, n = 0;
	 n < nelem;
	 ++n, ++m, ++bptr, ++eptr)
    {
	char buf[DIGBUFSIZE];
	if (m->data == NULL) {
	    /* FIXME: have assumed this is the API for non-matching substrings; confirm! */
	    *bptr = ztrdup("-1");
	    *eptr = ztrdup("-1");
	    continue;
	}
	s = lhstr;
	remaining_len = m->data - lhstr;
	offs = 0;
	/* Find the start offset */
	MB_CHARINIT();
	while (remaining_len) {
	    offs++;
	    charlen = MB_CHARLEN(s, remaining_len);
	    s += charlen;
	    remaining_len -= charlen;
	}
	convbase(buf, offs + indexing_base, 10);
	*bptr = ztrdup(buf);
	/* Continue to the end offset */
	remaining_len = m->length;
	while (remaining_len) {
	    offs++;
	    charlen = MB_CHARLEN(s, remaining_len);
	    s += charlen;
	    remaining_len -= charlen;
	}
	convbase(buf, offs + indexing_base - 1, 10);
	*eptr = ztrdup(buf);
    }
    *bptr = *eptr = NULL;

    setaparam("match", result_array);
    setaparam("mbegin", mbegin);
    setaparam("mend", mend);

CLEANUP:
    if (matches)
	zfree(matches, matchessz);
    cre2_delete(rex);
CLEANUP_OPT:
    cre2_opt_delete(opt);
CLEANUP_UNMETAONLY:
    free(lhstr);
    free(rhre);
    return return_value;
}


/*
 * Condition table: four infix operators, all handled by
 * zcond_re2_match().  The final CONDDEF argument is the ZRE2_COND_*
 * identifier which that handler receives as `id' to tell the
 * operators apart.
 */
static struct conddef cotab[] = {
    CONDDEF("re2-match", CONDF_INFIX, zcond_re2_match, 0, 0, ZRE2_COND_RE2),
    CONDDEF("re2-match-posix", CONDF_INFIX, zcond_re2_match, 0, 0, ZRE2_COND_POSIX),
    CONDDEF("re2-match-posixperl", CONDF_INFIX, zcond_re2_match, 0, 0, ZRE2_COND_POSIXPERL),
    CONDDEF("re2-match-longest", CONDF_INFIX, zcond_re2_match, 0, 0, ZRE2_COND_LONGEST),
};


/*
 * Feature table for this module.  Only the condition table (cotab and
 * its element count) is populated; every other table slot is left
 * empty (NULL, 0).
 */
static struct features module_features = {
    NULL, 0,
    cotab, sizeof(cotab)/sizeof(*cotab),
    NULL, 0,
    NULL, 0,
    0
};


/* Module setup hook: this module has nothing to do here, so it
 * simply returns 0. */

/**/
int
setup_(UNUSED(Module m))
{
    return 0;
}

/* Module features hook: stores in *features the array built by
 * featuresarray() from this module's feature table; always returns 0. */

/**/
int
features_(Module m, char ***features)
{
    *features = featuresarray(m, &module_features);
    return 0;
}

/* Module enables hook: passes this module's feature table and the
 * caller's enables pointer to handlefeatures() and returns its result. */

/**/
int
enables_(Module m, int **enables)
{
    return handlefeatures(m, &module_features, enables);
}

/* Module boot hook: this module has nothing to do here, so it
 * simply returns 0. */

/**/
int
boot_(UNUSED(Module m))
{
    return 0;
}

/* Module cleanup hook: calls setfeatureenables() with a NULL enables
 * list for this module's feature table and returns its result
 * (presumably this disables every feature -- confirm against
 * setfeatureenables()). */

/**/
int
cleanup_(Module m)
{
    return setfeatureenables(m, &module_features, NULL);
}

/* Module finish hook: this module has nothing to do here, so it
 * simply returns 0. */

/**/
int
finish_(UNUSED(Module m))
{
    return 0;
}
debug log:

solving f6a5283 ...
found f6a5283 in https://inbox.vuxu.org/zsh-workers/20160908135559.GA10762@breadbox.private.spodhuis.org/
found e542723 in https://inbox.vuxu.org/zsh-workers/20160908041556.GA8401@breadbox.private.spodhuis.org/

applying [1/2] https://inbox.vuxu.org/zsh-workers/20160908041556.GA8401@breadbox.private.spodhuis.org/
diff --git a/Src/Modules/re2.c b/Src/Modules/re2.c
new file mode 100644
index 0000000..e542723


applying [2/2] https://inbox.vuxu.org/zsh-workers/20160908135559.GA10762@breadbox.private.spodhuis.org/
diff --git a/Src/Modules/re2.c b/Src/Modules/re2.c
index e542723..f6a5283 100644

Checking patch Src/Modules/re2.c...
Applied patch Src/Modules/re2.c cleanly.
Checking patch Src/Modules/re2.c...
Applied patch Src/Modules/re2.c cleanly.

index at:
100644 f6a5283d3ce65e6c45d84ef457f4bc7821b12109	Src/Modules/re2.c

Code repositories for project(s) associated with this public inbox

	https://git.vuxu.org/mirror/zsh/

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).