From mboxrd@z Thu Jan 1 00:00:00 1970 From: erik quanstrom Date: Mon, 30 Dec 2013 11:12:10 -0500 To: 9fans@9fans.net Message-ID: In-Reply-To: <462816165.fiyU6ZYvxS@coil> References: <462816165.fiyU6ZYvxS@coil> MIME-Version: 1.0 Content-Type: multipart/mixed; boundary="upas-onbwcmdscmusqahvklvnvauqhl" Subject: Re: [9fans] split(1): -e vs. -n, -f Topicbox-Message-UUID: a9b42410-ead8-11e9-9d60-3106f5b1d025 This is a multi-part message in MIME format. --upas-onbwcmdscmusqahvklvnvauqhl Content-Disposition: inline Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit On Mon Dec 30 05:12:16 EST 2013, dexen.devries@gmail.com wrote: > hi list, > > > both behavior and code indicate that split(1)'s `-e' (split by regular > expression) doesn't play along with either `-n' (line count) or `-f' (output > file prefix). the former is somewhat understandable, but the latter is strange > in light of `-s' (output file suffix) working just fine. > > is that by accident or is there some rationale? i think the answer is a little bit of both. it's easy to make split support mixing any number of regular expressions with one line count. (i believe using -f with -e works already, unless you want a prefix for even re-matched files.)
proposed version attached - erik --- ; whatis x xx x=(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20) fn xx {for(i in $x)echo $i} ; xx | $home/v.split -e '^7$' -n 10 ; for(i in *)echo $i && mc $i xaa 1 2 3 4 5 6 xab 7 8 9 10 xac 11 12 13 14 15 16 17 18 19 20 ; xx|$home^/v.split -f X -e '^7$' -n 10 ; lc Xaa Xab Xac ; xx|$home^/v.split -e '^(1)$' -e '^(7)$' -n 10 ; for(i in *)echo $i && mc $i 1 1 2 3 4 5 6 7 7 8 9 10 xaa 11 12 13 14 15 16 17 18 19 20 --upas-onbwcmdscmusqahvklvnvauqhl Content-Disposition: attachment; filename=split.c Content-Type: text/plain; charset="US-ASCII" Content-Transfer-Encoding: 7bit #include #include #include #include char digit[] = "0123456789"; char *suffix = ""; char *stem = "x"; char suff[] = "aa"; char name[200]; Biobuf bout; Biobuf *output; int iflag; int xflag; void openf(void); int nextf(void); int matchfile(Resub*); int matching(Reprog**, int, char*); char* xlower(char*); void usage(void); void main(int argc, char *argv[]) { char *pat[25], *line, buf[256]; int i, n, npat, lineno; Biobuf bin, *b; Reprog *re[25]; n = 0; b = &bin; npat = 0; ARGBEGIN { case 'l': case 'n': n=atoi(EARGF(usage())); break; case 'e': if(npat == nelem(pat)) sysfatal("split: too many pats"); pat[npat++] = EARGF(usage()); break; case 'f': stem = EARGF(usage()); break; case 's': suffix = EARGF(usage()); break; case 'x': xflag++; break; case 'i': iflag++; break; default: usage(); break; } ARGEND; if(argc > 1) usage(); else if(argc == 0) Binit(b, 0, OREAD); else{ b = Bopen(argv[0], OREAD); if(b == nil) sysfatal("split: Bopen %s: %r", argv[0]); } /* default */ if(n == 0 && npat == 0) n = 1000; /* prepare regular reressions */ for(i = 0; i < npat; i++){ re[i] = regcomp(xlower(pat[i])); if(re[i] == nil) sysfatal("split: bad regular reression: %s", pat[i]); } lineno = 0; while((line = Brdline(b,'\n')) != nil) { line[Blinelen(b)-1] = 0; lineno++; if(matching(re, npat, line)){ if(xflag) continue; }else if(n > 0 && lineno > n){ nextf(); lineno = 1; }else if(output == 
nil) nextf(); Bwrite(output, line, Blinelen(b)-1); Bputc(output, '\n'); } while((n = Bread(b, buf, sizeof(buf))) > 0) Bwrite(output, buf, n); Bterm(b); exits(""); } enum { Base = 26, Last = Base*(Base-1) + (Base-1), }; int nextf(void) { static int once, seq; if(seq > Last){ if(!once) fprint(2, "split: file %szz not split\n", stem); once = 1; return 0; } snprint(name, sizeof name, "%s%c%c", stem, 'a'+seq/26, 'a'+seq%26); seq++; openf(); return 1; } void openf(void) { static int fd = -1; if(fd >= 0){ Bterm(output); close(fd); } fd = create(name, OWRITE,0666); if(fd < 0) sysfatal("split: can't create %s: %r", name); output = &bout; Binit(output, fd, OWRITE); } int matching(Reprog **re, int nre, char *line) { char *p; int i, len; Resub m[2]; p = xlower(line); for(i = 0; i < nre; i++){ memset(m, 0, sizeof m); if(regexec(re[i], p, m, nelem(m))){ if(m[1].sp == nil) return nextf(); len = m[1].ep - m[1].sp; snprint(name, sizeof name, "%*s%s", len, m[1].sp, suffix); openf(); return 1; } } return 0; } char* xlower(char *s) { char *p; Rune r; static char buf[1024*UTFmax]; if(!iflag) return s; p = buf; for(;;){ if((uchar)*s < 0x80){ *p++ = tolower(*s); if(*s++ == 0) break; } else{ s += chartorune(&r, s); r = tolowerrune(r); p += runetochar(p, &r); } } return buf; } void usage(void) { fprint(2, "usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n"); exits("usage"); } void badexp(void) { fprint(2, "split: bad regular expression\n"); exits("bad regular expression"); } --upas-onbwcmdscmusqahvklvnvauqhl--