* [9fans] split(1): -e vs. -n, -f
@ 2013-12-30 10:10 dexen deVries
2013-12-30 13:50 ` [9fans] split(1): -e vs. -n, -f [patch] dexen deVries
2013-12-30 16:12 ` [9fans] split(1): -e vs. -n, -f erik quanstrom
0 siblings, 2 replies; 3+ messages in thread
From: dexen deVries @ 2013-12-30 10:10 UTC (permalink / raw)
To: Fans of the OS Plan 9 from Bell Labs
hi list,
both behavior and code indicate that split(1)'s `-e' (split by regular
expression) doesn't play along with either `-n' (line count) or `-f' (output
file prefix). the former is somewhat understandable, but the latter is strange
in light of `-s' (output file suffix) working just fine.
is that by accident or is there some rationale?
--
dexen deVries
[[[↓][→]]]
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [9fans] split(1): -e vs. -n, -f [patch]
2013-12-30 10:10 [9fans] split(1): -e vs. -n, -f dexen deVries
@ 2013-12-30 13:50 ` dexen deVries
2013-12-30 16:12 ` [9fans] split(1): -e vs. -n, -f erik quanstrom
1 sibling, 0 replies; 3+ messages in thread
From: dexen deVries @ 2013-12-30 13:50 UTC (permalink / raw)
To: Fans of the OS Plan 9 from Bell Labs, plan9port-dev
[-- Attachment #1: Type: text/plain, Size: 441 bytes --]
On Monday 30 of December 2013 11:10:45 you wrote:
> both behavior and code indicate that split(1)'s `-e' (split by regular
> expression) doesn't play along with either `-n' (line count) or `-f' (output
> file prefix). the former is somewhat understandable, but the later is
> strange in lieu of `-s' (output file suffix) working just fine.
>
> that by accident or is there some rationale?
--
dexen deVries
[[[↓][→]]]
[-- Attachment #2: 0001-make-stat-1-s-e-play-along-with-f-output-file-prefix.patch --]
[-- Type: text/x-patch, Size: 1146 bytes --]
>From 01ae77413e4249776124727e797b0172e7874987 Mon Sep 17 00:00:00 2001
From: dexen deVries <dexen.devries@gmail.com>
Date: Mon, 30 Dec 2013 15:47:24 +0100
Subject: [PATCH] make split(1)'s `-e' play along with `-f' (output file
prefix)
also make the file pathname buffer a bit larger.
---
src/cmd/split.c | 10 ++++++----
1 file changed, 6 insertions(+), 4 deletions(-)
diff --git a/src/cmd/split.c b/src/cmd/split.c
index e758786..4820930 100644
--- a/src/cmd/split.c
+++ b/src/cmd/split.c
@@ -8,7 +8,7 @@ char digit[] = "0123456789";
char *suffix = "";
char *stem = "x";
char suff[] = "aa";
-char name[200];
+char name[2048];
Biobuf bout;
Biobuf *output = &bout;
@@ -130,9 +130,11 @@ int
matchfile(Resub *match)
{
if(match[1].s.sp) {
- int len = match[1].e.ep - match[1].s.sp;
- strncpy(name, match[1].s.sp, len);
- strcpy(name+len, suffix);
+ int len_match = match[1].e.ep - match[1].s.sp;
+ int len_stem = strlen(stem);
+ strcpy(name, stem);
+ strncpy(name+len_stem, match[1].s.sp, len_match);
+ strcpy(name+len_stem+len_match, suffix);
openf();
return 1;
}
--
1.7.12.1
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: [9fans] split(1): -e vs. -n, -f
2013-12-30 10:10 [9fans] split(1): -e vs. -n, -f dexen deVries
2013-12-30 13:50 ` [9fans] split(1): -e vs. -n, -f [patch] dexen deVries
@ 2013-12-30 16:12 ` erik quanstrom
1 sibling, 0 replies; 3+ messages in thread
From: erik quanstrom @ 2013-12-30 16:12 UTC (permalink / raw)
To: 9fans
[-- Attachment #1: Type: text/plain, Size: 1161 bytes --]
On Mon Dec 30 05:12:16 EST 2013, dexen.devries@gmail.com wrote:
> hi list,
>
>
> both behavior and code indicate that split(1)'s `-e' (split by regular
> expression) doesn't play along with either `-n' (line count) or `-f' (output
> file prefix). the former is somewhat understandable, but the later is strange
> in lieu of `-s' (output file suffix) working just fine.
>
> that by accident or is there some rationale?
i think the answer is a little bit of both. it's easy to make split support
mixing any number of regular expressions with one line count. (i believe
using -f with -e works already, unless you want a prefix even for re-matched
files.)
proposed version attached
- erik
---
; whatis x xx
x=(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20)
fn xx {for(i in $x)echo $i}
; xx | $home/v.split -e '^7$' -n 10
; for(i in *)echo $i && mc $i
xaa
1 2 3 4 5 6
xab
7 8 9 10
xac
11 12 13 14 15 16 17 18 19 20
; xx|$home^/v.split -f X -e '^7$' -n 10
; lc
Xaa Xab Xac
; xx|$home^/v.split -e '^(1)$' -e '^(7)$' -n 10
; for(i in *)echo $i && mc $i
1
1 2 3 4 5 6
7
7 8 9 10
xaa
11 12 13 14 15 16 17 18 19 20
[-- Attachment #2: split.c --]
[-- Type: text/plain, Size: 3355 bytes --]
#include <u.h>
#include <libc.h>
#include <bio.h>
#include <regexp.h>
/* Decimal digits; not referenced anywhere else in this file as shown. */
char digit[] = "0123456789";
/* -s argument: appended to output names built from a pattern submatch. */
char *suffix = "";
/* -f argument: prefix of the sequentially named output files. */
char *stem = "x";
/* Not referenced anywhere else in this file as shown. */
char suff[] = "aa";
/* Path of the output file to create; filled in by nextf/matching, used by openf. */
char name[200];
/* Buffer that openf binds to the current output file. */
Biobuf bout;
/* Current output stream; stays nil until openf has opened the first file. */
Biobuf *output;
/* Set by -i: xlower then returns a lower-cased copy of its argument. */
int iflag;
/* Set by -x: lines that match a pattern are not written to the output. */
int xflag;
void openf(void);
int nextf(void);
int matchfile(Resub*);
int matching(Reprog**, int, char*);
char* xlower(char*);
void usage(void);
/*
 * split: copy the input (standard input, or the single file named on
 * the command line) into a series of output files.
 *
 * A new output file is started when a line matches one of the -e
 * patterns (see matching) or when the line counter exceeds the -n/-l
 * count.  If neither a count nor a pattern is given, the count
 * defaults to 1000.  With -x, lines that match a pattern are dropped
 * instead of being written.
 */
void
main(int argc, char *argv[])
{
	char *pat[25], *line, buf[256];
	int i, n, npat, lineno;
	Biobuf bin, *b;
	Reprog *re[25];

	n = 0;
	b = &bin;
	npat = 0;
	ARGBEGIN {
	case 'l':
	case 'n':
		n=atoi(EARGF(usage()));
		break;
	case 'e':
		if(npat == nelem(pat))
			sysfatal("split: too many pats");
		pat[npat++] = EARGF(usage());
		break;
	case 'f':
		stem = EARGF(usage());
		break;
	case 's':
		suffix = EARGF(usage());
		break;
	case 'x':
		xflag++;
		break;
	case 'i':
		iflag++;
		break;
	default:
		usage();
		break;
	} ARGEND;

	if(argc > 1)
		usage();
	else if(argc == 0)
		Binit(b, 0, OREAD);
	else{
		b = Bopen(argv[0], OREAD);
		if(b == nil)
			sysfatal("split: Bopen %s: %r", argv[0]);
	}

	/* default */
	if(n == 0 && npat == 0)
		n = 1000;

	/* prepare regular expressions; xlower folds them when -i is set */
	for(i = 0; i < npat; i++){
		re[i] = regcomp(xlower(pat[i]));
		if(re[i] == nil)
			sysfatal("split: bad regular expression: %s", pat[i]);
	}

	lineno = 0;
	while((line = Brdline(b,'\n')) != nil) {
		/* replace the trailing newline so the line is a C string */
		line[Blinelen(b)-1] = 0;
		lineno++;
		if(matching(re, npat, line)){
			/* matching has already switched to a new output file */
			if(xflag)
				continue;
		}else if(n > 0 && lineno > n){
			nextf();
			lineno = 1;
		}else if(output == nil)
			nextf();
		Bwrite(output, line, Blinelen(b)-1);
		Bputc(output, '\n');
	}

	/*
	 * Copy whatever Brdline did not hand back as a complete line
	 * (presumably a final line lacking its newline) onto the current
	 * file.  No file is open yet if the loop above never wrote a
	 * line, so open one before the first write rather than writing
	 * through a nil output.
	 */
	while((n = Bread(b, buf, sizeof(buf))) > 0){
		if(output == nil)
			nextf();
		Bwrite(output, buf, n);
	}
	Bterm(b);
	exits("");
}
/* Sequence numbers for output files map onto a two-letter, base-26 suffix. */
enum {
	Base = 26,		/* letters available per suffix position */
	Last = Base*Base - 1,	/* largest usable sequence number */
};
/*
 * Open the next sequentially named output file.  The name is stem
 * followed by two lower-case letters derived from a running sequence
 * number.  Returns 1 after opening a file.  Once the sequence number
 * has passed Last, no file is opened, a complaint is printed the first
 * time only, and 0 is returned.
 */
int
nextf(void)
{
	static int warned, count;
	int hi, lo;

	if(count <= Last){
		hi = 'a' + count/26;
		lo = 'a' + count%26;
		snprint(name, sizeof name, "%s%c%c", stem, hi, lo);
		count++;
		openf();
		return 1;
	}

	/* out of names: report it once and leave the current file in place */
	if(!warned)
		fprint(2, "split: file %szz not split\n", stem);
	warned = 1;
	return 0;
}
/*
 * Make the file called name the current output.  Any previously opened
 * output is finished with Bterm and its descriptor closed before the
 * new file is created.  Failure to create the file is fatal.
 */
void
openf(void)
{
	static int curfd = -1;
	int newfd;

	/* the old file must be finished before create(), in case name repeats */
	if(curfd >= 0){
		Bterm(output);
		close(curfd);
	}

	newfd = create(name, OWRITE, 0666);
	if(newfd < 0)
		sysfatal("split: can't create %s: %r", name);

	curfd = newfd;
	output = &bout;
	Binit(&bout, curfd, OWRITE);
}
/*
 * Test line against each of the nre compiled patterns in re, in order,
 * and start a new output file on the first one that matches.
 *
 * If the matching pattern has no parenthesized subexpression, the next
 * sequential file is opened and the result of nextf is returned (0 when
 * nextf has run out of names).  Otherwise the new file is named after
 * the text of the first subexpression followed by suffix, and 1 is
 * returned.  Returns 0 if no pattern matches.
 *
 * The match is run on xlower(line), so with -i the file name is taken
 * from the lower-cased copy of the line.
 */
int
matching(Reprog **re, int nre, char *line)
{
	char *p;
	int i, len;
	Resub m[2];

	p = xlower(line);
	for(i = 0; i < nre; i++){
		memset(m, 0, sizeof m);
		if(regexec(re[i], p, m, nelem(m))){
			if(m[1].sp == nil)
				return nextf();
			len = m[1].ep - m[1].sp;
			/*
			 * %.*s, not %*s: len must cap the number of bytes
			 * taken from the submatch.  As a field width it would
			 * only pad, and the name would run on to the end of
			 * the line.
			 */
			snprint(name, sizeof name, "%.*s%s", len, m[1].sp, suffix);
			openf();
			return 1;
		}
	}
	return 0;
}
/*
 * Return s unchanged unless -i was given; with -i, return a lower-cased
 * copy of s held in a static buffer that the next call overwrites.
 *
 * Bytes below 0x80 are folded with tolower; anything else is decoded as
 * a rune, folded with tolowerrune and re-encoded.  The copy is bounded
 * by the size of the buffer: input that would not fit is truncated
 * rather than written past the end of buf.
 */
char*
xlower(char *s)
{
	char *p, *e;
	Rune r;
	static char buf[1024*UTFmax];

	if(!iflag)
		return s;

	p = buf;
	/* last position that still leaves room for a full rune and the NUL */
	e = buf + sizeof buf - UTFmax - 1;
	while(*s != 0 && p <= e){
		if((uchar)*s < 0x80)
			*p++ = tolower(*s++);
		else{
			s += chartorune(&r, s);
			r = tolowerrune(r);
			p += runetochar(p, &r);
		}
	}
	*p = 0;
	return buf;
}
/* Print the command synopsis on file descriptor 2 and exit with status "usage". */
void
usage(void)
{
	static char synopsis[] =
		"usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n";

	fprint(2, "%s", synopsis);
	exits("usage");
}
/*
 * Complain about a bad regular expression on file descriptor 2 and
 * exit with a matching status.  Nothing in this file as shown calls it.
 */
void
badexp(void)
{
	static char complaint[] = "split: bad regular expression\n";

	fprint(2, "%s", complaint);
	exits("bad regular expression");
}
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2013-12-30 16:12 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-12-30 10:10 [9fans] split(1): -e vs. -n, -f dexen deVries
2013-12-30 13:50 ` [9fans] split(1): -e vs. -n, -f [patch] dexen deVries
2013-12-30 16:12 ` [9fans] split(1): -e vs. -n, -f erik quanstrom
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).