9fans - fans of the OS Plan 9 from Bell Labs
 help / color / mirror / Atom feed
* [9fans] split(1): -e vs. -n, -f
@ 2013-12-30 10:10 dexen deVries
  2013-12-30 13:50 ` [9fans] split(1): -e vs. -n, -f [patch] dexen deVries
  2013-12-30 16:12 ` [9fans] split(1): -e vs. -n, -f erik quanstrom
  0 siblings, 2 replies; 3+ messages in thread
From: dexen deVries @ 2013-12-30 10:10 UTC (permalink / raw)
  To: Fans of the OS Plan 9 from Bell Labs

hi list,


both behavior and code indicate that split(1)'s `-e' (split by regular
expression) doesn't play along with either `-n' (line count) or `-f' (output
file prefix). the former is somewhat understandable, but the latter is strange
in light of `-s' (output file suffix) working just fine.

is that by accident or is there some rationale?


-- 
dexen deVries

[[[↓][→]]]




^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [9fans] split(1): -e vs. -n, -f [patch]
  2013-12-30 10:10 [9fans] split(1): -e vs. -n, -f dexen deVries
@ 2013-12-30 13:50 ` dexen deVries
  2013-12-30 16:12 ` [9fans] split(1): -e vs. -n, -f erik quanstrom
  1 sibling, 0 replies; 3+ messages in thread
From: dexen deVries @ 2013-12-30 13:50 UTC (permalink / raw)
  To: Fans of the OS Plan 9 from Bell Labs, plan9port-dev

[-- Attachment #1: Type: text/plain, Size: 441 bytes --]

On Monday 30 of December 2013 11:10:45 you wrote: 
> both behavior and code indicate that split(1)'s  `-e' (split by regular
> expression) doesn't play along with either `-n' (line count) or `-f' (output
> file prefix). the former is somewhat understandable, but the later is
> strange in lieu of `-s' (output file suffix) working just fine.
> 
> that by accident or is there some rationale?


-- 
dexen deVries

[[[↓][→]]]

[-- Attachment #2: 0001-make-stat-1-s-e-play-along-with-f-output-file-prefix.patch --]
[-- Type: text/x-patch, Size: 1146 bytes --]

>From 01ae77413e4249776124727e797b0172e7874987 Mon Sep 17 00:00:00 2001
From: dexen deVries <dexen.devries@gmail.com>
Date: Mon, 30 Dec 2013 15:47:24 +0100
Subject: [PATCH] make stat(1)'s `-e' play along with `-f' (output file
 prefix)

also make the file pathname buffer a bit larger.
---
 src/cmd/split.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/src/cmd/split.c b/src/cmd/split.c
index e758786..4820930 100644
--- a/src/cmd/split.c
+++ b/src/cmd/split.c
@@ -8,7 +8,7 @@ char	digit[] = "0123456789";
 char	*suffix = "";
 char	*stem = "x";
 char	suff[] = "aa";
-char	name[200];
+char	name[2048];
 Biobuf	bout;
 Biobuf	*output = &bout;

@@ -130,9 +130,11 @@ int
 matchfile(Resub *match)
 {
 	if(match[1].s.sp) {
-		int len = match[1].e.ep - match[1].s.sp;
-		strncpy(name, match[1].s.sp, len);
-		strcpy(name+len, suffix);
+		int len_match = match[1].e.ep - match[1].s.sp;
+		int len_stem = strlen(stem);
+		strcpy(name, stem);
+		strncpy(name+len_stem, match[1].s.sp, len_match);
+		strcpy(name+len_stem+len_match, suffix);
 		openf();
 		return 1;
 	}
--
1.7.12.1


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [9fans] split(1): -e vs. -n, -f
  2013-12-30 10:10 [9fans] split(1): -e vs. -n, -f dexen deVries
  2013-12-30 13:50 ` [9fans] split(1): -e vs. -n, -f [patch] dexen deVries
@ 2013-12-30 16:12 ` erik quanstrom
  1 sibling, 0 replies; 3+ messages in thread
From: erik quanstrom @ 2013-12-30 16:12 UTC (permalink / raw)
  To: 9fans

[-- Attachment #1: Type: text/plain, Size: 1161 bytes --]

On Mon Dec 30 05:12:16 EST 2013, dexen.devries@gmail.com wrote:
> hi list,
>
>
> both behavior and code indicate that split(1)'s  `-e' (split by regular
> expression) doesn't play along with either `-n' (line count) or `-f' (output
> file prefix). the former is somewhat understandable, but the later is strange
> in lieu of `-s' (output file suffix) working just fine.
>
> that by accident or is there some rationale?

i think the answer is a little bit of both.  it's easy to make split support
mixing any number of regular expressions with one line count.  (i believe
using -f with -e works already, unless you want a prefix for even re-matched
files.)

proposed version attached

- erik

---
; whatis x xx
x=(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20)
fn xx {for(i in $x)echo $i}
; xx | $home/v.split -e '^7$' -n 10
; for(i in *)echo $i && mc $i
xaa
1	2	3	4	5	6
xab
7	8	9	10
xac
11	12	13	14	15	16	17	18	19	20

; xx|$home^/v.split -f X -e '^7$' -n 10
; lc
Xaa	Xab	Xac

; xx|$home^/v.split -e '^(1)$' -e '^(7)$'  -n 10
; for(i in *)echo $i && mc $i
1
1	2	3	4	5	6
7
7	8	9	10
xaa
11	12	13	14	15	16	17	18	19	20

[-- Attachment #2: split.c --]
[-- Type: text/plain, Size: 3355 bytes --]

#include <u.h>
#include <libc.h>
#include <bio.h>
#include <regexp.h>

/* not referenced anywhere else in this file */
char	digit[] = "0123456789";
/* -s argument: appended to file names taken from a pattern's subexpression */
char	*suffix = "";
/* -f argument: prefix of the sequentially named output files */
char	*stem = "x";
/* not referenced anywhere else in this file */
char	suff[] = "aa";
/* name of the output file about to be created by openf */
char	name[200];
/* storage behind output once openf has run */
Biobuf	bout;
/* current output file; nil until the first call to openf */
Biobuf	*output;
/* -i: xlower folds patterns and lines to lower case before matching */
int	iflag;
/* -x: lines that match a pattern are not copied to the output */
int	xflag;

/* close the current output file, if any, and create the one called name */
void openf(void);
/* open the next sequentially named file; returns 0 once names run out */
int nextf(void);
/* declared here but not defined or called in this file */
int matchfile(Resub*);
/* start a new output file if the line matches any of the patterns */
int matching(Reprog**, int, char*);
/* lower-cased copy of the string when iflag is set, else the string itself */
char* xlower(char*);
/* print the usage message and exit */
void usage(void);

/*
 * split: copy the input (the named file, or standard input) into a
 * series of output files.  A new output file is started when a line
 * matches one of the -e patterns, or, with -n, when lineno exceeds n.
 * With neither -e nor -n the line count defaults to 1000.
 *
 * lineno is reset only when the line count rolls the output over;
 * a pattern match starts a new file without resetting it.
 */
void
main(int argc, char *argv[])
{
	char *pat[25], *line, buf[256];
	int i, n, npat, lineno;
	Biobuf bin, *b;
	Reprog *re[25];

	n = 0;
	b = &bin;
	npat = 0;

	ARGBEGIN {
	case 'l':
	case 'n':
		n=atoi(EARGF(usage()));
		break;
	case 'e':
		if(npat == nelem(pat))
			sysfatal("split: too many pats");
		pat[npat++] = EARGF(usage());
		break;
	case 'f':
		stem = EARGF(usage());
		break;
	case 's':
		suffix = EARGF(usage());
		break;
	case 'x':
		xflag++;
		break;
	case 'i':
		iflag++;
		break;
	default:
		usage();
		break;
	} ARGEND;

	if(argc > 1)
		usage();
	else if(argc == 0)
		Binit(b, 0, OREAD);
	else{
		b = Bopen(argv[0], OREAD);
		if(b == nil)
			sysfatal("split: Bopen %s: %r", argv[0]);
	}

	/* default */
	if(n == 0 && npat == 0)
		n = 1000;

	/* prepare regular expressions */
	for(i = 0; i < npat; i++){
		re[i] = regcomp(xlower(pat[i]));
		if(re[i] == nil)
			sysfatal("split: bad regular expression: %s", pat[i]);
	}

	lineno = 0;
	while((line = Brdline(b,'\n')) != nil) {
		/* replace the newline so the line can be matched as a string */
		line[Blinelen(b)-1] = 0;
		lineno++;
		if(matching(re, npat, line)){
			/* matching has already opened the new output file */
			if(xflag)
				continue;
		}else if(n > 0 && lineno > n){
			nextf();
			lineno = 1;
		}else if(output == nil)
			nextf();
		Bwrite(output, line, Blinelen(b)-1);
		Bputc(output, '\n');
	}

	/*
	 * Brdline has stopped; copy anything still unread (such as a
	 * final line with no newline) verbatim.  If no line was ever
	 * written there is no output file yet, so open one first rather
	 * than writing through a nil Biobuf.
	 */
	while((n = Bread(b, buf, sizeof(buf))) > 0){
		if(output == nil && !nextf())
			break;
		Bwrite(output, buf, n);
	}
	Bterm(b);
	exits("");
}

enum {
	Base	= 26,
	Last	= Base*(Base-1) + (Base-1),
};

int
nextf(void)
{
	static int once, seq;

	if(seq > Last){
		if(!once)
			fprint(2, "split: file %szz not split\n", stem);
		once = 1;
		return 0;
	}
	snprint(name, sizeof name, "%s%c%c", stem, 'a'+seq/26, 'a'+seq%26);
	seq++;
	openf();
	return 1;
}

/*
 * Switch output to a newly created file called name.  The file
 * opened by the previous call, if any, is finished with Bterm and
 * closed first.  Exits through sysfatal if the file cannot be
 * created.
 */
void
openf(void)
{
	static int outfd = -1;

	/* outfd stays -1 until the first file has been created */
	if(outfd >= 0){
		Bterm(output);
		close(outfd);
	}

	if((outfd = create(name, OWRITE,0666)) < 0)
		sysfatal("split: can't create %s: %r", name);

	Binit(&bout, outfd, OWRITE);
	output = &bout;
}

/*
 * Try line against each of the nre compiled patterns in order.  On
 * the first pattern that matches, start a new output file:
 *  - if the pattern has a parenthesized subexpression that took part
 *    in the match, the file is named after the text it matched, with
 *    suffix appended;
 *  - otherwise the next sequential name is used (see nextf).
 *
 * Returns 1 if a new output file was opened.  Returns 0 if no
 * pattern matched, or if the match needed a sequential name and
 * nextf had none left.
 *
 * With iflag set the line is folded to lower case by xlower first,
 * so a name taken from the subexpression is lower case as well.
 */
int
matching(Reprog **re, int nre, char *line)
{
	char *p;
	int i, len;
	Resub m[2];

	p = xlower(line);
	for(i = 0; i < nre; i++){
		memset(m, 0, sizeof m);
		if(regexec(re[i], p, m, nelem(m))){
			if(m[1].sp == nil)
				return nextf();
			len = m[1].ep - m[1].sp;
			/*
			 * %.*s limits the copy to the len bytes of the
			 * subexpression; %*s would only set a minimum width
			 * and drag in the rest of the line.
			 */
			snprint(name, sizeof name, "%.*s%s", len, m[1].sp, suffix);
			openf();
			return 1;
		}
	}
	return 0;
}

char*
xlower(char *s)
{
	char *p;
	Rune r;
	static char buf[1024*UTFmax];

	if(!iflag)
		return s;
	p = buf;
	for(;;){
		if((uchar)*s < 0x80){
			*p++ = tolower(*s);
			if(*s++ == 0)
				break;
		}
		else{
			s += chartorune(&r, s);
			r = tolowerrune(r);
			p += runetochar(p, &r);
		}
	}
	return buf;
}

/* Print the command synopsis on standard error and exit with status "usage". */
void
usage(void)
{
	static char synopsis[] =
		"usage: split [-n num] [-e exp] [-f stem] [-s suff] [-x] [-i] [file]\n";

	fprint(2, "%s", synopsis);
	exits("usage");
}

/*
 * Report a bad regular expression on standard error and exit with
 * that message as the status.  Nothing in this file calls it.
 */
void
badexp(void)
{
	char *why = "bad regular expression";

	fprint(2, "split: %s\n", why);
	exits(why);
}

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2013-12-30 16:12 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-12-30 10:10 [9fans] split(1): -e vs. -n, -f dexen deVries
2013-12-30 13:50 ` [9fans] split(1): -e vs. -n, -f [patch] dexen deVries
2013-12-30 16:12 ` [9fans] split(1): -e vs. -n, -f erik quanstrom

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).