9fans - fans of the OS Plan 9 from Bell Labs
 help / color / mirror / Atom feed
* [9fans] exec() question
@ 2013-03-15 17:03 erik quanstrom
  2013-03-15 20:35 ` cinap_lenrek
  0 siblings, 1 reply; 9+ messages in thread
From: erik quanstrom @ 2013-03-15 17:03 UTC (permalink / raw)
  To: 9fans

i'm sure i'm missing something obvious here, but exec has something
like this

	up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, TSTKTOP/BY2PG);

(some versions may not have /BY2PG, but that's beside the point.)

what happens if there is already a segment at TSTKTOP-USTKSIZE
that has been faulted in?

i would think that argc, argv could end up in the wrong physical page.

- erik



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [9fans] exec() question
  2013-03-15 17:03 [9fans] exec() question erik quanstrom
@ 2013-03-15 20:35 ` cinap_lenrek
  2013-03-15 20:41   ` cinap_lenrek
  0 siblings, 1 reply; 9+ messages in thread
From: cinap_lenrek @ 2013-03-15 20:35 UTC (permalink / raw)
  To: 9fans

i think you are right. the temporary tstk segment will be before
the stack segment like:

| txt | dat | bss ... | *unmapedspace* | tstk | stk |

the segattach syscall only makes sure you dont map something after
or overlapping with the stack. so i think you could indeed map
something there and make the front fall off after exec().

you could map a readonly segment there and make the kernel crash
when it tries prepare the new stack.

segattach() also would try to allocate below the lowest possible stack
address when you pass 0 as the address.

maybe the tstk (ESEG) should be placed *after* the stack swaping
tstk and stk like:

#define	TSTKTOP		(VMAP-BY2PG)
#define	TSTKSIZ		100
#define	USTKTOP		(TSTKTOP-TSTKSIZ)
#define	USTKSIZE	(16*1024*1024)

but maybe just making the checks in segattach take the tstk into
account is simpler...

--
cinap



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [9fans] exec() question
  2013-03-15 20:35 ` cinap_lenrek
@ 2013-03-15 20:41   ` cinap_lenrek
  2013-03-15 21:48     ` erik quanstrom
  0 siblings, 1 reply; 9+ messages in thread
From: cinap_lenrek @ 2013-03-15 20:41 UTC (permalink / raw)
  To: 9fans

note here:

havnt tested this with any code yet. writing a simple test program
for this case would be a good next step.

--
cinap



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [9fans] exec() question
  2013-03-15 20:41   ` cinap_lenrek
@ 2013-03-15 21:48     ` erik quanstrom
  2013-03-15 21:51       ` erik quanstrom
  2013-03-15 22:48       ` cinap_lenrek
  0 siblings, 2 replies; 9+ messages in thread
From: erik quanstrom @ 2013-03-15 21:48 UTC (permalink / raw)
  To: 9fans

On Fri Mar 15 16:42:15 EDT 2013, cinap_lenrek@gmx.de wrote:
> note here:
>
> havnt tested this with any code yet. writing a simple test program
> for this case would be a good next step.

what i can't explain is why this doesn't fail on a 32-bit kernel.

i've attached a test program and a proposed fix.  there
are shorter fixes that disallow segments just below the stack,
but that seems wrong since based on /proc/$pid/segment, that
address space should be available for allocation.  another way
to approach this would be to always allocate ESEG
and make it a new unmappable segment type, say SG_HOLE.
finally one could put ESEG above the regular stack, but it's
not clear to me that this isn't just a bit too sneaky.

- erik

minooka; 8.system2
vastart 0xdf7fe000
#	(success)
; 6.system2
vastart 0x7ffffec00000
rc 2846: suicide: sys: trap: fault read addr=0x0 pc=0x2031e4
; cat system2.c
#include <u.h>
#include <libc.h>

void
system(void)
{
	execl("/bin/rc", "rc", nil);
	sysfatal("execl: %r");
}

void*
share(usize len)
{
	uchar *vastart;

	vastart = segattach(0, "shared", nil, len);
	if(vastart== (void*)-1)
		sysfatal("segattach: %r");
	fprint(2, "vastart %#p\n", vastart);
	return vastart;
}

void
main(void)
{
	share(10);
	system();
	exits("");
}

-----
minooka; diffy -c /sys/src/nix/port/sysproc.c
/n/dump/2013/0315/sys/src/nix/port/sysproc.c:258,263 - /sys/src/nix/port/sysproc.c:258,281
  	/* no core preferred, don't change the process color */
  }

+ uintptr
+ findhole(Proc *p, usize len)
+ {
+ 	uintptr va;
+ 	Segment *os;
+
+ 	va = p->seg[SSEG]->base - len;
+ 	for(;;) {
+ 		os = isoverlap(p, va, len);
+ 		if(os == nil)
+ 			return va;
+ 		va = os->base;
+ 		if(len > va)
+ 			error("exec: no hole for ESEG");
+ 		va -= len;
+ 	}
+ }
+
  void
  sysexec(Ar0* ar0, va_list list)
  {
/n/dump/2013/0315/sys/src/nix/port/sysproc.c:271,277 - /sys/src/nix/port/sysproc.c:289,295
  	char *a, *elem, *file, *p, *ufile, **argv;
  	char line[sizeof(Exec)], *progarg[sizeof(Exec)/2+1];
  	long hdrsz, magic, textsz, datasz, bsssz;
- 	uintptr textlim, datalim, bsslim, entry, stack;
+ 	uintptr textlim, datalim, bsslim, entry, stack, tstk;
  	static int colorgen;

  	/*
/n/dump/2013/0315/sys/src/nix/port/sysproc.c:410,416 - /sys/src/nix/port/sysproc.c:428,435
  		qunlock(&up->seglock);
  		nexterror();
  	}
- 	up->seg[ESEG] = newseg(SG_STACK, TSTKTOP-USTKSIZE, TSTKTOP);
+ 	tstk = findhole(up, USTKSIZE);
+ 	up->seg[ESEG] = newseg(SG_STACK, tstk, tstk+USTKSIZE);
  	up->seg[ESEG]->color = up->color;

  	/*
/n/dump/2013/0315/sys/src/nix/port/sysproc.c:417,423 - /sys/src/nix/port/sysproc.c:436,442
  	 * Stack is a pointer into the temporary stack
  	 * segment, and will move as items are pushed.
  	 */
- 	stack = TSTKTOP-sizeof(Tos);
+ 	stack = tstk+USTKSIZE - sizeof(Tos);

  	/*
  	 * First, the top-of-stack structure.
/n/dump/2013/0315/sys/src/nix/port/sysproc.c:464,470 - /sys/src/nix/port/sysproc.c:483,489
  		 * will not overflow the bottom of the stack.
  		 */
  		stack -= n;
- 		if(stack < TSTKTOP-USTKSIZE)
+ 		if(stack < tstk)
  			error(Enovmem);
  		p = UINT2PTR(stack);
  		memmove(p, a, n);
/n/dump/2013/0315/sys/src/nix/port/sysproc.c:489,501 - /sys/src/nix/port/sysproc.c:508,520
  	 */
  	a = p = UINT2PTR(stack);
  	stack = sysexecstack(stack, argc);
- 	if(stack-(argc+1)*sizeof(char**)-m->pgsz[up->seg[ESEG]->pgszi] < TSTKTOP-USTKSIZE)
+ 	if(stack-(argc+1)*sizeof(char**)-m->pgsz[up->seg[ESEG]->pgszi] < tstk)
  		error(Enovmem);

  	argv = (char**)stack;
  	*--argv = nil;
  	for(i = 0; i < argc; i++){
- 		*--argv = p + (USTKTOP-TSTKTOP);
+ 		*--argv = p + (USTKTOP-(tstk+USTKSIZE));
  		p += strlen(p) + 1;
  	}

/n/dump/2013/0315/sys/src/nix/port/sysproc.c:600,606 - /sys/src/nix/port/sysproc.c:619,625

  	s->base = USTKTOP-USTKSIZE;
  	s->top = USTKTOP;
- 	relocateseg(s, USTKTOP-TSTKTOP);
+ 	relocateseg(s, USTKTOP-(tstk+USTKSIZE));

  	/*
  	 *  '/' processes are higher priority.
/n/dump/2013/0315/sys/src/nix/port/sysproc.c:627,633 - /sys/src/nix/port/sysproc.c:646,652
  	if(up->hang)
  		up->procctl = Proc_stopme;

- 	ar0->v = sysexecregs(entry, TSTKTOP - PTR2UINT(argv), argc);
+ 	ar0->v = sysexecregs(entry, tstk+USTKSIZE - PTR2UINT(argv), argc);

  	DBG("sysexec up %#p done\n"
  		"textsz %lx datasz %lx bsssz %lx hdrsz %lx\n"



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [9fans] exec() question
  2013-03-15 21:48     ` erik quanstrom
@ 2013-03-15 21:51       ` erik quanstrom
  2013-03-15 22:48       ` cinap_lenrek
  1 sibling, 0 replies; 9+ messages in thread
From: erik quanstrom @ 2013-03-15 21:51 UTC (permalink / raw)
  To: 9fans

sorry.  it also doesn't work on 386.  after i force-map the page with
a memset, i get the expected:

minooka; 8.system2
vastart 0xdf7fe000
rc 725735: suicide: sys: trap: fault read addr=0x0 pc=0x3c2c



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [9fans] exec() question
  2013-03-15 21:48     ` erik quanstrom
  2013-03-15 21:51       ` erik quanstrom
@ 2013-03-15 22:48       ` cinap_lenrek
  2013-03-15 23:57         ` erik quanstrom
  1 sibling, 1 reply; 9+ messages in thread
From: cinap_lenrek @ 2013-03-15 22:48 UTC (permalink / raw)
  To: 9fans

putting the TSTK above the user stack is what the alpha
and ppc kernels do. but just looking for a hole in exec
seems also good. also note that you dont need a whole
USTKSIZE window. only TSTKSIZ (or spage) window is fine
for the upper half of the temporary stack because thats
what exec accesses.

--
cinap



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [9fans] exec() question
  2013-03-15 22:48       ` cinap_lenrek
@ 2013-03-15 23:57         ` erik quanstrom
  2013-03-16  0:11           ` cinap_lenrek
  0 siblings, 1 reply; 9+ messages in thread
From: erik quanstrom @ 2013-03-15 23:57 UTC (permalink / raw)
  To: 9fans

On Fri Mar 15 18:50:20 EDT 2013, cinap_lenrek@gmx.de wrote:
> putting the TSTK above the user stack is what the alpha
> and ppc kernels do. but just looking for a hole in exec
> seems also good. also note that you dont need a whole
> USTKSIZE window. only TSTKSIZ (or spage) window is fine
> for the upper half of the temporary stack because thats
> what exec accesses.

nix doesn't artificially limit the exec args to 4096*100 bytes
as the /sys/src/9 kernel does, so in theory the exec args can
mostly fill the stack.

thanks for looking at the alpha kernel.  that shows that
in theory that approach works.

- erik



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [9fans] exec() question
  2013-03-15 23:57         ` erik quanstrom
@ 2013-03-16  0:11           ` cinap_lenrek
  2013-03-17  1:56             ` erik quanstrom
  0 siblings, 1 reply; 9+ messages in thread
From: cinap_lenrek @ 2013-03-16  0:11 UTC (permalink / raw)
  To: 9fans

it also works in practice. the only catch was that fault386 does:

	if(!user){
		if(vmapsync(addr))
			return;
->		if(addr >= USTKTOP)
			panic("kernel fault: bad address pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
		if(up == nil)
			panic("kernel fault: no user process pc=0x%.8lux addr=0x%.8lux", ureg->pc, addr);
	}

so when moving the TSTK above the USTK, you need to change the addr >= USTKTOP
to addr >= TSTKTOP.

the arm kernels use USTKTOP as the end of userspace and size...

i think your approach with dynamically finding the address hole for the temporary
stack is the most flexibe and requires the least change. i think one can
ignore the additional computational overhead and get rid of the TSTK all
together.

--
cinap



^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [9fans] exec() question
  2013-03-16  0:11           ` cinap_lenrek
@ 2013-03-17  1:56             ` erik quanstrom
  0 siblings, 0 replies; 9+ messages in thread
From: erik quanstrom @ 2013-03-17  1:56 UTC (permalink / raw)
  To: 9fans

so, i thought about this a bit, and i think the extra flexablity
and generality of finding a hole in the process' address space
for ESEG doesn't pay—too fancy.  moving TSTK above
the normal stack so TSTK is not in normally adressable space,
seems more natural.

for nix this results in a trivial diff

/n/dump/2013/0316/sys/src/nix/k10/mem.h:60,68 - /sys/src/nix/k10/mem.h:60,68
   */
  #define UTZERO		(0+2*MiB)		/* first address in user text */
  #define UTROUND(t)	ROUNDUP((t), BIGPGSZ)
- #define USTKTOP		(0x00007ffffffff000ull & ~(BIGPGSZ-1))
+ #define TSTKTOP		(0x00007ffffffff000ull & ~(BIGPGSZ-1))
  #define USTKSIZE		(16*1024*1024)		/* size of user stack */
- #define TSTKTOP		(USTKTOP-USTKSIZE)	/* end of new stack in sysexec */
+ #define USTKTOP		(TSTKTOP-USTKSIZE)	/* end of new stack in sysexec */
  
  
  /*

it's a little more intricate for the 9 kernels, as they aren't quite as
tidy.  but still, there are just a few extra bits.

it is probablly a good idea to rid ourselves of TSTKSIZE as it's not
needed anymore.

- erik


diff -c /n/dump/2013/0316/sys/src/9/bcm/mmu.c bcm/mmu.c
/n/dump/2013/0316/sys/src/9/bcm/mmu.c:12,18 - bcm/mmu.c:12,18
  
  enum {
  	L1lo		= UZERO/MiB,		/* L1X(UZERO)? */
- 	L1hi		= (USTKTOP+MiB-1)/MiB,	/* L1X(USTKTOP+MiB-1)? */
+ 	L1hi		= (TSTKTOP+MiB-1)/MiB,	/* L1X(TSTKTOP+MiB-1)? */
  };
  
  void
diff -c /n/dump/2013/0316/sys/src/9/kw/mmu.c kw/mmu.c
/n/dump/2013/0316/sys/src/9/kw/mmu.c:12,18 - kw/mmu.c:12,18
  
  enum {
  	L1lo		= UZERO/MiB,		/* L1X(UZERO)? */
- 	L1hi		= (USTKTOP+MiB-1)/MiB,	/* L1X(USTKTOP+MiB-1)? */
+ 	L1hi		= (TSTKTOP+MiB-1)/MiB,	/* L1X(TSTKTOP+MiB-1)? */
  };
  
  #define ISHOLE(pte)	((pte) == 0)
diff -c /n/dump/2013/0316/sys/src/9/omap/mmu.c omap/mmu.c
/n/dump/2013/0316/sys/src/9/omap/mmu.c:11,17 - omap/mmu.c:11,17
  
  enum {
  	L1lo		= UZERO/MiB,		/* L1X(UZERO)? */
- 	L1hi		= (USTKTOP+MiB-1)/MiB,	/* L1X(USTKTOP+MiB-1)? */
+ 	L1hi		= (TSTKTOP+MiB-1)/MiB,	/* L1X(TSTKTOP+MiB-1)? */
  };
  
  #define ISHOLE(pte)	((pte) == 0)
diff -c /n/dump/2013/0316/sys/src/9/teg2/mmu.c teg2/mmu.c
/n/dump/2013/0316/sys/src/9/teg2/mmu.c:23,29 - teg2/mmu.c:23,29
  
  	L1lo		= UZERO/MiB,		/* L1X(UZERO)? */
  #ifdef SMALL_ARM				/* well under 1GB of RAM? */
- 	L1hi		= (USTKTOP+MiB-1)/MiB,	/* L1X(USTKTOP+MiB-1)? */
+ 	L1hi		= (TSTKTOP+MiB-1)/MiB,	/* L1X(TSTKTOP+MiB-1)? */
  #else
  	/*
  	 * on trimslice, top of 1GB ram can't be addressible, as high
diff -c /n/dump/2013/0316/sys/src/9/bcm/mem.h bcm/mem.h
/n/dump/2013/0316/sys/src/9/bcm/mem.h:51,59 - bcm/mem.h:51,59
  
  #define	UZERO		0			/* user segment */
  #define	UTZERO		(UZERO+BY2PG)		/* user text start */
- #define	USTKTOP		0x20000000			/* user segment end +1 */
+ #define	TSTKTOP		0x20000000			/* user segment end +1 */
  #define	USTKSIZE	(8*1024*1024)		/* user stack size */
- #define	TSTKTOP		(USTKTOP-USTKSIZE)	/* sysexec temporary stack */
+ #define	USTKTOP		(TSTKTOP-USTKSIZE)	/* sysexec temporary stack */
  #define	TSTKSIZ	 	256
  
  /* address at which to copy and execute rebootcode */
diff -c /n/dump/2013/0316/sys/src/9/bitsy/mem.h bitsy/mem.h
/n/dump/2013/0316/sys/src/9/bitsy/mem.h:60,68 - bitsy/mem.h:60,68
  #define	UCDRAMTOP	0xD0000000		/* ... */
  #define	NULLZERO	0xE0000000		/* 128 meg for cache flush zeroes */
  #define	NULLTOP		0xE8000000		/* ... */
- #define	USTKTOP		0x2000000		/* byte just beyond user stack */
+ #define	TSTKTOP		0x2000000		/* byte just beyond user stack */
  #define	USTKSIZE		(8*1024*1024)		/* size of user stack */
- #define	TSTKTOP		(USTKTOP-USTKSIZE)	/* end of new stack in sysexec */
+ #define	USTKTOP		(TSTKTOP-USTKSIZE)	/* end of new stack in sysexec */
  #define	TSTKSIZ	 	100
  #define	MACHADDR	(KZERO+0x00001000)
  #define	EVECTORS	0xFFFF0000		/* virt base of exception vectors */
diff -c /n/dump/2013/0316/sys/src/9/kw/mem.h kw/mem.h
/n/dump/2013/0316/sys/src/9/kw/mem.h:69,77 - kw/mem.h:69,77
  
  #define	UZERO		0			/* user segment */
  #define	UTZERO		(UZERO+BY2PG)		/* user text start */
- #define	USTKTOP		KZERO			/* user segment end +1 */
+ #define	TSTKTOP		KZERO			/* user segment end +1 */
  #define	USTKSIZE	(8*1024*1024)		/* user stack size */
- #define	TSTKTOP		(USTKTOP-USTKSIZE)	/* sysexec temporary stack */
+ #define	USTKTOP		(TSTKTOP-USTKSIZE)	/* sysexec temporary stack */
  #define	TSTKSIZ	 	256
  
  /* address at which to copy and execute rebootcode */
diff -c /n/dump/2013/0316/sys/src/9/omap/mem.h omap/mem.h
/n/dump/2013/0316/sys/src/9/omap/mem.h:73,81 - omap/mem.h:73,81
  #define	UTZERO		(UZERO+BY2PG)		/* user text start */
  #define UTROUND(t)	ROUNDUP((t), BY2PG)
  /* moved USTKTOP down to 512MB to keep MMIO space out of user space. */
- #define	USTKTOP		0x20000000		/* user segment end +1 */
+ #define	TSTKTOP		0x20000000		/* user segment end +1 */
  #define	USTKSIZE	(8*1024*1024)		/* user stack size */
- #define	TSTKTOP		(USTKTOP-USTKSIZE)	/* sysexec temporary stack */
+ #define	USTKTOP		(TSTKTOP-USTKSIZE)	/* sysexec temporary stack */
  #define	TSTKSIZ	 	256
  
  /* address at which to copy and execute rebootcode */
diff -c /n/dump/2013/0316/sys/src/9/pc/mem.h pc/mem.h
/n/dump/2013/0316/sys/src/9/pc/mem.h:61,69 - pc/mem.h:61,69
  #define	VMAPSIZE	(0x10000000-VPTSIZE-KMAPSIZE)
  #define	UZERO		0			/* base of user address space */
  #define	UTZERO		(UZERO+BY2PG)		/* first address in user text */
- #define	USTKTOP		(VMAP-BY2PG)		/* byte just beyond user stack */
+ #define	TSTKTOP		(VMAP-BY2PG)		/* byte just beyond user stack */
  #define	USTKSIZE	(16*1024*1024)		/* size of user stack */
- #define	TSTKTOP		(USTKTOP-USTKSIZE)	/* end of new stack in sysexec */
+ #define	USTKTOP		(TSTKTOP-USTKSIZE)	/* end of new stack in sysexec */
  #define	TSTKSIZ 	256	/* pages in new stack; limits exec args */
  
  /*
diff -c /n/dump/2013/0316/sys/src/9/pcpae/mem.h pcpae/mem.h
/n/dump/2013/0316/sys/src/9/pcpae/mem.h:61,69 - pcpae/mem.h:61,69
  #define	VMAPSIZE	(0x10000000-VPTSIZE-KMAPSIZE-KXMAPSIZE)
  #define	UZERO		0			/* base of user address space */
  #define	UTZERO		(UZERO+BY2PG)		/* first address in user text */
- #define	USTKTOP		(VMAP-BY2PG)		/* byte just beyond user stack */
+ #define	TSTKTOP		(VMAP-BY2PG)		/* byte just beyond user stack */
  #define	USTKSIZE	(16*1024*1024)		/* size of user stack */
- #define	TSTKTOP		(USTKTOP-USTKSIZE)	/* end of new stack in sysexec */
+ #define	USTKTOP		(TSTKTOP-USTKSIZE)	/* end of new stack in sysexec */
  #define	TSTKSIZ 	256			/* pages in new stack; limits exec args */
  
  /*
diff -c /n/dump/2013/0316/sys/src/9/teg2/mem.h teg2/mem.h
/n/dump/2013/0316/sys/src/9/teg2/mem.h:91,99 - teg2/mem.h:91,99
   * moved it down another MB to utterly avoid KADDR(stack_base) mapping
   * to high exception vectors.  see confinit().
   */
- #define	USTKTOP		(0x40000000 - 64*KiB - MiB) /* user segment end +1 */
+ #define	TSTKTOP		(0x40000000 - 64*KiB - MiB) /* user segment end +1 */
  #define	USTKSIZE	(8*1024*1024)		/* user stack size */
- #define	TSTKTOP		(USTKTOP-USTKSIZE)	/* sysexec temporary stack */
+ #define	USTKTOP		(TSTKTOP-USTKSIZE)	/* sysexec temporary stack */
  #define	TSTKSIZ	 	256
  
  /* address at which to copy and execute rebootcode */



^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2013-03-17  1:56 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2013-03-15 17:03 [9fans] exec() question erik quanstrom
2013-03-15 20:35 ` cinap_lenrek
2013-03-15 20:41   ` cinap_lenrek
2013-03-15 21:48     ` erik quanstrom
2013-03-15 21:51       ` erik quanstrom
2013-03-15 22:48       ` cinap_lenrek
2013-03-15 23:57         ` erik quanstrom
2013-03-16  0:11           ` cinap_lenrek
2013-03-17  1:56             ` erik quanstrom

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).