9front - general discussion about 9front
 help / color / mirror / Atom feed
* [9front] VMX improvements + AVX
@ 2020-12-04 15:39 Sigrid Solveig Haflínudóttir
  2020-12-04 21:15 ` james palmer
                   ` (3 more replies)
  0 siblings, 4 replies; 14+ messages in thread
From: Sigrid Solveig Haflínudóttir @ 2020-12-04 15:39 UTC (permalink / raw)
  To: 9front

[-- Attachment #1: Type: text/plain, Size: 1097 bytes --]

Hello.  I made some changes to /sys/src/9/^(pc pc64) and
/sys/src/cmd/vmx to get better support for Linux and OpenBSD emulation
and before I push it (or not) I'd like to get more eyes (and hands) on
it.  386 kernel should not be affected by the change at all, this is
about amd64 specifically.

Changes were tested on two machines I have, with AVX enabled/disabled.
No problems were found with extensive use, nor any performance issues
detected.

On the host Go is using AVX successfully with this change.  I'm
writing optimized routines related to video playback so that's yet
another reason why this work has been done in the first place.

* AVX/AVX2 support on amd64 for both 9front kernel itself + VMX
  guests.  Enabled by setting "*avx=" in plan9.ini.
* Make vmx(1) report to guest it's running under a hypervisor.
* Provide "fast strings" (through msr) properly to guests.
* Rework cpuid in vmx(1).
* A bit better timing by using tsc offset feature.  Clock is still
  wrong but at least not THAT much.  Proper kvm clock implementation
  in the future will address that.

Thanks.

 - Sigrid

[-- Attachment #2: diff --]
[-- Type: text/plain, Size: 25422 bytes --]

diff -r bdf5af28b857 sys/man/8/plan9.ini
--- a/sys/man/8/plan9.ini	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/man/8/plan9.ini	Fri Dec 04 16:26:28 2020 +0100
@@ -898,6 +898,8 @@
 battery life (see
 .IR stats (8)).
 It is not on by default because it causes problems on some laptops.
+.SS \fL*avx=\fP
+Enables AVX and AVX2 on AMD64 cpu if it supports the instruction set.
 .SS USB
 .SS \fL*nousbprobe=\fP
 Disable USB host controller detection.
diff -r bdf5af28b857 sys/src/9/pc/cputemp.c
--- a/sys/src/9/pc/cputemp.c	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc/cputemp.c	Fri Dec 04 16:26:28 2020 +0100
@@ -13,7 +13,7 @@
 
 	if(m->cpuiddx & Acpif)
 	if(strcmp(m->cpuidid, "GenuineIntel") == 0){
-		cpuid(6, regs);
+		cpuid(6, 0, regs);
 		return regs[0] & 1;
 	}
 	return 0;
@@ -28,7 +28,7 @@
 	ulong regs[4];
 	static ulong tj;
 
-	cpuid(6, regs);
+	cpuid(6, 0, regs);
 	if((regs[0] & 1) == 0)
 		goto unsup;
 	if(tj == 0){
diff -r bdf5af28b857 sys/src/9/pc/dat.h
--- a/sys/src/9/pc/dat.h	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc/dat.h	Fri Dec 04 16:26:28 2020 +0100
@@ -250,7 +250,7 @@
 	int	pdbfree;
 	
 	u32int	dr7;			/* shadow copy of dr7 */
-	
+	u32int	xcr0;
 	void*	vmx;
 
 	int	stack[1];
diff -r bdf5af28b857 sys/src/9/pc/devarch.c
--- a/sys/src/9/pc/devarch.c	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc/devarch.c	Fri Dec 04 16:26:28 2020 +0100
@@ -18,11 +18,6 @@
 	Qmax = 32,
 };
 
-enum {
-	CR4Osfxsr = 1 << 9,
-	CR4Oxmmex = 1 << 10,
-};
-
 enum {				/* cpuid standard function codes */
 	Highstdfunc = 0,	/* also returns vendor string */
 	Procsig,
@@ -507,13 +502,13 @@
 	ulong regs[4];
 	vlong mca, mct, pat;
 
-	cpuid(Highstdfunc, regs);
+	cpuid(Highstdfunc, 0, regs);
 	memmove(m->cpuidid,   &regs[1], BY2WD);	/* bx */
 	memmove(m->cpuidid+4, &regs[3], BY2WD);	/* dx */
 	memmove(m->cpuidid+8, &regs[2], BY2WD);	/* cx */
 	m->cpuidid[12] = '\0';
 
-	cpuid(Procsig, regs);
+	cpuid(Procsig, 0, regs);
 	m->cpuidax = regs[0];
 	m->cpuidcx = regs[2];
 	m->cpuiddx = regs[3];
@@ -650,15 +645,6 @@
 	if(m->cpuiddx & Mtrr)
 		mtrrsync();
 
-	if((m->cpuiddx & (Sse|Fxsr)) == (Sse|Fxsr)){			/* have sse fp? */
-		fpsave = fpssesave;
-		fprestore = fpsserestore;
-		putcr4(getcr4() | CR4Osfxsr|CR4Oxmmex);
-	} else {
-		fpsave = fpx87save;
-		fprestore = fpx87restore;
-	}
-
 	if(strcmp(m->cpuidid, "GenuineIntel") == 0 && (m->cpuidcx & Rdrnd) != 0)
 		hwrandbuf = rdrandbuf;
 	else
@@ -669,9 +655,9 @@
 		m->havewatchpt8 = 1;
 
 		/* check and enable NX bit */
-		cpuid(Highextfunc, regs);
+		cpuid(Highextfunc, 0, regs);
 		if(regs[0] >= Procextfeat){
-			cpuid(Procextfeat, regs);
+			cpuid(Procextfeat, 0, regs);
 			if((regs[3] & (1<<20)) != 0){
 				vlong efer;
 
@@ -689,14 +675,16 @@
 		|| family == 6 && (model == 15 || model == 23 || model == 28))
 			m->havewatchpt8 = 1;
 		/* Intel SDM claims amd64 support implies 8-byte watchpoint support */
-		cpuid(Highextfunc, regs);
+		cpuid(Highextfunc, 0, regs);
 		if(regs[0] >= Procextfeat){
-			cpuid(Procextfeat, regs);
+			cpuid(Procextfeat, 0, regs);
 			if((regs[3] & 1<<29) != 0)
 				m->havewatchpt8 = 1;
 		}
 	}
 
+	fpuinit();
+
 	cputype = t;
 	return t->family;
 }
diff -r bdf5af28b857 sys/src/9/pc/devvmx.c
--- a/sys/src/9/pc/devvmx.c	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc/devvmx.c	Fri Dec 04 16:26:28 2020 +0100
@@ -44,6 +44,7 @@
 	
 	PROCB_CTLS = 0x4002,
 	PROCB_IRQWIN = 1<<2,
+	PROCB_TSCOFFSET = 1<<3,
 	PROCB_EXITHLT = 1<<7,
 	PROCB_EXITINVLPG = 1<<9,
 	PROCB_EXITMWAIT = 1<<10,
@@ -100,6 +101,7 @@
 	VMENTRY_INTRCODE = 0x4018,
 	VMENTRY_INTRILEN = 0x401a,
 	
+	VMCS_TSC_OFFSET = 0x2010,
 	VMCS_LINK = 0x2800,
 	
 	GUEST_ES = 0x800,
@@ -264,7 +266,9 @@
 	int index, machno;
 	char errstr[ERRMAX];
 	Ureg ureg;
+	uvlong tscoffset;
 	uintptr cr2;
+	uintptr xcr0;
 	uintptr dr[8]; /* DR7 is also kept in VMCS */
 	u8int launched;
 	u8int vpid;
@@ -484,6 +488,13 @@
 }
 
 static int
+xcr0write(Vmx *vmx, char *s)
+{
+	vmx->xcr0 = parseval(s) & 7;
+	return 0;
+}
+
+static int
 readonly(Vmx *, char *)
 {
 	return -1;
@@ -581,6 +592,7 @@
 	{VMXVAR(dr[2]), 0, "dr2"},
 	{VMXVAR(dr[3]), 0, "dr3"},
 	{VMXVAR(dr[6]), 0, "dr6", nil, dr6write},
+	{VMXVAR(xcr0), 0, "xcr0", nil, xcr0write},
 	{GUEST_DR7, 0, "dr7", nil, dr7write},
 	{VM_INSTRERR, 4, "instructionerror", nil, readonly},
 	{VM_EXREASON, 4, "exitreason", nil, readonly},
@@ -857,7 +869,7 @@
 	vlong msr;
 	int i;
 
-	cpuid(1, regs);
+	cpuid(1, 0, regs);
 	if((regs[2] & 1<<5) == 0) return;
 	/* check if disabled by BIOS */
 	if(rdmsr(0x3a, &msr) < 0) return;
@@ -945,8 +957,8 @@
 	
 	if(rdmsr(VMX_PROCB_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_PROCB_CTLS_MSR failed");
 	x = (u32int)procb_ctls | 1<<1 | 7<<4 | 1<<8 | 1<<13 | 1<<14 | 1<<26; /* currently reserved default1 bits */
-	x |= PROCB_EXITHLT | PROCB_EXITMWAIT;
-	x |= PROCB_EXITMOVDR | PROCB_EXITIO | PROCB_EXITMONITOR | PROCB_MSRBITMAP;
+	x |= PROCB_TSCOFFSET | PROCB_EXITMWAIT | PROCB_EXITMONITOR | PROCB_EXITHLT;
+	x |= PROCB_EXITMOVDR | PROCB_EXITIO | PROCB_MSRBITMAP;
 	x |= PROCB_USECTLS2;
 	x &= msr >> 32;
 	vmcswrite(PROCB_CTLS, x);
@@ -1042,8 +1054,8 @@
 	
 	vmx->onentry = FLUSHVPID | FLUSHEPT;
 	fpinit();
-	fpsave(&vmx->fp);
-	
+	vmx->xcr0 = m->xcr0 & 1; /* x87 alone */
+
 	memset(vmx->msrbits, -1, 4096);
 	vmxtrapmsr(vmx, Efer, 0);
 	vmcswrite(VMENTRY_MSRLDADDR, PADDR(vmx->msrguest));
@@ -1051,6 +1063,9 @@
 	vmcswrite(VMEXIT_MSRLDADDR, PADDR(vmx->msrhost));
 	vmcswrite(MSR_BITMAP, PADDR(vmx->msrbits));
 	
+	cycles(&vmx->tscoffset);
+	vmcswrite(VMCS_TSC_OFFSET, vmx->tscoffset);
+
 	if(sizeof(uintptr) == 8){
 		vmxaddmsr(vmx, Star, 0);
 		vmxaddmsr(vmx, Lstar, 0);
@@ -1074,7 +1089,7 @@
 	uintptr cr;
 	vlong x;
 
-	putcr4(getcr4() | 0x2000); /* set VMXE */
+	putcr4(getcr4() | CR4VMXE);
 	putcr0(getcr0() | 0x20); /* set NE */
 	cr = getcr0();
 	if(rdmsr(VMX_CR0_FIXED0, &msr) < 0) error("rdmsr(VMX_CR0_FIXED0) failed");
@@ -1590,8 +1605,9 @@
 static void
 vmxproc(void *vmxp)
 {
-	int init, rc, x;
+	int init, rc, x, useend;
 	u32int procbctls, defprocbctls;
+	u64int start, end, adj;
 	vlong v;
 	Vmx *vmx;
 
@@ -1599,6 +1615,8 @@
 	procwired(up, vmx->machno);
 	sched();
 	init = 0;
+	useend = 0;
+	adj = 0;
 	defprocbctls = 0;
 	while(waserror()){
 		kstrcpy(vmx->errstr, up->errstr, ERRMAX);
@@ -1653,11 +1671,29 @@
 			}
 			if((vmx->dr[7] & ~0xd400) != 0)
 				putdr01236(vmx->dr);
-			fpsserestore(&vmx->fp);
-			putcr2(vmx->cr2);
+
+			fprestore(&vmx->fp);
+			if(m->xcr0 != 0 && vmx->xcr0 != m->xcr0)
+				putxcr0(vmx->xcr0);
+			if(vmx->cr2 != getcr2())
+				putcr2(vmx->cr2);
+			cycles(&start);
+			if(useend){
+				vmx->tscoffset -= end - start + adj;
+				vmcswrite(VMCS_TSC_OFFSET, vmx->tscoffset);
+			}
+			if(adj == 0){
+				cycles(&adj);
+				adj -= start;
+			}
 			rc = vmlaunch(&vmx->ureg, vmx->launched);
+			cycles(&end);
+			useend = 1;
 			vmx->cr2 = getcr2();
-			fpssesave(&vmx->fp);
+			if(m->xcr0 != 0 && vmx->xcr0 != m->xcr0)
+				putxcr0(m->xcr0);
+			fpsave(&vmx->fp);
+
 			splx(x);
 			if(rc < 0)
 				error("vmlaunch failed");
@@ -1799,6 +1835,7 @@
 		free(vmx);
 		nexterror();
 	}
+	memset(vmx, 0, sizeof(Vmx));
 	vmx->state = VMXINIT;
 	vmx->lastcmd = &vmx->firstcmd;
 	vmx->mem.next = &vmx->mem;
diff -r bdf5af28b857 sys/src/9/pc/fns.h
--- a/sys/src/9/pc/fns.h	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc/fns.h	Fri Dec 04 16:26:28 2020 +0100
@@ -15,7 +15,8 @@
 int	(*cmpswap)(long*, long, long);
 int	cmpswap486(long*, long, long);
 void	(*coherence)(void);
-void	cpuid(int, ulong regs[]);
+void	cpuid(int, int, ulong regs[]);
+void	fpuinit(void);
 int	cpuidentify(void);
 void	cpuidprint(void);
 void	(*cycles)(uvlong*);
@@ -137,6 +138,7 @@
 void	putcr2(ulong);
 void	putcr3(ulong);
 void	putcr4(ulong);
+void	putxcr0(ulong);
 void	putdr(u32int*);
 void	putdr01236(uintptr*);
 void	putdr6(u32int);
diff -r bdf5af28b857 sys/src/9/pc/fpu.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/src/9/pc/fpu.c	Fri Dec 04 16:26:28 2020 +0100
@@ -0,0 +1,33 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+
+enum {
+	CR4Osfxsr  = 1 << 9,
+	CR4Oxmmex  = 1 << 10,
+};
+
+void
+putxcr0(ulong)
+{
+}
+
+void
+fpuinit(void)
+{
+	uintptr cr4;
+
+	if((m->cpuiddx & (Sse|Fxsr)) == (Sse|Fxsr)){ /* have sse fp? */
+		fpsave = fpssesave;
+		fprestore = fpsserestore;
+		cr4 = getcr4() | CR4Osfxsr|CR4Oxmmex;
+		putcr4(cr4);
+	} else {
+		fpsave = fpx87save;
+		fprestore = fpx87restore;
+	}
+}
diff -r bdf5af28b857 sys/src/9/pc/l.s
--- a/sys/src/9/pc/l.s	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc/l.s	Fri Dec 04 16:26:28 2020 +0100
@@ -520,7 +520,7 @@
  * a 386 (Ac bit can't be set). If it's not a 386 and the Id bit can't be
  * toggled then it's an older 486 of some kind.
  *
- *	cpuid(fun, regs[4]);
+ *	cpuid(fn, subfn, regs[4]);
  */
 TEXT cpuid(SB), $0
 	MOVL	$0x240000, AX
@@ -539,6 +539,7 @@
 	TESTL	$0x200000, AX			/* Id */
 	JZ	_cpu486				/* can't toggle this bit on some 486 */
 	MOVL	fn+0(FP), AX
+	MOVL	subfn+4(FP), CX
 	CPUID
 	JMP	_cpuid
 _cpu486:
@@ -555,7 +556,7 @@
 	XORL	CX, CX
 	XORL	DX, DX
 _cpuid:
-	MOVL	regs+4(FP), BP
+	MOVL	regs+8(FP), BP
 	MOVL	AX, 0(BP)
 	MOVL	BX, 4(BP)
 	MOVL	CX, 8(BP)
diff -r bdf5af28b857 sys/src/9/pc/mkfile
--- a/sys/src/9/pc/mkfile	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc/mkfile	Fri Dec 04 16:26:28 2020 +0100
@@ -49,6 +49,7 @@
 OBJ=\
 	l.$O\
 	cga.$O\
+	fpu.$O\
 	i8253.$O\
 	i8259.$O\
 	main.$O\
diff -r bdf5af28b857 sys/src/9/pc/mtrr.c
--- a/sys/src/9/pc/mtrr.c	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc/mtrr.c	Fri Dec 04 16:26:28 2020 +0100
@@ -289,9 +289,9 @@
 	ulong regs[4];
 	uvlong mask;
 
-	cpuid(Exthighfunc, regs);
+	cpuid(Exthighfunc, 0, regs);
 	if(regs[0] >= Extaddrsz) {			/* ax */
-		cpuid(Extaddrsz, regs);
+		cpuid(Extaddrsz, 0, regs);
 		mask = (1ULL << (regs[0] & 0xFF)) - 1;	/* ax */
 	} else {
 		mask = (1ULL << 36) - 1;
diff -r bdf5af28b857 sys/src/9/pc64/dat.h
--- a/sys/src/9/pc64/dat.h	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc64/dat.h	Fri Dec 04 16:26:28 2020 +0100
@@ -2,6 +2,8 @@
 typedef struct BIOS32ci	BIOS32ci;
 typedef struct Conf	Conf;
 typedef struct Confmem	Confmem;
+typedef struct FPssestate	FPssestate;
+typedef struct FPavxstate	FPavxstate;
 typedef struct FPsave	FPsave;
 typedef struct PFPU	PFPU;
 typedef struct ISAConf	ISAConf;
@@ -49,7 +51,7 @@
 	uintptr	pc;
 };
 
-struct FPsave
+struct FPssestate
 {
 	u16int	fcw;			/* x87 control word */
 	u16int	fsw;			/* x87 status word */
@@ -65,6 +67,18 @@
 	uchar	ign[96];		/* reserved, ignored */
 };
 
+struct FPavxstate
+{
+	FPssestate;
+	uchar	header[64];		/* XSAVE header */
+	uchar	ymm[256];		/* upper 128-bit regs (AVX) */
+};
+
+struct FPsave
+{
+	FPavxstate;
+};
+
 enum
 {
 	/* this is a state */
@@ -224,9 +238,12 @@
 	int	havewatchpt8;
 	int	havenx;
 	uvlong	tscticks;
-	
+
 	u64int	dr7;			/* shadow copy of dr7 */
-	
+	u64int	xcr0;
+	u32int	fpsavesz;
+	u32int	fpalign;
+
 	void*	vmx;
 
 	uintptr	stack[1];
@@ -270,8 +287,14 @@
 
 /* cpuid instruction result register bits */
 enum {
+	/* ax */
+	Xsaveopt = 1<<0,
+	Xsaves = 1<<3,
+
 	/* cx */
 	Monitor	= 1<<3,
+	Xsave = 1<<26,
+	Avx	= 1<<28,
 
 	/* dx */
 	Fpuonchip = 1<<0,
diff -r bdf5af28b857 sys/src/9/pc64/fns.h
--- a/sys/src/9/pc64/fns.h	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc64/fns.h	Fri Dec 04 16:26:28 2020 +0100
@@ -15,7 +15,8 @@
 int	(*cmpswap)(long*, long, long);
 int	cmpswap486(long*, long, long);
 void	(*coherence)(void);
-void	cpuid(int, ulong regs[]);
+void	cpuid(int, int, ulong regs[]);
+void	fpuinit(void);
 int	cpuidentify(void);
 void	cpuidprint(void);
 void	(*cycles)(uvlong*);
@@ -40,6 +41,11 @@
 void	(*fpsave)(FPsave*);
 void	fpsserestore(FPsave*);
 void	fpssesave(FPsave*);
+void	fpxrestore(FPsave*);
+void	fpxrestores(FPsave*);
+void	fpxsave(FPsave*);
+void	fpxsaveopt(FPsave*);
+void	fpxsaves(FPsave*);
 void	fpx87restore(FPsave*);
 void	fpx87save(FPsave*);
 int	fpusave(void);
@@ -48,6 +54,7 @@
 u64int	getcr2(void);
 u64int	getcr3(void);
 u64int	getcr4(void);
+u64int	getxcr0(void);
 u64int	getdr6(void);
 char*	getconf(char*);
 void	guesscpuhz(int);
@@ -137,6 +144,7 @@
 void	putcr2(u64int);
 void	putcr3(u64int);
 void	putcr4(u64int);
+void	putxcr0(u64int);
 void	putdr(u64int*);
 void	putdr01236(u64int*);
 void	putdr6(u64int);
diff -r bdf5af28b857 sys/src/9/pc64/fpu.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/src/9/pc64/fpu.c	Fri Dec 04 16:26:28 2020 +0100
@@ -0,0 +1,53 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+
+enum {
+	CR4Osfxsr  = 1 << 9,
+	CR4Oxmmex  = 1 << 10,
+	CR4Oxsave  = 1 << 18,
+};
+
+void
+fpuinit(void)
+{
+	uintptr cr4;
+	ulong regs[4];
+
+	m->fpsavesz = sizeof(FPssestate);
+	m->fpalign = 16;
+	if((m->cpuiddx & (Sse|Fxsr)) == (Sse|Fxsr)){ /* have sse fp? */
+		cr4 = getcr4() | CR4Osfxsr|CR4Oxmmex;
+		putcr4(cr4);
+		fpsave = fpssesave;
+		fprestore = fpsserestore;
+
+		if((m->cpuidcx & (Xsave|Avx)) == (Xsave|Avx) && getconf("*avx") != nil){
+			cr4 |= CR4Oxsave;
+			putcr4(cr4);
+			m->xcr0 = 7; /* x87, sse, avx */
+			putxcr0(m->xcr0);
+			fpsave = fpxsave;
+			fprestore = fpxrestore;
+
+			cpuid(0xd, 0, regs);
+			m->fpsavesz = regs[1];
+			m->fpalign = 64;
+
+			cpuid(0xd, 1, regs);
+			if(regs[0] & Xsaveopt)
+				fpsave = fpxsaveopt;
+			if(regs[0] & Xsaves){
+				fpsave = fpxsaves;
+				fprestore = fpxrestores;
+			}
+		}
+	} else {
+		fpsave = fpx87save;
+		fprestore = fpx87restore;
+	}
+}
diff -r bdf5af28b857 sys/src/9/pc64/l.s
--- a/sys/src/9/pc64/l.s	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc64/l.s	Fri Dec 04 16:26:28 2020 +0100
@@ -249,9 +249,10 @@
  */
 TEXT cpuid(SB), $-4
 	MOVL	RARG, AX			/* function in AX */
+	MOVL	cx+8(FP), CX		/* sub-level in CX */
 	CPUID
 
-	MOVQ	info+8(FP), BP
+	MOVQ	info+16(FP), BP
 	MOVL	AX, 0(BP)
 	MOVL	BX, 4(BP)
 	MOVL	CX, 8(BP)
@@ -399,6 +400,21 @@
 	MOVQ	RARG, CR4
 	RET
 
+TEXT getxcr0(SB), 1, $-4			/* XCR0 - extended control */
+	XORQ CX, CX
+	WORD $0x010f; BYTE $0xd0	// XGETBV
+	SHLQ $32, DX
+	ORQ DX, AX
+	RET
+
+TEXT putxcr0(SB), 1, $-4
+	XORQ CX, CX
+	MOVL RARG, DX
+	SHRQ $32, DX
+	MOVL RARG, AX
+	WORD $0x010f; BYTE $0xd1	// XSETBV
+	RET
+
 TEXT mb386(SB), 1, $-4				/* hack */
 TEXT mb586(SB), 1, $-4
 	XORL	AX, AX
@@ -626,6 +642,36 @@
 	FXSAVE64 (RARG)
 	RET
 
+TEXT _xrstor(SB), 1, $-4
+	MOVL $7, AX
+	XORL DX, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x6d; BYTE $0x00 // XRSTOR (RARG)
+	RET
+
+TEXT _xrstors(SB), 1, $-4
+	MOVL $7, AX
+	XORL DX, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xc7; BYTE $0x5d; BYTE $0x00 // XRSTORS (RARG)
+	RET
+
+TEXT _xsave(SB), 1, $-4
+	MOVL $7, AX
+	XORL DX, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x65; BYTE $0x00 // XSAVE (RARG)
+	RET
+
+TEXT _xsaveopt(SB), 1, $-4
+	MOVL $7, AX
+	XORL DX, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x75; BYTE $0x00 // XSAVEOPT (RARG)
+	RET
+
+TEXT _xsaves(SB), 1, $-4
+	MOVL $7, AX
+	XORL DX, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xc7; BYTE $0x6d; BYTE $0x00 // XSAVES (RARG)
+	RET
+
 TEXT _fwait(SB), 1, $-4
 	WAIT
 	RET
diff -r bdf5af28b857 sys/src/9/pc64/main.c
--- a/sys/src/9/pc64/main.c	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc64/main.c	Fri Dec 04 16:26:28 2020 +0100
@@ -304,6 +304,9 @@
 extern void _fninit(void);
 extern void _fxrstor(void*);
 extern void _fxsave(void*);
+extern void _xrstor(void*);
+extern void _xsave(void*);
+extern void _xsaveopt(void*);
 extern void _fwait(void);
 extern void _ldmxcsr(u32int);
 extern void _stts(void);
@@ -333,6 +336,39 @@
 	_fxrstor(s);
 }
 
+void
+fpxsave(FPsave *s)
+{
+	_xsave(s);
+	_stts();
+}
+void
+fpxrestore(FPsave *s)
+{
+	_clts();
+	_xrstor(s);
+}
+
+void
+fpxsaves(FPsave *s)
+{
+	_xsaveopt(s);
+	_stts();
+}
+void
+fpxrestores(FPsave *s)
+{
+	_clts();
+	_xrstor(s);
+}
+
+void
+fpxsaveopt(FPsave *s)
+{
+	_xsaveopt(s);
+	_stts();
+}
+
 static char* mathmsg[] =
 {
 	nil,	/* handled below */
@@ -452,7 +488,7 @@
 			up->fpstate |= FPkernel;
 		}
 		while(up->fpslot[index] == nil)
-			up->fpslot[index] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
+			up->fpslot[index] = mallocalign(m->fpsavesz, m->fpalign, 0, 0);
 		up->fpsave = up->fpslot[index];
 		up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
 		break;
@@ -538,8 +574,8 @@
 	case FPinactive	| FPpush:
 	case FPinactive:
 		while(p->fpslot[0] == nil)
-			p->fpslot[0] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
-		memmove(p->fpsave = p->fpslot[0], up->fpslot[0], sizeof(FPsave));
+			p->fpslot[0] = mallocalign(m->fpsavesz, m->fpalign, 0, 0);
+		memmove(p->fpsave = p->fpslot[0], up->fpslot[0], m->fpsavesz);
 		p->fpstate = FPinactive;
 	}
 	splx(s);
diff -r bdf5af28b857 sys/src/9/pc64/mem.h
--- a/sys/src/9/pc64/mem.h	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc64/mem.h	Fri Dec 04 16:26:28 2020 +0100
@@ -26,7 +26,6 @@
 #define	ROUND(s, sz)	(((s)+((sz)-1))&~((sz)-1))
 #define	PGROUND(s)	ROUND(s, BY2PG)
 #define	BLOCKALIGN	8
-#define	FPalign		16
 
 #define	MAXMACH		128			/* max # cpus system can run */
 
diff -r bdf5af28b857 sys/src/9/pc64/mkfile
--- a/sys/src/9/pc64/mkfile	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc64/mkfile	Fri Dec 04 16:26:28 2020 +0100
@@ -47,6 +47,7 @@
 OBJ=\
 	l.$O\
 	cga.$O\
+	fpu.$O\
 	i8253.$O\
 	i8259.$O\
 	main.$O\
diff -r bdf5af28b857 sys/src/cmd/vmx/exith.c
--- a/sys/src/cmd/vmx/exith.c	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/cmd/vmx/exith.c	Fri Dec 04 16:26:28 2020 +0100
@@ -1,9 +1,8 @@
 #include <u.h>
 #include <libc.h>
-#include <thread.h>
-#include <bio.h>
 #include "dat.h"
 #include "fns.h"
+#include "x86.h"
 
 int persist = 1;
 
@@ -118,109 +117,167 @@
 
 typedef struct CPUID CPUID;
 struct CPUID {
-	u32int idx;
 	u32int ax, bx, cx, dx;
 };
-static CPUID *cpuidf;
-static int ncpuidf;
+static u32int cpuidmax;
+static u32int cpuidmaxext;
+static CPUID leaf1;
+static struct {
+	uvlong miscen;
+}msr;
 
-static void
-auxcpuidproc(void *vpfd)
-{
-	int *pfd;
-	
-	pfd = vpfd;
-	close(pfd[1]);
-	close(0);
-	open("/dev/null", OREAD);
-	dup(pfd[0], 1);
-	close(pfd[0]);
-	procexecl(nil, "/bin/aux/cpuid", "cpuid", "-r", nil);
-	threadexits("exec: %r");
-}
+static uchar _cpuid[] = {
+	0x5E,			/* POP SI (PC) */
+	0x5D,			/* POP BP (CPUID&) */
+	0x58,			/* POP AX */
+	0x59,			/* POP CX */
+
+	0x51,			/* PUSH CX */
+	0x50,			/* PUSH AX */
+	0x55,			/* PUSH BP */
+	0x56,			/* PUSH SI */
+
+	0x31, 0xDB,		/* XOR BX, BX */
+	0x31, 0xD2,		/* XOR DX, DX */
+
+	0x0F, 0xA2,		/* CPUID */
+
+	0x89, 0x45, 0x00,	/* MOV AX, 0(BP) */
+	0x89, 0x5d, 0x04,	/* MOV BX, 4(BP) */
+	0x89, 0x4d, 0x08,	/* MOV CX, 8(BP) */
+	0x89, 0x55, 0x0C,	/* MOV DX, 12(BP) */
+	0xC3,			/* RET */
+};
+
+static CPUID (*getcpuid)(ulong ax, ulong cx) = (CPUID(*)(ulong, ulong)) _cpuid;
 
 void
 cpuidinit(void)
 {
-	int pfd[2];
-	Biobuf *bp;
-	char *l, *f[5];
-	CPUID *cp;
-	
-	pipe(pfd);
-	procrfork(auxcpuidproc, pfd, 4096, RFFDG);
-	close(pfd[0]);
-	bp = Bfdopen(pfd[1], OREAD);
-	if(bp == nil) sysfatal("Bopenfd: %r");
-	for(; l = Brdstr(bp, '\n', 1), l != nil; free(l)){
-		if(tokenize(l, f, 5) < 5) continue;
-		cpuidf = realloc(cpuidf, (ncpuidf + 1) * sizeof(CPUID));
-		cp = cpuidf + ncpuidf++;
-		cp->idx = strtoul(f[0], nil, 16);
-		cp->ax = strtoul(f[1], nil, 16);
-		cp->bx = strtoul(f[2], nil, 16);
-		cp->cx = strtoul(f[3], nil, 16);
-		cp->dx = strtoul(f[4], nil, 16);
+	CPUID r;
+	int f;
+
+	if(sizeof(uintptr) == 8) /* patch out POP BP -> POP AX */
+		_cpuid[1] = 0x58;
+	segflush(_cpuid, sizeof(_cpuid));
+
+	r = getcpuid(0, 0);
+	cpuidmax = r.ax;
+	r = getcpuid(0x80000000, 0);
+	cpuidmaxext = r.ax;
+	leaf1 = getcpuid(1, 0);
+
+	memset(&msr, 0, sizeof(msr));
+	if((f = open("/dev/msr", OREAD)) >= 0){
+		pread(f, &msr.miscen, 8, 0x1a0);
+		msr.miscen &= 1<<0; /* fast strings */
+		close(f);
 	}
-	Bterm(bp);
-	close(pfd[1]);
 }
 
-CPUID *
-getcpuid(ulong idx)
-{
-	CPUID *cp;
-	
-	for(cp = cpuidf; cp < cpuidf + ncpuidf; cp++)
-		if(cp->idx == idx)
-			return cp;
-	return nil;
-}
-
-int maxcpuid = 7;
+static int xsavesz[] = {
+	[1] = 512+64,
+	[3] = 512+64,
+	[7] = 512+64+256,
+};
 
 static void
 cpuid(ExitInfo *ei)
 {
 	u32int ax, bx, cx, dx;
-	CPUID *cp;
-	static CPUID def;
-	
+	CPUID cp;
+
 	ax = rget(RAX);
-	cp = getcpuid(ax);
-	if(cp == nil) cp = &def;
+	cx = rget(RCX);
+	bx = dx = 0;
+	cp = getcpuid(ax, cx);
 	switch(ax){
-	case 0: /* highest register & GenuineIntel */
-		ax = maxcpuid;
-		bx = cp->bx;
-		dx = cp->dx;
-		cx = cp->cx;
+	case 0x00: /* highest register & GenuineIntel */
+		ax = MIN(cpuidmax, 0x18);
+		bx = cp.bx;
+		dx = cp.dx;
+		cx = cp.cx;
 		break;
-	case 1: /* features */
-		ax = cp->ax;
-		bx = cp->bx & 0xffff;
-		cx = cp->cx & 0x60de2203;
-		dx = cp->dx & 0x0782a179;
+	case 0x01: /* features */
+		ax = cp.ax;
+		bx = cp.bx & 0xffff;
+		/* some features removed, hypervisor added */
+		cx = cp.cx & 0x76de3217 | 0x80000000UL;
+		dx = cp.dx & 0x0f8aa579;
+		if(leaf1.cx & 1<<27){
+			if(rget("cr4real") & Cr4Osxsave)
+				cx |= 1<<27;
+		}else{
+			cx &= ~0x1c000000;
+		}
 		break;
-	case 2: goto literal; /* cache stuff */
-	case 3: goto zero; /* processor serial number */
-	case 4: goto zero; /* cache stuff */
-	case 5: goto zero; /* monitor/mwait */
-	case 6: goto zero; /* thermal management */
-	case 7: goto zero; /* more features */
-	case 10: goto zero; /* performance counters */
+	case 0x02: goto literal; /* cache stuff */
+	case 0x03: goto zero; /* processor serial number */
+	case 0x04: goto literal; /* cache stuff */
+	case 0x05: goto zero; /* monitor/mwait */
+	case 0x06: goto zero; /* thermal management */
+	case 0x07: /* more features */
+		if(cx == 0){
+			ax = 0;
+			bx = cp.bx & 0x2369;
+			cx = 0;
+			if((leaf1.cx & 1<<27) == 0)
+				bx &= ~0xdc230020;
+		}else{
+			goto zero;
+		}
+		break;
+	case 0x08: goto zero;
+	case 0x09: goto literal; /* direct cache access */
+	case 0x0a: goto zero; /* performance counters */
+	case 0x0b: goto zero; /* extended topology */
+	case 0x0c: goto zero;
+	case 0x0d: /* extended state */
+		if((leaf1.cx & 1<<27) == 0)
+			goto zero;
+		if(cx == 0){ /* main leaf */
+			ax = cp.ax & 7; /* x87, sse, avx */
+			bx = xsavesz[rget("xcr0")]; /* current xsave size */
+			cx = xsavesz[ax]; /* max xsave size */
+		}else if(cx == 1){ /* sub leaf */
+			ax = cp.ax & 7; /* xsaveopt, xsavec, xgetbv1 */
+			bx = xsavesz[rget("xcr0")];
+			cx = 0;
+		}else if(cx == 2){
+			ax = xsavesz[7] - xsavesz[3];
+			bx = xsavesz[3];
+			cx = 0;
+		}else{
+			goto zero;
+		}
+		break;
+	case 0x0f: goto zero; /* RDT */
+	case 0x10: goto zero; /* RDT */
+	case 0x12: goto zero; /* SGX */
+	case 0x14: goto zero; /* PT */
+	case 0x15: goto zero; /* TSC */
+	case 0x16: goto zero; /* cpu clock */
+	case 0x17: goto zero; /* SoC */
+	case 0x18: goto literal; /* pages, tlb */
+
+	case 0x40000000: /* hypervisor */
+		ax = 0;
+		bx = 0x4b4d564b; /* act as KVM */
+		cx = 0x564b4d56;
+		dx = 0x4d;
+		break;
+
 	case 0x80000000: /* highest register */
-		ax = 0x80000008;
-		bx = cx = dx = 0;
+		ax = MIN(cpuidmaxext, 0x80000008);
+		cx = 0;
 		break;
 	case 0x80000001: /* signature & ext features */
-		ax = cp->ax;
-		bx = 0;
-		cx = cp->cx & 0x121;
+		ax = cp.ax;
+		cx = cp.cx & 0x121;
 		if(sizeof(uintptr) == 8)
-			dx = cp->dx & 0x24100800;
+			dx = cp.dx & 0x24100800;
 		else
-			dx = cp->dx & 0x04100000;
+			dx = cp.dx & 0x04100000;
 		break;
 	case 0x80000002: goto literal; /* brand string */
 	case 0x80000003: goto literal; /* brand string */
@@ -230,18 +287,16 @@
 	case 0x80000007: goto zero; /* invariant tsc */
 	case 0x80000008: goto literal; /* address bits */
 	literal:
-		ax = cp->ax;
-		bx = cp->bx;
-		cx = cp->cx;
-		dx = cp->dx;
+		ax = cp.ax;
+		bx = cp.bx;
+		cx = cp.cx;
+		dx = cp.dx;
 		break;
 	default:
-		vmerror("unknown cpuid field eax=%#ux", ax);
+		if((ax & 0xf0000000) != 0x40000000)
+			vmerror("unknown cpuid field eax=%#ux", ax);
 	zero:
-		ax = 0;
-		bx = 0;
-		cx = 0;
-		dx = 0;
+		ax = cx = 0;
 		break;
 	}
 	rset(RAX, ax);
@@ -267,6 +322,9 @@
 		else rset("pat", val);
 		break;
 	case 0x8B: val = 0; break; /* microcode update */
+	case 0x1A0: /* IA32_MISC_ENABLE */
+		if(rd) val = msr.miscen;
+		break;
 	default:
 		if(rd){
 			vmerror("read from unknown MSR %#ux ignored", cx);
@@ -373,6 +431,26 @@
 	irqack(ei->qual);
 }
 
+static void
+xsetbv(ExitInfo *ei)
+{
+	uvlong v;
+
+	/* this should also #ud if LOCK prefix is used */
+
+	v = rget(RAX)&0xffffffff | rget(RDX)<<32;
+	if(rget(RCX) & 0xffffffff)
+		postexc("#gp", 0);
+	else if(v != 1 && v != 3 && v != 7)
+		postexc("#gp", 0);
+	else if((leaf1.cx & 1<<26) == 0 || (rget("cr4real") & Cr4Osxsave) == 0)
+		postexc("#ud", NOERRC);
+	else{
+		rset("xcr0", v);
+		skipinstr(ei);
+	}
+}
+
 typedef struct ExitType ExitType;
 struct ExitType {
 	char *name;
@@ -389,6 +467,7 @@
 	{".movdr", movdr},
 	{"#db", dbgexc},
 	{"movcr", movcr},
+	{".xsetbv", xsetbv},
 };
 
 void
diff -r bdf5af28b857 sys/src/cmd/vmx/fns.h
--- a/sys/src/cmd/vmx/fns.h	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/cmd/vmx/fns.h	Fri Dec 04 16:26:28 2020 +0100
@@ -1,3 +1,4 @@
+#define MIN(a,b) ((a)<(b)?(a):(b))
 void *emalloc(ulong);
 void loadkernel(char *);
 uvlong rget(char *);
diff -r bdf5af28b857 sys/src/cmd/vmx/x86.h
--- a/sys/src/cmd/vmx/x86.h	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/cmd/vmx/x86.h	Fri Dec 04 16:26:28 2020 +0100
@@ -22,8 +22,9 @@
 enum {
 	Cr0Pg	= 1<<31,
 	
-	Cr4Pse	= 1<<4,
-	Cr4Pae	= 1<<5,
+	Cr4Pse		= 1<<4,
+	Cr4Pae		= 1<<5,
+	Cr4Osxsave	= 1<<18,
 	
 	EferLme	= 1<<8,
 };

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [9front] VMX improvements + AVX
  2020-12-04 15:39 [9front] VMX improvements + AVX Sigrid Solveig Haflínudóttir
@ 2020-12-04 21:15 ` james palmer
  2020-12-04 22:25 ` Kurt H Maier
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 14+ messages in thread
From: james palmer @ 2020-12-04 21:15 UTC (permalink / raw)
  To: 9front

On Fri, 4 Dec 2020, at 3:39 PM, Sigrid Solveig Haflínudóttir wrote:
> Hello.  I made some changes to /sys/src/9/^(pc pc64) and
> /sys/src/cmd/vmx to get better support for Linux and OpenBSD emulation
> and before I push it (or not) I'd like to get more eyes (and hands) on
> it.  386 kernel should not be affected by the change at all, this is
> about amd64 specifically.
> 
> Changes were tested on two machines I have, with AVX enabled/disabled.
> No problems were found with extensive use, nor any performance issues
> detected.
> 
> On the host Go is using AVX successfully with this change.  I'm
> writing optimized routines related to video playback so that's yet
> another reason why this work has been done in the first place.
> 
> * AVX/AVX2 support on amd64 for both 9front kernel itself + VMX
>   guests.  Enabled by setting "*avx=" in plan9.ini.
> * Make vmx(1) report to guest it's running under a hypervisor.
> * Provide "fast strings" (through msr) properly to guests.
> * Rework cpuid in vmx(1).
> * A bit better timing by using tsc offset feature.  Clock is still
>   wrong but at least not THAT much.  Proper kvm clock implementation
>   in the future will address that.
> 
> Thanks.
> 
>  - Sigrid

seems to work fine on my box. i'm seeing better performance in my debian vm too. i haven't done any testing though.

- james

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [9front] VMX improvements + AVX
  2020-12-04 15:39 [9front] VMX improvements + AVX Sigrid Solveig Haflínudóttir
  2020-12-04 21:15 ` james palmer
@ 2020-12-04 22:25 ` Kurt H Maier
  2020-12-05  0:19 ` ori
  2020-12-10 12:23 ` Stuart Morrow
  3 siblings, 0 replies; 14+ messages in thread
From: Kurt H Maier @ 2020-12-04 22:25 UTC (permalink / raw)
  To: 9front

On Fri, Dec 04, 2020 at 04:39:37PM +0100, Sigrid Solveig Haflínudóttir wrote:
> Thanks.
> 
>  - Sigrid


this is great

you are great

khm


^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [9front] VMX improvements + AVX
  2020-12-04 15:39 [9front] VMX improvements + AVX Sigrid Solveig Haflínudóttir
  2020-12-04 21:15 ` james palmer
  2020-12-04 22:25 ` Kurt H Maier
@ 2020-12-05  0:19 ` ori
  2020-12-10 12:23 ` Stuart Morrow
  3 siblings, 0 replies; 14+ messages in thread
From: ori @ 2020-12-05  0:19 UTC (permalink / raw)
  To: ftrvxmtrx, 9front

Quoth Sigrid Solveig Haflínudóttir <ftrvxmtrx@gmail.com>:
> Hello.  I made some changes to /sys/src/9/^(pc pc64) and
> /sys/src/cmd/vmx to get better support for Linux and OpenBSD emulation
> and before I push it (or not) I'd like to get more eyes (and hands) on
> it.  386 kernel should not be affected by the change at all, this is
> about amd64 specifically.
> 
> Changes were tested on two machines I have, with AVX enabled/disabled.
> No problems were found with extensive use, nor any performance issues
> detected.

This fixes the 'xgetbv' trap that I was getting with OpenBSD. Thanks!
That's been something that I've wanted to get to, and I'm thrilled
that I don't need to.

> On the host Go is using AVX successfully with this change.  I'm
> writing optimized routines related to video playback so that's yet
> another reason why this work has been done in the first place.

It's also probably worth noting that the compilers for 9legacy
have some support for more recent XMM/YMM registers:

	https://github.com/0intro/plan9-contrib/commit/94fe116949ba36db8216abed83dbad8fb84ecdf7.patch

We've diverged a bit so the merge would be a pain in the ass,
and we'd need to get the disassembly done -- but at least some
of the code has been written.
 
> * AVX/AVX2 support on amd64 for both 9front kernel itself + VMX
>   guests.  Enabled by setting "*avx=" in plan9.ini.

As mentioned in gridchat: I think we can remove this knob, or
at least turn it on by default and have '*noavx=' for debugging

> * Make vmx(1) report to guest it's running under a hypervisor.

I'm skimming the code, but don't see anything obvious. How are
you reporting to the guest?

> * Provide "fast strings" (through msr) properly to guests.
> * Rework cpuid in vmx(1).
> * A bit better timing by using tsc offset feature.  Clock is still
>   wrong but at least not THAT much.  Proper kvm clock implementation
>   in the future will address that.

I'll pull up the spec soon enough and try to make a bit more sense
of this code, but it looks awesome so far. Thanks!

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [9front] VMX improvements + AVX
  2020-12-04 15:39 [9front] VMX improvements + AVX Sigrid Solveig Haflínudóttir
                   ` (2 preceding siblings ...)
  2020-12-05  0:19 ` ori
@ 2020-12-10 12:23 ` Stuart Morrow
  2020-12-10 15:09   ` ori
  2020-12-18  2:22   ` magma698hfsp273p9f
  3 siblings, 2 replies; 14+ messages in thread
From: Stuart Morrow @ 2020-12-10 12:23 UTC (permalink / raw)
  To: 9front

I have to admit I don't understand why this type of thing actually
needs to be a patch. The kernel needs to know what instructions the
program is liable to use? Why? My Nemo's-book understanding of this
stuff: the kernel sets up an allotment of time, and then the user
process goes to town within the constraints set up for it. Can't
really imagine why certain instructions would need special
arrangements or what the mechanism might be.

I appreciate whatever educ9tion I might get out of this, although it's
kind of a waste (depending on what lurker sees it who might do
something useful with it).

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [9front] VMX improvements + AVX
  2020-12-10 12:23 ` Stuart Morrow
@ 2020-12-10 15:09   ` ori
  2020-12-18  2:22   ` magma698hfsp273p9f
  1 sibling, 0 replies; 14+ messages in thread
From: ori @ 2020-12-10 15:09 UTC (permalink / raw)
  To: morrow.stuart, 9front

Quoth Stuart Morrow <morrow.stuart@gmail.com>:
> I have to admit I don't understand why this type of thing actually
> needs to be a patch. The kernel needs to know what instructions the
> program is liable to use? Why? My Nemo's-book understanding of this
> stuff: the kernel sets up an allotment of time, and then the user
> process goes to town within the constraints set up for it. Can't
> really imagine why certain instructions would need special
> arrangements or what the mechanism might be.

A process is a collection of registers, memory, and file descriptors
(mostly). So, switching processes means that you need to swap out the
active resources.

AVX adds more registers, so there are new things that need to be
swapped. That means two things:

0) the kernel needs to know that these registers exist, so that it
   can save them and restore them when switching procs.
1) it's not on automatically, and userspace can't turn it on,
   because if the kernel didn't know how to save them across
   processes, one process would step all over the others' new
   registers.

(usually) new instructions that don't add new registers need no
work in the OS.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [9front] VMX improvements + AVX
  2020-12-10 12:23 ` Stuart Morrow
  2020-12-10 15:09   ` ori
@ 2020-12-18  2:22   ` magma698hfsp273p9f
  2020-12-18  4:41     ` ori
  1 sibling, 1 reply; 14+ messages in thread
From: magma698hfsp273p9f @ 2020-12-18  2:22 UTC (permalink / raw)
  To: 9front

Stuart Morrow <morrow.stuart@gmail.com> writes:

> program is liable to use? Why? My Nemo's-book understanding of this
> stuff: the kernel sets up an allotment of time, and then the user

"Nemo's book"?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [9front] VMX improvements + AVX
  2020-12-18  2:22   ` magma698hfsp273p9f
@ 2020-12-18  4:41     ` ori
  2021-01-21  0:58       ` Nemo's books (WAS: Re: [9front] VMX improvements + AVX) magma698hfsp273p9f
  0 siblings, 1 reply; 14+ messages in thread
From: ori @ 2020-12-18  4:41 UTC (permalink / raw)
  To: magma698hfsp273p9f, 9front

Quoth magma698hfsp273p9f@icebubble.org:
> Stuart Morrow <morrow.stuart@gmail.com> writes:
> 
> > program is liable to use? Why? My Nemo's-book understanding of this
> > stuff: the kernel sets up an allotment of time, and then the user
> 
> "Nemo's book"?
> 

one of:

http://lsub.org/who/nemo/9.intro.pdf
http://lsub.org/who/nemo/9.pdf

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Nemo's books (WAS: Re: [9front] VMX improvements + AVX)
  2020-12-18  4:41     ` ori
@ 2021-01-21  0:58       ` magma698hfsp273p9f
  2021-01-21  3:37         ` Roman Shaposhnik
                           ` (2 more replies)
  0 siblings, 3 replies; 14+ messages in thread
From: magma698hfsp273p9f @ 2021-01-21  0:58 UTC (permalink / raw)
  To: 9front

ori@eigenstate.org writes:

> Quoth magma698hfsp273p9f@icebubble.org:
>> "Nemo's book"?
>> 
>
> one of:
>
> http://lsub.org/who/nemo/9.intro.pdf
> http://lsub.org/who/nemo/9.pdf

Both of those return HTTP 404.  Browsing http://lsub.org/ isn't any
more revealing.  Anyone have current copies of these?

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: Nemo's books (WAS: Re: [9front] VMX improvements + AVX)
  2021-01-21  0:58       ` Nemo's books (WAS: Re: [9front] VMX improvements + AVX) magma698hfsp273p9f
@ 2021-01-21  3:37         ` Roman Shaposhnik
  2021-01-21  3:58         ` sl
  2021-01-21  4:07         ` Nemo's books (WAS: Re: [9front] VMX improvements + AVX) Anthony Martin
  2 siblings, 0 replies; 14+ messages in thread
From: Roman Shaposhnik @ 2021-01-21  3:37 UTC (permalink / raw)
  To: 9front

On Wed, Jan 20, 2021 at 7:28 PM <magma698hfsp273p9f@icebubble.org> wrote:
>
> ori@eigenstate.org writes:
>
> > Quoth magma698hfsp273p9f@icebubble.org:
> >> "Nemo's book"?
> >>
> >
> > one of:
> >
> > http://lsub.org/who/nemo/9.intro.pdf
> > http://lsub.org/who/nemo/9.pdf
>
> Both of those return HTTP 404.  Browsing http://lsub.org/ isn't any
> further revealing.  Anyone have current copies of these?

I believe they are also available from cat-v.org

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: Nemo's books (WAS: Re: [9front] VMX improvements + AVX)
  2021-01-21  0:58       ` Nemo's books (WAS: Re: [9front] VMX improvements + AVX) magma698hfsp273p9f
  2021-01-21  3:37         ` Roman Shaposhnik
@ 2021-01-21  3:58         ` sl
  2021-01-23  7:31           ` [9front] Re: Nemo's books magma698hfsp273p9f
  2021-01-21  4:07         ` Nemo's books (WAS: Re: [9front] VMX improvements + AVX) Anthony Martin
  2 siblings, 1 reply; 14+ messages in thread
From: sl @ 2021-01-21  3:58 UTC (permalink / raw)
  To: 9front

http://doc.cat-v.org/plan_9/9.intro.pdf
http://doc.cat-v.org/plan_9/9.pdf

sl

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: Nemo's books (WAS: Re: [9front] VMX improvements + AVX)
  2021-01-21  0:58       ` Nemo's books (WAS: Re: [9front] VMX improvements + AVX) magma698hfsp273p9f
  2021-01-21  3:37         ` Roman Shaposhnik
  2021-01-21  3:58         ` sl
@ 2021-01-21  4:07         ` Anthony Martin
  2 siblings, 0 replies; 14+ messages in thread
From: Anthony Martin @ 2021-01-21  4:07 UTC (permalink / raw)
  To: 9front

magma698hfsp273p9f@icebubble.org once said:
> > http://lsub.org/who/nemo/9.intro.pdf
> > http://lsub.org/who/nemo/9.pdf
>
> Both of those return HTTP 404.  Browsing http://lsub.org/ isn't any
> further revealing.  Anyone have current copies of these?

https://web.archive.org/web/20141018000902/http://lsub.org/who/nemo/9.pdf
https://web.archive.org/web/20150324074243/http://lsub.org/who/nemo/9.intro.pdf

Cheers,
  Anthony

^ permalink raw reply	[flat|nested] 14+ messages in thread

* [9front] Re: Nemo's books
  2021-01-21  3:58         ` sl
@ 2021-01-23  7:31           ` magma698hfsp273p9f
  2021-01-23 12:47             ` Eckard Brauer
  0 siblings, 1 reply; 14+ messages in thread
From: magma698hfsp273p9f @ 2021-01-23  7:31 UTC (permalink / raw)
  To: 9front

sl@stanleylieber.com writes:

> http://doc.cat-v.org/plan_9/9.intro.pdf
> http://doc.cat-v.org/plan_9/9.pdf

Ah, those both work.  Thanks.

/me peeks at the books

These contain some good information.  I've seen other writing that
refers to 9.intro.pdf, but have never actually seen it before.  Nemo's
English isn't very good, but the volume and detail of information in
these docs far outweighs anything that might be lost in translation.

^ permalink raw reply	[flat|nested] 14+ messages in thread

* Re: [9front] Re: Nemo's books
  2021-01-23  7:31           ` [9front] Re: Nemo's books magma698hfsp273p9f
@ 2021-01-23 12:47             ` Eckard Brauer
  0 siblings, 0 replies; 14+ messages in thread
From: Eckard Brauer @ 2021-01-23 12:47 UTC (permalink / raw)
  To: 9front

[-- Attachment #1: Type: text/plain, Size: 605 bytes --]

Am Sat, 23 Jan 2021 07:31:27 +0000
schrieb magma698hfsp273p9f@icebubble.org:

> sl@stanleylieber.com writes:
>
> > http://doc.cat-v.org/plan_9/9.intro.pdf
> > http://doc.cat-v.org/plan_9/9.pdf
>
> Ah, those both work.  Thanks.
>
> /me peeks at the books
>
> These contain some good information.  I've seen other writing that
> refers to 9.intro.pdf, but have never actually seen it before.  Nemo's
> English isn't very good, but the volume and detail of information in
> these docs far outweighs anything that might be lost in translation.

It seems to be a year newer or so.. attached.

[-- Attachment #2: 9.intro.pdf --]
[-- Type: application/pdf, Size: 1320610 bytes --]

^ permalink raw reply	[flat|nested] 14+ messages in thread

end of thread, other threads:[~2021-01-23 13:28 UTC | newest]

Thread overview: 14+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-12-04 15:39 [9front] VMX improvements + AVX Sigrid Solveig Haflínudóttir
2020-12-04 21:15 ` james palmer
2020-12-04 22:25 ` Kurt H Maier
2020-12-05  0:19 ` ori
2020-12-10 12:23 ` Stuart Morrow
2020-12-10 15:09   ` ori
2020-12-18  2:22   ` magma698hfsp273p9f
2020-12-18  4:41     ` ori
2021-01-21  0:58       ` Nemo's books (WAS: Re: [9front] VMX improvements + AVX) magma698hfsp273p9f
2021-01-21  3:37         ` Roman Shaposhnik
2021-01-21  3:58         ` sl
2021-01-23  7:31           ` [9front] Re: Nemo's books magma698hfsp273p9f
2021-01-23 12:47             ` Eckard Brauer
2021-01-21  4:07         ` Nemo's books (WAS: Re: [9front] VMX improvements + AVX) Anthony Martin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).