9front - general discussion about 9front
 help / color / mirror / Atom feed
From: "Sigrid Solveig Haflínudóttir" <ftrvxmtrx@gmail.com>
To: 9front@9front.org
Subject: [9front] VMX improvements + AVX
Date: Fri, 04 Dec 2020 16:39:37 +0100	[thread overview]
Message-ID: <52CB3E735527EF01CDF9E2641E3A406B@gmail.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 1097 bytes --]

Hello.  I made some changes to /sys/src/9/^(pc pc64) and
/sys/src/cmd/vmx to get better support for Linux and OpenBSD emulation,
and before I push them (or not) I'd like to get more eyes (and hands) on
them.  The 386 kernel should not be affected by the changes at all; this
is about amd64 specifically.

Changes were tested on two machines I have, with AVX enabled/disabled.
No problems were found with extensive use, nor were any performance
issues detected.

On the host Go is using AVX successfully with this change.  I'm
writing optimized routines related to video playback so that's yet
another reason why this work has been done in the first place.

* AVX/AVX2 support on amd64 for both 9front kernel itself + VMX
  guests.  Enabled by setting "*avx=" in plan9.ini.
* Make vmx(1) report to the guest that it's running under a
  hypervisor.
* Provide "fast strings" (through msr) properly to guests.
* Rework cpuid in vmx(1).
* Somewhat better timing by using the TSC offset feature.  The clock is
  still wrong, but at least not by THAT much.  A proper kvm clock
  implementation in the future will address that.

Thanks.

 - Sigrid

[-- Attachment #2: diff --]
[-- Type: text/plain, Size: 25422 bytes --]

diff -r bdf5af28b857 sys/man/8/plan9.ini
--- a/sys/man/8/plan9.ini	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/man/8/plan9.ini	Fri Dec 04 16:26:28 2020 +0100
@@ -898,6 +898,8 @@
 battery life (see
 .IR stats (8)).
 It is not on by default because it causes problems on some laptops.
+.SS \fL*avx=\fP
+Enables AVX and AVX2 on AMD64 cpu if it supports the instruction set.
 .SS USB
 .SS \fL*nousbprobe=\fP
 Disable USB host controller detection.
diff -r bdf5af28b857 sys/src/9/pc/cputemp.c
--- a/sys/src/9/pc/cputemp.c	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc/cputemp.c	Fri Dec 04 16:26:28 2020 +0100
@@ -13,7 +13,7 @@
 
 	if(m->cpuiddx & Acpif)
 	if(strcmp(m->cpuidid, "GenuineIntel") == 0){
-		cpuid(6, regs);
+		cpuid(6, 0, regs);
 		return regs[0] & 1;
 	}
 	return 0;
@@ -28,7 +28,7 @@
 	ulong regs[4];
 	static ulong tj;
 
-	cpuid(6, regs);
+	cpuid(6, 0, regs);
 	if((regs[0] & 1) == 0)
 		goto unsup;
 	if(tj == 0){
diff -r bdf5af28b857 sys/src/9/pc/dat.h
--- a/sys/src/9/pc/dat.h	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc/dat.h	Fri Dec 04 16:26:28 2020 +0100
@@ -250,7 +250,7 @@
 	int	pdbfree;
 	
 	u32int	dr7;			/* shadow copy of dr7 */
-	
+	u32int	xcr0;
 	void*	vmx;
 
 	int	stack[1];
diff -r bdf5af28b857 sys/src/9/pc/devarch.c
--- a/sys/src/9/pc/devarch.c	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc/devarch.c	Fri Dec 04 16:26:28 2020 +0100
@@ -18,11 +18,6 @@
 	Qmax = 32,
 };
 
-enum {
-	CR4Osfxsr = 1 << 9,
-	CR4Oxmmex = 1 << 10,
-};
-
 enum {				/* cpuid standard function codes */
 	Highstdfunc = 0,	/* also returns vendor string */
 	Procsig,
@@ -507,13 +502,13 @@
 	ulong regs[4];
 	vlong mca, mct, pat;
 
-	cpuid(Highstdfunc, regs);
+	cpuid(Highstdfunc, 0, regs);
 	memmove(m->cpuidid,   &regs[1], BY2WD);	/* bx */
 	memmove(m->cpuidid+4, &regs[3], BY2WD);	/* dx */
 	memmove(m->cpuidid+8, &regs[2], BY2WD);	/* cx */
 	m->cpuidid[12] = '\0';
 
-	cpuid(Procsig, regs);
+	cpuid(Procsig, 0, regs);
 	m->cpuidax = regs[0];
 	m->cpuidcx = regs[2];
 	m->cpuiddx = regs[3];
@@ -650,15 +645,6 @@
 	if(m->cpuiddx & Mtrr)
 		mtrrsync();
 
-	if((m->cpuiddx & (Sse|Fxsr)) == (Sse|Fxsr)){			/* have sse fp? */
-		fpsave = fpssesave;
-		fprestore = fpsserestore;
-		putcr4(getcr4() | CR4Osfxsr|CR4Oxmmex);
-	} else {
-		fpsave = fpx87save;
-		fprestore = fpx87restore;
-	}
-
 	if(strcmp(m->cpuidid, "GenuineIntel") == 0 && (m->cpuidcx & Rdrnd) != 0)
 		hwrandbuf = rdrandbuf;
 	else
@@ -669,9 +655,9 @@
 		m->havewatchpt8 = 1;
 
 		/* check and enable NX bit */
-		cpuid(Highextfunc, regs);
+		cpuid(Highextfunc, 0, regs);
 		if(regs[0] >= Procextfeat){
-			cpuid(Procextfeat, regs);
+			cpuid(Procextfeat, 0, regs);
 			if((regs[3] & (1<<20)) != 0){
 				vlong efer;
 
@@ -689,14 +675,16 @@
 		|| family == 6 && (model == 15 || model == 23 || model == 28))
 			m->havewatchpt8 = 1;
 		/* Intel SDM claims amd64 support implies 8-byte watchpoint support */
-		cpuid(Highextfunc, regs);
+		cpuid(Highextfunc, 0, regs);
 		if(regs[0] >= Procextfeat){
-			cpuid(Procextfeat, regs);
+			cpuid(Procextfeat, 0, regs);
 			if((regs[3] & 1<<29) != 0)
 				m->havewatchpt8 = 1;
 		}
 	}
 
+	fpuinit();
+
 	cputype = t;
 	return t->family;
 }
diff -r bdf5af28b857 sys/src/9/pc/devvmx.c
--- a/sys/src/9/pc/devvmx.c	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc/devvmx.c	Fri Dec 04 16:26:28 2020 +0100
@@ -44,6 +44,7 @@
 	
 	PROCB_CTLS = 0x4002,
 	PROCB_IRQWIN = 1<<2,
+	PROCB_TSCOFFSET = 1<<3,
 	PROCB_EXITHLT = 1<<7,
 	PROCB_EXITINVLPG = 1<<9,
 	PROCB_EXITMWAIT = 1<<10,
@@ -100,6 +101,7 @@
 	VMENTRY_INTRCODE = 0x4018,
 	VMENTRY_INTRILEN = 0x401a,
 	
+	VMCS_TSC_OFFSET = 0x2010,
 	VMCS_LINK = 0x2800,
 	
 	GUEST_ES = 0x800,
@@ -264,7 +266,9 @@
 	int index, machno;
 	char errstr[ERRMAX];
 	Ureg ureg;
+	uvlong tscoffset;
 	uintptr cr2;
+	uintptr xcr0;
 	uintptr dr[8]; /* DR7 is also kept in VMCS */
 	u8int launched;
 	u8int vpid;
@@ -484,6 +488,13 @@
 }
 
 static int
+xcr0write(Vmx *vmx, char *s)
+{
+	vmx->xcr0 = parseval(s) & 7;
+	return 0;
+}
+
+static int
 readonly(Vmx *, char *)
 {
 	return -1;
@@ -581,6 +592,7 @@
 	{VMXVAR(dr[2]), 0, "dr2"},
 	{VMXVAR(dr[3]), 0, "dr3"},
 	{VMXVAR(dr[6]), 0, "dr6", nil, dr6write},
+	{VMXVAR(xcr0), 0, "xcr0", nil, xcr0write},
 	{GUEST_DR7, 0, "dr7", nil, dr7write},
 	{VM_INSTRERR, 4, "instructionerror", nil, readonly},
 	{VM_EXREASON, 4, "exitreason", nil, readonly},
@@ -857,7 +869,7 @@
 	vlong msr;
 	int i;
 
-	cpuid(1, regs);
+	cpuid(1, 0, regs);
 	if((regs[2] & 1<<5) == 0) return;
 	/* check if disabled by BIOS */
 	if(rdmsr(0x3a, &msr) < 0) return;
@@ -945,8 +957,8 @@
 	
 	if(rdmsr(VMX_PROCB_CTLS_MSR, &msr) < 0) error("rdmsr(VMX_PROCB_CTLS_MSR failed");
 	x = (u32int)procb_ctls | 1<<1 | 7<<4 | 1<<8 | 1<<13 | 1<<14 | 1<<26; /* currently reserved default1 bits */
-	x |= PROCB_EXITHLT | PROCB_EXITMWAIT;
-	x |= PROCB_EXITMOVDR | PROCB_EXITIO | PROCB_EXITMONITOR | PROCB_MSRBITMAP;
+	x |= PROCB_TSCOFFSET | PROCB_EXITMWAIT | PROCB_EXITMONITOR | PROCB_EXITHLT;
+	x |= PROCB_EXITMOVDR | PROCB_EXITIO | PROCB_MSRBITMAP;
 	x |= PROCB_USECTLS2;
 	x &= msr >> 32;
 	vmcswrite(PROCB_CTLS, x);
@@ -1042,8 +1054,8 @@
 	
 	vmx->onentry = FLUSHVPID | FLUSHEPT;
 	fpinit();
-	fpsave(&vmx->fp);
-	
+	vmx->xcr0 = m->xcr0 & 1; /* x87 alone */
+
 	memset(vmx->msrbits, -1, 4096);
 	vmxtrapmsr(vmx, Efer, 0);
 	vmcswrite(VMENTRY_MSRLDADDR, PADDR(vmx->msrguest));
@@ -1051,6 +1063,9 @@
 	vmcswrite(VMEXIT_MSRLDADDR, PADDR(vmx->msrhost));
 	vmcswrite(MSR_BITMAP, PADDR(vmx->msrbits));
 	
+	cycles(&vmx->tscoffset);
+	vmcswrite(VMCS_TSC_OFFSET, vmx->tscoffset);
+
 	if(sizeof(uintptr) == 8){
 		vmxaddmsr(vmx, Star, 0);
 		vmxaddmsr(vmx, Lstar, 0);
@@ -1074,7 +1089,7 @@
 	uintptr cr;
 	vlong x;
 
-	putcr4(getcr4() | 0x2000); /* set VMXE */
+	putcr4(getcr4() | CR4VMXE);
 	putcr0(getcr0() | 0x20); /* set NE */
 	cr = getcr0();
 	if(rdmsr(VMX_CR0_FIXED0, &msr) < 0) error("rdmsr(VMX_CR0_FIXED0) failed");
@@ -1590,8 +1605,9 @@
 static void
 vmxproc(void *vmxp)
 {
-	int init, rc, x;
+	int init, rc, x, useend;
 	u32int procbctls, defprocbctls;
+	u64int start, end, adj;
 	vlong v;
 	Vmx *vmx;
 
@@ -1599,6 +1615,8 @@
 	procwired(up, vmx->machno);
 	sched();
 	init = 0;
+	useend = 0;
+	adj = 0;
 	defprocbctls = 0;
 	while(waserror()){
 		kstrcpy(vmx->errstr, up->errstr, ERRMAX);
@@ -1653,11 +1671,29 @@
 			}
 			if((vmx->dr[7] & ~0xd400) != 0)
 				putdr01236(vmx->dr);
-			fpsserestore(&vmx->fp);
-			putcr2(vmx->cr2);
+
+			fprestore(&vmx->fp);
+			if(m->xcr0 != 0 && vmx->xcr0 != m->xcr0)
+				putxcr0(vmx->xcr0);
+			if(vmx->cr2 != getcr2())
+				putcr2(vmx->cr2);
+			cycles(&start);
+			if(useend){
+				vmx->tscoffset -= end - start + adj;
+				vmcswrite(VMCS_TSC_OFFSET, vmx->tscoffset);
+			}
+			if(adj == 0){
+				cycles(&adj);
+				adj -= start;
+			}
 			rc = vmlaunch(&vmx->ureg, vmx->launched);
+			cycles(&end);
+			useend = 1;
 			vmx->cr2 = getcr2();
-			fpssesave(&vmx->fp);
+			if(m->xcr0 != 0 && vmx->xcr0 != m->xcr0)
+				putxcr0(m->xcr0);
+			fpsave(&vmx->fp);
+
 			splx(x);
 			if(rc < 0)
 				error("vmlaunch failed");
@@ -1799,6 +1835,7 @@
 		free(vmx);
 		nexterror();
 	}
+	memset(vmx, 0, sizeof(Vmx));
 	vmx->state = VMXINIT;
 	vmx->lastcmd = &vmx->firstcmd;
 	vmx->mem.next = &vmx->mem;
diff -r bdf5af28b857 sys/src/9/pc/fns.h
--- a/sys/src/9/pc/fns.h	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc/fns.h	Fri Dec 04 16:26:28 2020 +0100
@@ -15,7 +15,8 @@
 int	(*cmpswap)(long*, long, long);
 int	cmpswap486(long*, long, long);
 void	(*coherence)(void);
-void	cpuid(int, ulong regs[]);
+void	cpuid(int, int, ulong regs[]);
+void	fpuinit(void);
 int	cpuidentify(void);
 void	cpuidprint(void);
 void	(*cycles)(uvlong*);
@@ -137,6 +138,7 @@
 void	putcr2(ulong);
 void	putcr3(ulong);
 void	putcr4(ulong);
+void	putxcr0(ulong);
 void	putdr(u32int*);
 void	putdr01236(uintptr*);
 void	putdr6(u32int);
diff -r bdf5af28b857 sys/src/9/pc/fpu.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/src/9/pc/fpu.c	Fri Dec 04 16:26:28 2020 +0100
@@ -0,0 +1,33 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+
+enum {
+	CR4Osfxsr  = 1 << 9,
+	CR4Oxmmex  = 1 << 10,
+};
+
+void
+putxcr0(ulong)
+{
+}
+
+void
+fpuinit(void)
+{
+	uintptr cr4;
+
+	if((m->cpuiddx & (Sse|Fxsr)) == (Sse|Fxsr)){ /* have sse fp? */
+		fpsave = fpssesave;
+		fprestore = fpsserestore;
+		cr4 = getcr4() | CR4Osfxsr|CR4Oxmmex;
+		putcr4(cr4);
+	} else {
+		fpsave = fpx87save;
+		fprestore = fpx87restore;
+	}
+}
diff -r bdf5af28b857 sys/src/9/pc/l.s
--- a/sys/src/9/pc/l.s	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc/l.s	Fri Dec 04 16:26:28 2020 +0100
@@ -520,7 +520,7 @@
  * a 386 (Ac bit can't be set). If it's not a 386 and the Id bit can't be
  * toggled then it's an older 486 of some kind.
  *
- *	cpuid(fun, regs[4]);
+ *	cpuid(fn, subfn, regs[4]);
  */
 TEXT cpuid(SB), $0
 	MOVL	$0x240000, AX
@@ -539,6 +539,7 @@
 	TESTL	$0x200000, AX			/* Id */
 	JZ	_cpu486				/* can't toggle this bit on some 486 */
 	MOVL	fn+0(FP), AX
+	MOVL	subfn+4(FP), CX
 	CPUID
 	JMP	_cpuid
 _cpu486:
@@ -555,7 +556,7 @@
 	XORL	CX, CX
 	XORL	DX, DX
 _cpuid:
-	MOVL	regs+4(FP), BP
+	MOVL	regs+8(FP), BP
 	MOVL	AX, 0(BP)
 	MOVL	BX, 4(BP)
 	MOVL	CX, 8(BP)
diff -r bdf5af28b857 sys/src/9/pc/mkfile
--- a/sys/src/9/pc/mkfile	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc/mkfile	Fri Dec 04 16:26:28 2020 +0100
@@ -49,6 +49,7 @@
 OBJ=\
 	l.$O\
 	cga.$O\
+	fpu.$O\
 	i8253.$O\
 	i8259.$O\
 	main.$O\
diff -r bdf5af28b857 sys/src/9/pc/mtrr.c
--- a/sys/src/9/pc/mtrr.c	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc/mtrr.c	Fri Dec 04 16:26:28 2020 +0100
@@ -289,9 +289,9 @@
 	ulong regs[4];
 	uvlong mask;
 
-	cpuid(Exthighfunc, regs);
+	cpuid(Exthighfunc, 0, regs);
 	if(regs[0] >= Extaddrsz) {			/* ax */
-		cpuid(Extaddrsz, regs);
+		cpuid(Extaddrsz, 0, regs);
 		mask = (1ULL << (regs[0] & 0xFF)) - 1;	/* ax */
 	} else {
 		mask = (1ULL << 36) - 1;
diff -r bdf5af28b857 sys/src/9/pc64/dat.h
--- a/sys/src/9/pc64/dat.h	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc64/dat.h	Fri Dec 04 16:26:28 2020 +0100
@@ -2,6 +2,8 @@
 typedef struct BIOS32ci	BIOS32ci;
 typedef struct Conf	Conf;
 typedef struct Confmem	Confmem;
+typedef struct FPssestate	FPssestate;
+typedef struct FPavxstate	FPavxstate;
 typedef struct FPsave	FPsave;
 typedef struct PFPU	PFPU;
 typedef struct ISAConf	ISAConf;
@@ -49,7 +51,7 @@
 	uintptr	pc;
 };
 
-struct FPsave
+struct FPssestate
 {
 	u16int	fcw;			/* x87 control word */
 	u16int	fsw;			/* x87 status word */
@@ -65,6 +67,18 @@
 	uchar	ign[96];		/* reserved, ignored */
 };
 
+struct FPavxstate
+{
+	FPssestate;
+	uchar	header[64];		/* XSAVE header */
+	uchar	ymm[256];		/* upper 128-bit regs (AVX) */
+};
+
+struct FPsave
+{
+	FPavxstate;
+};
+
 enum
 {
 	/* this is a state */
@@ -224,9 +238,12 @@
 	int	havewatchpt8;
 	int	havenx;
 	uvlong	tscticks;
-	
+
 	u64int	dr7;			/* shadow copy of dr7 */
-	
+	u64int	xcr0;
+	u32int	fpsavesz;
+	u32int	fpalign;
+
 	void*	vmx;
 
 	uintptr	stack[1];
@@ -270,8 +287,14 @@
 
 /* cpuid instruction result register bits */
 enum {
+	/* ax */
+	Xsaveopt = 1<<0,
+	Xsaves = 1<<3,
+
 	/* cx */
 	Monitor	= 1<<3,
+	Xsave = 1<<26,
+	Avx	= 1<<28,
 
 	/* dx */
 	Fpuonchip = 1<<0,
diff -r bdf5af28b857 sys/src/9/pc64/fns.h
--- a/sys/src/9/pc64/fns.h	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc64/fns.h	Fri Dec 04 16:26:28 2020 +0100
@@ -15,7 +15,8 @@
 int	(*cmpswap)(long*, long, long);
 int	cmpswap486(long*, long, long);
 void	(*coherence)(void);
-void	cpuid(int, ulong regs[]);
+void	cpuid(int, int, ulong regs[]);
+void	fpuinit(void);
 int	cpuidentify(void);
 void	cpuidprint(void);
 void	(*cycles)(uvlong*);
@@ -40,6 +41,11 @@
 void	(*fpsave)(FPsave*);
 void	fpsserestore(FPsave*);
 void	fpssesave(FPsave*);
+void	fpxrestore(FPsave*);
+void	fpxrestores(FPsave*);
+void	fpxsave(FPsave*);
+void	fpxsaveopt(FPsave*);
+void	fpxsaves(FPsave*);
 void	fpx87restore(FPsave*);
 void	fpx87save(FPsave*);
 int	fpusave(void);
@@ -48,6 +54,7 @@
 u64int	getcr2(void);
 u64int	getcr3(void);
 u64int	getcr4(void);
+u64int	getxcr0(void);
 u64int	getdr6(void);
 char*	getconf(char*);
 void	guesscpuhz(int);
@@ -137,6 +144,7 @@
 void	putcr2(u64int);
 void	putcr3(u64int);
 void	putcr4(u64int);
+void	putxcr0(u64int);
 void	putdr(u64int*);
 void	putdr01236(u64int*);
 void	putdr6(u64int);
diff -r bdf5af28b857 sys/src/9/pc64/fpu.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sys/src/9/pc64/fpu.c	Fri Dec 04 16:26:28 2020 +0100
@@ -0,0 +1,53 @@
+#include "u.h"
+#include "../port/lib.h"
+#include "mem.h"
+#include "dat.h"
+#include "fns.h"
+#include "io.h"
+#include "ureg.h"
+
+enum {
+	CR4Osfxsr  = 1 << 9,
+	CR4Oxmmex  = 1 << 10,
+	CR4Oxsave  = 1 << 18,
+};
+
+void
+fpuinit(void)
+{
+	uintptr cr4;
+	ulong regs[4];
+
+	m->fpsavesz = sizeof(FPssestate);
+	m->fpalign = 16;
+	if((m->cpuiddx & (Sse|Fxsr)) == (Sse|Fxsr)){ /* have sse fp? */
+		cr4 = getcr4() | CR4Osfxsr|CR4Oxmmex;
+		putcr4(cr4);
+		fpsave = fpssesave;
+		fprestore = fpsserestore;
+
+		if((m->cpuidcx & (Xsave|Avx)) == (Xsave|Avx) && getconf("*avx") != nil){
+			cr4 |= CR4Oxsave;
+			putcr4(cr4);
+			m->xcr0 = 7; /* x87, sse, avx */
+			putxcr0(m->xcr0);
+			fpsave = fpxsave;
+			fprestore = fpxrestore;
+
+			cpuid(0xd, 0, regs);
+			m->fpsavesz = regs[1];
+			m->fpalign = 64;
+
+			cpuid(0xd, 1, regs);
+			if(regs[0] & Xsaveopt)
+				fpsave = fpxsaveopt;
+			if(regs[0] & Xsaves){
+				fpsave = fpxsaves;
+				fprestore = fpxrestores;
+			}
+		}
+	} else {
+		fpsave = fpx87save;
+		fprestore = fpx87restore;
+	}
+}
diff -r bdf5af28b857 sys/src/9/pc64/l.s
--- a/sys/src/9/pc64/l.s	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc64/l.s	Fri Dec 04 16:26:28 2020 +0100
@@ -249,9 +249,10 @@
  */
 TEXT cpuid(SB), $-4
 	MOVL	RARG, AX			/* function in AX */
+	MOVL	cx+8(FP), CX		/* sub-level in CX */
 	CPUID
 
-	MOVQ	info+8(FP), BP
+	MOVQ	info+16(FP), BP
 	MOVL	AX, 0(BP)
 	MOVL	BX, 4(BP)
 	MOVL	CX, 8(BP)
@@ -399,6 +400,21 @@
 	MOVQ	RARG, CR4
 	RET
 
+TEXT getxcr0(SB), 1, $-4			/* XCR0 - extended control */
+	XORQ CX, CX
+	WORD $0x010f; BYTE $0xd0	// XGETBV
+	SHLQ $32, DX
+	ORQ DX, AX
+	RET
+
+TEXT putxcr0(SB), 1, $-4
+	XORQ CX, CX
+	MOVL RARG, DX
+	SHRQ $32, DX
+	MOVL RARG, AX
+	WORD $0x010f; BYTE $0xd1	// XSETBV
+	RET
+
 TEXT mb386(SB), 1, $-4				/* hack */
 TEXT mb586(SB), 1, $-4
 	XORL	AX, AX
@@ -626,6 +642,36 @@
 	FXSAVE64 (RARG)
 	RET
 
+TEXT _xrstor(SB), 1, $-4
+	MOVL $7, AX
+	XORL DX, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x6d; BYTE $0x00 // XRSTOR (RARG)
+	RET
+
+TEXT _xrstors(SB), 1, $-4
+	MOVL $7, AX
+	XORL DX, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xc7; BYTE $0x5d; BYTE $0x00 // XRSTORS (RARG)
+	RET
+
+TEXT _xsave(SB), 1, $-4
+	MOVL $7, AX
+	XORL DX, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x65; BYTE $0x00 // XSAVE (RARG)
+	RET
+
+TEXT _xsaveopt(SB), 1, $-4
+	MOVL $7, AX
+	XORL DX, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xae; BYTE $0x75; BYTE $0x00 // XSAVEOPT (RARG)
+	RET
+
+TEXT _xsaves(SB), 1, $-4
+	MOVL $7, AX
+	XORL DX, DX
+	BYTE $0x48; BYTE $0x0f; BYTE $0xc7; BYTE $0x6d; BYTE $0x00 // XSAVES (RARG)
+	RET
+
 TEXT _fwait(SB), 1, $-4
 	WAIT
 	RET
diff -r bdf5af28b857 sys/src/9/pc64/main.c
--- a/sys/src/9/pc64/main.c	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc64/main.c	Fri Dec 04 16:26:28 2020 +0100
@@ -304,6 +304,9 @@
 extern void _fninit(void);
 extern void _fxrstor(void*);
 extern void _fxsave(void*);
+extern void _xrstor(void*);
+extern void _xsave(void*);
+extern void _xsaveopt(void*);
 extern void _fwait(void);
 extern void _ldmxcsr(u32int);
 extern void _stts(void);
@@ -333,6 +336,39 @@
 	_fxrstor(s);
 }
 
+void
+fpxsave(FPsave *s)
+{
+	_xsave(s);
+	_stts();
+}
+void
+fpxrestore(FPsave *s)
+{
+	_clts();
+	_xrstor(s);
+}
+
+void
+fpxsaves(FPsave *s)
+{
+	_xsaveopt(s);
+	_stts();
+}
+void
+fpxrestores(FPsave *s)
+{
+	_clts();
+	_xrstor(s);
+}
+
+void
+fpxsaveopt(FPsave *s)
+{
+	_xsaveopt(s);
+	_stts();
+}
+
 static char* mathmsg[] =
 {
 	nil,	/* handled below */
@@ -452,7 +488,7 @@
 			up->fpstate |= FPkernel;
 		}
 		while(up->fpslot[index] == nil)
-			up->fpslot[index] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
+			up->fpslot[index] = mallocalign(m->fpsavesz, m->fpalign, 0, 0);
 		up->fpsave = up->fpslot[index];
 		up->fpstate = FPactive | (up->fpstate & (FPnouser|FPkernel|FPindexm));
 		break;
@@ -538,8 +574,8 @@
 	case FPinactive	| FPpush:
 	case FPinactive:
 		while(p->fpslot[0] == nil)
-			p->fpslot[0] = mallocalign(sizeof(FPsave), FPalign, 0, 0);
-		memmove(p->fpsave = p->fpslot[0], up->fpslot[0], sizeof(FPsave));
+			p->fpslot[0] = mallocalign(m->fpsavesz, m->fpalign, 0, 0);
+		memmove(p->fpsave = p->fpslot[0], up->fpslot[0], m->fpsavesz);
 		p->fpstate = FPinactive;
 	}
 	splx(s);
diff -r bdf5af28b857 sys/src/9/pc64/mem.h
--- a/sys/src/9/pc64/mem.h	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc64/mem.h	Fri Dec 04 16:26:28 2020 +0100
@@ -26,7 +26,6 @@
 #define	ROUND(s, sz)	(((s)+((sz)-1))&~((sz)-1))
 #define	PGROUND(s)	ROUND(s, BY2PG)
 #define	BLOCKALIGN	8
-#define	FPalign		16
 
 #define	MAXMACH		128			/* max # cpus system can run */
 
diff -r bdf5af28b857 sys/src/9/pc64/mkfile
--- a/sys/src/9/pc64/mkfile	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/9/pc64/mkfile	Fri Dec 04 16:26:28 2020 +0100
@@ -47,6 +47,7 @@
 OBJ=\
 	l.$O\
 	cga.$O\
+	fpu.$O\
 	i8253.$O\
 	i8259.$O\
 	main.$O\
diff -r bdf5af28b857 sys/src/cmd/vmx/exith.c
--- a/sys/src/cmd/vmx/exith.c	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/cmd/vmx/exith.c	Fri Dec 04 16:26:28 2020 +0100
@@ -1,9 +1,8 @@
 #include <u.h>
 #include <libc.h>
-#include <thread.h>
-#include <bio.h>
 #include "dat.h"
 #include "fns.h"
+#include "x86.h"
 
 int persist = 1;
 
@@ -118,109 +117,167 @@
 
 typedef struct CPUID CPUID;
 struct CPUID {
-	u32int idx;
 	u32int ax, bx, cx, dx;
 };
-static CPUID *cpuidf;
-static int ncpuidf;
+static u32int cpuidmax;
+static u32int cpuidmaxext;
+static CPUID leaf1;
+static struct {
+	uvlong miscen;
+}msr;
 
-static void
-auxcpuidproc(void *vpfd)
-{
-	int *pfd;
-	
-	pfd = vpfd;
-	close(pfd[1]);
-	close(0);
-	open("/dev/null", OREAD);
-	dup(pfd[0], 1);
-	close(pfd[0]);
-	procexecl(nil, "/bin/aux/cpuid", "cpuid", "-r", nil);
-	threadexits("exec: %r");
-}
+static uchar _cpuid[] = {
+	0x5E,			/* POP SI (PC) */
+	0x5D,			/* POP BP (CPUID&) */
+	0x58,			/* POP AX */
+	0x59,			/* POP CX */
+
+	0x51,			/* PUSH CX */
+	0x50,			/* PUSH AX */
+	0x55,			/* PUSH BP */
+	0x56,			/* PUSH SI */
+
+	0x31, 0xDB,		/* XOR BX, BX */
+	0x31, 0xD2,		/* XOR DX, DX */
+
+	0x0F, 0xA2,		/* CPUID */
+
+	0x89, 0x45, 0x00,	/* MOV AX, 0(BP) */
+	0x89, 0x5d, 0x04,	/* MOV BX, 4(BP) */
+	0x89, 0x4d, 0x08,	/* MOV CX, 8(BP) */
+	0x89, 0x55, 0x0C,	/* MOV DX, 12(BP) */
+	0xC3,			/* RET */
+};
+
+static CPUID (*getcpuid)(ulong ax, ulong cx) = (CPUID(*)(ulong, ulong)) _cpuid;
 
 void
 cpuidinit(void)
 {
-	int pfd[2];
-	Biobuf *bp;
-	char *l, *f[5];
-	CPUID *cp;
-	
-	pipe(pfd);
-	procrfork(auxcpuidproc, pfd, 4096, RFFDG);
-	close(pfd[0]);
-	bp = Bfdopen(pfd[1], OREAD);
-	if(bp == nil) sysfatal("Bopenfd: %r");
-	for(; l = Brdstr(bp, '\n', 1), l != nil; free(l)){
-		if(tokenize(l, f, 5) < 5) continue;
-		cpuidf = realloc(cpuidf, (ncpuidf + 1) * sizeof(CPUID));
-		cp = cpuidf + ncpuidf++;
-		cp->idx = strtoul(f[0], nil, 16);
-		cp->ax = strtoul(f[1], nil, 16);
-		cp->bx = strtoul(f[2], nil, 16);
-		cp->cx = strtoul(f[3], nil, 16);
-		cp->dx = strtoul(f[4], nil, 16);
+	CPUID r;
+	int f;
+
+	if(sizeof(uintptr) == 8) /* patch out POP BP -> POP AX */
+		_cpuid[1] = 0x58;
+	segflush(_cpuid, sizeof(_cpuid));
+
+	r = getcpuid(0, 0);
+	cpuidmax = r.ax;
+	r = getcpuid(0x80000000, 0);
+	cpuidmaxext = r.ax;
+	leaf1 = getcpuid(1, 0);
+
+	memset(&msr, 0, sizeof(msr));
+	if((f = open("/dev/msr", OREAD)) >= 0){
+		pread(f, &msr.miscen, 8, 0x1a0);
+		msr.miscen &= 1<<0; /* fast strings */
+		close(f);
 	}
-	Bterm(bp);
-	close(pfd[1]);
 }
 
-CPUID *
-getcpuid(ulong idx)
-{
-	CPUID *cp;
-	
-	for(cp = cpuidf; cp < cpuidf + ncpuidf; cp++)
-		if(cp->idx == idx)
-			return cp;
-	return nil;
-}
-
-int maxcpuid = 7;
+static int xsavesz[] = {
+	[1] = 512+64,
+	[3] = 512+64,
+	[7] = 512+64+256,
+};
 
 static void
 cpuid(ExitInfo *ei)
 {
 	u32int ax, bx, cx, dx;
-	CPUID *cp;
-	static CPUID def;
-	
+	CPUID cp;
+
 	ax = rget(RAX);
-	cp = getcpuid(ax);
-	if(cp == nil) cp = &def;
+	cx = rget(RCX);
+	bx = dx = 0;
+	cp = getcpuid(ax, cx);
 	switch(ax){
-	case 0: /* highest register & GenuineIntel */
-		ax = maxcpuid;
-		bx = cp->bx;
-		dx = cp->dx;
-		cx = cp->cx;
+	case 0x00: /* highest register & GenuineIntel */
+		ax = MIN(cpuidmax, 0x18);
+		bx = cp.bx;
+		dx = cp.dx;
+		cx = cp.cx;
 		break;
-	case 1: /* features */
-		ax = cp->ax;
-		bx = cp->bx & 0xffff;
-		cx = cp->cx & 0x60de2203;
-		dx = cp->dx & 0x0782a179;
+	case 0x01: /* features */
+		ax = cp.ax;
+		bx = cp.bx & 0xffff;
+		/* some features removed, hypervisor added */
+		cx = cp.cx & 0x76de3217 | 0x80000000UL;
+		dx = cp.dx & 0x0f8aa579;
+		if(leaf1.cx & 1<<27){
+			if(rget("cr4real") & Cr4Osxsave)
+				cx |= 1<<27;
+		}else{
+			cx &= ~0x1c000000;
+		}
 		break;
-	case 2: goto literal; /* cache stuff */
-	case 3: goto zero; /* processor serial number */
-	case 4: goto zero; /* cache stuff */
-	case 5: goto zero; /* monitor/mwait */
-	case 6: goto zero; /* thermal management */
-	case 7: goto zero; /* more features */
-	case 10: goto zero; /* performance counters */
+	case 0x02: goto literal; /* cache stuff */
+	case 0x03: goto zero; /* processor serial number */
+	case 0x04: goto literal; /* cache stuff */
+	case 0x05: goto zero; /* monitor/mwait */
+	case 0x06: goto zero; /* thermal management */
+	case 0x07: /* more features */
+		if(cx == 0){
+			ax = 0;
+			bx = cp.bx & 0x2369;
+			cx = 0;
+			if((leaf1.cx & 1<<27) == 0)
+				bx &= ~0xdc230020;
+		}else{
+			goto zero;
+		}
+		break;
+	case 0x08: goto zero;
+	case 0x09: goto literal; /* direct cache access */
+	case 0x0a: goto zero; /* performance counters */
+	case 0x0b: goto zero; /* extended topology */
+	case 0x0c: goto zero;
+	case 0x0d: /* extended state */
+		if((leaf1.cx & 1<<27) == 0)
+			goto zero;
+		if(cx == 0){ /* main leaf */
+			ax = cp.ax & 7; /* x87, sse, avx */
+			bx = xsavesz[rget("xcr0")]; /* current xsave size */
+			cx = xsavesz[ax]; /* max xsave size */
+		}else if(cx == 1){ /* sub leaf */
+			ax = cp.ax & 7; /* xsaveopt, xsavec, xgetbv1 */
+			bx = xsavesz[rget("xcr0")];
+			cx = 0;
+		}else if(cx == 2){
+			ax = xsavesz[7] - xsavesz[3];
+			bx = xsavesz[3];
+			cx = 0;
+		}else{
+			goto zero;
+		}
+		break;
+	case 0x0f: goto zero; /* RDT */
+	case 0x10: goto zero; /* RDT */
+	case 0x12: goto zero; /* SGX */
+	case 0x14: goto zero; /* PT */
+	case 0x15: goto zero; /* TSC */
+	case 0x16: goto zero; /* cpu clock */
+	case 0x17: goto zero; /* SoC */
+	case 0x18: goto literal; /* pages, tlb */
+
+	case 0x40000000: /* hypervisor */
+		ax = 0;
+		bx = 0x4b4d564b; /* act as KVM */
+		cx = 0x564b4d56;
+		dx = 0x4d;
+		break;
+
 	case 0x80000000: /* highest register */
-		ax = 0x80000008;
-		bx = cx = dx = 0;
+		ax = MIN(cpuidmaxext, 0x80000008);
+		cx = 0;
 		break;
 	case 0x80000001: /* signature & ext features */
-		ax = cp->ax;
-		bx = 0;
-		cx = cp->cx & 0x121;
+		ax = cp.ax;
+		cx = cp.cx & 0x121;
 		if(sizeof(uintptr) == 8)
-			dx = cp->dx & 0x24100800;
+			dx = cp.dx & 0x24100800;
 		else
-			dx = cp->dx & 0x04100000;
+			dx = cp.dx & 0x04100000;
 		break;
 	case 0x80000002: goto literal; /* brand string */
 	case 0x80000003: goto literal; /* brand string */
@@ -230,18 +287,16 @@
 	case 0x80000007: goto zero; /* invariant tsc */
 	case 0x80000008: goto literal; /* address bits */
 	literal:
-		ax = cp->ax;
-		bx = cp->bx;
-		cx = cp->cx;
-		dx = cp->dx;
+		ax = cp.ax;
+		bx = cp.bx;
+		cx = cp.cx;
+		dx = cp.dx;
 		break;
 	default:
-		vmerror("unknown cpuid field eax=%#ux", ax);
+		if((ax & 0xf0000000) != 0x40000000)
+			vmerror("unknown cpuid field eax=%#ux", ax);
 	zero:
-		ax = 0;
-		bx = 0;
-		cx = 0;
-		dx = 0;
+		ax = cx = 0;
 		break;
 	}
 	rset(RAX, ax);
@@ -267,6 +322,9 @@
 		else rset("pat", val);
 		break;
 	case 0x8B: val = 0; break; /* microcode update */
+	case 0x1A0: /* IA32_MISC_ENABLE */
+		if(rd) val = msr.miscen;
+		break;
 	default:
 		if(rd){
 			vmerror("read from unknown MSR %#ux ignored", cx);
@@ -373,6 +431,26 @@
 	irqack(ei->qual);
 }
 
+static void
+xsetbv(ExitInfo *ei)
+{
+	uvlong v;
+
+	/* this should also #ud if LOCK prefix is used */
+
+	v = rget(RAX)&0xffffffff | rget(RDX)<<32;
+	if(rget(RCX) & 0xffffffff)
+		postexc("#gp", 0);
+	else if(v != 1 && v != 3 && v != 7)
+		postexc("#gp", 0);
+	else if((leaf1.cx & 1<<26) == 0 || (rget("cr4real") & Cr4Osxsave) == 0)
+		postexc("#ud", NOERRC);
+	else{
+		rset("xcr0", v);
+		skipinstr(ei);
+	}
+}
+
 typedef struct ExitType ExitType;
 struct ExitType {
 	char *name;
@@ -389,6 +467,7 @@
 	{".movdr", movdr},
 	{"#db", dbgexc},
 	{"movcr", movcr},
+	{".xsetbv", xsetbv},
 };
 
 void
diff -r bdf5af28b857 sys/src/cmd/vmx/fns.h
--- a/sys/src/cmd/vmx/fns.h	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/cmd/vmx/fns.h	Fri Dec 04 16:26:28 2020 +0100
@@ -1,3 +1,4 @@
+#define MIN(a,b) ((a)<(b)?(a):(b))
 void *emalloc(ulong);
 void loadkernel(char *);
 uvlong rget(char *);
diff -r bdf5af28b857 sys/src/cmd/vmx/x86.h
--- a/sys/src/cmd/vmx/x86.h	Fri Dec 04 09:33:55 2020 +0100
+++ b/sys/src/cmd/vmx/x86.h	Fri Dec 04 16:26:28 2020 +0100
@@ -22,8 +22,9 @@
 enum {
 	Cr0Pg	= 1<<31,
 	
-	Cr4Pse	= 1<<4,
-	Cr4Pae	= 1<<5,
+	Cr4Pse		= 1<<4,
+	Cr4Pae		= 1<<5,
+	Cr4Osxsave	= 1<<18,
 	
 	EferLme	= 1<<8,
 };

             reply	other threads:[~2020-12-04 15:45 UTC|newest]

Thread overview: 14+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-12-04 15:39 Sigrid Solveig Haflínudóttir [this message]
2020-12-04 21:15 ` james palmer
2020-12-04 22:25 ` Kurt H Maier
2020-12-05  0:19 ` ori
2020-12-10 12:23 ` Stuart Morrow
2020-12-10 15:09   ` ori
2020-12-18  2:22   ` magma698hfsp273p9f
2020-12-18  4:41     ` ori
2021-01-21  0:58       ` Nemo's books (WAS: Re: [9front] VMX improvements + AVX) magma698hfsp273p9f
2021-01-21  3:37         ` Roman Shaposhnik
2021-01-21  3:58         ` sl
2021-01-23  7:31           ` [9front] Re: Nemo's books magma698hfsp273p9f
2021-01-23 12:47             ` Eckard Brauer
2021-01-21  4:07         ` Nemo's books (WAS: Re: [9front] VMX improvements + AVX) Anthony Martin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=52CB3E735527EF01CDF9E2641E3A406B@gmail.com \
    --to=ftrvxmtrx@gmail.com \
    --cc=9front@9front.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).