Discussion:
[PATCH] target-i386: add Intel AVX-512 support
Chao Peng
2014-10-23 03:02:43 UTC
Permalink
Add AVX512 feature bits, register definition and corresponding
xsave/vmstate support.

Signed-off-by: Chao Peng <***@linux.intel.com>
---
target-i386/cpu.c | 10 ++++--
target-i386/cpu.h | 61 ++++++++++++++++++++++++++++++++++
target-i386/kvm.c | 19 +++++++++++
target-i386/machine.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 175 insertions(+), 2 deletions(-)

diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index e7bf9de..e91bfbd 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -259,8 +259,8 @@ static const char *svm_feature_name[] = {
static const char *cpuid_7_0_ebx_feature_name[] = {
"fsgsbase", "tsc_adjust", NULL, "bmi1", "hle", "avx2", NULL, "smep",
"bmi2", "erms", "invpcid", "rtm", NULL, NULL, "mpx", NULL,
- NULL, NULL, "rdseed", "adx", "smap", NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ "avx512f", NULL, "rdseed", "adx", "smap", NULL, NULL, NULL,
+ NULL, NULL, "avx512pf", "avx512er", "avx512cd", NULL, NULL, NULL,
};

static const char *cpuid_apm_edx_feature_name[] = {
@@ -426,6 +426,12 @@ static const ExtSaveArea ext_save_areas[] = {
.offset = 0x3c0, .size = 0x40 },
[4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX,
.offset = 0x400, .size = 0x40 },
+ [5] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
+ .offset = 0x440, .size = 0x40 },
+ [6] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
+ .offset = 0x480, .size = 0x200 },
+ [7] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
+ .offset = 0x680, .size = 0x400 },
};

const char *get_register_name_32(unsigned int reg)
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 2968749..9f01831 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -395,6 +395,9 @@
#define XSTATE_YMM (1ULL << 2)
#define XSTATE_BNDREGS (1ULL << 3)
#define XSTATE_BNDCSR (1ULL << 4)
+#define XSTATE_OPMASK (1ULL << 5)
+#define XSTATE_ZMM_Hi256 (1ULL << 6)
+#define XSTATE_Hi16_ZMM (1ULL << 7)


/* CPUID feature words */
@@ -560,9 +563,13 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_7_0_EBX_INVPCID (1U << 10)
#define CPUID_7_0_EBX_RTM (1U << 11)
#define CPUID_7_0_EBX_MPX (1U << 14)
+#define CPUID_7_0_EBX_AVX512F (1U << 16) /* AVX-512 Foundation */
#define CPUID_7_0_EBX_RDSEED (1U << 18)
#define CPUID_7_0_EBX_ADX (1U << 19)
#define CPUID_7_0_EBX_SMAP (1U << 20)
+#define CPUID_7_0_EBX_AVX512PF (1U << 26) /* AVX-512 Prefetch */
+#define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */
+#define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */

/* CPUID[0x80000007].EDX flags: */
#define CPUID_APM_INVTSC (1U << 8)
@@ -707,6 +714,24 @@ typedef union {
} XMMReg;

typedef union {
+ uint8_t _b[32];
+ uint16_t _w[16];
+ uint32_t _l[8];
+ uint64_t _q[4];
+ float32 _s[8];
+ float64 _d[4];
+} YMMReg;
+
+typedef union {
+ uint8_t _b[64];
+ uint16_t _w[32];
+ uint32_t _l[16];
+ uint64_t _q[8];
+ float32 _s[16];
+ float64 _d[8];
+} ZMMReg;
+
+typedef union {
uint8_t _b[8];
uint16_t _w[4];
uint32_t _l[2];
@@ -725,6 +750,20 @@ typedef struct BNDCSReg {
} BNDCSReg;

#ifdef HOST_WORDS_BIGENDIAN
+#define ZMM_B(n) _b[63 - (n)]
+#define ZMM_W(n) _w[31 - (n)]
+#define ZMM_L(n) _l[15 - (n)]
+#define ZMM_S(n) _s[15 - (n)]
+#define ZMM_Q(n) _q[7 - (n)]
+#define ZMM_D(n) _d[7 - (n)]
+
+#define YMM_B(n) _b[31 - (n)]
+#define YMM_W(n) _w[15 - (n)]
+#define YMM_L(n) _l[7 - (n)]
+#define YMM_S(n) _s[7 - (n)]
+#define YMM_Q(n) _q[3 - (n)]
+#define YMM_D(n) _d[3 - (n)]
+
#define XMM_B(n) _b[15 - (n)]
#define XMM_W(n) _w[7 - (n)]
#define XMM_L(n) _l[3 - (n)]
@@ -737,6 +776,20 @@ typedef struct BNDCSReg {
#define MMX_L(n) _l[1 - (n)]
#define MMX_S(n) _s[1 - (n)]
#else
+#define ZMM_B(n) _b[n]
+#define ZMM_W(n) _w[n]
+#define ZMM_L(n) _l[n]
+#define ZMM_S(n) _s[n]
+#define ZMM_Q(n) _q[n]
+#define ZMM_D(n) _d[n]
+
+#define YMM_B(n) _b[n]
+#define YMM_W(n) _w[n]
+#define YMM_L(n) _l[n]
+#define YMM_S(n) _s[n]
+#define YMM_Q(n) _q[n]
+#define YMM_D(n) _d[n]
+
#define XMM_B(n) _b[n]
#define XMM_W(n) _w[n]
#define XMM_L(n) _l[n]
@@ -775,6 +828,8 @@ typedef struct {

#define NB_MMU_MODES 3

+#define NB_OPMASK_REGS 8
+
typedef enum TPRAccess {
TPR_ACCESS_READ,
TPR_ACCESS_WRITE,
@@ -839,6 +894,12 @@ typedef struct CPUX86State {

XMMReg ymmh_regs[CPU_NB_REGS];

+ uint64_t opmask_regs[NB_OPMASK_REGS];
+ YMMReg zmmh_regs[CPU_NB_REGS];
+#ifdef TARGET_X86_64
+ ZMMReg hi16_zmm_regs[CPU_NB_REGS];
+#endif
+
/* sysenter registers */
uint32_t sysenter_cs;
target_ulong sysenter_esp;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index ddedc73..ccf36e8 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1031,6 +1031,9 @@ static int kvm_put_fpu(X86CPU *cpu)
#define XSAVE_YMMH_SPACE 144
#define XSAVE_BNDREGS 240
#define XSAVE_BNDCSR 256
+#define XSAVE_OPMASK 272
+#define XSAVE_ZMM_Hi256 288
+#define XSAVE_Hi16_ZMM 416

static int kvm_put_xsave(X86CPU *cpu)
{
@@ -1067,6 +1070,14 @@ static int kvm_put_xsave(X86CPU *cpu)
sizeof env->bnd_regs);
memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs,
sizeof(env->bndcs_regs));
+ memcpy(&xsave->region[XSAVE_OPMASK], env->opmask_regs,
+ sizeof env->opmask_regs);
+ memcpy(&xsave->region[XSAVE_ZMM_Hi256], env->zmmh_regs,
+ sizeof env->zmmh_regs);
+#ifdef TARGET_X86_64
+ memcpy(&xsave->region[XSAVE_Hi16_ZMM], env->hi16_zmm_regs,
+ sizeof env->hi16_zmm_regs);
+#endif
r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
return r;
}
@@ -1402,6 +1413,14 @@ static int kvm_get_xsave(X86CPU *cpu)
sizeof env->bnd_regs);
memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR],
sizeof(env->bndcs_regs));
+ memcpy(env->opmask_regs, &xsave->region[XSAVE_OPMASK],
+ sizeof env->opmask_regs);
+ memcpy(env->zmmh_regs, &xsave->region[XSAVE_ZMM_Hi256],
+ sizeof env->zmmh_regs);
+#ifdef TARGET_X86_64
+ memcpy(env->hi16_zmm_regs, &xsave->region[XSAVE_Hi16_ZMM],
+ sizeof env->hi16_zmm_regs);
+#endif
return 0;
}

diff --git a/target-i386/machine.c b/target-i386/machine.c
index 0dd49f0..708fc54 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -60,6 +60,44 @@ static const VMStateDescription vmstate_ymmh_reg = {
#define VMSTATE_YMMH_REGS_VARS(_field, _state, _n, _v) \
VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_ymmh_reg, XMMReg)

+static const VMStateDescription vmstate_zmmh_reg = {
+ .name = "zmmh_reg",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(YMM_Q(0), YMMReg),
+ VMSTATE_UINT64(YMM_Q(1), YMMReg),
+ VMSTATE_UINT64(YMM_Q(2), YMMReg),
+ VMSTATE_UINT64(YMM_Q(3), YMMReg),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+#define VMSTATE_ZMMH_REGS_VARS(_field, _state, _n) \
+ VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_zmmh_reg, YMMReg)
+
+#ifdef TARGET_X86_64
+static const VMStateDescription vmstate_hi16_zmm_reg = {
+ .name = "hi16_zmm_reg",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(ZMM_Q(0), ZMMReg),
+ VMSTATE_UINT64(ZMM_Q(1), ZMMReg),
+ VMSTATE_UINT64(ZMM_Q(2), ZMMReg),
+ VMSTATE_UINT64(ZMM_Q(3), ZMMReg),
+ VMSTATE_UINT64(ZMM_Q(4), ZMMReg),
+ VMSTATE_UINT64(ZMM_Q(5), ZMMReg),
+ VMSTATE_UINT64(ZMM_Q(6), ZMMReg),
+ VMSTATE_UINT64(ZMM_Q(7), ZMMReg),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+#define VMSTATE_Hi16_ZMM_REGS_VARS(_field, _state, _n) \
+ VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_hi16_zmm_reg, ZMMReg)
+#endif
+
static const VMStateDescription vmstate_bnd_regs = {
.name = "bnd_regs",
.version_id = 1,
@@ -603,6 +641,52 @@ static const VMStateDescription vmstate_msr_hyperv_time = {
}
};

+static bool avx512_needed(void *opaque)
+{
+ X86CPU *cpu = opaque;
+ CPUX86State *env = &cpu->env;
+ unsigned int i;
+
+ for (i = 0; i < NB_OPMASK_REGS; i++) {
+ if (env->opmask_regs[i]) {
+ return true;
+ }
+ }
+
+ for (i = 0; i < CPU_NB_REGS; i++) {
+#define ENV_ZMMH(reg, field) (env->zmmh_regs[reg].YMM_Q(field))
+ if (ENV_ZMMH(i, 0) || ENV_ZMMH(i, 1) ||
+ ENV_ZMMH(i, 2) || ENV_ZMMH(i, 3)) {
+ return true;
+ }
+#ifdef TARGET_X86_64
+#define ENV_Hi16_ZMM(reg, field) (env->hi16_zmm_regs[reg].ZMM_Q(field))
+ if (ENV_Hi16_ZMM(i, 0) || ENV_Hi16_ZMM(i, 1) ||
+ ENV_Hi16_ZMM(i, 2) || ENV_Hi16_ZMM(i, 3) ||
+ ENV_Hi16_ZMM(i, 4) || ENV_Hi16_ZMM(i, 5) ||
+ ENV_Hi16_ZMM(i, 6) || ENV_Hi16_ZMM(i, 7)) {
+ return true;
+ }
+#endif
+ }
+
+ return false;
+}
+
+static const VMStateDescription vmstate_avx512 = {
+ .name = "cpu/avx512",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS),
+ VMSTATE_ZMMH_REGS_VARS(env.zmmh_regs, X86CPU, CPU_NB_REGS),
+#ifdef TARGET_X86_64
+ VMSTATE_Hi16_ZMM_REGS_VARS(env.hi16_zmm_regs, X86CPU, CPU_NB_REGS),
+#endif
+ VMSTATE_END_OF_LIST()
+ }
+};
+
VMStateDescription vmstate_x86_cpu = {
.name = "cpu",
.version_id = 12,
@@ -745,6 +829,9 @@ VMStateDescription vmstate_x86_cpu = {
}, {
.vmsd = &vmstate_msr_hyperv_time,
.needed = hyperv_time_enable_needed,
+ }, {
+ .vmsd = &vmstate_avx512,
+ .needed = avx512_needed,
} , {
/* empty */
}
--
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Paolo Bonzini
2014-10-23 14:34:46 UTC
Permalink
Post by Chao Peng
Add AVX512 feature bits, register definition and corresponding
xsave/vmstate support.
---
target-i386/cpu.c | 10 ++++--
target-i386/cpu.h | 61 ++++++++++++++++++++++++++++++++++
target-i386/kvm.c | 19 +++++++++++
target-i386/machine.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 175 insertions(+), 2 deletions(-)
Looks good---Eduardo, Juan, can you look at it too? Documentation is at

https://software.intel.com/sites/default/files/managed/68/8b/319433-019.pdf

See Tables 2-5, 3-6, 3-7, 3-8.

Paolo
Post by Chao Peng
diff --git a/target-i386/cpu.c b/target-i386/cpu.c
index e7bf9de..e91bfbd 100644
--- a/target-i386/cpu.c
+++ b/target-i386/cpu.c
@@ -259,8 +259,8 @@ static const char *svm_feature_name[] = {
static const char *cpuid_7_0_ebx_feature_name[] = {
"fsgsbase", "tsc_adjust", NULL, "bmi1", "hle", "avx2", NULL, "smep",
"bmi2", "erms", "invpcid", "rtm", NULL, NULL, "mpx", NULL,
- NULL, NULL, "rdseed", "adx", "smap", NULL, NULL, NULL,
- NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
+ "avx512f", NULL, "rdseed", "adx", "smap", NULL, NULL, NULL,
+ NULL, NULL, "avx512pf", "avx512er", "avx512cd", NULL, NULL, NULL,
};
static const char *cpuid_apm_edx_feature_name[] = {
@@ -426,6 +426,12 @@ static const ExtSaveArea ext_save_areas[] = {
.offset = 0x3c0, .size = 0x40 },
[4] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX,
.offset = 0x400, .size = 0x40 },
+ [5] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
+ .offset = 0x440, .size = 0x40 },
+ [6] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
+ .offset = 0x480, .size = 0x200 },
+ [7] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F,
+ .offset = 0x680, .size = 0x400 },
};
const char *get_register_name_32(unsigned int reg)
diff --git a/target-i386/cpu.h b/target-i386/cpu.h
index 2968749..9f01831 100644
--- a/target-i386/cpu.h
+++ b/target-i386/cpu.h
@@ -395,6 +395,9 @@
#define XSTATE_YMM (1ULL << 2)
#define XSTATE_BNDREGS (1ULL << 3)
#define XSTATE_BNDCSR (1ULL << 4)
+#define XSTATE_OPMASK (1ULL << 5)
+#define XSTATE_ZMM_Hi256 (1ULL << 6)
+#define XSTATE_Hi16_ZMM (1ULL << 7)
/* CPUID feature words */
@@ -560,9 +563,13 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS];
#define CPUID_7_0_EBX_INVPCID (1U << 10)
#define CPUID_7_0_EBX_RTM (1U << 11)
#define CPUID_7_0_EBX_MPX (1U << 14)
+#define CPUID_7_0_EBX_AVX512F (1U << 16) /* AVX-512 Foundation */
#define CPUID_7_0_EBX_RDSEED (1U << 18)
#define CPUID_7_0_EBX_ADX (1U << 19)
#define CPUID_7_0_EBX_SMAP (1U << 20)
+#define CPUID_7_0_EBX_AVX512PF (1U << 26) /* AVX-512 Prefetch */
+#define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */
+#define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */
/* CPUID[0x80000007].EDX flags: */
#define CPUID_APM_INVTSC (1U << 8)
@@ -707,6 +714,24 @@ typedef union {
} XMMReg;
typedef union {
+ uint8_t _b[32];
+ uint16_t _w[16];
+ uint32_t _l[8];
+ uint64_t _q[4];
+ float32 _s[8];
+ float64 _d[4];
+} YMMReg;
+
+typedef union {
+ uint8_t _b[64];
+ uint16_t _w[32];
+ uint32_t _l[16];
+ uint64_t _q[8];
+ float32 _s[16];
+ float64 _d[8];
+} ZMMReg;
+
+typedef union {
uint8_t _b[8];
uint16_t _w[4];
uint32_t _l[2];
@@ -725,6 +750,20 @@ typedef struct BNDCSReg {
} BNDCSReg;
#ifdef HOST_WORDS_BIGENDIAN
+#define ZMM_B(n) _b[63 - (n)]
+#define ZMM_W(n) _w[31 - (n)]
+#define ZMM_L(n) _l[15 - (n)]
+#define ZMM_S(n) _s[15 - (n)]
+#define ZMM_Q(n) _q[7 - (n)]
+#define ZMM_D(n) _d[7 - (n)]
+
+#define YMM_B(n) _b[31 - (n)]
+#define YMM_W(n) _w[15 - (n)]
+#define YMM_L(n) _l[7 - (n)]
+#define YMM_S(n) _s[7 - (n)]
+#define YMM_Q(n) _q[3 - (n)]
+#define YMM_D(n) _d[3 - (n)]
+
#define XMM_B(n) _b[15 - (n)]
#define XMM_W(n) _w[7 - (n)]
#define XMM_L(n) _l[3 - (n)]
@@ -737,6 +776,20 @@ typedef struct BNDCSReg {
#define MMX_L(n) _l[1 - (n)]
#define MMX_S(n) _s[1 - (n)]
#else
+#define ZMM_B(n) _b[n]
+#define ZMM_W(n) _w[n]
+#define ZMM_L(n) _l[n]
+#define ZMM_S(n) _s[n]
+#define ZMM_Q(n) _q[n]
+#define ZMM_D(n) _d[n]
+
+#define YMM_B(n) _b[n]
+#define YMM_W(n) _w[n]
+#define YMM_L(n) _l[n]
+#define YMM_S(n) _s[n]
+#define YMM_Q(n) _q[n]
+#define YMM_D(n) _d[n]
+
#define XMM_B(n) _b[n]
#define XMM_W(n) _w[n]
#define XMM_L(n) _l[n]
@@ -775,6 +828,8 @@ typedef struct {
#define NB_MMU_MODES 3
+#define NB_OPMASK_REGS 8
+
typedef enum TPRAccess {
TPR_ACCESS_READ,
TPR_ACCESS_WRITE,
@@ -839,6 +894,12 @@ typedef struct CPUX86State {
XMMReg ymmh_regs[CPU_NB_REGS];
+ uint64_t opmask_regs[NB_OPMASK_REGS];
+ YMMReg zmmh_regs[CPU_NB_REGS];
+#ifdef TARGET_X86_64
+ ZMMReg hi16_zmm_regs[CPU_NB_REGS];
+#endif
+
/* sysenter registers */
uint32_t sysenter_cs;
target_ulong sysenter_esp;
diff --git a/target-i386/kvm.c b/target-i386/kvm.c
index ddedc73..ccf36e8 100644
--- a/target-i386/kvm.c
+++ b/target-i386/kvm.c
@@ -1031,6 +1031,9 @@ static int kvm_put_fpu(X86CPU *cpu)
#define XSAVE_YMMH_SPACE 144
#define XSAVE_BNDREGS 240
#define XSAVE_BNDCSR 256
+#define XSAVE_OPMASK 272
+#define XSAVE_ZMM_Hi256 288
+#define XSAVE_Hi16_ZMM 416
static int kvm_put_xsave(X86CPU *cpu)
{
@@ -1067,6 +1070,14 @@ static int kvm_put_xsave(X86CPU *cpu)
sizeof env->bnd_regs);
memcpy(&xsave->region[XSAVE_BNDCSR], &env->bndcs_regs,
sizeof(env->bndcs_regs));
+ memcpy(&xsave->region[XSAVE_OPMASK], env->opmask_regs,
+ sizeof env->opmask_regs);
+ memcpy(&xsave->region[XSAVE_ZMM_Hi256], env->zmmh_regs,
+ sizeof env->zmmh_regs);
+#ifdef TARGET_X86_64
+ memcpy(&xsave->region[XSAVE_Hi16_ZMM], env->hi16_zmm_regs,
+ sizeof env->hi16_zmm_regs);
+#endif
r = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave);
return r;
}
@@ -1402,6 +1413,14 @@ static int kvm_get_xsave(X86CPU *cpu)
sizeof env->bnd_regs);
memcpy(&env->bndcs_regs, &xsave->region[XSAVE_BNDCSR],
sizeof(env->bndcs_regs));
+ memcpy(env->opmask_regs, &xsave->region[XSAVE_OPMASK],
+ sizeof env->opmask_regs);
+ memcpy(env->zmmh_regs, &xsave->region[XSAVE_ZMM_Hi256],
+ sizeof env->zmmh_regs);
+#ifdef TARGET_X86_64
+ memcpy(env->hi16_zmm_regs, &xsave->region[XSAVE_Hi16_ZMM],
+ sizeof env->hi16_zmm_regs);
+#endif
return 0;
}
diff --git a/target-i386/machine.c b/target-i386/machine.c
index 0dd49f0..708fc54 100644
--- a/target-i386/machine.c
+++ b/target-i386/machine.c
@@ -60,6 +60,44 @@ static const VMStateDescription vmstate_ymmh_reg = {
#define VMSTATE_YMMH_REGS_VARS(_field, _state, _n, _v) \
VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_ymmh_reg, XMMReg)
+static const VMStateDescription vmstate_zmmh_reg = {
+ .name = "zmmh_reg",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(YMM_Q(0), YMMReg),
+ VMSTATE_UINT64(YMM_Q(1), YMMReg),
+ VMSTATE_UINT64(YMM_Q(2), YMMReg),
+ VMSTATE_UINT64(YMM_Q(3), YMMReg),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+#define VMSTATE_ZMMH_REGS_VARS(_field, _state, _n) \
+ VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_zmmh_reg, YMMReg)
+
+#ifdef TARGET_X86_64
+static const VMStateDescription vmstate_hi16_zmm_reg = {
+ .name = "hi16_zmm_reg",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64(ZMM_Q(0), ZMMReg),
+ VMSTATE_UINT64(ZMM_Q(1), ZMMReg),
+ VMSTATE_UINT64(ZMM_Q(2), ZMMReg),
+ VMSTATE_UINT64(ZMM_Q(3), ZMMReg),
+ VMSTATE_UINT64(ZMM_Q(4), ZMMReg),
+ VMSTATE_UINT64(ZMM_Q(5), ZMMReg),
+ VMSTATE_UINT64(ZMM_Q(6), ZMMReg),
+ VMSTATE_UINT64(ZMM_Q(7), ZMMReg),
+ VMSTATE_END_OF_LIST()
+ }
+};
+
+#define VMSTATE_Hi16_ZMM_REGS_VARS(_field, _state, _n) \
+ VMSTATE_STRUCT_ARRAY(_field, _state, _n, 0, vmstate_hi16_zmm_reg, ZMMReg)
+#endif
+
static const VMStateDescription vmstate_bnd_regs = {
.name = "bnd_regs",
.version_id = 1,
@@ -603,6 +641,52 @@ static const VMStateDescription vmstate_msr_hyperv_time = {
}
};
+static bool avx512_needed(void *opaque)
+{
+ X86CPU *cpu = opaque;
+ CPUX86State *env = &cpu->env;
+ unsigned int i;
+
+ for (i = 0; i < NB_OPMASK_REGS; i++) {
+ if (env->opmask_regs[i]) {
+ return true;
+ }
+ }
+
+ for (i = 0; i < CPU_NB_REGS; i++) {
+#define ENV_ZMMH(reg, field) (env->zmmh_regs[reg].YMM_Q(field))
+ if (ENV_ZMMH(i, 0) || ENV_ZMMH(i, 1) ||
+ ENV_ZMMH(i, 2) || ENV_ZMMH(i, 3)) {
+ return true;
+ }
+#ifdef TARGET_X86_64
+#define ENV_Hi16_ZMM(reg, field) (env->hi16_zmm_regs[reg].ZMM_Q(field))
+ if (ENV_Hi16_ZMM(i, 0) || ENV_Hi16_ZMM(i, 1) ||
+ ENV_Hi16_ZMM(i, 2) || ENV_Hi16_ZMM(i, 3) ||
+ ENV_Hi16_ZMM(i, 4) || ENV_Hi16_ZMM(i, 5) ||
+ ENV_Hi16_ZMM(i, 6) || ENV_Hi16_ZMM(i, 7)) {
+ return true;
+ }
+#endif
+ }
+
+ return false;
+}
+
+static const VMStateDescription vmstate_avx512 = {
+ .name = "cpu/avx512",
+ .version_id = 1,
+ .minimum_version_id = 1,
+ .fields = (VMStateField[]) {
+ VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS),
+ VMSTATE_ZMMH_REGS_VARS(env.zmmh_regs, X86CPU, CPU_NB_REGS),
+#ifdef TARGET_X86_64
+ VMSTATE_Hi16_ZMM_REGS_VARS(env.hi16_zmm_regs, X86CPU, CPU_NB_REGS),
+#endif
+ VMSTATE_END_OF_LIST()
+ }
+};
+
VMStateDescription vmstate_x86_cpu = {
.name = "cpu",
.version_id = 12,
@@ -745,6 +829,9 @@ VMStateDescription vmstate_x86_cpu = {
}, {
.vmsd = &vmstate_msr_hyperv_time,
.needed = hyperv_time_enable_needed,
+ }, {
+ .vmsd = &vmstate_avx512,
+ .needed = avx512_needed,
} , {
/* empty */
}
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Eduardo Habkost
2014-10-23 19:49:23 UTC
Permalink
On Thu, Oct 23, 2014 at 11:02:43AM +0800, Chao Peng wrote:
[...]
Post by Chao Peng
@@ -707,6 +714,24 @@ typedef union {
} XMMReg;
typedef union {
+ uint8_t _b[32];
+ uint16_t _w[16];
+ uint32_t _l[8];
+ uint64_t _q[4];
+ float32 _s[8];
+ float64 _d[4];
+} YMMReg;
+
+typedef union {
+ uint8_t _b[64];
+ uint16_t _w[32];
+ uint32_t _l[16];
+ uint64_t _q[8];
+ float32 _s[16];
+ float64 _d[8];
+} ZMMReg;
+
+typedef union {
uint8_t _b[8];
uint16_t _w[4];
uint32_t _l[2];
@@ -725,6 +750,20 @@ typedef struct BNDCSReg {
} BNDCSReg;
#ifdef HOST_WORDS_BIGENDIAN
+#define ZMM_B(n) _b[63 - (n)]
+#define ZMM_W(n) _w[31 - (n)]
+#define ZMM_L(n) _l[15 - (n)]
+#define ZMM_S(n) _s[15 - (n)]
+#define ZMM_Q(n) _q[7 - (n)]
+#define ZMM_D(n) _d[7 - (n)]
+
+#define YMM_B(n) _b[31 - (n)]
+#define YMM_W(n) _w[15 - (n)]
+#define YMM_L(n) _l[7 - (n)]
+#define YMM_S(n) _s[7 - (n)]
+#define YMM_Q(n) _q[3 - (n)]
+#define YMM_D(n) _d[3 - (n)]
+
#define XMM_B(n) _b[15 - (n)]
#define XMM_W(n) _w[7 - (n)]
#define XMM_L(n) _l[3 - (n)]
@@ -737,6 +776,20 @@ typedef struct BNDCSReg {
#define MMX_L(n) _l[1 - (n)]
#define MMX_S(n) _s[1 - (n)]
#else
+#define ZMM_B(n) _b[n]
+#define ZMM_W(n) _w[n]
+#define ZMM_L(n) _l[n]
+#define ZMM_S(n) _s[n]
+#define ZMM_Q(n) _q[n]
+#define ZMM_D(n) _d[n]
+
+#define YMM_B(n) _b[n]
+#define YMM_W(n) _w[n]
+#define YMM_L(n) _l[n]
+#define YMM_S(n) _s[n]
+#define YMM_Q(n) _q[n]
+#define YMM_D(n) _d[n]
+
I am probably not being able to see some future use case of those data
structures, but: why all the extra complexity here, if only ZMM_Q and
YMM_Q are being used in the code, and the only place affected by the
ordering of YMMReg and ZMMReg array elements are the memcpy() calls on
kvm_{put,get}_xsave(), where the data always have the same layout?
--
Eduardo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Chao Peng
2014-10-24 01:27:16 UTC
Permalink
Post by Eduardo Habkost
[...]
Post by Chao Peng
@@ -707,6 +714,24 @@ typedef union {
} XMMReg;
typedef union {
+ uint8_t _b[32];
+ uint16_t _w[16];
+ uint32_t _l[8];
+ uint64_t _q[4];
+ float32 _s[8];
+ float64 _d[4];
+} YMMReg;
+
+typedef union {
+ uint8_t _b[64];
+ uint16_t _w[32];
+ uint32_t _l[16];
+ uint64_t _q[8];
+ float32 _s[16];
+ float64 _d[8];
+} ZMMReg;
+
+typedef union {
uint8_t _b[8];
uint16_t _w[4];
uint32_t _l[2];
@@ -725,6 +750,20 @@ typedef struct BNDCSReg {
} BNDCSReg;
#ifdef HOST_WORDS_BIGENDIAN
+#define ZMM_B(n) _b[63 - (n)]
+#define ZMM_W(n) _w[31 - (n)]
+#define ZMM_L(n) _l[15 - (n)]
+#define ZMM_S(n) _s[15 - (n)]
+#define ZMM_Q(n) _q[7 - (n)]
+#define ZMM_D(n) _d[7 - (n)]
+
+#define YMM_B(n) _b[31 - (n)]
+#define YMM_W(n) _w[15 - (n)]
+#define YMM_L(n) _l[7 - (n)]
+#define YMM_S(n) _s[7 - (n)]
+#define YMM_Q(n) _q[3 - (n)]
+#define YMM_D(n) _d[3 - (n)]
+
#define XMM_B(n) _b[15 - (n)]
#define XMM_W(n) _w[7 - (n)]
#define XMM_L(n) _l[3 - (n)]
@@ -737,6 +776,20 @@ typedef struct BNDCSReg {
#define MMX_L(n) _l[1 - (n)]
#define MMX_S(n) _s[1 - (n)]
#else
+#define ZMM_B(n) _b[n]
+#define ZMM_W(n) _w[n]
+#define ZMM_L(n) _l[n]
+#define ZMM_S(n) _s[n]
+#define ZMM_Q(n) _q[n]
+#define ZMM_D(n) _d[n]
+
+#define YMM_B(n) _b[n]
+#define YMM_W(n) _w[n]
+#define YMM_L(n) _l[n]
+#define YMM_S(n) _s[n]
+#define YMM_Q(n) _q[n]
+#define YMM_D(n) _d[n]
+
I am probably not being able to see some future use case of those data
structures, but: why all the extra complexity here, if only ZMM_Q and
YMM_Q are being used in the code, and the only place affected by the
ordering of YMMReg and ZMMReg array elements are the memcpy() calls on
kvm_{put,get}_xsave(), where the data always have the same layout?
Thanks Eduardo, then I feel comfortable to drop most of these macros and
only keep YMM_Q/ZMM_Q left. As no acutal benefit for ordering, then I
will also make these two endiness-insensitive.

Chao
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Paolo Bonzini
2014-10-24 05:55:10 UTC
Permalink
Post by Chao Peng
Post by Eduardo Habkost
[...]
Post by Chao Peng
@@ -707,6 +714,24 @@ typedef union {
} XMMReg;
typedef union {
+ uint8_t _b[32];
+ uint16_t _w[16];
+ uint32_t _l[8];
+ uint64_t _q[4];
+ float32 _s[8];
+ float64 _d[4];
+} YMMReg;
+
+typedef union {
+ uint8_t _b[64];
+ uint16_t _w[32];
+ uint32_t _l[16];
+ uint64_t _q[8];
+ float32 _s[16];
+ float64 _d[8];
+} ZMMReg;
+
+typedef union {
uint8_t _b[8];
uint16_t _w[4];
uint32_t _l[2];
@@ -725,6 +750,20 @@ typedef struct BNDCSReg {
} BNDCSReg;
#ifdef HOST_WORDS_BIGENDIAN
+#define ZMM_B(n) _b[63 - (n)]
+#define ZMM_W(n) _w[31 - (n)]
+#define ZMM_L(n) _l[15 - (n)]
+#define ZMM_S(n) _s[15 - (n)]
+#define ZMM_Q(n) _q[7 - (n)]
+#define ZMM_D(n) _d[7 - (n)]
+
+#define YMM_B(n) _b[31 - (n)]
+#define YMM_W(n) _w[15 - (n)]
+#define YMM_L(n) _l[7 - (n)]
+#define YMM_S(n) _s[7 - (n)]
+#define YMM_Q(n) _q[3 - (n)]
+#define YMM_D(n) _d[3 - (n)]
+
#define XMM_B(n) _b[15 - (n)]
#define XMM_W(n) _w[7 - (n)]
#define XMM_L(n) _l[3 - (n)]
@@ -737,6 +776,20 @@ typedef struct BNDCSReg {
#define MMX_L(n) _l[1 - (n)]
#define MMX_S(n) _s[1 - (n)]
#else
+#define ZMM_B(n) _b[n]
+#define ZMM_W(n) _w[n]
+#define ZMM_L(n) _l[n]
+#define ZMM_S(n) _s[n]
+#define ZMM_Q(n) _q[n]
+#define ZMM_D(n) _d[n]
+
+#define YMM_B(n) _b[n]
+#define YMM_W(n) _w[n]
+#define YMM_L(n) _l[n]
+#define YMM_S(n) _s[n]
+#define YMM_Q(n) _q[n]
+#define YMM_D(n) _d[n]
+
I am probably not being able to see some future use case of those data
structures, but: why all the extra complexity here, if only ZMM_Q and
YMM_Q are being used in the code, and the only place affected by the
ordering of YMMReg and ZMMReg array elements are the memcpy() calls on
kvm_{put,get}_xsave(), where the data always have the same layout?
Thanks Eduardo, then I feel comfortable to drop most of these macros and
only keep YMM_Q/ZMM_Q left. As no acutal benefit for ordering, then I
will also make these two endiness-insensitive.
I think we can keep the macros. The actual cleanup would be to have a
single member for the 32 512-bit ZMM registers, instead of splitting
xmm/ymmh/zmmh/zmm_hi16. This will get rid of the YMM_* and ZMM_*
registers. However, we could not use simple memcpy()s to marshal in and
out of the XSAVE data. We can do it in 2.2.

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to ***@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Continue reading on narkive:
Loading...