1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
|
From: Andrew Cooper <andrew.cooper3@citrix.com>
Subject: x86/tsx: Introduce tsx= to use MSR_TSX_CTRL when available
To protect against the TSX Async Abort speculative vulnerability, Intel have
released new microcode for affected parts which introduces the MSR_TSX_CTRL
control, which allows TSX to be turned off. This will be architectural on
future parts.
Introduce tsx= to provide a global on/off for TSX, including its enumeration
via CPUID. Provide stub virtualisation of this MSR, as it is not exposed to
guests at the moment.
VMs may have booted before microcode is loaded, or before hosts have rebooted,
and they still want to migrate freely. A VM which booted seeing TSX can
migrate safely to hosts with TSX disabled - TSX will start unconditionally
aborting, but still behave in a manner compatible with the ABI.
The guest-visible behaviour is equivalent to late loading the microcode and
setting the RTM_DISABLE bit in the course of live patching.
This is part of XSA-305 / CVE-2019-11135
Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com>
Reviewed-by: Jan Beulich <jbeulich@suse.com>
diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc
index e283017015..b7e1bf8e8b 100644
--- a/docs/misc/xen-command-line.pandoc
+++ b/docs/misc/xen-command-line.pandoc
@@ -2033,6 +2033,20 @@ Xen version.
### tsc (x86)
> `= unstable | skewed | stable:socket`
+### tsx
+ = <bool>
+
+ Applicability: x86
+ Default: true
+
+Controls for the use of Transactional Synchronization eXtensions.
+
+On Intel parts released in Q3 2019 (with updated microcode), and future parts,
+a control has been introduced which allows TSX to be turned off.
+
+On systems with the ability to turn TSX off, this boolean offers system wide
+control of whether TSX is enabled or disabled.
+
### ucode (x86)
> `= [<integer> | scan]`
diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile
index 8a8d8f060f..9b9a4435fb 100644
--- a/xen/arch/x86/Makefile
+++ b/xen/arch/x86/Makefile
@@ -66,6 +66,7 @@ obj-y += sysctl.o
obj-y += time.o
obj-y += trace.o
obj-y += traps.o
+obj-y += tsx.o
obj-y += usercopy.o
obj-y += x86_emulate.o
obj-$(CONFIG_TBOOT) += tboot.o
diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index 57e80694f2..1727497459 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -524,6 +524,20 @@ void recalculate_cpuid_policy(struct domain *d)
if ( cpu_has_itsc && (d->disable_migrate || d->arch.vtsc) )
__set_bit(X86_FEATURE_ITSC, max_fs);
+ /*
+ * On hardware with MSR_TSX_CTRL, the admin may have elected to disable
+ * TSX and hide the feature bits. Migrating-in VMs may have been booted
+ * pre-mitigation when the TSX features were visible.
+ *
+ * This situation is compatible (albeit with a perf hit to any TSX code in
+ * the guest), so allow the feature bits to remain set.
+ */
+ if ( cpu_has_tsx_ctrl )
+ {
+ __set_bit(X86_FEATURE_HLE, max_fs);
+ __set_bit(X86_FEATURE_RTM, max_fs);
+ }
+
/* Clamp the toolstacks choices to reality. */
for ( i = 0; i < ARRAY_SIZE(fs); i++ )
fs[i] &= max_fs[i];
diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
index 56de0fe9e1..c2722d7c73 100644
--- a/xen/arch/x86/msr.c
+++ b/xen/arch/x86/msr.c
@@ -132,6 +132,7 @@ int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val)
case MSR_FLUSH_CMD:
/* Write-only */
case MSR_TSX_FORCE_ABORT:
+ case MSR_TSX_CTRL:
/* Not offered to guests. */
goto gp_fault;
@@ -260,6 +261,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
case MSR_ARCH_CAPABILITIES:
/* Read-only */
case MSR_TSX_FORCE_ABORT:
+ case MSR_TSX_CTRL:
/* Not offered to guests. */
goto gp_fault;
diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c
index cf790f36ef..c1c7c44000 100644
--- a/xen/arch/x86/setup.c
+++ b/xen/arch/x86/setup.c
@@ -1594,6 +1594,8 @@ void __init noreturn __start_xen(unsigned long mbi_p)
early_microcode_init();
+ tsx_init(); /* Needs microcode. May change HLE/RTM feature bits. */
+
identify_cpu(&boot_cpu_data);
set_in_cr4(X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT);
diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
index 737a44f055..e21cf0a310 100644
--- a/xen/arch/x86/smpboot.c
+++ b/xen/arch/x86/smpboot.c
@@ -376,6 +376,8 @@ void start_secondary(void *unused)
if ( boot_cpu_has(X86_FEATURE_IBRSB) )
wrmsrl(MSR_SPEC_CTRL, default_xen_spec_ctrl);
+ tsx_init(); /* Needs microcode. May change HLE/RTM feature bits. */
+
if ( xen_guest )
hypervisor_ap_setup();
diff --git a/xen/arch/x86/tsx.c b/xen/arch/x86/tsx.c
new file mode 100644
index 0000000000..a8ec2ccc69
--- /dev/null
+++ b/xen/arch/x86/tsx.c
@@ -0,0 +1,74 @@
+#include <xen/init.h>
+#include <asm/msr.h>
+
+/*
+ * Valid values:
+ * 1 => Explicit tsx=1
+ * 0 => Explicit tsx=0
+ * -1 => Default, implicit tsx=1
+ *
+ * This is arranged such that the bottom bit encodes whether TSX is actually
+ * enabled (a clear bottom bit means disabled), while the sign distinguishes
+ * explicit (>=0) from implicit (<0) settings.
+ */
+int8_t __read_mostly opt_tsx = -1;
+int8_t __read_mostly cpu_has_tsx_ctrl = -1;
+
+static int __init parse_tsx(const char *s)
+{
+ int rc = 0, val = parse_bool(s, NULL);
+
+ if ( val >= 0 )
+ opt_tsx = val;
+ else
+ rc = -EINVAL;
+
+ return rc;
+}
+custom_param("tsx", parse_tsx);
+
+void tsx_init(void)
+{
+ /*
+ * This function is first called between microcode being loaded, and CPUID
+ * being scanned generally. Calculate from raw data whether MSR_TSX_CTRL
+ * is available.
+ */
+ if ( unlikely(cpu_has_tsx_ctrl < 0) )
+ {
+ uint64_t caps = 0;
+
+ if ( boot_cpu_data.cpuid_level >= 7 &&
+ (cpuid_count_edx(7, 0) & cpufeat_mask(X86_FEATURE_ARCH_CAPS)) )
+ rdmsrl(MSR_ARCH_CAPABILITIES, caps);
+
+ cpu_has_tsx_ctrl = !!(caps & ARCH_CAPS_TSX_CTRL);
+ }
+
+ if ( cpu_has_tsx_ctrl )
+ {
+ uint64_t val;
+
+ rdmsrl(MSR_TSX_CTRL, val);
+
+ val &= ~(TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR);
+ /* Check bottom bit only. Higher bits are various sentinels. */
+ if ( !(opt_tsx & 1) )
+ val |= TSX_CTRL_RTM_DISABLE | TSX_CTRL_CPUID_CLEAR;
+
+ wrmsrl(MSR_TSX_CTRL, val);
+ }
+ else if ( opt_tsx >= 0 )
+ printk_once(XENLOG_WARNING
+ "MSR_TSX_CTRL not available - Ignoring tsx= setting\n");
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-file-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h
index 32746aa8ae..d5f3899f73 100644
--- a/xen/include/asm-x86/msr-index.h
+++ b/xen/include/asm-x86/msr-index.h
@@ -53,6 +53,7 @@
#define ARCH_CAPS_SSB_NO (_AC(1, ULL) << 4)
#define ARCH_CAPS_MDS_NO (_AC(1, ULL) << 5)
#define ARCH_CAPS_IF_PSCHANGE_MC_NO (_AC(1, ULL) << 6)
+#define ARCH_CAPS_TSX_CTRL (_AC(1, ULL) << 7)
#define MSR_FLUSH_CMD 0x0000010b
#define FLUSH_CMD_L1D (_AC(1, ULL) << 0)
@@ -60,6 +61,10 @@
#define MSR_TSX_FORCE_ABORT 0x0000010f
#define TSX_FORCE_ABORT_RTM (_AC(1, ULL) << 0)
+#define MSR_TSX_CTRL 0x00000122
+#define TSX_CTRL_RTM_DISABLE (_AC(1, ULL) << 0)
+#define TSX_CTRL_CPUID_CLEAR (_AC(1, ULL) << 1)
+
/* Intel MSRs. Some also available on other CPUs */
#define MSR_IA32_PERFCTR0 0x000000c1
#define MSR_IA32_A_PERFCTR0 0x000004c1
diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h
index d33ac34d29..1b52712180 100644
--- a/xen/include/asm-x86/processor.h
+++ b/xen/include/asm-x86/processor.h
@@ -263,6 +263,16 @@ static always_inline unsigned int cpuid_count_ebx(
return ebx;
}
+static always_inline unsigned int cpuid_count_edx(
+ unsigned int leaf, unsigned int subleaf)
+{
+ unsigned int edx, tmp;
+
+ cpuid_count(leaf, subleaf, &tmp, &tmp, &tmp, &edx);
+
+ return edx;
+}
+
static inline unsigned long read_cr0(void)
{
unsigned long cr0;
@@ -609,6 +619,9 @@ static inline uint8_t get_cpu_family(uint32_t raw, uint8_t *model,
return fam;
}
+extern int8_t opt_tsx, cpu_has_tsx_ctrl;
+void tsx_init(void);
+
#endif /* !__ASSEMBLY__ */
#endif /* __ASM_X86_PROCESSOR_H */
diff --git a/xen/include/xen/lib.h b/xen/include/xen/lib.h
index 89939f43c8..6529f12dae 100644
--- a/xen/include/xen/lib.h
+++ b/xen/include/xen/lib.h
@@ -114,6 +114,16 @@ extern int printk_ratelimit(void);
#define gprintk(lvl, fmt, args...) \
printk(XENLOG_GUEST lvl "%pv " fmt, current, ## args)
+#define printk_once(fmt, args...) \
+({ \
+ static bool __read_mostly once_; \
+ if ( unlikely(!once_) ) \
+ { \
+ once_ = true; \
+ printk(fmt, ## args); \
+ } \
+})
+
#ifdef NDEBUG
static inline void
|