File: arch/amd64/amd64/cpu.c
Warning: line 1336, column 8: Assigned value is garbage or undefined (flagged in rdrand(); see the note after that function)
1 | /*	$OpenBSD: cpu.c,v 1.177 2023/11/22 18:50:10 bluhm Exp $	*/
2 | /* $NetBSD: cpu.c,v 1.1 2003/04/26 18:39:26 fvdl Exp $ */
3 |
4 | /*-
5 |  * Copyright (c) 2000 The NetBSD Foundation, Inc.
6 |  * All rights reserved.
7 |  *
8 |  * This code is derived from software contributed to The NetBSD Foundation
9 |  * by RedBack Networks Inc.
10 |  *
11 |  * Author: Bill Sommerfeld
12 |  *
13 |  * Redistribution and use in source and binary forms, with or without
14 |  * modification, are permitted provided that the following conditions
15 |  * are met:
16 |  * 1. Redistributions of source code must retain the above copyright
17 |  *    notice, this list of conditions and the following disclaimer.
18 |  * 2. Redistributions in binary form must reproduce the above copyright
19 |  *    notice, this list of conditions and the following disclaimer in the
20 |  *    documentation and/or other materials provided with the distribution.
21 |  *
22 |  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
23 |  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
24 |  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
25 |  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
26 |  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 |  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 |  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 |  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 |  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 |  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 |  * POSSIBILITY OF SUCH DAMAGE.
33 |  */
34 |
35 | /*
36 |  * Copyright (c) 1999 Stefan Grefen
37 |  *
38 |  * Redistribution and use in source and binary forms, with or without
39 |  * modification, are permitted provided that the following conditions
40 |  * are met:
41 |  * 1. Redistributions of source code must retain the above copyright
42 |  *    notice, this list of conditions and the following disclaimer.
43 |  * 2. Redistributions in binary form must reproduce the above copyright
44 |  *    notice, this list of conditions and the following disclaimer in the
45 |  *    documentation and/or other materials provided with the distribution.
46 |  * 3. All advertising materials mentioning features or use of this software
47 |  *    must display the following acknowledgement:
48 |  *      This product includes software developed by the NetBSD
49 |  *      Foundation, Inc. and its contributors.
50 |  * 4. Neither the name of The NetBSD Foundation nor the names of its
51 |  *    contributors may be used to endorse or promote products derived
52 |  *    from this software without specific prior written permission.
53 |  *
54 |  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY
55 |  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 |  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE
58 |  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 |  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 |  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 |  * SUCH DAMAGE.
65 |  */
66 |
67 | #include "lapic.h"
68 | #include "ioapic.h"
69 | #include "vmm.h"
70 | #include "pctr.h"
71 | #include "pvbus.h"
72 |
73 | #include <sys/param.h>
74 | #include <sys/proc.h>
75 | #include <sys/timeout.h>
76 | #include <sys/systm.h>
77 | #include <sys/device.h>
78 | #include <sys/malloc.h>
79 | #include <sys/memrange.h>
80 | #include <sys/atomic.h>
81 | #include <sys/user.h>
82 |
83 | #include <uvm/uvm_extern.h>
84 |
85 | #include <machine/codepatch.h>
86 | #include <machine/cpu_full.h>
87 | #include <machine/cpufunc.h>
88 | #include <machine/cpuvar.h>
89 | #include <machine/pmap.h>
90 | #include <machine/vmparam.h>
91 | #include <machine/mpbiosvar.h>
92 | #include <machine/pcb.h>
93 | #include <machine/specialreg.h>
94 | #include <machine/segments.h>
95 | #include <machine/gdt.h>
96 | #include <machine/pio.h>
97 | #include <machine/vmmvar.h>
98 |
99 | #if NLAPIC > 0
100 | #include <machine/i82489reg.h>
101 | #include <machine/i82489var.h>
102 | #endif
103 |
104 | #if NIOAPIC > 0
105 | #include <machine/i82093var.h>
106 | #endif
107 |
108 | #if NPCTR > 0
109 | #include <machine/pctr.h>
110 | #endif
111 |
112 | #if NPVBUS > 0
113 | #include <dev/pv/pvvar.h>
114 | #endif
115 |
116 | #include <dev/ic/mc146818reg.h>
117 | #include <amd64/isa/nvram.h>
118 | #include <dev/isa/isareg.h>
119 |
120 | #ifdef HIBERNATE
121 | #include <sys/hibernate.h>
122 | #include <machine/hibernate.h>
123 | #endif /* HIBERNATE */
124 |
125 | /* #define CPU_DEBUG */
126 |
127 | #ifdef CPU_DEBUG
128 | #define DPRINTF(x...)	do { printf(x); } while(0)
129 | #else
130 | #define DPRINTF(x...)
131 | #endif /* CPU_DEBUG */
132 |
133 | int	cpu_match(struct device *, void *, void *);
134 | void	cpu_attach(struct device *, struct device *, void *);
135 | int	cpu_activate(struct device *, int);
136 | void	patinit(struct cpu_info *ci);
137 | #if NVMM > 0
138 | void	cpu_init_vmm(struct cpu_info *ci);
139 | #endif /* NVMM > 0 */
140 |
141 | struct cpu_softc {
142 | 	struct device sc_dev;		/* device tree glue */
143 | 	struct cpu_info *sc_info;	/* pointer to CPU info */
144 | };
145 |
146 | void	replacesmap(void);
147 | void	replacemeltdown(void);
148 | void	replacemds(void);
149 |
150 | extern long _stac;
151 | extern long _clac;
152 |
153 | int cpuid_level = 0;		/* cpuid(0).eax */
154 | char cpu_vendor[16] = { 0 };	/* cpuid(0).e[bdc]x, \0 */
155 | int cpu_id = 0;			/* cpuid(1).eax */
156 | int cpu_ebxfeature = 0;		/* cpuid(1).ebx */
157 | int cpu_ecxfeature = 0;		/* cpuid(1).ecx */
158 | int cpu_feature = 0;		/* cpuid(1).edx */
159 | int cpu_perf_eax = 0;		/* cpuid(0xa).eax */
160 | int cpu_perf_ebx = 0;		/* cpuid(0xa).ebx */
161 | int cpu_perf_edx = 0;		/* cpuid(0xa).edx */
162 | int cpu_apmi_edx = 0;		/* cpuid(0x80000007).edx */
163 | int ecpu_ecxfeature = 0;	/* cpuid(0x80000001).ecx */
164 | int cpu_meltdown = 0;
165 | int cpu_use_xsaves = 0;
166 |
167 | void
168 | replacesmap(void)
169 | {
170 | 	static int replacedone = 0;
171 | 	int s;
172 |
173 | 	if (replacedone)
174 | 		return;
175 | 	replacedone = 1;
176 |
177 | 	s = splhigh();
178 |
179 | 	codepatch_replace(CPTAG_STAC, &_stac, 3);
180 | 	codepatch_replace(CPTAG_CLAC, &_clac, 3);
181 |
182 | 	splx(s);
183 | }
184 |
185 | void
186 | replacemeltdown(void)
187 | {
188 | 	static int replacedone = 0;
189 | 	struct cpu_info *ci = &cpu_info_primary;
190 | 	int swapgs_vuln = 0, ibrs = 0, s;
191 |
192 | 	if (strcmp(cpu_vendor, "GenuineIntel") == 0) {
193 | 		int family = ci->ci_family;
194 | 		int model = ci->ci_model;
195 |
196 | 		swapgs_vuln = 1;
197 | 		if (family == 0x6 &&
198 | 		    (model == 0x37 || model == 0x4a || model == 0x4c ||
199 | 		    model == 0x4d || model == 0x5a || model == 0x5d ||
200 | 		    model == 0x6e || model == 0x65 || model == 0x75)) {
201 | 			/* Silvermont, Airmont */
202 | 			swapgs_vuln = 0;
203 | 		} else if (family == 0x6 && (model == 0x85 || model == 0x57)) {
204 | 			/* KnightsLanding */
205 | 			swapgs_vuln = 0;
206 | 		}
207 | 		if ((ci->ci_feature_sefflags_edx & SEFF0EDX_ARCH_CAP) &&
208 | 		    (rdmsr(MSR_ARCH_CAPABILITIES) & ARCH_CAP_IBRS_ALL)) {
209 | 			ibrs = 2;
210 | 		} else if (ci->ci_feature_sefflags_edx & SEFF0EDX_IBRS) {
211 | 			ibrs = 1;
212 | 		}
213 | 	} else if (strcmp(cpu_vendor, "AuthenticAMD") == 0 &&
214 | 	    ci->ci_pnfeatset >= 0x80000008) {
215 | 		if (ci->ci_feature_amdspec_ebx & CPUIDEBX_IBRS_ALWAYSON) {
216 | 			ibrs = 2;
217 | 		} else if ((ci->ci_feature_amdspec_ebx & CPUIDEBX_IBRS) &&
218 | 		    (ci->ci_feature_amdspec_ebx & CPUIDEBX_IBRS_PREF)) {
219 | 			ibrs = 1;
220 | 		}
221 | 	}
222 |
223 | 	/* Enhanced IBRS: turn it on once on each CPU and don't touch again */
224 | 	if (ibrs == 2)
225 | 		wrmsr(MSR_SPEC_CTRL, SPEC_CTRL_IBRS);
226 |
227 | 	if (replacedone)
228 | 		return;
229 | 	replacedone = 1;
230 |
231 | 	s = splhigh();
232 | 	if (ibrs == 2 || (ci->ci_feature_sefflags_edx & SEFF0EDX_IBT)) {
233 | 		extern const char _jmprax, _jmpr11, _jmpr13;
234 | 		extern const short _jmprax_len, _jmpr11_len, _jmpr13_len;
235 | 		codepatch_replace(CPTAG_RETPOLINE_RAX, &_jmprax, _jmprax_len);
236 | 		codepatch_replace(CPTAG_RETPOLINE_R11, &_jmpr11, _jmpr11_len);
237 | 		codepatch_replace(CPTAG_RETPOLINE_R13, &_jmpr13, _jmpr13_len);
238 | 	}
239 |
240 | 	if (!cpu_meltdown)
241 | 		codepatch_nop(CPTAG_MELTDOWN_NOP);
242 | 	else {
243 | 		extern long alltraps_kern_meltdown;
244 |
245 | 		/* eliminate conditional branch in alltraps */
246 | 		codepatch_jmp(CPTAG_MELTDOWN_ALLTRAPS, &alltraps_kern_meltdown);
247 |
248 | 		/* enable reuse of PCID for U-K page tables */
249 | 		if (pmap_use_pcid) {
250 | 			extern long _pcid_set_reuse;
251 | 			DPRINTF("%s: codepatching PCID use\n", __func__);
252 | 			codepatch_replace(CPTAG_PCID_SET_REUSE,
253 | 			    &_pcid_set_reuse, PCID_SET_REUSE_SIZE);
254 | 		}
255 | 	}
256 |
257 | 	/*
258 | 	 * CVE-2019-1125: if the CPU has SMAP and it's not vulnerable to
259 | 	 * Meltdown, then it's protected both from speculatively mis-skipping
260 | 	 * the swapgs during interrupts of userspace and from speculatively
261 | 	 * mis-taking a swapgs during interrupts while already in the kernel
262 | 	 * as the speculative path will fault from SMAP.  Warning: enabling
263 | 	 * WRGSBASE would break this 'protection'.
264 | 	 *
265 | 	 * Otherwise, if the CPU's swapgs can't be speculated over and it
266 | 	 * _is_ vulnerable to Meltdown then the %cr3 change will serialize
267 | 	 * user->kern transitions, but we still need to mitigate the
268 | 	 * already-in-kernel cases.
269 | 	 */
270 | 	if (!cpu_meltdown && (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMAP)) {
271 | 		codepatch_nop(CPTAG_FENCE_SWAPGS_MIS_TAKEN);
272 | 		codepatch_nop(CPTAG_FENCE_NO_SAFE_SMAP);
273 | 	} else if (!swapgs_vuln && cpu_meltdown) {
274 | 		codepatch_nop(CPTAG_FENCE_SWAPGS_MIS_TAKEN);
275 | 	}
276 | 	splx(s);
277 | }
278 |
279 | void
280 | replacemds(void)
281 | {
282 | 	static int replacedone = 0;
283 | 	extern long mds_handler_bdw, mds_handler_ivb, mds_handler_skl;
284 | 	extern long mds_handler_skl_sse, mds_handler_skl_avx;
285 | 	extern long mds_handler_silvermont, mds_handler_knights;
286 | 	struct cpu_info *ci = &cpu_info_primary;
287 | 	CPU_INFO_ITERATOR cii;
288 | 	void *handler = NULL, *vmm_handler = NULL;
289 | 	const char *type;
290 | 	int has_verw, s;
291 |
292 | 	/* ci_mds_tmp must be 32byte aligned for AVX instructions */
293 | 	CTASSERT((offsetof(struct cpu_info, ci_mds_tmp) -
294 | 	    offsetof(struct cpu_info, ci_PAGEALIGN)) % 32 == 0);
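
Note: as the macro expansion in the analyzer view shows, CTASSERT() implements a compile-time assertion by declaring an array whose size is 1 when the condition holds and -1 when it fails, so a violated assertion becomes a compile error. A minimal stand-alone sketch of the same trick (MY_CTASSERT and struct item are hypothetical names):

	/* A false condition yields a negative array size -> compile error. */
	#define MY_CTASSERT(x)	extern char my_ctassert[(x) ? 1 : -1]

	struct item { char pad[32]; };
	MY_CTASSERT(sizeof(struct item) % 32 == 0);	/* compiles */
	/* MY_CTASSERT(sizeof(struct item) % 32 == 1);	   would not compile */
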
295 |
296 | 	if (replacedone)
297 | 		return;
298 | 	replacedone = 1;
299 |
300 | 	if (strcmp(cpu_vendor, "GenuineIntel") != 0 ||
301 | 	    ((ci->ci_feature_sefflags_edx & SEFF0EDX_ARCH_CAP) &&
302 | 	    (rdmsr(MSR_ARCH_CAPABILITIES) & ARCH_CAP_MDS_NO))) {
303 | 		/* Unaffected, nop out the handling code */
304 | 		has_verw = 0;
305 | 	} else if (ci->ci_feature_sefflags_edx & SEFF0EDX_MD_CLEAR) {
306 | 		/* new firmware, use VERW */
307 | 		has_verw = 1;
308 | 	} else {
309 | 		int family = ci->ci_family;
310 | 		int model = ci->ci_model;
311 | 		int stepping = CPUID2STEPPING(ci->ci_signature);
312 |
313 | 		has_verw = 0;
314 | 		if (family == 0x6 &&
315 | 		    (model == 0x2e || model == 0x1e || model == 0x1f ||
316 | 		    model == 0x1a || model == 0x2f || model == 0x25 ||
317 | 		    model == 0x2c || model == 0x2d || model == 0x2a ||
318 | 		    model == 0x3e || model == 0x3a)) {
319 | 			/* Nehalem, SandyBridge, IvyBridge */
320 | 			handler = vmm_handler = &mds_handler_ivb;
321 | 			type = "IvyBridge";
322 | 			CPU_INFO_FOREACH(cii, ci) {
323 | 				ci->ci_mds_buf = malloc(672, M_DEVBUF,
324 | 				    M_WAITOK);
325 | 				memset(ci->ci_mds_buf, 0, 16);
326 | 			}
327 | 		} else if (family == 0x6 &&
328 | 		    (model == 0x3f || model == 0x3c || model == 0x45 ||
329 | 		    model == 0x46 || model == 0x56 || model == 0x4f ||
330 | 		    model == 0x47 || model == 0x3d)) {
331 | 			/* Haswell and Broadwell */
332 | 			handler = vmm_handler = &mds_handler_bdw;
333 | 			type = "Broadwell";
334 | 			CPU_INFO_FOREACH(cii, ci) {
335 | 				ci->ci_mds_buf = malloc(1536, M_DEVBUF,
336 | 				    M_WAITOK);
337 | 			}
338 | 		} else if (family == 0x6 &&
339 | 		    ((model == 0x55 && stepping <= 5) || model == 0x4e ||
340 | 		    model == 0x5e || (model == 0x8e && stepping <= 0xb) ||
341 | 		    (model == 0x9e && stepping <= 0xc))) {
342 | 			/*
343 | 			 * Skylake, KabyLake, CoffeeLake, WhiskeyLake,
344 | 			 * CascadeLake
345 | 			 */
346 | 			/* XXX mds_handler_skl_avx512 */
347 | 			if (xgetbv(0) & XFEATURE_AVX) {
348 | 				handler = &mds_handler_skl_avx;
349 | 				type = "Skylake AVX";
350 | 			} else {
351 | 				handler = &mds_handler_skl_sse;
352 | 				type = "Skylake SSE";
353 | 			}
354 | 			vmm_handler = &mds_handler_skl;
355 | 			CPU_INFO_FOREACH(cii, ci) {
356 | 				vaddr_t b64;
357 | 				b64 = (vaddr_t)malloc(6 * 1024 + 64 + 63,
358 | 				    M_DEVBUF, M_WAITOK);
359 | 				ci->ci_mds_buf = (void *)((b64 + 63) & ~63);
360 | 				memset(ci->ci_mds_buf, 0, 64);
361 | 			}
362 | 		} else if (family == 0x6 &&
363 | 		    (model == 0x37 || model == 0x4a || model == 0x4c ||
364 | 		    model == 0x4d || model == 0x5a || model == 0x5d ||
365 | 		    model == 0x6e || model == 0x65 || model == 0x75)) {
366 | 			/* Silvermont, Airmont */
367 | 			handler = vmm_handler = &mds_handler_silvermont;
368 | 			type = "Silvermont";
369 | 			CPU_INFO_FOREACH(cii, ci) {
370 | 				ci->ci_mds_buf = malloc(256, M_DEVBUF,
371 | 				    M_WAITOK);
372 | 				memset(ci->ci_mds_buf, 0, 16);
373 | 			}
374 | 		} else if (family == 0x6 && (model == 0x85 || model == 0x57)) {
375 | 			handler = vmm_handler = &mds_handler_knights;
376 | 			type = "KnightsLanding";
377 | 			CPU_INFO_FOREACH(cii, ci) {
378 | 				vaddr_t b64;
379 | 				b64 = (vaddr_t)malloc(1152 + 63, M_DEVBUF,
380 | 				    M_WAITOK);
381 | 				ci->ci_mds_buf = (void *)((b64 + 63) & ~63);
382 | 			}
383 | 		}
384 | 	}
385 |
386 | 	if (handler != NULL) {
387 | 		printf("cpu0: using %s MDS workaround%s\n", type, "");
388 | 		s = splhigh();
389 | 		codepatch_call(CPTAG_MDS, handler);
390 | 		codepatch_call(CPTAG_MDS_VMM, vmm_handler);
391 | 		splx(s);
392 | 	} else if (has_verw) {
393 | 		/* The new firmware enhances L1D_FLUSH MSR to flush MDS too */
394 | 		if (cpu_info_primary.ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr == 1) {
395 | 			s = splhigh();
396 | 			codepatch_nop(CPTAG_MDS_VMM);
397 | 			splx(s);
398 | 			type = " (except on vmm entry)";
399 | 		} else {
400 | 			type = "";
401 | 		}
402 | 		printf("cpu0: using %s MDS workaround%s\n", "VERW", type);
403 | 	} else {
404 | 		s = splhigh();
405 | 		codepatch_nop(CPTAG_MDS);
406 | 		codepatch_nop(CPTAG_MDS_VMM);
407 | 		splx(s);
408 | 	}
409 | }
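
Note: the Skylake and KnightsLanding branches above over-allocate by 63 bytes and round the pointer up so the MDS flush buffer starts on a 64-byte cache-line boundary (the buffers are never freed, so losing the original malloc pointer is harmless here). A user-space sketch of the same round-up, using only standard C:

	#include <stdint.h>
	#include <stdlib.h>

	/* Round a fresh allocation up to the next 64-byte boundary, as
	 * replacemds() does for ci_mds_buf; the original pointer is not
	 * kept, so the result must never be passed to free(). */
	void *
	alloc_aligned64(size_t size)
	{
		uintptr_t raw = (uintptr_t)malloc(size + 63);

		if (raw == 0)
			return NULL;
		return (void *)((raw + 63) & ~(uintptr_t)63);
	}
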
410 |
411 | #ifdef MULTIPROCESSOR
412 | int mp_cpu_start(struct cpu_info *);
413 | void mp_cpu_start_cleanup(struct cpu_info *);
414 | struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL,
415 |     mp_cpu_start_cleanup };
416 | #endif /* MULTIPROCESSOR */
417 |
418 | const struct cfattach cpu_ca = {
419 | 	sizeof(struct cpu_softc), cpu_match, cpu_attach, NULL, cpu_activate
420 | };
421 |
422 | struct cfdriver cpu_cd = {
423 | 	NULL, "cpu", DV_DULL
424 | };
425 |
426 | /*
427 |  * Statically-allocated CPU info for the primary CPU (or the only
428 |  * CPU, on uniprocessors).  The CPU info list is initialized to
429 |  * point at it.
430 |  */
431 | struct cpu_info_full cpu_info_full_primary = { .cif_cpu = { .ci_self = &cpu_info_primary } };
432 |
433 | struct cpu_info *cpu_info_list = &cpu_info_primary;
434 |
435 | #ifdef MULTIPROCESSOR
436 | /*
437 |  * Array of CPU info structures.  Must be statically-allocated because
438 |  * curproc, etc. are used early.
439 |  */
440 | struct cpu_info *cpu_info[MAXCPUS] = { &cpu_info_primary };
441 |
442 | void	cpu_hatch(void *);
443 | void	cpu_boot_secondary(struct cpu_info *ci);
444 | void	cpu_start_secondary(struct cpu_info *ci);
445 | #endif
446 |
447 | int
448 | cpu_match(struct device *parent, void *match, void *aux)
449 | {
450 | 	struct cfdata *cf = match;
451 | 	struct cpu_attach_args *caa = aux;
452 |
453 | 	if (strcmp(caa->caa_name, cf->cf_driver->cd_name) != 0)
454 | 		return 0;
455 |
456 | 	if (cf->cf_unit >= MAXCPUS)
457 | 		return 0;
458 |
459 | 	return 1;
460 | }
461 |
462 | void	cpu_idle_mwait_cycle(void);
463 | void	cpu_init_mwait(struct cpu_softc *, struct cpu_info *);
464 |
465 | u_int	cpu_mwait_size, cpu_mwait_states;
466 |
467 | void
468 | cpu_idle_mwait_cycle(void)
469 | {
470 | 	struct cpu_info *ci = curcpu();
471 |
472 | 	if ((read_rflags() & PSL_I) == 0)
473 | 		panic("idle with interrupts blocked!");
474 |
475 | 	/* something already queued? */
476 | 	if (!cpu_is_idle(ci))
477 | 		return;
478 |
479 | 	/*
480 | 	 * About to idle; setting the MWAIT_IN_IDLE bit tells
481 | 	 * cpu_unidle() that it can't be a no-op and tells cpu_kick()
482 | 	 * that it doesn't need to use an IPI.  We also set the
483 | 	 * MWAIT_KEEP_IDLING bit: those routines clear it to stop
484 | 	 * the mwait.  Once they're set, we do a final check of the
485 | 	 * queue, in case another cpu called setrunqueue() and added
486 | 	 * something to the queue and called cpu_unidle() between
487 | 	 * the check in sched_idle() and here.
488 | 	 */
489 | 	atomic_setbits_int(&ci->ci_mwait, MWAIT_IDLING | MWAIT_ONLY);
490 | 	if (cpu_is_idle(ci)) {
491 | 		monitor(&ci->ci_mwait, 0, 0);
492 | 		if ((ci->ci_mwait & MWAIT_IDLING) == MWAIT_IDLING)
493 | 			mwait(0, 0);
494 | 	}
495 |
496 | 	/* done idling; let cpu_kick() know that an IPI is required */
497 | 	atomic_clearbits_int(&ci->ci_mwait, MWAIT_IDLING);
498 | }
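
Note: the protocol in cpu_idle_mwait_cycle() avoids lost wakeups by ordering "advertise idle", then "final queue check", then mwait, while the waker's store to ci_mwait is what breaks the mwait. Condensed into a sketch (queue_empty() and the flag helpers are hypothetical stand-ins for the macros above):

	/* waiter (idle cpu) */
	set_flags(&ci->ci_mwait, IDLING);	/* tell wakers no IPI is needed */
	if (queue_empty(ci)) {			/* re-check after advertising */
		monitor(&ci->ci_mwait, 0, 0);	/* arm monitor on the flag word */
		if (flags_set(&ci->ci_mwait, IDLING))
			mwait(0, 0);		/* sleeps until ci_mwait is written */
	}

	/* waker (another cpu), after enqueueing work */
	clear_flags(&ci->ci_mwait, IDLING);	/* this store wakes the mwait */
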
499 |
500 | void
501 | cpu_init_mwait(struct cpu_softc *sc, struct cpu_info *ci)
502 | {
503 | 	unsigned int smallest, largest, extensions, c_substates;
504 |
505 | 	if ((cpu_ecxfeature & CPUIDECX_MWAIT) == 0 || cpuid_level < 0x5)
506 | 		return;
507 |
508 | 	/* get the monitor granularity */
509 | 	CPUID(0x5, smallest, largest, extensions, cpu_mwait_states);
510 | 	smallest &= 0xffff;
511 | 	largest  &= 0xffff;
512 |
513 | 	/* mask out states C6/C7 in 31:24 for CHT45 errata */
514 | 	if (strcmp(cpu_vendor, "GenuineIntel") == 0 &&
515 | 	    ci->ci_family == 0x06 && ci->ci_model == 0x4c)
516 | 		cpu_mwait_states &= 0x00ffffff;
517 |
518 | 	printf("%s: mwait min=%u, max=%u", sc->sc_dev.dv_xname,
519 | 	    smallest, largest);
520 | 	if (extensions & 0x1) {
521 | 		if (cpu_mwait_states > 0) {
522 | 			c_substates = cpu_mwait_states;
523 | 			printf(", C-substates=%u", 0xf & c_substates);
524 | 			while ((c_substates >>= 4) > 0)
525 | 				printf(".%u", 0xf & c_substates);
526 | 		}
527 | 		if (extensions & 0x2)
528 | 			printf(", IBE");
529 | 	} else {
530 | 		/* substates not supported, forge the default: just C1 */
531 | 		cpu_mwait_states = 1 << 4;
532 | 	}
533 |
534 | 	/* paranoia: check the values */
535 | 	if (smallest < sizeof(int) || largest < smallest ||
536 | 	    (largest & (sizeof(int)-1)))
537 | 		printf(" (bogus)");
538 | 	else
539 | 		cpu_mwait_size = largest;
540 | 	printf("\n");
541 |
542 | 	/* enable use of mwait; may be overridden by acpicpu later */
543 | 	if (cpu_mwait_size > 0)
544 | 		cpu_idle_cycle_fcn = &cpu_idle_mwait_cycle;
545 | }
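
Note: CPUID leaf 5 returns the smallest and largest monitor-line sizes in the low 16 bits of EAX and EBX, extension flags in ECX, and the per-C-state sub-state counts as 4-bit fields in EDX, which is what the nibble loop above prints. A user-space sketch of the same decoding, assuming GCC/clang's <cpuid.h>:

	#include <stdio.h>
	#include <cpuid.h>

	int
	main(void)
	{
		unsigned int smallest, largest, extensions, substates;

		if (!__get_cpuid(0x5, &smallest, &largest, &extensions, &substates))
			return 1;
		/* the low 16 bits are the monitor granularity in bytes */
		printf("mwait min=%u, max=%u\n", smallest & 0xffff, largest & 0xffff);
		/* each EDX nibble is the sub-state count for one C-state */
		printf("C-substates=%u", substates & 0xf);
		while ((substates >>= 4) > 0)
			printf(".%u", substates & 0xf);
		printf("\n");
		return 0;
	}
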
546 |
547 | void
548 | cpu_attach(struct device *parent, struct device *self, void *aux)
549 | {
550 | 	struct cpu_softc *sc = (void *) self;
551 | 	struct cpu_attach_args *caa = aux;
552 | 	struct cpu_info *ci;
553 | #if defined(MULTIPROCESSOR)
554 | 	int cpunum = sc->sc_dev.dv_unit;
555 | 	vaddr_t kstack;
556 | 	struct pcb *pcb;
557 | #endif
558 |
559 | 	/*
560 | 	 * If we're an Application Processor, allocate a cpu_info
561 | 	 * structure, otherwise use the primary's.
562 | 	 */
563 | 	if (caa->cpu_role == CPU_ROLE_AP) {
564 | 		struct cpu_info_full *cif;
565 |
566 | 		cif = km_alloc(sizeof *cif, &kv_any, &kp_zero, &kd_waitok);
567 | 		ci = &cif->cif_cpu;
568 | #if defined(MULTIPROCESSOR)
569 | 		ci->ci_tss = &cif->cif_tss;
570 | 		ci->ci_gdt = &cif->cif_gdt;
571 | 		memcpy(ci->ci_gdt, cpu_info_primary.ci_gdt, GDT_SIZE);
572 | 		cpu_enter_pages(cif);
573 | 		if (cpu_info[cpunum] != NULL)
574 | 			panic("cpu at apic id %d already attached?", cpunum);
575 | 		cpu_info[cpunum] = ci;
576 | #endif
577 | #ifdef TRAPLOG
578 | 		ci->ci_tlog_base = malloc(sizeof(struct tlog),
579 | 		    M_DEVBUF, M_WAITOK);
580 | #endif
581 | 	} else {
582 | 		ci = &cpu_info_primary;
583 | #if defined(MULTIPROCESSOR)
584 | 		if (caa->cpu_apicid != lapic_cpu_number()) {
585 | 			panic("%s: running cpu is at apic %d"
586 | 			    " instead of at expected %d",
587 | 			    sc->sc_dev.dv_xname, lapic_cpu_number(), caa->cpu_apicid);
588 | 		}
589 | #endif
590 | 	}
591 |
592 | 	ci->ci_self = ci;
593 | 	sc->sc_info = ci;
594 |
595 | 	ci->ci_dev = self;
596 | 	ci->ci_apicid = caa->cpu_apicid;
597 | 	ci->ci_acpi_proc_id = caa->cpu_acpi_proc_id;
598 | #ifdef MULTIPROCESSOR
599 | 	ci->ci_cpuid = cpunum;
600 | #else
601 | 	ci->ci_cpuid = 0;	/* False for APs, but they're not used anyway */
602 | #endif
603 | 	ci->ci_func = caa->cpu_func;
604 | 	ci->ci_handled_intr_level = IPL_NONE;
605 |
606 | #ifndef SMALL_KERNEL
607 | 	strlcpy(ci->ci_sensordev.xname, ci->ci_dev->dv_xname,
608 | 	    sizeof(ci->ci_sensordev.xname));
609 | #endif
610 |
611 | #if defined(MULTIPROCESSOR)
612 | 	/*
613 | 	 * Allocate UPAGES contiguous pages for the idle PCB and stack.
614 | 	 */
615 | 	kstack = (vaddr_t)km_alloc(USPACE, &kv_any, &kp_dirty, &kd_nowait);
616 | 	if (kstack == 0) {
617 | 		if (caa->cpu_role != CPU_ROLE_AP) {
618 | 			panic("cpu_attach: unable to allocate idle stack for"
619 | 			    " primary");
620 | 		}
621 | 		printf("%s: unable to allocate idle stack\n",
622 | 		    sc->sc_dev.dv_xname);
623 | 		return;
624 | 	}
625 | 	pcb = ci->ci_idle_pcb = (struct pcb *) kstack;
626 | 	memset(pcb, 0, USPACE);
627 |
628 | 	pcb->pcb_kstack = kstack + USPACE - 16;
629 | 	pcb->pcb_rbp = pcb->pcb_rsp = kstack + USPACE - 16;
630 | 	pcb->pcb_pmap = pmap_kernel();
631 | 	pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdirpa;
632 | #endif
633 |
634 | 	/* further PCB init done later. */
635 |
636 | 	printf(": ");
637 |
638 | 	switch (caa->cpu_role) {
639 | 	case CPU_ROLE_SP:
640 | 		printf("(uniprocessor)\n");
641 | 		atomic_setbits_int(&ci->ci_flags,
642 | 		    CPUF_PRESENT | CPUF_SP | CPUF_PRIMARY);
643 | 		cpu_intr_init(ci);
644 | #ifndef SMALL_KERNEL
645 | 		cpu_ucode_apply(ci);
646 | #endif
647 | 		cpu_tsx_disable(ci);
648 | 		identifycpu(ci);
649 | 		cpu_fix_msrs(ci);
650 | #ifdef MTRR
651 | 		mem_range_attach();
652 | #endif /* MTRR */
653 | 		/* XXX SP fpuinit(ci) is done earlier */
654 | 		cpu_init(ci);
655 | 		cpu_init_mwait(sc, ci);
656 | 		break;
657 |
658 | 	case CPU_ROLE_BP:
659 | 		printf("apid %d (boot processor)\n", caa->cpu_apicid);
660 | 		atomic_setbits_int(&ci->ci_flags,
661 | 		    CPUF_PRESENT | CPUF_BSP | CPUF_PRIMARY);
662 | 		cpu_intr_init(ci);
663 | 		identifycpu(ci);
664 | 		cpu_fix_msrs(ci);
665 | #ifdef MTRR
666 | 		mem_range_attach();
667 | #endif /* MTRR */
668 |
669 | #if NLAPIC > 0
670 | 		/*
671 | 		 * Enable local apic
672 | 		 */
673 | 		lapic_enable();
674 | 		lapic_calibrate_timer(ci);
675 | #endif
676 | 		/* XXX BP fpuinit(ci) is done earlier */
677 | 		cpu_init(ci);
678 |
679 | #if NIOAPIC > 0
680 | 		ioapic_bsp_id = caa->cpu_apicid;
681 | #endif
682 | 		cpu_init_mwait(sc, ci);
683 | 		break;
684 |
685 | 	case CPU_ROLE_AP:
686 | 		/*
687 | 		 * report on an AP
688 | 		 */
689 | 		printf("apid %d (application processor)\n", caa->cpu_apicid);
690 |
691 | #if defined(MULTIPROCESSOR)
692 | 		cpu_intr_init(ci);
693 | 		cpu_start_secondary(ci);
694 | 		clockqueue_init(&ci->ci_queue);
695 | 		sched_init_cpu(ci);
696 | 		ncpus++;
697 | 		if (ci->ci_flags & CPUF_PRESENT) {
698 | 			ci->ci_next = cpu_info_list->ci_next;
699 | 			cpu_info_list->ci_next = ci;
700 | 		}
701 | #else
702 | 		printf("%s: not started\n", sc->sc_dev.dv_xname);
703 | #endif
704 | 		break;
705 |
706 | 	default:
707 | 		panic("unknown processor type??");
708 | 	}
709 |
710 | #if defined(MULTIPROCESSOR)
711 | 	if (mp_verbose) {
712 | 		printf("%s: kstack at 0x%lx for %d bytes\n",
713 | 		    sc->sc_dev.dv_xname, kstack, USPACE);
714 | 		printf("%s: idle pcb at %p, idle sp at 0x%llx\n",
715 | 		    sc->sc_dev.dv_xname, pcb, pcb->pcb_rsp);
716 | 	}
717 | #endif
718 | #if NVMM > 0
719 | 	cpu_init_vmm(ci);
720 | #endif /* NVMM > 0 */
721 |
722 | #ifndef SMALL_KERNEL
723 | 	if (ci->ci_sensordev.sensors_count > 0)
724 | 		sensordev_install(&ci->ci_sensordev);
725 | #endif
726 | }
727 |
728 | static void
729 | replacexsave(int xsave_ext)
730 | {
731 | 	extern long _xrstor, _xrstors, _xsave, _xsaves, _xsaveopt;
732 | 	static int replacedone = 0;
733 | 	int s;
734 |
735 | 	if (replacedone)
736 | 		return;
737 | 	replacedone = 1;
738 |
739 | 	s = splhigh();
740 | 	codepatch_replace(CPTAG_XRSTORS,
741 | 	    (xsave_ext & XSAVE_XSAVES) ? &_xrstors : &_xrstor, 4);
742 | 	codepatch_replace(CPTAG_XRSTOR, &_xrstor, 4);
743 | 	codepatch_replace(CPTAG_XSAVE,
744 | 	    (xsave_ext & XSAVE_XSAVES) ? &_xsaves :
745 | 	    (xsave_ext & XSAVE_XSAVEOPT) ? &_xsaveopt : &_xsave, 4);
746 | 	splx(s);
747 | }
748 |
749 |
750 | /*
751 |  * Initialize the processor appropriately.
752 |  */
753 |
754 | void
755 | cpu_init(struct cpu_info *ci)
756 | {
757 | 	struct savefpu *sfp;
758 | 	u_int cr4;
759 |
760 | 	/* configure the CPU if needed */
761 | 	if (ci->cpu_setup != NULL)
762 | 		(*ci->cpu_setup)(ci);
763 |
764 | 	cr4 = rcr4() | CR4_DEFAULT;
765 | 	if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMEP)
766 | 		cr4 |= CR4_SMEP;
767 | 	if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMAP)
768 | 		cr4 |= CR4_SMAP;
769 | 	if (ci->ci_feature_sefflags_ecx & SEFF0ECX_UMIP)
770 | 		cr4 |= CR4_UMIP;
771 | 	if ((cpu_ecxfeature & CPUIDECX_XSAVE) && cpuid_level >= 0xd)
772 | 		cr4 |= CR4_OSXSAVE;
773 | 	if (pg_xo)
774 | 		cr4 |= CR4_PKE;
775 | 	if (pmap_use_pcid)
776 | 		cr4 |= CR4_PCIDE;
777 | 	lcr4(cr4);
778 |
779 | 	if ((cpu_ecxfeature & CPUIDECX_XSAVE) && cpuid_level >= 0xd) {
780 | 		u_int32_t eax, ebx, ecx, edx;
781 |
782 | 		xsave_mask = XFEATURE_X87 | XFEATURE_SSE;
783 | 		CPUID_LEAF(0xd, 0, eax, ebx, ecx, edx);
784 | 		xsave_mask |= eax & XFEATURE_AVX;
785 | 		xsetbv(0, xsave_mask);
786 | 		CPUID_LEAF(0xd, 0, eax, ebx, ecx, edx);
787 | 		if (CPU_IS_PRIMARY(ci)) {
788 | 			fpu_save_len = ebx;
789 | 			KASSERT(fpu_save_len <= sizeof(struct savefpu));
790 | 		} else {
791 | 			KASSERT(ebx == fpu_save_len);
792 | 		}
793 |
794 | 		/* check for xsaves, xsaveopt, and supervisor features */
795 | 		CPUID_LEAF(0xd, 1, eax, ebx, ecx, edx);
796 | 		/* Disable XSAVES on AMD family 17h due to Erratum 1386 */
797 | 		if (!strcmp(cpu_vendor, "AuthenticAMD") &&
798 | 		    ci->ci_family == 0x17) {
799 | 			eax &= ~XSAVE_XSAVES;
800 | 		}
801 | 		if (eax & XSAVE_XSAVES) {
802 | #ifndef SMALL_KERNEL
803 | 			if (ci->ci_feature_sefflags_edx & SEFF0EDX_IBT)
804 | 				xsave_mask |= ecx & XFEATURE_CET_U;
805 | #endif
806 | 			if (xsave_mask & XFEATURE_XSS_MASK) {
807 | 				wrmsr(MSR_XSS, xsave_mask & XFEATURE_XSS_MASK);
808 | 				CPUID_LEAF(0xd, 1, eax, ebx, ecx, edx);
809 | 				KASSERT(ebx <= sizeof(struct savefpu));
810 | 			}
811 | 			if (CPU_IS_PRIMARY(ci))
812 | 				cpu_use_xsaves = 1;
813 | 		}
814 |
815 | 		replacexsave(eax);
816 | 	}
817 |
818 | 	if (CPU_IS_PRIMARY(ci)) {
819 | 		/* Clean our FPU save area */
820 | 		sfp = fpu_cleandata;
821 | 		memset(sfp, 0, fpu_save_len);
822 | 		sfp->fp_fxsave.fx_fcw = __INITIAL_NPXCW__;
823 | 		sfp->fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__;
824 | 		xrstor_user(sfp, xsave_mask);
825 | 		if (cpu_use_xsaves || !xsave_mask)
826 | 			fpusave(sfp);
827 | 		else {
828 | 			/* must not use xsaveopt here */
829 | 			xsave(sfp, xsave_mask);
830 | 		}
831 | 	} else {
832 | 		fpureset();
833 | 	}
834 |
835 | #if NVMM > 0
836 | 	/* Re-enable VMM if needed */
837 | 	if (ci->ci_flags & CPUF_VMM)
838 | 		start_vmm_on_cpu(ci);
839 | #endif /* NVMM > 0 */
840 |
841 | #ifdef MULTIPROCESSOR
842 | 	atomic_setbits_int(&ci->ci_flags, CPUF_RUNNING);
843 | 	/*
844 | 	 * Big hammer: flush all TLB entries, including ones from PTEs
845 | 	 * with the G bit set.  This should only be necessary if TLB
846 | 	 * shootdown falls far behind.
847 | 	 */
848 | 	cr4 = rcr4();
849 | 	lcr4(cr4 & ~CR4_PGE);
850 | 	lcr4(cr4);
851 |
852 | 	/* Check if TSC is synchronized. */
853 | 	if (cold && !CPU_IS_PRIMARY(ci))
854 | 		tsc_test_sync_ap(ci);
855 | #endif
856 | }
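
Note: cpu_init() builds xsave_mask from XFEATURE_X87 (bit 0), XFEATURE_SSE (bit 1) and, when offered, XFEATURE_AVX (bit 2), then programs it into XCR0 with xsetbv(). Once a kernel has done that, user space can read the enabled mask back; a sketch assuming a compiler that provides _xgetbv (GCC/clang with -mxsave):

	#include <stdio.h>
	#include <immintrin.h>

	int
	main(void)
	{
		/* XCR0 bit 0 = x87, bit 1 = SSE, bit 2 = AVX, matching xsave_mask */
		unsigned long long xcr0 = _xgetbv(0);

		printf("x87=%llu sse=%llu avx=%llu\n",
		    xcr0 & 1, (xcr0 >> 1) & 1, (xcr0 >> 2) & 1);
		return 0;
	}
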
857 |
858 | #if NVMM > 0
859 | /*
860 |  * cpu_init_vmm
861 |  *
862 |  * Initializes per-cpu VMM state
863 |  *
864 |  * Parameters:
865 |  *  ci: the cpu for which state is being initialized
866 |  */
867 | void
868 | cpu_init_vmm(struct cpu_info *ci)
869 | {
870 | 	/*
871 | 	 * Allocate a per-cpu VMXON region for VMX CPUs
872 | 	 */
873 | 	if (ci->ci_vmm_flags & CI_VMM_VMX) {
874 | 		ci->ci_vmxon_region = (struct vmxon_region *)malloc(PAGE_SIZE,
875 | 		    M_DEVBUF, M_WAITOK | M_ZERO);
876 | 		if (!pmap_extract(pmap_kernel(), (vaddr_t)ci->ci_vmxon_region,
877 | 		    &ci->ci_vmxon_region_pa))
878 | 			panic("Can't locate VMXON region in phys mem");
879 | 		ci->ci_vmcs_pa = VMX_VMCS_PA_CLEAR;
880 | 		rw_init(&ci->ci_vmcs_lock, "vmcslock");
881 | 	}
882 | }
883 | #endif /* NVMM > 0 */
884 |
885 | #ifdef MULTIPROCESSOR
886 | void
887 | cpu_boot_secondary_processors(void)
888 | {
889 | 	struct cpu_info *ci;
890 | 	u_long i;
891 |
892 | 	for (i=0; i < MAXCPUS; i++) {
893 | 		ci = cpu_info[i];
894 | 		if (ci == NULL)
895 | 			continue;
896 | 		if (ci->ci_idle_pcb == NULL)
897 | 			continue;
898 | 		if ((ci->ci_flags & CPUF_PRESENT) == 0)
899 | 			continue;
900 | 		if (ci->ci_flags & (CPUF_BSP | CPUF_SP | CPUF_PRIMARY))
901 | 			continue;
902 | 		ci->ci_randseed = (arc4random() & 0x7fffffff) + 1;
903 | 		cpu_boot_secondary(ci);
904 | 	}
905 | }
906 |
907 | void
908 | cpu_start_secondary(struct cpu_info *ci)
909 | {
910 | 	int i;
911 | 	u_long s;
912 |
913 | 	atomic_setbits_int(&ci->ci_flags, CPUF_AP);
914 |
915 | 	pmap_kenter_pa(MP_TRAMPOLINE, MP_TRAMPOLINE, PROT_READ | PROT_EXEC);
916 | 	pmap_kenter_pa(MP_TRAMP_DATA, MP_TRAMP_DATA, PROT_READ | PROT_WRITE);
917 |
918 | 	CPU_STARTUP(ci);
919 |
920 | 	/*
921 | 	 * wait for it to become ready
922 | 	 */
923 | 	for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT)) && i>0;i--) {
924 | 		delay(10);
925 | 	}
926 | 	if (! (ci->ci_flags & CPUF_PRESENT)) {
927 | 		printf("%s: failed to become ready\n", ci->ci_dev->dv_xname);
928 | #if defined(MPDEBUG) && defined(DDB)
929 | 		printf("dropping into debugger; continue from here to resume boot\n");
930 | 		db_enter();
931 | #endif
932 | 	}
933 |
934 | 	if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
935 | 		atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFY);
936 |
937 | 		/* wait for it to identify */
938 | 		for (i = 2000000; (ci->ci_flags & CPUF_IDENTIFY) && i > 0; i--)
939 | 			delay(10);
940 |
941 | 		if (ci->ci_flags & CPUF_IDENTIFY)
942 | 			printf("%s: failed to identify\n",
943 | 			    ci->ci_dev->dv_xname);
944 | 	}
945 |
946 | 	if (ci->ci_flags & CPUF_IDENTIFIED) {
947 | 		/*
948 | 		 * Test if TSCs are synchronized.  Invalidate cache to
949 | 		 * minimize possible cache effects.  Disable interrupts to
950 | 		 * try to rule out external interference.
951 | 		 */
952 | 		s = intr_disable();
953 | 		wbinvd();
954 | 		tsc_test_sync_bp(curcpu());
955 | 		intr_restore(s);
956 | 	}
957 |
958 | 	CPU_START_CLEANUP(ci);
959 |
960 | 	pmap_kremove(MP_TRAMPOLINE, PAGE_SIZE);
961 | 	pmap_kremove(MP_TRAMP_DATA, PAGE_SIZE);
962 | }
963 |
964 | void
965 | cpu_boot_secondary(struct cpu_info *ci)
966 | {
967 | 	int i;
968 | 	u_long s;
969 |
970 | 	atomic_setbits_int(&ci->ci_flags, CPUF_GO);
971 |
972 | 	for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING)) && i>0;i--) {
973 | 		delay(10);
974 | 	}
975 | 	if (! (ci->ci_flags & CPUF_RUNNING)) {
976 | 		printf("cpu failed to start\n");
977 | #if defined(MPDEBUG) && defined(DDB)
978 | 		printf("dropping into debugger; continue from here to resume boot\n");
979 | 		db_enter();
980 | #endif
981 | 	} else if (cold) {
982 | 		/* Test if TSCs are synchronized again. */
983 | 		s = intr_disable();
984 | 		wbinvd();
985 | 		tsc_test_sync_bp(curcpu());
986 | 		intr_restore(s);
987 | 	}
988 | }
989 |
990 | /*
991 |  * The CPU ends up here when it's ready to run
992 |  * This is called from code in mptramp.s; at this point, we are running
993 |  * in the idle pcb/idle stack of the new cpu.  When this function returns,
994 |  * this processor will enter the idle loop and start looking for work.
995 |  *
996 |  * XXX should share some of this with init386 in machdep.c
997 |  */
998 | void
999 | cpu_hatch(void *v)
1000 | {
1001 | 	struct cpu_info *ci = (struct cpu_info *)v;
1002 | 	int s;
1003 |
1004 | 	cpu_init_msrs(ci);
1005 |
1006 | #ifdef DEBUG
1007 | 	if (ci->ci_flags & CPUF_PRESENT)
1008 | 		panic("%s: already running!?", ci->ci_dev->dv_xname);
1009 | #endif
1010 | 	atomic_setbits_int(&ci->ci_flags, CPUF_PRESENT);
1011 |
1012 | 	lapic_enable();
1013 | 	cpu_ucode_apply(ci);
1014 | 	cpu_tsx_disable(ci);
1015 |
1016 | 	if ((ci->ci_flags & CPUF_IDENTIFIED) == 0) {
1017 | 		/*
1018 | 		 * We need to wait until we can identify, otherwise dmesg
1019 | 		 * output will be messy.
1020 | 		 */
1021 | 		while ((ci->ci_flags & CPUF_IDENTIFY) == 0)
1022 | 			delay(10);
1023 |
1024 | 		identifycpu(ci);
1025 |
1026 | 		/* Prevent identifycpu() from running again */
1027 | 		atomic_setbits_int(&ci->ci_flags, CPUF_IDENTIFIED);
1028 |
1029 | 		/* Signal we're done */
1030 | 		atomic_clearbits_int(&ci->ci_flags, CPUF_IDENTIFY);
1031 | 	}
1032 |
1033 | 	/* These have to run after identifycpu() */
1034 | 	cpu_fix_msrs(ci);
1035 |
1036 | 	/*
1037 | 	 * Test if our TSC is synchronized for the first time.
1038 | 	 * Note that interrupts are off at this point.
1039 | 	 */
1040 | 	wbinvd();
1041 | 	tsc_test_sync_ap(ci);
1042 |
1043 | 	while ((ci->ci_flags & CPUF_GO) == 0)
1044 | 		delay(10);
1045 | #ifdef HIBERNATE
1046 | 	if ((ci->ci_flags & CPUF_PARK) != 0) {
1047 | 		if (ci->ci_feature_sefflags_edx & SEFF0EDX_IBT)
1048 | 			lcr4(rcr4() & ~CR4_CET);
1049 | 		atomic_clearbits_int(&ci->ci_flags, CPUF_PARK);
1050 | 		hibernate_drop_to_real_mode();
1051 | 	}
1052 | #endif /* HIBERNATE */
1053 |
1054 | #ifdef DEBUG
1055 | 	if (ci->ci_flags & CPUF_RUNNING)
1056 | 		panic("%s: already running!?", ci->ci_dev->dv_xname);
1057 | #endif
1058 |
1059 | 	cpu_init_idt();
1060 | 	lapic_set_lvt();
1061 | 	gdt_init_cpu(ci);
1062 | 	fpuinit(ci);
1063 |
1064 | 	lldt(0);
1065 |
1066 | 	cpu_init(ci);
1067 | #if NPVBUS > 0
1068 | 	pvbus_init_cpu();
1069 | #endif
1070 |
1071 | 	/* Re-initialise memory range handling on AP */
1072 | 	if (mem_range_softc.mr_op != NULL)
1073 | 		mem_range_softc.mr_op->initAP(&mem_range_softc);
1074 |
1075 | 	s = splhigh();
1076 | 	lcr8(0);
1077 | 	intr_enable();
1078 | 	splx(s);
1079 |
1080 | 	lapic_startclock();
1081 |
1082 | 	sched_toidle();
1083 | }
1084 |
1085 | #if defined(DDB)
1086 |
1087 | #include <ddb/db_output.h>
1088 | #include <machine/db_machdep.h>
1089 |
1090 | /*
1091 |  * Dump cpu information from ddb.
1092 |  */
1093 | void
1094 | cpu_debug_dump(void)
1095 | {
1096 | 	struct cpu_info *ci;
1097 | 	CPU_INFO_ITERATOR cii;
1098 |
1099 | 	db_printf("addr		dev	id	flags	ipis	curproc\n");
1100 | 	CPU_INFO_FOREACH(cii, ci) {
1101 | 		db_printf("%p	%s	%u	%x	%x	%10p\n",
1102 | 		    ci,
1103 | 		    ci->ci_dev == NULL ? "BOOT" : ci->ci_dev->dv_xname,
1104 | 		    ci->ci_cpuid,
1105 | 		    ci->ci_flags, ci->ci_ipis,
1106 | 		    ci->ci_curproc);
1107 | 	}
1108 | }
1109 | #endif
1110 |
1111 | int
1112 | mp_cpu_start(struct cpu_info *ci)
1113 | {
1114 | 	unsigned short dwordptr[2];
1115 |
1116 | 	/*
1117 | 	 * "The BSP must initialize CMOS shutdown code to 0Ah ..."
1118 | 	 */
1119 |
1120 | 	outb(IO_RTC, NVRAM_RESET);
1121 | 	outb(IO_RTC+1, NVRAM_RESET_JUMP);
1122 |
1123 | 	/*
1124 | 	 * "and the warm reset vector (DWORD based at 40:67) to point
1125 | 	 * to the AP startup code ..."
1126 | 	 */
1127 |
1128 | 	dwordptr[0] = 0;
1129 | 	dwordptr[1] = MP_TRAMPOLINE >> 4;
1130 |
1131 | 	pmap_kenter_pa(0, 0, PROT_READ | PROT_WRITE);
1132 | 	memcpy((u_int8_t *) 0x467, dwordptr, 4);
1133 | 	pmap_kremove(0, PAGE_SIZE);
1134 |
1135 | #if NLAPIC > 0
1136 | 	/*
1137 | 	 * ... prior to executing the following sequence:"
1138 | 	 */
1139 |
1140 | 	if (ci->ci_flags & CPUF_AP) {
1141 | 		x86_ipi_init(ci->ci_apicid);
1142 |
1143 | 		delay(10000);
1144 |
1145 | 		if (cpu_feature & CPUID_APIC) {
1146 | 			x86_ipi(MP_TRAMPOLINE/PAGE_SIZE, ci->ci_apicid,
1147 | 			    LAPIC_DLMODE_STARTUP);
1148 | 			delay(200);
1149 |
1150 | 			x86_ipi(MP_TRAMPOLINE/PAGE_SIZE, ci->ci_apicid,
1151 | 			    LAPIC_DLMODE_STARTUP);
1152 | 			delay(200);
1153 | 		}
1154 | 	}
1155 | #endif
1156 | 	return 0;
1157 | }
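
Note: the warm-reset vector at 40:67 is an offset:segment pair, so dwordptr above is { offset 0, segment MP_TRAMPOLINE >> 4 }. With MP_TRAMPOLINE at 16 pages (16 * 4096 = 0x10000) the segment is 0x1000, and real-mode 0x1000:0x0000 decodes back to physical 0x10000. A quick check of that arithmetic (the constant mirrors the expansion shown in this view):

	#include <assert.h>

	int
	main(void)
	{
		unsigned long mp_trampoline = 16 * (1UL << 12);	/* 0x10000 */
		unsigned short segment = mp_trampoline >> 4;	/* 0x1000 */

		/* real-mode physical address = segment * 16 + offset */
		assert(segment * 16UL + 0 == mp_trampoline);
		return 0;
	}
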
1158 |
1159 | void
1160 | mp_cpu_start_cleanup(struct cpu_info *ci)
1161 | {
1162 | 	/*
1163 | 	 * Ensure the NVRAM reset byte contains something vaguely sane.
1164 | 	 */
1165 |
1166 | 	outb(IO_RTC, NVRAM_RESET);
1167 | 	outb(IO_RTC+1, NVRAM_RESET_RST);
1168 | }
1169 | #endif /* MULTIPROCESSOR */
1170 |
1171 | typedef void (vector)(void);
1172 | extern vector Xsyscall_meltdown, Xsyscall, Xsyscall32;
1173 |
1174 | void
1175 | cpu_init_msrs(struct cpu_info *ci)
1176 | {
1177 | 	wrmsr(MSR_STAR,
1178 | 	    ((uint64_t)GSEL(GCODE_SEL, SEL_KPL) << 32) |
1179 | 	    ((uint64_t)GSEL(GUCODE32_SEL, SEL_UPL) << 48));
1180 | 	wrmsr(MSR_LSTAR, cpu_meltdown ? (uint64_t)Xsyscall_meltdown :
1181 | 	    (uint64_t)Xsyscall);
1182 | 	wrmsr(MSR_CSTAR, (uint64_t)Xsyscall32);
1183 | 	wrmsr(MSR_SFMASK, PSL_NT|PSL_T|PSL_I|PSL_C|PSL_D|PSL_AC);
1184 |
1185 | 	wrmsr(MSR_FSBASE, 0);
1186 | 	wrmsr(MSR_GSBASE, (u_int64_t)ci);
1187 | 	wrmsr(MSR_KERNELGSBASE, 0);
1188 | 	patinit(ci);
1189 | }
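
Note: MSR_STAR packs segment selector bases: bits 47:32 are used on SYSCALL and bits 63:48 on the return to user mode. From the expansions shown above, GSEL(GCODE_SEL, SEL_KPL) = (1 << 3) | 0 = 0x08 and GSEL(GUCODE32_SEL, SEL_UPL) = (3 << 3) | 3 = 0x1b, so the value written is 0x001b000800000000. A quick check of the packing:

	#include <assert.h>
	#include <stdint.h>

	int
	main(void)
	{
		uint64_t kern_cs = (1 << 3) | 0;	/* GSEL(GCODE_SEL, SEL_KPL) */
		uint64_t user_base = (3 << 3) | 3;	/* GSEL(GUCODE32_SEL, SEL_UPL) */
		uint64_t star = (kern_cs << 32) | (user_base << 48);

		assert(star == 0x001b000800000000ULL);
		return 0;
	}
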
1190 |
1191 | void
1192 | cpu_fix_msrs(struct cpu_info *ci)
1193 | {
1194 | 	int family = ci->ci_family;
1195 | 	uint64_t msr, nmsr;
1196 |
1197 | 	if (!strcmp(cpu_vendor, "GenuineIntel")) {
1198 | 		if ((family > 6 || (family == 6 && ci->ci_model >= 0xd)) &&
1199 | 		    rdmsr_safe(MSR_MISC_ENABLE, &msr) == 0 &&
1200 | 		    (msr & MISC_ENABLE_FAST_STRINGS) == 0) {
1201 | 			msr |= MISC_ENABLE_FAST_STRINGS;
1202 | 			wrmsr(MSR_MISC_ENABLE, msr);
1203 | 			DPRINTF("%s: enabled fast strings\n", ci->ci_dev->dv_xname);
1204 |
1205 | 			/*
1206 | 			 * Attempt to disable Silicon Debug and lock the configuration
1207 | 			 * if it's enabled and unlocked.
1208 | 			 */
1209 | 			if (cpu_ecxfeature & CPUIDECX_SDBG) {
1210 | 				msr = rdmsr(IA32_DEBUG_INTERFACE);
1211 | 				if ((msr & IA32_DEBUG_INTERFACE_ENABLE) &&
1212 | 				    (msr & IA32_DEBUG_INTERFACE_LOCK) == 0) {
1213 | 					msr &= IA32_DEBUG_INTERFACE_MASK;
1214 | 					msr |= IA32_DEBUG_INTERFACE_LOCK;
1215 | 					wrmsr(IA32_DEBUG_INTERFACE, msr);
1216 | 				} else if (msr & IA32_DEBUG_INTERFACE_ENABLE)
1217 | 					printf("%s: cannot disable silicon debug\n",
1218 | 					    ci->ci_dev->dv_xname);
1219 | 			}
1220 | 		}
1221 | 	}
1222 |
1223 | 	if (!strcmp(cpu_vendor, "AuthenticAMD")) {
1224 | 		/* Apply AMD errata */
1225 | 		amd64_errata(ci);
1226 |
1227 | 		/*
1228 | 		 * "Mitigation G-2" per AMD's Whitepaper "Software Techniques
1229 | 		 * for Managing Speculation on AMD Processors"
1230 | 		 *
1231 | 		 * By setting MSR C001_1029[1]=1, LFENCE becomes a dispatch
1232 | 		 * serializing instruction.
1233 | 		 *
1234 | 		 * This MSR is available on all AMD families >= 10h, except 11h
1235 | 		 * where LFENCE is always serializing.
1236 | 		 */
1237 | 		if (family >= 0x10 && family != 0x11) {
1238 | 			nmsr = msr = rdmsr(MSR_DE_CFG);
1239 | 			nmsr |= DE_CFG_SERIALIZE_LFENCE;
1240 | 			if (msr != nmsr)
1241 | 				wrmsr(MSR_DE_CFG, nmsr);
1242 | 		}
1243 | 		if (family == 0x17 && ci->ci_model >= 0x31 &&
1244 | 		    (cpu_ecxfeature & CPUIDECX_HV) == 0) {
1245 | 			nmsr = msr = rdmsr(MSR_DE_CFG);
1246 | 			nmsr |= DE_CFG_SERIALIZE_9;
1247 | 			if (msr != nmsr)
1248 | 				wrmsr(MSR_DE_CFG, nmsr);
1249 | 		}
1250 | 	}
1251 |
1252 | #ifndef SMALL_KERNEL
1253 | 	if (ci->ci_feature_sefflags_edx & SEFF0EDX_IBT) {
1254 | 		msr = rdmsr(MSR_S_CET);
1255 | 		wrmsr(MSR_S_CET, msr | MSR_CET_ENDBR_EN);
1256 | 		lcr4(rcr4() | CR4_CET);
1257 | 	}
1258 | #endif
1259 | }
1260 |
1261 | void
1262 | cpu_tsx_disable(struct cpu_info *ci)
1263 | {
1264 | 	uint64_t msr;
1265 | 	uint32_t dummy, sefflags_edx;
1266 |
1267 | 	/* this runs before identifycpu() populates ci_feature_sefflags_edx */
1268 | 	if (cpuid_level < 0x07)
1269 | 		return;
1270 | 	CPUID_LEAF(0x7, 0, dummy, dummy, dummy, sefflags_edx);
1271 |
1272 | 	if (strcmp(cpu_vendor, "GenuineIntel") == 0 &&
1273 | 	    (sefflags_edx & SEFF0EDX_ARCH_CAP)) {
1274 | 		msr = rdmsr(MSR_ARCH_CAPABILITIES);
1275 | 		if (msr & ARCH_CAP_TSX_CTRL) {
1276 | 			msr = rdmsr(MSR_TSX_CTRL);
1277 | 			msr |= TSX_CTRL_RTM_DISABLE | TSX_CTRL_TSX_CPUID_CLEAR;
1278 | 			wrmsr(MSR_TSX_CTRL, msr);
1279 | 		}
1280 | 	}
1281 | }
1282 |
1283 | void
1284 | patinit(struct cpu_info *ci)
1285 | {
1286 | 	extern int	pmap_pg_wc;
1287 | 	u_int64_t	reg;
1288 |
1289 | 	if ((cpu_feature & CPUID_PAT) == 0)
1290 | 		return;
1291 | 	/*
1292 | 	 * Set up PAT bits.
1293 | 	 * The default pat table is the following:
1294 | 	 * WB, WT, UC-, UC, WB, WT, UC-, UC
1295 | 	 * We change it to:
1296 | 	 * WB, WC, UC-, UC, WB, WC, UC-, UC
1297 | 	 * i.e change the WT bit to be WC.
1298 | 	 */
1299 | 	reg = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) |
1300 | 	    PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
1301 | 	    PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) |
1302 | 	    PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC);
1303 |
1304 | 	wrmsr(MSR_CR_PAT, reg);
1305 | 	pmap_pg_wc = PG_WC;
1306 | }
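
Note: each PATENTRY(i, type) places a one-byte memory type at bits 8i..8i+7 of the PAT MSR. With WB=0x6, WC=0x1, UC-=0x7 and UC=0x0 (the values visible in the expansions above), the table WB,WC,UC-,UC,WB,WC,UC-,UC encodes to 0x0007010600070106. A quick check:

	#include <assert.h>
	#include <stdint.h>

	#define PATENTRY(n, type)	((uint64_t)(type) << ((n) * 8))
	#define PAT_UC		0x0UL
	#define PAT_WC		0x1UL
	#define PAT_WB		0x6UL
	#define PAT_UCMINUS	0x7UL

	int
	main(void)
	{
		uint64_t reg = PATENTRY(0, PAT_WB) | PATENTRY(1, PAT_WC) |
		    PATENTRY(2, PAT_UCMINUS) | PATENTRY(3, PAT_UC) |
		    PATENTRY(4, PAT_WB) | PATENTRY(5, PAT_WC) |
		    PATENTRY(6, PAT_UCMINUS) | PATENTRY(7, PAT_UC);

		assert(reg == 0x0007010600070106ULL);
		return 0;
	}
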
1307 |
1308 | struct timeout rdrand_tmo;
1309 | void	rdrand(void *);
1310 |
1311 | void
1312 | rdrand(void *v)
1313 | {
1314 | 	struct timeout *tmo = v;
1315 | 	extern int has_rdrand, has_rdseed;
1316 | 	union {
1317 | 		uint64_t u64;
1318 | 		uint32_t u32[2];
1319 | 	} r, t;
1320 | 	uint64_t tsc;
1321 | 	uint8_t valid = 0;
1322 |
1323 | 	tsc = rdtsc();
1324 | 	if (has_rdseed)
1325 | 		__asm volatile(
1326 | 		    "rdseed	%0\n\t"
1327 | 		    "setc	%1\n"
1328 | 		    : "=r" (r.u64), "=qm" (valid) );
1329 | 	if (has_rdrand && (has_rdseed == 0 || valid == 0))
1330 | 		__asm volatile(
1331 | 		    "rdrand	%0\n\t"
1332 | 		    "setc	%1\n"
1333 | 		    : "=r" (r.u64), "=qm" (valid) );
1334 |
1335 | 	t.u64 = tsc;
1336 | 	t.u64 ^= r.u64;		/* <-- analyzer: "Assigned value is garbage or undefined" */
1337 | 	t.u64 ^= valid;		/* potential rdrand empty */
1338 | 	if (has_rdrand)
1339 | 		t.u64 += rdtsc();	/* potential vmexit latency */
1340 |
1341 | 	enqueue_randomness(t.u32[0]);
1342 | 	enqueue_randomness(t.u32[1]);
1343 |
1344 | 	if (tmo)
1345 | 		timeout_add_msec(tmo, 10);
1346 | }
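
Note: this is where the warning at the top of this report lands. If both has_rdseed and has_rdrand are 0 (cpu_activate() below calls rdrand(NULL) on resume regardless of CPU features), neither asm statement runs and `t.u64 ^= r.u64` at line 1336 reads r.u64 uninitialized. Mixing an indeterminate value into the entropy pool is probably harmless in practice, but it is what the analyzer flags; a sketch of the pattern with the obvious hedge, an `r.u64 = 0` initializer mirroring `valid = 0`:

	#include <stdint.h>

	extern int has_rdrand, has_rdseed;	/* both 0 on CPUs without the insns */

	uint64_t
	mix(uint64_t tsc)
	{
		union { uint64_t u64; uint32_t u32[2]; } r, t;
		uint8_t valid = 0;

		r.u64 = 0;	/* hypothetical fix: without this, the XOR below
				 * reads r.u64 uninitialized when neither rdseed
				 * nor rdrand executes */
		if (has_rdseed)
			__asm volatile("rdseed %0; setc %1"
			    : "=r" (r.u64), "=qm" (valid));
		if (has_rdrand && (has_rdseed == 0 || valid == 0))
			__asm volatile("rdrand %0; setc %1"
			    : "=r" (r.u64), "=qm" (valid));

		t.u64 = tsc;
		t.u64 ^= r.u64;		/* now well-defined on all paths */
		return t.u64;
	}
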
1347 |
1348 | int
1349 | cpu_activate(struct device *self, int act)
1350 | {
1351 | 	struct cpu_softc *sc = (struct cpu_softc *)self;
1352 |
1353 | 	switch (act) {
1354 | 	case DVACT_RESUME:
1355 | 		if (sc->sc_info->ci_cpuid == 0)
1356 | 			rdrand(NULL);
1357 | #if NPCTR > 0
1358 | 		pctr_resume(sc->sc_info);
1359 | #endif
1360 | 		break;
1361 | 	}
1362 |
1363 | 	return (0);
1364 | }
1365 |
1366 | /*
1367 |  * cpu_enter_pages
1368 |  *
1369 |  * Requests mapping of various special pages required in the Intel Meltdown
1370 |  * case (to be entered into the U-K page table):
1371 |  *
1372 |  *  1 tss+gdt page for each CPU
1373 |  *  1 trampoline stack page for each CPU
1374 |  *
1375 |  * The cpu_info_full struct for each CPU straddles these pages.  The offset into
1376 |  * 'cif' is calculated below, for each page.  For more information, consult
1377 |  * the definition of struct cpu_info_full in cpu_full.h
1378 |  *
1379 |  * On CPUs unaffected by Meltdown, this function still configures 'cif' but
1380 |  * the calls to pmap_enter_special become no-ops.
1381 |  *
1382 |  * Parameters:
1383 |  *  cif : the cpu_info_full structure describing a CPU whose pages are to be
1384 |  *    entered into the special meltdown U-K page table.
1385 |  */
1386 | void
1387 | cpu_enter_pages(struct cpu_info_full *cif)
1388 | {
1389 | 	vaddr_t va;
1390 | 	paddr_t pa;
1391 |
1392 | 	/* The TSS+GDT need to be readable */
1393 | 	va = (vaddr_t)cif;
1394 | 	pmap_extract(pmap_kernel(), va, &pa);
1395 | 	pmap_enter_special(va, pa, PROT_READ);
1396 | 	DPRINTF("%s: entered tss+gdt page at va 0x%llx pa 0x%llx\n", __func__,
1397 | 	    (uint64_t)va, (uint64_t)pa);
1398 |
1399 | 	/* The trampoline stack page needs to be read/write */
1400 | 	va = (vaddr_t)&cif->cif_tramp_stack;
1401 | 	pmap_extract(pmap_kernel(), va, &pa);
1402 | 	pmap_enter_special(va, pa, PROT_READ | PROT_WRITE);
1403 | 	DPRINTF("%s: entered t.stack page at va 0x%llx pa 0x%llx\n", __func__,
1404 | 	    (uint64_t)va, (uint64_t)pa);
1405 |
1406 | 	cif->cif_tss.tss_rsp0 = va + sizeof(cif->cif_tramp_stack) - 16;
1407 | 	DPRINTF("%s: cif_tss.tss_rsp0 = 0x%llx\n" ,__func__,
1408 | 	    (uint64_t)cif->cif_tss.tss_rsp0);
1409 | 	cif->cif_cpu.ci_intr_rsp = cif->cif_tss.tss_rsp0 -
1410 | 	    sizeof(struct iretq_frame);
1411 |
1412 | #define SETUP_IST_SPECIAL_STACK(ist, cif, member) do {			\
1413 | 	(cif)->cif_tss.tss_ist[(ist)] = (vaddr_t)&(cif)->member +	\
1414 | 	    sizeof((cif)->member) - 16;					\
1415 | 	(cif)->member[nitems((cif)->member) - 2] = (int64_t)&(cif)->cif_cpu; \
1416 | } while (0)
1417 |
1418 | 	SETUP_IST_SPECIAL_STACK(0, cif, cif_dblflt_stack);
1419 | 	SETUP_IST_SPECIAL_STACK(1, cif, cif_nmi_stack);
1420 |
1421 | 	/* an empty iomap, by setting its offset to the TSS limit */
1422 | 	cif->cif_tss.tss_iobase = sizeof(cif->cif_tss);
1423 | }
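
Note: SETUP_IST_SPECIAL_STACK() points the TSS IST slot 16 bytes below the top of the stack array and plants a cpu_info back-pointer in the second-to-last 8-byte slot, so with two slots reserved the IST landing point sits exactly on that back-pointer. A small check of the layout (the 512-slot size is hypothetical):

	#include <assert.h>
	#include <stdint.h>

	#define NSLOTS	512			/* hypothetical stack size */

	struct fake_cpu { int id; } cpu0;
	int64_t stack[NSLOTS];

	int
	main(void)
	{
		/* IST entry: 16 bytes (two slots) below the end of the array */
		uintptr_t ist = (uintptr_t)stack + sizeof(stack) - 16;

		/* back-pointer in the second-to-last slot, as the macro does */
		stack[NSLOTS - 2] = (int64_t)&cpu0;

		/* the slot at the IST landing point is that back-pointer */
		assert(*(int64_t *)ist == (int64_t)&cpu0);
		return 0;
	}
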
1424 |
1425 | #ifdef MULTIPROCESSOR
1426 | int
1427 | wbinvd_on_all_cpus(void)
1428 | {
1429 | 	x86_broadcast_ipi(X86_IPI_WBINVD);
1430 | 	wbinvd();
1431 | 	return 0;
1432 | }
1433 | #endif