| File: | arch/amd64/amd64/cpu.c |
| Warning: | line 1336, column 8 Assigned value is garbage or undefined |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* $OpenBSD: cpu.c,v 1.177 2023/11/22 18:50:10 bluhm Exp $ */ | |||
| 2 | /* $NetBSD: cpu.c,v 1.1 2003/04/26 18:39:26 fvdl Exp $ */ | |||
| 3 | ||||
| 4 | /*- | |||
| 5 | * Copyright (c) 2000 The NetBSD Foundation, Inc. | |||
| 6 | * All rights reserved. | |||
| 7 | * | |||
| 8 | * This code is derived from software contributed to The NetBSD Foundation | |||
| 9 | * by RedBack Networks Inc. | |||
| 10 | * | |||
| 11 | * Author: Bill Sommerfeld | |||
| 12 | * | |||
| 13 | * Redistribution and use in source and binary forms, with or without | |||
| 14 | * modification, are permitted provided that the following conditions | |||
| 15 | * are met: | |||
| 16 | * 1. Redistributions of source code must retain the above copyright | |||
| 17 | * notice, this list of conditions and the following disclaimer. | |||
| 18 | * 2. Redistributions in binary form must reproduce the above copyright | |||
| 19 | * notice, this list of conditions and the following disclaimer in the | |||
| 20 | * documentation and/or other materials provided with the distribution. | |||
| 21 | * | |||
| 22 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |||
| 23 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |||
| 24 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |||
| 25 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |||
| 26 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
| 27 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
| 28 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
| 29 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
| 30 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
| 31 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
| 32 | * POSSIBILITY OF SUCH DAMAGE. | |||
| 33 | */ | |||
| 34 | ||||
| 35 | /* | |||
| 36 | * Copyright (c) 1999 Stefan Grefen | |||
| 37 | * | |||
| 38 | * Redistribution and use in source and binary forms, with or without | |||
| 39 | * modification, are permitted provided that the following conditions | |||
| 40 | * are met: | |||
| 41 | * 1. Redistributions of source code must retain the above copyright | |||
| 42 | * notice, this list of conditions and the following disclaimer. | |||
| 43 | * 2. Redistributions in binary form must reproduce the above copyright | |||
| 44 | * notice, this list of conditions and the following disclaimer in the | |||
| 45 | * documentation and/or other materials provided with the distribution. | |||
| 46 | * 3. All advertising materials mentioning features or use of this software | |||
| 47 | * must display the following acknowledgement: | |||
| 48 | * This product includes software developed by the NetBSD | |||
| 49 | * Foundation, Inc. and its contributors. | |||
| 50 | * 4. Neither the name of The NetBSD Foundation nor the names of its | |||
| 51 | * contributors may be used to endorse or promote products derived | |||
| 52 | * from this software without specific prior written permission. | |||
| 53 | * | |||
| 54 | * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY | |||
| 55 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| 56 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| 57 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE | |||
| 58 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| 59 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||
| 60 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||
| 61 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||
| 62 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||
| 63 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||
| 64 | * SUCH DAMAGE. | |||
| 65 | */ | |||
| 66 | ||||
| 67 | #include "lapic.h" | |||
| 68 | #include "ioapic.h" | |||
| 69 | #include "vmm.h" | |||
| 70 | #include "pctr.h" | |||
| 71 | #include "pvbus.h" | |||
| 72 | ||||
| 73 | #include <sys/param.h> | |||
| 74 | #include <sys/proc.h> | |||
| 75 | #include <sys/timeout.h> | |||
| 76 | #include <sys/systm.h> | |||
| 77 | #include <sys/device.h> | |||
| 78 | #include <sys/malloc.h> | |||
| 79 | #include <sys/memrange.h> | |||
| 80 | #include <sys/atomic.h> | |||
| 81 | #include <sys/user.h> | |||
| 82 | ||||
| 83 | #include <uvm/uvm_extern.h> | |||
| 84 | ||||
| 85 | #include <machine/codepatch.h> | |||
| 86 | #include <machine/cpu_full.h> | |||
| 87 | #include <machine/cpufunc.h> | |||
| 88 | #include <machine/cpuvar.h> | |||
| 89 | #include <machine/pmap.h> | |||
| 90 | #include <machine/vmparam.h> | |||
| 91 | #include <machine/mpbiosvar.h> | |||
| 92 | #include <machine/pcb.h> | |||
| 93 | #include <machine/specialreg.h> | |||
| 94 | #include <machine/segments.h> | |||
| 95 | #include <machine/gdt.h> | |||
| 96 | #include <machine/pio.h> | |||
| 97 | #include <machine/vmmvar.h> | |||
| 98 | ||||
| 99 | #if NLAPIC1 > 0 | |||
| 100 | #include <machine/i82489reg.h> | |||
| 101 | #include <machine/i82489var.h> | |||
| 102 | #endif | |||
| 103 | ||||
| 104 | #if NIOAPIC1 > 0 | |||
| 105 | #include <machine/i82093var.h> | |||
| 106 | #endif | |||
| 107 | ||||
| 108 | #if NPCTR1 > 0 | |||
| 109 | #include <machine/pctr.h> | |||
| 110 | #endif | |||
| 111 | ||||
| 112 | #if NPVBUS1 > 0 | |||
| 113 | #include <dev/pv/pvvar.h> | |||
| 114 | #endif | |||
| 115 | ||||
| 116 | #include <dev/ic/mc146818reg.h> | |||
| 117 | #include <amd64/isa/nvram.h> | |||
| 118 | #include <dev/isa/isareg.h> | |||
| 119 | ||||
| 120 | #ifdef HIBERNATE1 | |||
| 121 | #include <sys/hibernate.h> | |||
| 122 | #include <machine/hibernate.h> | |||
| 123 | #endif /* HIBERNATE */ | |||
| 124 | ||||
| 125 | /* #define CPU_DEBUG */ | |||
| 126 | ||||
| 127 | #ifdef CPU_DEBUG | |||
| 128 | #define DPRINTF(x...) do { printf(x); } while(0) | |||
| 129 | #else | |||
| 130 | #define DPRINTF(x...) | |||
| 131 | #endif /* CPU_DEBUG */ | |||
| 132 | ||||
| 133 | int cpu_match(struct device *, void *, void *); | |||
| 134 | void cpu_attach(struct device *, struct device *, void *); | |||
| 135 | int cpu_activate(struct device *, int); | |||
| 136 | void patinit(struct cpu_info *ci); | |||
| 137 | #if NVMM1 > 0 | |||
| 138 | void cpu_init_vmm(struct cpu_info *ci); | |||
| 139 | #endif /* NVMM > 0 */ | |||
| 140 | ||||
/*
 * Per-device state for the cpu driver.  cpu_ca below sizes the softc
 * with sizeof(struct cpu_softc), and cpu_attach() stores the cpu_info
 * it selected in sc_info.
 */
| 141 | struct cpu_softc { | |||
| 142 | struct device sc_dev; /* device tree glue */ | |||
| 143 | struct cpu_info *sc_info; /* pointer to CPU info */ | |||
| 144 | }; | |||
| 145 | ||||
| 146 | void replacesmap(void); | |||
| 147 | void replacemeltdown(void); | |||
| 148 | void replacemds(void); | |||
| 149 | ||||
| 150 | extern long _stac; | |||
| 151 | extern long _clac; | |||
| 152 | ||||
/*
 * Cached CPUID words and mitigation switches, all starting out as zero.
 * None of them is assigned in the part of the file shown here; they are
 * read by, for example, replacemeltdown() (cpu_vendor, cpu_meltdown) and
 * cpu_init_mwait() (cpu_ecxfeature, cpuid_level).
 */
| 153 | int cpuid_level = 0; /* cpuid(0).eax */ | |||
| 154 | char cpu_vendor[16] = { 0 }; /* cpuid(0).e[bdc]x, \0 */ | |||
| 155 | int cpu_id = 0; /* cpuid(1).eax */ | |||
| 156 | int cpu_ebxfeature = 0; /* cpuid(1).ebx */ | |||
| 157 | int cpu_ecxfeature = 0; /* cpuid(1).ecx */ | |||
| 158 | int cpu_feature = 0; /* cpuid(1).edx */ | |||
| 159 | int cpu_perf_eax = 0; /* cpuid(0xa).eax */ | |||
| 160 | int cpu_perf_ebx = 0; /* cpuid(0xa).ebx */ | |||
| 161 | int cpu_perf_edx = 0; /* cpuid(0xa).edx */ | |||
| 162 | int cpu_apmi_edx = 0; /* cpuid(0x80000007).edx */ | |||
| 163 | int ecpu_ecxfeature = 0; /* cpuid(0x80000001).ecx */ | |||
/* nonzero selects the Meltdown branches in replacemeltdown() */
| 164 | int cpu_meltdown = 0; | |||
/* NOTE(review): not referenced in the portion shown; meaning to be confirmed */
| 165 | int cpu_use_xsaves = 0; | |||
| 166 | ||||
/*
 * replacesmap: one-shot code patching for the STAC/CLAC sites.
 *
 * The static replacedone flag makes every call after the first return
 * immediately.  On the first call codepatch_replace() is invoked for
 * the CPTAG_STAC and CPTAG_CLAC tags with &_stac and &_clac and a
 * length of 3, bracketed by splhigh()/splx().
 *
 * NOTE(review): this listing appears to print each macro use followed
 * directly by its expansion (e.g. "splhigh()" then "splraise(0xd)");
 * check against the real cpu.c before editing any code here.
 */
| 167 | void | |||
| 168 | replacesmap(void) | |||
| 169 | { | |||
| 170 | static int replacedone = 0; | |||
| 171 | int s; | |||
| 172 | ||||
| 173 | if (replacedone) | |||
| 174 | return; | |||
| 175 | replacedone = 1; | |||
| 176 | ||||
/* previous level is saved in s and handed back to splx() below */
| 177 | s = splhigh()splraise(0xd); | |||
| 178 | ||||
| 179 | codepatch_replace(CPTAG_STAC1, &_stac, 3); | |||
| 180 | codepatch_replace(CPTAG_CLAC2, &_clac, 3); | |||
| 181 | ||||
| 182 | splx(s)spllower(s); | |||
| 183 | } | |||
| 184 | ||||
/*
 * replacemeltdown: choose and apply the speculation-related code patches.
 *
 * First part (runs on every call): from cpu_vendor and the primary
 * CPU's feature words, work out
 *   swapgs_vuln - 1 for GenuineIntel except the family 6 models
 *                 listed below, 0 otherwise;
 *   ibrs        - 0, 1 or 2, where 2 is the case the existing comment
 *                 calls "Enhanced IBRS";
 * and, when ibrs == 2, write SPEC_CTRL_IBRS to MSR_SPEC_CTRL.
 *
 * Second part (guarded by the static replacedone flag, so it runs
 * once): with the spl raised, apply the retpoline, Meltdown, PCID and
 * swapgs-fence code patches selected by ibrs, cpu_meltdown,
 * pmap_use_pcid, swapgs_vuln and the SMAP/IBT feature bits.
 */
| 185 | void | |||
| 186 | replacemeltdown(void) | |||
| 187 | { | |||
| 188 | static int replacedone = 0; | |||
| 189 | struct cpu_info *ci = &cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))); | |||
| 190 | int swapgs_vuln = 0, ibrs = 0, s; | |||
| 191 | ||||
| 192 | if (strcmp(cpu_vendor, "GenuineIntel") == 0) { | |||
| 193 | int family = ci->ci_family; | |||
| 194 | int model = ci->ci_model; | |||
| 195 | ||||
/* Intel starts out as vulnerable; the model lists below clear it again */
| 196 | swapgs_vuln = 1; | |||
| 197 | if (family == 0x6 && | |||
| 198 | (model == 0x37 || model == 0x4a || model == 0x4c || | |||
| 199 | model == 0x4d || model == 0x5a || model == 0x5d || | |||
| 200 | model == 0x6e || model == 0x65 || model == 0x75)) { | |||
| 201 | /* Silvermont, Airmont */ | |||
| 202 | swapgs_vuln = 0; | |||
| 203 | } else if (family == 0x6 && (model == 0x85 || model == 0x57)) { | |||
| 204 | /* KnightsLanding */ | |||
| 205 | swapgs_vuln = 0; | |||
| 206 | } | |||
/* the MSR is only read when the ARCH_CAP feature bit is set */
| 207 | if ((ci->ci_feature_sefflags_edx & SEFF0EDX_ARCH_CAP0x20000000) && | |||
| 208 | (rdmsr(MSR_ARCH_CAPABILITIES0x10a) & ARCH_CAP_IBRS_ALL(1 << 1))) { | |||
| 209 | ibrs = 2; | |||
| 210 | } else if (ci->ci_feature_sefflags_edx & SEFF0EDX_IBRS0x04000000) { | |||
| 211 | ibrs = 1; | |||
| 212 | } | |||
| 213 | } else if (strcmp(cpu_vendor, "AuthenticAMD") == 0 && | |||
| 214 | ci->ci_pnfeatset >= 0x80000008) { | |||
| 215 | if (ci->ci_feature_amdspec_ebx & CPUIDEBX_IBRS_ALWAYSON(1ULL << 16)) { | |||
| 216 | ibrs = 2; | |||
| 217 | } else if ((ci->ci_feature_amdspec_ebx & CPUIDEBX_IBRS(1ULL << 14)) && | |||
| 218 | (ci->ci_feature_amdspec_ebx & CPUIDEBX_IBRS_PREF(1ULL << 18))) { | |||
| 219 | ibrs = 1; | |||
| 220 | } | |||
| 221 | } | |||
| 222 | ||||
| 223 | /* Enhanced IBRS: turn it on once on each CPU and don't touch again */ | |||
/* placed ahead of the replacedone check, so it is not limited to the first call */
| 224 | if (ibrs == 2) | |||
| 225 | wrmsr(MSR_SPEC_CTRL0x048, SPEC_CTRL_IBRS(1ULL << 0)); | |||
| 226 | ||||
| 227 | if (replacedone) | |||
| 228 | return; | |||
| 229 | replacedone = 1; | |||
| 230 | ||||
| 231 | s = splhigh()splraise(0xd); | |||
/*
 * Only ibrs == 2 or the IBT feature bit triggers this patch; the
 * ibrs == 1 value computed above is not tested anywhere in this
 * function.
 */
| 232 | if (ibrs == 2 || (ci->ci_feature_sefflags_edx & SEFF0EDX_IBT0x00100000)) { | |||
| 233 | extern const char _jmprax, _jmpr11, _jmpr13; | |||
| 234 | extern const short _jmprax_len, _jmpr11_len, _jmpr13_len; | |||
| 235 | codepatch_replace(CPTAG_RETPOLINE_RAX14, &_jmprax, _jmprax_len); | |||
| 236 | codepatch_replace(CPTAG_RETPOLINE_R1115, &_jmpr11, _jmpr11_len); | |||
| 237 | codepatch_replace(CPTAG_RETPOLINE_R1316, &_jmpr13, _jmpr13_len); | |||
| 238 | } | |||
| 239 | ||||
| 240 | if (!cpu_meltdown) | |||
| 241 | codepatch_nop(CPTAG_MELTDOWN_NOP6); | |||
| 242 | else { | |||
| 243 | extern long alltraps_kern_meltdown; | |||
| 244 | ||||
| 245 | /* eliminate conditional branch in alltraps */ | |||
| 246 | codepatch_jmp(CPTAG_MELTDOWN_ALLTRAPS7, &alltraps_kern_meltdown); | |||
| 247 | ||||
| 248 | /* enable reuse of PCID for U-K page tables */ | |||
| 249 | if (pmap_use_pcid) { | |||
| 250 | extern long _pcid_set_reuse; | |||
| 251 | DPRINTF("%s: codepatching PCID use\n", __func__); | |||
| 252 | codepatch_replace(CPTAG_PCID_SET_REUSE8, | |||
| 253 | &_pcid_set_reuse, PCID_SET_REUSE_SIZE12); | |||
| 254 | } | |||
| 255 | } | |||
| 256 | ||||
| 257 | /* | |||
| 258 | * CVE-2019-1125: if the CPU has SMAP and it's not vulnerable to | |||
| 259 | * Meltdown, then it's protected both from speculatively mis-skipping | |||
| 260 | * the swapgs during interrupts of userspace and from speculatively | |||
| 261 | * mis-taking a swapgs during interrupts while already in the kernel | |||
| 262 | * as the speculative path will fault from SMAP. Warning: enabling | |||
| 263 | * WRGSBASE would break this 'protection'. | |||
| 264 | * | |||
| 265 | * Otherwise, if the CPU's swapgs can't be speculated over and it | |||
| 266 | * _is_ vulnerable to Meltdown then the %cr3 change will serialize | |||
| 267 | * user->kern transitions, but we still need to mitigate the | |||
| 268 | * already-in-kernel cases. | |||
| 269 | */ | |||
/* in every remaining combination both fence sites are left unpatched */
| 270 | if (!cpu_meltdown && (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMAP0x00100000)) { | |||
| 271 | codepatch_nop(CPTAG_FENCE_SWAPGS_MIS_TAKEN11); | |||
| 272 | codepatch_nop(CPTAG_FENCE_NO_SAFE_SMAP12); | |||
| 273 | } else if (!swapgs_vuln && cpu_meltdown) { | |||
| 274 | codepatch_nop(CPTAG_FENCE_SWAPGS_MIS_TAKEN11); | |||
| 275 | } | |||
| 276 | splx(s)spllower(s); | |||
| 277 | } | |||
| 278 | ||||
/*
 * replacemds: one-shot selection of the MDS workaround.
 *
 * Guarded by the static replacedone flag.  Three outcomes:
 *  - handler != NULL: a family 6 model matched one of the lists
 *    below; every CPU on cpu_info_list gets a ci_mds_buf and the
 *    CPTAG_MDS / CPTAG_MDS_VMM sites are patched to call handler /
 *    vmm_handler;
 *  - has_verw: the MD_CLEAR feature bit is set; only CPTAG_MDS_VMM is
 *    touched (nopped) and only when vmx_has_l1_flush_msr == 1;
 *  - otherwise (non-Intel, ARCH_CAP_MDS_NO set, or no model match):
 *    both sites are nopped.
 *
 * The buffers are allocated with M_WAITOK and are not freed in this
 * function.
 */
| 279 | void | |||
| 280 | replacemds(void) | |||
| 281 | { | |||
| 282 | static int replacedone = 0; | |||
| 283 | extern long mds_handler_bdw, mds_handler_ivb, mds_handler_skl; | |||
| 284 | extern long mds_handler_skl_sse, mds_handler_skl_avx; | |||
| 285 | extern long mds_handler_silvermont, mds_handler_knights; | |||
| 286 | struct cpu_info *ci = &cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))); | |||
| 287 | CPU_INFO_ITERATORint cii; | |||
| 288 | void *handler = NULL((void *)0), *vmm_handler = NULL((void *)0); | |||
| 289 | const char *type; | |||
| 290 | int has_verw, s; | |||
| 291 | ||||
| 292 | /* ci_mds_tmp must be 32byte aligned for AVX instructions */ | |||
| 293 | CTASSERT((offsetof(struct cpu_info, ci_mds_tmp) -extern char _ctassert[((__builtin_offsetof(struct cpu_info, ci_mds_tmp ) - __builtin_offsetof(struct cpu_info, ci_dev)) % 32 == 0) ? 1 : -1 ] __attribute__((__unused__)) | |||
| 294 | offsetof(struct cpu_info, ci_PAGEALIGN)) % 32 == 0)extern char _ctassert[((__builtin_offsetof(struct cpu_info, ci_mds_tmp ) - __builtin_offsetof(struct cpu_info, ci_dev)) % 32 == 0) ? 1 : -1 ] __attribute__((__unused__)); | |||
| 295 | ||||
| 296 | if (replacedone) | |||
| 297 | return; | |||
| 298 | replacedone = 1; | |||
| 299 | ||||
| 300 | if (strcmp(cpu_vendor, "GenuineIntel") != 0 || | |||
| 301 | ((ci->ci_feature_sefflags_edx & SEFF0EDX_ARCH_CAP0x20000000) && | |||
| 302 | (rdmsr(MSR_ARCH_CAPABILITIES0x10a) & ARCH_CAP_MDS_NO(1 << 5)))) { | |||
| 303 | /* Unaffected, nop out the handling code */ | |||
| 304 | has_verw = 0; | |||
| 305 | } else if (ci->ci_feature_sefflags_edx & SEFF0EDX_MD_CLEAR0x00000400) { | |||
| 306 | /* new firmware, use VERW */ | |||
| 307 | has_verw = 1; | |||
| 308 | } else { | |||
| 309 | int family = ci->ci_family; | |||
| 310 | int model = ci->ci_model; | |||
| 311 | int stepping = CPUID2STEPPING(ci->ci_signature)((ci->ci_signature) & 15); | |||
| 312 | ||||
| 313 | has_verw = 0; | |||
| 314 | if (family == 0x6 && | |||
| 315 | (model == 0x2e || model == 0x1e || model == 0x1f || | |||
| 316 | model == 0x1a || model == 0x2f || model == 0x25 || | |||
| 317 | model == 0x2c || model == 0x2d || model == 0x2a || | |||
| 318 | model == 0x3e || model == 0x3a)) { | |||
| 319 | /* Nehalem, SandyBridge, IvyBridge */ | |||
| 320 | handler = vmm_handler = &mds_handler_ivb; | |||
| 321 | type = "IvyBridge"; | |||
/*
 * NB: the loop uses ci as its cursor, so once it finishes ci no
 * longer points at the primary CPU (it ends as NULL).  The same
 * applies to every CPU_INFO_FOREACH below.
 */
| 322 | CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci-> ci_next) { | |||
| 323 | ci->ci_mds_buf = malloc(672, M_DEVBUF2, | |||
| 324 | M_WAITOK0x0001); | |||
/* only the first 16 bytes of the buffer are cleared */
| 325 | memset(ci->ci_mds_buf, 0, 16)__builtin_memset((ci->ci_mds_buf), (0), (16)); | |||
| 326 | } | |||
| 327 | } else if (family == 0x6 && | |||
| 328 | (model == 0x3f || model == 0x3c || model == 0x45 || | |||
| 329 | model == 0x46 || model == 0x56 || model == 0x4f || | |||
| 330 | model == 0x47 || model == 0x3d)) { | |||
| 331 | /* Haswell and Broadwell */ | |||
| 332 | handler = vmm_handler = &mds_handler_bdw; | |||
| 333 | type = "Broadwell"; | |||
| 334 | CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci-> ci_next) { | |||
| 335 | ci->ci_mds_buf = malloc(1536, M_DEVBUF2, | |||
| 336 | M_WAITOK0x0001); | |||
| 337 | } | |||
| 338 | } else if (family == 0x6 && | |||
| 339 | ((model == 0x55 && stepping <= 5) || model == 0x4e || | |||
| 340 | model == 0x5e || (model == 0x8e && stepping <= 0xb) || | |||
| 341 | (model == 0x9e && stepping <= 0xc))) { | |||
| 342 | /* | |||
| 343 | * Skylake, KabyLake, CoffeeLake, WhiskeyLake, | |||
| 344 | * CascadeLake | |||
| 345 | */ | |||
| 346 | /* XXX mds_handler_skl_avx512 */ | |||
| 347 | if (xgetbv(0) & XFEATURE_AVX0x00000004) { | |||
| 348 | handler = &mds_handler_skl_avx; | |||
| 349 | type = "Skylake AVX"; | |||
| 350 | } else { | |||
| 351 | handler = &mds_handler_skl_sse; | |||
| 352 | type = "Skylake SSE"; | |||
| 353 | } | |||
/* this is the only branch where handler and vmm_handler differ */
| 354 | vmm_handler = &mds_handler_skl; | |||
| 355 | CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci-> ci_next) { | |||
| 356 | vaddr_t b64; | |||
/*
 * Over-allocate by 63 bytes, then round the address up to a
 * multiple of 64; the unrounded pointer is not kept.
 */
| 357 | b64 = (vaddr_t)malloc(6 * 1024 + 64 + 63, | |||
| 358 | M_DEVBUF2, M_WAITOK0x0001); | |||
| 359 | ci->ci_mds_buf = (void *)((b64 + 63) & ~63); | |||
| 360 | memset(ci->ci_mds_buf, 0, 64)__builtin_memset((ci->ci_mds_buf), (0), (64)); | |||
| 361 | } | |||
| 362 | } else if (family == 0x6 && | |||
| 363 | (model == 0x37 || model == 0x4a || model == 0x4c || | |||
| 364 | model == 0x4d || model == 0x5a || model == 0x5d || | |||
| 365 | model == 0x6e || model == 0x65 || model == 0x75)) { | |||
| 366 | /* Silvermont, Airmont */ | |||
| 367 | handler = vmm_handler = &mds_handler_silvermont; | |||
| 368 | type = "Silvermont"; | |||
| 369 | CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci-> ci_next) { | |||
| 370 | ci->ci_mds_buf = malloc(256, M_DEVBUF2, | |||
| 371 | M_WAITOK0x0001); | |||
| 372 | memset(ci->ci_mds_buf, 0, 16)__builtin_memset((ci->ci_mds_buf), (0), (16)); | |||
| 373 | } | |||
| 374 | } else if (family == 0x6 && (model == 0x85 || model == 0x57)) { | |||
| 375 | handler = vmm_handler = &mds_handler_knights; | |||
| 376 | type = "KnightsLanding"; | |||
| 377 | CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci-> ci_next) { | |||
| 378 | vaddr_t b64; | |||
/* same round-up-to-64 scheme as the Skylake branch */
| 379 | b64 = (vaddr_t)malloc(1152 + 63, M_DEVBUF2, | |||
| 380 | M_WAITOK0x0001); | |||
| 381 | ci->ci_mds_buf = (void *)((b64 + 63) & ~63); | |||
| 382 | } | |||
| 383 | } | |||
| 384 | } | |||
| 385 | ||||
/* type is assigned on every path that sets handler, so it is valid here */
| 386 | if (handler != NULL((void *)0)) { | |||
| 387 | printf("cpu0: using %s MDS workaround%s\n", type, ""); | |||
| 388 | s = splhigh()splraise(0xd); | |||
| 389 | codepatch_call(CPTAG_MDS9, handler); | |||
| 390 | codepatch_call(CPTAG_MDS_VMM10, vmm_handler); | |||
| 391 | splx(s)spllower(s); | |||
| 392 | } else if (has_verw) { | |||
| 393 | /* The new firmware enhances L1D_FLUSH MSR to flush MDS too */ | |||
/* reads cpu_info_primary directly instead of going through ci */
| 394 | if (cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))).ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr == 1) { | |||
| 395 | s = splhigh()splraise(0xd); | |||
| 396 | codepatch_nop(CPTAG_MDS_VMM10); | |||
| 397 | splx(s)spllower(s); | |||
| 398 | type = " (except on vmm entry)"; | |||
| 399 | } else { | |||
| 400 | type = ""; | |||
| 401 | } | |||
/* here type is the message suffix, not the workaround name */
| 402 | printf("cpu0: using %s MDS workaround%s\n", "VERW", type); | |||
| 403 | } else { | |||
| 404 | s = splhigh()splraise(0xd); | |||
| 405 | codepatch_nop(CPTAG_MDS9); | |||
| 406 | codepatch_nop(CPTAG_MDS_VMM10); | |||
| 407 | splx(s)spllower(s); | |||
| 408 | } | |||
| 409 | } | |||
| 410 | ||||
/*
 * Start-up hooks for secondary CPUs: mp_cpu_start, no second hook
 * (NULL), and mp_cpu_start_cleanup.  cpu_attach() copies the
 * caller-supplied caa->cpu_func into ci_func.
 * NOTE(review): whether callers pass &mp_cpu_funcs there is not
 * visible in this part of the file.
 */
| 411 | #ifdef MULTIPROCESSOR1 | |||
| 412 | int mp_cpu_start(struct cpu_info *); | |||
| 413 | void mp_cpu_start_cleanup(struct cpu_info *); | |||
| 414 | struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL((void *)0), | |||
| 415 | mp_cpu_start_cleanup }; | |||
| 416 | #endif /* MULTIPROCESSOR */ | |||
| 417 | ||||
/*
 * Autoconf glue for the "cpu" driver: softc size, then cpu_match,
 * cpu_attach, an unused (NULL) slot and cpu_activate.
 */
| 418 | const struct cfattach cpu_ca = { | |||
| 419 | sizeof(struct cpu_softc), cpu_match, cpu_attach, NULL((void *)0), cpu_activate | |||
| 420 | }; | |||
| 421 | ||||
/* cpu_match() compares the attach-args name against the "cpu" name here */
| 422 | struct cfdriver cpu_cd = { | |||
| 423 | NULL((void *)0), "cpu", DV_DULL | |||
| 424 | }; | |||
| 425 | ||||
| 426 | /* | |||
| 427 | * Statically-allocated CPU info for the primary CPU (or the only | |||
| 428 | * CPU, on uniprocessors). The CPU info list is initialized to | |||
| 429 | * point at it. | |||
| 430 | */ | |||
/*
 * As the expansion printed on these lines shows, cpu_info_primary is
 * not a separate object: it is the struct cpu_info found inside
 * cpu_info_full_primary at byte offset
 * 4096*2 - offsetof(struct cpu_info, ci_dev).
 * The initialiser below points ci_self back at that same structure.
 */
| 431 | struct cpu_info_full cpu_info_full_primary = { .cif_cpu = { .ci_self = &cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))) } }; | |||
| 432 | ||||
/* head of the list walked by CPU_INFO_FOREACH; cpu_attach() links APs in after it */
| 433 | struct cpu_info *cpu_info_list = &cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))); | |||
| 434 | ||||
| 435 | #ifdef MULTIPROCESSOR1 | |||
| 436 | /* | |||
| 437 | * Array of CPU info structures. Must be statically-allocated because | |||
| 438 | * curproc, etc. are used early. | |||
| 439 | */ | |||
/* slot 0 is the primary CPU; cpu_attach() fills cpu_info[cpunum] for each AP */
| 440 | struct cpu_info *cpu_info[MAXCPUS64] = { &cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))) }; | |||
| 441 | ||||
| 442 | void cpu_hatch(void *); | |||
| 443 | void cpu_boot_secondary(struct cpu_info *ci); | |||
| 444 | void cpu_start_secondary(struct cpu_info *ci); | |||
| 445 | #endif | |||
| 446 | ||||
/*
 * cpu_match: autoconf match routine for the cpu driver.
 *
 * parent is not referenced.  match is used as a struct cfdata and
 * aux as a struct cpu_attach_args.
 *
 * Returns 1 when caa_name equals the driver name of the candidate
 * and its unit number is below MAXCPUS (shown as 64 in this
 * listing); returns 0 otherwise.
 */
| 447 | int | |||
| 448 | cpu_match(struct device *parent, void *match, void *aux) | |||
| 449 | { | |||
| 450 | struct cfdata *cf = match; | |||
| 451 | struct cpu_attach_args *caa = aux; | |||
| 452 | ||||
| 453 | if (strcmp(caa->caa_name, cf->cf_driver->cd_name) != 0) | |||
| 454 | return 0; | |||
| 455 | ||||
/* the unit number later serves as the cpu_info[] index in cpu_attach() */
| 456 | if (cf->cf_unit >= MAXCPUS64) | |||
| 457 | return 0; | |||
| 458 | ||||
| 459 | return 1; | |||
| 460 | } | |||
| 461 | ||||
| 462 | void cpu_idle_mwait_cycle(void); | |||
| 463 | void cpu_init_mwait(struct cpu_softc *, struct cpu_info *); | |||
| 464 | ||||
/*
 * Both are written by cpu_init_mwait(): cpu_mwait_states receives the
 * fourth output word of CPUID leaf 5 (possibly masked or replaced
 * there), and cpu_mwait_size the validated "largest" monitor size.
 */
| 465 | u_int cpu_mwait_size, cpu_mwait_states; | |||
| 466 | ||||
/*
 * cpu_idle_mwait_cycle: idle-loop body using MONITOR/MWAIT.
 *
 * cpu_init_mwait() installs it as cpu_idle_cycle_fcn.  It operates
 * on the current CPU's cpu_info:
 *  - panics if the PSL_I bit is clear in rflags;
 *  - returns at once if the run queue bitmap (spc_whichqs) is
 *    non-zero;
 *  - otherwise sets MWAIT_IDLING | MWAIT_ONLY in ci_mwait, re-checks
 *    the queue, arms monitor() on ci_mwait and calls mwait() only if
 *    both MWAIT_IDLING bits are still set;
 *  - clears the MWAIT_IDLING bits before returning.
 */
| 467 | void | |||
| 468 | cpu_idle_mwait_cycle(void) | |||
| 469 | { | |||
| 470 | struct cpu_info *ci = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); | |||
| 471 | ||||
| 472 | if ((read_rflags() & PSL_I0x00000200) == 0) | |||
| 473 | panic("idle with interrupts blocked!"); | |||
| 474 | ||||
| 475 | /* something already queued? */ | |||
| 476 | if (!cpu_is_idle(ci)((ci)->ci_schedstate.spc_whichqs == 0)) | |||
| 477 | return; | |||
| 478 | ||||
| 479 | /* | |||
| 480 | * About to idle; setting the MWAIT_IN_IDLE bit tells | |||
| 481 | * cpu_unidle() that it can't be a no-op and tells cpu_kick() | |||
| 482 | * that it doesn't need to use an IPI. We also set the | |||
| 483 | * MWAIT_KEEP_IDLING bit: those routines clear it to stop | |||
| 484 | * the mwait. Once they're set, we do a final check of the | |||
| 485 | * queue, in case another cpu called setrunqueue() and added | |||
| 486 | * something to the queue and called cpu_unidle() between | |||
| 487 | * the check in sched_idle() and here. | |||
| 488 | */ | |||
| 489 | atomic_setbits_intx86_atomic_setbits_u32(&ci->ci_mwait, MWAIT_IDLING(0x1 | 0x2) | MWAIT_ONLY0x4); | |||
| 490 | if (cpu_is_idle(ci)((ci)->ci_schedstate.spc_whichqs == 0)) { | |||
/* arm the monitor first, then re-test the bits before actually waiting */
| 491 | monitor(&ci->ci_mwait, 0, 0); | |||
| 492 | if ((ci->ci_mwait & MWAIT_IDLING(0x1 | 0x2)) == MWAIT_IDLING(0x1 | 0x2)) | |||
| 493 | mwait(0, 0); | |||
| 494 | } | |||
| 495 | ||||
| 496 | /* done idling; let cpu_kick() know that an IPI is required */ | |||
/* only the MWAIT_IDLING bits are cleared; MWAIT_ONLY set above is left in place */
| 497 | atomic_clearbits_intx86_atomic_clearbits_u32(&ci->ci_mwait, MWAIT_IDLING(0x1 | 0x2)); | |||
| 498 | } | |||
| 499 | ||||
/*
 * cpu_init_mwait: probe MONITOR/MWAIT support and enable the mwait
 * idle loop.
 *
 * sc supplies the device name for the report line; ci supplies the
 * family/model used for the one model-specific mask below.
 *
 * Does nothing unless the MWAIT bit is set in cpu_ecxfeature and
 * cpuid_level is at least 5.  Otherwise it reads CPUID leaf 5,
 * prints the monitor sizes and any C-substates, records
 * cpu_mwait_states and (when the sizes pass the sanity check)
 * cpu_mwait_size, and finally points cpu_idle_cycle_fcn at
 * cpu_idle_mwait_cycle if cpu_mwait_size is non-zero.
 */
| 500 | void | |||
| 501 | cpu_init_mwait(struct cpu_softc *sc, struct cpu_info *ci) | |||
| 502 | { | |||
| 503 | unsigned int smallest, largest, extensions, c_substates; | |||
| 504 | ||||
| 505 | if ((cpu_ecxfeature & CPUIDECX_MWAIT0x00000008) == 0 || cpuid_level < 0x5) | |||
| 506 | return; | |||
| 507 | ||||
| 508 | /* get the monitor granularity */ | |||
| 509 | CPUID(0x5, smallest, largest, extensions, cpu_mwait_states)__asm volatile("cpuid" : "=a" (smallest), "=b" (largest), "=c" (extensions), "=d" (cpu_mwait_states) : "a" (0x5)); | |||
/* only the low 16 bits of each size word are kept */
| 510 | smallest &= 0xffff; | |||
| 511 | largest &= 0xffff; | |||
| 512 | ||||
| 513 | /* mask out states C6/C7 in 31:24 for CHT45 errata */ | |||
| 514 | if (strcmp(cpu_vendor, "GenuineIntel") == 0 && | |||
| 515 | ci->ci_family == 0x06 && ci->ci_model == 0x4c) | |||
| 516 | cpu_mwait_states &= 0x00ffffff; | |||
| 517 | ||||
| 518 | printf("%s: mwait min=%u, max=%u", sc->sc_dev.dv_xname, | |||
| 519 | smallest, largest); | |||
| 520 | if (extensions & 0x1) { | |||
| 521 | if (cpu_mwait_states > 0) { | |||
/*
 * Print one 4-bit field per step, lowest nibble first and separated
 * by '.', stopping once the remaining higher bits are all zero.
 */
| 522 | c_substates = cpu_mwait_states; | |||
| 523 | printf(", C-substates=%u", 0xf & c_substates); | |||
| 524 | while ((c_substates >>= 4) > 0) | |||
| 525 | printf(".%u", 0xf & c_substates); | |||
| 526 | } | |||
| 527 | if (extensions & 0x2) | |||
| 528 | printf(", IBE"); | |||
| 529 | } else { | |||
| 530 | /* substates not supported, forge the default: just C1 */ | |||
| 531 | cpu_mwait_states = 1 << 4; | |||
| 532 | } | |||
| 533 | ||||
| 534 | /* paranoia: check the values */ | |||
/* on the bogus path cpu_mwait_size is left at whatever it held before */
| 535 | if (smallest < sizeof(int) || largest < smallest || | |||
| 536 | (largest & (sizeof(int)-1))) | |||
| 537 | printf(" (bogus)"); | |||
| 538 | else | |||
| 539 | cpu_mwait_size = largest; | |||
| 540 | printf("\n"); | |||
| 541 | ||||
| 542 | /* enable use of mwait; may be overridden by acpicpu later */ | |||
| 543 | if (cpu_mwait_size > 0) | |||
| 544 | cpu_idle_cycle_fcn = &cpu_idle_mwait_cycle; | |||
| 545 | } | |||
| 546 | ||||
/*
 * cpu_attach: autoconf attach routine for a cpu device.
 *
 * parent is not referenced in the body.  self is treated as this
 * device's struct cpu_softc.  aux is the struct cpu_attach_args
 * supplying cpu_role, cpu_apicid, cpu_acpi_proc_id and cpu_func.
 *
 * Steps, in order (with MULTIPROCESSOR defined, as in this listing):
 *  - pick the cpu_info: a newly km_alloc'ed, zeroed cpu_info_full for
 *    CPU_ROLE_AP, cpu_info_primary otherwise;
 *  - fill in the identity fields (ci_self, ci_dev, ci_apicid, ...);
 *  - allocate and zero USPACE bytes for the idle PCB and stack;
 *  - run the role-specific bring-up in the switch;
 *  - call cpu_init_vmm() and install the sensor device if it has any
 *    sensors.
 *
 * Panics on an unknown role, on an AP slot that is already taken, on
 * an APIC id mismatch for the primary, and when the primary's idle
 * stack cannot be allocated.
 */
| 547 | void | |||
| 548 | cpu_attach(struct device *parent, struct device *self, void *aux) | |||
| 549 | { | |||
| 550 | struct cpu_softc *sc = (void *) self; | |||
| 551 | struct cpu_attach_args *caa = aux; | |||
| 552 | struct cpu_info *ci; | |||
| 553 | #if defined(MULTIPROCESSOR1) | |||
| 554 | int cpunum = sc->sc_dev.dv_unit; | |||
| 555 | vaddr_t kstack; | |||
| 556 | struct pcb *pcb; | |||
| 557 | #endif | |||
| 558 | ||||
| 559 | /* | |||
| 560 | * If we're an Application Processor, allocate a cpu_info | |||
| 561 | * structure, otherwise use the primary's. | |||
| 562 | */ | |||
| 563 | if (caa->cpu_role == CPU_ROLE_AP2) { | |||
| 564 | struct cpu_info_full *cif; | |||
| 565 | ||||
| 566 | cif = km_alloc(sizeof *cif, &kv_any, &kp_zero, &kd_waitok); | |||
| 567 | ci = &cif->cif_cpu; | |||
| 568 | #if defined(MULTIPROCESSOR1) | |||
| 569 | ci->ci_tss = &cif->cif_tsscif_RO.u_tssgdt.uu_tss; | |||
| 570 | ci->ci_gdt = &cif->cif_gdtcif_RO.u_tssgdt.uu_gdt; | |||
/* the new CPU's GDT starts as a copy of the primary's */
| 571 | memcpy(ci->ci_gdt, cpu_info_primary.ci_gdt, GDT_SIZE)__builtin_memcpy((ci->ci_gdt), ((*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096*2 - __builtin_offsetof(struct cpu_info, ci_dev))).ci_gdt), (((6 << 3) + (1 << 4 )))); | |||
| 572 | cpu_enter_pages(cif); | |||
| 573 | if (cpu_info[cpunum] != NULL((void *)0)) | |||
| 574 | panic("cpu at apic id %d already attached?", cpunum); | |||
| 575 | cpu_info[cpunum] = ci; | |||
| 576 | #endif | |||
| 577 | #ifdef TRAPLOG | |||
| 578 | ci->ci_tlog_base = malloc(sizeof(struct tlog), | |||
| 579 | M_DEVBUF2, M_WAITOK0x0001); | |||
| 580 | #endif | |||
| 581 | } else { | |||
| 582 | ci = &cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))); | |||
| 583 | #if defined(MULTIPROCESSOR1) | |||
| 584 | if (caa->cpu_apicid != lapic_cpu_number()) { | |||
| 585 | panic("%s: running cpu is at apic %d" | |||
| 586 | " instead of at expected %d", | |||
| 587 | sc->sc_dev.dv_xname, lapic_cpu_number(), caa->cpu_apicid); | |||
| 588 | } | |||
| 589 | #endif | |||
| 590 | } | |||
| 591 | ||||
| 592 | ci->ci_self = ci; | |||
| 593 | sc->sc_info = ci; | |||
| 594 | ||||
| 595 | ci->ci_dev = self; | |||
| 596 | ci->ci_apicid = caa->cpu_apicid; | |||
| 597 | ci->ci_acpi_proc_id = caa->cpu_acpi_proc_id; | |||
| 598 | #ifdef MULTIPROCESSOR1 | |||
| 599 | ci->ci_cpuid = cpunum; | |||
| 600 | #else | |||
| 601 | ci->ci_cpuid = 0; /* False for APs, but they're not used anyway */ | |||
| 602 | #endif | |||
| 603 | ci->ci_func = caa->cpu_func; | |||
| 604 | ci->ci_handled_intr_level = IPL_NONE0x0; | |||
| 605 | ||||
| 606 | #ifndef SMALL_KERNEL | |||
| 607 | strlcpy(ci->ci_sensordev.xname, ci->ci_dev->dv_xname, | |||
| 608 | sizeof(ci->ci_sensordev.xname)); | |||
| 609 | #endif | |||
| 610 | ||||
| 611 | #if defined(MULTIPROCESSOR1) | |||
| 612 | /* | |||
| 613 | * Allocate UPAGES contiguous pages for the idle PCB and stack. | |||
| 614 | */ | |||
| 615 | kstack = (vaddr_t)km_alloc(USPACE(6 * (1 << 12)), &kv_any, &kp_dirty, &kd_nowait); | |||
/*
 * NOTE(review): for an AP this returns after cpu_info[cpunum] was
 * already set above, and no release of cif is visible here -
 * confirm that this is intended.
 */
| 616 | if (kstack == 0) { | |||
| 617 | if (caa->cpu_role != CPU_ROLE_AP2) { | |||
| 618 | panic("cpu_attach: unable to allocate idle stack for" | |||
| 619 | " primary"); | |||
| 620 | } | |||
| 621 | printf("%s: unable to allocate idle stack\n", | |||
| 622 | sc->sc_dev.dv_xname); | |||
| 623 | return; | |||
| 624 | } | |||
/* the PCB sits at the start of the region, which was requested with kp_dirty and is zeroed here */
| 625 | pcb = ci->ci_idle_pcb = (struct pcb *) kstack; | |||
| 626 | memset(pcb, 0, USPACE)__builtin_memset((pcb), (0), ((6 * (1 << 12)))); | |||
| 627 | ||||
/* initial stack and frame pointers: 16 bytes below the top of the region */
| 628 | pcb->pcb_kstack = kstack + USPACE(6 * (1 << 12)) - 16; | |||
| 629 | pcb->pcb_rbp = pcb->pcb_rsp = kstack + USPACE(6 * (1 << 12)) - 16; | |||
| 630 | pcb->pcb_pmap = pmap_kernel()(&kernel_pmap_store); | |||
| 631 | pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdirpa; | |||
| 632 | #endif | |||
| 633 | ||||
| 634 | /* further PCB init done later. */ | |||
| 635 | ||||
| 636 | printf(": "); | |||
| 637 | ||||
| 638 | switch (caa->cpu_role) { | |||
| 639 | case CPU_ROLE_SP0: | |||
| 640 | printf("(uniprocessor)\n"); | |||
| 641 | atomic_setbits_intx86_atomic_setbits_u32(&ci->ci_flags, | |||
| 642 | CPUF_PRESENT0x1000 | CPUF_SP0x0004 | CPUF_PRIMARY0x0008); | |||
| 643 | cpu_intr_init(ci); | |||
| 644 | #ifndef SMALL_KERNEL | |||
| 645 | cpu_ucode_apply(ci); | |||
| 646 | #endif | |||
| 647 | cpu_tsx_disable(ci); | |||
| 648 | identifycpu(ci); | |||
| 649 | cpu_fix_msrs(ci); | |||
| 650 | #ifdef MTRR1 | |||
| 651 | mem_range_attach(); | |||
| 652 | #endif /* MTRR */ | |||
| 653 | /* XXX SP fpuinit(ci) is done earlier */ | |||
| 654 | cpu_init(ci); | |||
| 655 | cpu_init_mwait(sc, ci); | |||
| 656 | break; | |||
| 657 | ||||
| 658 | case CPU_ROLE_BP1: | |||
| 659 | printf("apid %d (boot processor)\n", caa->cpu_apicid); | |||
| 660 | atomic_setbits_intx86_atomic_setbits_u32(&ci->ci_flags, | |||
| 661 | CPUF_PRESENT0x1000 | CPUF_BSP0x0001 | CPUF_PRIMARY0x0008); | |||
| 662 | cpu_intr_init(ci); | |||
/* unlike the SP case, cpu_ucode_apply() and cpu_tsx_disable() are not called here */
| 663 | identifycpu(ci); | |||
| 664 | cpu_fix_msrs(ci); | |||
| 665 | #ifdef MTRR1 | |||
| 666 | mem_range_attach(); | |||
| 667 | #endif /* MTRR */ | |||
| 668 | ||||
| 669 | #if NLAPIC1 > 0 | |||
| 670 | /* | |||
| 671 | * Enable local apic | |||
| 672 | */ | |||
| 673 | lapic_enable(); | |||
| 674 | lapic_calibrate_timer(ci); | |||
| 675 | #endif | |||
| 676 | /* XXX BP fpuinit(ci) is done earlier */ | |||
| 677 | cpu_init(ci); | |||
| 678 | ||||
| 679 | #if NIOAPIC1 > 0 | |||
| 680 | ioapic_bsp_id = caa->cpu_apicid; | |||
| 681 | #endif | |||
| 682 | cpu_init_mwait(sc, ci); | |||
| 683 | break; | |||
| 684 | ||||
| 685 | case CPU_ROLE_AP2: | |||
| 686 | /* | |||
| 687 | * report on an AP | |||
| 688 | */ | |||
| 689 | printf("apid %d (application processor)\n", caa->cpu_apicid); | |||
| 690 | ||||
| 691 | #if defined(MULTIPROCESSOR1) | |||
| 692 | cpu_intr_init(ci); | |||
| 693 | cpu_start_secondary(ci); | |||
| 694 | clockqueue_init(&ci->ci_queue); | |||
| 695 | sched_init_cpu(ci); | |||
/*
 * ncpus is bumped unconditionally; only a CPU flagged CPUF_PRESENT
 * is linked in, directly after the head of cpu_info_list.
 */
| 696 | ncpus++; | |||
| 697 | if (ci->ci_flags & CPUF_PRESENT0x1000) { | |||
| 698 | ci->ci_next = cpu_info_list->ci_next; | |||
| 699 | cpu_info_list->ci_next = ci; | |||
| 700 | } | |||
| 701 | #else | |||
| 702 | printf("%s: not started\n", sc->sc_dev.dv_xname); | |||
| 703 | #endif | |||
| 704 | break; | |||
| 705 | ||||
| 706 | default: | |||
| 707 | panic("unknown processor type??"); | |||
| 708 | } | |||
| 709 | ||||
| 710 | #if defined(MULTIPROCESSOR1) | |||
| 711 | if (mp_verbose) { | |||
| 712 | printf("%s: kstack at 0x%lx for %d bytes\n", | |||
| 713 | sc->sc_dev.dv_xname, kstack, USPACE(6 * (1 << 12))); | |||
| 714 | printf("%s: idle pcb at %p, idle sp at 0x%llx\n", | |||
| 715 | sc->sc_dev.dv_xname, pcb, pcb->pcb_rsp); | |||
| 716 | } | |||
| 717 | #endif | |||
| 718 | #if NVMM1 > 0 | |||
| 719 | cpu_init_vmm(ci); | |||
| 720 | #endif /* NVMM > 0 */ | |||
| 721 | ||||
| 722 | #ifndef SMALL_KERNEL | |||
| 723 | if (ci->ci_sensordev.sensors_count > 0) | |||
| 724 | sensordev_install(&ci->ci_sensordev); | |||
| 725 | #endif | |||
| 726 | } | |||
| 727 | ||||
| 728 | static void | |||
| 729 | replacexsave(int xsave_ext) | |||
| 730 | { | |||
| 731 | extern long _xrstor, _xrstors, _xsave, _xsaves, _xsaveopt; | |||
| 732 | static int replacedone = 0; | |||
| 733 | int s; | |||
| 734 | ||||
| 735 | if (replacedone) | |||
| 736 | return; | |||
| 737 | replacedone = 1; | |||
| 738 | ||||
| 739 | s = splhigh()splraise(0xd); | |||
| 740 | codepatch_replace(CPTAG_XRSTORS13, | |||
| 741 | (xsave_ext & XSAVE_XSAVES0x08UL) ? &_xrstors : &_xrstor, 4); | |||
| 742 | codepatch_replace(CPTAG_XRSTOR4, &_xrstor, 4); | |||
| 743 | codepatch_replace(CPTAG_XSAVE5, | |||
| 744 | (xsave_ext & XSAVE_XSAVES0x08UL) ? &_xsaves : | |||
| 745 | (xsave_ext & XSAVE_XSAVEOPT0x01UL) ? &_xsaveopt : &_xsave, 4); | |||
| 746 | splx(s)spllower(s); | |||
| 747 | } | |||
| 748 | ||||
| 749 | ||||
| 750 | /* | |||
| 751 | * Initialize the processor appropriately. | |||
| 752 | */ | |||
| 753 | ||||
| 754 | void | |||
| 755 | cpu_init(struct cpu_info *ci) | |||
| 756 | { | |||
| 757 | struct savefpu *sfp; | |||
| 758 | u_int cr4; | |||
| 759 | ||||
| 760 | /* configure the CPU if needed */ | |||
| 761 | if (ci->cpu_setup != NULL((void *)0)) | |||
| 762 | (*ci->cpu_setup)(ci); | |||
| 763 | ||||
| 764 | cr4 = rcr4() | CR4_DEFAULT(0x00000020|0x00000080|0x00000010|0x00000200|0x00000400); | |||
| 765 | if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMEP0x00000080) | |||
| 766 | cr4 |= CR4_SMEP0x00100000; | |||
| 767 | if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMAP0x00100000) | |||
| 768 | cr4 |= CR4_SMAP0x00200000; | |||
| 769 | if (ci->ci_feature_sefflags_ecx & SEFF0ECX_UMIP0x00000004) | |||
| 770 | cr4 |= CR4_UMIP0x00000800; | |||
| 771 | if ((cpu_ecxfeature & CPUIDECX_XSAVE0x04000000) && cpuid_level >= 0xd) | |||
| 772 | cr4 |= CR4_OSXSAVE0x00040000; | |||
| 773 | if (pg_xo) | |||
| 774 | cr4 |= CR4_PKE0x00400000; | |||
| 775 | if (pmap_use_pcid) | |||
| 776 | cr4 |= CR4_PCIDE0x00020000; | |||
| 777 | lcr4(cr4); | |||
| 778 | ||||
| 779 | if ((cpu_ecxfeature & CPUIDECX_XSAVE0x04000000) && cpuid_level >= 0xd) { | |||
| 780 | u_int32_t eax, ebx, ecx, edx; | |||
| 781 | ||||
| 782 | xsave_mask = XFEATURE_X870x00000001 | XFEATURE_SSE0x00000002; | |||
| 783 | CPUID_LEAF(0xd, 0, eax, ebx, ecx, edx)__asm volatile("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0xd), "c" (0)); | |||
| 784 | xsave_mask |= eax & XFEATURE_AVX0x00000004; | |||
| 785 | xsetbv(0, xsave_mask); | |||
| 786 | CPUID_LEAF(0xd, 0, eax, ebx, ecx, edx)__asm volatile("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0xd), "c" (0)); | |||
| 787 | if (CPU_IS_PRIMARY(ci)((ci)->ci_flags & 0x0008)) { | |||
| 788 | fpu_save_len = ebx; | |||
| 789 | KASSERT(fpu_save_len <= sizeof(struct savefpu))((fpu_save_len <= sizeof(struct savefpu)) ? (void)0 : __assert ("diagnostic ", "/usr/src/sys/arch/amd64/amd64/cpu.c", 789, "fpu_save_len <= sizeof(struct savefpu)" )); | |||
| 790 | } else { | |||
| 791 | KASSERT(ebx == fpu_save_len)((ebx == fpu_save_len) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/arch/amd64/amd64/cpu.c" , 791, "ebx == fpu_save_len")); | |||
| 792 | } | |||
| 793 | ||||
| 794 | /* check for xsaves, xsaveopt, and supervisor features */ | |||
| 795 | CPUID_LEAF(0xd, 1, eax, ebx, ecx, edx)__asm volatile("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0xd), "c" (1)); | |||
| 796 | /* Disable XSAVES on AMD family 17h due to Erratum 1386 */ | |||
| 797 | if (!strcmp(cpu_vendor, "AuthenticAMD") && | |||
| 798 | ci->ci_family == 0x17) { | |||
| 799 | eax &= ~XSAVE_XSAVES0x08UL; | |||
| 800 | } | |||
| 801 | if (eax & XSAVE_XSAVES0x08UL) { | |||
| 802 | #ifndef SMALL_KERNEL | |||
| 803 | if (ci->ci_feature_sefflags_edx & SEFF0EDX_IBT0x00100000) | |||
| 804 | xsave_mask |= ecx & XFEATURE_CET_U0x00000800; | |||
| 805 | #endif | |||
| 806 | if (xsave_mask & XFEATURE_XSS_MASK(0x00000100 | 0x00000400 | (0x00000800 | 0x00001000) | 0x00002000 | 0x00004000 | 0x00008000 | 0x00010000)) { | |||
| 807 | wrmsr(MSR_XSS0xda0, xsave_mask & XFEATURE_XSS_MASK(0x00000100 | 0x00000400 | (0x00000800 | 0x00001000) | 0x00002000 | 0x00004000 | 0x00008000 | 0x00010000)); | |||
| 808 | CPUID_LEAF(0xd, 1, eax, ebx, ecx, edx)__asm volatile("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0xd), "c" (1)); | |||
| 809 | KASSERT(ebx <= sizeof(struct savefpu))((ebx <= sizeof(struct savefpu)) ? (void)0 : __assert("diagnostic " , "/usr/src/sys/arch/amd64/amd64/cpu.c", 809, "ebx <= sizeof(struct savefpu)" )); | |||
| 810 | } | |||
| 811 | if (CPU_IS_PRIMARY(ci)((ci)->ci_flags & 0x0008)) | |||
| 812 | cpu_use_xsaves = 1; | |||
| 813 | } | |||
| 814 | ||||
| 815 | replacexsave(eax); | |||
| 816 | } | |||
| 817 | ||||
| 818 | if (CPU_IS_PRIMARY(ci)((ci)->ci_flags & 0x0008)) { | |||
| 819 | /* Clean our FPU save area */ | |||
| 820 | sfp = fpu_cleandata(&proc0.p_addr->u_pcb.pcb_savefpu); | |||
| 821 | memset(sfp, 0, fpu_save_len)__builtin_memset((sfp), (0), (fpu_save_len)); | |||
| 822 | sfp->fp_fxsave.fx_fcw = __INITIAL_NPXCW__0x037f; | |||
| 823 | sfp->fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__0x1f80; | |||
| 824 | xrstor_user(sfp, xsave_mask); | |||
| 825 | if (cpu_use_xsaves || !xsave_mask) | |||
| 826 | fpusave(sfp); | |||
| 827 | else { | |||
| 828 | /* must not use xsaveopt here */ | |||
| 829 | xsave(sfp, xsave_mask); | |||
| 830 | } | |||
| 831 | } else { | |||
| 832 | fpureset()xrstor_kern((&proc0.p_addr->u_pcb.pcb_savefpu), xsave_mask ); | |||
| 833 | } | |||
| 834 | ||||
| 835 | #if NVMM1 > 0 | |||
| 836 | /* Re-enable VMM if needed */ | |||
| 837 | if (ci->ci_flags & CPUF_VMM0x20000) | |||
| 838 | start_vmm_on_cpu(ci); | |||
| 839 | #endif /* NVMM > 0 */ | |||
| 840 | ||||
| 841 | #ifdef MULTIPROCESSOR1 | |||
| 842 | atomic_setbits_intx86_atomic_setbits_u32(&ci->ci_flags, CPUF_RUNNING0x2000); | |||
| 843 | /* | |||
| 844 | * Big hammer: flush all TLB entries, including ones from PTEs | |||
| 845 | * with the G bit set. This should only be necessary if TLB | |||
| 846 | * shootdown falls far behind. | |||
| 847 | */ | |||
| 848 | cr4 = rcr4(); | |||
| 849 | lcr4(cr4 & ~CR4_PGE0x00000080); | |||
| 850 | lcr4(cr4); | |||
| 851 | ||||
| 852 | /* Check if TSC is synchronized. */ | |||
| 853 | if (cold && !CPU_IS_PRIMARY(ci)((ci)->ci_flags & 0x0008)) | |||
| 854 | tsc_test_sync_ap(ci); | |||
| 855 | #endif | |||
| 856 | } | |||
| 857 | ||||
#if NVMM > 0
/*
 * cpu_init_vmm
 *
 * Initializes per-cpu VMM state.  Nothing is done unless the CPU has
 * CI_VMM_VMX set in ci_vmm_flags.
 *
 * Parameters:
 *  ci: the cpu for which state is being initialized
 */
void
cpu_init_vmm(struct cpu_info *ci)
{
	if (!(ci->ci_vmm_flags & CI_VMM_VMX))
		return;

	/*
	 * Allocate a zeroed, page-sized per-cpu VMXON region and record
	 * its physical address.
	 */
	ci->ci_vmxon_region = malloc(PAGE_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
	if (!pmap_extract(pmap_kernel(), (vaddr_t)ci->ci_vmxon_region,
	    &ci->ci_vmxon_region_pa))
		panic("Can't locate VMXON region in phys mem");

	ci->ci_vmcs_pa = VMX_VMCS_PA_CLEAR;
	rw_init(&ci->ci_vmcs_lock, "vmcslock");
}
#endif /* NVMM > 0 */
| 884 | ||||
| 885 | #ifdef MULTIPROCESSOR1 | |||
| 886 | void | |||
| 887 | cpu_boot_secondary_processors(void) | |||
| 888 | { | |||
| 889 | struct cpu_info *ci; | |||
| 890 | u_long i; | |||
| 891 | ||||
| 892 | for (i=0; i < MAXCPUS64; i++) { | |||
| 893 | ci = cpu_info[i]; | |||
| 894 | if (ci == NULL((void *)0)) | |||
| 895 | continue; | |||
| 896 | if (ci->ci_idle_pcb == NULL((void *)0)) | |||
| 897 | continue; | |||
| 898 | if ((ci->ci_flags & CPUF_PRESENT0x1000) == 0) | |||
| 899 | continue; | |||
| 900 | if (ci->ci_flags & (CPUF_BSP0x0001 | CPUF_SP0x0004 | CPUF_PRIMARY0x0008)) | |||
| 901 | continue; | |||
| 902 | ci->ci_randseed = (arc4random() & 0x7fffffff) + 1; | |||
| 903 | cpu_boot_secondary(ci); | |||
| 904 | } | |||
| 905 | } | |||
| 906 | ||||
| 907 | void | |||
| 908 | cpu_start_secondary(struct cpu_info *ci) | |||
| 909 | { | |||
| 910 | int i; | |||
| 911 | u_long s; | |||
| 912 | ||||
| 913 | atomic_setbits_intx86_atomic_setbits_u32(&ci->ci_flags, CPUF_AP0x0002); | |||
| 914 | ||||
| 915 | pmap_kenter_pa(MP_TRAMPOLINE(16 * (1 << 12)), MP_TRAMPOLINE(16 * (1 << 12)), PROT_READ0x01 | PROT_EXEC0x04); | |||
| 916 | pmap_kenter_pa(MP_TRAMP_DATA(17 * (1 << 12)), MP_TRAMP_DATA(17 * (1 << 12)), PROT_READ0x01 | PROT_WRITE0x02); | |||
| 917 | ||||
| 918 | CPU_STARTUP(ci)((ci)->ci_func->start(ci)); | |||
| 919 | ||||
| 920 | /* | |||
| 921 | * wait for it to become ready | |||
| 922 | */ | |||
| 923 | for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT0x1000)) && i>0;i--) { | |||
| 924 | delay(10)(*delay_func)(10); | |||
| 925 | } | |||
| 926 | if (! (ci->ci_flags & CPUF_PRESENT0x1000)) { | |||
| 927 | printf("%s: failed to become ready\n", ci->ci_dev->dv_xname); | |||
| 928 | #if defined(MPDEBUG) && defined(DDB1) | |||
| 929 | printf("dropping into debugger; continue from here to resume boot\n"); | |||
| 930 | db_enter(); | |||
| 931 | #endif | |||
| 932 | } | |||
| 933 | ||||
| 934 | if ((ci->ci_flags & CPUF_IDENTIFIED0x0020) == 0) { | |||
| 935 | atomic_setbits_intx86_atomic_setbits_u32(&ci->ci_flags, CPUF_IDENTIFY0x0010); | |||
| 936 | ||||
| 937 | /* wait for it to identify */ | |||
| 938 | for (i = 2000000; (ci->ci_flags & CPUF_IDENTIFY0x0010) && i > 0; i--) | |||
| 939 | delay(10)(*delay_func)(10); | |||
| 940 | ||||
| 941 | if (ci->ci_flags & CPUF_IDENTIFY0x0010) | |||
| 942 | printf("%s: failed to identify\n", | |||
| 943 | ci->ci_dev->dv_xname); | |||
| 944 | } | |||
| 945 | ||||
| 946 | if (ci->ci_flags & CPUF_IDENTIFIED0x0020) { | |||
| 947 | /* | |||
| 948 | * Test if TSCs are synchronized. Invalidate cache to | |||
| 949 | * minimize possible cache effects. Disable interrupts to | |||
| 950 | * try to rule out external interference. | |||
| 951 | */ | |||
| 952 | s = intr_disable(); | |||
| 953 | wbinvd(); | |||
| 954 | tsc_test_sync_bp(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})); | |||
| 955 | intr_restore(s); | |||
| 956 | } | |||
| 957 | ||||
| 958 | CPU_START_CLEANUP(ci)((ci)->ci_func->cleanup(ci)); | |||
| 959 | ||||
| 960 | pmap_kremove(MP_TRAMPOLINE(16 * (1 << 12)), PAGE_SIZE(1 << 12)); | |||
| 961 | pmap_kremove(MP_TRAMP_DATA(17 * (1 << 12)), PAGE_SIZE(1 << 12)); | |||
| 962 | } | |||
| 963 | ||||
| 964 | void | |||
| 965 | cpu_boot_secondary(struct cpu_info *ci) | |||
| 966 | { | |||
| 967 | int i; | |||
| 968 | u_long s; | |||
| 969 | ||||
| 970 | atomic_setbits_intx86_atomic_setbits_u32(&ci->ci_flags, CPUF_GO0x8000); | |||
| 971 | ||||
| 972 | for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING0x2000)) && i>0;i--) { | |||
| 973 | delay(10)(*delay_func)(10); | |||
| 974 | } | |||
| 975 | if (! (ci->ci_flags & CPUF_RUNNING0x2000)) { | |||
| 976 | printf("cpu failed to start\n"); | |||
| 977 | #if defined(MPDEBUG) && defined(DDB1) | |||
| 978 | printf("dropping into debugger; continue from here to resume boot\n"); | |||
| 979 | db_enter(); | |||
| 980 | #endif | |||
| 981 | } else if (cold) { | |||
| 982 | /* Test if TSCs are synchronized again. */ | |||
| 983 | s = intr_disable(); | |||
| 984 | wbinvd(); | |||
| 985 | tsc_test_sync_bp(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;})); | |||
| 986 | intr_restore(s); | |||
| 987 | } | |||
| 988 | } | |||
| 989 | ||||
| 990 | /* | |||
| 991 | * The CPU ends up here when it's ready to run | |||
| 992 | * This is called from code in mptramp.s; at this point, we are running | |||
| 993 | * in the idle pcb/idle stack of the new cpu. When this function returns, | |||
| 994 | * this processor will enter the idle loop and start looking for work. | |||
| 995 | * | |||
| 996 | * XXX should share some of this with init386 in machdep.c | |||
| 997 | */ | |||
| 998 | void | |||
| 999 | cpu_hatch(void *v) | |||
| 1000 | { | |||
| 1001 | struct cpu_info *ci = (struct cpu_info *)v; | |||
| 1002 | int s; | |||
| 1003 | ||||
| 1004 | cpu_init_msrs(ci); | |||
| 1005 | ||||
| 1006 | #ifdef DEBUG | |||
| 1007 | if (ci->ci_flags & CPUF_PRESENT0x1000) | |||
| 1008 | panic("%s: already running!?", ci->ci_dev->dv_xname); | |||
| 1009 | #endif | |||
| 1010 | atomic_setbits_intx86_atomic_setbits_u32(&ci->ci_flags, CPUF_PRESENT0x1000); | |||
| 1011 | ||||
| 1012 | lapic_enable(); | |||
| 1013 | cpu_ucode_apply(ci); | |||
| 1014 | cpu_tsx_disable(ci); | |||
| 1015 | ||||
| 1016 | if ((ci->ci_flags & CPUF_IDENTIFIED0x0020) == 0) { | |||
| 1017 | /* | |||
| 1018 | * We need to wait until we can identify, otherwise dmesg | |||
| 1019 | * output will be messy. | |||
| 1020 | */ | |||
| 1021 | while ((ci->ci_flags & CPUF_IDENTIFY0x0010) == 0) | |||
| 1022 | delay(10)(*delay_func)(10); | |||
| 1023 | ||||
| 1024 | identifycpu(ci); | |||
| 1025 | ||||
| 1026 | /* Prevent identifycpu() from running again */ | |||
| 1027 | atomic_setbits_intx86_atomic_setbits_u32(&ci->ci_flags, CPUF_IDENTIFIED0x0020); | |||
| 1028 | ||||
| 1029 | /* Signal we're done */ | |||
| 1030 | atomic_clearbits_intx86_atomic_clearbits_u32(&ci->ci_flags, CPUF_IDENTIFY0x0010); | |||
| 1031 | } | |||
| 1032 | ||||
| 1033 | /* These have to run after identifycpu() */ | |||
| 1034 | cpu_fix_msrs(ci); | |||
| 1035 | ||||
| 1036 | /* | |||
| 1037 | * Test if our TSC is synchronized for the first time. | |||
| 1038 | * Note that interrupts are off at this point. | |||
| 1039 | */ | |||
| 1040 | wbinvd(); | |||
| 1041 | tsc_test_sync_ap(ci); | |||
| 1042 | ||||
| 1043 | while ((ci->ci_flags & CPUF_GO0x8000) == 0) | |||
| 1044 | delay(10)(*delay_func)(10); | |||
| 1045 | #ifdef HIBERNATE1 | |||
| 1046 | if ((ci->ci_flags & CPUF_PARK0x10000) != 0) { | |||
| 1047 | if (ci->ci_feature_sefflags_edx & SEFF0EDX_IBT0x00100000) | |||
| 1048 | lcr4(rcr4() & ~CR4_CET0x00800000); | |||
| 1049 | atomic_clearbits_intx86_atomic_clearbits_u32(&ci->ci_flags, CPUF_PARK0x10000); | |||
| 1050 | hibernate_drop_to_real_mode(); | |||
| 1051 | } | |||
| 1052 | #endif /* HIBERNATE */ | |||
| 1053 | ||||
| 1054 | #ifdef DEBUG | |||
| 1055 | if (ci->ci_flags & CPUF_RUNNING0x2000) | |||
| 1056 | panic("%s: already running!?", ci->ci_dev->dv_xname); | |||
| 1057 | #endif | |||
| 1058 | ||||
| 1059 | cpu_init_idt(); | |||
| 1060 | lapic_set_lvt(); | |||
| 1061 | gdt_init_cpu(ci); | |||
| 1062 | fpuinit(ci); | |||
| 1063 | ||||
| 1064 | lldt(0); | |||
| 1065 | ||||
| 1066 | cpu_init(ci); | |||
| 1067 | #if NPVBUS1 > 0 | |||
| 1068 | pvbus_init_cpu(); | |||
| 1069 | #endif | |||
| 1070 | ||||
| 1071 | /* Re-initialise memory range handling on AP */ | |||
| 1072 | if (mem_range_softc.mr_op != NULL((void *)0)) | |||
| 1073 | mem_range_softc.mr_op->initAP(&mem_range_softc); | |||
| 1074 | ||||
| 1075 | s = splhigh()splraise(0xd); | |||
| 1076 | lcr8(0); | |||
| 1077 | intr_enable(); | |||
| 1078 | splx(s)spllower(s); | |||
| 1079 | ||||
| 1080 | lapic_startclock(); | |||
| 1081 | ||||
| 1082 | sched_toidle(); | |||
| 1083 | } | |||
| 1084 | ||||
| 1085 | #if defined(DDB1) | |||
| 1086 | ||||
| 1087 | #include <ddb/db_output.h> | |||
| 1088 | #include <machine/db_machdep.h> | |||
| 1089 | ||||
| 1090 | /* | |||
| 1091 | * Dump cpu information from ddb. | |||
| 1092 | */ | |||
| 1093 | void | |||
| 1094 | cpu_debug_dump(void) | |||
| 1095 | { | |||
| 1096 | struct cpu_info *ci; | |||
| 1097 | CPU_INFO_ITERATORint cii; | |||
| 1098 | ||||
| 1099 | db_printf("addr dev id flags ipis curproc\n"); | |||
| 1100 | CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci-> ci_next) { | |||
| 1101 | db_printf("%p %s %u %x %x %10p\n", | |||
| 1102 | ci, | |||
| 1103 | ci->ci_dev == NULL((void *)0) ? "BOOT" : ci->ci_dev->dv_xname, | |||
| 1104 | ci->ci_cpuid, | |||
| 1105 | ci->ci_flags, ci->ci_ipis, | |||
| 1106 | ci->ci_curproc); | |||
| 1107 | } | |||
| 1108 | } | |||
| 1109 | #endif | |||
| 1110 | ||||
| 1111 | int | |||
| 1112 | mp_cpu_start(struct cpu_info *ci) | |||
| 1113 | { | |||
| 1114 | unsigned short dwordptr[2]; | |||
| 1115 | ||||
| 1116 | /* | |||
| 1117 | * "The BSP must initialize CMOS shutdown code to 0Ah ..." | |||
| 1118 | */ | |||
| 1119 | ||||
| 1120 | outb(IO_RTC, NVRAM_RESET)( (__builtin_constant_p((0x070)) && (0x070) < 0x100 ) ? __outbc(0x070, (0xe + 1)) : __outb(0x070, (0xe + 1))); | |||
| 1121 | outb(IO_RTC+1, NVRAM_RESET_JUMP)( (__builtin_constant_p((0x070 +1)) && (0x070 +1) < 0x100) ? __outbc(0x070 +1, 0x0a) : __outb(0x070 +1, 0x0a)); | |||
| 1122 | ||||
| 1123 | /* | |||
| 1124 | * "and the warm reset vector (DWORD based at 40:67) to point | |||
| 1125 | * to the AP startup code ..." | |||
| 1126 | */ | |||
| 1127 | ||||
| 1128 | dwordptr[0] = 0; | |||
| 1129 | dwordptr[1] = MP_TRAMPOLINE(16 * (1 << 12)) >> 4; | |||
| 1130 | ||||
| 1131 | pmap_kenter_pa(0, 0, PROT_READ0x01 | PROT_WRITE0x02); | |||
| 1132 | memcpy((u_int8_t *) 0x467, dwordptr, 4)__builtin_memcpy(((u_int8_t *) 0x467), (dwordptr), (4)); | |||
| 1133 | pmap_kremove(0, PAGE_SIZE(1 << 12)); | |||
| 1134 | ||||
| 1135 | #if NLAPIC1 > 0 | |||
| 1136 | /* | |||
| 1137 | * ... prior to executing the following sequence:" | |||
| 1138 | */ | |||
| 1139 | ||||
| 1140 | if (ci->ci_flags & CPUF_AP0x0002) { | |||
| 1141 | x86_ipi_init(ci->ci_apicid); | |||
| 1142 | ||||
| 1143 | delay(10000)(*delay_func)(10000); | |||
| 1144 | ||||
| 1145 | if (cpu_feature & CPUID_APIC0x00000200) { | |||
| 1146 | x86_ipi(MP_TRAMPOLINE(16 * (1 << 12))/PAGE_SIZE(1 << 12), ci->ci_apicid, | |||
| 1147 | LAPIC_DLMODE_STARTUP0x00000600); | |||
| 1148 | delay(200)(*delay_func)(200); | |||
| 1149 | ||||
| 1150 | x86_ipi(MP_TRAMPOLINE(16 * (1 << 12))/PAGE_SIZE(1 << 12), ci->ci_apicid, | |||
| 1151 | LAPIC_DLMODE_STARTUP0x00000600); | |||
| 1152 | delay(200)(*delay_func)(200); | |||
| 1153 | } | |||
| 1154 | } | |||
| 1155 | #endif | |||
| 1156 | return 0; | |||
| 1157 | } | |||
| 1158 | ||||
| 1159 | void | |||
| 1160 | mp_cpu_start_cleanup(struct cpu_info *ci) | |||
| 1161 | { | |||
| 1162 | /* | |||
| 1163 | * Ensure the NVRAM reset byte contains something vaguely sane. | |||
| 1164 | */ | |||
| 1165 | ||||
| 1166 | outb(IO_RTC, NVRAM_RESET)( (__builtin_constant_p((0x070)) && (0x070) < 0x100 ) ? __outbc(0x070, (0xe + 1)) : __outb(0x070, (0xe + 1))); | |||
| 1167 | outb(IO_RTC+1, NVRAM_RESET_RST)( (__builtin_constant_p((0x070 +1)) && (0x070 +1) < 0x100) ? __outbc(0x070 +1, 0x00) : __outb(0x070 +1, 0x00)); | |||
| 1168 | } | |||
| 1169 | #endif /* MULTIPROCESSOR */ | |||
| 1170 | ||||
| 1171 | typedef void (vector)(void); | |||
| 1172 | extern vector Xsyscall_meltdown, Xsyscall, Xsyscall32; | |||
| 1173 | ||||
| 1174 | void | |||
| 1175 | cpu_init_msrs(struct cpu_info *ci) | |||
| 1176 | { | |||
| 1177 | wrmsr(MSR_STAR0xc0000081, | |||
| 1178 | ((uint64_t)GSEL(GCODE_SEL, SEL_KPL)(((1) << 3) | 0) << 32) | | |||
| 1179 | ((uint64_t)GSEL(GUCODE32_SEL, SEL_UPL)(((3) << 3) | 3) << 48)); | |||
| 1180 | wrmsr(MSR_LSTAR0xc0000082, cpu_meltdown ? (uint64_t)Xsyscall_meltdown : | |||
| 1181 | (uint64_t)Xsyscall); | |||
| 1182 | wrmsr(MSR_CSTAR0xc0000083, (uint64_t)Xsyscall32); | |||
| 1183 | wrmsr(MSR_SFMASK0xc0000084, PSL_NT0x00004000|PSL_T0x00000100|PSL_I0x00000200|PSL_C0x00000001|PSL_D0x00000400|PSL_AC0x00040000); | |||
| 1184 | ||||
| 1185 | wrmsr(MSR_FSBASE0xc0000100, 0); | |||
| 1186 | wrmsr(MSR_GSBASE0xc0000101, (u_int64_t)ci); | |||
| 1187 | wrmsr(MSR_KERNELGSBASE0xc0000102, 0); | |||
| 1188 | patinit(ci); | |||
| 1189 | } | |||
| 1190 | ||||
| 1191 | void | |||
| 1192 | cpu_fix_msrs(struct cpu_info *ci) | |||
| 1193 | { | |||
| 1194 | int family = ci->ci_family; | |||
| 1195 | uint64_t msr, nmsr; | |||
| 1196 | ||||
| 1197 | if (!strcmp(cpu_vendor, "GenuineIntel")) { | |||
| 1198 | if ((family > 6 || (family == 6 && ci->ci_model >= 0xd)) && | |||
| 1199 | rdmsr_safe(MSR_MISC_ENABLE0x1a0, &msr) == 0 && | |||
| 1200 | (msr & MISC_ENABLE_FAST_STRINGS(1 << 0)) == 0) { | |||
| 1201 | msr |= MISC_ENABLE_FAST_STRINGS(1 << 0); | |||
| 1202 | wrmsr(MSR_MISC_ENABLE0x1a0, msr); | |||
| 1203 | DPRINTF("%s: enabled fast strings\n", ci->ci_dev->dv_xname); | |||
| 1204 | ||||
| 1205 | /* | |||
| 1206 | * Attempt to disable Silicon Debug and lock the configuration | |||
| 1207 | * if it's enabled and unlocked. | |||
| 1208 | */ | |||
| 1209 | if (cpu_ecxfeature & CPUIDECX_SDBG0x00000800) { | |||
| 1210 | msr = rdmsr(IA32_DEBUG_INTERFACE0xc80); | |||
| 1211 | if ((msr & IA32_DEBUG_INTERFACE_ENABLE0x00000001) && | |||
| 1212 | (msr & IA32_DEBUG_INTERFACE_LOCK0x40000000) == 0) { | |||
| 1213 | msr &= IA32_DEBUG_INTERFACE_MASK0x80000000; | |||
| 1214 | msr |= IA32_DEBUG_INTERFACE_LOCK0x40000000; | |||
| 1215 | wrmsr(IA32_DEBUG_INTERFACE0xc80, msr); | |||
| 1216 | } else if (msr & IA32_DEBUG_INTERFACE_ENABLE0x00000001) | |||
| 1217 | printf("%s: cannot disable silicon debug\n", | |||
| 1218 | ci->ci_dev->dv_xname); | |||
| 1219 | } | |||
| 1220 | } | |||
| 1221 | } | |||
| 1222 | ||||
| 1223 | if (!strcmp(cpu_vendor, "AuthenticAMD")) { | |||
| 1224 | /* Apply AMD errata */ | |||
| 1225 | amd64_errata(ci); | |||
| 1226 | ||||
| 1227 | /* | |||
| 1228 | * "Mitigation G-2" per AMD's Whitepaper "Software Techniques | |||
| 1229 | * for Managing Speculation on AMD Processors" | |||
| 1230 | * | |||
| 1231 | * By setting MSR C001_1029[1]=1, LFENCE becomes a dispatch | |||
| 1232 | * serializing instruction. | |||
| 1233 | * | |||
| 1234 | * This MSR is available on all AMD families >= 10h, except 11h | |||
| 1235 | * where LFENCE is always serializing. | |||
| 1236 | */ | |||
| 1237 | if (family >= 0x10 && family != 0x11) { | |||
| 1238 | nmsr = msr = rdmsr(MSR_DE_CFG0xc0011029); | |||
| 1239 | nmsr |= DE_CFG_SERIALIZE_LFENCE(1 << 1); | |||
| 1240 | if (msr != nmsr) | |||
| 1241 | wrmsr(MSR_DE_CFG0xc0011029, nmsr); | |||
| 1242 | } | |||
| 1243 | if (family == 0x17 && ci->ci_model >= 0x31 && | |||
| 1244 | (cpu_ecxfeature & CPUIDECX_HV0x80000000) == 0) { | |||
| 1245 | nmsr = msr = rdmsr(MSR_DE_CFG0xc0011029); | |||
| 1246 | nmsr |= DE_CFG_SERIALIZE_9(1 << 9); | |||
| 1247 | if (msr != nmsr) | |||
| 1248 | wrmsr(MSR_DE_CFG0xc0011029, nmsr); | |||
| 1249 | } | |||
| 1250 | } | |||
| 1251 | ||||
| 1252 | #ifndef SMALL_KERNEL | |||
| 1253 | if (ci->ci_feature_sefflags_edx & SEFF0EDX_IBT0x00100000) { | |||
| 1254 | msr = rdmsr(MSR_S_CET0x6a2); | |||
| 1255 | wrmsr(MSR_S_CET0x6a2, msr | MSR_CET_ENDBR_EN(1 << 2)); | |||
| 1256 | lcr4(rcr4() | CR4_CET0x00800000); | |||
| 1257 | } | |||
| 1258 | #endif | |||
| 1259 | } | |||
| 1260 | ||||
| 1261 | void | |||
| 1262 | cpu_tsx_disable(struct cpu_info *ci) | |||
| 1263 | { | |||
| 1264 | uint64_t msr; | |||
| 1265 | uint32_t dummy, sefflags_edx; | |||
| 1266 | ||||
| 1267 | /* this runs before identifycpu() populates ci_feature_sefflags_edx */ | |||
| 1268 | if (cpuid_level < 0x07) | |||
| 1269 | return; | |||
| 1270 | CPUID_LEAF(0x7, 0, dummy, dummy, dummy, sefflags_edx)__asm volatile("cpuid" : "=a" (dummy), "=b" (dummy), "=c" (dummy ), "=d" (sefflags_edx) : "a" (0x7), "c" (0)); | |||
| 1271 | ||||
| 1272 | if (strcmp(cpu_vendor, "GenuineIntel") == 0 && | |||
| 1273 | (sefflags_edx & SEFF0EDX_ARCH_CAP0x20000000)) { | |||
| 1274 | msr = rdmsr(MSR_ARCH_CAPABILITIES0x10a); | |||
| 1275 | if (msr & ARCH_CAP_TSX_CTRL(1 << 7)) { | |||
| 1276 | msr = rdmsr(MSR_TSX_CTRL0x122); | |||
| 1277 | msr |= TSX_CTRL_RTM_DISABLE(1ULL << 0) | TSX_CTRL_TSX_CPUID_CLEAR(1ULL << 1); | |||
| 1278 | wrmsr(MSR_TSX_CTRL0x122, msr); | |||
| 1279 | } | |||
| 1280 | } | |||
| 1281 | } | |||
| 1282 | ||||
| 1283 | void | |||
| 1284 | patinit(struct cpu_info *ci) | |||
| 1285 | { | |||
| 1286 | extern int pmap_pg_wc; | |||
| 1287 | u_int64_t reg; | |||
| 1288 | ||||
| 1289 | if ((cpu_feature & CPUID_PAT0x00010000) == 0) | |||
| 1290 | return; | |||
| 1291 | /* | |||
| 1292 | * Set up PAT bits. | |||
| 1293 | * The default pat table is the following: | |||
| 1294 | * WB, WT, UC-, UC, WB, WT, UC-, UC | |||
| 1295 | * We change it to: | |||
| 1296 | * WB, WC, UC-, UC, WB, WC, UC-, UC | |||
| 1297 | * i.e change the WT bit to be WC. | |||
| 1298 | */ | |||
| 1299 | reg = PATENTRY(0, PAT_WB)(0x6UL << ((0) * 8)) | PATENTRY(1, PAT_WC)(0x1UL << ((1) * 8)) | | |||
| 1300 | PATENTRY(2, PAT_UCMINUS)(0x7UL << ((2) * 8)) | PATENTRY(3, PAT_UC)(0x0UL << ((3) * 8)) | | |||
| 1301 | PATENTRY(4, PAT_WB)(0x6UL << ((4) * 8)) | PATENTRY(5, PAT_WC)(0x1UL << ((5) * 8)) | | |||
| 1302 | PATENTRY(6, PAT_UCMINUS)(0x7UL << ((6) * 8)) | PATENTRY(7, PAT_UC)(0x0UL << ((7) * 8)); | |||
| 1303 | ||||
| 1304 | wrmsr(MSR_CR_PAT0x277, reg); | |||
| 1305 | pmap_pg_wc = PG_WC(0x0000000000000008UL); | |||
| 1306 | } | |||
| 1307 | ||||
| 1308 | struct timeout rdrand_tmo; | |||
| 1309 | void rdrand(void *); | |||
| 1310 | ||||
| 1311 | void | |||
| 1312 | rdrand(void *v) | |||
| 1313 | { | |||
| 1314 | struct timeout *tmo = v; | |||
| 1315 | extern int has_rdrand, has_rdseed; | |||
| 1316 | union { | |||
| 1317 | uint64_t u64; | |||
| 1318 | uint32_t u32[2]; | |||
| 1319 | } r, t; | |||
| 1320 | uint64_t tsc; | |||
| 1321 | uint8_t valid = 0; | |||
| 1322 | ||||
| 1323 | tsc = rdtsc(); | |||
| 1324 | if (has_rdseed) | |||
| 1325 | __asm volatile( | |||
| 1326 | "rdseed %0\n\t" | |||
| 1327 | "setc %1\n" | |||
| 1328 | : "=r" (r.u64), "=qm" (valid) ); | |||
| 1329 | if (has_rdrand && (has_rdseed == 0 || valid == 0)) | |||
| 1330 | __asm volatile( | |||
| 1331 | "rdrand %0\n\t" | |||
| 1332 | "setc %1\n" | |||
| 1333 | : "=r" (r.u64), "=qm" (valid) ); | |||
| 1334 | ||||
| 1335 | t.u64 = tsc; | |||
| 1336 | t.u64 ^= r.u64; | |||
| ||||
| 1337 | t.u64 ^= valid; /* potential rdrand empty */ | |||
| 1338 | if (has_rdrand) | |||
| 1339 | t.u64 += rdtsc(); /* potential vmexit latency */ | |||
| 1340 | ||||
| 1341 | enqueue_randomness(t.u32[0]); | |||
| 1342 | enqueue_randomness(t.u32[1]); | |||
| 1343 | ||||
| 1344 | if (tmo) | |||
| 1345 | timeout_add_msec(tmo, 10); | |||
| 1346 | } | |||
| 1347 | ||||
| 1348 | int | |||
| 1349 | cpu_activate(struct device *self, int act) | |||
| 1350 | { | |||
| 1351 | struct cpu_softc *sc = (struct cpu_softc *)self; | |||
| 1352 | ||||
| 1353 | switch (act) { | |||
| ||||
| 1354 | case DVACT_RESUME4: | |||
| 1355 | if (sc->sc_info->ci_cpuid == 0) | |||
| 1356 | rdrand(NULL((void *)0)); | |||
| 1357 | #if NPCTR1 > 0 | |||
| 1358 | pctr_resume(sc->sc_info); | |||
| 1359 | #endif | |||
| 1360 | break; | |||
| 1361 | } | |||
| 1362 | ||||
| 1363 | return (0); | |||
| 1364 | } | |||
| 1365 | ||||
| 1366 | /* | |||
| 1367 | * cpu_enter_pages | |||
| 1368 | * | |||
| 1369 | * Requests mapping of various special pages required in the Intel Meltdown | |||
| 1370 | * case (to be entered into the U-K page table): | |||
| 1371 | * | |||
| 1372 | * 1 tss+gdt page for each CPU | |||
| 1373 | * 1 trampoline stack page for each CPU | |||
| 1374 | * | |||
| 1375 | * The cpu_info_full struct for each CPU straddles these pages. The offset into | |||
| 1376 | * 'cif' is calculated below, for each page. For more information, consult | |||
| 1377 | * the definition of struct cpu_info_full in cpu_full.h | |||
| 1378 | * | |||
| 1379 | * On CPUs unaffected by Meltdown, this function still configures 'cif' but | |||
| 1380 | * the calls to pmap_enter_special become no-ops. | |||
| 1381 | * | |||
| 1382 | * Parameters: | |||
| 1383 | * cif : the cpu_info_full structure describing a CPU whose pages are to be | |||
| 1384 | * entered into the special meltdown U-K page table. | |||
| 1385 | */ | |||
| 1386 | void | |||
| 1387 | cpu_enter_pages(struct cpu_info_full *cif) | |||
| 1388 | { | |||
| 1389 | vaddr_t va; | |||
| 1390 | paddr_t pa; | |||
| 1391 | ||||
| 1392 | /* The TSS+GDT need to be readable */ | |||
| 1393 | va = (vaddr_t)cif; | |||
| 1394 | pmap_extract(pmap_kernel()(&kernel_pmap_store), va, &pa); | |||
| 1395 | pmap_enter_special(va, pa, PROT_READ0x01); | |||
| 1396 | DPRINTF("%s: entered tss+gdt page at va 0x%llx pa 0x%llx\n", __func__, | |||
| 1397 | (uint64_t)va, (uint64_t)pa); | |||
| 1398 | ||||
| 1399 | /* The trampoline stack page needs to be read/write */ | |||
| 1400 | va = (vaddr_t)&cif->cif_tramp_stack; | |||
| 1401 | pmap_extract(pmap_kernel()(&kernel_pmap_store), va, &pa); | |||
| 1402 | pmap_enter_special(va, pa, PROT_READ0x01 | PROT_WRITE0x02); | |||
| 1403 | DPRINTF("%s: entered t.stack page at va 0x%llx pa 0x%llx\n", __func__, | |||
| 1404 | (uint64_t)va, (uint64_t)pa); | |||
| 1405 | ||||
| 1406 | cif->cif_tsscif_RO.u_tssgdt.uu_tss.tss_rsp0 = va + sizeof(cif->cif_tramp_stack) - 16; | |||
| 1407 | DPRINTF("%s: cif_tss.tss_rsp0 = 0x%llx\n" ,__func__, | |||
| 1408 | (uint64_t)cif->cif_tss.tss_rsp0); | |||
| 1409 | cif->cif_cpu.ci_intr_rsp = cif->cif_tsscif_RO.u_tssgdt.uu_tss.tss_rsp0 - | |||
| 1410 | sizeof(struct iretq_frame); | |||
| 1411 | ||||
| 1412 | #define SETUP_IST_SPECIAL_STACK(ist, cif, member)do { (cif)->cif_RO.u_tssgdt.uu_tss.tss_ist[(ist)] = (vaddr_t )&(cif)->member + sizeof((cif)->member) - 16; (cif) ->member[(sizeof(((cif)->member)) / sizeof(((cif)->member )[0])) - 2] = (int64_t)&(cif)->cif_cpu; } while (0) do { \ | |||
| 1413 | (cif)->cif_tsscif_RO.u_tssgdt.uu_tss.tss_ist[(ist)] = (vaddr_t)&(cif)->member + \ | |||
| 1414 | sizeof((cif)->member) - 16; \ | |||
| 1415 | (cif)->member[nitems((cif)->member)(sizeof(((cif)->member)) / sizeof(((cif)->member)[0])) - 2] = (int64_t)&(cif)->cif_cpu; \ | |||
| 1416 | } while (0) | |||
| 1417 | ||||
| 1418 | SETUP_IST_SPECIAL_STACK(0, cif, cif_dblflt_stack)do { (cif)->cif_RO.u_tssgdt.uu_tss.tss_ist[(0)] = (vaddr_t )&(cif)->cif_dblflt_stack + sizeof((cif)->cif_dblflt_stack ) - 16; (cif)->cif_dblflt_stack[(sizeof(((cif)->cif_dblflt_stack )) / sizeof(((cif)->cif_dblflt_stack)[0])) - 2] = (int64_t )&(cif)->cif_cpu; } while (0); | |||
| 1419 | SETUP_IST_SPECIAL_STACK(1, cif, cif_nmi_stack)do { (cif)->cif_RO.u_tssgdt.uu_tss.tss_ist[(1)] = (vaddr_t )&(cif)->cif_nmi_stack + sizeof((cif)->cif_nmi_stack ) - 16; (cif)->cif_nmi_stack[(sizeof(((cif)->cif_nmi_stack )) / sizeof(((cif)->cif_nmi_stack)[0])) - 2] = (int64_t)& (cif)->cif_cpu; } while (0); | |||
| 1420 | ||||
| 1421 | /* an empty iomap, by setting its offset to the TSS limit */ | |||
| 1422 | cif->cif_tsscif_RO.u_tssgdt.uu_tss.tss_iobase = sizeof(cif->cif_tsscif_RO.u_tssgdt.uu_tss); | |||
| 1423 | } | |||
| 1424 | ||||
#ifdef MULTIPROCESSOR
/*
 * Write back and invalidate caches everywhere: broadcast the WBINVD IPI
 * and then execute wbinvd on the calling CPU.  Always returns 0.
 */
int
wbinvd_on_all_cpus(void)
{
	x86_broadcast_ipi(X86_IPI_WBINVD);
	wbinvd();

	return 0;
}
#endif