| File: | arch/amd64/amd64/cpu.c |
| Warning: | line 1200, column 8 Assigned value is garbage or undefined |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | /* $OpenBSD: cpu.c,v 1.154 2021/08/31 17:40:59 dv Exp $ */ | |||
| 2 | /* $NetBSD: cpu.c,v 1.1 2003/04/26 18:39:26 fvdl Exp $ */ | |||
| 3 | ||||
| 4 | /*- | |||
| 5 | * Copyright (c) 2000 The NetBSD Foundation, Inc. | |||
| 6 | * All rights reserved. | |||
| 7 | * | |||
| 8 | * This code is derived from software contributed to The NetBSD Foundation | |||
| 9 | * by RedBack Networks Inc. | |||
| 10 | * | |||
| 11 | * Author: Bill Sommerfeld | |||
| 12 | * | |||
| 13 | * Redistribution and use in source and binary forms, with or without | |||
| 14 | * modification, are permitted provided that the following conditions | |||
| 15 | * are met: | |||
| 16 | * 1. Redistributions of source code must retain the above copyright | |||
| 17 | * notice, this list of conditions and the following disclaimer. | |||
| 18 | * 2. Redistributions in binary form must reproduce the above copyright | |||
| 19 | * notice, this list of conditions and the following disclaimer in the | |||
| 20 | * documentation and/or other materials provided with the distribution. | |||
| 21 | * | |||
| 22 | * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS | |||
| 23 | * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED | |||
| 24 | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |||
| 25 | * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS | |||
| 26 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
| 27 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
| 28 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
| 29 | * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
| 30 | * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
| 31 | * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
| 32 | * POSSIBILITY OF SUCH DAMAGE. | |||
| 33 | */ | |||
| 34 | ||||
| 35 | /* | |||
| 36 | * Copyright (c) 1999 Stefan Grefen | |||
| 37 | * | |||
| 38 | * Redistribution and use in source and binary forms, with or without | |||
| 39 | * modification, are permitted provided that the following conditions | |||
| 40 | * are met: | |||
| 41 | * 1. Redistributions of source code must retain the above copyright | |||
| 42 | * notice, this list of conditions and the following disclaimer. | |||
| 43 | * 2. Redistributions in binary form must reproduce the above copyright | |||
| 44 | * notice, this list of conditions and the following disclaimer in the | |||
| 45 | * documentation and/or other materials provided with the distribution. | |||
| 46 | * 3. All advertising materials mentioning features or use of this software | |||
| 47 | * must display the following acknowledgement: | |||
| 48 | * This product includes software developed by the NetBSD | |||
| 49 | * Foundation, Inc. and its contributors. | |||
| 50 | * 4. Neither the name of The NetBSD Foundation nor the names of its | |||
| 51 | * contributors may be used to endorse or promote products derived | |||
| 52 | * from this software without specific prior written permission. | |||
| 53 | * | |||
| 54 | * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND ANY | |||
| 55 | * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| 56 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| 57 | * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR AND CONTRIBUTORS BE LIABLE | |||
| 58 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| 59 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | |||
| 60 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | |||
| 61 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | |||
| 62 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | |||
| 63 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | |||
| 64 | * SUCH DAMAGE. | |||
| 65 | */ | |||
| 66 | ||||
| 67 | #include "lapic.h" | |||
| 68 | #include "ioapic.h" | |||
| 69 | #include "vmm.h" | |||
| 70 | #include "pctr.h" | |||
| 71 | #include "pvbus.h" | |||
| 72 | ||||
| 73 | #include <sys/param.h> | |||
| 74 | #include <sys/proc.h> | |||
| 75 | #include <sys/timeout.h> | |||
| 76 | #include <sys/systm.h> | |||
| 77 | #include <sys/device.h> | |||
| 78 | #include <sys/malloc.h> | |||
| 79 | #include <sys/memrange.h> | |||
| 80 | #include <sys/atomic.h> | |||
| 81 | #include <sys/user.h> | |||
| 82 | ||||
| 83 | #include <uvm/uvm_extern.h> | |||
| 84 | ||||
| 85 | #include <machine/codepatch.h> | |||
| 86 | #include <machine/cpu_full.h> | |||
| 87 | #include <machine/cpufunc.h> | |||
| 88 | #include <machine/cpuvar.h> | |||
| 89 | #include <machine/pmap.h> | |||
| 90 | #include <machine/vmparam.h> | |||
| 91 | #include <machine/mpbiosvar.h> | |||
| 92 | #include <machine/pcb.h> | |||
| 93 | #include <machine/specialreg.h> | |||
| 94 | #include <machine/segments.h> | |||
| 95 | #include <machine/gdt.h> | |||
| 96 | #include <machine/pio.h> | |||
| 97 | #include <machine/vmmvar.h> | |||
| 98 | ||||
| 99 | #if NLAPIC1 > 0 | |||
| 100 | #include <machine/i82489reg.h> | |||
| 101 | #include <machine/i82489var.h> | |||
| 102 | #endif | |||
| 103 | ||||
| 104 | #if NIOAPIC1 > 0 | |||
| 105 | #include <machine/i82093var.h> | |||
| 106 | #endif | |||
| 107 | ||||
| 108 | #if NPCTR1 > 0 | |||
| 109 | #include <machine/pctr.h> | |||
| 110 | #endif | |||
| 111 | ||||
| 112 | #if NPVBUS1 > 0 | |||
| 113 | #include <dev/pv/pvvar.h> | |||
| 114 | #endif | |||
| 115 | ||||
| 116 | #include <dev/ic/mc146818reg.h> | |||
| 117 | #include <amd64/isa/nvram.h> | |||
| 118 | #include <dev/isa/isareg.h> | |||
| 119 | ||||
| 120 | #ifdef HIBERNATE1 | |||
| 121 | #include <sys/hibernate.h> | |||
| 122 | #include <machine/hibernate.h> | |||
| 123 | #endif /* HIBERNATE */ | |||
| 124 | ||||
| 125 | /* #define CPU_DEBUG */ | |||
| 126 | ||||
| 127 | #ifdef CPU_DEBUG | |||
| 128 | #define DPRINTF(x...) do { printf(x); } while(0) | |||
| 129 | #else | |||
| 130 | #define DPRINTF(x...) | |||
| 131 | #endif /* CPU_DEBUG */ | |||
| 132 | ||||
| 133 | int cpu_match(struct device *, void *, void *); | |||
| 134 | void cpu_attach(struct device *, struct device *, void *); | |||
| 135 | int cpu_activate(struct device *, int); | |||
| 136 | void patinit(struct cpu_info *ci); | |||
| 137 | #if NVMM1 > 0 | |||
| 138 | void cpu_init_vmm(struct cpu_info *ci); | |||
| 139 | #endif /* NVMM > 0 */ | |||
| 140 | ||||
| 141 | struct cpu_softc { | |||
| 142 | struct device sc_dev; /* device tree glue */ | |||
| 143 | struct cpu_info *sc_info; /* pointer to CPU info */ | |||
| 144 | }; | |||
| 145 | ||||
| 146 | void replacesmap(void); | |||
| 147 | void replacemeltdown(void); | |||
| 148 | void replacemds(void); | |||
| 149 | ||||
| 150 | extern long _stac; | |||
| 151 | extern long _clac; | |||
| 152 | ||||
| 153 | void | |||
| 154 | replacesmap(void) | |||
| 155 | { | |||
| 156 | static int replacedone = 0; | |||
| 157 | int s; | |||
| 158 | ||||
| 159 | if (replacedone) | |||
| 160 | return; | |||
| 161 | replacedone = 1; | |||
| 162 | ||||
| 163 | s = splhigh()splraise(0xd); | |||
| 164 | ||||
| 165 | codepatch_replace(CPTAG_STAC1, &_stac, 3); | |||
| 166 | codepatch_replace(CPTAG_CLAC2, &_clac, 3); | |||
| 167 | ||||
| 168 | splx(s)spllower(s); | |||
| 169 | } | |||
| 170 | ||||
| 171 | void | |||
| 172 | replacemeltdown(void) | |||
| 173 | { | |||
| 174 | static int replacedone = 0; | |||
| 175 | struct cpu_info *ci = &cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))); | |||
| 176 | int swapgs_vuln = 0, s; | |||
| 177 | ||||
| 178 | if (replacedone) | |||
| 179 | return; | |||
| 180 | replacedone = 1; | |||
| 181 | ||||
| 182 | if (strcmp(cpu_vendor, "GenuineIntel") == 0) { | |||
| 183 | int family = ci->ci_family; | |||
| 184 | int model = ci->ci_model; | |||
| 185 | ||||
| 186 | swapgs_vuln = 1; | |||
| 187 | if (family == 0x6 && | |||
| 188 | (model == 0x37 || model == 0x4a || model == 0x4c || | |||
| 189 | model == 0x4d || model == 0x5a || model == 0x5d || | |||
| 190 | model == 0x6e || model == 0x65 || model == 0x75)) { | |||
| 191 | /* Silvermont, Airmont */ | |||
| 192 | swapgs_vuln = 0; | |||
| 193 | } else if (family == 0x6 && (model == 0x85 || model == 0x57)) { | |||
| 194 | /* KnightsLanding */ | |||
| 195 | swapgs_vuln = 0; | |||
| 196 | } | |||
| 197 | } | |||
| 198 | ||||
| 199 | s = splhigh()splraise(0xd); | |||
| 200 | if (!cpu_meltdown) | |||
| 201 | codepatch_nop(CPTAG_MELTDOWN_NOP6); | |||
| 202 | else { | |||
| 203 | extern long alltraps_kern_meltdown; | |||
| 204 | ||||
| 205 | /* eliminate conditional branch in alltraps */ | |||
| 206 | codepatch_jmp(CPTAG_MELTDOWN_ALLTRAPS7, &alltraps_kern_meltdown); | |||
| 207 | ||||
| 208 | /* enable reuse of PCID for U-K page tables */ | |||
| 209 | if (pmap_use_pcid) { | |||
| 210 | extern long _pcid_set_reuse; | |||
| 211 | DPRINTF("%s: codepatching PCID use", __func__); | |||
| 212 | codepatch_replace(CPTAG_PCID_SET_REUSE8, | |||
| 213 | &_pcid_set_reuse, PCID_SET_REUSE_SIZE12); | |||
| 214 | } | |||
| 215 | } | |||
| 216 | ||||
| 217 | /* | |||
| 218 | * CVE-2019-1125: if the CPU has SMAP and it's not vulnerable to | |||
| 219 | * Meltdown, then it's protected both from speculatively mis-skipping | |||
| 220 | * the swapgs during interrupts of userspace and from speculatively | |||
| 221 | * mis-taking a swapgs during interrupts while already in the kernel | |||
| 222 | * as the speculative path will fault from SMAP. Warning: enabling | |||
| 223 | * WRGSBASE would break this 'protection'. | |||
| 224 | * | |||
| 225 | * Otherwise, if the CPU's swapgs can't be speculated over and it | |||
| 226 | * _is_ vulnerable to Meltdown then the %cr3 change will serialize | |||
| 227 | * user->kern transitions, but we still need to mitigate the | |||
| 228 | * already-in-kernel cases. | |||
| 229 | */ | |||
| 230 | if (!cpu_meltdown && (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMAP0x00100000)) { | |||
| 231 | codepatch_nop(CPTAG_FENCE_SWAPGS_MIS_TAKEN11); | |||
| 232 | codepatch_nop(CPTAG_FENCE_NO_SAFE_SMAP12); | |||
| 233 | } else if (!swapgs_vuln && cpu_meltdown) { | |||
| 234 | codepatch_nop(CPTAG_FENCE_SWAPGS_MIS_TAKEN11); | |||
| 235 | } | |||
| 236 | splx(s)spllower(s); | |||
| 237 | } | |||
| 238 | ||||
| 239 | void | |||
| 240 | replacemds(void) | |||
| 241 | { | |||
| 242 | static int replacedone = 0; | |||
| 243 | extern long mds_handler_bdw, mds_handler_ivb, mds_handler_skl; | |||
| 244 | extern long mds_handler_skl_sse, mds_handler_skl_avx; | |||
| 245 | extern long mds_handler_silvermont, mds_handler_knights; | |||
| 246 | struct cpu_info *ci = &cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))); | |||
| 247 | CPU_INFO_ITERATORint cii; | |||
| 248 | void *handler = NULL((void *)0), *vmm_handler = NULL((void *)0); | |||
| 249 | const char *type; | |||
| 250 | int has_verw, s; | |||
| 251 | ||||
| 252 | /* ci_mds_tmp must be 32byte aligned for AVX instructions */ | |||
| 253 | CTASSERT((offsetof(struct cpu_info, ci_mds_tmp) -extern char _ctassert[((__builtin_offsetof(struct cpu_info, ci_mds_tmp ) - __builtin_offsetof(struct cpu_info, ci_dev)) % 32 == 0) ? 1 : -1 ] __attribute__((__unused__)) | |||
| 254 | offsetof(struct cpu_info, ci_PAGEALIGN)) % 32 == 0)extern char _ctassert[((__builtin_offsetof(struct cpu_info, ci_mds_tmp ) - __builtin_offsetof(struct cpu_info, ci_dev)) % 32 == 0) ? 1 : -1 ] __attribute__((__unused__)); | |||
| 255 | ||||
| 256 | if (replacedone) | |||
| 257 | return; | |||
| 258 | replacedone = 1; | |||
| 259 | ||||
| 260 | if (strcmp(cpu_vendor, "GenuineIntel") != 0 || | |||
| 261 | ((ci->ci_feature_sefflags_edx & SEFF0EDX_ARCH_CAP0x20000000) && | |||
| 262 | (rdmsr(MSR_ARCH_CAPABILITIES0x10a) & ARCH_CAPABILITIES_MDS_NO(1 << 5)))) { | |||
| 263 | /* Unaffected, nop out the handling code */ | |||
| 264 | has_verw = 0; | |||
| 265 | } else if (ci->ci_feature_sefflags_edx & SEFF0EDX_MD_CLEAR0x00000400) { | |||
| 266 | /* new firmware, use VERW */ | |||
| 267 | has_verw = 1; | |||
| 268 | } else { | |||
| 269 | int family = ci->ci_family; | |||
| 270 | int model = ci->ci_model; | |||
| 271 | int stepping = CPUID2STEPPING(ci->ci_signature)((ci->ci_signature) & 15); | |||
| 272 | ||||
| 273 | has_verw = 0; | |||
| 274 | if (family == 0x6 && | |||
| 275 | (model == 0x2e || model == 0x1e || model == 0x1f || | |||
| 276 | model == 0x1a || model == 0x2f || model == 0x25 || | |||
| 277 | model == 0x2c || model == 0x2d || model == 0x2a || | |||
| 278 | model == 0x3e || model == 0x3a)) { | |||
| 279 | /* Nehalem, SandyBridge, IvyBridge */ | |||
| 280 | handler = vmm_handler = &mds_handler_ivb; | |||
| 281 | type = "IvyBridge"; | |||
| 282 | CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci-> ci_next) { | |||
| 283 | ci->ci_mds_buf = malloc(672, M_DEVBUF2, | |||
| 284 | M_WAITOK0x0001); | |||
| 285 | memset(ci->ci_mds_buf, 0, 16)__builtin_memset((ci->ci_mds_buf), (0), (16)); | |||
| 286 | } | |||
| 287 | } else if (family == 0x6 && | |||
| 288 | (model == 0x3f || model == 0x3c || model == 0x45 || | |||
| 289 | model == 0x46 || model == 0x56 || model == 0x4f || | |||
| 290 | model == 0x47 || model == 0x3d)) { | |||
| 291 | /* Haswell and Broadwell */ | |||
| 292 | handler = vmm_handler = &mds_handler_bdw; | |||
| 293 | type = "Broadwell"; | |||
| 294 | CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci-> ci_next) { | |||
| 295 | ci->ci_mds_buf = malloc(1536, M_DEVBUF2, | |||
| 296 | M_WAITOK0x0001); | |||
| 297 | } | |||
| 298 | } else if (family == 0x6 && | |||
| 299 | ((model == 0x55 && stepping <= 5) || model == 0x4e || | |||
| 300 | model == 0x5e || (model == 0x8e && stepping <= 0xb) || | |||
| 301 | (model == 0x9e && stepping <= 0xc))) { | |||
| 302 | /* | |||
| 303 | * Skylake, KabyLake, CoffeeLake, WhiskeyLake, | |||
| 304 | * CascadeLake | |||
| 305 | */ | |||
| 306 | /* XXX mds_handler_skl_avx512 */ | |||
| 307 | if (xgetbv(0) & XCR0_AVX0x00000004) { | |||
| 308 | handler = &mds_handler_skl_avx; | |||
| 309 | type = "Skylake AVX"; | |||
| 310 | } else { | |||
| 311 | handler = &mds_handler_skl_sse; | |||
| 312 | type = "Skylake SSE"; | |||
| 313 | } | |||
| 314 | vmm_handler = &mds_handler_skl; | |||
| 315 | CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci-> ci_next) { | |||
| 316 | vaddr_t b64; | |||
| 317 | b64 = (vaddr_t)malloc(6 * 1024 + 64 + 63, | |||
| 318 | M_DEVBUF2, M_WAITOK0x0001); | |||
| 319 | ci->ci_mds_buf = (void *)((b64 + 63) & ~63); | |||
| 320 | memset(ci->ci_mds_buf, 0, 64)__builtin_memset((ci->ci_mds_buf), (0), (64)); | |||
| 321 | } | |||
| 322 | } else if (family == 0x6 && | |||
| 323 | (model == 0x37 || model == 0x4a || model == 0x4c || | |||
| 324 | model == 0x4d || model == 0x5a || model == 0x5d || | |||
| 325 | model == 0x6e || model == 0x65 || model == 0x75)) { | |||
| 326 | /* Silvermont, Airmont */ | |||
| 327 | handler = vmm_handler = &mds_handler_silvermont; | |||
| 328 | type = "Silvermont"; | |||
| 329 | CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci-> ci_next) { | |||
| 330 | ci->ci_mds_buf = malloc(256, M_DEVBUF2, | |||
| 331 | M_WAITOK0x0001); | |||
| 332 | memset(ci->ci_mds_buf, 0, 16)__builtin_memset((ci->ci_mds_buf), (0), (16)); | |||
| 333 | } | |||
| 334 | } else if (family == 0x6 && (model == 0x85 || model == 0x57)) { | |||
| 335 | handler = vmm_handler = &mds_handler_knights; | |||
| 336 | type = "KnightsLanding"; | |||
| 337 | CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci-> ci_next) { | |||
| 338 | vaddr_t b64; | |||
| 339 | b64 = (vaddr_t)malloc(1152 + 63, M_DEVBUF2, | |||
| 340 | M_WAITOK0x0001); | |||
| 341 | ci->ci_mds_buf = (void *)((b64 + 63) & ~63); | |||
| 342 | } | |||
| 343 | } | |||
| 344 | } | |||
| 345 | ||||
| 346 | if (handler != NULL((void *)0)) { | |||
| 347 | printf("cpu0: using %s MDS workaround%s\n", type, ""); | |||
| 348 | s = splhigh()splraise(0xd); | |||
| 349 | codepatch_call(CPTAG_MDS9, handler); | |||
| 350 | codepatch_call(CPTAG_MDS_VMM10, vmm_handler); | |||
| 351 | splx(s)spllower(s); | |||
| 352 | } else if (has_verw) { | |||
| 353 | /* The new firmware enhances L1D_FLUSH MSR to flush MDS too */ | |||
| 354 | if (cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))).ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr == 1) { | |||
| 355 | s = splhigh()splraise(0xd); | |||
| 356 | codepatch_nop(CPTAG_MDS_VMM10); | |||
| 357 | splx(s)spllower(s); | |||
| 358 | type = " (except on vmm entry)"; | |||
| 359 | } else { | |||
| 360 | type = ""; | |||
| 361 | } | |||
| 362 | printf("cpu0: using %s MDS workaround%s\n", "VERW", type); | |||
| 363 | } else { | |||
| 364 | s = splhigh()splraise(0xd); | |||
| 365 | codepatch_nop(CPTAG_MDS9); | |||
| 366 | codepatch_nop(CPTAG_MDS_VMM10); | |||
| 367 | splx(s)spllower(s); | |||
| 368 | } | |||
| 369 | } | |||
| 370 | ||||
| 371 | #ifdef MULTIPROCESSOR1 | |||
| 372 | int mp_cpu_start(struct cpu_info *); | |||
| 373 | void mp_cpu_start_cleanup(struct cpu_info *); | |||
| 374 | struct cpu_functions mp_cpu_funcs = { mp_cpu_start, NULL((void *)0), | |||
| 375 | mp_cpu_start_cleanup }; | |||
| 376 | #endif /* MULTIPROCESSOR */ | |||
| 377 | ||||
| 378 | struct cfattach cpu_ca = { | |||
| 379 | sizeof(struct cpu_softc), cpu_match, cpu_attach, NULL((void *)0), cpu_activate | |||
| 380 | }; | |||
| 381 | ||||
| 382 | struct cfdriver cpu_cd = { | |||
| 383 | NULL((void *)0), "cpu", DV_DULL | |||
| 384 | }; | |||
| 385 | ||||
| 386 | /* | |||
| 387 | * Statically-allocated CPU info for the primary CPU (or the only | |||
| 388 | * CPU, on uniprocessors). The CPU info list is initialized to | |||
| 389 | * point at it. | |||
| 390 | */ | |||
| 391 | struct cpu_info_full cpu_info_full_primary = { .cif_cpu = { .ci_self = &cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))) } }; | |||
| 392 | ||||
| 393 | struct cpu_info *cpu_info_list = &cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))); | |||
| 394 | ||||
| 395 | #ifdef MULTIPROCESSOR1 | |||
| 396 | /* | |||
| 397 | * Array of CPU info structures. Must be statically-allocated because | |||
| 398 | * curproc, etc. are used early. | |||
| 399 | */ | |||
| 400 | struct cpu_info *cpu_info[MAXCPUS64] = { &cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))) }; | |||
| 401 | ||||
| 402 | void cpu_hatch(void *); | |||
| 403 | void cpu_boot_secondary(struct cpu_info *ci); | |||
| 404 | void cpu_start_secondary(struct cpu_info *ci); | |||
| 405 | #endif | |||
| 406 | ||||
| 407 | int | |||
| 408 | cpu_match(struct device *parent, void *match, void *aux) | |||
| 409 | { | |||
| 410 | struct cfdata *cf = match; | |||
| 411 | struct cpu_attach_args *caa = aux; | |||
| 412 | ||||
| 413 | if (strcmp(caa->caa_name, cf->cf_driver->cd_name) != 0) | |||
| 414 | return 0; | |||
| 415 | ||||
| 416 | if (cf->cf_unit >= MAXCPUS64) | |||
| 417 | return 0; | |||
| 418 | ||||
| 419 | return 1; | |||
| 420 | } | |||
| 421 | ||||
| 422 | void cpu_idle_mwait_cycle(void); | |||
| 423 | void cpu_init_mwait(struct cpu_softc *); | |||
| 424 | ||||
| 425 | u_int cpu_mwait_size, cpu_mwait_states; | |||
| 426 | ||||
| 427 | void | |||
| 428 | cpu_idle_mwait_cycle(void) | |||
| 429 | { | |||
| 430 | struct cpu_info *ci = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r" (__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self))); __ci;}); | |||
| 431 | ||||
| 432 | if ((read_rflags() & PSL_I0x00000200) == 0) | |||
| 433 | panic("idle with interrupts blocked!"); | |||
| 434 | ||||
| 435 | /* something already queued? */ | |||
| 436 | if (!cpu_is_idle(ci)((ci)->ci_schedstate.spc_whichqs == 0)) | |||
| 437 | return; | |||
| 438 | ||||
| 439 | /* | |||
| 440 | * About to idle; setting the MWAIT_IN_IDLE bit tells | |||
| 441 | * cpu_unidle() that it can't be a no-op and tells cpu_kick() | |||
| 442 | * that it doesn't need to use an IPI. We also set the | |||
| 443 | * MWAIT_KEEP_IDLING bit: those routines clear it to stop | |||
| 444 | * the mwait. Once they're set, we do a final check of the | |||
| 445 | * queue, in case another cpu called setrunqueue() and added | |||
| 446 | * something to the queue and called cpu_unidle() between | |||
| 447 | * the check in sched_idle() and here. | |||
| 448 | */ | |||
| 449 | atomic_setbits_intx86_atomic_setbits_u32(&ci->ci_mwait, MWAIT_IDLING(0x1 | 0x2) | MWAIT_ONLY0x4); | |||
| 450 | if (cpu_is_idle(ci)((ci)->ci_schedstate.spc_whichqs == 0)) { | |||
| 451 | monitor(&ci->ci_mwait, 0, 0); | |||
| 452 | if ((ci->ci_mwait & MWAIT_IDLING(0x1 | 0x2)) == MWAIT_IDLING(0x1 | 0x2)) | |||
| 453 | mwait(0, 0); | |||
| 454 | } | |||
| 455 | ||||
| 456 | /* done idling; let cpu_kick() know that an IPI is required */ | |||
| 457 | atomic_clearbits_intx86_atomic_clearbits_u32(&ci->ci_mwait, MWAIT_IDLING(0x1 | 0x2)); | |||
| 458 | } | |||
| 459 | ||||
| 460 | void | |||
| 461 | cpu_init_mwait(struct cpu_softc *sc) | |||
| 462 | { | |||
| 463 | unsigned int smallest, largest, extensions, c_substates; | |||
| 464 | ||||
| 465 | if ((cpu_ecxfeature & CPUIDECX_MWAIT0x00000008) == 0 || cpuid_level < 0x5) | |||
| 466 | return; | |||
| 467 | ||||
| 468 | /* get the monitor granularity */ | |||
| 469 | CPUID(0x5, smallest, largest, extensions, cpu_mwait_states)__asm volatile("cpuid" : "=a" (smallest), "=b" (largest), "=c" (extensions), "=d" (cpu_mwait_states) : "a" (0x5)); | |||
| 470 | smallest &= 0xffff; | |||
| 471 | largest &= 0xffff; | |||
| 472 | ||||
| 473 | printf("%s: mwait min=%u, max=%u", sc->sc_dev.dv_xname, | |||
| 474 | smallest, largest); | |||
| 475 | if (extensions & 0x1) { | |||
| 476 | if (cpu_mwait_states > 0) { | |||
| 477 | c_substates = cpu_mwait_states; | |||
| 478 | printf(", C-substates=%u", 0xf & c_substates); | |||
| 479 | while ((c_substates >>= 4) > 0) | |||
| 480 | printf(".%u", 0xf & c_substates); | |||
| 481 | } | |||
| 482 | if (extensions & 0x2) | |||
| 483 | printf(", IBE"); | |||
| 484 | } else { | |||
| 485 | /* substates not supported, forge the default: just C1 */ | |||
| 486 | cpu_mwait_states = 1 << 4; | |||
| 487 | } | |||
| 488 | ||||
| 489 | /* paranoia: check the values */ | |||
| 490 | if (smallest < sizeof(int) || largest < smallest || | |||
| 491 | (largest & (sizeof(int)-1))) | |||
| 492 | printf(" (bogus)"); | |||
| 493 | else | |||
| 494 | cpu_mwait_size = largest; | |||
| 495 | printf("\n"); | |||
| 496 | ||||
| 497 | /* enable use of mwait; may be overridden by acpicpu later */ | |||
| 498 | if (cpu_mwait_size > 0) | |||
| 499 | cpu_idle_cycle_fcn = &cpu_idle_mwait_cycle; | |||
| 500 | } | |||
| 501 | ||||
| 502 | void | |||
| 503 | cpu_attach(struct device *parent, struct device *self, void *aux) | |||
| 504 | { | |||
| 505 | struct cpu_softc *sc = (void *) self; | |||
| 506 | struct cpu_attach_args *caa = aux; | |||
| 507 | struct cpu_info *ci; | |||
| 508 | #if defined(MULTIPROCESSOR1) | |||
| 509 | int cpunum = sc->sc_dev.dv_unit; | |||
| 510 | vaddr_t kstack; | |||
| 511 | struct pcb *pcb; | |||
| 512 | #endif | |||
| 513 | ||||
| 514 | /* | |||
| 515 | * If we're an Application Processor, allocate a cpu_info | |||
| 516 | * structure, otherwise use the primary's. | |||
| 517 | */ | |||
| 518 | if (caa->cpu_role == CPU_ROLE_AP2) { | |||
| 519 | struct cpu_info_full *cif; | |||
| 520 | ||||
| 521 | cif = km_alloc(sizeof *cif, &kv_any, &kp_zero, &kd_waitok); | |||
| 522 | ci = &cif->cif_cpu; | |||
| 523 | #if defined(MULTIPROCESSOR1) | |||
| 524 | ci->ci_tss = &cif->cif_tsscif_RO.u_tssgdt.uu_tss; | |||
| 525 | ci->ci_gdt = &cif->cif_gdtcif_RO.u_tssgdt.uu_gdt; | |||
| 526 | memcpy(ci->ci_gdt, cpu_info_primary.ci_gdt, GDT_SIZE)__builtin_memcpy((ci->ci_gdt), ((*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096*2 - __builtin_offsetof(struct cpu_info, ci_dev))).ci_gdt), (((6 << 3) + (1 << 4 )))); | |||
| 527 | cpu_enter_pages(cif); | |||
| 528 | if (cpu_info[cpunum] != NULL((void *)0)) | |||
| 529 | panic("cpu at apic id %d already attached?", cpunum); | |||
| 530 | cpu_info[cpunum] = ci; | |||
| 531 | #endif | |||
| 532 | #ifdef TRAPLOG | |||
| 533 | ci->ci_tlog_base = malloc(sizeof(struct tlog), | |||
| 534 | M_DEVBUF2, M_WAITOK0x0001); | |||
| 535 | #endif | |||
| 536 | } else { | |||
| 537 | ci = &cpu_info_primary(*(struct cpu_info *)((char *)&cpu_info_full_primary + 4096 *2 - __builtin_offsetof(struct cpu_info, ci_dev))); | |||
| 538 | #if defined(MULTIPROCESSOR1) | |||
| 539 | if (caa->cpu_apicid != lapic_cpu_number()) { | |||
| 540 | panic("%s: running cpu is at apic %d" | |||
| 541 | " instead of at expected %d", | |||
| 542 | sc->sc_dev.dv_xname, lapic_cpu_number(), caa->cpu_apicid); | |||
| 543 | } | |||
| 544 | #endif | |||
| 545 | } | |||
| 546 | ||||
| 547 | ci->ci_self = ci; | |||
| 548 | sc->sc_info = ci; | |||
| 549 | ||||
| 550 | ci->ci_dev = self; | |||
| 551 | ci->ci_apicid = caa->cpu_apicid; | |||
| 552 | ci->ci_acpi_proc_id = caa->cpu_acpi_proc_id; | |||
| 553 | #ifdef MULTIPROCESSOR1 | |||
| 554 | ci->ci_cpuid = cpunum; | |||
| 555 | #else | |||
| 556 | ci->ci_cpuid = 0; /* False for APs, but they're not used anyway */ | |||
| 557 | #endif | |||
| 558 | ci->ci_func = caa->cpu_func; | |||
| 559 | ci->ci_handled_intr_level = IPL_NONE0x0; | |||
| 560 | ||||
| 561 | #if defined(MULTIPROCESSOR1) | |||
| 562 | /* | |||
| 563 | * Allocate UPAGES contiguous pages for the idle PCB and stack. | |||
| 564 | */ | |||
| 565 | kstack = (vaddr_t)km_alloc(USPACE(6 * (1 << 12)), &kv_any, &kp_dirty, &kd_nowait); | |||
| 566 | if (kstack == 0) { | |||
| 567 | if (caa->cpu_role != CPU_ROLE_AP2) { | |||
| 568 | panic("cpu_attach: unable to allocate idle stack for" | |||
| 569 | " primary"); | |||
| 570 | } | |||
| 571 | printf("%s: unable to allocate idle stack\n", | |||
| 572 | sc->sc_dev.dv_xname); | |||
| 573 | return; | |||
| 574 | } | |||
| 575 | pcb = ci->ci_idle_pcb = (struct pcb *) kstack; | |||
| 576 | memset(pcb, 0, USPACE)__builtin_memset((pcb), (0), ((6 * (1 << 12)))); | |||
| 577 | ||||
| 578 | pcb->pcb_kstack = kstack + USPACE(6 * (1 << 12)) - 16; | |||
| 579 | pcb->pcb_rbp = pcb->pcb_rsp = kstack + USPACE(6 * (1 << 12)) - 16; | |||
| 580 | pcb->pcb_pmap = pmap_kernel()(&kernel_pmap_store); | |||
| 581 | pcb->pcb_cr3 = pcb->pcb_pmap->pm_pdirpa; | |||
| 582 | #endif | |||
| 583 | ||||
| 584 | /* further PCB init done later. */ | |||
| 585 | ||||
| 586 | printf(": "); | |||
| 587 | ||||
| 588 | switch (caa->cpu_role) { | |||
| 589 | case CPU_ROLE_SP0: | |||
| 590 | printf("(uniprocessor)\n"); | |||
| 591 | ci->ci_flags |= CPUF_PRESENT0x1000 | CPUF_SP0x0004 | CPUF_PRIMARY0x0008; | |||
| 592 | cpu_intr_init(ci); | |||
| 593 | #ifndef SMALL_KERNEL | |||
| 594 | cpu_ucode_apply(ci); | |||
| 595 | #endif | |||
| 596 | cpu_tsx_disable(ci); | |||
| 597 | identifycpu(ci); | |||
| 598 | #ifdef MTRR1 | |||
| 599 | mem_range_attach(); | |||
| 600 | #endif /* MTRR */ | |||
| 601 | /* XXX SP fpuinit(ci) is done earlier */ | |||
| 602 | cpu_init(ci); | |||
| 603 | cpu_init_mwait(sc); | |||
| 604 | break; | |||
| 605 | ||||
| 606 | case CPU_ROLE_BP1: | |||
| 607 | printf("apid %d (boot processor)\n", caa->cpu_apicid); | |||
| 608 | ci->ci_flags |= CPUF_PRESENT0x1000 | CPUF_BSP0x0001 | CPUF_PRIMARY0x0008; | |||
| 609 | cpu_intr_init(ci); | |||
| 610 | identifycpu(ci); | |||
| 611 | #ifdef MTRR1 | |||
| 612 | mem_range_attach(); | |||
| 613 | #endif /* MTRR */ | |||
| 614 | ||||
| 615 | #if NLAPIC1 > 0 | |||
| 616 | /* | |||
| 617 | * Enable local apic | |||
| 618 | */ | |||
| 619 | lapic_enable(); | |||
| 620 | lapic_calibrate_timer(ci); | |||
| 621 | #endif | |||
| 622 | /* XXX BP fpuinit(ci) is done earlier */ | |||
| 623 | cpu_init(ci); | |||
| 624 | ||||
| 625 | #if NIOAPIC1 > 0 | |||
| 626 | ioapic_bsp_id = caa->cpu_apicid; | |||
| 627 | #endif | |||
| 628 | cpu_init_mwait(sc); | |||
| 629 | break; | |||
| 630 | ||||
| 631 | case CPU_ROLE_AP2: | |||
| 632 | /* | |||
| 633 | * report on an AP | |||
| 634 | */ | |||
| 635 | printf("apid %d (application processor)\n", caa->cpu_apicid); | |||
| 636 | ||||
| 637 | #if defined(MULTIPROCESSOR1) | |||
| 638 | cpu_intr_init(ci); | |||
| 639 | cpu_start_secondary(ci); | |||
| 640 | sched_init_cpu(ci); | |||
| 641 | ncpus++; | |||
| 642 | if (ci->ci_flags & CPUF_PRESENT0x1000) { | |||
| 643 | ci->ci_next = cpu_info_list->ci_next; | |||
| 644 | cpu_info_list->ci_next = ci; | |||
| 645 | } | |||
| 646 | #else | |||
| 647 | printf("%s: not started\n", sc->sc_dev.dv_xname); | |||
| 648 | #endif | |||
| 649 | break; | |||
| 650 | ||||
| 651 | default: | |||
| 652 | panic("unknown processor type??"); | |||
| 653 | } | |||
| 654 | ||||
| 655 | #if defined(MULTIPROCESSOR1) | |||
| 656 | if (mp_verbose) { | |||
| 657 | printf("%s: kstack at 0x%lx for %d bytes\n", | |||
| 658 | sc->sc_dev.dv_xname, kstack, USPACE(6 * (1 << 12))); | |||
| 659 | printf("%s: idle pcb at %p, idle sp at 0x%llx\n", | |||
| 660 | sc->sc_dev.dv_xname, pcb, pcb->pcb_rsp); | |||
| 661 | } | |||
| 662 | #endif | |||
| 663 | #if NVMM1 > 0 | |||
| 664 | cpu_init_vmm(ci); | |||
| 665 | #endif /* NVMM > 0 */ | |||
| 666 | } | |||
| 667 | ||||
| 668 | static void | |||
| 669 | replacexsave(void) | |||
| 670 | { | |||
| 671 | extern long _xrstor, _xsave, _xsaveopt; | |||
| 672 | u_int32_t eax, ebx, ecx, edx; | |||
| 673 | static int replacedone = 0; | |||
| 674 | int s; | |||
| 675 | ||||
| 676 | if (replacedone) | |||
| 677 | return; | |||
| 678 | replacedone = 1; | |||
| 679 | ||||
| 680 | /* find out whether xsaveopt is supported */ | |||
| 681 | CPUID_LEAF(0xd, 1, eax, ebx, ecx, edx)__asm volatile("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0xd), "c" (1)); | |||
| 682 | s = splhigh()splraise(0xd); | |||
| 683 | codepatch_replace(CPTAG_XRSTOR4, &_xrstor, 4); | |||
| 684 | codepatch_replace(CPTAG_XSAVE5, | |||
| 685 | (eax & XSAVE_XSAVEOPT0x1UL) ? &_xsaveopt : &_xsave, 4); | |||
| 686 | splx(s)spllower(s); | |||
| 687 | } | |||
| 688 | ||||
| 689 | ||||
| 690 | /* | |||
| 691 | * Initialize the processor appropriately. | |||
| 692 | */ | |||
| 693 | ||||
| 694 | void | |||
| 695 | cpu_init(struct cpu_info *ci) | |||
| 696 | { | |||
| 697 | struct savefpu *sfp; | |||
| 698 | u_int cr4; | |||
| 699 | ||||
| 700 | /* configure the CPU if needed */ | |||
| 701 | if (ci->cpu_setup != NULL((void *)0)) | |||
| 702 | (*ci->cpu_setup)(ci); | |||
| 703 | ||||
| 704 | cr4 = rcr4() | CR4_DEFAULT(0x00000020|0x00000080|0x00000010|0x00000200|0x00000400); | |||
| 705 | if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMEP0x00000080) | |||
| 706 | cr4 |= CR4_SMEP0x00100000; | |||
| 707 | if (ci->ci_feature_sefflags_ebx & SEFF0EBX_SMAP0x00100000) | |||
| 708 | cr4 |= CR4_SMAP0x00200000; | |||
| 709 | if (ci->ci_feature_sefflags_ecx & SEFF0ECX_UMIP0x00000004) | |||
| 710 | cr4 |= CR4_UMIP0x00000800; | |||
| 711 | if ((cpu_ecxfeature & CPUIDECX_XSAVE0x04000000) && cpuid_level >= 0xd) | |||
| 712 | cr4 |= CR4_OSXSAVE0x00040000; | |||
| 713 | if (pmap_use_pcid) | |||
| 714 | cr4 |= CR4_PCIDE0x00020000; | |||
| 715 | lcr4(cr4); | |||
| 716 | ||||
| 717 | if ((cpu_ecxfeature & CPUIDECX_XSAVE0x04000000) && cpuid_level >= 0xd) { | |||
| 718 | u_int32_t eax, ebx, ecx, edx; | |||
| 719 | ||||
| 720 | xsave_mask = XCR0_X870x00000001 | XCR0_SSE0x00000002; | |||
| 721 | CPUID_LEAF(0xd, 0, eax, ebx, ecx, edx)__asm volatile("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0xd), "c" (0)); | |||
| 722 | if (eax & XCR0_AVX0x00000004) | |||
| 723 | xsave_mask |= XCR0_AVX0x00000004; | |||
| 724 | xsetbv(0, xsave_mask); | |||
| 725 | CPUID_LEAF(0xd, 0, eax, ebx, ecx, edx)__asm volatile("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (0xd), "c" (0)); | |||
| 726 | if (CPU_IS_PRIMARY(ci)((ci)->ci_flags & 0x0008)) { | |||
| 727 | fpu_save_len = ebx; | |||
| 728 | KASSERT(fpu_save_len <= sizeof(struct savefpu))((fpu_save_len <= sizeof(struct savefpu)) ? (void)0 : __assert ("diagnostic ", "/usr/src/sys/arch/amd64/amd64/cpu.c", 728, "fpu_save_len <= sizeof(struct savefpu)" )); | |||
| 729 | } else { | |||
| 730 | KASSERT(ebx == fpu_save_len)((ebx == fpu_save_len) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/arch/amd64/amd64/cpu.c" , 730, "ebx == fpu_save_len")); | |||
| 731 | } | |||
| 732 | ||||
| 733 | replacexsave(); | |||
| 734 | } | |||
| 735 | ||||
| 736 | /* Give proc0 a clean FPU save area */ | |||
| 737 | sfp = &proc0.p_addr->u_pcb.pcb_savefpu; | |||
| 738 | memset(sfp, 0, fpu_save_len)__builtin_memset((sfp), (0), (fpu_save_len)); | |||
| 739 | sfp->fp_fxsave.fx_fcw = __INITIAL_NPXCW__0x037f; | |||
| 740 | sfp->fp_fxsave.fx_mxcsr = __INITIAL_MXCSR__0x1f80; | |||
| 741 | fpureset()xrstor_user(&proc0.p_addr->u_pcb.pcb_savefpu, xsave_mask ); | |||
| 742 | if (xsave_mask) { | |||
| 743 | /* must not use xsaveopt here */ | |||
| 744 | xsave(sfp, xsave_mask); | |||
| 745 | } else | |||
| 746 | fxsave(sfp)__asm("fxsave %0" : "=m" (*sfp)); | |||
| 747 | ||||
| 748 | #if NVMM1 > 0 | |||
| 749 | /* Re-enable VMM if needed */ | |||
| 750 | if (ci->ci_flags & CPUF_VMM0x20000) | |||
| 751 | start_vmm_on_cpu(ci); | |||
| 752 | #endif /* NVMM > 0 */ | |||
| 753 | ||||
| 754 | #ifdef MULTIPROCESSOR1 | |||
| 755 | ci->ci_flags |= CPUF_RUNNING0x2000; | |||
| 756 | /* | |||
| 757 | * Big hammer: flush all TLB entries, including ones from PTEs | |||
| 758 | * with the G bit set. This should only be necessary if TLB | |||
| 759 | * shootdown falls far behind. | |||
| 760 | */ | |||
| 761 | cr4 = rcr4(); | |||
| 762 | lcr4(cr4 & ~CR4_PGE0x00000080); | |||
| 763 | lcr4(cr4); | |||
| 764 | ||||
| 765 | /* Synchronize TSC */ | |||
| 766 | if (cold && !CPU_IS_PRIMARY(ci)((ci)->ci_flags & 0x0008)) | |||
| 767 | tsc_sync_ap(ci); | |||
| 768 | #endif | |||
| 769 | } | |||
| 770 | ||||
#if NVMM > 0
/*
 * cpu_init_vmm
 *
 * Initializes per-cpu VMM state.  Only CPUs with CI_VMM_VMX set need any
 * work: they receive a zeroed, page-sized VMXON region whose physical
 * address is looked up and recorded, a cleared VMCS physical address and
 * an initialized VMCS lock.
 *
 * Parameters:
 *  ci: the cpu for which state is being initialized
 */
void
cpu_init_vmm(struct cpu_info *ci)
{
	/* Nothing to set up unless this CPU is flagged for VMX */
	if ((ci->ci_vmm_flags & CI_VMM_VMX) == 0)
		return;

	/*
	 * Allocate a per-cpu VMXON region for VMX CPUs
	 */
	ci->ci_vmxon_region = (struct vmxon_region *)malloc(PAGE_SIZE,
	    M_DEVBUF, M_WAITOK | M_ZERO);

	/* The region's physical address is needed as well */
	if (!pmap_extract(pmap_kernel(), (vaddr_t)ci->ci_vmxon_region,
	    &ci->ci_vmxon_region_pa))
		panic("Can't locate VMXON region in phys mem");

	ci->ci_vmcs_pa = VMX_VMCS_PA_CLEAR;
	rw_init(&ci->ci_vmcs_lock, "vmcslock");
}
#endif /* NVMM > 0 */
| 797 | ||||
| 798 | #ifdef MULTIPROCESSOR1 | |||
| 799 | void | |||
| 800 | cpu_boot_secondary_processors(void) | |||
| 801 | { | |||
| 802 | struct cpu_info *ci; | |||
| 803 | u_long i; | |||
| 804 | ||||
| 805 | for (i=0; i < MAXCPUS64; i++) { | |||
| 806 | ci = cpu_info[i]; | |||
| 807 | if (ci == NULL((void *)0)) | |||
| 808 | continue; | |||
| 809 | if (ci->ci_idle_pcb == NULL((void *)0)) | |||
| 810 | continue; | |||
| 811 | if ((ci->ci_flags & CPUF_PRESENT0x1000) == 0) | |||
| 812 | continue; | |||
| 813 | if (ci->ci_flags & (CPUF_BSP0x0001 | CPUF_SP0x0004 | CPUF_PRIMARY0x0008)) | |||
| 814 | continue; | |||
| 815 | ci->ci_randseed = (arc4random() & 0x7fffffff) + 1; | |||
| 816 | cpu_boot_secondary(ci); | |||
| 817 | } | |||
| 818 | } | |||
| 819 | ||||
| 820 | void | |||
| 821 | cpu_start_secondary(struct cpu_info *ci) | |||
| 822 | { | |||
| 823 | int i; | |||
| 824 | u_long s; | |||
| 825 | ||||
| 826 | ci->ci_flags |= CPUF_AP0x0002; | |||
| 827 | ||||
| 828 | pmap_kenter_pa(MP_TRAMPOLINE(16 * (1 << 12)), MP_TRAMPOLINE(16 * (1 << 12)), PROT_READ0x01 | PROT_EXEC0x04); | |||
| 829 | pmap_kenter_pa(MP_TRAMP_DATA(17 * (1 << 12)), MP_TRAMP_DATA(17 * (1 << 12)), PROT_READ0x01 | PROT_WRITE0x02); | |||
| 830 | ||||
| 831 | CPU_STARTUP(ci)((ci)->ci_func->start(ci)); | |||
| 832 | ||||
| 833 | /* | |||
| 834 | * wait for it to become ready | |||
| 835 | */ | |||
| 836 | for (i = 100000; (!(ci->ci_flags & CPUF_PRESENT0x1000)) && i>0;i--) { | |||
| 837 | delay(10)(*delay_func)(10); | |||
| 838 | } | |||
| 839 | if (! (ci->ci_flags & CPUF_PRESENT0x1000)) { | |||
| 840 | printf("%s: failed to become ready\n", ci->ci_dev->dv_xname); | |||
| 841 | #if defined(MPDEBUG) && defined(DDB1) | |||
| 842 | printf("dropping into debugger; continue from here to resume boot\n"); | |||
| 843 | db_enter(); | |||
| 844 | #endif | |||
| 845 | } else { | |||
| 846 | /* | |||
| 847 | * Synchronize time stamp counters. Invalidate cache and | |||
| 848 | * synchronize twice (in tsc_sync_bp) to minimize possible | |||
| 849 | * cache effects. Disable interrupts to try and rule out any | |||
| 850 | * external interference. | |||
| 851 | */ | |||
| 852 | s = intr_disable(); | |||
| 853 | wbinvd(); | |||
| 854 | tsc_sync_bp(ci); | |||
| 855 | intr_restore(s); | |||
| 856 | #ifdef TSC_DEBUG | |||
| 857 | printf("TSC skew=%lld\n", (long long)ci->ci_tsc_skew); | |||
| 858 | #endif | |||
| 859 | } | |||
| 860 | ||||
| 861 | if ((ci->ci_flags & CPUF_IDENTIFIED0x0020) == 0) { | |||
| 862 | atomic_setbits_intx86_atomic_setbits_u32(&ci->ci_flags, CPUF_IDENTIFY0x0010); | |||
| 863 | ||||
| 864 | /* wait for it to identify */ | |||
| 865 | for (i = 2000000; (ci->ci_flags & CPUF_IDENTIFY0x0010) && i > 0; i--) | |||
| 866 | delay(10)(*delay_func)(10); | |||
| 867 | ||||
| 868 | if (ci->ci_flags & CPUF_IDENTIFY0x0010) | |||
| 869 | printf("%s: failed to identify\n", | |||
| 870 | ci->ci_dev->dv_xname); | |||
| 871 | } | |||
| 872 | ||||
| 873 | CPU_START_CLEANUP(ci)((ci)->ci_func->cleanup(ci)); | |||
| 874 | ||||
| 875 | pmap_kremove(MP_TRAMPOLINE(16 * (1 << 12)), PAGE_SIZE(1 << 12)); | |||
| 876 | pmap_kremove(MP_TRAMP_DATA(17 * (1 << 12)), PAGE_SIZE(1 << 12)); | |||
| 877 | } | |||
| 878 | ||||
| 879 | void | |||
| 880 | cpu_boot_secondary(struct cpu_info *ci) | |||
| 881 | { | |||
| 882 | int i; | |||
| 883 | int64_t drift; | |||
| 884 | u_long s; | |||
| 885 | ||||
| 886 | atomic_setbits_intx86_atomic_setbits_u32(&ci->ci_flags, CPUF_GO0x8000); | |||
| 887 | ||||
| 888 | for (i = 100000; (!(ci->ci_flags & CPUF_RUNNING0x2000)) && i>0;i--) { | |||
| 889 | delay(10)(*delay_func)(10); | |||
| 890 | } | |||
| 891 | if (! (ci->ci_flags & CPUF_RUNNING0x2000)) { | |||
| 892 | printf("cpu failed to start\n"); | |||
| 893 | #if defined(MPDEBUG) && defined(DDB1) | |||
| 894 | printf("dropping into debugger; continue from here to resume boot\n"); | |||
| 895 | db_enter(); | |||
| 896 | #endif | |||
| 897 | } else if (cold) { | |||
| 898 | /* Synchronize TSC again, check for drift. */ | |||
| 899 | drift = ci->ci_tsc_skew; | |||
| 900 | s = intr_disable(); | |||
| 901 | wbinvd(); | |||
| 902 | tsc_sync_bp(ci); | |||
| 903 | intr_restore(s); | |||
| 904 | drift -= ci->ci_tsc_skew; | |||
| 905 | #ifdef TSC_DEBUG | |||
| 906 | printf("TSC skew=%lld drift=%lld\n", | |||
| 907 | (long long)ci->ci_tsc_skew, (long long)drift); | |||
| 908 | #endif | |||
| 909 | tsc_sync_drift(drift); | |||
| 910 | } | |||
| 911 | } | |||
| 912 | ||||
| 913 | /* | |||
| 914 | * The CPU ends up here when it's ready to run | |||
| 915 | * This is called from code in mptramp.s; at this point, we are running | |||
| 916 | * in the idle pcb/idle stack of the new cpu. When this function returns, | |||
| 917 | * this processor will enter the idle loop and start looking for work. | |||
| 918 | * | |||
| 919 | * XXX should share some of this with init386 in machdep.c | |||
| 920 | */ | |||
| 921 | void | |||
| 922 | cpu_hatch(void *v) | |||
| 923 | { | |||
| 924 | struct cpu_info *ci = (struct cpu_info *)v; | |||
| 925 | int s; | |||
| 926 | ||||
| 927 | cpu_init_msrs(ci); | |||
| 928 | ||||
| 929 | #ifdef DEBUG | |||
| 930 | if (ci->ci_flags & CPUF_PRESENT0x1000) | |||
| 931 | panic("%s: already running!?", ci->ci_dev->dv_xname); | |||
| 932 | #endif | |||
| 933 | ||||
| 934 | /* | |||
| 935 | * Synchronize the TSC for the first time. Note that interrupts are | |||
| 936 | * off at this point. | |||
| 937 | */ | |||
| 938 | wbinvd(); | |||
| 939 | ci->ci_flags |= CPUF_PRESENT0x1000; | |||
| 940 | ci->ci_tsc_skew = 0; /* reset on resume */ | |||
| 941 | tsc_sync_ap(ci); | |||
| 942 | ||||
| 943 | lapic_enable(); | |||
| 944 | lapic_startclock(); | |||
| 945 | cpu_ucode_apply(ci); | |||
| 946 | cpu_tsx_disable(ci); | |||
| 947 | ||||
| 948 | if ((ci->ci_flags & CPUF_IDENTIFIED0x0020) == 0) { | |||
| 949 | /* | |||
| 950 | * We need to wait until we can identify, otherwise dmesg | |||
| 951 | * output will be messy. | |||
| 952 | */ | |||
| 953 | while ((ci->ci_flags & CPUF_IDENTIFY0x0010) == 0) | |||
| 954 | delay(10)(*delay_func)(10); | |||
| 955 | ||||
| 956 | identifycpu(ci); | |||
| 957 | ||||
| 958 | /* Signal we're done */ | |||
| 959 | atomic_clearbits_intx86_atomic_clearbits_u32(&ci->ci_flags, CPUF_IDENTIFY0x0010); | |||
| 960 | /* Prevent identifycpu() from running again */ | |||
| 961 | atomic_setbits_intx86_atomic_setbits_u32(&ci->ci_flags, CPUF_IDENTIFIED0x0020); | |||
| 962 | } | |||
| 963 | ||||
| 964 | while ((ci->ci_flags & CPUF_GO0x8000) == 0) | |||
| 965 | delay(10)(*delay_func)(10); | |||
| 966 | #ifdef HIBERNATE1 | |||
| 967 | if ((ci->ci_flags & CPUF_PARK0x10000) != 0) { | |||
| 968 | atomic_clearbits_intx86_atomic_clearbits_u32(&ci->ci_flags, CPUF_PARK0x10000); | |||
| 969 | hibernate_drop_to_real_mode(); | |||
| 970 | } | |||
| 971 | #endif /* HIBERNATE */ | |||
| 972 | ||||
| 973 | #ifdef DEBUG | |||
| 974 | if (ci->ci_flags & CPUF_RUNNING0x2000) | |||
| 975 | panic("%s: already running!?", ci->ci_dev->dv_xname); | |||
| 976 | #endif | |||
| 977 | ||||
| 978 | cpu_init_idt(); | |||
| 979 | lapic_set_lvt(); | |||
| 980 | gdt_init_cpu(ci); | |||
| 981 | fpuinit(ci); | |||
| 982 | ||||
| 983 | lldt(0); | |||
| 984 | ||||
| 985 | cpu_init(ci); | |||
| 986 | #if NPVBUS1 > 0 | |||
| 987 | pvbus_init_cpu(); | |||
| 988 | #endif | |||
| 989 | ||||
| 990 | /* Re-initialise memory range handling on AP */ | |||
| 991 | if (mem_range_softc.mr_op != NULL((void *)0)) | |||
| 992 | mem_range_softc.mr_op->initAP(&mem_range_softc); | |||
| 993 | ||||
| 994 | s = splhigh()splraise(0xd); | |||
| 995 | lcr8(0); | |||
| 996 | intr_enable(); | |||
| 997 | ||||
| 998 | nanouptime(&ci->ci_schedstate.spc_runtime); | |||
| 999 | splx(s)spllower(s); | |||
| 1000 | ||||
| 1001 | SCHED_LOCK(s)do { s = splraise(0xc); __mp_lock(&sched_lock); } while ( 0); | |||
| 1002 | cpu_switchto(NULL((void *)0), sched_chooseproc()); | |||
| 1003 | } | |||
| 1004 | ||||
| 1005 | #if defined(DDB1) | |||
| 1006 | ||||
| 1007 | #include <ddb/db_output.h> | |||
| 1008 | #include <machine/db_machdep.h> | |||
| 1009 | ||||
| 1010 | /* | |||
| 1011 | * Dump cpu information from ddb. | |||
| 1012 | */ | |||
| 1013 | void | |||
| 1014 | cpu_debug_dump(void) | |||
| 1015 | { | |||
| 1016 | struct cpu_info *ci; | |||
| 1017 | CPU_INFO_ITERATORint cii; | |||
| 1018 | ||||
| 1019 | db_printf("addr dev id flags ipis curproc\n"); | |||
| 1020 | CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci-> ci_next) { | |||
| 1021 | db_printf("%p %s %u %x %x %10p\n", | |||
| 1022 | ci, | |||
| 1023 | ci->ci_dev == NULL((void *)0) ? "BOOT" : ci->ci_dev->dv_xname, | |||
| 1024 | ci->ci_cpuid, | |||
| 1025 | ci->ci_flags, ci->ci_ipis, | |||
| 1026 | ci->ci_curproc); | |||
| 1027 | } | |||
| 1028 | } | |||
| 1029 | #endif | |||
| 1030 | ||||
| 1031 | int | |||
| 1032 | mp_cpu_start(struct cpu_info *ci) | |||
| 1033 | { | |||
| 1034 | unsigned short dwordptr[2]; | |||
| 1035 | ||||
| 1036 | /* | |||
| 1037 | * "The BSP must initialize CMOS shutdown code to 0Ah ..." | |||
| 1038 | */ | |||
| 1039 | ||||
| 1040 | outb(IO_RTC, NVRAM_RESET)( (__builtin_constant_p((0x070)) && (0x070) < 0x100 ) ? __outbc(0x070, (0xe + 1)) : __outb(0x070, (0xe + 1))); | |||
| 1041 | outb(IO_RTC+1, NVRAM_RESET_JUMP)( (__builtin_constant_p((0x070 +1)) && (0x070 +1) < 0x100) ? __outbc(0x070 +1, 0x0a) : __outb(0x070 +1, 0x0a)); | |||
| 1042 | ||||
| 1043 | /* | |||
| 1044 | * "and the warm reset vector (DWORD based at 40:67) to point | |||
| 1045 | * to the AP startup code ..." | |||
| 1046 | */ | |||
| 1047 | ||||
| 1048 | dwordptr[0] = 0; | |||
| 1049 | dwordptr[1] = MP_TRAMPOLINE(16 * (1 << 12)) >> 4; | |||
| 1050 | ||||
| 1051 | pmap_kenter_pa(0, 0, PROT_READ0x01 | PROT_WRITE0x02); | |||
| 1052 | memcpy((u_int8_t *) 0x467, dwordptr, 4)__builtin_memcpy(((u_int8_t *) 0x467), (dwordptr), (4)); | |||
| 1053 | pmap_kremove(0, PAGE_SIZE(1 << 12)); | |||
| 1054 | ||||
| 1055 | #if NLAPIC1 > 0 | |||
| 1056 | /* | |||
| 1057 | * ... prior to executing the following sequence:" | |||
| 1058 | */ | |||
| 1059 | ||||
| 1060 | if (ci->ci_flags & CPUF_AP0x0002) { | |||
| 1061 | x86_ipi_init(ci->ci_apicid); | |||
| 1062 | ||||
| 1063 | delay(10000)(*delay_func)(10000); | |||
| 1064 | ||||
| 1065 | if (cpu_feature & CPUID_APIC0x00000200) { | |||
| 1066 | x86_ipi(MP_TRAMPOLINE(16 * (1 << 12))/PAGE_SIZE(1 << 12), ci->ci_apicid, | |||
| 1067 | LAPIC_DLMODE_STARTUP0x00000600); | |||
| 1068 | delay(200)(*delay_func)(200); | |||
| 1069 | ||||
| 1070 | x86_ipi(MP_TRAMPOLINE(16 * (1 << 12))/PAGE_SIZE(1 << 12), ci->ci_apicid, | |||
| 1071 | LAPIC_DLMODE_STARTUP0x00000600); | |||
| 1072 | delay(200)(*delay_func)(200); | |||
| 1073 | } | |||
| 1074 | } | |||
| 1075 | #endif | |||
| 1076 | return 0; | |||
| 1077 | } | |||
| 1078 | ||||
| 1079 | void | |||
| 1080 | mp_cpu_start_cleanup(struct cpu_info *ci) | |||
| 1081 | { | |||
| 1082 | /* | |||
| 1083 | * Ensure the NVRAM reset byte contains something vaguely sane. | |||
| 1084 | */ | |||
| 1085 | ||||
| 1086 | outb(IO_RTC, NVRAM_RESET)( (__builtin_constant_p((0x070)) && (0x070) < 0x100 ) ? __outbc(0x070, (0xe + 1)) : __outb(0x070, (0xe + 1))); | |||
| 1087 | outb(IO_RTC+1, NVRAM_RESET_RST)( (__builtin_constant_p((0x070 +1)) && (0x070 +1) < 0x100) ? __outbc(0x070 +1, 0x00) : __outb(0x070 +1, 0x00)); | |||
| 1088 | } | |||
| 1089 | #endif /* MULTIPROCESSOR */ | |||
| 1090 | ||||
| 1091 | typedef void (vector)(void); | |||
| 1092 | extern vector Xsyscall_meltdown, Xsyscall, Xsyscall32; | |||
| 1093 | ||||
| 1094 | void | |||
| 1095 | cpu_init_msrs(struct cpu_info *ci) | |||
| 1096 | { | |||
| 1097 | uint64_t msr; | |||
| 1098 | int family; | |||
| 1099 | ||||
| 1100 | wrmsr(MSR_STAR0xc0000081, | |||
| 1101 | ((uint64_t)GSEL(GCODE_SEL, SEL_KPL)(((1) << 3) | 0) << 32) | | |||
| 1102 | ((uint64_t)GSEL(GUCODE32_SEL, SEL_UPL)(((3) << 3) | 3) << 48)); | |||
| 1103 | wrmsr(MSR_LSTAR0xc0000082, cpu_meltdown ? (uint64_t)Xsyscall_meltdown : | |||
| 1104 | (uint64_t)Xsyscall); | |||
| 1105 | wrmsr(MSR_CSTAR0xc0000083, (uint64_t)Xsyscall32); | |||
| 1106 | wrmsr(MSR_SFMASK0xc0000084, PSL_NT0x00004000|PSL_T0x00000100|PSL_I0x00000200|PSL_C0x00000001|PSL_D0x00000400|PSL_AC0x00040000); | |||
| 1107 | ||||
| 1108 | wrmsr(MSR_FSBASE0xc0000100, 0); | |||
| 1109 | wrmsr(MSR_GSBASE0xc0000101, (u_int64_t)ci); | |||
| 1110 | wrmsr(MSR_KERNELGSBASE0xc0000102, 0); | |||
| 1111 | ||||
| 1112 | family = ci->ci_family; | |||
| 1113 | if (strcmp(cpu_vendor, "GenuineIntel") == 0 && | |||
| 1114 | (family > 6 || (family == 6 && ci->ci_model >= 0xd)) && | |||
| 1115 | rdmsr_safe(MSR_MISC_ENABLE0x1a0, &msr) == 0 && | |||
| 1116 | (msr & MISC_ENABLE_FAST_STRINGS(1 << 0)) == 0) { | |||
| 1117 | msr |= MISC_ENABLE_FAST_STRINGS(1 << 0); | |||
| 1118 | wrmsr(MSR_MISC_ENABLE0x1a0, msr); | |||
| 1119 | DPRINTF("%s: enabled fast strings\n", ci->ci_dev->dv_xname); | |||
| 1120 | } | |||
| 1121 | ||||
| 1122 | patinit(ci); | |||
| 1123 | } | |||
| 1124 | ||||
| 1125 | void | |||
| 1126 | cpu_tsx_disable(struct cpu_info *ci) | |||
| 1127 | { | |||
| 1128 | uint64_t msr; | |||
| 1129 | uint32_t dummy, sefflags_edx; | |||
| 1130 | ||||
| 1131 | /* this runs before identifycpu() populates ci_feature_sefflags_edx */ | |||
| 1132 | if (cpuid_level < 0x07) | |||
| 1133 | return; | |||
| 1134 | CPUID_LEAF(0x7, 0, dummy, dummy, dummy, sefflags_edx)__asm volatile("cpuid" : "=a" (dummy), "=b" (dummy), "=c" (dummy ), "=d" (sefflags_edx) : "a" (0x7), "c" (0)); | |||
| 1135 | ||||
| 1136 | if (strcmp(cpu_vendor, "GenuineIntel") == 0 && | |||
| 1137 | (sefflags_edx & SEFF0EDX_ARCH_CAP0x20000000)) { | |||
| 1138 | msr = rdmsr(MSR_ARCH_CAPABILITIES0x10a); | |||
| 1139 | if (msr & ARCH_CAPABILITIES_TSX_CTRL(1 << 7)) { | |||
| 1140 | msr = rdmsr(MSR_TSX_CTRL0x122); | |||
| 1141 | msr |= TSX_CTRL_RTM_DISABLE(1ULL << 0) | TSX_CTRL_TSX_CPUID_CLEAR(1ULL << 1); | |||
| 1142 | wrmsr(MSR_TSX_CTRL0x122, msr); | |||
| 1143 | } | |||
| 1144 | } | |||
| 1145 | } | |||
| 1146 | ||||
| 1147 | void | |||
| 1148 | patinit(struct cpu_info *ci) | |||
| 1149 | { | |||
| 1150 | extern int pmap_pg_wc; | |||
| 1151 | u_int64_t reg; | |||
| 1152 | ||||
| 1153 | if ((cpu_feature & CPUID_PAT0x00010000) == 0) | |||
| 1154 | return; | |||
| 1155 | /* | |||
| 1156 | * Set up PAT bits. | |||
| 1157 | * The default pat table is the following: | |||
| 1158 | * WB, WT, UC-, UC, WB, WT, UC-, UC | |||
| 1159 | * We change it to: | |||
| 1160 | * WB, WC, UC-, UC, WB, WC, UC-, UC | |||
| 1161 | * i.e change the WT bit to be WC. | |||
| 1162 | */ | |||
| 1163 | reg = PATENTRY(0, PAT_WB)(0x6UL << ((0) * 8)) | PATENTRY(1, PAT_WC)(0x1UL << ((1) * 8)) | | |||
| 1164 | PATENTRY(2, PAT_UCMINUS)(0x7UL << ((2) * 8)) | PATENTRY(3, PAT_UC)(0x0UL << ((3) * 8)) | | |||
| 1165 | PATENTRY(4, PAT_WB)(0x6UL << ((4) * 8)) | PATENTRY(5, PAT_WC)(0x1UL << ((5) * 8)) | | |||
| 1166 | PATENTRY(6, PAT_UCMINUS)(0x7UL << ((6) * 8)) | PATENTRY(7, PAT_UC)(0x0UL << ((7) * 8)); | |||
| 1167 | ||||
| 1168 | wrmsr(MSR_CR_PAT0x277, reg); | |||
| 1169 | pmap_pg_wc = PG_WC(0x0000000000000008UL); | |||
| 1170 | } | |||
| 1171 | ||||
| 1172 | struct timeout rdrand_tmo; | |||
| 1173 | void rdrand(void *); | |||
| 1174 | ||||
| 1175 | void | |||
| 1176 | rdrand(void *v) | |||
| 1177 | { | |||
| 1178 | struct timeout *tmo = v; | |||
| 1179 | extern int has_rdrand, has_rdseed; | |||
| 1180 | union { | |||
| 1181 | uint64_t u64; | |||
| 1182 | uint32_t u32[2]; | |||
| 1183 | } r, t; | |||
| 1184 | uint64_t tsc; | |||
| 1185 | uint8_t valid = 0; | |||
| 1186 | ||||
| 1187 | tsc = rdtsc(); | |||
| 1188 | if (has_rdseed) | |||
| 1189 | __asm volatile( | |||
| 1190 | "rdseed %0\n\t" | |||
| 1191 | "setc %1\n" | |||
| 1192 | : "=r" (r.u64), "=qm" (valid) ); | |||
| 1193 | if (has_rdrand && (has_rdseed == 0 || valid == 0)) | |||
| 1194 | __asm volatile( | |||
| 1195 | "rdrand %0\n\t" | |||
| 1196 | "setc %1\n" | |||
| 1197 | : "=r" (r.u64), "=qm" (valid) ); | |||
| 1198 | ||||
| 1199 | t.u64 = tsc; | |||
| 1200 | t.u64 ^= r.u64; | |||
| ||||
| 1201 | t.u64 ^= valid; /* potential rdrand empty */ | |||
| 1202 | if (has_rdrand) | |||
| 1203 | t.u64 += rdtsc(); /* potential vmexit latency */ | |||
| 1204 | ||||
| 1205 | enqueue_randomness(t.u32[0]); | |||
| 1206 | enqueue_randomness(t.u32[1]); | |||
| 1207 | ||||
| 1208 | if (tmo) | |||
| 1209 | timeout_add_msec(tmo, 10); | |||
| 1210 | } | |||
| 1211 | ||||
| 1212 | int | |||
| 1213 | cpu_activate(struct device *self, int act) | |||
| 1214 | { | |||
| 1215 | struct cpu_softc *sc = (struct cpu_softc *)self; | |||
| 1216 | ||||
| 1217 | switch (act) { | |||
| ||||
| 1218 | case DVACT_RESUME4: | |||
| 1219 | if (sc->sc_info->ci_cpuid == 0) | |||
| 1220 | rdrand(NULL((void *)0)); | |||
| 1221 | #if NPCTR1 > 0 | |||
| 1222 | pctr_resume(sc->sc_info); | |||
| 1223 | #endif | |||
| 1224 | break; | |||
| 1225 | } | |||
| 1226 | ||||
| 1227 | return (0); | |||
| 1228 | } | |||
| 1229 | ||||
| 1230 | /* | |||
| 1231 | * cpu_enter_pages | |||
| 1232 | * | |||
| 1233 | * Requests mapping of various special pages required in the Intel Meltdown | |||
| 1234 | * case (to be entered into the U-K page table): | |||
| 1235 | * | |||
| 1236 | * 1 tss+gdt page for each CPU | |||
| 1237 | * 1 trampoline stack page for each CPU | |||
| 1238 | * | |||
| 1239 | * The cpu_info_full struct for each CPU straddles these pages. The offset into | |||
| 1240 | * 'cif' is calculated below, for each page. For more information, consult | |||
| 1241 | * the definition of struct cpu_info_full in cpu_full.h | |||
| 1242 | * | |||
| 1243 | * On CPUs unaffected by Meltdown, this function still configures 'cif' but | |||
| 1244 | * the calls to pmap_enter_special become no-ops. | |||
| 1245 | * | |||
| 1246 | * Parameters: | |||
| 1247 | * cif : the cpu_info_full structure describing a CPU whose pages are to be | |||
| 1248 | * entered into the special meltdown U-K page table. | |||
| 1249 | */ | |||
| 1250 | void | |||
| 1251 | cpu_enter_pages(struct cpu_info_full *cif) | |||
| 1252 | { | |||
| 1253 | vaddr_t va; | |||
| 1254 | paddr_t pa; | |||
| 1255 | ||||
| 1256 | /* The TSS+GDT need to be readable */ | |||
| 1257 | va = (vaddr_t)cif; | |||
| 1258 | pmap_extract(pmap_kernel()(&kernel_pmap_store), va, &pa); | |||
| 1259 | pmap_enter_special(va, pa, PROT_READ0x01); | |||
| 1260 | DPRINTF("%s: entered tss+gdt page at va 0x%llx pa 0x%llx\n", __func__, | |||
| 1261 | (uint64_t)va, (uint64_t)pa); | |||
| 1262 | ||||
| 1263 | /* The trampoline stack page needs to be read/write */ | |||
| 1264 | va = (vaddr_t)&cif->cif_tramp_stack; | |||
| 1265 | pmap_extract(pmap_kernel()(&kernel_pmap_store), va, &pa); | |||
| 1266 | pmap_enter_special(va, pa, PROT_READ0x01 | PROT_WRITE0x02); | |||
| 1267 | DPRINTF("%s: entered t.stack page at va 0x%llx pa 0x%llx\n", __func__, | |||
| 1268 | (uint64_t)va, (uint64_t)pa); | |||
| 1269 | ||||
| 1270 | cif->cif_tsscif_RO.u_tssgdt.uu_tss.tss_rsp0 = va + sizeof(cif->cif_tramp_stack) - 16; | |||
| 1271 | DPRINTF("%s: cif_tss.tss_rsp0 = 0x%llx\n" ,__func__, | |||
| 1272 | (uint64_t)cif->cif_tss.tss_rsp0); | |||
| 1273 | cif->cif_cpu.ci_intr_rsp = cif->cif_tsscif_RO.u_tssgdt.uu_tss.tss_rsp0 - | |||
| 1274 | sizeof(struct iretq_frame); | |||
| 1275 | ||||
| 1276 | #define SETUP_IST_SPECIAL_STACK(ist, cif, member)do { (cif)->cif_RO.u_tssgdt.uu_tss.tss_ist[(ist)] = (vaddr_t )&(cif)->member + sizeof((cif)->member) - 16; (cif) ->member[(sizeof(((cif)->member)) / sizeof(((cif)->member )[0])) - 2] = (int64_t)&(cif)->cif_cpu; } while (0) do { \ | |||
| 1277 | (cif)->cif_tsscif_RO.u_tssgdt.uu_tss.tss_ist[(ist)] = (vaddr_t)&(cif)->member + \ | |||
| 1278 | sizeof((cif)->member) - 16; \ | |||
| 1279 | (cif)->member[nitems((cif)->member)(sizeof(((cif)->member)) / sizeof(((cif)->member)[0])) - 2] = (int64_t)&(cif)->cif_cpu; \ | |||
| 1280 | } while (0) | |||
| 1281 | ||||
| 1282 | SETUP_IST_SPECIAL_STACK(0, cif, cif_dblflt_stack)do { (cif)->cif_RO.u_tssgdt.uu_tss.tss_ist[(0)] = (vaddr_t )&(cif)->cif_dblflt_stack + sizeof((cif)->cif_dblflt_stack ) - 16; (cif)->cif_dblflt_stack[(sizeof(((cif)->cif_dblflt_stack )) / sizeof(((cif)->cif_dblflt_stack)[0])) - 2] = (int64_t )&(cif)->cif_cpu; } while (0); | |||
| 1283 | SETUP_IST_SPECIAL_STACK(1, cif, cif_nmi_stack)do { (cif)->cif_RO.u_tssgdt.uu_tss.tss_ist[(1)] = (vaddr_t )&(cif)->cif_nmi_stack + sizeof((cif)->cif_nmi_stack ) - 16; (cif)->cif_nmi_stack[(sizeof(((cif)->cif_nmi_stack )) / sizeof(((cif)->cif_nmi_stack)[0])) - 2] = (int64_t)& (cif)->cif_cpu; } while (0); | |||
| 1284 | ||||
| 1285 | /* an empty iomap, by setting its offset to the TSS limit */ | |||
| 1286 | cif->cif_tsscif_RO.u_tssgdt.uu_tss.tss_iobase = sizeof(cif->cif_tsscif_RO.u_tssgdt.uu_tss); | |||
| 1287 | } | |||
| 1288 | ||||
#ifdef MULTIPROCESSOR
/*
 * Broadcast X86_IPI_WBINVD, then execute wbinvd() on the calling CPU
 * too.  Always returns 0.
 */
int
wbinvd_on_all_cpus(void)
{
	x86_broadcast_ipi(X86_IPI_WBINVD);
	wbinvd();

	return (0);
}
#endif