Bug Summary

File: arch/amd64/amd64/vmm_machdep.c
Warning: line 6581, column 8
Access to field 'spc_schedflags' results in a dereference of a null pointer (loaded from variable 'spc')
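
Line 6581 is outside the excerpt below, so the flagged code path is not visible here. The analyzer's claim is that, on some path, 'spc' is NULL when 'spc->spc_schedflags' is read; the usual remedy is to guard (or assert) the pointer before the field access. The following is only a minimal, hypothetical sketch of that pattern. The helper name and the SPCF_SHOULDYIELD test are assumptions for illustration, not the actual code at the reported line.

	#include <sys/param.h>
	#include <sys/sched.h>

	/*
	 * Hypothetical sketch only; not the code at vmm_machdep.c:6581.
	 * 'spc' stands in for a schedstate_percpu pointer that the
	 * analyzer believes may be NULL on the path it reports.
	 */
	static inline int
	spc_should_yield(struct schedstate_percpu *spc)
	{
		if (spc == NULL)	/* guard the dereference the analyzer flags */
			return (0);
		return ((spc->spc_schedflags & SPCF_SHOULDYIELD) != 0);
	}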

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.4 -analyze -disable-free -clear-ast-before-backend -disable-llvm-verifier -discard-value-names -main-file-name vmm_machdep.c -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -ffp-contract=on -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -target-feature +retpoline-external-thunk -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/llvm16/lib/clang/16 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/legacy-dpm -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu13 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/inc -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/inc/pmfw_if -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D SUSPEND -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant 
-fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fcf-protection=branch -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /home/ben/Projects/scan/2024-01-11-110808-61670-1 -x c /usr/src/sys/arch/amd64/amd64/vmm_machdep.c
1/* $OpenBSD: vmm_machdep.c,v 1.14 2024/01/10 04:13:59 dv Exp $ */
2/*
3 * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <sys/param.h>
19#include <sys/systm.h>
20#include <sys/signalvar.h>
21#include <sys/malloc.h>
22#include <sys/device.h>
23#include <sys/pool.h>
24#include <sys/proc.h>
25#include <sys/user.h>
26#include <sys/ioctl.h>
27#include <sys/queue.h>
28#include <sys/refcnt.h>
29#include <sys/rwlock.h>
30#include <sys/pledge.h>
31#include <sys/memrange.h>
32#include <sys/tracepoint.h>
33
34#include <uvm/uvm_extern.h>
35
36#include <machine/fpu.h>
37#include <machine/pmap.h>
38#include <machine/biosvar.h>
39#include <machine/segments.h>
40#include <machine/cpufunc.h>
41#include <machine/vmmvar.h>
42
43#include <dev/isa/isareg.h>
44#include <dev/pv/pvreg.h>
45
46#include <dev/vmm/vmm.h>
47
48#ifdef MP_LOCKDEBUG
49#include <ddb/db_output.h>
50extern int __mp_lock_spinout;
51#endif /* MP_LOCKDEBUG */
52
53void *l1tf_flush_region;
54
55#define DEVNAME(s) ((s)->sc_dev.dv_xname)
56
57#define CTRL_DUMP(x,y,z) printf(" %s: Can set:%s Can clear:%s\n", #z , \
58 vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \
59 IA32_VMX_##z, 1) ? "Yes" : "No", \
60 vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \
61 IA32_VMX_##z, 0) ? "Yes" : "No");
62
63#define VMX_EXIT_INFO_HAVE_RIP0x1 0x1
64#define VMX_EXIT_INFO_HAVE_REASON0x2 0x2
65#define VMX_EXIT_INFO_COMPLETE(0x1 | 0x2) \
66 (VMX_EXIT_INFO_HAVE_RIP0x1 | VMX_EXIT_INFO_HAVE_REASON0x2)
67
68void vmx_dump_vmcs_field(uint16_t, const char *);
69int vmm_enabled(void);
70void vmm_activate_machdep(struct device *, int);
71int vmmioctl_machdep(dev_t, u_long, caddr_t, int, struct proc *);
72int vmm_quiesce_vmx(void);
73int vm_run(struct vm_run_params *);
74int vm_intr_pending(struct vm_intr_params *);
75int vm_rwregs(struct vm_rwregs_params *, int);
76int vm_mprotect_ept(struct vm_mprotect_ept_params *);
77int vm_rwvmparams(struct vm_rwvmparams_params *, int);
78int vcpu_readregs_vmx(struct vcpu *, uint64_t, int, struct vcpu_reg_state *);
79int vcpu_readregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
80int vcpu_writeregs_vmx(struct vcpu *, uint64_t, int, struct vcpu_reg_state *);
81int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
82int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *);
83int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *);
84int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *);
85int vcpu_reload_vmcs_vmx(struct vcpu *);
86int vcpu_init(struct vcpu *);
87int vcpu_init_vmx(struct vcpu *);
88int vcpu_init_svm(struct vcpu *);
89int vcpu_run_vmx(struct vcpu *, struct vm_run_params *);
90int vcpu_run_svm(struct vcpu *, struct vm_run_params *);
91void vcpu_deinit(struct vcpu *);
92void vcpu_deinit_svm(struct vcpu *);
93void vcpu_deinit_vmx(struct vcpu *);
94int vm_impl_init(struct vm *, struct proc *);
95int vm_impl_init_vmx(struct vm *, struct proc *);
96int vm_impl_init_svm(struct vm *, struct proc *);
97void vm_impl_deinit(struct vm *);
98int vcpu_vmx_check_cap(struct vcpu *, uint32_t, uint32_t, int);
99int vcpu_vmx_compute_ctrl(uint64_t, uint16_t, uint32_t, uint32_t, uint32_t *);
100int vmx_get_exit_info(uint64_t *, uint64_t *);
101int vmx_load_pdptes(struct vcpu *);
102int vmx_handle_exit(struct vcpu *);
103int svm_handle_exit(struct vcpu *);
104int svm_handle_msr(struct vcpu *);
105int vmm_handle_xsetbv(struct vcpu *, uint64_t *);
106int vmx_handle_xsetbv(struct vcpu *);
107int svm_handle_xsetbv(struct vcpu *);
108int vmm_handle_cpuid(struct vcpu *);
109int vmx_handle_rdmsr(struct vcpu *);
110int vmx_handle_wrmsr(struct vcpu *);
111int vmx_handle_cr0_write(struct vcpu *, uint64_t);
112int vmx_handle_cr4_write(struct vcpu *, uint64_t);
113int vmx_handle_cr(struct vcpu *);
114int svm_handle_inout(struct vcpu *);
115int vmx_handle_inout(struct vcpu *);
116int svm_handle_hlt(struct vcpu *);
117int vmx_handle_hlt(struct vcpu *);
118int vmm_inject_ud(struct vcpu *);
119int vmm_inject_gp(struct vcpu *);
120int vmm_inject_db(struct vcpu *);
121void vmx_handle_intr(struct vcpu *);
122void vmx_handle_intwin(struct vcpu *);
123void vmx_handle_misc_enable_msr(struct vcpu *);
124int vmm_get_guest_memtype(struct vm *, paddr_t);
125int vmx_get_guest_faulttype(void);
126int svm_get_guest_faulttype(struct vmcb *);
127int vmx_get_exit_qualification(uint64_t *);
128int vmm_get_guest_cpu_cpl(struct vcpu *);
129int vmm_get_guest_cpu_mode(struct vcpu *);
130int svm_fault_page(struct vcpu *, paddr_t);
131int vmx_fault_page(struct vcpu *, paddr_t);
132int vmx_handle_np_fault(struct vcpu *);
133int svm_handle_np_fault(struct vcpu *);
134int vmx_mprotect_ept(vm_map_t, paddr_t, paddr_t, int);
135pt_entry_t *vmx_pmap_find_pte_ept(pmap_t, paddr_t);
136int vmm_alloc_vpid(uint16_t *);
137void vmm_free_vpid(uint16_t);
138const char *vcpu_state_decode(u_int);
139const char *vmx_exit_reason_decode(uint32_t);
140const char *svm_exit_reason_decode(uint32_t);
141const char *vmx_instruction_error_decode(uint32_t);
142void svm_setmsrbr(struct vcpu *, uint32_t);
143void svm_setmsrbw(struct vcpu *, uint32_t);
144void svm_setmsrbrw(struct vcpu *, uint32_t);
145void vmx_setmsrbr(struct vcpu *, uint32_t);
146void vmx_setmsrbw(struct vcpu *, uint32_t);
147void vmx_setmsrbrw(struct vcpu *, uint32_t);
148void svm_set_clean(struct vcpu *, uint32_t);
149void svm_set_dirty(struct vcpu *, uint32_t);
150
151int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size);
152void vmm_init_pvclock(struct vcpu *, paddr_t);
153int vmm_update_pvclock(struct vcpu *);
154int vmm_pat_is_valid(uint64_t);
155
156#ifdef MULTIPROCESSOR1
157static int vmx_remote_vmclear(struct cpu_info*, struct vcpu *);
158#endif
159
160#ifdef VMM_DEBUG
161void dump_vcpu(struct vcpu *);
162void vmx_vcpu_dump_regs(struct vcpu *);
163void vmx_dump_vmcs(struct vcpu *);
164const char *msr_name_decode(uint32_t);
165void vmm_segment_desc_decode(uint64_t);
166void vmm_decode_cr0(uint64_t);
167void vmm_decode_cr3(uint64_t);
168void vmm_decode_cr4(uint64_t);
169void vmm_decode_msr_value(uint64_t, uint64_t);
170void vmm_decode_apicbase_msr_value(uint64_t);
171void vmm_decode_ia32_fc_value(uint64_t);
172void vmm_decode_mtrrcap_value(uint64_t);
173void vmm_decode_perf_status_value(uint64_t);
174void vmm_decode_perf_ctl_value(uint64_t);
175void vmm_decode_mtrrdeftype_value(uint64_t);
176void vmm_decode_efer_value(uint64_t);
177void vmm_decode_rflags(uint64_t);
178void vmm_decode_misc_enable_value(uint64_t);
179const char *vmm_decode_cpu_mode(struct vcpu *);
180
181extern int mtrr2mrt(int);
182
183struct vmm_reg_debug_info {
184 uint64_t vrdi_bit;
185 const char *vrdi_present;
186 const char *vrdi_absent;
187};
188#endif /* VMM_DEBUG */
189
190extern uint64_t tsc_frequency;
191extern int tsc_is_invariant;
192
193const char *vmm_hv_signature = VMM_HV_SIGNATURE"OpenBSDVMM58";
194
195const struct kmem_pa_mode vmm_kp_contig = {
196 .kp_constraint = &no_constraint,
197 .kp_maxseg = 1,
198 .kp_align = 4096,
199 .kp_zero = 1,
200};
201
202extern struct cfdriver vmm_cd;
203extern const struct cfattach vmm_ca;
204
205/*
206 * Helper struct to easily get the VMCS field IDs needed in vmread/vmwrite
207 * to access the individual fields of the guest segment registers. This
208 * struct is indexed by VCPU_REGS_* id.
209 */
210const struct {
211 uint64_t selid;
212 uint64_t limitid;
213 uint64_t arid;
214 uint64_t baseid;
215} vmm_vmx_sreg_vmcs_fields[] = {
216 { VMCS_GUEST_IA32_ES_SEL0x0800, VMCS_GUEST_IA32_ES_LIMIT0x4800,
217 VMCS_GUEST_IA32_ES_AR0x4814, VMCS_GUEST_IA32_ES_BASE0x6806 },
218 { VMCS_GUEST_IA32_CS_SEL0x0802, VMCS_GUEST_IA32_CS_LIMIT0x4802,
219 VMCS_GUEST_IA32_CS_AR0x4816, VMCS_GUEST_IA32_CS_BASE0x6808 },
220 { VMCS_GUEST_IA32_SS_SEL0x0804, VMCS_GUEST_IA32_SS_LIMIT0x4804,
221 VMCS_GUEST_IA32_SS_AR0x4818, VMCS_GUEST_IA32_SS_BASE0x680A },
222 { VMCS_GUEST_IA32_DS_SEL0x0806, VMCS_GUEST_IA32_DS_LIMIT0x4806,
223 VMCS_GUEST_IA32_DS_AR0x481A, VMCS_GUEST_IA32_DS_BASE0x680C },
224 { VMCS_GUEST_IA32_FS_SEL0x0808, VMCS_GUEST_IA32_FS_LIMIT0x4808,
225 VMCS_GUEST_IA32_FS_AR0x481C, VMCS_GUEST_IA32_FS_BASE0x680E },
226 { VMCS_GUEST_IA32_GS_SEL0x080A, VMCS_GUEST_IA32_GS_LIMIT0x480A,
227 VMCS_GUEST_IA32_GS_AR0x481E, VMCS_GUEST_IA32_GS_BASE0x6810 },
228 { VMCS_GUEST_IA32_LDTR_SEL0x080C, VMCS_GUEST_IA32_LDTR_LIMIT0x480C,
229 VMCS_GUEST_IA32_LDTR_AR0x4820, VMCS_GUEST_IA32_LDTR_BASE0x6812 },
230 { VMCS_GUEST_IA32_TR_SEL0x080E, VMCS_GUEST_IA32_TR_LIMIT0x480E,
231 VMCS_GUEST_IA32_TR_AR0x4822, VMCS_GUEST_IA32_TR_BASE0x6814 }
232};
233
234/* Pools for VMs and VCPUs */
235extern struct pool vm_pool;
236extern struct pool vcpu_pool;
237
238extern struct vmm_softc *vmm_softc;
239
240/* IDT information used when populating host state area */
241extern vaddr_t idt_vaddr;
242extern struct gate_descriptor *idt;
243
244/* Constants used in "CR access exit" */
245#define CR_WRITE0 0
246#define CR_READ1 1
247#define CR_CLTS2 2
248#define CR_LMSW3 3
249
250/*
251 * vmm_enabled
252 *
253 * Checks if we have at least one CPU with either VMX or SVM.
254 * Returns 1 if we have at least one of either type, but not both, 0 otherwise.
255 */
256int
257vmm_enabled(void)
258{
259 struct cpu_info *ci;
260 CPU_INFO_ITERATORint cii;
261 int found_vmx = 0, found_svm = 0;
262
263 /* Check if we have at least one CPU with either VMX or SVM */
264 CPU_INFO_FOREACH(cii, ci) {
265 if (ci->ci_vmm_flags & CI_VMM_VMX(1 << 0))
266 found_vmx = 1;
267 if (ci->ci_vmm_flags & CI_VMM_SVM(1 << 1))
268 found_svm = 1;
269 }
270
271 /* Don't support both SVM and VMX at the same time */
272 if (found_vmx && found_svm)
273 return (0);
274
275 if (found_vmx || found_svm)
276 return 1;
277
278 return 0;
279}
280
281void
282vmm_attach_machdep(struct device *parent, struct device *self, void *aux)
283{
284 struct vmm_softc *sc = (struct vmm_softc *)self;
285 struct cpu_info *ci;
286 CPU_INFO_ITERATORint cii;
287
288 sc->sc_md.nr_rvi_cpus = 0;
289 sc->sc_md.nr_ept_cpus = 0;
290
291 /* Calculate CPU features */
292 CPU_INFO_FOREACH(cii, ci) {
293 if (ci->ci_vmm_flags & CI_VMM_RVI(1 << 2))
294 sc->sc_md.nr_rvi_cpus++;
295 if (ci->ci_vmm_flags & CI_VMM_EPT(1 << 3))
296 sc->sc_md.nr_ept_cpus++;
297 }
298
299 sc->sc_md.pkru_enabled = 0;
300 if (rcr4() & CR4_PKE0x00400000)
301 sc->sc_md.pkru_enabled = 1;
302
303 if (sc->sc_md.nr_ept_cpus) {
304 printf(": VMX/EPT");
305 sc->mode = VMM_MODE_EPT;
306 } else if (sc->sc_md.nr_rvi_cpus) {
307 printf(": SVM/RVI");
308 sc->mode = VMM_MODE_RVI;
309 } else {
310 printf(": unknown");
311 sc->mode = VMM_MODE_UNKNOWN;
312 }
313
314 if (sc->mode == VMM_MODE_EPT) {
315 if (!(curcpu()->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr)) {
316 l1tf_flush_region = km_alloc(VMX_L1D_FLUSH_SIZE(64 * 1024),
317 &kv_any, &vmm_kp_contig, &kd_waitok);
318 if (!l1tf_flush_region) {
319 printf(" (failing, no memory)");
320 sc->mode = VMM_MODE_UNKNOWN;
321 } else {
322 printf(" (using slow L1TF mitigation)");
323 memset(l1tf_flush_region, 0xcc,__builtin_memset((l1tf_flush_region), (0xcc), ((64 * 1024)))
324 VMX_L1D_FLUSH_SIZE)__builtin_memset((l1tf_flush_region), (0xcc), ((64 * 1024)));
325 }
326 }
327 }
328
329 if (sc->mode == VMM_MODE_RVI) {
330 sc->max_vpid = curcpu()->ci_vmm_cap.vcc_svm.svm_max_asid;
331 } else {
332 sc->max_vpid = 0xFFF;
333 }
334
335 bzero(&sc->vpids, sizeof(sc->vpids))__builtin_bzero((&sc->vpids), (sizeof(sc->vpids)));
336 rw_init(&sc->vpid_lock, "vpid")_rw_init_flags(&sc->vpid_lock, "vpid", 0, ((void *)0));
337}
338
339/*
340 * vmm_quiesce_vmx
341 *
342 * Prepare the host for suspend by flushing all VMCS states.
343 */
344int
345vmm_quiesce_vmx(void)
346{
347 struct vm *vm;
348 struct vcpu *vcpu;
349 int err;
350
351 /*
352 * We should be only called from a quiescing device state so we
353 * don't expect to sleep here. If we can't get all our locks,
354 * something is wrong.
355 */
356 if ((err = rw_enter(&vmm_softc->vm_lock, RW_WRITE0x0001UL | RW_NOSLEEP0x0040UL)))
357 return (err);
358
359 /* Iterate over each vm... */
360 SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link) {
361 /* Iterate over each vcpu... */
362 SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link) {
363 err = rw_enter(&vcpu->vc_lock, RW_WRITE0x0001UL | RW_NOSLEEP0x0040UL);
364 if (err)
365 break;
366
367 /* We can skip unlaunched VMCS. Nothing to flush. */
368 if (atomic_load_int(&vcpu->vc_vmx_vmcs_state)
369 != VMCS_LAUNCHED1) {
370 DPRINTF("%s: skipping vcpu %d for vm %d\n",
371 __func__, vcpu->vc_id, vm->vm_id);
372 rw_exit_write(&vcpu->vc_lock);
373 continue;
374 }
375
376#ifdef MULTIPROCESSOR1
377 if (vcpu->vc_last_pcpu != curcpu()) {
378 /* Remote cpu vmclear via ipi. */
379 err = vmx_remote_vmclear(vcpu->vc_last_pcpu,
380 vcpu);
381 if (err)
382 printf("%s: failed to remote vmclear "
383 "vcpu %d of vm %d\n", __func__,
384 vcpu->vc_id, vm->vm_id);
385 } else
386#endif
387 {
388 /* Local cpu vmclear instruction. */
389 if ((err = vmclear(&vcpu->vc_control_pa)))
390 printf("%s: failed to locally vmclear "
391 "vcpu %d of vm %d\n", __func__,
392 vcpu->vc_id, vm->vm_id);
393 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state,_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (0))
394 VMCS_CLEARED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (0));
395 }
396
397 rw_exit_write(&vcpu->vc_lock);
398 if (err)
399 break;
400 DPRINTF("%s: cleared vcpu %d for vm %d\n", __func__,
401 vcpu->vc_id, vm->vm_id);
402 }
403 if (err)
404 break;
405 }
406 rw_exit_write(&vmm_softc->vm_lock);
407
408 if (err)
409 return (err);
410 return (0);
411}
412
413void
414vmm_activate_machdep(struct device *self, int act)
415{
416 struct cpu_info *ci = curcpu();
417
418 switch (act) {
419 case DVACT_QUIESCE2:
420 /* If we're not in vmm mode, nothing to do. */
421 if ((ci->ci_flags & CPUF_VMM0x20000) == 0)
422 break;
423
424 /* Intel systems need extra steps to sync vcpu state. */
425 if (vmm_softc->mode == VMM_MODE_EPT)
426 if (vmm_quiesce_vmx())
427 DPRINTF("%s: vmx quiesce failed\n", __func__);
428
429 /* Stop virtualization mode on all cpus. */
430 vmm_stop();
431 break;
432
433 case DVACT_WAKEUP5:
434 /* Restart virtualization mode on all cpu's. */
435 if (vmm_softc->vm_ct > 0)
436 vmm_start();
437 break;
438 }
439}
440
441int
442vmmioctl_machdep(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
443{
444 int ret;
445
446 switch (cmd) {
447 case VMM_IOC_INTR:
448 ret = vm_intr_pending((struct vm_intr_params *)data);
449 break;
450 case VMM_IOC_MPROTECT_EPT:
451 ret = vm_mprotect_ept((struct vm_mprotect_ept_params *)data);
452 break;
453 default:
454 DPRINTF("%s: unknown ioctl code 0x%lx\n", __func__, cmd);
455 ret = ENOTTY25;
456 }
457
458 return (ret);
459}
460
461int
462pledge_ioctl_vmm_machdep(struct proc *p, long com)
463{
464 switch (com) {
465 case VMM_IOC_INTR:
466 case VMM_IOC_MPROTECT_EPT:
467 return (0);
468 }
469
470 return (EPERM1);
471}
472
473/*
474 * vm_intr_pending
475 *
476 * IOCTL handler routine for VMM_IOC_INTR messages, sent from vmd when an
477 * interrupt is pending and needs acknowledgment
478 *
479 * Parameters:
480 * vip: Describes the vm/vcpu for which the interrupt is pending
481 *
482 * Return values:
483 * 0: if successful
484 * ENOENT: if the VM/VCPU defined by 'vip' cannot be found
485 */
486int
487vm_intr_pending(struct vm_intr_params *vip)
488{
489 struct vm *vm;
490 struct vcpu *vcpu;
491#ifdef MULTIPROCESSOR1
492 struct cpu_info *ci;
493#endif
494 int error, ret = 0;
495
496 /* Find the desired VM */
497 error = vm_find(vip->vip_vm_id, &vm);
498
499 /* Not found? exit. */
500 if (error != 0)
501 return (error);
502
503 vcpu = vm_find_vcpu(vm, vip->vip_vcpu_id);
504
505 if (vcpu == NULL((void *)0)) {
506 ret = ENOENT2;
507 goto out;
508 }
509
510 vcpu->vc_intr = vip->vip_intr;
511#ifdef MULTIPROCESSOR1
512 ci = READ_ONCE(vcpu->vc_curcpu);
513 if (ci != NULL((void *)0))
514 x86_send_ipi(ci, X86_IPI_NOP0x00000002);
515#endif
516
517out:
518 refcnt_rele_wake(&vm->vm_refcnt);
519 return (ret);
520}
521
522/*
523 * vm_rwvmparams
524 *
525 * IOCTL handler to read/write the current vmm params like pvclock gpa, pvclock
526 * version, etc.
527 *
528 * Parameters:
529 * vrwp: Describes the VM and VCPU to get/set the params from
530 * dir: 0 for reading, 1 for writing
531 *
532 * Return values:
533 * 0: if successful
534 * ENOENT: if the VM/VCPU defined by 'vpp' cannot be found
535 * EINVAL: if an error occurred reading the registers of the guest
536 */
537int
538vm_rwvmparams(struct vm_rwvmparams_params *vpp, int dir)
539{
540 struct vm *vm;
541 struct vcpu *vcpu;
542 int error, ret = 0;
543
544 /* Find the desired VM */
545 error = vm_find(vpp->vpp_vm_id, &vm);
546
547 /* Not found? exit. */
548 if (error != 0)
549 return (error);
550
551 vcpu = vm_find_vcpu(vm, vpp->vpp_vcpu_id);
552
553 if (vcpu == NULL((void *)0)) {
554 ret = ENOENT2;
555 goto out;
556 }
557
558 if (dir == 0) {
559 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION0x2)
560 vpp->vpp_pvclock_version = vcpu->vc_pvclock_version;
561 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA0x1)
562 vpp->vpp_pvclock_system_gpa = \
563 vcpu->vc_pvclock_system_gpa;
564 } else {
565 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION0x2)
566 vcpu->vc_pvclock_version = vpp->vpp_pvclock_version;
567 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA0x1) {
568 vmm_init_pvclock(vcpu, vpp->vpp_pvclock_system_gpa);
569 }
570 }
571out:
572 refcnt_rele_wake(&vm->vm_refcnt);
573 return (ret);
574}
575
576/*
577 * vm_readregs
578 *
579 * IOCTL handler to read/write the current register values of a guest VCPU.
580 * The VCPU must not be running.
581 *
582 * Parameters:
583 * vrwp: Describes the VM and VCPU to get/set the registers from. The
584 * register values are returned here as well.
585 * dir: 0 for reading, 1 for writing
586 *
587 * Return values:
588 * 0: if successful
589 * ENOENT: if the VM/VCPU defined by 'vrwp' cannot be found
590 * EINVAL: if an error occurred accessing the registers of the guest
591 * EPERM: if the vm cannot be accessed from the calling process
592 */
593int
594vm_rwregs(struct vm_rwregs_params *vrwp, int dir)
595{
596 struct vm *vm;
597 struct vcpu *vcpu;
598 struct vcpu_reg_state *vrs = &vrwp->vrwp_regs;
599 int error, ret = 0;
600
601 /* Find the desired VM */
602 error = vm_find(vrwp->vrwp_vm_id, &vm);
603
604 /* Not found? exit. */
605 if (error != 0)
606 return (error);
607
608 vcpu = vm_find_vcpu(vm, vrwp->vrwp_vcpu_id);
609
610 if (vcpu == NULL((void *)0)) {
611 ret = ENOENT2;
612 goto out;
613 }
614
615 rw_enter_write(&vcpu->vc_lock);
616 if (vmm_softc->mode == VMM_MODE_EPT)
617 ret = (dir == 0) ?
618 vcpu_readregs_vmx(vcpu, vrwp->vrwp_mask, 1, vrs) :
619 vcpu_writeregs_vmx(vcpu, vrwp->vrwp_mask, 1, vrs);
620 else if (vmm_softc->mode == VMM_MODE_RVI)
621 ret = (dir == 0) ?
622 vcpu_readregs_svm(vcpu, vrwp->vrwp_mask, vrs) :
623 vcpu_writeregs_svm(vcpu, vrwp->vrwp_mask, vrs);
624 else {
625 DPRINTF("%s: unknown vmm mode", __func__);
626 ret = EINVAL22;
627 }
628 rw_exit_write(&vcpu->vc_lock);
629out:
630 refcnt_rele_wake(&vm->vm_refcnt);
631 return (ret);
632}
633
634/*
635 * vm_mprotect_ept
636 *
637 * IOCTL handler to sets the access protections of the ept
638 *
639 * Parameters:
640 * vmep: describes the memory for which the protect will be applied..
641 *
642 * Return values:
643 * 0: if successful
644 * ENOENT: if the VM defined by 'vmep' cannot be found
645 * EINVAL: if the sgpa or size is not page aligned, the prot is invalid,
646 * size is too large (512GB), there is wraparound
647 * (like start = 512GB-1 and end = 512GB-2),
648 * the address specified is not within the vm's mem range
649 * or the address lies inside reserved (MMIO) memory
650 */
651int
652vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
653{
654 struct vm *vm;
655 struct vcpu *vcpu;
656 vaddr_t sgpa;
657 size_t size;
658 vm_prot_t prot;
659 uint64_t msr;
660 int ret = 0, memtype;
661
662 /* If not EPT or RVI, nothing to do here */
663 if (!(vmm_softc->mode == VMM_MODE_EPT
664 || vmm_softc->mode == VMM_MODE_RVI))
665 return (0);
666
667 /* Find the desired VM */
668 ret = vm_find(vmep->vmep_vm_id, &vm);
669
670 /* Not found? exit. */
671 if (ret != 0) {
672 DPRINTF("%s: vm id %u not found\n", __func__,
673 vmep->vmep_vm_id);
674 return (ret);
675 }
676
677 vcpu = vm_find_vcpu(vm, vmep->vmep_vcpu_id);
678
679 if (vcpu == NULL((void *)0)) {
680 DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
681 vmep->vmep_vcpu_id, vmep->vmep_vm_id);
682 ret = ENOENT2;
683 goto out_nolock;
684 }
685
686 rw_enter_write(&vcpu->vc_lock);
687
688 if (vcpu->vc_state != VCPU_STATE_STOPPED) {
689 DPRINTF("%s: mprotect_ept %u on vm %u attempted "
690 "while vcpu was in state %u (%s)\n", __func__,
691 vmep->vmep_vcpu_id, vmep->vmep_vm_id, vcpu->vc_state,
692 vcpu_state_decode(vcpu->vc_state));
693 ret = EBUSY16;
694 goto out;
695 }
696
697 /* Only proceed if the pmap is in the correct mode */
698 KASSERT((vmm_softc->mode == VMM_MODE_EPT &&
699 vm->vm_map->pmap->pm_type == PMAP_TYPE_EPT) ||
700 (vmm_softc->mode == VMM_MODE_RVI &&
701 vm->vm_map->pmap->pm_type == PMAP_TYPE_RVI));
702
703 sgpa = vmep->vmep_sgpa;
704 size = vmep->vmep_size;
705 prot = vmep->vmep_prot;
706
707 /* No W^X permissions */
708 if ((prot & PROT_MASK(0x01 | 0x02 | 0x04)) != prot &&
709 (prot & (PROT_WRITE0x02 | PROT_EXEC0x04)) == (PROT_WRITE0x02 | PROT_EXEC0x04)) {
710 DPRINTF("%s: W+X permission requested\n", __func__);
711 ret = EINVAL22;
712 goto out;
713 }
714
715 /* No Write only permissions */
716 if ((prot & (PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04)) == PROT_WRITE0x02) {
717 DPRINTF("%s: No Write only permissions\n", __func__);
718 ret = EINVAL22;
719 goto out;
720 }
721
722 /* No empty permissions */
723 if (prot == 0) {
724 DPRINTF("%s: No empty permissions\n", __func__);
725 ret = EINVAL22;
726 goto out;
727 }
728
729 /* No execute only on EPT CPUs that don't have that capability */
730 if (vmm_softc->mode == VMM_MODE_EPT) {
731 msr = rdmsr(IA32_VMX_EPT_VPID_CAP0x48C);
732 if (prot == PROT_EXEC0x04 &&
733 (msr & IA32_EPT_VPID_CAP_XO_TRANSLATIONS(1ULL << 0)) == 0) {
734 DPRINTF("%s: Execute only permissions unsupported,"
735 " adding read permission\n", __func__);
736
737 prot |= PROT_READ0x01;
738 }
739 }
740
741 /* Must be page aligned */
742 if ((sgpa & PAGE_MASK((1 << 12) - 1)) || (size & PAGE_MASK((1 << 12) - 1)) || size == 0) {
743 ret = EINVAL22;
744 goto out;
745 }
746
747 /* size must be less then 512GB */
748 if (size >= NBPD_L4(1ULL << 39)) {
749 ret = EINVAL22;
750 goto out;
751 }
752
753 /* no wraparound */
754 if (sgpa + size < sgpa) {
755 ret = EINVAL22;
756 goto out;
757 }
758
759 /*
760 * Specifying addresses within the PCI MMIO space is forbidden.
761 * Disallow addresses that start inside the MMIO space:
762 * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
763 */
764 if (sgpa >= VMM_PCI_MMIO_BAR_BASE0xF0000000ULL && sgpa <= VMM_PCI_MMIO_BAR_END0xFFDFFFFFULL) {
765 ret = EINVAL22;
766 goto out;
767 }
768
769 /*
770 * ... and disallow addresses that end inside the MMIO space:
771 * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
772 */
773 if (sgpa + size > VMM_PCI_MMIO_BAR_BASE0xF0000000ULL &&
774 sgpa + size <= VMM_PCI_MMIO_BAR_END0xFFDFFFFFULL) {
775 ret = EINVAL22;
776 goto out;
777 }
778
779 memtype = vmm_get_guest_memtype(vm, sgpa);
780 if (memtype == VMM_MEM_TYPE_UNKNOWN) {
781 ret = EINVAL22;
782 goto out;
783 }
784
785 if (vmm_softc->mode == VMM_MODE_EPT)
786 ret = vmx_mprotect_ept(vm->vm_map, sgpa, sgpa + size, prot);
787 else if (vmm_softc->mode == VMM_MODE_RVI) {
788 pmap_write_protect(vm->vm_map->pmap, sgpa, sgpa + size, prot);
789 /* XXX requires a invlpga */
790 ret = 0;
791 } else
792 ret = EINVAL22;
793out:
794 if (vcpu != NULL((void *)0))
795 rw_exit_write(&vcpu->vc_lock);
796out_nolock:
797 refcnt_rele_wake(&vm->vm_refcnt);
798 return (ret);
799}
800
801/*
802 * vmx_mprotect_ept
803 *
804 * apply the ept protections to the requested pages, faulting in the page if
805 * required.
806 */
807int
808vmx_mprotect_ept(vm_map_t vm_map, paddr_t sgpa, paddr_t egpa, int prot)
809{
810 struct vmx_invept_descriptor vid;
811 pmap_t pmap;
812 pt_entry_t *pte;
813 paddr_t addr;
814 int ret = 0;
815
816 pmap = vm_map->pmap;
817
818 KERNEL_LOCK()_kernel_lock();
819
820 for (addr = sgpa; addr < egpa; addr += PAGE_SIZE(1 << 12)) {
821 pte = vmx_pmap_find_pte_ept(pmap, addr);
822 if (pte == NULL((void *)0)) {
823 ret = uvm_fault(vm_map, addr, VM_FAULT_WIRE((vm_fault_t) 0x2),
824 PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04);
825 if (ret)
826 printf("%s: uvm_fault returns %d, GPA=0x%llx\n",
827 __func__, ret, (uint64_t)addr);
828
829 pte = vmx_pmap_find_pte_ept(pmap, addr);
830 if (pte == NULL((void *)0)) {
831 KERNEL_UNLOCK()_kernel_unlock();
832 return EFAULT14;
833 }
834 }
835
836 if (prot & PROT_READ0x01)
837 *pte |= EPT_R(1ULL << 0);
838 else
839 *pte &= ~EPT_R(1ULL << 0);
840
841 if (prot & PROT_WRITE0x02)
842 *pte |= EPT_W(1ULL << 1);
843 else
844 *pte &= ~EPT_W(1ULL << 1);
845
846 if (prot & PROT_EXEC0x04)
847 *pte |= EPT_X(1ULL << 2);
848 else
849 *pte &= ~EPT_X(1ULL << 2);
850 }
851
852 /*
853 * SDM 3C: 28.3.3.4 Guidelines for Use of the INVEPT Instruction
854 * the first bullet point seems to say we should call invept.
855 *
856 * Software should use the INVEPT instruction with the “single-context”
857 * INVEPT type after making any of the following changes to an EPT
858 * paging-structure entry (the INVEPT descriptor should contain an
859 * EPTP value that references — directly or indirectly
860 * — the modified EPT paging structure):
861 * — Changing any of the privilege bits 2:0 from 1 to 0.
862 * */
863 if (pmap->eptp != 0) {
864 memset(&vid, 0, sizeof(vid))__builtin_memset((&vid), (0), (sizeof(vid)));
865 vid.vid_eptp = pmap->eptp;
866 DPRINTF("%s: flushing EPT TLB for EPTP 0x%llx\n", __func__,
867 vid.vid_eptp);
868 invept(IA32_VMX_INVEPT_SINGLE_CTX0x1, &vid);
869 }
870
871 KERNEL_UNLOCK()_kernel_unlock();
872
873 return ret;
874}
875
876/*
877 * vmx_pmap_find_pte_ept
878 *
879 * find the page table entry specified by addr in the pmap supplied.
880 */
881pt_entry_t *
882vmx_pmap_find_pte_ept(pmap_t pmap, paddr_t addr)
883{
884 int l4idx, l3idx, l2idx, l1idx;
885 pd_entry_t *pd;
886 paddr_t pdppa;
887 pt_entry_t *ptes, *pte;
888
889 l4idx = (addr & L4_MASK0x0000ff8000000000UL) >> L4_SHIFT39; /* PML4E idx */
890 l3idx = (addr & L3_MASK0x0000007fc0000000UL) >> L3_SHIFT30; /* PDPTE idx */
891 l2idx = (addr & L2_MASK0x000000003fe00000UL) >> L2_SHIFT21; /* PDE idx */
892 l1idx = (addr & L1_MASK0x00000000001ff000UL) >> L1_SHIFT12; /* PTE idx */
893
894 pd = (pd_entry_t *)pmap->pm_pdir;
895 if (pd == NULL((void *)0))
896 return NULL((void *)0);
897
898 /*
899 * l4idx should always be 0 since we don't support more than 512GB
900 * guest physical memory.
901 */
902 if (l4idx > 0)
903 return NULL((void *)0);
904
905 /*
906 * l3idx should always be < MAXDSIZ/1GB because we don't support more
907 * than MAXDSIZ guest phys mem.
908 */
909 if (l3idx >= MAXDSIZ((paddr_t)128*1024*1024*1024) / ((paddr_t)1024 * 1024 * 1024))
910 return NULL((void *)0);
911
912 pdppa = pd[l4idx] & PG_FRAME0x000ffffffffff000UL;
913 if (pdppa == 0)
914 return NULL((void *)0);
915
916 ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
917
918 pdppa = ptes[l3idx] & PG_FRAME0x000ffffffffff000UL;
919 if (pdppa == 0)
920 return NULL((void *)0);
921
922 ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
923
924 pdppa = ptes[l2idx] & PG_FRAME0x000ffffffffff000UL;
925 if (pdppa == 0)
926 return NULL((void *)0);
927
928 ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa);
929
930 pte = &ptes[l1idx];
931 if (*pte == 0)
932 return NULL((void *)0);
933
934 return pte;
935}
936
937/*
938 * vmm_start
939 *
940 * Starts VMM mode on the system
941 */
942int
943vmm_start(void)
944{
945 struct cpu_info *self = curcpu();
946#ifdef MULTIPROCESSOR1
947 struct cpu_info *ci;
948 CPU_INFO_ITERATORint cii;
949#ifdef MP_LOCKDEBUG
950 int nticks;
951#endif /* MP_LOCKDEBUG */
952#endif /* MULTIPROCESSOR */
953
954 /* VMM is already running */
955 if (self->ci_flags & CPUF_VMM0x20000)
956 return (0);
957
958 /* Start VMM on this CPU */
959 start_vmm_on_cpu(self);
960 if (!(self->ci_flags & CPUF_VMM0x20000)) {
961 printf("%s: failed to enter VMM mode\n",
962 self->ci_dev->dv_xname);
963 return (EIO5);
964 }
965
966#ifdef MULTIPROCESSOR1
967 /* Broadcast start VMM IPI */
968 x86_broadcast_ipi(X86_IPI_START_VMM0x00000100);
969
970 CPU_INFO_FOREACH(cii, ci) {
971 if (ci == self)
972 continue;
973#ifdef MP_LOCKDEBUG
974 nticks = __mp_lock_spinout;
975#endif /* MP_LOCKDEBUG */
976 while (!(ci->ci_flags & CPUF_VMM0x20000)) {
977 CPU_BUSY_CYCLE()__asm volatile("pause": : : "memory");
978#ifdef MP_LOCKDEBUG
979 if (--nticks <= 0) {
980 db_printf("%s: spun out", __func__);
981 db_enter();
982 nticks = __mp_lock_spinout;
983 }
984#endif /* MP_LOCKDEBUG */
985 }
986 }
987#endif /* MULTIPROCESSOR */
988
989 return (0);
990}
991
992/*
993 * vmm_stop
994 *
995 * Stops VMM mode on the system
996 */
997int
998vmm_stop(void)
999{
1000 struct cpu_info *self = curcpu();
1001#ifdef MULTIPROCESSOR1
1002 struct cpu_info *ci;
1003 CPU_INFO_ITERATORint cii;
1004#ifdef MP_LOCKDEBUG
1005 int nticks;
1006#endif /* MP_LOCKDEBUG */
1007#endif /* MULTIPROCESSOR */
1008
1009 /* VMM is not running */
1010 if (!(self->ci_flags & CPUF_VMM0x20000))
1011 return (0);
1012
1013 /* Stop VMM on this CPU */
1014 stop_vmm_on_cpu(self);
1015 if (self->ci_flags & CPUF_VMM0x20000) {
1016 printf("%s: failed to exit VMM mode\n",
1017 self->ci_dev->dv_xname);
1018 return (EIO5);
1019 }
1020
1021#ifdef MULTIPROCESSOR1
1022 /* Stop VMM on other CPUs */
1023 x86_broadcast_ipi(X86_IPI_STOP_VMM0x00000200);
1024
1025 CPU_INFO_FOREACH(cii, ci) {
1026 if (ci == self)
1027 continue;
1028#ifdef MP_LOCKDEBUG
1029 nticks = __mp_lock_spinout;
1030#endif /* MP_LOCKDEBUG */
1031 while ((ci->ci_flags & CPUF_VMM0x20000)) {
1032 CPU_BUSY_CYCLE()__asm volatile("pause": : : "memory");
1033#ifdef MP_LOCKDEBUG
1034 if (--nticks <= 0) {
1035 db_printf("%s: spunout", __func__);
1036 db_enter();
1037 nticks = __mp_lock_spinout;
1038 }
1039#endif /* MP_LOCKDEBUG */
1040 }
1041 }
1042#endif /* MULTIPROCESSOR */
1043
1044 return (0);
1045}
1046
1047/*
1048 * start_vmm_on_cpu
1049 *
1050 * Starts VMM mode on 'ci' by executing the appropriate CPU-specific insn
1051 * sequence to enter VMM mode (eg, VMXON)
1052 */
1053void
1054start_vmm_on_cpu(struct cpu_info *ci)
1055{
1056 uint64_t msr;
1057 uint32_t cr4;
1058 struct vmx_invept_descriptor vid;
1059
1060 /* No VMM mode? exit. */
1061 if ((ci->ci_vmm_flags & CI_VMM_VMX(1 << 0)) == 0 &&
1062 (ci->ci_vmm_flags & CI_VMM_SVM(1 << 1)) == 0)
1063 return;
1064
1065 /*
1066 * AMD SVM
1067 */
1068 if (ci->ci_vmm_flags & CI_VMM_SVM(1 << 1)) {
1069 msr = rdmsr(MSR_EFER0xc0000080);
1070 msr |= EFER_SVME0x00001000;
1071 wrmsr(MSR_EFER0xc0000080, msr);
1072 }
1073
1074 /*
1075 * Intel VMX
1076 */
1077 if (ci->ci_vmm_flags & CI_VMM_VMX(1 << 0)) {
1078 if (ci->ci_vmxon_region == 0)
1079 return;
1080 else {
1081 bzero(ci->ci_vmxon_region, PAGE_SIZE)__builtin_bzero((ci->ci_vmxon_region), ((1 << 12)));
1082 ci->ci_vmxon_region->vr_revision =
1083 ci->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision;
1084
1085 /* Enable VMX */
1086 msr = rdmsr(MSR_IA32_FEATURE_CONTROL0x03a);
1087 if (msr & IA32_FEATURE_CONTROL_LOCK0x01) {
1088 if (!(msr & IA32_FEATURE_CONTROL_VMX_EN0x04))
1089 return;
1090 } else {
1091 msr |= IA32_FEATURE_CONTROL_VMX_EN0x04 |
1092 IA32_FEATURE_CONTROL_LOCK0x01;
1093 wrmsr(MSR_IA32_FEATURE_CONTROL0x03a, msr);
1094 }
1095
1096 /* Set CR4.VMXE */
1097 cr4 = rcr4();
1098 cr4 |= CR4_VMXE0x00002000;
1099 lcr4(cr4);
1100
1101 /* Enter VMX mode and clear EPTs on this cpu */
1102 if (vmxon((uint64_t *)&ci->ci_vmxon_region_pa))
1103 panic("vmxon failed");
1104
1105 memset(&vid, 0, sizeof(vid))__builtin_memset((&vid), (0), (sizeof(vid)));
1106 if (invept(IA32_VMX_INVEPT_GLOBAL_CTX0x2, &vid))
1107 panic("invept failed");
1108 }
1109 }
1110
1111 atomic_setbits_intx86_atomic_setbits_u32(&ci->ci_flags, CPUF_VMM0x20000);
1112}
1113
1114/*
1115 * stop_vmm_on_cpu
1116 *
1117 * Stops VMM mode on 'ci' by executing the appropriate CPU-specific insn
1118 * sequence to exit VMM mode (eg, VMXOFF)
1119 */
1120void
1121stop_vmm_on_cpu(struct cpu_info *ci)
1122{
1123 uint64_t msr;
1124 uint32_t cr4;
1125
1126 if (!(ci->ci_flags & CPUF_VMM0x20000))
1127 return;
1128
1129 /*
1130 * AMD SVM
1131 */
1132 if (ci->ci_vmm_flags & CI_VMM_SVM(1 << 1)) {
1133 msr = rdmsr(MSR_EFER0xc0000080);
1134 msr &= ~EFER_SVME0x00001000;
1135 wrmsr(MSR_EFER0xc0000080, msr);
1136 }
1137
1138 /*
1139 * Intel VMX
1140 */
1141 if (ci->ci_vmm_flags & CI_VMM_VMX(1 << 0)) {
1142 if (vmxoff())
1143 panic("VMXOFF failed");
1144
1145 cr4 = rcr4();
1146 cr4 &= ~CR4_VMXE0x00002000;
1147 lcr4(cr4);
1148 }
1149
1150 atomic_clearbits_intx86_atomic_clearbits_u32(&ci->ci_flags, CPUF_VMM0x20000);
1151}
1152
1153/*
1154 * vmclear_on_cpu
1155 *
1156 * Flush and clear VMCS on 'ci' by executing vmclear.
1157 *
1158 */
1159void
1160vmclear_on_cpu(struct cpu_info *ci)
1161{
1162 if ((ci->ci_flags & CPUF_VMM0x20000) && (ci->ci_vmm_flags & CI_VMM_VMX(1 << 0))) {
1163 if (vmclear(&ci->ci_vmcs_pa))
1164 panic("VMCLEAR ipi failed");
1165 atomic_swap_ulong(&ci->ci_vmcs_pa, VMX_VMCS_PA_CLEAR);
1166 }
1167}
1168
1169#ifdef MULTIPROCESSOR1
1170static int
1171vmx_remote_vmclear(struct cpu_info *ci, struct vcpu *vcpu)
1172{
1173#ifdef MP_LOCKDEBUG
1174 int nticks = __mp_lock_spinout;
1175#endif /* MP_LOCKDEBUG */
1176
1177 rw_enter_write(&ci->ci_vmcs_lock);
1178 atomic_swap_ulong(&ci->ci_vmcs_pa, vcpu->vc_control_pa);
1179 x86_send_ipi(ci, X86_IPI_VMCLEAR_VMM0x00000004);
1180
1181 while (ci->ci_vmcs_pa != VMX_VMCS_PA_CLEAR0xFFFFFFFFFFFFFFFFUL) {
1182 CPU_BUSY_CYCLE()__asm volatile("pause": : : "memory");
1183#ifdef MP_LOCKDEBUG
1184 if (--nticks <= 0) {
1185 db_printf("%s: spun out\n", __func__);
1186 db_enter();
1187 nticks = __mp_lock_spinout;
1188 }
1189#endif /* MP_LOCKDEBUG */
1190 }
1191 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (0));
1192 rw_exit_write(&ci->ci_vmcs_lock);
1193
1194 return (0);
1195}
1196#endif /* MULTIPROCESSOR */
1197
1198/*
1199 * vm_impl_init_vmx
1200 *
1201 * Intel VMX specific VM initialization routine
1202 *
1203 * Parameters:
1204 * vm: the VM being initialized
1205 * p: vmd process owning the VM
1206 *
1207 * Return values:
1208 * 0: the initialization was successful
1209 * ENOMEM: the initialization failed (lack of resources)
1210 */
1211int
1212vm_impl_init_vmx(struct vm *vm, struct proc *p)
1213{
1214 int i, ret;
1215 vaddr_t mingpa, maxgpa;
1216 struct vm_mem_range *vmr;
1217
1218 /* If not EPT, nothing to do here */
1219 if (vmm_softc->mode != VMM_MODE_EPT)
1220 return (0);
1221
1222 vmr = &vm->vm_memranges[0];
1223 mingpa = vmr->vmr_gpa;
1224 vmr = &vm->vm_memranges[vm->vm_nmemranges - 1];
1225 maxgpa = vmr->vmr_gpa + vmr->vmr_size;
1226
1227 /*
1228 * uvmspace_alloc (currently) always returns a valid vmspace
1229 */
1230 vm->vm_vmspace = uvmspace_alloc(mingpa, maxgpa, TRUE1, FALSE0);
1231 vm->vm_map = &vm->vm_vmspace->vm_map;
1232
1233 /* Map the new map with an anon */
1234 DPRINTF("%s: created vm_map @ %p\n", __func__, vm->vm_map);
1235 for (i = 0; i < vm->vm_nmemranges; i++) {
1236 vmr = &vm->vm_memranges[i];
1237 ret = uvm_share(vm->vm_map, vmr->vmr_gpa,
1238 PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04,
1239 &p->p_vmspace->vm_map, vmr->vmr_va, vmr->vmr_size);
1240 if (ret) {
1241 printf("%s: uvm_share failed (%d)\n", __func__, ret);
1242 /* uvmspace_free calls pmap_destroy for us */
1243 uvmspace_free(vm->vm_vmspace);
1244 vm->vm_vmspace = NULL((void *)0);
1245 return (ENOMEM12);
1246 }
1247 }
1248
1249 pmap_convert(vm->vm_map->pmap, PMAP_TYPE_EPT2);
1250
1251 return (0);
1252}
1253
1254/*
1255 * vm_impl_init_svm
1256 *
1257 * AMD SVM specific VM initialization routine
1258 *
1259 * Parameters:
1260 * vm: the VM being initialized
1261 * p: vmd process owning the VM
1262 *
1263 * Return values:
1264 * 0: the initialization was successful
1265 * ENOMEM: the initialization failed (lack of resources)
1266 */
1267int
1268vm_impl_init_svm(struct vm *vm, struct proc *p)
1269{
1270 int i, ret;
1271 vaddr_t mingpa, maxgpa;
1272 struct vm_mem_range *vmr;
1273
1274 /* If not RVI, nothing to do here */
1275 if (vmm_softc->mode != VMM_MODE_RVI)
1276 return (0);
1277
1278 vmr = &vm->vm_memranges[0];
1279 mingpa = vmr->vmr_gpa;
1280 vmr = &vm->vm_memranges[vm->vm_nmemranges - 1];
1281 maxgpa = vmr->vmr_gpa + vmr->vmr_size;
1282
1283 /*
1284 * uvmspace_alloc (currently) always returns a valid vmspace
1285 */
1286 vm->vm_vmspace = uvmspace_alloc(mingpa, maxgpa, TRUE1, FALSE0);
1287 vm->vm_map = &vm->vm_vmspace->vm_map;
1288
1289 /* Map the new map with an anon */
1290 DPRINTF("%s: created vm_map @ %p\n", __func__, vm->vm_map);
1291 for (i = 0; i < vm->vm_nmemranges; i++) {
1292 vmr = &vm->vm_memranges[i];
1293 ret = uvm_share(vm->vm_map, vmr->vmr_gpa,
1294 PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04,
1295 &p->p_vmspace->vm_map, vmr->vmr_va, vmr->vmr_size);
1296 if (ret) {
1297 printf("%s: uvm_share failed (%d)\n", __func__, ret);
1298 /* uvmspace_free calls pmap_destroy for us */
1299 uvmspace_free(vm->vm_vmspace);
1300 vm->vm_vmspace = NULL((void *)0);
1301 return (ENOMEM12);
1302 }
1303 }
1304
1305 /* Convert pmap to RVI */
1306 pmap_convert(vm->vm_map->pmap, PMAP_TYPE_RVI3);
1307
1308 return (0);
1309}
1310
1311/*
1312 * vm_impl_init
1313 *
1314 * Calls the architecture-specific VM init routine
1315 *
1316 * Parameters:
1317 * vm: the VM being initialized
1318 * p: vmd process owning the VM
1319 *
1320 * Return values (from architecture-specific init routines):
1321 * 0: the initialization was successful
1322 * ENOMEM: the initialization failed (lack of resources)
1323 */
1324int
1325vm_impl_init(struct vm *vm, struct proc *p)
1326{
1327 int ret;
1328
1329 KERNEL_LOCK()_kernel_lock();
1330 if (vmm_softc->mode == VMM_MODE_EPT)
1331 ret = vm_impl_init_vmx(vm, p);
1332 else if (vmm_softc->mode == VMM_MODE_RVI)
1333 ret = vm_impl_init_svm(vm, p);
1334 else
1335 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
1336 KERNEL_UNLOCK()_kernel_unlock();
1337
1338 return (ret);
1339}
1340
1341void
1342vm_impl_deinit(struct vm *vm)
1343{
1344 /* unused */
1345}
1346
1347/*
1348 * vcpu_reload_vmcs_vmx
1349 *
1350 * (Re)load the VMCS on the current cpu. Must be called with the VMCS write
1351 * lock acquired. If the VMCS is determined to be loaded on a remote cpu, an
1352 * ipi will be used to remotely flush it before loading the VMCS locally.
1353 *
1354 * Parameters:
1355 * vcpu: Pointer to the vcpu needing its VMCS
1356 *
1357 * Return values:
1358 * 0: if successful
1359 * EINVAL: an error occurred during flush or reload
1360 */
1361int
1362vcpu_reload_vmcs_vmx(struct vcpu *vcpu)
1363{
1364 struct cpu_info *ci, *last_ci;
1365
1366 rw_assert_wrlock(&vcpu->vc_lock);
1367
1368 ci = curcpu();
1369 last_ci = vcpu->vc_last_pcpu;
1370
1371 if (last_ci == NULL((void *)0)) {
1372 /* First launch */
1373 if (vmclear(&vcpu->vc_control_pa))
1374 return (EINVAL22);
1375 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (0));
1376#ifdef MULTIPROCESSOR1
1377 } else if (last_ci != ci) {
1378 /* We've moved CPUs at some point, so remote VMCLEAR */
1379 if (vmx_remote_vmclear(last_ci, vcpu))
1380 return (EINVAL22);
1381 KASSERT(vcpu->vc_vmx_vmcs_state == VMCS_CLEARED);
1382#endif /* MULTIPROCESSOR */
1383 }
1384
1385 if (vmptrld(&vcpu->vc_control_pa)) {
1386 printf("%s: vmptrld\n", __func__);
1387 return (EINVAL22);
1388 }
1389
1390 return (0);
1391}
1392
1393/*
1394 * vcpu_readregs_vmx
1395 *
1396 * Reads 'vcpu's registers
1397 *
1398 * Parameters:
1399 * vcpu: the vcpu to read register values from
1400 * regmask: the types of registers to read
1401 * loadvmcs: bit to indicate whether the VMCS has to be loaded first
1402 * vrs: output parameter where register values are stored
1403 *
1404 * Return values:
1405 * 0: if successful
1406 * EINVAL: an error reading registers occurred
1407 */
1408int
1409vcpu_readregs_vmx(struct vcpu *vcpu, uint64_t regmask, int loadvmcs,
1410 struct vcpu_reg_state *vrs)
1411{
1412 int i, ret = 0;
1413 uint64_t sel, limit, ar;
1414 uint64_t *gprs = vrs->vrs_gprs;
1415 uint64_t *crs = vrs->vrs_crs;
1416 uint64_t *msrs = vrs->vrs_msrs;
1417 uint64_t *drs = vrs->vrs_drs;
1418 struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1419 struct vmx_msr_store *msr_store;
1420
1421 if (loadvmcs) {
1422 if (vcpu_reload_vmcs_vmx(vcpu))
1423 return (EINVAL22);
1424 }
1425
1426#ifdef VMM_DEBUG
1427 /* VMCS should be loaded... */
1428 paddr_t pa = 0ULL;
1429 if (vmptrst(&pa))
1430 panic("%s: vmptrst", __func__);
1431 KASSERT(pa == vcpu->vc_control_pa);
1432#endif /* VMM_DEBUG */
1433
1434 if (regmask & VM_RWREGS_GPRS0x1) {
1435 gprs[VCPU_REGS_RAX0] = vcpu->vc_gueststate.vg_rax;
1436 gprs[VCPU_REGS_RBX3] = vcpu->vc_gueststate.vg_rbx;
1437 gprs[VCPU_REGS_RCX1] = vcpu->vc_gueststate.vg_rcx;
1438 gprs[VCPU_REGS_RDX2] = vcpu->vc_gueststate.vg_rdx;
1439 gprs[VCPU_REGS_RSI6] = vcpu->vc_gueststate.vg_rsi;
1440 gprs[VCPU_REGS_RDI7] = vcpu->vc_gueststate.vg_rdi;
1441 gprs[VCPU_REGS_R88] = vcpu->vc_gueststate.vg_r8;
1442 gprs[VCPU_REGS_R99] = vcpu->vc_gueststate.vg_r9;
1443 gprs[VCPU_REGS_R1010] = vcpu->vc_gueststate.vg_r10;
1444 gprs[VCPU_REGS_R1111] = vcpu->vc_gueststate.vg_r11;
1445 gprs[VCPU_REGS_R1212] = vcpu->vc_gueststate.vg_r12;
1446 gprs[VCPU_REGS_R1313] = vcpu->vc_gueststate.vg_r13;
1447 gprs[VCPU_REGS_R1414] = vcpu->vc_gueststate.vg_r14;
1448 gprs[VCPU_REGS_R1515] = vcpu->vc_gueststate.vg_r15;
1449 gprs[VCPU_REGS_RBP5] = vcpu->vc_gueststate.vg_rbp;
1450 gprs[VCPU_REGS_RIP16] = vcpu->vc_gueststate.vg_rip;
1451 if (vmread(VMCS_GUEST_IA32_RSP0x681C, &gprs[VCPU_REGS_RSP4]))
1452 goto errout;
1453 if (vmread(VMCS_GUEST_IA32_RFLAGS0x6820, &gprs[VCPU_REGS_RFLAGS17]))
1454 goto errout;
1455 }
1456
1457 if (regmask & VM_RWREGS_SREGS0x2) {
1458 for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields); i++) {
1459 if (vmread(vmm_vmx_sreg_vmcs_fields[i].selid, &sel))
1460 goto errout;
1461 if (vmread(vmm_vmx_sreg_vmcs_fields[i].limitid, &limit))
1462 goto errout;
1463 if (vmread(vmm_vmx_sreg_vmcs_fields[i].arid, &ar))
1464 goto errout;
1465 if (vmread(vmm_vmx_sreg_vmcs_fields[i].baseid,
1466 &sregs[i].vsi_base))
1467 goto errout;
1468
1469 sregs[i].vsi_sel = sel;
1470 sregs[i].vsi_limit = limit;
1471 sregs[i].vsi_ar = ar;
1472 }
1473
1474 if (vmread(VMCS_GUEST_IA32_GDTR_LIMIT0x4810, &limit))
1475 goto errout;
1476 if (vmread(VMCS_GUEST_IA32_GDTR_BASE0x6816,
1477 &vrs->vrs_gdtr.vsi_base))
1478 goto errout;
1479 vrs->vrs_gdtr.vsi_limit = limit;
1480
1481 if (vmread(VMCS_GUEST_IA32_IDTR_LIMIT0x4812, &limit))
1482 goto errout;
1483 if (vmread(VMCS_GUEST_IA32_IDTR_BASE0x6818,
1484 &vrs->vrs_idtr.vsi_base))
1485 goto errout;
1486 vrs->vrs_idtr.vsi_limit = limit;
1487 }
1488
1489 if (regmask & VM_RWREGS_CRS0x4) {
1490 crs[VCPU_REGS_CR21] = vcpu->vc_gueststate.vg_cr2;
1491 crs[VCPU_REGS_XCR05] = vcpu->vc_gueststate.vg_xcr0;
1492 if (vmread(VMCS_GUEST_IA32_CR00x6800, &crs[VCPU_REGS_CR00]))
1493 goto errout;
1494 if (vmread(VMCS_GUEST_IA32_CR30x6802, &crs[VCPU_REGS_CR32]))
1495 goto errout;
1496 if (vmread(VMCS_GUEST_IA32_CR40x6804, &crs[VCPU_REGS_CR43]))
1497 goto errout;
1498 if (vmread(VMCS_GUEST_PDPTE00x280A, &crs[VCPU_REGS_PDPTE06]))
1499 goto errout;
1500 if (vmread(VMCS_GUEST_PDPTE10x280C, &crs[VCPU_REGS_PDPTE17]))
1501 goto errout;
1502 if (vmread(VMCS_GUEST_PDPTE20x280E, &crs[VCPU_REGS_PDPTE28]))
1503 goto errout;
1504 if (vmread(VMCS_GUEST_PDPTE30x2810, &crs[VCPU_REGS_PDPTE39]))
1505 goto errout;
1506 }
1507
1508 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
1509
1510 if (regmask & VM_RWREGS_MSRS0x8) {
1511 for (i = 0; i < VCPU_REGS_NMSRS(6 + 1); i++) {
1512 msrs[i] = msr_store[i].vms_data;
1513 }
1514 }
1515
1516 if (regmask & VM_RWREGS_DRS0x10) {
1517 drs[VCPU_REGS_DR00] = vcpu->vc_gueststate.vg_dr0;
1518 drs[VCPU_REGS_DR11] = vcpu->vc_gueststate.vg_dr1;
1519 drs[VCPU_REGS_DR22] = vcpu->vc_gueststate.vg_dr2;
1520 drs[VCPU_REGS_DR33] = vcpu->vc_gueststate.vg_dr3;
1521 drs[VCPU_REGS_DR64] = vcpu->vc_gueststate.vg_dr6;
1522 if (vmread(VMCS_GUEST_IA32_DR70x681A, &drs[VCPU_REGS_DR75]))
1523 goto errout;
1524 }
1525
1526 goto out;
1527
1528errout:
1529 ret = EINVAL22;
1530out:
1531 return (ret);
1532}
1533
1534/*
1535 * vcpu_readregs_svm
1536 *
1537 * Reads 'vcpu's registers
1538 *
1539 * Parameters:
1540 * vcpu: the vcpu to read register values from
1541 * regmask: the types of registers to read
1542 * vrs: output parameter where register values are stored
1543 *
1544 * Return values:
1545 * 0: if successful
1546 */
1547int
1548vcpu_readregs_svm(struct vcpu *vcpu, uint64_t regmask,
1549 struct vcpu_reg_state *vrs)
1550{
1551 uint64_t *gprs = vrs->vrs_gprs;
1552 uint64_t *crs = vrs->vrs_crs;
1553 uint64_t *msrs = vrs->vrs_msrs;
1554 uint64_t *drs = vrs->vrs_drs;
1555 uint32_t attr;
1556 struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1557 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
1558
1559 if (regmask & VM_RWREGS_GPRS0x1) {
1560 gprs[VCPU_REGS_RAX0] = vmcb->v_rax;
1561 gprs[VCPU_REGS_RBX3] = vcpu->vc_gueststate.vg_rbx;
1562 gprs[VCPU_REGS_RCX1] = vcpu->vc_gueststate.vg_rcx;
1563 gprs[VCPU_REGS_RDX2] = vcpu->vc_gueststate.vg_rdx;
1564 gprs[VCPU_REGS_RSI6] = vcpu->vc_gueststate.vg_rsi;
1565 gprs[VCPU_REGS_RDI7] = vcpu->vc_gueststate.vg_rdi;
1566 gprs[VCPU_REGS_R88] = vcpu->vc_gueststate.vg_r8;
1567 gprs[VCPU_REGS_R99] = vcpu->vc_gueststate.vg_r9;
1568 gprs[VCPU_REGS_R1010] = vcpu->vc_gueststate.vg_r10;
1569 gprs[VCPU_REGS_R1111] = vcpu->vc_gueststate.vg_r11;
1570 gprs[VCPU_REGS_R1212] = vcpu->vc_gueststate.vg_r12;
1571 gprs[VCPU_REGS_R1313] = vcpu->vc_gueststate.vg_r13;
1572 gprs[VCPU_REGS_R1414] = vcpu->vc_gueststate.vg_r14;
1573 gprs[VCPU_REGS_R1515] = vcpu->vc_gueststate.vg_r15;
1574 gprs[VCPU_REGS_RBP5] = vcpu->vc_gueststate.vg_rbp;
1575 gprs[VCPU_REGS_RIP16] = vmcb->v_rip;
1576 gprs[VCPU_REGS_RSP4] = vmcb->v_rsp;
1577 gprs[VCPU_REGS_RFLAGS17] = vmcb->v_rflags;
1578 }
1579
1580 if (regmask & VM_RWREGS_SREGS0x2) {
1581 sregs[VCPU_REGS_CS1].vsi_sel = vmcb->v_cs.vs_sel;
1582 sregs[VCPU_REGS_CS1].vsi_limit = vmcb->v_cs.vs_lim;
1583 attr = vmcb->v_cs.vs_attr;
1584 sregs[VCPU_REGS_CS1].vsi_ar = (attr & 0xff) | ((attr << 4) &
1585 0xf000);
1586 sregs[VCPU_REGS_CS1].vsi_base = vmcb->v_cs.vs_base;
1587
1588 sregs[VCPU_REGS_DS3].vsi_sel = vmcb->v_ds.vs_sel;
1589 sregs[VCPU_REGS_DS3].vsi_limit = vmcb->v_ds.vs_lim;
1590 attr = vmcb->v_ds.vs_attr;
1591 sregs[VCPU_REGS_DS3].vsi_ar = (attr & 0xff) | ((attr << 4) &
1592 0xf000);
1593 sregs[VCPU_REGS_DS3].vsi_base = vmcb->v_ds.vs_base;
1594
1595 sregs[VCPU_REGS_ES0].vsi_sel = vmcb->v_es.vs_sel;
1596 sregs[VCPU_REGS_ES0].vsi_limit = vmcb->v_es.vs_lim;
1597 attr = vmcb->v_es.vs_attr;
1598 sregs[VCPU_REGS_ES0].vsi_ar = (attr & 0xff) | ((attr << 4) &
1599 0xf000);
1600 sregs[VCPU_REGS_ES0].vsi_base = vmcb->v_es.vs_base;
1601
1602 sregs[VCPU_REGS_FS4].vsi_sel = vmcb->v_fs.vs_sel;
1603 sregs[VCPU_REGS_FS4].vsi_limit = vmcb->v_fs.vs_lim;
1604 attr = vmcb->v_fs.vs_attr;
1605 sregs[VCPU_REGS_FS4].vsi_ar = (attr & 0xff) | ((attr << 4) &
1606 0xf000);
1607 sregs[VCPU_REGS_FS4].vsi_base = vmcb->v_fs.vs_base;
1608
1609 sregs[VCPU_REGS_GS5].vsi_sel = vmcb->v_gs.vs_sel;
1610 sregs[VCPU_REGS_GS5].vsi_limit = vmcb->v_gs.vs_lim;
1611 attr = vmcb->v_gs.vs_attr;
1612 sregs[VCPU_REGS_GS5].vsi_ar = (attr & 0xff) | ((attr << 4) &
1613 0xf000);
1614 sregs[VCPU_REGS_GS5].vsi_base = vmcb->v_gs.vs_base;
1615
1616 sregs[VCPU_REGS_SS2].vsi_sel = vmcb->v_ss.vs_sel;
1617 sregs[VCPU_REGS_SS2].vsi_limit = vmcb->v_ss.vs_lim;
1618 attr = vmcb->v_ss.vs_attr;
1619 sregs[VCPU_REGS_SS2].vsi_ar = (attr & 0xff) | ((attr << 4) &
1620 0xf000);
1621 sregs[VCPU_REGS_SS2].vsi_base = vmcb->v_ss.vs_base;
1622
1623 sregs[VCPU_REGS_LDTR6].vsi_sel = vmcb->v_ldtr.vs_sel;
1624 sregs[VCPU_REGS_LDTR6].vsi_limit = vmcb->v_ldtr.vs_lim;
1625 attr = vmcb->v_ldtr.vs_attr;
1626 sregs[VCPU_REGS_LDTR6].vsi_ar = (attr & 0xff) | ((attr << 4)
1627 & 0xf000);
1628 sregs[VCPU_REGS_LDTR6].vsi_base = vmcb->v_ldtr.vs_base;
1629
1630 sregs[VCPU_REGS_TR7].vsi_sel = vmcb->v_tr.vs_sel;
1631 sregs[VCPU_REGS_TR7].vsi_limit = vmcb->v_tr.vs_lim;
1632 attr = vmcb->v_tr.vs_attr;
1633 sregs[VCPU_REGS_TR7].vsi_ar = (attr & 0xff) | ((attr << 4) &
1634 0xf000);
1635 sregs[VCPU_REGS_TR7].vsi_base = vmcb->v_tr.vs_base;
1636
1637 vrs->vrs_gdtr.vsi_limit = vmcb->v_gdtr.vs_lim;
1638 vrs->vrs_gdtr.vsi_base = vmcb->v_gdtr.vs_base;
1639 vrs->vrs_idtr.vsi_limit = vmcb->v_idtr.vs_lim;
1640 vrs->vrs_idtr.vsi_base = vmcb->v_idtr.vs_base;
1641 }
1642
1643 if (regmask & VM_RWREGS_CRS0x4) {
1644 crs[VCPU_REGS_CR00] = vmcb->v_cr0;
1645 crs[VCPU_REGS_CR32] = vmcb->v_cr3;
1646 crs[VCPU_REGS_CR43] = vmcb->v_cr4;
1647 crs[VCPU_REGS_CR21] = vcpu->vc_gueststate.vg_cr2;
1648 crs[VCPU_REGS_XCR05] = vcpu->vc_gueststate.vg_xcr0;
1649 }
1650
1651 if (regmask & VM_RWREGS_MSRS0x8) {
1652 msrs[VCPU_REGS_EFER0] = vmcb->v_efer;
1653 msrs[VCPU_REGS_STAR1] = vmcb->v_star;
1654 msrs[VCPU_REGS_LSTAR2] = vmcb->v_lstar;
1655 msrs[VCPU_REGS_CSTAR3] = vmcb->v_cstar;
1656 msrs[VCPU_REGS_SFMASK4] = vmcb->v_sfmask;
1657 msrs[VCPU_REGS_KGSBASE5] = vmcb->v_kgsbase;
1658 }
1659
1660 if (regmask & VM_RWREGS_DRS0x10) {
1661 drs[VCPU_REGS_DR00] = vcpu->vc_gueststate.vg_dr0;
1662 drs[VCPU_REGS_DR11] = vcpu->vc_gueststate.vg_dr1;
1663 drs[VCPU_REGS_DR22] = vcpu->vc_gueststate.vg_dr2;
1664 drs[VCPU_REGS_DR33] = vcpu->vc_gueststate.vg_dr3;
1665 drs[VCPU_REGS_DR64] = vmcb->v_dr6;
1666 drs[VCPU_REGS_DR75] = vmcb->v_dr7;
1667 }
1668
1669 return (0);
1670}
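/*
 * Illustrative sketch (not part of vmm_machdep.c): the segment-attribute
 * conversion used above, pulled out as hypothetical helpers.  The VMCB
 * stores a packed attribute word; the vcpu_segment_info access rights use
 * the VMX-style layout, where the VMCB's attribute bits 8-11 sit at
 * bits 12-15.
 */
static inline uint32_t
example_svm_attr_to_ar(uint16_t attr)
{
	/* low byte unchanged, attribute bits 8-11 move up to bits 12-15 */
	return (attr & 0xff) | (((uint32_t)attr << 4) & 0xf000);
}

static inline uint16_t
example_ar_to_svm_attr(uint32_t ar)
{
	/* inverse transform, as done in vcpu_writeregs_svm() below */
	return (ar & 0xff) | ((ar >> 4) & 0xf00);
}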
1671
1672/*
1673 * vcpu_writeregs_vmx
1674 *
1675 * Writes VCPU registers
1676 *
1677 * Parameters:
1678 * vcpu: the vcpu that has to get its registers written to
1679 * regmask: the types of registers to write
1680 * loadvmcs: bit to indicate whether the VMCS has to be loaded first
1681 * vrs: the register values to write
1682 *
1683 * Return values:
1684 * 0: if successful
1685 * EINVAL: an error occurred writing registers
1686 */
1687int
1688vcpu_writeregs_vmx(struct vcpu *vcpu, uint64_t regmask, int loadvmcs,
1689 struct vcpu_reg_state *vrs)
1690{
1691 int i, ret = 0;
1692 uint16_t sel;
1693 uint64_t limit, ar;
1694 uint64_t *gprs = vrs->vrs_gprs;
1695 uint64_t *crs = vrs->vrs_crs;
1696 uint64_t *msrs = vrs->vrs_msrs;
1697 uint64_t *drs = vrs->vrs_drs;
1698 struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1699 struct vmx_msr_store *msr_store;
1700
1701 if (loadvmcs) {
1702 if (vcpu_reload_vmcs_vmx(vcpu))
1703 return (EINVAL22);
1704 }
1705
1706#ifdef VMM_DEBUG
1707 /* VMCS should be loaded... */
1708 paddr_t pa = 0ULL;
1709 if (vmptrst(&pa))
1710 panic("%s: vmptrst", __func__);
1711 KASSERT(pa == vcpu->vc_control_pa);
1712#endif /* VMM_DEBUG */
1713
1714 if (regmask & VM_RWREGS_GPRS0x1) {
1715 vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX0];
1716 vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX3];
1717 vcpu->vc_gueststate.vg_rcx = gprs[VCPU_REGS_RCX1];
1718 vcpu->vc_gueststate.vg_rdx = gprs[VCPU_REGS_RDX2];
1719 vcpu->vc_gueststate.vg_rsi = gprs[VCPU_REGS_RSI6];
1720 vcpu->vc_gueststate.vg_rdi = gprs[VCPU_REGS_RDI7];
1721 vcpu->vc_gueststate.vg_r8 = gprs[VCPU_REGS_R88];
1722 vcpu->vc_gueststate.vg_r9 = gprs[VCPU_REGS_R99];
1723 vcpu->vc_gueststate.vg_r10 = gprs[VCPU_REGS_R1010];
1724 vcpu->vc_gueststate.vg_r11 = gprs[VCPU_REGS_R1111];
1725 vcpu->vc_gueststate.vg_r12 = gprs[VCPU_REGS_R1212];
1726 vcpu->vc_gueststate.vg_r13 = gprs[VCPU_REGS_R1313];
1727 vcpu->vc_gueststate.vg_r14 = gprs[VCPU_REGS_R1414];
1728 vcpu->vc_gueststate.vg_r15 = gprs[VCPU_REGS_R1515];
1729 vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP5];
1730 vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP16];
1731 if (vmwrite(VMCS_GUEST_IA32_RIP0x681E, gprs[VCPU_REGS_RIP16]))
1732 goto errout;
1733 if (vmwrite(VMCS_GUEST_IA32_RSP0x681C, gprs[VCPU_REGS_RSP4]))
1734 goto errout;
1735 if (vmwrite(VMCS_GUEST_IA32_RFLAGS0x6820, gprs[VCPU_REGS_RFLAGS17]))
1736 goto errout;
1737 }
1738
1739 if (regmask & VM_RWREGS_SREGS0x2) {
1740 for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields); i++) {
1741 sel = sregs[i].vsi_sel;
1742 limit = sregs[i].vsi_limit;
1743 ar = sregs[i].vsi_ar;
1744
1745 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].selid, sel))
1746 goto errout;
1747 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].limitid, limit))
1748 goto errout;
1749 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].arid, ar))
1750 goto errout;
1751 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].baseid,
1752 sregs[i].vsi_base))
1753 goto errout;
1754 }
1755
1756 if (vmwrite(VMCS_GUEST_IA32_GDTR_LIMIT0x4810,
1757 vrs->vrs_gdtr.vsi_limit))
1758 goto errout;
1759 if (vmwrite(VMCS_GUEST_IA32_GDTR_BASE0x6816,
1760 vrs->vrs_gdtr.vsi_base))
1761 goto errout;
1762 if (vmwrite(VMCS_GUEST_IA32_IDTR_LIMIT0x4812,
1763 vrs->vrs_idtr.vsi_limit))
1764 goto errout;
1765 if (vmwrite(VMCS_GUEST_IA32_IDTR_BASE0x6818,
1766 vrs->vrs_idtr.vsi_base))
1767 goto errout;
1768 }
1769
1770 if (regmask & VM_RWREGS_CRS0x4) {
1771 vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR05];
1772 if (vmwrite(VMCS_GUEST_IA32_CR00x6800, crs[VCPU_REGS_CR00]))
1773 goto errout;
1774 if (vmwrite(VMCS_GUEST_IA32_CR30x6802, crs[VCPU_REGS_CR32]))
1775 goto errout;
1776 if (vmwrite(VMCS_GUEST_IA32_CR40x6804, crs[VCPU_REGS_CR43]))
1777 goto errout;
1778 if (vmwrite(VMCS_GUEST_PDPTE00x280A, crs[VCPU_REGS_PDPTE06]))
1779 goto errout;
1780 if (vmwrite(VMCS_GUEST_PDPTE10x280C, crs[VCPU_REGS_PDPTE17]))
1781 goto errout;
1782 if (vmwrite(VMCS_GUEST_PDPTE20x280E, crs[VCPU_REGS_PDPTE28]))
1783 goto errout;
1784 if (vmwrite(VMCS_GUEST_PDPTE30x2810, crs[VCPU_REGS_PDPTE39]))
1785 goto errout;
1786 }
1787
1788 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
1789
1790 if (regmask & VM_RWREGS_MSRS0x8) {
1791 for (i = 0; i < VCPU_REGS_NMSRS(6 + 1); i++) {
1792 msr_store[i].vms_data = msrs[i];
1793 }
1794 }
1795
1796 if (regmask & VM_RWREGS_DRS0x10) {
1797 vcpu->vc_gueststate.vg_dr0 = drs[VCPU_REGS_DR00];
1798 vcpu->vc_gueststate.vg_dr1 = drs[VCPU_REGS_DR11];
1799 vcpu->vc_gueststate.vg_dr2 = drs[VCPU_REGS_DR22];
1800 vcpu->vc_gueststate.vg_dr3 = drs[VCPU_REGS_DR33];
1801 vcpu->vc_gueststate.vg_dr6 = drs[VCPU_REGS_DR64];
1802 if (vmwrite(VMCS_GUEST_IA32_DR70x681A, drs[VCPU_REGS_DR75]))
1803 goto errout;
1804 }
1805
1806 goto out;
1807
1808errout:
1809 ret = EINVAL22;
1810out:
1811 if (loadvmcs) {
1812 if (vmclear(&vcpu->vc_control_pa))
1813 ret = EINVAL22;
1814 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (0));
1815 }
1816 return (ret);
1817}
1818
1819/*
1820 * vcpu_writeregs_svm
1821 *
1822 * Writes 'vcpu's registers
1823 *
1824 * Parameters:
1825 * vcpu: the vcpu that has to get its registers written to
1826 * regmask: the types of registers to write
1827 * vrs: the register values to write
1828 *
1829 * Return values:
1830 * 0: if successful
1831 * EINVAL: an error occurred writing registers
1832 */
1833int
1834vcpu_writeregs_svm(struct vcpu *vcpu, uint64_t regmask,
1835 struct vcpu_reg_state *vrs)
1836{
1837 uint64_t *gprs = vrs->vrs_gprs;
1838 uint64_t *crs = vrs->vrs_crs;
1839 uint16_t attr;
1840 uint64_t *msrs = vrs->vrs_msrs;
1841 uint64_t *drs = vrs->vrs_drs;
1842 struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1843 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
1844
1845 if (regmask & VM_RWREGS_GPRS0x1) {
1846 vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX0];
1847 vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX3];
1848 vcpu->vc_gueststate.vg_rcx = gprs[VCPU_REGS_RCX1];
1849 vcpu->vc_gueststate.vg_rdx = gprs[VCPU_REGS_RDX2];
1850 vcpu->vc_gueststate.vg_rsi = gprs[VCPU_REGS_RSI6];
1851 vcpu->vc_gueststate.vg_rdi = gprs[VCPU_REGS_RDI7];
1852 vcpu->vc_gueststate.vg_r8 = gprs[VCPU_REGS_R88];
1853 vcpu->vc_gueststate.vg_r9 = gprs[VCPU_REGS_R99];
1854 vcpu->vc_gueststate.vg_r10 = gprs[VCPU_REGS_R1010];
1855 vcpu->vc_gueststate.vg_r11 = gprs[VCPU_REGS_R1111];
1856 vcpu->vc_gueststate.vg_r12 = gprs[VCPU_REGS_R1212];
1857 vcpu->vc_gueststate.vg_r13 = gprs[VCPU_REGS_R1313];
1858 vcpu->vc_gueststate.vg_r14 = gprs[VCPU_REGS_R1414];
1859 vcpu->vc_gueststate.vg_r15 = gprs[VCPU_REGS_R1515];
1860 vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP5];
1861 vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP16];
1862
1863 vmcb->v_rax = gprs[VCPU_REGS_RAX0];
1864 vmcb->v_rip = gprs[VCPU_REGS_RIP16];
1865 vmcb->v_rsp = gprs[VCPU_REGS_RSP4];
1866 vmcb->v_rflags = gprs[VCPU_REGS_RFLAGS17];
1867 }
1868
1869 if (regmask & VM_RWREGS_SREGS0x2) {
1870 vmcb->v_cs.vs_sel = sregs[VCPU_REGS_CS1].vsi_sel;
1871 vmcb->v_cs.vs_lim = sregs[VCPU_REGS_CS1].vsi_limit;
1872 attr = sregs[VCPU_REGS_CS1].vsi_ar;
1873 vmcb->v_cs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1874 vmcb->v_cs.vs_base = sregs[VCPU_REGS_CS1].vsi_base;
1875 vmcb->v_ds.vs_sel = sregs[VCPU_REGS_DS3].vsi_sel;
1876 vmcb->v_ds.vs_lim = sregs[VCPU_REGS_DS3].vsi_limit;
1877 attr = sregs[VCPU_REGS_DS3].vsi_ar;
1878 vmcb->v_ds.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1879 vmcb->v_ds.vs_base = sregs[VCPU_REGS_DS3].vsi_base;
1880 vmcb->v_es.vs_sel = sregs[VCPU_REGS_ES0].vsi_sel;
1881 vmcb->v_es.vs_lim = sregs[VCPU_REGS_ES0].vsi_limit;
1882 attr = sregs[VCPU_REGS_ES0].vsi_ar;
1883 vmcb->v_es.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1884 vmcb->v_es.vs_base = sregs[VCPU_REGS_ES0].vsi_base;
1885 vmcb->v_fs.vs_sel = sregs[VCPU_REGS_FS4].vsi_sel;
1886 vmcb->v_fs.vs_lim = sregs[VCPU_REGS_FS4].vsi_limit;
1887 attr = sregs[VCPU_REGS_FS4].vsi_ar;
1888 vmcb->v_fs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1889 vmcb->v_fs.vs_base = sregs[VCPU_REGS_FS4].vsi_base;
1890 vmcb->v_gs.vs_sel = sregs[VCPU_REGS_GS5].vsi_sel;
1891 vmcb->v_gs.vs_lim = sregs[VCPU_REGS_GS5].vsi_limit;
1892 attr = sregs[VCPU_REGS_GS5].vsi_ar;
1893 vmcb->v_gs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1894 vmcb->v_gs.vs_base = sregs[VCPU_REGS_GS5].vsi_base;
1895 vmcb->v_ss.vs_sel = sregs[VCPU_REGS_SS2].vsi_sel;
1896 vmcb->v_ss.vs_lim = sregs[VCPU_REGS_SS2].vsi_limit;
1897 attr = sregs[VCPU_REGS_SS2].vsi_ar;
1898 vmcb->v_ss.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1899 vmcb->v_ss.vs_base = sregs[VCPU_REGS_SS2].vsi_base;
1900 vmcb->v_ldtr.vs_sel = sregs[VCPU_REGS_LDTR6].vsi_sel;
1901 vmcb->v_ldtr.vs_lim = sregs[VCPU_REGS_LDTR6].vsi_limit;
1902 attr = sregs[VCPU_REGS_LDTR6].vsi_ar;
1903 vmcb->v_ldtr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1904 vmcb->v_ldtr.vs_base = sregs[VCPU_REGS_LDTR6].vsi_base;
1905 vmcb->v_tr.vs_sel = sregs[VCPU_REGS_TR7].vsi_sel;
1906 vmcb->v_tr.vs_lim = sregs[VCPU_REGS_TR7].vsi_limit;
1907 attr = sregs[VCPU_REGS_TR7].vsi_ar;
1908 vmcb->v_tr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
1909 vmcb->v_tr.vs_base = sregs[VCPU_REGS_TR7].vsi_base;
1910 vmcb->v_gdtr.vs_lim = vrs->vrs_gdtr.vsi_limit;
1911 vmcb->v_gdtr.vs_base = vrs->vrs_gdtr.vsi_base;
1912 vmcb->v_idtr.vs_lim = vrs->vrs_idtr.vsi_limit;
1913 vmcb->v_idtr.vs_base = vrs->vrs_idtr.vsi_base;
1914 }
1915
1916 if (regmask & VM_RWREGS_CRS0x4) {
1917 vmcb->v_cr0 = crs[VCPU_REGS_CR00];
1918 vmcb->v_cr3 = crs[VCPU_REGS_CR32];
1919 vmcb->v_cr4 = crs[VCPU_REGS_CR43];
1920 vcpu->vc_gueststate.vg_cr2 = crs[VCPU_REGS_CR21];
1921 vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR05];
1922 }
1923
1924 if (regmask & VM_RWREGS_MSRS0x8) {
1925 vmcb->v_efer |= msrs[VCPU_REGS_EFER0];
1926 vmcb->v_star = msrs[VCPU_REGS_STAR1];
1927 vmcb->v_lstar = msrs[VCPU_REGS_LSTAR2];
1928 vmcb->v_cstar = msrs[VCPU_REGS_CSTAR3];
1929 vmcb->v_sfmask = msrs[VCPU_REGS_SFMASK4];
1930 vmcb->v_kgsbase = msrs[VCPU_REGS_KGSBASE5];
1931 }
1932
1933 if (regmask & VM_RWREGS_DRS0x10) {
1934 vcpu->vc_gueststate.vg_dr0 = drs[VCPU_REGS_DR00];
1935 vcpu->vc_gueststate.vg_dr1 = drs[VCPU_REGS_DR11];
1936 vcpu->vc_gueststate.vg_dr2 = drs[VCPU_REGS_DR22];
1937 vcpu->vc_gueststate.vg_dr3 = drs[VCPU_REGS_DR33];
1938 vmcb->v_dr6 = drs[VCPU_REGS_DR64];
1939 vmcb->v_dr7 = drs[VCPU_REGS_DR75];
1940 }
1941
1942 return (0);
1943}
1944
1945/*
1946 * vcpu_reset_regs_svm
1947 *
1948 * Initializes 'vcpu's registers to supplied state
1949 *
1950 * Parameters:
1951 * vcpu: the vcpu whose register state is to be initialized
1952 * vrs: the register state to set
1953 *
1954 * Return values:
1955 * 0: registers init'ed successfully
1956 * EINVAL: an error occurred setting register state
1957 */
1958int
1959vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
1960{
1961 struct vmcb *vmcb;
1962 int ret;
1963 uint16_t asid;
1964
1965 vmcb = (struct vmcb *)vcpu->vc_control_va;
1966
1967 /*
1968 * Intercept controls
1969 *
1970 * External Interrupt exiting (SVM_INTERCEPT_INTR)
1971 * External NMI exiting (SVM_INTERCEPT_NMI)
1972 * CPUID instruction (SVM_INTERCEPT_CPUID)
1973 * HLT instruction (SVM_INTERCEPT_HLT)
1974 * I/O instructions (SVM_INTERCEPT_INOUT)
1975 * MSR access (SVM_INTERCEPT_MSR)
1976 * shutdown events (SVM_INTERCEPT_SHUTDOWN)
1977 *
1978 * VMRUN instruction (SVM_INTERCEPT_VMRUN)
1979 * VMMCALL instruction (SVM_INTERCEPT_VMMCALL)
1980 * VMLOAD instruction (SVM_INTERCEPT_VMLOAD)
1981 * VMSAVE instruction (SVM_INTERCEPT_VMSAVE)
1982 * STGI instruction (SVM_INTERCEPT_STGI)
1983 * CLGI instruction (SVM_INTERCEPT_CLGI)
1984 * SKINIT instruction (SVM_INTERCEPT_SKINIT)
1985 * ICEBP instruction (SVM_INTERCEPT_ICEBP)
1986 * MWAIT instruction (SVM_INTERCEPT_MWAIT_UNCOND)
1987 * MWAIT instruction (SVM_INTERCEPT_MWAIT_COND)
1988 * MONITOR instruction (SVM_INTERCEPT_MONITOR)
1989 * RDTSCP instruction (SVM_INTERCEPT_RDTSCP)
1990 * INVLPGA instruction (SVM_INTERCEPT_INVLPGA)
1991 * XSETBV instruction (SVM_INTERCEPT_XSETBV) (if available)
1992 */
1993 vmcb->v_intercept1 = SVM_INTERCEPT_INTR(1UL << 0) | SVM_INTERCEPT_NMI(1UL << 1) |
1994 SVM_INTERCEPT_CPUID(1UL << 18) | SVM_INTERCEPT_HLT(1UL << 24) | SVM_INTERCEPT_INOUT(1UL << 27) |
1995 SVM_INTERCEPT_MSR(1UL << 28) | SVM_INTERCEPT_SHUTDOWN(1UL << 31);
1996
1997 vmcb->v_intercept2 = SVM_INTERCEPT_VMRUN(1UL << 0) | SVM_INTERCEPT_VMMCALL(1UL << 1) |
1998 SVM_INTERCEPT_VMLOAD(1UL << 2) | SVM_INTERCEPT_VMSAVE(1UL << 3) | SVM_INTERCEPT_STGI(1UL << 4) |
1999 SVM_INTERCEPT_CLGI(1UL << 5) | SVM_INTERCEPT_SKINIT(1UL << 6) | SVM_INTERCEPT_ICEBP(1UL << 8) |
2000 SVM_INTERCEPT_MWAIT_UNCOND(1UL << 11) | SVM_INTERCEPT_MONITOR(1UL << 10) |
2001 SVM_INTERCEPT_MWAIT_COND(1UL << 12) | SVM_INTERCEPT_RDTSCP(1UL << 7) |
2002 SVM_INTERCEPT_INVLPGA(1UL << 26);
2003
2004 if (xsave_mask)
2005 vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV(1UL << 13);
2006
2007 /* Setup I/O bitmap */
2008 memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE);
2009 vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa);
2010
2011 /* Setup MSR bitmap */
2012 memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, 2 * PAGE_SIZE);
2013 vmcb->v_msrpm_pa = (uint64_t)(vcpu->vc_msr_bitmap_pa);
2014 svm_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL0x03a);
2015 svm_setmsrbrw(vcpu, MSR_SYSENTER_CS0x174);
2016 svm_setmsrbrw(vcpu, MSR_SYSENTER_ESP0x175);
2017 svm_setmsrbrw(vcpu, MSR_SYSENTER_EIP0x176);
2018 svm_setmsrbrw(vcpu, MSR_STAR0xc0000081);
2019 svm_setmsrbrw(vcpu, MSR_LSTAR0xc0000082);
2020 svm_setmsrbrw(vcpu, MSR_CSTAR0xc0000083);
2021 svm_setmsrbrw(vcpu, MSR_SFMASK0xc0000084);
2022 svm_setmsrbrw(vcpu, MSR_FSBASE0xc0000100);
2023 svm_setmsrbrw(vcpu, MSR_GSBASE0xc0000101);
2024 svm_setmsrbrw(vcpu, MSR_KERNELGSBASE0xc0000102);
2025
2026 /* EFER is R/O so we can ensure the guest always has SVME */
2027 svm_setmsrbr(vcpu, MSR_EFER0xc0000080);
2028
2029 /* allow reading TSC */
2030 svm_setmsrbr(vcpu, MSR_TSC0x010);
2031
2032 /* allow reading HWCR and PSTATEDEF to determine TSC frequency */
2033 svm_setmsrbr(vcpu, MSR_HWCR0xc0010015);
2034 svm_setmsrbr(vcpu, MSR_PSTATEDEF(0)(0xc0010064 + (0)));
2035
2036 /* Guest VCPU ASID */
2037 if (vmm_alloc_vpid(&asid)) {
2038 DPRINTF("%s: could not allocate asid\n", __func__);
2039 ret = EINVAL22;
2040 goto exit;
2041 }
2042
2043 vmcb->v_asid = asid;
2044 vcpu->vc_vpid = asid;
2045
2046 /* TLB Control - First time in, flush all */
2047 vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ALL1;
2048
2049 /* INTR masking */
2050 vmcb->v_intr_masking = 1;
2051
2052 /* PAT */
2053 vmcb->v_g_pat = PATENTRY(0, PAT_WB)(0x6UL << ((0) * 8)) | PATENTRY(1, PAT_WC)(0x1UL << ((1) * 8)) |
2054 PATENTRY(2, PAT_UCMINUS)(0x7UL << ((2) * 8)) | PATENTRY(3, PAT_UC)(0x0UL << ((3) * 8)) |
2055 PATENTRY(4, PAT_WB)(0x6UL << ((4) * 8)) | PATENTRY(5, PAT_WC)(0x1UL << ((5) * 8)) |
2056 PATENTRY(6, PAT_UCMINUS)(0x7UL << ((6) * 8)) | PATENTRY(7, PAT_UC)(0x0UL << ((7) * 8));
2057
2058 /* NPT */
2059 if (vmm_softc->mode == VMM_MODE_RVI) {
2060 vmcb->v_np_enable = 1;
2061 vmcb->v_n_cr3 = vcpu->vc_parent->vm_map->pmap->pm_pdirpa;
2062 }
2063
2064 /* Enable SVME in EFER (must always be set) */
2065 vmcb->v_efer |= EFER_SVME0x00001000;
2066
2067 ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), vrs);
2068
2069 /* xcr0 power on default sets bit 0 (x87 state) */
2070 vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X870x00000001 & xsave_mask;
2071
2072 vcpu->vc_parent->vm_map->pmap->eptp = 0;
2073
2074exit:
2075 return ret;
2076}
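/*
 * Worked example (illustrative only): with the PAT type encodings shown
 * above (PAT_WB = 0x6, PAT_WC = 0x1, PAT_UCMINUS = 0x7, PAT_UC = 0x0) and
 * one byte per entry, entry 0 in the low byte, the value assigned to
 * v_g_pat above works out to:
 */
static const uint64_t example_guest_pat = 0x0007010600070106ULL;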
2077
2078/*
2079 * svm_setmsrbr
2080 *
2081 * Allow read access to the specified msr on the supplied vcpu.
2082 *
2083 * Parameters:
2084 * vcpu: the VCPU to allow access
2085 * msr: the MSR number to allow access to
2086 */
2087void
2088svm_setmsrbr(struct vcpu *vcpu, uint32_t msr)
2089{
2090 uint8_t *msrs;
2091 uint16_t idx;
2092
2093 msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
2094
2095 /*
2096 * MSR Read bitmap layout:
2097 * Pentium MSRs (0x0 - 0x1fff) @ 0x0
2098 * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800
2099 * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000
2100 *
2101 * Read enable bit is low order bit of 2-bit pair
2102 * per MSR (eg, MSR 0x0 read bit is at bit 0 @ 0x0)
2103 */
2104 if (msr <= 0x1fff) {
2105 idx = SVM_MSRIDX(msr)((msr) / 4);
2106 msrs[idx] &= ~(SVM_MSRBIT_R(msr)(1 << (((msr) % 4) * 2)));
2107 } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
2108 idx = SVM_MSRIDX(msr - 0xc0000000)((msr - 0xc0000000) / 4) + 0x800;
2109 msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0000000)(1 << (((msr - 0xc0000000) % 4) * 2)));
2110 } else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
2111 idx = SVM_MSRIDX(msr - 0xc0010000)((msr - 0xc0010000) / 4) + 0x1000;
2112 msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0010000)(1 << (((msr - 0xc0010000) % 4) * 2)));
2113 } else {
2114 printf("%s: invalid msr 0x%x\n", __func__, msr);
2115 return;
2116 }
2117}
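/*
 * Worked example (illustrative, hypothetical helper): for MSR_LSTAR
 * (0xc0000082) the offset into the "Gen6 and Syscall" range is 0x82, so
 * its read-enable bit is bit (0x82 % 4) * 2 = 4 of byte
 * 0x800 + 0x82 / 4 = 0x820.  Clearing that bit lets the guest read the
 * MSR without a #VMEXIT.
 */
static inline void
example_svm_allow_msr_read(uint8_t *bitmap, uint16_t base, uint32_t offset)
{
	/* base: 0x0, 0x800 or 0x1000; offset: MSR minus start of its range */
	bitmap[base + offset / 4] &= ~(1 << ((offset % 4) * 2));
}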
2118
2119/*
2120 * svm_setmsrbw
2121 *
2122 * Allow write access to the specified msr on the supplied vcpu
2123 *
2124 * Parameters:
2125 * vcpu: the VCPU to allow access
2126 * msr: the MSR number to allow access to
2127 */
2128void
2129svm_setmsrbw(struct vcpu *vcpu, uint32_t msr)
2130{
2131 uint8_t *msrs;
2132 uint16_t idx;
2133
2134 msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
2135
2136 /*
2137 * MSR Write bitmap layout:
2138 * Pentium MSRs (0x0 - 0x1fff) @ 0x0
2139 * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800
2140 * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000
2141 *
2142 * Write enable bit is high order bit of 2-bit pair
2143 * per MSR (eg, MSR 0x0 write bit is at bit 1 @ 0x0)
2144 */
2145 if (msr <= 0x1fff) {
2146 idx = SVM_MSRIDX(msr)((msr) / 4);
2147 msrs[idx] &= ~(SVM_MSRBIT_W(msr)(1 << (((msr) % 4) * 2 + 1)));
2148 } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
2149 idx = SVM_MSRIDX(msr - 0xc0000000)((msr - 0xc0000000) / 4) + 0x800;
2150 msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0000000)(1 << (((msr - 0xc0000000) % 4) * 2 + 1)));
2151 } else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
2152 idx = SVM_MSRIDX(msr - 0xc0010000)((msr - 0xc0010000) / 4) + 0x1000;
2153 msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0010000)(1 << (((msr - 0xc0010000) % 4) * 2 + 1)));
2154 } else {
2155 printf("%s: invalid msr 0x%x\n", __func__, msr);
2156 return;
2157 }
2158}
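/*
 * Illustrative counterpart for writes (hypothetical helper): the
 * write-enable bit is the high-order bit of the same 2-bit pair, one
 * position above the read-enable bit for the same MSR.
 */
static inline void
example_svm_allow_msr_write(uint8_t *bitmap, uint16_t base, uint32_t offset)
{
	bitmap[base + offset / 4] &= ~(1 << ((offset % 4) * 2 + 1));
}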
2159
2160/*
2161 * svm_setmsrbrw
2162 *
2163 * Allow read/write access to the specified msr on the supplied vcpu
2164 *
2165 * Parameters:
2166 * vcpu: the VCPU to allow access
2167 * msr: the MSR number to allow access to
2168 */
2169void
2170svm_setmsrbrw(struct vcpu *vcpu, uint32_t msr)
2171{
2172 svm_setmsrbr(vcpu, msr);
2173 svm_setmsrbw(vcpu, msr);
2174}
2175
2176/*
2177 * vmx_setmsrbr
2178 *
2179 * Allow read access to the specified msr on the supplied vcpu.
2180 *
2181 * Parameters:
2182 * vcpu: the VCPU to allow access
2183 * msr: the MSR number to allow access to
2184 */
2185void
2186vmx_setmsrbr(struct vcpu *vcpu, uint32_t msr)
2187{
2188 uint8_t *msrs;
2189 uint16_t idx;
2190
2191 msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
2192
2193 /*
2194 * MSR Read bitmap layout:
2195 * "Low" MSRs (0x0 - 0x1fff) @ 0x0
2196 * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0x400
2197 */
2198 if (msr <= 0x1fff) {
2199 idx = VMX_MSRIDX(msr)((msr) / 8);
2200 msrs[idx] &= ~(VMX_MSRBIT(msr)(1 << (msr) % 8));
2201 } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
2202 idx = VMX_MSRIDX(msr - 0xc0000000)((msr - 0xc0000000) / 8) + 0x400;
2203 msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000)(1 << (msr - 0xc0000000) % 8));
2204 } else
2205 printf("%s: invalid msr 0x%x\n", __func__, msr);
2206}
2207
2208/*
2209 * vmx_setmsrbw
2210 *
2211 * Allow write access to the specified msr on the supplied vcpu
2212 *
2213 * Parameters:
2214 * vcpu: the VCPU to allow access
2215 * msr: the MSR number to allow access to
2216 */
2217void
2218vmx_setmsrbw(struct vcpu *vcpu, uint32_t msr)
2219{
2220 uint8_t *msrs;
2221 uint16_t idx;
2222
2223 msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
2224
2225 /*
2226 * MSR Write bitmap layout:
2227 * "Low" MSRs (0x0 - 0x1fff) @ 0x800
2228 * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0xc00
2229 */
2230 if (msr <= 0x1fff) {
2231 idx = VMX_MSRIDX(msr)((msr) / 8) + 0x800;
2232 msrs[idx] &= ~(VMX_MSRBIT(msr)(1 << (msr) % 8));
2233 } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
2234 idx = VMX_MSRIDX(msr - 0xc0000000)((msr - 0xc0000000) / 8) + 0xc00;
2235 msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000)(1 << (msr - 0xc0000000) % 8));
2236 } else
2237 printf("%s: invalid msr 0x%x\n", __func__, msr);
2238}
2239
2240/*
2241 * vmx_setmsrbrw
2242 *
2243 * Allow read/write access to the specified msr on the supplied vcpu
2244 *
2245 * Parameters:
2246 * vcpu: the VCPU to allow access
2247 * msr: the MSR number to allow access to
2248 */
2249void
2250vmx_setmsrbrw(struct vcpu *vcpu, uint32_t msr)
2251{
2252 vmx_setmsrbr(vcpu, msr);
2253 vmx_setmsrbw(vcpu, msr);
2254}
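/*
 * Worked example (illustrative, hypothetical helper): the VMX bitmaps use
 * one bit per MSR.  For MSR_EFER (0xc0000080) the offset into the "High"
 * range is 0x80, so vmx_setmsrbrw() clears bit 0x80 % 8 = 0 of byte
 * 0x400 + 0x80 / 8 = 0x410 (read) and of byte 0xc00 + 0x10 = 0xc10
 * (write).
 */
static inline void
example_vmx_allow_msr(uint8_t *bitmap, uint16_t base, uint32_t offset)
{
	/* base: 0x0/0x400 for reads, 0x800/0xc00 for writes */
	bitmap[base + offset / 8] &= ~(1 << (offset % 8));
}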
2255
2256/*
2257 * svm_set_clean
2258 *
2259 * Sets (marks as unmodified) the VMCB clean bit(s) set in 'value'.
2260 * For example, to set the clean bit for the VMCB intercepts (bit position 0),
2261 * the caller provides 'SVM_CLEANBITS_I' (0x1) for the 'value' argument.
2262 * Multiple cleanbits can be provided in 'value' at the same time (eg,
2263 * "SVM_CLEANBITS_I | SVM_CLEANBITS_TPR").
2264 *
2265 * Note that this function does not clear any bits; to clear bits in the
2266 * vmcb cleanbits bitfield, use 'svm_set_dirty'.
2267 *
2268 * Parameters:
2269 * vcpu: the VCPU whose VMCB clean value should be set
2270 * value: the value(s) to enable in the cleanbits mask
2271 */
2272void
2273svm_set_clean(struct vcpu *vcpu, uint32_t value)
2274{
2275 struct vmcb *vmcb;
2276
2277 /* If no cleanbits support, do nothing */
2278 if (!curcpu()->ci_vmm_cap.vcc_svm.svm_vmcb_clean)
2279 return;
2280
2281 vmcb = (struct vmcb *)vcpu->vc_control_va;
2282
2283 vmcb->v_vmcb_clean_bits |= value;
2284}
2285
2286/*
2287 * svm_set_dirty
2288 *
2289 * Clears (marks as modified) the VMCB clean bit(s) set in 'value'.
2290 * For example, to clear the bit for the VMCB intercepts (bit position 0)
2291 * the caller provides 'SVM_CLEANBITS_I' (0x1) for the 'value' argument.
2292 * Multiple dirty bits can be provided in 'value' at the same time (eg,
2293 * "SVM_CLEANBITS_I | SVM_CLEANBITS_TPR").
2294 *
2295 * Parameters:
2296 * vcpu: the VCPU whose VMCB dirty value should be set
2297 * value: the value(s) to dirty in the cleanbits mask
2298 */
2299void
2300svm_set_dirty(struct vcpu *vcpu, uint32_t value)
2301{
2302 struct vmcb *vmcb;
2303
2304 /* If no cleanbits support, do nothing */
2305 if (!curcpu()->ci_vmm_cap.vcc_svm.svm_vmcb_clean)
2306 return;
2307
2308 vmcb = (struct vmcb *)vcpu->vc_control_va;
2309
2310 vmcb->v_vmcb_clean_bits &= ~value;
2311}
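/*
 * Usage sketch (illustrative, not from this file): after touching the
 * intercept vectors in the VMCB, the corresponding clean bit has to be
 * dropped so the CPU reloads that state on the next VMRUN.
 * SVM_CLEANBITS_I is the intercept clean bit named in the comments above.
 */
static void
example_update_intercepts(struct vcpu *vcpu, uint32_t extra_intercepts)
{
	struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;

	vmcb->v_intercept1 |= extra_intercepts;
	/* intercept state changed: mark it dirty so VMRUN picks it up */
	svm_set_dirty(vcpu, SVM_CLEANBITS_I);
}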
2312
2313/*
2314 * vcpu_reset_regs_vmx
2315 *
2316 * Initializes 'vcpu's registers to supplied state
2317 *
2318 * Parameters:
2319 * vcpu: the vcpu whose register state is to be initialized
2320 * vrs: the register state to set
2321 *
2322 * Return values:
2323 * 0: registers init'ed successfully
2324 * EINVAL: an error occurred setting register state
2325 */
2326int
2327vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
2328{
2329 int ret = 0, ug = 0;
2330 uint32_t cr0, cr4;
2331 uint32_t pinbased, procbased, procbased2, exit, entry;
2332 uint32_t want1, want0;
2333 uint64_t ctrlval, cr3;
2334 uint16_t ctrl, vpid;
2335 struct vmx_msr_store *msr_store;
2336
2337 rw_assert_wrlock(&vcpu->vc_lock);
2338
2339 cr0 = vrs->vrs_crs[VCPU_REGS_CR00];
2340
2341 if (vcpu_reload_vmcs_vmx(vcpu)) {
2342 DPRINTF("%s: error reloading VMCS\n", __func__);
2343 ret = EINVAL22;
2344 goto exit;
2345 }
2346
2347#ifdef VMM_DEBUG
2348 /* VMCS should be loaded... */
2349 paddr_t pa = 0ULL;
2350 if (vmptrst(&pa))
2351 panic("%s: vmptrst", __func__);
2352 KASSERT(pa == vcpu->vc_control_pa);
2353#endif /* VMM_DEBUG */
2354
2355 /* Compute Basic Entry / Exit Controls */
2356 vcpu->vc_vmx_basic = rdmsr(IA32_VMX_BASIC0x480);
2357 vcpu->vc_vmx_entry_ctls = rdmsr(IA32_VMX_ENTRY_CTLS0x484);
2358 vcpu->vc_vmx_exit_ctls = rdmsr(IA32_VMX_EXIT_CTLS0x483);
2359 vcpu->vc_vmx_pinbased_ctls = rdmsr(IA32_VMX_PINBASED_CTLS0x481);
2360 vcpu->vc_vmx_procbased_ctls = rdmsr(IA32_VMX_PROCBASED_CTLS0x482);
2361
2362 /* Compute True Entry / Exit Controls (if applicable) */
2363 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
2364 vcpu->vc_vmx_true_entry_ctls = rdmsr(IA32_VMX_TRUE_ENTRY_CTLS0x490);
2365 vcpu->vc_vmx_true_exit_ctls = rdmsr(IA32_VMX_TRUE_EXIT_CTLS0x48F);
2366 vcpu->vc_vmx_true_pinbased_ctls =
2367 rdmsr(IA32_VMX_TRUE_PINBASED_CTLS0x48D);
2368 vcpu->vc_vmx_true_procbased_ctls =
2369 rdmsr(IA32_VMX_TRUE_PROCBASED_CTLS0x48E);
2370 }
2371
2372 /* Compute Secondary Procbased Controls (if applicable) */
2373 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
2374 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1))
2375 vcpu->vc_vmx_procbased2_ctls = rdmsr(IA32_VMX_PROCBASED2_CTLS0x48B);
2376
2377 /*
2378 * Pinbased ctrls
2379 *
2380 * We must be able to set the following:
2381 * IA32_VMX_EXTERNAL_INT_EXITING - exit on host interrupt
2382 * IA32_VMX_NMI_EXITING - exit on host NMI
2383 */
2384 want1 = IA32_VMX_EXTERNAL_INT_EXITING(1ULL << 0) |
2385 IA32_VMX_NMI_EXITING(1ULL << 3);
2386 want0 = 0;
2387
2388 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
2389 ctrl = IA32_VMX_TRUE_PINBASED_CTLS0x48D;
2390 ctrlval = vcpu->vc_vmx_true_pinbased_ctls;
2391 } else {
2392 ctrl = IA32_VMX_PINBASED_CTLS0x481;
2393 ctrlval = vcpu->vc_vmx_pinbased_ctls;
2394 }
2395
2396 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &pinbased)) {
2397 DPRINTF("%s: error computing pinbased controls\n", __func__);
2398 ret = EINVAL22;
2399 goto exit;
2400 }
2401
2402 if (vmwrite(VMCS_PINBASED_CTLS0x4000, pinbased)) {
2403 DPRINTF("%s: error setting pinbased controls\n", __func__);
2404 ret = EINVAL22;
2405 goto exit;
2406 }
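/*
 * Illustrative sketch of the capability-MSR convention behind
 * vcpu_vmx_compute_ctrl() (not the file's implementation): for an
 * IA32_VMX_*_CTLS MSR, bits 31:0 are the allowed 0-settings (a bit set
 * there must be 1 in the control) and bits 63:32 are the allowed
 * 1-settings (a bit clear there must be 0 in the control).
 */
static int
example_ctrl_settable(uint64_t ctrlval, uint32_t want1, uint32_t want0)
{
	uint32_t must_be_one = (uint32_t)ctrlval;
	uint32_t may_be_one = (uint32_t)(ctrlval >> 32);

	if (want1 & ~may_be_one)
		return 0;	/* a required bit can never be set */
	if (want0 & must_be_one)
		return 0;	/* a bit we need clear is forced to 1 */
	return 1;
}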
2407
2408 /*
2409 * Procbased ctrls
2410 *
2411 * We must be able to set the following:
2412 * IA32_VMX_HLT_EXITING - exit on HLT instruction
2413 * IA32_VMX_MWAIT_EXITING - exit on MWAIT instruction
2414 * IA32_VMX_UNCONDITIONAL_IO_EXITING - exit on I/O instructions
2415 * IA32_VMX_USE_MSR_BITMAPS - exit on various MSR accesses
2416 * IA32_VMX_CR8_LOAD_EXITING - guest TPR access
2417 * IA32_VMX_CR8_STORE_EXITING - guest TPR access
2418 * IA32_VMX_USE_TPR_SHADOW - guest TPR access (shadow)
2419 * IA32_VMX_MONITOR_EXITING - exit on MONITOR instruction
2420 *
2421 * If we have EPT, we must be able to clear the following
2422 * IA32_VMX_CR3_LOAD_EXITING - don't care about guest CR3 accesses
2423 * IA32_VMX_CR3_STORE_EXITING - don't care about guest CR3 accesses
2424 */
2425 want1 = IA32_VMX_HLT_EXITING(1ULL << 7) |
2426 IA32_VMX_MWAIT_EXITING(1ULL << 10) |
2427 IA32_VMX_UNCONDITIONAL_IO_EXITING(1ULL << 24) |
2428 IA32_VMX_USE_MSR_BITMAPS(1ULL << 28) |
2429 IA32_VMX_CR8_LOAD_EXITING(1ULL << 19) |
2430 IA32_VMX_CR8_STORE_EXITING(1ULL << 20) |
2431 IA32_VMX_MONITOR_EXITING(1ULL << 29) |
2432 IA32_VMX_USE_TPR_SHADOW(1ULL << 21);
2433 want0 = 0;
2434
2435 if (vmm_softc->mode == VMM_MODE_EPT) {
2436 want1 |= IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31);
2437 want0 |= IA32_VMX_CR3_LOAD_EXITING(1ULL << 15) |
2438 IA32_VMX_CR3_STORE_EXITING(1ULL << 16);
2439 }
2440
2441 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
2442 ctrl = IA32_VMX_TRUE_PROCBASED_CTLS0x48E;
2443 ctrlval = vcpu->vc_vmx_true_procbased_ctls;
2444 } else {
2445 ctrl = IA32_VMX_PROCBASED_CTLS0x482;
2446 ctrlval = vcpu->vc_vmx_procbased_ctls;
2447 }
2448
2449 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased)) {
2450 DPRINTF("%s: error computing procbased controls\n", __func__);
2451 ret = EINVAL22;
2452 goto exit;
2453 }
2454
2455 if (vmwrite(VMCS_PROCBASED_CTLS0x4002, procbased)) {
2456 DPRINTF("%s: error setting procbased controls\n", __func__);
2457 ret = EINVAL22;
2458 goto exit;
2459 }
2460
2461 /*
2462 * Secondary Procbased ctrls
2463 *
2464 * We want to be able to set the following, if available:
2465 * IA32_VMX_ENABLE_VPID - use VPIDs where available
2466 *
2467 * If we have EPT, we must be able to set the following:
2468 * IA32_VMX_ENABLE_EPT - enable EPT
2469 *
2470 * If we have unrestricted guest capability, we must be able to set
2471 * the following:
2472 * IA32_VMX_UNRESTRICTED_GUEST - enable unrestricted guest (if caller
2473 * specified CR0_PG | CR0_PE in %cr0 in the 'vrs' parameter)
2474 */
2475 want1 = 0;
2476
2477 /* XXX checking for 2ndary controls can be combined here */
2478 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
2479 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1)) {
2480 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
2481 IA32_VMX_ENABLE_VPID(1ULL << 5), 1)) {
2482 want1 |= IA32_VMX_ENABLE_VPID(1ULL << 5);
2483 vcpu->vc_vmx_vpid_enabled = 1;
2484 }
2485 }
2486
2487 if (vmm_softc->mode == VMM_MODE_EPT)
2488 want1 |= IA32_VMX_ENABLE_EPT(1ULL << 1);
2489
2490 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
2491 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1)) {
2492 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
2493 IA32_VMX_UNRESTRICTED_GUEST(1ULL << 7), 1)) {
2494 if ((cr0 & (CR0_PE0x00000001 | CR0_PG0x80000000)) == 0) {
2495 want1 |= IA32_VMX_UNRESTRICTED_GUEST(1ULL << 7);
2496 ug = 1;
2497 }
2498 }
2499 }
2500
2501 want0 = ~want1;
2502 ctrlval = vcpu->vc_vmx_procbased2_ctls;
2503 ctrl = IA32_VMX_PROCBASED2_CTLS0x48B;
2504
2505 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased2)) {
2506 DPRINTF("%s: error computing secondary procbased controls\n",
2507 __func__);
2508 ret = EINVAL22;
2509 goto exit;
2510 }
2511
2512 if (vmwrite(VMCS_PROCBASED2_CTLS0x401E, procbased2)) {
2513 DPRINTF("%s: error setting secondary procbased controls\n",
2514 __func__);
2515 ret = EINVAL22;
2516 goto exit;
2517 }
2518
2519 /*
2520 * Exit ctrls
2521 *
2522 * We must be able to set the following:
2523 * IA32_VMX_SAVE_DEBUG_CONTROLS
2524 * IA32_VMX_HOST_SPACE_ADDRESS_SIZE - exit to long mode
2525 * IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT - ack interrupt on exit
2526 */
2527 want1 = IA32_VMX_HOST_SPACE_ADDRESS_SIZE(1ULL << 9) |
2528 IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT(1ULL << 15) |
2529 IA32_VMX_SAVE_DEBUG_CONTROLS(1ULL << 2);
2530 want0 = 0;
2531
2532 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
2533 ctrl = IA32_VMX_TRUE_EXIT_CTLS0x48F;
2534 ctrlval = vcpu->vc_vmx_true_exit_ctls;
2535 } else {
2536 ctrl = IA32_VMX_EXIT_CTLS0x483;
2537 ctrlval = vcpu->vc_vmx_exit_ctls;
2538 }
2539
2540 if (rcr4() & CR4_CET0x00800000)
2541 want1 |= IA32_VMX_LOAD_HOST_CET_STATE(1ULL << 28);
2542 else
2543 want0 |= IA32_VMX_LOAD_HOST_CET_STATE(1ULL << 28);
2544
2545 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &exit)) {
2546 DPRINTF("%s: error computing exit controls\n", __func__);
2547 ret = EINVAL22;
2548 goto exit;
2549 }
2550
2551 if (vmwrite(VMCS_EXIT_CTLS0x400C, exit)) {
2552 DPRINTF("%s: error setting exit controls\n", __func__);
2553 ret = EINVAL22;
2554 goto exit;
2555 }
2556
2557 /*
2558 * Entry ctrls
2559 *
2560 * We must be able to set the following:
2561 * IA32_VMX_IA32E_MODE_GUEST (if no unrestricted guest)
2562 * IA32_VMX_LOAD_DEBUG_CONTROLS
2563 * We must be able to clear the following:
2564 * IA32_VMX_ENTRY_TO_SMM - enter to SMM
2565 * IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT
2566 * IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY
2567 */
2568 want1 = IA32_VMX_LOAD_DEBUG_CONTROLS(1ULL << 2);
2569 if (vrs->vrs_msrs[VCPU_REGS_EFER0] & EFER_LMA0x00000400)
2570 want1 |= IA32_VMX_IA32E_MODE_GUEST(1ULL << 9);
2571
2572 want0 = IA32_VMX_ENTRY_TO_SMM(1ULL << 10) |
2573 IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT(1ULL << 11) |
2574 IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY(1ULL << 13);
2575
2576 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
2577 ctrl = IA32_VMX_TRUE_ENTRY_CTLS0x490;
2578 ctrlval = vcpu->vc_vmx_true_entry_ctls;
2579 } else {
2580 ctrl = IA32_VMX_ENTRY_CTLS0x484;
2581 ctrlval = vcpu->vc_vmx_entry_ctls;
2582 }
2583
2584 if (rcr4() & CR4_CET0x00800000)
2585 want1 |= IA32_VMX_LOAD_GUEST_CET_STATE(1ULL << 20);
2586 else
2587 want0 |= IA32_VMX_LOAD_GUEST_CET_STATE(1ULL << 20);
2588
2589 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &entry)) {
2590 ret = EINVAL22;
2591 goto exit;
2592 }
2593
2594 if (vmwrite(VMCS_ENTRY_CTLS0x4012, entry)) {
2595 ret = EINVAL22;
2596 goto exit;
2597 }
2598
2599 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
2600 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1)) {
2601 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
2602 IA32_VMX_ENABLE_VPID(1ULL << 5), 1)) {
2603
2604 /* We may sleep during allocation, so reload VMCS. */
2605 vcpu->vc_last_pcpu = curcpu();
2606 ret = vmm_alloc_vpid(&vpid);
2607 if (vcpu_reload_vmcs_vmx(vcpu)) {
2608 printf("%s: failed to reload vmcs\n", __func__);
2609 ret = EINVAL22;
2610 goto exit;
2611 }
2612 if (ret) {
2613 DPRINTF("%s: could not allocate VPID\n",
2614 __func__);
2615 ret = EINVAL22;
2616 goto exit;
2617 }
2618
2619 if (vmwrite(VMCS_GUEST_VPID0x0000, vpid)) {
2620 DPRINTF("%s: error setting guest VPID\n",
2621 __func__);
2622 ret = EINVAL22;
2623 goto exit;
2624 }
2625
2626 vcpu->vc_vpid = vpid;
2627 }
2628 }
2629
2630 /*
2631 * Determine which bits in CR0 have to be set to a fixed
2632 * value as per Intel SDM A.7.
2633 * CR0 bits in the vrs parameter must match these.
2634 */
2635 want1 = (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) &
2636 (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
2637 want0 = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) &
2638 ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
2639
2640 /*
2641 * CR0_FIXED0 and CR0_FIXED1 may report the CR0_PG and CR0_PE bits as
2642 * fixed to 1 even if the CPU supports the unrestricted guest
2643 * feature. Update want1 and want0 accordingly to allow
2644 * any value for CR0_PG and CR0_PE in vrs->vrs_crs[VCPU_REGS_CR0] if
2645 * the CPU has the unrestricted guest capability.
2646 */
2647 if (ug) {
2648 want1 &= ~(CR0_PG0x80000000 | CR0_PE0x00000001);
2649 want0 &= ~(CR0_PG0x80000000 | CR0_PE0x00000001);
2650 }
2651
2652 /*
2653 * VMX may require some bits to be set that userland should not have
2654 * to care about. Set those here.
2655 */
2656 if (want1 & CR0_NE0x00000020)
2657 cr0 |= CR0_NE0x00000020;
2658
2659 if ((cr0 & want1) != want1) {
2660 ret = EINVAL22;
2661 goto exit;
2662 }
2663
2664 if ((~cr0 & want0) != want0) {
2665 ret = EINVAL22;
2666 goto exit;
2667 }
2668
2669 vcpu->vc_vmx_cr0_fixed1 = want1;
2670 vcpu->vc_vmx_cr0_fixed0 = want0;
2671 /*
2672 * Determine which bits in CR4 have to be set to a fixed
2673 * value as per Intel SDM A.8.
2674 * CR4 bits in the vrs parameter must match these, except
2675 * CR4_VMXE - we add that here since it must always be set.
2676 */
2677 want1 = (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) &
2678 (curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
2679 want0 = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) &
2680 ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
2681
2682 cr4 = vrs->vrs_crs[VCPU_REGS_CR43] | CR4_VMXE0x00002000;
2683
2684 if ((cr4 & want1) != want1) {
2685 ret = EINVAL22;
2686 goto exit;
2687 }
2688
2689 if ((~cr4 & want0) != want0) {
2690 ret = EINVAL22;
2691 goto exit;
2692 }
2693
2694 cr3 = vrs->vrs_crs[VCPU_REGS_CR32];
2695
2696 /* Restore PDPTEs if 32-bit PAE paging is being used */
2697 if (cr3 && (cr4 & CR4_PAE0x00000020) &&
2698 !(vrs->vrs_msrs[VCPU_REGS_EFER0] & EFER_LMA0x00000400)) {
2699 if (vmwrite(VMCS_GUEST_PDPTE00x280A,
2700 vrs->vrs_crs[VCPU_REGS_PDPTE06])) {
2701 ret = EINVAL22;
2702 goto exit;
2703 }
2704
2705 if (vmwrite(VMCS_GUEST_PDPTE10x280C,
2706 vrs->vrs_crs[VCPU_REGS_PDPTE17])) {
2707 ret = EINVAL22;
2708 goto exit;
2709 }
2710
2711 if (vmwrite(VMCS_GUEST_PDPTE20x280E,
2712 vrs->vrs_crs[VCPU_REGS_PDPTE28])) {
2713 ret = EINVAL22;
2714 goto exit;
2715 }
2716
2717 if (vmwrite(VMCS_GUEST_PDPTE30x2810,
2718 vrs->vrs_crs[VCPU_REGS_PDPTE39])) {
2719 ret = EINVAL22;
2720 goto exit;
2721 }
2722 }
2723
2724 vrs->vrs_crs[VCPU_REGS_CR00] = cr0;
2725 vrs->vrs_crs[VCPU_REGS_CR43] = cr4;
2726
2727 /*
2728 * Select host MSRs to be loaded on exit
2729 */
2730 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_load_va;
2731 msr_store[0].vms_index = MSR_EFER0xc0000080;
2732 msr_store[0].vms_data = rdmsr(MSR_EFER0xc0000080);
2733 msr_store[1].vms_index = MSR_STAR0xc0000081;
2734 msr_store[1].vms_data = rdmsr(MSR_STAR0xc0000081);
2735 msr_store[2].vms_index = MSR_LSTAR0xc0000082;
2736 msr_store[2].vms_data = rdmsr(MSR_LSTAR0xc0000082);
2737 msr_store[3].vms_index = MSR_CSTAR0xc0000083;
2738 msr_store[3].vms_data = rdmsr(MSR_CSTAR0xc0000083);
2739 msr_store[4].vms_index = MSR_SFMASK0xc0000084;
2740 msr_store[4].vms_data = rdmsr(MSR_SFMASK0xc0000084);
2741 msr_store[5].vms_index = MSR_KERNELGSBASE0xc0000102;
2742 msr_store[5].vms_data = rdmsr(MSR_KERNELGSBASE0xc0000102);
2743 msr_store[6].vms_index = MSR_MISC_ENABLE0x1a0;
2744 msr_store[6].vms_data = rdmsr(MSR_MISC_ENABLE0x1a0);
2745
2746 /*
2747 * Select guest MSRs to be loaded on entry / saved on exit
2748 */
2749 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
2750
2751 msr_store[VCPU_REGS_EFER0].vms_index = MSR_EFER0xc0000080;
2752 msr_store[VCPU_REGS_STAR1].vms_index = MSR_STAR0xc0000081;
2753 msr_store[VCPU_REGS_LSTAR2].vms_index = MSR_LSTAR0xc0000082;
2754 msr_store[VCPU_REGS_CSTAR3].vms_index = MSR_CSTAR0xc0000083;
2755 msr_store[VCPU_REGS_SFMASK4].vms_index = MSR_SFMASK0xc0000084;
2756 msr_store[VCPU_REGS_KGSBASE5].vms_index = MSR_KERNELGSBASE0xc0000102;
2757 msr_store[VCPU_REGS_MISC_ENABLE6].vms_index = MSR_MISC_ENABLE0x1a0;
2758
2759 /*
2760 * Initialize MSR_MISC_ENABLE since it can't be read and populated from
2761 * vmd, and some of its content is based on the host.
2762 */
2763 msr_store[VCPU_REGS_MISC_ENABLE6].vms_data = rdmsr(MSR_MISC_ENABLE0x1a0);
2764 msr_store[VCPU_REGS_MISC_ENABLE6].vms_data &=
2765 ~(MISC_ENABLE_TCC(1 << 3) | MISC_ENABLE_PERF_MON_AVAILABLE(1 << 7) |
2766 MISC_ENABLE_EIST_ENABLED(1 << 16) | MISC_ENABLE_ENABLE_MONITOR_FSM(1 << 18) |
2767 MISC_ENABLE_xTPR_MESSAGE_DISABLE(1 << 23));
2768 msr_store[VCPU_REGS_MISC_ENABLE6].vms_data |=
2769 MISC_ENABLE_BTS_UNAVAILABLE(1 << 11) | MISC_ENABLE_PEBS_UNAVAILABLE(1 << 12);
2770
2771 /*
2772 * Currently we use the same count of entry/exit MSR loads/stores,
2773 * but this is not an architectural requirement.
2774 */
2775 if (vmwrite(VMCS_EXIT_MSR_STORE_COUNT0x400E, VMX_NUM_MSR_STORE7)) {
2776 DPRINTF("%s: error setting guest MSR exit store count\n",
2777 __func__);
2778 ret = EINVAL22;
2779 goto exit;
2780 }
2781
2782 if (vmwrite(VMCS_EXIT_MSR_LOAD_COUNT0x4010, VMX_NUM_MSR_STORE7)) {
2783 DPRINTF("%s: error setting guest MSR exit load count\n",
2784 __func__);
2785 ret = EINVAL22;
2786 goto exit;
2787 }
2788
2789 if (vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT0x4014, VMX_NUM_MSR_STORE7)) {
2790 DPRINTF("%s: error setting guest MSR entry load count\n",
2791 __func__);
2792 ret = EINVAL22;
2793 goto exit;
2794 }
2795
2796 if (vmwrite(VMCS_EXIT_STORE_MSR_ADDRESS0x2006,
2797 vcpu->vc_vmx_msr_exit_save_pa)) {
2798 DPRINTF("%s: error setting guest MSR exit store address\n",
2799 __func__);
2800 ret = EINVAL22;
2801 goto exit;
2802 }
2803
2804 if (vmwrite(VMCS_EXIT_LOAD_MSR_ADDRESS0x2008,
2805 vcpu->vc_vmx_msr_exit_load_pa)) {
2806 DPRINTF("%s: error setting guest MSR exit load address\n",
2807 __func__);
2808 ret = EINVAL22;
2809 goto exit;
2810 }
2811
2812 if (vmwrite(VMCS_ENTRY_LOAD_MSR_ADDRESS0x200A,
2813 vcpu->vc_vmx_msr_exit_save_pa)) {
2814 DPRINTF("%s: error setting guest MSR entry load address\n",
2815 __func__);
2816 ret = EINVAL22;
2817 goto exit;
2818 }
2819
2820 if (vmwrite(VMCS_MSR_BITMAP_ADDRESS0x2004,
2821 vcpu->vc_msr_bitmap_pa)) {
2822 DPRINTF("%s: error setting guest MSR bitmap address\n",
2823 __func__);
2824 ret = EINVAL22;
2825 goto exit;
2826 }
2827
2828 if (vmwrite(VMCS_CR4_MASK0x6002, CR4_VMXE0x00002000)) {
2829 DPRINTF("%s: error setting guest CR4 mask\n", __func__);
2830 ret = EINVAL22;
2831 goto exit;
2832 }
2833
2834 if (vmwrite(VMCS_CR0_MASK0x6000, CR0_NE0x00000020)) {
2835 DPRINTF("%s: error setting guest CR0 mask\n", __func__);
2836 ret = EINVAL22;
2837 goto exit;
2838 }
2839
2840 /*
2841 * Set up the VMCS for the register state we want during VCPU start.
2842 * This matches what the CPU state would be after a bootloader
2843 * transition to 'start'.
2844 */
2845 ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), 0, vrs);
2846
2847 /*
2848 * Set up the MSR bitmap
2849 */
2850 memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, PAGE_SIZE);
2851 vmx_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL0x03a);
2852 vmx_setmsrbrw(vcpu, MSR_SYSENTER_CS0x174);
2853 vmx_setmsrbrw(vcpu, MSR_SYSENTER_ESP0x175);
2854 vmx_setmsrbrw(vcpu, MSR_SYSENTER_EIP0x176);
2855 vmx_setmsrbrw(vcpu, MSR_EFER0xc0000080);
2856 vmx_setmsrbrw(vcpu, MSR_STAR0xc0000081);
2857 vmx_setmsrbrw(vcpu, MSR_LSTAR0xc0000082);
2858 vmx_setmsrbrw(vcpu, MSR_CSTAR0xc0000083);
2859 vmx_setmsrbrw(vcpu, MSR_SFMASK0xc0000084);
2860 vmx_setmsrbrw(vcpu, MSR_FSBASE0xc0000100);
2861 vmx_setmsrbrw(vcpu, MSR_GSBASE0xc0000101);
2862 vmx_setmsrbrw(vcpu, MSR_KERNELGSBASE0xc0000102);
2863
2864 vmx_setmsrbr(vcpu, MSR_MISC_ENABLE0x1a0);
2865 vmx_setmsrbr(vcpu, MSR_TSC0x010);
2866
2867 /* If host supports CET, pass through access to the guest. */
2868 if (rcr4() & CR4_CET0x00800000)
2869 vmx_setmsrbrw(vcpu, MSR_S_CET0x6a2);
2870
2871 /* XXX CR0 shadow */
2872 /* XXX CR4 shadow */
2873
2874 /* xcr0 power on default sets bit 0 (x87 state) */
2875 vcpu->vc_gueststate.vg_xcr0 = XFEATURE_X870x00000001 & xsave_mask;
2876
2877 /* XXX PAT shadow */
2878 vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT0x277);
2879
2880 /* Flush the VMCS */
2881 if (vmclear(&vcpu->vc_control_pa)) {
2882 DPRINTF("%s: vmclear failed\n", __func__);
2883 ret = EINVAL22;
2884 }
2885 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (0));
2886
2887exit:
2888 return (ret);
2889}
2890
2891/*
2892 * vcpu_init_vmx
2893 *
2894 * Intel VMX specific VCPU initialization routine.
2895 *
2896 * This function allocates various per-VCPU memory regions, sets up initial
2897 * VCPU VMCS controls, and sets initial register values.
2898 *
2899 * Parameters:
2900 * vcpu: the VCPU structure being initialized
2901 *
2902 * Return values:
2903 * 0: the VCPU was initialized successfully
2904 * ENOMEM: insufficient resources
2905 * EINVAL: an error occurred during VCPU initialization
2906 */
2907int
2908vcpu_init_vmx(struct vcpu *vcpu)
2909{
2910 struct vmcs *vmcs;
2911 uint64_t msr, eptp;
2912 uint32_t cr0, cr4;
2913 int ret = 0;
2914
2915 /* Allocate VMCS VA */
2916 vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, &kp_zero,
2917 &kd_waitok);
2918 vcpu->vc_vmx_vmcs_state = VMCS_CLEARED0;
2919
2920 if (!vcpu->vc_control_va)
2921 return (ENOMEM12);
2922
2923 /* Compute VMCS PA */
2924 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_control_va,
2925 (paddr_t *)&vcpu->vc_control_pa)) {
2926 ret = ENOMEM12;
2927 goto exit;
2928 }
2929
2930 /* Allocate MSR bitmap VA */
2931 vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, &kp_zero,
2932 &kd_waitok);
2933
2934 if (!vcpu->vc_msr_bitmap_va) {
2935 ret = ENOMEM12;
2936 goto exit;
2937 }
2938
2939 /* Compute MSR bitmap PA */
2940 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_msr_bitmap_va,
2941 (paddr_t *)&vcpu->vc_msr_bitmap_pa)) {
2942 ret = ENOMEM12;
2943 goto exit;
2944 }
2945
2946 /* Allocate MSR exit load area VA */
2947 vcpu->vc_vmx_msr_exit_load_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page,
2948 &kp_zero, &kd_waitok);
2949
2950 if (!vcpu->vc_vmx_msr_exit_load_va) {
2951 ret = ENOMEM12;
2952 goto exit;
2953 }
2954
2955 /* Compute MSR exit load area PA */
2956 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_vmx_msr_exit_load_va,
2957 &vcpu->vc_vmx_msr_exit_load_pa)) {
2958 ret = ENOMEM12;
2959 goto exit;
2960 }
2961
2962 /* Allocate MSR exit save area VA */
2963 vcpu->vc_vmx_msr_exit_save_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page,
2964 &kp_zero, &kd_waitok);
2965
2966 if (!vcpu->vc_vmx_msr_exit_save_va) {
2967 ret = ENOMEM12;
2968 goto exit;
2969 }
2970
2971 /* Compute MSR exit save area PA */
2972 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_vmx_msr_exit_save_va,
2973 &vcpu->vc_vmx_msr_exit_save_pa)) {
2974 ret = ENOMEM12;
2975 goto exit;
2976 }
2977
2978 /* Allocate MSR entry load area VA */
2979 vcpu->vc_vmx_msr_entry_load_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page,
2980 &kp_zero, &kd_waitok);
2981
2982 if (!vcpu->vc_vmx_msr_entry_load_va) {
2983 ret = ENOMEM12;
2984 goto exit;
2985 }
2986
2987 /* Compute MSR entry load area PA */
2988 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_vmx_msr_entry_load_va,
2989 &vcpu->vc_vmx_msr_entry_load_pa)) {
2990 ret = ENOMEM12;
2991 goto exit;
2992 }
2993
2994 vmcs = (struct vmcs *)vcpu->vc_control_va;
2995 vmcs->vmcs_revision = curcpu()->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision;
2996
2997 /*
2998 * Load the VMCS onto this PCPU so we can write registers
2999 */
3000 if (vmptrld(&vcpu->vc_control_pa)) {
3001 ret = EINVAL22;
3002 goto exit;
3003 }
3004
3005 /* Configure EPT Pointer */
3006 eptp = vcpu->vc_parent->vm_map->pmap->pm_pdirpa;
3007 msr = rdmsr(IA32_VMX_EPT_VPID_CAP0x48C);
3008 if (msr & IA32_EPT_VPID_CAP_PAGE_WALK_4(1ULL << 6)) {
3009 /* Page walk length 4 supported */
3010 eptp |= ((IA32_EPT_PAGE_WALK_LENGTH0x4 - 1) << 3);
3011 } else {
3012 DPRINTF("EPT page walk length 4 not supported\n");
3013 ret = EINVAL22;
3014 goto exit;
3015 }
3016
3017 if (msr & IA32_EPT_VPID_CAP_WB(1ULL << 14)) {
3018 /* WB cache type supported */
3019 eptp |= IA32_EPT_PAGING_CACHE_TYPE_WB0x6;
3020 } else
3021 DPRINTF("%s: no WB cache type available, guest VM will run "
3022 "uncached\n", __func__);
3023
3024 DPRINTF("Guest EPTP = 0x%llx\n", eptp);
3025 if (vmwrite(VMCS_GUEST_IA32_EPTP0x201A, eptp)) {
3026 DPRINTF("%s: error setting guest EPTP\n", __func__);
3027 ret = EINVAL22;
3028 goto exit;
3029 }
3030
3031 vcpu->vc_parent->vm_map->pmap->eptp = eptp;
3032
3033 /* Host CR0 */
3034 cr0 = rcr0() & ~CR0_TS0x00000008;
3035 if (vmwrite(VMCS_HOST_IA32_CR00x6C00, cr0)) {
3036 DPRINTF("%s: error writing host CR0\n", __func__);
3037 ret = EINVAL22;
3038 goto exit;
3039 }
3040
3041 /* Host CR4 */
3042 cr4 = rcr4();
3043 if (vmwrite(VMCS_HOST_IA32_CR40x6C04, cr4)) {
3044 DPRINTF("%s: error writing host CR4\n", __func__);
3045 ret = EINVAL22;
3046 goto exit;
3047 }
3048
3049 /* Host Segment Selectors */
3050 if (vmwrite(VMCS_HOST_IA32_CS_SEL0x0C02, GSEL(GCODE_SEL, SEL_KPL)(((1) << 3) | 0))) {
3051 DPRINTF("%s: error writing host CS selector\n", __func__);
3052 ret = EINVAL22;
3053 goto exit;
3054 }
3055
3056 if (vmwrite(VMCS_HOST_IA32_DS_SEL0x0C06, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) {
3057 DPRINTF("%s: error writing host DS selector\n", __func__);
3058 ret = EINVAL22;
3059 goto exit;
3060 }
3061
3062 if (vmwrite(VMCS_HOST_IA32_ES_SEL0x0C00, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) {
3063 DPRINTF("%s: error writing host ES selector\n", __func__);
3064 ret = EINVAL22;
3065 goto exit;
3066 }
3067
3068 if (vmwrite(VMCS_HOST_IA32_FS_SEL0x0C08, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) {
3069 DPRINTF("%s: error writing host FS selector\n", __func__);
3070 ret = EINVAL22;
3071 goto exit;
3072 }
3073
3074 if (vmwrite(VMCS_HOST_IA32_GS_SEL0x0C0A, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) {
3075 DPRINTF("%s: error writing host GS selector\n", __func__);
3076 ret = EINVAL22;
3077 goto exit;
3078 }
3079
3080 if (vmwrite(VMCS_HOST_IA32_SS_SEL0x0C04, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) {
3081 DPRINTF("%s: error writing host SS selector\n", __func__);
3082 ret = EINVAL22;
3083 goto exit;
3084 }
3085
3086 if (vmwrite(VMCS_HOST_IA32_TR_SEL0x0C0C, GSYSSEL(GPROC0_SEL, SEL_KPL)((((0) << 4) + (6 << 3)) | 0))) {
3087 DPRINTF("%s: error writing host TR selector\n", __func__);
3088 ret = EINVAL22;
3089 goto exit;
3090 }
3091
3092 /* Host IDTR base */
3093 if (vmwrite(VMCS_HOST_IA32_IDTR_BASE0x6C0E, idt_vaddr)) {
3094 DPRINTF("%s: error writing host IDTR base\n", __func__);
3095 ret = EINVAL22;
3096 goto exit;
3097 }
3098
3099 /* VMCS link */
3100 if (vmwrite(VMCS_LINK_POINTER0x2800, VMX_VMCS_PA_CLEAR0xFFFFFFFFFFFFFFFFUL)) {
3101 DPRINTF("%s: error writing VMCS link pointer\n", __func__);
3102 ret = EINVAL22;
3103 goto exit;
3104 }
3105
3106 /* Flush the initial VMCS */
3107 if (vmclear(&vcpu->vc_control_pa)) {
3108 DPRINTF("%s: vmclear failed\n", __func__);
3109 ret = EINVAL22;
3110 }
3111
3112exit:
3113 if (ret)
3114 vcpu_deinit_vmx(vcpu);
3115
3116 return (ret);
3117}
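/*
 * Illustrative sketch of the EPTP value assembled above (hypothetical
 * helper): bits 2:0 carry the paging-structure memory type (0x6 = WB),
 * bits 5:3 the page-walk length minus one, and the page-aligned upper
 * bits the PML4 physical address.
 */
static inline uint64_t
example_make_eptp(uint64_t pml4_pa)
{
	return (pml4_pa & ~0xfffULL) | ((4 - 1) << 3) | 0x6;
}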
3118
3119/*
3120 * vcpu_reset_regs
3121 *
3122 * Resets a vcpu's registers to the provided state
3123 *
3124 * Parameters:
3125 * vcpu: the vcpu whose registers shall be reset
3126 * vrs: the desired register state
3127 *
3128 * Return values:
3129 * 0: the vcpu's registers were successfully reset
3130 * !0: the vcpu's registers could not be reset (see arch-specific reset
3131 * function for various values that can be returned here)
3132 */
3133int
3134vcpu_reset_regs(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
3135{
3136 int ret;
3137
3138 if (vmm_softc->mode == VMM_MODE_EPT)
3139 ret = vcpu_reset_regs_vmx(vcpu, vrs);
3140 else if (vmm_softc->mode == VMM_MODE_RVI)
3141 ret = vcpu_reset_regs_svm(vcpu, vrs);
3142 else
3143 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
3144
3145 return (ret);
3146}
3147
3148/*
3149 * vcpu_init_svm
3150 *
3151 * AMD SVM specific VCPU initialization routine.
3152 *
3153 * This function allocates various per-VCPU memory regions, sets up initial
3154 * VCPU VMCB controls, and sets initial register values.
3155 *
3156 * Parameters:
3157 * vcpu: the VCPU structure being initialized
3158 *
3159 * Return values:
3160 * 0: the VCPU was initialized successfully
3161 * ENOMEM: insufficient resources
3162 * EINVAL: an error occurred during VCPU initialization
3163 */
3164int
3165vcpu_init_svm(struct vcpu *vcpu)
3166{
3167 int ret = 0;
3168
3169 /* Allocate VMCB VA */
3170 vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, &kp_zero,
3171 &kd_waitok);
3172
3173 if (!vcpu->vc_control_va)
3174 return (ENOMEM12);
3175
3176 /* Compute VMCB PA */
3177 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_control_va,
3178 (paddr_t *)&vcpu->vc_control_pa)) {
3179 ret = ENOMEM12;
3180 goto exit;
3181 }
3182
3183 DPRINTF("%s: VMCB va @ 0x%llx, pa @ 0x%llx\n", __func__,
3184 (uint64_t)vcpu->vc_control_va,
3185 (uint64_t)vcpu->vc_control_pa);
3186
3187
3188 /* Allocate MSR bitmap VA (2 pages) */
3189 vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(2 * PAGE_SIZE(1 << 12), &kv_any,
3190 &vmm_kp_contig, &kd_waitok);
3191
3192 if (!vcpu->vc_msr_bitmap_va) {
3193 ret = ENOMEM12;
3194 goto exit;
3195 }
3196
3197 /* Compute MSR bitmap PA */
3198 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_msr_bitmap_va,
3199 (paddr_t *)&vcpu->vc_msr_bitmap_pa)) {
3200 ret = ENOMEM12;
3201 goto exit;
3202 }
3203
3204 DPRINTF("%s: MSR bitmap va @ 0x%llx, pa @ 0x%llx\n", __func__,
3205 (uint64_t)vcpu->vc_msr_bitmap_va,
3206 (uint64_t)vcpu->vc_msr_bitmap_pa);
3207
3208 /* Allocate host state area VA */
3209 vcpu->vc_svm_hsa_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page,
3210 &kp_zero, &kd_waitok);
3211
3212 if (!vcpu->vc_svm_hsa_va) {
3213 ret = ENOMEM12;
3214 goto exit;
3215 }
3216
3217 /* Compute host state area PA */
3218 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_svm_hsa_va,
3219 &vcpu->vc_svm_hsa_pa)) {
3220 ret = ENOMEM12;
3221 goto exit;
3222 }
3223
3224 DPRINTF("%s: HSA va @ 0x%llx, pa @ 0x%llx\n", __func__,
3225 (uint64_t)vcpu->vc_svm_hsa_va,
3226 (uint64_t)vcpu->vc_svm_hsa_pa);
3227
3228 /* Allocate IOIO area VA (3 pages) */
3229 vcpu->vc_svm_ioio_va = (vaddr_t)km_alloc(3 * PAGE_SIZE(1 << 12), &kv_any,
3230 &vmm_kp_contig, &kd_waitok);
3231
3232 if (!vcpu->vc_svm_ioio_va) {
3233 ret = ENOMEM12;
3234 goto exit;
3235 }
3236
3237 /* Compute IOIO area PA */
3238 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_svm_ioio_va,
3239 &vcpu->vc_svm_ioio_pa)) {
3240 ret = ENOMEM12;
3241 goto exit;
3242 }
3243
3244 DPRINTF("%s: IOIO va @ 0x%llx, pa @ 0x%llx\n", __func__,
3245 (uint64_t)vcpu->vc_svm_ioio_va,
3246 (uint64_t)vcpu->vc_svm_ioio_pa);
3247
3248exit:
3249 if (ret)
3250 vcpu_deinit_svm(vcpu);
3251
3252 return (ret);
3253}
3254
3255/*
3256 * vcpu_init
3257 *
3258 * Calls the architecture-specific VCPU init routine
3259 */
3260int
3261vcpu_init(struct vcpu *vcpu)
3262{
3263 int ret = 0;
3264
3265 vcpu->vc_virt_mode = vmm_softc->mode;
3266 vcpu->vc_state = VCPU_STATE_STOPPED;
3267 vcpu->vc_vpid = 0;
3268 vcpu->vc_pvclock_system_gpa = 0;
3269 vcpu->vc_last_pcpu = NULL((void *)0);
3270
3271 rw_init(&vcpu->vc_lock, "vcpu")_rw_init_flags(&vcpu->vc_lock, "vcpu", 0, ((void *)0));
3272
3273 /* Shadow PAT MSR, starting with host's value. */
3274 vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT0x277);
3275
3276 if (vmm_softc->mode == VMM_MODE_EPT)
3277 ret = vcpu_init_vmx(vcpu);
3278 else if (vmm_softc->mode == VMM_MODE_RVI)
3279 ret = vcpu_init_svm(vcpu);
3280 else
3281 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
3282
3283 return (ret);
3284}
3285
3286/*
3287 * vcpu_deinit_vmx
3288 *
3289 * Deinitializes the vcpu described by 'vcpu'
3290 *
3291 * Parameters:
3292 * vcpu: the vcpu to be deinited
3293 */
3294void
3295vcpu_deinit_vmx(struct vcpu *vcpu)
3296{
3297 if (vcpu->vc_control_va) {
3298 km_free((void *)vcpu->vc_control_va, PAGE_SIZE(1 << 12),
3299 &kv_page, &kp_zero);
3300 vcpu->vc_control_va = 0;
3301 }
3302 if (vcpu->vc_vmx_msr_exit_save_va) {
3303 km_free((void *)vcpu->vc_vmx_msr_exit_save_va,
3304 PAGE_SIZE(1 << 12), &kv_page, &kp_zero);
3305 vcpu->vc_vmx_msr_exit_save_va = 0;
3306 }
3307 if (vcpu->vc_vmx_msr_exit_load_va) {
3308 km_free((void *)vcpu->vc_vmx_msr_exit_load_va,
3309 PAGE_SIZE(1 << 12), &kv_page, &kp_zero);
3310 vcpu->vc_vmx_msr_exit_load_va = 0;
3311 }
3312 if (vcpu->vc_vmx_msr_entry_load_va) {
3313 km_free((void *)vcpu->vc_vmx_msr_entry_load_va,
3314 PAGE_SIZE(1 << 12), &kv_page, &kp_zero);
3315 vcpu->vc_vmx_msr_entry_load_va = 0;
3316 }
3317
3318 if (vcpu->vc_vmx_vpid_enabled)
3319 vmm_free_vpid(vcpu->vc_vpid);
3320}
3321
3322/*
3323 * vcpu_deinit_svm
3324 *
3325 * Deinitializes the vcpu described by 'vcpu'
3326 *
3327 * Parameters:
3328 * vcpu: the vcpu to be deinited
3329 */
3330void
3331vcpu_deinit_svm(struct vcpu *vcpu)
3332{
3333 if (vcpu->vc_control_va) {
3334 km_free((void *)vcpu->vc_control_va, PAGE_SIZE(1 << 12), &kv_page,
3335 &kp_zero);
3336 vcpu->vc_control_va = 0;
3337 }
3338 if (vcpu->vc_msr_bitmap_va) {
3339 km_free((void *)vcpu->vc_msr_bitmap_va, 2 * PAGE_SIZE(1 << 12), &kv_any,
3340 &vmm_kp_contig);
3341 vcpu->vc_msr_bitmap_va = 0;
3342 }
3343 if (vcpu->vc_svm_hsa_va) {
3344 km_free((void *)vcpu->vc_svm_hsa_va, PAGE_SIZE(1 << 12), &kv_page,
3345 &kp_zero);
3346 vcpu->vc_svm_hsa_va = 0;
3347 }
3348 if (vcpu->vc_svm_ioio_va) {
3349 km_free((void *)vcpu->vc_svm_ioio_va, 3 * PAGE_SIZE(1 << 12), &kv_any,
3350 &vmm_kp_contig);
3351 vcpu->vc_svm_ioio_va = 0;
3352 }
3353
3354 vmm_free_vpid(vcpu->vc_vpid);
3355}
3356
3357/*
3358 * vcpu_deinit
3359 *
3360 * Calls the architecture-specific VCPU deinit routine
3361 *
3362 * Parameters:
3363 * vcpu: the vcpu to be deinited
3364 */
3365void
3366vcpu_deinit(struct vcpu *vcpu)
3367{
3368 if (vmm_softc->mode == VMM_MODE_EPT)
3369 vcpu_deinit_vmx(vcpu);
3370 else if (vmm_softc->mode == VMM_MODE_RVI)
3371 vcpu_deinit_svm(vcpu);
3372 else
3373 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
3374}
3375
3376/*
3377 * vcpu_vmx_check_cap
3378 *
3379 * Checks if the 'cap' bit in the 'msr' MSR can be set or cleared (set = 1
3380 * or set = 0, respectively).
3381 *
3382 * When considering 'msr', we check to see if true controls are available,
3383 * and use those if so.
3384 *
3385 * Returns 1 if 'cap' can be set/cleared as requested, 0 otherwise.
3386 */
3387int
3388vcpu_vmx_check_cap(struct vcpu *vcpu, uint32_t msr, uint32_t cap, int set)
3389{
3390 uint64_t ctl;
3391
3392 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
3393 switch (msr) {
3394 case IA32_VMX_PINBASED_CTLS0x481:
3395 ctl = vcpu->vc_vmx_true_pinbased_ctls;
3396 break;
3397 case IA32_VMX_PROCBASED_CTLS0x482:
3398 ctl = vcpu->vc_vmx_true_procbased_ctls;
3399 break;
3400 case IA32_VMX_PROCBASED2_CTLS0x48B:
3401 ctl = vcpu->vc_vmx_procbased2_ctls;
3402 break;
3403 case IA32_VMX_ENTRY_CTLS0x484:
3404 ctl = vcpu->vc_vmx_true_entry_ctls;
3405 break;
3406 case IA32_VMX_EXIT_CTLS0x483:
3407 ctl = vcpu->vc_vmx_true_exit_ctls;
3408 break;
3409 default:
3410 return (0);
3411 }
3412 } else {
3413 switch (msr) {
3414 case IA32_VMX_PINBASED_CTLS0x481:
3415 ctl = vcpu->vc_vmx_pinbased_ctls;
3416 break;
3417 case IA32_VMX_PROCBASED_CTLS0x482:
3418 ctl = vcpu->vc_vmx_procbased_ctls;
3419 break;
3420 case IA32_VMX_PROCBASED2_CTLS0x48B:
3421 ctl = vcpu->vc_vmx_procbased2_ctls;
3422 break;
3423 case IA32_VMX_ENTRY_CTLS0x484:
3424 ctl = vcpu->vc_vmx_entry_ctls;
3425 break;
3426 case IA32_VMX_EXIT_CTLS0x483:
3427 ctl = vcpu->vc_vmx_exit_ctls;
3428 break;
3429 default:
3430 return (0);
3431 }
3432 }
3433
3434 if (set) {
3435 /* Check bit 'cap << 32', must be !0 */
3436 return (ctl & ((uint64_t)cap << 32)) != 0;
3437 } else {
3438 /* Check bit 'cap', must be 0 */
3439 return (ctl & cap) == 0;
3440 }
3441}
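The allowed-0/allowed-1 split tested above is easy to misread, so here is a minimal standalone sketch (not part of vmm_machdep.c; the MSR value is invented) of the same two checks: the low 32 bits are the allowed-0 settings (a 1 there means the control bit is fixed to 1), the high 32 bits are the allowed-1 settings (a 1 there means the bit may be set).

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t ctl = 0x0000003f00000016ULL;	/* hypothetical capability MSR */
	uint32_t cap = (1U << 2);		/* control bit 2 */

	/* Same two tests as vcpu_vmx_check_cap() above. */
	int can_set = (ctl & ((uint64_t)cap << 32)) != 0;
	int can_clear = (ctl & cap) == 0;

	printf("bit 2: can set=%d, can clear=%d\n", can_set, can_clear);
	return (0);
}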
3442
3443/*
3444 * vcpu_vmx_compute_ctrl
3445 *
3446 * Computes the appropriate control value, given the supplied parameters
3447 * and CPU capabilities.
3448 *
3449 * Intel has made somewhat of a mess of this computation - it is described
3450 * using no fewer than three different approaches, spread across many
3451 * pages of the SDM. Further compounding the problem is the fact that now
3452 * we have "true controls" for each type of "control", and each needs to
3453 * be examined to get the calculation right, but only if "true" controls
3454 * are present on the CPU we're on.
3455 *
3456 * Parameters:
3457 * ctrlval: the control value, as read from the CPU MSR
3458 * ctrl: which control is being set (eg, pinbased, procbased, etc)
3459 * want0: the set of desired 0 bits
3460 * want1: the set of desired 1 bits
3461 * out: (out) the correct value to write into the VMCS for this VCPU,
3462 * for the 'ctrl' desired.
3463 *
3464 * Returns 0 if successful, or EINVAL if the supplied parameters define
3465 * an unworkable control setup.
3466 */
3467int
3468vcpu_vmx_compute_ctrl(uint64_t ctrlval, uint16_t ctrl, uint32_t want1,
3469 uint32_t want0, uint32_t *out)
3470{
3471 int i, set, clear;
3472
3473 *out = 0;
3474
3475 /*
3476 * The Intel SDM gives three formulae for determining which bits to
3477 * set/clear for a given control and desired functionality. Formula
3478 * 1 is the simplest but disallows use of newer features that are
3479 * enabled by functionality in later CPUs.
3480 *
3481 * Formulas 2 and 3 allow such extra functionality. We use formula
3482 * 2 - this requires us to know the identity of controls in the
3483 * "default1" class for each control register, but allows us to not
3484 * have to pass along and/or query both sets of capability MSRs for
3485 * each control lookup. This makes the code slightly longer,
3486 * however.
3487 */
3488 for (i = 0; i < 32; i++) {
3489 /* Figure out if we can set and / or clear this bit */
3490 set = (ctrlval & (1ULL << (i + 32))) != 0;
3491 clear = ((1ULL << i) & ((uint64_t)ctrlval)) == 0;
3492
3493 /* If the bit can't be set nor cleared, something's wrong */
3494 if (!set && !clear)
3495 return (EINVAL22);
3496
3497 /*
3498 * Formula 2.c.i - "If the relevant VMX capability MSR
3499 * reports that a control has a single setting, use that
3500 * setting."
3501 */
3502 if (set && !clear) {
3503 if (want0 & (1ULL << i))
3504 return (EINVAL22);
3505 else
3506 *out |= (1ULL << i);
3507 } else if (clear && !set) {
3508 if (want1 & (1ULL << i))
3509 return (EINVAL22);
3510 else
3511 *out &= ~(1ULL << i);
3512 } else {
3513 /*
3514 * 2.c.ii - "If the relevant VMX capability MSR
3515 * reports that a control can be set to 0 or 1
3516 * and that control's meaning is known to the VMM,
3517 * set the control based on the functionality desired."
3518 */
3519 if (want1 & (1ULL << i))
3520 *out |= (1ULL << i);
3521 else if (want0 & (1 << i))
3522 *out &= ~(1ULL << i);
3523 else {
3524 /*
3525 * ... assuming the control's meaning is not
3526 * known to the VMM ...
3527 *
3528 * 2.c.iii - "If the relevant VMX capability
3529 * MSR reports that a control can be set to 0
3530 * or 1 and the control is not in the default1
3531 * class, set the control to 0."
3532 *
3533 * 2.c.iv - "If the relevant VMX capability
3534 * MSR reports that a control can be set to 0
3535 * or 1 and the control is in the default1
3536 * class, set the control to 1."
3537 */
3538 switch (ctrl) {
3539 case IA32_VMX_PINBASED_CTLS0x481:
3540 case IA32_VMX_TRUE_PINBASED_CTLS0x48D:
3541 /*
3542 * A.3.1 - default1 class of pinbased
3543 * controls comprises bits 1,2,4
3544 */
3545 switch (i) {
3546 case 1:
3547 case 2:
3548 case 4:
3549 *out |= (1ULL << i);
3550 break;
3551 default:
3552 *out &= ~(1ULL << i);
3553 break;
3554 }
3555 break;
3556 case IA32_VMX_PROCBASED_CTLS0x482:
3557 case IA32_VMX_TRUE_PROCBASED_CTLS0x48E:
3558 /*
3559 * A.3.2 - default1 class of procbased
3560 * controls comprises bits 1, 4-6, 8,
3561 * 13-16, 26
3562 */
3563 switch (i) {
3564 case 1:
3565 case 4 ... 6:
3566 case 8:
3567 case 13 ... 16:
3568 case 26:
3569 *out |= (1ULL << i);
3570 break;
3571 default:
3572 *out &= ~(1ULL << i);
3573 break;
3574 }
3575 break;
3576 /*
3577 * Unknown secondary procbased controls
3578 * can always be set to 0
3579 */
3580 case IA32_VMX_PROCBASED2_CTLS0x48B:
3581 *out &= ~(1ULL << i);
3582 break;
3583 case IA32_VMX_EXIT_CTLS0x483:
3584 case IA32_VMX_TRUE_EXIT_CTLS0x48F:
3585 /*
3586 * A.4 - default1 class of exit
3587 * controls comprises bits 0-8, 10,
3588 * 11, 13, 14, 16, 17
3589 */
3590 switch (i) {
3591 case 0 ... 8:
3592 case 10 ... 11:
3593 case 13 ... 14:
3594 case 16 ... 17:
3595 *out |= (1ULL << i);
3596 break;
3597 default:
3598 *out &= ~(1ULL << i);
3599 break;
3600 }
3601 break;
3602 case IA32_VMX_ENTRY_CTLS0x484:
3603 case IA32_VMX_TRUE_ENTRY_CTLS0x490:
3604 /*
3605 * A.5 - default1 class of entry
3606 * controls comprises bits 0-8, 12
3607 */
3608 switch (i) {
3609 case 0 ... 8:
3610 case 12:
3611 *out |= (1ULL << i);
3612 break;
3613 default:
3614 *out &= ~(1ULL << i);
3615 break;
3616 }
3617 break;
3618 }
3619 }
3620 }
3621 }
3622
3623 return (0);
3624}
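As a companion to the formula-2 discussion above, the following standalone sketch (not part of vmm_machdep.c; the capability MSR value is invented) prints which of the three cases each of the first eight control bits falls into, using the same set/clear tests as the loop above.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t ctrlval = 0x000000ff00000016ULL;	/* hypothetical MSR */
	int i;

	for (i = 0; i < 8; i++) {
		/* "set": allowed-1 bit (i + 32); "clear": allowed-0 bit i. */
		int set = (ctrlval & (1ULL << (i + 32))) != 0;
		int clear = ((1ULL << i) & ctrlval) == 0;

		if (set && !clear)
			printf("bit %d: fixed to 1\n", i);	/* 2.c.i */
		else if (clear && !set)
			printf("bit %d: fixed to 0\n", i);	/* 2.c.i */
		else if (set && clear)
			printf("bit %d: flexible\n", i);	/* 2.c.ii-iv */
		else
			printf("bit %d: inconsistent MSR\n", i);
	}
	return (0);
}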
3625
3626/*
3627 * vm_run
3628 *
3629 * Run the vm / vcpu specified by 'vrp'
3630 *
3631 * Parameters:
3632 * vrp: structure defining the VM to run
3633 *
3634 * Return value:
3635 * ENOENT: the VM defined in 'vrp' could not be located
3636 * EBUSY: the VM defined in 'vrp' is already running
3637 * EFAULT: error copying data from userspace (vmd) on return from previous
3638 * exit.
3639 * EAGAIN: help is needed from vmd(8) (device I/O or an exit vmm(4) cannot
3640 * handle in-kernel)
3641 * 0: the run loop exited and no help is needed from vmd(8)
3642 */
3643int
3644vm_run(struct vm_run_params *vrp)
3645{
3646 struct vm *vm;
3647 struct vcpu *vcpu;
3648 int ret = 0;
3649 u_int old, next;
3650
3651 /*
3652 * Find desired VM
3653 */
3654 ret = vm_find(vrp->vrp_vm_id, &vm);
3655 if (ret)
1
Assuming 'ret' is 0
2
Taking false branch
3656 return (ret);
3657
3658 vcpu = vm_find_vcpu(vm, vrp->vrp_vcpu_id);
3659 if (vcpu == NULL((void *)0)) {
3
Assuming 'vcpu' is not equal to NULL
4
Taking false branch
3660 ret = ENOENT2;
3661 goto out;
3662 }
3663
3664 /*
3665 * Attempt to transition from VCPU_STATE_STOPPED -> VCPU_STATE_RUNNING.
3666 * Failure to make the transition indicates the VCPU is busy.
3667 */
3668 rw_enter_write(&vcpu->vc_lock);
3669 old = VCPU_STATE_STOPPED;
3670 next = VCPU_STATE_RUNNING;
3671 if (atomic_cas_uint(&vcpu->vc_state, old, next)_atomic_cas_uint((&vcpu->vc_state), (old), (next)) != old) {
5
Assuming the condition is false
6
Taking false branch
3672 ret = EBUSY16;
3673 goto out_unlock;
3674 }
3675
3676 /*
3677 * We may be returning from userland helping us from the last exit.
3678 * If so (vrp_continue == 1), copy in the exit data from vmd. The
3679 * exit data will be consumed before the next entry (this typically
3680 * comprises VCPU register changes as the result of vmd(8)'s actions).
3681 */
3682 if (vrp->vrp_continue) {
7
Assuming field 'vrp_continue' is 0
8
Taking false branch
3683 if (copyin(vrp->vrp_exit, &vcpu->vc_exit,
3684 sizeof(struct vm_exit)) == EFAULT14) {
3685 ret = EFAULT14;
3686 goto out_unlock;
3687 }
3688 }
3689
3690 WRITE_ONCE(vcpu->vc_curcpu, curcpu());
3691 /* Run the VCPU specified in vrp */
3692 if (vcpu->vc_virt_mode == VMM_MODE_EPT) {
9
Assuming field 'vc_virt_mode' is not equal to VMM_MODE_EPT
10
Taking false branch
3693 ret = vcpu_run_vmx(vcpu, vrp);
3694 } else if (vcpu->vc_virt_mode == VMM_MODE_RVI) {
11
Assuming field 'vc_virt_mode' is equal to VMM_MODE_RVI
12
Taking true branch
3695 ret = vcpu_run_svm(vcpu, vrp);
13
Calling 'vcpu_run_svm'
3696 }
3697 WRITE_ONCE(vcpu->vc_curcpu, NULL);
3698
3699 if (ret == 0 || ret == EAGAIN35) {
3700 /* If we are exiting, populate exit data so vmd can help. */
3701 vrp->vrp_exit_reason = (ret == 0) ? VM_EXIT_NONE0xFFFF
3702 : vcpu->vc_gueststate.vg_exit_reason;
3703 vrp->vrp_irqready = vcpu->vc_irqready;
3704 vcpu->vc_state = VCPU_STATE_STOPPED;
3705
3706 if (copyout(&vcpu->vc_exit, vrp->vrp_exit,
3707 sizeof(struct vm_exit)) == EFAULT14) {
3708 ret = EFAULT14;
3709 } else
3710 ret = 0;
3711 } else {
3712 vrp->vrp_exit_reason = VM_EXIT_TERMINATED0xFFFE;
3713 vcpu->vc_state = VCPU_STATE_TERMINATED;
3714 }
3715out_unlock:
3716 rw_exit_write(&vcpu->vc_lock);
3717out:
3718 refcnt_rele_wake(&vm->vm_refcnt);
3719 return (ret);
3720}
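The STOPPED -> RUNNING guard in vm_run() is why a second concurrent run attempt comes back with EBUSY. A minimal userland sketch of the same compare-and-swap idea follows (not part of vmm_machdep.c; the state names and the bare 16 are stand-ins for the kernel's definitions).

#include <stdatomic.h>
#include <stdio.h>

enum { STATE_STOPPED, STATE_RUNNING };

static _Atomic unsigned int vc_state = STATE_STOPPED;

/* Returns 0 if we won the transition, nonzero (EBUSY-like) otherwise. */
static int
try_start(void)
{
	unsigned int old = STATE_STOPPED;

	if (!atomic_compare_exchange_strong(&vc_state, &old, STATE_RUNNING))
		return (16);	/* plays the role of EBUSY above */
	return (0);
}

int
main(void)
{
	printf("first start: %d\n", try_start());	/* 0 */
	printf("second start: %d\n", try_start());	/* 16: already running */
	return (0);
}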
3721
3722/*
3723 * vmm_fpurestore
3724 *
3725 * Restore the guest's FPU state, saving the existing userland thread's
3726 * FPU context if necessary. Must be called with interrupts disabled.
3727 */
3728int
3729vmm_fpurestore(struct vcpu *vcpu)
3730{
3731 struct cpu_info *ci = curcpu();
3732
3733 rw_assert_wrlock(&vcpu->vc_lock);
3734
3735 /* save vmm's FPU state if we haven't already */
3736 if (ci->ci_pflags & CPUPF_USERXSTATE0x02) {
30
Assuming the condition is false
31
Taking false branch
3737 ci->ci_pflags &= ~CPUPF_USERXSTATE0x02;
3738 fpusavereset(&curproc->p_addr->u_pcb.pcb_savefpu);
3739 }
3740
3741 if (vcpu->vc_fpuinited)
32
Assuming field 'vc_fpuinited' is 0
33
Taking false branch
3742 xrstor_kern(&vcpu->vc_g_fpu, xsave_mask);
3743
3744 if (xsave_mask) {
34
Assuming 'xsave_mask' is 0
35
Taking false branch
3745 /* Restore guest %xcr0 */
3746 if (xsetbv_user(0, vcpu->vc_gueststate.vg_xcr0)) {
3747 DPRINTF("%s: guest attempted to set invalid bits in "
3748 "xcr0 (guest %%xcr0=0x%llx, host %%xcr0=0x%llx)\n",
3749 __func__, vcpu->vc_gueststate.vg_xcr0, xsave_mask);
3750 return EINVAL22;
3751 }
3752 }
3753
3754 return 0;
3755}
3756
3757/*
3758 * vmm_fpusave
3759 *
3760 * Save the guest's FPU state. Must be called with interrupts disabled.
3761 */
3762void
3763vmm_fpusave(struct vcpu *vcpu)
3764{
3765 rw_assert_wrlock(&vcpu->vc_lock);
3766
3767 if (xsave_mask) {
3768 /* Save guest %xcr0 */
3769 vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
3770
3771 /* Restore host %xcr0 */
3772 xsetbv(0, xsave_mask & XFEATURE_XCR0_MASK);
3773 }
3774
3775 /*
3776 * Save full copy of FPU state - guest content is always
3777 * a subset of host's save area (see xsetbv exit handler)
3778 */
3779 fpusavereset(&vcpu->vc_g_fpu);
3780 vcpu->vc_fpuinited = 1;
3781}
3782
3783/*
3784 * vmm_translate_gva
3785 *
3786 * Translates a guest virtual address to a guest physical address by walking
3787 * the currently active page table (if needed).
3788 *
3789 * Note - this function can possibly alter the supplied VCPU state.
3790 * Specifically, it may inject exceptions depending on the current VCPU
3791 * configuration, and may alter %cr2 on #PF. Consequently, this function
3792 * should only be used as part of instruction emulation.
3793 *
3794 * Parameters:
3795 * vcpu: The VCPU this translation should be performed for (guest MMU settings
3796 * are gathered from this VCPU)
3797 * va: virtual address to translate
3798 * pa: pointer to paddr_t variable that will receive the translated physical
3799 * address. 'pa' is unchanged on error.
3800 * mode: one of PROT_READ, PROT_WRITE, PROT_EXEC indicating the mode in which
3801 * the address should be translated
3802 *
3803 * Return values:
3804 * 0: the address was successfully translated - 'pa' contains the physical
3805 * address currently mapped by 'va'.
3806 * EFAULT: the PTE for 'VA' is unmapped. A #PF will be injected in this case
3807 * and %cr2 set in the vcpu structure.
3808 * EINVAL: an error occurred reading paging table structures
3809 */
3810int
3811vmm_translate_gva(struct vcpu *vcpu, uint64_t va, uint64_t *pa, int mode)
3812{
3813 int level, shift, pdidx;
3814 uint64_t pte, pt_paddr, pte_paddr, mask, low_mask, high_mask;
3815 uint64_t shift_width, pte_size, *hva;
3816 paddr_t hpa;
3817 struct vcpu_reg_state vrs;
3818
3819 level = 0;
3820
3821 if (vmm_softc->mode == VMM_MODE_EPT) {
3822 if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), 1, &vrs))
3823 return (EINVAL22);
3824 } else if (vmm_softc->mode == VMM_MODE_RVI) {
3825 if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), &vrs))
3826 return (EINVAL22);
3827 } else {
3828 printf("%s: unknown vmm mode", __func__);
3829 return (EINVAL22);
3830 }
3831
3832 DPRINTF("%s: guest %%cr0=0x%llx, %%cr3=0x%llx\n", __func__,
3833 vrs.vrs_crs[VCPU_REGS_CR0], vrs.vrs_crs[VCPU_REGS_CR3]);
3834
3835 if (!(vrs.vrs_crs[VCPU_REGS_CR00] & CR0_PG0x80000000)) {
3836 DPRINTF("%s: unpaged, va=pa=0x%llx\n", __func__,
3837 va);
3838 *pa = va;
3839 return (0);
3840 }
3841
3842 pt_paddr = vrs.vrs_crs[VCPU_REGS_CR32];
3843
3844 if (vrs.vrs_crs[VCPU_REGS_CR00] & CR0_PE0x00000001) {
3845 if (vrs.vrs_crs[VCPU_REGS_CR43] & CR4_PAE0x00000020) {
3846 pte_size = sizeof(uint64_t);
3847 shift_width = 9;
3848
3849 if (vrs.vrs_msrs[VCPU_REGS_EFER0] & EFER_LMA0x00000400) {
3850 level = 4;
3851 mask = L4_MASK0x0000ff8000000000UL;
3852 shift = L4_SHIFT39;
3853 } else {
3854 level = 3;
3855 mask = L3_MASK0x0000007fc0000000UL;
3856 shift = L3_SHIFT30;
3857 }
3858 } else {
3859 level = 2;
3860 shift_width = 10;
3861 mask = 0xFFC00000;
3862 shift = 22;
3863 pte_size = sizeof(uint32_t);
3864 }
3865 } else {
3866 return (EINVAL22);
3867 }
3868
3869 DPRINTF("%s: pte size=%lld level=%d mask=0x%llx, shift=%d, "
3870 "shift_width=%lld\n", __func__, pte_size, level, mask, shift,
3871 shift_width);
3872
3873 /* XXX: Check for R bit in segment selector and set A bit */
3874
3875 for (;level > 0; level--) {
3876 pdidx = (va & mask) >> shift;
3877 pte_paddr = (pt_paddr) + (pdidx * pte_size);
3878
3879 DPRINTF("%s: read pte level %d @ GPA 0x%llx\n", __func__,
3880 level, pte_paddr);
3881 if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, pte_paddr,
3882 &hpa)) {
3883 DPRINTF("%s: cannot extract HPA for GPA 0x%llx\n",
3884 __func__, pte_paddr);
3885 return (EINVAL22);
3886 }
3887
3888 hpa = hpa | (pte_paddr & 0xFFF);
3889 hva = (uint64_t *)PMAP_DIRECT_MAP(hpa);
3890 DPRINTF("%s: GPA 0x%llx -> HPA 0x%llx -> HVA 0x%llx\n",
3891 __func__, pte_paddr, (uint64_t)hpa, (uint64_t)hva);
3892 if (pte_size == 8)
3893 pte = *hva;
3894 else
3895 pte = *(uint32_t *)hva;
3896
3897 DPRINTF("%s: PTE @ 0x%llx = 0x%llx\n", __func__, pte_paddr,
3898 pte);
3899
3900 /* XXX: Set CR2 */
3901 if (!(pte & PG_V0x0000000000000001UL))
3902 return (EFAULT14);
3903
3904 /* XXX: Check for SMAP */
3905 if ((mode == PROT_WRITE0x02) && !(pte & PG_RW0x0000000000000002UL))
3906 return (EPERM1);
3907
3908 if ((vcpu->vc_exit.cpl > 0) && !(pte & PG_u0x0000000000000004UL))
3909 return (EPERM1);
3910
3911 pte = pte | PG_U0x0000000000000020UL;
3912 if (mode == PROT_WRITE0x02)
3913 pte = pte | PG_M0x0000000000000040UL;
3914 *hva = pte;
3915
3916 /* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */
3917 if (pte & PG_PS0x0000000000000080UL)
3918 break;
3919
3920 if (level > 1) {
3921 pt_paddr = pte & PG_FRAME0x000ffffffffff000UL;
3922 shift -= shift_width;
3923 mask = mask >> shift_width;
3924 }
3925 }
3926
3927 low_mask = ((uint64_t)1ULL << shift) - 1;
3928 high_mask = (((uint64_t)1ULL << ((pte_size * 8) - 1)) - 1) ^ low_mask;
3929 *pa = (pte & high_mask) | (va & low_mask);
3930
3931 DPRINTF("%s: final GPA for GVA 0x%llx = 0x%llx\n", __func__,
3932 va, *pa);
3933
3934 return (0);
3935}
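To make the per-level narrowing in the walk above concrete, here is a small standalone sketch (not part of vmm_machdep.c; the guest virtual address is an arbitrary example) that prints the page-table index extracted at each of the four long-mode levels, using the same mask/shift update as the kernel loop.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t va = 0x00007f1234567000ULL;	/* example GVA */
	uint64_t mask = 0x0000ff8000000000ULL;	/* L4_MASK */
	int shift = 39;				/* L4_SHIFT */
	int level;

	for (level = 4; level > 0; level--) {
		uint64_t pdidx = (va & mask) >> shift;

		printf("level %d: index %llu\n", level,
		    (unsigned long long)pdidx);

		/* Same narrowing step as the loop above (shift_width = 9). */
		shift -= 9;
		mask >>= 9;
	}
	return (0);
}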
3936
3937
3938/*
3939 * vcpu_run_vmx
3940 *
3941 * VMX main loop used to run a VCPU.
3942 *
3943 * Parameters:
3944 * vcpu: The VCPU to run
3945 * vrp: run parameters
3946 *
3947 * Return values:
3948 * 0: The run loop exited and no help is needed from vmd
3949 * EAGAIN: The run loop exited and help from vmd is needed
3950 * EINVAL: an error occurred
3951 */
3952int
3953vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
3954{
3955 int ret = 0, exitinfo;
3956 struct region_descriptor gdt;
3957 struct cpu_info *ci = NULL((void *)0);
3958 uint64_t exit_reason, cr3, insn_error;
3959 struct schedstate_percpu *spc;
3960 struct vmx_invvpid_descriptor vid;
3961 uint64_t eii, procbased, int_st;
3962 uint16_t irq, ldt_sel;
3963 u_long s;
3964 struct region_descriptor idtr;
3965
3966 rw_assert_wrlock(&vcpu->vc_lock);
3967
3968 if (vcpu_reload_vmcs_vmx(vcpu)) {
3969 printf("%s: failed (re)loading vmcs\n", __func__);
3970 return (EINVAL22);
3971 }
3972
3973 /*
3974 * If we are returning from userspace (vmd) because we exited
3975 * last time, fix up any needed vcpu state first. Which state
3976 * needs to be fixed up depends on what vmd populated in the
3977 * exit data structure.
3978 */
3979 irq = vrp->vrp_irq;
3980
3981 if (vrp->vrp_intr_pending)
3982 vcpu->vc_intr = 1;
3983 else
3984 vcpu->vc_intr = 0;
3985
3986 if (vrp->vrp_continue) {
3987 switch (vcpu->vc_gueststate.vg_exit_reason) {
3988 case VMX_EXIT_IO30:
3989 if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN)
3990 vcpu->vc_gueststate.vg_rax =
3991 vcpu->vc_exit.vei.vei_data;
3992 vcpu->vc_gueststate.vg_rip =
3993 vcpu->vc_exit.vrs.vrs_gprs[VCPU_REGS_RIP16];
3994 if (vmwrite(VMCS_GUEST_IA32_RIP0x681E,
3995 vcpu->vc_gueststate.vg_rip)) {
3996 printf("%s: failed to update rip\n", __func__);
3997 return (EINVAL22);
3998 }
3999 break;
4000 case VMX_EXIT_EPT_VIOLATION48:
4001 ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_GPRS0x1, 0,
4002 &vcpu->vc_exit.vrs);
4003 if (ret) {
4004 printf("%s: vm %d vcpu %d failed to update "
4005 "registers\n", __func__,
4006 vcpu->vc_parent->vm_id, vcpu->vc_id);
4007 return (EINVAL22);
4008 }
4009 break;
4010 case VM_EXIT_NONE0xFFFF:
4011 case VMX_EXIT_HLT12:
4012 case VMX_EXIT_INT_WINDOW7:
4013 case VMX_EXIT_EXTINT1:
4014 case VMX_EXIT_CPUID10:
4015 case VMX_EXIT_XSETBV55:
4016 break;
4017#ifdef VMM_DEBUG
4018 case VMX_EXIT_TRIPLE_FAULT2:
4019 DPRINTF("%s: vm %d vcpu %d triple fault\n",
4020 __func__, vcpu->vc_parent->vm_id,
4021 vcpu->vc_id);
4022 vmx_vcpu_dump_regs(vcpu);
4023 dump_vcpu(vcpu);
4024 vmx_dump_vmcs(vcpu);
4025 break;
4026 case VMX_EXIT_ENTRY_FAILED_GUEST_STATE33:
4027 DPRINTF("%s: vm %d vcpu %d failed entry "
4028 "due to invalid guest state\n",
4029 __func__, vcpu->vc_parent->vm_id,
4030 vcpu->vc_id);
4031 vmx_vcpu_dump_regs(vcpu);
4032 dump_vcpu(vcpu);
4033 return (EINVAL22);
4034 default:
4035 DPRINTF("%s: unimplemented exit type %d (%s)\n",
4036 __func__,
4037 vcpu->vc_gueststate.vg_exit_reason,
4038 vmx_exit_reason_decode(
4039 vcpu->vc_gueststate.vg_exit_reason));
4040 vmx_vcpu_dump_regs(vcpu);
4041 dump_vcpu(vcpu);
4042 break;
4043#endif /* VMM_DEBUG */
4044 }
4045 memset(&vcpu->vc_exit, 0, sizeof(vcpu->vc_exit));
4046 }
4047
4048 /* Host CR3 */
4049 cr3 = rcr3();
4050 if (vmwrite(VMCS_HOST_IA32_CR30x6C02, cr3)) {
4051 printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__,
4052 VMCS_HOST_IA32_CR30x6C02, cr3);
4053 return (EINVAL22);
4054 }
4055
4056 /* Handle vmd(8) injected interrupts */
4057 /* Is there an interrupt pending injection? */
4058 if (irq != 0xFFFF) {
4059 if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST0x4824, &int_st)) {
4060 printf("%s: can't get interruptibility state\n",
4061 __func__);
4062 return (EINVAL22);
4063 }
4064
4065 /* Interruptibility state 0x3 covers NMIs and STI */
4066 if (!(int_st & 0x3) && vcpu->vc_irqready) {
4067 eii = (irq & 0xFF);
4068 eii |= (1ULL << 31); /* Valid */
4069 eii |= (0ULL << 8); /* Hardware Interrupt */
4070 if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO0x4016, eii)) {
4071 printf("vcpu_run_vmx: can't vector "
4072 "interrupt to guest\n");
4073 return (EINVAL22);
4074 }
4075
4076 irq = 0xFFFF;
4077 }
4078 } else if (!vcpu->vc_intr) {
4079 /*
4080 * Disable window exiting
4081 */
4082 if (vmread(VMCS_PROCBASED_CTLS0x4002, &procbased)) {
4083 printf("%s: can't read procbased ctls on exit\n",
4084 __func__);
4085 return (EINVAL22);
4086 } else {
4087 procbased &= ~IA32_VMX_INTERRUPT_WINDOW_EXITING(1ULL << 2);
4088 if (vmwrite(VMCS_PROCBASED_CTLS0x4002, procbased)) {
4089 printf("%s: can't write procbased ctls "
4090 "on exit\n", __func__);
4091 return (EINVAL22);
4092 }
4093 }
4094 }
4095
4096 while (ret == 0) {
4097#ifdef VMM_DEBUG
4098 paddr_t pa = 0ULL;
4099 vmptrst(&pa);
4100 KASSERT(pa == vcpu->vc_control_pa);
4101#endif /* VMM_DEBUG */
4102
4103 vmm_update_pvclock(vcpu);
4104
4105 if (ci != curcpu()) {
4106 ci = curcpu();
4107 vcpu->vc_last_pcpu = ci;
4108
4109 setregion(&gdt, ci->ci_gdt, GDT_SIZE((6 << 3) + (1 << 4)) - 1);
4110 if (gdt.rd_base == 0) {
4111 printf("%s: setregion\n", __func__);
4112 return (EINVAL22);
4113 }
4114
4115 /* Host GDTR base */
4116 if (vmwrite(VMCS_HOST_IA32_GDTR_BASE0x6C0C, gdt.rd_base)) {
4117 printf("%s: vmwrite(0x%04X, 0x%llx)\n",
4118 __func__, VMCS_HOST_IA32_GDTR_BASE0x6C0C,
4119 gdt.rd_base);
4120 return (EINVAL22);
4121 }
4122
4123 /* Host TR base */
4124 if (vmwrite(VMCS_HOST_IA32_TR_BASE0x6C0A,
4125 (uint64_t)ci->ci_tss)) {
4126 printf("%s: vmwrite(0x%04X, 0x%llx)\n",
4127 __func__, VMCS_HOST_IA32_TR_BASE0x6C0A,
4128 (uint64_t)ci->ci_tss);
4129 return (EINVAL22);
4130 }
4131 }
4132
4133 /* Inject event if present */
4134 if (vcpu->vc_event != 0) {
4135 eii = (vcpu->vc_event & 0xFF);
4136 eii |= (1ULL << 31); /* Valid */
4137
4138 /* Set the "Send error code" flag for certain vectors */
4139 switch (vcpu->vc_event & 0xFF) {
4140 case VMM_EX_DF8:
4141 case VMM_EX_TS10:
4142 case VMM_EX_NP11:
4143 case VMM_EX_SS12:
4144 case VMM_EX_GP13:
4145 case VMM_EX_PF14:
4146 case VMM_EX_AC17:
4147 eii |= (1ULL << 11);
4148 }
4149
4150 eii |= (3ULL << 8); /* Hardware Exception */
4151 if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO0x4016, eii)) {
4152 printf("%s: can't vector event to guest\n",
4153 __func__);
4154 ret = EINVAL22;
4155 break;
4156 }
4157
4158 if (vmwrite(VMCS_ENTRY_EXCEPTION_ERROR_CODE0x4018, 0)) {
4159 printf("%s: can't write error code to guest\n",
4160 __func__);
4161 ret = EINVAL22;
4162 break;
4163 }
4164
4165 vcpu->vc_event = 0;
4166 }
4167
4168 if (vcpu->vc_vmx_vpid_enabled) {
4169 /* Invalidate old TLB mappings */
4170 vid.vid_vpid = vcpu->vc_vpid;
4171 vid.vid_addr = 0;
4172 invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB0x3, &vid);
4173 }
4174
4175 /* Start / resume the VCPU */
4176
4177 /* Disable interrupts and save the current host FPU state. */
4178 s = intr_disable();
4179 if ((ret = vmm_fpurestore(vcpu))) {
4180 intr_restore(s);
4181 break;
4182 }
4183
4184 sidt(&idtr);
4185 sldt(&ldt_sel);
4186
4187 TRACEPOINT(vmm, guest_enter, vcpu, vrp);
4189 /* Restore any guest PKRU state. */
4190 if (vmm_softc->sc_md.pkru_enabled)
4191 wrpkru(vcpu->vc_pkru);
4192
4193 ret = vmx_enter_guest(&vcpu->vc_control_pa,
4194 &vcpu->vc_gueststate,
4195 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1),
4196 ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr);
4197
4198 /* Restore host PKRU state. */
4199 if (vmm_softc->sc_md.pkru_enabled) {
4200 vcpu->vc_pkru = rdpkru(0);
4201 wrpkru(PGK_VALUE0xfffffffc);
4202 }
4203
4204 lidt(&idtr);
4205 lldt(ldt_sel);
4206
4207 /*
4208 * On exit, interrupts are disabled, and we are running with
4209 * the guest FPU state still possibly on the CPU. Save the FPU
4210 * state before re-enabling interrupts.
4211 */
4212 vmm_fpusave(vcpu);
4213 intr_restore(s);
4214
4215 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_LAUNCHED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (1));
4216 exit_reason = VM_EXIT_NONE0xFFFF;
4217
4218 /* If we exited successfully ... */
4219 if (ret == 0) {
4220 exitinfo = vmx_get_exit_info(
4221 &vcpu->vc_gueststate.vg_rip, &exit_reason);
4222 if (!(exitinfo & VMX_EXIT_INFO_HAVE_RIP0x1)) {
4223 printf("%s: cannot read guest rip\n", __func__);
4224 ret = EINVAL22;
4225 break;
4226 }
4227 if (!(exitinfo & VMX_EXIT_INFO_HAVE_REASON0x2)) {
4228 printf("%s: cant read exit reason\n", __func__);
4229 ret = EINVAL22;
4230 break;
4231 }
4232 vcpu->vc_gueststate.vg_exit_reason = exit_reason;
4233 TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason);
4234
4235 /* Update our state */
4236 if (vmread(VMCS_GUEST_IA32_RFLAGS0x6820,
4237 &vcpu->vc_gueststate.vg_rflags)) {
4238 printf("%s: can't read guest rflags during "
4239 "exit\n", __func__);
4240 ret = EINVAL22;
4241 break;
4242 }
4243
4244 /*
4245 * Handle the exit. This will alter "ret" to EAGAIN if
4246 * the exit handler determines help from vmd is needed.
4247 */
4248 ret = vmx_handle_exit(vcpu);
4249
4250 if (vcpu->vc_gueststate.vg_rflags & PSL_I0x00000200)
4251 vcpu->vc_irqready = 1;
4252 else
4253 vcpu->vc_irqready = 0;
4254
4255 /*
4256 * If not ready for interrupts, but interrupts pending,
4257 * enable interrupt window exiting.
4258 */
4259 if (vcpu->vc_irqready == 0 && vcpu->vc_intr) {
4260 if (vmread(VMCS_PROCBASED_CTLS0x4002, &procbased)) {
4261 printf("%s: can't read procbased ctls "
4262 "on intwin exit\n", __func__);
4263 ret = EINVAL22;
4264 break;
4265 }
4266
4267 procbased |= IA32_VMX_INTERRUPT_WINDOW_EXITING(1ULL << 2);
4268 if (vmwrite(VMCS_PROCBASED_CTLS0x4002, procbased)) {
4269 printf("%s: can't write procbased ctls "
4270 "on intwin exit\n", __func__);
4271 ret = EINVAL22;
4272 break;
4273 }
4274 }
4275
4276 /*
4277 * Exit to vmd if we are terminating, failed to enter,
4278 * or need help (device I/O)
4279 */
4280 if (ret || vcpu_must_stop(vcpu))
4281 break;
4282
4283 if (vcpu->vc_intr && vcpu->vc_irqready) {
4284 ret = EAGAIN35;
4285 break;
4286 }
4287
4288 /* Check if we should yield - don't hog the {p,v}pu */
4289 spc = &ci->ci_schedstate;
4290 if (spc->spc_schedflags & SPCF_SHOULDYIELD0x0002)
4291 break;
4292
4293 } else {
4294 /*
4295 * We failed vmresume or vmlaunch for some reason,
4296 * typically due to invalid vmcs state or other
4297 * reasons documented in SDM Vol 3C 30.4.
4298 */
4299 switch (ret) {
4300 case VMX_FAIL_LAUNCH_INVALID_VMCS2:
4301 printf("%s: failed %s with invalid vmcs\n",
4302 __func__,
4303 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1
4304 ? "vmresume" : "vmlaunch"));
4305 break;
4306 case VMX_FAIL_LAUNCH_VALID_VMCS3:
4307 printf("%s: failed %s with valid vmcs\n",
4308 __func__,
4309 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1
4310 ? "vmresume" : "vmlaunch"));
4311 break;
4312 default:
4313 printf("%s: failed %s for unknown reason\n",
4314 __func__,
4315 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1
4316 ? "vmresume" : "vmlaunch"));
4317 }
4318
4319 ret = EINVAL22;
4320
4321 /* Try to translate a vmfail error code, if possible. */
4322 if (vmread(VMCS_INSTRUCTION_ERROR0x4400, &insn_error)) {
4323 printf("%s: can't read insn error field\n",
4324 __func__);
4325 } else
4326 printf("%s: error code = %lld, %s\n", __func__,
4327 insn_error,
4328 vmx_instruction_error_decode(insn_error));
4329#ifdef VMM_DEBUG
4330 vmx_vcpu_dump_regs(vcpu);
4331 dump_vcpu(vcpu);
4332#endif /* VMM_DEBUG */
4333 }
4334 }
4335
4336 vcpu->vc_last_pcpu = curcpu();
4337
4338 /* Copy the VCPU register state to the exit structure */
4339 if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), 0, &vcpu->vc_exit.vrs))
4340 ret = EINVAL22;
4341 vcpu->vc_exit.cpl = vmm_get_guest_cpu_cpl(vcpu);
4342
4343 return (ret);
4344}
4345
4346/*
4347 * vmx_handle_intr
4348 *
4349 * Handle host (external) interrupts. We read which interrupt fired by
4350 * extracting the vector from the VMCS and dispatch the interrupt directly
4351 * to the host using vmm_dispatch_intr.
4352 */
4353void
4354vmx_handle_intr(struct vcpu *vcpu)
4355{
4356 uint8_t vec;
4357 uint64_t eii;
4358 struct gate_descriptor *idte;
4359 vaddr_t handler;
4360
4361 if (vmread(VMCS_EXIT_INTERRUPTION_INFO0x4404, &eii)) {
4362 printf("%s: can't obtain intr info\n", __func__);
4363 return;
4364 }
4365
4366 vec = eii & 0xFF;
4367
4368 /* XXX check "error valid" code in eii, abort if 0 */
4369 idte=&idt[vec];
4370 handler = idte->gd_looffset + ((uint64_t)idte->gd_hioffset << 16);
4371 vmm_dispatch_intr(handler);
4372}
4373
4374/*
4375 * svm_handle_hlt
4376 *
4377 * Handle HLT exits
4378 *
4379 * Parameters
4380 * vcpu: The VCPU that executed the HLT instruction
4381 *
4382 * Return Values:
4383 * EIO: The guest halted with interrupts disabled
4384 * EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU
4385 * until a virtual interrupt is ready to inject
4386 */
4387int
4388svm_handle_hlt(struct vcpu *vcpu)
4389{
4390 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
4391 uint64_t rflags = vmcb->v_rflags;
4392
4393 /* All HLT insns are 1 byte */
4394 vcpu->vc_gueststate.vg_rip += 1;
4395
4396 if (!(rflags & PSL_I0x00000200)) {
4397 DPRINTF("%s: guest halted with interrupts disabled\n",
4398 __func__);
4399 return (EIO5);
4400 }
4401
4402 return (EAGAIN35);
4403}
4404
4405/*
4406 * vmx_handle_hlt
4407 *
4408 * Handle HLT exits. HLTing the CPU with interrupts disabled will terminate
4409 * the guest (no NMIs handled) by returning EIO to vmd.
4410 *
4411 * Parameters:
4412 * vcpu: The VCPU that executed the HLT instruction
4413 *
4414 * Return Values:
4415 * EINVAL: An error occurred extracting information from the VMCS, or an
4416 * invalid HLT instruction was encountered
4417 * EIO: The guest halted with interrupts disabled
4418 * EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU
4419 * until a virtual interrupt is ready to inject
4420 *
4421 */
4422int
4423vmx_handle_hlt(struct vcpu *vcpu)
4424{
4425 uint64_t insn_length, rflags;
4426
4427 if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
4428 printf("%s: can't obtain instruction length\n", __func__);
4429 return (EINVAL22);
4430 }
4431
4432 if (vmread(VMCS_GUEST_IA32_RFLAGS0x6820, &rflags)) {
4433 printf("%s: can't obtain guest rflags\n", __func__);
4434 return (EINVAL22);
4435 }
4436
4437 if (insn_length != 1) {
4438 DPRINTF("%s: HLT with instruction length %lld not supported\n",
4439 __func__, insn_length);
4440 return (EINVAL22);
4441 }
4442
4443 if (!(rflags & PSL_I0x00000200)) {
4444 DPRINTF("%s: guest halted with interrupts disabled\n",
4445 __func__);
4446 return (EIO5);
4447 }
4448
4449 vcpu->vc_gueststate.vg_rip += insn_length;
4450 return (EAGAIN35);
4451}
4452
4453/*
4454 * vmx_get_exit_info
4455 *
4456 * Returns exit information containing the current guest RIP and exit reason
4457 * in rip and exit_reason. The return value is a bitmask indicating whether
4458 * reading the RIP and exit reason was successful.
4459 */
4460int
4461vmx_get_exit_info(uint64_t *rip, uint64_t *exit_reason)
4462{
4463 int rv = 0;
4464
4465 if (vmread(VMCS_GUEST_IA32_RIP0x681E, rip) == 0) {
4466 rv |= VMX_EXIT_INFO_HAVE_RIP0x1;
4467 if (vmread(VMCS_EXIT_REASON0x4402, exit_reason) == 0)
4468 rv |= VMX_EXIT_INFO_HAVE_REASON0x2;
4469 }
4470 return (rv);
4471}
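Because vmx_get_exit_info() returns a bitmask rather than an errno, callers can tell "no RIP at all" apart from "RIP but no exit reason". A tiny sketch of that caller-side check (not part of vmm_machdep.c; the flag macros simply mirror the VMX_EXIT_INFO_HAVE_* values shown above):

#include <stdio.h>

#define HAVE_RIP	0x1
#define HAVE_REASON	0x2

static void
report(int exitinfo)
{
	if (!(exitinfo & HAVE_RIP))
		printf("guest rip unavailable\n");
	else if (!(exitinfo & HAVE_REASON))
		printf("rip ok, exit reason unavailable\n");
	else
		printf("rip and exit reason ok\n");
}

int
main(void)
{
	report(0x0);
	report(HAVE_RIP);
	report(HAVE_RIP | HAVE_REASON);
	return (0);
}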
4472
4473/*
4474 * svm_handle_exit
4475 *
4476 * Handle exits from the VM by decoding the exit reason and calling various
4477 * subhandlers as needed.
4478 */
4479int
4480svm_handle_exit(struct vcpu *vcpu)
4481{
4482 uint64_t exit_reason, rflags;
4483 int update_rip, ret = 0;
4484 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
4485
4486 update_rip = 0;
4487 exit_reason = vcpu->vc_gueststate.vg_exit_reason;
4488 rflags = vcpu->vc_gueststate.vg_rflags;
4489
4490 switch (exit_reason) {
50
Control jumps to 'case 139:' at line 4540
4491 case SVM_VMEXIT_VINTR0x64:
4492 if (!(rflags & PSL_I0x00000200)) {
4493 DPRINTF("%s: impossible interrupt window exit "
4494 "config\n", __func__);
4495 ret = EINVAL22;
4496 break;
4497 }
4498
4499 /*
4500 * Guest is now ready for interrupts, so disable interrupt
4501 * window exiting.
4502 */
4503 vmcb->v_irq = 0;
4504 vmcb->v_intr_vector = 0;
4505 vmcb->v_intercept1 &= ~SVM_INTERCEPT_VINTR(1UL << 4);
4506 svm_set_dirty(vcpu, SVM_CLEANBITS_TPR(1 << 3) | SVM_CLEANBITS_I(1 << 0));
4507
4508 update_rip = 0;
4509 break;
4510 case SVM_VMEXIT_INTR0x60:
4511 update_rip = 0;
4512 break;
4513 case SVM_VMEXIT_SHUTDOWN0x7F:
4514 update_rip = 0;
4515 ret = EAGAIN35;
4516 break;
4517 case SVM_VMEXIT_NPF0x400:
4518 ret = svm_handle_np_fault(vcpu);
4519 break;
4520 case SVM_VMEXIT_CPUID0x72:
4521 ret = vmm_handle_cpuid(vcpu);
4522 update_rip = 1;
4523 break;
4524 case SVM_VMEXIT_MSR0x7C:
4525 ret = svm_handle_msr(vcpu);
4526 update_rip = 1;
4527 break;
4528 case SVM_VMEXIT_XSETBV0x8D:
4529 ret = svm_handle_xsetbv(vcpu);
4530 update_rip = 1;
4531 break;
4532 case SVM_VMEXIT_IOIO0x7B:
4533 if (svm_handle_inout(vcpu) == 0)
4534 ret = EAGAIN35;
4535 break;
4536 case SVM_VMEXIT_HLT0x78:
4537 ret = svm_handle_hlt(vcpu);
4538 update_rip = 1;
4539 break;
4540 case SVM_VMEXIT_MWAIT0x8B:
4541 case SVM_VMEXIT_MWAIT_CONDITIONAL0x8C:
4542 case SVM_VMEXIT_MONITOR0x8A:
4543 case SVM_VMEXIT_VMRUN0x80:
4544 case SVM_VMEXIT_VMMCALL0x81:
4545 case SVM_VMEXIT_VMLOAD0x82:
4546 case SVM_VMEXIT_VMSAVE0x83:
4547 case SVM_VMEXIT_STGI0x84:
4548 case SVM_VMEXIT_CLGI0x85:
4549 case SVM_VMEXIT_SKINIT0x86:
4550 case SVM_VMEXIT_RDTSCP0x87:
4551 case SVM_VMEXIT_ICEBP0x88:
4552 case SVM_VMEXIT_INVLPGA0x7A:
4553 ret = vmm_inject_ud(vcpu);
51
Calling 'vmm_inject_ud'
53
Returning from 'vmm_inject_ud'
4554 update_rip = 0;
4555 break;
4556 default:
4557 DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__,
4558 exit_reason, (uint64_t)vcpu->vc_control_pa);
4559 return (EINVAL22);
4560 }
4561
4562 if (update_rip
54.1
'update_rip' is 0
) {
54
Execution continues on line 4562
55
Taking false branch
4563 vmcb->v_rip = vcpu->vc_gueststate.vg_rip;
4564
4565 if (rflags & PSL_T0x00000100) {
4566 if (vmm_inject_db(vcpu)) {
4567 printf("%s: can't inject #DB exception to "
4568 "guest", __func__);
4569 return (EINVAL22);
4570 }
4571 }
4572 }
4573
4574 /* Enable SVME in EFER (must always be set) */
4575 vmcb->v_efer |= EFER_SVME0x00001000;
4576 svm_set_dirty(vcpu, SVM_CLEANBITS_CR(1 << 5));
4577
4578 return (ret);
56
Returning zero (loaded from 'ret'), which participates in a condition later
4579}
4580
4581/*
4582 * vmx_handle_exit
4583 *
4584 * Handle exits from the VM by decoding the exit reason and calling various
4585 * subhandlers as needed.
4586 */
4587int
4588vmx_handle_exit(struct vcpu *vcpu)
4589{
4590 uint64_t exit_reason, rflags, istate;
4591 int update_rip, ret = 0;
4592
4593 update_rip = 0;
4594 exit_reason = vcpu->vc_gueststate.vg_exit_reason;
4595 rflags = vcpu->vc_gueststate.vg_rflags;
4596
4597 switch (exit_reason) {
4598 case VMX_EXIT_INT_WINDOW7:
4599 if (!(rflags & PSL_I0x00000200)) {
4600 DPRINTF("%s: impossible interrupt window exit "
4601 "config\n", __func__);
4602 ret = EINVAL22;
4603 break;
4604 }
4605
4606 ret = EAGAIN35;
4607 update_rip = 0;
4608 break;
4609 case VMX_EXIT_EPT_VIOLATION48:
4610 ret = vmx_handle_np_fault(vcpu);
4611 break;
4612 case VMX_EXIT_CPUID10:
4613 ret = vmm_handle_cpuid(vcpu);
4614 update_rip = 1;
4615 break;
4616 case VMX_EXIT_IO30:
4617 if (vmx_handle_inout(vcpu) == 0)
4618 ret = EAGAIN35;
4619 break;
4620 case VMX_EXIT_EXTINT1:
4621 vmx_handle_intr(vcpu);
4622 update_rip = 0;
4623 break;
4624 case VMX_EXIT_CR_ACCESS28:
4625 ret = vmx_handle_cr(vcpu);
4626 update_rip = 1;
4627 break;
4628 case VMX_EXIT_HLT12:
4629 ret = vmx_handle_hlt(vcpu);
4630 update_rip = 1;
4631 break;
4632 case VMX_EXIT_RDMSR31:
4633 ret = vmx_handle_rdmsr(vcpu);
4634 update_rip = 1;
4635 break;
4636 case VMX_EXIT_WRMSR32:
4637 ret = vmx_handle_wrmsr(vcpu);
4638 update_rip = 1;
4639 break;
4640 case VMX_EXIT_XSETBV55:
4641 ret = vmx_handle_xsetbv(vcpu);
4642 update_rip = 1;
4643 break;
4644 case VMX_EXIT_MWAIT36:
4645 case VMX_EXIT_MONITOR39:
4646 case VMX_EXIT_VMXON27:
4647 case VMX_EXIT_VMWRITE25:
4648 case VMX_EXIT_VMREAD23:
4649 case VMX_EXIT_VMLAUNCH20:
4650 case VMX_EXIT_VMRESUME24:
4651 case VMX_EXIT_VMPTRLD21:
4652 case VMX_EXIT_VMPTRST22:
4653 case VMX_EXIT_VMCLEAR19:
4654 case VMX_EXIT_VMCALL18:
4655 case VMX_EXIT_VMFUNC59:
4656 case VMX_EXIT_VMXOFF26:
4657 case VMX_EXIT_INVVPID53:
4658 case VMX_EXIT_INVEPT50:
4659 ret = vmm_inject_ud(vcpu);
4660 update_rip = 0;
4661 break;
4662 case VMX_EXIT_TRIPLE_FAULT2:
4663#ifdef VMM_DEBUG
4664 DPRINTF("%s: vm %d vcpu %d triple fault\n", __func__,
4665 vcpu->vc_parent->vm_id, vcpu->vc_id);
4666 vmx_vcpu_dump_regs(vcpu);
4667 dump_vcpu(vcpu);
4668 vmx_dump_vmcs(vcpu);
4669#endif /* VMM_DEBUG */
4670 ret = EAGAIN35;
4671 update_rip = 0;
4672 break;
4673 default:
4674#ifdef VMM_DEBUG
4675 DPRINTF("%s: unhandled exit 0x%llx (%s)\n", __func__,
4676 exit_reason, vmx_exit_reason_decode(exit_reason));
4677#endif /* VMM_DEBUG */
4678 return (EINVAL22);
4679 }
4680
4681 if (update_rip) {
4682 if (vmwrite(VMCS_GUEST_IA32_RIP0x681E,
4683 vcpu->vc_gueststate.vg_rip)) {
4684 printf("%s: can't advance rip\n", __func__);
4685 return (EINVAL22);
4686 }
4687
4688 if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST0x4824,
4689 &istate)) {
4690 printf("%s: can't read interruptibility state\n",
4691 __func__);
4692 return (EINVAL22);
4693 }
4694
4695 /* Interruptibility state 0x3 covers NMIs and STI */
4696 istate &= ~0x3;
4697
4698 if (vmwrite(VMCS_GUEST_INTERRUPTIBILITY_ST0x4824,
4699 istate)) {
4700 printf("%s: can't write interruptibility state\n",
4701 __func__);
4702 return (EINVAL22);
4703 }
4704
4705 if (rflags & PSL_T0x00000100) {
4706 if (vmm_inject_db(vcpu)) {
4707 printf("%s: can't inject #DB exception to "
4708 "guest", __func__);
4709 return (EINVAL22);
4710 }
4711 }
4712 }
4713
4714 return (ret);
4715}
4716
4717/*
4718 * vmm_inject_gp
4719 *
4720 * Injects an #GP exception into the guest VCPU.
4721 *
4722 * Parameters:
4723 * vcpu: vcpu to inject into
4724 *
4725 * Return values:
4726 * Always 0
4727 */
4728int
4729vmm_inject_gp(struct vcpu *vcpu)
4730{
4731 DPRINTF("%s: injecting #GP at guest %%rip 0x%llx\n", __func__,
4732 vcpu->vc_gueststate.vg_rip);
4733 vcpu->vc_event = VMM_EX_GP13;
4734
4735 return (0);
4736}
4737
4738/*
4739 * vmm_inject_ud
4740 *
4741 * Injects an #UD exception into the guest VCPU.
4742 *
4743 * Parameters:
4744 * vcpu: vcpu to inject into
4745 *
4746 * Return values:
4747 * Always 0
4748 */
4749int
4750vmm_inject_ud(struct vcpu *vcpu)
4751{
4752 DPRINTF("%s: injecting #UD at guest %%rip 0x%llx\n", __func__,
4753 vcpu->vc_gueststate.vg_rip);
4754 vcpu->vc_event = VMM_EX_UD6;
4755
4756 return (0);
52
Returning zero, which participates in a condition later
4757}
4758
4759/*
4760 * vmm_inject_db
4761 *
4762 * Injects a #DB exception into the guest VCPU.
4763 *
4764 * Parameters:
4765 * vcpu: vcpu to inject into
4766 *
4767 * Return values:
4768 * Always 0
4769 */
4770int
4771vmm_inject_db(struct vcpu *vcpu)
4772{
4773 DPRINTF("%s: injecting #DB at guest %%rip 0x%llx\n", __func__,
4774 vcpu->vc_gueststate.vg_rip);
4775 vcpu->vc_event = VMM_EX_DB1;
4776
4777 return (0);
4778}
4779
4780/*
4781 * vmm_get_guest_memtype
4782 *
4783 * Returns the type of memory 'gpa' refers to in the context of vm 'vm'
4784 */
4785int
4786vmm_get_guest_memtype(struct vm *vm, paddr_t gpa)
4787{
4788 int i;
4789 struct vm_mem_range *vmr;
4790
4791 /* XXX Use binary search? */
4792 for (i = 0; i < vm->vm_nmemranges; i++) {
4793 vmr = &vm->vm_memranges[i];
4794
4795 /*
4796 * vm_memranges are ascending. gpa can no longer be in one of
4797 * the memranges
4798 */
4799 if (gpa < vmr->vmr_gpa)
4800 break;
4801
4802 if (gpa < vmr->vmr_gpa + vmr->vmr_size) {
4803 if (vmr->vmr_type == VM_MEM_MMIO2)
4804 return (VMM_MEM_TYPE_MMIO);
4805 return (VMM_MEM_TYPE_REGULAR);
4806 }
4807 }
4808
4809 DPRINTF("guest memtype @ 0x%llx unknown\n", (uint64_t)gpa);
4810 return (VMM_MEM_TYPE_UNKNOWN);
4811}
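The XXX above hints at a binary search; since vm_memranges are kept ascending and non-overlapping, a lookup along the lines of the following sketch would work (not part of vmm_machdep.c; the struct and the example ranges are hypothetical stand-ins for struct vm_mem_range).

#include <stdint.h>
#include <stdio.h>

struct range {
	uint64_t gpa;
	uint64_t size;
};

/* Returns the index of the range containing 'gpa', or -1. */
static int
range_lookup(const struct range *r, int n, uint64_t gpa)
{
	int lo = 0, hi = n - 1;

	while (lo <= hi) {
		int mid = lo + (hi - lo) / 2;

		if (gpa < r[mid].gpa)
			hi = mid - 1;
		else if (gpa >= r[mid].gpa + r[mid].size)
			lo = mid + 1;
		else
			return (mid);
	}
	return (-1);
}

int
main(void)
{
	struct range r[] = {
		{ 0x0,       0xc0000 },
		{ 0x100000,  0x100000 },
		{ 0x1000000, 0x1000000 },
	};

	printf("%d\n", range_lookup(r, 3, 0x1234));	/* 0 */
	printf("%d\n", range_lookup(r, 3, 0xd0000));	/* -1: falls in a hole */
	printf("%d\n", range_lookup(r, 3, 0x1800000));	/* 2 */
	return (0);
}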
4812
4813/*
4814 * vmx_get_exit_qualification
4815 *
4816 * Return the current VMCS' exit qualification information
4817 */
4818int
4819vmx_get_exit_qualification(uint64_t *exit_qualification)
4820{
4821 if (vmread(VMCS_GUEST_EXIT_QUALIFICATION0x6400, exit_qualification)) {
4822 printf("%s: can't extract exit qual\n", __func__);
4823 return (EINVAL22);
4824 }
4825
4826 return (0);
4827}
4828
4829/*
4830 * vmx_get_guest_faulttype
4831 *
4832 * Determines the type (R/W/X) of the last fault on the VCPU last run on
4833 * this PCPU.
4834 */
4835int
4836vmx_get_guest_faulttype(void)
4837{
4838 uint64_t exit_qual;
4839 uint64_t presentmask = IA32_VMX_EPT_FAULT_WAS_READABLE(1ULL << 3) |
4840 IA32_VMX_EPT_FAULT_WAS_WRITABLE(1ULL << 4) | IA32_VMX_EPT_FAULT_WAS_EXECABLE(1ULL << 5);
4841 vm_prot_t prot, was_prot;
4842
4843 if (vmx_get_exit_qualification(&exit_qual))
4844 return (-1);
4845
4846 if ((exit_qual & presentmask) == 0)
4847 return VM_FAULT_INVALID((vm_fault_t) 0x0);
4848
4849 was_prot = 0;
4850 if (exit_qual & IA32_VMX_EPT_FAULT_WAS_READABLE(1ULL << 3))
4851 was_prot |= PROT_READ0x01;
4852 if (exit_qual & IA32_VMX_EPT_FAULT_WAS_WRITABLE(1ULL << 4))
4853 was_prot |= PROT_WRITE0x02;
4854 if (exit_qual & IA32_VMX_EPT_FAULT_WAS_EXECABLE(1ULL << 5))
4855 was_prot |= PROT_EXEC0x04;
4856
4857 prot = 0;
4858 if (exit_qual & IA32_VMX_EPT_FAULT_READ(1ULL << 0))
4859 prot = PROT_READ0x01;
4860 else if (exit_qual & IA32_VMX_EPT_FAULT_WRITE(1ULL << 1))
4861 prot = PROT_WRITE0x02;
4862 else if (exit_qual & IA32_VMX_EPT_FAULT_EXEC(1ULL << 2))
4863 prot = PROT_EXEC0x04;
4864
4865 if ((was_prot & prot) == 0)
4866 return VM_FAULT_PROTECT((vm_fault_t) 0x1);
4867
4868 return (-1);
4869}
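For readers decoding EPT-violation exit qualifications by hand, this small sketch (not part of vmm_machdep.c; the qualification value is made up) shows the attempted-access vs. existing-permission split the function above relies on: bits 0-2 describe the attempted access, bits 3-5 the permissions the mapping already had.

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t exit_qual = 0x0a;	/* write attempt, page was readable */
	int attempted_write = (exit_qual & (1ULL << 1)) != 0;
	int was_readable = (exit_qual & (1ULL << 3)) != 0;
	int was_writable = (exit_qual & (1ULL << 4)) != 0;

	if (attempted_write && was_readable && !was_writable)
		printf("protection fault: write to read-only mapping\n");
	else
		printf("some other fault type\n");
	return (0);
}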
4870
4871/*
4872 * svm_get_guest_faulttype
4873 *
4874 * Determines the type (R/W/X) of the last fault on the VCPU last run on
4875 * this PCPU.
4876 */
4877int
4878svm_get_guest_faulttype(struct vmcb *vmcb)
4879{
4880 if (!(vmcb->v_exitinfo1 & 0x1))
4881 return VM_FAULT_INVALID((vm_fault_t) 0x0);
4882 return VM_FAULT_PROTECT((vm_fault_t) 0x1);
4883}
4884
4885/*
4886 * svm_fault_page
4887 *
4888 * Request a new page to be faulted into the UVM map of the VM owning 'vcpu'
4889 * at address 'gpa'.
4890 */
4891int
4892svm_fault_page(struct vcpu *vcpu, paddr_t gpa)
4893{
4894 int ret;
4895
4896 ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, VM_FAULT_WIRE((vm_fault_t) 0x2),
4897 PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04);
4898 if (ret)
4899 printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n",
4900 __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip);
4901
4902 return (ret);
4903}
4904
4905/*
4906 * svm_handle_np_fault
4907 *
4908 * High level nested paging handler for SVM. Verifies that a fault is for a
4909 * valid memory region, then faults a page, or aborts otherwise.
4910 */
4911int
4912svm_handle_np_fault(struct vcpu *vcpu)
4913{
4914 uint64_t gpa;
4915 int gpa_memtype, ret = 0;
4916 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
4917 struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee;
4918 struct cpu_info *ci = curcpu();
4919
4920 memset(vee, 0, sizeof(*vee))__builtin_memset((vee), (0), (sizeof(*vee)));
4921
4922 gpa = vmcb->v_exitinfo2;
4923
4924 gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa);
4925 switch (gpa_memtype) {
4926 case VMM_MEM_TYPE_REGULAR:
4927 vee->vee_fault_type = VEE_FAULT_HANDLED;
4928 ret = svm_fault_page(vcpu, gpa);
4929 break;
4930 case VMM_MEM_TYPE_MMIO:
4931 vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST;
4932 if (ci->ci_vmm_cap.vcc_svm.svm_decode_assist) {
4933 vee->vee_insn_len = vmcb->v_n_bytes_fetched;
4934 memcpy(&vee->vee_insn_bytes, vmcb->v_guest_ins_bytes,
4935 sizeof(vee->vee_insn_bytes));
4936 vee->vee_insn_info |= VEE_BYTES_VALID0x2;
4937 }
4938 ret = EAGAIN35;
4939 break;
4940 default:
4941 printf("%s: unknown memory type %d for GPA 0x%llx\n",
4942 __func__, gpa_memtype, gpa);
4943 return (EINVAL22);
4944 }
4945
4946 return (ret);
4947}
4948
4949/*
4950 * vmx_fault_page
4951 *
4952 * Request a new page to be faulted into the UVM map of the VM owning 'vcpu'
4953 * at address 'gpa'.
4954 *
4955 * Parameters:
4956 * vcpu: guest VCPU requiring the page to be faulted into the UVM map
4957 * gpa: guest physical address that triggered the fault
4958 *
4959 * Return Values:
4960 * 0: if successful
4961 * EINVAL: if fault type could not be determined or VMCS reload fails
4962 * EAGAIN: if a protection fault occurred, ie writing to a read-only page
4963 * errno: if uvm_fault(9) fails to wire in the page
4964 */
4965int
4966vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
4967{
4968 int fault_type, ret;
4969
4970 fault_type = vmx_get_guest_faulttype();
4971 switch (fault_type) {
4972 case -1:
4973 printf("%s: invalid fault type\n", __func__);
4974 return (EINVAL22);
4975 case VM_FAULT_PROTECT((vm_fault_t) 0x1):
4976 vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT;
4977 return (EAGAIN35);
4978 default:
4979 vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_HANDLED;
4980 break;
4981 }
4982
4983 /* We may sleep during uvm_fault(9), so reload VMCS. */
4984 vcpu->vc_last_pcpu = curcpu();
4985 ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, VM_FAULT_WIRE((vm_fault_t) 0x2),
4986 PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04);
4987 if (vcpu_reload_vmcs_vmx(vcpu)) {
4988 printf("%s: failed to reload vmcs\n", __func__);
4989 return (EINVAL22);
4990 }
4991
4992 if (ret)
4993 printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n",
4994 __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip);
4995
4996 return (ret);
4997}
4998
4999/*
5000 * vmx_handle_np_fault
5001 *
5002 * High level nested paging handler for VMX. Verifies that a fault is for a
5003 * valid memory region, then faults a page, or aborts otherwise.
5004 */
5005int
5006vmx_handle_np_fault(struct vcpu *vcpu)
5007{
5008 uint64_t insn_len = 0, gpa;
5009 int gpa_memtype, ret = 0;
5010 struct vm_exit_eptviolation *vee = &vcpu->vc_exit.vee;
5011
5012 memset(vee, 0, sizeof(*vee))__builtin_memset((vee), (0), (sizeof(*vee)));
5013
5014 if (vmread(VMCS_GUEST_PHYSICAL_ADDRESS0x2400, &gpa)) {
5015 printf("%s: cannot extract faulting pa\n", __func__);
5016 return (EINVAL22);
5017 }
5018
5019 gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa);
5020 switch (gpa_memtype) {
5021 case VMM_MEM_TYPE_REGULAR:
5022 vee->vee_fault_type = VEE_FAULT_HANDLED;
5023 ret = vmx_fault_page(vcpu, gpa);
5024 break;
5025 case VMM_MEM_TYPE_MMIO:
5026 vee->vee_fault_type = VEE_FAULT_MMIO_ASSIST;
5027 if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_len) ||
5028 insn_len == 0 || insn_len > 15) {
5029 printf("%s: failed to extract instruction length\n",
5030 __func__);
5031 ret = EINVAL22;
5032 } else {
5033 vee->vee_insn_len = (uint32_t)insn_len;
5034 vee->vee_insn_info |= VEE_LEN_VALID0x1;
5035 ret = EAGAIN35;
5036 }
5037 break;
5038 default:
5039 printf("%s: unknown memory type %d for GPA 0x%llx\n",
5040 __func__, gpa_memtype, gpa);
5041 return (EINVAL22);
5042 }
5043
5044 return (ret);
5045}
5046
5047/*
5048 * vmm_get_guest_cpu_cpl
5049 *
5050 * Determines current CPL of 'vcpu'. On VMX/Intel, this is gathered from the
5051 * VMCS field for the DPL of SS (this seems odd, but is documented that way
5052 * in the SDM). For SVM/AMD, this is gathered directly from the VMCB's 'cpl'
5053 * field, as per the APM.
5054 *
5055 * Parameters:
5056 * vcpu: guest VCPU for which CPL is to be checked
5057 *
5058 * Return Values:
5059 * -1: the CPL could not be determined
5060 * 0-3 indicating the current CPL. For real mode operation, 0 is returned.
5061 */
5062int
5063vmm_get_guest_cpu_cpl(struct vcpu *vcpu)
5064{
5065 int mode;
5066 struct vmcb *vmcb;
5067 uint64_t ss_ar;
5068
5069 mode = vmm_get_guest_cpu_mode(vcpu);
5070
5071 if (mode == VMM_CPU_MODE_UNKNOWN)
5072 return (-1);
5073
5074 if (mode == VMM_CPU_MODE_REAL)
5075 return (0);
5076
5077 if (vmm_softc->mode == VMM_MODE_RVI) {
5078 vmcb = (struct vmcb *)vcpu->vc_control_va;
5079 return (vmcb->v_cpl);
5080 } else if (vmm_softc->mode == VMM_MODE_EPT) {
5081 if (vmread(VMCS_GUEST_IA32_SS_AR0x4818, &ss_ar))
5082 return (-1);
5083 return ((ss_ar & 0x60) >> 5);
5084 } else
5085 return (-1);
5086}
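The (ss_ar & 0x60) >> 5 expression above pulls the DPL out of bits 5-6 of the VMCS access-rights field. A throwaway sketch of just that extraction (not part of vmm_machdep.c; the access-rights value is invented):

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint64_t ss_ar = 0xc093;	/* hypothetical SS access rights, DPL 0 */

	printf("cpl = %d\n", (int)((ss_ar & 0x60) >> 5));	/* 0 */
	ss_ar |= (3 << 5);		/* raise DPL to 3 */
	printf("cpl = %d\n", (int)((ss_ar & 0x60) >> 5));	/* 3 */
	return (0);
}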
5087
5088/*
5089 * vmm_get_guest_cpu_mode
5090 *
5091 * Determines current CPU mode of 'vcpu'.
5092 *
5093 * Parameters:
5094 * vcpu: guest VCPU for which mode is to be checked
5095 *
5096 * Return Values:
5097 * One of VMM_CPU_MODE_*, or VMM_CPU_MODE_UNKNOWN if the mode could not be
5098 * ascertained.
5099 */
5100int
5101vmm_get_guest_cpu_mode(struct vcpu *vcpu)
5102{
5103 uint64_t cr0, efer, cs_ar;
5104 uint8_t l, dib;
5105 struct vmcb *vmcb;
5106 struct vmx_msr_store *msr_store;
5107
5108 if (vmm_softc->mode == VMM_MODE_RVI) {
5109 vmcb = (struct vmcb *)vcpu->vc_control_va;
5110 cr0 = vmcb->v_cr0;
5111 efer = vmcb->v_efer;
5112 cs_ar = vmcb->v_cs.vs_attr;
5113 cs_ar = (cs_ar & 0xff) | ((cs_ar << 4) & 0xf000);
5114 } else if (vmm_softc->mode == VMM_MODE_EPT) {
5115 if (vmread(VMCS_GUEST_IA32_CR00x6800, &cr0))
5116 return (VMM_CPU_MODE_UNKNOWN);
5117 if (vmread(VMCS_GUEST_IA32_CS_AR0x4816, &cs_ar))
5118 return (VMM_CPU_MODE_UNKNOWN);
5119 msr_store =
5120 (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
5121 efer = msr_store[VCPU_REGS_EFER0].vms_data;
5122 } else
5123 return (VMM_CPU_MODE_UNKNOWN);
5124
5125 l = (cs_ar & 0x2000) >> 13;
5126 dib = (cs_ar & 0x4000) >> 14;
5127
5128 /* Check CR0.PE */
5129 if (!(cr0 & CR0_PE0x00000001))
5130 return (VMM_CPU_MODE_REAL);
5131
5132 /* Check EFER */
5133 if (efer & EFER_LMA0x00000400) {
5134 /* Could be compat or long mode, check CS.L */
5135 if (l)
5136 return (VMM_CPU_MODE_LONG);
5137 else
5138 return (VMM_CPU_MODE_COMPAT);
5139 }
5140
5141 /* Check prot vs prot32 */
5142 if (dib)
5143 return (VMM_CPU_MODE_PROT32);
5144 else
5145 return (VMM_CPU_MODE_PROT);
5146}
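The mode decision above reduces to four inputs (CR0.PE, EFER.LMA, CS.L, CS.D/B). This compact sketch restates it as a pure function (not part of vmm_machdep.c; the string names are stand-ins for the VMM_CPU_MODE_* values, and only the two bit constants used here are copied from the listing).

#include <stdint.h>
#include <stdio.h>

#define CR0_PE		0x00000001ULL
#define EFER_LMA	0x00000400ULL

static const char *
guest_mode(uint64_t cr0, uint64_t efer, int cs_l, int cs_db)
{
	if (!(cr0 & CR0_PE))
		return ("real");
	if (efer & EFER_LMA)
		return (cs_l ? "long" : "compat");
	return (cs_db ? "prot32" : "prot");
}

int
main(void)
{
	printf("%s\n", guest_mode(0, 0, 0, 0));			/* real */
	printf("%s\n", guest_mode(CR0_PE, EFER_LMA, 1, 0));	/* long */
	printf("%s\n", guest_mode(CR0_PE, 0, 0, 1));		/* prot32 */
	return (0);
}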
5147
5148/*
5149 * svm_handle_inout
5150 *
5151 * Exit handler for IN/OUT instructions.
5152 *
5153 * Parameters:
5154 * vcpu: The VCPU where the IN/OUT instruction occurred
5155 *
5156 * Return values:
5157 * 0: if successful
5158 * EINVAL: an invalid IN/OUT instruction was encountered
5159 */
5160int
5161svm_handle_inout(struct vcpu *vcpu)
5162{
5163 uint64_t insn_length, exit_qual;
5164 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
5165
5166 insn_length = vmcb->v_exitinfo2 - vmcb->v_rip;
5167 exit_qual = vmcb->v_exitinfo1;
5168
5169 /* Bit 0 - direction */
5170 if (exit_qual & 0x1)
5171 vcpu->vc_exit.vei.vei_dir = VEI_DIR_IN;
5172 else
5173 vcpu->vc_exit.vei.vei_dir = VEI_DIR_OUT;
5174 /* Bit 2 - string instruction? */
5175 vcpu->vc_exit.vei.vei_string = (exit_qual & 0x4) >> 2;
5176 /* Bit 3 - REP prefix? */
5177 vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x8) >> 3;
5178
5179 /* Bits 4:6 - size of exit */
5180 if (exit_qual & 0x10)
5181 vcpu->vc_exit.vei.vei_size = 1;
5182 else if (exit_qual & 0x20)
5183 vcpu->vc_exit.vei.vei_size = 2;
5184 else if (exit_qual & 0x40)
5185 vcpu->vc_exit.vei.vei_size = 4;
5186
5187 /* Bit 16:31 - port */
5188 vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16;
5189 /* Data */
5190 vcpu->vc_exit.vei.vei_data = vmcb->v_rax;
5191
5192 vcpu->vc_exit.vei.vei_insn_len = (uint8_t)insn_length;
5193
5194	TRACEPOINT(vmm, inout, vcpu, vcpu->vc_exit.vei.vei_port,
5195	    vcpu->vc_exit.vei.vei_dir, vcpu->vc_exit.vei.vei_data);
5196
5197 return (0);
5198}
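
For illustration only, here is a small stand-alone decoder mirroring the EXITINFO1 bit layout used above (bit 0 direction, bit 2 string, bit 3 REP, bits 4-6 size, bits 16-31 port); the sample value is made up.

        #include <stdint.h>
        #include <stdio.h>

        int
        main(void)
        {
                /* Hypothetical EXITINFO1: IN, size 1, port 0x3f8. */
                uint64_t exit_qual = (0x3f8ULL << 16) | 0x10 | 0x1;
                unsigned size = 0;

                if (exit_qual & 0x10)
                        size = 1;
                else if (exit_qual & 0x20)
                        size = 2;
                else if (exit_qual & 0x40)
                        size = 4;

                printf("dir=%s string=%d rep=%d size=%u port=0x%x\n",
                    (exit_qual & 0x1) ? "IN" : "OUT",
                    (int)((exit_qual & 0x4) >> 2),
                    (int)((exit_qual & 0x8) >> 3),
                    size,
                    (unsigned)((exit_qual & 0xFFFF0000) >> 16));
                return 0;
        }
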
5199
5200/*
5201 * vmx_handle_inout
5202 *
5203 * Exit handler for IN/OUT instructions.
5204 *
5205 * Parameters:
5206 * vcpu: The VCPU where the IN/OUT instruction occurred
5207 *
5208 * Return values:
5209 * 0: if successful
5210 * EINVAL: invalid IN/OUT instruction or vmread failures occurred
5211 */
5212int
5213vmx_handle_inout(struct vcpu *vcpu)
5214{
5215 uint64_t insn_length, exit_qual;
5216
5217 if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
5218 printf("%s: can't obtain instruction length\n", __func__);
5219 return (EINVAL22);
5220 }
5221
5222 if (vmx_get_exit_qualification(&exit_qual)) {
5223 printf("%s: can't get exit qual\n", __func__);
5224 return (EINVAL22);
5225 }
5226
5227 /* Bits 0:2 - size of exit */
5228 vcpu->vc_exit.vei.vei_size = (exit_qual & 0x7) + 1;
5229 /* Bit 3 - direction */
5230 if ((exit_qual & 0x8) >> 3)
5231 vcpu->vc_exit.vei.vei_dir = VEI_DIR_IN;
5232 else
5233 vcpu->vc_exit.vei.vei_dir = VEI_DIR_OUT;
5234 /* Bit 4 - string instruction? */
5235 vcpu->vc_exit.vei.vei_string = (exit_qual & 0x10) >> 4;
5236 /* Bit 5 - REP prefix? */
5237 vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x20) >> 5;
5238 /* Bit 6 - Operand encoding */
5239 vcpu->vc_exit.vei.vei_encoding = (exit_qual & 0x40) >> 6;
5240 /* Bit 16:31 - port */
5241 vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16;
5242 /* Data */
5243 vcpu->vc_exit.vei.vei_data = (uint32_t)vcpu->vc_gueststate.vg_rax;
5244
5245 vcpu->vc_exit.vei.vei_insn_len = (uint8_t)insn_length;
5246
5247	TRACEPOINT(vmm, inout, vcpu, vcpu->vc_exit.vei.vei_port,
5248	    vcpu->vc_exit.vei.vei_dir, vcpu->vc_exit.vei.vei_data);
5249
5250 return (0);
5251}
5252
5253/*
5254 * vmx_load_pdptes
5255 *
5256 * Update the PDPTEs in the VMCS with the values currently indicated by the
5257 * guest CR3. This is used for 32-bit PAE guests when enabling paging.
5258 *
5259 * Parameters
5260 * vcpu: The vcpu whose PDPTEs should be loaded
5261 *
5262 * Return values:
5263 * 0: if successful
5264 * EINVAL: if the PDPTEs could not be loaded
5265 * ENOMEM: memory allocation failure
5266 */
5267int
5268vmx_load_pdptes(struct vcpu *vcpu)
5269{
5270 uint64_t cr3, cr3_host_phys;
5271 vaddr_t cr3_host_virt;
5272 pd_entry_t *pdptes;
5273 int ret;
5274
5275 if (vmread(VMCS_GUEST_IA32_CR30x6802, &cr3)) {
5276 printf("%s: can't read guest cr3\n", __func__);
5277 return (EINVAL22);
5278 }
5279
5280 if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, (vaddr_t)cr3,
5281 (paddr_t *)&cr3_host_phys)) {
5282 DPRINTF("%s: nonmapped guest CR3, setting PDPTEs to 0\n",
5283 __func__);
5284 if (vmwrite(VMCS_GUEST_PDPTE00x280A, 0)) {
5285 printf("%s: can't write guest PDPTE0\n", __func__);
5286 return (EINVAL22);
5287 }
5288
5289 if (vmwrite(VMCS_GUEST_PDPTE10x280C, 0)) {
5290 printf("%s: can't write guest PDPTE1\n", __func__);
5291 return (EINVAL22);
5292 }
5293
5294 if (vmwrite(VMCS_GUEST_PDPTE20x280E, 0)) {
5295 printf("%s: can't write guest PDPTE2\n", __func__);
5296 return (EINVAL22);
5297 }
5298
5299 if (vmwrite(VMCS_GUEST_PDPTE30x2810, 0)) {
5300 printf("%s: can't write guest PDPTE3\n", __func__);
5301 return (EINVAL22);
5302 }
5303 return (0);
5304 }
5305
5306 ret = 0;
5307
5308 /* We may sleep during km_alloc(9), so reload VMCS. */
5309	vcpu->vc_last_pcpu = curcpu();
5310 cr3_host_virt = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_any, &kp_none,
5311 &kd_waitok);
5312 if (vcpu_reload_vmcs_vmx(vcpu)) {
5313 printf("%s: failed to reload vmcs\n", __func__);
5314 ret = EINVAL22;
5315 goto exit;
5316 }
5317
5318 if (!cr3_host_virt) {
5319 printf("%s: can't allocate address for guest CR3 mapping\n",
5320 __func__);
5321 return (ENOMEM12);
5322 }
5323
5324 pmap_kenter_pa(cr3_host_virt, cr3_host_phys, PROT_READ0x01);
5325
5326 pdptes = (pd_entry_t *)cr3_host_virt;
5327 if (vmwrite(VMCS_GUEST_PDPTE00x280A, pdptes[0])) {
5328 printf("%s: can't write guest PDPTE0\n", __func__);
5329 ret = EINVAL22;
5330 goto exit;
5331 }
5332
5333 if (vmwrite(VMCS_GUEST_PDPTE10x280C, pdptes[1])) {
5334 printf("%s: can't write guest PDPTE1\n", __func__);
5335 ret = EINVAL22;
5336 goto exit;
5337 }
5338
5339 if (vmwrite(VMCS_GUEST_PDPTE20x280E, pdptes[2])) {
5340 printf("%s: can't write guest PDPTE2\n", __func__);
5341 ret = EINVAL22;
5342 goto exit;
5343 }
5344
5345 if (vmwrite(VMCS_GUEST_PDPTE30x2810, pdptes[3])) {
5346 printf("%s: can't write guest PDPTE3\n", __func__);
5347 ret = EINVAL22;
5348 goto exit;
5349 }
5350
5351exit:
5352 pmap_kremove(cr3_host_virt, PAGE_SIZE(1 << 12));
5353
5354 /* km_free(9) might sleep, so we need to reload VMCS. */
5355	vcpu->vc_last_pcpu = curcpu();
5356 km_free((void *)cr3_host_virt, PAGE_SIZE(1 << 12), &kv_any, &kp_none);
5357 if (vcpu_reload_vmcs_vmx(vcpu)) {
5358 printf("%s: failed to reload vmcs after km_free\n", __func__);
5359 ret = EINVAL22;
5360 }
5361
5362 return (ret);
5363}
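
Background for the routine above, stated loosely: in 32-bit PAE mode, CR3 points at a table of four 8-byte PDPTEs, each covering 1 GB of guest-linear address space, and the VMCS caches copies of them in GUEST_PDPTE0..3. A hypothetical sketch of walking such a table once it has been mapped (the table contents here are invented, not read from any kernel API):

        #include <stdint.h>
        #include <stdio.h>

        /* Hypothetical: four 8-byte PDPTEs as they would sit at the PAE CR3 address. */
        static uint64_t fake_pdpt[4] = { 0x1001, 0x2001, 0x0, 0x0 };

        int
        main(void)
        {
                int i;

                for (i = 0; i < 4; i++) {
                        uint64_t e = fake_pdpt[i];
                        /* Bit 0 is the present bit; bits 12+ hold the page-directory base. */
                        printf("PDPTE%d: %s base=0x%llx\n", i,
                            (e & 0x1) ? "present" : "not present",
                            (unsigned long long)(e & ~0xfffULL));
                }
                return 0;
        }
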
5364
5365/*
5366 * vmx_handle_cr0_write
5367 *
5368 * Write handler for CR0. This function ensures valid values are written into
5369 * CR0 for the cpu/vmm mode in use (cr0 must-be-0 and must-be-1 bits, etc).
5370 *
5371 * Parameters
5372 * vcpu: The vcpu taking the cr0 write exit
5373 * r: The guest's desired (incoming) cr0 value
5374 *
5375 * Return values:
5376 * 0: if successful
5377 * EINVAL: if an error occurred
5378 */
5379int
5380vmx_handle_cr0_write(struct vcpu *vcpu, uint64_t r)
5381{
5382 struct vmx_msr_store *msr_store;
5383 struct vmx_invvpid_descriptor vid;
5384 uint64_t ectls, oldcr0, cr4, mask;
5385 int ret;
5386
5387 /* Check must-be-0 bits */
5388 mask = vcpu->vc_vmx_cr0_fixed1;
5389 if (~r & mask) {
5390 /* Inject #GP, let the guest handle it */
5391 DPRINTF("%s: guest set invalid bits in %%cr0. Zeros "
5392 "mask=0x%llx, data=0x%llx\n", __func__,
5393 vcpu->vc_vmx_cr0_fixed1, r);
5394 vmm_inject_gp(vcpu);
5395 return (0);
5396 }
5397
5398 /* Check must-be-1 bits */
5399 mask = vcpu->vc_vmx_cr0_fixed0;
5400 if ((r & mask) != mask) {
5401 /* Inject #GP, let the guest handle it */
5402 DPRINTF("%s: guest set invalid bits in %%cr0. Ones "
5403 "mask=0x%llx, data=0x%llx\n", __func__,
5404 vcpu->vc_vmx_cr0_fixed0, r);
5405 vmm_inject_gp(vcpu);
5406 return (0);
5407 }
5408
5409 if (r & 0xFFFFFFFF00000000ULL) {
5410 DPRINTF("%s: setting bits 63:32 of %%cr0 is invalid,"
5411 " inject #GP, cr0=0x%llx\n", __func__, r);
5412 vmm_inject_gp(vcpu);
5413 return (0);
5414 }
5415
5416 if ((r & CR0_PG0x80000000) && (r & CR0_PE0x00000001) == 0) {
5417 DPRINTF("%s: PG flag set when the PE flag is clear,"
5418 " inject #GP, cr0=0x%llx\n", __func__, r);
5419 vmm_inject_gp(vcpu);
5420 return (0);
5421 }
5422
5423 if ((r & CR0_NW0x20000000) && (r & CR0_CD0x40000000) == 0) {
5424 DPRINTF("%s: NW flag set when the CD flag is clear,"
5425 " inject #GP, cr0=0x%llx\n", __func__, r);
5426 vmm_inject_gp(vcpu);
5427 return (0);
5428 }
5429
5430 if (vmread(VMCS_GUEST_IA32_CR00x6800, &oldcr0)) {
5431 printf("%s: can't read guest cr0\n", __func__);
5432 return (EINVAL22);
5433 }
5434
5435 /* CR0 must always have NE set */
5436 r |= CR0_NE0x00000020;
5437
5438 if (vmwrite(VMCS_GUEST_IA32_CR00x6800, r)) {
5439 printf("%s: can't write guest cr0\n", __func__);
5440 return (EINVAL22);
5441 }
5442
5443 /* If the guest hasn't enabled paging ... */
5444 if (!(r & CR0_PG0x80000000) && (oldcr0 & CR0_PG0x80000000)) {
5445 /* Paging was disabled (prev. enabled) - Flush TLB */
5446 if (vmm_softc->mode == VMM_MODE_EPT &&
5447 vcpu->vc_vmx_vpid_enabled) {
5448 vid.vid_vpid = vcpu->vc_vpid;
5449 vid.vid_addr = 0;
5450 invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB0x3, &vid);
5451 }
5452 } else if (!(oldcr0 & CR0_PG0x80000000) && (r & CR0_PG0x80000000)) {
5453 /*
5454		 * The guest has enabled paging, so the IA32_VMX_IA32E_MODE_GUEST
5455		 * control must be set to match EFER_LME.
5456 */
5457 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
5458
5459 if (vmread(VMCS_ENTRY_CTLS0x4012, &ectls)) {
5460 printf("%s: can't read entry controls", __func__);
5461 return (EINVAL22);
5462 }
5463
5464 if (msr_store[VCPU_REGS_EFER0].vms_data & EFER_LME0x00000100)
5465 ectls |= IA32_VMX_IA32E_MODE_GUEST(1ULL << 9);
5466 else
5467 ectls &= ~IA32_VMX_IA32E_MODE_GUEST(1ULL << 9);
5468
5469 if (vmwrite(VMCS_ENTRY_CTLS0x4012, ectls)) {
5470 printf("%s: can't write entry controls", __func__);
5471 return (EINVAL22);
5472 }
5473
5474 if (vmread(VMCS_GUEST_IA32_CR40x6804, &cr4)) {
5475 printf("%s: can't read guest cr4\n", __func__);
5476 return (EINVAL22);
5477 }
5478
5479 /* Load PDPTEs if PAE guest enabling paging */
5480 if (cr4 & CR4_PAE0x00000020) {
5481 ret = vmx_load_pdptes(vcpu);
5482
5483 if (ret) {
5484 printf("%s: updating PDPTEs failed\n", __func__);
5485 return (ret);
5486 }
5487 }
5488 }
5489
5490 return (0);
5491}
5492
5493/*
5494 * vmx_handle_cr4_write
5495 *
5496 * Write handler for CR4. This function ensures valid values are written into
5497 * CR4 for the cpu/vmm mode in use (cr4 must-be-0 and must-be-1 bits, etc).
5498 *
5499 * Parameters
5500 * vcpu: The vcpu taking the cr4 write exit
5501 * r: The guest's desired (incoming) cr4 value
5502 *
5503 * Return values:
5504 * 0: if successful
5505 * EINVAL: if an error occurred
5506 */
5507int
5508vmx_handle_cr4_write(struct vcpu *vcpu, uint64_t r)
5509{
5510 uint64_t mask;
5511
5512 /* Check must-be-0 bits */
5513	mask = ~(curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
5514 if (r & mask) {
5515 /* Inject #GP, let the guest handle it */
5516 DPRINTF("%s: guest set invalid bits in %%cr4. Zeros "
5517 "mask=0x%llx, data=0x%llx\n", __func__,
5518 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1,
5519 r);
5520 vmm_inject_gp(vcpu);
5521 return (0);
5522 }
5523
5524 /* Check must-be-1 bits */
5525	mask = curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0;
5526 if ((r & mask) != mask) {
5527 /* Inject #GP, let the guest handle it */
5528 DPRINTF("%s: guest set invalid bits in %%cr4. Ones "
5529 "mask=0x%llx, data=0x%llx\n", __func__,
5530 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0,
5531 r);
5532 vmm_inject_gp(vcpu);
5533 return (0);
5534 }
5535
5536 /* CR4_VMXE must always be enabled */
5537 r |= CR4_VMXE0x00002000;
5538
5539 if (vmwrite(VMCS_GUEST_IA32_CR40x6804, r)) {
5540 printf("%s: can't write guest cr4\n", __func__);
5541 return (EINVAL22);
5542 }
5543
5544 return (0);
5545}
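
The two mask checks above (and the analogous ones in vmx_handle_cr0_write) follow a common pattern: one mask lists bits that must be clear, the other bits that must be set. A small, self-contained sketch of that pattern with made-up masks and values:

        #include <stdint.h>
        #include <stdio.h>

        /*
         * Returns 1 if 'val' violates either constraint: any bit in must_be_0
         * set, or any bit in must_be_1 clear. The masks are illustrative,
         * not real CR fixed-bit MSR values.
         */
        static int
        violates_fixed_bits(uint64_t val, uint64_t must_be_0, uint64_t must_be_1)
        {
                if (val & must_be_0)
                        return 1;
                if ((val & must_be_1) != must_be_1)
                        return 1;
                return 0;
        }

        int
        main(void)
        {
                uint64_t must_be_0 = 0xffffffff00000000ULL;     /* hypothetical */
                uint64_t must_be_1 = 0x00002000ULL;             /* hypothetical VMXE-like bit */

                printf("%d\n", violates_fixed_bits(0x00002020ULL, must_be_0, must_be_1)); /* 0: ok */
                printf("%d\n", violates_fixed_bits(0x00000020ULL, must_be_0, must_be_1)); /* 1: must-be-1 bit missing */
                return 0;
        }
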
5546
5547/*
5548 * vmx_handle_cr
5549 *
5550 * Handle reads/writes to control registers (except CR3)
5551 */
5552int
5553vmx_handle_cr(struct vcpu *vcpu)
5554{
5555 uint64_t insn_length, exit_qual, r;
5556 uint8_t crnum, dir, reg;
5557
5558 if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
5559 printf("%s: can't obtain instruction length\n", __func__);
5560 return (EINVAL22);
5561 }
5562
5563 if (vmx_get_exit_qualification(&exit_qual)) {
5564 printf("%s: can't get exit qual\n", __func__);
5565 return (EINVAL22);
5566 }
5567
5568 /* Low 4 bits of exit_qual represent the CR number */
5569 crnum = exit_qual & 0xf;
5570
5571 /*
5572 * Bits 5:4 indicate the direction of operation (or special CR-modifying
5573 * instruction)
5574 */
5575 dir = (exit_qual & 0x30) >> 4;
5576
5577 /* Bits 11:8 encode the source/target register */
5578 reg = (exit_qual & 0xf00) >> 8;
5579
5580 switch (dir) {
5581 case CR_WRITE0:
5582 if (crnum == 0 || crnum == 4) {
5583 switch (reg) {
5584 case 0: r = vcpu->vc_gueststate.vg_rax; break;
5585 case 1: r = vcpu->vc_gueststate.vg_rcx; break;
5586 case 2: r = vcpu->vc_gueststate.vg_rdx; break;
5587 case 3: r = vcpu->vc_gueststate.vg_rbx; break;
5588 case 4: if (vmread(VMCS_GUEST_IA32_RSP0x681C, &r)) {
5589 printf("%s: unable to read guest "
5590 "RSP\n", __func__);
5591 return (EINVAL22);
5592 }
5593 break;
5594 case 5: r = vcpu->vc_gueststate.vg_rbp; break;
5595 case 6: r = vcpu->vc_gueststate.vg_rsi; break;
5596 case 7: r = vcpu->vc_gueststate.vg_rdi; break;
5597 case 8: r = vcpu->vc_gueststate.vg_r8; break;
5598 case 9: r = vcpu->vc_gueststate.vg_r9; break;
5599 case 10: r = vcpu->vc_gueststate.vg_r10; break;
5600 case 11: r = vcpu->vc_gueststate.vg_r11; break;
5601 case 12: r = vcpu->vc_gueststate.vg_r12; break;
5602 case 13: r = vcpu->vc_gueststate.vg_r13; break;
5603 case 14: r = vcpu->vc_gueststate.vg_r14; break;
5604 case 15: r = vcpu->vc_gueststate.vg_r15; break;
5605 }
5606 DPRINTF("%s: mov to cr%d @ %llx, data=0x%llx\n",
5607 __func__, crnum, vcpu->vc_gueststate.vg_rip, r);
5608 }
5609
5610 if (crnum == 0)
5611 vmx_handle_cr0_write(vcpu, r);
5612
5613 if (crnum == 4)
5614 vmx_handle_cr4_write(vcpu, r);
5615
5616 break;
5617 case CR_READ1:
5618 DPRINTF("%s: mov from cr%d @ %llx\n", __func__, crnum,
5619 vcpu->vc_gueststate.vg_rip);
5620 break;
5621 case CR_CLTS2:
5622 DPRINTF("%s: clts instruction @ %llx\n", __func__,
5623 vcpu->vc_gueststate.vg_rip);
5624 break;
5625 case CR_LMSW3:
5626 DPRINTF("%s: lmsw instruction @ %llx\n", __func__,
5627 vcpu->vc_gueststate.vg_rip);
5628 break;
5629 default:
5630 DPRINTF("%s: unknown cr access @ %llx\n", __func__,
5631 vcpu->vc_gueststate.vg_rip);
5632 }
5633
5634 vcpu->vc_gueststate.vg_rip += insn_length;
5635
5636 return (0);
5637}
5638
5639/*
5640 * vmx_handle_rdmsr
5641 *
5642 * Handler for rdmsr instructions. MSRs in the bitmap are allowed implicit
5643 * access and won't end up here. This handler is primarily intended to catch
5644 * otherwise unknown MSR accesses for possible later inclusion in the bitmap
5645 * list. Each MSR access that ends up here is logged (when VMM_DEBUG is
5646 * enabled).
5647 *
5648 * Parameters:
5649 * vcpu: vcpu structure containing instruction info causing the exit
5650 *
5651 * Return value:
5652 * 0: The operation was successful
5653 * EINVAL: An error occurred
5654 */
5655int
5656vmx_handle_rdmsr(struct vcpu *vcpu)
5657{
5658 uint64_t insn_length;
5659 uint64_t *rax, *rdx;
5660 uint64_t *rcx;
5661 int ret;
5662
5663 if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
5664 printf("%s: can't obtain instruction length\n", __func__);
5665 return (EINVAL22);
5666 }
5667
5668 if (insn_length != 2) {
5669 DPRINTF("%s: RDMSR with instruction length %lld not "
5670 "supported\n", __func__, insn_length);
5671 return (EINVAL22);
5672 }
5673
5674 rax = &vcpu->vc_gueststate.vg_rax;
5675 rcx = &vcpu->vc_gueststate.vg_rcx;
5676 rdx = &vcpu->vc_gueststate.vg_rdx;
5677
5678 switch (*rcx) {
5679 case MSR_BIOS_SIGN0x08b:
5680 case MSR_PLATFORM_ID0x017:
5681 /* Ignored */
5682 *rax = 0;
5683 *rdx = 0;
5684 break;
5685 case MSR_CR_PAT0x277:
5686 *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
5687 *rdx = (vcpu->vc_shadow_pat >> 32);
5688 break;
5689 default:
5690		/* Unsupported MSRs cause a #GP exception; don't advance %rip */
5691 DPRINTF("%s: unsupported rdmsr (msr=0x%llx), injecting #GP\n",
5692 __func__, *rcx);
5693 ret = vmm_inject_gp(vcpu);
5694 return (ret);
5695 }
5696
5697 vcpu->vc_gueststate.vg_rip += insn_length;
5698
5699 return (0);
5700}
5701
5702/*
5703 * vmx_handle_xsetbv
5704 *
5705 * VMX-specific part of the xsetbv instruction exit handler
5706 *
5707 * Parameters:
5708 * vcpu: vcpu structure containing instruction info causing the exit
5709 *
5710 * Return value:
5711 * 0: The operation was successful
5712 * EINVAL: An error occurred
5713 */
5714int
5715vmx_handle_xsetbv(struct vcpu *vcpu)
5716{
5717 uint64_t insn_length, *rax;
5718 int ret;
5719
5720 if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
5721 printf("%s: can't obtain instruction length\n", __func__);
5722 return (EINVAL22);
5723 }
5724
5725 /* All XSETBV instructions are 3 bytes */
5726 if (insn_length != 3) {
5727 DPRINTF("%s: XSETBV with instruction length %lld not "
5728 "supported\n", __func__, insn_length);
5729 return (EINVAL22);
5730 }
5731
5732 rax = &vcpu->vc_gueststate.vg_rax;
5733
5734 ret = vmm_handle_xsetbv(vcpu, rax);
5735
5736 vcpu->vc_gueststate.vg_rip += insn_length;
5737
5738 return ret;
5739}
5740
5741/*
5742 * svm_handle_xsetbv
5743 *
5744 * SVM-specific part of the xsetbv instruction exit handler
5745 *
5746 * Parameters:
5747 * vcpu: vcpu structure containing instruction info causing the exit
5748 *
5749 * Return value:
5750 * 0: The operation was successful
5751 * EINVAL: An error occurred
5752 */
5753int
5754svm_handle_xsetbv(struct vcpu *vcpu)
5755{
5756 uint64_t insn_length, *rax;
5757 int ret;
5758 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
5759
5760 /* All XSETBV instructions are 3 bytes */
5761 insn_length = 3;
5762
5763 rax = &vmcb->v_rax;
5764
5765 ret = vmm_handle_xsetbv(vcpu, rax);
5766
5767 vcpu->vc_gueststate.vg_rip += insn_length;
5768
5769 return ret;
5770}
5771
5772/*
5773 * vmm_handle_xsetbv
5774 *
5775 * Handler for xsetbv instructions. We allow the guest VM to set xcr0 values
5776 * limited to the xsave_mask in use in the host.
5777 *
5778 * Parameters:
5779 * vcpu: vcpu structure containing instruction info causing the exit
5780 * rax: pointer to guest %rax
5781 *
5782 * Return value:
5783 * 0: The operation was successful
5784 * EINVAL: An error occurred
5785 */
5786int
5787vmm_handle_xsetbv(struct vcpu *vcpu, uint64_t *rax)
5788{
5789 uint64_t *rdx, *rcx, val;
5790
5791 rcx = &vcpu->vc_gueststate.vg_rcx;
5792 rdx = &vcpu->vc_gueststate.vg_rdx;
5793
5794 if (vmm_get_guest_cpu_cpl(vcpu) != 0) {
5795 DPRINTF("%s: guest cpl not zero\n", __func__);
5796 return (vmm_inject_gp(vcpu));
5797 }
5798
5799 if (*rcx != 0) {
5800 DPRINTF("%s: guest specified invalid xcr register number "
5801 "%lld\n", __func__, *rcx);
5802 return (vmm_inject_gp(vcpu));
5803 }
5804
5805 val = *rax + (*rdx << 32);
5806 if (val & ~xsave_mask) {
5807 DPRINTF("%s: guest specified xcr0 outside xsave_mask %lld\n",
5808 __func__, val);
5809 return (vmm_inject_gp(vcpu));
5810 }
5811
5812 vcpu->vc_gueststate.vg_xcr0 = val;
5813
5814 return (0);
5815}
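
A quick stand-alone illustration of the check above: the requested XCR0 is composed from the guest's %edx:%eax and must be a subset of the host's xsave mask. The mask value below is made up for the example.

        #include <stdint.h>
        #include <stdio.h>

        int
        main(void)
        {
                uint64_t host_xsave_mask = 0x7;         /* hypothetical: x87 | SSE | AVX */
                uint64_t rax = 0x1f, rdx = 0x0;         /* guest-requested low/high halves */
                uint64_t val = (rax & 0xffffffffULL) | (rdx << 32);

                if (val & ~host_xsave_mask)
                        printf("reject: 0x%llx requests bits outside 0x%llx\n",
                            (unsigned long long)val,
                            (unsigned long long)host_xsave_mask);
                else
                        printf("accept: 0x%llx\n", (unsigned long long)val);
                return 0;
        }
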
5816
5817/*
5818 * vmx_handle_misc_enable_msr
5819 *
5820 * Handler for writes to the MSR_MISC_ENABLE (0x1a0) MSR on Intel CPUs. We
5821 * limit what the guest can write to this MSR (certain hardware-related
5822 * settings like speedstep, etc).
5823 *
5824 * Parameters:
5825 * vcpu: vcpu structure containing information about the wrmsr causing this
5826 * exit
5827 */
5828void
5829vmx_handle_misc_enable_msr(struct vcpu *vcpu)
5830{
5831 uint64_t *rax, *rdx;
5832 struct vmx_msr_store *msr_store;
5833
5834 rax = &vcpu->vc_gueststate.vg_rax;
5835 rdx = &vcpu->vc_gueststate.vg_rdx;
5836 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
5837
5838 /* Filter out guest writes to TCC, EIST, and xTPR */
5839 *rax &= ~(MISC_ENABLE_TCC(1 << 3) | MISC_ENABLE_EIST_ENABLED(1 << 16) |
5840 MISC_ENABLE_xTPR_MESSAGE_DISABLE(1 << 23));
5841
5842 msr_store[VCPU_REGS_MISC_ENABLE6].vms_data = *rax | (*rdx << 32);
5843}
5844
5845/*
5846 * vmx_handle_wrmsr
5847 *
5848 * Handler for wrmsr instructions. This handler logs the access (when
5849 * VMM_DEBUG is enabled) and discards the written data. Any valid wrmsr will not end
5850 * up here (it will be whitelisted in the MSR bitmap).
5851 *
5852 * Parameters:
5853 * vcpu: vcpu structure containing instruction info causing the exit
5854 *
5855 * Return value:
5856 * 0: The operation was successful
5857 * EINVAL: An error occurred
5858 */
5859int
5860vmx_handle_wrmsr(struct vcpu *vcpu)
5861{
5862 uint64_t insn_length, val;
5863 uint64_t *rax, *rdx, *rcx;
5864 int ret;
5865
5866 if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
5867 printf("%s: can't obtain instruction length\n", __func__);
5868 return (EINVAL22);
5869 }
5870
5871 if (insn_length != 2) {
5872 DPRINTF("%s: WRMSR with instruction length %lld not "
5873 "supported\n", __func__, insn_length);
5874 return (EINVAL22);
5875 }
5876
5877 rax = &vcpu->vc_gueststate.vg_rax;
5878 rcx = &vcpu->vc_gueststate.vg_rcx;
5879 rdx = &vcpu->vc_gueststate.vg_rdx;
5880 val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
5881
5882 switch (*rcx) {
5883 case MSR_CR_PAT0x277:
5884 if (!vmm_pat_is_valid(val)) {
5885 ret = vmm_inject_gp(vcpu);
5886 return (ret);
5887 }
5888 vcpu->vc_shadow_pat = val;
5889 break;
5890 case MSR_MISC_ENABLE0x1a0:
5891 vmx_handle_misc_enable_msr(vcpu);
5892 break;
5893 case MSR_SMM_MONITOR_CTL0x09b:
5894 /*
5895 * 34.15.5 - Enabling dual monitor treatment
5896 *
5897 * Unsupported, so inject #GP and return without
5898 * advancing %rip.
5899 */
5900 ret = vmm_inject_gp(vcpu);
5901 return (ret);
5902 case KVM_MSR_SYSTEM_TIME0x4b564d01:
5903 vmm_init_pvclock(vcpu,
5904 (*rax & 0xFFFFFFFFULL) | (*rdx << 32));
5905 break;
5906#ifdef VMM_DEBUG
5907 default:
5908 /*
5909 * Log the access, to be able to identify unknown MSRs
5910 */
5911 DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
5912 "written from guest=0x%llx:0x%llx\n", __func__,
5913 *rcx, *rdx, *rax);
5914#endif /* VMM_DEBUG */
5915 }
5916
5917 vcpu->vc_gueststate.vg_rip += insn_length;
5918
5919 return (0);
5920}
5921
5922/*
5923 * svm_handle_msr
5924 *
5925 * Handler for MSR instructions.
5926 *
5927 * Parameters:
5928 * vcpu: vcpu structure containing instruction info causing the exit
5929 *
5930 * Return value:
5931 * Always 0 (successful)
5932 */
5933int
5934svm_handle_msr(struct vcpu *vcpu)
5935{
5936 uint64_t insn_length, val;
5937 uint64_t *rax, *rcx, *rdx;
5938 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
5939 int ret;
5940
5941 /* XXX: Validate RDMSR / WRMSR insn_length */
5942 insn_length = 2;
5943
5944 rax = &vmcb->v_rax;
5945 rcx = &vcpu->vc_gueststate.vg_rcx;
5946 rdx = &vcpu->vc_gueststate.vg_rdx;
5947
5948 if (vmcb->v_exitinfo1 == 1) {
5949 /* WRMSR */
5950 val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
5951
5952 switch (*rcx) {
5953 case MSR_CR_PAT0x277:
5954 if (!vmm_pat_is_valid(val)) {
5955 ret = vmm_inject_gp(vcpu);
5956 return (ret);
5957 }
5958 vcpu->vc_shadow_pat = val;
5959 break;
5960 case MSR_EFER0xc0000080:
5961 vmcb->v_efer = *rax | EFER_SVME0x00001000;
5962 break;
5963 case KVM_MSR_SYSTEM_TIME0x4b564d01:
5964 vmm_init_pvclock(vcpu,
5965 (*rax & 0xFFFFFFFFULL) | (*rdx << 32));
5966 break;
5967 default:
5968 /* Log the access, to be able to identify unknown MSRs */
5969 DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
5970 "written from guest=0x%llx:0x%llx\n", __func__,
5971 *rcx, *rdx, *rax);
5972 }
5973 } else {
5974 /* RDMSR */
5975 switch (*rcx) {
5976 case MSR_BIOS_SIGN0x08b:
5977 case MSR_INT_PEN_MSG0xc0010055:
5978 case MSR_PLATFORM_ID0x017:
5979 /* Ignored */
5980 *rax = 0;
5981 *rdx = 0;
5982 break;
5983 case MSR_CR_PAT0x277:
5984 *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
5985 *rdx = (vcpu->vc_shadow_pat >> 32);
5986 break;
5987 case MSR_DE_CFG0xc0011029:
5988 /* LFENCE serializing bit is set by host */
5989 *rax = DE_CFG_SERIALIZE_LFENCE(1 << 1);
5990 *rdx = 0;
5991 break;
5992 default:
5993 /*
5994			 * Unsupported MSRs cause a #GP exception; don't
5995			 * advance %rip.
5996 */
5997 DPRINTF("%s: unsupported rdmsr (msr=0x%llx), "
5998 "injecting #GP\n", __func__, *rcx);
5999 ret = vmm_inject_gp(vcpu);
6000 return (ret);
6001 }
6002 }
6003
6004 vcpu->vc_gueststate.vg_rip += insn_length;
6005
6006 return (0);
6007}
6008
6009/*
6010 * vmm_handle_cpuid
6011 *
6012 * Exit handler for CPUID instruction
6013 *
6014 * Parameters:
6015 * vcpu: vcpu causing the CPUID exit
6016 *
6017 * Return value:
6018 * 0: the exit was processed successfully
6019 * EINVAL: error occurred validating the CPUID instruction arguments
6020 */
6021int
6022vmm_handle_cpuid(struct vcpu *vcpu)
6023{
6024 uint64_t insn_length, cr4;
6025 uint64_t *rax, *rbx, *rcx, *rdx;
6026 struct vmcb *vmcb;
6027 uint32_t leaf, subleaf, eax, ebx, ecx, edx;
6028 struct vmx_msr_store *msr_store;
6029 int vmm_cpuid_level;
6030
6031 /* what's the cpuid level we support/advertise? */
6032 vmm_cpuid_level = cpuid_level;
6033 if (vmm_cpuid_level < 0x15 && tsc_is_invariant)
6034 vmm_cpuid_level = 0x15;
6035
6036 if (vmm_softc->mode == VMM_MODE_EPT) {
6037 if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
6038 DPRINTF("%s: can't obtain instruction length\n",
6039 __func__);
6040 return (EINVAL22);
6041 }
6042
6043 if (vmread(VMCS_GUEST_IA32_CR40x6804, &cr4)) {
6044 DPRINTF("%s: can't obtain cr4\n", __func__);
6045 return (EINVAL22);
6046 }
6047
6048 rax = &vcpu->vc_gueststate.vg_rax;
6049
6050 /*
6051 * "CPUID leaves above 02H and below 80000000H are only
6052 * visible when IA32_MISC_ENABLE MSR has bit 22 set to its
6053 * default value 0"
6054 */
6055 msr_store =
6056 (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
6057 if (msr_store[VCPU_REGS_MISC_ENABLE6].vms_data &
6058 MISC_ENABLE_LIMIT_CPUID_MAXVAL(1 << 22))
6059 vmm_cpuid_level = 0x02;
6060 } else {
6061 /* XXX: validate insn_length 2 */
6062 insn_length = 2;
6063 vmcb = (struct vmcb *)vcpu->vc_control_va;
6064 rax = &vmcb->v_rax;
6065 cr4 = vmcb->v_cr4;
6066 }
6067
6068 rbx = &vcpu->vc_gueststate.vg_rbx;
6069 rcx = &vcpu->vc_gueststate.vg_rcx;
6070 rdx = &vcpu->vc_gueststate.vg_rdx;
6071 vcpu->vc_gueststate.vg_rip += insn_length;
6072
6073 leaf = *rax;
6074 subleaf = *rcx;
6075
6076 /*
6077 * "If a value entered for CPUID.EAX is higher than the maximum input
6078 * value for basic or extended function for that processor then the
6079 * data for the highest basic information leaf is returned."
6080 *
6081 * "When CPUID returns the highest basic leaf information as a result
6082 * of an invalid input EAX value, any dependence on input ECX value
6083 * in the basic leaf is honored."
6084 *
6085 * This means if leaf is between vmm_cpuid_level and 0x40000000 (the start
6086 * of the hypervisor info leaves), clamp to vmm_cpuid_level, but without
6087	 * altering subleaf. Likewise, if leaf is greater than the highest
6088	 * extended function leaf, clamp to vmm_cpuid_level.
6089 */
6090 if ((leaf > vmm_cpuid_level && leaf < 0x40000000) ||
6091	    (leaf > curcpu()->ci_pnfeatset)) {
6092 DPRINTF("%s: invalid cpuid input leaf 0x%x, guest rip="
6093 "0x%llx - resetting to 0x%x\n", __func__, leaf,
6094 vcpu->vc_gueststate.vg_rip - insn_length,
6095 vmm_cpuid_level);
6096 leaf = vmm_cpuid_level;
6097 }
6098
6099 /* we fake up values in the range (cpuid_level, vmm_cpuid_level] */
6100 if (leaf <= cpuid_level || leaf > 0x80000000)
6101		CPUID_LEAF(leaf, subleaf, eax, ebx, ecx, edx);
6102 else
6103 eax = ebx = ecx = edx = 0;
6104
6105 switch (leaf) {
6106 case 0x00: /* Max level and vendor ID */
6107 *rax = vmm_cpuid_level;
6108 *rbx = *((uint32_t *)&cpu_vendor);
6109 *rdx = *((uint32_t *)&cpu_vendor + 1);
6110 *rcx = *((uint32_t *)&cpu_vendor + 2);
6111 break;
6112 case 0x01: /* Version, brand, feature info */
6113 *rax = cpu_id;
6114 /* mask off host's APIC ID, reset to vcpu id */
6115 *rbx = cpu_ebxfeature & 0x0000FFFF;
6116 *rbx |= (vcpu->vc_id & 0xFF) << 24;
6117		*rcx = (cpu_ecxfeature | CPUIDECX_HV) & VMM_CPUIDECX_MASK;
6118
6119 /* Guest CR4.OSXSAVE determines presence of CPUIDECX_OSXSAVE */
6120 if (cr4 & CR4_OSXSAVE0x00040000)
6121 *rcx |= CPUIDECX_OSXSAVE0x08000000;
6122 else
6123 *rcx &= ~CPUIDECX_OSXSAVE0x08000000;
6124
6125		*rdx = curcpu()->ci_feature_flags & VMM_CPUIDEDX_MASK;
6126 break;
6127 case 0x02: /* Cache and TLB information */
6128 *rax = eax;
6129 *rbx = ebx;
6130 *rcx = ecx;
6131 *rdx = edx;
6132 break;
6133 case 0x03: /* Processor serial number (not supported) */
6134 DPRINTF("%s: function 0x03 (processor serial number) not "
6135 "supported\n", __func__);
6136 *rax = 0;
6137 *rbx = 0;
6138 *rcx = 0;
6139 *rdx = 0;
6140 break;
6141 case 0x04: /* Deterministic cache info */
6142 *rax = eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK0x3FF;
6143 *rbx = ebx;
6144 *rcx = ecx;
6145 *rdx = edx;
6146 break;
6147 case 0x05: /* MONITOR/MWAIT (not supported) */
6148 DPRINTF("%s: function 0x05 (monitor/mwait) not supported\n",
6149 __func__);
6150 *rax = 0;
6151 *rbx = 0;
6152 *rcx = 0;
6153 *rdx = 0;
6154 break;
6155 case 0x06: /* Thermal / Power management (not supported) */
6156 DPRINTF("%s: function 0x06 (thermal/power mgt) not supported\n",
6157 __func__);
6158 *rax = 0;
6159 *rbx = 0;
6160 *rcx = 0;
6161 *rdx = 0;
6162 break;
6163 case 0x07: /* SEFF */
6164 if (subleaf == 0) {
6165 *rax = 0; /* Highest subleaf supported */
6166			*rbx = curcpu()->ci_feature_sefflags_ebx & VMM_SEFF0EBX_MASK;
6167			*rcx = curcpu()->ci_feature_sefflags_ecx & VMM_SEFF0ECX_MASK;
6168			*rdx = curcpu()->ci_feature_sefflags_edx & VMM_SEFF0EDX_MASK;
6169 /*
6170 * Only expose PKU support if we've detected it in use
6171 * on the host.
6172 */
6173 if (vmm_softc->sc_md.pkru_enabled)
6174 *rcx |= SEFF0ECX_PKU0x00000008;
6175 else
6176 *rcx &= ~SEFF0ECX_PKU0x00000008;
6177
6178 /* Expose IBT bit if we've enabled CET on the host. */
6179 if (rcr4() & CR4_CET0x00800000)
6180 *rdx |= SEFF0EDX_IBT0x00100000;
6181 else
6182 *rdx &= ~SEFF0EDX_IBT0x00100000;
6183
6184 } else {
6185 /* Unsupported subleaf */
6186 DPRINTF("%s: function 0x07 (SEFF) unsupported subleaf "
6187 "0x%x not supported\n", __func__, subleaf);
6188 *rax = 0;
6189 *rbx = 0;
6190 *rcx = 0;
6191 *rdx = 0;
6192 }
6193 break;
6194 case 0x09: /* Direct Cache Access (not supported) */
6195 DPRINTF("%s: function 0x09 (direct cache access) not "
6196 "supported\n", __func__);
6197 *rax = 0;
6198 *rbx = 0;
6199 *rcx = 0;
6200 *rdx = 0;
6201 break;
6202 case 0x0a: /* Architectural perf monitoring (not supported) */
6203 DPRINTF("%s: function 0x0a (arch. perf mon) not supported\n",
6204 __func__);
6205 *rax = 0;
6206 *rbx = 0;
6207 *rcx = 0;
6208 *rdx = 0;
6209 break;
6210 case 0x0b: /* Extended topology enumeration (not supported) */
6211 DPRINTF("%s: function 0x0b (topology enumeration) not "
6212 "supported\n", __func__);
6213 *rax = 0;
6214 *rbx = 0;
6215 *rcx = 0;
6216 *rdx = 0;
6217 break;
6218 case 0x0d: /* Processor ext. state information */
6219 if (subleaf == 0) {
6220 *rax = xsave_mask;
6221 *rbx = ebx;
6222 *rcx = ecx;
6223 *rdx = edx;
6224 } else if (subleaf == 1) {
6225 *rax = 0;
6226 *rbx = 0;
6227 *rcx = 0;
6228 *rdx = 0;
6229 } else {
6230 *rax = eax;
6231 *rbx = ebx;
6232 *rcx = ecx;
6233 *rdx = edx;
6234 }
6235 break;
6236 case 0x0f: /* QoS info (not supported) */
6237 DPRINTF("%s: function 0x0f (QoS info) not supported\n",
6238 __func__);
6239 *rax = 0;
6240 *rbx = 0;
6241 *rcx = 0;
6242 *rdx = 0;
6243 break;
6244 case 0x14: /* Processor Trace info (not supported) */
6245 DPRINTF("%s: function 0x14 (processor trace info) not "
6246 "supported\n", __func__);
6247 *rax = 0;
6248 *rbx = 0;
6249 *rcx = 0;
6250 *rdx = 0;
6251 break;
6252 case 0x15:
6253 if (cpuid_level >= 0x15) {
6254 *rax = eax;
6255 *rbx = ebx;
6256 *rcx = ecx;
6257 *rdx = edx;
6258 } else {
6259			KASSERT(tsc_is_invariant);
6260 *rax = 1;
6261 *rbx = 100;
6262 *rcx = tsc_frequency / 100;
6263 *rdx = 0;
6264 }
6265 break;
6266 case 0x16: /* Processor frequency info */
6267 *rax = eax;
6268 *rbx = ebx;
6269 *rcx = ecx;
6270 *rdx = edx;
6271 break;
6272 case 0x40000000: /* Hypervisor information */
6273 *rax = 0;
6274 *rbx = *((uint32_t *)&vmm_hv_signature[0]);
6275 *rcx = *((uint32_t *)&vmm_hv_signature[4]);
6276 *rdx = *((uint32_t *)&vmm_hv_signature[8]);
6277 break;
6278 case 0x40000001: /* KVM hypervisor features */
6279 *rax = (1 << KVM_FEATURE_CLOCKSOURCE23) |
6280 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT24);
6281 *rbx = 0;
6282 *rcx = 0;
6283 *rdx = 0;
6284 break;
6285 case 0x80000000: /* Extended function level */
6286 *rax = 0x80000008; /* curcpu()->ci_pnfeatset */
6287 *rbx = 0;
6288 *rcx = 0;
6289 *rdx = 0;
6290 break;
6291 case 0x80000001: /* Extended function info */
6292		*rax = curcpu()->ci_efeature_eax;
6293		*rbx = 0;	/* Reserved */
6294		*rcx = curcpu()->ci_efeature_ecx & VMM_ECPUIDECX_MASK;
6295		*rdx = curcpu()->ci_feature_eflags & VMM_FEAT_EFLAGS_MASK;
6296 break;
6297 case 0x80000002: /* Brand string */
6298		*rax = curcpu()->ci_brand[0];
6299		*rbx = curcpu()->ci_brand[1];
6300		*rcx = curcpu()->ci_brand[2];
6301		*rdx = curcpu()->ci_brand[3];
6302 break;
6303 case 0x80000003: /* Brand string */
6304		*rax = curcpu()->ci_brand[4];
6305		*rbx = curcpu()->ci_brand[5];
6306		*rcx = curcpu()->ci_brand[6];
6307		*rdx = curcpu()->ci_brand[7];
6308 break;
6309 case 0x80000004: /* Brand string */
6310		*rax = curcpu()->ci_brand[8];
6311		*rbx = curcpu()->ci_brand[9];
6312		*rcx = curcpu()->ci_brand[10];
6313		*rdx = curcpu()->ci_brand[11];
6314 break;
6315 case 0x80000005: /* Reserved (Intel), cacheinfo (AMD) */
6316 *rax = eax;
6317 *rbx = ebx;
6318 *rcx = ecx;
6319 *rdx = edx;
6320 break;
6321 case 0x80000006: /* ext. cache info */
6322 *rax = eax;
6323 *rbx = ebx;
6324 *rcx = ecx;
6325 *rdx = edx;
6326 break;
6327 case 0x80000007: /* apmi */
6328 *rax = eax;
6329 *rbx = ebx;
6330 *rcx = ecx;
6331 *rdx = edx & VMM_APMI_EDX_INCLUDE_MASK((1 << 8));
6332 break;
6333 case 0x80000008: /* Phys bits info and topology (AMD) */
6334 *rax = eax;
6335		*rbx = ebx & VMM_AMDSPEC_EBX_MASK;
6336 /* Reset %rcx (topology) */
6337 *rcx = 0;
6338 *rdx = edx;
6339 break;
6340 case 0x8000001d: /* cache topology (AMD) */
6341 *rax = eax;
6342 *rbx = ebx;
6343 *rcx = ecx;
6344 *rdx = edx;
6345 break;
6346 default:
6347 DPRINTF("%s: unsupported rax=0x%llx\n", __func__, *rax);
6348 *rax = 0;
6349 *rbx = 0;
6350 *rcx = 0;
6351 *rdx = 0;
6352 }
6353
6354
6355 if (vmm_softc->mode == VMM_MODE_RVI) {
6356 /*
6357 * update %rax. the rest of the registers get updated in
6358 * svm_enter_guest
6359 */
6360 vmcb->v_rax = *rax;
6361 }
6362
6363 return (0);
6364}
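
The clamping rule described in the comment block above can be summarized in a few lines; the limits below are placeholders rather than values read from a real CPU.

        #include <stdint.h>
        #include <stdio.h>

        static uint32_t
        clamp_leaf(uint32_t leaf, uint32_t vmm_cpuid_level, uint32_t max_ext_leaf)
        {
                /* Leaves between the advertised basic max and the hypervisor range... */
                if (leaf > vmm_cpuid_level && leaf < 0x40000000)
                        return vmm_cpuid_level;
                /* ...and leaves past the extended-function max both fall back. */
                if (leaf > max_ext_leaf)
                        return vmm_cpuid_level;
                return leaf;
        }

        int
        main(void)
        {
                uint32_t vmm_cpuid_level = 0x16, max_ext_leaf = 0x8000001d; /* hypothetical */

                printf("0x%x\n", clamp_leaf(0x20, vmm_cpuid_level, max_ext_leaf));        /* basic gap: clamped to 0x16 */
                printf("0x%x\n", clamp_leaf(0x40000001, vmm_cpuid_level, max_ext_leaf));  /* hypervisor leaf: kept */
                printf("0x%x\n", clamp_leaf(0x80001000, vmm_cpuid_level, max_ext_leaf));  /* past extended max: clamped */
                return 0;
        }
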
6365
6366/*
6367 * vcpu_run_svm
6368 *
6369 * SVM main loop used to run a VCPU.
6370 *
6371 * Parameters:
6372 * vcpu: The VCPU to run
6373 * vrp: run parameters
6374 *
6375 * Return values:
6376 * 0: The run loop exited and no help is needed from vmd
6377 * EAGAIN: The run loop exited and help from vmd is needed
6378 * EINVAL: an error occurred
6379 */
6380int
6381vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp)
6382{
6383 int ret = 0;
6384 struct region_descriptor gdt;
6385 struct cpu_info *ci = NULL((void *)0);
14
'ci' initialized to a null pointer value
6386 uint64_t exit_reason;
6387 struct schedstate_percpu *spc;
6388 uint16_t irq;
6389 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
6390
6391 irq = vrp->vrp_irq;
6392
6393 if (vrp->vrp_intr_pending)
15
Assuming field 'vrp_intr_pending' is 0
16
Taking false branch
6394 vcpu->vc_intr = 1;
6395 else
6396 vcpu->vc_intr = 0;
6397
6398 /*
6399 * If we are returning from userspace (vmd) because we exited
6400 * last time, fix up any needed vcpu state first. Which state
6401 * needs to be fixed up depends on what vmd populated in the
6402 * exit data structure.
6403 */
6404	if (vrp->vrp_continue) {
16.1
Field 'vrp_continue' is 0
17
Taking false branch
6405 switch (vcpu->vc_gueststate.vg_exit_reason) {
6406 case SVM_VMEXIT_IOIO0x7B:
6407 if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) {
6408 vcpu->vc_gueststate.vg_rax =
6409 vcpu->vc_exit.vei.vei_data;
6410 vmcb->v_rax = vcpu->vc_gueststate.vg_rax;
6411 }
6412 vcpu->vc_gueststate.vg_rip =
6413 vcpu->vc_exit.vrs.vrs_gprs[VCPU_REGS_RIP16];
6414 vmcb->v_rip = vcpu->vc_gueststate.vg_rip;
6415 break;
6416 case SVM_VMEXIT_NPF0x400:
6417 ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_GPRS0x1,
6418 &vcpu->vc_exit.vrs);
6419 if (ret) {
6420 printf("%s: vm %d vcpu %d failed to update "
6421 "registers\n", __func__,
6422 vcpu->vc_parent->vm_id, vcpu->vc_id);
6423 return (EINVAL22);
6424 }
6425 break;
6426 }
6427		memset(&vcpu->vc_exit, 0, sizeof(vcpu->vc_exit));
6428 }
6429
6430 while (ret == 0) {
18
Loop condition is true. Entering loop body
6431 vmm_update_pvclock(vcpu);
19
Calling 'vmm_update_pvclock'
22
Returning from 'vmm_update_pvclock'
6432		if (ci != curcpu()) {
23
Assuming the condition is false
6433 /*
6434 * We are launching for the first time, or we are
6435 * resuming from a different pcpu, so we need to
6436 * reset certain pcpu-specific values.
6437 */
6438			ci = curcpu();
6439 setregion(&gdt, ci->ci_gdt, GDT_SIZE((6 << 3) + (1 << 4)) - 1);
6440
6441 if (ci != vcpu->vc_last_pcpu) {
6442 /*
6443 * Flush TLB by guest ASID if feature
6444 * available, flush entire TLB if not.
6445 */
6446 if (ci->ci_vmm_cap.vcc_svm.svm_flush_by_asid)
6447 vmcb->v_tlb_control =
6448 SVM_TLB_CONTROL_FLUSH_ASID3;
6449 else
6450 vmcb->v_tlb_control =
6451 SVM_TLB_CONTROL_FLUSH_ALL1;
6452
6453				svm_set_dirty(vcpu, SVM_CLEANBITS_ALL);
6454 }
6455
6456 vcpu->vc_last_pcpu = ci;
6457
6458 if (gdt.rd_base == 0) {
6459 ret = EINVAL22;
6460 break;
6461 }
6462 }
6463
6464 /* Handle vmd(8) injected interrupts */
6465 /* Is there an interrupt pending injection? */
6466 if (irq != 0xFFFF && vcpu->vc_irqready) {
24
Assuming 'irq' is equal to 65535
6467 vmcb->v_eventinj = (irq & 0xFF) | (1U << 31);
6468 irq = 0xFFFF;
6469 }
6470
6471 /* Inject event if present */
6472 if (vcpu->vc_event != 0) {
25
Assuming field 'vc_event' is equal to 0
26
Taking false branch
6473 DPRINTF("%s: inject event %d\n", __func__,
6474 vcpu->vc_event);
6475 vmcb->v_eventinj = 0;
6476 /* Set the "Event Valid" flag for certain vectors */
6477 switch (vcpu->vc_event & 0xFF) {
6478 case VMM_EX_DF8:
6479 case VMM_EX_TS10:
6480 case VMM_EX_NP11:
6481 case VMM_EX_SS12:
6482 case VMM_EX_GP13:
6483 case VMM_EX_PF14:
6484 case VMM_EX_AC17:
6485 vmcb->v_eventinj |= (1ULL << 11);
6486 }
6487 vmcb->v_eventinj |= (vcpu->vc_event) | (1U << 31);
6488 vmcb->v_eventinj |= (3ULL << 8); /* Exception */
6489 vcpu->vc_event = 0;
6490 }
6491
6492		TRACEPOINT(vmm, guest_enter, vcpu, vrp);
27
Assuming 'dt_tracing' is equal to 0
28
Loop condition is false. Exiting loop
6493
6494 /* Start / resume the VCPU */
6495 /* Disable interrupts and save the current host FPU state. */
6496 clgi();
6497 if ((ret = vmm_fpurestore(vcpu))) {
29
Calling 'vmm_fpurestore'
36
Returning from 'vmm_fpurestore'
37
Assuming 'ret' is 0
38
Taking false branch
6498 stgi();
6499 break;
6500 }
6501
6502 /* Restore any guest PKRU state. */
6503 if (vmm_softc->sc_md.pkru_enabled)
39
Assuming field 'pkru_enabled' is 0
6504 wrpkru(vcpu->vc_pkru);
6505
6506		KASSERT(vmcb->v_intercept1 & SVM_INTERCEPT_INTR);
40
Taking false branch
41
Assuming the condition is true
42
'?' condition is true
6507 wrmsr(MSR_AMD_VM_HSAVE_PA0xc0010117, vcpu->vc_svm_hsa_pa);
6508
6509 ret = svm_enter_guest(vcpu->vc_control_pa,
6510 &vcpu->vc_gueststate, &gdt);
6511
6512 /* Restore host PKRU state. */
6513 if (vmm_softc->sc_md.pkru_enabled) {
43
Assuming field 'pkru_enabled' is 0
44
Taking false branch
6514 vcpu->vc_pkru = rdpkru(0);
6515 wrpkru(PGK_VALUE0xfffffffc);
6516 }
6517
6518 /*
6519 * On exit, interrupts are disabled, and we are running with
6520 * the guest FPU state still possibly on the CPU. Save the FPU
6521 * state before re-enabling interrupts.
6522 */
6523 vmm_fpusave(vcpu);
6524
6525 /*
6526 * Enable interrupts now. Note that if the exit was due to INTR
6527 * (external interrupt), the interrupt will be processed now.
6528 */
6529 stgi();
6530
6531 vcpu->vc_gueststate.vg_rip = vmcb->v_rip;
6532 vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_NONE0;
6533		svm_set_clean(vcpu, SVM_CLEANBITS_ALL);
6534
6535 /* If we exited successfully ... */
6536 if (ret == 0) {
45
Assuming 'ret' is equal to 0
46
Taking true branch
6537 exit_reason = vmcb->v_exitcode;
6538 vcpu->vc_gueststate.vg_exit_reason = exit_reason;
6539			TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason);
47
Assuming 'dt_tracing' is equal to 0
48
Loop condition is false. Exiting loop
6540
6541 vcpu->vc_gueststate.vg_rflags = vmcb->v_rflags;
6542
6543 /*
6544 * Handle the exit. This will alter "ret" to EAGAIN if
6545 * the exit handler determines help from vmd is needed.
6546 */
6547 ret = svm_handle_exit(vcpu);
49
Calling 'svm_handle_exit'
57
Returning from 'svm_handle_exit'
6548
6549 if (vcpu->vc_gueststate.vg_rflags & PSL_I0x00000200)
58
Assuming the condition is true
59
Taking true branch
6550 vcpu->vc_irqready = 1;
6551 else
6552 vcpu->vc_irqready = 0;
6553
6554 /*
6555 * If not ready for interrupts, but interrupts pending,
6556 * enable interrupt window exiting.
6557 */
6558			if (vcpu->vc_irqready == 0 && vcpu->vc_intr) {
59.1
Field 'vc_irqready' is not equal to 0
6559 vmcb->v_intercept1 |= SVM_INTERCEPT_VINTR(1UL << 4);
6560 vmcb->v_irq = 1;
6561 vmcb->v_intr_misc = SVM_INTR_MISC_V_IGN_TPR0x10;
6562 vmcb->v_intr_vector = 0;
6563 svm_set_dirty(vcpu, SVM_CLEANBITS_TPR(1 << 3) |
6564 SVM_CLEANBITS_I(1 << 0));
6565 }
6566
6567 /*
6568 * Exit to vmd if we are terminating, failed to enter,
6569 * or need help (device I/O)
6570 */
6571			if (ret || vcpu_must_stop(vcpu))
59.2
'ret' is 0
60
Assuming the condition is false
6572 break;
6573
6574 if (vcpu->vc_intr && vcpu->vc_irqready) {
61
Assuming field 'vc_intr' is 0
6575 ret = EAGAIN35;
6576 break;
6577 }
6578
6579 /* Check if we should yield - don't hog the cpu */
6580 spc = &ci->ci_schedstate;
62
Null pointer value stored to 'spc'
6581 if (spc->spc_schedflags & SPCF_SHOULDYIELD0x0002)
63
Access to field 'spc_schedflags' results in a dereference of a null pointer (loaded from variable 'spc')
6582 break;
6583 }
6584 }
6585
6586 /*
6587 * We are heading back to userspace (vmd), either because we need help
6588 * handling an exit, a guest interrupt is pending, or we failed in some
6589 * way to enter the guest. Copy the guest registers to the exit struct
6590 * and return to vmd.
6591 */
6592 if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), &vcpu->vc_exit.vrs))
6593 ret = EINVAL22;
6594
6595 return (ret);
6596}
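
The analyzer's path above hinges on 'ci' still being NULL when 'spc = &ci->ci_schedstate' is reached, which requires 'ci != curcpu()' to be assumed false on the first loop iteration. Purely as an illustration of the report, and not a proposed patch, a minimal sketch of one way to make that state unrepresentable is to establish 'ci' before entering the loop; the types below are stand-ins, not the kernel's.

        #include <assert.h>
        #include <stdio.h>

        struct schedstate { int spc_schedflags; };
        struct cpu { struct schedstate ci_schedstate; };

        static struct cpu fake_cpu;

        /* Stand-in for curcpu(); always returns a valid pointer, as on real hardware. */
        static struct cpu *
        cur_cpu(void)
        {
                return &fake_cpu;
        }

        int
        main(void)
        {
                struct cpu *ci = cur_cpu();     /* established before the loop */
                struct schedstate *spc;
                int iterations = 3;

                while (iterations--) {
                        if (ci != cur_cpu())
                                ci = cur_cpu(); /* migrated: refresh per-cpu state */
                        spc = &ci->ci_schedstate;
                        assert(spc != NULL);    /* cannot be NULL on this path */
                        if (spc->spc_schedflags & 0x0002)
                                break;
                }
                printf("done\n");
                return 0;
        }
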
6597
6598/*
6599 * vmm_alloc_vpid
6600 *
6601 * Sets the memory location pointed to by "vpid" to the next available VPID
6602 * or ASID.
6603 *
6604 * Parameters:
6605 * vpid: Pointer to location to receive the next VPID/ASID
6606 *
6607 * Return Values:
6608 * 0: The operation completed successfully
6609 * ENOMEM: No VPIDs/ASIDs were available. Content of 'vpid' is unchanged.
6610 */
6611int
6612vmm_alloc_vpid(uint16_t *vpid)
6613{
6614 uint16_t i;
6615 uint8_t idx, bit;
6616 struct vmm_softc *sc = vmm_softc;
6617
6618 rw_enter_write(&vmm_softc->vpid_lock);
6619 for (i = 1; i <= sc->max_vpid; i++) {
6620 idx = i / 8;
6621 bit = i - (idx * 8);
6622
6623 if (!(sc->vpids[idx] & (1 << bit))) {
6624 sc->vpids[idx] |= (1 << bit);
6625 *vpid = i;
6626 DPRINTF("%s: allocated VPID/ASID %d\n", __func__,
6627 i);
6628 rw_exit_write(&vmm_softc->vpid_lock);
6629 return 0;
6630 }
6631 }
6632
6633 printf("%s: no available %ss\n", __func__,
6634 (sc->mode == VMM_MODE_EPT) ? "VPID" :
6635 "ASID");
6636
6637 rw_exit_write(&vmm_softc->vpid_lock);
6638 return ENOMEM12;
6639}
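
The byte/bit arithmetic above is the usual bitmap-allocator idiom (idx = i / 8, bit = i % 8). A tiny stand-alone version with a made-up pool size:

        #include <stdint.h>
        #include <stdio.h>
        #include <string.h>

        #define MAX_ID  64      /* hypothetical pool size */

        static uint8_t bitmap[MAX_ID / 8 + 1];

        static int
        alloc_id(uint16_t *id)
        {
                uint16_t i;

                for (i = 1; i <= MAX_ID; i++) {
                        uint8_t idx = i / 8, bit = i % 8;

                        if (!(bitmap[idx] & (1 << bit))) {
                                bitmap[idx] |= (1 << bit);
                                *id = i;
                                return 0;
                        }
                }
                return -1;      /* pool exhausted */
        }

        int
        main(void)
        {
                uint16_t id;

                memset(bitmap, 0, sizeof(bitmap));
                if (alloc_id(&id) == 0)
                        printf("allocated id %u\n", id);        /* 1 on first call */
                return 0;
        }
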
6640
6641/*
6642 * vmm_free_vpid
6643 *
6644 * Frees the VPID/ASID id supplied in "vpid".
6645 *
6646 * Parameters:
6647 * vpid: VPID/ASID to free.
6648 */
6649void
6650vmm_free_vpid(uint16_t vpid)
6651{
6652 uint8_t idx, bit;
6653 struct vmm_softc *sc = vmm_softc;
6654
6655 rw_enter_write(&vmm_softc->vpid_lock);
6656 idx = vpid / 8;
6657 bit = vpid - (idx * 8);
6658 sc->vpids[idx] &= ~(1 << bit);
6659
6660 DPRINTF("%s: freed VPID/ASID %d\n", __func__, vpid);
6661 rw_exit_write(&vmm_softc->vpid_lock);
6662}
6663
6664
6665/* vmm_gpa_is_valid
6666 *
6667 * Check if the given gpa is within guest memory space.
6668 *
6669 * Parameters:
6670 * vcpu: The virtual cpu we are running on.
6671 * gpa: The address to check.
6672 * obj_size: The size of the object assigned to gpa
6673 *
6674 * Return values:
6675 * 1: gpa is within the memory ranges allocated for the vcpu
6676 * 0: otherwise
6677 */
6678int
6679vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size)
6680{
6681 struct vm *vm = vcpu->vc_parent;
6682 struct vm_mem_range *vmr;
6683 size_t i;
6684
6685 for (i = 0; i < vm->vm_nmemranges; ++i) {
6686 vmr = &vm->vm_memranges[i];
6687 if (vmr->vmr_size >= obj_size &&
6688 vmr->vmr_gpa <= gpa &&
6689 gpa < (vmr->vmr_gpa + vmr->vmr_size - obj_size)) {
6690 return 1;
6691 }
6692 }
6693 return 0;
6694}
6695
6696void
6697vmm_init_pvclock(struct vcpu *vcpu, paddr_t gpa)
6698{
6699 paddr_t pvclock_gpa = gpa & 0xFFFFFFFFFFFFFFF0;
6700 if (!vmm_gpa_is_valid(vcpu, pvclock_gpa,
6701 sizeof(struct pvclock_time_info))) {
6702 /* XXX: Kill guest? */
6703 vmm_inject_gp(vcpu);
6704 return;
6705 }
6706
6707 /* XXX: handle case when this struct goes over page boundaries */
6708 if ((pvclock_gpa & PAGE_MASK((1 << 12) - 1)) + sizeof(struct pvclock_time_info) >
6709 PAGE_SIZE(1 << 12)) {
6710 vmm_inject_gp(vcpu);
6711 return;
6712 }
6713
6714 vcpu->vc_pvclock_system_gpa = gpa;
6715 if (tsc_frequency > 0)
6716 vcpu->vc_pvclock_system_tsc_mul =
6717 (int) ((1000000000L << 20) / tsc_frequency);
6718 else
6719 vcpu->vc_pvclock_system_tsc_mul = 0;
6720 vmm_update_pvclock(vcpu);
6721}
6722
6723int
6724vmm_update_pvclock(struct vcpu *vcpu)
6725{
6726 struct pvclock_time_info *pvclock_ti;
6727 struct timespec tv;
6728 struct vm *vm = vcpu->vc_parent;
6729 paddr_t pvclock_hpa, pvclock_gpa;
6730
6731 if (vcpu->vc_pvclock_system_gpa & PVCLOCK_SYSTEM_TIME_ENABLE0x01) {
20
Assuming the condition is false
21
Taking false branch
6732 pvclock_gpa = vcpu->vc_pvclock_system_gpa & 0xFFFFFFFFFFFFFFF0;
6733 if (!pmap_extract(vm->vm_map->pmap, pvclock_gpa, &pvclock_hpa))
6734 return (EINVAL22);
6735		pvclock_ti = (void *)PMAP_DIRECT_MAP(pvclock_hpa);
6736
6737 /* START next cycle (must be odd) */
6738 pvclock_ti->ti_version =
6739 (++vcpu->vc_pvclock_version << 1) | 0x1;
6740
6741 pvclock_ti->ti_tsc_timestamp = rdtsc();
6742 nanotime(&tv);
6743 pvclock_ti->ti_system_time =
6744 tv.tv_sec * 1000000000L + tv.tv_nsec;
6745 pvclock_ti->ti_tsc_shift = 12;
6746 pvclock_ti->ti_tsc_to_system_mul =
6747 vcpu->vc_pvclock_system_tsc_mul;
6748 pvclock_ti->ti_flags = PVCLOCK_FLAG_TSC_STABLE0x01;
6749
6750 /* END (must be even) */
6751 pvclock_ti->ti_version &= ~0x1;
6752 }
6753 return (0);
6754}
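
For context on the fields written above: a guest reads this structure under a version/seqlock protocol and scales the TSC delta by ti_tsc_shift and ti_tsc_to_system_mul. The sketch below shows only that consumer-side arithmetic, with the multiplier derived the same way as in vmm_init_pvclock; the frequency and TSC values are invented.

        #include <stdint.h>
        #include <stdio.h>

        /*
         * Scale a TSC delta to nanoseconds the pvclock way:
         * shift first, then multiply by the 32.32 fixed-point multiplier.
         */
        static uint64_t
        pvclock_scale(uint64_t tsc_delta, int8_t shift, uint32_t mul)
        {
                if (shift >= 0)
                        tsc_delta <<= shift;
                else
                        tsc_delta >>= -shift;
                return (uint64_t)(((__uint128_t)tsc_delta * mul) >> 32);
        }

        int
        main(void)
        {
                uint64_t tsc_frequency = 2000000000ULL; /* hypothetical 2 GHz */
                uint32_t mul = (uint32_t)((1000000000ULL << 20) / tsc_frequency);
                int8_t shift = 12;                      /* matches vmm_update_pvclock */

                /* 2e9 ticks at 2 GHz should come out close to one second. */
                printf("%llu ns\n",
                    (unsigned long long)pvclock_scale(2000000000ULL, shift, mul));
                return 0;
        }
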
6755
6756int
6757vmm_pat_is_valid(uint64_t pat)
6758{
6759 int i;
6760 uint8_t *byte = (uint8_t *)&pat;
6761
6762 /* Intel SDM Vol 3A, 11.12.2: 0x02, 0x03, and 0x08-0xFF result in #GP */
6763 for (i = 0; i < 8; i++) {
6764 if (byte[i] == 0x02 || byte[i] == 0x03 || byte[i] > 0x07) {
6765 DPRINTF("%s: invalid pat %llx\n", __func__, pat);
6766 return 0;
6767 }
6768 }
6769
6770 return 1;
6771}
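
As a concrete illustration of the rule enforced above (each of the eight PAT bytes must be 0x00-0x07, excluding 0x02 and 0x03), here is a tiny check against two sample values; the "good" one is the conventional power-on layout, the "bad" one deliberately contains a 0x02 byte.

        #include <stdint.h>
        #include <stdio.h>

        static int
        pat_is_valid(uint64_t pat)
        {
                uint8_t *byte = (uint8_t *)&pat;
                int i;

                for (i = 0; i < 8; i++)
                        if (byte[i] == 0x02 || byte[i] == 0x03 || byte[i] > 0x07)
                                return 0;
                return 1;
        }

        int
        main(void)
        {
                printf("%d\n", pat_is_valid(0x0007040600070406ULL));    /* 1: valid */
                printf("%d\n", pat_is_valid(0x0007040600070402ULL));    /* 0: contains 0x02 */
                return 0;
        }
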
6772
6773/*
6774 * vmx_exit_reason_decode
6775 *
6776 * Returns a human readable string describing exit type 'code'
6777 */
6778const char *
6779vmx_exit_reason_decode(uint32_t code)
6780{
6781 switch (code) {
6782 case VMX_EXIT_NMI: return "NMI";
6783 case VMX_EXIT_EXTINT: return "External interrupt";
6784 case VMX_EXIT_TRIPLE_FAULT: return "Triple fault";
6785 case VMX_EXIT_INIT: return "INIT signal";
6786 case VMX_EXIT_SIPI: return "SIPI signal";
6787 case VMX_EXIT_IO_SMI: return "I/O SMI";
6788 case VMX_EXIT_OTHER_SMI: return "other SMI";
6789 case VMX_EXIT_INT_WINDOW: return "Interrupt window";
6790 case VMX_EXIT_NMI_WINDOW: return "NMI window";
6791 case VMX_EXIT_TASK_SWITCH: return "Task switch";
6792 case VMX_EXIT_CPUID: return "CPUID instruction";
6793 case VMX_EXIT_GETSEC: return "GETSEC instruction";
6794 case VMX_EXIT_HLT: return "HLT instruction";
6795 case VMX_EXIT_INVD: return "INVD instruction";
6796 case VMX_EXIT_INVLPG: return "INVLPG instruction";
6797 case VMX_EXIT_RDPMC: return "RDPMC instruction";
6798 case VMX_EXIT_RDTSC: return "RDTSC instruction";
6799 case VMX_EXIT_RSM: return "RSM instruction";
6800 case VMX_EXIT_VMCALL: return "VMCALL instruction";
6801 case VMX_EXIT_VMCLEAR: return "VMCLEAR instruction";
6802 case VMX_EXIT_VMLAUNCH: return "VMLAUNCH instruction";
6803 case VMX_EXIT_VMPTRLD: return "VMPTRLD instruction";
6804 case VMX_EXIT_VMPTRST: return "VMPTRST instruction";
6805 case VMX_EXIT_VMREAD: return "VMREAD instruction";
6806 case VMX_EXIT_VMRESUME: return "VMRESUME instruction";
6807 case VMX_EXIT_VMWRITE: return "VMWRITE instruction";
6808 case VMX_EXIT_VMXOFF: return "VMXOFF instruction";
6809 case VMX_EXIT_VMXON: return "VMXON instruction";
6810 case VMX_EXIT_CR_ACCESS: return "CR access";
6811 case VMX_EXIT_MOV_DR: return "MOV DR instruction";
6812 case VMX_EXIT_IO: return "I/O instruction";
6813 case VMX_EXIT_RDMSR: return "RDMSR instruction";
6814 case VMX_EXIT_WRMSR: return "WRMSR instruction";
6815 case VMX_EXIT_ENTRY_FAILED_GUEST_STATE: return "guest state invalid";
6816 case VMX_EXIT_ENTRY_FAILED_MSR_LOAD: return "MSR load failed";
6817 case VMX_EXIT_MWAIT: return "MWAIT instruction";
6818 case VMX_EXIT_MTF: return "monitor trap flag";
6819 case VMX_EXIT_MONITOR: return "MONITOR instruction";
6820 case VMX_EXIT_PAUSE: return "PAUSE instruction";
6821 case VMX_EXIT_ENTRY_FAILED_MCE: return "MCE during entry";
6822 case VMX_EXIT_TPR_BELOW_THRESHOLD: return "TPR below threshold";
6823 case VMX_EXIT_APIC_ACCESS: return "APIC access";
6824 case VMX_EXIT_VIRTUALIZED_EOI: return "virtualized EOI";
6825 case VMX_EXIT_GDTR_IDTR: return "GDTR/IDTR access";
6826 case VMX_EXIT_LDTR_TR: return "LDTR/TR access";
6827 case VMX_EXIT_EPT_VIOLATION: return "EPT violation";
6828 case VMX_EXIT_EPT_MISCONFIGURATION: return "EPT misconfiguration";
6829 case VMX_EXIT_INVEPT: return "INVEPT instruction";
6830 case VMX_EXIT_RDTSCP: return "RDTSCP instruction";
6831 case VMX_EXIT_VMX_PREEMPTION_TIMER_EXPIRED:
6832 return "preemption timer expired";
6833 case VMX_EXIT_INVVPID: return "INVVPID instruction";
6834 case VMX_EXIT_WBINVD: return "WBINVD instruction";
6835 case VMX_EXIT_XSETBV: return "XSETBV instruction";
6836 case VMX_EXIT_APIC_WRITE: return "APIC write";
6837 case VMX_EXIT_RDRAND: return "RDRAND instruction";
6838 case VMX_EXIT_INVPCID: return "INVPCID instruction";
6839 case VMX_EXIT_VMFUNC: return "VMFUNC instruction";
6840 case VMX_EXIT_RDSEED: return "RDSEED instruction";
6841 case VMX_EXIT_XSAVES: return "XSAVES instruction";
6842 case VMX_EXIT_XRSTORS: return "XRSTORS instruction";
6843 default: return "unknown";
6844 }
6845}
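
/*
 * Editor's illustrative sketch (not part of vmm_machdep.c): a typical
 * caller hands this decoder the basic exit reason, which per the Intel
 * SDM lives in bits 15:0 of the VMCS exit reason field:
 */
#if 0	/* example only */
	uint64_t reason;

	if (vmread(VMCS_EXIT_REASON, &reason) == 0)
		DPRINTF("exit reason: %s\n",
		    vmx_exit_reason_decode(reason & 0xffff));
#endif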
6846
6847/*
6848 * svm_exit_reason_decode
6849 *
6850 * Returns a human readable string describing exit type 'code'
6851 */
6852const char *
6853svm_exit_reason_decode(uint32_t code)
6854{
6855 switch (code) {
6856 case SVM_VMEXIT_CR0_READ: return "CR0 read"; /* 0x00 */
6857 case SVM_VMEXIT_CR1_READ: return "CR1 read"; /* 0x01 */
6858 case SVM_VMEXIT_CR2_READ: return "CR2 read"; /* 0x02 */
6859 case SVM_VMEXIT_CR3_READ: return "CR3 read"; /* 0x03 */
6860 case SVM_VMEXIT_CR4_READ: return "CR4 read"; /* 0x04 */
6861 case SVM_VMEXIT_CR5_READ: return "CR5 read"; /* 0x05 */
6862 case SVM_VMEXIT_CR6_READ: return "CR6 read"; /* 0x06 */
6863 case SVM_VMEXIT_CR7_READ: return "CR7 read"; /* 0x07 */
6864 case SVM_VMEXIT_CR8_READ: return "CR8 read"; /* 0x08 */
6865 case SVM_VMEXIT_CR9_READ: return "CR9 read"; /* 0x09 */
6866 case SVM_VMEXIT_CR10_READ: return "CR10 read"; /* 0x0A */
6867 case SVM_VMEXIT_CR11_READ: return "CR11 read"; /* 0x0B */
6868 case SVM_VMEXIT_CR12_READ: return "CR12 read"; /* 0x0C */
6869 case SVM_VMEXIT_CR13_READ: return "CR13 read"; /* 0x0D */
6870 case SVM_VMEXIT_CR14_READ: return "CR14 read"; /* 0x0E */
6871 case SVM_VMEXIT_CR15_READ: return "CR15 read"; /* 0x0F */
6872 case SVM_VMEXIT_CR0_WRITE: return "CR0 write"; /* 0x10 */
6873 case SVM_VMEXIT_CR1_WRITE: return "CR1 write"; /* 0x11 */
6874 case SVM_VMEXIT_CR2_WRITE: return "CR2 write"; /* 0x12 */
6875 case SVM_VMEXIT_CR3_WRITE: return "CR3 write"; /* 0x13 */
6876 case SVM_VMEXIT_CR4_WRITE: return "CR4 write"; /* 0x14 */
6877 case SVM_VMEXIT_CR5_WRITE: return "CR5 write"; /* 0x15 */
6878 case SVM_VMEXIT_CR6_WRITE: return "CR6 write"; /* 0x16 */
6879 case SVM_VMEXIT_CR7_WRITE: return "CR7 write"; /* 0x17 */
6880 case SVM_VMEXIT_CR8_WRITE: return "CR8 write"; /* 0x18 */
6881 case SVM_VMEXIT_CR9_WRITE: return "CR9 write"; /* 0x19 */
6882 case SVM_VMEXIT_CR10_WRITE: return "CR10 write"; /* 0x1A */
6883 case SVM_VMEXIT_CR11_WRITE: return "CR11 write"; /* 0x1B */
6884 case SVM_VMEXIT_CR12_WRITE: return "CR12 write"; /* 0x1C */
6885 case SVM_VMEXIT_CR13_WRITE: return "CR13 write"; /* 0x1D */
6886 case SVM_VMEXIT_CR14_WRITE: return "CR14 write"; /* 0x1E */
6887 case SVM_VMEXIT_CR15_WRITE: return "CR15 write"; /* 0x1F */
6888 case SVM_VMEXIT_DR0_READ: return "DR0 read"; /* 0x20 */
6889 case SVM_VMEXIT_DR1_READ: return "DR1 read"; /* 0x21 */
6890 case SVM_VMEXIT_DR2_READ: return "DR2 read"; /* 0x22 */
6891 case SVM_VMEXIT_DR3_READ: return "DR3 read"; /* 0x23 */
6892 case SVM_VMEXIT_DR4_READ: return "DR4 read"; /* 0x24 */
6893 case SVM_VMEXIT_DR5_READ: return "DR5 read"; /* 0x25 */
6894 case SVM_VMEXIT_DR6_READ: return "DR6 read"; /* 0x26 */
6895 case SVM_VMEXIT_DR7_READ: return "DR7 read"; /* 0x27 */
6896 case SVM_VMEXIT_DR8_READ: return "DR8 read"; /* 0x28 */
6897 case SVM_VMEXIT_DR9_READ: return "DR9 read"; /* 0x29 */
6898 case SVM_VMEXIT_DR10_READ: return "DR10 read"; /* 0x2A */
6899 case SVM_VMEXIT_DR11_READ: return "DR11 read"; /* 0x2B */
6900 case SVM_VMEXIT_DR12_READ: return "DR12 read"; /* 0x2C */
6901 case SVM_VMEXIT_DR13_READ: return "DR13 read"; /* 0x2D */
6902 case SVM_VMEXIT_DR14_READ: return "DR14 read"; /* 0x2E */
6903 case SVM_VMEXIT_DR15_READ: return "DR15 read"; /* 0x2F */
6904 case SVM_VMEXIT_DR0_WRITE: return "DR0 write"; /* 0x30 */
6905 case SVM_VMEXIT_DR1_WRITE: return "DR1 write"; /* 0x31 */
6906 case SVM_VMEXIT_DR2_WRITE: return "DR2 write"; /* 0x32 */
6907 case SVM_VMEXIT_DR3_WRITE: return "DR3 write"; /* 0x33 */
6908 case SVM_VMEXIT_DR4_WRITE: return "DR4 write"; /* 0x34 */
6909 case SVM_VMEXIT_DR5_WRITE: return "DR5 write"; /* 0x35 */
6910 case SVM_VMEXIT_DR6_WRITE: return "DR6 write"; /* 0x36 */
6911 case SVM_VMEXIT_DR7_WRITE: return "DR7 write"; /* 0x37 */
6912 case SVM_VMEXIT_DR8_WRITE: return "DR8 write"; /* 0x38 */
6913 case SVM_VMEXIT_DR9_WRITE: return "DR9 write"; /* 0x39 */
6914 case SVM_VMEXIT_DR10_WRITE: return "DR10 write"; /* 0x3A */
6915 case SVM_VMEXIT_DR11_WRITE: return "DR11 write"; /* 0x3B */
6916 case SVM_VMEXIT_DR12_WRITE: return "DR12 write"; /* 0x3C */
6917 case SVM_VMEXIT_DR13_WRITE: return "DR13 write"; /* 0x3D */
6918 case SVM_VMEXIT_DR14_WRITE: return "DR14 write"; /* 0x3E */
6919 case SVM_VMEXIT_DR15_WRITE: return "DR15 write"; /* 0x3F */
6920 case SVM_VMEXIT_EXCP0: return "Exception 0x00"; /* 0x40 */
6921 case SVM_VMEXIT_EXCP1: return "Exception 0x01"; /* 0x41 */
6922 case SVM_VMEXIT_EXCP2: return "Exception 0x02"; /* 0x42 */
6923 case SVM_VMEXIT_EXCP3: return "Exception 0x03"; /* 0x43 */
6924 case SVM_VMEXIT_EXCP4: return "Exception 0x04"; /* 0x44 */
6925 case SVM_VMEXIT_EXCP5: return "Exception 0x05"; /* 0x45 */
6926 case SVM_VMEXIT_EXCP6: return "Exception 0x06"; /* 0x46 */
6927 case SVM_VMEXIT_EXCP7: return "Exception 0x07"; /* 0x47 */
6928 case SVM_VMEXIT_EXCP8: return "Exception 0x08"; /* 0x48 */
6929 case SVM_VMEXIT_EXCP9: return "Exception 0x09"; /* 0x49 */
6930 case SVM_VMEXIT_EXCP10: return "Exception 0x0A"; /* 0x4A */
6931 case SVM_VMEXIT_EXCP11: return "Exception 0x0B"; /* 0x4B */
6932 case SVM_VMEXIT_EXCP12: return "Exception 0x0C"; /* 0x4C */
6933 case SVM_VMEXIT_EXCP13: return "Exception 0x0D"; /* 0x4D */
6934 case SVM_VMEXIT_EXCP14: return "Exception 0x0E"; /* 0x4E */
6935 case SVM_VMEXIT_EXCP15: return "Exception 0x0F"; /* 0x4F */
6936 case SVM_VMEXIT_EXCP16: return "Exception 0x10"; /* 0x50 */
6937 case SVM_VMEXIT_EXCP17: return "Exception 0x11"; /* 0x51 */
6938 case SVM_VMEXIT_EXCP18: return "Exception 0x12"; /* 0x52 */
6939 case SVM_VMEXIT_EXCP19: return "Exception 0x13"; /* 0x53 */
6940 case SVM_VMEXIT_EXCP20: return "Exception 0x14"; /* 0x54 */
6941 case SVM_VMEXIT_EXCP21: return "Exception 0x15"; /* 0x55 */
6942 case SVM_VMEXIT_EXCP22: return "Exception 0x16"; /* 0x56 */
6943 case SVM_VMEXIT_EXCP23: return "Exception 0x17"; /* 0x57 */
6944 case SVM_VMEXIT_EXCP24: return "Exception 0x18"; /* 0x58 */
6945 case SVM_VMEXIT_EXCP25: return "Exception 0x19"; /* 0x59 */
6946 case SVM_VMEXIT_EXCP26: return "Exception 0x1A"; /* 0x5A */
6947 case SVM_VMEXIT_EXCP27: return "Exception 0x1B"; /* 0x5B */
6948 case SVM_VMEXIT_EXCP28: return "Exception 0x1C"; /* 0x5C */
6949 case SVM_VMEXIT_EXCP29: return "Exception 0x1D"; /* 0x5D */
6950 case SVM_VMEXIT_EXCP30: return "Exception 0x1E"; /* 0x5E */
6951 case SVM_VMEXIT_EXCP31: return "Exception 0x1F"; /* 0x5F */
6952 case SVM_VMEXIT_INTR: return "External interrupt"; /* 0x60 */
6953 case SVM_VMEXIT_NMI: return "NMI"; /* 0x61 */
6954 case SVM_VMEXIT_SMI: return "SMI"; /* 0x62 */
6955 case SVM_VMEXIT_INIT: return "INIT"; /* 0x63 */
6956 case SVM_VMEXIT_VINTR: return "Interrupt window"; /* 0x64 */
6957 case SVM_VMEXIT_CR0_SEL_WRITE: return "Sel CR0 write"; /* 0x65 */
6958 case SVM_VMEXIT_IDTR_READ: return "IDTR read"; /* 0x66 */
6959 case SVM_VMEXIT_GDTR_READ: return "GDTR read"; /* 0x67 */
6960 case SVM_VMEXIT_LDTR_READ: return "LDTR read"; /* 0x68 */
6961 case SVM_VMEXIT_TR_READ: return "TR read"; /* 0x69 */
6962 case SVM_VMEXIT_IDTR_WRITE: return "IDTR write"; /* 0x6A */
6963 case SVM_VMEXIT_GDTR_WRITE: return "GDTR write"; /* 0x6B */
6964 case SVM_VMEXIT_LDTR_WRITE: return "LDTR write"; /* 0x6C */
6965 case SVM_VMEXIT_TR_WRITE: return "TR write"; /* 0x6D */
6966 case SVM_VMEXIT_RDTSC: return "RDTSC instruction"; /* 0x6E */
6967 case SVM_VMEXIT_RDPMC: return "RDPMC instruction"; /* 0x6F */
6968 case SVM_VMEXIT_PUSHF: return "PUSHF instruction"; /* 0x70 */
6969 case SVM_VMEXIT_POPF: return "POPF instruction"; /* 0x71 */
6970 case SVM_VMEXIT_CPUID: return "CPUID instruction"; /* 0x72 */
6971 case SVM_VMEXIT_RSM: return "RSM instruction"; /* 0x73 */
6972 case SVM_VMEXIT_IRET: return "IRET instruction"; /* 0x74 */
6973 case SVM_VMEXIT_SWINT: return "SWINT instruction"; /* 0x75 */
6974 case SVM_VMEXIT_INVD: return "INVD instruction"; /* 0x76 */
6975 case SVM_VMEXIT_PAUSE: return "PAUSE instruction"; /* 0x77 */
6976 case SVM_VMEXIT_HLT: return "HLT instruction"; /* 0x78 */
6977 case SVM_VMEXIT_INVLPG: return "INVLPG instruction"; /* 0x79 */
6978 case SVM_VMEXIT_INVLPGA: return "INVLPGA instruction"; /* 0x7A */
6979 case SVM_VMEXIT_IOIO: return "I/O instruction"; /* 0x7B */
6980 case SVM_VMEXIT_MSR: return "RDMSR/WRMSR instruction"; /* 0x7C */
6981 case SVM_VMEXIT_TASK_SWITCH: return "Task switch"; /* 0x7D */
6982 case SVM_VMEXIT_FERR_FREEZE: return "FERR_FREEZE"; /* 0x7E */
6983 case SVM_VMEXIT_SHUTDOWN: return "Triple fault"; /* 0x7F */
6984 case SVM_VMEXIT_VMRUN: return "VMRUN instruction"; /* 0x80 */
6985 case SVM_VMEXIT_VMMCALL: return "VMMCALL instruction"; /* 0x81 */
6986 case SVM_VMEXIT_VMLOAD: return "VMLOAD instruction"; /* 0x82 */
6987 case SVM_VMEXIT_VMSAVE: return "VMSAVE instruction"; /* 0x83 */
6988 case SVM_VMEXIT_STGI: return "STGI instruction"; /* 0x84 */
6989 case SVM_VMEXIT_CLGI: return "CLGI instruction"; /* 0x85 */
6990 case SVM_VMEXIT_SKINIT: return "SKINIT instruction"; /* 0x86 */
6991 case SVM_VMEXIT_RDTSCP: return "RDTSCP instruction"; /* 0x87 */
6992 case SVM_VMEXIT_ICEBP: return "ICEBP instruction"; /* 0x88 */
6993 case SVM_VMEXIT_WBINVD: return "WBINVD instruction"; /* 0x89 */
6994 case SVM_VMEXIT_MONITOR: return "MONITOR instruction"; /* 0x8A */
6995 case SVM_VMEXIT_MWAIT: return "MWAIT instruction"; /* 0x8B */
6996 case SVM_VMEXIT_MWAIT_CONDITIONAL: return "Cond MWAIT"; /* 0x8C */
6997 case SVM_VMEXIT_NPF: return "NPT violation"; /* 0x400 */
6998 default: return "unknown";
6999 }
7000}
7001
7002/*
7003 * vmx_instruction_error_decode
7004 *
7005 * Returns a human readable string describing the instruction error in 'code'
7006 */
7007const char *
7008vmx_instruction_error_decode(uint32_t code)
7009{
7010 switch (code) {
7011 case 1: return "VMCALL: unsupported in VMX root";
7012 case 2: return "VMCLEAR: invalid paddr";
7013 case 3: return "VMCLEAR: VMXON pointer";
7014 case 4: return "VMLAUNCH: non-clear VMCS";
7015 case 5: return "VMRESUME: non-launched VMCS";
7016 case 6: return "VMRESUME: executed after VMXOFF";
7017 case 7: return "VM entry: invalid control field(s)";
7018 case 8: return "VM entry: invalid host state field(s)";
7019 case 9: return "VMPTRLD: invalid paddr";
7020 case 10: return "VMPTRLD: VMXON pointer";
7021 case 11: return "VMPTRLD: incorrect VMCS revid";
7022 case 12: return "VMREAD/VMWRITE: unsupported VMCS field";
7023 case 13: return "VMWRITE: RO VMCS field";
7024 case 15: return "VMXON: unsupported in VMX root";
7025 case 20: return "VMCALL: invalid VM exit control fields";
7026 case 26: return "VM entry: blocked by MOV SS";
7027 case 28: return "Invalid operand to INVEPT/INVVPID";
7028 case 0x80000021: return "VM entry: invalid guest state";
7029 case 0x80000022: return "VM entry: failure due to MSR loading";
7030 case 0x80000029: return "VM entry: machine-check event";
7031 default: return "unknown";
7032 }
7033}
7034
7035/*
7036 * vcpu_state_decode
7037 *
7038 * Returns a human readable string describing the vcpu state in 'state'.
7039 */
7040const char *
7041vcpu_state_decode(u_int state)
7042{
7043 switch (state) {
7044 case VCPU_STATE_STOPPED: return "stopped";
7045 case VCPU_STATE_RUNNING: return "running";
7046 case VCPU_STATE_REQTERM: return "requesting termination";
7047 case VCPU_STATE_TERMINATED: return "terminated";
7048 case VCPU_STATE_UNKNOWN: return "unknown";
7049 default: return "invalid";
7050 }
7051}
7052
7053#ifdef VMM_DEBUG
7054/*
7055 * dump_vcpu
7056 *
7057 * Dumps the VMX capabilities of vcpu 'vcpu'
7058 */
7059void
7060dump_vcpu(struct vcpu *vcpu)
7061{
7062 printf("vcpu @ %p\n", vcpu);
7063 printf(" parent vm @ %p\n", vcpu->vc_parent);
7064 printf(" mode: ");
7065 if (vcpu->vc_virt_mode == VMM_MODE_EPT) {
7066 printf("VMX\n");
7067 printf(" pinbased ctls: 0x%llx\n",
7068 vcpu->vc_vmx_pinbased_ctls);
7069 printf(" true pinbased ctls: 0x%llx\n",
7070 vcpu->vc_vmx_true_pinbased_ctls);
7071 CTRL_DUMP(vcpu, PINBASED, EXTERNAL_INT_EXITING);
7072 CTRL_DUMP(vcpu, PINBASED, NMI_EXITING);
7073 CTRL_DUMP(vcpu, PINBASED, VIRTUAL_NMIS);
7074 CTRL_DUMP(vcpu, PINBASED, ACTIVATE_VMX_PREEMPTION_TIMER);
7075 CTRL_DUMP(vcpu, PINBASED, PROCESS_POSTED_INTERRUPTS);
7076 printf(" procbased ctls: 0x%llx\n",
7077 vcpu->vc_vmx_procbased_ctls);
7078 printf(" true procbased ctls: 0x%llx\n",
7079 vcpu->vc_vmx_true_procbased_ctls);
7080 CTRL_DUMP(vcpu, PROCBASED, INTERRUPT_WINDOW_EXITING);
7081 CTRL_DUMP(vcpu, PROCBASED, USE_TSC_OFFSETTING);
7082 CTRL_DUMP(vcpu, PROCBASED, HLT_EXITING);
7083 CTRL_DUMP(vcpu, PROCBASED, INVLPG_EXITING);
7084 CTRL_DUMP(vcpu, PROCBASED, MWAIT_EXITING);
7085 CTRL_DUMP(vcpu, PROCBASED, RDPMC_EXITING);
7086 CTRL_DUMP(vcpu, PROCBASED, RDTSC_EXITING);
7087 CTRL_DUMP(vcpu, PROCBASED, CR3_LOAD_EXITING);
7088 CTRL_DUMP(vcpu, PROCBASED, CR3_STORE_EXITING);
7089 CTRL_DUMP(vcpu, PROCBASED, CR8_LOAD_EXITING);
7090 CTRL_DUMP(vcpu, PROCBASED, CR8_STORE_EXITING);
7091 CTRL_DUMP(vcpu, PROCBASED, USE_TPR_SHADOW);
7092 CTRL_DUMP(vcpu, PROCBASED, NMI_WINDOW_EXITING);
7093 CTRL_DUMP(vcpu, PROCBASED, MOV_DR_EXITING);
7094 CTRL_DUMP(vcpu, PROCBASED, UNCONDITIONAL_IO_EXITING);
7095 CTRL_DUMP(vcpu, PROCBASED, USE_IO_BITMAPS);
7096 CTRL_DUMP(vcpu, PROCBASED, MONITOR_TRAP_FLAG);
7097 CTRL_DUMP(vcpu, PROCBASED, USE_MSR_BITMAPS);
7098 CTRL_DUMP(vcpu, PROCBASED, MONITOR_EXITING);
7099 CTRL_DUMP(vcpu, PROCBASED, PAUSE_EXITING);
7100 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS,
7101 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS, 1)) {
7102 printf(" procbased2 ctls: 0x%llx\n",
7103 vcpu->vc_vmx_procbased2_ctls);
7104 CTRL_DUMP(vcpu, PROCBASED2, VIRTUALIZE_APIC);
7105 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_EPT);
7106 CTRL_DUMP(vcpu, PROCBASED2, DESCRIPTOR_TABLE_EXITING);
7107 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_RDTSCP);
7108 CTRL_DUMP(vcpu, PROCBASED2, VIRTUALIZE_X2APIC_MODE);
7109 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_VPID);
7110 CTRL_DUMP(vcpu, PROCBASED2, WBINVD_EXITING);
7111 CTRL_DUMP(vcpu, PROCBASED2, UNRESTRICTED_GUEST);
7112 CTRL_DUMP(vcpu, PROCBASED2,
7113 APIC_REGISTER_VIRTUALIZATION);
7114 CTRL_DUMP(vcpu, PROCBASED2,
7115 VIRTUAL_INTERRUPT_DELIVERY);
7116 CTRL_DUMP(vcpu, PROCBASED2, PAUSE_LOOP_EXITING);
7117 CTRL_DUMP(vcpu, PROCBASED2, RDRAND_EXITING);
7118 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_INVPCID);
7119 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_VM_FUNCTIONS);
7120 CTRL_DUMP(vcpu, PROCBASED2, VMCS_SHADOWING);
7121 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_ENCLS_EXITING);
7122 CTRL_DUMP(vcpu, PROCBASED2, RDSEED_EXITING);
7123 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_PML);
7124 CTRL_DUMP(vcpu, PROCBASED2, EPT_VIOLATION_VE);
7125 CTRL_DUMP(vcpu, PROCBASED2, CONCEAL_VMX_FROM_PT);
7126 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_XSAVES_XRSTORS);
7127 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_TSC_SCALING);
7128 }
7129 printf(" entry ctls: 0x%llx\n",
7130 vcpu->vc_vmx_entry_ctls);
7131 printf(" true entry ctls: 0x%llx\n",
7132 vcpu->vc_vmx_true_entry_ctls);
7133 CTRL_DUMP(vcpu, ENTRY, LOAD_DEBUG_CONTROLS);
7134 CTRL_DUMP(vcpu, ENTRY, IA32E_MODE_GUEST);
7135 CTRL_DUMP(vcpu, ENTRY, ENTRY_TO_SMM);
7136 CTRL_DUMP(vcpu, ENTRY, DEACTIVATE_DUAL_MONITOR_TREATMENT);
7137 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY);
7138 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_PAT_ON_ENTRY);
7139 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_EFER_ON_ENTRY);
7140 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_BNDCFGS_ON_ENTRY);
7141 CTRL_DUMP(vcpu, ENTRY, CONCEAL_VM_ENTRIES_FROM_PT);
7142 printf(" exit ctls: 0x%llx\n",
7143 vcpu->vc_vmx_exit_ctls);
7144 printf(" true exit ctls: 0x%llx\n",
7145 vcpu->vc_vmx_true_exit_ctls);
7146 CTRL_DUMP(vcpu, EXIT, SAVE_DEBUG_CONTROLS);
7147 CTRL_DUMP(vcpu, EXIT, HOST_SPACE_ADDRESS_SIZE);
7148 CTRL_DUMP(vcpu, EXIT, LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT);
7149 CTRL_DUMP(vcpu, EXIT, ACKNOWLEDGE_INTERRUPT_ON_EXIT);
7150 CTRL_DUMP(vcpu, EXIT, SAVE_IA32_PAT_ON_EXIT);
7151 CTRL_DUMP(vcpu, EXIT, LOAD_IA32_PAT_ON_EXIT);
7152 CTRL_DUMP(vcpu, EXIT, SAVE_IA32_EFER_ON_EXIT);
7153 CTRL_DUMP(vcpu, EXIT, LOAD_IA32_EFER_ON_EXIT);
7154 CTRL_DUMP(vcpu, EXIT, SAVE_VMX_PREEMPTION_TIMER);
7155 CTRL_DUMP(vcpu, EXIT, CLEAR_IA32_BNDCFGS_ON_EXIT);
7156 CTRL_DUMP(vcpu, EXIT, CONCEAL_VM_EXITS_FROM_PT);
7157 }
7158}
7159
7160/*
7161 * vmx_dump_vmcs_field
7162 *
7163 * Debug function to dump the contents of a single VMCS field
7164 *
7165 * Parameters:
7166 * fieldid: VMCS Field ID
7167 * msg: string to display
7168 */
7169void
7170vmx_dump_vmcs_field(uint16_t fieldid, const char *msg)
7171{
7172 uint8_t width;
7173 uint64_t val;
7174
7175
7176 DPRINTF("%s (0x%04x): ", msg, fieldid);
7177 if (vmread(fieldid, &val))
7178 DPRINTF("???? ");
7179 else {
7180 /*
7181 * Field width encoding : bits 13:14
7182 *
7183 * 0: 16-bit
7184 * 1: 64-bit
7185 * 2: 32-bit
7186 * 3: natural width
7187 */
7188 width = (fieldid >> 13) & 0x3;
7189 switch (width) {
7190 case 0: DPRINTF("0x%04llx ", val); break;
7191 case 1:
7192 case 3: DPRINTF("0x%016llx ", val); break;
7193 case 2: DPRINTF("0x%08llx ", val);
7194 }
7195 }
7196}
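
/*
 * Editor's worked example (not part of vmm_machdep.c): applying the width
 * encoding above, (fieldid >> 13) & 0x3, to field IDs used later in this
 * file:
 *
 *	VMCS_GUEST_IA32_CS_SEL (0x0802) -> 0 (16-bit)
 *	VMCS_GUEST_IA32_EPTP   (0x201A) -> 1 (64-bit)
 *	VMCS_EXIT_REASON       (0x4402) -> 2 (32-bit)
 *	VMCS_GUEST_IA32_RIP    (0x681E) -> 3 (natural width)
 */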
7197
7198/*
7199 * vmx_dump_vmcs
7200 *
7201 * Debug function to dump the contents of the current VMCS.
7202 */
7203void
7204vmx_dump_vmcs(struct vcpu *vcpu)
7205{
7206 int has_sec, i;
7207 uint32_t cr3_tgt_ct;
7208
7209 /* XXX save and load new vmcs, restore at end */
7210
7211 DPRINTF("--CURRENT VMCS STATE--\n");
7212 printf("VMCS launched: %s\n",
7213 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED) ? "Yes" : "No");
7214 DPRINTF("VMXON revision : 0x%x\n",
7215 curcpu()->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision);
7216 DPRINTF("CR0 fixed0: 0x%llx\n",
7217 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0);
7218 DPRINTF("CR0 fixed1: 0x%llx\n",
7219 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
7220 DPRINTF("CR4 fixed0: 0x%llx\n",
7221 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0);
7222 DPRINTF("CR4 fixed1: 0x%llx\n",
7223 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
7224 DPRINTF("MSR table size: 0x%x\n",
7225 512 * (curcpu()->ci_vmm_cap.vcc_vmx.vmx_msr_table_size + 1));
7226
7227 has_sec = vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
7228 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1);
7229
7230 if (has_sec) {
7231 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7232 IA32_VMX_ENABLE_VPID(1ULL << 5), 1)) {
7233 vmx_dump_vmcs_field(VMCS_GUEST_VPID0x0000, "VPID");
7234 }
7235 }
7236
7237 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS0x481,
7238 IA32_VMX_PROCESS_POSTED_INTERRUPTS(1ULL << 7), 1)) {
7239 vmx_dump_vmcs_field(VMCS_POSTED_INT_NOTIF_VECTOR0x0002,
7240 "Posted Int Notif Vec");
7241 }
7242
7243 if (has_sec) {
7244 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7245 IA32_VMX_EPT_VIOLATION_VE(1ULL << 18), 1)) {
7246 vmx_dump_vmcs_field(VMCS_EPTP_INDEX0x0004, "EPTP idx");
7247 }
7248 }
7249
7250 DPRINTF("\n");
7251 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_SEL0x0800, "G.ES");
7252 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_SEL0x0802, "G.CS");
7253 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_SEL0x0804, "G.SS");
7254 DPRINTF("\n");
7255 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_SEL0x0806, "G.DS");
7256 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_SEL0x0808, "G.FS");
7257 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_SEL0x080A, "G.GS");
7258 DPRINTF("\n");
7259 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_SEL0x080C, "LDTR");
7260 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_SEL0x080E, "G.TR");
7261
7262 if (has_sec) {
7263 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7264 IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY(1ULL << 9), 1)) {
7265 vmx_dump_vmcs_field(VMCS_GUEST_INTERRUPT_STATUS0x0810,
7266 "Int sts");
7267 }
7268
7269 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7270 IA32_VMX_ENABLE_PML(1ULL << 17), 1)) {
7271 vmx_dump_vmcs_field(VMCS_GUEST_PML_INDEX0x0812, "PML Idx");
7272 }
7273 }
7274
7275 DPRINTF("\n");
7276 vmx_dump_vmcs_field(VMCS_HOST_IA32_ES_SEL0x0C00, "H.ES");
7277 vmx_dump_vmcs_field(VMCS_HOST_IA32_CS_SEL0x0C02, "H.CS");
7278 vmx_dump_vmcs_field(VMCS_HOST_IA32_SS_SEL0x0C04, "H.SS");
7279 DPRINTF("\n");
7280 vmx_dump_vmcs_field(VMCS_HOST_IA32_DS_SEL0x0C06, "H.DS");
7281 vmx_dump_vmcs_field(VMCS_HOST_IA32_FS_SEL0x0C08, "H.FS");
7282 vmx_dump_vmcs_field(VMCS_HOST_IA32_GS_SEL0x0C0A, "H.GS");
7283 DPRINTF("\n");
7284
7285 vmx_dump_vmcs_field(VMCS_IO_BITMAP_A0x2000, "I/O Bitmap A");
7286 DPRINTF("\n");
7287 vmx_dump_vmcs_field(VMCS_IO_BITMAP_B0x2002, "I/O Bitmap B");
7288 DPRINTF("\n");
7289
7290 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
7291 IA32_VMX_USE_MSR_BITMAPS(1ULL << 28), 1)) {
7292 vmx_dump_vmcs_field(VMCS_MSR_BITMAP_ADDRESS0x2004, "MSR Bitmap");
7293 DPRINTF("\n");
7294 }
7295
7296 vmx_dump_vmcs_field(VMCS_EXIT_STORE_MSR_ADDRESS0x2006, "Exit Store MSRs");
7297 DPRINTF("\n");
7298 vmx_dump_vmcs_field(VMCS_EXIT_LOAD_MSR_ADDRESS0x2008, "Exit Load MSRs");
7299 DPRINTF("\n");
7300 vmx_dump_vmcs_field(VMCS_ENTRY_LOAD_MSR_ADDRESS0x200A, "Entry Load MSRs");
7301 DPRINTF("\n");
7302 vmx_dump_vmcs_field(VMCS_EXECUTIVE_VMCS_POINTER0x200C, "Exec VMCS Ptr");
7303 DPRINTF("\n");
7304
7305 if (has_sec) {
7306 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7307 IA32_VMX_ENABLE_PML(1ULL << 17), 1)) {
7308 vmx_dump_vmcs_field(VMCS_PML_ADDRESS0x200E, "PML Addr");
7309 DPRINTF("\n");
7310 }
7311 }
7312
7313 vmx_dump_vmcs_field(VMCS_TSC_OFFSET0x2010, "TSC Offset");
7314 DPRINTF("\n");
7315
7316 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
7317 IA32_VMX_USE_TPR_SHADOW(1ULL << 21), 1)) {
7318 vmx_dump_vmcs_field(VMCS_VIRTUAL_APIC_ADDRESS0x2012,
7319 "Virtual APIC Addr");
7320 DPRINTF("\n");
7321 }
7322
7323 if (has_sec) {
7324 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7325 IA32_VMX_VIRTUALIZE_APIC(1ULL << 0), 1)) {
7326 vmx_dump_vmcs_field(VMCS_APIC_ACCESS_ADDRESS0x2014,
7327 "APIC Access Addr");
7328 DPRINTF("\n");
7329 }
7330 }
7331
7332 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS0x481,
7333 IA32_VMX_PROCESS_POSTED_INTERRUPTS(1ULL << 7), 1)) {
7334 vmx_dump_vmcs_field(VMCS_POSTED_INTERRUPT_DESC0x2016,
7335 "Posted Int Desc Addr");
7336 DPRINTF("\n");
7337 }
7338
7339 if (has_sec) {
7340 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7341 IA32_VMX_ENABLE_VM_FUNCTIONS(1ULL << 13), 1)) {
7342 vmx_dump_vmcs_field(VMCS_VM_FUNCTION_CONTROLS0x2018,
7343 "VM Function Controls");
7344 DPRINTF("\n");
7345 }
7346
7347 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7348 IA32_VMX_ENABLE_EPT(1ULL << 1), 1)) {
7349 vmx_dump_vmcs_field(VMCS_GUEST_IA32_EPTP0x201A,
7350 "EPT Pointer");
7351 DPRINTF("\n");
7352 }
7353
7354 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7355 IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY(1ULL << 9), 1)) {
7356 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_00x201C,
7357 "EOI Exit Bitmap 0");
7358 DPRINTF("\n");
7359 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_10x201E,
7360 "EOI Exit Bitmap 1");
7361 DPRINTF("\n");
7362 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_20x2020,
7363 "EOI Exit Bitmap 2");
7364 DPRINTF("\n");
7365 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_30x2022,
7366 "EOI Exit Bitmap 3");
7367 DPRINTF("\n");
7368 }
7369
7370 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7371 IA32_VMX_ENABLE_VM_FUNCTIONS(1ULL << 13), 1)) {
7372 /* We assume all CPUs have the same VMFUNC caps */
7373 if (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_vmx.vmx_vm_func & 0x1) {
7374 vmx_dump_vmcs_field(VMCS_EPTP_LIST_ADDRESS0x2024,
7375 "EPTP List Addr");
7376 DPRINTF("\n");
7377 }
7378 }
7379
7380 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7381 IA32_VMX_VMCS_SHADOWING(1ULL << 14), 1)) {
7382 vmx_dump_vmcs_field(VMCS_VMREAD_BITMAP_ADDRESS0x2026,
7383 "VMREAD Bitmap Addr");
7384 DPRINTF("\n");
7385 vmx_dump_vmcs_field(VMCS_VMWRITE_BITMAP_ADDRESS0x2028,
7386 "VMWRITE Bitmap Addr");
7387 DPRINTF("\n");
7388 }
7389
7390 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7391 IA32_VMX_EPT_VIOLATION_VE(1ULL << 18), 1)) {
7392 vmx_dump_vmcs_field(VMCS_VIRTUALIZATION_EXC_ADDRESS0x202A,
7393 "#VE Addr");
7394 DPRINTF("\n");
7395 }
7396
7397 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7398 IA32_VMX_ENABLE_XSAVES_XRSTORS(1ULL << 20), 1)) {
7399 vmx_dump_vmcs_field(VMCS_XSS_EXITING_BITMAP0x202C,
7400 "XSS exiting bitmap addr");
7401 DPRINTF("\n");
7402 }
7403
7404 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7405 IA32_VMX_ENABLE_ENCLS_EXITING(1ULL << 15), 1)) {
7406 vmx_dump_vmcs_field(VMCS_ENCLS_EXITING_BITMAP0x202E,
7407 "Encls exiting bitmap addr");
7408 DPRINTF("\n");
7409 }
7410
7411 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7412 IA32_VMX_ENABLE_TSC_SCALING(1ULL << 25), 1)) {
7413 vmx_dump_vmcs_field(VMCS_TSC_MULTIPLIER0x2032,
7414 "TSC scaling factor");
7415 DPRINTF("\n");
7416 }
7417
7418 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7419 IA32_VMX_ENABLE_EPT(1ULL << 1), 1)) {
7420 vmx_dump_vmcs_field(VMCS_GUEST_PHYSICAL_ADDRESS0x2400,
7421 "Guest PA");
7422 DPRINTF("\n");
7423 }
7424 }
7425
7426 vmx_dump_vmcs_field(VMCS_LINK_POINTER0x2800, "VMCS Link Pointer");
7427 DPRINTF("\n");
7428 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DEBUGCTL0x2802, "Guest DEBUGCTL");
7429 DPRINTF("\n");
7430
7431 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS0x484,
7432 IA32_VMX_LOAD_IA32_PAT_ON_ENTRY(1ULL << 14), 1) ||
7433 vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483,
7434 IA32_VMX_SAVE_IA32_PAT_ON_EXIT(1ULL << 18), 1)) {
7435 vmx_dump_vmcs_field(VMCS_GUEST_IA32_PAT0x2804,
7436 "Guest PAT");
7437 DPRINTF("\n");
7438 }
7439
7440 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS0x484,
7441 IA32_VMX_LOAD_IA32_EFER_ON_ENTRY(1ULL << 15), 1) ||
7442 vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483,
7443 IA32_VMX_SAVE_IA32_EFER_ON_EXIT(1ULL << 20), 1)) {
7444 vmx_dump_vmcs_field(VMCS_GUEST_IA32_EFER0x2806,
7445 "Guest EFER");
7446 DPRINTF("\n");
7447 }
7448
7449 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS0x484,
7450 IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY(1ULL << 13), 1)) {
7451 vmx_dump_vmcs_field(VMCS_GUEST_IA32_PERF_GBL_CTRL0x2808,
7452 "Guest Perf Global Ctrl");
7453 DPRINTF("\n");
7454 }
7455
7456 if (has_sec) {
7457 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7458 IA32_VMX_ENABLE_EPT(1ULL << 1), 1)) {
7459 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE00x280A, "Guest PDPTE0");
7460 DPRINTF("\n");
7461 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE10x280C, "Guest PDPTE1");
7462 DPRINTF("\n");
7463 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE20x280E, "Guest PDPTE2");
7464 DPRINTF("\n");
7465 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE30x2810, "Guest PDPTE3");
7466 DPRINTF("\n");
7467 }
7468 }
7469
7470 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS0x484,
7471 IA32_VMX_LOAD_IA32_BNDCFGS_ON_ENTRY(1ULL << 16), 1) ||
7472 vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483,
7473 IA32_VMX_CLEAR_IA32_BNDCFGS_ON_EXIT(1ULL << 23), 1)) {
7474 vmx_dump_vmcs_field(VMCS_GUEST_IA32_BNDCFGS0x2812,
7475 "Guest BNDCFGS");
7476 DPRINTF("\n");
7477 }
7478
7479 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483,
7480 IA32_VMX_LOAD_IA32_PAT_ON_EXIT(1ULL << 19), 1)) {
7481 vmx_dump_vmcs_field(VMCS_HOST_IA32_PAT0x2C00,
7482 "Host PAT");
7483 DPRINTF("\n");
7484 }
7485
7486 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483,
7487 IA32_VMX_LOAD_IA32_EFER_ON_EXIT(1ULL << 21), 1)) {
7488 vmx_dump_vmcs_field(VMCS_HOST_IA32_EFER0x2C02,
7489 "Host EFER");
7490 DPRINTF("\n");
7491 }
7492
7493 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483,
7494 IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT(1ULL << 12), 1)) {
7495 vmx_dump_vmcs_field(VMCS_HOST_IA32_PERF_GBL_CTRL0x2C04,
7496 "Host Perf Global Ctrl");
7497 DPRINTF("\n");
7498 }
7499
7500 vmx_dump_vmcs_field(VMCS_PINBASED_CTLS0x4000, "Pinbased Ctrls");
7501 vmx_dump_vmcs_field(VMCS_PROCBASED_CTLS0x4002, "Procbased Ctrls");
7502 DPRINTF("\n");
7503 vmx_dump_vmcs_field(VMCS_EXCEPTION_BITMAP0x4004, "Exception Bitmap");
7504 vmx_dump_vmcs_field(VMCS_PF_ERROR_CODE_MASK0x4006, "#PF Err Code Mask");
7505 DPRINTF("\n");
7506 vmx_dump_vmcs_field(VMCS_PF_ERROR_CODE_MATCH0x4008, "#PF Err Code Match");
7507 vmx_dump_vmcs_field(VMCS_CR3_TARGET_COUNT0x400A, "CR3 Tgt Count");
7508 DPRINTF("\n");
7509 vmx_dump_vmcs_field(VMCS_EXIT_CTLS0x400C, "Exit Ctrls");
7510 vmx_dump_vmcs_field(VMCS_EXIT_MSR_STORE_COUNT0x400E, "Exit MSR Store Ct");
7511 DPRINTF("\n");
7512 vmx_dump_vmcs_field(VMCS_EXIT_MSR_LOAD_COUNT0x4010, "Exit MSR Load Ct");
7513 vmx_dump_vmcs_field(VMCS_ENTRY_CTLS0x4012, "Entry Ctrls");
7514 DPRINTF("\n");
7515 vmx_dump_vmcs_field(VMCS_ENTRY_MSR_LOAD_COUNT0x4014, "Entry MSR Load Ct");
7516 vmx_dump_vmcs_field(VMCS_ENTRY_INTERRUPTION_INFO0x4016, "Entry Int. Info");
7517 DPRINTF("\n");
7518 vmx_dump_vmcs_field(VMCS_ENTRY_EXCEPTION_ERROR_CODE0x4018,
7519 "Entry Ex. Err Code");
7520 vmx_dump_vmcs_field(VMCS_ENTRY_INSTRUCTION_LENGTH0x401A, "Entry Insn Len");
7521 DPRINTF("\n");
7522
7523 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
7524 IA32_VMX_USE_TPR_SHADOW(1ULL << 21), 1)) {
7525 vmx_dump_vmcs_field(VMCS_TPR_THRESHOLD0x401C, "TPR Threshold");
7526 DPRINTF("\n");
7527 }
7528
7529 if (has_sec) {
7530 vmx_dump_vmcs_field(VMCS_PROCBASED2_CTLS0x401E, "2ndary Ctrls");
7531 DPRINTF("\n");
7532 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7533 IA32_VMX_PAUSE_LOOP_EXITING(1ULL << 10), 1)) {
7534 vmx_dump_vmcs_field(VMCS_PLE_GAP0x4020, "PLE Gap");
7535 vmx_dump_vmcs_field(VMCS_PLE_WINDOW0x4022, "PLE Window");
7536 }
7537 DPRINTF("\n");
7538 }
7539
7540 vmx_dump_vmcs_field(VMCS_INSTRUCTION_ERROR0x4400, "Insn Error");
7541 vmx_dump_vmcs_field(VMCS_EXIT_REASON0x4402, "Exit Reason");
7542 DPRINTF("\n");
7543
7544 vmx_dump_vmcs_field(VMCS_EXIT_INTERRUPTION_INFO0x4404, "Exit Int. Info");
7545 vmx_dump_vmcs_field(VMCS_EXIT_INTERRUPTION_ERR_CODE0x4406,
7546 "Exit Int. Err Code");
7547 DPRINTF("\n");
7548
7549 vmx_dump_vmcs_field(VMCS_IDT_VECTORING_INFO0x4408, "IDT vect info");
7550 vmx_dump_vmcs_field(VMCS_IDT_VECTORING_ERROR_CODE0x440A,
7551 "IDT vect err code");
7552 DPRINTF("\n");
7553
7554 vmx_dump_vmcs_field(VMCS_INSTRUCTION_LENGTH0x440C, "Insn Len");
7555 vmx_dump_vmcs_field(VMCS_EXIT_INSTRUCTION_INFO0x440E, "Exit Insn Info");
7556 DPRINTF("\n");
7557
7558 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_LIMIT0x4800, "G. ES Lim");
7559 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_LIMIT0x4802, "G. CS Lim");
7560 DPRINTF("\n");
7561
7562 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_LIMIT0x4804, "G. SS Lim");
7563 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_LIMIT0x4806, "G. DS Lim");
7564 DPRINTF("\n");
7565
7566 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_LIMIT0x4808, "G. FS Lim");
7567 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_LIMIT0x480A, "G. GS Lim");
7568 DPRINTF("\n");
7569
7570 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_LIMIT0x480C, "G. LDTR Lim");
7571 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_LIMIT0x480E, "G. TR Lim");
7572 DPRINTF("\n");
7573
7574 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GDTR_LIMIT0x4810, "G. GDTR Lim");
7575 vmx_dump_vmcs_field(VMCS_GUEST_IA32_IDTR_LIMIT0x4812, "G. IDTR Lim");
7576 DPRINTF("\n");
7577
7578 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_AR0x4814, "G. ES AR");
7579 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_AR0x4816, "G. CS AR");
7580 DPRINTF("\n");
7581
7582 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_AR0x4818, "G. SS AR");
7583 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_AR0x481A, "G. DS AR");
7584 DPRINTF("\n");
7585
7586 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_AR0x481C, "G. FS AR");
7587 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_AR0x481E, "G. GS AR");
7588 DPRINTF("\n");
7589
7590 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_AR0x4820, "G. LDTR AR");
7591 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_AR0x4822, "G. TR AR");
7592 DPRINTF("\n");
7593
7594 vmx_dump_vmcs_field(VMCS_GUEST_INTERRUPTIBILITY_ST0x4824, "G. Int St.");
7595 vmx_dump_vmcs_field(VMCS_GUEST_ACTIVITY_STATE0x4826, "G. Act St.");
7596 DPRINTF("\n");
7597
7598 vmx_dump_vmcs_field(VMCS_GUEST_SMBASE0x4828, "G. SMBASE");
7599 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_CS0x482A, "G. SYSENTER CS");
7600 DPRINTF("\n");
7601
7602 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS0x481,
7603 IA32_VMX_ACTIVATE_VMX_PREEMPTION_TIMER(1ULL << 6), 1)) {
7604 vmx_dump_vmcs_field(VMCS_VMX_PREEMPTION_TIMER_VAL0x482E,
7605 "VMX Preempt Timer");
7606 DPRINTF("\n");
7607 }
7608
7609 vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_CS0x4C00, "H. SYSENTER CS");
7610 DPRINTF("\n");
7611
7612 vmx_dump_vmcs_field(VMCS_CR0_MASK0x6000, "CR0 Mask");
7613 DPRINTF("\n");
7614 vmx_dump_vmcs_field(VMCS_CR4_MASK0x6002, "CR4 Mask");
7615 DPRINTF("\n");
7616
7617 vmx_dump_vmcs_field(VMCS_CR0_READ_SHADOW0x6004, "CR0 RD Shadow");
7618 DPRINTF("\n");
7619 vmx_dump_vmcs_field(VMCS_CR4_READ_SHADOW0x6006, "CR4 RD Shadow");
7620 DPRINTF("\n");
7621
7622 /* We assume all CPUs have the same max CR3 target ct */
7623 cr3_tgt_ct = curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr3_tgt_count;
7624 DPRINTF("Max CR3 target count: 0x%x\n", cr3_tgt_ct);
7625 if (cr3_tgt_ct <= VMX_MAX_CR3_TARGETS) {
7626 for (i = 0 ; i < cr3_tgt_ct; i++) {
7627 vmx_dump_vmcs_field(VMCS_CR3_TARGET_0 + (2 * i),
7628 "CR3 Target");
7629 DPRINTF("\n");
7630 }
7631 } else {
7632 DPRINTF("(Bogus CR3 Target Count > %d", VMX_MAX_CR3_TARGETS);
7633 }
7634
7635 vmx_dump_vmcs_field(VMCS_GUEST_EXIT_QUALIFICATION0x6400, "G. Exit Qual");
7636 DPRINTF("\n");
7637 vmx_dump_vmcs_field(VMCS_IO_RCX0x6402, "I/O RCX");
7638 DPRINTF("\n");
7639 vmx_dump_vmcs_field(VMCS_IO_RSI0x6404, "I/O RSI");
7640 DPRINTF("\n");
7641 vmx_dump_vmcs_field(VMCS_IO_RDI0x6406, "I/O RDI");
7642 DPRINTF("\n");
7643 vmx_dump_vmcs_field(VMCS_IO_RIP0x6408, "I/O RIP");
7644 DPRINTF("\n");
7645 vmx_dump_vmcs_field(VMCS_GUEST_LINEAR_ADDRESS0x640A, "G. Lin Addr");
7646 DPRINTF("\n");
7647 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR00x6800, "G. CR0");
7648 DPRINTF("\n");
7649 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR30x6802, "G. CR3");
7650 DPRINTF("\n");
7651 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR40x6804, "G. CR4");
7652 DPRINTF("\n");
7653 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_BASE0x6806, "G. ES Base");
7654 DPRINTF("\n");
7655 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_BASE0x6808, "G. CS Base");
7656 DPRINTF("\n");
7657 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_BASE0x680A, "G. SS Base");
7658 DPRINTF("\n");
7659 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_BASE0x680C, "G. DS Base");
7660 DPRINTF("\n");
7661 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_BASE0x680E, "G. FS Base");
7662 DPRINTF("\n");
7663 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_BASE0x6810, "G. GS Base");
7664 DPRINTF("\n");
7665 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_BASE0x6812, "G. LDTR Base");
7666 DPRINTF("\n");
7667 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_BASE0x6814, "G. TR Base");
7668 DPRINTF("\n");
7669 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GDTR_BASE0x6816, "G. GDTR Base");
7670 DPRINTF("\n");
7671 vmx_dump_vmcs_field(VMCS_GUEST_IA32_IDTR_BASE0x6818, "G. IDTR Base");
7672 DPRINTF("\n");
7673 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DR70x681A, "G. DR7");
7674 DPRINTF("\n");
7675 vmx_dump_vmcs_field(VMCS_GUEST_IA32_RSP0x681C, "G. RSP");
7676 DPRINTF("\n");
7677 vmx_dump_vmcs_field(VMCS_GUEST_IA32_RIP0x681E, "G. RIP");
7678 DPRINTF("\n");
7679 vmx_dump_vmcs_field(VMCS_GUEST_IA32_RFLAGS0x6820, "G. RFLAGS");
7680 DPRINTF("\n");
7681 vmx_dump_vmcs_field(VMCS_GUEST_PENDING_DBG_EXC0x6822, "G. Pend Dbg Exc");
7682 DPRINTF("\n");
7683 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_ESP0x6824, "G. SYSENTER ESP");
7684 DPRINTF("\n");
7685 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_EIP0x6826, "G. SYSENTER EIP");
7686 DPRINTF("\n");
7687 vmx_dump_vmcs_field(VMCS_HOST_IA32_CR00x6C00, "H. CR0");
7688 DPRINTF("\n");
7689 vmx_dump_vmcs_field(VMCS_HOST_IA32_CR30x6C02, "H. CR3");
7690 DPRINTF("\n");
7691 vmx_dump_vmcs_field(VMCS_HOST_IA32_CR40x6C04, "H. CR4");
7692 DPRINTF("\n");
7693 vmx_dump_vmcs_field(VMCS_HOST_IA32_FS_BASE0x6C06, "H. FS Base");
7694 DPRINTF("\n");
7695 vmx_dump_vmcs_field(VMCS_HOST_IA32_GS_BASE0x6C08, "H. GS Base");
7696 DPRINTF("\n");
7697 vmx_dump_vmcs_field(VMCS_HOST_IA32_TR_BASE0x6C0A, "H. TR Base");
7698 DPRINTF("\n");
7699 vmx_dump_vmcs_field(VMCS_HOST_IA32_GDTR_BASE0x6C0C, "H. GDTR Base");
7700 DPRINTF("\n");
7701 vmx_dump_vmcs_field(VMCS_HOST_IA32_IDTR_BASE0x6C0E, "H. IDTR Base");
7702 DPRINTF("\n");
7703 vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_ESP0x6C10, "H. SYSENTER ESP");
7704 DPRINTF("\n");
7705 vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_EIP0x6C12, "H. SYSENTER EIP");
7706 DPRINTF("\n");
7707 vmx_dump_vmcs_field(VMCS_HOST_IA32_RSP0x6C14, "H. RSP");
7708 DPRINTF("\n");
7709 vmx_dump_vmcs_field(VMCS_HOST_IA32_RIP0x6C16, "H. RIP");
7710 DPRINTF("\n");
7711}
7712
7713/*
7714 * vmx_vcpu_dump_regs
7715 *
7716 * Debug function to print vcpu regs from the current vcpu
7717 * note - vmcs for 'vcpu' must be on this pcpu.
7718 *
7719 * Parameters:
7720 * vcpu - vcpu whose registers should be dumped
7721 */
7722void
7723vmx_vcpu_dump_regs(struct vcpu *vcpu)
7724{
7725 uint64_t r;
7726 int i;
7727 struct vmx_msr_store *msr_store;
7728
7729 /* XXX reformat this for 32 bit guest as needed */
7730 DPRINTF("vcpu @ %p in %s mode\n", vcpu, vmm_decode_cpu_mode(vcpu));
7731 i = vmm_get_guest_cpu_cpl(vcpu);
7732 if (i == -1)
7733 DPRINTF(" CPL=unknown\n");
7734 else
7735 DPRINTF(" CPL=%d\n", i);
7736 DPRINTF(" rax=0x%016llx rbx=0x%016llx rcx=0x%016llx\n",
7737 vcpu->vc_gueststate.vg_rax, vcpu->vc_gueststate.vg_rbx,
7738 vcpu->vc_gueststate.vg_rcx);
7739 DPRINTF(" rdx=0x%016llx rbp=0x%016llx rdi=0x%016llx\n",
7740 vcpu->vc_gueststate.vg_rdx, vcpu->vc_gueststate.vg_rbp,
7741 vcpu->vc_gueststate.vg_rdi);
7742 DPRINTF(" rsi=0x%016llx r8=0x%016llx r9=0x%016llx\n",
7743 vcpu->vc_gueststate.vg_rsi, vcpu->vc_gueststate.vg_r8,
7744 vcpu->vc_gueststate.vg_r9);
7745 DPRINTF(" r10=0x%016llx r11=0x%016llx r12=0x%016llx\n",
7746 vcpu->vc_gueststate.vg_r10, vcpu->vc_gueststate.vg_r11,
7747 vcpu->vc_gueststate.vg_r12);
7748 DPRINTF(" r13=0x%016llx r14=0x%016llx r15=0x%016llx\n",
7749 vcpu->vc_gueststate.vg_r13, vcpu->vc_gueststate.vg_r14,
7750 vcpu->vc_gueststate.vg_r15);
7751
7752 DPRINTF(" rip=0x%016llx rsp=", vcpu->vc_gueststate.vg_rip);
7753 if (vmread(VMCS_GUEST_IA32_RSP0x681C, &r))
7754 DPRINTF("(error reading)\n");
7755 else
7756 DPRINTF("0x%016llx\n", r);
7757
7758 DPRINTF(" rflags=");
7759 if (vmread(VMCS_GUEST_IA32_RFLAGS0x6820, &r))
7760 DPRINTF("(error reading)\n");
7761 else {
7762 DPRINTF("0x%016llx ", r);
7763 vmm_decode_rflags(r);
7764 }
7765
7766 DPRINTF(" cr0=");
7767 if (vmread(VMCS_GUEST_IA32_CR00x6800, &r))
7768 DPRINTF("(error reading)\n");
7769 else {
7770 DPRINTF("0x%016llx ", r);
7771 vmm_decode_cr0(r);
7772 }
7773
7774 DPRINTF(" cr2=0x%016llx\n", vcpu->vc_gueststate.vg_cr2);
7775
7776 DPRINTF(" cr3=");
7777 if (vmread(VMCS_GUEST_IA32_CR30x6802, &r))
7778 DPRINTF("(error reading)\n");
7779 else {
7780 DPRINTF("0x%016llx ", r);
7781 vmm_decode_cr3(r);
7782 }
7783
7784 DPRINTF(" cr4=");
7785 if (vmread(VMCS_GUEST_IA32_CR40x6804, &r))
7786 DPRINTF("(error reading)\n");
7787 else {
7788 DPRINTF("0x%016llx ", r);
7789 vmm_decode_cr4(r);
7790 }
7791
7792 DPRINTF(" --Guest Segment Info--\n");
7793
7794 DPRINTF(" cs=");
7795 if (vmread(VMCS_GUEST_IA32_CS_SEL0x0802, &r))
7796 DPRINTF("(error reading)");
7797 else
7798 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7799
7800 DPRINTF(" base=");
7801 if (vmread(VMCS_GUEST_IA32_CS_BASE0x6808, &r))
7802 DPRINTF("(error reading)");
7803 else
7804 DPRINTF("0x%016llx", r);
7805
7806 DPRINTF(" limit=");
7807 if (vmread(VMCS_GUEST_IA32_CS_LIMIT0x4802, &r))
7808 DPRINTF("(error reading)");
7809 else
7810 DPRINTF("0x%016llx", r);
7811
7812 DPRINTF(" a/r=");
7813 if (vmread(VMCS_GUEST_IA32_CS_AR0x4816, &r))
7814 DPRINTF("(error reading)\n");
7815 else {
7816 DPRINTF("0x%04llx\n ", r);
7817 vmm_segment_desc_decode(r);
7818 }
7819
7820 DPRINTF(" ds=");
7821 if (vmread(VMCS_GUEST_IA32_DS_SEL0x0806, &r))
7822 DPRINTF("(error reading)");
7823 else
7824 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7825
7826 DPRINTF(" base=");
7827 if (vmread(VMCS_GUEST_IA32_DS_BASE0x680C, &r))
7828 DPRINTF("(error reading)");
7829 else
7830 DPRINTF("0x%016llx", r);
7831
7832 DPRINTF(" limit=");
7833 if (vmread(VMCS_GUEST_IA32_DS_LIMIT0x4806, &r))
7834 DPRINTF("(error reading)");
7835 else
7836 DPRINTF("0x%016llx", r);
7837
7838 DPRINTF(" a/r=");
7839 if (vmread(VMCS_GUEST_IA32_DS_AR0x481A, &r))
7840 DPRINTF("(error reading)\n");
7841 else {
7842 DPRINTF("0x%04llx\n ", r);
7843 vmm_segment_desc_decode(r);
7844 }
7845
7846 DPRINTF(" es=");
7847 if (vmread(VMCS_GUEST_IA32_ES_SEL0x0800, &r))
7848 DPRINTF("(error reading)");
7849 else
7850 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7851
7852 DPRINTF(" base=");
7853 if (vmread(VMCS_GUEST_IA32_ES_BASE0x6806, &r))
7854 DPRINTF("(error reading)");
7855 else
7856 DPRINTF("0x%016llx", r);
7857
7858 DPRINTF(" limit=");
7859 if (vmread(VMCS_GUEST_IA32_ES_LIMIT0x4800, &r))
7860 DPRINTF("(error reading)");
7861 else
7862 DPRINTF("0x%016llx", r);
7863
7864 DPRINTF(" a/r=");
7865 if (vmread(VMCS_GUEST_IA32_ES_AR0x4814, &r))
7866 DPRINTF("(error reading)\n");
7867 else {
7868 DPRINTF("0x%04llx\n ", r);
7869 vmm_segment_desc_decode(r);
7870 }
7871
7872 DPRINTF(" fs=");
7873 if (vmread(VMCS_GUEST_IA32_FS_SEL0x0808, &r))
7874 DPRINTF("(error reading)");
7875 else
7876 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7877
7878 DPRINTF(" base=");
7879 if (vmread(VMCS_GUEST_IA32_FS_BASE0x680E, &r))
7880 DPRINTF("(error reading)");
7881 else
7882 DPRINTF("0x%016llx", r);
7883
7884 DPRINTF(" limit=");
7885 if (vmread(VMCS_GUEST_IA32_FS_LIMIT0x4808, &r))
7886 DPRINTF("(error reading)");
7887 else
7888 DPRINTF("0x%016llx", r);
7889
7890 DPRINTF(" a/r=");
7891 if (vmread(VMCS_GUEST_IA32_FS_AR0x481C, &r))
7892 DPRINTF("(error reading)\n");
7893 else {
7894 DPRINTF("0x%04llx\n ", r);
7895 vmm_segment_desc_decode(r);
7896 }
7897
7898 DPRINTF(" gs=");
7899 if (vmread(VMCS_GUEST_IA32_GS_SEL0x080A, &r))
7900 DPRINTF("(error reading)");
7901 else
7902 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7903
7904 DPRINTF(" base=");
7905 if (vmread(VMCS_GUEST_IA32_GS_BASE0x6810, &r))
7906 DPRINTF("(error reading)");
7907 else
7908 DPRINTF("0x%016llx", r);
7909
7910 DPRINTF(" limit=");
7911 if (vmread(VMCS_GUEST_IA32_GS_LIMIT0x480A, &r))
7912 DPRINTF("(error reading)");
7913 else
7914 DPRINTF("0x%016llx", r);
7915
7916 DPRINTF(" a/r=");
7917 if (vmread(VMCS_GUEST_IA32_GS_AR0x481E, &r))
7918 DPRINTF("(error reading)\n");
7919 else {
7920 DPRINTF("0x%04llx\n ", r);
7921 vmm_segment_desc_decode(r);
7922 }
7923
7924 DPRINTF(" ss=");
7925 if (vmread(VMCS_GUEST_IA32_SS_SEL, &r))
7926 DPRINTF("(error reading)");
7927 else
7928 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
7929
7930 DPRINTF(" base=");
7931 if (vmread(VMCS_GUEST_IA32_SS_BASE, &r))
7932 DPRINTF("(error reading)");
7933 else
7934 DPRINTF("0x%016llx", r);
7935
7936 DPRINTF(" limit=");
7937 if (vmread(VMCS_GUEST_IA32_SS_LIMIT, &r))
7938 DPRINTF("(error reading)");
7939 else
7940 DPRINTF("0x%016llx", r);
7941
7942 DPRINTF(" a/r=");
7943 if (vmread(VMCS_GUEST_IA32_SS_AR, &r))
7944 DPRINTF("(error reading)\n");
7945 else {
7946 DPRINTF("0x%04llx\n ", r);
7947 vmm_segment_desc_decode(r);
7948 }
7949
7950 DPRINTF(" tr=");
7951 if (vmread(VMCS_GUEST_IA32_TR_SEL, &r))
7952 DPRINTF("(error reading)");
7953 else
7954 DPRINTF("0x%04llx", r);
7955
7956 DPRINTF(" base=");
7957 if (vmread(VMCS_GUEST_IA32_TR_BASE, &r))
7958 DPRINTF("(error reading)");
7959 else
7960 DPRINTF("0x%016llx", r);
7961
7962 DPRINTF(" limit=");
7963 if (vmread(VMCS_GUEST_IA32_TR_LIMIT, &r))
7964 DPRINTF("(error reading)");
7965 else
7966 DPRINTF("0x%016llx", r);
7967
7968 DPRINTF(" a/r=");
7969 if (vmread(VMCS_GUEST_IA32_TR_AR, &r))
7970 DPRINTF("(error reading)\n");
7971 else {
7972 DPRINTF("0x%04llx\n ", r);
7973 vmm_segment_desc_decode(r);
7974 }
7975
7976 DPRINTF(" gdtr base=");
7977 if (vmread(VMCS_GUEST_IA32_GDTR_BASE, &r))
7978 DPRINTF("(error reading) ");
7979 else
7980 DPRINTF("0x%016llx", r);
7981
7982 DPRINTF(" limit=");
7983 if (vmread(VMCS_GUEST_IA32_GDTR_LIMIT, &r))
7984 DPRINTF("(error reading)\n");
7985 else
7986 DPRINTF("0x%016llx\n", r);
7987
7988 DPRINTF(" idtr base=");
7989 if (vmread(VMCS_GUEST_IA32_IDTR_BASE, &r))
7990 DPRINTF("(error reading) ");
7991 else
7992 DPRINTF("0x%016llx", r);
7993
7994 DPRINTF(" limit=");
7995 if (vmread(VMCS_GUEST_IA32_IDTR_LIMIT, &r))
7996 DPRINTF("(error reading)\n");
7997 else
7998 DPRINTF("0x%016llx\n", r);
7999
8000 DPRINTF(" ldtr=");
8001 if (vmread(VMCS_GUEST_IA32_LDTR_SEL, &r))
8002 DPRINTF("(error reading)");
8003 else
8004 DPRINTF("0x%04llx", r);
8005
8006 DPRINTF(" base=");
8007 if (vmread(VMCS_GUEST_IA32_LDTR_BASE, &r))
8008 DPRINTF("(error reading)");
8009 else
8010 DPRINTF("0x%016llx", r);
8011
8012 DPRINTF(" limit=");
8013 if (vmread(VMCS_GUEST_IA32_LDTR_LIMIT, &r))
8014 DPRINTF("(error reading)");
8015 else
8016 DPRINTF("0x%016llx", r);
8017
8018 DPRINTF(" a/r=");
8019 if (vmread(VMCS_GUEST_IA32_LDTR_AR, &r))
8020 DPRINTF("(error reading)\n");
8021 else {
8022 DPRINTF("0x%04llx\n ", r);
8023 vmm_segment_desc_decode(r);
8024 }
8025
8026 DPRINTF(" --Guest MSRs @ 0x%016llx (paddr: 0x%016llx)--\n",
8027 (uint64_t)vcpu->vc_vmx_msr_exit_save_va,
8028 (uint64_t)vcpu->vc_vmx_msr_exit_save_pa);
8029
8030 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
8031
8032 for (i = 0; i < VMX_NUM_MSR_STORE; i++) {
8033 DPRINTF(" MSR %d @ %p : 0x%08llx (%s), "
8034 "value=0x%016llx ",
8035 i, &msr_store[i], msr_store[i].vms_index,
8036 msr_name_decode(msr_store[i].vms_index),
8037 msr_store[i].vms_data);
8038 vmm_decode_msr_value(msr_store[i].vms_index,
8039 msr_store[i].vms_data);
8040 }
8041}
8042
8043/*
8044 * msr_name_decode
8045 *
8046 * Returns a human-readable name for the MSR supplied in 'msr'.
8047 *
8048 * Parameters:
8049 * msr - The MSR to decode
8050 *
8051 * Return value:
8052 * NULL-terminated character string containing the name of the MSR requested
8053 */
8054const char *
8055msr_name_decode(uint32_t msr)
8056{
8057 /*
8058 * Add as needed. Also consider adding a decode function when
8059 * adding to this table.
8060 */
8061
8062 switch (msr) {
8063 case MSR_TSC: return "TSC";
8064 case MSR_APICBASE: return "APIC base";
8065 case MSR_IA32_FEATURE_CONTROL: return "IA32 feature control";
8066 case MSR_PERFCTR0: return "perf counter 0";
8067 case MSR_PERFCTR1: return "perf counter 1";
8068 case MSR_TEMPERATURE_TARGET: return "temperature target";
8069 case MSR_MTRRcap: return "MTRR cap";
8070 case MSR_PERF_STATUS: return "perf status";
8071 case MSR_PERF_CTL: return "perf control";
8072 case MSR_MTRRvarBase: return "MTRR variable base";
8073 case MSR_MTRRfix64K_00000: return "MTRR fixed 64K";
8074 case MSR_MTRRfix16K_80000: return "MTRR fixed 16K";
8075 case MSR_MTRRfix4K_C0000: return "MTRR fixed 4K";
8076 case MSR_CR_PAT: return "PAT";
8077 case MSR_MTRRdefType: return "MTRR default type";
8078 case MSR_EFER: return "EFER";
8079 case MSR_STAR: return "STAR";
8080 case MSR_LSTAR: return "LSTAR";
8081 case MSR_CSTAR: return "CSTAR";
8082 case MSR_SFMASK: return "SFMASK";
8083 case MSR_FSBASE: return "FSBASE";
8084 case MSR_GSBASE: return "GSBASE";
8085 case MSR_KERNELGSBASE: return "KGSBASE";
8086 case MSR_MISC_ENABLE: return "Misc Enable";
8087 default: return "Unknown MSR";
8088 }
8089}
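
For illustration, here is a minimal stand-alone sketch (not part of vmm_machdep.c) of how a name table like msr_name_decode() is typically exercised. The MSR numbers 0x010 (TSC) and 0xc0000080 (EFER) match the cases in the switch above; the demo_ names are local to the example.

#include <stdio.h>
#include <stdint.h>

/* Same switch-on-MSR-number pattern as msr_name_decode(). */
static const char *
demo_msr_name(uint32_t msr)
{
	switch (msr) {
	case 0x010:		return "TSC";
	case 0xc0000080:	return "EFER";
	default:		return "Unknown MSR";
	}
}

int
main(void)
{
	printf("0x%08x -> %s\n", 0xc0000080, demo_msr_name(0xc0000080));
	printf("0x%08x -> %s\n", 0x123, demo_msr_name(0x123));
	return 0;
}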
8090
8091/*
8092 * vmm_segment_desc_decode
8093 *
8094 * Debug function to print segment information for supplied descriptor
8095 *
8096 * Parameters:
8097 * val - The A/R bytes for the segment descriptor to decode
8098 */
8099void
8100vmm_segment_desc_decode(uint64_t val)
8101{
8102 uint16_t ar;
8103 uint8_t g, type, s, dpl, p, dib, l;
8104 uint32_t unusable;
8105
8106 /* Exit early on unusable descriptors */
8107 unusable = val & 0x10000;
8108 if (unusable) {
8109 DPRINTF("(unusable)\n");
8110 return;
8111 }
8112
8113 ar = (uint16_t)val;
8114
8115 g = (ar & 0x8000) >> 15;
8116 dib = (ar & 0x4000) >> 14;
8117 l = (ar & 0x2000) >> 13;
8118 p = (ar & 0x80) >> 7;
8119 dpl = (ar & 0x60) >> 5;
8120 s = (ar & 0x10) >> 4;
8121 type = (ar & 0xf);
8122
8123 DPRINTF("granularity=%d dib=%d l(64 bit)=%d present=%d sys=%d ",
8124 g, dib, l, p, s);
8125
8126 DPRINTF("type=");
8127 if (!s) {
8128 switch (type) {
8129 case SDT_SYSLDT: DPRINTF("ldt\n"); break;
8130 case SDT_SYS386TSS: DPRINTF("tss (available)\n"); break;
8131 case SDT_SYS386BSY: DPRINTF("tss (busy)\n"); break;
8132 case SDT_SYS386CGT: DPRINTF("call gate\n"); break;
8133 case SDT_SYS386IGT: DPRINTF("interrupt gate\n"); break;
8134 case SDT_SYS386TGT: DPRINTF("trap gate\n"); break;
8135 /* XXX handle 32 bit segment types by inspecting mode */
8136 default: DPRINTF("unknown");
8137 }
8138 } else {
8139 switch (type + 16) {
8140 case SDT_MEMRO: DPRINTF("data, r/o\n"); break;
8141 case SDT_MEMROA: DPRINTF("data, r/o, accessed\n"); break;
8142 case SDT_MEMRW: DPRINTF("data, r/w\n"); break;
8143 case SDT_MEMRWA: DPRINTF("data, r/w, accessed\n"); break;
8144 case SDT_MEMROD: DPRINTF("data, r/o, expand down\n"); break;
8145 case SDT_MEMRODA: DPRINTF("data, r/o, expand down, "
8146 "accessed\n");
8147 break;
8148 case SDT_MEMRWD: DPRINTF("data, r/w, expand down\n"); break;
8149 case SDT_MEMRWDA: DPRINTF("data, r/w, expand down, "
8150 "accessed\n");
8151 break;
8152 case SDT_MEME: DPRINTF("code, x only\n"); break;
8153 case SDT_MEMEA: DPRINTF("code, x only, accessed\n"); break;
8154 case SDT_MEMER: DPRINTF("code, r/x\n"); break;
8155 case SDT_MEMERA: DPRINTF("code, r/x, accessed\n"); break;
8156 case SDT_MEMEC: DPRINTF("code, x only, conforming\n"); break;
8157 case SDT_MEMEAC: DPRINTF("code, x only, conforming, "
8158 "accessed\n");
8159 break;
8160 case SDT_MEMERC: DPRINTF("code, r/x, conforming\n"); break;
8161 case SDT_MEMERAC: DPRINTF("code, r/x, conforming, accessed\n");
8162 break;
8163 }
8164 }
8165}
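
As a worked example, a stand-alone sketch (not part of vmm_machdep.c) that pulls out the same fields vmm_segment_desc_decode() prints. The sample A/R value 0x209b is assumed to be a typical long-mode code segment; the sketch reports g=0, d/b=0, l=1, p=1, dpl=0, s=1, type=0xb, i.e. "code, r/x, accessed".

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t val = 0x209b;		/* example VMX access-rights value */
	uint16_t ar = (uint16_t)val;

	if (val & 0x10000) {		/* VMX "unusable" bit */
		printf("(unusable)\n");
		return 0;
	}
	printf("g=%d d/b=%d l=%d p=%d dpl=%d s=%d type=0x%x\n",
	    (ar & 0x8000) >> 15, (ar & 0x4000) >> 14, (ar & 0x2000) >> 13,
	    (ar & 0x80) >> 7, (ar & 0x60) >> 5, (ar & 0x10) >> 4, ar & 0xf);
	return 0;
}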
8166
8167void
8168vmm_decode_cr0(uint64_t cr0)
8169{
8170 struct vmm_reg_debug_info cr0_info[11] = {
8171 { CR0_PG, "PG ", "pg " },
8172 { CR0_CD, "CD ", "cd " },
8173 { CR0_NW, "NW ", "nw " },
8174 { CR0_AM, "AM ", "am " },
8175 { CR0_WP, "WP ", "wp " },
8176 { CR0_NE, "NE ", "ne " },
8177 { CR0_ET, "ET ", "et " },
8178 { CR0_TS, "TS ", "ts " },
8179 { CR0_EM, "EM ", "em " },
8180 { CR0_MP, "MP ", "mp " },
8181 { CR0_PE, "PE", "pe" }
8182 };
8183
8184 uint8_t i;
8185
8186 DPRINTF("(");
8187 for (i = 0; i < nitems(cr0_info); i++)
8188 if (cr0 & cr0_info[i].vrdi_bit)
8189 DPRINTF("%s", cr0_info[i].vrdi_present);
8190 else
8191 DPRINTF("%s", cr0_info[i].vrdi_absent);
8192
8193 DPRINTF(")\n");
8194}
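
vmm_decode_cr0() and the decoders that follow all use the same table-driven idiom: one entry per flag holding the bit mask and the strings to print when the bit is set or clear. A self-contained sketch of that pattern follows; the struct, table, and CR0 value are local to the example (CR0_PG = 0x80000000 and CR0_PE = 0x1 as in the table above), not the kernel's vmm_reg_debug_info.

#include <stdio.h>
#include <stdint.h>

struct demo_bit {
	uint64_t	bit;
	const char	*on;
	const char	*off;
};

int
main(void)
{
	const struct demo_bit tbl[] = {
		{ 0x80000000, "PG ", "pg " },	/* paging */
		{ 0x00000001, "PE",  "pe"  }	/* protection enable */
	};
	uint64_t cr0 = 0x80000011;		/* example register value */
	size_t i;

	printf("(");
	for (i = 0; i < sizeof(tbl) / sizeof(tbl[0]); i++)
		printf("%s", (cr0 & tbl[i].bit) ? tbl[i].on : tbl[i].off);
	printf(")\n");				/* prints "(PG PE)" */
	return 0;
}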
8195
8196void
8197vmm_decode_cr3(uint64_t cr3)
8198{
8199 struct vmm_reg_debug_info cr3_info[2] = {
8200 { CR3_PWT, "PWT ", "pwt "},
8201 { CR3_PCD, "PCD", "pcd"}
8202 };
8203
8204 uint64_t cr4;
8205 uint8_t i;
8206
8207 if (vmread(VMCS_GUEST_IA32_CR4, &cr4)) {
8208 DPRINTF("(error)\n");
8209 return;
8210 }
8211
8212 /* If CR4.PCIDE = 0, interpret CR3.PWT and CR3.PCD */
8213 if ((cr4 & CR4_PCIDE) == 0) {
8214 DPRINTF("(");
8215 for (i = 0; i < nitems(cr3_info); i++)
8216 if (cr3 & cr3_info[i].vrdi_bit)
8217 DPRINTF("%s", cr3_info[i].vrdi_present);
8218 else
8219 DPRINTF("%s", cr3_info[i].vrdi_absent);
8220
8221 DPRINTF(")\n");
8222 } else {
8223 DPRINTF("(pcid=0x%llx)\n", cr3 & 0xFFF);
8224 }
8225}
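
A stand-alone sketch of the CR3 interpretation above: when CR4.PCIDE is clear, bits 3 and 4 of CR3 are PWT/PCD; when it is set, the low 12 bits hold the PCID instead. The masks (CR4_PCIDE = 0x00020000, CR3_PWT = bit 3, CR3_PCD = bit 4) come from the listing; the input values are invented for the example.

#include <stdio.h>
#include <stdint.h>

static void
demo_decode_cr3(uint64_t cr3, uint64_t cr4)
{
	if ((cr4 & 0x00020000) == 0)		/* CR4_PCIDE clear */
		printf("(%s%s)\n",
		    (cr3 & (1ULL << 3)) ? "PWT " : "pwt ",
		    (cr3 & (1ULL << 4)) ? "PCD" : "pcd");
	else					/* CR4_PCIDE set */
		printf("(pcid=0x%llx)\n",
		    (unsigned long long)(cr3 & 0xfff));
}

int
main(void)
{
	demo_decode_cr3(0x18, 0);		/* -> "(PWT PCD)" */
	demo_decode_cr3(0x42, 0x00020000);	/* -> "(pcid=0x42)" */
	return 0;
}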
8226
8227void
8228vmm_decode_cr4(uint64_t cr4)
8229{
8230 struct vmm_reg_debug_info cr4_info[19] = {
8231 { CR4_PKE, "PKE ", "pke "},
8232 { CR4_SMAP, "SMAP ", "smap "},
8233 { CR4_SMEP, "SMEP ", "smep "},
8234 { CR4_OSXSAVE, "OSXSAVE ", "osxsave "},
8235 { CR4_PCIDE, "PCIDE ", "pcide "},
8236 { CR4_FSGSBASE, "FSGSBASE ", "fsgsbase "},
8237 { CR4_SMXE, "SMXE ", "smxe "},
8238 { CR4_VMXE, "VMXE ", "vmxe "},
8239 { CR4_OSXMMEXCPT, "OSXMMEXCPT ", "osxmmexcpt "},
8240 { CR4_OSFXSR, "OSFXSR ", "osfxsr "},
8241 { CR4_PCE, "PCE ", "pce "},
8242 { CR4_PGE, "PGE ", "pge "},
8243 { CR4_MCE, "MCE ", "mce "},
8244 { CR4_PAE, "PAE ", "pae "},
8245 { CR4_PSE, "PSE ", "pse "},
8246 { CR4_DE, "DE ", "de "},
8247 { CR4_TSD, "TSD ", "tsd "},
8248 { CR4_PVI, "PVI ", "pvi "},
8249 { CR4_VME, "VME", "vme"}
8250 };
8251
8252 uint8_t i;
8253
8254 DPRINTF("(");
8255 for (i = 0; i < nitems(cr4_info); i++)
8256 if (cr4 & cr4_info[i].vrdi_bit)
8257 DPRINTF("%s", cr4_info[i].vrdi_present);
8258 else
8259 DPRINTF("%s", cr4_info[i].vrdi_absent);
8260
8261 DPRINTF(")\n");
8262}
8263
8264void
8265vmm_decode_apicbase_msr_value(uint64_t apicbase)
8266{
8267 struct vmm_reg_debug_info apicbase_info[3] = {
8268 { APICBASE_BSP, "BSP ", "bsp "},
8269 { APICBASE_ENABLE_X2APIC, "X2APIC ", "x2apic "},
8270 { APICBASE_GLOBAL_ENABLE, "GLB_EN", "glb_en"}
8271 };
8272
8273 uint8_t i;
8274
8275 DPRINTF("(");
8276 for (i = 0; i < nitems(apicbase_info); i++)
8277 if (apicbase & apicbase_info[i].vrdi_bit)
8278 DPRINTF("%s", apicbase_info[i].vrdi_present);
8279 else
8280 DPRINTF("%s", apicbase_info[i].vrdi_absent);
8281
8282 DPRINTF(")\n");
8283}
8284
8285void
8286vmm_decode_ia32_fc_value(uint64_t fcr)
8287{
8288 struct vmm_reg_debug_info fcr_info[4] = {
8289 { IA32_FEATURE_CONTROL_LOCK, "LOCK ", "lock "},
8290 { IA32_FEATURE_CONTROL_SMX_EN, "SMX ", "smx "},
8291 { IA32_FEATURE_CONTROL_VMX_EN, "VMX ", "vmx "},
8292 { IA32_FEATURE_CONTROL_SENTER_EN, "SENTER ", "senter "}
8293 };
8294
8295 uint8_t i;
8296
8297 DPRINTF("(");
8298 for (i = 0; i < nitems(fcr_info); i++)
8299 if (fcr & fcr_info[i].vrdi_bit)
8300 DPRINTF("%s", fcr_info[i].vrdi_present);
8301 else
8302 DPRINTF("%s", fcr_info[i].vrdi_absent);
8303
8304 if (fcr & IA32_FEATURE_CONTROL_SENTER_EN)
8305 DPRINTF(" [SENTER param = 0x%llx]",
8306 (fcr & IA32_FEATURE_CONTROL_SENTER_PARAM_MASK) >> 8);
8307
8308 DPRINTF(")\n");
8309}
8310
8311void
8312vmm_decode_mtrrcap_value(uint64_t val)
8313{
8314 struct vmm_reg_debug_info mtrrcap_info[3] = {
8315 { MTRRcap_FIXED, "FIXED ", "fixed "},
8316 { MTRRcap_WC, "WC ", "wc "},
8317 { MTRRcap_SMRR, "SMRR ", "smrr "}
8318 };
8319
8320 uint8_t i;
8321
8322 DPRINTF("(");
8323 for (i = 0; i < nitems(mtrrcap_info); i++)
8324 if (val & mtrrcap_info[i].vrdi_bit)
8325 DPRINTF("%s", mtrrcap_info[i].vrdi_present);
8326 else
8327 DPRINTF("%s", mtrrcap_info[i].vrdi_absent);
8328
8329 if (val & MTRRcap_FIXED)
8330 DPRINTF(" [nr fixed ranges = 0x%llx]",
8331 (val & 0xff));
8332
8333 DPRINTF(")\n");
8334}
8335
8336void
8337vmm_decode_perf_status_value(uint64_t val)
8338{
8339 DPRINTF("(pstate ratio = 0x%llx)\n", (val & 0xffff));
8340}
8341
8342void vmm_decode_perf_ctl_value(uint64_t val)
8343{
8344 DPRINTF("(%s ", (val & PERF_CTL_TURBO) ? "TURBO" : "turbo");
8345 DPRINTF("pstate req = 0x%llx)\n", (val & 0xffff));
8346}
8347
8348void
8349vmm_decode_mtrrdeftype_value(uint64_t mtrrdeftype)
8350{
8351 struct vmm_reg_debug_info mtrrdeftype_info[2] = {
8352 { MTRRdefType_FIXED_ENABLE, "FIXED ", "fixed "},
8353 { MTRRdefType_ENABLE, "ENABLED ", "enabled "},
8354 };
8355
8356 uint8_t i;
8357 int type;
8358
8359 DPRINTF("(");
8360 for (i = 0; i < nitems(mtrrdeftype_info); i++)
8361 if (mtrrdeftype & mtrrdeftype_info[i].vrdi_bit)
8362 DPRINTF("%s", mtrrdeftype_info[i].vrdi_present);
8363 else
8364 DPRINTF("%s", mtrrdeftype_info[i].vrdi_absent);
8365
8366 DPRINTF("type = ");
8367 type = mtrr2mrt(mtrrdeftype & 0xff);
8368 switch (type) {
8369 case MDF_UNCACHEABLE: DPRINTF("UC"); break;
8370 case MDF_WRITECOMBINE: DPRINTF("WC"); break;
8371 case MDF_WRITETHROUGH: DPRINTF("WT"); break;
8372 case MDF_WRITEPROTECT: DPRINTF("RO"); break;
8373 case MDF_WRITEBACK: DPRINTF("WB"); break;
8374 case MDF_UNKNOWN:
8375 default:
8376 DPRINTF("??");
8377 break;
8378 }
8379
8380 DPRINTF(")\n");
8381}
8382
8383void
8384vmm_decode_efer_value(uint64_t efer)
8385{
8386 struct vmm_reg_debug_info efer_info[4] = {
8387 { EFER_SCE, "SCE ", "sce "},
8388 { EFER_LME, "LME ", "lme "},
8389 { EFER_LMA, "LMA ", "lma "},
8390 { EFER_NXE, "NXE", "nxe"},
8391 };
8392
8393 uint8_t i;
8394
8395 DPRINTF("(");
8396 for (i = 0; i < nitems(efer_info); i++)
8397 if (efer & efer_info[i].vrdi_bit)
8398 DPRINTF("%s", efer_info[i].vrdi_present);
8399 else
8400 DPRINTF("%s", efer_info[i].vrdi_absent);
8401
8402 DPRINTF(")\n");
8403}
8404
8405void
8406vmm_decode_msr_value(uint64_t msr, uint64_t val)
8407{
8408 switch (msr) {
8409 case MSR_APICBASE: vmm_decode_apicbase_msr_value(val); break;
8410 case MSR_IA32_FEATURE_CONTROL: vmm_decode_ia32_fc_value(val); break;
8411 case MSR_MTRRcap: vmm_decode_mtrrcap_value(val); break;
8412 case MSR_PERF_STATUS: vmm_decode_perf_status_value(val); break;
8413 case MSR_PERF_CTL: vmm_decode_perf_ctl_value(val); break;
8414 case MSR_MTRRdefType: vmm_decode_mtrrdeftype_value(val); break;
8415 case MSR_EFER: vmm_decode_efer_value(val); break;
8416 case MSR_MISC_ENABLE: vmm_decode_misc_enable_value(val); break;
8417 default: DPRINTF("\n");
8418 }
8419}
8420
8421void
8422vmm_decode_rflags(uint64_t rflags)
8423{
8424 struct vmm_reg_debug_info rflags_info[16] = {
8425 { PSL_C, "CF ", "cf "},
8426 { PSL_PF, "PF ", "pf "},
8427 { PSL_AF, "AF ", "af "},
8428 { PSL_Z, "ZF ", "zf "},
8429 { PSL_N, "SF ", "sf "}, /* sign flag */
8430 { PSL_T, "TF ", "tf "},
8431 { PSL_I, "IF ", "if "},
8432 { PSL_D, "DF ", "df "},
8433 { PSL_V, "OF ", "of "}, /* overflow flag */
8434 { PSL_NT, "NT ", "nt "},
8435 { PSL_RF, "RF ", "rf "},
8436 { PSL_VM, "VM ", "vm "},
8437 { PSL_AC, "AC ", "ac "},
8438 { PSL_VIF, "VIF ", "vif "},
8439 { PSL_VIP, "VIP ", "vip "},
8440 { PSL_ID, "ID ", "id "},
8441 };
8442
8443 uint8_t i, iopl;
8444
8445 DPRINTF("(");
8446 for (i = 0; i < nitems(rflags_info); i++)
8447 if (rflags & rflags_info[i].vrdi_bit)
8448 DPRINTF("%s", rflags_info[i].vrdi_present);
8449 else
8450 DPRINTF("%s", rflags_info[i].vrdi_absent);
8451
8452 iopl = (rflags & PSL_IOPL) >> 12;
8453 DPRINTF("IOPL=%d", iopl);
8454
8455 DPRINTF(")\n");
8456}
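
A small stand-alone sketch of the IOPL extraction above: bits 12-13 of RFLAGS (PSL_IOPL, mask 0x3000) hold the I/O privilege level, and bit 9 (PSL_I, 0x200) is the interrupt flag. The example value is invented.

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t rflags = 0x3202;	/* example: IF set, IOPL = 3 */

	printf("IOPL=%d IF=%d\n",
	    (int)((rflags & 0x00003000) >> 12),
	    (rflags & 0x00000200) ? 1 : 0);
	return 0;
}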
8457
8458void
8459vmm_decode_misc_enable_value(uint64_t misc)
8460{
8461 struct vmm_reg_debug_info misc_info[10] = {
8462 { MISC_ENABLE_FAST_STRINGS, "FSE ", "fse "},
8463 { MISC_ENABLE_TCC, "TCC ", "tcc "},
8464 { MISC_ENABLE_PERF_MON_AVAILABLE, "PERF ", "perf "},
8465 { MISC_ENABLE_BTS_UNAVAILABLE, "BTSU ", "btsu "},
8466 { MISC_ENABLE_PEBS_UNAVAILABLE, "PEBSU ", "pebsu "},
8467 { MISC_ENABLE_EIST_ENABLED, "EIST ", "eist "},
8468 { MISC_ENABLE_ENABLE_MONITOR_FSM, "MFSM ", "mfsm "},
8469 { MISC_ENABLE_LIMIT_CPUID_MAXVAL, "CMAX ", "cmax "},
8470 { MISC_ENABLE_xTPR_MESSAGE_DISABLE, "xTPRD ", "xtprd "},
8471 { MISC_ENABLE_XD_BIT_DISABLE, "NXD", "nxd"},
8472 };
8473
8474 uint8_t i;
8475
8476 DPRINTF("(");
8477 for (i = 0; i < nitems(misc_info); i++)
8478 if (misc & misc_info[i].vrdi_bit)
8479 DPRINTF("%s", misc_info[i].vrdi_present);
8480 else
8481 DPRINTF("%s", misc_info[i].vrdi_absent);
8482
8483 DPRINTF(")\n");
8484}
8485
8486const char *
8487vmm_decode_cpu_mode(struct vcpu *vcpu)
8488{
8489 int mode = vmm_get_guest_cpu_mode(vcpu);
8490
8491 switch (mode) {
8492 case VMM_CPU_MODE_REAL: return "real";
8493 case VMM_CPU_MODE_PROT: return "16 bit protected";
8494 case VMM_CPU_MODE_PROT32: return "32 bit protected";
8495 case VMM_CPU_MODE_COMPAT: return "compatibility";
8496 case VMM_CPU_MODE_LONG: return "long";
8497 default: return "unknown";
8498 }
8499}
8500#endif /* VMM_DEBUG */