Bug Summary

File:arch/amd64/amd64/vmm.c
Warning:line 7015, column 15
Dereference of undefined pointer value

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name vmm.c -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -ffreestanding -mcmodel=kernel -target-cpu x86-64 -target-feature +retpoline-indirect-calls -target-feature +retpoline-indirect-branches -target-feature -sse2 -target-feature -sse -target-feature -3dnow -target-feature -mmx -target-feature +save-args -disable-red-zone -no-implicit-float -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -nostdsysteminc -nobuiltininc -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/sys -I /usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -I /usr/src/sys/arch -I /usr/src/sys/dev/pci/drm/include -I /usr/src/sys/dev/pci/drm/include/uapi -I /usr/src/sys/dev/pci/drm/amd/include/asic_reg -I /usr/src/sys/dev/pci/drm/amd/include -I /usr/src/sys/dev/pci/drm/amd/amdgpu -I /usr/src/sys/dev/pci/drm/amd/display -I /usr/src/sys/dev/pci/drm/amd/display/include -I /usr/src/sys/dev/pci/drm/amd/display/dc -I /usr/src/sys/dev/pci/drm/amd/display/amdgpu_dm -I /usr/src/sys/dev/pci/drm/amd/pm/inc -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu11 -I /usr/src/sys/dev/pci/drm/amd/pm/swsmu/smu12 -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/hwmgr -I /usr/src/sys/dev/pci/drm/amd/pm/powerplay/smumgr -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc -I /usr/src/sys/dev/pci/drm/amd/display/dc/inc/hw -I /usr/src/sys/dev/pci/drm/amd/display/dc/clk_mgr -I /usr/src/sys/dev/pci/drm/amd/display/modules/inc -I /usr/src/sys/dev/pci/drm/amd/display/modules/hdcp -I /usr/src/sys/dev/pci/drm/amd/display/dmub/inc -I /usr/src/sys/dev/pci/drm/i915 -D DDB -D DIAGNOSTIC -D KTRACE -D ACCOUNTING -D KMEMSTATS -D PTRACE -D POOL_DEBUG -D CRYPTO -D SYSVMSG -D SYSVSEM -D SYSVSHM -D UVM_SWAP_ENCRYPT -D FFS -D FFS2 -D FFS_SOFTUPDATES -D UFS_DIRHASH -D QUOTA -D EXT2FS -D MFS -D NFSCLIENT -D NFSSERVER -D CD9660 -D UDF -D MSDOSFS -D FIFO -D FUSE -D SOCKET_SPLICE -D TCP_ECN -D TCP_SIGNATURE -D INET6 -D IPSEC -D PPP_BSDCOMP -D PPP_DEFLATE -D PIPEX -D MROUTING -D MPLS -D BOOT_CONFIG -D USER_PCICONF -D APERTURE -D MTRR -D NTFS -D HIBERNATE -D PCIVERBOSE -D USBVERBOSE -D WSDISPLAY_COMPAT_USL -D WSDISPLAY_COMPAT_RAWKBD -D WSDISPLAY_DEFAULTSCREENS=6 -D X86EMU -D ONEWIREVERBOSE -D MULTIPROCESSOR -D MAXUSERS=80 -D _KERNEL -D CONFIG_DRM_AMD_DC_DCN3_0 -O2 -Wno-pointer-sign -Wno-address-of-packed-member -Wno-constant-conversion -Wno-unused-but-set-variable -Wno-gnu-folding-constant -fdebug-compilation-dir=/usr/src/sys/arch/amd64/compile/GENERIC.MP/obj -ferror-limit 19 -fwrapv -D_RET_PROTECTOR -ret-protector -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -o /usr/obj/sys/arch/amd64/compile/GENERIC.MP/scan-build/2022-01-12-131800-47421-1 -x c /usr/src/sys/arch/amd64/amd64/vmm.c
1/* $OpenBSD: vmm.c,v 1.301 2022/01/11 20:34:22 tobhe Exp $ */
2/*
3 * Copyright (c) 2014 Mike Larkin <mlarkin@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18#include <sys/param.h>
19#include <sys/systm.h>
20#include <sys/signalvar.h>
21#include <sys/malloc.h>
22#include <sys/device.h>
23#include <sys/pool.h>
24#include <sys/proc.h>
25#include <sys/user.h>
26#include <sys/ioctl.h>
27#include <sys/queue.h>
28#include <sys/rwlock.h>
29#include <sys/pledge.h>
30#include <sys/memrange.h>
31#include <sys/tracepoint.h>
32
33#include <uvm/uvm_extern.h>
34
35#include <machine/fpu.h>
36#include <machine/pmap.h>
37#include <machine/biosvar.h>
38#include <machine/segments.h>
39#include <machine/cpufunc.h>
40#include <machine/vmmvar.h>
41
42#include <dev/isa/isareg.h>
43#include <dev/pv/pvreg.h>
44
45/* #define VMM_DEBUG */
46
47void *l1tf_flush_region;
48
49#ifdef VMM_DEBUG
50#define DPRINTF(x...) do { printf(x); } while(0)
51#else
52#define DPRINTF(x...)
53#endif /* VMM_DEBUG */
54
55#define DEVNAME(s)((s)->sc_dev.dv_xname) ((s)->sc_dev.dv_xname)
56
57#define CTRL_DUMP(x,y,z)printf(" %s: Can set:%s Can clear:%s\n", "z" , vcpu_vmx_check_cap
(x, IA32_VMX_y_CTLS, IA32_VMX_z, 1) ? "Yes" : "No", vcpu_vmx_check_cap
(x, IA32_VMX_y_CTLS, IA32_VMX_z, 0) ? "Yes" : "No");
printf(" %s: Can set:%s Can clear:%s\n", #z , \
58 vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \
59 IA32_VMX_##z, 1) ? "Yes" : "No", \
60 vcpu_vmx_check_cap(x, IA32_VMX_##y ##_CTLS, \
61 IA32_VMX_##z, 0) ? "Yes" : "No");
62
63#define VMX_EXIT_INFO_HAVE_RIP0x1 0x1
64#define VMX_EXIT_INFO_HAVE_REASON0x2 0x2
65#define VMX_EXIT_INFO_COMPLETE(0x1 | 0x2) \
66 (VMX_EXIT_INFO_HAVE_RIP0x1 | VMX_EXIT_INFO_HAVE_REASON0x2)
67
68struct vm {
69 struct vmspace *vm_vmspace;
70 vm_map_t vm_map;
71 uint32_t vm_id;
72 pid_t vm_creator_pid;
73 size_t vm_nmemranges;
74 size_t vm_memory_size;
75 char vm_name[VMM_MAX_NAME_LEN64];
76 struct vm_mem_range vm_memranges[VMM_MAX_MEM_RANGES16];
77
78 struct vcpu_head vm_vcpu_list;
79 uint32_t vm_vcpu_ct;
80 u_int vm_vcpus_running;
81 struct rwlock vm_vcpu_lock;
82
83 SLIST_ENTRY(vm)struct { struct vm *sle_next; } vm_link;
84};
85
86SLIST_HEAD(vmlist_head, vm)struct vmlist_head { struct vm *slh_first; };
87
88struct vmm_softc {
89 struct device sc_dev;
90
91 /* Capabilities */
92 uint32_t nr_vmx_cpus;
93 uint32_t nr_svm_cpus;
94 uint32_t nr_rvi_cpus;
95 uint32_t nr_ept_cpus;
96
97 /* Managed VMs */
98 struct vmlist_head vm_list;
99
100 int mode;
101
102 size_t vcpu_ct;
103 size_t vcpu_max;
104
105 struct rwlock vm_lock;
106 size_t vm_ct; /* number of in-memory VMs */
107 size_t vm_idx; /* next unique VM index */
108
109 struct rwlock vpid_lock;
110 uint16_t max_vpid;
111 uint8_t vpids[512]; /* bitmap of used VPID/ASIDs */
112};
113
114void vmx_dump_vmcs_field(uint16_t, const char *);
115int vmm_enabled(void);
116int vmm_probe(struct device *, void *, void *);
117void vmm_attach(struct device *, struct device *, void *);
118int vmmopen(dev_t, int, int, struct proc *);
119int vmmioctl(dev_t, u_long, caddr_t, int, struct proc *);
120int vmmclose(dev_t, int, int, struct proc *);
121int vmm_start(void);
122int vmm_stop(void);
123size_t vm_create_check_mem_ranges(struct vm_create_params *);
124int vm_create(struct vm_create_params *, struct proc *);
125int vm_run(struct vm_run_params *);
126int vm_terminate(struct vm_terminate_params *);
127int vm_get_info(struct vm_info_params *);
128int vm_resetcpu(struct vm_resetcpu_params *);
129int vm_intr_pending(struct vm_intr_params *);
130int vm_rwregs(struct vm_rwregs_params *, int);
131int vm_mprotect_ept(struct vm_mprotect_ept_params *);
132int vm_rwvmparams(struct vm_rwvmparams_params *, int);
133int vm_find(uint32_t, struct vm **);
134int vcpu_readregs_vmx(struct vcpu *, uint64_t, struct vcpu_reg_state *);
135int vcpu_readregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
136int vcpu_writeregs_vmx(struct vcpu *, uint64_t, int, struct vcpu_reg_state *);
137int vcpu_writeregs_svm(struct vcpu *, uint64_t, struct vcpu_reg_state *);
138int vcpu_reset_regs(struct vcpu *, struct vcpu_reg_state *);
139int vcpu_reset_regs_vmx(struct vcpu *, struct vcpu_reg_state *);
140int vcpu_reset_regs_svm(struct vcpu *, struct vcpu_reg_state *);
141int vcpu_reload_vmcs_vmx(struct vcpu *);
142int vcpu_init(struct vcpu *);
143int vcpu_init_vmx(struct vcpu *);
144int vcpu_init_svm(struct vcpu *);
145int vcpu_must_stop(struct vcpu *);
146int vcpu_run_vmx(struct vcpu *, struct vm_run_params *);
147int vcpu_run_svm(struct vcpu *, struct vm_run_params *);
148void vcpu_deinit(struct vcpu *);
149void vcpu_deinit_vmx(struct vcpu *);
150void vcpu_deinit_svm(struct vcpu *);
151int vm_impl_init(struct vm *, struct proc *);
152int vm_impl_init_vmx(struct vm *, struct proc *);
153int vm_impl_init_svm(struct vm *, struct proc *);
154void vm_impl_deinit(struct vm *);
155void vm_impl_deinit_vmx(struct vm *);
156void vm_impl_deinit_svm(struct vm *);
157void vm_teardown(struct vm *);
158int vcpu_vmx_check_cap(struct vcpu *, uint32_t, uint32_t, int);
159int vcpu_vmx_compute_ctrl(uint64_t, uint16_t, uint32_t, uint32_t, uint32_t *);
160int vmx_get_exit_info(uint64_t *, uint64_t *);
161int vmx_load_pdptes(struct vcpu *);
162int vmx_handle_exit(struct vcpu *);
163int svm_handle_exit(struct vcpu *);
164int svm_handle_msr(struct vcpu *);
165int vmm_handle_xsetbv(struct vcpu *, uint64_t *);
166int vmx_handle_xsetbv(struct vcpu *);
167int svm_handle_xsetbv(struct vcpu *);
168int vmm_handle_cpuid(struct vcpu *);
169int vmx_handle_rdmsr(struct vcpu *);
170int vmx_handle_wrmsr(struct vcpu *);
171int vmx_handle_cr0_write(struct vcpu *, uint64_t);
172int vmx_handle_cr4_write(struct vcpu *, uint64_t);
173int vmx_handle_cr(struct vcpu *);
174int svm_handle_inout(struct vcpu *);
175int vmx_handle_inout(struct vcpu *);
176int svm_handle_hlt(struct vcpu *);
177int vmx_handle_hlt(struct vcpu *);
178int vmm_inject_ud(struct vcpu *);
179int vmm_inject_gp(struct vcpu *);
180int vmm_inject_db(struct vcpu *);
181void vmx_handle_intr(struct vcpu *);
182void vmx_handle_intwin(struct vcpu *);
183void vmx_handle_misc_enable_msr(struct vcpu *);
184int vmm_get_guest_memtype(struct vm *, paddr_t);
185int vmx_get_guest_faulttype(void);
186int svm_get_guest_faulttype(struct vmcb *);
187int vmx_get_exit_qualification(uint64_t *);
188int vmm_get_guest_cpu_cpl(struct vcpu *);
189int vmm_get_guest_cpu_mode(struct vcpu *);
190int svm_fault_page(struct vcpu *, paddr_t);
191int vmx_fault_page(struct vcpu *, paddr_t);
192int vmx_handle_np_fault(struct vcpu *);
193int svm_handle_np_fault(struct vcpu *);
194int vmx_mprotect_ept(vm_map_t, paddr_t, paddr_t, int);
195pt_entry_t *vmx_pmap_find_pte_ept(pmap_t, paddr_t);
196int vmm_alloc_vpid(uint16_t *);
197void vmm_free_vpid(uint16_t);
198const char *vcpu_state_decode(u_int);
199const char *vmx_exit_reason_decode(uint32_t);
200const char *svm_exit_reason_decode(uint32_t);
201const char *vmx_instruction_error_decode(uint32_t);
202void svm_setmsrbr(struct vcpu *, uint32_t);
203void svm_setmsrbw(struct vcpu *, uint32_t);
204void svm_setmsrbrw(struct vcpu *, uint32_t);
205void vmx_setmsrbr(struct vcpu *, uint32_t);
206void vmx_setmsrbw(struct vcpu *, uint32_t);
207void vmx_setmsrbrw(struct vcpu *, uint32_t);
208void svm_set_clean(struct vcpu *, uint32_t);
209void svm_set_dirty(struct vcpu *, uint32_t);
210
211int vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size);
212void vmm_init_pvclock(struct vcpu *, paddr_t);
213int vmm_update_pvclock(struct vcpu *);
214int vmm_pat_is_valid(uint64_t);
215
216#ifdef MULTIPROCESSOR1
217static int vmx_remote_vmclear(struct cpu_info*, struct vcpu *);
218#endif
219
220#ifdef VMM_DEBUG
221void dump_vcpu(struct vcpu *);
222void vmx_vcpu_dump_regs(struct vcpu *);
223void vmx_dump_vmcs(struct vcpu *);
224const char *msr_name_decode(uint32_t);
225void vmm_segment_desc_decode(uint64_t);
226void vmm_decode_cr0(uint64_t);
227void vmm_decode_cr3(uint64_t);
228void vmm_decode_cr4(uint64_t);
229void vmm_decode_msr_value(uint64_t, uint64_t);
230void vmm_decode_apicbase_msr_value(uint64_t);
231void vmm_decode_ia32_fc_value(uint64_t);
232void vmm_decode_mtrrcap_value(uint64_t);
233void vmm_decode_perf_status_value(uint64_t);
234void vmm_decode_perf_ctl_value(uint64_t);
235void vmm_decode_mtrrdeftype_value(uint64_t);
236void vmm_decode_efer_value(uint64_t);
237void vmm_decode_rflags(uint64_t);
238void vmm_decode_misc_enable_value(uint64_t);
239const char *vmm_decode_cpu_mode(struct vcpu *);
240
241extern int mtrr2mrt(int);
242
243struct vmm_reg_debug_info {
244 uint64_t vrdi_bit;
245 const char *vrdi_present;
246 const char *vrdi_absent;
247};
248#endif /* VMM_DEBUG */
249
250extern uint64_t tsc_frequency;
251extern int tsc_is_invariant;
252
253const char *vmm_hv_signature = VMM_HV_SIGNATURE"OpenBSDVMM58";
254
255const struct kmem_pa_mode vmm_kp_contig = {
256 .kp_constraint = &no_constraint,
257 .kp_maxseg = 1,
258 .kp_align = 4096,
259 .kp_zero = 1,
260};
261
262struct cfdriver vmm_cd = {
263 NULL((void *)0), "vmm", DV_DULL, CD_SKIPHIBERNATE2
264};
265
266const struct cfattach vmm_ca = {
267 sizeof(struct vmm_softc), vmm_probe, vmm_attach, NULL((void *)0), NULL((void *)0)
268};
269
270/*
271 * Helper struct to easily get the VMCS field IDs needed in vmread/vmwrite
272 * to access the individual fields of the guest segment registers. This
273 * struct is indexed by VCPU_REGS_* id.
274 */
275const struct {
276 uint64_t selid;
277 uint64_t limitid;
278 uint64_t arid;
279 uint64_t baseid;
280} vmm_vmx_sreg_vmcs_fields[] = {
281 { VMCS_GUEST_IA32_CS_SEL0x0802, VMCS_GUEST_IA32_CS_LIMIT0x4802,
282 VMCS_GUEST_IA32_CS_AR0x4816, VMCS_GUEST_IA32_CS_BASE0x6808 },
283 { VMCS_GUEST_IA32_DS_SEL0x0806, VMCS_GUEST_IA32_DS_LIMIT0x4806,
284 VMCS_GUEST_IA32_DS_AR0x481A, VMCS_GUEST_IA32_DS_BASE0x680C },
285 { VMCS_GUEST_IA32_ES_SEL0x0800, VMCS_GUEST_IA32_ES_LIMIT0x4800,
286 VMCS_GUEST_IA32_ES_AR0x4814, VMCS_GUEST_IA32_ES_BASE0x6806 },
287 { VMCS_GUEST_IA32_FS_SEL0x0808, VMCS_GUEST_IA32_FS_LIMIT0x4808,
288 VMCS_GUEST_IA32_FS_AR0x481C, VMCS_GUEST_IA32_FS_BASE0x680E },
289 { VMCS_GUEST_IA32_GS_SEL0x080A, VMCS_GUEST_IA32_GS_LIMIT0x480A,
290 VMCS_GUEST_IA32_GS_AR0x481E, VMCS_GUEST_IA32_GS_BASE0x6810 },
291 { VMCS_GUEST_IA32_SS_SEL0x0804, VMCS_GUEST_IA32_SS_LIMIT0x4804,
292 VMCS_GUEST_IA32_SS_AR0x4818, VMCS_GUEST_IA32_SS_BASE0x680A },
293 { VMCS_GUEST_IA32_LDTR_SEL0x080C, VMCS_GUEST_IA32_LDTR_LIMIT0x480C,
294 VMCS_GUEST_IA32_LDTR_AR0x4820, VMCS_GUEST_IA32_LDTR_BASE0x6812 },
295 { VMCS_GUEST_IA32_TR_SEL0x080E, VMCS_GUEST_IA32_TR_LIMIT0x480E,
296 VMCS_GUEST_IA32_TR_AR0x4822, VMCS_GUEST_IA32_TR_BASE0x6814 }
297};
298
299/* Pools for VMs and VCPUs */
300struct pool vm_pool;
301struct pool vcpu_pool;
302
303struct vmm_softc *vmm_softc;
304
305/* IDT information used when populating host state area */
306extern vaddr_t idt_vaddr;
307extern struct gate_descriptor *idt;
308
309/* Constants used in "CR access exit" */
310#define CR_WRITE0 0
311#define CR_READ1 1
312#define CR_CLTS2 2
313#define CR_LMSW3 3
314
315/*
316 * vmm_enabled
317 *
318 * Checks if we have at least one CPU with either VMX or SVM.
319 * Returns 1 if we have at least one of either type, but not both, 0 otherwise.
320 */
321int
322vmm_enabled(void)
323{
324 struct cpu_info *ci;
325 CPU_INFO_ITERATORint cii;
326 int found_vmx = 0, found_svm = 0;
327
328 /* Check if we have at least one CPU with either VMX or SVM */
329 CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci->
ci_next)
{
330 if (ci->ci_vmm_flags & CI_VMM_VMX(1 << 0))
331 found_vmx = 1;
332 if (ci->ci_vmm_flags & CI_VMM_SVM(1 << 1))
333 found_svm = 1;
334 }
335
336 /* Don't support both SVM and VMX at the same time */
337 if (found_vmx && found_svm)
338 return (0);
339
340 if (found_vmx || found_svm)
341 return 1;
342
343 return 0;
344}
345
346int
347vmm_probe(struct device *parent, void *match, void *aux)
348{
349 const char **busname = (const char **)aux;
350
351 if (strcmp(*busname, vmm_cd.cd_name) != 0)
352 return (0);
353 return (1);
354}
355
356/*
357 * vmm_attach
358 *
359 * Calculates how many of each type of CPU we have, prints this into dmesg
360 * during attach. Initializes various locks, pools, and list structures for the
361 * VMM.
362 */
363void
364vmm_attach(struct device *parent, struct device *self, void *aux)
365{
366 struct vmm_softc *sc = (struct vmm_softc *)self;
367 struct cpu_info *ci;
368 CPU_INFO_ITERATORint cii;
369
370 sc->nr_vmx_cpus = 0;
371 sc->nr_svm_cpus = 0;
372 sc->nr_rvi_cpus = 0;
373 sc->nr_ept_cpus = 0;
374 sc->vcpu_ct = 0;
375 sc->vm_ct = 0;
376 sc->vm_idx = 0;
377
378 /* Calculate CPU features */
379 CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci->
ci_next)
{
380 if (ci->ci_vmm_flags & CI_VMM_VMX(1 << 0))
381 sc->nr_vmx_cpus++;
382 if (ci->ci_vmm_flags & CI_VMM_SVM(1 << 1))
383 sc->nr_svm_cpus++;
384 if (ci->ci_vmm_flags & CI_VMM_RVI(1 << 2))
385 sc->nr_rvi_cpus++;
386 if (ci->ci_vmm_flags & CI_VMM_EPT(1 << 3))
387 sc->nr_ept_cpus++;
388 }
389
390 SLIST_INIT(&sc->vm_list){ ((&sc->vm_list)->slh_first) = ((void *)0); };
391 rw_init(&sc->vm_lock, "vm_list")_rw_init_flags(&sc->vm_lock, "vm_list", 0, ((void *)0)
)
;
392
393 if (sc->nr_ept_cpus) {
394 printf(": VMX/EPT");
395 sc->mode = VMM_MODE_EPT;
396 } else if (sc->nr_vmx_cpus) {
397 printf(": VMX");
398 sc->mode = VMM_MODE_VMX;
399 } else if (sc->nr_rvi_cpus) {
400 printf(": SVM/RVI");
401 sc->mode = VMM_MODE_RVI;
402 } else if (sc->nr_svm_cpus) {
403 printf(": SVM");
404 sc->mode = VMM_MODE_SVM;
405 } else {
406 printf(": unknown");
407 sc->mode = VMM_MODE_UNKNOWN;
408 }
409
410 if (sc->mode == VMM_MODE_EPT || sc->mode == VMM_MODE_VMX) {
411 if (!(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr)) {
412 l1tf_flush_region = km_alloc(VMX_L1D_FLUSH_SIZE(64 * 1024),
413 &kv_any, &vmm_kp_contig, &kd_waitok);
414 if (!l1tf_flush_region) {
415 printf(" (failing, no memory)");
416 sc->mode = VMM_MODE_UNKNOWN;
417 } else {
418 printf(" (using slow L1TF mitigation)");
419 memset(l1tf_flush_region, 0xcc,__builtin_memset((l1tf_flush_region), (0xcc), ((64 * 1024)))
420 VMX_L1D_FLUSH_SIZE)__builtin_memset((l1tf_flush_region), (0xcc), ((64 * 1024)));
421 }
422 }
423 }
424 printf("\n");
425
426 if (sc->mode == VMM_MODE_SVM || sc->mode == VMM_MODE_RVI) {
427 sc->max_vpid = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_svm.svm_max_asid;
428 } else {
429 sc->max_vpid = 0xFFF;
430 }
431
432 bzero(&sc->vpids, sizeof(sc->vpids))__builtin_bzero((&sc->vpids), (sizeof(sc->vpids)));
433 rw_init(&sc->vpid_lock, "vpid")_rw_init_flags(&sc->vpid_lock, "vpid", 0, ((void *)0));
434
435 pool_init(&vm_pool, sizeof(struct vm), 0, IPL_MPFLOOR0x9, PR_WAITOK0x0001,
436 "vmpool", NULL((void *)0));
437 pool_init(&vcpu_pool, sizeof(struct vcpu), 64, IPL_MPFLOOR0x9, PR_WAITOK0x0001,
438 "vcpupl", NULL((void *)0));
439
440 vmm_softc = sc;
441}
442
443/*
444 * vmmopen
445 *
446 * Called during open of /dev/vmm.
447 *
448 * Parameters:
449 * dev, flag, mode, p: These come from the character device and are
450 * all unused for this function
451 *
452 * Return values:
453 * ENODEV: if vmm(4) didn't attach or no supported CPUs detected
454 * 0: successful open
455 */
456int
457vmmopen(dev_t dev, int flag, int mode, struct proc *p)
458{
459 /* Don't allow open if we didn't attach */
460 if (vmm_softc == NULL((void *)0))
461 return (ENODEV19);
462
463 /* Don't allow open if we didn't detect any supported CPUs */
464 if (vmm_softc->mode != VMM_MODE_EPT && vmm_softc->mode != VMM_MODE_RVI)
465 return (ENODEV19);
466
467 return 0;
468}
469
470/*
471 * vmmioctl
472 *
473 * Main ioctl dispatch routine for /dev/vmm. Parses ioctl type and calls
474 * appropriate lower level handler routine. Returns result to ioctl caller.
475 */
476int
477vmmioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p)
478{
479 int ret;
480
481 KERNEL_UNLOCK()_kernel_unlock();
482
483 switch (cmd) {
484 case VMM_IOC_CREATE(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof
(struct vm_create_params) & 0x1fff) << 16) | ((('V'
)) << 8) | ((1)))
:
485 if ((ret = vmm_start()) != 0) {
486 vmm_stop();
487 break;
488 }
489 ret = vm_create((struct vm_create_params *)data, p);
490 break;
491 case VMM_IOC_RUN(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof
(struct vm_run_params) & 0x1fff) << 16) | ((('V')) <<
8) | ((2)))
:
492 ret = vm_run((struct vm_run_params *)data);
493 break;
494 case VMM_IOC_INFO(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof
(struct vm_info_params) & 0x1fff) << 16) | ((('V'))
<< 8) | ((3)))
:
495 ret = vm_get_info((struct vm_info_params *)data);
496 break;
497 case VMM_IOC_TERM((unsigned long)0x80000000 | ((sizeof(struct vm_terminate_params
) & 0x1fff) << 16) | ((('V')) << 8) | ((4)))
:
498 ret = vm_terminate((struct vm_terminate_params *)data);
499 break;
500 case VMM_IOC_RESETCPU((unsigned long)0x80000000 | ((sizeof(struct vm_resetcpu_params
) & 0x1fff) << 16) | ((('V')) << 8) | ((5)))
:
501 ret = vm_resetcpu((struct vm_resetcpu_params *)data);
502 break;
503 case VMM_IOC_INTR((unsigned long)0x80000000 | ((sizeof(struct vm_intr_params) &
0x1fff) << 16) | ((('V')) << 8) | ((6)))
:
504 ret = vm_intr_pending((struct vm_intr_params *)data);
505 break;
506 case VMM_IOC_READREGS(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof
(struct vm_rwregs_params) & 0x1fff) << 16) | ((('V'
)) << 8) | ((7)))
:
507 ret = vm_rwregs((struct vm_rwregs_params *)data, 0);
508 break;
509 case VMM_IOC_WRITEREGS((unsigned long)0x80000000 | ((sizeof(struct vm_rwregs_params
) & 0x1fff) << 16) | ((('V')) << 8) | ((8)))
:
510 ret = vm_rwregs((struct vm_rwregs_params *)data, 1);
511 break;
512 case VMM_IOC_MPROTECT_EPT((unsigned long)0x80000000 | ((sizeof(struct vm_mprotect_ept_params
) & 0x1fff) << 16) | ((('V')) << 8) | ((11)))
:
513 ret = vm_mprotect_ept((struct vm_mprotect_ept_params *)data);
514 break;
515 case VMM_IOC_READVMPARAMS(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof
(struct vm_rwvmparams_params) & 0x1fff) << 16) | ((
('V')) << 8) | ((9)))
:
516 ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 0);
517 break;
518 case VMM_IOC_WRITEVMPARAMS((unsigned long)0x80000000 | ((sizeof(struct vm_rwvmparams_params
) & 0x1fff) << 16) | ((('V')) << 8) | ((10)))
:
519 ret = vm_rwvmparams((struct vm_rwvmparams_params *)data, 1);
520 break;
521
522 default:
523 DPRINTF("%s: unknown ioctl code 0x%lx\n", __func__, cmd);
524 ret = ENOTTY25;
525 }
526
527 KERNEL_LOCK()_kernel_lock();
528
529 return (ret);
530}
531
532/*
533 * pledge_ioctl_vmm
534 *
535 * Restrict the allowed ioctls in a pledged process context.
536 * Is called from pledge_ioctl().
537 */
538int
539pledge_ioctl_vmm(struct proc *p, long com)
540{
541 switch (com) {
542 case VMM_IOC_CREATE(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof
(struct vm_create_params) & 0x1fff) << 16) | ((('V'
)) << 8) | ((1)))
:
543 case VMM_IOC_INFO(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof
(struct vm_info_params) & 0x1fff) << 16) | ((('V'))
<< 8) | ((3)))
:
544 /* The "parent" process in vmd forks and manages VMs */
545 if (p->p_p->ps_pledge & PLEDGE_PROC0x0000000000001000ULL)
546 return (0);
547 break;
548 case VMM_IOC_TERM((unsigned long)0x80000000 | ((sizeof(struct vm_terminate_params
) & 0x1fff) << 16) | ((('V')) << 8) | ((4)))
:
549 /* XXX VM processes should only terminate themselves */
550 case VMM_IOC_RUN(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof
(struct vm_run_params) & 0x1fff) << 16) | ((('V')) <<
8) | ((2)))
:
551 case VMM_IOC_RESETCPU((unsigned long)0x80000000 | ((sizeof(struct vm_resetcpu_params
) & 0x1fff) << 16) | ((('V')) << 8) | ((5)))
:
552 case VMM_IOC_INTR((unsigned long)0x80000000 | ((sizeof(struct vm_intr_params) &
0x1fff) << 16) | ((('V')) << 8) | ((6)))
:
553 case VMM_IOC_READREGS(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof
(struct vm_rwregs_params) & 0x1fff) << 16) | ((('V'
)) << 8) | ((7)))
:
554 case VMM_IOC_WRITEREGS((unsigned long)0x80000000 | ((sizeof(struct vm_rwregs_params
) & 0x1fff) << 16) | ((('V')) << 8) | ((8)))
:
555 case VMM_IOC_MPROTECT_EPT((unsigned long)0x80000000 | ((sizeof(struct vm_mprotect_ept_params
) & 0x1fff) << 16) | ((('V')) << 8) | ((11)))
:
556 case VMM_IOC_READVMPARAMS(((unsigned long)0x80000000|(unsigned long)0x40000000) | ((sizeof
(struct vm_rwvmparams_params) & 0x1fff) << 16) | ((
('V')) << 8) | ((9)))
:
557 case VMM_IOC_WRITEVMPARAMS((unsigned long)0x80000000 | ((sizeof(struct vm_rwvmparams_params
) & 0x1fff) << 16) | ((('V')) << 8) | ((10)))
:
558 return (0);
559 }
560
561 return (EPERM1);
562}
563
564/*
565 * vmmclose
566 *
567 * Called when /dev/vmm is closed. Presently unused.
568 */
569int
570vmmclose(dev_t dev, int flag, int mode, struct proc *p)
571{
572 return 0;
573}
574
575/*
576 * vm_find_vcpu
577 *
578 * Lookup VMM VCPU by ID number
579 *
580 * Parameters:
581 * vm: vm structure
582 * id: index id of vcpu
583 *
584 * Returns pointer to vcpu structure if successful, NULL otherwise
585 */
586static struct vcpu *
587vm_find_vcpu(struct vm *vm, uint32_t id)
588{
589 struct vcpu *vcpu;
590
591 if (vm == NULL((void *)0))
592 return NULL((void *)0);
593 rw_enter_read(&vm->vm_vcpu_lock);
594 SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link)for((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu
) != ((void *)0); (vcpu) = ((vcpu)->vc_vcpu_link.sle_next)
)
{
595 if (vcpu->vc_id == id)
596 break;
597 }
598 rw_exit_read(&vm->vm_vcpu_lock);
599 return vcpu;
600}
601
602
603/*
604 * vm_resetcpu
605 *
606 * Resets the vcpu defined in 'vrp' to power-on-init register state
607 *
608 * Parameters:
609 * vrp: ioctl structure defining the vcpu to reset (see vmmvar.h)
610 *
611 * Returns 0 if successful, or various error codes on failure:
612 * ENOENT if the VM id contained in 'vrp' refers to an unknown VM or
613 * if vrp describes an unknown vcpu for this VM
614 * EBUSY if the indicated VCPU is not stopped
615 * EIO if the indicated VCPU failed to reset
616 */
617int
618vm_resetcpu(struct vm_resetcpu_params *vrp)
619{
620 struct vm *vm;
621 struct vcpu *vcpu;
622 int error;
623
624 /* Find the desired VM */
625 rw_enter_read(&vmm_softc->vm_lock);
626 error = vm_find(vrp->vrp_vm_id, &vm);
627 rw_exit_read(&vmm_softc->vm_lock);
628
629 /* Not found? exit. */
630 if (error != 0) {
631 DPRINTF("%s: vm id %u not found\n", __func__,
632 vrp->vrp_vm_id);
633 return (error);
634 }
635
636 vcpu = vm_find_vcpu(vm, vrp->vrp_vcpu_id);
637
638 if (vcpu == NULL((void *)0)) {
639 DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
640 vrp->vrp_vcpu_id, vrp->vrp_vm_id);
641 return (ENOENT2);
642 }
643
644 rw_enter_write(&vcpu->vc_lock);
645
646 if (vcpu->vc_state != VCPU_STATE_STOPPED) {
647 DPRINTF("%s: reset of vcpu %u on vm %u attempted "
648 "while vcpu was in state %u (%s)\n", __func__,
649 vrp->vrp_vcpu_id, vrp->vrp_vm_id, vcpu->vc_state,
650 vcpu_state_decode(vcpu->vc_state));
651
652 rw_exit_write(&vcpu->vc_lock);
653 return (EBUSY16);
654 }
655
656 DPRINTF("%s: resetting vm %d vcpu %d to power on defaults\n", __func__,
657 vm->vm_id, vcpu->vc_id);
658
659 if (vcpu_reset_regs(vcpu, &vrp->vrp_init_state)) {
660 printf("%s: failed\n", __func__);
661#ifdef VMM_DEBUG
662 dump_vcpu(vcpu);
663#endif /* VMM_DEBUG */
664 rw_exit_write(&vcpu->vc_lock);
665 return (EIO5);
666 }
667
668 rw_exit_write(&vcpu->vc_lock);
669 return (0);
670}
671
672/*
673 * vm_intr_pending
674 *
675 * IOCTL handler routine for VMM_IOC_INTR messages, sent from vmd when an
676 * interrupt is pending and needs acknowledgment
677 *
678 * Parameters:
679 * vip: Describes the vm/vcpu for which the interrupt is pending
680 *
681 * Return values:
682 * 0: if successful
683 * ENOENT: if the VM/VCPU defined by 'vip' cannot be found
684 */
685int
686vm_intr_pending(struct vm_intr_params *vip)
687{
688 struct vm *vm;
689 struct vcpu *vcpu;
690 int error;
691
692 /* Find the desired VM */
693 rw_enter_read(&vmm_softc->vm_lock);
694 error = vm_find(vip->vip_vm_id, &vm);
695
696 /* Not found? exit. */
697 if (error != 0) {
698 rw_exit_read(&vmm_softc->vm_lock);
699 return (error);
700 }
701
702 vcpu = vm_find_vcpu(vm, vip->vip_vcpu_id);
703 rw_exit_read(&vmm_softc->vm_lock);
704
705 if (vcpu == NULL((void *)0))
706 return (ENOENT2);
707
708 rw_enter_write(&vcpu->vc_lock);
709 vcpu->vc_intr = vip->vip_intr;
710 rw_exit_write(&vcpu->vc_lock);
711
712 return (0);
713}
714
715/*
716 * vm_rwvmparams
717 *
718 * IOCTL handler to read/write the current vmm params like pvclock gpa, pvclock
719 * version, etc.
720 *
721 * Parameters:
722 * vrwp: Describes the VM and VCPU to get/set the params from
723 * dir: 0 for reading, 1 for writing
724 *
725 * Return values:
726 * 0: if successful
727 * ENOENT: if the VM/VCPU defined by 'vpp' cannot be found
728 * EINVAL: if an error occurred reading the registers of the guest
729 */
730int
731vm_rwvmparams(struct vm_rwvmparams_params *vpp, int dir) {
732 struct vm *vm;
733 struct vcpu *vcpu;
734 int error;
735
736 /* Find the desired VM */
737 rw_enter_read(&vmm_softc->vm_lock);
738 error = vm_find(vpp->vpp_vm_id, &vm);
739
740 /* Not found? exit. */
741 if (error != 0) {
742 rw_exit_read(&vmm_softc->vm_lock);
743 return (error);
744 }
745
746 vcpu = vm_find_vcpu(vm, vpp->vpp_vcpu_id);
747 rw_exit_read(&vmm_softc->vm_lock);
748
749 if (vcpu == NULL((void *)0))
750 return (ENOENT2);
751
752 if (dir == 0) {
753 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION0x2)
754 vpp->vpp_pvclock_version = vcpu->vc_pvclock_version;
755 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA0x1)
756 vpp->vpp_pvclock_system_gpa = \
757 vcpu->vc_pvclock_system_gpa;
758 return (0);
759 }
760
761 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_VERSION0x2)
762 vcpu->vc_pvclock_version = vpp->vpp_pvclock_version;
763 if (vpp->vpp_mask & VM_RWVMPARAMS_PVCLOCK_SYSTEM_GPA0x1) {
764 vmm_init_pvclock(vcpu, vpp->vpp_pvclock_system_gpa);
765 }
766 return (0);
767
768}
769
770/*
771 * vm_readregs
772 *
773 * IOCTL handler to read/write the current register values of a guest VCPU.
774 * The VCPU must not be running.
775 *
776 * Parameters:
777 * vrwp: Describes the VM and VCPU to get/set the registers from. The
778 * register values are returned here as well.
779 * dir: 0 for reading, 1 for writing
780 *
781 * Return values:
782 * 0: if successful
783 * ENOENT: if the VM/VCPU defined by 'vrwp' cannot be found
784 * EINVAL: if an error occurred accessing the registers of the guest
785 * EPERM: if the vm cannot be accessed from the calling process
786 */
787int
788vm_rwregs(struct vm_rwregs_params *vrwp, int dir)
789{
790 struct vm *vm;
791 struct vcpu *vcpu;
792 struct vcpu_reg_state *vrs = &vrwp->vrwp_regs;
793 int error, ret;
794
795 /* Find the desired VM */
796 rw_enter_read(&vmm_softc->vm_lock);
797 error = vm_find(vrwp->vrwp_vm_id, &vm);
798
799 /* Not found? exit. */
800 if (error != 0) {
801 rw_exit_read(&vmm_softc->vm_lock);
802 return (error);
803 }
804
805 vcpu = vm_find_vcpu(vm, vrwp->vrwp_vcpu_id);
806 rw_exit_read(&vmm_softc->vm_lock);
807
808 if (vcpu == NULL((void *)0))
809 return (ENOENT2);
810
811 rw_enter_write(&vcpu->vc_lock);
812 if (vmm_softc->mode == VMM_MODE_VMX ||
813 vmm_softc->mode == VMM_MODE_EPT)
814 ret = (dir == 0) ?
815 vcpu_readregs_vmx(vcpu, vrwp->vrwp_mask, vrs) :
816 vcpu_writeregs_vmx(vcpu, vrwp->vrwp_mask, 1, vrs);
817 else if (vmm_softc->mode == VMM_MODE_SVM ||
818 vmm_softc->mode == VMM_MODE_RVI)
819 ret = (dir == 0) ?
820 vcpu_readregs_svm(vcpu, vrwp->vrwp_mask, vrs) :
821 vcpu_writeregs_svm(vcpu, vrwp->vrwp_mask, vrs);
822 else {
823 DPRINTF("%s: unknown vmm mode", __func__);
824 ret = EINVAL22;
825 }
826 rw_exit_write(&vcpu->vc_lock);
827
828 return (ret);
829}
830
831/*
832 * vm_mprotect_ept
833 *
834 * IOCTL handler to sets the access protections of the ept
835 *
836 * Parameters:
837 * vmep: describes the memory for which the protect will be applied..
838 *
839 * Return values:
840 * 0: if successful
841 * ENOENT: if the VM defined by 'vmep' cannot be found
842 * EINVAL: if the sgpa or size is not page aligned, the prot is invalid,
843 * size is too large (512GB), there is wraparound
844 * (like start = 512GB-1 and end = 512GB-2),
845 * the address specified is not within the vm's mem range
846 * or the address lies inside reserved (MMIO) memory
847 */
848int
849vm_mprotect_ept(struct vm_mprotect_ept_params *vmep)
850{
851 struct vm *vm;
852 struct vcpu *vcpu;
853 vaddr_t sgpa;
854 size_t size;
855 vm_prot_t prot;
856 uint64_t msr;
857 int ret, memtype;
858
859 /* If not EPT or RVI, nothing to do here */
860 if (!(vmm_softc->mode == VMM_MODE_EPT
861 || vmm_softc->mode == VMM_MODE_RVI))
862 return (0);
863
864 /* Find the desired VM */
865 rw_enter_read(&vmm_softc->vm_lock);
866 ret = vm_find(vmep->vmep_vm_id, &vm);
867 rw_exit_read(&vmm_softc->vm_lock);
868
869 /* Not found? exit. */
870 if (ret != 0) {
871 DPRINTF("%s: vm id %u not found\n", __func__,
872 vmep->vmep_vm_id);
873 return (ret);
874 }
875
876 vcpu = vm_find_vcpu(vm, vmep->vmep_vcpu_id);
877
878 if (vcpu == NULL((void *)0)) {
879 DPRINTF("%s: vcpu id %u of vm %u not found\n", __func__,
880 vmep->vmep_vcpu_id, vmep->vmep_vm_id);
881 return (ENOENT2);
882 }
883
884 if (vcpu->vc_state != VCPU_STATE_STOPPED) {
885 DPRINTF("%s: mprotect_ept %u on vm %u attempted "
886 "while vcpu was in state %u (%s)\n", __func__,
887 vmep->vmep_vcpu_id, vmep->vmep_vm_id, vcpu->vc_state,
888 vcpu_state_decode(vcpu->vc_state));
889
890 return (EBUSY16);
891 }
892
893 /* Only proceed if the pmap is in the correct mode */
894 KASSERT((vmm_softc->mode == VMM_MODE_EPT &&(((vmm_softc->mode == VMM_MODE_EPT && vm->vm_map
->pmap->pm_type == 2) || (vmm_softc->mode == VMM_MODE_RVI
&& vm->vm_map->pmap->pm_type == 3)) ? (void
)0 : __assert("diagnostic ", "/usr/src/sys/arch/amd64/amd64/vmm.c"
, 897, "(vmm_softc->mode == VMM_MODE_EPT && vm->vm_map->pmap->pm_type == PMAP_TYPE_EPT) || (vmm_softc->mode == VMM_MODE_RVI && vm->vm_map->pmap->pm_type == PMAP_TYPE_RVI)"
))
895 vm->vm_map->pmap->pm_type == PMAP_TYPE_EPT) ||(((vmm_softc->mode == VMM_MODE_EPT && vm->vm_map
->pmap->pm_type == 2) || (vmm_softc->mode == VMM_MODE_RVI
&& vm->vm_map->pmap->pm_type == 3)) ? (void
)0 : __assert("diagnostic ", "/usr/src/sys/arch/amd64/amd64/vmm.c"
, 897, "(vmm_softc->mode == VMM_MODE_EPT && vm->vm_map->pmap->pm_type == PMAP_TYPE_EPT) || (vmm_softc->mode == VMM_MODE_RVI && vm->vm_map->pmap->pm_type == PMAP_TYPE_RVI)"
))
896 (vmm_softc->mode == VMM_MODE_RVI &&(((vmm_softc->mode == VMM_MODE_EPT && vm->vm_map
->pmap->pm_type == 2) || (vmm_softc->mode == VMM_MODE_RVI
&& vm->vm_map->pmap->pm_type == 3)) ? (void
)0 : __assert("diagnostic ", "/usr/src/sys/arch/amd64/amd64/vmm.c"
, 897, "(vmm_softc->mode == VMM_MODE_EPT && vm->vm_map->pmap->pm_type == PMAP_TYPE_EPT) || (vmm_softc->mode == VMM_MODE_RVI && vm->vm_map->pmap->pm_type == PMAP_TYPE_RVI)"
))
897 vm->vm_map->pmap->pm_type == PMAP_TYPE_RVI))(((vmm_softc->mode == VMM_MODE_EPT && vm->vm_map
->pmap->pm_type == 2) || (vmm_softc->mode == VMM_MODE_RVI
&& vm->vm_map->pmap->pm_type == 3)) ? (void
)0 : __assert("diagnostic ", "/usr/src/sys/arch/amd64/amd64/vmm.c"
, 897, "(vmm_softc->mode == VMM_MODE_EPT && vm->vm_map->pmap->pm_type == PMAP_TYPE_EPT) || (vmm_softc->mode == VMM_MODE_RVI && vm->vm_map->pmap->pm_type == PMAP_TYPE_RVI)"
))
;
898
899 sgpa = vmep->vmep_sgpa;
900 size = vmep->vmep_size;
901 prot = vmep->vmep_prot;
902
903 /* No W^X permissions */
904 if ((prot & PROT_MASK(0x01 | 0x02 | 0x04)) != prot &&
905 (prot & (PROT_WRITE0x02 | PROT_EXEC0x04)) == (PROT_WRITE0x02 | PROT_EXEC0x04)) {
906 DPRINTF("%s: W+X permission requested\n", __func__);
907 return (EINVAL22);
908 }
909
910 /* No Write only permissions */
911 if ((prot & (PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04)) == PROT_WRITE0x02) {
912 DPRINTF("%s: No Write only permissions\n", __func__);
913 return (EINVAL22);
914 }
915
916 /* No empty permissions */
917 if (prot == 0) {
918 DPRINTF("%s: No empty permissions\n", __func__);
919 return (EINVAL22);
920 }
921
922 /* No execute only on EPT CPUs that don't have that capability */
923 if (vmm_softc->mode == VMM_MODE_EPT) {
924 msr = rdmsr(IA32_VMX_EPT_VPID_CAP0x48C);
925 if (prot == PROT_EXEC0x04 &&
926 (msr & IA32_EPT_VPID_CAP_XO_TRANSLATIONS(1ULL << 0)) == 0) {
927 DPRINTF("%s: Execute only permissions unsupported,"
928 " adding read permission\n", __func__);
929
930 prot |= PROT_READ0x01;
931 }
932 }
933
934 /* Must be page aligned */
935 if ((sgpa & PAGE_MASK((1 << 12) - 1)) || (size & PAGE_MASK((1 << 12) - 1)) || size == 0)
936 return (EINVAL22);
937
938 /* size must be less then 512GB */
939 if (size >= NBPD_L4(1ULL << 39))
940 return (EINVAL22);
941
942 /* no wraparound */
943 if (sgpa + size < sgpa)
944 return (EINVAL22);
945
946 /*
947 * Specifying addresses within the PCI MMIO space is forbidden.
948 * Disallow addresses that start inside the MMIO space:
949 * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
950 */
951 if (sgpa >= VMM_PCI_MMIO_BAR_BASE0xF0000000ULL && sgpa <= VMM_PCI_MMIO_BAR_END0xFFFFFFFFULL)
952 return (EINVAL22);
953
954 /*
955 * ... and disallow addresses that end inside the MMIO space:
956 * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
957 */
958 if (sgpa + size > VMM_PCI_MMIO_BAR_BASE0xF0000000ULL &&
959 sgpa + size <= VMM_PCI_MMIO_BAR_END0xFFFFFFFFULL)
960 return (EINVAL22);
961
962 memtype = vmm_get_guest_memtype(vm, sgpa);
963 if (memtype == VMM_MEM_TYPE_UNKNOWN)
964 return (EINVAL22);
965
966 if (vmm_softc->mode == VMM_MODE_EPT)
967 ret = vmx_mprotect_ept(vm->vm_map, sgpa, sgpa + size, prot);
968 else if (vmm_softc->mode == VMM_MODE_RVI) {
969 pmap_write_protect(vm->vm_map->pmap, sgpa, sgpa + size, prot);
970 /* XXX requires a invlpga */
971 ret = 0;
972 } else
973 return (EINVAL22);
974
975 return (ret);
976}
977
978/*
979 * vmx_mprotect_ept
980 *
981 * apply the ept protections to the requested pages, faulting in the page if
982 * required.
983 */
984int
985vmx_mprotect_ept(vm_map_t vm_map, paddr_t sgpa, paddr_t egpa, int prot)
986{
987 struct vmx_invept_descriptor vid;
988 pmap_t pmap;
989 pt_entry_t *pte;
990 paddr_t addr;
991 int ret = 0;
992
993 pmap = vm_map->pmap;
994
995 KERNEL_LOCK()_kernel_lock();
996
997 for (addr = sgpa; addr < egpa; addr += PAGE_SIZE(1 << 12)) {
998 pte = vmx_pmap_find_pte_ept(pmap, addr);
999 if (pte == NULL((void *)0)) {
1000 ret = uvm_fault(vm_map, addr, VM_FAULT_WIRE((vm_fault_t) 0x2),
1001 PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04);
1002 if (ret)
1003 printf("%s: uvm_fault returns %d, GPA=0x%llx\n",
1004 __func__, ret, (uint64_t)addr);
1005
1006 pte = vmx_pmap_find_pte_ept(pmap, addr);
1007 if (pte == NULL((void *)0)) {
1008 KERNEL_UNLOCK()_kernel_unlock();
1009 return EFAULT14;
1010 }
1011 }
1012
1013 if (prot & PROT_READ0x01)
1014 *pte |= EPT_R(1ULL << 0);
1015 else
1016 *pte &= ~EPT_R(1ULL << 0);
1017
1018 if (prot & PROT_WRITE0x02)
1019 *pte |= EPT_W(1ULL << 1);
1020 else
1021 *pte &= ~EPT_W(1ULL << 1);
1022
1023 if (prot & PROT_EXEC0x04)
1024 *pte |= EPT_X(1ULL << 2);
1025 else
1026 *pte &= ~EPT_X(1ULL << 2);
1027 }
1028
1029 /*
1030 * SDM 3C: 28.3.3.4 Guidelines for Use of the INVEPT Instruction
1031 * the first bullet point seems to say we should call invept.
1032 *
1033 * Software should use the INVEPT instruction with the “single-context”
1034 * INVEPT type after making any of the following changes to an EPT
1035 * paging-structure entry (the INVEPT descriptor should contain an
1036 * EPTP value that references — directly or indirectly
1037 * — the modified EPT paging structure):
1038 * — Changing any of the privilege bits 2:0 from 1 to 0.
1039 * */
1040 if (pmap->eptp != 0) {
1041 memset(&vid, 0, sizeof(vid))__builtin_memset((&vid), (0), (sizeof(vid)));
1042 vid.vid_eptp = pmap->eptp;
1043 DPRINTF("%s: flushing EPT TLB for EPTP 0x%llx\n", __func__,
1044 vid.vid_eptp);
1045 invept(IA32_VMX_INVEPT_SINGLE_CTX0x1, &vid);
1046 }
1047
1048 KERNEL_UNLOCK()_kernel_unlock();
1049
1050 return ret;
1051}
1052
1053/*
1054 * vmx_pmap_find_pte_ept
1055 *
1056 * find the page table entry specified by addr in the pmap supplied.
1057 */
1058pt_entry_t *
1059vmx_pmap_find_pte_ept(pmap_t pmap, paddr_t addr)
1060{
1061 int l4idx, l3idx, l2idx, l1idx;
1062 pd_entry_t *pd;
1063 paddr_t pdppa;
1064 pt_entry_t *ptes, *pte;
1065
1066 l4idx = (addr & L4_MASK0x0000ff8000000000UL) >> L4_SHIFT39; /* PML4E idx */
1067 l3idx = (addr & L3_MASK0x0000007fc0000000UL) >> L3_SHIFT30; /* PDPTE idx */
1068 l2idx = (addr & L2_MASK0x000000003fe00000UL) >> L2_SHIFT21; /* PDE idx */
1069 l1idx = (addr & L1_MASK0x00000000001ff000UL) >> L1_SHIFT12; /* PTE idx */
1070
1071 pd = (pd_entry_t *)pmap->pm_pdir;
1072 if (pd == NULL((void *)0))
1073 return NULL((void *)0);
1074
1075 /*
1076 * l4idx should always be 0 since we don't support more than 512GB
1077 * guest physical memory.
1078 */
1079 if (l4idx > 0)
1080 return NULL((void *)0);
1081
1082 /*
1083 * l3idx should always be < MAXDSIZ/1GB because we don't support more
1084 * than MAXDSIZ guest phys mem.
1085 */
1086 if (l3idx >= MAXDSIZ((paddr_t)32*1024*1024*1024) / ((paddr_t)1024 * 1024 * 1024))
1087 return NULL((void *)0);
1088
1089 pdppa = pd[l4idx] & PG_FRAME0x000ffffffffff000UL;
1090 if (pdppa == 0)
1091 return NULL((void *)0);
1092
1093 ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa)((vaddr_t)(((((511 - 4) * (1ULL << 39))) | 0xffff000000000000
)) + (pdppa))
;
1094
1095 pdppa = ptes[l3idx] & PG_FRAME0x000ffffffffff000UL;
1096 if (pdppa == 0)
1097 return NULL((void *)0);
1098
1099 ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa)((vaddr_t)(((((511 - 4) * (1ULL << 39))) | 0xffff000000000000
)) + (pdppa))
;
1100
1101 pdppa = ptes[l2idx] & PG_FRAME0x000ffffffffff000UL;
1102 if (pdppa == 0)
1103 return NULL((void *)0);
1104
1105 ptes = (pt_entry_t *)PMAP_DIRECT_MAP(pdppa)((vaddr_t)(((((511 - 4) * (1ULL << 39))) | 0xffff000000000000
)) + (pdppa))
;
1106
1107 pte = &ptes[l1idx];
1108 if (*pte == 0)
1109 return NULL((void *)0);
1110
1111 return pte;
1112}
1113
1114/*
1115 * vm_find
1116 *
1117 * Function to find an existing VM by its identifier.
1118 * Must be called under the global vm_lock.
1119 *
1120 * Parameters:
1121 * id: The VM identifier.
1122 * *res: A pointer to the VM or NULL if not found
1123 *
1124 * Return values:
1125 * 0: if successful
1126 * ENOENT: if the VM defined by 'id' cannot be found
1127 * EPERM: if the VM cannot be accessed by the current process
1128 */
1129int
1130vm_find(uint32_t id, struct vm **res)
1131{
1132 struct proc *p = curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc
;
1133 struct vm *vm;
1134
1135 *res = NULL((void *)0);
1136 SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link)for((vm) = ((&vmm_softc->vm_list)->slh_first); (vm)
!= ((void *)0); (vm) = ((vm)->vm_link.sle_next))
{
1137 if (vm->vm_id == id) {
1138 /*
1139 * In the pledged VM process, only allow to find
1140 * the VM that is running in the current process.
1141 * The managing vmm parent process can lookup all
1142 * all VMs and is indicated by PLEDGE_PROC.
1143 */
1144 if (((p->p_p->ps_pledge &
1145 (PLEDGE_VMM0x0000000040000000ULL | PLEDGE_PROC0x0000000000001000ULL)) == PLEDGE_VMM0x0000000040000000ULL) &&
1146 (vm->vm_creator_pid != p->p_p->ps_pid))
1147 return (pledge_fail(p, EPERM1, PLEDGE_VMM0x0000000040000000ULL));
1148 *res = vm;
1149 return (0);
1150 }
1151 }
1152
1153 return (ENOENT2);
1154}
1155
1156/*
1157 * vmm_start
1158 *
1159 * Starts VMM mode on the system
1160 */
1161int
1162vmm_start(void)
1163{
1164 struct cpu_info *self = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
;
1165 int ret = 0;
1166#ifdef MULTIPROCESSOR1
1167 struct cpu_info *ci;
1168 CPU_INFO_ITERATORint cii;
1169 int i;
1170#endif
1171
1172 /* VMM is already running */
1173 if (self->ci_flags & CPUF_VMM0x20000)
1174 return (0);
1175
1176#ifdef MULTIPROCESSOR1
1177 /* Broadcast start VMM IPI */
1178 x86_broadcast_ipi(X86_IPI_START_VMM0x00000100);
1179
1180 CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci->
ci_next)
{
1181 if (ci == self)
1182 continue;
1183 for (i = 100000; (!(ci->ci_flags & CPUF_VMM0x20000)) && i>0;i--)
1184 delay(10)(*delay_func)(10);
1185 if (!(ci->ci_flags & CPUF_VMM0x20000)) {
1186 printf("%s: failed to enter VMM mode\n",
1187 ci->ci_dev->dv_xname);
1188 ret = EIO5;
1189 }
1190 }
1191#endif /* MULTIPROCESSOR */
1192
1193 /* Start VMM on this CPU */
1194 start_vmm_on_cpu(self);
1195 if (!(self->ci_flags & CPUF_VMM0x20000)) {
1196 printf("%s: failed to enter VMM mode\n",
1197 self->ci_dev->dv_xname);
1198 ret = EIO5;
1199 }
1200
1201 return (ret);
1202}
1203
1204/*
1205 * vmm_stop
1206 *
1207 * Stops VMM mode on the system
1208 */
1209int
1210vmm_stop(void)
1211{
1212 struct cpu_info *self = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
;
1213 int ret = 0;
1214#ifdef MULTIPROCESSOR1
1215 struct cpu_info *ci;
1216 CPU_INFO_ITERATORint cii;
1217 int i;
1218#endif
1219
1220 /* VMM is not running */
1221 if (!(self->ci_flags & CPUF_VMM0x20000))
1222 return (0);
1223
1224#ifdef MULTIPROCESSOR1
1225 /* Stop VMM on other CPUs */
1226 x86_broadcast_ipi(X86_IPI_STOP_VMM0x00000200);
1227
1228 CPU_INFO_FOREACH(cii, ci)for (cii = 0, ci = cpu_info_list; ci != ((void *)0); ci = ci->
ci_next)
{
1229 if (ci == self)
1230 continue;
1231 for (i = 100000; (ci->ci_flags & CPUF_VMM0x20000) && i>0 ;i--)
1232 delay(10)(*delay_func)(10);
1233 if (ci->ci_flags & CPUF_VMM0x20000) {
1234 printf("%s: failed to exit VMM mode\n",
1235 ci->ci_dev->dv_xname);
1236 ret = EIO5;
1237 }
1238 }
1239#endif /* MULTIPROCESSOR */
1240
1241 /* Stop VMM on this CPU */
1242 stop_vmm_on_cpu(self);
1243 if (self->ci_flags & CPUF_VMM0x20000) {
1244 printf("%s: failed to exit VMM mode\n",
1245 self->ci_dev->dv_xname);
1246 ret = EIO5;
1247 }
1248
1249 return (ret);
1250}
1251
1252/*
1253 * start_vmm_on_cpu
1254 *
1255 * Starts VMM mode on 'ci' by executing the appropriate CPU-specific insn
1256 * sequence to enter VMM mode (eg, VMXON)
1257 */
1258void
1259start_vmm_on_cpu(struct cpu_info *ci)
1260{
1261 uint64_t msr;
1262 uint32_t cr4;
1263
1264 /* No VMM mode? exit. */
1265 if ((ci->ci_vmm_flags & CI_VMM_VMX(1 << 0)) == 0 &&
1266 (ci->ci_vmm_flags & CI_VMM_SVM(1 << 1)) == 0)
1267 return;
1268
1269 /*
1270 * AMD SVM
1271 */
1272 if (ci->ci_vmm_flags & CI_VMM_SVM(1 << 1)) {
1273 msr = rdmsr(MSR_EFER0xc0000080);
1274 msr |= EFER_SVME0x00001000;
1275 wrmsr(MSR_EFER0xc0000080, msr);
1276 }
1277
1278 /*
1279 * Intel VMX
1280 */
1281 if (ci->ci_vmm_flags & CI_VMM_VMX(1 << 0)) {
1282 if (ci->ci_vmxon_region == 0)
1283 return;
1284 else {
1285 bzero(ci->ci_vmxon_region, PAGE_SIZE)__builtin_bzero((ci->ci_vmxon_region), ((1 << 12)));
1286 ci->ci_vmxon_region->vr_revision =
1287 ci->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision;
1288
1289 /* Set CR4.VMXE */
1290 cr4 = rcr4();
1291 cr4 |= CR4_VMXE0x00002000;
1292 lcr4(cr4);
1293
1294 /* Enable VMX */
1295 msr = rdmsr(MSR_IA32_FEATURE_CONTROL0x03a);
1296 if (msr & IA32_FEATURE_CONTROL_LOCK0x01) {
1297 if (!(msr & IA32_FEATURE_CONTROL_VMX_EN0x04))
1298 return;
1299 } else {
1300 msr |= IA32_FEATURE_CONTROL_VMX_EN0x04 |
1301 IA32_FEATURE_CONTROL_LOCK0x01;
1302 wrmsr(MSR_IA32_FEATURE_CONTROL0x03a, msr);
1303 }
1304
1305 /* Enter VMX mode */
1306 if (vmxon((uint64_t *)&ci->ci_vmxon_region_pa))
1307 return;
1308 }
1309 }
1310
1311 ci->ci_flags |= CPUF_VMM0x20000;
1312}
1313
1314/*
1315 * stop_vmm_on_cpu
1316 *
1317 * Stops VMM mode on 'ci' by executing the appropriate CPU-specific insn
1318 * sequence to exit VMM mode (eg, VMXOFF)
1319 */
1320void
1321stop_vmm_on_cpu(struct cpu_info *ci)
1322{
1323 uint64_t msr;
1324 uint32_t cr4;
1325
1326 if (!(ci->ci_flags & CPUF_VMM0x20000))
1327 return;
1328
1329 /*
1330 * AMD SVM
1331 */
1332 if (ci->ci_vmm_flags & CI_VMM_SVM(1 << 1)) {
1333 msr = rdmsr(MSR_EFER0xc0000080);
1334 msr &= ~EFER_SVME0x00001000;
1335 wrmsr(MSR_EFER0xc0000080, msr);
1336 }
1337
1338 /*
1339 * Intel VMX
1340 */
1341 if (ci->ci_vmm_flags & CI_VMM_VMX(1 << 0)) {
1342 if (vmxoff())
1343 panic("VMXOFF failed");
1344
1345 cr4 = rcr4();
1346 cr4 &= ~CR4_VMXE0x00002000;
1347 lcr4(cr4);
1348 }
1349
1350 ci->ci_flags &= ~CPUF_VMM0x20000;
1351}
1352
1353/*
1354 * vmclear_on_cpu
1355 *
1356 * Flush and clear VMCS on 'ci' by executing vmclear.
1357 *
1358 */
1359void
1360vmclear_on_cpu(struct cpu_info *ci)
1361{
1362 if ((ci->ci_flags & CPUF_VMM0x20000) && (ci->ci_vmm_flags & CI_VMM_VMX(1 << 0))) {
1363 if (vmclear(&ci->ci_vmcs_pa))
1364 panic("VMCLEAR ipi failed");
1365 atomic_swap_ulong(&ci->ci_vmcs_pa, VMX_VMCS_PA_CLEAR)_atomic_swap_ulong((&ci->ci_vmcs_pa), (0xFFFFFFFFFFFFFFFFUL
))
;
1366 }
1367}
1368
1369#ifdef MULTIPROCESSOR1
1370static int
1371vmx_remote_vmclear(struct cpu_info *ci, struct vcpu *vcpu)
1372{
1373 int ret = 0, nticks = 200000000;
1374
1375 rw_enter_write(&ci->ci_vmcs_lock);
1376 atomic_swap_ulong(&ci->ci_vmcs_pa, vcpu->vc_control_pa)_atomic_swap_ulong((&ci->ci_vmcs_pa), (vcpu->vc_control_pa
))
;
1377 x86_send_ipi(ci, X86_IPI_VMCLEAR_VMM0x00000004);
1378
1379 while (ci->ci_vmcs_pa != VMX_VMCS_PA_CLEAR0xFFFFFFFFFFFFFFFFUL) {
1380 CPU_BUSY_CYCLE()__asm volatile("pause": : : "memory");
1381 if (--nticks <= 0) {
1382 printf("%s: spun out\n", __func__);
1383 ret = 1;
1384 break;
1385 }
1386 }
1387 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (0));
1388 rw_exit_write(&ci->ci_vmcs_lock);
1389
1390 return (ret);
1391}
1392#endif /* MULTIPROCESSOR */
1393
1394/*
1395 * vm_create_check_mem_ranges
1396 *
1397 * Make sure that the guest physical memory ranges given by the user process
1398 * do not overlap and are in ascending order.
1399 *
1400 * The last physical address may not exceed VMM_MAX_VM_MEM_SIZE.
1401 *
1402 * Return Values:
1403 * The total memory size in MB if the checks were successful
1404 * 0: One of the memory ranges was invalid, or VMM_MAX_VM_MEM_SIZE was
1405 * exceeded
1406 */
1407size_t
1408vm_create_check_mem_ranges(struct vm_create_params *vcp)
1409{
1410 size_t i, memsize = 0;
1411 struct vm_mem_range *vmr, *pvmr;
1412 const paddr_t maxgpa = (uint64_t)VMM_MAX_VM_MEM_SIZE32768 * 1024 * 1024;
1413
1414 if (vcp->vcp_nmemranges == 0 ||
1415 vcp->vcp_nmemranges > VMM_MAX_MEM_RANGES16)
1416 return (0);
1417
1418 for (i = 0; i < vcp->vcp_nmemranges; i++) {
1419 vmr = &vcp->vcp_memranges[i];
1420
1421 /* Only page-aligned addresses and sizes are permitted */
1422 if ((vmr->vmr_gpa & PAGE_MASK((1 << 12) - 1)) || (vmr->vmr_va & PAGE_MASK((1 << 12) - 1)) ||
1423 (vmr->vmr_size & PAGE_MASK((1 << 12) - 1)) || vmr->vmr_size == 0)
1424 return (0);
1425
1426 /* Make sure that VMM_MAX_VM_MEM_SIZE is not exceeded */
1427 if (vmr->vmr_gpa >= maxgpa ||
1428 vmr->vmr_size > maxgpa - vmr->vmr_gpa)
1429 return (0);
1430
1431 /*
1432 * Make sure that all virtual addresses are within the address
1433 * space of the process and that they do not wrap around.
1434 * Calling uvm_share() when creating the VM will take care of
1435 * further checks.
1436 */
1437 if (vmr->vmr_va < VM_MIN_ADDRESS(1 << 12) ||
1438 vmr->vmr_va >= VM_MAXUSER_ADDRESS0x00007f7fffffc000 ||
1439 vmr->vmr_size >= VM_MAXUSER_ADDRESS0x00007f7fffffc000 - vmr->vmr_va)
1440 return (0);
1441
1442 /*
1443 * Specifying ranges within the PCI MMIO space is forbidden.
1444 * Disallow ranges that start inside the MMIO space:
1445 * [VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
1446 */
1447 if (vmr->vmr_gpa >= VMM_PCI_MMIO_BAR_BASE0xF0000000ULL &&
1448 vmr->vmr_gpa <= VMM_PCI_MMIO_BAR_END0xFFFFFFFFULL)
1449 return (0);
1450
1451 /*
1452 * ... and disallow ranges that end inside the MMIO space:
1453 * (VMM_PCI_MMIO_BAR_BASE .. VMM_PCI_MMIO_BAR_END]
1454 */
1455 if (vmr->vmr_gpa + vmr->vmr_size > VMM_PCI_MMIO_BAR_BASE0xF0000000ULL &&
1456 vmr->vmr_gpa + vmr->vmr_size <= VMM_PCI_MMIO_BAR_END0xFFFFFFFFULL)
1457 return (0);
1458
1459 /*
1460 * Make sure that guest physical memory ranges do not overlap
1461 * and that they are ascending.
1462 */
1463 if (i > 0 && pvmr->vmr_gpa + pvmr->vmr_size > vmr->vmr_gpa)
1464 return (0);
1465
1466 memsize += vmr->vmr_size;
1467 pvmr = vmr;
1468 }
1469
1470 if (memsize % (1024 * 1024) != 0)
1471 return (0);
1472 memsize /= 1024 * 1024;
1473 return (memsize);
1474}
1475
1476/*
1477 * vm_create
1478 *
1479 * Creates the in-memory VMM structures for the VM defined by 'vcp'. The
1480 * parent of this VM shall be the process defined by 'p'.
1481 * This function does not start the VCPU(s) - see vm_start.
1482 *
1483 * Return Values:
1484 * 0: the create operation was successful
1485 * ENOMEM: out of memory
1486 * various other errors from vcpu_init/vm_impl_init
1487 */
1488int
1489vm_create(struct vm_create_params *vcp, struct proc *p)
1490{
1491 int i, ret;
1492 size_t memsize;
1493 struct vm *vm;
1494 struct vcpu *vcpu;
1495
1496 if (!(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_flags & CPUF_VMM0x20000))
1497 return (EINVAL22);
1498
1499 memsize = vm_create_check_mem_ranges(vcp);
1500 if (memsize == 0)
1501 return (EINVAL22);
1502
1503 /* XXX - support UP only (for now) */
1504 if (vcp->vcp_ncpus != 1)
1505 return (EINVAL22);
1506
1507 rw_enter_write(&vmm_softc->vm_lock);
1508 if (vmm_softc->vcpu_ct + vcp->vcp_ncpus > VMM_MAX_VCPUS512) {
1509 DPRINTF("%s: maximum vcpus (%lu) reached\n", __func__,
1510 vmm_softc->vcpu_max);
1511 rw_exit_write(&vmm_softc->vm_lock);
1512 return (ENOMEM12);
1513 }
1514 vmm_softc->vcpu_ct += vcp->vcp_ncpus;
1515
1516 vm = pool_get(&vm_pool, PR_WAITOK0x0001 | PR_ZERO0x0008);
1517 SLIST_INIT(&vm->vm_vcpu_list){ ((&vm->vm_vcpu_list)->slh_first) = ((void *)0); };
1518 rw_init(&vm->vm_vcpu_lock, "vcpu_list")_rw_init_flags(&vm->vm_vcpu_lock, "vcpu_list", 0, ((void
*)0))
;
1519
1520 vm->vm_creator_pid = p->p_p->ps_pid;
1521 vm->vm_nmemranges = vcp->vcp_nmemranges;
1522 memcpy(vm->vm_memranges, vcp->vcp_memranges,__builtin_memcpy((vm->vm_memranges), (vcp->vcp_memranges
), (vm->vm_nmemranges * sizeof(vm->vm_memranges[0])))
1523 vm->vm_nmemranges * sizeof(vm->vm_memranges[0]))__builtin_memcpy((vm->vm_memranges), (vcp->vcp_memranges
), (vm->vm_nmemranges * sizeof(vm->vm_memranges[0])))
;
1524 vm->vm_memory_size = memsize;
1525 strncpy(vm->vm_name, vcp->vcp_name, VMM_MAX_NAME_LEN64 - 1);
1526
1527 if (vm_impl_init(vm, p)) {
1528 printf("failed to init arch-specific features for vm %p\n", vm);
1529 vm_teardown(vm);
1530 rw_exit_write(&vmm_softc->vm_lock);
1531 return (ENOMEM12);
1532 }
1533
1534 vmm_softc->vm_ct++;
1535 vmm_softc->vm_idx++;
1536
1537 vm->vm_id = vmm_softc->vm_idx;
1538 vm->vm_vcpu_ct = 0;
1539 vm->vm_vcpus_running = 0;
1540
1541 /* Initialize each VCPU defined in 'vcp' */
1542 for (i = 0; i < vcp->vcp_ncpus; i++) {
1543 vcpu = pool_get(&vcpu_pool, PR_WAITOK0x0001 | PR_ZERO0x0008);
1544 vcpu->vc_parent = vm;
1545 if ((ret = vcpu_init(vcpu)) != 0) {
1546 printf("failed to init vcpu %d for vm %p\n", i, vm);
1547 vm_teardown(vm);
1548 vmm_softc->vm_idx--;
1549 rw_exit_write(&vmm_softc->vm_lock);
1550 return (ret);
1551 }
1552 rw_enter_write(&vm->vm_vcpu_lock);
1553 vcpu->vc_id = vm->vm_vcpu_ct;
1554 vm->vm_vcpu_ct++;
1555 SLIST_INSERT_HEAD(&vm->vm_vcpu_list, vcpu, vc_vcpu_link)do { (vcpu)->vc_vcpu_link.sle_next = (&vm->vm_vcpu_list
)->slh_first; (&vm->vm_vcpu_list)->slh_first = (
vcpu); } while (0)
;
1556 rw_exit_write(&vm->vm_vcpu_lock);
1557 }
1558
1559 /* XXX init various other hardware parts (vlapic, vioapic, etc) */
1560
1561 SLIST_INSERT_HEAD(&vmm_softc->vm_list, vm, vm_link)do { (vm)->vm_link.sle_next = (&vmm_softc->vm_list)
->slh_first; (&vmm_softc->vm_list)->slh_first = (
vm); } while (0)
;
1562 rw_exit_write(&vmm_softc->vm_lock);
1563
1564 vcp->vcp_id = vm->vm_id;
1565
1566 return (0);
1567}
1568
1569/*
1570 * vm_impl_init_vmx
1571 *
1572 * Intel VMX specific VM initialization routine
1573 *
1574 * Parameters:
1575 * vm: the VM being initialized
1576 * p: vmd process owning the VM
1577 *
1578 * Return values:
1579 * 0: the initialization was successful
1580 * ENOMEM: the initialization failed (lack of resources)
1581 */
1582int
1583vm_impl_init_vmx(struct vm *vm, struct proc *p)
1584{
1585 int i, ret;
1586 vaddr_t mingpa, maxgpa;
1587 struct vm_mem_range *vmr;
1588
1589 /* If not EPT, nothing to do here */
1590 if (vmm_softc->mode != VMM_MODE_EPT)
1591 return (0);
1592
1593 vmr = &vm->vm_memranges[0];
1594 mingpa = vmr->vmr_gpa;
1595 vmr = &vm->vm_memranges[vm->vm_nmemranges - 1];
1596 maxgpa = vmr->vmr_gpa + vmr->vmr_size;
1597
1598 /*
1599 * uvmspace_alloc (currently) always returns a valid vmspace
1600 */
1601 vm->vm_vmspace = uvmspace_alloc(mingpa, maxgpa, TRUE1, FALSE0);
1602 vm->vm_map = &vm->vm_vmspace->vm_map;
1603
1604 /* Map the new map with an anon */
1605 DPRINTF("%s: created vm_map @ %p\n", __func__, vm->vm_map);
1606 for (i = 0; i < vm->vm_nmemranges; i++) {
1607 vmr = &vm->vm_memranges[i];
1608 ret = uvm_share(vm->vm_map, vmr->vmr_gpa,
1609 PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04,
1610 &p->p_vmspace->vm_map, vmr->vmr_va, vmr->vmr_size);
1611 if (ret) {
1612 printf("%s: uvm_share failed (%d)\n", __func__, ret);
1613 /* uvmspace_free calls pmap_destroy for us */
1614 uvmspace_free(vm->vm_vmspace);
1615 vm->vm_vmspace = NULL((void *)0);
1616 return (ENOMEM12);
1617 }
1618 }
1619
1620 ret = pmap_convert(vm->vm_map->pmap, PMAP_TYPE_EPT2);
1621 if (ret) {
1622 printf("%s: pmap_convert failed\n", __func__);
1623 /* uvmspace_free calls pmap_destroy for us */
1624 uvmspace_free(vm->vm_vmspace);
1625 vm->vm_vmspace = NULL((void *)0);
1626 return (ENOMEM12);
1627 }
1628
1629 return (0);
1630}
1631
1632/*
1633 * vm_impl_init_svm
1634 *
1635 * AMD SVM specific VM initialization routine
1636 *
1637 * Parameters:
1638 * vm: the VM being initialized
1639 * p: vmd process owning the VM
1640 *
1641 * Return values:
1642 * 0: the initialization was successful
1643 * ENOMEM: the initialization failed (lack of resources)
1644 */
1645int
1646vm_impl_init_svm(struct vm *vm, struct proc *p)
1647{
1648 int i, ret;
1649 vaddr_t mingpa, maxgpa;
1650 struct vm_mem_range *vmr;
1651
1652 /* If not RVI, nothing to do here */
1653 if (vmm_softc->mode != VMM_MODE_RVI)
1654 return (0);
1655
1656 vmr = &vm->vm_memranges[0];
1657 mingpa = vmr->vmr_gpa;
1658 vmr = &vm->vm_memranges[vm->vm_nmemranges - 1];
1659 maxgpa = vmr->vmr_gpa + vmr->vmr_size;
1660
1661 /*
1662 * uvmspace_alloc (currently) always returns a valid vmspace
1663 */
1664 vm->vm_vmspace = uvmspace_alloc(mingpa, maxgpa, TRUE1, FALSE0);
1665 vm->vm_map = &vm->vm_vmspace->vm_map;
1666
1667 /* Map the new map with an anon */
1668 DPRINTF("%s: created vm_map @ %p\n", __func__, vm->vm_map);
1669 for (i = 0; i < vm->vm_nmemranges; i++) {
1670 vmr = &vm->vm_memranges[i];
1671 ret = uvm_share(vm->vm_map, vmr->vmr_gpa,
1672 PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04,
1673 &p->p_vmspace->vm_map, vmr->vmr_va, vmr->vmr_size);
1674 if (ret) {
1675 printf("%s: uvm_share failed (%d)\n", __func__, ret);
1676 /* uvmspace_free calls pmap_destroy for us */
1677 uvmspace_free(vm->vm_vmspace);
1678 vm->vm_vmspace = NULL((void *)0);
1679 return (ENOMEM12);
1680 }
1681 }
1682
1683 /* Convert pmap to RVI */
1684 ret = pmap_convert(vm->vm_map->pmap, PMAP_TYPE_RVI3);
1685
1686 return (ret);
1687}
1688
1689/*
1690 * vm_impl_init
1691 *
1692 * Calls the architecture-specific VM init routine
1693 *
1694 * Parameters:
1695 * vm: the VM being initialized
1696 * p: vmd process owning the VM
1697 *
1698 * Return values (from architecture-specific init routines):
1699 * 0: the initialization was successful
1700 * ENOMEM: the initialization failed (lack of resources)
1701 */
1702int
1703vm_impl_init(struct vm *vm, struct proc *p)
1704{
1705 int ret;
1706
1707 KERNEL_LOCK()_kernel_lock();
1708 if (vmm_softc->mode == VMM_MODE_VMX ||
1709 vmm_softc->mode == VMM_MODE_EPT)
1710 ret = vm_impl_init_vmx(vm, p);
1711 else if (vmm_softc->mode == VMM_MODE_SVM ||
1712 vmm_softc->mode == VMM_MODE_RVI)
1713 ret = vm_impl_init_svm(vm, p);
1714 else
1715 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
1716 KERNEL_UNLOCK()_kernel_unlock();
1717
1718 return (ret);
1719}
1720
1721/*
1722 * vm_impl_deinit_vmx
1723 *
1724 * Intel VMX specific VM deinitialization routine
1725 *
1726 * Parameters:
1727 * vm: VM to deinit
1728 */
1729void
1730vm_impl_deinit_vmx(struct vm *vm)
1731{
1732 /* Unused */
1733}
1734
1735/*
1736 * vm_impl_deinit_svm
1737 *
1738 * AMD SVM specific VM deinitialization routine
1739 *
1740 * Parameters:
1741 * vm: VM to deinit
1742 */
1743void
1744vm_impl_deinit_svm(struct vm *vm)
1745{
1746 /* Unused */
1747}
1748
1749/*
1750 * vm_impl_deinit
1751 *
1752 * Calls the architecture-specific VM init routine
1753 *
1754 * Parameters:
1755 * vm: VM to deinit
1756 */
1757void
1758vm_impl_deinit(struct vm *vm)
1759{
1760 if (vmm_softc->mode == VMM_MODE_VMX ||
1761 vmm_softc->mode == VMM_MODE_EPT)
1762 vm_impl_deinit_vmx(vm);
1763 else if (vmm_softc->mode == VMM_MODE_SVM ||
1764 vmm_softc->mode == VMM_MODE_RVI)
1765 vm_impl_deinit_svm(vm);
1766 else
1767 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
1768}
1769
1770/*
1771 * vcpu_reload_vmcs_vmx
1772 *
1773 * (Re)load the VMCS on the current cpu. Must be called with the VMCS write
1774 * lock acquired. If the VMCS is determined to be loaded on a remote cpu, an
1775 * ipi will be used to remotely flush it before loading the VMCS locally.
1776 *
1777 * Parameters:
1778 * vcpu: Pointer to the vcpu needing its VMCS
1779 *
1780 * Return values:
1781 * 0: if successful
1782 * EINVAL: an error occurred during flush or reload
1783 */
1784int
1785vcpu_reload_vmcs_vmx(struct vcpu *vcpu)
1786{
1787 struct cpu_info *ci, *last_ci;
1788
1789 rw_assert_wrlock(&vcpu->vc_lock);
1790
1791 ci = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
;
1792 last_ci = vcpu->vc_last_pcpu;
1793
1794 if (last_ci == NULL((void *)0)) {
1795 /* First launch */
1796 if (vmclear(&vcpu->vc_control_pa))
1797 return (EINVAL22);
1798 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (0));
1799#ifdef MULTIPROCESSOR1
1800 } else if (last_ci != ci) {
1801 /* We've moved CPUs at some point, so remote VMCLEAR */
1802 if (vmx_remote_vmclear(last_ci, vcpu))
1803 return (EINVAL22);
1804 KASSERT(vcpu->vc_vmx_vmcs_state == VMCS_CLEARED)((vcpu->vc_vmx_vmcs_state == 0) ? (void)0 : __assert("diagnostic "
, "/usr/src/sys/arch/amd64/amd64/vmm.c", 1804, "vcpu->vc_vmx_vmcs_state == VMCS_CLEARED"
))
;
1805#endif /* MULTIPROCESSOR */
1806 }
1807
1808 if (vmptrld(&vcpu->vc_control_pa)) {
1809 printf("%s: vmptrld\n", __func__);
1810 return (EINVAL22);
1811 }
1812
1813 return (0);
1814}
1815
1816/*
1817 * vcpu_readregs_vmx
1818 *
1819 * Reads 'vcpu's registers
1820 *
1821 * Parameters:
1822 * vcpu: the vcpu to read register values from
1823 * regmask: the types of registers to read
1824 * vrs: output parameter where register values are stored
1825 *
1826 * Return values:
1827 * 0: if successful
1828 * EINVAL: an error reading registers occurred
1829 */
1830int
1831vcpu_readregs_vmx(struct vcpu *vcpu, uint64_t regmask,
1832 struct vcpu_reg_state *vrs)
1833{
1834 int i, ret = 0;
1835 uint64_t sel, limit, ar;
1836 uint64_t *gprs = vrs->vrs_gprs;
1837 uint64_t *crs = vrs->vrs_crs;
1838 uint64_t *msrs = vrs->vrs_msrs;
1839 uint64_t *drs = vrs->vrs_drs;
1840 struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1841 struct vmx_msr_store *msr_store;
1842
1843#ifdef VMM_DEBUG
1844 /* VMCS should be loaded... */
1845 paddr_t pa = 0ULL;
1846 if (vmptrst(&pa))
1847 panic("%s: vmptrst", __func__);
1848 KASSERT(pa == vcpu->vc_control_pa)((pa == vcpu->vc_control_pa) ? (void)0 : __assert("diagnostic "
, "/usr/src/sys/arch/amd64/amd64/vmm.c", 1848, "pa == vcpu->vc_control_pa"
))
;
1849#endif /* VMM_DEBUG */
1850
1851 if (regmask & VM_RWREGS_GPRS0x1) {
1852 gprs[VCPU_REGS_RAX0] = vcpu->vc_gueststate.vg_rax;
1853 gprs[VCPU_REGS_RBX1] = vcpu->vc_gueststate.vg_rbx;
1854 gprs[VCPU_REGS_RCX2] = vcpu->vc_gueststate.vg_rcx;
1855 gprs[VCPU_REGS_RDX3] = vcpu->vc_gueststate.vg_rdx;
1856 gprs[VCPU_REGS_RSI4] = vcpu->vc_gueststate.vg_rsi;
1857 gprs[VCPU_REGS_RDI5] = vcpu->vc_gueststate.vg_rdi;
1858 gprs[VCPU_REGS_R86] = vcpu->vc_gueststate.vg_r8;
1859 gprs[VCPU_REGS_R97] = vcpu->vc_gueststate.vg_r9;
1860 gprs[VCPU_REGS_R108] = vcpu->vc_gueststate.vg_r10;
1861 gprs[VCPU_REGS_R119] = vcpu->vc_gueststate.vg_r11;
1862 gprs[VCPU_REGS_R1210] = vcpu->vc_gueststate.vg_r12;
1863 gprs[VCPU_REGS_R1311] = vcpu->vc_gueststate.vg_r13;
1864 gprs[VCPU_REGS_R1412] = vcpu->vc_gueststate.vg_r14;
1865 gprs[VCPU_REGS_R1513] = vcpu->vc_gueststate.vg_r15;
1866 gprs[VCPU_REGS_RBP15] = vcpu->vc_gueststate.vg_rbp;
1867 gprs[VCPU_REGS_RIP16] = vcpu->vc_gueststate.vg_rip;
1868 if (vmread(VMCS_GUEST_IA32_RSP0x681C, &gprs[VCPU_REGS_RSP14]))
1869 goto errout;
1870 if (vmread(VMCS_GUEST_IA32_RFLAGS0x6820, &gprs[VCPU_REGS_RFLAGS17]))
1871 goto errout;
1872 }
1873
1874 if (regmask & VM_RWREGS_SREGS0x2) {
1875 for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields)(sizeof((vmm_vmx_sreg_vmcs_fields)) / sizeof((vmm_vmx_sreg_vmcs_fields
)[0]))
; i++) {
1876 if (vmread(vmm_vmx_sreg_vmcs_fields[i].selid, &sel))
1877 goto errout;
1878 if (vmread(vmm_vmx_sreg_vmcs_fields[i].limitid, &limit))
1879 goto errout;
1880 if (vmread(vmm_vmx_sreg_vmcs_fields[i].arid, &ar))
1881 goto errout;
1882 if (vmread(vmm_vmx_sreg_vmcs_fields[i].baseid,
1883 &sregs[i].vsi_base))
1884 goto errout;
1885
1886 sregs[i].vsi_sel = sel;
1887 sregs[i].vsi_limit = limit;
1888 sregs[i].vsi_ar = ar;
1889 }
1890
1891 if (vmread(VMCS_GUEST_IA32_GDTR_LIMIT0x4810, &limit))
1892 goto errout;
1893 if (vmread(VMCS_GUEST_IA32_GDTR_BASE0x6816,
1894 &vrs->vrs_gdtr.vsi_base))
1895 goto errout;
1896 vrs->vrs_gdtr.vsi_limit = limit;
1897
1898 if (vmread(VMCS_GUEST_IA32_IDTR_LIMIT0x4812, &limit))
1899 goto errout;
1900 if (vmread(VMCS_GUEST_IA32_IDTR_BASE0x6818,
1901 &vrs->vrs_idtr.vsi_base))
1902 goto errout;
1903 vrs->vrs_idtr.vsi_limit = limit;
1904 }
1905
1906 if (regmask & VM_RWREGS_CRS0x4) {
1907 crs[VCPU_REGS_CR21] = vcpu->vc_gueststate.vg_cr2;
1908 crs[VCPU_REGS_XCR05] = vcpu->vc_gueststate.vg_xcr0;
1909 if (vmread(VMCS_GUEST_IA32_CR00x6800, &crs[VCPU_REGS_CR00]))
1910 goto errout;
1911 if (vmread(VMCS_GUEST_IA32_CR30x6802, &crs[VCPU_REGS_CR32]))
1912 goto errout;
1913 if (vmread(VMCS_GUEST_IA32_CR40x6804, &crs[VCPU_REGS_CR43]))
1914 goto errout;
1915 if (vmread(VMCS_GUEST_PDPTE00x280A, &crs[VCPU_REGS_PDPTE06]))
1916 goto errout;
1917 if (vmread(VMCS_GUEST_PDPTE10x280C, &crs[VCPU_REGS_PDPTE17]))
1918 goto errout;
1919 if (vmread(VMCS_GUEST_PDPTE20x280E, &crs[VCPU_REGS_PDPTE28]))
1920 goto errout;
1921 if (vmread(VMCS_GUEST_PDPTE30x2810, &crs[VCPU_REGS_PDPTE39]))
1922 goto errout;
1923 }
1924
1925 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
1926
1927 if (regmask & VM_RWREGS_MSRS0x8) {
1928 for (i = 0; i < VCPU_REGS_NMSRS(6 + 1); i++) {
1929 msrs[i] = msr_store[i].vms_data;
1930 }
1931 }
1932
1933 if (regmask & VM_RWREGS_DRS0x10) {
1934 drs[VCPU_REGS_DR00] = vcpu->vc_gueststate.vg_dr0;
1935 drs[VCPU_REGS_DR11] = vcpu->vc_gueststate.vg_dr1;
1936 drs[VCPU_REGS_DR22] = vcpu->vc_gueststate.vg_dr2;
1937 drs[VCPU_REGS_DR33] = vcpu->vc_gueststate.vg_dr3;
1938 drs[VCPU_REGS_DR64] = vcpu->vc_gueststate.vg_dr6;
1939 if (vmread(VMCS_GUEST_IA32_DR70x681A, &drs[VCPU_REGS_DR75]))
1940 goto errout;
1941 }
1942
1943 goto out;
1944
1945errout:
1946 ret = EINVAL22;
1947out:
1948 return (ret);
1949}
1950
1951/*
1952 * vcpu_readregs_svm
1953 *
1954 * Reads 'vcpu's registers
1955 *
1956 * Parameters:
1957 * vcpu: the vcpu to read register values from
1958 * regmask: the types of registers to read
1959 * vrs: output parameter where register values are stored
1960 *
1961 * Return values:
1962 * 0: if successful
1963 */
1964int
1965vcpu_readregs_svm(struct vcpu *vcpu, uint64_t regmask,
1966 struct vcpu_reg_state *vrs)
1967{
1968 uint64_t *gprs = vrs->vrs_gprs;
1969 uint64_t *crs = vrs->vrs_crs;
1970 uint64_t *msrs = vrs->vrs_msrs;
1971 uint64_t *drs = vrs->vrs_drs;
1972 uint32_t attr;
1973 struct vcpu_segment_info *sregs = vrs->vrs_sregs;
1974 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
1975
1976 if (regmask & VM_RWREGS_GPRS0x1) {
1977 gprs[VCPU_REGS_RAX0] = vcpu->vc_gueststate.vg_rax;
1978 gprs[VCPU_REGS_RBX1] = vcpu->vc_gueststate.vg_rbx;
1979 gprs[VCPU_REGS_RCX2] = vcpu->vc_gueststate.vg_rcx;
1980 gprs[VCPU_REGS_RDX3] = vcpu->vc_gueststate.vg_rdx;
1981 gprs[VCPU_REGS_RSI4] = vcpu->vc_gueststate.vg_rsi;
1982 gprs[VCPU_REGS_RDI5] = vcpu->vc_gueststate.vg_rdi;
1983 gprs[VCPU_REGS_R86] = vcpu->vc_gueststate.vg_r8;
1984 gprs[VCPU_REGS_R97] = vcpu->vc_gueststate.vg_r9;
1985 gprs[VCPU_REGS_R108] = vcpu->vc_gueststate.vg_r10;
1986 gprs[VCPU_REGS_R119] = vcpu->vc_gueststate.vg_r11;
1987 gprs[VCPU_REGS_R1210] = vcpu->vc_gueststate.vg_r12;
1988 gprs[VCPU_REGS_R1311] = vcpu->vc_gueststate.vg_r13;
1989 gprs[VCPU_REGS_R1412] = vcpu->vc_gueststate.vg_r14;
1990 gprs[VCPU_REGS_R1513] = vcpu->vc_gueststate.vg_r15;
1991 gprs[VCPU_REGS_RBP15] = vcpu->vc_gueststate.vg_rbp;
1992 gprs[VCPU_REGS_RIP16] = vmcb->v_rip;
1993 gprs[VCPU_REGS_RSP14] = vmcb->v_rsp;
1994 gprs[VCPU_REGS_RFLAGS17] = vmcb->v_rflags;
1995 }
1996
1997 if (regmask & VM_RWREGS_SREGS0x2) {
1998 sregs[VCPU_REGS_CS0].vsi_sel = vmcb->v_cs.vs_sel;
1999 sregs[VCPU_REGS_CS0].vsi_limit = vmcb->v_cs.vs_lim;
2000 attr = vmcb->v_cs.vs_attr;
2001 sregs[VCPU_REGS_CS0].vsi_ar = (attr & 0xff) | ((attr << 4) &
2002 0xf000);
2003 sregs[VCPU_REGS_CS0].vsi_base = vmcb->v_cs.vs_base;
2004
2005 sregs[VCPU_REGS_DS1].vsi_sel = vmcb->v_ds.vs_sel;
2006 sregs[VCPU_REGS_DS1].vsi_limit = vmcb->v_ds.vs_lim;
2007 attr = vmcb->v_ds.vs_attr;
2008 sregs[VCPU_REGS_DS1].vsi_ar = (attr & 0xff) | ((attr << 4) &
2009 0xf000);
2010 sregs[VCPU_REGS_DS1].vsi_base = vmcb->v_ds.vs_base;
2011
2012 sregs[VCPU_REGS_ES2].vsi_sel = vmcb->v_es.vs_sel;
2013 sregs[VCPU_REGS_ES2].vsi_limit = vmcb->v_es.vs_lim;
2014 attr = vmcb->v_es.vs_attr;
2015 sregs[VCPU_REGS_ES2].vsi_ar = (attr & 0xff) | ((attr << 4) &
2016 0xf000);
2017 sregs[VCPU_REGS_ES2].vsi_base = vmcb->v_es.vs_base;
2018
2019 sregs[VCPU_REGS_FS3].vsi_sel = vmcb->v_fs.vs_sel;
2020 sregs[VCPU_REGS_FS3].vsi_limit = vmcb->v_fs.vs_lim;
2021 attr = vmcb->v_fs.vs_attr;
2022 sregs[VCPU_REGS_FS3].vsi_ar = (attr & 0xff) | ((attr << 4) &
2023 0xf000);
2024 sregs[VCPU_REGS_FS3].vsi_base = vmcb->v_fs.vs_base;
2025
2026 sregs[VCPU_REGS_GS4].vsi_sel = vmcb->v_gs.vs_sel;
2027 sregs[VCPU_REGS_GS4].vsi_limit = vmcb->v_gs.vs_lim;
2028 attr = vmcb->v_gs.vs_attr;
2029 sregs[VCPU_REGS_GS4].vsi_ar = (attr & 0xff) | ((attr << 4) &
2030 0xf000);
2031 sregs[VCPU_REGS_GS4].vsi_base = vmcb->v_gs.vs_base;
2032
2033 sregs[VCPU_REGS_SS5].vsi_sel = vmcb->v_ss.vs_sel;
2034 sregs[VCPU_REGS_SS5].vsi_limit = vmcb->v_ss.vs_lim;
2035 attr = vmcb->v_ss.vs_attr;
2036 sregs[VCPU_REGS_SS5].vsi_ar = (attr & 0xff) | ((attr << 4) &
2037 0xf000);
2038 sregs[VCPU_REGS_SS5].vsi_base = vmcb->v_ss.vs_base;
2039
2040 sregs[VCPU_REGS_LDTR6].vsi_sel = vmcb->v_ldtr.vs_sel;
2041 sregs[VCPU_REGS_LDTR6].vsi_limit = vmcb->v_ldtr.vs_lim;
2042 attr = vmcb->v_ldtr.vs_attr;
2043 sregs[VCPU_REGS_LDTR6].vsi_ar = (attr & 0xff) | ((attr << 4)
2044 & 0xf000);
2045 sregs[VCPU_REGS_LDTR6].vsi_base = vmcb->v_ldtr.vs_base;
2046
2047 sregs[VCPU_REGS_TR7].vsi_sel = vmcb->v_tr.vs_sel;
2048 sregs[VCPU_REGS_TR7].vsi_limit = vmcb->v_tr.vs_lim;
2049 attr = vmcb->v_tr.vs_attr;
2050 sregs[VCPU_REGS_TR7].vsi_ar = (attr & 0xff) | ((attr << 4) &
2051 0xf000);
2052 sregs[VCPU_REGS_TR7].vsi_base = vmcb->v_tr.vs_base;
2053
2054 vrs->vrs_gdtr.vsi_limit = vmcb->v_gdtr.vs_lim;
2055 vrs->vrs_gdtr.vsi_base = vmcb->v_gdtr.vs_base;
2056 vrs->vrs_idtr.vsi_limit = vmcb->v_idtr.vs_lim;
2057 vrs->vrs_idtr.vsi_base = vmcb->v_idtr.vs_base;
2058 }
2059
2060 if (regmask & VM_RWREGS_CRS0x4) {
2061 crs[VCPU_REGS_CR00] = vmcb->v_cr0;
2062 crs[VCPU_REGS_CR32] = vmcb->v_cr3;
2063 crs[VCPU_REGS_CR43] = vmcb->v_cr4;
2064 crs[VCPU_REGS_CR21] = vcpu->vc_gueststate.vg_cr2;
2065 crs[VCPU_REGS_XCR05] = vcpu->vc_gueststate.vg_xcr0;
2066 }
2067
2068 if (regmask & VM_RWREGS_MSRS0x8) {
2069 msrs[VCPU_REGS_EFER0] = vmcb->v_efer;
2070 msrs[VCPU_REGS_STAR1] = vmcb->v_star;
2071 msrs[VCPU_REGS_LSTAR2] = vmcb->v_lstar;
2072 msrs[VCPU_REGS_CSTAR3] = vmcb->v_cstar;
2073 msrs[VCPU_REGS_SFMASK4] = vmcb->v_sfmask;
2074 msrs[VCPU_REGS_KGSBASE5] = vmcb->v_kgsbase;
2075 }
2076
2077 if (regmask & VM_RWREGS_DRS0x10) {
2078 drs[VCPU_REGS_DR00] = vcpu->vc_gueststate.vg_dr0;
2079 drs[VCPU_REGS_DR11] = vcpu->vc_gueststate.vg_dr1;
2080 drs[VCPU_REGS_DR22] = vcpu->vc_gueststate.vg_dr2;
2081 drs[VCPU_REGS_DR33] = vcpu->vc_gueststate.vg_dr3;
2082 drs[VCPU_REGS_DR64] = vmcb->v_dr6;
2083 drs[VCPU_REGS_DR75] = vmcb->v_dr7;
2084 }
2085
2086 return (0);
2087}
2088
2089/*
2090 * vcpu_writeregs_vmx
2091 *
2092 * Writes VCPU registers
2093 *
2094 * Parameters:
2095 * vcpu: the vcpu that has to get its registers written to
2096 * regmask: the types of registers to write
2097 * loadvmcs: bit to indicate whether the VMCS has to be loaded first
2098 * vrs: the register values to write
2099 *
2100 * Return values:
2101 * 0: if successful
2102 * EINVAL an error writing registers occurred
2103 */
2104int
2105vcpu_writeregs_vmx(struct vcpu *vcpu, uint64_t regmask, int loadvmcs,
2106 struct vcpu_reg_state *vrs)
2107{
2108 int i, ret = 0;
2109 uint16_t sel;
2110 uint64_t limit, ar;
2111 uint64_t *gprs = vrs->vrs_gprs;
2112 uint64_t *crs = vrs->vrs_crs;
2113 uint64_t *msrs = vrs->vrs_msrs;
2114 uint64_t *drs = vrs->vrs_drs;
2115 struct vcpu_segment_info *sregs = vrs->vrs_sregs;
2116 struct vmx_msr_store *msr_store;
2117
2118 if (loadvmcs) {
2119 if (vcpu_reload_vmcs_vmx(vcpu))
2120 return (EINVAL22);
2121 }
2122
2123#ifdef VMM_DEBUG
2124 /* VMCS should be loaded... */
2125 paddr_t pa = 0ULL;
2126 if (vmptrst(&pa))
2127 panic("%s: vmptrst", __func__);
2128 KASSERT(pa == vcpu->vc_control_pa)((pa == vcpu->vc_control_pa) ? (void)0 : __assert("diagnostic "
, "/usr/src/sys/arch/amd64/amd64/vmm.c", 2128, "pa == vcpu->vc_control_pa"
))
;
2129#endif /* VMM_DEBUG */
2130
2131 if (regmask & VM_RWREGS_GPRS0x1) {
2132 vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX0];
2133 vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX1];
2134 vcpu->vc_gueststate.vg_rcx = gprs[VCPU_REGS_RCX2];
2135 vcpu->vc_gueststate.vg_rdx = gprs[VCPU_REGS_RDX3];
2136 vcpu->vc_gueststate.vg_rsi = gprs[VCPU_REGS_RSI4];
2137 vcpu->vc_gueststate.vg_rdi = gprs[VCPU_REGS_RDI5];
2138 vcpu->vc_gueststate.vg_r8 = gprs[VCPU_REGS_R86];
2139 vcpu->vc_gueststate.vg_r9 = gprs[VCPU_REGS_R97];
2140 vcpu->vc_gueststate.vg_r10 = gprs[VCPU_REGS_R108];
2141 vcpu->vc_gueststate.vg_r11 = gprs[VCPU_REGS_R119];
2142 vcpu->vc_gueststate.vg_r12 = gprs[VCPU_REGS_R1210];
2143 vcpu->vc_gueststate.vg_r13 = gprs[VCPU_REGS_R1311];
2144 vcpu->vc_gueststate.vg_r14 = gprs[VCPU_REGS_R1412];
2145 vcpu->vc_gueststate.vg_r15 = gprs[VCPU_REGS_R1513];
2146 vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP15];
2147 vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP16];
2148 if (vmwrite(VMCS_GUEST_IA32_RIP0x681E, gprs[VCPU_REGS_RIP16]))
2149 goto errout;
2150 if (vmwrite(VMCS_GUEST_IA32_RSP0x681C, gprs[VCPU_REGS_RSP14]))
2151 goto errout;
2152 if (vmwrite(VMCS_GUEST_IA32_RFLAGS0x6820, gprs[VCPU_REGS_RFLAGS17]))
2153 goto errout;
2154 }
2155
2156 if (regmask & VM_RWREGS_SREGS0x2) {
2157 for (i = 0; i < nitems(vmm_vmx_sreg_vmcs_fields)(sizeof((vmm_vmx_sreg_vmcs_fields)) / sizeof((vmm_vmx_sreg_vmcs_fields
)[0]))
; i++) {
2158 sel = sregs[i].vsi_sel;
2159 limit = sregs[i].vsi_limit;
2160 ar = sregs[i].vsi_ar;
2161
2162 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].selid, sel))
2163 goto errout;
2164 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].limitid, limit))
2165 goto errout;
2166 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].arid, ar))
2167 goto errout;
2168 if (vmwrite(vmm_vmx_sreg_vmcs_fields[i].baseid,
2169 sregs[i].vsi_base))
2170 goto errout;
2171 }
2172
2173 if (vmwrite(VMCS_GUEST_IA32_GDTR_LIMIT0x4810,
2174 vrs->vrs_gdtr.vsi_limit))
2175 goto errout;
2176 if (vmwrite(VMCS_GUEST_IA32_GDTR_BASE0x6816,
2177 vrs->vrs_gdtr.vsi_base))
2178 goto errout;
2179 if (vmwrite(VMCS_GUEST_IA32_IDTR_LIMIT0x4812,
2180 vrs->vrs_idtr.vsi_limit))
2181 goto errout;
2182 if (vmwrite(VMCS_GUEST_IA32_IDTR_BASE0x6818,
2183 vrs->vrs_idtr.vsi_base))
2184 goto errout;
2185 }
2186
2187 if (regmask & VM_RWREGS_CRS0x4) {
2188 vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR05];
2189 if (vmwrite(VMCS_GUEST_IA32_CR00x6800, crs[VCPU_REGS_CR00]))
2190 goto errout;
2191 if (vmwrite(VMCS_GUEST_IA32_CR30x6802, crs[VCPU_REGS_CR32]))
2192 goto errout;
2193 if (vmwrite(VMCS_GUEST_IA32_CR40x6804, crs[VCPU_REGS_CR43]))
2194 goto errout;
2195 if (vmwrite(VMCS_GUEST_PDPTE00x280A, crs[VCPU_REGS_PDPTE06]))
2196 goto errout;
2197 if (vmwrite(VMCS_GUEST_PDPTE10x280C, crs[VCPU_REGS_PDPTE17]))
2198 goto errout;
2199 if (vmwrite(VMCS_GUEST_PDPTE20x280E, crs[VCPU_REGS_PDPTE28]))
2200 goto errout;
2201 if (vmwrite(VMCS_GUEST_PDPTE30x2810, crs[VCPU_REGS_PDPTE39]))
2202 goto errout;
2203 }
2204
2205 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
2206
2207 if (regmask & VM_RWREGS_MSRS0x8) {
2208 for (i = 0; i < VCPU_REGS_NMSRS(6 + 1); i++) {
2209 msr_store[i].vms_data = msrs[i];
2210 }
2211 }
2212
2213 if (regmask & VM_RWREGS_DRS0x10) {
2214 vcpu->vc_gueststate.vg_dr0 = drs[VCPU_REGS_DR00];
2215 vcpu->vc_gueststate.vg_dr1 = drs[VCPU_REGS_DR11];
2216 vcpu->vc_gueststate.vg_dr2 = drs[VCPU_REGS_DR22];
2217 vcpu->vc_gueststate.vg_dr3 = drs[VCPU_REGS_DR33];
2218 vcpu->vc_gueststate.vg_dr6 = drs[VCPU_REGS_DR64];
2219 if (vmwrite(VMCS_GUEST_IA32_DR70x681A, drs[VCPU_REGS_DR75]))
2220 goto errout;
2221 }
2222
2223 goto out;
2224
2225errout:
2226 ret = EINVAL22;
2227out:
2228 if (loadvmcs) {
2229 if (vmclear(&vcpu->vc_control_pa))
2230 ret = EINVAL22;
2231 }
2232 return (ret);
2233}
2234
2235/*
2236 * vcpu_writeregs_svm
2237 *
2238 * Writes 'vcpu's registers
2239 *
2240 * Parameters:
2241 * vcpu: the vcpu that has to get its registers written to
2242 * regmask: the types of registers to write
2243 * vrs: the register values to write
2244 *
2245 * Return values:
2246 * 0: if successful
2247 * EINVAL an error writing registers occurred
2248 */
2249int
2250vcpu_writeregs_svm(struct vcpu *vcpu, uint64_t regmask,
2251 struct vcpu_reg_state *vrs)
2252{
2253 uint64_t *gprs = vrs->vrs_gprs;
2254 uint64_t *crs = vrs->vrs_crs;
2255 uint16_t attr;
2256 uint64_t *msrs = vrs->vrs_msrs;
2257 uint64_t *drs = vrs->vrs_drs;
2258 struct vcpu_segment_info *sregs = vrs->vrs_sregs;
2259 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
2260
2261 if (regmask & VM_RWREGS_GPRS0x1) {
2262 vcpu->vc_gueststate.vg_rax = gprs[VCPU_REGS_RAX0];
2263 vcpu->vc_gueststate.vg_rbx = gprs[VCPU_REGS_RBX1];
2264 vcpu->vc_gueststate.vg_rcx = gprs[VCPU_REGS_RCX2];
2265 vcpu->vc_gueststate.vg_rdx = gprs[VCPU_REGS_RDX3];
2266 vcpu->vc_gueststate.vg_rsi = gprs[VCPU_REGS_RSI4];
2267 vcpu->vc_gueststate.vg_rdi = gprs[VCPU_REGS_RDI5];
2268 vcpu->vc_gueststate.vg_r8 = gprs[VCPU_REGS_R86];
2269 vcpu->vc_gueststate.vg_r9 = gprs[VCPU_REGS_R97];
2270 vcpu->vc_gueststate.vg_r10 = gprs[VCPU_REGS_R108];
2271 vcpu->vc_gueststate.vg_r11 = gprs[VCPU_REGS_R119];
2272 vcpu->vc_gueststate.vg_r12 = gprs[VCPU_REGS_R1210];
2273 vcpu->vc_gueststate.vg_r13 = gprs[VCPU_REGS_R1311];
2274 vcpu->vc_gueststate.vg_r14 = gprs[VCPU_REGS_R1412];
2275 vcpu->vc_gueststate.vg_r15 = gprs[VCPU_REGS_R1513];
2276 vcpu->vc_gueststate.vg_rbp = gprs[VCPU_REGS_RBP15];
2277 vcpu->vc_gueststate.vg_rip = gprs[VCPU_REGS_RIP16];
2278
2279 vmcb->v_rip = gprs[VCPU_REGS_RIP16];
2280 vmcb->v_rsp = gprs[VCPU_REGS_RSP14];
2281 vmcb->v_rflags = gprs[VCPU_REGS_RFLAGS17];
2282 }
2283
2284 if (regmask & VM_RWREGS_SREGS0x2) {
2285 vmcb->v_cs.vs_sel = sregs[VCPU_REGS_CS0].vsi_sel;
2286 vmcb->v_cs.vs_lim = sregs[VCPU_REGS_CS0].vsi_limit;
2287 attr = sregs[VCPU_REGS_CS0].vsi_ar;
2288 vmcb->v_cs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
2289 vmcb->v_cs.vs_base = sregs[VCPU_REGS_CS0].vsi_base;
2290 vmcb->v_ds.vs_sel = sregs[VCPU_REGS_DS1].vsi_sel;
2291 vmcb->v_ds.vs_lim = sregs[VCPU_REGS_DS1].vsi_limit;
2292 attr = sregs[VCPU_REGS_DS1].vsi_ar;
2293 vmcb->v_ds.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
2294 vmcb->v_ds.vs_base = sregs[VCPU_REGS_DS1].vsi_base;
2295 vmcb->v_es.vs_sel = sregs[VCPU_REGS_ES2].vsi_sel;
2296 vmcb->v_es.vs_lim = sregs[VCPU_REGS_ES2].vsi_limit;
2297 attr = sregs[VCPU_REGS_ES2].vsi_ar;
2298 vmcb->v_es.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
2299 vmcb->v_es.vs_base = sregs[VCPU_REGS_ES2].vsi_base;
2300 vmcb->v_fs.vs_sel = sregs[VCPU_REGS_FS3].vsi_sel;
2301 vmcb->v_fs.vs_lim = sregs[VCPU_REGS_FS3].vsi_limit;
2302 attr = sregs[VCPU_REGS_FS3].vsi_ar;
2303 vmcb->v_fs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
2304 vmcb->v_fs.vs_base = sregs[VCPU_REGS_FS3].vsi_base;
2305 vmcb->v_gs.vs_sel = sregs[VCPU_REGS_GS4].vsi_sel;
2306 vmcb->v_gs.vs_lim = sregs[VCPU_REGS_GS4].vsi_limit;
2307 attr = sregs[VCPU_REGS_GS4].vsi_ar;
2308 vmcb->v_gs.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
2309 vmcb->v_gs.vs_base = sregs[VCPU_REGS_GS4].vsi_base;
2310 vmcb->v_ss.vs_sel = sregs[VCPU_REGS_SS5].vsi_sel;
2311 vmcb->v_ss.vs_lim = sregs[VCPU_REGS_SS5].vsi_limit;
2312 attr = sregs[VCPU_REGS_SS5].vsi_ar;
2313 vmcb->v_ss.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
2314 vmcb->v_ss.vs_base = sregs[VCPU_REGS_SS5].vsi_base;
2315 vmcb->v_ldtr.vs_sel = sregs[VCPU_REGS_LDTR6].vsi_sel;
2316 vmcb->v_ldtr.vs_lim = sregs[VCPU_REGS_LDTR6].vsi_limit;
2317 attr = sregs[VCPU_REGS_LDTR6].vsi_ar;
2318 vmcb->v_ldtr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
2319 vmcb->v_ldtr.vs_base = sregs[VCPU_REGS_LDTR6].vsi_base;
2320 vmcb->v_tr.vs_sel = sregs[VCPU_REGS_TR7].vsi_sel;
2321 vmcb->v_tr.vs_lim = sregs[VCPU_REGS_TR7].vsi_limit;
2322 attr = sregs[VCPU_REGS_TR7].vsi_ar;
2323 vmcb->v_tr.vs_attr = (attr & 0xff) | ((attr >> 4) & 0xf00);
2324 vmcb->v_tr.vs_base = sregs[VCPU_REGS_TR7].vsi_base;
2325 vmcb->v_gdtr.vs_lim = vrs->vrs_gdtr.vsi_limit;
2326 vmcb->v_gdtr.vs_base = vrs->vrs_gdtr.vsi_base;
2327 vmcb->v_idtr.vs_lim = vrs->vrs_idtr.vsi_limit;
2328 vmcb->v_idtr.vs_base = vrs->vrs_idtr.vsi_base;
2329 }
2330
2331 if (regmask & VM_RWREGS_CRS0x4) {
2332 vmcb->v_cr0 = crs[VCPU_REGS_CR00];
2333 vmcb->v_cr3 = crs[VCPU_REGS_CR32];
2334 vmcb->v_cr4 = crs[VCPU_REGS_CR43];
2335 vcpu->vc_gueststate.vg_cr2 = crs[VCPU_REGS_CR21];
2336 vcpu->vc_gueststate.vg_xcr0 = crs[VCPU_REGS_XCR05];
2337 }
2338
2339 if (regmask & VM_RWREGS_MSRS0x8) {
2340 vmcb->v_efer |= msrs[VCPU_REGS_EFER0];
2341 vmcb->v_star = msrs[VCPU_REGS_STAR1];
2342 vmcb->v_lstar = msrs[VCPU_REGS_LSTAR2];
2343 vmcb->v_cstar = msrs[VCPU_REGS_CSTAR3];
2344 vmcb->v_sfmask = msrs[VCPU_REGS_SFMASK4];
2345 vmcb->v_kgsbase = msrs[VCPU_REGS_KGSBASE5];
2346 }
2347
2348 if (regmask & VM_RWREGS_DRS0x10) {
2349 vcpu->vc_gueststate.vg_dr0 = drs[VCPU_REGS_DR00];
2350 vcpu->vc_gueststate.vg_dr1 = drs[VCPU_REGS_DR11];
2351 vcpu->vc_gueststate.vg_dr2 = drs[VCPU_REGS_DR22];
2352 vcpu->vc_gueststate.vg_dr3 = drs[VCPU_REGS_DR33];
2353 vmcb->v_dr6 = drs[VCPU_REGS_DR64];
2354 vmcb->v_dr7 = drs[VCPU_REGS_DR75];
2355 }
2356
2357 return (0);
2358}
2359
2360/*
2361 * vcpu_reset_regs_svm
2362 *
2363 * Initializes 'vcpu's registers to supplied state
2364 *
2365 * Parameters:
2366 * vcpu: the vcpu whose register state is to be initialized
2367 * vrs: the register state to set
2368 *
2369 * Return values:
2370 * 0: registers init'ed successfully
2371 * EINVAL: an error occurred setting register state
2372 */
2373int
2374vcpu_reset_regs_svm(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
2375{
2376 struct vmcb *vmcb;
2377 int ret;
2378 uint16_t asid;
2379
2380 vmcb = (struct vmcb *)vcpu->vc_control_va;
2381
2382 /*
2383 * Intercept controls
2384 *
2385 * External Interrupt exiting (SVM_INTERCEPT_INTR)
2386 * External NMI exiting (SVM_INTERCEPT_NMI)
2387 * CPUID instruction (SVM_INTERCEPT_CPUID)
2388 * HLT instruction (SVM_INTERCEPT_HLT)
2389 * I/O instructions (SVM_INTERCEPT_INOUT)
2390 * MSR access (SVM_INTERCEPT_MSR)
2391 * shutdown events (SVM_INTERCEPT_SHUTDOWN)
2392 *
2393 * VMRUN instruction (SVM_INTERCEPT_VMRUN)
2394 * VMMCALL instruction (SVM_INTERCEPT_VMMCALL)
2395 * VMLOAD instruction (SVM_INTERCEPT_VMLOAD)
2396 * VMSAVE instruction (SVM_INTERCEPT_VMSAVE)
2397 * STGI instruction (SVM_INTERCEPT_STGI)
2398 * CLGI instruction (SVM_INTERCEPT_CLGI)
2399 * SKINIT instruction (SVM_INTERCEPT_SKINIT)
2400 * ICEBP instruction (SVM_INTERCEPT_ICEBP)
2401 * MWAIT instruction (SVM_INTERCEPT_MWAIT_UNCOND)
2402 * MWAIT instruction (SVM_INTERCEPT_MWAIT_COND)
2403 * MONITOR instruction (SVM_INTERCEPT_MONITOR)
2404 * RDTSCP instruction (SVM_INTERCEPT_RDTSCP)
2405 * INVLPGA instruction (SVM_INTERCEPT_INVLPGA)
2406 * XSETBV instruction (SVM_INTERCEPT_XSETBV) (if available)
2407 */
2408 vmcb->v_intercept1 = SVM_INTERCEPT_INTR(1UL << 0) | SVM_INTERCEPT_NMI(1UL << 1) |
2409 SVM_INTERCEPT_CPUID(1UL << 18) | SVM_INTERCEPT_HLT(1UL << 24) | SVM_INTERCEPT_INOUT(1UL << 27) |
2410 SVM_INTERCEPT_MSR(1UL << 28) | SVM_INTERCEPT_SHUTDOWN(1UL << 31);
2411
2412 vmcb->v_intercept2 = SVM_INTERCEPT_VMRUN(1UL << 0) | SVM_INTERCEPT_VMMCALL(1UL << 1) |
2413 SVM_INTERCEPT_VMLOAD(1UL << 2) | SVM_INTERCEPT_VMSAVE(1UL << 3) | SVM_INTERCEPT_STGI(1UL << 4) |
2414 SVM_INTERCEPT_CLGI(1UL << 5) | SVM_INTERCEPT_SKINIT(1UL << 6) | SVM_INTERCEPT_ICEBP(1UL << 8) |
2415 SVM_INTERCEPT_MWAIT_UNCOND(1UL << 11) | SVM_INTERCEPT_MONITOR(1UL << 10) |
2416 SVM_INTERCEPT_MWAIT_COND(1UL << 12) | SVM_INTERCEPT_RDTSCP(1UL << 7) |
2417 SVM_INTERCEPT_INVLPGA(1UL << 26);
2418
2419 if (xsave_mask)
2420 vmcb->v_intercept2 |= SVM_INTERCEPT_XSETBV(1UL << 13);
2421
2422 /* Setup I/O bitmap */
2423 memset((uint8_t *)vcpu->vc_svm_ioio_va, 0xFF, 3 * PAGE_SIZE)__builtin_memset(((uint8_t *)vcpu->vc_svm_ioio_va), (0xFF)
, (3 * (1 << 12)))
;
2424 vmcb->v_iopm_pa = (uint64_t)(vcpu->vc_svm_ioio_pa);
2425
2426 /* Setup MSR bitmap */
2427 memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, 2 * PAGE_SIZE)__builtin_memset(((uint8_t *)vcpu->vc_msr_bitmap_va), (0xFF
), (2 * (1 << 12)))
;
2428 vmcb->v_msrpm_pa = (uint64_t)(vcpu->vc_msr_bitmap_pa);
2429 svm_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL0x03a);
2430 svm_setmsrbrw(vcpu, MSR_SYSENTER_CS0x174);
2431 svm_setmsrbrw(vcpu, MSR_SYSENTER_ESP0x175);
2432 svm_setmsrbrw(vcpu, MSR_SYSENTER_EIP0x176);
2433 svm_setmsrbrw(vcpu, MSR_STAR0xc0000081);
2434 svm_setmsrbrw(vcpu, MSR_LSTAR0xc0000082);
2435 svm_setmsrbrw(vcpu, MSR_CSTAR0xc0000083);
2436 svm_setmsrbrw(vcpu, MSR_SFMASK0xc0000084);
2437 svm_setmsrbrw(vcpu, MSR_FSBASE0xc0000100);
2438 svm_setmsrbrw(vcpu, MSR_GSBASE0xc0000101);
2439 svm_setmsrbrw(vcpu, MSR_KERNELGSBASE0xc0000102);
2440
2441 /* EFER is R/O so we can ensure the guest always has SVME */
2442 svm_setmsrbr(vcpu, MSR_EFER0xc0000080);
2443
2444 /* allow reading TSC */
2445 svm_setmsrbr(vcpu, MSR_TSC0x010);
2446
2447 /* Guest VCPU ASID */
2448 if (vmm_alloc_vpid(&asid)) {
2449 DPRINTF("%s: could not allocate asid\n", __func__);
2450 ret = EINVAL22;
2451 goto exit;
2452 }
2453
2454 vmcb->v_asid = asid;
2455 vcpu->vc_vpid = asid;
2456
2457 /* TLB Control - First time in, flush all*/
2458 vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_ALL1;
2459
2460 /* INTR masking */
2461 vmcb->v_intr_masking = 1;
2462
2463 /* PAT */
2464 vmcb->v_g_pat = PATENTRY(0, PAT_WB)(0x6UL << ((0) * 8)) | PATENTRY(1, PAT_WC)(0x1UL << ((1) * 8)) |
2465 PATENTRY(2, PAT_UCMINUS)(0x7UL << ((2) * 8)) | PATENTRY(3, PAT_UC)(0x0UL << ((3) * 8)) |
2466 PATENTRY(4, PAT_WB)(0x6UL << ((4) * 8)) | PATENTRY(5, PAT_WC)(0x1UL << ((5) * 8)) |
2467 PATENTRY(6, PAT_UCMINUS)(0x7UL << ((6) * 8)) | PATENTRY(7, PAT_UC)(0x0UL << ((7) * 8));
2468
2469 /* NPT */
2470 if (vmm_softc->mode == VMM_MODE_RVI) {
2471 vmcb->v_np_enable = 1;
2472 vmcb->v_n_cr3 = vcpu->vc_parent->vm_map->pmap->pm_pdirpa;
2473 }
2474
2475 /* Enable SVME in EFER (must always be set) */
2476 vmcb->v_efer |= EFER_SVME0x00001000;
2477
2478 ret = vcpu_writeregs_svm(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), vrs);
2479
2480 /* xcr0 power on default sets bit 0 (x87 state) */
2481 vcpu->vc_gueststate.vg_xcr0 = XCR0_X870x00000001 & xsave_mask;
2482
2483 vcpu->vc_parent->vm_map->pmap->eptp = 0;
2484
2485exit:
2486 return ret;
2487}
2488
2489/*
2490 * svm_setmsrbr
2491 *
2492 * Allow read access to the specified msr on the supplied vcpu.
2493 *
2494 * Parameters:
2495 * vcpu: the VCPU to allow access
2496 * msr: the MSR number to allow access to
2497 */
2498void
2499svm_setmsrbr(struct vcpu *vcpu, uint32_t msr)
2500{
2501 uint8_t *msrs;
2502 uint16_t idx;
2503
2504 msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
2505
2506 /*
2507 * MSR Read bitmap layout:
2508 * Pentium MSRs (0x0 - 0x1fff) @ 0x0
2509 * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800
2510 * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000
2511 *
2512 * Read enable bit is low order bit of 2-bit pair
2513 * per MSR (eg, MSR 0x0 write bit is at bit 0 @ 0x0)
2514 */
2515 if (msr <= 0x1fff) {
2516 idx = SVM_MSRIDX(msr)((msr) / 4);
2517 msrs[idx] &= ~(SVM_MSRBIT_R(msr)(1 << (((msr) % 4) * 2)));
2518 } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
2519 idx = SVM_MSRIDX(msr - 0xc0000000)((msr - 0xc0000000) / 4) + 0x800;
2520 msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0000000)(1 << (((msr - 0xc0000000) % 4) * 2)));
2521 } else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
2522 idx = SVM_MSRIDX(msr - 0xc0010000)((msr - 0xc0010000) / 4) + 0x1000;
2523 msrs[idx] &= ~(SVM_MSRBIT_R(msr - 0xc0010000)(1 << (((msr - 0xc0010000) % 4) * 2)));
2524 } else {
2525 printf("%s: invalid msr 0x%x\n", __func__, msr);
2526 return;
2527 }
2528}
2529
2530/*
2531 * svm_setmsrbw
2532 *
2533 * Allow write access to the specified msr on the supplied vcpu
2534 *
2535 * Parameters:
2536 * vcpu: the VCPU to allow access
2537 * msr: the MSR number to allow access to
2538 */
2539void
2540svm_setmsrbw(struct vcpu *vcpu, uint32_t msr)
2541{
2542 uint8_t *msrs;
2543 uint16_t idx;
2544
2545 msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
2546
2547 /*
2548 * MSR Write bitmap layout:
2549 * Pentium MSRs (0x0 - 0x1fff) @ 0x0
2550 * Gen6 and Syscall MSRs (0xc0000000 - 0xc0001fff) @ 0x800
2551 * Gen7 and Gen8 MSRs (0xc0010000 - 0xc0011fff) @ 0x1000
2552 *
2553 * Write enable bit is high order bit of 2-bit pair
2554 * per MSR (eg, MSR 0x0 write bit is at bit 1 @ 0x0)
2555 */
2556 if (msr <= 0x1fff) {
2557 idx = SVM_MSRIDX(msr)((msr) / 4);
2558 msrs[idx] &= ~(SVM_MSRBIT_W(msr)(1 << (((msr) % 4) * 2 + 1)));
2559 } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
2560 idx = SVM_MSRIDX(msr - 0xc0000000)((msr - 0xc0000000) / 4) + 0x800;
2561 msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0000000)(1 << (((msr - 0xc0000000) % 4) * 2 + 1)));
2562 } else if (msr >= 0xc0010000 && msr <= 0xc0011fff) {
2563 idx = SVM_MSRIDX(msr - 0xc0010000)((msr - 0xc0010000) / 4) + 0x1000;
2564 msrs[idx] &= ~(SVM_MSRBIT_W(msr - 0xc0010000)(1 << (((msr - 0xc0010000) % 4) * 2 + 1)));
2565 } else {
2566 printf("%s: invalid msr 0x%x\n", __func__, msr);
2567 return;
2568 }
2569}
2570
2571/*
2572 * svm_setmsrbrw
2573 *
2574 * Allow read/write access to the specified msr on the supplied vcpu
2575 *
2576 * Parameters:
2577 * vcpu: the VCPU to allow access
2578 * msr: the MSR number to allow access to
2579 */
2580void
2581svm_setmsrbrw(struct vcpu *vcpu, uint32_t msr)
2582{
2583 svm_setmsrbr(vcpu, msr);
2584 svm_setmsrbw(vcpu, msr);
2585}
2586
2587/*
2588 * vmx_setmsrbr
2589 *
2590 * Allow read access to the specified msr on the supplied vcpu.
2591 *
2592 * Parameters:
2593 * vcpu: the VCPU to allow access
2594 * msr: the MSR number to allow access to
2595 */
2596void
2597vmx_setmsrbr(struct vcpu *vcpu, uint32_t msr)
2598{
2599 uint8_t *msrs;
2600 uint16_t idx;
2601
2602 msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
2603
2604 /*
2605 * MSR Read bitmap layout:
2606 * "Low" MSRs (0x0 - 0x1fff) @ 0x0
2607 * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0x400
2608 */
2609 if (msr <= 0x1fff) {
2610 idx = VMX_MSRIDX(msr)((msr) / 8);
2611 msrs[idx] &= ~(VMX_MSRBIT(msr)(1 << (msr) % 8));
2612 } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
2613 idx = VMX_MSRIDX(msr - 0xc0000000)((msr - 0xc0000000) / 8) + 0x400;
2614 msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000)(1 << (msr - 0xc0000000) % 8));
2615 } else
2616 printf("%s: invalid msr 0x%x\n", __func__, msr);
2617}
2618
2619/*
2620 * vmx_setmsrbw
2621 *
2622 * Allow write access to the specified msr on the supplied vcpu
2623 *
2624 * Parameters:
2625 * vcpu: the VCPU to allow access
2626 * msr: the MSR number to allow access to
2627 */
2628void
2629vmx_setmsrbw(struct vcpu *vcpu, uint32_t msr)
2630{
2631 uint8_t *msrs;
2632 uint16_t idx;
2633
2634 msrs = (uint8_t *)vcpu->vc_msr_bitmap_va;
2635
2636 /*
2637 * MSR Write bitmap layout:
2638 * "Low" MSRs (0x0 - 0x1fff) @ 0x800
2639 * "High" MSRs (0xc0000000 - 0xc0001fff) @ 0xc00
2640 */
2641 if (msr <= 0x1fff) {
2642 idx = VMX_MSRIDX(msr)((msr) / 8) + 0x800;
2643 msrs[idx] &= ~(VMX_MSRBIT(msr)(1 << (msr) % 8));
2644 } else if (msr >= 0xc0000000 && msr <= 0xc0001fff) {
2645 idx = VMX_MSRIDX(msr - 0xc0000000)((msr - 0xc0000000) / 8) + 0xc00;
2646 msrs[idx] &= ~(VMX_MSRBIT(msr - 0xc0000000)(1 << (msr - 0xc0000000) % 8));
2647 } else
2648 printf("%s: invalid msr 0x%x\n", __func__, msr);
2649}
2650
2651/*
2652 * vmx_setmsrbrw
2653 *
2654 * Allow read/write access to the specified msr on the supplied vcpu
2655 *
2656 * Parameters:
2657 * vcpu: the VCPU to allow access
2658 * msr: the MSR number to allow access to
2659 */
2660void
2661vmx_setmsrbrw(struct vcpu *vcpu, uint32_t msr)
2662{
2663 vmx_setmsrbr(vcpu, msr);
2664 vmx_setmsrbw(vcpu, msr);
2665}
2666
2667/*
2668 * svm_set_clean
2669 *
2670 * Sets (mark as unmodified) the VMCB clean bit set in 'value'.
2671 * For example, to set the clean bit for the VMCB intercepts (bit position 0),
2672 * the caller provides 'SVM_CLEANBITS_I' (0x1) for the 'value' argument.
2673 * Multiple cleanbits can be provided in 'value' at the same time (eg,
2674 * "SVM_CLEANBITS_I | SVM_CLEANBITS_TPR").
2675 *
2676 * Note that this function does not clear any bits; to clear bits in the
2677 * vmcb cleanbits bitfield, use 'svm_set_dirty'.
2678 *
2679 * Parameters:
2680 * vmcs: the VCPU whose VMCB clean value should be set
2681 * value: the value(s) to enable in the cleanbits mask
2682 */
2683void
2684svm_set_clean(struct vcpu *vcpu, uint32_t value)
2685{
2686 struct vmcb *vmcb;
2687
2688 /* If no cleanbits support, do nothing */
2689 if (!curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_svm.svm_vmcb_clean)
2690 return;
2691
2692 vmcb = (struct vmcb *)vcpu->vc_control_va;
2693
2694 vmcb->v_vmcb_clean_bits |= value;
2695}
2696
2697/*
2698 * svm_set_dirty
2699 *
2700 * Clears (mark as modified) the VMCB clean bit set in 'value'.
2701 * For example, to clear the bit for the VMCB intercepts (bit position 0)
2702 * the caller provides 'SVM_CLEANBITS_I' (0x1) for the 'value' argument.
2703 * Multiple dirty bits can be provided in 'value' at the same time (eg,
2704 * "SVM_CLEANBITS_I | SVM_CLEANBITS_TPR").
2705 *
2706 * Parameters:
2707 * vmcs: the VCPU whose VMCB dirty value should be set
2708 * value: the value(s) to dirty in the cleanbits mask
2709 */
2710void
2711svm_set_dirty(struct vcpu *vcpu, uint32_t value)
2712{
2713 struct vmcb *vmcb;
2714
2715 /* If no cleanbits support, do nothing */
2716 if (!curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_svm.svm_vmcb_clean)
2717 return;
2718
2719 vmcb = (struct vmcb *)vcpu->vc_control_va;
2720
2721 vmcb->v_vmcb_clean_bits &= ~value;
2722}
2723
2724/*
2725 * vcpu_reset_regs_vmx
2726 *
2727 * Initializes 'vcpu's registers to supplied state
2728 *
2729 * Parameters:
2730 * vcpu: the vcpu whose register state is to be initialized
2731 * vrs: the register state to set
2732 *
2733 * Return values:
2734 * 0: registers init'ed successfully
2735 * EINVAL: an error occurred setting register state
2736 */
2737int
2738vcpu_reset_regs_vmx(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
2739{
2740 int ret = 0, ug = 0;
2741 uint32_t cr0, cr4;
2742 uint32_t pinbased, procbased, procbased2, exit, entry;
2743 uint32_t want1, want0;
2744 uint64_t msr, ctrlval, eptp, cr3;
2745 uint16_t ctrl, vpid;
2746 struct vmx_msr_store *msr_store;
2747
2748 rw_assert_wrlock(&vcpu->vc_lock);
2749
2750 cr0 = vrs->vrs_crs[VCPU_REGS_CR00];
2751
2752 if (vcpu_reload_vmcs_vmx(vcpu)) {
2753 DPRINTF("%s: error reloading VMCS\n", __func__);
2754 ret = EINVAL22;
2755 goto exit;
2756 }
2757
2758#ifdef VMM_DEBUG
2759 /* VMCS should be loaded... */
2760 paddr_t pa = 0ULL;
2761 if (vmptrst(&pa))
2762 panic("%s: vmptrst", __func__);
2763 KASSERT(pa == vcpu->vc_control_pa)((pa == vcpu->vc_control_pa) ? (void)0 : __assert("diagnostic "
, "/usr/src/sys/arch/amd64/amd64/vmm.c", 2763, "pa == vcpu->vc_control_pa"
))
;
2764#endif /* VMM_DEBUG */
2765
2766 /* Compute Basic Entry / Exit Controls */
2767 vcpu->vc_vmx_basic = rdmsr(IA32_VMX_BASIC0x480);
2768 vcpu->vc_vmx_entry_ctls = rdmsr(IA32_VMX_ENTRY_CTLS0x484);
2769 vcpu->vc_vmx_exit_ctls = rdmsr(IA32_VMX_EXIT_CTLS0x483);
2770 vcpu->vc_vmx_pinbased_ctls = rdmsr(IA32_VMX_PINBASED_CTLS0x481);
2771 vcpu->vc_vmx_procbased_ctls = rdmsr(IA32_VMX_PROCBASED_CTLS0x482);
2772
2773 /* Compute True Entry / Exit Controls (if applicable) */
2774 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
2775 vcpu->vc_vmx_true_entry_ctls = rdmsr(IA32_VMX_TRUE_ENTRY_CTLS0x490);
2776 vcpu->vc_vmx_true_exit_ctls = rdmsr(IA32_VMX_TRUE_EXIT_CTLS0x48F);
2777 vcpu->vc_vmx_true_pinbased_ctls =
2778 rdmsr(IA32_VMX_TRUE_PINBASED_CTLS0x48D);
2779 vcpu->vc_vmx_true_procbased_ctls =
2780 rdmsr(IA32_VMX_TRUE_PROCBASED_CTLS0x48E);
2781 }
2782
2783 /* Compute Secondary Procbased Controls (if applicable) */
2784 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
2785 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1))
2786 vcpu->vc_vmx_procbased2_ctls = rdmsr(IA32_VMX_PROCBASED2_CTLS0x48B);
2787
2788 /*
2789 * Pinbased ctrls
2790 *
2791 * We must be able to set the following:
2792 * IA32_VMX_EXTERNAL_INT_EXITING - exit on host interrupt
2793 * IA32_VMX_NMI_EXITING - exit on host NMI
2794 */
2795 want1 = IA32_VMX_EXTERNAL_INT_EXITING(1ULL << 0) |
2796 IA32_VMX_NMI_EXITING(1ULL << 3);
2797 want0 = 0;
2798
2799 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
2800 ctrl = IA32_VMX_TRUE_PINBASED_CTLS0x48D;
2801 ctrlval = vcpu->vc_vmx_true_pinbased_ctls;
2802 } else {
2803 ctrl = IA32_VMX_PINBASED_CTLS0x481;
2804 ctrlval = vcpu->vc_vmx_pinbased_ctls;
2805 }
2806
2807 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &pinbased)) {
2808 DPRINTF("%s: error computing pinbased controls\n", __func__);
2809 ret = EINVAL22;
2810 goto exit;
2811 }
2812
2813 if (vmwrite(VMCS_PINBASED_CTLS0x4000, pinbased)) {
2814 DPRINTF("%s: error setting pinbased controls\n", __func__);
2815 ret = EINVAL22;
2816 goto exit;
2817 }
2818
2819 /*
2820 * Procbased ctrls
2821 *
2822 * We must be able to set the following:
2823 * IA32_VMX_HLT_EXITING - exit on HLT instruction
2824 * IA32_VMX_MWAIT_EXITING - exit on MWAIT instruction
2825 * IA32_VMX_UNCONDITIONAL_IO_EXITING - exit on I/O instructions
2826 * IA32_VMX_USE_MSR_BITMAPS - exit on various MSR accesses
2827 * IA32_VMX_CR8_LOAD_EXITING - guest TPR access
2828 * IA32_VMX_CR8_STORE_EXITING - guest TPR access
2829 * IA32_VMX_USE_TPR_SHADOW - guest TPR access (shadow)
2830 * IA32_VMX_MONITOR_EXITING - exit on MONITOR instruction
2831 *
2832 * If we have EPT, we must be able to clear the following
2833 * IA32_VMX_CR3_LOAD_EXITING - don't care about guest CR3 accesses
2834 * IA32_VMX_CR3_STORE_EXITING - don't care about guest CR3 accesses
2835 */
2836 want1 = IA32_VMX_HLT_EXITING(1ULL << 7) |
2837 IA32_VMX_MWAIT_EXITING(1ULL << 10) |
2838 IA32_VMX_UNCONDITIONAL_IO_EXITING(1ULL << 24) |
2839 IA32_VMX_USE_MSR_BITMAPS(1ULL << 28) |
2840 IA32_VMX_CR8_LOAD_EXITING(1ULL << 19) |
2841 IA32_VMX_CR8_STORE_EXITING(1ULL << 20) |
2842 IA32_VMX_MONITOR_EXITING(1ULL << 29) |
2843 IA32_VMX_USE_TPR_SHADOW(1ULL << 21);
2844 want0 = 0;
2845
2846 if (vmm_softc->mode == VMM_MODE_EPT) {
2847 want1 |= IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31);
2848 want0 |= IA32_VMX_CR3_LOAD_EXITING(1ULL << 15) |
2849 IA32_VMX_CR3_STORE_EXITING(1ULL << 16);
2850 }
2851
2852 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
2853 ctrl = IA32_VMX_TRUE_PROCBASED_CTLS0x48E;
2854 ctrlval = vcpu->vc_vmx_true_procbased_ctls;
2855 } else {
2856 ctrl = IA32_VMX_PROCBASED_CTLS0x482;
2857 ctrlval = vcpu->vc_vmx_procbased_ctls;
2858 }
2859
2860 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased)) {
2861 DPRINTF("%s: error computing procbased controls\n", __func__);
2862 ret = EINVAL22;
2863 goto exit;
2864 }
2865
2866 if (vmwrite(VMCS_PROCBASED_CTLS0x4002, procbased)) {
2867 DPRINTF("%s: error setting procbased controls\n", __func__);
2868 ret = EINVAL22;
2869 goto exit;
2870 }
2871
2872 /*
2873 * Secondary Procbased ctrls
2874 *
2875 * We want to be able to set the following, if available:
2876 * IA32_VMX_ENABLE_VPID - use VPIDs where available
2877 *
2878 * If we have EPT, we must be able to set the following:
2879 * IA32_VMX_ENABLE_EPT - enable EPT
2880 *
2881 * If we have unrestricted guest capability, we must be able to set
2882 * the following:
2883 * IA32_VMX_UNRESTRICTED_GUEST - enable unrestricted guest (if caller
2884 * specified CR0_PG | CR0_PE in %cr0 in the 'vrs' parameter)
2885 */
2886 want1 = 0;
2887
2888 /* XXX checking for 2ndary controls can be combined here */
2889 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
2890 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1)) {
2891 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
2892 IA32_VMX_ENABLE_VPID(1ULL << 5), 1)) {
2893 want1 |= IA32_VMX_ENABLE_VPID(1ULL << 5);
2894 vcpu->vc_vmx_vpid_enabled = 1;
2895 }
2896 }
2897
2898 if (vmm_softc->mode == VMM_MODE_EPT)
2899 want1 |= IA32_VMX_ENABLE_EPT(1ULL << 1);
2900
2901 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
2902 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1)) {
2903 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
2904 IA32_VMX_UNRESTRICTED_GUEST(1ULL << 7), 1)) {
2905 if ((cr0 & (CR0_PE0x00000001 | CR0_PG0x80000000)) == 0) {
2906 want1 |= IA32_VMX_UNRESTRICTED_GUEST(1ULL << 7);
2907 ug = 1;
2908 }
2909 }
2910 }
2911
2912 want0 = ~want1;
2913 ctrlval = vcpu->vc_vmx_procbased2_ctls;
2914 ctrl = IA32_VMX_PROCBASED2_CTLS0x48B;
2915
2916 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &procbased2)) {
2917 DPRINTF("%s: error computing secondary procbased controls\n",
2918 __func__);
2919 ret = EINVAL22;
2920 goto exit;
2921 }
2922
2923 if (vmwrite(VMCS_PROCBASED2_CTLS0x401E, procbased2)) {
2924 DPRINTF("%s: error setting secondary procbased controls\n",
2925 __func__);
2926 ret = EINVAL22;
2927 goto exit;
2928 }
2929
2930 /*
2931 * Exit ctrls
2932 *
2933 * We must be able to set the following:
2934 * IA32_VMX_SAVE_DEBUG_CONTROLS
2935 * IA32_VMX_HOST_SPACE_ADDRESS_SIZE - exit to long mode
2936 * IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT - ack interrupt on exit
2937 */
2938 want1 = IA32_VMX_HOST_SPACE_ADDRESS_SIZE(1ULL << 9) |
2939 IA32_VMX_ACKNOWLEDGE_INTERRUPT_ON_EXIT(1ULL << 15) |
2940 IA32_VMX_SAVE_DEBUG_CONTROLS(1ULL << 2);
2941 want0 = 0;
2942
2943 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
2944 ctrl = IA32_VMX_TRUE_EXIT_CTLS0x48F;
2945 ctrlval = vcpu->vc_vmx_true_exit_ctls;
2946 } else {
2947 ctrl = IA32_VMX_EXIT_CTLS0x483;
2948 ctrlval = vcpu->vc_vmx_exit_ctls;
2949 }
2950
2951 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &exit)) {
2952 DPRINTF("%s: error computing exit controls\n", __func__);
2953 ret = EINVAL22;
2954 goto exit;
2955 }
2956
2957 if (vmwrite(VMCS_EXIT_CTLS0x400C, exit)) {
2958 DPRINTF("%s: error setting exit controls\n", __func__);
2959 ret = EINVAL22;
2960 goto exit;
2961 }
2962
2963 /*
2964 * Entry ctrls
2965 *
2966 * We must be able to set the following:
2967 * IA32_VMX_IA32E_MODE_GUEST (if no unrestricted guest)
2968 * IA32_VMX_LOAD_DEBUG_CONTROLS
2969 * We must be able to clear the following:
2970 * IA32_VMX_ENTRY_TO_SMM - enter to SMM
2971 * IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT
2972 * IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY
2973 */
2974 want1 = IA32_VMX_LOAD_DEBUG_CONTROLS(1ULL << 2);
2975 if (vrs->vrs_msrs[VCPU_REGS_EFER0] & EFER_LMA0x00000400)
2976 want1 |= IA32_VMX_IA32E_MODE_GUEST(1ULL << 9);
2977
2978 want0 = IA32_VMX_ENTRY_TO_SMM(1ULL << 10) |
2979 IA32_VMX_DEACTIVATE_DUAL_MONITOR_TREATMENT(1ULL << 11) |
2980 IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY(1ULL << 13);
2981
2982 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
2983 ctrl = IA32_VMX_TRUE_ENTRY_CTLS0x490;
2984 ctrlval = vcpu->vc_vmx_true_entry_ctls;
2985 } else {
2986 ctrl = IA32_VMX_ENTRY_CTLS0x484;
2987 ctrlval = vcpu->vc_vmx_entry_ctls;
2988 }
2989
2990 if (vcpu_vmx_compute_ctrl(ctrlval, ctrl, want1, want0, &entry)) {
2991 ret = EINVAL22;
2992 goto exit;
2993 }
2994
2995 if (vmwrite(VMCS_ENTRY_CTLS0x4012, entry)) {
2996 ret = EINVAL22;
2997 goto exit;
2998 }
2999
3000 if (vmm_softc->mode == VMM_MODE_EPT) {
3001 eptp = vcpu->vc_parent->vm_map->pmap->pm_pdirpa;
3002 msr = rdmsr(IA32_VMX_EPT_VPID_CAP0x48C);
3003 if (msr & IA32_EPT_VPID_CAP_PAGE_WALK_4(1ULL << 6)) {
3004 /* Page walk length 4 supported */
3005 eptp |= ((IA32_EPT_PAGE_WALK_LENGTH0x4 - 1) << 3);
3006 } else {
3007 DPRINTF("EPT page walk length 4 not supported\n");
3008 ret = EINVAL22;
3009 goto exit;
3010 }
3011
3012 if (msr & IA32_EPT_VPID_CAP_WB(1ULL << 14)) {
3013 /* WB cache type supported */
3014 eptp |= IA32_EPT_PAGING_CACHE_TYPE_WB0x6;
3015 } else
3016 DPRINTF("%s: no WB cache type available, guest VM "
3017 "will run uncached\n", __func__);
3018
3019 DPRINTF("Guest EPTP = 0x%llx\n", eptp);
3020 if (vmwrite(VMCS_GUEST_IA32_EPTP0x201A, eptp)) {
3021 DPRINTF("%s: error setting guest EPTP\n", __func__);
3022 ret = EINVAL22;
3023 goto exit;
3024 }
3025
3026 vcpu->vc_parent->vm_map->pmap->eptp = eptp;
3027 }
3028
3029 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
3030 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1)) {
3031 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
3032 IA32_VMX_ENABLE_VPID(1ULL << 5), 1)) {
3033
3034 /* We may sleep during allocation, so reload VMCS. */
3035 vcpu->vc_last_pcpu = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
;
3036 ret = vmm_alloc_vpid(&vpid);
3037 if (vcpu_reload_vmcs_vmx(vcpu)) {
3038 printf("%s: failed to reload vmcs\n", __func__);
3039 ret = EINVAL22;
3040 goto exit;
3041 }
3042 if (ret) {
3043 DPRINTF("%s: could not allocate VPID\n",
3044 __func__);
3045 ret = EINVAL22;
3046 goto exit;
3047 }
3048
3049 if (vmwrite(VMCS_GUEST_VPID0x0000, vpid)) {
3050 DPRINTF("%s: error setting guest VPID\n",
3051 __func__);
3052 ret = EINVAL22;
3053 goto exit;
3054 }
3055
3056 vcpu->vc_vpid = vpid;
3057 }
3058 }
3059
3060 /*
3061 * Determine which bits in CR0 have to be set to a fixed
3062 * value as per Intel SDM A.7.
3063 * CR0 bits in the vrs parameter must match these.
3064 */
3065 want1 = (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) &
3066 (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
3067 want0 = ~(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0) &
3068 ~(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
3069
3070 /*
3071 * CR0_FIXED0 and CR0_FIXED1 may report the CR0_PG and CR0_PE bits as
3072 * fixed to 1 even if the CPU supports the unrestricted guest
3073 * feature. Update want1 and want0 accordingly to allow
3074 * any value for CR0_PG and CR0_PE in vrs->vrs_crs[VCPU_REGS_CR0] if
3075 * the CPU has the unrestricted guest capability.
3076 */
3077 if (ug) {
3078 want1 &= ~(CR0_PG0x80000000 | CR0_PE0x00000001);
3079 want0 &= ~(CR0_PG0x80000000 | CR0_PE0x00000001);
3080 }
3081
3082 /*
3083 * VMX may require some bits to be set that userland should not have
3084 * to care about. Set those here.
3085 */
3086 if (want1 & CR0_NE0x00000020)
3087 cr0 |= CR0_NE0x00000020;
3088
3089 if ((cr0 & want1) != want1) {
3090 ret = EINVAL22;
3091 goto exit;
3092 }
3093
3094 if ((~cr0 & want0) != want0) {
3095 ret = EINVAL22;
3096 goto exit;
3097 }
3098
3099 vcpu->vc_vmx_cr0_fixed1 = want1;
3100 vcpu->vc_vmx_cr0_fixed0 = want0;
3101 /*
3102 * Determine which bits in CR4 have to be set to a fixed
3103 * value as per Intel SDM A.8.
3104 * CR4 bits in the vrs parameter must match these, except
3105 * CR4_VMXE - we add that here since it must always be set.
3106 */
3107 want1 = (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) &
3108 (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
3109 want0 = ~(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0) &
3110 ~(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
3111
3112 cr4 = vrs->vrs_crs[VCPU_REGS_CR43] | CR4_VMXE0x00002000;
3113
3114 if ((cr4 & want1) != want1) {
3115 ret = EINVAL22;
3116 goto exit;
3117 }
3118
3119 if ((~cr4 & want0) != want0) {
3120 ret = EINVAL22;
3121 goto exit;
3122 }
3123
3124 cr3 = vrs->vrs_crs[VCPU_REGS_CR32];
3125
3126 /* Restore PDPTEs if 32-bit PAE paging is being used */
3127 if (cr3 && (cr4 & CR4_PAE0x00000020) &&
3128 !(vrs->vrs_msrs[VCPU_REGS_EFER0] & EFER_LMA0x00000400)) {
3129 if (vmwrite(VMCS_GUEST_PDPTE00x280A,
3130 vrs->vrs_crs[VCPU_REGS_PDPTE06])) {
3131 ret = EINVAL22;
3132 goto exit;
3133 }
3134
3135 if (vmwrite(VMCS_GUEST_PDPTE10x280C,
3136 vrs->vrs_crs[VCPU_REGS_PDPTE17])) {
3137 ret = EINVAL22;
3138 goto exit;
3139 }
3140
3141 if (vmwrite(VMCS_GUEST_PDPTE20x280E,
3142 vrs->vrs_crs[VCPU_REGS_PDPTE28])) {
3143 ret = EINVAL22;
3144 goto exit;
3145 }
3146
3147 if (vmwrite(VMCS_GUEST_PDPTE30x2810,
3148 vrs->vrs_crs[VCPU_REGS_PDPTE39])) {
3149 ret = EINVAL22;
3150 goto exit;
3151 }
3152 }
3153
3154 vrs->vrs_crs[VCPU_REGS_CR00] = cr0;
3155 vrs->vrs_crs[VCPU_REGS_CR43] = cr4;
3156
3157 /*
3158 * Select host MSRs to be loaded on exit
3159 */
3160 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_load_va;
3161 msr_store[0].vms_index = MSR_EFER0xc0000080;
3162 msr_store[0].vms_data = rdmsr(MSR_EFER0xc0000080);
3163 msr_store[1].vms_index = MSR_STAR0xc0000081;
3164 msr_store[1].vms_data = rdmsr(MSR_STAR0xc0000081);
3165 msr_store[2].vms_index = MSR_LSTAR0xc0000082;
3166 msr_store[2].vms_data = rdmsr(MSR_LSTAR0xc0000082);
3167 msr_store[3].vms_index = MSR_CSTAR0xc0000083;
3168 msr_store[3].vms_data = rdmsr(MSR_CSTAR0xc0000083);
3169 msr_store[4].vms_index = MSR_SFMASK0xc0000084;
3170 msr_store[4].vms_data = rdmsr(MSR_SFMASK0xc0000084);
3171 msr_store[5].vms_index = MSR_KERNELGSBASE0xc0000102;
3172 msr_store[5].vms_data = rdmsr(MSR_KERNELGSBASE0xc0000102);
3173 msr_store[6].vms_index = MSR_MISC_ENABLE0x1a0;
3174 msr_store[6].vms_data = rdmsr(MSR_MISC_ENABLE0x1a0);
3175
3176 /*
3177 * Select guest MSRs to be loaded on entry / saved on exit
3178 */
3179 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
3180
3181 msr_store[VCPU_REGS_EFER0].vms_index = MSR_EFER0xc0000080;
3182 msr_store[VCPU_REGS_STAR1].vms_index = MSR_STAR0xc0000081;
3183 msr_store[VCPU_REGS_LSTAR2].vms_index = MSR_LSTAR0xc0000082;
3184 msr_store[VCPU_REGS_CSTAR3].vms_index = MSR_CSTAR0xc0000083;
3185 msr_store[VCPU_REGS_SFMASK4].vms_index = MSR_SFMASK0xc0000084;
3186 msr_store[VCPU_REGS_KGSBASE5].vms_index = MSR_KERNELGSBASE0xc0000102;
3187 msr_store[VCPU_REGS_MISC_ENABLE6].vms_index = MSR_MISC_ENABLE0x1a0;
3188
3189 /*
3190 * Initialize MSR_MISC_ENABLE as it can't be read and populated from vmd
3191 * and some of the content is based on the host.
3192 */
3193 msr_store[VCPU_REGS_MISC_ENABLE6].vms_data = rdmsr(MSR_MISC_ENABLE0x1a0);
3194 msr_store[VCPU_REGS_MISC_ENABLE6].vms_data &=
3195 ~(MISC_ENABLE_TCC(1 << 3) | MISC_ENABLE_PERF_MON_AVAILABLE(1 << 7) |
3196 MISC_ENABLE_EIST_ENABLED(1 << 16) | MISC_ENABLE_ENABLE_MONITOR_FSM(1 << 18) |
3197 MISC_ENABLE_xTPR_MESSAGE_DISABLE(1 << 23));
3198 msr_store[VCPU_REGS_MISC_ENABLE6].vms_data |=
3199 MISC_ENABLE_BTS_UNAVAILABLE(1 << 11) | MISC_ENABLE_PEBS_UNAVAILABLE(1 << 12);
3200
3201 /*
3202 * Currently we have the same count of entry/exit MSRs loads/stores
3203 * but this is not an architectural requirement.
3204 */
3205 if (vmwrite(VMCS_EXIT_MSR_STORE_COUNT0x400E, VMX_NUM_MSR_STORE7)) {
3206 DPRINTF("%s: error setting guest MSR exit store count\n",
3207 __func__);
3208 ret = EINVAL22;
3209 goto exit;
3210 }
3211
3212 if (vmwrite(VMCS_EXIT_MSR_LOAD_COUNT0x4010, VMX_NUM_MSR_STORE7)) {
3213 DPRINTF("%s: error setting guest MSR exit load count\n",
3214 __func__);
3215 ret = EINVAL22;
3216 goto exit;
3217 }
3218
3219 if (vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT0x4014, VMX_NUM_MSR_STORE7)) {
3220 DPRINTF("%s: error setting guest MSR entry load count\n",
3221 __func__);
3222 ret = EINVAL22;
3223 goto exit;
3224 }
3225
3226 if (vmwrite(VMCS_EXIT_STORE_MSR_ADDRESS0x2006,
3227 vcpu->vc_vmx_msr_exit_save_pa)) {
3228 DPRINTF("%s: error setting guest MSR exit store address\n",
3229 __func__);
3230 ret = EINVAL22;
3231 goto exit;
3232 }
3233
3234 if (vmwrite(VMCS_EXIT_LOAD_MSR_ADDRESS0x2008,
3235 vcpu->vc_vmx_msr_exit_load_pa)) {
3236 DPRINTF("%s: error setting guest MSR exit load address\n",
3237 __func__);
3238 ret = EINVAL22;
3239 goto exit;
3240 }
3241
3242 if (vmwrite(VMCS_ENTRY_LOAD_MSR_ADDRESS0x200A,
3243 vcpu->vc_vmx_msr_exit_save_pa)) {
3244 DPRINTF("%s: error setting guest MSR entry load address\n",
3245 __func__);
3246 ret = EINVAL22;
3247 goto exit;
3248 }
3249
3250 if (vmwrite(VMCS_MSR_BITMAP_ADDRESS0x2004,
3251 vcpu->vc_msr_bitmap_pa)) {
3252 DPRINTF("%s: error setting guest MSR bitmap address\n",
3253 __func__);
3254 ret = EINVAL22;
3255 goto exit;
3256 }
3257
3258 if (vmwrite(VMCS_CR4_MASK0x6002, CR4_VMXE0x00002000)) {
3259 DPRINTF("%s: error setting guest CR4 mask\n", __func__);
3260 ret = EINVAL22;
3261 goto exit;
3262 }
3263
3264 if (vmwrite(VMCS_CR0_MASK0x6000, CR0_NE0x00000020)) {
3265 DPRINTF("%s: error setting guest CR0 mask\n", __func__);
3266 ret = EINVAL22;
3267 goto exit;
3268 }
3269
3270 /*
3271 * Set up the VMCS for the register state we want during VCPU start.
3272 * This matches what the CPU state would be after a bootloader
3273 * transition to 'start'.
3274 */
3275 ret = vcpu_writeregs_vmx(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), 0, vrs);
3276
3277 /*
3278 * Set up the MSR bitmap
3279 */
3280 memset((uint8_t *)vcpu->vc_msr_bitmap_va, 0xFF, PAGE_SIZE)__builtin_memset(((uint8_t *)vcpu->vc_msr_bitmap_va), (0xFF
), ((1 << 12)))
;
3281 vmx_setmsrbrw(vcpu, MSR_IA32_FEATURE_CONTROL0x03a);
3282 vmx_setmsrbrw(vcpu, MSR_SYSENTER_CS0x174);
3283 vmx_setmsrbrw(vcpu, MSR_SYSENTER_ESP0x175);
3284 vmx_setmsrbrw(vcpu, MSR_SYSENTER_EIP0x176);
3285 vmx_setmsrbrw(vcpu, MSR_EFER0xc0000080);
3286 vmx_setmsrbrw(vcpu, MSR_STAR0xc0000081);
3287 vmx_setmsrbrw(vcpu, MSR_LSTAR0xc0000082);
3288 vmx_setmsrbrw(vcpu, MSR_CSTAR0xc0000083);
3289 vmx_setmsrbrw(vcpu, MSR_SFMASK0xc0000084);
3290 vmx_setmsrbrw(vcpu, MSR_FSBASE0xc0000100);
3291 vmx_setmsrbrw(vcpu, MSR_GSBASE0xc0000101);
3292 vmx_setmsrbrw(vcpu, MSR_KERNELGSBASE0xc0000102);
3293 vmx_setmsrbr(vcpu, MSR_MISC_ENABLE0x1a0);
3294
3295 /* XXX CR0 shadow */
3296 /* XXX CR4 shadow */
3297
3298 /* xcr0 power on default sets bit 0 (x87 state) */
3299 vcpu->vc_gueststate.vg_xcr0 = XCR0_X870x00000001 & xsave_mask;
3300
3301 /* XXX PAT shadow */
3302 vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT0x277);
3303
3304 /* Flush the VMCS */
3305 if (vmclear(&vcpu->vc_control_pa)) {
3306 DPRINTF("%s: vmclear failed\n", __func__);
3307 ret = EINVAL22;
3308 }
3309 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_CLEARED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (0));
3310
3311exit:
3312 return (ret);
3313}
3314
3315/*
3316 * vcpu_init_vmx
3317 *
3318 * Intel VMX specific VCPU initialization routine.
3319 *
3320 * This function allocates various per-VCPU memory regions, sets up initial
3321 * VCPU VMCS controls, and sets initial register values.
3322 *
3323 * Parameters:
3324 * vcpu: the VCPU structure being initialized
3325 *
3326 * Return values:
3327 * 0: the VCPU was initialized successfully
3328 * ENOMEM: insufficient resources
3329 * EINVAL: an error occurred during VCPU initialization
3330 */
3331int
3332vcpu_init_vmx(struct vcpu *vcpu)
3333{
3334 struct vmcs *vmcs;
3335 uint32_t cr0, cr4;
3336 int ret = 0;
3337
3338 /* Allocate VMCS VA */
3339 vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, &kp_zero,
3340 &kd_waitok);
3341 vcpu->vc_vmx_vmcs_state = VMCS_CLEARED0;
3342
3343 if (!vcpu->vc_control_va)
3344 return (ENOMEM12);
3345
3346 /* Compute VMCS PA */
3347 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_control_va,
3348 (paddr_t *)&vcpu->vc_control_pa)) {
3349 ret = ENOMEM12;
3350 goto exit;
3351 }
3352
3353 /* Allocate MSR bitmap VA */
3354 vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, &kp_zero,
3355 &kd_waitok);
3356
3357 if (!vcpu->vc_msr_bitmap_va) {
3358 ret = ENOMEM12;
3359 goto exit;
3360 }
3361
3362 /* Compute MSR bitmap PA */
3363 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_msr_bitmap_va,
3364 (paddr_t *)&vcpu->vc_msr_bitmap_pa)) {
3365 ret = ENOMEM12;
3366 goto exit;
3367 }
3368
3369 /* Allocate MSR exit load area VA */
3370 vcpu->vc_vmx_msr_exit_load_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page,
3371 &kp_zero, &kd_waitok);
3372
3373 if (!vcpu->vc_vmx_msr_exit_load_va) {
3374 ret = ENOMEM12;
3375 goto exit;
3376 }
3377
3378 /* Compute MSR exit load area PA */
3379 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_vmx_msr_exit_load_va,
3380 &vcpu->vc_vmx_msr_exit_load_pa)) {
3381 ret = ENOMEM12;
3382 goto exit;
3383 }
3384
3385 /* Allocate MSR exit save area VA */
3386 vcpu->vc_vmx_msr_exit_save_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page,
3387 &kp_zero, &kd_waitok);
3388
3389 if (!vcpu->vc_vmx_msr_exit_save_va) {
3390 ret = ENOMEM12;
3391 goto exit;
3392 }
3393
3394 /* Compute MSR exit save area PA */
3395 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_vmx_msr_exit_save_va,
3396 &vcpu->vc_vmx_msr_exit_save_pa)) {
3397 ret = ENOMEM12;
3398 goto exit;
3399 }
3400
3401 /* Allocate MSR entry load area VA */
3402 vcpu->vc_vmx_msr_entry_load_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page,
3403 &kp_zero, &kd_waitok);
3404
3405 if (!vcpu->vc_vmx_msr_entry_load_va) {
3406 ret = ENOMEM12;
3407 goto exit;
3408 }
3409
3410 /* Compute MSR entry load area PA */
3411 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_vmx_msr_entry_load_va,
3412 &vcpu->vc_vmx_msr_entry_load_pa)) {
3413 ret = ENOMEM12;
3414 goto exit;
3415 }
3416
3417 vmcs = (struct vmcs *)vcpu->vc_control_va;
3418 vmcs->vmcs_revision = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision;
3419
3420 /*
3421 * Load the VMCS onto this PCPU so we can write registers
3422 */
3423 if (vmptrld(&vcpu->vc_control_pa)) {
3424 ret = EINVAL22;
3425 goto exit;
3426 }
3427
3428 /* Host CR0 */
3429 cr0 = rcr0() & ~CR0_TS0x00000008;
3430 if (vmwrite(VMCS_HOST_IA32_CR00x6C00, cr0)) {
3431 DPRINTF("%s: error writing host CR0\n", __func__);
3432 ret = EINVAL22;
3433 goto exit;
3434 }
3435
3436 /* Host CR4 */
3437 cr4 = rcr4();
3438 if (vmwrite(VMCS_HOST_IA32_CR40x6C04, cr4)) {
3439 DPRINTF("%s: error writing host CR4\n", __func__);
3440 ret = EINVAL22;
3441 goto exit;
3442 }
3443
3444 /* Host Segment Selectors */
3445 if (vmwrite(VMCS_HOST_IA32_CS_SEL0x0C02, GSEL(GCODE_SEL, SEL_KPL)(((1) << 3) | 0))) {
3446 DPRINTF("%s: error writing host CS selector\n", __func__);
3447 ret = EINVAL22;
3448 goto exit;
3449 }
3450
3451 if (vmwrite(VMCS_HOST_IA32_DS_SEL0x0C06, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) {
3452 DPRINTF("%s: error writing host DS selector\n", __func__);
3453 ret = EINVAL22;
3454 goto exit;
3455 }
3456
3457 if (vmwrite(VMCS_HOST_IA32_ES_SEL0x0C00, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) {
3458 DPRINTF("%s: error writing host ES selector\n", __func__);
3459 ret = EINVAL22;
3460 goto exit;
3461 }
3462
3463 if (vmwrite(VMCS_HOST_IA32_FS_SEL0x0C08, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) {
3464 DPRINTF("%s: error writing host FS selector\n", __func__);
3465 ret = EINVAL22;
3466 goto exit;
3467 }
3468
3469 if (vmwrite(VMCS_HOST_IA32_GS_SEL0x0C0A, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) {
3470 DPRINTF("%s: error writing host GS selector\n", __func__);
3471 ret = EINVAL22;
3472 goto exit;
3473 }
3474
3475 if (vmwrite(VMCS_HOST_IA32_SS_SEL0x0C04, GSEL(GDATA_SEL, SEL_KPL)(((2) << 3) | 0))) {
3476 DPRINTF("%s: error writing host SS selector\n", __func__);
3477 ret = EINVAL22;
3478 goto exit;
3479 }
3480
3481 if (vmwrite(VMCS_HOST_IA32_TR_SEL0x0C0C, GSYSSEL(GPROC0_SEL, SEL_KPL)((((0) << 4) + (6 << 3)) | 0))) {
3482 DPRINTF("%s: error writing host TR selector\n", __func__);
3483 ret = EINVAL22;
3484 goto exit;
3485 }
3486
3487 /* Host IDTR base */
3488 if (vmwrite(VMCS_HOST_IA32_IDTR_BASE0x6C0E, idt_vaddr)) {
3489 DPRINTF("%s: error writing host IDTR base\n", __func__);
3490 ret = EINVAL22;
3491 goto exit;
3492 }
3493
3494 /* VMCS link */
3495 if (vmwrite(VMCS_LINK_POINTER0x2800, VMX_VMCS_PA_CLEAR0xFFFFFFFFFFFFFFFFUL)) {
3496 DPRINTF("%s: error writing VMCS link pointer\n", __func__);
3497 ret = EINVAL22;
3498 goto exit;
3499 }
3500
3501 /* Flush the initial VMCS */
3502 if (vmclear(&vcpu->vc_control_pa)) {
3503 DPRINTF("%s: vmclear failed\n", __func__);
3504 ret = EINVAL22;
3505 }
3506
3507exit:
3508 if (ret) {
3509 if (vcpu->vc_control_va)
3510 km_free((void *)vcpu->vc_control_va, PAGE_SIZE(1 << 12),
3511 &kv_page, &kp_zero);
3512 if (vcpu->vc_msr_bitmap_va)
3513 km_free((void *)vcpu->vc_msr_bitmap_va, PAGE_SIZE(1 << 12),
3514 &kv_page, &kp_zero);
3515 if (vcpu->vc_vmx_msr_exit_save_va)
3516 km_free((void *)vcpu->vc_vmx_msr_exit_save_va,
3517 PAGE_SIZE(1 << 12), &kv_page, &kp_zero);
3518 if (vcpu->vc_vmx_msr_exit_load_va)
3519 km_free((void *)vcpu->vc_vmx_msr_exit_load_va,
3520 PAGE_SIZE(1 << 12), &kv_page, &kp_zero);
3521 if (vcpu->vc_vmx_msr_entry_load_va)
3522 km_free((void *)vcpu->vc_vmx_msr_entry_load_va,
3523 PAGE_SIZE(1 << 12), &kv_page, &kp_zero);
3524 }
3525
3526 return (ret);
3527}
3528
3529/*
3530 * vcpu_reset_regs
3531 *
3532 * Resets a vcpu's registers to the provided state
3533 *
3534 * Parameters:
3535 * vcpu: the vcpu whose registers shall be reset
3536 * vrs: the desired register state
3537 *
3538 * Return values:
3539 * 0: the vcpu's registers were successfully reset
3540 * !0: the vcpu's registers could not be reset (see arch-specific reset
3541 * function for various values that can be returned here)
3542 */
3543int
3544vcpu_reset_regs(struct vcpu *vcpu, struct vcpu_reg_state *vrs)
3545{
3546 int ret;
3547
3548 if (vmm_softc->mode == VMM_MODE_VMX ||
3549 vmm_softc->mode == VMM_MODE_EPT)
3550 ret = vcpu_reset_regs_vmx(vcpu, vrs);
3551 else if (vmm_softc->mode == VMM_MODE_SVM ||
3552 vmm_softc->mode == VMM_MODE_RVI)
3553 ret = vcpu_reset_regs_svm(vcpu, vrs);
3554 else
3555 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
3556
3557 return (ret);
3558}
3559
3560/*
3561 * vcpu_init_svm
3562 *
3563 * AMD SVM specific VCPU initialization routine.
3564 *
3565 * This function allocates various per-VCPU memory regions, sets up initial
3566 * VCPU VMCB controls, and sets initial register values.
3567 *
3568 * Parameters:
3569 * vcpu: the VCPU structure being initialized
3570 *
3571 * Return values:
3572 * 0: the VCPU was initialized successfully
3573 * ENOMEM: insufficient resources
3574 * EINVAL: an error occurred during VCPU initialization
3575 */
3576int
3577vcpu_init_svm(struct vcpu *vcpu)
3578{
3579 int ret = 0;
3580
3581 /* Allocate VMCB VA */
3582 vcpu->vc_control_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page, &kp_zero,
3583 &kd_waitok);
3584
3585 if (!vcpu->vc_control_va)
3586 return (ENOMEM12);
3587
3588 /* Compute VMCB PA */
3589 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_control_va,
3590 (paddr_t *)&vcpu->vc_control_pa)) {
3591 ret = ENOMEM12;
3592 goto exit;
3593 }
3594
3595 DPRINTF("%s: VMCB va @ 0x%llx, pa @ 0x%llx\n", __func__,
3596 (uint64_t)vcpu->vc_control_va,
3597 (uint64_t)vcpu->vc_control_pa);
3598
3599
3600 /* Allocate MSR bitmap VA (2 pages) */
3601 vcpu->vc_msr_bitmap_va = (vaddr_t)km_alloc(2 * PAGE_SIZE(1 << 12), &kv_any,
3602 &vmm_kp_contig, &kd_waitok);
3603
3604 if (!vcpu->vc_msr_bitmap_va) {
3605 ret = ENOMEM12;
3606 goto exit;
3607 }
3608
3609 /* Compute MSR bitmap PA */
3610 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_msr_bitmap_va,
3611 (paddr_t *)&vcpu->vc_msr_bitmap_pa)) {
3612 ret = ENOMEM12;
3613 goto exit;
3614 }
3615
3616 DPRINTF("%s: MSR bitmap va @ 0x%llx, pa @ 0x%llx\n", __func__,
3617 (uint64_t)vcpu->vc_msr_bitmap_va,
3618 (uint64_t)vcpu->vc_msr_bitmap_pa);
3619
3620 /* Allocate host state area VA */
3621 vcpu->vc_svm_hsa_va = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_page,
3622 &kp_zero, &kd_waitok);
3623
3624 if (!vcpu->vc_svm_hsa_va) {
3625 ret = ENOMEM12;
3626 goto exit;
3627 }
3628
3629 /* Compute host state area PA */
3630 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_svm_hsa_va,
3631 &vcpu->vc_svm_hsa_pa)) {
3632 ret = ENOMEM12;
3633 goto exit;
3634 }
3635
3636 DPRINTF("%s: HSA va @ 0x%llx, pa @ 0x%llx\n", __func__,
3637 (uint64_t)vcpu->vc_svm_hsa_va,
3638 (uint64_t)vcpu->vc_svm_hsa_pa);
3639
3640 /* Allocate IOIO area VA (3 pages) */
3641 vcpu->vc_svm_ioio_va = (vaddr_t)km_alloc(3 * PAGE_SIZE(1 << 12), &kv_any,
3642 &vmm_kp_contig, &kd_waitok);
3643
3644 if (!vcpu->vc_svm_ioio_va) {
3645 ret = ENOMEM12;
3646 goto exit;
3647 }
3648
3649 /* Compute IOIO area PA */
3650 if (!pmap_extract(pmap_kernel()(&kernel_pmap_store), vcpu->vc_svm_ioio_va,
3651 &vcpu->vc_svm_ioio_pa)) {
3652 ret = ENOMEM12;
3653 goto exit;
3654 }
3655
3656 DPRINTF("%s: IOIO va @ 0x%llx, pa @ 0x%llx\n", __func__,
3657 (uint64_t)vcpu->vc_svm_ioio_va,
3658 (uint64_t)vcpu->vc_svm_ioio_pa);
3659
3660exit:
3661 if (ret) {
3662 if (vcpu->vc_control_va)
3663 km_free((void *)vcpu->vc_control_va, PAGE_SIZE(1 << 12),
3664 &kv_page, &kp_zero);
3665 if (vcpu->vc_msr_bitmap_va)
3666 km_free((void *)vcpu->vc_msr_bitmap_va, 2 * PAGE_SIZE(1 << 12),
3667 &kv_any, &vmm_kp_contig);
3668 if (vcpu->vc_svm_hsa_va)
3669 km_free((void *)vcpu->vc_svm_hsa_va, PAGE_SIZE(1 << 12),
3670 &kv_page, &kp_zero);
3671 if (vcpu->vc_svm_ioio_va)
3672 km_free((void *)vcpu->vc_svm_ioio_va,
3673 3 * PAGE_SIZE(1 << 12), &kv_any, &vmm_kp_contig);
3674 }
3675
3676 return (ret);
3677}
3678
3679/*
3680 * vcpu_init
3681 *
3682 * Calls the architecture-specific VCPU init routine
3683 */
3684int
3685vcpu_init(struct vcpu *vcpu)
3686{
3687 int ret = 0;
3688
3689 vcpu->vc_virt_mode = vmm_softc->mode;
3690 vcpu->vc_state = VCPU_STATE_STOPPED;
3691 vcpu->vc_vpid = 0;
3692 vcpu->vc_pvclock_system_gpa = 0;
3693 vcpu->vc_last_pcpu = NULL((void *)0);
3694
3695 rw_init(&vcpu->vc_lock, "vcpu")_rw_init_flags(&vcpu->vc_lock, "vcpu", 0, ((void *)0));
3696
3697 /* Shadow PAT MSR, starting with host's value. */
3698 vcpu->vc_shadow_pat = rdmsr(MSR_CR_PAT0x277);
3699
3700 if (vmm_softc->mode == VMM_MODE_VMX ||
3701 vmm_softc->mode == VMM_MODE_EPT)
3702 ret = vcpu_init_vmx(vcpu);
3703 else if (vmm_softc->mode == VMM_MODE_SVM ||
3704 vmm_softc->mode == VMM_MODE_RVI)
3705 ret = vcpu_init_svm(vcpu);
3706 else
3707 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
3708
3709 return (ret);
3710}
3711
3712/*
3713 * vcpu_deinit_vmx
3714 *
3715 * Deinitializes the vcpu described by 'vcpu'
3716 *
3717 * Parameters:
3718 * vcpu: the vcpu to be deinited
3719 */
3720void
3721vcpu_deinit_vmx(struct vcpu *vcpu)
3722{
3723 if (vcpu->vc_control_va)
3724 km_free((void *)vcpu->vc_control_va, PAGE_SIZE(1 << 12),
3725 &kv_page, &kp_zero);
3726 if (vcpu->vc_vmx_msr_exit_save_va)
3727 km_free((void *)vcpu->vc_vmx_msr_exit_save_va,
3728 PAGE_SIZE(1 << 12), &kv_page, &kp_zero);
3729 if (vcpu->vc_vmx_msr_exit_load_va)
3730 km_free((void *)vcpu->vc_vmx_msr_exit_load_va,
3731 PAGE_SIZE(1 << 12), &kv_page, &kp_zero);
3732 if (vcpu->vc_vmx_msr_entry_load_va)
3733 km_free((void *)vcpu->vc_vmx_msr_entry_load_va,
3734 PAGE_SIZE(1 << 12), &kv_page, &kp_zero);
3735
3736 if (vcpu->vc_vmx_vpid_enabled)
3737 vmm_free_vpid(vcpu->vc_vpid);
3738}
3739
3740/*
3741 * vcpu_deinit_svm
3742 *
3743 * Deinitializes the vcpu described by 'vcpu'
3744 *
3745 * Parameters:
3746 * vcpu: the vcpu to be deinited
3747 */
3748void
3749vcpu_deinit_svm(struct vcpu *vcpu)
3750{
3751 if (vcpu->vc_control_va)
3752 km_free((void *)vcpu->vc_control_va, PAGE_SIZE(1 << 12), &kv_page,
3753 &kp_zero);
3754 if (vcpu->vc_msr_bitmap_va)
3755 km_free((void *)vcpu->vc_msr_bitmap_va, 2 * PAGE_SIZE(1 << 12), &kv_any,
3756 &vmm_kp_contig);
3757 if (vcpu->vc_svm_hsa_va)
3758 km_free((void *)vcpu->vc_svm_hsa_va, PAGE_SIZE(1 << 12), &kv_page,
3759 &kp_zero);
3760 if (vcpu->vc_svm_ioio_va)
3761 km_free((void *)vcpu->vc_svm_ioio_va, 3 * PAGE_SIZE(1 << 12), &kv_any,
3762 &vmm_kp_contig);
3763
3764 vmm_free_vpid(vcpu->vc_vpid);
3765}
3766
3767/*
3768 * vcpu_deinit
3769 *
3770 * Calls the architecture-specific VCPU deinit routine
3771 *
3772 * Parameters:
3773 * vcpu: the vcpu to be deinited
3774 */
3775void
3776vcpu_deinit(struct vcpu *vcpu)
3777{
3778 if (vmm_softc->mode == VMM_MODE_VMX ||
3779 vmm_softc->mode == VMM_MODE_EPT)
3780 vcpu_deinit_vmx(vcpu);
3781 else if (vmm_softc->mode == VMM_MODE_SVM ||
3782 vmm_softc->mode == VMM_MODE_RVI)
3783 vcpu_deinit_svm(vcpu);
3784 else
3785 panic("%s: unknown vmm mode: %d", __func__, vmm_softc->mode);
3786}
3787
3788/*
3789 * vm_teardown
3790 *
3791 * Tears down (destroys) the vm indicated by 'vm'.
3792 *
3793 * Parameters:
3794 * vm: vm to be torn down
3795 */
3796void
3797vm_teardown(struct vm *vm)
3798{
3799 struct vcpu *vcpu, *tmp;
3800
3801 rw_assert_wrlock(&vmm_softc->vm_lock);
3802 KERNEL_LOCK()_kernel_lock();
3803
3804 /* Free VCPUs */
3805 rw_enter_write(&vm->vm_vcpu_lock);
3806 SLIST_FOREACH_SAFE(vcpu, &vm->vm_vcpu_list, vc_vcpu_link, tmp)for ((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu
) && ((tmp) = ((vcpu)->vc_vcpu_link.sle_next), 1);
(vcpu) = (tmp))
{
3807 SLIST_REMOVE(&vm->vm_vcpu_list, vcpu, vcpu, vc_vcpu_link)do { if ((&vm->vm_vcpu_list)->slh_first == (vcpu)) {
do { ((&vm->vm_vcpu_list))->slh_first = ((&vm->
vm_vcpu_list))->slh_first->vc_vcpu_link.sle_next; } while
(0); } else { struct vcpu *curelm = (&vm->vm_vcpu_list
)->slh_first; while (curelm->vc_vcpu_link.sle_next != (
vcpu)) curelm = curelm->vc_vcpu_link.sle_next; curelm->
vc_vcpu_link.sle_next = curelm->vc_vcpu_link.sle_next->
vc_vcpu_link.sle_next; } ((vcpu)->vc_vcpu_link.sle_next) =
((void *)-1); } while (0)
;
3808 vcpu_deinit(vcpu);
3809 pool_put(&vcpu_pool, vcpu);
3810 vmm_softc->vcpu_ct--;
3811 }
3812
3813 vm_impl_deinit(vm);
3814
3815 /* teardown guest vmspace */
3816 if (vm->vm_vmspace != NULL((void *)0)) {
3817 uvmspace_free(vm->vm_vmspace);
3818 vm->vm_vmspace = NULL((void *)0);
3819 }
3820
3821 if (vm->vm_id > 0) {
3822 vmm_softc->vm_ct--;
3823 if (vmm_softc->vm_ct < 1)
3824 vmm_stop();
3825 }
3826 pool_put(&vm_pool, vm);
3827
3828 KERNEL_UNLOCK()_kernel_unlock();
3829 rw_exit_write(&vm->vm_vcpu_lock);
3830}
3831
3832/*
3833 * vcpu_vmx_check_cap
3834 *
3835 * Checks if the 'cap' bit in the 'msr' MSR can be set or cleared (set = 1
3836 * or set = 0, respectively).
3837 *
3838 * When considering 'msr', we check to see if true controls are available,
3839 * and use those if so.
3840 *
3841 * Returns 1 of 'cap' can be set/cleared as requested, 0 otherwise.
3842 */
3843int
3844vcpu_vmx_check_cap(struct vcpu *vcpu, uint32_t msr, uint32_t cap, int set)
3845{
3846 uint64_t ctl;
3847
3848 if (vcpu->vc_vmx_basic & IA32_VMX_TRUE_CTLS_AVAIL(1ULL << 55)) {
3849 switch (msr) {
3850 case IA32_VMX_PINBASED_CTLS0x481:
3851 ctl = vcpu->vc_vmx_true_pinbased_ctls;
3852 break;
3853 case IA32_VMX_PROCBASED_CTLS0x482:
3854 ctl = vcpu->vc_vmx_true_procbased_ctls;
3855 break;
3856 case IA32_VMX_PROCBASED2_CTLS0x48B:
3857 ctl = vcpu->vc_vmx_procbased2_ctls;
3858 break;
3859 case IA32_VMX_ENTRY_CTLS0x484:
3860 ctl = vcpu->vc_vmx_true_entry_ctls;
3861 break;
3862 case IA32_VMX_EXIT_CTLS0x483:
3863 ctl = vcpu->vc_vmx_true_exit_ctls;
3864 break;
3865 default:
3866 return (0);
3867 }
3868 } else {
3869 switch (msr) {
3870 case IA32_VMX_PINBASED_CTLS0x481:
3871 ctl = vcpu->vc_vmx_pinbased_ctls;
3872 break;
3873 case IA32_VMX_PROCBASED_CTLS0x482:
3874 ctl = vcpu->vc_vmx_procbased_ctls;
3875 break;
3876 case IA32_VMX_PROCBASED2_CTLS0x48B:
3877 ctl = vcpu->vc_vmx_procbased2_ctls;
3878 break;
3879 case IA32_VMX_ENTRY_CTLS0x484:
3880 ctl = vcpu->vc_vmx_entry_ctls;
3881 break;
3882 case IA32_VMX_EXIT_CTLS0x483:
3883 ctl = vcpu->vc_vmx_exit_ctls;
3884 break;
3885 default:
3886 return (0);
3887 }
3888 }
3889
3890 if (set) {
3891 /* Check bit 'cap << 32', must be !0 */
3892 return (ctl & ((uint64_t)cap << 32)) != 0;
3893 } else {
3894 /* Check bit 'cap', must be 0 */
3895 return (ctl & cap) == 0;
3896 }
3897}
3898
3899/*
3900 * vcpu_vmx_compute_ctrl
3901 *
3902 * Computes the appropriate control value, given the supplied parameters
3903 * and CPU capabilities.
3904 *
3905 * Intel has made somewhat of a mess of this computation - it is described
3906 * using no fewer than three different approaches, spread across many
3907 * pages of the SDM. Further compounding the problem is the fact that now
3908 * we have "true controls" for each type of "control", and each needs to
3909 * be examined to get the calculation right, but only if "true" controls
3910 * are present on the CPU we're on.
3911 *
3912 * Parameters:
3913 * ctrlval: the control value, as read from the CPU MSR
3914 * ctrl: which control is being set (eg, pinbased, procbased, etc)
3915 * want0: the set of desired 0 bits
3916 * want1: the set of desired 1 bits
3917 * out: (out) the correct value to write into the VMCS for this VCPU,
3918 * for the 'ctrl' desired.
3919 *
3920 * Returns 0 if successful, or EINVAL if the supplied parameters define
3921 * an unworkable control setup.
3922 */
3923int
3924vcpu_vmx_compute_ctrl(uint64_t ctrlval, uint16_t ctrl, uint32_t want1,
3925 uint32_t want0, uint32_t *out)
3926{
3927 int i, set, clear;
3928
3929 *out = 0;
3930
3931 /*
3932 * The Intel SDM gives three formulae for determining which bits to
3933 * set/clear for a given control and desired functionality. Formula
3934 * 1 is the simplest but disallows use of newer features that are
3935 * enabled by functionality in later CPUs.
3936 *
3937 * Formulas 2 and 3 allow such extra functionality. We use formula
3938 * 2 - this requires us to know the identity of controls in the
3939 * "default1" class for each control register, but allows us to not
3940 * have to pass along and/or query both sets of capability MSRs for
3941 * each control lookup. This makes the code slightly longer,
3942 * however.
3943 */
3944 for (i = 0; i < 32; i++) {
3945 /* Figure out if we can set and / or clear this bit */
3946 set = (ctrlval & (1ULL << (i + 32))) != 0;
3947 clear = ((1ULL << i) & ((uint64_t)ctrlval)) == 0;
3948
3949 /* If the bit can't be set nor cleared, something's wrong */
3950 if (!set && !clear)
3951 return (EINVAL22);
3952
3953 /*
3954 * Formula 2.c.i - "If the relevant VMX capability MSR
3955 * reports that a control has a single setting, use that
3956 * setting."
3957 */
3958 if (set && !clear) {
3959 if (want0 & (1ULL << i))
3960 return (EINVAL22);
3961 else
3962 *out |= (1ULL << i);
3963 } else if (clear && !set) {
3964 if (want1 & (1ULL << i))
3965 return (EINVAL22);
3966 else
3967 *out &= ~(1ULL << i);
3968 } else {
3969 /*
3970 * 2.c.ii - "If the relevant VMX capability MSR
3971 * reports that a control can be set to 0 or 1
3972 * and that control's meaning is known to the VMM,
3973 * set the control based on the functionality desired."
3974 */
3975 if (want1 & (1ULL << i))
3976 *out |= (1ULL << i);
3977 else if (want0 & (1 << i))
3978 *out &= ~(1ULL << i);
3979 else {
3980 /*
3981 * ... assuming the control's meaning is not
3982 * known to the VMM ...
3983 *
3984 * 2.c.iii - "If the relevant VMX capability
3985 * MSR reports that a control can be set to 0
3986 * or 1 and the control is not in the default1
3987 * class, set the control to 0."
3988 *
3989 * 2.c.iv - "If the relevant VMX capability
3990 * MSR reports that a control can be set to 0
3991 * or 1 and the control is in the default1
3992 * class, set the control to 1."
3993 */
3994 switch (ctrl) {
3995 case IA32_VMX_PINBASED_CTLS0x481:
3996 case IA32_VMX_TRUE_PINBASED_CTLS0x48D:
3997 /*
3998 * A.3.1 - default1 class of pinbased
3999 * controls comprises bits 1,2,4
4000 */
4001 switch (i) {
4002 case 1:
4003 case 2:
4004 case 4:
4005 *out |= (1ULL << i);
4006 break;
4007 default:
4008 *out &= ~(1ULL << i);
4009 break;
4010 }
4011 break;
4012 case IA32_VMX_PROCBASED_CTLS0x482:
4013 case IA32_VMX_TRUE_PROCBASED_CTLS0x48E:
4014 /*
4015 * A.3.2 - default1 class of procbased
4016 * controls comprises bits 1, 4-6, 8,
4017 * 13-16, 26
4018 */
4019 switch (i) {
4020 case 1:
4021 case 4 ... 6:
4022 case 8:
4023 case 13 ... 16:
4024 case 26:
4025 *out |= (1ULL << i);
4026 break;
4027 default:
4028 *out &= ~(1ULL << i);
4029 break;
4030 }
4031 break;
4032 /*
4033 * Unknown secondary procbased controls
4034 * can always be set to 0
4035 */
4036 case IA32_VMX_PROCBASED2_CTLS0x48B:
4037 *out &= ~(1ULL << i);
4038 break;
4039 case IA32_VMX_EXIT_CTLS0x483:
4040 case IA32_VMX_TRUE_EXIT_CTLS0x48F:
4041 /*
4042 * A.4 - default1 class of exit
4043 * controls comprises bits 0-8, 10,
4044 * 11, 13, 14, 16, 17
4045 */
4046 switch (i) {
4047 case 0 ... 8:
4048 case 10 ... 11:
4049 case 13 ... 14:
4050 case 16 ... 17:
4051 *out |= (1ULL << i);
4052 break;
4053 default:
4054 *out &= ~(1ULL << i);
4055 break;
4056 }
4057 break;
4058 case IA32_VMX_ENTRY_CTLS0x484:
4059 case IA32_VMX_TRUE_ENTRY_CTLS0x490:
4060 /*
4061 * A.5 - default1 class of entry
4062 * controls comprises bits 0-8, 12
4063 */
4064 switch (i) {
4065 case 0 ... 8:
4066 case 12:
4067 *out |= (1ULL << i);
4068 break;
4069 default:
4070 *out &= ~(1ULL << i);
4071 break;
4072 }
4073 break;
4074 }
4075 }
4076 }
4077 }
4078
4079 return (0);
4080}
4081
4082/*
4083 * vm_get_info
4084 *
4085 * Returns information about the VM indicated by 'vip'. The 'vip_size' field
4086 * in the 'vip' parameter is used to indicate the size of the caller's buffer.
4087 * If insufficient space exists in that buffer, the required size needed is
4088 * returned in vip_size and the number of VM information structures returned
4089 * in vip_info_count is set to 0. The caller should then try the ioctl again
4090 * after allocating a sufficiently large buffer.
4091 *
4092 * Parameters:
4093 * vip: information structure identifying the VM to query
4094 *
4095 * Return values:
4096 * 0: the operation succeeded
4097 * ENOMEM: memory allocation error during processing
4098 * EFAULT: error copying data to user process
4099 */
4100int
4101vm_get_info(struct vm_info_params *vip)
4102{
4103 struct vm_info_result *out;
4104 struct vm *vm;
4105 struct vcpu *vcpu;
4106 int i, j;
4107 size_t need;
4108
4109 rw_enter_read(&vmm_softc->vm_lock);
4110 need = vmm_softc->vm_ct * sizeof(struct vm_info_result);
4111 if (vip->vip_size < need) {
4112 vip->vip_info_ct = 0;
4113 vip->vip_size = need;
4114 rw_exit_read(&vmm_softc->vm_lock);
4115 return (0);
4116 }
4117
4118 out = malloc(need, M_DEVBUF2, M_NOWAIT0x0002|M_ZERO0x0008);
4119 if (out == NULL((void *)0)) {
4120 vip->vip_info_ct = 0;
4121 rw_exit_read(&vmm_softc->vm_lock);
4122 return (ENOMEM12);
4123 }
4124
4125 i = 0;
4126 vip->vip_info_ct = vmm_softc->vm_ct;
4127 SLIST_FOREACH(vm, &vmm_softc->vm_list, vm_link)for((vm) = ((&vmm_softc->vm_list)->slh_first); (vm)
!= ((void *)0); (vm) = ((vm)->vm_link.sle_next))
{
4128 out[i].vir_memory_size = vm->vm_memory_size;
4129 out[i].vir_used_size =
4130 pmap_resident_count(vm->vm_map->pmap)((vm->vm_map->pmap)->pm_stats.resident_count) * PAGE_SIZE(1 << 12);
4131 out[i].vir_ncpus = vm->vm_vcpu_ct;
4132 out[i].vir_id = vm->vm_id;
4133 out[i].vir_creator_pid = vm->vm_creator_pid;
4134 strlcpy(out[i].vir_name, vm->vm_name, VMM_MAX_NAME_LEN64);
4135 rw_enter_read(&vm->vm_vcpu_lock);
4136 for (j = 0; j < vm->vm_vcpu_ct; j++) {
4137 out[i].vir_vcpu_state[j] = VCPU_STATE_UNKNOWN;
4138 SLIST_FOREACH(vcpu, &vm->vm_vcpu_list,for((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu
) != ((void *)0); (vcpu) = ((vcpu)->vc_vcpu_link.sle_next)
)
4139 vc_vcpu_link)for((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu
) != ((void *)0); (vcpu) = ((vcpu)->vc_vcpu_link.sle_next)
)
{
4140 if (vcpu->vc_id == j)
4141 out[i].vir_vcpu_state[j] =
4142 vcpu->vc_state;
4143 }
4144 }
4145 rw_exit_read(&vm->vm_vcpu_lock);
4146 i++;
4147 }
4148 rw_exit_read(&vmm_softc->vm_lock);
4149 if (copyout(out, vip->vip_info, need) == EFAULT14) {
4150 free(out, M_DEVBUF2, need);
4151 return (EFAULT14);
4152 }
4153
4154 free(out, M_DEVBUF2, need);
4155 return (0);
4156}
4157
4158/*
4159 * vm_terminate
4160 *
4161 * Terminates the VM indicated by 'vtp'.
4162 *
4163 * Parameters:
4164 * vtp: structure defining the VM to terminate
4165 *
4166 * Return values:
4167 * 0: the VM was terminated
4168 * !0: the VM could not be located
4169 */
4170int
4171vm_terminate(struct vm_terminate_params *vtp)
4172{
4173 struct vm *vm;
4174 struct vcpu *vcpu;
4175 u_int old, next;
4176 int error;
4177
4178 /*
4179 * Find desired VM
4180 */
4181 rw_enter_write(&vmm_softc->vm_lock);
4182 error = vm_find(vtp->vtp_vm_id, &vm);
4183
4184 if (error == 0) {
4185 rw_enter_read(&vm->vm_vcpu_lock);
4186 SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link)for((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu
) != ((void *)0); (vcpu) = ((vcpu)->vc_vcpu_link.sle_next)
)
{
4187 do {
4188 old = vcpu->vc_state;
4189 if (old == VCPU_STATE_RUNNING)
4190 next = VCPU_STATE_REQTERM;
4191 else if (old == VCPU_STATE_STOPPED)
4192 next = VCPU_STATE_TERMINATED;
4193 else /* must be REQTERM or TERMINATED */
4194 break;
4195 } while (old != atomic_cas_uint(&vcpu->vc_state,_atomic_cas_uint((&vcpu->vc_state), (old), (next))
4196 old, next)_atomic_cas_uint((&vcpu->vc_state), (old), (next)));
4197 }
4198 rw_exit_read(&vm->vm_vcpu_lock);
4199 } else {
4200 rw_exit_write(&vmm_softc->vm_lock);
4201 return (error);
4202 }
4203
4204 SLIST_REMOVE(&vmm_softc->vm_list, vm, vm, vm_link)do { if ((&vmm_softc->vm_list)->slh_first == (vm)) {
do { ((&vmm_softc->vm_list))->slh_first = ((&vmm_softc
->vm_list))->slh_first->vm_link.sle_next; } while (0
); } else { struct vm *curelm = (&vmm_softc->vm_list)->
slh_first; while (curelm->vm_link.sle_next != (vm)) curelm
= curelm->vm_link.sle_next; curelm->vm_link.sle_next =
curelm->vm_link.sle_next->vm_link.sle_next; } ((vm)->
vm_link.sle_next) = ((void *)-1); } while (0)
;
4205 if (vm->vm_vcpus_running == 0)
4206 vm_teardown(vm);
4207
4208 rw_exit_write(&vmm_softc->vm_lock);
4209
4210 return (0);
4211}
4212
4213/*
4214 * vm_run
4215 *
4216 * Run the vm / vcpu specified by 'vrp'
4217 *
4218 * Parameters:
4219 * vrp: structure defining the VM to run
4220 *
4221 * Return value:
4222 * ENOENT: the VM defined in 'vrp' could not be located
4223 * EBUSY: the VM defined in 'vrp' is already running
4224 * EFAULT: error copying data from userspace (vmd) on return from previous
4225 * exit.
4226 * EAGAIN: help is needed from vmd(8) (device I/O or exit vmm(4) cannot
4227 * handle in-kernel.)
4228 * 0: the run loop exited and no help is needed from vmd(8)
4229 */
4230int
4231vm_run(struct vm_run_params *vrp)
4232{
4233 struct vm *vm;
4234 struct vcpu *vcpu;
4235 int ret = 0, error;
4236 u_int old, next;
4237
4238 /*
4239 * Find desired VM
4240 */
4241 rw_enter_read(&vmm_softc->vm_lock);
4242 error = vm_find(vrp->vrp_vm_id, &vm);
4243
4244 /*
4245 * Attempt to locate the requested VCPU. If found, attempt to
4246 * to transition from VCPU_STATE_STOPPED -> VCPU_STATE_RUNNING.
4247 * Failure to make the transition indicates the VCPU is busy.
4248 */
4249 if (error == 0) {
4250 rw_enter_read(&vm->vm_vcpu_lock);
4251 SLIST_FOREACH(vcpu, &vm->vm_vcpu_list, vc_vcpu_link)for((vcpu) = ((&vm->vm_vcpu_list)->slh_first); (vcpu
) != ((void *)0); (vcpu) = ((vcpu)->vc_vcpu_link.sle_next)
)
{
4252 if (vcpu->vc_id == vrp->vrp_vcpu_id)
4253 break;
4254 }
4255
4256 if (vcpu != NULL((void *)0)) {
4257 old = VCPU_STATE_STOPPED;
4258 next = VCPU_STATE_RUNNING;
4259
4260 if (atomic_cas_uint(&vcpu->vc_state, old, next)_atomic_cas_uint((&vcpu->vc_state), (old), (next)) != old)
4261 ret = EBUSY16;
4262 else {
4263 atomic_inc_int(&vm->vm_vcpus_running)_atomic_inc_int(&vm->vm_vcpus_running);
4264 rw_enter_write(&vcpu->vc_lock);
4265 }
4266 } else
4267 ret = ENOENT2;
4268
4269 rw_exit_read(&vm->vm_vcpu_lock);
4270 }
4271 rw_exit_read(&vmm_softc->vm_lock);
4272
4273 if (error != 0)
4274 ret = error;
4275
4276 /* Bail if errors detected in the previous steps */
4277 if (ret)
4278 return (ret);
4279
4280 /*
4281 * We may be returning from userland helping us from the last exit.
4282 * If so (vrp_continue == 1), copy in the exit data from vmd. The
4283 * exit data will be consumed before the next entry (this typically
4284 * comprises VCPU register changes as the result of vmd(8)'s actions).
4285 */
4286 if (vrp->vrp_continue) {
4287 if (copyin(vrp->vrp_exit, &vcpu->vc_exit,
4288 sizeof(struct vm_exit)) == EFAULT14) {
4289 rw_exit_write(&vcpu->vc_lock);
4290 return (EFAULT14);
4291 }
4292 }
4293
4294 /* Run the VCPU specified in vrp */
4295 if (vcpu->vc_virt_mode == VMM_MODE_VMX ||
4296 vcpu->vc_virt_mode == VMM_MODE_EPT) {
4297 ret = vcpu_run_vmx(vcpu, vrp);
4298 } else if (vcpu->vc_virt_mode == VMM_MODE_SVM ||
4299 vcpu->vc_virt_mode == VMM_MODE_RVI) {
4300 ret = vcpu_run_svm(vcpu, vrp);
4301 }
4302
4303 /*
4304 * We can set the VCPU states here without CAS because once
4305 * a VCPU is in state RUNNING or REQTERM, only the VCPU itself
4306 * can switch the state.
4307 */
4308 atomic_dec_int(&vm->vm_vcpus_running)_atomic_dec_int(&vm->vm_vcpus_running);
4309 if (vcpu->vc_state == VCPU_STATE_REQTERM) {
4310 vrp->vrp_exit_reason = VM_EXIT_TERMINATED0xFFFE;
4311 vcpu->vc_state = VCPU_STATE_TERMINATED;
4312 if (vm->vm_vcpus_running == 0) {
4313 rw_enter_write(&vmm_softc->vm_lock);
4314 vm_teardown(vm);
4315 rw_exit_write(&vmm_softc->vm_lock);
4316 }
4317 ret = 0;
4318 } else if (ret == 0 || ret == EAGAIN35) {
4319 /* If we are exiting, populate exit data so vmd can help. */
4320 vrp->vrp_exit_reason = (ret == 0) ? VM_EXIT_NONE0xFFFF
4321 : vcpu->vc_gueststate.vg_exit_reason;
4322 vrp->vrp_irqready = vcpu->vc_irqready;
4323 vcpu->vc_state = VCPU_STATE_STOPPED;
4324
4325 if (copyout(&vcpu->vc_exit, vrp->vrp_exit,
4326 sizeof(struct vm_exit)) == EFAULT14) {
4327 ret = EFAULT14;
4328 } else
4329 ret = 0;
4330 } else {
4331 vrp->vrp_exit_reason = VM_EXIT_TERMINATED0xFFFE;
4332 vcpu->vc_state = VCPU_STATE_TERMINATED;
4333 }
4334
4335 rw_exit_write(&vcpu->vc_lock);
4336
4337 return (ret);
4338}
4339
4340/*
4341 * vcpu_must_stop
4342 *
4343 * Check if we need to (temporarily) stop running the VCPU for some reason,
4344 * such as:
4345 * - the VM was requested to terminate
4346 * - the proc running this VCPU has pending signals
4347 *
4348 * Parameters:
4349 * vcpu: the VCPU to check
4350 *
4351 * Return values:
4352 * 1: the VM owning this VCPU should stop
4353 * 0: no stop is needed
4354 */
4355int
4356vcpu_must_stop(struct vcpu *vcpu)
4357{
4358 struct proc *p = curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc
;
4359
4360 if (vcpu->vc_state == VCPU_STATE_REQTERM)
4361 return (1);
4362 if (SIGPENDING(p)(((p)->p_siglist | (p)->p_p->ps_siglist) & ~(p)->
p_sigmask)
!= 0)
4363 return (1);
4364 return (0);
4365}
4366
4367/*
4368 * vmm_fpurestore
4369 *
4370 * Restore the guest's FPU state, saving the existing userland thread's
4371 * FPU context if necessary. Must be called with interrupts disabled.
4372 */
4373int
4374vmm_fpurestore(struct vcpu *vcpu)
4375{
4376 struct cpu_info *ci = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
;
4377
4378 /* save vmm's FPU state if we haven't already */
4379 if (ci->ci_flags & CPUF_USERXSTATE0x0200) {
4380 ci->ci_flags &= ~CPUF_USERXSTATE0x0200;
4381 fpusavereset(&curproc({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})->ci_curproc
->p_addr->u_pcb.pcb_savefpu);
4382 }
4383
4384 if (vcpu->vc_fpuinited) {
4385 if (xrstor_user(&vcpu->vc_g_fpu, xsave_mask)) {
4386 DPRINTF("%s: guest attempted to set invalid %s\n",
4387 __func__, "xsave/xrstor state");
4388 return EINVAL22;
4389 }
4390 }
4391
4392 if (xsave_mask) {
4393 /* Restore guest %xcr0 */
4394 if (xsetbv_user(0, vcpu->vc_gueststate.vg_xcr0)) {
4395 DPRINTF("%s: guest attempted to set invalid bits in "
4396 "xcr0 (guest %%xcr0=0x%llx, host %%xcr0=0x%llx)\n",
4397 __func__, vcpu->vc_gueststate.vg_xcr0, xsave_mask);
4398 return EINVAL22;
4399 }
4400 }
4401
4402 return 0;
4403}
4404
4405/*
4406 * vmm_fpusave
4407 *
4408 * Save the guest's FPU state. Must be called with interrupts disabled.
4409 */
4410void
4411vmm_fpusave(struct vcpu *vcpu)
4412{
4413 if (xsave_mask) {
4414 /* Save guest %xcr0 */
4415 vcpu->vc_gueststate.vg_xcr0 = xgetbv(0);
4416
4417 /* Restore host %xcr0 */
4418 xsetbv(0, xsave_mask);
4419 }
4420
4421 /*
4422 * Save full copy of FPU state - guest content is always
4423 * a subset of host's save area (see xsetbv exit handler)
4424 */
4425 fpusavereset(&vcpu->vc_g_fpu);
4426 vcpu->vc_fpuinited = 1;
4427}
4428
4429/*
4430 * vmm_translate_gva
4431 *
4432 * Translates a guest virtual address to a guest physical address by walking
4433 * the currently active page table (if needed).
4434 *
4435 * Note - this function can possibly alter the supplied VCPU state.
4436 * Specifically, it may inject exceptions depending on the current VCPU
4437 * configuration, and may alter %cr2 on #PF. Consequently, this function
4438 * should only be used as part of instruction emulation.
4439 *
4440 * Parameters:
4441 * vcpu: The VCPU this translation should be performed for (guest MMU settings
4442 * are gathered from this VCPU)
4443 * va: virtual address to translate
4444 * pa: pointer to paddr_t variable that will receive the translated physical
4445 * address. 'pa' is unchanged on error.
4446 * mode: one of PROT_READ, PROT_WRITE, PROT_EXEC indicating the mode in which
4447 * the address should be translated
4448 *
4449 * Return values:
4450 * 0: the address was successfully translated - 'pa' contains the physical
4451 * address currently mapped by 'va'.
4452 * EFAULT: the PTE for 'VA' is unmapped. A #PF will be injected in this case
4453 * and %cr2 set in the vcpu structure.
4454 * EINVAL: an error occurred reading paging table structures
4455 */
4456int
4457vmm_translate_gva(struct vcpu *vcpu, uint64_t va, uint64_t *pa, int mode)
4458{
4459 int level, shift, pdidx;
4460 uint64_t pte, pt_paddr, pte_paddr, mask, low_mask, high_mask;
4461 uint64_t shift_width, pte_size, *hva;
4462 paddr_t hpa;
4463 struct vcpu_reg_state vrs;
4464
4465 level = 0;
4466
4467 if (vmm_softc->mode == VMM_MODE_EPT ||
4468 vmm_softc->mode == VMM_MODE_VMX) {
4469 if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), &vrs))
4470 return (EINVAL22);
4471 } else if (vmm_softc->mode == VMM_MODE_RVI ||
4472 vmm_softc->mode == VMM_MODE_SVM) {
4473 if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), &vrs))
4474 return (EINVAL22);
4475 } else {
4476 printf("%s: unknown vmm mode", __func__);
4477 return (EINVAL22);
4478 }
4479
4480 DPRINTF("%s: guest %%cr0=0x%llx, %%cr3=0x%llx\n", __func__,
4481 vrs.vrs_crs[VCPU_REGS_CR0], vrs.vrs_crs[VCPU_REGS_CR3]);
4482
4483 if (!(vrs.vrs_crs[VCPU_REGS_CR00] & CR0_PG0x80000000)) {
4484 DPRINTF("%s: unpaged, va=pa=0x%llx\n", __func__,
4485 va);
4486 *pa = va;
4487 return (0);
4488 }
4489
4490 pt_paddr = vrs.vrs_crs[VCPU_REGS_CR32];
4491
4492 if (vrs.vrs_crs[VCPU_REGS_CR00] & CR0_PE0x00000001) {
4493 if (vrs.vrs_crs[VCPU_REGS_CR43] & CR4_PAE0x00000020) {
4494 pte_size = sizeof(uint64_t);
4495 shift_width = 9;
4496
4497 if (vrs.vrs_msrs[VCPU_REGS_EFER0] & EFER_LMA0x00000400) {
4498 level = 4;
4499 mask = L4_MASK0x0000ff8000000000UL;
4500 shift = L4_SHIFT39;
4501 } else {
4502 level = 3;
4503 mask = L3_MASK0x0000007fc0000000UL;
4504 shift = L3_SHIFT30;
4505 }
4506 } else {
4507 level = 2;
4508 shift_width = 10;
4509 mask = 0xFFC00000;
4510 shift = 22;
4511 pte_size = sizeof(uint32_t);
4512 }
4513 } else {
4514 return (EINVAL22);
4515 }
4516
4517 DPRINTF("%s: pte size=%lld level=%d mask=0x%llx, shift=%d, "
4518 "shift_width=%lld\n", __func__, pte_size, level, mask, shift,
4519 shift_width);
4520
4521 /* XXX: Check for R bit in segment selector and set A bit */
4522
4523 for (;level > 0; level--) {
4524 pdidx = (va & mask) >> shift;
4525 pte_paddr = (pt_paddr) + (pdidx * pte_size);
4526
4527 DPRINTF("%s: read pte level %d @ GPA 0x%llx\n", __func__,
4528 level, pte_paddr);
4529 if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, pte_paddr,
4530 &hpa)) {
4531 DPRINTF("%s: cannot extract HPA for GPA 0x%llx\n",
4532 __func__, pte_paddr);
4533 return (EINVAL22);
4534 }
4535
4536 hpa = hpa | (pte_paddr & 0xFFF);
4537 hva = (uint64_t *)PMAP_DIRECT_MAP(hpa)((vaddr_t)(((((511 - 4) * (1ULL << 39))) | 0xffff000000000000
)) + (hpa))
;
4538 DPRINTF("%s: GPA 0x%llx -> HPA 0x%llx -> HVA 0x%llx\n",
4539 __func__, pte_paddr, (uint64_t)hpa, (uint64_t)hva);
4540 if (pte_size == 8)
4541 pte = *hva;
4542 else
4543 pte = *(uint32_t *)hva;
4544
4545 DPRINTF("%s: PTE @ 0x%llx = 0x%llx\n", __func__, pte_paddr,
4546 pte);
4547
4548 /* XXX: Set CR2 */
4549 if (!(pte & PG_V0x0000000000000001UL))
4550 return (EFAULT14);
4551
4552 /* XXX: Check for SMAP */
4553 if ((mode == PROT_WRITE0x02) && !(pte & PG_RW0x0000000000000002UL))
4554 return (EPERM1);
4555
4556 if ((vcpu->vc_exit.cpl > 0) && !(pte & PG_u0x0000000000000004UL))
4557 return (EPERM1);
4558
4559 pte = pte | PG_U0x0000000000000020UL;
4560 if (mode == PROT_WRITE0x02)
4561 pte = pte | PG_M0x0000000000000040UL;
4562 *hva = pte;
4563
4564 /* XXX: EINVAL if in 32bit and PG_PS is 1 but CR4.PSE is 0 */
4565 if (pte & PG_PS0x0000000000000080UL)
4566 break;
4567
4568 if (level > 1) {
4569 pt_paddr = pte & PG_FRAME0x000ffffffffff000UL;
4570 shift -= shift_width;
4571 mask = mask >> shift_width;
4572 }
4573 }
4574
4575 low_mask = ((uint64_t)1ULL << shift) - 1;
4576 high_mask = (((uint64_t)1ULL << ((pte_size * 8) - 1)) - 1) ^ low_mask;
4577 *pa = (pte & high_mask) | (va & low_mask);
4578
4579 DPRINTF("%s: final GPA for GVA 0x%llx = 0x%llx\n", __func__,
4580 va, *pa);
4581
4582 return (0);
4583}
4584
4585
4586/*
4587 * vcpu_run_vmx
4588 *
4589 * VMX main loop used to run a VCPU.
4590 *
4591 * Parameters:
4592 * vcpu: The VCPU to run
4593 * vrp: run parameters
4594 *
4595 * Return values:
4596 * 0: The run loop exited and no help is needed from vmd
4597 * EAGAIN: The run loop exited and help from vmd is needed
4598 * EINVAL: an error occurred
4599 */
4600int
4601vcpu_run_vmx(struct vcpu *vcpu, struct vm_run_params *vrp)
4602{
4603 int ret = 0, exitinfo;
4604 struct region_descriptor gdt;
4605 struct cpu_info *ci = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
;
4606 uint64_t exit_reason, cr3, insn_error;
4607 struct schedstate_percpu *spc;
4608 struct vmx_invvpid_descriptor vid;
4609 uint64_t eii, procbased, int_st;
4610 uint16_t irq, ldt_sel;
4611 u_long s;
4612 struct region_descriptor gdtr, idtr;
4613
4614 rw_assert_wrlock(&vcpu->vc_lock);
4615
4616 if (vcpu_reload_vmcs_vmx(vcpu)) {
4617 printf("%s: failed (re)loading vmcs\n", __func__);
4618 return (EINVAL22);
4619 }
4620
4621 /*
4622 * If we are returning from userspace (vmd) because we exited
4623 * last time, fix up any needed vcpu state first. Which state
4624 * needs to be fixed up depends on what vmd populated in the
4625 * exit data structure.
4626 */
4627 irq = vrp->vrp_irq;
4628
4629 if (vrp->vrp_continue) {
4630 switch (vcpu->vc_gueststate.vg_exit_reason) {
4631 case VMX_EXIT_IO30:
4632 if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN)
4633 vcpu->vc_gueststate.vg_rax =
4634 vcpu->vc_exit.vei.vei_data;
4635 break;
4636 case VM_EXIT_NONE0xFFFF:
4637 case VMX_EXIT_HLT12:
4638 case VMX_EXIT_INT_WINDOW7:
4639 case VMX_EXIT_EXTINT1:
4640 case VMX_EXIT_EPT_VIOLATION48:
4641 case VMX_EXIT_CPUID10:
4642 case VMX_EXIT_XSETBV55:
4643 break;
4644#ifdef VMM_DEBUG
4645 case VMX_EXIT_TRIPLE_FAULT2:
4646 DPRINTF("%s: vm %d vcpu %d triple fault\n",
4647 __func__, vcpu->vc_parent->vm_id,
4648 vcpu->vc_id);
4649 vmx_vcpu_dump_regs(vcpu);
4650 dump_vcpu(vcpu);
4651 vmx_dump_vmcs(vcpu);
4652 break;
4653 case VMX_EXIT_ENTRY_FAILED_GUEST_STATE33:
4654 DPRINTF("%s: vm %d vcpu %d failed entry "
4655 "due to invalid guest state\n",
4656 __func__, vcpu->vc_parent->vm_id,
4657 vcpu->vc_id);
4658 vmx_vcpu_dump_regs(vcpu);
4659 dump_vcpu(vcpu);
4660 return (EINVAL22);
4661 default:
4662 DPRINTF("%s: unimplemented exit type %d (%s)\n",
4663 __func__,
4664 vcpu->vc_gueststate.vg_exit_reason,
4665 vmx_exit_reason_decode(
4666 vcpu->vc_gueststate.vg_exit_reason));
4667 vmx_vcpu_dump_regs(vcpu);
4668 dump_vcpu(vcpu);
4669 break;
4670#endif /* VMM_DEBUG */
4671 }
4672 }
4673
4674 setregion(&gdt, ci->ci_gdt, GDT_SIZE((6 << 3) + (1 << 4)) - 1);
4675 if (gdt.rd_base == 0) {
4676 printf("%s: setregion\n", __func__);
4677 return (EINVAL22);
4678 }
4679
4680 /* Host GDTR base */
4681 if (vmwrite(VMCS_HOST_IA32_GDTR_BASE0x6C0C, gdt.rd_base)) {
4682 printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__,
4683 VMCS_HOST_IA32_GDTR_BASE0x6C0C, gdt.rd_base);
4684 return (EINVAL22);
4685 }
4686
4687 /* Host TR base */
4688 if (vmwrite(VMCS_HOST_IA32_TR_BASE0x6C0A, (uint64_t)ci->ci_tss)) {
4689 printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__,
4690 VMCS_HOST_IA32_TR_BASE0x6C0A, (uint64_t)ci->ci_tss);
4691 return (EINVAL22);
4692 }
4693
4694 /* Host CR3 */
4695 cr3 = rcr3();
4696 if (vmwrite(VMCS_HOST_IA32_CR30x6C02, cr3)) {
4697 printf("%s: vmwrite(0x%04X, 0x%llx)\n", __func__,
4698 VMCS_HOST_IA32_CR30x6C02, cr3);
4699 return (EINVAL22);
4700 }
4701
4702 /* Handle vmd(8) injected interrupts */
4703 /* Is there an interrupt pending injection? */
4704 if (irq != 0xFFFF) {
4705 if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST0x4824, &int_st)) {
4706 printf("%s: can't get interruptibility state\n",
4707 __func__);
4708 return (EINVAL22);
4709 }
4710
4711 /* Interruptibility state 0x3 covers NMIs and STI */
4712 if (!(int_st & 0x3) && vcpu->vc_irqready) {
4713 eii = (irq & 0xFF);
4714 eii |= (1ULL << 31); /* Valid */
4715 eii |= (0ULL << 8); /* Hardware Interrupt */
4716 if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO0x4016, eii)) {
4717 printf("vcpu_run_vmx: can't vector "
4718 "interrupt to guest\n");
4719 return (EINVAL22);
4720 }
4721
4722 irq = 0xFFFF;
4723 }
4724 } else if (!vcpu->vc_intr) {
4725 /*
4726 * Disable window exiting
4727 */
4728 if (vmread(VMCS_PROCBASED_CTLS0x4002, &procbased)) {
4729 printf("%s: can't read procbased ctls on exit\n",
4730 __func__);
4731 return (EINVAL22);
4732 } else {
4733 procbased &= ~IA32_VMX_INTERRUPT_WINDOW_EXITING(1ULL << 2);
4734 if (vmwrite(VMCS_PROCBASED_CTLS0x4002, procbased)) {
4735 printf("%s: can't write procbased ctls "
4736 "on exit\n", __func__);
4737 return (EINVAL22);
4738 }
4739 }
4740 }
4741
4742 while (ret == 0) {
4743#ifdef VMM_DEBUG
4744 paddr_t pa = 0ULL;
4745 vmptrst(&pa);
4746 KASSERT(pa == vcpu->vc_control_pa)((pa == vcpu->vc_control_pa) ? (void)0 : __assert("diagnostic "
, "/usr/src/sys/arch/amd64/amd64/vmm.c", 4746, "pa == vcpu->vc_control_pa"
))
;
4747#endif /* VMM_DEBUG */
4748
4749 vmm_update_pvclock(vcpu);
4750
4751 /* Inject event if present */
4752 if (vcpu->vc_event != 0) {
4753 eii = (vcpu->vc_event & 0xFF);
4754 eii |= (1ULL << 31); /* Valid */
4755
4756 /* Set the "Send error code" flag for certain vectors */
4757 switch (vcpu->vc_event & 0xFF) {
4758 case VMM_EX_DF8:
4759 case VMM_EX_TS10:
4760 case VMM_EX_NP11:
4761 case VMM_EX_SS12:
4762 case VMM_EX_GP13:
4763 case VMM_EX_PF14:
4764 case VMM_EX_AC17:
4765 eii |= (1ULL << 11);
4766 }
4767
4768 eii |= (3ULL << 8); /* Hardware Exception */
4769 if (vmwrite(VMCS_ENTRY_INTERRUPTION_INFO0x4016, eii)) {
4770 printf("%s: can't vector event to guest\n",
4771 __func__);
4772 ret = EINVAL22;
4773 break;
4774 }
4775
4776 if (vmwrite(VMCS_ENTRY_EXCEPTION_ERROR_CODE0x4018, 0)) {
4777 printf("%s: can't write error code to guest\n",
4778 __func__);
4779 ret = EINVAL22;
4780 break;
4781 }
4782
4783 vcpu->vc_event = 0;
4784 }
4785
4786 if (vcpu->vc_vmx_vpid_enabled) {
4787 /* Invalidate old TLB mappings */
4788 vid.vid_vpid = vcpu->vc_parent->vm_id;
4789 vid.vid_addr = 0;
4790 invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB0x3, &vid);
4791 }
4792
4793 /* Start / resume the VCPU */
4794
4795 /* Disable interrupts and save the current host FPU state. */
4796 s = intr_disable();
4797 if ((ret = vmm_fpurestore(vcpu))) {
4798 intr_restore(s);
4799 break;
4800 }
4801
4802 sgdt(&gdtr);
4803 sidt(&idtr);
4804 sldt(&ldt_sel);
4805
4806 TRACEPOINT(vmm, guest_enter, vcpu, vrp)do { extern struct dt_probe (dt_static_vmm_guest_enter); struct
dt_probe *dtp = &(dt_static_vmm_guest_enter); struct dt_provider
*dtpv = dtp->dtp_prov; if (__builtin_expect(((dt_tracing)
!= 0), 0) && __builtin_expect(((dtp->dtp_recording
) != 0), 0)) { dtpv->dtpv_enter(dtpv, dtp, vcpu, vrp); } }
while (0)
;
4807
4808 ret = vmx_enter_guest(&vcpu->vc_control_pa,
4809 &vcpu->vc_gueststate,
4810 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1),
4811 ci->ci_vmm_cap.vcc_vmx.vmx_has_l1_flush_msr);
4812
4813 bare_lgdt(&gdtr);
4814 lidt(&idtr);
4815 lldt(ldt_sel);
4816
4817 /*
4818 * On exit, interrupts are disabled, and we are running with
4819 * the guest FPU state still possibly on the CPU. Save the FPU
4820 * state before re-enabling interrupts.
4821 */
4822 vmm_fpusave(vcpu);
4823 intr_restore(s);
4824
4825 TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason)do { extern struct dt_probe (dt_static_vmm_guest_exit); struct
dt_probe *dtp = &(dt_static_vmm_guest_exit); struct dt_provider
*dtpv = dtp->dtp_prov; if (__builtin_expect(((dt_tracing)
!= 0), 0) && __builtin_expect(((dtp->dtp_recording
) != 0), 0)) { dtpv->dtpv_enter(dtpv, dtp, vcpu, vrp, exit_reason
); } } while (0)
;
4826
4827 atomic_swap_uint(&vcpu->vc_vmx_vmcs_state, VMCS_LAUNCHED)_atomic_swap_uint((&vcpu->vc_vmx_vmcs_state), (1));
4828 exit_reason = VM_EXIT_NONE0xFFFF;
4829
4830 /* If we exited successfully ... */
4831 if (ret == 0) {
4832 /*
4833 * ret == 0 implies we entered the guest, and later
4834 * exited for some valid reason
4835 */
4836 exitinfo = vmx_get_exit_info(
4837 &vcpu->vc_gueststate.vg_rip, &exit_reason);
4838 if (vmread(VMCS_GUEST_IA32_RFLAGS0x6820,
4839 &vcpu->vc_gueststate.vg_rflags)) {
4840 printf("%s: can't read guest rflags during "
4841 "exit\n", __func__);
4842 ret = EINVAL22;
4843 break;
4844 }
4845
4846 /* Update our state */
4847 if (!(exitinfo & VMX_EXIT_INFO_HAVE_RIP0x1)) {
4848 printf("%s: cannot read guest rip\n", __func__);
4849 ret = EINVAL22;
4850 break;
4851 }
4852
4853 if (!(exitinfo & VMX_EXIT_INFO_HAVE_REASON0x2)) {
4854 printf("%s: cant read exit reason\n", __func__);
4855 ret = EINVAL22;
4856 break;
4857 }
4858
4859 /*
4860 * Handle the exit. This will alter "ret" to EAGAIN if
4861 * the exit handler determines help from vmd is needed.
4862 */
4863 vcpu->vc_gueststate.vg_exit_reason = exit_reason;
4864 ret = vmx_handle_exit(vcpu);
4865
4866 if (vcpu->vc_gueststate.vg_rflags & PSL_I0x00000200)
4867 vcpu->vc_irqready = 1;
4868 else
4869 vcpu->vc_irqready = 0;
4870
4871 /*
4872 * If not ready for interrupts, but interrupts pending,
4873 * enable interrupt window exiting.
4874 */
4875 if (vcpu->vc_irqready == 0 && vcpu->vc_intr) {
4876 if (vmread(VMCS_PROCBASED_CTLS0x4002, &procbased)) {
4877 printf("%s: can't read procbased ctls "
4878 "on intwin exit\n", __func__);
4879 ret = EINVAL22;
4880 break;
4881 }
4882
4883 procbased |= IA32_VMX_INTERRUPT_WINDOW_EXITING(1ULL << 2);
4884 if (vmwrite(VMCS_PROCBASED_CTLS0x4002, procbased)) {
4885 printf("%s: can't write procbased ctls "
4886 "on intwin exit\n", __func__);
4887 ret = EINVAL22;
4888 break;
4889 }
4890 }
4891
4892 /*
4893 * Exit to vmd if we are terminating, failed to enter,
4894 * or need help (device I/O)
4895 */
4896 if (ret || vcpu_must_stop(vcpu))
4897 break;
4898
4899 if (vcpu->vc_intr && vcpu->vc_irqready) {
4900 ret = EAGAIN35;
4901 break;
4902 }
4903
4904 /* Check if we should yield - don't hog the {p,v}pu */
4905 spc = &ci->ci_schedstate;
4906 if (spc->spc_schedflags & SPCF_SHOULDYIELD0x0002)
4907 break;
4908
4909 } else {
4910 /*
4911 * We failed vmresume or vmlaunch for some reason,
4912 * typically due to invalid vmcs state or other
4913 * reasons documented in SDM Vol 3C 30.4.
4914 */
4915 switch (ret) {
4916 case VMX_FAIL_LAUNCH_INVALID_VMCS2:
4917 printf("%s: failed %s with invalid vmcs\n",
4918 __func__,
4919 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1
4920 ? "vmresume" : "vmlaunch"));
4921 break;
4922 case VMX_FAIL_LAUNCH_VALID_VMCS3:
4923 printf("%s: failed %s with valid vmcs\n",
4924 __func__,
4925 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1
4926 ? "vmresume" : "vmlaunch"));
4927 break;
4928 default:
4929 printf("%s: failed %s for unknown reason\n",
4930 __func__,
4931 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1
4932 ? "vmresume" : "vmlaunch"));
4933 }
4934
4935 ret = EINVAL22;
4936
4937 /* Try to translate a vmfail error code, if possible. */
4938 if (vmread(VMCS_INSTRUCTION_ERROR0x4400, &insn_error)) {
4939 printf("%s: can't read insn error field\n",
4940 __func__);
4941 } else
4942 printf("%s: error code = %lld, %s\n", __func__,
4943 insn_error,
4944 vmx_instruction_error_decode(insn_error));
4945#ifdef VMM_DEBUG
4946 vmx_vcpu_dump_regs(vcpu);
4947 dump_vcpu(vcpu);
4948#endif /* VMM_DEBUG */
4949 }
4950 }
4951
4952 vcpu->vc_last_pcpu = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
;
4953
4954 /* Copy the VCPU register state to the exit structure */
4955 if (vcpu_readregs_vmx(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), &vcpu->vc_exit.vrs))
4956 ret = EINVAL22;
4957 vcpu->vc_exit.cpl = vmm_get_guest_cpu_cpl(vcpu);
4958
4959 return (ret);
4960}
4961
4962/*
4963 * vmx_handle_intr
4964 *
4965 * Handle host (external) interrupts. We read which interrupt fired by
4966 * extracting the vector from the VMCS and dispatch the interrupt directly
4967 * to the host using vmm_dispatch_intr.
4968 */
4969void
4970vmx_handle_intr(struct vcpu *vcpu)
4971{
4972 uint8_t vec;
4973 uint64_t eii;
4974 struct gate_descriptor *idte;
4975 vaddr_t handler;
4976
4977 if (vmread(VMCS_EXIT_INTERRUPTION_INFO0x4404, &eii)) {
4978 printf("%s: can't obtain intr info\n", __func__);
4979 return;
4980 }
4981
4982 vec = eii & 0xFF;
4983
4984 /* XXX check "error valid" code in eii, abort if 0 */
4985 idte=&idt[vec];
4986 handler = idte->gd_looffset + ((uint64_t)idte->gd_hioffset << 16);
4987 vmm_dispatch_intr(handler);
4988}
4989
4990/*
4991 * svm_handle_hlt
4992 *
4993 * Handle HLT exits
4994 *
4995 * Parameters
4996 * vcpu: The VCPU that executed the HLT instruction
4997 *
4998 * Return Values:
4999 * EIO: The guest halted with interrupts disabled
5000 * EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU
5001 * until a virtual interrupt is ready to inject
5002 */
5003int
5004svm_handle_hlt(struct vcpu *vcpu)
5005{
5006 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
5007 uint64_t rflags = vmcb->v_rflags;
5008
5009 /* All HLT insns are 1 byte */
5010 vcpu->vc_gueststate.vg_rip += 1;
5011
5012 if (!(rflags & PSL_I0x00000200)) {
5013 DPRINTF("%s: guest halted with interrupts disabled\n",
5014 __func__);
5015 return (EIO5);
5016 }
5017
5018 return (EAGAIN35);
5019}
5020
5021/*
5022 * vmx_handle_hlt
5023 *
5024 * Handle HLT exits. HLTing the CPU with interrupts disabled will terminate
5025 * the guest (no NMIs handled) by returning EIO to vmd.
5026 *
5027 * Parameters:
5028 * vcpu: The VCPU that executed the HLT instruction
5029 *
5030 * Return Values:
5031 * EINVAL: An error occurred extracting information from the VMCS, or an
5032 * invalid HLT instruction was encountered
5033 * EIO: The guest halted with interrupts disabled
5034 * EAGAIN: Normal return to vmd - vmd should halt scheduling this VCPU
5035 * until a virtual interrupt is ready to inject
5036 *
5037 */
5038int
5039vmx_handle_hlt(struct vcpu *vcpu)
5040{
5041 uint64_t insn_length, rflags;
5042
5043 if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
5044 printf("%s: can't obtain instruction length\n", __func__);
5045 return (EINVAL22);
5046 }
5047
5048 if (vmread(VMCS_GUEST_IA32_RFLAGS0x6820, &rflags)) {
5049 printf("%s: can't obtain guest rflags\n", __func__);
5050 return (EINVAL22);
5051 }
5052
5053 if (insn_length != 1) {
5054 DPRINTF("%s: HLT with instruction length %lld not supported\n",
5055 __func__, insn_length);
5056 return (EINVAL22);
5057 }
5058
5059 if (!(rflags & PSL_I0x00000200)) {
5060 DPRINTF("%s: guest halted with interrupts disabled\n",
5061 __func__);
5062 return (EIO5);
5063 }
5064
5065 vcpu->vc_gueststate.vg_rip += insn_length;
5066 return (EAGAIN35);
5067}
5068
5069/*
5070 * vmx_get_exit_info
5071 *
5072 * Returns exit information containing the current guest RIP and exit reason
5073 * in rip and exit_reason. The return value is a bitmask indicating whether
5074 * reading the RIP and exit reason was successful.
5075 */
5076int
5077vmx_get_exit_info(uint64_t *rip, uint64_t *exit_reason)
5078{
5079 int rv = 0;
5080
5081 if (vmread(VMCS_GUEST_IA32_RIP0x681E, rip) == 0) {
5082 rv |= VMX_EXIT_INFO_HAVE_RIP0x1;
5083 if (vmread(VMCS_EXIT_REASON0x4402, exit_reason) == 0)
5084 rv |= VMX_EXIT_INFO_HAVE_REASON0x2;
5085 }
5086 return (rv);
5087}
5088
5089/*
5090 * svm_handle_exit
5091 *
5092 * Handle exits from the VM by decoding the exit reason and calling various
5093 * subhandlers as needed.
5094 */
5095int
5096svm_handle_exit(struct vcpu *vcpu)
5097{
5098 uint64_t exit_reason, rflags;
5099 int update_rip, ret = 0;
5100 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
5101
5102 update_rip = 0;
5103 exit_reason = vcpu->vc_gueststate.vg_exit_reason;
5104 rflags = vcpu->vc_gueststate.vg_rflags;
5105
5106 switch (exit_reason) {
5107 case SVM_VMEXIT_VINTR0x64:
5108 if (!(rflags & PSL_I0x00000200)) {
5109 DPRINTF("%s: impossible interrupt window exit "
5110 "config\n", __func__);
5111 ret = EINVAL22;
5112 break;
5113 }
5114
5115 /*
5116 * Guest is now ready for interrupts, so disable interrupt
5117 * window exiting.
5118 */
5119 vmcb->v_irq = 0;
5120 vmcb->v_intr_vector = 0;
5121 vmcb->v_intercept1 &= ~SVM_INTERCEPT_VINTR(1UL << 4);
5122 svm_set_dirty(vcpu, SVM_CLEANBITS_TPR(1 << 3) | SVM_CLEANBITS_I(1 << 0));
5123
5124 update_rip = 0;
5125 break;
5126 case SVM_VMEXIT_INTR0x60:
5127 update_rip = 0;
5128 break;
5129 case SVM_VMEXIT_SHUTDOWN0x7F:
5130 update_rip = 0;
5131 ret = EAGAIN35;
5132 break;
5133 case SVM_VMEXIT_NPF0x400:
5134 ret = svm_handle_np_fault(vcpu);
5135 break;
5136 case SVM_VMEXIT_CPUID0x72:
5137 ret = vmm_handle_cpuid(vcpu);
5138 update_rip = 1;
5139 break;
5140 case SVM_VMEXIT_MSR0x7C:
5141 ret = svm_handle_msr(vcpu);
5142 update_rip = 1;
5143 break;
5144 case SVM_VMEXIT_XSETBV0x8D:
5145 ret = svm_handle_xsetbv(vcpu);
5146 update_rip = 1;
5147 break;
5148 case SVM_VMEXIT_IOIO0x7B:
5149 ret = svm_handle_inout(vcpu);
5150 update_rip = 1;
5151 break;
5152 case SVM_VMEXIT_HLT0x78:
5153 ret = svm_handle_hlt(vcpu);
5154 update_rip = 1;
5155 break;
5156 case SVM_VMEXIT_MWAIT0x8B:
5157 case SVM_VMEXIT_MWAIT_CONDITIONAL0x8C:
5158 case SVM_VMEXIT_MONITOR0x8A:
5159 case SVM_VMEXIT_VMRUN0x80:
5160 case SVM_VMEXIT_VMMCALL0x81:
5161 case SVM_VMEXIT_VMLOAD0x82:
5162 case SVM_VMEXIT_VMSAVE0x83:
5163 case SVM_VMEXIT_STGI0x84:
5164 case SVM_VMEXIT_CLGI0x85:
5165 case SVM_VMEXIT_SKINIT0x86:
5166 case SVM_VMEXIT_RDTSCP0x87:
5167 case SVM_VMEXIT_ICEBP0x88:
5168 case SVM_VMEXIT_INVLPGA0x7A:
5169 ret = vmm_inject_ud(vcpu);
5170 update_rip = 0;
5171 break;
5172 default:
5173 DPRINTF("%s: unhandled exit 0x%llx (pa=0x%llx)\n", __func__,
5174 exit_reason, (uint64_t)vcpu->vc_control_pa);
5175 return (EINVAL22);
5176 }
5177
5178 if (update_rip) {
5179 vmcb->v_rip = vcpu->vc_gueststate.vg_rip;
5180
5181 if (rflags & PSL_T0x00000100) {
5182 if (vmm_inject_db(vcpu)) {
5183 printf("%s: can't inject #DB exception to "
5184 "guest", __func__);
5185 return (EINVAL22);
5186 }
5187 }
5188 }
5189
5190 /* Enable SVME in EFER (must always be set) */
5191 vmcb->v_efer |= EFER_SVME0x00001000;
5192 svm_set_dirty(vcpu, SVM_CLEANBITS_CR(1 << 5));
5193
5194 return (ret);
5195}
5196
5197/*
5198 * vmx_handle_exit
5199 *
5200 * Handle exits from the VM by decoding the exit reason and calling various
5201 * subhandlers as needed.
5202 */
5203int
5204vmx_handle_exit(struct vcpu *vcpu)
5205{
5206 uint64_t exit_reason, rflags, istate;
5207 int update_rip, ret = 0;
5208
5209 update_rip = 0;
5210 exit_reason = vcpu->vc_gueststate.vg_exit_reason;
5211 rflags = vcpu->vc_gueststate.vg_rflags;
5212
5213 switch (exit_reason) {
1
Control jumps to 'case 10:' at line 5228
5214 case VMX_EXIT_INT_WINDOW7:
5215 if (!(rflags & PSL_I0x00000200)) {
5216 DPRINTF("%s: impossible interrupt window exit "
5217 "config\n", __func__);
5218 ret = EINVAL22;
5219 break;
5220 }
5221
5222 ret = EAGAIN35;
5223 update_rip = 0;
5224 break;
5225 case VMX_EXIT_EPT_VIOLATION48:
5226 ret = vmx_handle_np_fault(vcpu);
5227 break;
5228 case VMX_EXIT_CPUID10:
5229 ret = vmm_handle_cpuid(vcpu);
2
Calling 'vmm_handle_cpuid'
5230 update_rip = 1;
5231 break;
5232 case VMX_EXIT_IO30:
5233 ret = vmx_handle_inout(vcpu);
5234 update_rip = 1;
5235 break;
5236 case VMX_EXIT_EXTINT1:
5237 vmx_handle_intr(vcpu);
5238 update_rip = 0;
5239 break;
5240 case VMX_EXIT_CR_ACCESS28:
5241 ret = vmx_handle_cr(vcpu);
5242 update_rip = 1;
5243 break;
5244 case VMX_EXIT_HLT12:
5245 ret = vmx_handle_hlt(vcpu);
5246 update_rip = 1;
5247 break;
5248 case VMX_EXIT_RDMSR31:
5249 ret = vmx_handle_rdmsr(vcpu);
5250 update_rip = 1;
5251 break;
5252 case VMX_EXIT_WRMSR32:
5253 ret = vmx_handle_wrmsr(vcpu);
5254 update_rip = 1;
5255 break;
5256 case VMX_EXIT_XSETBV55:
5257 ret = vmx_handle_xsetbv(vcpu);
5258 update_rip = 1;
5259 break;
5260 case VMX_EXIT_MWAIT36:
5261 case VMX_EXIT_MONITOR39:
5262 case VMX_EXIT_VMXON27:
5263 case VMX_EXIT_VMWRITE25:
5264 case VMX_EXIT_VMREAD23:
5265 case VMX_EXIT_VMLAUNCH20:
5266 case VMX_EXIT_VMRESUME24:
5267 case VMX_EXIT_VMPTRLD21:
5268 case VMX_EXIT_VMPTRST22:
5269 case VMX_EXIT_VMCLEAR19:
5270 case VMX_EXIT_VMCALL18:
5271 case VMX_EXIT_VMFUNC59:
5272 case VMX_EXIT_VMXOFF26:
5273 case VMX_EXIT_INVVPID53:
5274 case VMX_EXIT_INVEPT50:
5275 ret = vmm_inject_ud(vcpu);
5276 update_rip = 0;
5277 break;
5278 case VMX_EXIT_TRIPLE_FAULT2:
5279#ifdef VMM_DEBUG
5280 DPRINTF("%s: vm %d vcpu %d triple fault\n", __func__,
5281 vcpu->vc_parent->vm_id, vcpu->vc_id);
5282 vmx_vcpu_dump_regs(vcpu);
5283 dump_vcpu(vcpu);
5284 vmx_dump_vmcs(vcpu);
5285#endif /* VMM_DEBUG */
5286 ret = EAGAIN35;
5287 update_rip = 0;
5288 break;
5289 default:
5290#ifdef VMM_DEBUG
5291 DPRINTF("%s: unhandled exit 0x%llx (%s)\n", __func__,
5292 exit_reason, vmx_exit_reason_decode(exit_reason));
5293#endif /* VMM_DEBUG */
5294 return (EINVAL22);
5295 }
5296
5297 if (update_rip) {
5298 if (vmwrite(VMCS_GUEST_IA32_RIP0x681E,
5299 vcpu->vc_gueststate.vg_rip)) {
5300 printf("%s: can't advance rip\n", __func__);
5301 return (EINVAL22);
5302 }
5303
5304 if (vmread(VMCS_GUEST_INTERRUPTIBILITY_ST0x4824,
5305 &istate)) {
5306 printf("%s: can't read interruptibility state\n",
5307 __func__);
5308 return (EINVAL22);
5309 }
5310
5311 /* Interruptibility state 0x3 covers NMIs and STI */
5312 istate &= ~0x3;
5313
5314 if (vmwrite(VMCS_GUEST_INTERRUPTIBILITY_ST0x4824,
5315 istate)) {
5316 printf("%s: can't write interruptibility state\n",
5317 __func__);
5318 return (EINVAL22);
5319 }
5320
5321 if (rflags & PSL_T0x00000100) {
5322 if (vmm_inject_db(vcpu)) {
5323 printf("%s: can't inject #DB exception to "
5324 "guest", __func__);
5325 return (EINVAL22);
5326 }
5327 }
5328 }
5329
5330 return (ret);
5331}
5332
5333/*
5334 * vmm_inject_gp
5335 *
5336 * Injects an #GP exception into the guest VCPU.
5337 *
5338 * Parameters:
5339 * vcpu: vcpu to inject into
5340 *
5341 * Return values:
5342 * Always 0
5343 */
5344int
5345vmm_inject_gp(struct vcpu *vcpu)
5346{
5347 DPRINTF("%s: injecting #GP at guest %%rip 0x%llx\n", __func__,
5348 vcpu->vc_gueststate.vg_rip);
5349 vcpu->vc_event = VMM_EX_GP13;
5350
5351 return (0);
5352}
5353
5354/*
5355 * vmm_inject_ud
5356 *
5357 * Injects an #UD exception into the guest VCPU.
5358 *
5359 * Parameters:
5360 * vcpu: vcpu to inject into
5361 *
5362 * Return values:
5363 * Always 0
5364 */
5365int
5366vmm_inject_ud(struct vcpu *vcpu)
5367{
5368 DPRINTF("%s: injecting #UD at guest %%rip 0x%llx\n", __func__,
5369 vcpu->vc_gueststate.vg_rip);
5370 vcpu->vc_event = VMM_EX_UD6;
5371
5372 return (0);
5373}
5374
5375/*
5376 * vmm_inject_db
5377 *
5378 * Injects a #DB exception into the guest VCPU.
5379 *
5380 * Parameters:
5381 * vcpu: vcpu to inject into
5382 *
5383 * Return values:
5384 * Always 0
5385 */
5386int
5387vmm_inject_db(struct vcpu *vcpu)
5388{
5389 DPRINTF("%s: injecting #DB at guest %%rip 0x%llx\n", __func__,
5390 vcpu->vc_gueststate.vg_rip);
5391 vcpu->vc_event = VMM_EX_DB1;
5392
5393 return (0);
5394}
5395
5396/*
5397 * vmm_get_guest_memtype
5398 *
5399 * Returns the type of memory 'gpa' refers to in the context of vm 'vm'
5400 */
5401int
5402vmm_get_guest_memtype(struct vm *vm, paddr_t gpa)
5403{
5404 int i;
5405 struct vm_mem_range *vmr;
5406
5407 if (gpa >= VMM_PCI_MMIO_BAR_BASE0xF0000000ULL && gpa <= VMM_PCI_MMIO_BAR_END0xFFFFFFFFULL) {
5408 DPRINTF("guest mmio access @ 0x%llx\n", (uint64_t)gpa);
5409 return (VMM_MEM_TYPE_REGULAR);
5410 }
5411
5412 /* XXX Use binary search? */
5413 for (i = 0; i < vm->vm_nmemranges; i++) {
5414 vmr = &vm->vm_memranges[i];
5415
5416 /*
5417 * vm_memranges are ascending. gpa can no longer be in one of
5418 * the memranges
5419 */
5420 if (gpa < vmr->vmr_gpa)
5421 break;
5422
5423 if (gpa < vmr->vmr_gpa + vmr->vmr_size)
5424 return (VMM_MEM_TYPE_REGULAR);
5425 }
5426
5427 DPRINTF("guest memtype @ 0x%llx unknown\n", (uint64_t)gpa);
5428 return (VMM_MEM_TYPE_UNKNOWN);
5429}
5430
5431/*
5432 * vmx_get_exit_qualification
5433 *
5434 * Return the current VMCS' exit qualification information
5435 */
5436int
5437vmx_get_exit_qualification(uint64_t *exit_qualification)
5438{
5439 if (vmread(VMCS_GUEST_EXIT_QUALIFICATION0x6400, exit_qualification)) {
5440 printf("%s: can't extract exit qual\n", __func__);
5441 return (EINVAL22);
5442 }
5443
5444 return (0);
5445}
5446
5447/*
5448 * vmx_get_guest_faulttype
5449 *
5450 * Determines the type (R/W/X) of the last fault on the VCPU last run on
5451 * this PCPU.
5452 */
5453int
5454vmx_get_guest_faulttype(void)
5455{
5456 uint64_t exit_qual;
5457 uint64_t presentmask = IA32_VMX_EPT_FAULT_WAS_READABLE(1ULL << 3) |
5458 IA32_VMX_EPT_FAULT_WAS_WRITABLE(1ULL << 4) | IA32_VMX_EPT_FAULT_WAS_EXECABLE(1ULL << 5);
5459 vm_prot_t prot, was_prot;
5460
5461 if (vmx_get_exit_qualification(&exit_qual))
5462 return (-1);
5463
5464 if ((exit_qual & presentmask) == 0)
5465 return VM_FAULT_INVALID((vm_fault_t) 0x0);
5466
5467 was_prot = 0;
5468 if (exit_qual & IA32_VMX_EPT_FAULT_WAS_READABLE(1ULL << 3))
5469 was_prot |= PROT_READ0x01;
5470 if (exit_qual & IA32_VMX_EPT_FAULT_WAS_WRITABLE(1ULL << 4))
5471 was_prot |= PROT_WRITE0x02;
5472 if (exit_qual & IA32_VMX_EPT_FAULT_WAS_EXECABLE(1ULL << 5))
5473 was_prot |= PROT_EXEC0x04;
5474
5475 prot = 0;
5476 if (exit_qual & IA32_VMX_EPT_FAULT_READ(1ULL << 0))
5477 prot = PROT_READ0x01;
5478 else if (exit_qual & IA32_VMX_EPT_FAULT_WRITE(1ULL << 1))
5479 prot = PROT_WRITE0x02;
5480 else if (exit_qual & IA32_VMX_EPT_FAULT_EXEC(1ULL << 2))
5481 prot = PROT_EXEC0x04;
5482
5483 if ((was_prot & prot) == 0)
5484 return VM_FAULT_PROTECT((vm_fault_t) 0x1);
5485
5486 return (-1);
5487}
5488
5489/*
5490 * svm_get_guest_faulttype
5491 *
5492 * Determines the type (R/W/X) of the last fault on the VCPU last run on
5493 * this PCPU.
5494 */
5495int
5496svm_get_guest_faulttype(struct vmcb *vmcb)
5497{
5498 if (!(vmcb->v_exitinfo1 & 0x1))
5499 return VM_FAULT_INVALID((vm_fault_t) 0x0);
5500 return VM_FAULT_PROTECT((vm_fault_t) 0x1);
5501}
5502
5503/*
5504 * svm_fault_page
5505 *
5506 * Request a new page to be faulted into the UVM map of the VM owning 'vcpu'
5507 * at address 'gpa'.
5508 */
5509int
5510svm_fault_page(struct vcpu *vcpu, paddr_t gpa)
5511{
5512 int ret;
5513
5514 ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, VM_FAULT_WIRE((vm_fault_t) 0x2),
5515 PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04);
5516 if (ret)
5517 printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n",
5518 __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip);
5519
5520 return (ret);
5521}
5522
5523/*
5524 * svm_handle_np_fault
5525 *
5526 * High level nested paging handler for SVM. Verifies that a fault is for a
5527 * valid memory region, then faults a page, or aborts otherwise.
5528 */
5529int
5530svm_handle_np_fault(struct vcpu *vcpu)
5531{
5532 uint64_t gpa;
5533 int gpa_memtype, ret;
5534 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
5535
5536 ret = 0;
5537
5538 gpa = vmcb->v_exitinfo2;
5539
5540 gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa);
5541 switch (gpa_memtype) {
5542 case VMM_MEM_TYPE_REGULAR:
5543 ret = svm_fault_page(vcpu, gpa);
5544 break;
5545 default:
5546 printf("unknown memory type %d for GPA 0x%llx\n",
5547 gpa_memtype, gpa);
5548 return (EINVAL22);
5549 }
5550
5551 return (ret);
5552}
5553
5554/*
5555 * vmx_fault_page
5556 *
5557 * Request a new page to be faulted into the UVM map of the VM owning 'vcpu'
5558 * at address 'gpa'.
5559 *
5560 * Parameters:
5561 * vcpu: guest VCPU requiring the page to be faulted into the UVM map
5562 * gpa: guest physical address that triggered the fault
5563 *
5564 * Return Values:
5565 * 0: if successful
5566 * EINVAL: if fault type could not be determined or VMCS reload fails
5567 * EAGAIN: if a protection fault occurred, ie writing to a read-only page
5568 * errno: if uvm_fault(9) fails to wire in the page
5569 */
5570int
5571vmx_fault_page(struct vcpu *vcpu, paddr_t gpa)
5572{
5573 int fault_type, ret;
5574
5575 fault_type = vmx_get_guest_faulttype();
5576 if (fault_type == -1) {
5577 printf("%s: invalid fault type\n", __func__);
5578 return (EINVAL22);
5579 }
5580
5581 if (fault_type == VM_FAULT_PROTECT((vm_fault_t) 0x1)) {
5582 vcpu->vc_exit.vee.vee_fault_type = VEE_FAULT_PROTECT;
5583 return (EAGAIN35);
5584 }
5585
5586 /* We may sleep during uvm_fault(9), so reload VMCS. */
5587 vcpu->vc_last_pcpu = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
;
5588 ret = uvm_fault(vcpu->vc_parent->vm_map, gpa, VM_FAULT_WIRE((vm_fault_t) 0x2),
5589 PROT_READ0x01 | PROT_WRITE0x02 | PROT_EXEC0x04);
5590 if (vcpu_reload_vmcs_vmx(vcpu)) {
5591 printf("%s: failed to reload vmcs\n", __func__);
5592 return (EINVAL22);
5593 }
5594
5595 if (ret)
5596 printf("%s: uvm_fault returns %d, GPA=0x%llx, rip=0x%llx\n",
5597 __func__, ret, (uint64_t)gpa, vcpu->vc_gueststate.vg_rip);
5598
5599 return (ret);
5600}
5601
5602/*
5603 * vmx_handle_np_fault
5604 *
5605 * High level nested paging handler for VMX. Verifies that a fault is for a
5606 * valid memory region, then faults a page, or aborts otherwise.
5607 */
5608int
5609vmx_handle_np_fault(struct vcpu *vcpu)
5610{
5611 uint64_t gpa;
5612 int gpa_memtype, ret;
5613
5614 ret = 0;
5615 if (vmread(VMCS_GUEST_PHYSICAL_ADDRESS0x2400, &gpa)) {
5616 printf("%s: cannot extract faulting pa\n", __func__);
5617 return (EINVAL22);
5618 }
5619
5620 gpa_memtype = vmm_get_guest_memtype(vcpu->vc_parent, gpa);
5621 switch (gpa_memtype) {
5622 case VMM_MEM_TYPE_REGULAR:
5623 ret = vmx_fault_page(vcpu, gpa);
5624 break;
5625 default:
5626 printf("unknown memory type %d for GPA 0x%llx\n",
5627 gpa_memtype, gpa);
5628 return (EINVAL22);
5629 }
5630
5631 return (ret);
5632}
5633
5634/*
5635 * vmm_get_guest_cpu_cpl
5636 *
5637 * Determines current CPL of 'vcpu'. On VMX/Intel, this is gathered from the
5638 * VMCS field for the DPL of SS (this seems odd, but is documented that way
5639 * in the SDM). For SVM/AMD, this is gathered directly from the VMCB's 'cpl'
5640 * field, as per the APM.
5641 *
5642 * Parameters:
5643 * vcpu: guest VCPU for which CPL is to be checked
5644 *
5645 * Return Values:
5646 * -1: the CPL could not be determined
5647 * 0-3 indicating the current CPL. For real mode operation, 0 is returned.
5648 */
5649int
5650vmm_get_guest_cpu_cpl(struct vcpu *vcpu)
5651{
5652 int mode;
5653 struct vmcb *vmcb;
5654 uint64_t ss_ar;
5655
5656 mode = vmm_get_guest_cpu_mode(vcpu);
5657
5658 if (mode == VMM_CPU_MODE_UNKNOWN)
5659 return (-1);
5660
5661 if (mode == VMM_CPU_MODE_REAL)
5662 return (0);
5663
5664 if (vmm_softc->mode == VMM_MODE_SVM ||
5665 vmm_softc->mode == VMM_MODE_RVI) {
5666 vmcb = (struct vmcb *)vcpu->vc_control_va;
5667 return (vmcb->v_cpl);
5668 } else if (vmm_softc->mode == VMM_MODE_VMX ||
5669 vmm_softc->mode == VMM_MODE_EPT) {
5670 if (vmread(VMCS_GUEST_IA32_SS_AR0x4818, &ss_ar))
5671 return (-1);
5672 return ((ss_ar & 0x60) >> 5);
5673 } else
5674 return (-1);
5675}
5676
5677/*
5678 * vmm_get_guest_cpu_mode
5679 *
5680 * Determines current CPU mode of 'vcpu'.
5681 *
5682 * Parameters:
5683 * vcpu: guest VCPU for which mode is to be checked
5684 *
5685 * Return Values:
5686 * One of VMM_CPU_MODE_*, or VMM_CPU_MODE_UNKNOWN if the mode could not be
5687 * ascertained.
5688 */
5689int
5690vmm_get_guest_cpu_mode(struct vcpu *vcpu)
5691{
5692 uint64_t cr0, efer, cs_ar;
5693 uint8_t l, dib;
5694 struct vmcb *vmcb;
5695 struct vmx_msr_store *msr_store;
5696
5697 if (vmm_softc->mode == VMM_MODE_SVM ||
5698 vmm_softc->mode == VMM_MODE_RVI) {
5699 vmcb = (struct vmcb *)vcpu->vc_control_va;
5700 cr0 = vmcb->v_cr0;
5701 efer = vmcb->v_efer;
5702 cs_ar = vmcb->v_cs.vs_attr;
5703 cs_ar = (cs_ar & 0xff) | ((cs_ar << 4) & 0xf000);
5704 } else if (vmm_softc->mode == VMM_MODE_VMX ||
5705 vmm_softc->mode == VMM_MODE_EPT) {
5706 if (vmread(VMCS_GUEST_IA32_CR00x6800, &cr0))
5707 return (VMM_CPU_MODE_UNKNOWN);
5708 if (vmread(VMCS_GUEST_IA32_CS_AR0x4816, &cs_ar))
5709 return (VMM_CPU_MODE_UNKNOWN);
5710 msr_store =
5711 (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
5712 efer = msr_store[VCPU_REGS_EFER0].vms_data;
5713 } else
5714 return (VMM_CPU_MODE_UNKNOWN);
5715
5716 l = (cs_ar & 0x2000) >> 13;
5717 dib = (cs_ar & 0x4000) >> 14;
5718
5719 /* Check CR0.PE */
5720 if (!(cr0 & CR0_PE0x00000001))
5721 return (VMM_CPU_MODE_REAL);
5722
5723 /* Check EFER */
5724 if (efer & EFER_LMA0x00000400) {
5725 /* Could be compat or long mode, check CS.L */
5726 if (l)
5727 return (VMM_CPU_MODE_LONG);
5728 else
5729 return (VMM_CPU_MODE_COMPAT);
5730 }
5731
5732 /* Check prot vs prot32 */
5733 if (dib)
5734 return (VMM_CPU_MODE_PROT32);
5735 else
5736 return (VMM_CPU_MODE_PROT);
5737}
5738
5739/*
5740 * svm_handle_inout
5741 *
5742 * Exit handler for IN/OUT instructions.
5743 *
5744 * The vmm can handle certain IN/OUTS without exiting to vmd, but most of these
5745 * will be passed to vmd for completion.
5746 *
5747 * Parameters:
5748 * vcpu: The VCPU where the IN/OUT instruction occurred
5749 *
5750 * Return values:
5751 * 0: if successful
5752 * EINVAL: an invalid IN/OUT instruction was encountered
5753 * EAGAIN: return to vmd - more processing needed in userland
5754 */
5755int
5756svm_handle_inout(struct vcpu *vcpu)
5757{
5758 uint64_t insn_length, exit_qual;
5759 int ret;
5760 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
5761
5762 insn_length = vmcb->v_exitinfo2 - vmcb->v_rip;
5763 if (insn_length != 1 && insn_length != 2) {
5764 DPRINTF("%s: IN/OUT instruction with length %lld not "
5765 "supported\n", __func__, insn_length);
5766 return (EINVAL22);
5767 }
5768
5769 exit_qual = vmcb->v_exitinfo1;
5770
5771 /* Bit 0 - direction */
5772 vcpu->vc_exit.vei.vei_dir = (exit_qual & 0x1);
5773 /* Bit 2 - string instruction? */
5774 vcpu->vc_exit.vei.vei_string = (exit_qual & 0x4) >> 2;
5775 /* Bit 3 - REP prefix? */
5776 vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x8) >> 3;
5777
5778 /* Bits 4:6 - size of exit */
5779 if (exit_qual & 0x10)
5780 vcpu->vc_exit.vei.vei_size = 1;
5781 else if (exit_qual & 0x20)
5782 vcpu->vc_exit.vei.vei_size = 2;
5783 else if (exit_qual & 0x40)
5784 vcpu->vc_exit.vei.vei_size = 4;
5785
5786 /* Bit 16:31 - port */
5787 vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16;
5788 /* Data */
5789 vcpu->vc_exit.vei.vei_data = vmcb->v_rax;
5790
5791 vcpu->vc_gueststate.vg_rip += insn_length;
5792
5793 /*
5794 * The following ports usually belong to devices owned by vmd.
5795 * Return EAGAIN to signal help needed from userspace (vmd).
5796 * Return 0 to indicate we don't care about this port.
5797 *
5798 * XXX something better than a hardcoded list here, maybe
5799 * configure via vmd via the device list in vm create params?
5800 */
5801 switch (vcpu->vc_exit.vei.vei_port) {
5802 case IO_ICU10x020 ... IO_ICU10x020 + 1:
5803 case 0x40 ... 0x43:
5804 case PCKBC_AUX0x61:
5805 case IO_RTC0x070 ... IO_RTC0x070 + 1:
5806 case IO_ICU20x0A0 ... IO_ICU20x0A0 + 1:
5807 case 0x3f8 ... 0x3ff:
5808 case ELCR00x4D0 ... ELCR10x4D1:
5809 case 0x500 ... 0x511:
5810 case 0x514:
5811 case 0x518:
5812 case 0xcf8:
5813 case 0xcfc ... 0xcff:
5814 case VMM_PCI_IO_BAR_BASE0x1000 ... VMM_PCI_IO_BAR_END0xFFFF:
5815 ret = EAGAIN35;
5816 break;
5817 default:
5818 /* Read from unsupported ports returns FFs */
5819 if (vcpu->vc_exit.vei.vei_dir == 1) {
5820 switch(vcpu->vc_exit.vei.vei_size) {
5821 case 1:
5822 vcpu->vc_gueststate.vg_rax |= 0xFF;
5823 vmcb->v_rax |= 0xFF;
5824 break;
5825 case 2:
5826 vcpu->vc_gueststate.vg_rax |= 0xFFFF;
5827 vmcb->v_rax |= 0xFFFF;
5828 break;
5829 case 4:
5830 vcpu->vc_gueststate.vg_rax |= 0xFFFFFFFF;
5831 vmcb->v_rax |= 0xFFFFFFFF;
5832 break;
5833 }
5834 }
5835 ret = 0;
5836 }
5837
5838 return (ret);
5839}
5840
5841/*
5842 * vmx_handle_inout
5843 *
5844 * Exit handler for IN/OUT instructions.
5845 *
5846 * The vmm can handle certain IN/OUTS without exiting to vmd, but most of these
5847 * will be passed to vmd for completion.
5848 */
5849int
5850vmx_handle_inout(struct vcpu *vcpu)
5851{
5852 uint64_t insn_length, exit_qual;
5853 int ret;
5854
5855 if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
5856 printf("%s: can't obtain instruction length\n", __func__);
5857 return (EINVAL22);
5858 }
5859
5860 if (insn_length != 1 && insn_length != 2) {
5861 DPRINTF("%s: IN/OUT instruction with length %lld not "
5862 "supported\n", __func__, insn_length);
5863 return (EINVAL22);
5864 }
5865
5866 if (vmx_get_exit_qualification(&exit_qual)) {
5867 printf("%s: can't get exit qual\n", __func__);
5868 return (EINVAL22);
5869 }
5870
5871 /* Bits 0:2 - size of exit */
5872 vcpu->vc_exit.vei.vei_size = (exit_qual & 0x7) + 1;
5873 /* Bit 3 - direction */
5874 vcpu->vc_exit.vei.vei_dir = (exit_qual & 0x8) >> 3;
5875 /* Bit 4 - string instruction? */
5876 vcpu->vc_exit.vei.vei_string = (exit_qual & 0x10) >> 4;
5877 /* Bit 5 - REP prefix? */
5878 vcpu->vc_exit.vei.vei_rep = (exit_qual & 0x20) >> 5;
5879 /* Bit 6 - Operand encoding */
5880 vcpu->vc_exit.vei.vei_encoding = (exit_qual & 0x40) >> 6;
5881 /* Bit 16:31 - port */
5882 vcpu->vc_exit.vei.vei_port = (exit_qual & 0xFFFF0000) >> 16;
5883 /* Data */
5884 vcpu->vc_exit.vei.vei_data = (uint32_t)vcpu->vc_gueststate.vg_rax;
5885
5886 vcpu->vc_gueststate.vg_rip += insn_length;
5887
5888 /*
5889 * The following ports usually belong to devices owned by vmd.
5890 * Return EAGAIN to signal help needed from userspace (vmd).
5891 * Return 0 to indicate we don't care about this port.
5892 *
5893 * XXX something better than a hardcoded list here, maybe
5894 * configure via vmd via the device list in vm create params?
5895 */
5896 switch (vcpu->vc_exit.vei.vei_port) {
5897 case IO_ICU10x020 ... IO_ICU10x020 + 1:
5898 case 0x40 ... 0x43:
5899 case PCKBC_AUX0x61:
5900 case IO_RTC0x070 ... IO_RTC0x070 + 1:
5901 case IO_ICU20x0A0 ... IO_ICU20x0A0 + 1:
5902 case 0x3f8 ... 0x3ff:
5903 case ELCR00x4D0 ... ELCR10x4D1:
5904 case 0x500 ... 0x511:
5905 case 0x514:
5906 case 0x518:
5907 case 0xcf8:
5908 case 0xcfc ... 0xcff:
5909 case VMM_PCI_IO_BAR_BASE0x1000 ... VMM_PCI_IO_BAR_END0xFFFF:
5910 ret = EAGAIN35;
5911 break;
5912 default:
5913 /* Read from unsupported ports returns FFs */
5914 if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) {
5915 if (vcpu->vc_exit.vei.vei_size == 4)
5916 vcpu->vc_gueststate.vg_rax |= 0xFFFFFFFF;
5917 else if (vcpu->vc_exit.vei.vei_size == 2)
5918 vcpu->vc_gueststate.vg_rax |= 0xFFFF;
5919 else if (vcpu->vc_exit.vei.vei_size == 1)
5920 vcpu->vc_gueststate.vg_rax |= 0xFF;
5921 }
5922 ret = 0;
5923 }
5924
5925 return (ret);
5926}
5927
5928/*
5929 * vmx_load_pdptes
5930 *
5931 * Update the PDPTEs in the VMCS with the values currently indicated by the
5932 * guest CR3. This is used for 32-bit PAE guests when enabling paging.
5933 *
5934 * Parameters
5935 * vcpu: The vcpu whose PDPTEs should be loaded
5936 *
5937 * Return values:
5938 * 0: if successful
5939 * EINVAL: if the PDPTEs could not be loaded
5940 * ENOMEM: memory allocation failure
5941 */
5942int
5943vmx_load_pdptes(struct vcpu *vcpu)
5944{
5945 uint64_t cr3, cr3_host_phys;
5946 vaddr_t cr3_host_virt;
5947 pd_entry_t *pdptes;
5948 int ret;
5949
5950 if (vmread(VMCS_GUEST_IA32_CR30x6802, &cr3)) {
5951 printf("%s: can't read guest cr3\n", __func__);
5952 return (EINVAL22);
5953 }
5954
5955 if (!pmap_extract(vcpu->vc_parent->vm_map->pmap, (vaddr_t)cr3,
5956 (paddr_t *)&cr3_host_phys)) {
5957 DPRINTF("%s: nonmapped guest CR3, setting PDPTEs to 0\n",
5958 __func__);
5959 if (vmwrite(VMCS_GUEST_PDPTE00x280A, 0)) {
5960 printf("%s: can't write guest PDPTE0\n", __func__);
5961 return (EINVAL22);
5962 }
5963
5964 if (vmwrite(VMCS_GUEST_PDPTE10x280C, 0)) {
5965 printf("%s: can't write guest PDPTE1\n", __func__);
5966 return (EINVAL22);
5967 }
5968
5969 if (vmwrite(VMCS_GUEST_PDPTE20x280E, 0)) {
5970 printf("%s: can't write guest PDPTE2\n", __func__);
5971 return (EINVAL22);
5972 }
5973
5974 if (vmwrite(VMCS_GUEST_PDPTE30x2810, 0)) {
5975 printf("%s: can't write guest PDPTE3\n", __func__);
5976 return (EINVAL22);
5977 }
5978 return (0);
5979 }
5980
5981 ret = 0;
5982
5983 /* We may sleep during km_alloc(9), so reload VMCS. */
5984 vcpu->vc_last_pcpu = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
;
5985 cr3_host_virt = (vaddr_t)km_alloc(PAGE_SIZE(1 << 12), &kv_any, &kp_none,
5986 &kd_waitok);
5987 if (vcpu_reload_vmcs_vmx(vcpu)) {
5988 printf("%s: failed to reload vmcs\n", __func__);
5989 ret = EINVAL22;
5990 goto exit;
5991 }
5992
5993 if (!cr3_host_virt) {
5994 printf("%s: can't allocate address for guest CR3 mapping\n",
5995 __func__);
5996 return (ENOMEM12);
5997 }
5998
5999 pmap_kenter_pa(cr3_host_virt, cr3_host_phys, PROT_READ0x01);
6000
6001 pdptes = (pd_entry_t *)cr3_host_virt;
6002 if (vmwrite(VMCS_GUEST_PDPTE00x280A, pdptes[0])) {
6003 printf("%s: can't write guest PDPTE0\n", __func__);
6004 ret = EINVAL22;
6005 goto exit;
6006 }
6007
6008 if (vmwrite(VMCS_GUEST_PDPTE10x280C, pdptes[1])) {
6009 printf("%s: can't write guest PDPTE1\n", __func__);
6010 ret = EINVAL22;
6011 goto exit;
6012 }
6013
6014 if (vmwrite(VMCS_GUEST_PDPTE20x280E, pdptes[2])) {
6015 printf("%s: can't write guest PDPTE2\n", __func__);
6016 ret = EINVAL22;
6017 goto exit;
6018 }
6019
6020 if (vmwrite(VMCS_GUEST_PDPTE30x2810, pdptes[3])) {
6021 printf("%s: can't write guest PDPTE3\n", __func__);
6022 ret = EINVAL22;
6023 goto exit;
6024 }
6025
6026exit:
6027 pmap_kremove(cr3_host_virt, PAGE_SIZE(1 << 12));
6028
6029 /* km_free(9) might sleep, so we need to reload VMCS. */
6030 vcpu->vc_last_pcpu = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
;
6031 km_free((void *)cr3_host_virt, PAGE_SIZE(1 << 12), &kv_any, &kp_none);
6032 if (vcpu_reload_vmcs_vmx(vcpu)) {
6033 printf("%s: failed to reload vmcs after km_free\n", __func__);
6034 ret = EINVAL22;
6035 }
6036
6037 return (ret);
6038}
6039
6040/*
6041 * vmx_handle_cr0_write
6042 *
6043 * Write handler for CR0. This function ensures valid values are written into
6044 * CR0 for the cpu/vmm mode in use (cr0 must-be-0 and must-be-1 bits, etc).
6045 *
6046 * Parameters
6047 * vcpu: The vcpu taking the cr0 write exit
6048 * r: The guest's desired (incoming) cr0 value
6049 *
6050 * Return values:
6051 * 0: if successful
6052 * EINVAL: if an error occurred
6053 */
6054int
6055vmx_handle_cr0_write(struct vcpu *vcpu, uint64_t r)
6056{
6057 struct vmx_msr_store *msr_store;
6058 struct vmx_invvpid_descriptor vid;
6059 uint64_t ectls, oldcr0, cr4, mask;
6060 int ret;
6061
6062 /* Check must-be-0 bits */
6063 mask = vcpu->vc_vmx_cr0_fixed1;
6064 if (~r & mask) {
6065 /* Inject #GP, let the guest handle it */
6066 DPRINTF("%s: guest set invalid bits in %%cr0. Zeros "
6067 "mask=0x%llx, data=0x%llx\n", __func__,
6068 vcpu->vc_vmx_cr0_fixed1, r);
6069 vmm_inject_gp(vcpu);
6070 return (0);
6071 }
6072
6073 /* Check must-be-1 bits */
6074 mask = vcpu->vc_vmx_cr0_fixed0;
6075 if ((r & mask) != mask) {
6076 /* Inject #GP, let the guest handle it */
6077 DPRINTF("%s: guest set invalid bits in %%cr0. Ones "
6078 "mask=0x%llx, data=0x%llx\n", __func__,
6079 vcpu->vc_vmx_cr0_fixed0, r);
6080 vmm_inject_gp(vcpu);
6081 return (0);
6082 }
6083
6084 if (r & 0xFFFFFFFF00000000ULL) {
6085 DPRINTF("%s: setting bits 63:32 of %%cr0 is invalid,"
6086 " inject #GP, cr0=0x%llx\n", __func__, r);
6087 vmm_inject_gp(vcpu);
6088 return (0);
6089 }
6090
6091 if ((r & CR0_PG0x80000000) && (r & CR0_PE0x00000001) == 0) {
6092 DPRINTF("%s: PG flag set when the PE flag is clear,"
6093 " inject #GP, cr0=0x%llx\n", __func__, r);
6094 vmm_inject_gp(vcpu);
6095 return (0);
6096 }
6097
6098 if ((r & CR0_NW0x20000000) && (r & CR0_CD0x40000000) == 0) {
6099 DPRINTF("%s: NW flag set when the CD flag is clear,"
6100 " inject #GP, cr0=0x%llx\n", __func__, r);
6101 vmm_inject_gp(vcpu);
6102 return (0);
6103 }
6104
6105 if (vmread(VMCS_GUEST_IA32_CR00x6800, &oldcr0)) {
6106 printf("%s: can't read guest cr0\n", __func__);
6107 return (EINVAL22);
6108 }
6109
6110 /* CR0 must always have NE set */
6111 r |= CR0_NE0x00000020;
6112
6113 if (vmwrite(VMCS_GUEST_IA32_CR00x6800, r)) {
6114 printf("%s: can't write guest cr0\n", __func__);
6115 return (EINVAL22);
6116 }
6117
6118 /* If the guest hasn't enabled paging ... */
6119 if (!(r & CR0_PG0x80000000) && (oldcr0 & CR0_PG0x80000000)) {
6120 /* Paging was disabled (prev. enabled) - Flush TLB */
6121 if ((vmm_softc->mode == VMM_MODE_VMX ||
6122 vmm_softc->mode == VMM_MODE_EPT) &&
6123 vcpu->vc_vmx_vpid_enabled) {
6124 vid.vid_vpid = vcpu->vc_parent->vm_id;
6125 vid.vid_addr = 0;
6126 invvpid(IA32_VMX_INVVPID_SINGLE_CTX_GLB0x3, &vid);
6127 }
6128 } else if (!(oldcr0 & CR0_PG0x80000000) && (r & CR0_PG0x80000000)) {
6129 /*
6130 * Since the guest has enabled paging, then the IA32_VMX_IA32E_MODE_GUEST
6131 * control must be set to the same as EFER_LME.
6132 */
6133 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
6134
6135 if (vmread(VMCS_ENTRY_CTLS0x4012, &ectls)) {
6136 printf("%s: can't read entry controls", __func__);
6137 return (EINVAL22);
6138 }
6139
6140 if (msr_store[VCPU_REGS_EFER0].vms_data & EFER_LME0x00000100)
6141 ectls |= IA32_VMX_IA32E_MODE_GUEST(1ULL << 9);
6142 else
6143 ectls &= ~IA32_VMX_IA32E_MODE_GUEST(1ULL << 9);
6144
6145 if (vmwrite(VMCS_ENTRY_CTLS0x4012, ectls)) {
6146 printf("%s: can't write entry controls", __func__);
6147 return (EINVAL22);
6148 }
6149
6150 if (vmread(VMCS_GUEST_IA32_CR40x6804, &cr4)) {
6151 printf("%s: can't read guest cr4\n", __func__);
6152 return (EINVAL22);
6153 }
6154
6155 /* Load PDPTEs if PAE guest enabling paging */
6156 if (cr4 & CR4_PAE0x00000020) {
6157 ret = vmx_load_pdptes(vcpu);
6158
6159 if (ret) {
6160 printf("%s: updating PDPTEs failed\n", __func__);
6161 return (ret);
6162 }
6163 }
6164 }
6165
6166 return (0);
6167}
6168
6169/*
6170 * vmx_handle_cr4_write
6171 *
6172 * Write handler for CR4. This function ensures valid values are written into
6173 * CR4 for the cpu/vmm mode in use (cr4 must-be-0 and must-be-1 bits, etc).
6174 *
6175 * Parameters
6176 * vcpu: The vcpu taking the cr4 write exit
6177 * r: The guest's desired (incoming) cr4 value
6178 *
6179 * Return values:
6180 * 0: if successful
6181 * EINVAL: if an error occurred
6182 */
6183int
6184vmx_handle_cr4_write(struct vcpu *vcpu, uint64_t r)
6185{
6186 uint64_t mask;
6187
6188 /* Check must-be-0 bits */
6189 mask = ~(curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
6190 if (r & mask) {
6191 /* Inject #GP, let the guest handle it */
6192 DPRINTF("%s: guest set invalid bits in %%cr4. Zeros "
6193 "mask=0x%llx, data=0x%llx\n", __func__,
6194 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1,
6195 r);
6196 vmm_inject_gp(vcpu);
6197 return (0);
6198 }
6199
6200 /* Check must-be-1 bits */
6201 mask = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0;
6202 if ((r & mask) != mask) {
6203 /* Inject #GP, let the guest handle it */
6204 DPRINTF("%s: guest set invalid bits in %%cr4. Ones "
6205 "mask=0x%llx, data=0x%llx\n", __func__,
6206 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0,
6207 r);
6208 vmm_inject_gp(vcpu);
6209 return (0);
6210 }
6211
6212 /* CR4_VMXE must always be enabled */
6213 r |= CR4_VMXE0x00002000;
6214
6215 if (vmwrite(VMCS_GUEST_IA32_CR40x6804, r)) {
6216 printf("%s: can't write guest cr4\n", __func__);
6217 return (EINVAL22);
6218 }
6219
6220 return (0);
6221}
6222
6223/*
6224 * vmx_handle_cr
6225 *
6226 * Handle reads/writes to control registers (except CR3)
6227 */
6228int
6229vmx_handle_cr(struct vcpu *vcpu)
6230{
6231 uint64_t insn_length, exit_qual, r;
6232 uint8_t crnum, dir, reg;
6233
6234 if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
6235 printf("%s: can't obtain instruction length\n", __func__);
6236 return (EINVAL22);
6237 }
6238
6239 if (vmx_get_exit_qualification(&exit_qual)) {
6240 printf("%s: can't get exit qual\n", __func__);
6241 return (EINVAL22);
6242 }
6243
6244 /* Low 4 bits of exit_qual represent the CR number */
6245 crnum = exit_qual & 0xf;
6246
6247 /*
6248 * Bits 5:4 indicate the direction of operation (or special CR-modifying
6249 * instruction)
6250 */
6251 dir = (exit_qual & 0x30) >> 4;
6252
6253 /* Bits 11:8 encode the source/target register */
6254 reg = (exit_qual & 0xf00) >> 8;
6255
6256 switch (dir) {
6257 case CR_WRITE0:
6258 if (crnum == 0 || crnum == 4) {
6259 switch (reg) {
6260 case 0: r = vcpu->vc_gueststate.vg_rax; break;
6261 case 1: r = vcpu->vc_gueststate.vg_rcx; break;
6262 case 2: r = vcpu->vc_gueststate.vg_rdx; break;
6263 case 3: r = vcpu->vc_gueststate.vg_rbx; break;
6264 case 4: if (vmread(VMCS_GUEST_IA32_RSP0x681C, &r)) {
6265 printf("%s: unable to read guest "
6266 "RSP\n", __func__);
6267 return (EINVAL22);
6268 }
6269 break;
6270 case 5: r = vcpu->vc_gueststate.vg_rbp; break;
6271 case 6: r = vcpu->vc_gueststate.vg_rsi; break;
6272 case 7: r = vcpu->vc_gueststate.vg_rdi; break;
6273 case 8: r = vcpu->vc_gueststate.vg_r8; break;
6274 case 9: r = vcpu->vc_gueststate.vg_r9; break;
6275 case 10: r = vcpu->vc_gueststate.vg_r10; break;
6276 case 11: r = vcpu->vc_gueststate.vg_r11; break;
6277 case 12: r = vcpu->vc_gueststate.vg_r12; break;
6278 case 13: r = vcpu->vc_gueststate.vg_r13; break;
6279 case 14: r = vcpu->vc_gueststate.vg_r14; break;
6280 case 15: r = vcpu->vc_gueststate.vg_r15; break;
6281 }
6282 DPRINTF("%s: mov to cr%d @ %llx, data=0x%llx\n",
6283 __func__, crnum, vcpu->vc_gueststate.vg_rip, r);
6284 }
6285
6286 if (crnum == 0)
6287 vmx_handle_cr0_write(vcpu, r);
6288
6289 if (crnum == 4)
6290 vmx_handle_cr4_write(vcpu, r);
6291
6292 break;
6293 case CR_READ1:
6294 DPRINTF("%s: mov from cr%d @ %llx\n", __func__, crnum,
6295 vcpu->vc_gueststate.vg_rip);
6296 break;
6297 case CR_CLTS2:
6298 DPRINTF("%s: clts instruction @ %llx\n", __func__,
6299 vcpu->vc_gueststate.vg_rip);
6300 break;
6301 case CR_LMSW3:
6302 DPRINTF("%s: lmsw instruction @ %llx\n", __func__,
6303 vcpu->vc_gueststate.vg_rip);
6304 break;
6305 default:
6306 DPRINTF("%s: unknown cr access @ %llx\n", __func__,
6307 vcpu->vc_gueststate.vg_rip);
6308 }
6309
6310 vcpu->vc_gueststate.vg_rip += insn_length;
6311
6312 return (0);
6313}
6314
6315/*
6316 * vmx_handle_rdmsr
6317 *
6318 * Handler for rdmsr instructions. Bitmap MSRs are allowed implicit access
6319 * and won't end up here. This handler is primarily intended to catch otherwise
6320 * unknown MSR access for possible later inclusion in the bitmap list. For
6321 * each MSR access that ends up here, we log the access (when VMM_DEBUG is
6322 * enabled)
6323 *
6324 * Parameters:
6325 * vcpu: vcpu structure containing instruction info causing the exit
6326 *
6327 * Return value:
6328 * 0: The operation was successful
6329 * EINVAL: An error occurred
6330 */
6331int
6332vmx_handle_rdmsr(struct vcpu *vcpu)
6333{
6334 uint64_t insn_length;
6335 uint64_t *rax, *rdx;
6336 uint64_t *rcx;
6337 int ret;
6338
6339 if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
6340 printf("%s: can't obtain instruction length\n", __func__);
6341 return (EINVAL22);
6342 }
6343
6344 if (insn_length != 2) {
6345 DPRINTF("%s: RDMSR with instruction length %lld not "
6346 "supported\n", __func__, insn_length);
6347 return (EINVAL22);
6348 }
6349
6350 rax = &vcpu->vc_gueststate.vg_rax;
6351 rcx = &vcpu->vc_gueststate.vg_rcx;
6352 rdx = &vcpu->vc_gueststate.vg_rdx;
6353
6354 switch (*rcx) {
6355 case MSR_BIOS_SIGN0x08b:
6356 case MSR_PLATFORM_ID0x017:
6357 /* Ignored */
6358 *rax = 0;
6359 *rdx = 0;
6360 break;
6361 case MSR_CR_PAT0x277:
6362 *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
6363 *rdx = (vcpu->vc_shadow_pat >> 32);
6364 break;
6365 default:
6366 /* Unsupported MSRs causes #GP exception, don't advance %rip */
6367 DPRINTF("%s: unsupported rdmsr (msr=0x%llx), injecting #GP\n",
6368 __func__, *rcx);
6369 ret = vmm_inject_gp(vcpu);
6370 return (ret);
6371 }
6372
6373 vcpu->vc_gueststate.vg_rip += insn_length;
6374
6375 return (0);
6376}
6377
6378/*
6379 * vmx_handle_xsetbv
6380 *
6381 * VMX-specific part of the xsetbv instruction exit handler
6382 *
6383 * Parameters:
6384 * vcpu: vcpu structure containing instruction info causing the exit
6385 *
6386 * Return value:
6387 * 0: The operation was successful
6388 * EINVAL: An error occurred
6389 */
6390int
6391vmx_handle_xsetbv(struct vcpu *vcpu)
6392{
6393 uint64_t insn_length, *rax;
6394 int ret;
6395
6396 if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
6397 printf("%s: can't obtain instruction length\n", __func__);
6398 return (EINVAL22);
6399 }
6400
6401 /* All XSETBV instructions are 3 bytes */
6402 if (insn_length != 3) {
6403 DPRINTF("%s: XSETBV with instruction length %lld not "
6404 "supported\n", __func__, insn_length);
6405 return (EINVAL22);
6406 }
6407
6408 rax = &vcpu->vc_gueststate.vg_rax;
6409
6410 ret = vmm_handle_xsetbv(vcpu, rax);
6411
6412 vcpu->vc_gueststate.vg_rip += insn_length;
6413
6414 return ret;
6415}
6416
6417/*
6418 * svm_handle_xsetbv
6419 *
6420 * SVM-specific part of the xsetbv instruction exit handler
6421 *
6422 * Parameters:
6423 * vcpu: vcpu structure containing instruction info causing the exit
6424 *
6425 * Return value:
6426 * 0: The operation was successful
6427 * EINVAL: An error occurred
6428 */
6429int
6430svm_handle_xsetbv(struct vcpu *vcpu)
6431{
6432 uint64_t insn_length, *rax;
6433 int ret;
6434 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
6435
6436 /* All XSETBV instructions are 3 bytes */
6437 insn_length = 3;
6438
6439 rax = &vmcb->v_rax;
6440
6441 ret = vmm_handle_xsetbv(vcpu, rax);
6442
6443 vcpu->vc_gueststate.vg_rip += insn_length;
6444
6445 return ret;
6446}
6447
6448/*
6449 * vmm_handle_xsetbv
6450 *
6451 * Handler for xsetbv instructions. We allow the guest VM to set xcr0 values
6452 * limited to the xsave_mask in use in the host.
6453 *
6454 * Parameters:
6455 * vcpu: vcpu structure containing instruction info causing the exit
6456 * rax: pointer to guest %rax
6457 *
6458 * Return value:
6459 * 0: The operation was successful
6460 * EINVAL: An error occurred
6461 */
6462int
6463vmm_handle_xsetbv(struct vcpu *vcpu, uint64_t *rax)
6464{
6465 uint64_t *rdx, *rcx, val;
6466
6467 rcx = &vcpu->vc_gueststate.vg_rcx;
6468 rdx = &vcpu->vc_gueststate.vg_rdx;
6469
6470 if (vmm_get_guest_cpu_cpl(vcpu) != 0) {
6471 DPRINTF("%s: guest cpl not zero\n", __func__);
6472 return (vmm_inject_gp(vcpu));
6473 }
6474
6475 if (*rcx != 0) {
6476 DPRINTF("%s: guest specified invalid xcr register number "
6477 "%lld\n", __func__, *rcx);
6478 return (vmm_inject_gp(vcpu));
6479 }
6480
6481 val = *rax + (*rdx << 32);
6482 if (val & ~xsave_mask) {
6483 DPRINTF("%s: guest specified xcr0 outside xsave_mask %lld\n",
6484 __func__, val);
6485 return (vmm_inject_gp(vcpu));
6486 }
6487
6488 vcpu->vc_gueststate.vg_xcr0 = val;
6489
6490 return (0);
6491}
6492
6493/*
6494 * vmx_handle_misc_enable_msr
6495 *
6496 * Handler for writes to the MSR_MISC_ENABLE (0x1a0) MSR on Intel CPUs. We
6497 * limit what the guest can write to this MSR (certain hardware-related
6498 * settings like speedstep, etc).
6499 *
6500 * Parameters:
6501 * vcpu: vcpu structure containing information about the wrmsr causing this
6502 * exit
6503 */
6504void
6505vmx_handle_misc_enable_msr(struct vcpu *vcpu)
6506{
6507 uint64_t *rax, *rdx;
6508 struct vmx_msr_store *msr_store;
6509
6510 rax = &vcpu->vc_gueststate.vg_rax;
6511 rdx = &vcpu->vc_gueststate.vg_rdx;
6512 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
6513
6514 /* Filter out guest writes to TCC, EIST, and xTPR */
6515 *rax &= ~(MISC_ENABLE_TCC(1 << 3) | MISC_ENABLE_EIST_ENABLED(1 << 16) |
6516 MISC_ENABLE_xTPR_MESSAGE_DISABLE(1 << 23));
6517
6518 msr_store[VCPU_REGS_MISC_ENABLE6].vms_data = *rax | (*rdx << 32);
6519}
6520
6521/*
6522 * vmx_handle_wrmsr
6523 *
6524 * Handler for wrmsr instructions. This handler logs the access, and discards
6525 * the written data (when VMM_DEBUG is enabled). Any valid wrmsr will not end
6526 * up here (it will be whitelisted in the MSR bitmap).
6527 *
6528 * Parameters:
6529 * vcpu: vcpu structure containing instruction info causing the exit
6530 *
6531 * Return value:
6532 * 0: The operation was successful
6533 * EINVAL: An error occurred
6534 */
6535int
6536vmx_handle_wrmsr(struct vcpu *vcpu)
6537{
6538 uint64_t insn_length, val;
6539 uint64_t *rax, *rdx, *rcx;
6540 int ret;
6541
6542 if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
6543 printf("%s: can't obtain instruction length\n", __func__);
6544 return (EINVAL22);
6545 }
6546
6547 if (insn_length != 2) {
6548 DPRINTF("%s: WRMSR with instruction length %lld not "
6549 "supported\n", __func__, insn_length);
6550 return (EINVAL22);
6551 }
6552
6553 rax = &vcpu->vc_gueststate.vg_rax;
6554 rcx = &vcpu->vc_gueststate.vg_rcx;
6555 rdx = &vcpu->vc_gueststate.vg_rdx;
6556 val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
6557
6558 switch (*rcx) {
6559 case MSR_CR_PAT0x277:
6560 if (!vmm_pat_is_valid(val)) {
6561 ret = vmm_inject_gp(vcpu);
6562 return (ret);
6563 }
6564 vcpu->vc_shadow_pat = val;
6565 break;
6566 case MSR_MISC_ENABLE0x1a0:
6567 vmx_handle_misc_enable_msr(vcpu);
6568 break;
6569 case MSR_SMM_MONITOR_CTL0x09b:
6570 /*
6571 * 34.15.5 - Enabling dual monitor treatment
6572 *
6573 * Unsupported, so inject #GP and return without
6574 * advancing %rip.
6575 */
6576 ret = vmm_inject_gp(vcpu);
6577 return (ret);
6578 case KVM_MSR_SYSTEM_TIME0x4b564d01:
6579 vmm_init_pvclock(vcpu,
6580 (*rax & 0xFFFFFFFFULL) | (*rdx << 32));
6581 break;
6582#ifdef VMM_DEBUG
6583 default:
6584 /*
6585 * Log the access, to be able to identify unknown MSRs
6586 */
6587 DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
6588 "written from guest=0x%llx:0x%llx\n", __func__,
6589 *rcx, *rdx, *rax);
6590#endif /* VMM_DEBUG */
6591 }
6592
6593 vcpu->vc_gueststate.vg_rip += insn_length;
6594
6595 return (0);
6596}
6597
6598/*
6599 * svm_handle_msr
6600 *
6601 * Handler for MSR instructions.
6602 *
6603 * Parameters:
6604 * vcpu: vcpu structure containing instruction info causing the exit
6605 *
6606 * Return value:
6607 * Always 0 (successful)
6608 */
6609int
6610svm_handle_msr(struct vcpu *vcpu)
6611{
6612 uint64_t insn_length, val;
6613 uint64_t *rax, *rcx, *rdx;
6614 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
6615 int ret;
6616
6617 /* XXX: Validate RDMSR / WRMSR insn_length */
6618 insn_length = 2;
6619
6620 rax = &vmcb->v_rax;
6621 rcx = &vcpu->vc_gueststate.vg_rcx;
6622 rdx = &vcpu->vc_gueststate.vg_rdx;
6623
6624 if (vmcb->v_exitinfo1 == 1) {
6625 /* WRMSR */
6626 val = (*rdx << 32) | (*rax & 0xFFFFFFFFULL);
6627
6628 switch (*rcx) {
6629 case MSR_CR_PAT0x277:
6630 if (!vmm_pat_is_valid(val)) {
6631 ret = vmm_inject_gp(vcpu);
6632 return (ret);
6633 }
6634 vcpu->vc_shadow_pat = val;
6635 break;
6636 case MSR_EFER0xc0000080:
6637 vmcb->v_efer = *rax | EFER_SVME0x00001000;
6638 break;
6639 case KVM_MSR_SYSTEM_TIME0x4b564d01:
6640 vmm_init_pvclock(vcpu,
6641 (*rax & 0xFFFFFFFFULL) | (*rdx << 32));
6642 break;
6643 default:
6644 /* Log the access, to be able to identify unknown MSRs */
6645 DPRINTF("%s: wrmsr exit, msr=0x%llx, discarding data "
6646 "written from guest=0x%llx:0x%llx\n", __func__,
6647 *rcx, *rdx, *rax);
6648 }
6649 } else {
6650 /* RDMSR */
6651 switch (*rcx) {
6652 case MSR_BIOS_SIGN0x08b:
6653 case MSR_INT_PEN_MSG0xc0010055:
6654 case MSR_PLATFORM_ID0x017:
6655 /* Ignored */
6656 *rax = 0;
6657 *rdx = 0;
6658 break;
6659 case MSR_CR_PAT0x277:
6660 *rax = (vcpu->vc_shadow_pat & 0xFFFFFFFFULL);
6661 *rdx = (vcpu->vc_shadow_pat >> 32);
6662 break;
6663 case MSR_DE_CFG0xc0011029:
6664 /* LFENCE serializing bit is set by host */
6665 *rax = DE_CFG_SERIALIZE_LFENCE(1 << 1);
6666 *rdx = 0;
6667 break;
6668 default:
6669 /*
6670 * Unsupported MSRs causes #GP exception, don't advance
6671 * %rip
6672 */
6673 DPRINTF("%s: unsupported rdmsr (msr=0x%llx), "
6674 "injecting #GP\n", __func__, *rcx);
6675 ret = vmm_inject_gp(vcpu);
6676 return (ret);
6677 }
6678 }
6679
6680 vcpu->vc_gueststate.vg_rip += insn_length;
6681
6682 return (0);
6683}
6684
6685/*
6686 * vmm_handle_cpuid
6687 *
6688 * Exit handler for CPUID instruction
6689 *
6690 * Parameters:
6691 * vcpu: vcpu causing the CPUID exit
6692 *
6693 * Return value:
6694 * 0: the exit was processed successfully
6695 * EINVAL: error occurred validating the CPUID instruction arguments
6696 */
6697int
6698vmm_handle_cpuid(struct vcpu *vcpu)
6699{
6700 uint64_t insn_length, cr4;
6701 uint64_t *rax, *rbx, *rcx, *rdx;
6702 struct vmcb *vmcb;
3
'vmcb' declared without an initial value
6703 uint32_t eax, ebx, ecx, edx;
6704 struct vmx_msr_store *msr_store;
6705 int vmm_cpuid_level;
6706
6707 /* what's the cpuid level we support/advertise? */
6708 vmm_cpuid_level = cpuid_level;
6709 if (vmm_cpuid_level < 0x15 && tsc_is_invariant)
4
Assuming 'vmm_cpuid_level' is >= 21
6710 vmm_cpuid_level = 0x15;
6711
6712 if (vmm_softc->mode == VMM_MODE_VMX ||
5
Assuming field 'mode' is equal to VMM_MODE_VMX
6713 vmm_softc->mode == VMM_MODE_EPT) {
6714 if (vmread(VMCS_INSTRUCTION_LENGTH0x440C, &insn_length)) {
6
Assuming the condition is false
7
Taking false branch
6715 DPRINTF("%s: can't obtain instruction length\n",
6716 __func__);
6717 return (EINVAL22);
6718 }
6719
6720 if (vmread(VMCS_GUEST_IA32_CR40x6804, &cr4)) {
8
Assuming the condition is false
9
Taking false branch
6721 DPRINTF("%s: can't obtain cr4\n", __func__);
6722 return (EINVAL22);
6723 }
6724
6725 rax = &vcpu->vc_gueststate.vg_rax;
6726
6727 /*
6728 * "CPUID leaves above 02H and below 80000000H are only
6729 * visible when IA32_MISC_ENABLE MSR has bit 22 set to its
6730 * default value 0"
6731 */
6732 msr_store =
6733 (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
6734 if (msr_store[VCPU_REGS_MISC_ENABLE6].vms_data &
10
Assuming the condition is false
11
Taking false branch
6735 MISC_ENABLE_LIMIT_CPUID_MAXVAL(1 << 22))
6736 vmm_cpuid_level = 0x02;
6737 } else {
6738 /* XXX: validate insn_length 2 */
6739 insn_length = 2;
6740 vmcb = (struct vmcb *)vcpu->vc_control_va;
6741 rax = &vmcb->v_rax;
6742 cr4 = vmcb->v_cr4;
6743 }
6744
6745 rbx = &vcpu->vc_gueststate.vg_rbx;
6746 rcx = &vcpu->vc_gueststate.vg_rcx;
6747 rdx = &vcpu->vc_gueststate.vg_rdx;
6748 vcpu->vc_gueststate.vg_rip += insn_length;
6749
6750 /*
6751 * "If a value entered for CPUID.EAX is higher than the maximum input
6752 * value for basic or extended function for that processor then the
6753 * data for the highest basic information leaf is returned."
6754 *
6755 * "When CPUID returns the highest basic leaf information as a result
6756 * of an invalid input EAX value, any dependence on input ECX value
6757 * in the basic leaf is honored."
6758 *
6759 * This means if rax is between vmm_cpuid_level and 0x40000000 (the start
6760 * of the hypervisor info leaves), clamp to vmm_cpuid_level, but without
6761 * altering subleaf. Also, if rax is greater than the extended function
6762 * info, clamp also to vmm_cpuid_level.
6763 */
6764 if ((*rax > vmm_cpuid_level && *rax < 0x40000000) ||
12
Assuming the condition is true
13
Assuming the condition is true
6765 (*rax > curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_pnfeatset)) {
6766 DPRINTF("%s: invalid cpuid input leaf 0x%llx, guest rip="
6767 "0x%llx - resetting to 0x%x\n", __func__, *rax,
6768 vcpu->vc_gueststate.vg_rip - insn_length,
6769 vmm_cpuid_level);
6770 *rax = vmm_cpuid_level;
6771 }
6772
6773 CPUID_LEAF(*rax, 0, eax, ebx, ecx, edx)__asm volatile("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d"
(edx) : "a" (*rax), "c" (0))
;
6774
6775 switch (*rax) {
14
Control jumps to the 'default' case at line 7000
6776 case 0x00: /* Max level and vendor ID */
6777 *rax = vmm_cpuid_level;
6778 *rbx = *((uint32_t *)&cpu_vendor);
6779 *rdx = *((uint32_t *)&cpu_vendor + 1);
6780 *rcx = *((uint32_t *)&cpu_vendor + 2);
6781 break;
6782 case 0x01: /* Version, brand, feature info */
6783 *rax = cpu_id;
6784 /* mask off host's APIC ID, reset to vcpu id */
6785 *rbx = cpu_ebxfeature & 0x0000FFFF;
6786 *rbx |= (vcpu->vc_id & 0xFF) << 24;
6787 *rcx = (cpu_ecxfeature | CPUIDECX_HV0x80000000) & VMM_CPUIDECX_MASK~(0x00000080 | 0x00000100 | 0x00000008 | 0x00008000 | 0x00000020
| 0x00000004 | 0x00000010 | 0x00000040 | 0x00000400 | 0x00000800
| 0x00004000 | 0x00020000 | 0x00040000 | 0x00200000 | 0x01000000
)
;
6788
6789 /* Guest CR4.OSXSAVE determines presence of CPUIDECX_OSXSAVE */
6790 if (cr4 & CR4_OSXSAVE0x00040000)
6791 *rcx |= CPUIDECX_OSXSAVE0x08000000;
6792 else
6793 *rcx &= ~CPUIDECX_OSXSAVE0x08000000;
6794
6795 *rdx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_feature_flags & VMM_CPUIDEDX_MASK~(0x00400000 | 0x20000000 | 0x10000000 | 0x00200000 | 0x00000200
| 0x00040000 | 0x08000000 | 0x80000000 | 0x00001000 | 0x00000080
| 0x00004000)
;
6796 break;
6797 case 0x02: /* Cache and TLB information */
6798 *rax = eax;
6799 *rbx = ebx;
6800 *rcx = ecx;
6801 *rdx = edx;
6802 break;
6803 case 0x03: /* Processor serial number (not supported) */
6804 DPRINTF("%s: function 0x03 (processor serial number) not "
6805 "supported\n", __func__);
6806 *rax = 0;
6807 *rbx = 0;
6808 *rcx = 0;
6809 *rdx = 0;
6810 break;
6811 case 0x04: /* Deterministic cache info */
6812 if (*rcx == 0) {
6813 *rax = eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK0x3FF;
6814 *rbx = ebx;
6815 *rcx = ecx;
6816 *rdx = edx;
6817 } else {
6818 CPUID_LEAF(*rax, *rcx, eax, ebx, ecx, edx)__asm volatile("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d"
(edx) : "a" (*rax), "c" (*rcx))
;
6819 *rax = eax & VMM_CPUID4_CACHE_TOPOLOGY_MASK0x3FF;
6820 *rbx = ebx;
6821 *rcx = ecx;
6822 *rdx = edx;
6823 }
6824 break;
6825 case 0x05: /* MONITOR/MWAIT (not supported) */
6826 DPRINTF("%s: function 0x05 (monitor/mwait) not supported\n",
6827 __func__);
6828 *rax = 0;
6829 *rbx = 0;
6830 *rcx = 0;
6831 *rdx = 0;
6832 break;
6833 case 0x06: /* Thermal / Power management (not supported) */
6834 DPRINTF("%s: function 0x06 (thermal/power mgt) not supported\n",
6835 __func__);
6836 *rax = 0;
6837 *rbx = 0;
6838 *rcx = 0;
6839 *rdx = 0;
6840 break;
6841 case 0x07: /* SEFF */
6842 if (*rcx == 0) {
6843 *rax = 0; /* Highest subleaf supported */
6844 *rbx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_feature_sefflags_ebx & VMM_SEFF0EBX_MASK~(0x00000002 | 0x00000004 | 0x00000010 | 0x00000400 | 0x00000800
| 0x00001000 | 0x00004000 | 0x00400000 | 0x02000000 | 0x00010000
| 0x00020000 | 0x00200000 | 0x04000000 | 0x08000000 | 0x10000000
| 0x40000000 | 0x80000000)
;
6845 *rcx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_feature_sefflags_ecx & VMM_SEFF0ECX_MASK~(0x00000002);
6846 *rdx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_feature_sefflags_edx & VMM_SEFF0EDX_MASK(0x00000400);
6847 } else {
6848 /* Unsupported subleaf */
6849 DPRINTF("%s: function 0x07 (SEFF) unsupported subleaf "
6850 "0x%llx not supported\n", __func__, *rcx);
6851 *rax = 0;
6852 *rbx = 0;
6853 *rcx = 0;
6854 *rdx = 0;
6855 }
6856 break;
6857 case 0x09: /* Direct Cache Access (not supported) */
6858 DPRINTF("%s: function 0x09 (direct cache access) not "
6859 "supported\n", __func__);
6860 *rax = 0;
6861 *rbx = 0;
6862 *rcx = 0;
6863 *rdx = 0;
6864 break;
6865 case 0x0a: /* Architectural perf monitoring (not supported) */
6866 DPRINTF("%s: function 0x0a (arch. perf mon) not supported\n",
6867 __func__);
6868 *rax = 0;
6869 *rbx = 0;
6870 *rcx = 0;
6871 *rdx = 0;
6872 break;
6873 case 0x0b: /* Extended topology enumeration (not supported) */
6874 DPRINTF("%s: function 0x0b (topology enumeration) not "
6875 "supported\n", __func__);
6876 *rax = 0;
6877 *rbx = 0;
6878 *rcx = 0;
6879 *rdx = 0;
6880 break;
6881 case 0x0d: /* Processor ext. state information */
6882 if (*rcx == 0) {
6883 *rax = xsave_mask;
6884 *rbx = ebx;
6885 *rcx = ecx;
6886 *rdx = edx;
6887 } else if (*rcx == 1) {
6888 *rax = 0;
6889 *rbx = 0;
6890 *rcx = 0;
6891 *rdx = 0;
6892 } else {
6893 CPUID_LEAF(*rax, *rcx, eax, ebx, ecx, edx)__asm volatile("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d"
(edx) : "a" (*rax), "c" (*rcx))
;
6894 *rax = eax;
6895 *rbx = ebx;
6896 *rcx = ecx;
6897 *rdx = edx;
6898 }
6899 break;
6900 case 0x0f: /* QoS info (not supported) */
6901 DPRINTF("%s: function 0x0f (QoS info) not supported\n",
6902 __func__);
6903 *rax = 0;
6904 *rbx = 0;
6905 *rcx = 0;
6906 *rdx = 0;
6907 break;
6908 case 0x14: /* Processor Trace info (not supported) */
6909 DPRINTF("%s: function 0x14 (processor trace info) not "
6910 "supported\n", __func__);
6911 *rax = 0;
6912 *rbx = 0;
6913 *rcx = 0;
6914 *rdx = 0;
6915 break;
6916 case 0x15:
6917 if (cpuid_level >= 0x15) {
6918 *rax = eax;
6919 *rbx = ebx;
6920 *rcx = ecx;
6921 *rdx = edx;
6922 } else {
6923 KASSERT(tsc_is_invariant)((tsc_is_invariant) ? (void)0 : __assert("diagnostic ", "/usr/src/sys/arch/amd64/amd64/vmm.c"
, 6923, "tsc_is_invariant"))
;
6924 *rax = 1;
6925 *rbx = 100;
6926 *rcx = tsc_frequency / 100;
6927 *rdx = 0;
6928 }
6929 break;
6930 case 0x16: /* Processor frequency info */
6931 *rax = eax;
6932 *rbx = ebx;
6933 *rcx = ecx;
6934 *rdx = edx;
6935 break;
6936 case 0x40000000: /* Hypervisor information */
6937 *rax = 0;
6938 *rbx = *((uint32_t *)&vmm_hv_signature[0]);
6939 *rcx = *((uint32_t *)&vmm_hv_signature[4]);
6940 *rdx = *((uint32_t *)&vmm_hv_signature[8]);
6941 break;
6942 case 0x40000001: /* KVM hypervisor features */
6943 *rax = (1 << KVM_FEATURE_CLOCKSOURCE23) |
6944 (1 << KVM_FEATURE_CLOCKSOURCE_STABLE_BIT24);
6945 *rbx = 0;
6946 *rcx = 0;
6947 *rdx = 0;
6948 break;
6949 case 0x80000000: /* Extended function level */
6950 *rax = 0x80000008; /* curcpu()->ci_pnfeatset */
6951 *rbx = 0;
6952 *rcx = 0;
6953 *rdx = 0;
6954 break;
6955 case 0x80000001: /* Extended function info */
6956 *rax = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_efeature_eax;
6957 *rbx = 0; /* Reserved */
6958 *rcx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_efeature_ecx & VMM_ECPUIDECX_MASK~(0x00000004 | 0x20000000);
6959 *rdx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_feature_eflags & VMM_FEAT_EFLAGS_MASK~(0x08000000);
6960 break;
6961 case 0x80000002: /* Brand string */
6962 *rax = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_brand[0];
6963 *rbx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_brand[1];
6964 *rcx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_brand[2];
6965 *rdx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_brand[3];
6966 break;
6967 case 0x80000003: /* Brand string */
6968 *rax = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_brand[4];
6969 *rbx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_brand[5];
6970 *rcx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_brand[6];
6971 *rdx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_brand[7];
6972 break;
6973 case 0x80000004: /* Brand string */
6974 *rax = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_brand[8];
6975 *rbx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_brand[9];
6976 *rcx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_brand[10];
6977 *rdx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_brand[11];
6978 break;
6979 case 0x80000005: /* Reserved (Intel), cacheinfo (AMD) */
6980 *rax = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_amdcacheinfo[0];
6981 *rbx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_amdcacheinfo[1];
6982 *rcx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_amdcacheinfo[2];
6983 *rdx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_amdcacheinfo[3];
6984 break;
6985 case 0x80000006: /* ext. cache info */
6986 *rax = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_extcacheinfo[0];
6987 *rbx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_extcacheinfo[1];
6988 *rcx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_extcacheinfo[2];
6989 *rdx = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_extcacheinfo[3];
6990 break;
6991 case 0x80000007: /* apmi */
6992 CPUID(0x80000007, *rax, *rbx, *rcx, *rdx)__asm volatile("cpuid" : "=a" (*rax), "=b" (*rbx), "=c" (*rcx
), "=d" (*rdx) : "a" (0x80000007))
;
6993 break;
6994 case 0x80000008: /* Phys bits info and topology (AMD) */
6995 CPUID(0x80000008, *rax, *rbx, *rcx, *rdx)__asm volatile("cpuid" : "=a" (*rax), "=b" (*rbx), "=c" (*rcx
), "=d" (*rdx) : "a" (0x80000008))
;
6996 *rbx &= VMM_AMDSPEC_EBX_MASK~((1ULL << 12) | (1ULL << 14) | (1ULL << 15
) | (1ULL << 16) | (1ULL << 17) | (1ULL << 18
) | (1ULL << 24) | (1ULL << 25) | (1ULL << 26
))
;
6997 /* Reset %rcx (topology) */
6998 *rcx = 0;
6999 break;
7000 default:
7001 DPRINTF("%s: unsupported rax=0x%llx\n", __func__, *rax);
7002 *rax = 0;
7003 *rbx = 0;
7004 *rcx = 0;
7005 *rdx = 0;
7006 }
7007
7008
7009 if (vmm_softc->mode == VMM_MODE_SVM ||
15
Assuming field 'mode' is equal to VMM_MODE_SVM
7010 vmm_softc->mode == VMM_MODE_RVI) {
7011 /*
7012 * update %rax. the rest of the registers get updated in
7013 * svm_enter_guest
7014 */
7015 vmcb->v_rax = *rax;
16
Dereference of undefined pointer value
7016 }
7017
7018 return (0);
7019}
7020
7021/*
7022 * vcpu_run_svm
7023 *
7024 * SVM main loop used to run a VCPU.
7025 *
7026 * Parameters:
7027 * vcpu: The VCPU to run
7028 * vrp: run parameters
7029 *
7030 * Return values:
7031 * 0: The run loop exited and no help is needed from vmd
7032 * EAGAIN: The run loop exited and help from vmd is needed
7033 * EINVAL: an error occurred
7034 */
7035int
7036vcpu_run_svm(struct vcpu *vcpu, struct vm_run_params *vrp)
7037{
7038 int ret = 0;
7039 struct region_descriptor gdt;
7040 struct cpu_info *ci = NULL((void *)0);
7041 uint64_t exit_reason;
7042 struct schedstate_percpu *spc;
7043 uint16_t irq;
7044 struct vmcb *vmcb = (struct vmcb *)vcpu->vc_control_va;
7045
7046 irq = vrp->vrp_irq;
7047
7048 /*
7049 * If we are returning from userspace (vmd) because we exited
7050 * last time, fix up any needed vcpu state first. Which state
7051 * needs to be fixed up depends on what vmd populated in the
7052 * exit data structure.
7053 */
7054 if (vrp->vrp_continue) {
7055 switch (vcpu->vc_gueststate.vg_exit_reason) {
7056 case SVM_VMEXIT_IOIO0x7B:
7057 if (vcpu->vc_exit.vei.vei_dir == VEI_DIR_IN) {
7058 vcpu->vc_gueststate.vg_rax =
7059 vcpu->vc_exit.vei.vei_data;
7060 vmcb->v_rax = vcpu->vc_gueststate.vg_rax;
7061 }
7062 }
7063 }
7064
7065 while (ret == 0) {
7066 vmm_update_pvclock(vcpu);
7067 if (ci != curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
) {
7068 /*
7069 * We are launching for the first time, or we are
7070 * resuming from a different pcpu, so we need to
7071 * reset certain pcpu-specific values.
7072 */
7073 ci = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
;
7074 setregion(&gdt, ci->ci_gdt, GDT_SIZE((6 << 3) + (1 << 4)) - 1);
7075
7076 if (ci != vcpu->vc_last_pcpu) {
7077 /*
7078 * Flush TLB by guest ASID if feature
7079 * available, flush entire TLB if not.
7080 */
7081 if (ci->ci_vmm_cap.vcc_svm.svm_flush_by_asid)
7082 vmcb->v_tlb_control =
7083 SVM_TLB_CONTROL_FLUSH_ASID3;
7084 else
7085 vmcb->v_tlb_control =
7086 SVM_TLB_CONTROL_FLUSH_ALL1;
7087
7088 svm_set_dirty(vcpu, SVM_CLEANBITS_ALL((1 << 0) | (1 << 1) | (1 << 2) | (1 <<
3) | (1 << 4) | (1 << 5) | (1 << 6) | (1 <<
7) | (1 << 8) | (1 << 9) | (1 << 10) | (1 <<
11) )
);
7089 }
7090
7091 vcpu->vc_last_pcpu = ci;
7092
7093 if (gdt.rd_base == 0) {
7094 ret = EINVAL22;
7095 break;
7096 }
7097 }
7098
7099 /* Handle vmd(8) injected interrupts */
7100 /* Is there an interrupt pending injection? */
7101 if (irq != 0xFFFF && vcpu->vc_irqready) {
7102 vmcb->v_eventinj = (irq & 0xFF) | (1 << 31);
7103 irq = 0xFFFF;
7104 }
7105
7106 /* Inject event if present */
7107 if (vcpu->vc_event != 0) {
7108 DPRINTF("%s: inject event %d\n", __func__,
7109 vcpu->vc_event);
7110 vmcb->v_eventinj = 0;
7111 /* Set the "Event Valid" flag for certain vectors */
7112 switch (vcpu->vc_event & 0xFF) {
7113 case VMM_EX_DF8:
7114 case VMM_EX_TS10:
7115 case VMM_EX_NP11:
7116 case VMM_EX_SS12:
7117 case VMM_EX_GP13:
7118 case VMM_EX_PF14:
7119 case VMM_EX_AC17:
7120 vmcb->v_eventinj |= (1ULL << 11);
7121 }
7122 vmcb->v_eventinj |= (vcpu->vc_event) | (1 << 31);
7123 vmcb->v_eventinj |= (3ULL << 8); /* Exception */
7124 vcpu->vc_event = 0;
7125 }
7126
7127 TRACEPOINT(vmm, guest_enter, vcpu, vrp)do { extern struct dt_probe (dt_static_vmm_guest_enter); struct
dt_probe *dtp = &(dt_static_vmm_guest_enter); struct dt_provider
*dtpv = dtp->dtp_prov; if (__builtin_expect(((dt_tracing)
!= 0), 0) && __builtin_expect(((dtp->dtp_recording
) != 0), 0)) { dtpv->dtpv_enter(dtpv, dtp, vcpu, vrp); } }
while (0)
;
7128
7129 /* Start / resume the VCPU */
7130 /* Disable interrupts and save the current host FPU state. */
7131 clgi();
7132 if ((ret = vmm_fpurestore(vcpu))) {
7133 stgi();
7134 break;
7135 }
7136
7137 KASSERT(vmcb->v_intercept1 & SVM_INTERCEPT_INTR)((vmcb->v_intercept1 & (1UL << 0)) ? (void)0 : __assert
("diagnostic ", "/usr/src/sys/arch/amd64/amd64/vmm.c", 7137, "vmcb->v_intercept1 & SVM_INTERCEPT_INTR"
))
;
7138 wrmsr(MSR_AMD_VM_HSAVE_PA0xc0010117, vcpu->vc_svm_hsa_pa);
7139
7140 ret = svm_enter_guest(vcpu->vc_control_pa,
7141 &vcpu->vc_gueststate, &gdt);
7142
7143 /*
7144 * On exit, interrupts are disabled, and we are running with
7145 * the guest FPU state still possibly on the CPU. Save the FPU
7146 * state before re-enabling interrupts.
7147 */
7148 vmm_fpusave(vcpu);
7149
7150 /*
7151 * Enable interrupts now. Note that if the exit was due to INTR
7152 * (external interrupt), the interrupt will be processed now.
7153 */
7154 stgi();
7155
7156 vcpu->vc_gueststate.vg_rip = vmcb->v_rip;
7157 vmcb->v_tlb_control = SVM_TLB_CONTROL_FLUSH_NONE0;
7158 svm_set_clean(vcpu, SVM_CLEANBITS_ALL((1 << 0) | (1 << 1) | (1 << 2) | (1 <<
3) | (1 << 4) | (1 << 5) | (1 << 6) | (1 <<
7) | (1 << 8) | (1 << 9) | (1 << 10) | (1 <<
11) )
);
7159
7160 /* Record the exit reason on successful exit */
7161 if (ret == 0) {
7162 exit_reason = vmcb->v_exitcode;
7163 vcpu->vc_gueststate.vg_exit_reason = exit_reason;
7164 }
7165
7166 TRACEPOINT(vmm, guest_exit, vcpu, vrp, exit_reason)do { extern struct dt_probe (dt_static_vmm_guest_exit); struct
dt_probe *dtp = &(dt_static_vmm_guest_exit); struct dt_provider
*dtpv = dtp->dtp_prov; if (__builtin_expect(((dt_tracing)
!= 0), 0) && __builtin_expect(((dtp->dtp_recording
) != 0), 0)) { dtpv->dtpv_enter(dtpv, dtp, vcpu, vrp, exit_reason
); } } while (0)
;
7167
7168 /* If we exited successfully ... */
7169 if (ret == 0) {
7170 vcpu->vc_gueststate.vg_rflags = vmcb->v_rflags;
7171
7172 /*
7173 * Handle the exit. This will alter "ret" to EAGAIN if
7174 * the exit handler determines help from vmd is needed.
7175 */
7176 ret = svm_handle_exit(vcpu);
7177
7178 if (vcpu->vc_gueststate.vg_rflags & PSL_I0x00000200)
7179 vcpu->vc_irqready = 1;
7180 else
7181 vcpu->vc_irqready = 0;
7182
7183 /*
7184 * If not ready for interrupts, but interrupts pending,
7185 * enable interrupt window exiting.
7186 */
7187 if (vcpu->vc_irqready == 0 && vcpu->vc_intr) {
7188 vmcb->v_intercept1 |= SVM_INTERCEPT_VINTR(1UL << 4);
7189 vmcb->v_irq = 1;
7190 vmcb->v_intr_misc = SVM_INTR_MISC_V_IGN_TPR0x10;
7191 vmcb->v_intr_vector = 0;
7192 svm_set_dirty(vcpu, SVM_CLEANBITS_TPR(1 << 3) |
7193 SVM_CLEANBITS_I(1 << 0));
7194 }
7195
7196 /*
7197 * Exit to vmd if we are terminating, failed to enter,
7198 * or need help (device I/O)
7199 */
7200 if (ret || vcpu_must_stop(vcpu))
7201 break;
7202
7203 if (vcpu->vc_intr && vcpu->vc_irqready) {
7204 ret = EAGAIN35;
7205 break;
7206 }
7207
7208 /* Check if we should yield - don't hog the cpu */
7209 spc = &ci->ci_schedstate;
7210 if (spc->spc_schedflags & SPCF_SHOULDYIELD0x0002)
7211 break;
7212 }
7213 }
7214
7215 /*
7216 * We are heading back to userspace (vmd), either because we need help
7217 * handling an exit, a guest interrupt is pending, or we failed in some
7218 * way to enter the guest. Copy the guest registers to the exit struct
7219 * and return to vmd.
7220 */
7221 if (vcpu_readregs_svm(vcpu, VM_RWREGS_ALL(0x1 | 0x2 | 0x4 | 0x8 | 0x10), &vcpu->vc_exit.vrs))
7222 ret = EINVAL22;
7223
7224 return (ret);
7225}
7226
7227/*
7228 * vmm_alloc_vpid
7229 *
7230 * Sets the memory location pointed to by "vpid" to the next available VPID
7231 * or ASID.
7232 *
7233 * Parameters:
7234 * vpid: Pointer to location to receive the next VPID/ASID
7235 *
7236 * Return Values:
7237 * 0: The operation completed successfully
7238 * ENOMEM: No VPIDs/ASIDs were available. Content of 'vpid' is unchanged.
7239 */
7240int
7241vmm_alloc_vpid(uint16_t *vpid)
7242{
7243 uint16_t i;
7244 uint8_t idx, bit;
7245 struct vmm_softc *sc = vmm_softc;
7246
7247 rw_enter_write(&vmm_softc->vpid_lock);
7248 for (i = 1; i <= sc->max_vpid; i++) {
7249 idx = i / 8;
7250 bit = i - (idx * 8);
7251
7252 if (!(sc->vpids[idx] & (1 << bit))) {
7253 sc->vpids[idx] |= (1 << bit);
7254 *vpid = i;
7255 DPRINTF("%s: allocated VPID/ASID %d\n", __func__,
7256 i);
7257 rw_exit_write(&vmm_softc->vpid_lock);
7258 return 0;
7259 }
7260 }
7261
7262 printf("%s: no available %ss\n", __func__,
7263 (sc->mode == VMM_MODE_EPT || sc->mode == VMM_MODE_VMX) ? "VPID" :
7264 "ASID");
7265
7266 rw_exit_write(&vmm_softc->vpid_lock);
7267 return ENOMEM12;
7268}
7269
7270/*
7271 * vmm_free_vpid
7272 *
7273 * Frees the VPID/ASID id supplied in "vpid".
7274 *
7275 * Parameters:
7276 * vpid: VPID/ASID to free.
7277 */
7278void
7279vmm_free_vpid(uint16_t vpid)
7280{
7281 uint8_t idx, bit;
7282 struct vmm_softc *sc = vmm_softc;
7283
7284 rw_enter_write(&vmm_softc->vpid_lock);
7285 idx = vpid / 8;
7286 bit = vpid - (idx * 8);
7287 sc->vpids[idx] &= ~(1 << bit);
7288
7289 DPRINTF("%s: freed VPID/ASID %d\n", __func__, vpid);
7290 rw_exit_write(&vmm_softc->vpid_lock);
7291}
7292
7293
7294/* vmm_gpa_is_valid
7295 *
7296 * Check if the given gpa is within guest memory space.
7297 *
7298 * Parameters:
7299 * vcpu: The virtual cpu we are running on.
7300 * gpa: The address to check.
7301 * obj_size: The size of the object assigned to gpa
7302 *
7303 * Return values:
7304 * 1: gpa is within the memory ranges allocated for the vcpu
7305 * 0: otherwise
7306 */
7307int
7308vmm_gpa_is_valid(struct vcpu *vcpu, paddr_t gpa, size_t obj_size)
7309{
7310 struct vm *vm = vcpu->vc_parent;
7311 struct vm_mem_range *vmr;
7312 size_t i;
7313
7314 for (i = 0; i < vm->vm_nmemranges; ++i) {
7315 vmr = &vm->vm_memranges[i];
7316 if (vmr->vmr_size >= obj_size &&
7317 vmr->vmr_gpa <= gpa &&
7318 gpa < (vmr->vmr_gpa + vmr->vmr_size - obj_size)) {
7319 return 1;
7320 }
7321 }
7322 return 0;
7323}
7324
7325void
7326vmm_init_pvclock(struct vcpu *vcpu, paddr_t gpa)
7327{
7328 paddr_t pvclock_gpa = gpa & 0xFFFFFFFFFFFFFFF0;
7329 if (!vmm_gpa_is_valid(vcpu, pvclock_gpa,
7330 sizeof(struct pvclock_time_info))) {
7331 /* XXX: Kill guest? */
7332 vmm_inject_gp(vcpu);
7333 return;
7334 }
7335
7336 /* XXX: handle case when this struct goes over page boundaries */
7337 if ((pvclock_gpa & PAGE_MASK((1 << 12) - 1)) + sizeof(struct pvclock_time_info) >
7338 PAGE_SIZE(1 << 12)) {
7339 vmm_inject_gp(vcpu);
7340 return;
7341 }
7342
7343 vcpu->vc_pvclock_system_gpa = gpa;
7344 if (tsc_frequency > 0)
7345 vcpu->vc_pvclock_system_tsc_mul =
7346 (int) ((1000000000L << 20) / tsc_frequency);
7347 else
7348 vcpu->vc_pvclock_system_tsc_mul = 0;
7349 vmm_update_pvclock(vcpu);
7350}
7351
7352int
7353vmm_update_pvclock(struct vcpu *vcpu)
7354{
7355 struct pvclock_time_info *pvclock_ti;
7356 struct timespec tv;
7357 struct vm *vm = vcpu->vc_parent;
7358 paddr_t pvclock_hpa, pvclock_gpa;
7359
7360 if (vcpu->vc_pvclock_system_gpa & PVCLOCK_SYSTEM_TIME_ENABLE0x01) {
7361 pvclock_gpa = vcpu->vc_pvclock_system_gpa & 0xFFFFFFFFFFFFFFF0;
7362 if (!pmap_extract(vm->vm_map->pmap, pvclock_gpa, &pvclock_hpa))
7363 return (EINVAL22);
7364 pvclock_ti = (void*) PMAP_DIRECT_MAP(pvclock_hpa)((vaddr_t)(((((511 - 4) * (1ULL << 39))) | 0xffff000000000000
)) + (pvclock_hpa))
;
7365
7366 /* START next cycle (must be odd) */
7367 pvclock_ti->ti_version =
7368 (++vcpu->vc_pvclock_version << 1) | 0x1;
7369
7370 pvclock_ti->ti_tsc_timestamp = rdtsc();
7371 nanotime(&tv);
7372 pvclock_ti->ti_system_time =
7373 tv.tv_sec * 1000000000L + tv.tv_nsec;
7374 pvclock_ti->ti_tsc_shift = 12;
7375 pvclock_ti->ti_tsc_to_system_mul =
7376 vcpu->vc_pvclock_system_tsc_mul;
7377 pvclock_ti->ti_flags = PVCLOCK_FLAG_TSC_STABLE0x01;
7378
7379 /* END (must be even) */
7380 pvclock_ti->ti_version &= ~0x1;
7381 }
7382 return (0);
7383}
7384
7385int
7386vmm_pat_is_valid(uint64_t pat)
7387{
7388 int i;
7389 uint8_t *byte = (uint8_t *)&pat;
7390
7391 /* Intel SDM Vol 3A, 11.12.2: 0x02, 0x03, and 0x08-0xFF result in #GP */
7392 for (i = 0; i < 8; i++) {
7393 if (byte[i] == 0x02 || byte[i] == 0x03 || byte[i] > 0x07) {
7394 DPRINTF("%s: invalid pat %llx\n", __func__, pat);
7395 return 0;
7396 }
7397 }
7398
7399 return 1;
7400}
7401
7402/*
7403 * vmx_exit_reason_decode
7404 *
7405 * Returns a human readable string describing exit type 'code'
7406 */
7407const char *
7408vmx_exit_reason_decode(uint32_t code)
7409{
7410 switch (code) {
7411 case VMX_EXIT_NMI0: return "NMI";
7412 case VMX_EXIT_EXTINT1: return "External interrupt";
7413 case VMX_EXIT_TRIPLE_FAULT2: return "Triple fault";
7414 case VMX_EXIT_INIT3: return "INIT signal";
7415 case VMX_EXIT_SIPI4: return "SIPI signal";
7416 case VMX_EXIT_IO_SMI5: return "I/O SMI";
7417 case VMX_EXIT_OTHER_SMI6: return "other SMI";
7418 case VMX_EXIT_INT_WINDOW7: return "Interrupt window";
7419 case VMX_EXIT_NMI_WINDOW8: return "NMI window";
7420 case VMX_EXIT_TASK_SWITCH9: return "Task switch";
7421 case VMX_EXIT_CPUID10: return "CPUID instruction";
7422 case VMX_EXIT_GETSEC11: return "GETSEC instruction";
7423 case VMX_EXIT_HLT12: return "HLT instruction";
7424 case VMX_EXIT_INVD13: return "INVD instruction";
7425 case VMX_EXIT_INVLPG14: return "INVLPG instruction";
7426 case VMX_EXIT_RDPMC15: return "RDPMC instruction";
7427 case VMX_EXIT_RDTSC16: return "RDTSC instruction";
7428 case VMX_EXIT_RSM17: return "RSM instruction";
7429 case VMX_EXIT_VMCALL18: return "VMCALL instruction";
7430 case VMX_EXIT_VMCLEAR19: return "VMCLEAR instruction";
7431 case VMX_EXIT_VMLAUNCH20: return "VMLAUNCH instruction";
7432 case VMX_EXIT_VMPTRLD21: return "VMPTRLD instruction";
7433 case VMX_EXIT_VMPTRST22: return "VMPTRST instruction";
7434 case VMX_EXIT_VMREAD23: return "VMREAD instruction";
7435 case VMX_EXIT_VMRESUME24: return "VMRESUME instruction";
7436 case VMX_EXIT_VMWRITE25: return "VMWRITE instruction";
7437 case VMX_EXIT_VMXOFF26: return "VMXOFF instruction";
7438 case VMX_EXIT_VMXON27: return "VMXON instruction";
7439 case VMX_EXIT_CR_ACCESS28: return "CR access";
7440 case VMX_EXIT_MOV_DR29: return "MOV DR instruction";
7441 case VMX_EXIT_IO30: return "I/O instruction";
7442 case VMX_EXIT_RDMSR31: return "RDMSR instruction";
7443 case VMX_EXIT_WRMSR32: return "WRMSR instruction";
7444 case VMX_EXIT_ENTRY_FAILED_GUEST_STATE33: return "guest state invalid";
7445 case VMX_EXIT_ENTRY_FAILED_MSR_LOAD34: return "MSR load failed";
7446 case VMX_EXIT_MWAIT36: return "MWAIT instruction";
7447 case VMX_EXIT_MTF37: return "monitor trap flag";
7448 case VMX_EXIT_MONITOR39: return "MONITOR instruction";
7449 case VMX_EXIT_PAUSE40: return "PAUSE instruction";
7450 case VMX_EXIT_ENTRY_FAILED_MCE41: return "MCE during entry";
7451 case VMX_EXIT_TPR_BELOW_THRESHOLD43: return "TPR below threshold";
7452 case VMX_EXIT_APIC_ACCESS44: return "APIC access";
7453 case VMX_EXIT_VIRTUALIZED_EOI45: return "virtualized EOI";
7454 case VMX_EXIT_GDTR_IDTR46: return "GDTR/IDTR access";
7455 case VMX_EXIT_LDTR_TR47: return "LDTR/TR access";
7456 case VMX_EXIT_EPT_VIOLATION48: return "EPT violation";
7457 case VMX_EXIT_EPT_MISCONFIGURATION49: return "EPT misconfiguration";
7458 case VMX_EXIT_INVEPT50: return "INVEPT instruction";
7459 case VMX_EXIT_RDTSCP51: return "RDTSCP instruction";
7460 case VMX_EXIT_VMX_PREEMPTION_TIMER_EXPIRED52:
7461 return "preemption timer expired";
7462 case VMX_EXIT_INVVPID53: return "INVVPID instruction";
7463 case VMX_EXIT_WBINVD54: return "WBINVD instruction";
7464 case VMX_EXIT_XSETBV55: return "XSETBV instruction";
7465 case VMX_EXIT_APIC_WRITE56: return "APIC write";
7466 case VMX_EXIT_RDRAND57: return "RDRAND instruction";
7467 case VMX_EXIT_INVPCID58: return "INVPCID instruction";
7468 case VMX_EXIT_VMFUNC59: return "VMFUNC instruction";
7469 case VMX_EXIT_RDSEED61: return "RDSEED instruction";
7470 case VMX_EXIT_XSAVES63: return "XSAVES instruction";
7471 case VMX_EXIT_XRSTORS64: return "XRSTORS instruction";
7472 default: return "unknown";
7473 }
7474}
7475
7476/*
7477 * svm_exit_reason_decode
7478 *
7479 * Returns a human readable string describing exit type 'code'
7480 */
7481const char *
7482svm_exit_reason_decode(uint32_t code)
7483{
7484 switch (code) {
7485 case SVM_VMEXIT_CR0_READ0x00: return "CR0 read"; /* 0x00 */
7486 case SVM_VMEXIT_CR1_READ0x01: return "CR1 read"; /* 0x01 */
7487 case SVM_VMEXIT_CR2_READ0x02: return "CR2 read"; /* 0x02 */
7488 case SVM_VMEXIT_CR3_READ0x03: return "CR3 read"; /* 0x03 */
7489 case SVM_VMEXIT_CR4_READ0x04: return "CR4 read"; /* 0x04 */
7490 case SVM_VMEXIT_CR5_READ0x05: return "CR5 read"; /* 0x05 */
7491 case SVM_VMEXIT_CR6_READ0x06: return "CR6 read"; /* 0x06 */
7492 case SVM_VMEXIT_CR7_READ0x07: return "CR7 read"; /* 0x07 */
7493 case SVM_VMEXIT_CR8_READ0x08: return "CR8 read"; /* 0x08 */
7494 case SVM_VMEXIT_CR9_READ0x09: return "CR9 read"; /* 0x09 */
7495 case SVM_VMEXIT_CR10_READ0x0A: return "CR10 read"; /* 0x0A */
7496 case SVM_VMEXIT_CR11_READ0x0B: return "CR11 read"; /* 0x0B */
7497 case SVM_VMEXIT_CR12_READ0x0C: return "CR12 read"; /* 0x0C */
7498 case SVM_VMEXIT_CR13_READ0x0D: return "CR13 read"; /* 0x0D */
7499 case SVM_VMEXIT_CR14_READ0x0E: return "CR14 read"; /* 0x0E */
7500 case SVM_VMEXIT_CR15_READ0x0F: return "CR15 read"; /* 0x0F */
7501 case SVM_VMEXIT_CR0_WRITE0x10: return "CR0 write"; /* 0x10 */
7502 case SVM_VMEXIT_CR1_WRITE0x11: return "CR1 write"; /* 0x11 */
7503 case SVM_VMEXIT_CR2_WRITE0x12: return "CR2 write"; /* 0x12 */
7504 case SVM_VMEXIT_CR3_WRITE0x13: return "CR3 write"; /* 0x13 */
7505 case SVM_VMEXIT_CR4_WRITE0x14: return "CR4 write"; /* 0x14 */
7506 case SVM_VMEXIT_CR5_WRITE0x15: return "CR5 write"; /* 0x15 */
7507 case SVM_VMEXIT_CR6_WRITE0x16: return "CR6 write"; /* 0x16 */
7508 case SVM_VMEXIT_CR7_WRITE0x17: return "CR7 write"; /* 0x17 */
7509 case SVM_VMEXIT_CR8_WRITE0x18: return "CR8 write"; /* 0x18 */
7510 case SVM_VMEXIT_CR9_WRITE0x19: return "CR9 write"; /* 0x19 */
7511 case SVM_VMEXIT_CR10_WRITE0x1A: return "CR10 write"; /* 0x1A */
7512 case SVM_VMEXIT_CR11_WRITE0x1B: return "CR11 write"; /* 0x1B */
7513 case SVM_VMEXIT_CR12_WRITE0x1C: return "CR12 write"; /* 0x1C */
7514 case SVM_VMEXIT_CR13_WRITE0x1D: return "CR13 write"; /* 0x1D */
7515 case SVM_VMEXIT_CR14_WRITE0x1E: return "CR14 write"; /* 0x1E */
7516 case SVM_VMEXIT_CR15_WRITE0x1F: return "CR15 write"; /* 0x1F */
7517 case SVM_VMEXIT_DR0_READ0x20: return "DR0 read"; /* 0x20 */
7518 case SVM_VMEXIT_DR1_READ0x21: return "DR1 read"; /* 0x21 */
7519 case SVM_VMEXIT_DR2_READ0x22: return "DR2 read"; /* 0x22 */
7520 case SVM_VMEXIT_DR3_READ0x23: return "DR3 read"; /* 0x23 */
7521 case SVM_VMEXIT_DR4_READ0x24: return "DR4 read"; /* 0x24 */
7522 case SVM_VMEXIT_DR5_READ0x25: return "DR5 read"; /* 0x25 */
7523 case SVM_VMEXIT_DR6_READ0x26: return "DR6 read"; /* 0x26 */
7524 case SVM_VMEXIT_DR7_READ0x27: return "DR7 read"; /* 0x27 */
7525 case SVM_VMEXIT_DR8_READ0x28: return "DR8 read"; /* 0x28 */
7526 case SVM_VMEXIT_DR9_READ0x29: return "DR9 read"; /* 0x29 */
7527 case SVM_VMEXIT_DR10_READ0x2A: return "DR10 read"; /* 0x2A */
7528 case SVM_VMEXIT_DR11_READ0x2B: return "DR11 read"; /* 0x2B */
7529 case SVM_VMEXIT_DR12_READ0x2C: return "DR12 read"; /* 0x2C */
7530 case SVM_VMEXIT_DR13_READ0x2D: return "DR13 read"; /* 0x2D */
7531 case SVM_VMEXIT_DR14_READ0x2E: return "DR14 read"; /* 0x2E */
7532 case SVM_VMEXIT_DR15_READ0x2F: return "DR15 read"; /* 0x2F */
7533 case SVM_VMEXIT_DR0_WRITE0x30: return "DR0 write"; /* 0x30 */
7534 case SVM_VMEXIT_DR1_WRITE0x31: return "DR1 write"; /* 0x31 */
7535 case SVM_VMEXIT_DR2_WRITE0x32: return "DR2 write"; /* 0x32 */
7536 case SVM_VMEXIT_DR3_WRITE0x33: return "DR3 write"; /* 0x33 */
7537 case SVM_VMEXIT_DR4_WRITE0x34: return "DR4 write"; /* 0x34 */
7538 case SVM_VMEXIT_DR5_WRITE0x35: return "DR5 write"; /* 0x35 */
7539 case SVM_VMEXIT_DR6_WRITE0x36: return "DR6 write"; /* 0x36 */
7540 case SVM_VMEXIT_DR7_WRITE0x37: return "DR7 write"; /* 0x37 */
7541 case SVM_VMEXIT_DR8_WRITE0x38: return "DR8 write"; /* 0x38 */
7542 case SVM_VMEXIT_DR9_WRITE0x39: return "DR9 write"; /* 0x39 */
7543 case SVM_VMEXIT_DR10_WRITE0x3A: return "DR10 write"; /* 0x3A */
7544 case SVM_VMEXIT_DR11_WRITE0x3B: return "DR11 write"; /* 0x3B */
7545 case SVM_VMEXIT_DR12_WRITE0x3C: return "DR12 write"; /* 0x3C */
7546 case SVM_VMEXIT_DR13_WRITE0x3D: return "DR13 write"; /* 0x3D */
7547 case SVM_VMEXIT_DR14_WRITE0x3E: return "DR14 write"; /* 0x3E */
7548 case SVM_VMEXIT_DR15_WRITE0x3F: return "DR15 write"; /* 0x3F */
7549 case SVM_VMEXIT_EXCP00x40: return "Exception 0x00"; /* 0x40 */
7550 case SVM_VMEXIT_EXCP10x41: return "Exception 0x01"; /* 0x41 */
7551 case SVM_VMEXIT_EXCP20x42: return "Exception 0x02"; /* 0x42 */
7552 case SVM_VMEXIT_EXCP30x43: return "Exception 0x03"; /* 0x43 */
7553 case SVM_VMEXIT_EXCP40x44: return "Exception 0x04"; /* 0x44 */
7554 case SVM_VMEXIT_EXCP50x45: return "Exception 0x05"; /* 0x45 */
7555 case SVM_VMEXIT_EXCP60x46: return "Exception 0x06"; /* 0x46 */
7556 case SVM_VMEXIT_EXCP70x47: return "Exception 0x07"; /* 0x47 */
7557 case SVM_VMEXIT_EXCP80x48: return "Exception 0x08"; /* 0x48 */
7558 case SVM_VMEXIT_EXCP90x49: return "Exception 0x09"; /* 0x49 */
7559 case SVM_VMEXIT_EXCP100x4A: return "Exception 0x0A"; /* 0x4A */
7560 case SVM_VMEXIT_EXCP110x4B: return "Exception 0x0B"; /* 0x4B */
7561 case SVM_VMEXIT_EXCP120x4C: return "Exception 0x0C"; /* 0x4C */
7562 case SVM_VMEXIT_EXCP130x4D: return "Exception 0x0D"; /* 0x4D */
7563 case SVM_VMEXIT_EXCP140x4E: return "Exception 0x0E"; /* 0x4E */
7564 case SVM_VMEXIT_EXCP150x4F: return "Exception 0x0F"; /* 0x4F */
7565 case SVM_VMEXIT_EXCP160x50: return "Exception 0x10"; /* 0x50 */
7566 case SVM_VMEXIT_EXCP170x51: return "Exception 0x11"; /* 0x51 */
7567 case SVM_VMEXIT_EXCP180x52: return "Exception 0x12"; /* 0x52 */
7568 case SVM_VMEXIT_EXCP190x53: return "Exception 0x13"; /* 0x53 */
7569 case SVM_VMEXIT_EXCP200x54: return "Exception 0x14"; /* 0x54 */
7570 case SVM_VMEXIT_EXCP210x55: return "Exception 0x15"; /* 0x55 */
7571 case SVM_VMEXIT_EXCP220x56: return "Exception 0x16"; /* 0x56 */
7572 case SVM_VMEXIT_EXCP230x57: return "Exception 0x17"; /* 0x57 */
7573 case SVM_VMEXIT_EXCP240x58: return "Exception 0x18"; /* 0x58 */
7574 case SVM_VMEXIT_EXCP250x59: return "Exception 0x19"; /* 0x59 */
7575 case SVM_VMEXIT_EXCP260x5A: return "Exception 0x1A"; /* 0x5A */
7576 case SVM_VMEXIT_EXCP270x5B: return "Exception 0x1B"; /* 0x5B */
7577 case SVM_VMEXIT_EXCP280x5C: return "Exception 0x1C"; /* 0x5C */
7578 case SVM_VMEXIT_EXCP290x5D: return "Exception 0x1D"; /* 0x5D */
7579 case SVM_VMEXIT_EXCP300x5E: return "Exception 0x1E"; /* 0x5E */
7580 case SVM_VMEXIT_EXCP310x5F: return "Exception 0x1F"; /* 0x5F */
7581 case SVM_VMEXIT_INTR0x60: return "External interrupt"; /* 0x60 */
7582 case SVM_VMEXIT_NMI0x61: return "NMI"; /* 0x61 */
7583 case SVM_VMEXIT_SMI0x62: return "SMI"; /* 0x62 */
7584 case SVM_VMEXIT_INIT0x63: return "INIT"; /* 0x63 */
7585 case SVM_VMEXIT_VINTR0x64: return "Interrupt window"; /* 0x64 */
7586 case SVM_VMEXIT_CR0_SEL_WRITE0x65: return "Sel CR0 write"; /* 0x65 */
7587 case SVM_VMEXIT_IDTR_READ0x66: return "IDTR read"; /* 0x66 */
7588 case SVM_VMEXIT_GDTR_READ0x67: return "GDTR read"; /* 0x67 */
7589 case SVM_VMEXIT_LDTR_READ0x68: return "LDTR read"; /* 0x68 */
7590 case SVM_VMEXIT_TR_READ0x69: return "TR read"; /* 0x69 */
7591 case SVM_VMEXIT_IDTR_WRITE0x6A: return "IDTR write"; /* 0x6A */
7592 case SVM_VMEXIT_GDTR_WRITE0x6B: return "GDTR write"; /* 0x6B */
7593 case SVM_VMEXIT_LDTR_WRITE0x6C: return "LDTR write"; /* 0x6C */
7594 case SVM_VMEXIT_TR_WRITE0x6D: return "TR write"; /* 0x6D */
7595 case SVM_VMEXIT_RDTSC0x6E: return "RDTSC instruction"; /* 0x6E */
7596 case SVM_VMEXIT_RDPMC0x6F: return "RDPMC instruction"; /* 0x6F */
7597 case SVM_VMEXIT_PUSHF0x70: return "PUSHF instruction"; /* 0x70 */
7598 case SVM_VMEXIT_POPF0x71: return "POPF instruction"; /* 0x71 */
7599 case SVM_VMEXIT_CPUID0x72: return "CPUID instruction"; /* 0x72 */
7600 case SVM_VMEXIT_RSM0x73: return "RSM instruction"; /* 0x73 */
7601 case SVM_VMEXIT_IRET0x74: return "IRET instruction"; /* 0x74 */
7602 case SVM_VMEXIT_SWINT0x75: return "SWINT instruction"; /* 0x75 */
7603 case SVM_VMEXIT_INVD0x76: return "INVD instruction"; /* 0x76 */
7604 case SVM_VMEXIT_PAUSE0x77: return "PAUSE instruction"; /* 0x77 */
7605 case SVM_VMEXIT_HLT0x78: return "HLT instruction"; /* 0x78 */
7606 case SVM_VMEXIT_INVLPG0x79: return "INVLPG instruction"; /* 0x79 */
7607 case SVM_VMEXIT_INVLPGA0x7A: return "INVLPGA instruction"; /* 0x7A */
7608 case SVM_VMEXIT_IOIO0x7B: return "I/O instruction"; /* 0x7B */
7609 case SVM_VMEXIT_MSR0x7C: return "RDMSR/WRMSR instruction"; /* 0x7C */
7610 case SVM_VMEXIT_TASK_SWITCH0x7D: return "Task switch"; /* 0x7D */
7611 case SVM_VMEXIT_FERR_FREEZE0x7E: return "FERR_FREEZE"; /* 0x7E */
7612 case SVM_VMEXIT_SHUTDOWN0x7F: return "Triple fault"; /* 0x7F */
7613 case SVM_VMEXIT_VMRUN0x80: return "VMRUN instruction"; /* 0x80 */
7614 case SVM_VMEXIT_VMMCALL0x81: return "VMMCALL instruction"; /* 0x81 */
7615 case SVM_VMEXIT_VMLOAD0x82: return "VMLOAD instruction"; /* 0x82 */
7616 case SVM_VMEXIT_VMSAVE0x83: return "VMSAVE instruction"; /* 0x83 */
7617 case SVM_VMEXIT_STGI0x84: return "STGI instruction"; /* 0x84 */
7618 case SVM_VMEXIT_CLGI0x85: return "CLGI instruction"; /* 0x85 */
7619 case SVM_VMEXIT_SKINIT0x86: return "SKINIT instruction"; /* 0x86 */
7620 case SVM_VMEXIT_RDTSCP0x87: return "RDTSCP instruction"; /* 0x87 */
7621 case SVM_VMEXIT_ICEBP0x88: return "ICEBP instruction"; /* 0x88 */
7622 case SVM_VMEXIT_WBINVD0x89: return "WBINVD instruction"; /* 0x89 */
7623 case SVM_VMEXIT_MONITOR0x8A: return "MONITOR instruction"; /* 0x8A */
7624 case SVM_VMEXIT_MWAIT0x8B: return "MWAIT instruction"; /* 0x8B */
7625 case SVM_VMEXIT_MWAIT_CONDITIONAL0x8C: return "Cond MWAIT"; /* 0x8C */
7626 case SVM_VMEXIT_NPF0x400: return "NPT violation"; /* 0x400 */
7627 default: return "unknown";
7628 }
7629}
7630
7631/*
7632 * vmx_instruction_error_decode
7633 *
7634 * Returns a human readable string describing the instruction error in 'code'
7635 */
7636const char *
7637vmx_instruction_error_decode(uint32_t code)
7638{
7639 switch (code) {
7640 case 1: return "VMCALL: unsupported in VMX root";
7641 case 2: return "VMCLEAR: invalid paddr";
7642 case 3: return "VMCLEAR: VMXON pointer";
7643 case 4: return "VMLAUNCH: non-clear VMCS";
7644 case 5: return "VMRESUME: non-launched VMCS";
7645 case 6: return "VMRESUME: executed after VMXOFF";
7646 case 7: return "VM entry: invalid control field(s)";
7647 case 8: return "VM entry: invalid host state field(s)";
7648 case 9: return "VMPTRLD: invalid paddr";
7649 case 10: return "VMPTRLD: VMXON pointer";
7650 case 11: return "VMPTRLD: incorrect VMCS revid";
7651 case 12: return "VMREAD/VMWRITE: unsupported VMCS field";
7652 case 13: return "VMWRITE: RO VMCS field";
7653 case 15: return "VMXON: unsupported in VMX root";
7654 case 20: return "VMCALL: invalid VM exit control fields";
7655 case 26: return "VM entry: blocked by MOV SS";
7656 case 28: return "Invalid operand to INVEPT/INVVPID";
7657 case 0x80000021: return "VM entry: invalid guest state";
7658 case 0x80000022: return "VM entry: failure due to MSR loading";
7659 case 0x80000029: return "VM entry: machine-check event";
7660 default: return "unknown";
7661 }
7662}
7663
7664/*
7665 * vcpu_state_decode
7666 *
7667 * Returns a human readable string describing the vcpu state in 'state'.
7668 */
7669const char *
7670vcpu_state_decode(u_int state)
7671{
7672 switch (state) {
7673 case VCPU_STATE_STOPPED: return "stopped";
7674 case VCPU_STATE_RUNNING: return "running";
7675 case VCPU_STATE_REQTERM: return "requesting termination";
7676 case VCPU_STATE_TERMINATED: return "terminated";
7677 case VCPU_STATE_UNKNOWN: return "unknown";
7678 default: return "invalid";
7679 }
7680}
7681
7682#ifdef VMM_DEBUG
7683/*
7684 * dump_vcpu
7685 *
7686 * Dumps the VMX capabilities of vcpu 'vcpu'
7687 */
7688void
7689dump_vcpu(struct vcpu *vcpu)
7690{
7691 printf("vcpu @ %p\n", vcpu);
7692 printf(" parent vm @ %p\n", vcpu->vc_parent);
7693 printf(" mode: ");
7694 if (vcpu->vc_virt_mode == VMM_MODE_VMX ||
7695 vcpu->vc_virt_mode == VMM_MODE_EPT) {
7696 printf("VMX\n");
7697 printf(" pinbased ctls: 0x%llx\n",
7698 vcpu->vc_vmx_pinbased_ctls);
7699 printf(" true pinbased ctls: 0x%llx\n",
7700 vcpu->vc_vmx_true_pinbased_ctls);
7701 CTRL_DUMP(vcpu, PINBASED, EXTERNAL_INT_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "EXTERNAL_INT_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x481, (1ULL << 0), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x481, (1ULL << 0), 0
) ? "Yes" : "No");
;
7702 CTRL_DUMP(vcpu, PINBASED, NMI_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "NMI_EXITING" , vcpu_vmx_check_cap
(vcpu, 0x481, (1ULL << 3), 1) ? "Yes" : "No", vcpu_vmx_check_cap
(vcpu, 0x481, (1ULL << 3), 0) ? "Yes" : "No");
;
7703 CTRL_DUMP(vcpu, PINBASED, VIRTUAL_NMIS)printf(" %s: Can set:%s Can clear:%s\n", "VIRTUAL_NMIS" ,
vcpu_vmx_check_cap(vcpu, 0x481, (1ULL << 5), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x481, (1ULL << 5), 0
) ? "Yes" : "No");
;
7704 CTRL_DUMP(vcpu, PINBASED, ACTIVATE_VMX_PREEMPTION_TIMER)printf(" %s: Can set:%s Can clear:%s\n", "ACTIVATE_VMX_PREEMPTION_TIMER"
, vcpu_vmx_check_cap(vcpu, 0x481, (1ULL << 6), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x481, (1ULL << 6), 0
) ? "Yes" : "No");
;
7705 CTRL_DUMP(vcpu, PINBASED, PROCESS_POSTED_INTERRUPTS)printf(" %s: Can set:%s Can clear:%s\n", "PROCESS_POSTED_INTERRUPTS"
, vcpu_vmx_check_cap(vcpu, 0x481, (1ULL << 7), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x481, (1ULL << 7), 0
) ? "Yes" : "No");
;
7706 printf(" procbased ctls: 0x%llx\n",
7707 vcpu->vc_vmx_procbased_ctls);
7708 printf(" true procbased ctls: 0x%llx\n",
7709 vcpu->vc_vmx_true_procbased_ctls);
7710 CTRL_DUMP(vcpu, PROCBASED, INTERRUPT_WINDOW_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "INTERRUPT_WINDOW_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 2), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 2), 0
) ? "Yes" : "No");
;
7711 CTRL_DUMP(vcpu, PROCBASED, USE_TSC_OFFSETTING)printf(" %s: Can set:%s Can clear:%s\n", "USE_TSC_OFFSETTING"
, vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 3), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 3), 0
) ? "Yes" : "No");
;
7712 CTRL_DUMP(vcpu, PROCBASED, HLT_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "HLT_EXITING" , vcpu_vmx_check_cap
(vcpu, 0x482, (1ULL << 7), 1) ? "Yes" : "No", vcpu_vmx_check_cap
(vcpu, 0x482, (1ULL << 7), 0) ? "Yes" : "No");
;
7713 CTRL_DUMP(vcpu, PROCBASED, INVLPG_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "INVLPG_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 9), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 9), 0
) ? "Yes" : "No");
;
7714 CTRL_DUMP(vcpu, PROCBASED, MWAIT_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "MWAIT_EXITING" ,
vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 10), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 10), 0
) ? "Yes" : "No");
;
7715 CTRL_DUMP(vcpu, PROCBASED, RDPMC_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "RDPMC_EXITING" ,
vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 11), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 11), 0
) ? "Yes" : "No");
;
7716 CTRL_DUMP(vcpu, PROCBASED, RDTSC_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "RDTSC_EXITING" ,
vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 12), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 12), 0
) ? "Yes" : "No");
;
7717 CTRL_DUMP(vcpu, PROCBASED, CR3_LOAD_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "CR3_LOAD_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 15), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 15), 0
) ? "Yes" : "No");
;
7718 CTRL_DUMP(vcpu, PROCBASED, CR3_STORE_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "CR3_STORE_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 16), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 16), 0
) ? "Yes" : "No");
;
7719 CTRL_DUMP(vcpu, PROCBASED, CR8_LOAD_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "CR8_LOAD_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 19), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 19), 0
) ? "Yes" : "No");
;
7720 CTRL_DUMP(vcpu, PROCBASED, CR8_STORE_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "CR8_STORE_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 20), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 20), 0
) ? "Yes" : "No");
;
7721 CTRL_DUMP(vcpu, PROCBASED, USE_TPR_SHADOW)printf(" %s: Can set:%s Can clear:%s\n", "USE_TPR_SHADOW"
, vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 21), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 21), 0
) ? "Yes" : "No");
;
7722 CTRL_DUMP(vcpu, PROCBASED, NMI_WINDOW_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "NMI_WINDOW_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 22), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 22), 0
) ? "Yes" : "No");
;
7723 CTRL_DUMP(vcpu, PROCBASED, MOV_DR_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "MOV_DR_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 23), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 23), 0
) ? "Yes" : "No");
;
7724 CTRL_DUMP(vcpu, PROCBASED, UNCONDITIONAL_IO_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "UNCONDITIONAL_IO_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 24), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 24), 0
) ? "Yes" : "No");
;
7725 CTRL_DUMP(vcpu, PROCBASED, USE_IO_BITMAPS)printf(" %s: Can set:%s Can clear:%s\n", "USE_IO_BITMAPS"
, vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 25), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 25), 0
) ? "Yes" : "No");
;
7726 CTRL_DUMP(vcpu, PROCBASED, MONITOR_TRAP_FLAG)printf(" %s: Can set:%s Can clear:%s\n", "MONITOR_TRAP_FLAG"
, vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 27), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 27), 0
) ? "Yes" : "No");
;
7727 CTRL_DUMP(vcpu, PROCBASED, USE_MSR_BITMAPS)printf(" %s: Can set:%s Can clear:%s\n", "USE_MSR_BITMAPS"
, vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 28), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 28), 0
) ? "Yes" : "No");
;
7728 CTRL_DUMP(vcpu, PROCBASED, MONITOR_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "MONITOR_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 29), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 29), 0
) ? "Yes" : "No");
;
7729 CTRL_DUMP(vcpu, PROCBASED, PAUSE_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "PAUSE_EXITING" ,
vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 30), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x482, (1ULL << 30), 0
) ? "Yes" : "No");
;
7730 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
7731 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1)) {
7732 printf(" procbased2 ctls: 0x%llx\n",
7733 vcpu->vc_vmx_procbased2_ctls);
7734 CTRL_DUMP(vcpu, PROCBASED2, VIRTUALIZE_APIC)printf(" %s: Can set:%s Can clear:%s\n", "VIRTUALIZE_APIC"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 0), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 0), 0
) ? "Yes" : "No");
;
7735 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_EPT)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_EPT" , vcpu_vmx_check_cap
(vcpu, 0x48B, (1ULL << 1), 1) ? "Yes" : "No", vcpu_vmx_check_cap
(vcpu, 0x48B, (1ULL << 1), 0) ? "Yes" : "No");
;
7736 CTRL_DUMP(vcpu, PROCBASED2, DESCRIPTOR_TABLE_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "DESCRIPTOR_TABLE_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 2), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 2), 0
) ? "Yes" : "No");
;
7737 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_RDTSCP)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_RDTSCP" ,
vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 3), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 3), 0
) ? "Yes" : "No");
;
7738 CTRL_DUMP(vcpu, PROCBASED2, VIRTUALIZE_X2APIC_MODE)printf(" %s: Can set:%s Can clear:%s\n", "VIRTUALIZE_X2APIC_MODE"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 4), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 4), 0
) ? "Yes" : "No");
;
7739 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_VPID)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_VPID" , vcpu_vmx_check_cap
(vcpu, 0x48B, (1ULL << 5), 1) ? "Yes" : "No", vcpu_vmx_check_cap
(vcpu, 0x48B, (1ULL << 5), 0) ? "Yes" : "No");
;
7740 CTRL_DUMP(vcpu, PROCBASED2, WBINVD_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "WBINVD_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 6), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 6), 0
) ? "Yes" : "No");
;
7741 CTRL_DUMP(vcpu, PROCBASED2, UNRESTRICTED_GUEST)printf(" %s: Can set:%s Can clear:%s\n", "UNRESTRICTED_GUEST"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 7), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 7), 0
) ? "Yes" : "No");
;
7742 CTRL_DUMP(vcpu, PROCBASED2,printf(" %s: Can set:%s Can clear:%s\n", "APIC_REGISTER_VIRTUALIZATION"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 8), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 8), 0
) ? "Yes" : "No");
7743 APIC_REGISTER_VIRTUALIZATION)printf(" %s: Can set:%s Can clear:%s\n", "APIC_REGISTER_VIRTUALIZATION"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 8), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 8), 0
) ? "Yes" : "No");
;
7744 CTRL_DUMP(vcpu, PROCBASED2,printf(" %s: Can set:%s Can clear:%s\n", "VIRTUAL_INTERRUPT_DELIVERY"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 9), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 9), 0
) ? "Yes" : "No");
7745 VIRTUAL_INTERRUPT_DELIVERY)printf(" %s: Can set:%s Can clear:%s\n", "VIRTUAL_INTERRUPT_DELIVERY"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 9), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 9), 0
) ? "Yes" : "No");
;
7746 CTRL_DUMP(vcpu, PROCBASED2, PAUSE_LOOP_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "PAUSE_LOOP_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 10), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 10), 0
) ? "Yes" : "No");
;
7747 CTRL_DUMP(vcpu, PROCBASED2, RDRAND_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "RDRAND_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 11), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 11), 0
) ? "Yes" : "No");
;
7748 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_INVPCID)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_INVPCID"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 12), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 12), 0
) ? "Yes" : "No");
;
7749 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_VM_FUNCTIONS)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_VM_FUNCTIONS"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 13), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 13), 0
) ? "Yes" : "No");
;
7750 CTRL_DUMP(vcpu, PROCBASED2, VMCS_SHADOWING)printf(" %s: Can set:%s Can clear:%s\n", "VMCS_SHADOWING"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 14), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 14), 0
) ? "Yes" : "No");
;
7751 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_ENCLS_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_ENCLS_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 15), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 15), 0
) ? "Yes" : "No");
;
7752 CTRL_DUMP(vcpu, PROCBASED2, RDSEED_EXITING)printf(" %s: Can set:%s Can clear:%s\n", "RDSEED_EXITING"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 16), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 16), 0
) ? "Yes" : "No");
;
7753 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_PML)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_PML" , vcpu_vmx_check_cap
(vcpu, 0x48B, (1ULL << 17), 1) ? "Yes" : "No", vcpu_vmx_check_cap
(vcpu, 0x48B, (1ULL << 17), 0) ? "Yes" : "No");
;
7754 CTRL_DUMP(vcpu, PROCBASED2, EPT_VIOLATION_VE)printf(" %s: Can set:%s Can clear:%s\n", "EPT_VIOLATION_VE"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 18), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 18), 0
) ? "Yes" : "No");
;
7755 CTRL_DUMP(vcpu, PROCBASED2, CONCEAL_VMX_FROM_PT)printf(" %s: Can set:%s Can clear:%s\n", "CONCEAL_VMX_FROM_PT"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 19), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 19), 0
) ? "Yes" : "No");
;
7756 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_XSAVES_XRSTORS)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_XSAVES_XRSTORS"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 20), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 20), 0
) ? "Yes" : "No");
;
7757 CTRL_DUMP(vcpu, PROCBASED2, ENABLE_TSC_SCALING)printf(" %s: Can set:%s Can clear:%s\n", "ENABLE_TSC_SCALING"
, vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 25), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x48B, (1ULL << 25), 0
) ? "Yes" : "No");
;
7758 }
7759 printf(" entry ctls: 0x%llx\n",
7760 vcpu->vc_vmx_entry_ctls);
7761 printf(" true entry ctls: 0x%llx\n",
7762 vcpu->vc_vmx_true_entry_ctls);
7763 CTRL_DUMP(vcpu, ENTRY, LOAD_DEBUG_CONTROLS)printf(" %s: Can set:%s Can clear:%s\n", "LOAD_DEBUG_CONTROLS"
, vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 2), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 2), 0
) ? "Yes" : "No");
;
7764 CTRL_DUMP(vcpu, ENTRY, IA32E_MODE_GUEST)printf(" %s: Can set:%s Can clear:%s\n", "IA32E_MODE_GUEST"
, vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 9), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 9), 0
) ? "Yes" : "No");
;
7765 CTRL_DUMP(vcpu, ENTRY, ENTRY_TO_SMM)printf(" %s: Can set:%s Can clear:%s\n", "ENTRY_TO_SMM" ,
vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 10), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 10), 0
) ? "Yes" : "No");
;
7766 CTRL_DUMP(vcpu, ENTRY, DEACTIVATE_DUAL_MONITOR_TREATMENT)printf(" %s: Can set:%s Can clear:%s\n", "DEACTIVATE_DUAL_MONITOR_TREATMENT"
, vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 11), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 11), 0
) ? "Yes" : "No");
;
7767 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY)printf(" %s: Can set:%s Can clear:%s\n", "LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY"
, vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 13), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 13), 0
) ? "Yes" : "No");
;
7768 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_PAT_ON_ENTRY)printf(" %s: Can set:%s Can clear:%s\n", "LOAD_IA32_PAT_ON_ENTRY"
, vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 14), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 14), 0
) ? "Yes" : "No");
;
7769 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_EFER_ON_ENTRY)printf(" %s: Can set:%s Can clear:%s\n", "LOAD_IA32_EFER_ON_ENTRY"
, vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 15), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 15), 0
) ? "Yes" : "No");
;
7770 CTRL_DUMP(vcpu, ENTRY, LOAD_IA32_BNDCFGS_ON_ENTRY)printf(" %s: Can set:%s Can clear:%s\n", "LOAD_IA32_BNDCFGS_ON_ENTRY"
, vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 16), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 16), 0
) ? "Yes" : "No");
;
7771 CTRL_DUMP(vcpu, ENTRY, CONCEAL_VM_ENTRIES_FROM_PT)printf(" %s: Can set:%s Can clear:%s\n", "CONCEAL_VM_ENTRIES_FROM_PT"
, vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 17), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x484, (1ULL << 17), 0
) ? "Yes" : "No");
;
7772 printf(" exit ctls: 0x%llx\n",
7773 vcpu->vc_vmx_exit_ctls);
7774 printf(" true exit ctls: 0x%llx\n",
7775 vcpu->vc_vmx_true_exit_ctls);
7776 CTRL_DUMP(vcpu, EXIT, SAVE_DEBUG_CONTROLS)printf(" %s: Can set:%s Can clear:%s\n", "SAVE_DEBUG_CONTROLS"
, vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 2), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 2), 0
) ? "Yes" : "No");
;
7777 CTRL_DUMP(vcpu, EXIT, HOST_SPACE_ADDRESS_SIZE)printf(" %s: Can set:%s Can clear:%s\n", "HOST_SPACE_ADDRESS_SIZE"
, vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 9), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 9), 0
) ? "Yes" : "No");
;
7778 CTRL_DUMP(vcpu, EXIT, LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT)printf(" %s: Can set:%s Can clear:%s\n", "LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT"
, vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 12), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 12), 0
) ? "Yes" : "No");
;
7779 CTRL_DUMP(vcpu, EXIT, ACKNOWLEDGE_INTERRUPT_ON_EXIT)printf(" %s: Can set:%s Can clear:%s\n", "ACKNOWLEDGE_INTERRUPT_ON_EXIT"
, vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 15), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 15), 0
) ? "Yes" : "No");
;
7780 CTRL_DUMP(vcpu, EXIT, SAVE_IA32_PAT_ON_EXIT)printf(" %s: Can set:%s Can clear:%s\n", "SAVE_IA32_PAT_ON_EXIT"
, vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 18), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 18), 0
) ? "Yes" : "No");
;
7781 CTRL_DUMP(vcpu, EXIT, LOAD_IA32_PAT_ON_EXIT)printf(" %s: Can set:%s Can clear:%s\n", "LOAD_IA32_PAT_ON_EXIT"
, vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 19), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 19), 0
) ? "Yes" : "No");
;
7782 CTRL_DUMP(vcpu, EXIT, SAVE_IA32_EFER_ON_EXIT)printf(" %s: Can set:%s Can clear:%s\n", "SAVE_IA32_EFER_ON_EXIT"
, vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 20), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 20), 0
) ? "Yes" : "No");
;
7783 CTRL_DUMP(vcpu, EXIT, LOAD_IA32_EFER_ON_EXIT)printf(" %s: Can set:%s Can clear:%s\n", "LOAD_IA32_EFER_ON_EXIT"
, vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 21), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 21), 0
) ? "Yes" : "No");
;
7784 CTRL_DUMP(vcpu, EXIT, SAVE_VMX_PREEMPTION_TIMER)printf(" %s: Can set:%s Can clear:%s\n", "SAVE_VMX_PREEMPTION_TIMER"
, vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 22), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 22), 0
) ? "Yes" : "No");
;
7785 CTRL_DUMP(vcpu, EXIT, CLEAR_IA32_BNDCFGS_ON_EXIT)printf(" %s: Can set:%s Can clear:%s\n", "CLEAR_IA32_BNDCFGS_ON_EXIT"
, vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 23), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 23), 0
) ? "Yes" : "No");
;
7786 CTRL_DUMP(vcpu, EXIT, CONCEAL_VM_EXITS_FROM_PT)printf(" %s: Can set:%s Can clear:%s\n", "CONCEAL_VM_EXITS_FROM_PT"
, vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 24), 1) ? "Yes"
: "No", vcpu_vmx_check_cap(vcpu, 0x483, (1ULL << 24), 0
) ? "Yes" : "No");
;
7787 }
7788}
7789
7790/*
7791 * vmx_dump_vmcs_field
7792 *
7793 * Debug function to dump the contents of a single VMCS field
7794 *
7795 * Parameters:
7796 * fieldid: VMCS Field ID
7797 * msg: string to display
7798 */
7799void
7800vmx_dump_vmcs_field(uint16_t fieldid, const char *msg)
7801{
7802 uint8_t width;
7803 uint64_t val;
7804
7805
7806 DPRINTF("%s (0x%04x): ", msg, fieldid);
7807 if (vmread(fieldid, &val))
7808 DPRINTF("???? ");
7809 else {
7810 /*
7811 * Field width encoding : bits 13:14
7812 *
7813 * 0: 16-bit
7814 * 1: 64-bit
7815 * 2: 32-bit
7816 * 3: natural width
7817 */
7818 width = (fieldid >> 13) & 0x3;
7819 switch (width) {
7820 case 0: DPRINTF("0x%04llx ", val); break;
7821 case 1:
7822 case 3: DPRINTF("0x%016llx ", val); break;
7823 case 2: DPRINTF("0x%08llx ", val);
7824 }
7825 }
7826}
7827
7828/*
7829 * vmx_dump_vmcs
7830 *
7831 * Debug function to dump the contents of the current VMCS.
7832 */
7833void
7834vmx_dump_vmcs(struct vcpu *vcpu)
7835{
7836 int has_sec, i;
7837 uint32_t cr3_tgt_ct;
7838
7839 /* XXX save and load new vmcs, restore at end */
7840
7841 DPRINTF("--CURRENT VMCS STATE--\n");
7842 printf("VMCS launched: %s\n",
7843 (vcpu->vc_vmx_vmcs_state == VMCS_LAUNCHED1) ? "Yes" : "No");
7844 DPRINTF("VMXON revision : 0x%x\n",
7845 curcpu()->ci_vmm_cap.vcc_vmx.vmx_vmxon_revision);
7846 DPRINTF("CR0 fixed0: 0x%llx\n",
7847 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed0);
7848 DPRINTF("CR0 fixed1: 0x%llx\n",
7849 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr0_fixed1);
7850 DPRINTF("CR4 fixed0: 0x%llx\n",
7851 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed0);
7852 DPRINTF("CR4 fixed1: 0x%llx\n",
7853 curcpu()->ci_vmm_cap.vcc_vmx.vmx_cr4_fixed1);
7854 DPRINTF("MSR table size: 0x%x\n",
7855 512 * (curcpu()->ci_vmm_cap.vcc_vmx.vmx_msr_table_size + 1));
7856
7857 has_sec = vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
7858 IA32_VMX_ACTIVATE_SECONDARY_CONTROLS(1ULL << 31), 1);
7859
7860 if (has_sec) {
7861 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7862 IA32_VMX_ENABLE_VPID(1ULL << 5), 1)) {
7863 vmx_dump_vmcs_field(VMCS_GUEST_VPID0x0000, "VPID");
7864 }
7865 }
7866
7867 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS0x481,
7868 IA32_VMX_PROCESS_POSTED_INTERRUPTS(1ULL << 7), 1)) {
7869 vmx_dump_vmcs_field(VMCS_POSTED_INT_NOTIF_VECTOR0x0002,
7870 "Posted Int Notif Vec");
7871 }
7872
7873 if (has_sec) {
7874 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7875 IA32_VMX_EPT_VIOLATION_VE(1ULL << 18), 1)) {
7876 vmx_dump_vmcs_field(VMCS_EPTP_INDEX0x0004, "EPTP idx");
7877 }
7878 }
7879
7880 DPRINTF("\n");
7881 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_SEL0x0800, "G.ES");
7882 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_SEL0x0802, "G.CS");
7883 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_SEL0x0804, "G.SS");
7884 DPRINTF("\n");
7885 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_SEL0x0806, "G.DS");
7886 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_SEL0x0808, "G.FS");
7887 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_SEL0x080A, "G.GS");
7888 DPRINTF("\n");
7889 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_SEL0x080C, "LDTR");
7890 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_SEL0x080E, "G.TR");
7891
7892 if (has_sec) {
7893 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7894 IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY(1ULL << 9), 1)) {
7895 vmx_dump_vmcs_field(VMCS_GUEST_INTERRUPT_STATUS0x0810,
7896 "Int sts");
7897 }
7898
7899 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7900 IA32_VMX_ENABLE_PML(1ULL << 17), 1)) {
7901 vmx_dump_vmcs_field(VMCS_GUEST_PML_INDEX0x0812, "PML Idx");
7902 }
7903 }
7904
7905 DPRINTF("\n");
7906 vmx_dump_vmcs_field(VMCS_HOST_IA32_ES_SEL0x0C00, "H.ES");
7907 vmx_dump_vmcs_field(VMCS_HOST_IA32_CS_SEL0x0C02, "H.CS");
7908 vmx_dump_vmcs_field(VMCS_HOST_IA32_SS_SEL0x0C04, "H.SS");
7909 DPRINTF("\n");
7910 vmx_dump_vmcs_field(VMCS_HOST_IA32_DS_SEL0x0C06, "H.DS");
7911 vmx_dump_vmcs_field(VMCS_HOST_IA32_FS_SEL0x0C08, "H.FS");
7912 vmx_dump_vmcs_field(VMCS_HOST_IA32_GS_SEL0x0C0A, "H.GS");
7913 DPRINTF("\n");
7914
7915 vmx_dump_vmcs_field(VMCS_IO_BITMAP_A0x2000, "I/O Bitmap A");
7916 DPRINTF("\n");
7917 vmx_dump_vmcs_field(VMCS_IO_BITMAP_B0x2002, "I/O Bitmap B");
7918 DPRINTF("\n");
7919
7920 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
7921 IA32_VMX_USE_MSR_BITMAPS(1ULL << 28), 1)) {
7922 vmx_dump_vmcs_field(VMCS_MSR_BITMAP_ADDRESS0x2004, "MSR Bitmap");
7923 DPRINTF("\n");
7924 }
7925
7926 vmx_dump_vmcs_field(VMCS_EXIT_STORE_MSR_ADDRESS0x2006, "Exit Store MSRs");
7927 DPRINTF("\n");
7928 vmx_dump_vmcs_field(VMCS_EXIT_LOAD_MSR_ADDRESS0x2008, "Exit Load MSRs");
7929 DPRINTF("\n");
7930 vmx_dump_vmcs_field(VMCS_ENTRY_LOAD_MSR_ADDRESS0x200A, "Entry Load MSRs");
7931 DPRINTF("\n");
7932 vmx_dump_vmcs_field(VMCS_EXECUTIVE_VMCS_POINTER0x200C, "Exec VMCS Ptr");
7933 DPRINTF("\n");
7934
7935 if (has_sec) {
7936 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7937 IA32_VMX_ENABLE_PML(1ULL << 17), 1)) {
7938 vmx_dump_vmcs_field(VMCS_PML_ADDRESS0x200E, "PML Addr");
7939 DPRINTF("\n");
7940 }
7941 }
7942
7943 vmx_dump_vmcs_field(VMCS_TSC_OFFSET0x2010, "TSC Offset");
7944 DPRINTF("\n");
7945
7946 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
7947 IA32_VMX_USE_TPR_SHADOW(1ULL << 21), 1)) {
7948 vmx_dump_vmcs_field(VMCS_VIRTUAL_APIC_ADDRESS0x2012,
7949 "Virtual APIC Addr");
7950 DPRINTF("\n");
7951 }
7952
7953 if (has_sec) {
7954 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7955 IA32_VMX_VIRTUALIZE_APIC(1ULL << 0), 1)) {
7956 vmx_dump_vmcs_field(VMCS_APIC_ACCESS_ADDRESS0x2014,
7957 "APIC Access Addr");
7958 DPRINTF("\n");
7959 }
7960 }
7961
7962 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS0x481,
7963 IA32_VMX_PROCESS_POSTED_INTERRUPTS(1ULL << 7), 1)) {
7964 vmx_dump_vmcs_field(VMCS_POSTED_INTERRUPT_DESC0x2016,
7965 "Posted Int Desc Addr");
7966 DPRINTF("\n");
7967 }
7968
7969 if (has_sec) {
7970 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7971 IA32_VMX_ENABLE_VM_FUNCTIONS(1ULL << 13), 1)) {
7972 vmx_dump_vmcs_field(VMCS_VM_FUNCTION_CONTROLS0x2018,
7973 "VM Function Controls");
7974 DPRINTF("\n");
7975 }
7976
7977 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7978 IA32_VMX_ENABLE_EPT(1ULL << 1), 1)) {
7979 vmx_dump_vmcs_field(VMCS_GUEST_IA32_EPTP0x201A,
7980 "EPT Pointer");
7981 DPRINTF("\n");
7982 }
7983
7984 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
7985 IA32_VMX_VIRTUAL_INTERRUPT_DELIVERY(1ULL << 9), 1)) {
7986 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_00x201C,
7987 "EOI Exit Bitmap 0");
7988 DPRINTF("\n");
7989 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_10x201E,
7990 "EOI Exit Bitmap 1");
7991 DPRINTF("\n");
7992 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_20x2020,
7993 "EOI Exit Bitmap 2");
7994 DPRINTF("\n");
7995 vmx_dump_vmcs_field(VMCS_EOI_EXIT_BITMAP_30x2022,
7996 "EOI Exit Bitmap 3");
7997 DPRINTF("\n");
7998 }
7999
8000 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
8001 IA32_VMX_ENABLE_VM_FUNCTIONS(1ULL << 13), 1)) {
8002 /* We assume all CPUs have the same VMFUNC caps */
8003 if (curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_vmx.vmx_vm_func & 0x1) {
8004 vmx_dump_vmcs_field(VMCS_EPTP_LIST_ADDRESS0x2024,
8005 "EPTP List Addr");
8006 DPRINTF("\n");
8007 }
8008 }
8009
8010 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
8011 IA32_VMX_VMCS_SHADOWING(1ULL << 14), 1)) {
8012 vmx_dump_vmcs_field(VMCS_VMREAD_BITMAP_ADDRESS0x2026,
8013 "VMREAD Bitmap Addr");
8014 DPRINTF("\n");
8015 vmx_dump_vmcs_field(VMCS_VMWRITE_BITMAP_ADDRESS0x2028,
8016 "VMWRITE Bitmap Addr");
8017 DPRINTF("\n");
8018 }
8019
8020 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
8021 IA32_VMX_EPT_VIOLATION_VE(1ULL << 18), 1)) {
8022 vmx_dump_vmcs_field(VMCS_VIRTUALIZATION_EXC_ADDRESS0x202A,
8023 "#VE Addr");
8024 DPRINTF("\n");
8025 }
8026
8027 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
8028 IA32_VMX_ENABLE_XSAVES_XRSTORS(1ULL << 20), 1)) {
8029 vmx_dump_vmcs_field(VMCS_XSS_EXITING_BITMAP0x202C,
8030 "XSS exiting bitmap addr");
8031 DPRINTF("\n");
8032 }
8033
8034 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
8035 IA32_VMX_ENABLE_ENCLS_EXITING(1ULL << 15), 1)) {
8036 vmx_dump_vmcs_field(VMCS_ENCLS_EXITING_BITMAP0x202E,
8037 "Encls exiting bitmap addr");
8038 DPRINTF("\n");
8039 }
8040
8041 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
8042 IA32_VMX_ENABLE_TSC_SCALING(1ULL << 25), 1)) {
8043 vmx_dump_vmcs_field(VMCS_TSC_MULTIPLIER0x2032,
8044 "TSC scaling factor");
8045 DPRINTF("\n");
8046 }
8047
8048 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
8049 IA32_VMX_ENABLE_EPT(1ULL << 1), 1)) {
8050 vmx_dump_vmcs_field(VMCS_GUEST_PHYSICAL_ADDRESS0x2400,
8051 "Guest PA");
8052 DPRINTF("\n");
8053 }
8054 }
8055
8056 vmx_dump_vmcs_field(VMCS_LINK_POINTER0x2800, "VMCS Link Pointer");
8057 DPRINTF("\n");
8058 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DEBUGCTL0x2802, "Guest DEBUGCTL");
8059 DPRINTF("\n");
8060
8061 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS0x484,
8062 IA32_VMX_LOAD_IA32_PAT_ON_ENTRY(1ULL << 14), 1) ||
8063 vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483,
8064 IA32_VMX_SAVE_IA32_PAT_ON_EXIT(1ULL << 18), 1)) {
8065 vmx_dump_vmcs_field(VMCS_GUEST_IA32_PAT0x2804,
8066 "Guest PAT");
8067 DPRINTF("\n");
8068 }
8069
8070 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS0x484,
8071 IA32_VMX_LOAD_IA32_EFER_ON_ENTRY(1ULL << 15), 1) ||
8072 vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483,
8073 IA32_VMX_SAVE_IA32_EFER_ON_EXIT(1ULL << 20), 1)) {
8074 vmx_dump_vmcs_field(VMCS_GUEST_IA32_EFER0x2806,
8075 "Guest EFER");
8076 DPRINTF("\n");
8077 }
8078
8079 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS0x484,
8080 IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_ENTRY(1ULL << 13), 1)) {
8081 vmx_dump_vmcs_field(VMCS_GUEST_IA32_PERF_GBL_CTRL0x2808,
8082 "Guest Perf Global Ctrl");
8083 DPRINTF("\n");
8084 }
8085
8086 if (has_sec) {
8087 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
8088 IA32_VMX_ENABLE_EPT(1ULL << 1), 1)) {
8089 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE00x280A, "Guest PDPTE0");
8090 DPRINTF("\n");
8091 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE10x280C, "Guest PDPTE1");
8092 DPRINTF("\n");
8093 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE20x280E, "Guest PDPTE2");
8094 DPRINTF("\n");
8095 vmx_dump_vmcs_field(VMCS_GUEST_PDPTE30x2810, "Guest PDPTE3");
8096 DPRINTF("\n");
8097 }
8098 }
8099
8100 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_ENTRY_CTLS0x484,
8101 IA32_VMX_LOAD_IA32_BNDCFGS_ON_ENTRY(1ULL << 16), 1) ||
8102 vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483,
8103 IA32_VMX_CLEAR_IA32_BNDCFGS_ON_EXIT(1ULL << 23), 1)) {
8104 vmx_dump_vmcs_field(VMCS_GUEST_IA32_BNDCFGS0x2812,
8105 "Guest BNDCFGS");
8106 DPRINTF("\n");
8107 }
8108
8109 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483,
8110 IA32_VMX_LOAD_IA32_PAT_ON_EXIT(1ULL << 19), 1)) {
8111 vmx_dump_vmcs_field(VMCS_HOST_IA32_PAT0x2C00,
8112 "Host PAT");
8113 DPRINTF("\n");
8114 }
8115
8116 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483,
8117 IA32_VMX_LOAD_IA32_EFER_ON_EXIT(1ULL << 21), 1)) {
8118 vmx_dump_vmcs_field(VMCS_HOST_IA32_EFER0x2C02,
8119 "Host EFER");
8120 DPRINTF("\n");
8121 }
8122
8123 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_EXIT_CTLS0x483,
8124 IA32_VMX_LOAD_IA32_PERF_GLOBAL_CTRL_ON_EXIT(1ULL << 12), 1)) {
8125 vmx_dump_vmcs_field(VMCS_HOST_IA32_PERF_GBL_CTRL0x2C04,
8126 "Host Perf Global Ctrl");
8127 DPRINTF("\n");
8128 }
8129
8130 vmx_dump_vmcs_field(VMCS_PINBASED_CTLS0x4000, "Pinbased Ctrls");
8131 vmx_dump_vmcs_field(VMCS_PROCBASED_CTLS0x4002, "Procbased Ctrls");
8132 DPRINTF("\n");
8133 vmx_dump_vmcs_field(VMCS_EXCEPTION_BITMAP0x4004, "Exception Bitmap");
8134 vmx_dump_vmcs_field(VMCS_PF_ERROR_CODE_MASK0x4006, "#PF Err Code Mask");
8135 DPRINTF("\n");
8136 vmx_dump_vmcs_field(VMCS_PF_ERROR_CODE_MATCH0x4008, "#PF Err Code Match");
8137 vmx_dump_vmcs_field(VMCS_CR3_TARGET_COUNT0x400A, "CR3 Tgt Count");
8138 DPRINTF("\n");
8139 vmx_dump_vmcs_field(VMCS_EXIT_CTLS0x400C, "Exit Ctrls");
8140 vmx_dump_vmcs_field(VMCS_EXIT_MSR_STORE_COUNT0x400E, "Exit MSR Store Ct");
8141 DPRINTF("\n");
8142 vmx_dump_vmcs_field(VMCS_EXIT_MSR_LOAD_COUNT0x4010, "Exit MSR Load Ct");
8143 vmx_dump_vmcs_field(VMCS_ENTRY_CTLS0x4012, "Entry Ctrls");
8144 DPRINTF("\n");
8145 vmx_dump_vmcs_field(VMCS_ENTRY_MSR_LOAD_COUNT0x4014, "Entry MSR Load Ct");
8146 vmx_dump_vmcs_field(VMCS_ENTRY_INTERRUPTION_INFO0x4016, "Entry Int. Info");
8147 DPRINTF("\n");
8148 vmx_dump_vmcs_field(VMCS_ENTRY_EXCEPTION_ERROR_CODE0x4018,
8149 "Entry Ex. Err Code");
8150 vmx_dump_vmcs_field(VMCS_ENTRY_INSTRUCTION_LENGTH0x401A, "Entry Insn Len");
8151 DPRINTF("\n");
8152
8153 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED_CTLS0x482,
8154 IA32_VMX_USE_TPR_SHADOW(1ULL << 21), 1)) {
8155 vmx_dump_vmcs_field(VMCS_TPR_THRESHOLD0x401C, "TPR Threshold");
8156 DPRINTF("\n");
8157 }
8158
8159 if (has_sec) {
8160 vmx_dump_vmcs_field(VMCS_PROCBASED2_CTLS0x401E, "2ndary Ctrls");
8161 DPRINTF("\n");
8162 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PROCBASED2_CTLS0x48B,
8163 IA32_VMX_PAUSE_LOOP_EXITING(1ULL << 10), 1)) {
8164 vmx_dump_vmcs_field(VMCS_PLE_GAP0x4020, "PLE Gap");
8165 vmx_dump_vmcs_field(VMCS_PLE_WINDOW0x4022, "PLE Window");
8166 }
8167 DPRINTF("\n");
8168 }
8169
8170 vmx_dump_vmcs_field(VMCS_INSTRUCTION_ERROR0x4400, "Insn Error");
8171 vmx_dump_vmcs_field(VMCS_EXIT_REASON0x4402, "Exit Reason");
8172 DPRINTF("\n");
8173
8174 vmx_dump_vmcs_field(VMCS_EXIT_INTERRUPTION_INFO0x4404, "Exit Int. Info");
8175 vmx_dump_vmcs_field(VMCS_EXIT_INTERRUPTION_ERR_CODE0x4406,
8176 "Exit Int. Err Code");
8177 DPRINTF("\n");
8178
8179 vmx_dump_vmcs_field(VMCS_IDT_VECTORING_INFO0x4408, "IDT vect info");
8180 vmx_dump_vmcs_field(VMCS_IDT_VECTORING_ERROR_CODE0x440A,
8181 "IDT vect err code");
8182 DPRINTF("\n");
8183
8184 vmx_dump_vmcs_field(VMCS_INSTRUCTION_LENGTH0x440C, "Insn Len");
8185 vmx_dump_vmcs_field(VMCS_EXIT_INSTRUCTION_INFO0x440E, "Exit Insn Info");
8186 DPRINTF("\n");
8187
8188 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_LIMIT0x4800, "G. ES Lim");
8189 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_LIMIT0x4802, "G. CS Lim");
8190 DPRINTF("\n");
8191
8192 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_LIMIT0x4804, "G. SS Lim");
8193 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_LIMIT0x4806, "G. DS Lim");
8194 DPRINTF("\n");
8195
8196 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_LIMIT0x4808, "G. FS Lim");
8197 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_LIMIT0x480A, "G. GS Lim");
8198 DPRINTF("\n");
8199
8200 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_LIMIT0x480C, "G. LDTR Lim");
8201 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_LIMIT0x480E, "G. TR Lim");
8202 DPRINTF("\n");
8203
8204 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GDTR_LIMIT0x4810, "G. GDTR Lim");
8205 vmx_dump_vmcs_field(VMCS_GUEST_IA32_IDTR_LIMIT0x4812, "G. IDTR Lim");
8206 DPRINTF("\n");
8207
8208 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_AR0x4814, "G. ES AR");
8209 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_AR0x4816, "G. CS AR");
8210 DPRINTF("\n");
8211
8212 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_AR0x4818, "G. SS AR");
8213 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_AR0x481A, "G. DS AR");
8214 DPRINTF("\n");
8215
8216 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_AR0x481C, "G. FS AR");
8217 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_AR0x481E, "G. GS AR");
8218 DPRINTF("\n");
8219
8220 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_AR0x4820, "G. LDTR AR");
8221 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_AR0x4822, "G. TR AR");
8222 DPRINTF("\n");
8223
8224 vmx_dump_vmcs_field(VMCS_GUEST_INTERRUPTIBILITY_ST0x4824, "G. Int St.");
8225 vmx_dump_vmcs_field(VMCS_GUEST_ACTIVITY_STATE0x4826, "G. Act St.");
8226 DPRINTF("\n");
8227
8228 vmx_dump_vmcs_field(VMCS_GUEST_SMBASE0x4828, "G. SMBASE");
8229 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_CS0x482A, "G. SYSENTER CS");
8230 DPRINTF("\n");
8231
8232 if (vcpu_vmx_check_cap(vcpu, IA32_VMX_PINBASED_CTLS0x481,
8233 IA32_VMX_ACTIVATE_VMX_PREEMPTION_TIMER(1ULL << 6), 1)) {
8234 vmx_dump_vmcs_field(VMCS_VMX_PREEMPTION_TIMER_VAL0x482E,
8235 "VMX Preempt Timer");
8236 DPRINTF("\n");
8237 }
8238
8239 vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_CS0x4C00, "H. SYSENTER CS");
8240 DPRINTF("\n");
8241
8242 vmx_dump_vmcs_field(VMCS_CR0_MASK0x6000, "CR0 Mask");
8243 DPRINTF("\n");
8244 vmx_dump_vmcs_field(VMCS_CR4_MASK0x6002, "CR4 Mask");
8245 DPRINTF("\n");
8246
8247 vmx_dump_vmcs_field(VMCS_CR0_READ_SHADOW0x6004, "CR0 RD Shadow");
8248 DPRINTF("\n");
8249 vmx_dump_vmcs_field(VMCS_CR4_READ_SHADOW0x6006, "CR4 RD Shadow");
8250 DPRINTF("\n");
8251
8252 /* We assume all CPUs have the same max CR3 target ct */
8253 cr3_tgt_ct = curcpu()({struct cpu_info *__ci; asm volatile("movq %%gs:%P1,%0" : "=r"
(__ci) :"n" (__builtin_offsetof(struct cpu_info, ci_self)));
__ci;})
->ci_vmm_cap.vcc_vmx.vmx_cr3_tgt_count;
8254 DPRINTF("Max CR3 target count: 0x%x\n", cr3_tgt_ct);
8255 if (cr3_tgt_ct <= VMX_MAX_CR3_TARGETS256) {
8256 for (i = 0 ; i < cr3_tgt_ct; i++) {
8257 vmx_dump_vmcs_field(VMCS_CR3_TARGET_00x6008 + (2 * i),
8258 "CR3 Target");
8259 DPRINTF("\n");
8260 }
8261 } else {
8262 DPRINTF("(Bogus CR3 Target Count > %d", VMX_MAX_CR3_TARGETS);
8263 }
8264
8265 vmx_dump_vmcs_field(VMCS_GUEST_EXIT_QUALIFICATION0x6400, "G. Exit Qual");
8266 DPRINTF("\n");
8267 vmx_dump_vmcs_field(VMCS_IO_RCX0x6402, "I/O RCX");
8268 DPRINTF("\n");
8269 vmx_dump_vmcs_field(VMCS_IO_RSI0x6404, "I/O RSI");
8270 DPRINTF("\n");
8271 vmx_dump_vmcs_field(VMCS_IO_RDI0x6406, "I/O RDI");
8272 DPRINTF("\n");
8273 vmx_dump_vmcs_field(VMCS_IO_RIP0x6408, "I/O RIP");
8274 DPRINTF("\n");
8275 vmx_dump_vmcs_field(VMCS_GUEST_LINEAR_ADDRESS0x640A, "G. Lin Addr");
8276 DPRINTF("\n");
8277 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR00x6800, "G. CR0");
8278 DPRINTF("\n");
8279 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR30x6802, "G. CR3");
8280 DPRINTF("\n");
8281 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CR40x6804, "G. CR4");
8282 DPRINTF("\n");
8283 vmx_dump_vmcs_field(VMCS_GUEST_IA32_ES_BASE0x6806, "G. ES Base");
8284 DPRINTF("\n");
8285 vmx_dump_vmcs_field(VMCS_GUEST_IA32_CS_BASE0x6808, "G. CS Base");
8286 DPRINTF("\n");
8287 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SS_BASE0x680A, "G. SS Base");
8288 DPRINTF("\n");
8289 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DS_BASE0x680C, "G. DS Base");
8290 DPRINTF("\n");
8291 vmx_dump_vmcs_field(VMCS_GUEST_IA32_FS_BASE0x680E, "G. FS Base");
8292 DPRINTF("\n");
8293 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GS_BASE0x6810, "G. GS Base");
8294 DPRINTF("\n");
8295 vmx_dump_vmcs_field(VMCS_GUEST_IA32_LDTR_BASE0x6812, "G. LDTR Base");
8296 DPRINTF("\n");
8297 vmx_dump_vmcs_field(VMCS_GUEST_IA32_TR_BASE0x6814, "G. TR Base");
8298 DPRINTF("\n");
8299 vmx_dump_vmcs_field(VMCS_GUEST_IA32_GDTR_BASE0x6816, "G. GDTR Base");
8300 DPRINTF("\n");
8301 vmx_dump_vmcs_field(VMCS_GUEST_IA32_IDTR_BASE0x6818, "G. IDTR Base");
8302 DPRINTF("\n");
8303 vmx_dump_vmcs_field(VMCS_GUEST_IA32_DR70x681A, "G. DR7");
8304 DPRINTF("\n");
8305 vmx_dump_vmcs_field(VMCS_GUEST_IA32_RSP0x681C, "G. RSP");
8306 DPRINTF("\n");
8307 vmx_dump_vmcs_field(VMCS_GUEST_IA32_RIP0x681E, "G. RIP");
8308 DPRINTF("\n");
8309 vmx_dump_vmcs_field(VMCS_GUEST_IA32_RFLAGS0x6820, "G. RFLAGS");
8310 DPRINTF("\n");
8311 vmx_dump_vmcs_field(VMCS_GUEST_PENDING_DBG_EXC0x6822, "G. Pend Dbg Exc");
8312 DPRINTF("\n");
8313 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_ESP0x6824, "G. SYSENTER ESP");
8314 DPRINTF("\n");
8315 vmx_dump_vmcs_field(VMCS_GUEST_IA32_SYSENTER_EIP0x6826, "G. SYSENTER EIP");
8316 DPRINTF("\n");
8317 vmx_dump_vmcs_field(VMCS_HOST_IA32_CR00x6C00, "H. CR0");
8318 DPRINTF("\n");
8319 vmx_dump_vmcs_field(VMCS_HOST_IA32_CR30x6C02, "H. CR3");
8320 DPRINTF("\n");
8321 vmx_dump_vmcs_field(VMCS_HOST_IA32_CR40x6C04, "H. CR4");
8322 DPRINTF("\n");
8323 vmx_dump_vmcs_field(VMCS_HOST_IA32_FS_BASE0x6C06, "H. FS Base");
8324 DPRINTF("\n");
8325 vmx_dump_vmcs_field(VMCS_HOST_IA32_GS_BASE0x6C08, "H. GS Base");
8326 DPRINTF("\n");
8327 vmx_dump_vmcs_field(VMCS_HOST_IA32_TR_BASE0x6C0A, "H. TR Base");
8328 DPRINTF("\n");
8329 vmx_dump_vmcs_field(VMCS_HOST_IA32_GDTR_BASE0x6C0C, "H. GDTR Base");
8330 DPRINTF("\n");
8331 vmx_dump_vmcs_field(VMCS_HOST_IA32_IDTR_BASE0x6C0E, "H. IDTR Base");
8332 DPRINTF("\n");
8333 vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_ESP0x6C10, "H. SYSENTER ESP");
8334 DPRINTF("\n");
8335 vmx_dump_vmcs_field(VMCS_HOST_IA32_SYSENTER_EIP0x6C12, "H. SYSENTER EIP");
8336 DPRINTF("\n");
8337 vmx_dump_vmcs_field(VMCS_HOST_IA32_RSP0x6C14, "H. RSP");
8338 DPRINTF("\n");
8339 vmx_dump_vmcs_field(VMCS_HOST_IA32_RIP0x6C16, "H. RIP");
8340 DPRINTF("\n");
8341}
8342
8343/*
8344 * vmx_vcpu_dump_regs
8345 *
8346 * Debug function to print vcpu regs from the current vcpu
8347 * note - vmcs for 'vcpu' must be on this pcpu.
8348 *
8349 * Parameters:
8350 * vcpu - vcpu whose registers should be dumped
8351 */
8352void
8353vmx_vcpu_dump_regs(struct vcpu *vcpu)
8354{
8355 uint64_t r;
8356 int i;
8357 struct vmx_msr_store *msr_store;
8358
8359 /* XXX reformat this for 32 bit guest as needed */
8360 DPRINTF("vcpu @ %p in %s mode\n", vcpu, vmm_decode_cpu_mode(vcpu));
8361 i = vmm_get_guest_cpu_cpl(vcpu);
8362 if (i == -1)
8363 DPRINTF(" CPL=unknown\n");
8364 else
8365 DPRINTF(" CPL=%d\n", i);
8366 DPRINTF(" rax=0x%016llx rbx=0x%016llx rcx=0x%016llx\n",
8367 vcpu->vc_gueststate.vg_rax, vcpu->vc_gueststate.vg_rbx,
8368 vcpu->vc_gueststate.vg_rcx);
8369 DPRINTF(" rdx=0x%016llx rbp=0x%016llx rdi=0x%016llx\n",
8370 vcpu->vc_gueststate.vg_rdx, vcpu->vc_gueststate.vg_rbp,
8371 vcpu->vc_gueststate.vg_rdi);
8372 DPRINTF(" rsi=0x%016llx r8=0x%016llx r9=0x%016llx\n",
8373 vcpu->vc_gueststate.vg_rsi, vcpu->vc_gueststate.vg_r8,
8374 vcpu->vc_gueststate.vg_r9);
8375 DPRINTF(" r10=0x%016llx r11=0x%016llx r12=0x%016llx\n",
8376 vcpu->vc_gueststate.vg_r10, vcpu->vc_gueststate.vg_r11,
8377 vcpu->vc_gueststate.vg_r12);
8378 DPRINTF(" r13=0x%016llx r14=0x%016llx r15=0x%016llx\n",
8379 vcpu->vc_gueststate.vg_r13, vcpu->vc_gueststate.vg_r14,
8380 vcpu->vc_gueststate.vg_r15);
8381
8382 DPRINTF(" rip=0x%016llx rsp=", vcpu->vc_gueststate.vg_rip);
8383 if (vmread(VMCS_GUEST_IA32_RSP0x681C, &r))
8384 DPRINTF("(error reading)\n");
8385 else
8386 DPRINTF("0x%016llx\n", r);
8387
8388 DPRINTF(" rflags=");
8389 if (vmread(VMCS_GUEST_IA32_RFLAGS0x6820, &r))
8390 DPRINTF("(error reading)\n");
8391 else {
8392 DPRINTF("0x%016llx ", r);
8393 vmm_decode_rflags(r);
8394 }
8395
8396 DPRINTF(" cr0=");
8397 if (vmread(VMCS_GUEST_IA32_CR00x6800, &r))
8398 DPRINTF("(error reading)\n");
8399 else {
8400 DPRINTF("0x%016llx ", r);
8401 vmm_decode_cr0(r);
8402 }
8403
8404 DPRINTF(" cr2=0x%016llx\n", vcpu->vc_gueststate.vg_cr2);
8405
8406 DPRINTF(" cr3=");
8407 if (vmread(VMCS_GUEST_IA32_CR30x6802, &r))
8408 DPRINTF("(error reading)\n");
8409 else {
8410 DPRINTF("0x%016llx ", r);
8411 vmm_decode_cr3(r);
8412 }
8413
8414 DPRINTF(" cr4=");
8415 if (vmread(VMCS_GUEST_IA32_CR40x6804, &r))
8416 DPRINTF("(error reading)\n");
8417 else {
8418 DPRINTF("0x%016llx ", r);
8419 vmm_decode_cr4(r);
8420 }
8421
8422 DPRINTF(" --Guest Segment Info--\n");
8423
8424 DPRINTF(" cs=");
8425 if (vmread(VMCS_GUEST_IA32_CS_SEL0x0802, &r))
8426 DPRINTF("(error reading)");
8427 else
8428 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
8429
8430 DPRINTF(" base=");
8431 if (vmread(VMCS_GUEST_IA32_CS_BASE0x6808, &r))
8432 DPRINTF("(error reading)");
8433 else
8434 DPRINTF("0x%016llx", r);
8435
8436 DPRINTF(" limit=");
8437 if (vmread(VMCS_GUEST_IA32_CS_LIMIT0x4802, &r))
8438 DPRINTF("(error reading)");
8439 else
8440 DPRINTF("0x%016llx", r);
8441
8442 DPRINTF(" a/r=");
8443 if (vmread(VMCS_GUEST_IA32_CS_AR0x4816, &r))
8444 DPRINTF("(error reading)\n");
8445 else {
8446 DPRINTF("0x%04llx\n ", r);
8447 vmm_segment_desc_decode(r);
8448 }
8449
8450 DPRINTF(" ds=");
8451 if (vmread(VMCS_GUEST_IA32_DS_SEL0x0806, &r))
8452 DPRINTF("(error reading)");
8453 else
8454 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
8455
8456 DPRINTF(" base=");
8457 if (vmread(VMCS_GUEST_IA32_DS_BASE0x680C, &r))
8458 DPRINTF("(error reading)");
8459 else
8460 DPRINTF("0x%016llx", r);
8461
8462 DPRINTF(" limit=");
8463 if (vmread(VMCS_GUEST_IA32_DS_LIMIT0x4806, &r))
8464 DPRINTF("(error reading)");
8465 else
8466 DPRINTF("0x%016llx", r);
8467
8468 DPRINTF(" a/r=");
8469 if (vmread(VMCS_GUEST_IA32_DS_AR0x481A, &r))
8470 DPRINTF("(error reading)\n");
8471 else {
8472 DPRINTF("0x%04llx\n ", r);
8473 vmm_segment_desc_decode(r);
8474 }
8475
8476 DPRINTF(" es=");
8477 if (vmread(VMCS_GUEST_IA32_ES_SEL0x0800, &r))
8478 DPRINTF("(error reading)");
8479 else
8480 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
8481
8482 DPRINTF(" base=");
8483 if (vmread(VMCS_GUEST_IA32_ES_BASE0x6806, &r))
8484 DPRINTF("(error reading)");
8485 else
8486 DPRINTF("0x%016llx", r);
8487
8488 DPRINTF(" limit=");
8489 if (vmread(VMCS_GUEST_IA32_ES_LIMIT0x4800, &r))
8490 DPRINTF("(error reading)");
8491 else
8492 DPRINTF("0x%016llx", r);
8493
8494 DPRINTF(" a/r=");
8495 if (vmread(VMCS_GUEST_IA32_ES_AR0x4814, &r))
8496 DPRINTF("(error reading)\n");
8497 else {
8498 DPRINTF("0x%04llx\n ", r);
8499 vmm_segment_desc_decode(r);
8500 }
8501
8502 DPRINTF(" fs=");
8503 if (vmread(VMCS_GUEST_IA32_FS_SEL0x0808, &r))
8504 DPRINTF("(error reading)");
8505 else
8506 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
8507
8508 DPRINTF(" base=");
8509 if (vmread(VMCS_GUEST_IA32_FS_BASE0x680E, &r))
8510 DPRINTF("(error reading)");
8511 else
8512 DPRINTF("0x%016llx", r);
8513
8514 DPRINTF(" limit=");
8515 if (vmread(VMCS_GUEST_IA32_FS_LIMIT0x4808, &r))
8516 DPRINTF("(error reading)");
8517 else
8518 DPRINTF("0x%016llx", r);
8519
8520 DPRINTF(" a/r=");
8521 if (vmread(VMCS_GUEST_IA32_FS_AR0x481C, &r))
8522 DPRINTF("(error reading)\n");
8523 else {
8524 DPRINTF("0x%04llx\n ", r);
8525 vmm_segment_desc_decode(r);
8526 }
8527
8528 DPRINTF(" gs=");
8529 if (vmread(VMCS_GUEST_IA32_GS_SEL0x080A, &r))
8530 DPRINTF("(error reading)");
8531 else
8532 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
8533
8534 DPRINTF(" base=");
8535 if (vmread(VMCS_GUEST_IA32_GS_BASE0x6810, &r))
8536 DPRINTF("(error reading)");
8537 else
8538 DPRINTF("0x%016llx", r);
8539
8540 DPRINTF(" limit=");
8541 if (vmread(VMCS_GUEST_IA32_GS_LIMIT0x480A, &r))
8542 DPRINTF("(error reading)");
8543 else
8544 DPRINTF("0x%016llx", r);
8545
8546 DPRINTF(" a/r=");
8547 if (vmread(VMCS_GUEST_IA32_GS_AR0x481E, &r))
8548 DPRINTF("(error reading)\n");
8549 else {
8550 DPRINTF("0x%04llx\n ", r);
8551 vmm_segment_desc_decode(r);
8552 }
8553
8554 DPRINTF(" ss=");
8555 if (vmread(VMCS_GUEST_IA32_SS_SEL0x0804, &r))
8556 DPRINTF("(error reading)");
8557 else
8558 DPRINTF("0x%04llx rpl=%lld", r, r & 0x3);
8559
8560 DPRINTF(" base=");
8561 if (vmread(VMCS_GUEST_IA32_SS_BASE0x680A, &r))
8562 DPRINTF("(error reading)");
8563 else
8564 DPRINTF("0x%016llx", r);
8565
8566 DPRINTF(" limit=");
8567 if (vmread(VMCS_GUEST_IA32_SS_LIMIT0x4804, &r))
8568 DPRINTF("(error reading)");
8569 else
8570 DPRINTF("0x%016llx", r);
8571
8572 DPRINTF(" a/r=");
8573 if (vmread(VMCS_GUEST_IA32_SS_AR0x4818, &r))
8574 DPRINTF("(error reading)\n");
8575 else {
8576 DPRINTF("0x%04llx\n ", r);
8577 vmm_segment_desc_decode(r);
8578 }
8579
8580 DPRINTF(" tr=");
8581 if (vmread(VMCS_GUEST_IA32_TR_SEL0x080E, &r))
8582 DPRINTF("(error reading)");
8583 else
8584 DPRINTF("0x%04llx", r);
8585
8586 DPRINTF(" base=");
8587 if (vmread(VMCS_GUEST_IA32_TR_BASE0x6814, &r))
8588 DPRINTF("(error reading)");
8589 else
8590 DPRINTF("0x%016llx", r);
8591
8592 DPRINTF(" limit=");
8593 if (vmread(VMCS_GUEST_IA32_TR_LIMIT0x480E, &r))
8594 DPRINTF("(error reading)");
8595 else
8596 DPRINTF("0x%016llx", r);
8597
8598 DPRINTF(" a/r=");
8599 if (vmread(VMCS_GUEST_IA32_TR_AR0x4822, &r))
8600 DPRINTF("(error reading)\n");
8601 else {
8602 DPRINTF("0x%04llx\n ", r);
8603 vmm_segment_desc_decode(r);
8604 }
8605
8606 DPRINTF(" gdtr base=");
8607 if (vmread(VMCS_GUEST_IA32_GDTR_BASE0x6816, &r))
8608 DPRINTF("(error reading) ");
8609 else
8610 DPRINTF("0x%016llx", r);
8611
8612 DPRINTF(" limit=");
8613 if (vmread(VMCS_GUEST_IA32_GDTR_LIMIT0x4810, &r))
8614 DPRINTF("(error reading)\n");
8615 else
8616 DPRINTF("0x%016llx\n", r);
8617
8618 DPRINTF(" idtr base=");
8619 if (vmread(VMCS_GUEST_IA32_IDTR_BASE0x6818, &r))
8620 DPRINTF("(error reading) ");
8621 else
8622 DPRINTF("0x%016llx", r);
8623
8624 DPRINTF(" limit=");
8625 if (vmread(VMCS_GUEST_IA32_IDTR_LIMIT0x4812, &r))
8626 DPRINTF("(error reading)\n");
8627 else
8628 DPRINTF("0x%016llx\n", r);
8629
8630 DPRINTF(" ldtr=");
8631 if (vmread(VMCS_GUEST_IA32_LDTR_SEL0x080C, &r))
8632 DPRINTF("(error reading)");
8633 else
8634 DPRINTF("0x%04llx", r);
8635
8636 DPRINTF(" base=");
8637 if (vmread(VMCS_GUEST_IA32_LDTR_BASE0x6812, &r))
8638 DPRINTF("(error reading)");
8639 else
8640 DPRINTF("0x%016llx", r);
8641
8642 DPRINTF(" limit=");
8643 if (vmread(VMCS_GUEST_IA32_LDTR_LIMIT0x480C, &r))
8644 DPRINTF("(error reading)");
8645 else
8646 DPRINTF("0x%016llx", r);
8647
8648 DPRINTF(" a/r=");
8649 if (vmread(VMCS_GUEST_IA32_LDTR_AR0x4820, &r))
8650 DPRINTF("(error reading)\n");
8651 else {
8652 DPRINTF("0x%04llx\n ", r);
8653 vmm_segment_desc_decode(r);
8654 }
8655
8656 DPRINTF(" --Guest MSRs @ 0x%016llx (paddr: 0x%016llx)--\n",
8657 (uint64_t)vcpu->vc_vmx_msr_exit_save_va,
8658 (uint64_t)vcpu->vc_vmx_msr_exit_save_pa);
8659
8660 msr_store = (struct vmx_msr_store *)vcpu->vc_vmx_msr_exit_save_va;
8661
8662 for (i = 0; i < VMX_NUM_MSR_STORE7; i++) {
8663 DPRINTF(" MSR %d @ %p : 0x%08llx (%s), "
8664 "value=0x%016llx ",
8665 i, &msr_store[i], msr_store[i].vms_index,
8666 msr_name_decode(msr_store[i].vms_index),
8667 msr_store[i].vms_data);
8668 vmm_decode_msr_value(msr_store[i].vms_index,
8669 msr_store[i].vms_data);
8670 }
8671}
8672
8673/*
8674 * msr_name_decode
8675 *
8676 * Returns a human-readable name for the MSR supplied in 'msr'.
8677 *
8678 * Parameters:
8679 * msr - The MSR to decode
8680 *
8681 * Return value:
8682 * NULL-terminated character string containing the name of the MSR requested
8683 */
8684const char *
8685msr_name_decode(uint32_t msr)
8686{
8687 /*
8688 * Add as needed. Also consider adding a decode function when
8689 * adding to this table.
8690 */
8691
8692 switch (msr) {
8693 case MSR_TSC0x010: return "TSC";
8694 case MSR_APICBASE0x01b: return "APIC base";
8695 case MSR_IA32_FEATURE_CONTROL0x03a: return "IA32 feature control";
8696 case MSR_PERFCTR00x0c1: return "perf counter 0";
8697 case MSR_PERFCTR10x0c2: return "perf counter 1";
8698 case MSR_TEMPERATURE_TARGET0x1a2: return "temperature target";
8699 case MSR_MTRRcap0x0fe: return "MTRR cap";
8700 case MSR_PERF_STATUS0x198: return "perf status";
8701 case MSR_PERF_CTL0x199: return "perf control";
8702 case MSR_MTRRvarBase0x200: return "MTRR variable base";
8703 case MSR_MTRRfix64K_000000x250: return "MTRR fixed 64K";
8704 case MSR_MTRRfix16K_800000x258: return "MTRR fixed 16K";
8705 case MSR_MTRRfix4K_C00000x268: return "MTRR fixed 4K";
8706 case MSR_CR_PAT0x277: return "PAT";
8707 case MSR_MTRRdefType0x2ff: return "MTRR default type";
8708 case MSR_EFER0xc0000080: return "EFER";
8709 case MSR_STAR0xc0000081: return "STAR";
8710 case MSR_LSTAR0xc0000082: return "LSTAR";
8711 case MSR_CSTAR0xc0000083: return "CSTAR";
8712 case MSR_SFMASK0xc0000084: return "SFMASK";
8713 case MSR_FSBASE0xc0000100: return "FSBASE";
8714 case MSR_GSBASE0xc0000101: return "GSBASE";
8715 case MSR_KERNELGSBASE0xc0000102: return "KGSBASE";
8716 case MSR_MISC_ENABLE0x1a0: return "Misc Enable";
8717 default: return "Unknown MSR";
8718 }
8719}
8720
8721/*
8722 * vmm_segment_desc_decode
8723 *
8724 * Debug function to print segment information for supplied descriptor
8725 *
8726 * Parameters:
8727 * val - The A/R bytes for the segment descriptor to decode
8728 */
8729void
8730vmm_segment_desc_decode(uint64_t val)
8731{
8732 uint16_t ar;
8733 uint8_t g, type, s, dpl, p, dib, l;
8734 uint32_t unusable;
8735
8736 /* Exit early on unusable descriptors */
8737 unusable = val & 0x10000;
8738 if (unusable) {
8739 DPRINTF("(unusable)\n");
8740 return;
8741 }
8742
8743 ar = (uint16_t)val;
8744
8745 g = (ar & 0x8000) >> 15;
8746 dib = (ar & 0x4000) >> 14;
8747 l = (ar & 0x2000) >> 13;
8748 p = (ar & 0x80) >> 7;
8749 dpl = (ar & 0x60) >> 5;
8750 s = (ar & 0x10) >> 4;
8751 type = (ar & 0xf);
8752
8753 DPRINTF("granularity=%d dib=%d l(64 bit)=%d present=%d sys=%d ",
8754 g, dib, l, p, s);
8755
8756 DPRINTF("type=");
8757 if (!s) {
8758 switch (type) {
8759 case SDT_SYSLDT2: DPRINTF("ldt\n"); break;
8760 case SDT_SYS386TSS9: DPRINTF("tss (available)\n"); break;
8761 case SDT_SYS386BSY11: DPRINTF("tss (busy)\n"); break;
8762 case SDT_SYS386CGT12: DPRINTF("call gate\n"); break;
8763 case SDT_SYS386IGT14: DPRINTF("interrupt gate\n"); break;
8764 case SDT_SYS386TGT15: DPRINTF("trap gate\n"); break;
8765 /* XXX handle 32 bit segment types by inspecting mode */
8766 default: DPRINTF("unknown");
8767 }
8768 } else {
8769 switch (type + 16) {
8770 case SDT_MEMRO16: DPRINTF("data, r/o\n"); break;
8771 case SDT_MEMROA17: DPRINTF("data, r/o, accessed\n"); break;
8772 case SDT_MEMRW18: DPRINTF("data, r/w\n"); break;
8773 case SDT_MEMRWA19: DPRINTF("data, r/w, accessed\n"); break;
8774 case SDT_MEMROD20: DPRINTF("data, r/o, expand down\n"); break;
8775 case SDT_MEMRODA21: DPRINTF("data, r/o, expand down, "
8776 "accessed\n");
8777 break;
8778 case SDT_MEMRWD22: DPRINTF("data, r/w, expand down\n"); break;
8779 case SDT_MEMRWDA23: DPRINTF("data, r/w, expand down, "
8780 "accessed\n");
8781 break;
8782 case SDT_MEME24: DPRINTF("code, x only\n"); break;
8783 case SDT_MEMEA25: DPRINTF("code, x only, accessed\n");
8784 case SDT_MEMER26: DPRINTF("code, r/x\n"); break;
8785 case SDT_MEMERA27: DPRINTF("code, r/x, accessed\n"); break;
8786 case SDT_MEMEC28: DPRINTF("code, x only, conforming\n"); break;
8787 case SDT_MEMEAC29: DPRINTF("code, x only, conforming, "
8788 "accessed\n");
8789 break;
8790 case SDT_MEMERC30: DPRINTF("code, r/x, conforming\n"); break;
8791 case SDT_MEMERAC31: DPRINTF("code, r/x, conforming, accessed\n");
8792 break;
8793 }
8794 }
8795}
8796
8797void
8798vmm_decode_cr0(uint64_t cr0)
8799{
8800 struct vmm_reg_debug_info cr0_info[11] = {
8801 { CR0_PG0x80000000, "PG ", "pg " },
8802 { CR0_CD0x40000000, "CD ", "cd " },
8803 { CR0_NW0x20000000, "NW ", "nw " },
8804 { CR0_AM0x00040000, "AM ", "am " },
8805 { CR0_WP0x00010000, "WP ", "wp " },
8806 { CR0_NE0x00000020, "NE ", "ne " },
8807 { CR0_ET0x00000010, "ET ", "et " },
8808 { CR0_TS0x00000008, "TS ", "ts " },
8809 { CR0_EM0x00000004, "EM ", "em " },
8810 { CR0_MP0x00000002, "MP ", "mp " },
8811 { CR0_PE0x00000001, "PE", "pe" }
8812 };
8813
8814 uint8_t i;
8815
8816 DPRINTF("(");
8817 for (i = 0; i < nitems(cr0_info)(sizeof((cr0_info)) / sizeof((cr0_info)[0])); i++)
8818 if (cr0 & cr0_info[i].vrdi_bit)
8819 DPRINTF("%s", cr0_info[i].vrdi_present);
8820 else
8821 DPRINTF("%s", cr0_info[i].vrdi_absent);
8822
8823 DPRINTF(")\n");
8824}
8825
8826void
8827vmm_decode_cr3(uint64_t cr3)
8828{
8829 struct vmm_reg_debug_info cr3_info[2] = {
8830 { CR3_PWT(1ULL << 3), "PWT ", "pwt "},
8831 { CR3_PCD(1ULL << 4), "PCD", "pcd"}
8832 };
8833
8834 uint64_t cr4;
8835 uint8_t i;
8836
8837 if (vmread(VMCS_GUEST_IA32_CR40x6804, &cr4)) {
8838 DPRINTF("(error)\n");
8839 return;
8840 }
8841
8842 /* If CR4.PCIDE = 0, interpret CR3.PWT and CR3.PCD */
8843 if ((cr4 & CR4_PCIDE0x00020000) == 0) {
8844 DPRINTF("(");
8845 for (i = 0 ; i < nitems(cr3_info)(sizeof((cr3_info)) / sizeof((cr3_info)[0])) ; i++)
8846 if (cr3 & cr3_info[i].vrdi_bit)
8847 DPRINTF("%s", cr3_info[i].vrdi_present);
8848 else
8849 DPRINTF("%s", cr3_info[i].vrdi_absent);
8850
8851 DPRINTF(")\n");
8852 } else {
8853 DPRINTF("(pcid=0x%llx)\n", cr3 & 0xFFF);
8854 }
8855}
8856
8857void
8858vmm_decode_cr4(uint64_t cr4)
8859{
8860 struct vmm_reg_debug_info cr4_info[19] = {
8861 { CR4_PKE0x00400000, "PKE ", "pke "},
8862 { CR4_SMAP0x00200000, "SMAP ", "smap "},
8863 { CR4_SMEP0x00100000, "SMEP ", "smep "},
8864 { CR4_OSXSAVE0x00040000, "OSXSAVE ", "osxsave "},
8865 { CR4_PCIDE0x00020000, "PCIDE ", "pcide "},
8866 { CR4_FSGSBASE0x00010000, "FSGSBASE ", "fsgsbase "},
8867 { CR4_SMXE0x00004000, "SMXE ", "smxe "},
8868 { CR4_VMXE0x00002000, "VMXE ", "vmxe "},
8869 { CR4_OSXMMEXCPT0x00000400, "OSXMMEXCPT ", "osxmmexcpt "},
8870 { CR4_OSFXSR0x00000200, "OSFXSR ", "osfxsr "},
8871 { CR4_PCE0x00000100, "PCE ", "pce "},
8872 { CR4_PGE0x00000080, "PGE ", "pge "},
8873 { CR4_MCE0x00000040, "MCE ", "mce "},
8874 { CR4_PAE0x00000020, "PAE ", "pae "},
8875 { CR4_PSE0x00000010, "PSE ", "pse "},
8876 { CR4_DE0x00000008, "DE ", "de "},
8877 { CR4_TSD0x00000004, "TSD ", "tsd "},
8878 { CR4_PVI0x00000002, "PVI ", "pvi "},
8879 { CR4_VME0x00000001, "VME", "vme"}
8880 };
8881
8882 uint8_t i;
8883
8884 DPRINTF("(");
8885 for (i = 0; i < nitems(cr4_info)(sizeof((cr4_info)) / sizeof((cr4_info)[0])); i++)
8886 if (cr4 & cr4_info[i].vrdi_bit)
8887 DPRINTF("%s", cr4_info[i].vrdi_present);
8888 else
8889 DPRINTF("%s", cr4_info[i].vrdi_absent);
8890
8891 DPRINTF(")\n");
8892}
8893
8894void
8895vmm_decode_apicbase_msr_value(uint64_t apicbase)
8896{
8897 struct vmm_reg_debug_info apicbase_info[3] = {
8898 { APICBASE_BSP0x100, "BSP ", "bsp "},
8899 { APICBASE_ENABLE_X2APIC0x400, "X2APIC ", "x2apic "},
8900 { APICBASE_GLOBAL_ENABLE0x800, "GLB_EN", "glb_en"}
8901 };
8902
8903 uint8_t i;
8904
8905 DPRINTF("(");
8906 for (i = 0; i < nitems(apicbase_info)(sizeof((apicbase_info)) / sizeof((apicbase_info)[0])); i++)
8907 if (apicbase & apicbase_info[i].vrdi_bit)
8908 DPRINTF("%s", apicbase_info[i].vrdi_present);
8909 else
8910 DPRINTF("%s", apicbase_info[i].vrdi_absent);
8911
8912 DPRINTF(")\n");
8913}
8914
8915void
8916vmm_decode_ia32_fc_value(uint64_t fcr)
8917{
8918 struct vmm_reg_debug_info fcr_info[4] = {
8919 { IA32_FEATURE_CONTROL_LOCK0x01, "LOCK ", "lock "},
8920 { IA32_FEATURE_CONTROL_SMX_EN0x02, "SMX ", "smx "},
8921 { IA32_FEATURE_CONTROL_VMX_EN0x04, "VMX ", "vmx "},
8922 { IA32_FEATURE_CONTROL_SENTER_EN(1ULL << 15), "SENTER ", "senter "}
8923 };
8924
8925 uint8_t i;
8926
8927 DPRINTF("(");
8928 for (i = 0; i < nitems(fcr_info)(sizeof((fcr_info)) / sizeof((fcr_info)[0])); i++)
8929 if (fcr & fcr_info[i].vrdi_bit)
8930 DPRINTF("%s", fcr_info[i].vrdi_present);
8931 else
8932 DPRINTF("%s", fcr_info[i].vrdi_absent);
8933
8934 if (fcr & IA32_FEATURE_CONTROL_SENTER_EN(1ULL << 15))
8935 DPRINTF(" [SENTER param = 0x%llx]",
8936 (fcr & IA32_FEATURE_CONTROL_SENTER_PARAM_MASK) >> 8);
8937
8938 DPRINTF(")\n");
8939}
8940
8941void
8942vmm_decode_mtrrcap_value(uint64_t val)
8943{
8944 struct vmm_reg_debug_info mtrrcap_info[3] = {
8945 { MTRRcap_FIXED0x100, "FIXED ", "fixed "},
8946 { MTRRcap_WC0x400, "WC ", "wc "},
8947 { MTRRcap_SMRR0x800, "SMRR ", "smrr "}
8948 };
8949
8950 uint8_t i;
8951
8952 DPRINTF("(");
8953 for (i = 0; i < nitems(mtrrcap_info)(sizeof((mtrrcap_info)) / sizeof((mtrrcap_info)[0])); i++)
8954 if (val & mtrrcap_info[i].vrdi_bit)
8955 DPRINTF("%s", mtrrcap_info[i].vrdi_present);
8956 else
8957 DPRINTF("%s", mtrrcap_info[i].vrdi_absent);
8958
8959 if (val & MTRRcap_FIXED0x100)
8960 DPRINTF(" [nr fixed ranges = 0x%llx]",
8961 (val & 0xff));
8962
8963 DPRINTF(")\n");
8964}
8965
8966void
8967vmm_decode_perf_status_value(uint64_t val)
8968{
8969 DPRINTF("(pstate ratio = 0x%llx)\n", (val & 0xffff));
8970}
8971
8972void vmm_decode_perf_ctl_value(uint64_t val)
8973{
8974 DPRINTF("(%s ", (val & PERF_CTL_TURBO) ? "TURBO" : "turbo");
8975 DPRINTF("pstate req = 0x%llx)\n", (val & 0xfffF));
8976}
8977
8978void
8979vmm_decode_mtrrdeftype_value(uint64_t mtrrdeftype)
8980{
8981 struct vmm_reg_debug_info mtrrdeftype_info[2] = {
8982 { MTRRdefType_FIXED_ENABLE0x400, "FIXED ", "fixed "},
8983 { MTRRdefType_ENABLE0x800, "ENABLED ", "enabled "},
8984 };
8985
8986 uint8_t i;
8987 int type;
8988
8989 DPRINTF("(");
8990 for (i = 0; i < nitems(mtrrdeftype_info)(sizeof((mtrrdeftype_info)) / sizeof((mtrrdeftype_info)[0])); i++)
8991 if (mtrrdeftype & mtrrdeftype_info[i].vrdi_bit)
8992 DPRINTF("%s", mtrrdeftype_info[i].vrdi_present);
8993 else
8994 DPRINTF("%s", mtrrdeftype_info[i].vrdi_absent);
8995
8996 DPRINTF("type = ");
8997 type = mtrr2mrt(mtrrdeftype & 0xff);
8998 switch (type) {
8999 case MDF_UNCACHEABLE(1<<0): DPRINTF("UC"); break;
9000 case MDF_WRITECOMBINE(1<<1): DPRINTF("WC"); break;
9001 case MDF_WRITETHROUGH(1<<2): DPRINTF("WT"); break;
9002 case MDF_WRITEPROTECT(1<<4): DPRINTF("RO"); break;
9003 case MDF_WRITEBACK(1<<3): DPRINTF("WB"); break;
9004 case MDF_UNKNOWN(1<<5):
9005 default:
9006 DPRINTF("??");
9007 break;
9008 }
9009
9010 DPRINTF(")\n");
9011}
9012
9013void
9014vmm_decode_efer_value(uint64_t efer)
9015{
9016 struct vmm_reg_debug_info efer_info[4] = {
9017 { EFER_SCE0x00000001, "SCE ", "sce "},
9018 { EFER_LME0x00000100, "LME ", "lme "},
9019 { EFER_LMA0x00000400, "LMA ", "lma "},
9020 { EFER_NXE0x00000800, "NXE", "nxe"},
9021 };
9022
9023 uint8_t i;
9024
9025 DPRINTF("(");
9026 for (i = 0; i < nitems(efer_info)(sizeof((efer_info)) / sizeof((efer_info)[0])); i++)
9027 if (efer & efer_info[i].vrdi_bit)
9028 DPRINTF("%s", efer_info[i].vrdi_present);
9029 else
9030 DPRINTF("%s", efer_info[i].vrdi_absent);
9031
9032 DPRINTF(")\n");
9033}
9034
9035void
9036vmm_decode_msr_value(uint64_t msr, uint64_t val)
9037{
9038 switch (msr) {
9039 case MSR_APICBASE0x01b: vmm_decode_apicbase_msr_value(val); break;
9040 case MSR_IA32_FEATURE_CONTROL0x03a: vmm_decode_ia32_fc_value(val); break;
9041 case MSR_MTRRcap0x0fe: vmm_decode_mtrrcap_value(val); break;
9042 case MSR_PERF_STATUS0x198: vmm_decode_perf_status_value(val); break;
9043 case MSR_PERF_CTL0x199: vmm_decode_perf_ctl_value(val); break;
9044 case MSR_MTRRdefType0x2ff: vmm_decode_mtrrdeftype_value(val); break;
9045 case MSR_EFER0xc0000080: vmm_decode_efer_value(val); break;
9046 case MSR_MISC_ENABLE0x1a0: vmm_decode_misc_enable_value(val); break;
9047 default: DPRINTF("\n");
9048 }
9049}
9050
9051void
9052vmm_decode_rflags(uint64_t rflags)
9053{
9054 struct vmm_reg_debug_info rflags_info[16] = {
9055 { PSL_C0x00000001, "CF ", "cf "},
9056 { PSL_PF0x00000004, "PF ", "pf "},
9057 { PSL_AF0x00000010, "AF ", "af "},
9058 { PSL_Z0x00000040, "ZF ", "zf "},
9059 { PSL_N0x00000080, "SF ", "sf "}, /* sign flag */
9060 { PSL_T0x00000100, "TF ", "tf "},
9061 { PSL_I0x00000200, "IF ", "if "},
9062 { PSL_D0x00000400, "DF ", "df "},
9063 { PSL_V0x00000800, "OF ", "of "}, /* overflow flag */
9064 { PSL_NT0x00004000, "NT ", "nt "},
9065 { PSL_RF0x00010000, "RF ", "rf "},
9066 { PSL_VM0x00020000, "VM ", "vm "},
9067 { PSL_AC0x00040000, "AC ", "ac "},
9068 { PSL_VIF0x00080000, "VIF ", "vif "},
9069 { PSL_VIP0x00100000, "VIP ", "vip "},
9070 { PSL_ID0x00200000, "ID ", "id "},
9071 };
9072
9073 uint8_t i, iopl;
9074
9075 DPRINTF("(");
9076 for (i = 0; i < nitems(rflags_info)(sizeof((rflags_info)) / sizeof((rflags_info)[0])); i++)
9077 if (rflags & rflags_info[i].vrdi_bit)
9078 DPRINTF("%s", rflags_info[i].vrdi_present);
9079 else
9080 DPRINTF("%s", rflags_info[i].vrdi_absent);
9081
9082 iopl = (rflags & PSL_IOPL0x00003000) >> 12;
9083 DPRINTF("IOPL=%d", iopl);
9084
9085 DPRINTF(")\n");
9086}
9087
9088void
9089vmm_decode_misc_enable_value(uint64_t misc)
9090{
9091 struct vmm_reg_debug_info misc_info[10] = {
9092 { MISC_ENABLE_FAST_STRINGS(1 << 0), "FSE ", "fse "},
9093 { MISC_ENABLE_TCC(1 << 3), "TCC ", "tcc "},
9094 { MISC_ENABLE_PERF_MON_AVAILABLE(1 << 7), "PERF ", "perf "},
9095 { MISC_ENABLE_BTS_UNAVAILABLE(1 << 11), "BTSU ", "btsu "},
9096 { MISC_ENABLE_PEBS_UNAVAILABLE(1 << 12), "PEBSU ", "pebsu "},
9097 { MISC_ENABLE_EIST_ENABLED(1 << 16), "EIST ", "eist "},
9098 { MISC_ENABLE_ENABLE_MONITOR_FSM(1 << 18), "MFSM ", "mfsm "},
9099 { MISC_ENABLE_LIMIT_CPUID_MAXVAL(1 << 22), "CMAX ", "cmax "},
9100 { MISC_ENABLE_xTPR_MESSAGE_DISABLE(1 << 23), "xTPRD ", "xtprd "},
9101 { MISC_ENABLE_XD_BIT_DISABLE(1 << 2), "NXD", "nxd"},
9102 };
9103
9104 uint8_t i;
9105
9106 DPRINTF("(");
9107 for (i = 0; i < nitems(misc_info)(sizeof((misc_info)) / sizeof((misc_info)[0])); i++)
9108 if (misc & misc_info[i].vrdi_bit)
9109 DPRINTF("%s", misc_info[i].vrdi_present);
9110 else
9111 DPRINTF("%s", misc_info[i].vrdi_absent);
9112
9113 DPRINTF(")\n");
9114}
9115
9116const char *
9117vmm_decode_cpu_mode(struct vcpu *vcpu)
9118{
9119 int mode = vmm_get_guest_cpu_mode(vcpu);
9120
9121 switch (mode) {
9122 case VMM_CPU_MODE_REAL: return "real";
9123 case VMM_CPU_MODE_PROT: return "16 bit protected";
9124 case VMM_CPU_MODE_PROT32: return "32 bit protected";
9125 case VMM_CPU_MODE_COMPAT: return "compatibility";
9126 case VMM_CPU_MODE_LONG: return "long";
9127 default: return "unknown";
9128 }
9129}
9130#endif /* VMM_DEBUG */